/* strchr - find a character in a string

   Copyright (c) 2014, ARM Limited
   All rights Reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the company nor the names of its contributors
         may be used to endorse or promote products derived from this
         software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */

/* Compiled only for speed-optimized 64-bit builds; the size-optimized
   and ILP32 configurations fall back to the C implementation.  */
#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__)
/* See strchr-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

/*----------------------------------------------------------------------
 * char *strchr (const char *s, int c)
 *
 * ABI:      AAPCS64
 * In:       x0 = s (srcin), w1 = c (chrin; only the low byte is used,
 *           since `dup ...16b` replicates the low 8 bits of w1)
 * Out:      x0 = pointer to the first occurrence of c in s, including
 *           the terminating NUL when c == '\0', or NULL if not found
 * Clobbers: x2-x5 (src/tmp1/wtmp2/tmp3), v0-v7, v16-v18, flags
 * Note:     reads whole aligned 32-byte hunks, so it may read up to 31
 *           bytes past the NUL — never past an alignment boundary, so
 *           no page fault can result.
 *--------------------------------------------------------------------*/

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1

#define result		x0

#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7
#define vrepmask_c	v16
#define vend1		v17
#define vend2		v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */

/* Locals and temporaries.  */

	/* Emit a function label with standard .text/.global/.type
	   boilerplate; p2align optionally aligns the entry point.  */
	.macro def_fn f p2align=0
	.text
	.p2align \p2align
	.global \f
	.type \f, %function
\f:
	.endm

def_fn strchr
	/* Magic constant 0x40100401 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x80200802 used
	   similarly for NUL termination.  */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin		/* Splat target byte to all 16 lanes.  */
	bic	src, srcin, #31			/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2		/* Per-lane "char matched" bit pattern.  */
	ands	tmp1, srcin, #31		/* tmp1 = misalignment; Z set if aligned.  */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s	/* equiv: lsl #1 — NUL bit pattern 0x80200802.  */
	b.eq	.Lloop

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1			/* tmp1 = -misalignment (for mask shift below).  */
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Reduce the all-ones/all-zeros lane masks to one bit per lane.  */
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	lsl	tmp1, tmp1, #1			/* Two syndrome bits per byte.  */
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	mov	tmp3, #~0
	/* Only d[0] of this second reduction is consumed; the upper half
	   (fed from vend2 again) is don't-care.  */
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	/* Shift amount is taken mod 64, so ~0 >> (64 - 2*misalign) yields
	   a mask of the 2*misalign low bits — i.e. the padding bytes.  */
	lsr	tmp1, tmp3, tmp1

	mov	tmp3, vend1.d[0]
	bic	tmp1, tmp3, tmp1	// Mask padding bits.
	cbnz	tmp1, .Ltail

.Lloop:
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Use a fast check for the termination condition: any set byte in
	   either comparison means a match or NUL somewhere in the hunk.  */
	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	orr	vend1.16b, vend1.16b, vend2.16b
	addp	vend1.2d, vend1.2d, vend1.2d	/* Fold 128 bits to 64 in d[0].  */
	mov	tmp1, vend1.d[0]
	cbz	tmp1, .Lloop

	/* Termination condition found.  Now need to establish exactly why
	   we terminated: rebuild the full 2-bits-per-byte syndrome.  */
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64 (upper half don't-care)

	mov	tmp1, vend1.d[0]
.Ltail:
	/* Count the trailing zeros, by bit reversing...  */
	rbit	tmp1, tmp1
	/* Re-bias source: src was post-incremented past the hunk.  */
	sub	src, src, #32
	clz	tmp1, tmp1	/* And counting the leading zeros.  */
	/* Tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL.  */
	tst	tmp1, #1
	add	result, src, tmp1, lsr #1	/* Byte index = syndrome bit index / 2.  */
	csel	result, result, xzr, eq		/* NUL hit first -> return NULL.  */
	ret

	.size	strchr, . - strchr
#endif