/* strrchr - find last instance of a character in a string

   Copyright (c) 2014, ARM Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the company nor the names of its contributors
         may be used to endorse or promote products derived from this
         software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__)
/* See strrchr-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon available.
 */

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1

#define result		x0

#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5
#define src_match	x6
#define src_offset	x7
#define const_m1	x8
#define tmp4		x9
#define nul_match	x10
#define chr_match	x11

#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7
#define vrepmask_c	v16
#define vend1		v17
#define vend2		v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit 0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string, a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */
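/* As a hedged illustration of the scheme above, here is a rough C
   sketch of the per-hunk syndrome computation (ours, not part of this
   file or of any build; the helper name "syndrome" is hypothetical):

       #include <stdint.h>

       // Two bits per byte: bit 0 of each tuple marks a character
       // match, bit 1 marks the NUL terminator.
       static uint64_t syndrome (const uint8_t *hunk, uint8_t c)
       {
         uint64_t syn = 0;
         for (int i = 0; i < 32; i++)
           {
             if (hunk[i] == c)
               syn |= 1ULL << (2 * i);
             if (hunk[i] == 0)
               syn |= 2ULL << (2 * i);
           }
         // count_trailing_zeros (syn) >> 1 is the first byte of
         // interest; bit 1 of its tuple tells us whether it was NUL.
         return syn;
       }
*/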
/* Locals and temporaries.  */

	.macro def_fn f p2align=0
	.text
	.p2align \p2align
	.global \f
	.type \f, %function
\f:
	.endm

def_fn strrchr
	/* Magic constant 0x40100401 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x80200802 used
	   similarly for NUL termination.  */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	mov	src_offset, #0
	ands	tmp1, srcin, #31
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s	/* equiv: lsl #1 */
	b.eq	.Laligned

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b	// 128->64
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b	// 128->64
	mov	nul_match, vhas_nul1.d[0]
	lsl	tmp1, tmp1, #1
	mov	const_m1, #~0
	mov	chr_match, vhas_chr1.d[0]
	lsr	tmp3, const_m1, tmp1

	bic	nul_match, nul_match, tmp3	// Mask padding bits.
	bic	chr_match, chr_match, tmp3	// Mask padding bits.
	cbnz	nul_match, .Ltail

.Lloop:
	/* If the previous hunk contained a match, remember where.  */
	cmp	chr_match, #0
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne
.Laligned:
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	addp	vend1.16b, vhas_nul1.16b, vhas_nul2.16b		// 256->128
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vend1.16b, vend1.16b			// 128->64
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b	// 128->64
	mov	nul_match, vend1.d[0]
	mov	chr_match, vhas_chr1.d[0]
	cbz	nul_match, .Lloop

	/* Found a NUL; recompute its syndrome in the masked, positional
	   form needed by the tail code.  */
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
	mov	nul_match, vhas_nul1.d[0]

.Ltail:
	/* Work out exactly where the string ends.  The subtract/eor
	   pair builds a mask of the bits up to and including the first
	   NUL bit of the syndrome.  */
	sub	tmp4, nul_match, #1
	eor	tmp4, tmp4, nul_match
	ands	chr_match, chr_match, tmp4
	/* And pick the values corresponding to the last match.  */
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne

	/* Count down from the top of the syndrome to find the last match.  */
	clz	tmp3, src_offset
	/* Src_match points beyond the word containing the match, so we can
	   simply subtract half the bit-offset into the syndrome.  Because
	   we are counting down, we need to go back one more character.  */
	add	tmp3, tmp3, #2
	sub	result, src_match, tmp3, lsr #1
	/* But if the syndrome shows no match was found, then return NULL.  */
	cmp	src_offset, #0
	csel	result, result, xzr, ne

	ret

	.size	strrchr, . - strrchr
#endif
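/* Usage sketch (ours, illustrative only): the routine implements the
   standard strrchr() contract, including the NUL special case noted
   in the algorithm comment above:

       #include <string.h>

       const char *s = "a/b/c";
       char *last = strrchr (s, '/');	// &s[3], the last '/'
       char *none = strrchr (s, 'z');	// NULL, no match
       char *term = strrchr (s, '\0');	// &s[5], the terminating NUL
*/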