/* strrchr - find last instance of a character in a string

   Copyright (c) 2014, ARM Limited
   All rights Reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the company nor the names of its contributors
         may be used to endorse or promote products derived from this
         software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
/* See strrchr-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "asmdefs.h"

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1

#define result		x0

#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5
#define src_match	x6
#define src_offset	x7
#define const_m1	x8
#define tmp4		x9
#define nul_match	x10
#define chr_match	x11

#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7
#define vrepmask_c	v16
#define vend1		v17
#define vend2		v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit 0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string, a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */
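
/* Illustrative only, not part of the build: the syndromes for one 32-byte
   hunk correspond to the C sketch below, where chunk and c are hypothetical
   names for the current hunk and the low byte of chrin.  The NEON code that
   follows builds the same two values with cmeq compares, ANDs against the
   magic constants, and addp reductions rather than a byte loop.

	uint64_t chr_syn = 0, nul_syn = 0;
	for (int i = 0; i < 32; i++)
	  {
	    if (chunk[i] == c)
	      chr_syn |= 1ULL << (2 * i);		// bit 0 of each tuple
	    if (chunk[i] == '\0')
	      nul_syn |= 1ULL << (2 * i + 1);		// bit 1 of each tuple
	  }

   Unlike strchr-style code, which ORs the two into a single syndrome, this
   routine keeps them in separate registers, chr_match and nul_match.  */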

ENTRY (strrchr)
	PTR_ARG (0)
	/* Magic constant 0x40100401 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x80200802 used
	   similarly for NUL termination.  */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	mov	src_offset, #0
	ands	tmp1, srcin, #31
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(aligned)

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vhas_nul1.16b, vhas_chr1.16b		// 128->64
	mov	nul_match, vend1.d[0]
	lsl	tmp1, tmp1, #1
	mov	const_m1, #~0
	lsr	tmp3, const_m1, tmp1
	mov	chr_match, vend1.d[1]

	bic	nul_match, nul_match, tmp3	// Mask padding bits.
	bic	chr_match, chr_match, tmp3	// Mask padding bits.
	cbnz	nul_match, L(tail)

	.p2align 4
L(loop):
	/* Remember the most recent hunk (and its syndrome) in which the
	   requested character was seen.  */
	cmp	chr_match, #0
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne
L(aligned):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	uminp	vend1.16b, vdata1.16b, vdata2.16b /* Any NUL survives the pairwise min as 0.  */
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	cmeq	vend1.16b, vend1.16b, 0
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vend1.16b, vhas_chr1.16b		// 128->64
	mov	nul_match, vend1.d[0]
	mov	chr_match, vend1.d[1]
	cbz	nul_match, L(loop)

	/* A NUL was seen in this hunk: recompute the full NUL syndrome so
	   that the terminator can be located exactly.  */
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
	mov	nul_match, vhas_nul1.d[0]

L(tail):
	/* Work out exactly where the string ends.  */
	sub	tmp4, nul_match, #1
	eor	tmp4, tmp4, nul_match	/* Bits up to and including first NUL.  */
	ands	chr_match, chr_match, tmp4
	/* And pick the values corresponding to the last match.  */
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne

	/* Count down from the top of the syndrome to find the last match.  */
	clz	tmp3, src_offset
	/* Src_match points beyond the word containing the match, so we can
	   simply subtract half the bit-offset into the syndrome.  Because
	   we are counting down, we need to go back one more character.  */
	add	tmp3, tmp3, #2
	sub	result, src_match, tmp3, lsr #1
	/* But if the syndrome shows no match was found, then return NULL.  */
	cmp	src_offset, #0
	csel	result, result, xzr, ne

	ret

END (strrchr)
#endif
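
/* Reference notes, illustrative only: the L(tail) sequence above behaves
   like the following C sketch, with src, src_match and src_offset as
   defined earlier and __builtin_clzll standing in for the clz instruction.

	uint64_t mask = (nul_match - 1) ^ nul_match; // bits up to and
	chr_match &= mask;                           // including first NUL
	if (chr_match)              // final hunk holds the last match
	  {
	    src_match = src;
	    src_offset = chr_match;
	  }
	if (src_offset == 0)        // no match anywhere: return NULL
	  return NULL;
	int bit = 63 - __builtin_clzll (src_offset); // highest match bit
	return src_match - 32 + (bit >> 1);          // 2 syndrome bits/byte

   The asm folds the last line into "sub result, src_match, (clz + 2) >> 1",
   which computes the same address.  */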