/* strrchr - find last instance of a character in a string

   Copyright (c) 2014, ARM Limited
   All rights Reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the company nor the names of its contributors
         may be used to endorse or promote products derived from this
         software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */

#include <picolibc.h>

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__) || !defined(__ARM_NEON)
/* See strrchr-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "asmdefs.h"

/* Arguments and results.  */
#define srcin		x0	/* In:  const char *s  */
#define chrin		w1	/* In:  int c (only the low byte is replicated)  */

#define result		x0	/* Out: pointer to last match, or NULL  */

/* Scratch registers -- all caller-saved under AAPCS64, so no
   stack frame is needed.  */
#define src		x2	/* Cursor; always 32-byte aligned after setup.  */
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5
#define src_match	x6	/* End of the last hunk that held a char match.  */
#define src_offset	x7	/* Syndrome of that hunk (0 => no match yet).  */
#define const_m1	x8
#define tmp4		x9
#define nul_match	x10	/* NUL syndrome of the current hunk.  */
#define chr_match	x11	/* Char syndrome of the current hunk.  */

#define vrepchr		v0	/* chrin replicated across all 16 lanes.  */
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7	/* Per-lane weights for the NUL syndrome.  */
#define vrepmask_c	v16	/* Per-lane weights for the char syndrome.  */
#define vend1		v17
#define vend2		v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */

/* char *strrchr (const char *s, int c)
   In:   srcin (x0) = s, chrin (w1) = c.
   Out:  result (x0) = pointer to the last occurrence of c in s,
	 or NULL if c does not occur (NUL itself is findable via the
	 char syndrome, per the note above).
   Clobbers: x2-x11, v0-v7, v16-v18, flags.  Leaf routine; no stack.  */
ENTRY (strrchr)
	PTR_ARG (0)
	/* Magic constant 0x40100401 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x80200802 used
	   similarly for NUL termination.  */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	mov	src_offset, #0		/* No char match seen yet.  */
	ands	tmp1, srcin, #31	/* tmp1 = misalignment of srcin.  */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(aligned)

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1		/* Bytes of padding, mod 64 (for lsr below).  */
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Convert the 0x00/0xff lane masks into the weighted 2-bit-per-byte
	   encoding, then narrow 256->64 bits with pairwise adds.  */
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vhas_nul1.16b, vhas_chr1.16b	// 128->64
	mov	nul_match, vend1.d[0]
	lsl	tmp1, tmp1, #1		/* Two syndrome bits per padding byte.  */
	mov	const_m1, #~0
	lsr	tmp3, const_m1, tmp1	/* tmp3 = ~0 >> (2 * padding bytes):
					   keeps only bits that belong to real
					   string bytes at the low end.  */
	mov	chr_match, vend1.d[1]

	bic	nul_match, nul_match, tmp3	// Mask padding bits.
	bic	chr_match, chr_match, tmp3	// Mask padding bits.
	cbnz	nul_match, L(tail)	/* String ends inside first hunk.  */

	.p2align 4
L(loop):
	/* Invariant at loop top: the previous hunk held no NUL.  If it
	   held a char match, record where (src already points past it).  */
	cmp	chr_match, #0
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne
L(aligned):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Cheap NUL test for the hot loop: the unsigned pairwise minimum
	   is zero somewhere iff some byte of the hunk is zero.  The full
	   weighted NUL syndrome is only built after the loop exits.  */
	uminp	vend1.16b, vdata1.16b, vdata2.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	cmeq	vend1.16b, vend1.16b, 0
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vend1.16b, vhas_chr1.16b	// 128->64
	mov	nul_match, vend1.d[0]
	mov	chr_match, vend1.d[1]
	cbz	nul_match, L(loop)

	/* NUL found in this hunk: rebuild the properly weighted NUL
	   syndrome so L(tail) can locate the exact terminator bit.  */
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
	mov	nul_match, vhas_nul1.d[0]

L(tail):
	/* Work out exactly where the string ends.  (x-1)^x sets every bit
	   up to and including the lowest set bit of nul_match, i.e. all
	   syndrome positions at or before the terminator.  */
	sub	tmp4, nul_match, #1
	eor	tmp4, tmp4, nul_match
	ands	chr_match, chr_match, tmp4	/* Drop matches past the NUL.  */
	/* And pick the values corresponding to the last match.  */
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne

	/* Count down from the top of the syndrome to find the last match.  */
	clz	tmp3, src_offset
	/* Src_match points beyond the word containing the match, so we can
	   simply subtract half the bit-offset into the syndrome.  Because
	   we are counting down, we need to go back one more character.  */
	add	tmp3, tmp3, #2
	sub	result, src_match, tmp3, lsr #1
	/* But if the syndrome shows no match was found, then return NULL.  */
	cmp	src_offset, #0
	csel	result, result, xzr, ne

	ret

END (strrchr)
#endif