/* strchr - find a character in a string

   Copyright (c) 2014-2022, ARM Limited
   All rights Reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the company nor the names of its contributors
         may be used to endorse or promote products derived from this
         software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
/* See strchr-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "asmdefs.h"

/* Arguments and results.  */
#define srcin           x0
#define chrin           w1

#define result          x0

#define src             x2
#define tmp1            x3
#define wtmp2           w4
#define tmp3            x5

#define vrepchr         v0
#define vdata1          v1
#define vdata2          v2
#define vhas_nul1       v3
#define vhas_nul2       v4
#define vhas_chr1       v5
#define vhas_chr2       v6
#define vrepmask_0      v7
#define vrepmask_c      v16
#define vend1           v17
#define vend2           v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit 0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string, a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */

/* Locals and temporaries.  */

ENTRY (strchr)
        PTR_ARG (0)
        /* Magic constant 0xc0300c03 to allow us to identify which lane
           matches the requested byte.  Even bits are set if the character
           matches, odd bits if either the char is NUL or matches.  */
        mov     wtmp2, 0x0c03
        movk    wtmp2, 0xc030, lsl 16
        dup     vrepchr.16b, chrin
        bic     src, srcin, #31         /* Work with aligned 32-byte hunks.  */
        dup     vrepmask_c.4s, wtmp2
        ands    tmp1, srcin, #31
        add     vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s     /* equiv: lsl #1 */
        b.eq    L(loop)
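
/* Illustrative scalar sketch of the syndrome scheme described in the
   "Core algorithm" comment above.  This is plain C, not part of the
   build, and the helper name syndrome_for_hunk is invented for
   illustration.  It also simplifies one detail: here the odd bit marks
   only a NUL byte, whereas the vector code sets it for NUL-or-match;
   the decode is equivalent because on a match count-trailing-zeros
   reaches the even bit first.

      #include <stdint.h>

      // Build a 64-bit syndrome for a 32-byte hunk: 2 bits per byte,
      // in string order, so ctz(syndrome) identifies the first event.
      static uint64_t syndrome_for_hunk (const unsigned char *hunk,
                                         unsigned char c)
      {
        uint64_t syndrome = 0;
        for (int i = 0; i < 32; i++)
          {
            if (hunk[i] == c)
              syndrome |= 1ull << (2 * i);  // even bit: character match
            if (hunk[i] == 0)
              syndrome |= 2ull << (2 * i);  // odd bit: NUL terminator
          }
        return syndrome;
      }

   A count-trailing-zeros of the syndrome yields 2 * byte_index for a
   character match and 2 * byte_index + 1 for a NUL, which is exactly
   the even/odd test performed at L(tail) below.  */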

        /* Input string is not 32-byte aligned.  Rather than forcing
           the padding bytes to a safe value, we calculate the syndrome
           for all the bytes, but then mask off those bits of the
           syndrome that are related to the padding.  */
        ld1     {vdata1.16b, vdata2.16b}, [src], #32
        neg     tmp1, tmp1
        cmeq    vhas_nul1.16b, vdata1.16b, #0
        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
        cmeq    vhas_nul2.16b, vdata2.16b, #0
        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
        bif     vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
        bif     vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
        and     vend1.16b, vhas_nul1.16b, vrepmask_c.16b
        and     vend2.16b, vhas_nul2.16b, vrepmask_c.16b
        lsl     tmp1, tmp1, #1
        addp    vend1.16b, vend1.16b, vend2.16b         // 256->128
        mov     tmp3, #~0
        addp    vend1.16b, vend1.16b, vend2.16b         // 128->64
        lsr     tmp1, tmp3, tmp1

        mov     tmp3, vend1.d[0]
        bic     tmp1, tmp3, tmp1        // Mask padding bits.
        cbnz    tmp1, L(tail)

        .p2align 4
L(loop):
        ld1     {vdata1.16b, vdata2.16b}, [src], #32
        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
        /* vhas_chr is 0xff on a match and 0 otherwise, so an unsigned >=
           compare against the data sets a lane iff the byte matched the
           requested character or the byte itself is NUL.  */
        cmhs    vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
        cmhs    vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
        orr     vend1.16b, vhas_nul1.16b, vhas_nul2.16b
        umaxp   vend1.16b, vend1.16b, vend1.16b
        mov     tmp1, vend1.d[0]
        cbz     tmp1, L(loop)

        /* Termination condition found.  Now need to establish exactly why
           we terminated.  */
        bif     vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
        bif     vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
        and     vend1.16b, vhas_nul1.16b, vrepmask_c.16b
        and     vend2.16b, vhas_nul2.16b, vrepmask_c.16b
        addp    vend1.16b, vend1.16b, vend2.16b         // 256->128
        addp    vend1.16b, vend1.16b, vend2.16b         // 128->64
        mov     tmp1, vend1.d[0]
L(tail):
        /* Count the trailing zeros, by bit reversing...  */
        rbit    tmp1, tmp1
        /* Re-bias source.  */
        sub     src, src, #32
        clz     tmp1, tmp1      /* ... and counting the leading zeros.  */
        /* tmp1 is even if the target character was found first.  Otherwise
           we've found the end of string and we weren't looking for NUL.  */
        tst     tmp1, #1
        add     result, src, tmp1, lsr #1
        csel    result, result, xzr, eq
        ret

END (strchr)
#endif
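
/* A minimal usage sketch (plain C, not part of this file) showing the
   contract the tail sequence above implements: an even syndrome index
   yields a pointer into the string, an odd one yields NULL, and
   searching for '\0' returns a pointer to the terminator itself.

      #include <assert.h>
      #include <string.h>

      int main (void)
      {
        const char *s = "hello";
        assert (strchr (s, 'l') == s + 2);   // found: first match wins
        assert (strchr (s, 'z') == NULL);    // hit NUL first: NULL
        assert (strchr (s, '\0') == s + 5);  // special case: terminator
        return 0;
      }
 */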