/* strchr - find a character in a string

   Copyright (c) 2014-2022, ARM Limited
   All rights Reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the company nor the names of its contributors
         may be used to endorse or promote products derived from this
         software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include <picolibc.h>

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__) || !defined(__ARM_NEON)
/* See strchr-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "asmdefs.h"

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1

#define result		x0

#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7
#define vrepmask_c	v16
#define vend1		v17
#define vend2		v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit 0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */
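/* For reference, a rough scalar C model of the syndrome computation
   (an illustrative sketch only, not part of the build; the names
   syndrome32, hunk32 and c are hypothetical).  The vector code below
   computes the same value for 32 bytes at once; the only difference
   is that in the real code the odd bit covers match-or-NUL, which
   does not affect the position of the lowest set bit:

     #include <stdint.h>

     uint64_t
     syndrome32 (const unsigned char *hunk32, unsigned char c)
     {
       uint64_t syndrome = 0;
       for (int i = 0; i < 32; i++)
         {
           if (hunk32[i] == c)
             syndrome |= 1ull << (2 * i);      // even bit: char match
           if (hunk32[i] == '\0')
             syndrome |= 1ull << (2 * i + 1);  // odd bit: NUL
         }
       return syndrome;
     }

   A count_trailing_zeros() of a non-zero syndrome, divided by two,
   gives the index of the terminating byte; an even count means the
   character was found there, an odd count means the string ended
   first.  E.g. a match at byte 5 sets bit 10: ctz = 10, which is
   even, so the answer is hunk32 + (10 >> 1).  */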
/* Locals and temporaries.  */

ENTRY (strchr)
	PTR_ARG (0)
	/* Magic constant 0xc0300c03 to allow us to identify which lane
	   matches the requested byte.  Even bits are set if the character
	   matches, odd bits if either the char is NUL or matches.  */
	mov	wtmp2, 0x0c03
	movk	wtmp2, 0xc030, lsl 16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	ands	tmp1, srcin, #31
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(loop)

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	bif	vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
	bif	vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
	and	vend1.16b, vhas_nul1.16b, vrepmask_c.16b
	and	vend2.16b, vhas_nul2.16b, vrepmask_c.16b
	lsl	tmp1, tmp1, #1
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	mov	tmp3, #~0
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	lsr	tmp1, tmp3, tmp1

	mov	tmp3, vend1.d[0]
	bic	tmp1, tmp3, tmp1	// Mask padding bits.
	cbnz	tmp1, L(tail)

	.p2align 4
L(loop):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* A matching lane in vhas_chr is 0xff, which is unsigned
	   higher-or-same than any byte; a non-matching lane is 0x00,
	   which is only >= a NUL byte.  So cmhs flags bytes that
	   either match the character or are NUL.  */
	cmhs	vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
	cmhs	vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_nul2.16b
	umaxp	vend1.16b, vend1.16b, vend1.16b		// 128->64
	mov	tmp1, vend1.d[0]
	cbz	tmp1, L(loop)

	/* Termination condition found.  Now need to establish exactly why
	   we terminated.  */
	bif	vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
	bif	vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
	and	vend1.16b, vhas_nul1.16b, vrepmask_c.16b
	and	vend2.16b, vhas_nul2.16b, vrepmask_c.16b
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	mov	tmp1, vend1.d[0]
L(tail):
	/* Count the trailing zeros, by bit reversing...  */
	rbit	tmp1, tmp1
	/* Re-bias source.  */
	sub	src, src, #32
	clz	tmp1, tmp1	/* And counting the leading zeros.  */
	/* Tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL.  */
	tst	tmp1, #1
	add	result, src, tmp1, lsr #1
	csel	result, result, xzr, eq
	ret

END (strchr)
#endif
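/* Usage sketch (plain ISO C strchr semantics, for illustration):

     const char *s = "hello";
     strchr (s, 'l');   // s + 2: first 'l'
     strchr (s, 'z');   // NULL: not present
     strchr (s, '\0');  // s + 5: the terminator counts as part of the
                        // string, hence the bit 0 special case noted
                        // in the core algorithm comment above
*/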