/* strchrnul - find a character or nul in a string

   Copyright (c) 2014-2022, ARM Limited
   All rights Reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the company nor the names of its contributors
         may be used to endorse or promote products derived from this
         software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
/* See strchrnul-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "asmdefs.h"

/* char *strchrnul (const char *s, int c)
   Return a pointer to the first occurrence of C in S, or to the
   terminating nul if C does not occur — never NULL.

   Register roles (AAPCS64: x0/w1 in, x0 out; all scratch regs are
   caller-saved, nothing is spilled to the stack).  */

/* Arguments and results.  */
#define srcin		x0	/* Incoming string pointer.  */
#define chrin		w1	/* Character to search for.  */

#define result		x0	/* Returned pointer.  */

/* Integer temporaries.  */
#define src		x2	/* Cursor over aligned 32-byte hunks.  */
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

/* Vector temporaries.  All in the caller-saved v0-v7/v16+ range.  */
#define vrepchr		v0	/* CHRIN replicated to all 16 lanes.  */
#define vdata1		v1	/* First 16 bytes of the current hunk.  */
#define vdata2		v2	/* Second 16 bytes of the current hunk.  */
#define vhas_nul1	v3	/* Per-byte "matched char or nul" flags.  */
#define vhas_nul2	v4
#define vhas_chr1	v5	/* Per-byte "matched char" flags.  */
#define vhas_chr2	v6
#define vrepmask	v7	/* 0x40100401 replicated per 32-bit lane.  */
#define vend1		v16	/* Narrowed syndrome accumulator.  */

/* Core algorithm.

   Each iteration inspects a 32-byte hunk and reduces it to a 64-bit
   syndrome word holding two bits per input byte, ordered so that the
   first byte of the hunk occupies bits 0-1 on both big- and
   little-endian systems.  Bit 0 of a pair is set iff that byte equals
   the wanted character or is nul.  Because syndrome bit order mirrors
   string byte order, a count-trailing-zeros of the syndrome pinpoints
   the terminating byte directly.  */

ENTRY (strchrnul)
	PTR_ARG (0)
	/* Build the lane-identification constant 0x40100401: within each
	   32-bit lane every byte contributes a distinct bit pair, which
	   is what lets the later ADDP narrowing steps preserve byte
	   order in the syndrome.  */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Round down to a 32-byte hunk.  */
	dup	vrepmask.4s, wtmp2
	ands	tmp1, srcin, #31	/* tmp1 = misalignment in bytes.  */
	b.eq	L(loop)

	/* Unaligned start: rather than forcing the bytes before SRCIN to
	   a harmless value, compute the syndrome for the whole aligned
	   hunk and then clear the syndrome bits that belong to the
	   padding bytes in front of the real string.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* 0xff (match) >= anything, and anything >= 0x00: this folds the
	   nul test into the existing match mask in one instruction.  */
	cmhs	vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
	cmhs	vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
	and	vhas_chr1.16b, vhas_nul1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_nul2.16b, vrepmask.16b
	lsl	tmp1, tmp1, #1		/* Two syndrome bits per byte.  */
	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	mov	tmp3, #~0
	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
	lsr	tmp1, tmp3, tmp1	/* All-ones >> (2*padding).  */

	mov	tmp3, vend1.d[0]
	bic	tmp1, tmp3, tmp1	/* Drop padding-byte bits.  */
	cbnz	tmp1, L(tail)

	.p2align 4
L(loop):
	/* Hot path: cheap detection only — UMAXP collapses the combined
	   match mask to 64 bits just to test "anything hit?"; the exact
	   syndrome is rebuilt after the loop, off the critical path.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	cmhs	vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
	cmhs	vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_nul2.16b
	umaxp	vend1.16b, vend1.16b, vend1.16b
	mov	tmp1, vend1.d[0]
	cbz	tmp1, L(loop)

	/* Something in this hunk terminated the search; build the
	   ordered syndrome to find out exactly which byte.  */
	and	vhas_chr1.16b, vhas_nul1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_nul2.16b, vrepmask.16b
	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64

	mov	tmp1, vend1.d[0]
L(tail):
	/* count_trailing_zeros via bit-reverse + count-leading-zeros.  */
	rbit	tmp1, tmp1
	sub	src, src, #32		/* SRC was post-incremented; undo.  */
	clz	tmp1, tmp1
	/* TMP1 is twice the byte offset into the hunk.  */
	add	result, src, tmp1, lsr #1
	ret

END (strchrnul)
#endif