/* strchrnul - find a character or nul in a string

   Copyright (c) 2014-2022, ARM Limited
   All rights Reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the company nor the names of its contributors
         may be used to endorse or promote products derived from this
         software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
#include <picolibc.h>

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__) || !defined(__ARM_NEON)
/* See strchrnul-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "asmdefs.h"

/* Arguments and results.  */
#define srcin           x0
#define chrin           w1

#define result          x0

#define src             x2
#define tmp1            x3
#define wtmp2           w4
#define tmp3            x5

#define vrepchr         v0
#define vdata1          v1
#define vdata2          v2
#define vhas_nul1       v3
#define vhas_nul2       v4
#define vhas_chr1       v5
#define vhas_chr2       v6
#define vrepmask        v7
#define vend1           v16

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character or nul.  Since the
   bits in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination.  */

/* Locals and temporaries.  */

ENTRY (strchrnul)
        PTR_ARG (0)
        /* Magic constant 0x40100401 to allow us to identify which lane
           matches the termination condition.  */
        mov     wtmp2, #0x0401
        movk    wtmp2, #0x4010, lsl #16
        dup     vrepchr.16b, chrin
        bic     src, srcin, #31         /* Work with aligned 32-byte hunks.  */
        dup     vrepmask.4s, wtmp2
        ands    tmp1, srcin, #31
        b.eq    L(loop)

        /* Input string is not 32-byte aligned.  Rather than forcing
           the padding bytes to a safe value, we calculate the syndrome
           for all the bytes, but then mask off those bits of the
           syndrome that are related to the padding.  */
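        /* With misalignment m = srcin & 31, the 32 bytes loaded below
           start m bytes before srcin, so the first m bytes are padding.
           Each byte owns two bits of the syndrome, hence the padding
           occupies the low 2*m bits.  The neg/lsl/lsr sequence builds
           the mask ~0 >> (64 - 2*m) (variable shifts use the count
           modulo 64), and BIC then clears exactly those bits from the
           syndrome.  */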
        ld1     {vdata1.16b, vdata2.16b}, [src], #32
        neg     tmp1, tmp1
        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
        /* CMHS (unsigned >=) against the data turns the match mask into a
           "matched chrin or is nul" mask: 0xff compares >= any byte, while
           0x00 compares >= only against a zero byte.  */
        cmhs    vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
        cmhs    vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
        /* AND with the repeating 0x01,0x04,0x10,0x40 pattern, then two
           pairwise adds, give each of the 32 bytes a distinct bit pair
           in the 64-bit syndrome.  */
        and     vhas_chr1.16b, vhas_nul1.16b, vrepmask.16b
        and     vhas_chr2.16b, vhas_nul2.16b, vrepmask.16b
        lsl     tmp1, tmp1, #1
        addp    vend1.16b, vhas_chr1.16b, vhas_chr2.16b  // 256->128
        mov     tmp3, #~0
        addp    vend1.16b, vend1.16b, vend1.16b          // 128->64
        lsr     tmp1, tmp3, tmp1

        mov     tmp3, vend1.d[0]
        bic     tmp1, tmp3, tmp1        // Mask padding bits.
        cbnz    tmp1, L(tail)

        .p2align 4
L(loop):
        ld1     {vdata1.16b, vdata2.16b}, [src], #32
        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
        cmhs    vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
        cmhs    vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
        /* Any set byte means this hunk contains chrin or nul; umaxp
           narrows the 16 comparison bytes to 8 so one 64-bit test
           suffices.  */
        orr     vend1.16b, vhas_nul1.16b, vhas_nul2.16b
        umaxp   vend1.16b, vend1.16b, vend1.16b
        mov     tmp1, vend1.d[0]
        cbz     tmp1, L(loop)

        /* Termination condition found.  Now need to establish exactly why
           we terminated.  */
        and     vhas_chr1.16b, vhas_nul1.16b, vrepmask.16b
        and     vhas_chr2.16b, vhas_nul2.16b, vrepmask.16b
        addp    vend1.16b, vhas_chr1.16b, vhas_chr2.16b  // 256->128
        addp    vend1.16b, vend1.16b, vend1.16b          // 128->64

        mov     tmp1, vend1.d[0]
L(tail):
        /* Count the trailing zeros, by bit reversing...  */
        rbit    tmp1, tmp1
        /* Re-bias source; src points just past the 32-byte hunk examined.  */
        sub     src, src, #32
        clz     tmp1, tmp1      /* ... and counting the leading zeros.  */
        /* tmp1 is twice the offset into the fragment.  */
        add     result, src, tmp1, lsr #1
        ret

END (strchrnul)
#endif