/*
 * memchr - find a character in a memory zone
 *
 * Copyright (c) 2014-2022, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

#include <picolibc.h>

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__) || !defined(__ARM_NEON)
/* See memchr-stub.c */
#else
/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "asmdefs.h"

/* Arguments and results (AAPCS64). */
#define srcin		x0	/* in: pointer to the buffer to scan */
#define chrin		w1	/* in: byte value to search for */
#define cntin		x2	/* in: number of bytes to scan */

#define result		x0	/* out: address of first match, or NULL */

/* Scratch registers (all caller-saved under AAPCS64). */
#define src		x3	/* 32-byte-aligned read cursor */
#define tmp		x4	/* scratch */
#define wtmp2		w5	/* holds the 0x40100401 magic constant */
#define synd		x6	/* 64-bit syndrome: 2 bits per input byte */
#define soff		x9	/* srcin's offset within its 32-byte block */
#define cntrem		x10	/* cntin % 32, used to mask the final block */

#define vrepchr		v0	/* chrin replicated into all 16 byte lanes */
#define vdata1		v1	/* first 16 bytes of the current 32-byte block */
#define vdata2		v2	/* second 16 bytes of the current block */
#define vhas_chr1	v3	/* per-byte match mask (0xff/0x00) for vdata1 */
#define vhas_chr2	v4	/* per-byte match mask (0xff/0x00) for vdata2 */
#define vrepmask	v5	/* 0x40100401 replicated; isolates 2 bits/byte */
#define vend		v6	/* reduction accumulator for the syndrome */

/*
 * Core algorithm:
 *
 * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
 * per byte. For each tuple, bit 0 is set if the relevant byte matched the
 * requested character and bit 1 is not used (faster than using a 32bit
 * syndrome). Since the bits in the syndrome reflect exactly the order in which
 * things occur in the original string, counting trailing zeros allows to
 * identify exactly which byte has matched.
 */

/*
 * void *memchr (const void *s, int c, size_t n)
 *
 * In:   x0 = srcin  -- buffer to scan
 *       w1 = chrin  -- byte value to look for (low 8 bits used via dup .16b)
 *       x2 = cntin  -- number of bytes to scan
 * Out:  x0 = result -- address of the first matching byte, or NULL
 * Clobbers: x2-x6, x9, x10, v0-v6, NZCV.
 */
ENTRY (memchr)
	/* Macros from asmdefs.h; presumably zero-extend the pointer/size
	   arguments for ILP32 configurations -- confirm in asmdefs.h.  */
	PTR_ARG (0)
	SIZE_ARG (2)
	/* Do not dereference srcin if no bytes to compare. */
	cbz	cntin, L(zero_length)
	/*
	 * Magic constant 0x40100401 allows us to identify which lane matches
	 * the requested byte.
	 */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	/* Work with aligned 32-byte chunks */
	bic	src, srcin, #31
	dup	vrepmask.4s, wtmp2
	ands	soff, srcin, #31	/* sets flags: Z iff already aligned */
	and	cntrem, cntin, #31
	b.eq	L(loop)

	/*
	 * Input string is not 32-byte aligned. We calculate the syndrome
	 * value for the aligned 32 bytes block containing the first bytes
	 * and mask the irrelevant part.
	 */

	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	sub	tmp, soff, #32
	/* cntin -= (32 - soff); flags are consumed by b.ls below (the SIMD
	   and shift instructions in between do not write NZCV).  */
	adds	cntin, cntin, tmp
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
	mov	synd, vend.d[0]
	/* Clear the soff*2 lower bits (bytes before srcin are irrelevant) */
	lsl	tmp, soff, #1
	lsr	synd, synd, tmp
	lsl	synd, synd, tmp
	/* The first block can also be the last (flags from the adds above) */
	b.ls	L(masklast)
	/* Have we found something already? */
	cbnz	synd, L(tail)

L(loop):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	subs	cntin, cntin, #32	/* flags consumed by b.ls / b.hs below */
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* If we're out of data we finish regardless of the result */
	b.ls	L(end)
	/* Use a fast check for the termination condition */
	orr	vend.16b, vhas_chr1.16b, vhas_chr2.16b
	addp	vend.2d, vend.2d, vend.2d
	mov	synd, vend.d[0]
	/* We're not out of data, loop if we haven't found the character */
	cbz	synd, L(loop)

L(end):
	/* Termination condition found, let's calculate the syndrome value */
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
	mov	synd, vend.d[0]
	/* Only do the clear for the last possible block; NZCV is still live
	   from the subs in the loop (SIMD ops above do not write flags).  */
	b.hs	L(tail)

L(masklast):
	/* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits, i.e. drop
	   syndrome bits for bytes past the end of the buffer.  */
	add	tmp, cntrem, soff
	and	tmp, tmp, #31
	sub	tmp, tmp, #32
	neg	tmp, tmp, lsl #1	/* tmp = (32 - valid_bytes) * 2 */
	lsl	synd, synd, tmp
	lsr	synd, synd, tmp

L(tail):
	/* Count the trailing zeros using bit reversing */
	rbit	synd, synd
	/* Compensate the last post-increment */
	sub	src, src, #32
	/* Check that we have found a character */
	cmp	synd, #0
	/* And count the leading zeros */
	clz	synd, synd
	/* Compute the potential result (syndrome has 2 bits per byte,
	   hence the lsr #1) */
	add	result, src, synd, lsr #1
	/* Select result or NULL (eq set by the cmp above; clz, add and sub
	   do not write flags) */
	csel	result, xzr, result, eq
	ret

L(zero_length):
	mov	result, #0
	ret

END (memchr)
#endif