/* strnlen - calculate the length of a string with limit.

   Copyright (c) 2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
/* See strnlen-stub.c */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Arguments and return value.  The result is produced in the same
   register that carries the incoming string pointer.  */
#define str_in		x0
#define limit_in	x1
#define retval		x0

/* Integer temporaries.  skip_bits and scratch deliberately share x4:
   skip_bits is only needed for the first chunk, scratch only when
   setting up the loop.  */
#define chunk_ptr	x2
#define nulmask		x3
#define skip_bits	x4
#define scratch		x4
#define limit_left	x5

/* Vector temporaries (q0/v0 are two views of the loaded chunk, v2/d2 two
   views of the reduced comparison result).  */
#define chunk_q		q0
#define chunk_v		v0
#define isnul_v		v1
#define mask_v		v2
#define mask_d		d2

/*
   Core algorithm:
   Walk the string in 16-byte aligned chunks.  For each chunk compare every
   byte against zero, then use shrn to squeeze the 128-bit comparison result
   into a 64-bit mask holding four bits per byte.  Counting the trailing
   zeros of that mask and dividing by four gives the index of the first
   zero byte within the chunk.

   Interface, as used by the visible code:
     In:    x0 = string pointer, x1 = maximum number of bytes to examine
     Out:   x0 = min (index of first NUL byte, limit)
     Writes x2-x5, v0-v2 and the condition flags (plus whatever the
     asmdefs.h macros expand to).  */

ENTRY (strnlen)
	/* Argument fix-up macros supplied by asmdefs.h.  */
	PTR_ARG (0)
	SIZE_ARG (1)

	/* Round the pointer down to the enclosing 16-byte chunk.  */
	bic	chunk_ptr, str_in, 15
	/* A limit of zero needs no memory access at all.  */
	cbz	limit_in, L(hit_limit)

	/* First chunk: it may start before the string, so the mask entries
	   belonging to the leading bytes have to be discarded.  */
	ld1	{chunk_v.16b}, [chunk_ptr]
	cmeq	isnul_v.16b, chunk_v.16b, 0
	/* Four mask bits per byte, hence pointer * 4.  The register-controlled
	   shift below only honours the low six bits of the amount, which
	   leaves (str_in & 15) * 4.  */
	lsl	skip_bits, str_in, 2
	shrn	mask_v.8b, isnul_v.8h, 4		/* 128->64 */
	fmov	nulmask, mask_d
	lsr	nulmask, nulmask, skip_bits
	cbz	nulmask, L(enter_loop)

	/* A NUL was seen in the first chunk at or after str_in.  */
L(first_chunk_hit):
	/* rbit + clz == count trailing zeros; / 4 turns bits into bytes.  */
	rbit	nulmask, nulmask
	clz	nulmask, nulmask
	lsr	retval, nulmask, 2
	/* retval = min (limit_in, retval), unsigned.  */
	cmp	limit_in, retval
	csel	retval, limit_in, retval, ls
	ret

	/* No NUL within the permitted range: the answer is the limit.  */
L(hit_limit):
	mov	retval, limit_in
	ret

L(enter_loop):
	/* scratch = 17 - (str_in - chunk_ptr), i.e. one more than the number
	   of string bytes covered by the first chunk.  If the limit is
	   smaller than that, everything allowed has already been checked.  */
	sub	scratch, chunk_ptr, str_in
	add	scratch, scratch, 17
	subs	limit_left, limit_in, scratch
	b.lo	L(hit_limit)

	/* Make sure that it won't overread by a 16-byte chunk.  Bit 4 of
	   limit_left decides whether the 32-byte loop is entered at its first
	   or at its second load; for the first-load entry the pointer is
	   backed up so that the pre-indexed load lands on the next chunk.  */
	tbz	limit_left, 4, L(scan32_hi)
	sub	chunk_ptr, chunk_ptr, 16

	.p2align 5
L(scan32):
	/* Lower half of the iteration: advance by 32 with writeback.  */
	ldr	chunk_q, [chunk_ptr, 32]!
	cmeq	isnul_v.16b, chunk_v.16b, 0
	/* Pairwise max folds the result to 64 bits: non-zero iff some byte
	   of the chunk was zero.  */
	umaxp	mask_v.16b, isnul_v.16b, isnul_v.16b	/* 128->64 */
	fmov	nulmask, mask_d
	cbnz	nulmask, L(tail)
L(scan32_hi):
	/* Upper half: chunk at +16, pointer left untouched.  The limit
	   counter is charged for the whole 32-byte iteration here.  */
	ldr	chunk_q, [chunk_ptr, 16]
	subs	limit_left, limit_left, 32
	cmeq	isnul_v.16b, chunk_v.16b, 0
	/* Limit exhausted: let the tail code inspect this last chunk.  */
	b.lo	L(tail_hi)
	umaxp	mask_v.16b, isnul_v.16b, isnul_v.16b	/* 128->64 */
	fmov	nulmask, mask_d
	cbz	nulmask, L(scan32)

L(tail_hi):
	/* Make chunk_ptr refer to the chunk fetched by the upper-half load.  */
	add	chunk_ptr, chunk_ptr, 16
L(tail):
	/* Rebuild the precise four-bits-per-byte mask for the final chunk
	   and start the result with the chunk's offset from str_in.  */
	shrn	mask_v.8b, isnul_v.8h, 4		/* 128->64 */
	sub	retval, chunk_ptr, str_in
	fmov	nulmask, mask_d
	/* The bit reversal is only assembled for little-endian targets.  */
#ifndef __AARCH64EB__
	rbit	nulmask, nulmask
#endif
	clz	nulmask, nulmask
	/* Add the byte index inside the chunk (bit count / 4).  An all-zero
	   mask makes clz return 64, which adds a full 16 bytes; the clamp
	   below keeps the result within the limit either way.  */
	add	retval, retval, nulmask, lsr 2
	/* retval = min (limit_in, retval), unsigned.  */
	cmp	limit_in, retval
	csel	retval, limit_in, retval, ls
	ret

END (strnlen)
#endif