/* strnlen - calculate the length of a string with limit.

   Copyright (c) 2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include <picolibc.h>

/* The assembly version is only built for speed-optimised LP64 targets that
   have Advanced SIMD; every other configuration leaves this file empty.  */
#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__) || !defined(__ARM_NEON)
/* See strnlen-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Register roles.  result aliases srcin (x0), so srcin is dead once result
   has been written.  shift and tmp both name x4: shift is only used while
   handling the first chunk, tmp only while setting up the loop.  */
#define srcin		x0
#define cntin		x1
#define result		x0

#define src		x2
#define synd		x3
#define	shift		x4
#define tmp		x4
#define cntrem		x5

#define qdata		q0
#define vdata		v0
#define vhas_chr	v1
#define vend		v2
#define dend		d2

/*
   Core algorithm:
   Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
   four bits per byte using the shrn instruction. A count trailing zeros then
   identifies the first zero byte.  */

/*
   size_t strnlen (const char *s, size_t maxlen)

   In:   x0 (srcin)  = start of the string
         x1 (cntin)  = maximum number of bytes to examine
   Out:  x0 (result) = offset of the first NUL byte from srcin, or cntin
                       when no NUL byte is found before the limit
   Uses: x2-x5, v0-v2 and the condition flags.  Leaf routine; the stack is
         not touched.

   Every load is made from a 16-byte aligned address (src is srcin rounded
   down, then advanced in multiples of 16), so a load never straddles a
   16-byte granule.  */
ENTRY (strnlen)
	/* Argument fix-up macros supplied by asmdefs.h (not visible here;
	   presumably no-ops on the LP64 targets this file is built for).  */
	PTR_ARG (0)
	SIZE_ARG (1)
	bic	src, srcin, 15			/* src = srcin rounded down to 16 */
	cbz	cntin, L(nomatch)		/* limit of 0: return 0 */
	ld1	{vdata.16b}, [src]
	cmeq	vhas_chr.16b, vdata.16b, 0	/* 0xff in each lane holding NUL */
	/* shift = 4 * srcin.  The register-controlled lsr below only uses the
	   low six bits, i.e. 4 * (srcin & 15): four mask bits for each byte
	   of the aligned chunk that lies before srcin.  */
	lsl	shift, srcin, 2
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift		/* drop bytes preceding srcin */
	cbz	synd, L(start_loop)
L(finish):
	/* A NUL was seen in the first chunk.  rbit + clz counts the trailing
	   zero bits of the mask; dividing by four turns that into a byte
	   offset from srcin.  */
	rbit	synd, synd
	clz	synd, synd
	lsr	result, synd, 2
	/* result = min (cntin, result), unsigned.  */
	cmp	cntin, result
	csel	result, cntin, result, ls
	ret

L(nomatch):
	/* No NUL within the first cntin bytes: the answer is the limit.  */
	mov	result, cntin
	ret

L(start_loop):
	/* tmp = 17 - (srcin & 15): the number of string bytes covered by the
	   first chunk, plus one.  If cntin is smaller than that, the whole
	   limit has already been examined.  Otherwise cntrem becomes the
	   number of bytes still to examine, minus one.  */
	sub	tmp, src, srcin
	add	tmp, tmp, 17
	subs	cntrem, cntin, tmp
	b.lo	L(nomatch)

	/* Make sure that it won't overread by a 16-byte chunk */
	/* The loop handles two chunks per iteration but only the second load
	   is paired with a count check.  Bit 4 of cntrem gives the parity of
	   the number of chunks left, and the entry point is chosen so that
	   the counter underflows on the chunk that holds the last permitted
	   byte.  */
	tbz	cntrem, 4, L(loop32_2)
	sub	src, src, 16			/* bias for the pre-indexed load */
	.p2align 5
L(loop32):
	ldr	qdata, [src, 32]!		/* src advances by 32 */
	cmeq	vhas_chr.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)			/* NUL in the chunk at src */
L(loop32_2):
	ldr	qdata, [src, 16]		/* no writeback: chunk at src + 16 */
	subs	cntrem, cntrem, 32
	cmeq	vhas_chr.16b, vdata.16b, 0	/* leaves the flags from subs intact */
	b.lo	L(end_2)			/* limit reached within this chunk */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)
L(end_2):
	add	src, src, 16			/* src = chunk loaded at loop32_2 */
L(end):
	/* src addresses the chunk whose comparison result is in vhas_chr.
	   Build the 4-bits-per-byte mask, locate the first NUL inside the
	   chunk and add the chunk's offset from srcin.  If the chunk holds no
	   NUL the mask is zero, clz yields 64 and the in-chunk offset is 16;
	   the final clamp then returns cntin.  */
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	sub	result, src, srcin
	fmov	synd, dend
	/* Big-endian builds skip the bit reversal: the loop loads with
	   "ldr q" rather than ld1, so clz alone is used there to find the
	   first byte.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	synd, synd
	add	result, result, synd, lsr 2
	/* result = min (cntin, result), unsigned.  */
	cmp	cntin, result
	csel	result, cntin, result, ls
	ret

END (strnlen)
#endif