/* strnlen - calculate the length of a string with limit.

   Copyright (c) 2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
/* See strnlen-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Register roles.
   Incoming arguments and the return value (result overlays srcin in x0).  */
#define srcin		x0	/* In:  start of the string.  */
#define cntin		x1	/* In:  maximum number of bytes to examine.  */
#define result		x0	/* Out: min (index of first NUL, cntin).  */

/* Scratch general registers.  shift and tmp share x4: shift is only used
   while trimming the first chunk, tmp only while setting up the loop.  */
#define src		x2	/* 16-byte aligned address of the current chunk.  */
#define synd		x3	/* 64-bit syndrome, 4 bits per input byte.  */
#define shift		x4	/* Bit offset of srcin within the first chunk.  */
#define tmp		x4	/* Bytes covered by the first chunk, plus one.  */
#define cntrem		x5	/* Limit left after the first chunk, minus one.  */

/* Scratch SIMD registers and the views used on them.  */
#define qdata		q0
#define vdata		v0	/* 16 bytes of string data.  */
#define vhas_chr	v1	/* 0xff in every lane whose byte is NUL.  */
#define vend		v2	/* Narrowed or reduced form of vhas_chr.  */
#define dend		d2	/* Low 64 bits of vend, moved into synd.  */

/*
   Core algorithm:
   Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
   four bits per byte using the shrn instruction. Counting the trailing zeros
   of that mask (rbit + clz) then identifies the first zero byte.  */

/* size_t strnlen (const char *s, size_t maxlen)

   In:       x0 = srcin, x1 = cntin.
   Out:      x0 = number of bytes before the first NUL, or cntin if no NUL
	     is found within the first cntin bytes.
   Clobbers: x2-x5, v0-v2, condition flags.  The stack is not used.

   Every load is a 16-byte load from a 16-byte aligned address, so bytes
   before srcin and after the terminator or limit may be read, but only
   from within an aligned 16-byte block that also holds wanted bytes.  */

ENTRY (strnlen)
	/* NOTE(review): PTR_ARG/SIZE_ARG come from asmdefs.h; presumably they
	   normalise pointer and size arguments for ILP32 - confirm there.  */
	PTR_ARG (0)
	SIZE_ARG (1)
	bic	src, srcin, 15			/* Round down to a 16-byte boundary.  */
	/* Tested before the first load, so nothing is read for a zero limit.  */
	cbz	cntin, L(nomatch)
	ld1	{vdata.16b}, [src]
	cmeq	vhas_chr.16b, vdata.16b, 0
	/* lsr only uses the low 6 bits of the shift register, so this is
	   (srcin & 15) * 4: four syndrome bits per byte before srcin.  */
	lsl	shift, srcin, 2
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift		/* Discard bytes before srcin.  */
	cbz	synd, L(start_loop)
L(finish):
	/* NUL inside the first chunk.  No endian conditional is needed here:
	   ld1 with byte elements places bytes in lanes in memory order on
	   either endianness, so the first byte is always in the low bits.  */
	rbit	synd, synd
	clz	synd, synd
	lsr	result, synd, 2			/* Syndrome bit index -> byte index.  */
	cmp	cntin, result
	csel	result, cntin, result, ls	/* result = min (cntin, result).  */
	ret

L(nomatch):
	/* No NUL within the limit: the answer is the limit itself.  */
	mov	result, cntin
	ret

L(start_loop):
	/* src - srcin is minus the misalignment, so tmp becomes
	   (bytes of the string covered by the first chunk) + 1.  */
	sub	tmp, src, srcin
	add	tmp, tmp, 17
	/* cntrem = (limit remaining after the first chunk) - 1.  A borrow
	   means the first chunk already covered the whole limit.  */
	subs	cntrem, cntin, tmp
	b.lo	L(nomatch)

	/* Make sure that it won't overread by a 16-byte chunk */
	/* The loop handles two chunks per iteration but tests the limit only
	   in its second half.  Bit 4 of cntrem gives the parity of the number
	   of chunks still allowed; choose the entry point so that the last
	   permitted chunk is the one loaded in the second half.  */
	tbz	cntrem, 4, L(loop32_2)
	sub	src, src, 16
	.p2align 5
L(loop32):
	ldr	qdata, [src, 32]!		/* Advance src by 32, then load.  */
	cmeq	vhas_chr.16b, vdata.16b, 0
	/* Only a zero/non-zero answer is needed here; the exact position is
	   recomputed with shrn at L(end).  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)
L(loop32_2):
	ldr	qdata, [src, 16]
	subs	cntrem, cntrem, 32
	/* cmeq leaves the flags from subs intact for the branch below, and
	   vhas_chr is ready for L(end) when the limit runs out here.  */
	cmeq	vhas_chr.16b, vdata.16b, 0
	b.lo	L(end_2)
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)
L(end_2):
	add	src, src, 16			/* Point src at the chunk just tested.  */
L(end):
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	sub	result, src, srcin
	fmov	synd, dend
	/* ldr of a q register is a single 128-bit access, so on big-endian
	   the first byte in memory lands in the most significant bits and
	   clz alone finds it; little-endian needs the bit reversal.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	/* clz of zero is 64, i.e. 16 bytes when the chunk holds no NUL; the
	   clamp below then reduces the sum to cntin.  */
	clz	synd, synd
	add	result, result, synd, lsr 2
	cmp	cntin, result
	csel	result, cntin, result, ls	/* result = min (cntin, result).  */
	ret

END (strnlen)
#endif
