1 /* Optimized strstr function.
2 Copyright (c) 2018 Arm Ltd. All rights reserved.
3
4 SPDX-License-Identifier: BSD-3-Clause
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 3. The name of the company may not be used to endorse or promote
15 products derived from this software without specific prior written
16 permission.
17
18 THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
19 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
28
29 /*
30 Copyright (c) 1994 Cygnus Support.
31 All rights reserved.
32
33 Redistribution and use in source and binary forms are permitted
34 provided that the above copyright notice and this paragraph are
35 duplicated in all such forms and that any documentation,
36 and/or other materials related to such
37 distribution and use acknowledge that the software was developed
38 at Cygnus Support, Inc. Cygnus Support, Inc. may not be used to
39 endorse or promote products derived from this software without
40 specific prior written permission.
41 THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
42 IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
43 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
44 */
45 /*
46 FUNCTION
47 <<strstr>>---find string segment
48
49 INDEX
50 strstr
51
52 SYNOPSIS
53 #include <string.h>
54 char *strstr(const char *<[s1]>, const char *<[s2]>);
55
56 DESCRIPTION
57 Locates the first occurrence in the string pointed to by <[s1]> of
58 the sequence of characters in the string pointed to by <[s2]>
59 (excluding the terminating null character).
60
61 RETURNS
62 Returns a pointer to the located string segment, or a null
63 pointer if the string <[s2]> is not found. If <[s2]> points to
64 a string with zero length, <[s1]> is returned.
65
66 PORTABILITY
67 <<strstr>> is ANSI C.
68
69 <<strstr>> requires no supporting OS subroutines.
70
71 QUICKREF
72 strstr ansi pure
73 */
74
75 #define _DEFAULT_SOURCE
76 #include <string.h>
77 #include <limits.h>
78
79 #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) \
80 || CHAR_BIT > 8
81
82 /* Small and efficient strstr implementation. */
83 char *
strstr(const char * hs,const char * ne)84 strstr (const char *hs, const char *ne)
85 {
86 size_t i;
87 int c = ne[0];
88
89 if (c == 0)
90 return (char*)hs;
91
92 for ( ; hs[0] != '\0'; hs++)
93 {
94 if (hs[0] != c)
95 continue;
96 for (i = 1; ne[i] != 0; i++)
97 if (hs[i] != ne[i])
98 break;
99 if (ne[i] == '\0')
100 return (char*)hs;
101 }
102
103 return NULL;
104 }
105
106 #else /* compilation for speed */
107
108 # define RETURN_TYPE char *
109 # define AVAILABLE(h, h_l, j, n_l) (((j) <= (h_l) - (n_l)) \
110 || ((h_l) += strnlen ((const char *) (h) + (h_l), (n_l) | 2048), ((j) <= (h_l) - (n_l))))
111
112 # include "str-two-way.h"
113
114 /* Number of bits used to index shift table. */
115 #define SHIFT_TABLE_BITS 6
116
117 static inline char *
strstr2(const unsigned char * hs,const unsigned char * ne)118 strstr2 (const unsigned char *hs, const unsigned char *ne)
119 {
120 uint32_t h1 = ((uint32_t) ne[0] << 16) | ne[1];
121 uint32_t h2 = 0;
122 int c;
123 for (c = hs[0]; h1 != h2 && c != 0; c = *++hs)
124 h2 = (h2 << 16) | c;
125 return h1 == h2 ? (char *)hs - 2 : NULL;
126 }
127
128 static inline char *
strstr3(const unsigned char * hs,const unsigned char * ne)129 strstr3 (const unsigned char *hs, const unsigned char *ne)
130 {
131 uint32_t h1 = ((uint32_t) ne[0] << 24) | ((uint32_t) ne[1] << 16) | ((uint32_t) ne[2] << 8);
132 uint32_t h2 = 0;
133 int c;
134 for (c = hs[0]; h1 != h2 && c != 0; c = *++hs)
135 h2 = (h2 | c) << 8;
136 return h1 == h2 ? (char *)hs - 3 : NULL;
137 }
138
139 static inline char *
strstr4(const unsigned char * hs,const unsigned char * ne)140 strstr4 (const unsigned char *hs, const unsigned char *ne)
141 {
142 uint32_t h1 = ((uint32_t) ne[0] << 24) | ((uint32_t) ne[1] << 16) | ((uint32_t) ne[2] << 8) | ne[3];
143 uint32_t h2 = 0;
144 int c;
145 for (c = hs[0]; c != 0 && h1 != h2; c = *++hs)
146 h2 = (h2 << 8) | c;
147 return h1 == h2 ? (char *)hs - 4 : NULL;
148 }
149
150 /* Extremely fast strstr algorithm with guaranteed linear-time performance.
151 Small needles up to size 4 use a dedicated linear search. Longer needles
152 up to size 254 use Sunday's Quick-Search algorithm. Due to its simplicity
153 it has the best average performance of string matching algorithms on almost
154 all inputs. It uses a bad-character shift table to skip past mismatches.
155 By limiting the needle length to 254, the shift table can be reduced to 8
156 bits per entry, lowering preprocessing overhead and minimizing cache effects.
157 The limit also implies the worst-case performance is linear.
158 Even larger needles are processed by the linear-time Two-Way algorithm.
159 */
160 char *
strstr(const char * haystack,const char * needle)161 strstr (const char *haystack, const char *needle)
162 {
163 const unsigned char *hs = (const unsigned char *) haystack;
164 const unsigned char *ne = (const unsigned char *) needle;
165 size_t i;
166
167 /* Handle short needle special cases first. */
168 if (ne[0] == '\0')
169 return (char *) hs;
170 if (ne[1] == '\0')
171 return (char*)strchr ((const char *) hs, (char) ne[0]);
172 if (ne[2] == '\0')
173 return strstr2 (hs, ne);
174 if (ne[3] == '\0')
175 return strstr3 (hs, ne);
176 if (ne[4] == '\0')
177 return strstr4 (hs, ne);
178
179 size_t ne_len = strlen ((const char *) ne);
180 size_t hs_len = strnlen ((const char *) hs, ne_len | 512);
181
182 /* Ensure haystack length is >= needle length. */
183 if (hs_len < ne_len)
184 return NULL;
185
186 /* Use the Quick-Search algorithm for needle lengths less than 255. */
187 if (__builtin_expect (ne_len < 255, 1))
188 {
189 uint8_t shift[1 << SHIFT_TABLE_BITS];
190 const unsigned char *end = hs + hs_len - ne_len;
191
192 /* Initialize bad character shift hash table. */
193 memset (shift, ne_len + 1, sizeof (shift));
194 for (i = 0; i < ne_len; i++)
195 shift[ne[i] % sizeof (shift)] = ne_len - i;
196
197 do
198 {
199 hs--;
200
201 /* Search by skipping past bad characters. */
202 size_t tmp = shift[hs[ne_len] % sizeof (shift)];
203 for (hs += tmp; hs <= end; hs += tmp)
204 {
205 tmp = shift[hs[ne_len] % sizeof (shift)];
206 if (memcmp (hs, ne, ne_len) == 0)
207 return (char*) hs;
208 }
209 if (end[ne_len] == 0)
210 return NULL;
211 end += strnlen ((const char *) (end + ne_len), 2048);
212 }
213 while (hs <= end);
214
215 return NULL;
216 }
217
218 /* Use Two-Way algorithm for very long needles. */
219 return two_way_long_needle (hs, hs_len, ne, ne_len);
220 }
221 #endif /* compilation for speed */
222