1 /* Optimized strstr function.
2    Copyright (c) 2018 Arm Ltd.  All rights reserved.
3 
4    SPDX-License-Identifier: BSD-3-Clause
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions
8    are met:
9    1. Redistributions of source code must retain the above copyright
10       notice, this list of conditions and the following disclaimer.
11    2. Redistributions in binary form must reproduce the above copyright
12       notice, this list of conditions and the following disclaimer in the
13       documentation and/or other materials provided with the distribution.
14    3. The name of the company may not be used to endorse or promote
15       products derived from this software without specific prior written
16       permission.
17 
18    THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
19    WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21    IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23    TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
28 
29 /*
30 Copyright (c) 1994 Cygnus Support.
31 All rights reserved.
32 
33 Redistribution and use in source and binary forms are permitted
34 provided that the above copyright notice and this paragraph are
35 duplicated in all such forms and that any documentation,
36 and/or other materials related to such
37 distribution and use acknowledge that the software was developed
38 at Cygnus Support, Inc.  Cygnus Support, Inc. may not be used to
39 endorse or promote products derived from this software without
40 specific prior written permission.
41 THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
42 IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
43 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
44  */
45 /*
46 FUNCTION
47 	<<strstr>>---find string segment
48 
49 INDEX
50 	strstr
51 
52 SYNOPSIS
53 	#include <string.h>
54 	char *strstr(const char *<[s1]>, const char *<[s2]>);
55 
56 DESCRIPTION
57 	Locates the first occurrence in the string pointed to by <[s1]> of
58 	the sequence of characters in the string pointed to by <[s2]>
59 	(excluding the terminating null character).
60 
61 RETURNS
62 	Returns a pointer to the located string segment, or a null
63 	pointer if the string <[s2]> is not found. If <[s2]> points to
64 	a string with zero length, <[s1]> is returned.
65 
66 PORTABILITY
67 <<strstr>> is ANSI C.
68 
69 <<strstr>> requires no supporting OS subroutines.
70 
71 QUICKREF
72 	strstr ansi pure
73 */
74 
75 #define _DEFAULT_SOURCE
76 #include <string.h>
77 #include <limits.h>
78 
79 #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) \
80     || CHAR_BIT > 8
81 
82 /* Small and efficient strstr implementation.  */
83 char *
strstr(const char * hs,const char * ne)84 strstr (const char *hs, const char *ne)
85 {
86   size_t i;
87   int c = ne[0];
88 
89   if (c == 0)
90     return (char*)hs;
91 
92   for ( ; hs[0] != '\0'; hs++)
93     {
94       if (hs[0] != c)
95 	continue;
96       for (i = 1; ne[i] != 0; i++)
97 	if (hs[i] != ne[i])
98 	  break;
99       if (ne[i] == '\0')
100 	return (char*)hs;
101     }
102 
103   return NULL;
104 }
105 
106 #else /* compilation for speed */
107 
108 # define RETURN_TYPE char *
109 # define AVAILABLE(h, h_l, j, n_l) (((j) <= (h_l) - (n_l)) \
110    || ((h_l) += strnlen ((const char *) (h) + (h_l), (n_l) | 2048), ((j) <= (h_l) - (n_l))))
111 
112 # include "str-two-way.h"
113 
114 /* Number of bits used to index shift table.  */
115 #define SHIFT_TABLE_BITS 6
116 
117 static inline char *
strstr2(const unsigned char * hs,const unsigned char * ne)118 strstr2 (const unsigned char *hs, const unsigned char *ne)
119 {
120   uint32_t h1 = ((uint32_t) ne[0] << 16) | ne[1];
121   uint32_t h2 = 0;
122   int c;
123   for (c = hs[0]; h1 != h2 && c != 0; c = *++hs)
124       h2 = (h2 << 16) | c;
125   return h1 == h2 ? (char *)hs - 2 : NULL;
126 }
127 
128 static inline char *
strstr3(const unsigned char * hs,const unsigned char * ne)129 strstr3 (const unsigned char *hs, const unsigned char *ne)
130 {
131   uint32_t h1 = ((uint32_t) ne[0] << 24) | ((uint32_t) ne[1] << 16) | ((uint32_t) ne[2] << 8);
132   uint32_t h2 = 0;
133   int c;
134   for (c = hs[0]; h1 != h2 && c != 0; c = *++hs)
135       h2 = (h2 | c) << 8;
136   return h1 == h2 ? (char *)hs - 3 : NULL;
137 }
138 
139 static inline char *
strstr4(const unsigned char * hs,const unsigned char * ne)140 strstr4 (const unsigned char *hs, const unsigned char *ne)
141 {
142   uint32_t h1 = ((uint32_t) ne[0] << 24) | ((uint32_t) ne[1] << 16) | ((uint32_t) ne[2] << 8) | ne[3];
143   uint32_t h2 = 0;
144   int c;
145   for (c = hs[0]; c != 0 && h1 != h2; c = *++hs)
146     h2 = (h2 << 8) | c;
147   return h1 == h2 ? (char *)hs - 4 : NULL;
148 }
149 
150 /* Extremely fast strstr algorithm with guaranteed linear-time performance.
151    Small needles up to size 4 use a dedicated linear search.  Longer needles
152    up to size 254 use Sunday's Quick-Search algorithm.  Due to its simplicity
153    it has the best average performance of string matching algorithms on almost
154    all inputs.  It uses a bad-character shift table to skip past mismatches.
155    By limiting the needle length to 254, the shift table can be reduced to 8
156    bits per entry, lowering preprocessing overhead and minimizing cache effects.
157    The limit also implies the worst-case performance is linear.
158    Even larger needles are processed by the linear-time Two-Way algorithm.
159 */
160 char *
strstr(const char * haystack,const char * needle)161 strstr (const char *haystack, const char *needle)
162 {
163   const unsigned char *hs = (const unsigned char *) haystack;
164   const unsigned char *ne = (const unsigned char *) needle;
165   size_t i;
166 
167   /* Handle short needle special cases first.  */
168   if (ne[0] == '\0')
169     return (char *) hs;
170   if (ne[1] == '\0')
171     return (char*)strchr ((const char *) hs, (char) ne[0]);
172   if (ne[2] == '\0')
173     return strstr2 (hs, ne);
174   if (ne[3] == '\0')
175     return strstr3 (hs, ne);
176   if (ne[4] == '\0')
177     return strstr4 (hs, ne);
178 
179   size_t ne_len = strlen ((const char *) ne);
180   size_t hs_len = strnlen ((const char *) hs, ne_len | 512);
181 
182   /* Ensure haystack length is >= needle length.  */
183   if (hs_len < ne_len)
184     return NULL;
185 
186   /* Use the Quick-Search algorithm for needle lengths less than 255.  */
187   if (__builtin_expect (ne_len < 255, 1))
188     {
189       uint8_t shift[1 << SHIFT_TABLE_BITS];
190       const unsigned char *end = hs + hs_len - ne_len;
191 
192       /* Initialize bad character shift hash table.  */
193       memset (shift, ne_len + 1, sizeof (shift));
194       for (i = 0; i < ne_len; i++)
195 	shift[ne[i] % sizeof (shift)] = ne_len - i;
196 
197       do
198 	{
199 	  hs--;
200 
201 	  /* Search by skipping past bad characters.  */
202 	  size_t tmp = shift[hs[ne_len] % sizeof (shift)];
203 	  for (hs += tmp; hs <= end; hs += tmp)
204 	    {
205 	      tmp = shift[hs[ne_len] % sizeof (shift)];
206 	      if (memcmp (hs, ne, ne_len) == 0)
207 		return (char*) hs;
208 	    }
209 	  if (end[ne_len] == 0)
210 	    return NULL;
211 	  end += strnlen ((const char *) (end + ne_len), 2048);
212 	}
213       while (hs <= end);
214 
215       return NULL;
216     }
217 
218   /* Use Two-Way algorithm for very long needles.  */
219   return two_way_long_needle (hs, hs_len, ne, ne_len);
220 }
221 #endif /* compilation for speed */
222