1 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2 *
3 * Additional changes are licensed under the same terms as NGINX and
4 * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24 #include "http_parser.h"
25 #include <assert.h>
26 #include <stddef.h>
27 #include <ctype.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <limits.h>
31
/* Fallback for toolchains whose <limits.h> does not define ULLONG_MAX. */
#ifndef ULLONG_MAX
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of a and b.  The argument that is selected is evaluated twice. */
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif

/* Number of elements in a; a must be a real array, not a pointer. */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* Value (0 or 1) of bit i in bitmap a, eight bits per element; bit 0 is the
 * least significant bit of a[0].
 */
#ifndef BIT_AT
# define BIT_AT(a, i)                                                \
  ((int) ((((unsigned int) (a)[(unsigned int) (i) >> 3]) >>          \
           ((unsigned int) (i) & 7)) & 1u))
#endif

/* a[i] when i is a valid index of array a, otherwise the fallback v.
 * Negative indices wrap to large unsigned values and select v as well.
 */
#ifndef ELEM_AT
# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
#endif
53
/* Store error code e in the http_errno field of the local `parser`. */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->http_errno = (e);                                          \
} while(0)

/* The state machine works on the local variable p_state instead of
 * parser->state; RETURN() copies it back into the parser before returning V.
 *
 * NOTE: UPDATE_STATE(), RETURN() and REEXECUTE() all expand with their own
 * trailing ';', so they cannot be placed directly between an unbraced `if`
 * and its `else`.
 */
#define CURRENT_STATE() p_state
#define UPDATE_STATE(V) p_state = (enum state) (V);
#define RETURN(V)                                                    \
do {                                                                 \
  parser->state = CURRENT_STATE();                                   \
  return (V);                                                        \
} while (0);
/* Jump to the `reexecute` label of the enclosing function. */
#define REEXECUTE() goto reexecute;


/* Branch-prediction hints: __builtin_expect with GNU-compatible compilers,
 * plain pass-through of X everywhere else.
 */
#ifndef __GNUC__
# define LIKELY(X) (X)
# define UNLIKELY(X) (X)
#else
# define LIKELY(X) __builtin_expect(!!(X), 1)
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
#endif
77
78
/* Fire the notification callback settings->on_<FOR> when one is installed.
 *
 * The local state is published to parser->state before the call and read
 * back afterwards.  A non-zero callback result records HPE_CB_<FOR>; if the
 * parser errno is anything but HPE_OK once the callback has run, the
 * enclosing function returns ER.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (!LIKELY(settings->on_##FOR)) {                                 \
    break;                           /* no callback installed */     \
  }                                                                  \
                                                                     \
  parser->state = CURRENT_STATE();                                   \
  if (UNLIKELY(settings->on_##FOR(parser) != 0)) {                   \
    SET_ERRNO(HPE_CB_##FOR);                                         \
  }                                                                  \
  UPDATE_STATE(parser->state);                                       \
                                                                     \
  /* Errored in the callback, or got paused: leave the function */   \
  if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {               \
    return (ER);                                                     \
  }                                                                  \
} while (0)

/* Notification callback FOR; on failure the current byte counts as consumed */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Notification callback FOR; on failure the current byte is NOT consumed */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)

/* Hand the span that starts at <FOR>_mark and is LEN bytes long to the data
 * callback settings->on_<FOR>.
 *
 * Nothing happens when the mark is unset.  Otherwise the callback (if
 * installed) runs with the same state hand-over and error handling as
 * CALLBACK_NOTIFY_, returning ER from the enclosing function on failure,
 * and the mark is cleared afterwards.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (!FOR##_mark) {                                                 \
    break;                           /* no span is being tracked */  \
  }                                                                  \
                                                                     \
  if (LIKELY(settings->on_##FOR)) {                                  \
    parser->state = CURRENT_STATE();                                 \
    if (UNLIKELY(settings->on_##FOR(parser, FOR##_mark, (LEN))       \
                 != 0)) {                                            \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
    UPDATE_STATE(parser->state);                                     \
                                                                     \
    /* Errored in the callback, or got paused: leave the function */ \
    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
  FOR##_mark = NULL;                                                 \
} while (0)

/* Data callback FOR over [mark, p); on failure the current byte is consumed */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Data callback FOR over [mark, p); on failure the current byte is kept */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)

/* Start tracking a span for FOR at p, unless one is already being tracked */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (FOR##_mark == NULL) {                                          \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)
142
/* Header-size guard.
 *
 * Adds V to parser->nread and fails with HPE_HEADER_OVERFLOW (jumping to the
 * `error` label) as soon as the running total exceeds HTTP_MAX_HEADER_SIZE.
 * The total covers the headers together with the status line.
 *
 * The point is to shield embedders from denial-of-service attacks in which
 * an attacker streams an endless header that the embedder keeps buffering.
 * Strictly speaking that is the embedder's job, but most will not bother, so
 * the parser does it for them and makes the web a little safer.
 * HTTP_MAX_HEADER_SIZE is far larger than any reasonable request or
 * response, so normal traffic should never trip this check.
 */
#define COUNT_HEADER_SIZE(V)                                         \
do {                                                                 \
  parser->nread += (V);                                              \
  if (!UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) {           \
    break;                           /* still within the limit */    \
  }                                                                  \
  SET_ERRNO(HPE_HEADER_OVERFLOW);                                    \
  goto error;                                                        \
} while (0)
162
163
/* Lower-case header field names ... */
#define PROXY_CONNECTION   "proxy-connection"
#define CONNECTION         "connection"
#define CONTENT_LENGTH     "content-length"
#define TRANSFER_ENCODING  "transfer-encoding"
#define UPGRADE            "upgrade"
/* ... and lower-case header values.
 * NOTE(review): presumably compared against TOKEN()-lowercased input; the
 * matching code is outside this part of the file.
 */
#define CHUNKED            "chunked"
#define KEEP_ALIVE         "keep-alive"
#define CLOSE              "close"
172
173
174 static const char *method_strings[] =
175 {
176 #define XX(num, name, string) #string,
177 HTTP_METHOD_MAP(XX)
178 #undef XX
179 };
180
181
/* Header-token lookup, following the token rule of RFC 2616:
 *
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * tokens[c] is 0 when byte c is not a token character; otherwise it is c
 * itself with 'A'-'Z' folded to lower case.  Only the first 128 entries are
 * listed; entries 128-255 are zero-initialised, i.e. not token characters.
 */
static const char tokens[256] = {
/* 0x00-0x0f  nul soh stx etx eot enq ack bel bs ht nl vt np cr so si */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 0x10-0x1f  dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 0x20-0x2f  sp ! " # $ % & ' ( ) * + , - . / */
  0,  '!',  0,  '#', '$', '%', '&', '\'', 0,   0,  '*', '+',  0,  '-', '.',  0,
/* 0x30-0x3f  0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',  0,   0,   0,   0,   0,   0,
/* 0x40-0x4f  @ A B C D E F G H I J K L M N O */
  0,  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x50-0x5f  P Q R S T U V W X Y Z [ \ ] ^ _ */
 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,   0,   0,  '^', '_',
/* 0x60-0x6f  ` a b c d e f g h i j k l m n o */
 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70-0x7f  p q r s t u v w x y z { | } ~ del */
 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,  '|',  0,  '~',  0 };
222
223
/* Hex-digit decoder: unhex[c] is the value (0-15) of byte c when c is one of
 * '0'-'9', 'a'-'f' or 'A'-'F', and -1 for every other byte.
 *
 * All 256 entries are spelled out on purpose.  With only the ASCII half
 * listed, entries 0x80-0xFF would be zero-initialised and bytes with the high
 * bit set would decode as the valid digit 0 instead of being rejected.
 */
static const int8_t unhex[256] = {
/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
/* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
234
235
/* T(v) yields v in lenient builds and 0 when HTTP_PARSER_STRICT is set; it
 * marks characters that are accepted in URLs only by the lenient parser.
 */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif

/* Pack eight 0/1 flags into one bitmap byte; flag k ends up in bit k. */
#define URL_BYTE(f0, f1, f2, f3, f4, f5, f6, f7)                     \
  ((f0) | ((f1) << 1) | ((f2) << 2) | ((f3) << 3) |                  \
   ((f4) << 4) | ((f5) << 5) | ((f6) << 6) | ((f7) << 7))

/* Bitmap of "normal" URL characters: bit (c & 7) of byte (c >> 3) is set when
 * byte c is one.  Only the 16 bytes covering 0-127 are listed; the remaining
 * 16 bytes are zero-initialised.
 */
static const uint8_t normal_url_char[32] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
  URL_BYTE(0, 0, 0, 0, 0, 0, 0, 0),
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
  URL_BYTE(0, T(1), 0, 0, T(1), 0, 0, 0),
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
  URL_BYTE(0, 0, 0, 0, 0, 0, 0, 0),
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
  URL_BYTE(0, 0, 0, 0, 0, 0, 0, 0),
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
  URL_BYTE(0, 1, 1, 0, 1, 1, 1, 1),
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
  URL_BYTE(1, 1, 1, 1, 1, 1, 1, 1),
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
  URL_BYTE(1, 1, 1, 1, 1, 1, 1, 1),
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
  URL_BYTE(1, 1, 1, 1, 1, 1, 1, 0),
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
  URL_BYTE(1, 1, 1, 1, 1, 1, 1, 1),
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
  URL_BYTE(1, 1, 1, 1, 1, 1, 1, 1),
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
  URL_BYTE(1, 1, 1, 1, 1, 1, 1, 1),
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
  URL_BYTE(1, 1, 1, 1, 1, 1, 1, 1),
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
  URL_BYTE(1, 1, 1, 1, 1, 1, 1, 1),
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
  URL_BYTE(1, 1, 1, 1, 1, 1, 1, 1),
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
  URL_BYTE(1, 1, 1, 1, 1, 1, 1, 1),
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
  URL_BYTE(1, 1, 1, 1, 1, 1, 1, 0) };

#undef URL_BYTE
#undef T
278
/* States of the main message parser.  The numeric order matters: every state
 * up to and including s_headers_done counts as a "header" state (see
 * PARSING_HEADER below), everything after it as a "body" state.
 */
enum state
{
  s_dead = 1,  /* important that this is > 0 */

  /* message type (request or response) still undecided */
  s_start_req_or_res,
  s_res_or_resp_I,  /* for ICY URIs */
  s_res_or_resp_H,

  /* response status line */
  s_start_res,
  s_res_I,   /* for ICY URIs */
  s_res_IC,  /* for ICY URIs */
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status_start,
  s_res_status,
  s_res_line_almost_done,

  /* request line */
  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_server_start,
  s_req_server,
  s_req_server_with_at,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  /* header lines */
  s_header_field_start,
  s_header_field,
  s_header_value_discard_ws,
  s_header_value_discard_ws_almost_done,
  s_header_value_discard_lws,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};


/* Non-zero while `state` is a header state, i.e. not past s_headers_done.
 * The argument is parenthesised so that compound expressions (a conditional,
 * a bitwise OR, ...) are compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
364
365
/* Values stored in parser->header_state while a header line is examined.
 * Enumerator order (and therefore every numeric value) is significant only
 * in that it must not change; h_general is 0.
 */
enum header_states
{
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  /* header name is being matched */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* header name matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* header value is being matched */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_token_start,
  h_matching_connection_keep_alive,
  h_matching_connection_close,
  h_matching_connection_upgrade,
  h_matching_connection_token,

  /* header value matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close,
  h_connection_upgrade
};
395
/* States for walking the userinfo / host / port part of a URL.  Numbering
 * starts at 1 with the dead state.
 */
enum http_host_state
{
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_v6_zone_start,
  s_http_host_v6_zone,
  s_http_host_port_start,
  s_http_host_port
};
411
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesises its argument (and LOWER its whole expansion), so
 * passing a compound expression such as `x ? a : b` classifies the value of
 * that expression.  Arguments may still be evaluated more than once, so they
 * must be free of side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII letter folded to lower case; other bytes just get bit 0x20 set */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Token lookup that ignores lenient mode: 0 for non-token bytes */
#define STRICT_TOKEN(c)     (tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Lenient mode: a space counts as a token character, any byte with the high
 * bit set counts as a URL character, and '_' is allowed in host names.
 */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is acceptable inside a header: CR, LF, horizontal tab
 * (9), or any byte above 31 other than DEL (127) - that is, the printable
 * US-ASCII characters plus %x80-FF.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))

/* State in which a new message begins, chosen by the type of `parser` */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
449
450
/* STRICT_CHECK(cond): in strict builds, fail with HPE_STRICT (jumping to the
 * `error` label) when cond holds; in lenient builds it expands to nothing,
 * so cond is not even evaluated.
 *
 * NEW_MESSAGE(): state to enter for the next message.  Lenient builds always
 * restart at start_state; strict builds do so only while
 * http_should_keep_alive(parser) is true and otherwise go to s_dead.
 */
#if !HTTP_PARSER_STRICT
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#else
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#endif
464
465
466 /* Map errno values to strings for human-readable output */
467 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
468 static struct {
469 const char *name;
470 const char *description;
471 } http_strerror_tab[] = {
472 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
473 };
474 #undef HTTP_STRERROR_GEN
475
476 int http_message_needs_eof(const http_parser *parser);
477
478 /* Our URL parser.
479 *
480 * This is designed to be shared by http_parser_execute() for URL validation,
481 * hence it has a state transition + byte-for-byte interface. In addition, it
482 * is meant to be embedded in http_parser_parse_url(), which does the dirty
483 * work of turning state transitions URL components for its API.
484 *
485 * This function should only be invoked with non-space characters. It is
486 * assumed that the caller cares about (and can detect) the transition between
487 * URL and non-URL states by looking for these.
488 */
489 static enum state
parse_url_char(enum state s,const char ch)490 parse_url_char(enum state s, const char ch)
491 {
492 if (ch == ' ' || ch == '\r' || ch == '\n') {
493 return s_dead;
494 }
495
496 #if HTTP_PARSER_STRICT
497 if (ch == '\t' || ch == '\f') {
498 return s_dead;
499 }
500 #endif
501
502 switch (s) {
503 case s_req_spaces_before_url:
504 /* Proxied requests are followed by scheme of an absolute URI (alpha).
505 * All methods except CONNECT are followed by '/' or '*'.
506 */
507
508 if (ch == '/' || ch == '*') {
509 return s_req_path;
510 }
511
512 if (IS_ALPHA(ch)) {
513 return s_req_schema;
514 }
515
516 break;
517
518 case s_req_schema:
519 if (IS_ALPHA(ch)) {
520 return s;
521 }
522
523 if (ch == ':') {
524 return s_req_schema_slash;
525 }
526
527 break;
528
529 case s_req_schema_slash:
530 if (ch == '/') {
531 return s_req_schema_slash_slash;
532 }
533
534 break;
535
536 case s_req_schema_slash_slash:
537 if (ch == '/') {
538 return s_req_server_start;
539 }
540
541 break;
542
543 case s_req_server_with_at:
544 if (ch == '@') {
545 return s_dead;
546 }
547
548 /* FALLTHROUGH */
549 case s_req_server_start:
550 case s_req_server:
551 if (ch == '/') {
552 return s_req_path;
553 }
554
555 if (ch == '?') {
556 return s_req_query_string_start;
557 }
558
559 if (ch == '@') {
560 return s_req_server_with_at;
561 }
562
563 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
564 return s_req_server;
565 }
566
567 break;
568
569 case s_req_path:
570 if (IS_URL_CHAR(ch)) {
571 return s;
572 }
573
574 switch (ch) {
575 case '?':
576 return s_req_query_string_start;
577
578 case '#':
579 return s_req_fragment_start;
580 }
581
582 break;
583
584 case s_req_query_string_start:
585 case s_req_query_string:
586 if (IS_URL_CHAR(ch)) {
587 return s_req_query_string;
588 }
589
590 switch (ch) {
591 case '?':
592 /* allow extra '?' in query string */
593 return s_req_query_string;
594
595 case '#':
596 return s_req_fragment_start;
597 }
598
599 break;
600
601 case s_req_fragment_start:
602 if (IS_URL_CHAR(ch)) {
603 return s_req_fragment;
604 }
605
606 switch (ch) {
607 case '?':
608 return s_req_fragment;
609
610 case '#':
611 return s;
612 }
613
614 break;
615
616 case s_req_fragment:
617 if (IS_URL_CHAR(ch)) {
618 return s;
619 }
620
621 switch (ch) {
622 case '?':
623 case '#':
624 return s;
625 }
626
627 break;
628
629 default:
630 break;
631 }
632
633 /* We should never fall out of the switch above unless there's an error */
634 return s_dead;
635 }
636
http_parser_execute(http_parser * parser,const http_parser_settings * settings,const char * data,size_t len)637 size_t http_parser_execute (http_parser *parser,
638 const http_parser_settings *settings,
639 const char *data,
640 size_t len)
641 {
642 char c, ch;
643 int8_t unhex_val;
644 const char *p = data;
645 const char *header_field_mark = 0;
646 const char *header_value_mark = 0;
647 const char *url_mark = 0;
648 const char *body_mark = 0;
649 const char *status_mark = 0;
650 enum state p_state = (enum state) parser->state;
651 const unsigned int lenient = parser->lenient_http_headers;
652
653 /* We're in an error state. Don't bother doing anything. */
654 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
655 return 0;
656 }
657
658 if (len == 0) {
659 switch (CURRENT_STATE()) {
660 case s_body_identity_eof:
661 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
662 * we got paused.
663 */
664 CALLBACK_NOTIFY_NOADVANCE(message_complete);
665 return 0;
666
667 case s_dead:
668 case s_start_req_or_res:
669 case s_start_res:
670 case s_start_req:
671 return 0;
672
673 default:
674 SET_ERRNO(HPE_INVALID_EOF_STATE);
675 return 1;
676 }
677 }
678
679
680 if (CURRENT_STATE() == s_header_field)
681 header_field_mark = data;
682 if (CURRENT_STATE() == s_header_value)
683 header_value_mark = data;
684 switch (CURRENT_STATE()) {
685 case s_req_path:
686 case s_req_schema:
687 case s_req_schema_slash:
688 case s_req_schema_slash_slash:
689 case s_req_server_start:
690 case s_req_server:
691 case s_req_server_with_at:
692 case s_req_query_string_start:
693 case s_req_query_string:
694 case s_req_fragment_start:
695 case s_req_fragment:
696 url_mark = data;
697 break;
698 case s_res_status:
699 status_mark = data;
700 break;
701 default:
702 break;
703 }
704
705 for (p=data; p != data + len; p++) {
706 ch = *p;
707
708 if (PARSING_HEADER(CURRENT_STATE()))
709 COUNT_HEADER_SIZE(1);
710
711 reexecute:
712 switch (CURRENT_STATE()) {
713
714 case s_dead:
715 /* this state is used after a 'Connection: close' message
716 * the parser will error out if it reads another message
717 */
718 if (LIKELY(ch == CR || ch == LF))
719 break;
720
721 SET_ERRNO(HPE_CLOSED_CONNECTION);
722 goto error;
723
724 case s_start_req_or_res:
725 {
726 if (ch == CR || ch == LF)
727 break;
728 parser->flags = 0;
729 parser->content_length = ULLONG_MAX;
730
731 if (ch == 'H') {
732 UPDATE_STATE(s_res_or_resp_H);
733
734 CALLBACK_NOTIFY(message_begin);
735 } else if (ch == 'I') {
736 UPDATE_STATE(s_res_or_resp_I);
737
738 CALLBACK_NOTIFY(message_begin);
739 } else {
740 parser->type = HTTP_REQUEST;
741 UPDATE_STATE(s_start_req);
742 REEXECUTE();
743 }
744
745 break;
746 }
747
748 case s_res_or_resp_I: /* ICY URI case */
749 if (ch == 'C') {
750 parser->type = HTTP_RESPONSE;
751 UPDATE_STATE(s_res_IC);
752 }
753 break;
754
755 case s_res_or_resp_H:
756 if (ch == 'T') {
757 parser->type = HTTP_RESPONSE;
758 UPDATE_STATE(s_res_HT);
759 } else {
760 if (UNLIKELY(ch != 'E')) {
761 SET_ERRNO(HPE_INVALID_CONSTANT);
762 goto error;
763 }
764
765 parser->type = HTTP_REQUEST;
766 parser->method = HTTP_HEAD;
767 parser->index = 2;
768 UPDATE_STATE(s_req_method);
769 }
770 break;
771
772 case s_start_res:
773 {
774 parser->flags = 0;
775 parser->content_length = ULLONG_MAX;
776
777 switch (ch) {
778 case 'H':
779 UPDATE_STATE(s_res_H);
780 break;
781 case 'I': /* ICY URI */
782 UPDATE_STATE(s_res_I);
783 break;
784 case CR:
785 case LF:
786 break;
787
788 default:
789 SET_ERRNO(HPE_INVALID_CONSTANT);
790 goto error;
791 }
792
793 CALLBACK_NOTIFY(message_begin);
794 break;
795 }
796 case s_res_I:
797 STRICT_CHECK(ch != 'C');
798 UPDATE_STATE(s_res_IC);
799 break;
800
801 case s_res_IC:
802 STRICT_CHECK(ch != 'Y');
803 UPDATE_STATE(s_res_http_minor);
804 break;
805
806 case s_res_H:
807 STRICT_CHECK(ch != 'T');
808 UPDATE_STATE(s_res_HT);
809 break;
810
811 case s_res_HT:
812 STRICT_CHECK(ch != 'T');
813 UPDATE_STATE(s_res_HTT);
814 break;
815
816 case s_res_HTT:
817 STRICT_CHECK(ch != 'P');
818 UPDATE_STATE(s_res_HTTP);
819 break;
820
821 case s_res_HTTP:
822 STRICT_CHECK(ch != '/');
823 UPDATE_STATE(s_res_first_http_major);
824 break;
825
826 case s_res_first_http_major:
827 if (UNLIKELY(ch < '0' || ch > '9')) {
828 SET_ERRNO(HPE_INVALID_VERSION);
829 goto error;
830 }
831
832 parser->http_major = ch - '0';
833 UPDATE_STATE(s_res_http_major);
834 break;
835
836 /* major HTTP version or dot */
837 case s_res_http_major:
838 {
839 if (ch == '.') {
840 UPDATE_STATE(s_res_first_http_minor);
841 break;
842 }
843
844 if (!IS_NUM(ch)) {
845 SET_ERRNO(HPE_INVALID_VERSION);
846 goto error;
847 }
848
849 parser->http_major *= 10;
850 parser->http_major += ch - '0';
851
852 if (UNLIKELY(parser->http_major > 999)) {
853 SET_ERRNO(HPE_INVALID_VERSION);
854 goto error;
855 }
856
857 break;
858 }
859
860 /* first digit of minor HTTP version */
861 case s_res_first_http_minor:
862 if (UNLIKELY(!IS_NUM(ch))) {
863 SET_ERRNO(HPE_INVALID_VERSION);
864 goto error;
865 }
866
867 parser->http_minor = ch - '0';
868 UPDATE_STATE(s_res_http_minor);
869 break;
870
871 /* minor HTTP version or end of request line */
872 case s_res_http_minor:
873 {
874 if (ch == ' ') {
875 UPDATE_STATE(s_res_first_status_code);
876 break;
877 }
878
879 if (UNLIKELY(!IS_NUM(ch))) {
880 SET_ERRNO(HPE_INVALID_VERSION);
881 goto error;
882 }
883
884 parser->http_minor *= 10;
885 parser->http_minor += ch - '0';
886
887 if (UNLIKELY(parser->http_minor > 999)) {
888 SET_ERRNO(HPE_INVALID_VERSION);
889 goto error;
890 }
891
892 break;
893 }
894
895 case s_res_first_status_code:
896 {
897 if (!IS_NUM(ch)) {
898 if (ch == ' ') {
899 break;
900 }
901
902 SET_ERRNO(HPE_INVALID_STATUS);
903 goto error;
904 }
905 parser->status_code = ch - '0';
906 UPDATE_STATE(s_res_status_code);
907 break;
908 }
909
910 case s_res_status_code:
911 {
912 if (!IS_NUM(ch)) {
913 switch (ch) {
914 case ' ':
915 UPDATE_STATE(s_res_status_start);
916 break;
917 case CR:
918 UPDATE_STATE(s_res_line_almost_done);
919 break;
920 case LF:
921 UPDATE_STATE(s_header_field_start);
922 break;
923 default:
924 SET_ERRNO(HPE_INVALID_STATUS);
925 goto error;
926 }
927 break;
928 }
929
930 parser->status_code *= 10;
931 parser->status_code += ch - '0';
932
933 if (UNLIKELY(parser->status_code > 999)) {
934 SET_ERRNO(HPE_INVALID_STATUS);
935 goto error;
936 }
937
938 break;
939 }
940
941 case s_res_status_start:
942 {
943 if (ch == CR) {
944 UPDATE_STATE(s_res_line_almost_done);
945 break;
946 }
947
948 if (ch == LF) {
949 UPDATE_STATE(s_header_field_start);
950 break;
951 }
952
953 MARK(status);
954 UPDATE_STATE(s_res_status);
955 parser->index = 0;
956 break;
957 }
958
959 case s_res_status:
960 if (ch == CR) {
961 UPDATE_STATE(s_res_line_almost_done);
962 CALLBACK_DATA(status);
963 break;
964 }
965
966 if (ch == LF) {
967 UPDATE_STATE(s_header_field_start);
968 CALLBACK_DATA(status);
969 break;
970 }
971
972 break;
973
974 case s_res_line_almost_done:
975 STRICT_CHECK(ch != LF);
976 UPDATE_STATE(s_header_field_start);
977 break;
978
979 case s_start_req:
980 {
981 if (ch == CR || ch == LF)
982 break;
983 parser->flags = 0;
984 parser->content_length = ULLONG_MAX;
985
986 if (UNLIKELY(!IS_ALPHA(ch))) {
987 SET_ERRNO(HPE_INVALID_METHOD);
988 goto error;
989 }
990
991 parser->method = (enum http_method) 0;
992 parser->index = 1;
993 switch (ch) {
994 case 'A': parser->method = HTTP_ACL; break;
995 case 'B': parser->method = HTTP_BIND; break;
996 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
997 case 'D': parser->method = HTTP_DELETE; break;
998 case 'G': parser->method = HTTP_GET; break;
999 case 'H': parser->method = HTTP_HEAD; break;
1000 case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
1001 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
1002 case 'N': parser->method = HTTP_NOTIFY; break;
1003 case 'O': parser->method = HTTP_OPTIONS; break;
1004 case 'P': parser->method = HTTP_POST;
1005 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
1006 break;
1007 case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
1008 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
1009 case 'T': parser->method = HTTP_TRACE; break;
1010 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
1011 default:
1012 SET_ERRNO(HPE_INVALID_METHOD);
1013 goto error;
1014 }
1015 UPDATE_STATE(s_req_method);
1016
1017 CALLBACK_NOTIFY(message_begin);
1018
1019 break;
1020 }
1021
1022 case s_req_method:
1023 {
1024 const char *matcher;
1025 if (UNLIKELY(ch == '\0')) {
1026 SET_ERRNO(HPE_INVALID_METHOD);
1027 goto error;
1028 }
1029
1030 matcher = method_strings[parser->method];
1031 if (ch == ' ' && matcher[parser->index] == '\0') {
1032 UPDATE_STATE(s_req_spaces_before_url);
1033 } else if (ch == matcher[parser->index]) {
1034 ; /* nada */
1035 } else if (IS_ALPHA(ch)) {
1036
1037 switch (parser->method << 16 | parser->index << 8 | ch) {
1038 #define XX(meth, pos, ch, new_meth) \
1039 case (HTTP_##meth << 16 | pos << 8 | ch): \
1040 parser->method = HTTP_##new_meth; break;
1041
1042 XX(POST, 1, 'U', PUT)
1043 XX(POST, 1, 'A', PATCH)
1044 XX(CONNECT, 1, 'H', CHECKOUT)
1045 XX(CONNECT, 2, 'P', COPY)
1046 XX(MKCOL, 1, 'O', MOVE)
1047 XX(MKCOL, 1, 'E', MERGE)
1048 XX(MKCOL, 2, 'A', MKACTIVITY)
1049 XX(MKCOL, 3, 'A', MKCALENDAR)
1050 XX(SUBSCRIBE, 1, 'E', SEARCH)
1051 XX(REPORT, 2, 'B', REBIND)
1052 XX(POST, 1, 'R', PROPFIND)
1053 XX(PROPFIND, 4, 'P', PROPPATCH)
1054 XX(PUT, 2, 'R', PURGE)
1055 XX(LOCK, 1, 'I', LINK)
1056 XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
1057 XX(UNLOCK, 2, 'B', UNBIND)
1058 XX(UNLOCK, 3, 'I', UNLINK)
1059 #undef XX
1060
1061 default:
1062 SET_ERRNO(HPE_INVALID_METHOD);
1063 goto error;
1064 }
1065 } else if (ch == '-' &&
1066 parser->index == 1 &&
1067 parser->method == HTTP_MKCOL) {
1068 parser->method = HTTP_MSEARCH;
1069 } else {
1070 SET_ERRNO(HPE_INVALID_METHOD);
1071 goto error;
1072 }
1073
1074 ++parser->index;
1075 break;
1076 }
1077
1078 case s_req_spaces_before_url:
1079 {
1080 if (ch == ' ') break;
1081
1082 MARK(url);
1083 if (parser->method == HTTP_CONNECT) {
1084 UPDATE_STATE(s_req_server_start);
1085 }
1086
1087 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1088 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1089 SET_ERRNO(HPE_INVALID_URL);
1090 goto error;
1091 }
1092
1093 break;
1094 }
1095
1096 case s_req_schema:
1097 case s_req_schema_slash:
1098 case s_req_schema_slash_slash:
1099 case s_req_server_start:
1100 {
1101 switch (ch) {
1102 /* No whitespace allowed here */
1103 case ' ':
1104 case CR:
1105 case LF:
1106 SET_ERRNO(HPE_INVALID_URL);
1107 goto error;
1108 default:
1109 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1110 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1111 SET_ERRNO(HPE_INVALID_URL);
1112 goto error;
1113 }
1114 }
1115
1116 break;
1117 }
1118
1119 case s_req_server:
1120 case s_req_server_with_at:
1121 case s_req_path:
1122 case s_req_query_string_start:
1123 case s_req_query_string:
1124 case s_req_fragment_start:
1125 case s_req_fragment:
1126 {
1127 switch (ch) {
1128 case ' ':
1129 UPDATE_STATE(s_req_http_start);
1130 CALLBACK_DATA(url);
1131 break;
1132 case CR:
1133 case LF:
1134 parser->http_major = 0;
1135 parser->http_minor = 9;
1136 UPDATE_STATE((ch == CR) ?
1137 s_req_line_almost_done :
1138 s_header_field_start);
1139 CALLBACK_DATA(url);
1140 break;
1141 default:
1142 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1143 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1144 SET_ERRNO(HPE_INVALID_URL);
1145 goto error;
1146 }
1147 }
1148 break;
1149 }
1150
1151 case s_req_http_start:
1152 switch (ch) {
1153 case 'H':
1154 UPDATE_STATE(s_req_http_H);
1155 break;
1156 case ' ':
1157 break;
1158 default:
1159 SET_ERRNO(HPE_INVALID_CONSTANT);
1160 goto error;
1161 }
1162 break;
1163
1164 case s_req_http_H:
1165 STRICT_CHECK(ch != 'T');
1166 UPDATE_STATE(s_req_http_HT);
1167 break;
1168
1169 case s_req_http_HT:
1170 STRICT_CHECK(ch != 'T');
1171 UPDATE_STATE(s_req_http_HTT);
1172 break;
1173
1174 case s_req_http_HTT:
1175 STRICT_CHECK(ch != 'P');
1176 UPDATE_STATE(s_req_http_HTTP);
1177 break;
1178
1179 case s_req_http_HTTP:
1180 STRICT_CHECK(ch != '/');
1181 UPDATE_STATE(s_req_first_http_major);
1182 break;
1183
1184 /* first digit of major HTTP version */
1185 case s_req_first_http_major:
1186 if (UNLIKELY(ch < '1' || ch > '9')) {
1187 SET_ERRNO(HPE_INVALID_VERSION);
1188 goto error;
1189 }
1190
1191 parser->http_major = ch - '0';
1192 UPDATE_STATE(s_req_http_major);
1193 break;
1194
1195 /* major HTTP version or dot */
1196 case s_req_http_major:
1197 {
1198 if (ch == '.') {
1199 UPDATE_STATE(s_req_first_http_minor);
1200 break;
1201 }
1202
1203 if (UNLIKELY(!IS_NUM(ch))) {
1204 SET_ERRNO(HPE_INVALID_VERSION);
1205 goto error;
1206 }
1207
1208 parser->http_major *= 10;
1209 parser->http_major += ch - '0';
1210
1211 if (UNLIKELY(parser->http_major > 999)) {
1212 SET_ERRNO(HPE_INVALID_VERSION);
1213 goto error;
1214 }
1215
1216 break;
1217 }
1218
1219 /* first digit of minor HTTP version */
1220 case s_req_first_http_minor:
1221 if (UNLIKELY(!IS_NUM(ch))) {
1222 SET_ERRNO(HPE_INVALID_VERSION);
1223 goto error;
1224 }
1225
1226 parser->http_minor = ch - '0';
1227 UPDATE_STATE(s_req_http_minor);
1228 break;
1229
1230 /* minor HTTP version or end of request line */
1231 case s_req_http_minor:
1232 {
1233 if (ch == CR) {
1234 UPDATE_STATE(s_req_line_almost_done);
1235 break;
1236 }
1237
1238 if (ch == LF) {
1239 UPDATE_STATE(s_header_field_start);
1240 break;
1241 }
1242
1243 /* XXX allow spaces after digit? */
1244
1245 if (UNLIKELY(!IS_NUM(ch))) {
1246 SET_ERRNO(HPE_INVALID_VERSION);
1247 goto error;
1248 }
1249
1250 parser->http_minor *= 10;
1251 parser->http_minor += ch - '0';
1252
1253 if (UNLIKELY(parser->http_minor > 999)) {
1254 SET_ERRNO(HPE_INVALID_VERSION);
1255 goto error;
1256 }
1257
1258 break;
1259 }
1260
1261 /* end of request line */
1262 case s_req_line_almost_done:
1263 {
1264 if (UNLIKELY(ch != LF)) {
1265 SET_ERRNO(HPE_LF_EXPECTED);
1266 goto error;
1267 }
1268
1269 UPDATE_STATE(s_header_field_start);
1270 break;
1271 }
1272
1273 case s_header_field_start:
1274 {
1275 if (ch == CR) {
1276 UPDATE_STATE(s_headers_almost_done);
1277 break;
1278 }
1279
1280 if (ch == LF) {
1281 /* they might be just sending \n instead of \r\n so this would be
1282 * the second \n to denote the end of headers*/
1283 UPDATE_STATE(s_headers_almost_done);
1284 REEXECUTE();
1285 }
1286
1287 c = TOKEN(ch);
1288
1289 if (UNLIKELY(!c)) {
1290 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1291 goto error;
1292 }
1293
1294 MARK(header_field);
1295
1296 parser->index = 0;
1297 UPDATE_STATE(s_header_field);
1298
1299 switch (c) {
1300 case 'c':
1301 parser->header_state = h_C;
1302 break;
1303
1304 case 'p':
1305 parser->header_state = h_matching_proxy_connection;
1306 break;
1307
1308 case 't':
1309 parser->header_state = h_matching_transfer_encoding;
1310 break;
1311
1312 case 'u':
1313 parser->header_state = h_matching_upgrade;
1314 break;
1315
1316 default:
1317 parser->header_state = h_general;
1318 break;
1319 }
1320 break;
1321 }
1322
1323 case s_header_field:
1324 {
1325 const char* start = p;
1326 for (; p != data + len; p++) {
1327 ch = *p;
1328 c = TOKEN(ch);
1329
1330 if (!c)
1331 break;
1332
1333 switch (parser->header_state) {
1334 case h_general:
1335 break;
1336
1337 case h_C:
1338 parser->index++;
1339 parser->header_state = (c == 'o' ? h_CO : h_general);
1340 break;
1341
1342 case h_CO:
1343 parser->index++;
1344 parser->header_state = (c == 'n' ? h_CON : h_general);
1345 break;
1346
1347 case h_CON:
1348 parser->index++;
1349 switch (c) {
1350 case 'n':
1351 parser->header_state = h_matching_connection;
1352 break;
1353 case 't':
1354 parser->header_state = h_matching_content_length;
1355 break;
1356 default:
1357 parser->header_state = h_general;
1358 break;
1359 }
1360 break;
1361
1362 /* connection */
1363
1364 case h_matching_connection:
1365 parser->index++;
1366 if (parser->index > sizeof(CONNECTION)-1
1367 || c != CONNECTION[parser->index]) {
1368 parser->header_state = h_general;
1369 } else if (parser->index == sizeof(CONNECTION)-2) {
1370 parser->header_state = h_connection;
1371 }
1372 break;
1373
1374 /* proxy-connection */
1375
1376 case h_matching_proxy_connection:
1377 parser->index++;
1378 if (parser->index > sizeof(PROXY_CONNECTION)-1
1379 || c != PROXY_CONNECTION[parser->index]) {
1380 parser->header_state = h_general;
1381 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1382 parser->header_state = h_connection;
1383 }
1384 break;
1385
1386 /* content-length */
1387
1388 case h_matching_content_length:
1389 parser->index++;
1390 if (parser->index > sizeof(CONTENT_LENGTH)-1
1391 || c != CONTENT_LENGTH[parser->index]) {
1392 parser->header_state = h_general;
1393 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1394 if (parser->flags & F_CONTENTLENGTH) {
1395 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1396 goto error;
1397 }
1398 parser->header_state = h_content_length;
1399 parser->flags |= F_CONTENTLENGTH;
1400 }
1401 break;
1402
1403 /* transfer-encoding */
1404
1405 case h_matching_transfer_encoding:
1406 parser->index++;
1407 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1408 || c != TRANSFER_ENCODING[parser->index]) {
1409 parser->header_state = h_general;
1410 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1411 parser->header_state = h_transfer_encoding;
1412 }
1413 break;
1414
1415 /* upgrade */
1416
1417 case h_matching_upgrade:
1418 parser->index++;
1419 if (parser->index > sizeof(UPGRADE)-1
1420 || c != UPGRADE[parser->index]) {
1421 parser->header_state = h_general;
1422 } else if (parser->index == sizeof(UPGRADE)-2) {
1423 parser->header_state = h_upgrade;
1424 }
1425 break;
1426
1427 case h_connection:
1428 case h_content_length:
1429 case h_transfer_encoding:
1430 case h_upgrade:
1431 if (ch != ' ') parser->header_state = h_general;
1432 break;
1433
1434 default:
1435 assert(0 && "Unknown header_state");
1436 break;
1437 }
1438 }
1439
1440 COUNT_HEADER_SIZE(p - start);
1441
1442 if (p == data + len) {
1443 --p;
1444 break;
1445 }
1446
1447 if (ch == ':') {
1448 UPDATE_STATE(s_header_value_discard_ws);
1449 CALLBACK_DATA(header_field);
1450 break;
1451 }
1452
1453 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1454 goto error;
1455 }
1456
1457 case s_header_value_discard_ws:
1458 if (ch == ' ' || ch == '\t') break;
1459
1460 if (ch == CR) {
1461 UPDATE_STATE(s_header_value_discard_ws_almost_done);
1462 break;
1463 }
1464
1465 if (ch == LF) {
1466 UPDATE_STATE(s_header_value_discard_lws);
1467 break;
1468 }
1469
1470 /* FALLTHROUGH */
1471
1472 case s_header_value_start:
1473 {
1474 MARK(header_value);
1475
1476 UPDATE_STATE(s_header_value);
1477 parser->index = 0;
1478
1479 c = LOWER(ch);
1480
1481 switch (parser->header_state) {
1482 case h_upgrade:
1483 parser->flags |= F_UPGRADE;
1484 parser->header_state = h_general;
1485 break;
1486
1487 case h_transfer_encoding:
1488 /* looking for 'Transfer-Encoding: chunked' */
1489 if ('c' == c) {
1490 parser->header_state = h_matching_transfer_encoding_chunked;
1491 } else {
1492 parser->header_state = h_general;
1493 }
1494 break;
1495
1496 case h_content_length:
1497 if (UNLIKELY(!IS_NUM(ch))) {
1498 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1499 goto error;
1500 }
1501
1502 parser->content_length = ch - '0';
1503 break;
1504
1505 case h_connection:
1506 /* looking for 'Connection: keep-alive' */
1507 if (c == 'k') {
1508 parser->header_state = h_matching_connection_keep_alive;
1509 /* looking for 'Connection: close' */
1510 } else if (c == 'c') {
1511 parser->header_state = h_matching_connection_close;
1512 } else if (c == 'u') {
1513 parser->header_state = h_matching_connection_upgrade;
1514 } else {
1515 parser->header_state = h_matching_connection_token;
1516 }
1517 break;
1518
1519 /* Multi-value `Connection` header */
1520 case h_matching_connection_token_start:
1521 break;
1522
1523 default:
1524 parser->header_state = h_general;
1525 break;
1526 }
1527 break;
1528 }
1529
1530 case s_header_value:
1531 {
1532 const char* start = p;
1533 enum header_states h_state = (enum header_states) parser->header_state;
1534 for (; p != data + len; p++) {
1535 ch = *p;
1536 if (ch == CR) {
1537 UPDATE_STATE(s_header_almost_done);
1538 parser->header_state = h_state;
1539 CALLBACK_DATA(header_value);
1540 break;
1541 }
1542
1543 if (ch == LF) {
1544 UPDATE_STATE(s_header_almost_done);
1545 COUNT_HEADER_SIZE(p - start);
1546 parser->header_state = h_state;
1547 CALLBACK_DATA_NOADVANCE(header_value);
1548 REEXECUTE();
1549 }
1550
1551 if (!lenient && !IS_HEADER_CHAR(ch)) {
1552 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1553 goto error;
1554 }
1555
1556 c = LOWER(ch);
1557
1558 switch (h_state) {
1559 case h_general:
1560 {
1561 const char* p_cr;
1562 const char* p_lf;
1563 size_t limit = data + len - p;
1564
1565 limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1566
1567 p_cr = (const char*) memchr(p, CR, limit);
1568 p_lf = (const char*) memchr(p, LF, limit);
1569 if (p_cr != NULL) {
1570 if (p_lf != NULL && p_cr >= p_lf)
1571 p = p_lf;
1572 else
1573 p = p_cr;
1574 } else if (UNLIKELY(p_lf != NULL)) {
1575 p = p_lf;
1576 } else {
1577 p = data + len;
1578 }
1579 --p;
1580
1581 break;
1582 }
1583
1584 case h_connection:
1585 case h_transfer_encoding:
1586 assert(0 && "Shouldn't get here.");
1587 break;
1588
1589 case h_content_length:
1590 {
1591 uint64_t t;
1592
1593 if (ch == ' ') break;
1594
1595 if (UNLIKELY(!IS_NUM(ch))) {
1596 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1597 parser->header_state = h_state;
1598 goto error;
1599 }
1600
1601 t = parser->content_length;
1602 t *= 10;
1603 t += ch - '0';
1604
1605 /* Overflow? Test against a conservative limit for simplicity. */
1606 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1607 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1608 parser->header_state = h_state;
1609 goto error;
1610 }
1611
1612 parser->content_length = t;
1613 break;
1614 }
1615
1616 /* Transfer-Encoding: chunked */
1617 case h_matching_transfer_encoding_chunked:
1618 parser->index++;
1619 if (parser->index > sizeof(CHUNKED)-1
1620 || c != CHUNKED[parser->index]) {
1621 h_state = h_general;
1622 } else if (parser->index == sizeof(CHUNKED)-2) {
1623 h_state = h_transfer_encoding_chunked;
1624 }
1625 break;
1626
1627 case h_matching_connection_token_start:
1628 /* looking for 'Connection: keep-alive' */
1629 if (c == 'k') {
1630 h_state = h_matching_connection_keep_alive;
1631 /* looking for 'Connection: close' */
1632 } else if (c == 'c') {
1633 h_state = h_matching_connection_close;
1634 } else if (c == 'u') {
1635 h_state = h_matching_connection_upgrade;
1636 } else if (STRICT_TOKEN(c)) {
1637 h_state = h_matching_connection_token;
1638 } else if (c == ' ' || c == '\t') {
1639 /* Skip lws */
1640 } else {
1641 h_state = h_general;
1642 }
1643 break;
1644
1645 /* looking for 'Connection: keep-alive' */
1646 case h_matching_connection_keep_alive:
1647 parser->index++;
1648 if (parser->index > sizeof(KEEP_ALIVE)-1
1649 || c != KEEP_ALIVE[parser->index]) {
1650 h_state = h_matching_connection_token;
1651 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1652 h_state = h_connection_keep_alive;
1653 }
1654 break;
1655
1656 /* looking for 'Connection: close' */
1657 case h_matching_connection_close:
1658 parser->index++;
1659 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1660 h_state = h_matching_connection_token;
1661 } else if (parser->index == sizeof(CLOSE)-2) {
1662 h_state = h_connection_close;
1663 }
1664 break;
1665
1666 /* looking for 'Connection: upgrade' */
1667 case h_matching_connection_upgrade:
1668 parser->index++;
1669 if (parser->index > sizeof(UPGRADE) - 1 ||
1670 c != UPGRADE[parser->index]) {
1671 h_state = h_matching_connection_token;
1672 } else if (parser->index == sizeof(UPGRADE)-2) {
1673 h_state = h_connection_upgrade;
1674 }
1675 break;
1676
1677 case h_matching_connection_token:
1678 if (ch == ',') {
1679 h_state = h_matching_connection_token_start;
1680 parser->index = 0;
1681 }
1682 break;
1683
1684 case h_transfer_encoding_chunked:
1685 if (ch != ' ') h_state = h_general;
1686 break;
1687
1688 case h_connection_keep_alive:
1689 case h_connection_close:
1690 case h_connection_upgrade:
1691 if (ch == ',') {
1692 if (h_state == h_connection_keep_alive) {
1693 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1694 } else if (h_state == h_connection_close) {
1695 parser->flags |= F_CONNECTION_CLOSE;
1696 } else if (h_state == h_connection_upgrade) {
1697 parser->flags |= F_CONNECTION_UPGRADE;
1698 }
1699 h_state = h_matching_connection_token_start;
1700 parser->index = 0;
1701 } else if (ch != ' ') {
1702 h_state = h_matching_connection_token;
1703 }
1704 break;
1705
1706 default:
1707 UPDATE_STATE(s_header_value);
1708 h_state = h_general;
1709 break;
1710 }
1711 }
1712 parser->header_state = h_state;
1713
1714 COUNT_HEADER_SIZE(p - start);
1715
1716 if (p == data + len)
1717 --p;
1718 break;
1719 }
1720
1721 case s_header_almost_done:
1722 {
1723 if (UNLIKELY(ch != LF)) {
1724 SET_ERRNO(HPE_LF_EXPECTED);
1725 goto error;
1726 }
1727
1728 UPDATE_STATE(s_header_value_lws);
1729 break;
1730 }
1731
1732 case s_header_value_lws:
1733 {
1734 if (ch == ' ' || ch == '\t') {
1735 UPDATE_STATE(s_header_value_start);
1736 REEXECUTE();
1737 }
1738
1739 /* finished the header */
1740 switch (parser->header_state) {
1741 case h_connection_keep_alive:
1742 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1743 break;
1744 case h_connection_close:
1745 parser->flags |= F_CONNECTION_CLOSE;
1746 break;
1747 case h_transfer_encoding_chunked:
1748 parser->flags |= F_CHUNKED;
1749 break;
1750 case h_connection_upgrade:
1751 parser->flags |= F_CONNECTION_UPGRADE;
1752 break;
1753 default:
1754 break;
1755 }
1756
1757 UPDATE_STATE(s_header_field_start);
1758 REEXECUTE();
1759 }
1760
1761 case s_header_value_discard_ws_almost_done:
1762 {
1763 STRICT_CHECK(ch != LF);
1764 UPDATE_STATE(s_header_value_discard_lws);
1765 break;
1766 }
1767
1768 case s_header_value_discard_lws:
1769 {
1770 if (ch == ' ' || ch == '\t') {
1771 UPDATE_STATE(s_header_value_discard_ws);
1772 break;
1773 } else {
1774 switch (parser->header_state) {
1775 case h_connection_keep_alive:
1776 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1777 break;
1778 case h_connection_close:
1779 parser->flags |= F_CONNECTION_CLOSE;
1780 break;
1781 case h_connection_upgrade:
1782 parser->flags |= F_CONNECTION_UPGRADE;
1783 break;
1784 case h_transfer_encoding_chunked:
1785 parser->flags |= F_CHUNKED;
1786 break;
1787 default:
1788 break;
1789 }
1790
1791 /* header value was empty */
1792 MARK(header_value);
1793 UPDATE_STATE(s_header_field_start);
1794 CALLBACK_DATA_NOADVANCE(header_value);
1795 REEXECUTE();
1796 }
1797 }
1798
1799 case s_headers_almost_done:
1800 {
1801 STRICT_CHECK(ch != LF);
1802
1803 if (parser->flags & F_TRAILING) {
1804 /* End of a chunked request */
1805 UPDATE_STATE(s_message_done);
1806 CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1807 REEXECUTE();
1808 }
1809
1810 /* Cannot use chunked encoding and a content-length header together
1811 per the HTTP specification. */
1812 if ((parser->flags & F_CHUNKED) &&
1813 (parser->flags & F_CONTENTLENGTH)) {
1814 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1815 goto error;
1816 }
1817
1818 UPDATE_STATE(s_headers_done);
1819
1820 /* Set this here so that on_headers_complete() callbacks can see it */
1821 parser->upgrade =
1822 ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) ==
1823 (F_UPGRADE | F_CONNECTION_UPGRADE) ||
1824 parser->method == HTTP_CONNECT);
1825
1826 /* Here we call the headers_complete callback. This is somewhat
1827 * different than other callbacks because if the user returns 1, we
1828 * will interpret that as saying that this message has no body. This
1829 * is needed for the annoying case of receiving a response to a HEAD
1830 * request.
1831 *
1832 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1833 * we have to simulate it by handling a change in errno below.
1834 */
1835 if (settings->on_headers_complete) {
1836 switch (settings->on_headers_complete(parser)) {
1837 case 0:
1838 break;
1839
1840 case 2:
1841 parser->upgrade = 1;
1842 /* falls through */
1843 case 1:
1844 parser->flags |= F_SKIPBODY;
1845 break;
1846
1847 default:
1848 SET_ERRNO(HPE_CB_headers_complete);
1849 RETURN(p - data); /* Error */
1850 }
1851 }
1852
1853 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1854 RETURN(p - data);
1855 }
1856
1857 REEXECUTE();
1858 }
1859
1860 case s_headers_done:
1861 {
1862 int hasBody;
1863 STRICT_CHECK(ch != LF);
1864
1865 parser->nread = 0;
1866
1867 hasBody = parser->flags & F_CHUNKED ||
1868 (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1869 if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1870 (parser->flags & F_SKIPBODY) || !hasBody)) {
1871 /* Exit, the rest of the message is in a different protocol. */
1872 UPDATE_STATE(NEW_MESSAGE());
1873 CALLBACK_NOTIFY(message_complete);
1874 RETURN((p - data) + 1);
1875 }
1876
1877 if (parser->flags & F_SKIPBODY) {
1878 UPDATE_STATE(NEW_MESSAGE());
1879 CALLBACK_NOTIFY(message_complete);
1880 } else if (parser->flags & F_CHUNKED) {
1881 /* chunked encoding - ignore Content-Length header */
1882 UPDATE_STATE(s_chunk_size_start);
1883 } else {
1884 if (parser->content_length == 0) {
1885 /* Content-Length header given but zero: Content-Length: 0\r\n */
1886 UPDATE_STATE(NEW_MESSAGE());
1887 CALLBACK_NOTIFY(message_complete);
1888 } else if (parser->content_length != ULLONG_MAX) {
1889 /* Content-Length header given and non-zero */
1890 UPDATE_STATE(s_body_identity);
1891 } else {
1892 if (!http_message_needs_eof(parser)) {
1893 /* Assume content-length 0 - read the next */
1894 UPDATE_STATE(NEW_MESSAGE());
1895 CALLBACK_NOTIFY(message_complete);
1896 } else {
1897 /* Read body until EOF */
1898 UPDATE_STATE(s_body_identity_eof);
1899 }
1900 }
1901 }
1902
1903 break;
1904 }
1905
1906 case s_body_identity:
1907 {
1908 uint64_t to_read = MIN(parser->content_length,
1909 (uint64_t) ((data + len) - p));
1910
1911 assert(parser->content_length != 0
1912 && parser->content_length != ULLONG_MAX);
1913
1914 /* The difference between advancing content_length and p is because
1915 * the latter will automatically advance on the next loop iteration.
1916 * Further, if content_length ends up at 0, we want to see the last
1917 * byte again for our message complete callback.
1918 */
1919 MARK(body);
1920 parser->content_length -= to_read;
1921 p += to_read - 1;
1922
1923 if (parser->content_length == 0) {
1924 UPDATE_STATE(s_message_done);
1925
1926 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1927 *
1928 * The alternative to doing this is to wait for the next byte to
1929 * trigger the data callback, just as in every other case. The
1930 * problem with this is that this makes it difficult for the test
1931 * harness to distinguish between complete-on-EOF and
1932 * complete-on-length. It's not clear that this distinction is
1933 * important for applications, but let's keep it for now.
1934 */
1935 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1936 REEXECUTE();
1937 }
1938
1939 break;
1940 }
1941
1942 /* read until EOF */
1943 case s_body_identity_eof:
1944 MARK(body);
1945 p = data + len - 1;
1946
1947 break;
1948
1949 case s_message_done:
1950 UPDATE_STATE(NEW_MESSAGE());
1951 CALLBACK_NOTIFY(message_complete);
1952 if (parser->upgrade) {
1953 /* Exit, the rest of the message is in a different protocol. */
1954 RETURN((p - data) + 1);
1955 }
1956 break;
1957
1958 case s_chunk_size_start:
1959 {
1960 assert(parser->nread == 1);
1961 assert(parser->flags & F_CHUNKED);
1962
1963 unhex_val = unhex[(unsigned char)ch];
1964 if (UNLIKELY(unhex_val == -1)) {
1965 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1966 goto error;
1967 }
1968
1969 parser->content_length = unhex_val;
1970 UPDATE_STATE(s_chunk_size);
1971 break;
1972 }
1973
1974 case s_chunk_size:
1975 {
1976 uint64_t t;
1977
1978 assert(parser->flags & F_CHUNKED);
1979
1980 if (ch == CR) {
1981 UPDATE_STATE(s_chunk_size_almost_done);
1982 break;
1983 }
1984
1985 unhex_val = unhex[(unsigned char)ch];
1986
1987 if (unhex_val == -1) {
1988 if (ch == ';' || ch == ' ') {
1989 UPDATE_STATE(s_chunk_parameters);
1990 break;
1991 }
1992
1993 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1994 goto error;
1995 }
1996
1997 t = parser->content_length;
1998 t *= 16;
1999 t += unhex_val;
2000
2001 /* Overflow? Test against a conservative limit for simplicity. */
2002 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
2003 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
2004 goto error;
2005 }
2006
2007 parser->content_length = t;
2008 break;
2009 }
2010
2011 case s_chunk_parameters:
2012 {
2013 assert(parser->flags & F_CHUNKED);
2014 /* just ignore this shit. TODO check for overflow */
2015 if (ch == CR) {
2016 UPDATE_STATE(s_chunk_size_almost_done);
2017 break;
2018 }
2019 break;
2020 }
2021
2022 case s_chunk_size_almost_done:
2023 {
2024 assert(parser->flags & F_CHUNKED);
2025 STRICT_CHECK(ch != LF);
2026
2027 parser->nread = 0;
2028
2029 if (parser->content_length == 0) {
2030 parser->flags |= F_TRAILING;
2031 UPDATE_STATE(s_header_field_start);
2032 } else {
2033 UPDATE_STATE(s_chunk_data);
2034 }
2035 CALLBACK_NOTIFY(chunk_header);
2036 break;
2037 }
2038
2039 case s_chunk_data:
2040 {
2041 uint64_t to_read = MIN(parser->content_length,
2042 (uint64_t) ((data + len) - p));
2043
2044 assert(parser->flags & F_CHUNKED);
2045 assert(parser->content_length != 0
2046 && parser->content_length != ULLONG_MAX);
2047
2048 /* See the explanation in s_body_identity for why the content
2049 * length and data pointers are managed this way.
2050 */
2051 MARK(body);
2052 parser->content_length -= to_read;
2053 p += to_read - 1;
2054
2055 if (parser->content_length == 0) {
2056 UPDATE_STATE(s_chunk_data_almost_done);
2057 }
2058
2059 break;
2060 }
2061
2062 case s_chunk_data_almost_done:
2063 assert(parser->flags & F_CHUNKED);
2064 assert(parser->content_length == 0);
2065 STRICT_CHECK(ch != CR);
2066 UPDATE_STATE(s_chunk_data_done);
2067 CALLBACK_DATA(body);
2068 break;
2069
2070 case s_chunk_data_done:
2071 assert(parser->flags & F_CHUNKED);
2072 STRICT_CHECK(ch != LF);
2073 parser->nread = 0;
2074 UPDATE_STATE(s_chunk_size_start);
2075 CALLBACK_NOTIFY(chunk_complete);
2076 break;
2077
2078 default:
2079 assert(0 && "unhandled state");
2080 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2081 goto error;
2082 }
2083 }
2084
2085 /* Run callbacks for any marks that we have leftover after we ran out of
2086 * bytes. There should be at most one of these set, so it's OK to invoke
2087 * them in series (unset marks will not result in callbacks).
2088 *
2089 * We use the NOADVANCE() variety of callbacks here because 'p' has already
2090 * overflowed 'data' and this allows us to correct for the off-by-one that
2091 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2092 * value that's in-bounds).
2093 */
2094
2095 assert(((header_field_mark ? 1 : 0) +
2096 (header_value_mark ? 1 : 0) +
2097 (url_mark ? 1 : 0) +
2098 (body_mark ? 1 : 0) +
2099 (status_mark ? 1 : 0)) <= 1);
2100
2101 CALLBACK_DATA_NOADVANCE(header_field);
2102 CALLBACK_DATA_NOADVANCE(header_value);
2103 CALLBACK_DATA_NOADVANCE(url);
2104 CALLBACK_DATA_NOADVANCE(body);
2105 CALLBACK_DATA_NOADVANCE(status);
2106
2107 RETURN(len);
2108
2109 error:
2110 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2111 SET_ERRNO(HPE_UNKNOWN);
2112 }
2113
2114 RETURN(p - data);
2115 }
2116
2117
2118 /* Does the parser need to see an EOF to find the end of the message? */
2119 int
http_message_needs_eof(const http_parser * parser)2120 http_message_needs_eof (const http_parser *parser)
2121 {
2122 if (parser->type == HTTP_REQUEST) {
2123 return 0;
2124 }
2125
2126 /* See RFC 2616 section 4.4 */
2127 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2128 parser->status_code == 204 || /* No Content */
2129 parser->status_code == 304 || /* Not Modified */
2130 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2131 return 0;
2132 }
2133
2134 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2135 return 0;
2136 }
2137
2138 return 1;
2139 }
2140
2141
2142 int
http_should_keep_alive(const http_parser * parser)2143 http_should_keep_alive (const http_parser *parser)
2144 {
2145 if (parser->http_major > 0 && parser->http_minor > 0) {
2146 /* HTTP/1.1 */
2147 if (parser->flags & F_CONNECTION_CLOSE) {
2148 return 0;
2149 }
2150 } else {
2151 /* HTTP/1.0 or earlier */
2152 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2153 return 0;
2154 }
2155 }
2156
2157 return !http_message_needs_eof(parser);
2158 }
2159
2160
2161 const char *
http_method_str(enum http_method m)2162 http_method_str (enum http_method m)
2163 {
2164 return ELEM_AT(method_strings, m, "<unknown>");
2165 }
2166
2167
2168 void
http_parser_init(http_parser * parser,enum http_parser_type t)2169 http_parser_init (http_parser *parser, enum http_parser_type t)
2170 {
2171 void *data = parser->data; /* preserve application data */
2172 memset(parser, 0, sizeof(*parser));
2173 parser->data = data;
2174 parser->type = t;
2175 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2176 parser->http_errno = HPE_OK;
2177 }
2178
2179 void
http_parser_settings_init(http_parser_settings * settings)2180 http_parser_settings_init(http_parser_settings *settings)
2181 {
2182 memset(settings, 0, sizeof(*settings));
2183 }
2184
2185 const char *
http_errno_name(enum http_errno err)2186 http_errno_name(enum http_errno err) {
2187 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2188 return http_strerror_tab[err].name;
2189 }
2190
2191 const char *
http_errno_description(enum http_errno err)2192 http_errno_description(enum http_errno err) {
2193 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2194 return http_strerror_tab[err].description;
2195 }
2196
2197 static enum http_host_state
http_parse_host_char(enum http_host_state s,const char ch)2198 http_parse_host_char(enum http_host_state s, const char ch) {
2199 switch(s) {
2200 case s_http_userinfo:
2201 case s_http_userinfo_start:
2202 if (ch == '@') {
2203 return s_http_host_start;
2204 }
2205
2206 if (IS_USERINFO_CHAR(ch)) {
2207 return s_http_userinfo;
2208 }
2209 break;
2210
2211 case s_http_host_start:
2212 if (ch == '[') {
2213 return s_http_host_v6_start;
2214 }
2215
2216 if (IS_HOST_CHAR(ch)) {
2217 return s_http_host;
2218 }
2219
2220 break;
2221
2222 case s_http_host:
2223 if (IS_HOST_CHAR(ch)) {
2224 return s_http_host;
2225 }
2226
2227 /* FALLTHROUGH */
2228 case s_http_host_v6_end:
2229 if (ch == ':') {
2230 return s_http_host_port_start;
2231 }
2232
2233 break;
2234
2235 case s_http_host_v6:
2236 if (ch == ']') {
2237 return s_http_host_v6_end;
2238 }
2239
2240 /* FALLTHROUGH */
2241 case s_http_host_v6_start:
2242 if (IS_HEX(ch) || ch == ':' || ch == '.') {
2243 return s_http_host_v6;
2244 }
2245
2246 if (s == s_http_host_v6 && ch == '%') {
2247 return s_http_host_v6_zone_start;
2248 }
2249 break;
2250
2251 case s_http_host_v6_zone:
2252 if (ch == ']') {
2253 return s_http_host_v6_end;
2254 }
2255
2256 /* FALLTHROUGH */
2257 case s_http_host_v6_zone_start:
2258 /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2259 if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2260 ch == '~') {
2261 return s_http_host_v6_zone;
2262 }
2263 break;
2264
2265 case s_http_host_port:
2266 case s_http_host_port_start:
2267 if (IS_NUM(ch)) {
2268 return s_http_host_port;
2269 }
2270
2271 break;
2272
2273 default:
2274 break;
2275 }
2276 return s_http_host_dead;
2277 }
2278
2279 static int
http_parse_host(const char * buf,struct http_parser_url * u,int found_at)2280 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2281 enum http_host_state s;
2282
2283 const char *p;
2284 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2285
2286 assert(u->field_set & (1 << UF_HOST));
2287
2288 u->field_data[UF_HOST].len = 0;
2289
2290 s = found_at ? s_http_userinfo_start : s_http_host_start;
2291
2292 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2293 enum http_host_state new_s = http_parse_host_char(s, *p);
2294
2295 if (new_s == s_http_host_dead) {
2296 return 1;
2297 }
2298
2299 switch(new_s) {
2300 case s_http_host:
2301 if (s != s_http_host) {
2302 u->field_data[UF_HOST].off = p - buf;
2303 }
2304 u->field_data[UF_HOST].len++;
2305 break;
2306
2307 case s_http_host_v6:
2308 if (s != s_http_host_v6) {
2309 u->field_data[UF_HOST].off = p - buf;
2310 }
2311 u->field_data[UF_HOST].len++;
2312 break;
2313
2314 case s_http_host_v6_zone_start:
2315 case s_http_host_v6_zone:
2316 u->field_data[UF_HOST].len++;
2317 break;
2318
2319 case s_http_host_port:
2320 if (s != s_http_host_port) {
2321 u->field_data[UF_PORT].off = p - buf;
2322 u->field_data[UF_PORT].len = 0;
2323 u->field_set |= (1 << UF_PORT);
2324 }
2325 u->field_data[UF_PORT].len++;
2326 break;
2327
2328 case s_http_userinfo:
2329 if (s != s_http_userinfo) {
2330 u->field_data[UF_USERINFO].off = p - buf ;
2331 u->field_data[UF_USERINFO].len = 0;
2332 u->field_set |= (1 << UF_USERINFO);
2333 }
2334 u->field_data[UF_USERINFO].len++;
2335 break;
2336
2337 default:
2338 break;
2339 }
2340 s = new_s;
2341 }
2342
2343 /* Make sure we don't end somewhere unexpected */
2344 switch (s) {
2345 case s_http_host_start:
2346 case s_http_host_v6_start:
2347 case s_http_host_v6:
2348 case s_http_host_v6_zone_start:
2349 case s_http_host_v6_zone:
2350 case s_http_host_port_start:
2351 case s_http_userinfo:
2352 case s_http_userinfo_start:
2353 return 1;
2354 default:
2355 break;
2356 }
2357
2358 return 0;
2359 }
2360
2361 void
http_parser_url_init(struct http_parser_url * u)2362 http_parser_url_init(struct http_parser_url *u) {
2363 memset(u, 0, sizeof(*u));
2364 }
2365
2366 int
http_parser_parse_url(const char * buf,size_t buflen,int is_connect,struct http_parser_url * u)2367 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2368 struct http_parser_url *u)
2369 {
2370 enum state s;
2371 const char *p;
2372 enum http_parser_url_fields uf, old_uf;
2373 int found_at = 0;
2374
2375 u->port = u->field_set = 0;
2376 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2377 old_uf = UF_MAX;
2378
2379 for (p = buf; p < buf + buflen; p++) {
2380 s = parse_url_char(s, *p);
2381
2382 /* Figure out the next field that we're operating on */
2383 switch (s) {
2384 case s_dead:
2385 return 1;
2386
2387 /* Skip delimeters */
2388 case s_req_schema_slash:
2389 case s_req_schema_slash_slash:
2390 case s_req_server_start:
2391 case s_req_query_string_start:
2392 case s_req_fragment_start:
2393 continue;
2394
2395 case s_req_schema:
2396 uf = UF_SCHEMA;
2397 break;
2398
2399 case s_req_server_with_at:
2400 found_at = 1;
2401
2402 /* falls through */
2403 case s_req_server:
2404 uf = UF_HOST;
2405 break;
2406
2407 case s_req_path:
2408 uf = UF_PATH;
2409 break;
2410
2411 case s_req_query_string:
2412 uf = UF_QUERY;
2413 break;
2414
2415 case s_req_fragment:
2416 uf = UF_FRAGMENT;
2417 break;
2418
2419 default:
2420 assert(!"Unexpected state");
2421 return 1;
2422 }
2423
2424 /* Nothing's changed; soldier on */
2425 if (uf == old_uf) {
2426 u->field_data[uf].len++;
2427 continue;
2428 }
2429
2430 u->field_data[uf].off = p - buf;
2431 u->field_data[uf].len = 1;
2432
2433 u->field_set |= (1 << uf);
2434 old_uf = uf;
2435 }
2436
2437 /* host must be present if there is a schema */
2438 /* parsing http:///toto will fail */
2439 if ((u->field_set & (1 << UF_SCHEMA)) &&
2440 (u->field_set & (1 << UF_HOST)) == 0) {
2441 return 1;
2442 }
2443
2444 if (u->field_set & (1 << UF_HOST)) {
2445 if (http_parse_host(buf, u, found_at) != 0) {
2446 return 1;
2447 }
2448 }
2449
2450 /* CONNECT requests can only contain "hostname:port" */
2451 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2452 return 1;
2453 }
2454
2455 if (u->field_set & (1 << UF_PORT)) {
2456 /* Don't bother with endp; we've already validated the string */
2457 unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2458
2459 /* Ports have a max value of 2^16 */
2460 if (v > 0xffff) {
2461 return 1;
2462 }
2463
2464 u->port = (uint16_t) v;
2465 }
2466
2467 return 0;
2468 }
2469
2470 void
http_parser_pause(http_parser * parser,int paused)2471 http_parser_pause(http_parser *parser, int paused) {
2472 /* Users should only be pausing/unpausing a parser that is not in an error
2473 * state. In non-debug builds, there's not much that we can do about this
2474 * other than ignore it.
2475 */
2476 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2477 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2478 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2479 } else {
2480 assert(0 && "Attempting to pause parser in error state");
2481 }
2482 }
2483
2484 int
http_body_is_final(const struct http_parser * parser)2485 http_body_is_final(const struct http_parser *parser) {
2486 return parser->state == s_message_done;
2487 }
2488
2489 unsigned long
http_parser_version(void)2490 http_parser_version(void) {
2491 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2492 HTTP_PARSER_VERSION_MINOR * 0x00100 |
2493 HTTP_PARSER_VERSION_PATCH * 0x00001;
2494 }
2495