1 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2  *
3  * Additional changes are licensed under the same terms as NGINX and
4  * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 #include "http_parser.h"
25 #include <assert.h>
26 #include <stddef.h>
27 #include <ctype.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <limits.h>
31 
/* Fallback for toolchains whose <limits.h> does not provide ULLONG_MAX:
 * a 64-bit value with every bit set (2^64-1).
 */
#ifndef ULLONG_MAX
# define ULLONG_MAX (~(uint64_t) 0)
#endif

/* Smaller of the two arguments; an argument may be evaluated twice. */
#ifndef MIN
# define MIN(a,b) ((b) > (a) ? (a) : (b))
#endif

/* Number of elements in the array `a` (must be a real array, not a pointer). */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) (sizeof(a) / sizeof(*(a)))
#endif

/* Test bit `i` of the bitmap `a`: element i / 8, bit i % 8.
 * Evaluates to 1 when the bit is set and to 0 otherwise.
 */
#ifndef BIT_AT
# define BIT_AT(a, i)                                                \
  (0 != ((((unsigned int) (a)[(unsigned int) (i) >> 3]) >>           \
          ((unsigned int) (i) & 7)) & 1u))
#endif

/* Bounds-checked array read: a[i] when `i` is a valid index of the array
 * `a`, otherwise the fallback value `v`.  Negative indices fail the check
 * because the index is compared as unsigned.
 */
#ifndef ELEM_AT
# define ELEM_AT(a, i, v) ((unsigned int) (i) >= ARRAY_SIZE(a) ? (v) : (a)[(i)])
#endif
53 
/* Record the error code `e` on the parser; expects a local named `parser`. */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->http_errno = (e);                                          \
} while(0)

/* The state machine works on a local copy of the state, `p_state`.
 *
 *   CURRENT_STATE()  reads the local state.
 *   UPDATE_STATE(V)  stores V (converted to enum state) into it.
 *   RETURN(V)        writes the local state back to parser->state and
 *                    returns V from the enclosing function.
 *   REEXECUTE()      jumps to the `reexecute` label of the enclosing function.
 *
 * None of these macros ends in a semicolon: the caller supplies it.  That
 * keeps `if (c) UPDATE_STATE(a); else UPDATE_STATE(b);` well-formed, which a
 * macro with a built-in trailing ';' would break.
 */
#define CURRENT_STATE() p_state
#define UPDATE_STATE(V) (p_state = (enum state) (V))
#define RETURN(V)                                                    \
do {                                                                 \
  parser->state = CURRENT_STATE();                                   \
  return (V);                                                        \
} while (0)
#define REEXECUTE() goto reexecute


/* Branch-prediction hints; plain pass-through on non-GNU compilers. */
#ifdef __GNUC__
# define LIKELY(X) __builtin_expect(!!(X), 1)
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
#else
# define LIKELY(X) (X)
# define UNLIKELY(X) (X)
#endif
77 
78 
/* Invoke the argument-less ("notify") callback on_<NAME> if one is set.
 *
 * The local state is written to parser->state before the call and read back
 * afterwards.  A non-zero callback result is recorded as HPE_CB_<NAME>.  If
 * the parser's errno is anything other than HPE_OK after the call (the
 * callback failed or the parser got paused), the enclosing function returns
 * RET.
 */
#define CALLBACK_NOTIFY_(NAME, RET)                                  \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (LIKELY(settings->on_##NAME)) {                                 \
    parser->state = CURRENT_STATE();                                 \
    if (UNLIKELY(settings->on_##NAME(parser) != 0)) {                \
      SET_ERRNO(HPE_CB_##NAME);                                      \
    }                                                                \
    UPDATE_STATE(parser->state);                                     \
                                                                     \
    /* Error or pause: stop and report how far we got */             \
    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
      return (RET);                                                  \
    }                                                                \
  }                                                                  \
} while (0)

/* Notify variant that counts the current byte as consumed on early return */
#define CALLBACK_NOTIFY(NAME)            CALLBACK_NOTIFY_(NAME, p - data + 1)

/* Notify variant that leaves the current byte unconsumed on early return */
#define CALLBACK_NOTIFY_NOADVANCE(NAME)  CALLBACK_NOTIFY_(NAME, p - data)

/* Invoke the data callback on_<NAME> with LEN bytes starting at <NAME>_mark.
 *
 * Does nothing when the mark is not set.  Otherwise the callback (if any) is
 * run with the same state hand-off and error/pause handling as
 * CALLBACK_NOTIFY_, and the mark is cleared once the callback has been
 * dealt with.  On error or pause the enclosing function returns RET and the
 * mark is left untouched.
 */
#define CALLBACK_DATA_(NAME, LEN, RET)                               \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (NAME##_mark != NULL) {                                         \
    if (LIKELY(settings->on_##NAME)) {                               \
      parser->state = CURRENT_STATE();                               \
      if (UNLIKELY(settings->on_##NAME(parser, NAME##_mark, (LEN)) != 0)) { \
        SET_ERRNO(HPE_CB_##NAME);                                    \
      }                                                              \
      UPDATE_STATE(parser->state);                                   \
                                                                     \
      /* Error or pause: stop and report how far we got */           \
      if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
        return (RET);                                                \
      }                                                              \
    }                                                                \
    NAME##_mark = NULL;                                              \
  }                                                                  \
} while (0)

/* Data variant that counts the current byte as consumed on early return */
#define CALLBACK_DATA(NAME)                                          \
    CALLBACK_DATA_(NAME, p - NAME##_mark, p - data + 1)

/* Data variant that leaves the current byte unconsumed on early return */
#define CALLBACK_DATA_NOADVANCE(NAME)                                \
    CALLBACK_DATA_(NAME, p - NAME##_mark, p - data)

/* Point <NAME>_mark at the current byte unless a mark is already set */
#define MARK(NAME)                                                   \
do {                                                                 \
  if (NAME##_mark == NULL) {                                         \
    NAME##_mark = p;                                                 \
  }                                                                  \
} while (0)

/* Add V to the running header byte count (parser->nread) and fail with
 * HPE_HEADER_OVERFLOW, jumping to the `error` label, once the count exceeds
 * HTTP_MAX_HEADER_SIZE.  The count covers the status line as well.
 *
 * The limit protects embedders against denial-of-service attacks in which
 * an attacker sends a never-ending header that the embedder keeps
 * buffering.  Enforcing it is arguably the embedder's job, but most won't
 * bother, so it is done here on their behalf.  HTTP_MAX_HEADER_SIZE is far
 * bigger than any reasonable request or response, so normal traffic should
 * never hit it.
 */
#define COUNT_HEADER_SIZE(N)                                         \
do {                                                                 \
  parser->nread += (N);                                              \
  if (UNLIKELY((HTTP_MAX_HEADER_SIZE) < parser->nread)) {            \
    SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
    goto error;                                                      \
  }                                                                  \
} while (0)
162 
163 
/* Lower-case spellings of header field names ... */
#define PROXY_CONNECTION   "proxy-connection"
#define CONNECTION         "connection"
#define CONTENT_LENGTH     "content-length"
#define TRANSFER_ENCODING  "transfer-encoding"
#define UPGRADE            "upgrade"
/* ... and of header values. */
#define CHUNKED            "chunked"
#define KEEP_ALIVE         "keep-alive"
#define CLOSE              "close"
172 
173 
174 static const char *method_strings[] =
175   {
176 #define XX(num, name, string) #string,
177   HTTP_METHOD_MAP(XX)
178 #undef XX
179   };
180 
181 
/* Token characters as defined by RFC 2616, mapped to lower case.
 *
 *   token      = 1*<any CHAR except CTLs or separators>
 *   separators = ( ) < > @ , ; : backslash double-quote
 *                slash [ ] ? = { } SP HT
 *
 * tokens[c] is the lower-cased character when byte c may appear in a token
 * and 0 when it may not.  Only bytes 0x00-0x7F are listed; entries for
 * 0x80-0xFF are zero-initialised, i.e. never part of a token.
 */
static const char tokens[256] = {
/* 0x00-0x0f: control characters */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x10-0x1f: control characters */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x20-0x2f: sp ! " # $ % & ' ( ) * + , - . slash */
  0, '!', 0, '#', '$', '%', '&', '\'', 0, 0, '*', '+', 0, '-', '.', 0,
/* 0x30-0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0,
/* 0x40-0x4f: @ A B C D E F G H I J K L M N O */
  0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x50-0x5f: P Q R S T U V W X Y Z [ backslash ] ^ _ */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, '^', '_',
/* 0x60-0x6f: ` a b c d e f g h i j k l m n o */
  '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70-0x7f: p q r s t u v w x y z { | } ~ del */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, '|', 0, '~', 0
};
222 
223 
/* Value of each byte as a hexadecimal digit (0-15), or -1 when the byte is
 * not a hex digit.  Upper- and lower-case A-F are both accepted.
 *
 * All 256 entries are spelled out on purpose: entries left out of the
 * initializer would be zero-filled, which would make the bytes 0x80-0xFF
 * look like the digit '0' instead of being rejected.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80-0xFF: never hex digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };
234 
235 
/* T(v) keeps the bit value v in lenient builds and drops it (0) when
 * HTTP_PARSER_STRICT is set.
 */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of "normal" URL bytes: byte c is flagged by bit (c & 7) of element
 * (c >> 3).  Each initializer below therefore covers eight consecutive byte
 * values, lowest value in the least significant bit.  Only the sixteen
 * elements for 0x00-0x7F are listed; the elements for 0x80-0xFF are
 * zero-initialised.
 */
static const uint8_t normal_url_char[32] = {
/* 0x00-0x07: nul soh stx etx eot enq ack bel -- none */
  0x00,
/* 0x08-0x0f: ht (bit 1) and np / form feed (bit 4), lenient builds only */
  T(0x02) | T(0x10),
/* 0x10-0x17: dle dc1 dc2 dc3 dc4 nak syn etb -- none */
  0x00,
/* 0x18-0x1f: can em sub esc fs gs rs us -- none */
  0x00,
/* 0x20-0x27: ! " $ % & ' -- not sp (bit 0), not # (bit 3) */
  0xF6,
/* 0x28-0x2f: ( ) * + , - . slash */
  0xFF,
/* 0x30-0x37: 0 1 2 3 4 5 6 7 */
  0xFF,
/* 0x38-0x3f: 8 9 : ; < = > -- not ? (bit 7) */
  0x7F,
/* 0x40-0x47: @ A B C D E F G */
  0xFF,
/* 0x48-0x4f: H I J K L M N O */
  0xFF,
/* 0x50-0x57: P Q R S T U V W */
  0xFF,
/* 0x58-0x5f: X Y Z [ backslash ] ^ _ */
  0xFF,
/* 0x60-0x67: ` a b c d e f g */
  0xFF,
/* 0x68-0x6f: h i j k l m n o */
  0xFF,
/* 0x70-0x77: p q r s t u v w */
  0xFF,
/* 0x78-0x7f: x y z { | } ~ -- not del (bit 7) */
  0x7F
};

#undef T
278 
/* States of the main parser.  The numeric order matters: see the note after
 * s_headers_done.
 */
enum state {
  s_dead = 1, /* important that this is > 0 */

  /* start of a message whose type (request or response) is not yet known */
  s_start_req_or_res,
  s_res_or_resp_I, /* for ICY URIs */
  s_res_or_resp_H,

  /* response status line */
  s_start_res,
  s_res_I,  /* for ICY URIs */
  s_res_IC, /* for ICY URIs */
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status_start,
  s_res_status,
  s_res_line_almost_done,

  /* request line */
  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_server_start,
  s_req_server,
  s_req_server_with_at,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  /* header fields and values */
  s_header_field_start,
  s_header_field,
  s_header_value_discard_ws,
  s_header_value_discard_ws_almost_done,
  s_header_value_discard_lws,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  /* chunk size line */
  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};
361 
362 
/* True while `state` is at or before s_headers_done, i.e. not yet one of the
 * body states that follow it in enum state.  The argument is parenthesized
 * so that any expression, including a conditional one, can be passed.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
364 
365 
/* States used while recognising specific header names and values. */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  /* part-way through matching a header name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* header name fully matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* part-way through matching a header value */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_token_start,
  h_matching_connection_keep_alive,
  h_matching_connection_close,
  h_matching_connection_upgrade,
  h_matching_connection_token,

  /* header value fully matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close,
  h_connection_upgrade
};
395 
/* States for walking the userinfo / host / port part of a URL. */
enum http_host_state {
  s_http_host_dead = 1,

  /* userinfo */
  s_http_userinfo_start,
  s_http_userinfo,

  /* host name or bracketed IPv6 literal (optionally with a zone) */
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_v6_zone_start,
  s_http_host_v6_zone,

  /* port */
  s_http_host_port_start,
  s_http_host_port
};
411 
/* Macros for character classes; some depend on strict mode.
 *
 * Every macro parenthesizes its argument so that it can be handed an
 * arbitrary expression.  Arguments may be evaluated more than once, so they
 * must not have side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII lower-casing by setting bit 0x20; only meaningful for letters */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Token lookup that never treats a space as a token character */
#define STRICT_TOKEN(c)     (tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Lenient mode: a space counts as a token character, bytes with the high
 * bit set count as URL characters, and '_' is allowed in host names.
 */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is a valid visible (printable) US-ASCII
 * character or %x80-FF.  CR, LF and horizontal tab (9) are accepted too.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))

/* Initial state for the parser's configured message type */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)


#if HTTP_PARSER_STRICT
/* Strict mode: fail with HPE_STRICT (jumping to `error`) when cond holds */
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
/* Strict mode: only start another message if the connection is kept alive */
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
/* Lenient mode: the check is compiled out and a new message always starts */
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
464 
465 
466 /* Map errno values to strings for human-readable output */
467 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
468 static struct {
469   const char *name;
470   const char *description;
471 } http_strerror_tab[] = {
472   HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
473 };
474 #undef HTTP_STRERROR_GEN
475 
476 int http_message_needs_eof(const http_parser *parser);
477 
478 /* Our URL parser.
479  *
480  * This is designed to be shared by http_parser_execute() for URL validation,
481  * hence it has a state transition + byte-for-byte interface. In addition, it
482  * is meant to be embedded in http_parser_parse_url(), which does the dirty
483  * work of turning state transitions URL components for its API.
484  *
485  * This function should only be invoked with non-space characters. It is
486  * assumed that the caller cares about (and can detect) the transition between
487  * URL and non-URL states by looking for these.
488  */
489 static enum state
parse_url_char(enum state s,const char ch)490 parse_url_char(enum state s, const char ch)
491 {
492   if (ch == ' ' || ch == '\r' || ch == '\n') {
493     return s_dead;
494   }
495 
496 #if HTTP_PARSER_STRICT
497   if (ch == '\t' || ch == '\f') {
498     return s_dead;
499   }
500 #endif
501 
502   switch (s) {
503     case s_req_spaces_before_url:
504       /* Proxied requests are followed by scheme of an absolute URI (alpha).
505        * All methods except CONNECT are followed by '/' or '*'.
506        */
507 
508       if (ch == '/' || ch == '*') {
509         return s_req_path;
510       }
511 
512       if (IS_ALPHA(ch)) {
513         return s_req_schema;
514       }
515 
516       break;
517 
518     case s_req_schema:
519       if (IS_ALPHA(ch)) {
520         return s;
521       }
522 
523       if (ch == ':') {
524         return s_req_schema_slash;
525       }
526 
527       break;
528 
529     case s_req_schema_slash:
530       if (ch == '/') {
531         return s_req_schema_slash_slash;
532       }
533 
534       break;
535 
536     case s_req_schema_slash_slash:
537       if (ch == '/') {
538         return s_req_server_start;
539       }
540 
541       break;
542 
543     case s_req_server_with_at:
544       if (ch == '@') {
545         return s_dead;
546       }
547 
548     /* FALLTHROUGH */
549     case s_req_server_start:
550     case s_req_server:
551       if (ch == '/') {
552         return s_req_path;
553       }
554 
555       if (ch == '?') {
556         return s_req_query_string_start;
557       }
558 
559       if (ch == '@') {
560         return s_req_server_with_at;
561       }
562 
563       if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
564         return s_req_server;
565       }
566 
567       break;
568 
569     case s_req_path:
570       if (IS_URL_CHAR(ch)) {
571         return s;
572       }
573 
574       switch (ch) {
575         case '?':
576           return s_req_query_string_start;
577 
578         case '#':
579           return s_req_fragment_start;
580       }
581 
582       break;
583 
584     case s_req_query_string_start:
585     case s_req_query_string:
586       if (IS_URL_CHAR(ch)) {
587         return s_req_query_string;
588       }
589 
590       switch (ch) {
591         case '?':
592           /* allow extra '?' in query string */
593           return s_req_query_string;
594 
595         case '#':
596           return s_req_fragment_start;
597       }
598 
599       break;
600 
601     case s_req_fragment_start:
602       if (IS_URL_CHAR(ch)) {
603         return s_req_fragment;
604       }
605 
606       switch (ch) {
607         case '?':
608           return s_req_fragment;
609 
610         case '#':
611           return s;
612       }
613 
614       break;
615 
616     case s_req_fragment:
617       if (IS_URL_CHAR(ch)) {
618         return s;
619       }
620 
621       switch (ch) {
622         case '?':
623         case '#':
624           return s;
625       }
626 
627       break;
628 
629     default:
630       break;
631   }
632 
633   /* We should never fall out of the switch above unless there's an error */
634   return s_dead;
635 }
636 
http_parser_execute(http_parser * parser,const http_parser_settings * settings,const char * data,size_t len)637 size_t http_parser_execute (http_parser *parser,
638                             const http_parser_settings *settings,
639                             const char *data,
640                             size_t len)
641 {
642   char c, ch;
643   int8_t unhex_val;
644   const char *p = data;
645   const char *header_field_mark = 0;
646   const char *header_value_mark = 0;
647   const char *url_mark = 0;
648   const char *body_mark = 0;
649   const char *status_mark = 0;
650   enum state p_state = (enum state) parser->state;
651   const unsigned int lenient = parser->lenient_http_headers;
652 
653   /* We're in an error state. Don't bother doing anything. */
654   if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
655     return 0;
656   }
657 
658   if (len == 0) {
659     switch (CURRENT_STATE()) {
660       case s_body_identity_eof:
661         /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
662          * we got paused.
663          */
664         CALLBACK_NOTIFY_NOADVANCE(message_complete);
665         return 0;
666 
667       case s_dead:
668       case s_start_req_or_res:
669       case s_start_res:
670       case s_start_req:
671         return 0;
672 
673       default:
674         SET_ERRNO(HPE_INVALID_EOF_STATE);
675         return 1;
676     }
677   }
678 
679 
680   if (CURRENT_STATE() == s_header_field)
681     header_field_mark = data;
682   if (CURRENT_STATE() == s_header_value)
683     header_value_mark = data;
684   switch (CURRENT_STATE()) {
685   case s_req_path:
686   case s_req_schema:
687   case s_req_schema_slash:
688   case s_req_schema_slash_slash:
689   case s_req_server_start:
690   case s_req_server:
691   case s_req_server_with_at:
692   case s_req_query_string_start:
693   case s_req_query_string:
694   case s_req_fragment_start:
695   case s_req_fragment:
696     url_mark = data;
697     break;
698   case s_res_status:
699     status_mark = data;
700     break;
701   default:
702     break;
703   }
704 
705   for (p=data; p != data + len; p++) {
706     ch = *p;
707 
708     if (PARSING_HEADER(CURRENT_STATE()))
709       COUNT_HEADER_SIZE(1);
710 
711 reexecute:
712     switch (CURRENT_STATE()) {
713 
714       case s_dead:
715         /* this state is used after a 'Connection: close' message
716          * the parser will error out if it reads another message
717          */
718         if (LIKELY(ch == CR || ch == LF))
719           break;
720 
721         SET_ERRNO(HPE_CLOSED_CONNECTION);
722         goto error;
723 
724       case s_start_req_or_res:
725       {
726         if (ch == CR || ch == LF)
727           break;
728         parser->flags = 0;
729         parser->content_length = ULLONG_MAX;
730 
731         if (ch == 'H') {
732           UPDATE_STATE(s_res_or_resp_H);
733 
734           CALLBACK_NOTIFY(message_begin);
735         } else if (ch == 'I') {
736           UPDATE_STATE(s_res_or_resp_I);
737 
738           CALLBACK_NOTIFY(message_begin);
739         } else {
740           parser->type = HTTP_REQUEST;
741           UPDATE_STATE(s_start_req);
742           REEXECUTE();
743         }
744 
745         break;
746       }
747 
748       case s_res_or_resp_I: /* ICY URI case */
749         if (ch == 'C') {
750           parser->type = HTTP_RESPONSE;
751           UPDATE_STATE(s_res_IC);
752         }
753         break;
754 
755       case s_res_or_resp_H:
756         if (ch == 'T') {
757           parser->type = HTTP_RESPONSE;
758           UPDATE_STATE(s_res_HT);
759         } else {
760           if (UNLIKELY(ch != 'E')) {
761             SET_ERRNO(HPE_INVALID_CONSTANT);
762             goto error;
763           }
764 
765           parser->type = HTTP_REQUEST;
766           parser->method = HTTP_HEAD;
767           parser->index = 2;
768           UPDATE_STATE(s_req_method);
769         }
770         break;
771 
772       case s_start_res:
773       {
774         parser->flags = 0;
775         parser->content_length = ULLONG_MAX;
776 
777         switch (ch) {
778           case 'H':
779             UPDATE_STATE(s_res_H);
780             break;
781           case 'I': /* ICY URI */
782             UPDATE_STATE(s_res_I);
783             break;
784           case CR:
785           case LF:
786             break;
787 
788           default:
789             SET_ERRNO(HPE_INVALID_CONSTANT);
790             goto error;
791         }
792 
793         CALLBACK_NOTIFY(message_begin);
794         break;
795       }
796       case s_res_I:
797         STRICT_CHECK(ch != 'C');
798         UPDATE_STATE(s_res_IC);
799         break;
800 
801       case s_res_IC:
802         STRICT_CHECK(ch != 'Y');
803         UPDATE_STATE(s_res_http_minor);
804         break;
805 
806       case s_res_H:
807         STRICT_CHECK(ch != 'T');
808         UPDATE_STATE(s_res_HT);
809         break;
810 
811       case s_res_HT:
812         STRICT_CHECK(ch != 'T');
813         UPDATE_STATE(s_res_HTT);
814         break;
815 
816       case s_res_HTT:
817         STRICT_CHECK(ch != 'P');
818         UPDATE_STATE(s_res_HTTP);
819         break;
820 
821       case s_res_HTTP:
822         STRICT_CHECK(ch != '/');
823         UPDATE_STATE(s_res_first_http_major);
824         break;
825 
826       case s_res_first_http_major:
827         if (UNLIKELY(ch < '0' || ch > '9')) {
828           SET_ERRNO(HPE_INVALID_VERSION);
829           goto error;
830         }
831 
832         parser->http_major = ch - '0';
833         UPDATE_STATE(s_res_http_major);
834         break;
835 
836       /* major HTTP version or dot */
837       case s_res_http_major:
838       {
839         if (ch == '.') {
840           UPDATE_STATE(s_res_first_http_minor);
841           break;
842         }
843 
844         if (!IS_NUM(ch)) {
845           SET_ERRNO(HPE_INVALID_VERSION);
846           goto error;
847         }
848 
849         parser->http_major *= 10;
850         parser->http_major += ch - '0';
851 
852         if (UNLIKELY(parser->http_major > 999)) {
853           SET_ERRNO(HPE_INVALID_VERSION);
854           goto error;
855         }
856 
857         break;
858       }
859 
860       /* first digit of minor HTTP version */
861       case s_res_first_http_minor:
862         if (UNLIKELY(!IS_NUM(ch))) {
863           SET_ERRNO(HPE_INVALID_VERSION);
864           goto error;
865         }
866 
867         parser->http_minor = ch - '0';
868         UPDATE_STATE(s_res_http_minor);
869         break;
870 
871       /* minor HTTP version or end of request line */
872       case s_res_http_minor:
873       {
874         if (ch == ' ') {
875           UPDATE_STATE(s_res_first_status_code);
876           break;
877         }
878 
879         if (UNLIKELY(!IS_NUM(ch))) {
880           SET_ERRNO(HPE_INVALID_VERSION);
881           goto error;
882         }
883 
884         parser->http_minor *= 10;
885         parser->http_minor += ch - '0';
886 
887         if (UNLIKELY(parser->http_minor > 999)) {
888           SET_ERRNO(HPE_INVALID_VERSION);
889           goto error;
890         }
891 
892         break;
893       }
894 
895       case s_res_first_status_code:
896       {
897         if (!IS_NUM(ch)) {
898           if (ch == ' ') {
899             break;
900           }
901 
902           SET_ERRNO(HPE_INVALID_STATUS);
903           goto error;
904         }
905         parser->status_code = ch - '0';
906         UPDATE_STATE(s_res_status_code);
907         break;
908       }
909 
910       case s_res_status_code:
911       {
912         if (!IS_NUM(ch)) {
913           switch (ch) {
914             case ' ':
915               UPDATE_STATE(s_res_status_start);
916               break;
917             case CR:
918               UPDATE_STATE(s_res_line_almost_done);
919               break;
920             case LF:
921               UPDATE_STATE(s_header_field_start);
922               break;
923             default:
924               SET_ERRNO(HPE_INVALID_STATUS);
925               goto error;
926           }
927           break;
928         }
929 
930         parser->status_code *= 10;
931         parser->status_code += ch - '0';
932 
933         if (UNLIKELY(parser->status_code > 999)) {
934           SET_ERRNO(HPE_INVALID_STATUS);
935           goto error;
936         }
937 
938         break;
939       }
940 
941       case s_res_status_start:
942       {
943         if (ch == CR) {
944           UPDATE_STATE(s_res_line_almost_done);
945           break;
946         }
947 
948         if (ch == LF) {
949           UPDATE_STATE(s_header_field_start);
950           break;
951         }
952 
953         MARK(status);
954         UPDATE_STATE(s_res_status);
955         parser->index = 0;
956         break;
957       }
958 
959       case s_res_status:
960         if (ch == CR) {
961           UPDATE_STATE(s_res_line_almost_done);
962           CALLBACK_DATA(status);
963           break;
964         }
965 
966         if (ch == LF) {
967           UPDATE_STATE(s_header_field_start);
968           CALLBACK_DATA(status);
969           break;
970         }
971 
972         break;
973 
974       case s_res_line_almost_done:
975         STRICT_CHECK(ch != LF);
976         UPDATE_STATE(s_header_field_start);
977         break;
978 
979       case s_start_req:
980       {
981         if (ch == CR || ch == LF)
982           break;
983         parser->flags = 0;
984         parser->content_length = ULLONG_MAX;
985 
986         if (UNLIKELY(!IS_ALPHA(ch))) {
987           SET_ERRNO(HPE_INVALID_METHOD);
988           goto error;
989         }
990 
991         parser->method = (enum http_method) 0;
992         parser->index = 1;
993         switch (ch) {
994           case 'A': parser->method = HTTP_ACL; break;
995           case 'B': parser->method = HTTP_BIND; break;
996           case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
997           case 'D': parser->method = HTTP_DELETE; break;
998           case 'G': parser->method = HTTP_GET; break;
999           case 'H': parser->method = HTTP_HEAD; break;
1000           case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
1001           case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
1002           case 'N': parser->method = HTTP_NOTIFY; break;
1003           case 'O': parser->method = HTTP_OPTIONS; break;
1004           case 'P': parser->method = HTTP_POST;
1005             /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
1006             break;
1007           case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
1008           case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
1009           case 'T': parser->method = HTTP_TRACE; break;
1010           case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
1011           default:
1012             SET_ERRNO(HPE_INVALID_METHOD);
1013             goto error;
1014         }
1015         UPDATE_STATE(s_req_method);
1016 
1017         CALLBACK_NOTIFY(message_begin);
1018 
1019         break;
1020       }
1021 
1022       case s_req_method:
1023       {
1024         const char *matcher;
1025         if (UNLIKELY(ch == '\0')) {
1026           SET_ERRNO(HPE_INVALID_METHOD);
1027           goto error;
1028         }
1029 
1030         matcher = method_strings[parser->method];
1031         if (ch == ' ' && matcher[parser->index] == '\0') {
1032           UPDATE_STATE(s_req_spaces_before_url);
1033         } else if (ch == matcher[parser->index]) {
1034           ; /* nada */
1035         } else if (IS_ALPHA(ch)) {
1036 
1037           switch (parser->method << 16 | parser->index << 8 | ch) {
1038 #define XX(meth, pos, ch, new_meth) \
1039             case (HTTP_##meth << 16 | pos << 8 | ch): \
1040               parser->method = HTTP_##new_meth; break;
1041 
1042             XX(POST,      1, 'U', PUT)
1043             XX(POST,      1, 'A', PATCH)
1044             XX(CONNECT,   1, 'H', CHECKOUT)
1045             XX(CONNECT,   2, 'P', COPY)
1046             XX(MKCOL,     1, 'O', MOVE)
1047             XX(MKCOL,     1, 'E', MERGE)
1048             XX(MKCOL,     2, 'A', MKACTIVITY)
1049             XX(MKCOL,     3, 'A', MKCALENDAR)
1050             XX(SUBSCRIBE, 1, 'E', SEARCH)
1051             XX(REPORT,    2, 'B', REBIND)
1052             XX(POST,      1, 'R', PROPFIND)
1053             XX(PROPFIND,  4, 'P', PROPPATCH)
1054             XX(PUT,       2, 'R', PURGE)
1055             XX(LOCK,      1, 'I', LINK)
1056             XX(UNLOCK,    2, 'S', UNSUBSCRIBE)
1057             XX(UNLOCK,    2, 'B', UNBIND)
1058             XX(UNLOCK,    3, 'I', UNLINK)
1059 #undef XX
1060 
1061             default:
1062               SET_ERRNO(HPE_INVALID_METHOD);
1063               goto error;
1064           }
1065         } else if (ch == '-' &&
1066                    parser->index == 1 &&
1067                    parser->method == HTTP_MKCOL) {
1068           parser->method = HTTP_MSEARCH;
1069         } else {
1070           SET_ERRNO(HPE_INVALID_METHOD);
1071           goto error;
1072         }
1073 
1074         ++parser->index;
1075         break;
1076       }
1077 
1078       case s_req_spaces_before_url:
1079       {
1080         if (ch == ' ') break;
1081 
1082         MARK(url);
1083         if (parser->method == HTTP_CONNECT) {
1084           UPDATE_STATE(s_req_server_start);
1085         }
1086 
1087         UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1088         if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1089           SET_ERRNO(HPE_INVALID_URL);
1090           goto error;
1091         }
1092 
1093         break;
1094       }
1095 
1096       case s_req_schema:
1097       case s_req_schema_slash:
1098       case s_req_schema_slash_slash:
1099       case s_req_server_start:
1100       {
1101         switch (ch) {
1102           /* No whitespace allowed here */
1103           case ' ':
1104           case CR:
1105           case LF:
1106             SET_ERRNO(HPE_INVALID_URL);
1107             goto error;
1108           default:
1109             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1110             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1111               SET_ERRNO(HPE_INVALID_URL);
1112               goto error;
1113             }
1114         }
1115 
1116         break;
1117       }
1118 
1119       case s_req_server:
1120       case s_req_server_with_at:
1121       case s_req_path:
1122       case s_req_query_string_start:
1123       case s_req_query_string:
1124       case s_req_fragment_start:
1125       case s_req_fragment:
1126       {
1127         switch (ch) {
1128           case ' ':
1129             UPDATE_STATE(s_req_http_start);
1130             CALLBACK_DATA(url);
1131             break;
1132           case CR:
1133           case LF:
1134             parser->http_major = 0;
1135             parser->http_minor = 9;
1136             UPDATE_STATE((ch == CR) ?
1137               s_req_line_almost_done :
1138               s_header_field_start);
1139             CALLBACK_DATA(url);
1140             break;
1141           default:
1142             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1143             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1144               SET_ERRNO(HPE_INVALID_URL);
1145               goto error;
1146             }
1147         }
1148         break;
1149       }
1150 
1151       case s_req_http_start:
1152         switch (ch) {
1153           case 'H':
1154             UPDATE_STATE(s_req_http_H);
1155             break;
1156           case ' ':
1157             break;
1158           default:
1159             SET_ERRNO(HPE_INVALID_CONSTANT);
1160             goto error;
1161         }
1162         break;
1163 
1164       case s_req_http_H:
1165         STRICT_CHECK(ch != 'T');
1166         UPDATE_STATE(s_req_http_HT);
1167         break;
1168 
1169       case s_req_http_HT:
1170         STRICT_CHECK(ch != 'T');
1171         UPDATE_STATE(s_req_http_HTT);
1172         break;
1173 
1174       case s_req_http_HTT:
1175         STRICT_CHECK(ch != 'P');
1176         UPDATE_STATE(s_req_http_HTTP);
1177         break;
1178 
1179       case s_req_http_HTTP:
1180         STRICT_CHECK(ch != '/');
1181         UPDATE_STATE(s_req_first_http_major);
1182         break;
1183 
1184       /* first digit of major HTTP version */
1185       case s_req_first_http_major:
1186         if (UNLIKELY(ch < '1' || ch > '9')) {
1187           SET_ERRNO(HPE_INVALID_VERSION);
1188           goto error;
1189         }
1190 
1191         parser->http_major = ch - '0';
1192         UPDATE_STATE(s_req_http_major);
1193         break;
1194 
1195       /* major HTTP version or dot */
1196       case s_req_http_major:
1197       {
1198         if (ch == '.') {
1199           UPDATE_STATE(s_req_first_http_minor);
1200           break;
1201         }
1202 
1203         if (UNLIKELY(!IS_NUM(ch))) {
1204           SET_ERRNO(HPE_INVALID_VERSION);
1205           goto error;
1206         }
1207 
1208         parser->http_major *= 10;
1209         parser->http_major += ch - '0';
1210 
1211         if (UNLIKELY(parser->http_major > 999)) {
1212           SET_ERRNO(HPE_INVALID_VERSION);
1213           goto error;
1214         }
1215 
1216         break;
1217       }
1218 
1219       /* first digit of minor HTTP version */
1220       case s_req_first_http_minor:
1221         if (UNLIKELY(!IS_NUM(ch))) {
1222           SET_ERRNO(HPE_INVALID_VERSION);
1223           goto error;
1224         }
1225 
1226         parser->http_minor = ch - '0';
1227         UPDATE_STATE(s_req_http_minor);
1228         break;
1229 
1230       /* minor HTTP version or end of request line */
1231       case s_req_http_minor:
1232       {
1233         if (ch == CR) {
1234           UPDATE_STATE(s_req_line_almost_done);
1235           break;
1236         }
1237 
1238         if (ch == LF) {
1239           UPDATE_STATE(s_header_field_start);
1240           break;
1241         }
1242 
1243         /* XXX allow spaces after digit? */
1244 
1245         if (UNLIKELY(!IS_NUM(ch))) {
1246           SET_ERRNO(HPE_INVALID_VERSION);
1247           goto error;
1248         }
1249 
1250         parser->http_minor *= 10;
1251         parser->http_minor += ch - '0';
1252 
1253         if (UNLIKELY(parser->http_minor > 999)) {
1254           SET_ERRNO(HPE_INVALID_VERSION);
1255           goto error;
1256         }
1257 
1258         break;
1259       }
1260 
1261       /* end of request line */
1262       case s_req_line_almost_done:
1263       {
1264         if (UNLIKELY(ch != LF)) {
1265           SET_ERRNO(HPE_LF_EXPECTED);
1266           goto error;
1267         }
1268 
1269         UPDATE_STATE(s_header_field_start);
1270         break;
1271       }
1272 
1273       case s_header_field_start:
1274       {
1275         if (ch == CR) {
1276           UPDATE_STATE(s_headers_almost_done);
1277           break;
1278         }
1279 
1280         if (ch == LF) {
1281           /* they might be just sending \n instead of \r\n so this would be
1282            * the second \n to denote the end of headers*/
1283           UPDATE_STATE(s_headers_almost_done);
1284           REEXECUTE();
1285         }
1286 
1287         c = TOKEN(ch);
1288 
1289         if (UNLIKELY(!c)) {
1290           SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1291           goto error;
1292         }
1293 
1294         MARK(header_field);
1295 
1296         parser->index = 0;
1297         UPDATE_STATE(s_header_field);
1298 
1299         switch (c) {
1300           case 'c':
1301             parser->header_state = h_C;
1302             break;
1303 
1304           case 'p':
1305             parser->header_state = h_matching_proxy_connection;
1306             break;
1307 
1308           case 't':
1309             parser->header_state = h_matching_transfer_encoding;
1310             break;
1311 
1312           case 'u':
1313             parser->header_state = h_matching_upgrade;
1314             break;
1315 
1316           default:
1317             parser->header_state = h_general;
1318             break;
1319         }
1320         break;
1321       }
1322 
1323       case s_header_field:
1324       {
1325         const char* start = p;
1326         for (; p != data + len; p++) {
1327           ch = *p;
1328           c = TOKEN(ch);
1329 
1330           if (!c)
1331             break;
1332 
1333           switch (parser->header_state) {
1334             case h_general:
1335               break;
1336 
1337             case h_C:
1338               parser->index++;
1339               parser->header_state = (c == 'o' ? h_CO : h_general);
1340               break;
1341 
1342             case h_CO:
1343               parser->index++;
1344               parser->header_state = (c == 'n' ? h_CON : h_general);
1345               break;
1346 
1347             case h_CON:
1348               parser->index++;
1349               switch (c) {
1350                 case 'n':
1351                   parser->header_state = h_matching_connection;
1352                   break;
1353                 case 't':
1354                   parser->header_state = h_matching_content_length;
1355                   break;
1356                 default:
1357                   parser->header_state = h_general;
1358                   break;
1359               }
1360               break;
1361 
1362             /* connection */
1363 
1364             case h_matching_connection:
1365               parser->index++;
1366               if (parser->index > sizeof(CONNECTION)-1
1367                   || c != CONNECTION[parser->index]) {
1368                 parser->header_state = h_general;
1369               } else if (parser->index == sizeof(CONNECTION)-2) {
1370                 parser->header_state = h_connection;
1371               }
1372               break;
1373 
1374             /* proxy-connection */
1375 
1376             case h_matching_proxy_connection:
1377               parser->index++;
1378               if (parser->index > sizeof(PROXY_CONNECTION)-1
1379                   || c != PROXY_CONNECTION[parser->index]) {
1380                 parser->header_state = h_general;
1381               } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1382                 parser->header_state = h_connection;
1383               }
1384               break;
1385 
1386             /* content-length */
1387 
1388             case h_matching_content_length:
1389               parser->index++;
1390               if (parser->index > sizeof(CONTENT_LENGTH)-1
1391                   || c != CONTENT_LENGTH[parser->index]) {
1392                 parser->header_state = h_general;
1393               } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1394                 if (parser->flags & F_CONTENTLENGTH) {
1395                   SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1396                   goto error;
1397                 }
1398                 parser->header_state = h_content_length;
1399                 parser->flags |= F_CONTENTLENGTH;
1400               }
1401               break;
1402 
1403             /* transfer-encoding */
1404 
1405             case h_matching_transfer_encoding:
1406               parser->index++;
1407               if (parser->index > sizeof(TRANSFER_ENCODING)-1
1408                   || c != TRANSFER_ENCODING[parser->index]) {
1409                 parser->header_state = h_general;
1410               } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1411                 parser->header_state = h_transfer_encoding;
1412               }
1413               break;
1414 
1415             /* upgrade */
1416 
1417             case h_matching_upgrade:
1418               parser->index++;
1419               if (parser->index > sizeof(UPGRADE)-1
1420                   || c != UPGRADE[parser->index]) {
1421                 parser->header_state = h_general;
1422               } else if (parser->index == sizeof(UPGRADE)-2) {
1423                 parser->header_state = h_upgrade;
1424               }
1425               break;
1426 
1427             case h_connection:
1428             case h_content_length:
1429             case h_transfer_encoding:
1430             case h_upgrade:
1431               if (ch != ' ') parser->header_state = h_general;
1432               break;
1433 
1434             default:
1435               assert(0 && "Unknown header_state");
1436               break;
1437           }
1438         }
1439 
1440         COUNT_HEADER_SIZE(p - start);
1441 
1442         if (p == data + len) {
1443           --p;
1444           break;
1445         }
1446 
1447         if (ch == ':') {
1448           UPDATE_STATE(s_header_value_discard_ws);
1449           CALLBACK_DATA(header_field);
1450           break;
1451         }
1452 
1453         SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1454         goto error;
1455       }
1456 
1457       case s_header_value_discard_ws:
1458         if (ch == ' ' || ch == '\t') break;
1459 
1460         if (ch == CR) {
1461           UPDATE_STATE(s_header_value_discard_ws_almost_done);
1462           break;
1463         }
1464 
1465         if (ch == LF) {
1466           UPDATE_STATE(s_header_value_discard_lws);
1467           break;
1468         }
1469 
1470         /* FALLTHROUGH */
1471 
1472       case s_header_value_start:
1473       {
1474         MARK(header_value);
1475 
1476         UPDATE_STATE(s_header_value);
1477         parser->index = 0;
1478 
1479         c = LOWER(ch);
1480 
1481         switch (parser->header_state) {
1482           case h_upgrade:
1483             parser->flags |= F_UPGRADE;
1484             parser->header_state = h_general;
1485             break;
1486 
1487           case h_transfer_encoding:
1488             /* looking for 'Transfer-Encoding: chunked' */
1489             if ('c' == c) {
1490               parser->header_state = h_matching_transfer_encoding_chunked;
1491             } else {
1492               parser->header_state = h_general;
1493             }
1494             break;
1495 
1496           case h_content_length:
1497             if (UNLIKELY(!IS_NUM(ch))) {
1498               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1499               goto error;
1500             }
1501 
1502             parser->content_length = ch - '0';
1503             break;
1504 
1505           case h_connection:
1506             /* looking for 'Connection: keep-alive' */
1507             if (c == 'k') {
1508               parser->header_state = h_matching_connection_keep_alive;
1509             /* looking for 'Connection: close' */
1510             } else if (c == 'c') {
1511               parser->header_state = h_matching_connection_close;
1512             } else if (c == 'u') {
1513               parser->header_state = h_matching_connection_upgrade;
1514             } else {
1515               parser->header_state = h_matching_connection_token;
1516             }
1517             break;
1518 
1519           /* Multi-value `Connection` header */
1520           case h_matching_connection_token_start:
1521             break;
1522 
1523           default:
1524             parser->header_state = h_general;
1525             break;
1526         }
1527         break;
1528       }
1529 
1530       case s_header_value:
1531       {
1532         const char* start = p;
1533         enum header_states h_state = (enum header_states) parser->header_state;
1534         for (; p != data + len; p++) {
1535           ch = *p;
1536           if (ch == CR) {
1537             UPDATE_STATE(s_header_almost_done);
1538             parser->header_state = h_state;
1539             CALLBACK_DATA(header_value);
1540             break;
1541           }
1542 
1543           if (ch == LF) {
1544             UPDATE_STATE(s_header_almost_done);
1545             COUNT_HEADER_SIZE(p - start);
1546             parser->header_state = h_state;
1547             CALLBACK_DATA_NOADVANCE(header_value);
1548             REEXECUTE();
1549           }
1550 
1551           if (!lenient && !IS_HEADER_CHAR(ch)) {
1552             SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1553             goto error;
1554           }
1555 
1556           c = LOWER(ch);
1557 
1558           switch (h_state) {
1559             case h_general:
1560             {
1561               const char* p_cr;
1562               const char* p_lf;
1563               size_t limit = data + len - p;
1564 
1565               limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1566 
1567               p_cr = (const char*) memchr(p, CR, limit);
1568               p_lf = (const char*) memchr(p, LF, limit);
1569               if (p_cr != NULL) {
1570                 if (p_lf != NULL && p_cr >= p_lf)
1571                   p = p_lf;
1572                 else
1573                   p = p_cr;
1574               } else if (UNLIKELY(p_lf != NULL)) {
1575                 p = p_lf;
1576               } else {
1577                 p = data + len;
1578               }
1579               --p;
1580 
1581               break;
1582             }
1583 
1584             case h_connection:
1585             case h_transfer_encoding:
1586               assert(0 && "Shouldn't get here.");
1587               break;
1588 
1589             case h_content_length:
1590             {
1591               uint64_t t;
1592 
1593               if (ch == ' ') break;
1594 
1595               if (UNLIKELY(!IS_NUM(ch))) {
1596                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1597                 parser->header_state = h_state;
1598                 goto error;
1599               }
1600 
1601               t = parser->content_length;
1602               t *= 10;
1603               t += ch - '0';
1604 
1605               /* Overflow? Test against a conservative limit for simplicity. */
1606               if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1607                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1608                 parser->header_state = h_state;
1609                 goto error;
1610               }
1611 
1612               parser->content_length = t;
1613               break;
1614             }
1615 
1616             /* Transfer-Encoding: chunked */
1617             case h_matching_transfer_encoding_chunked:
1618               parser->index++;
1619               if (parser->index > sizeof(CHUNKED)-1
1620                   || c != CHUNKED[parser->index]) {
1621                 h_state = h_general;
1622               } else if (parser->index == sizeof(CHUNKED)-2) {
1623                 h_state = h_transfer_encoding_chunked;
1624               }
1625               break;
1626 
1627             case h_matching_connection_token_start:
1628               /* looking for 'Connection: keep-alive' */
1629               if (c == 'k') {
1630                 h_state = h_matching_connection_keep_alive;
1631               /* looking for 'Connection: close' */
1632               } else if (c == 'c') {
1633                 h_state = h_matching_connection_close;
1634               } else if (c == 'u') {
1635                 h_state = h_matching_connection_upgrade;
1636               } else if (STRICT_TOKEN(c)) {
1637                 h_state = h_matching_connection_token;
1638               } else if (c == ' ' || c == '\t') {
1639                 /* Skip lws */
1640               } else {
1641                 h_state = h_general;
1642               }
1643               break;
1644 
1645             /* looking for 'Connection: keep-alive' */
1646             case h_matching_connection_keep_alive:
1647               parser->index++;
1648               if (parser->index > sizeof(KEEP_ALIVE)-1
1649                   || c != KEEP_ALIVE[parser->index]) {
1650                 h_state = h_matching_connection_token;
1651               } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1652                 h_state = h_connection_keep_alive;
1653               }
1654               break;
1655 
1656             /* looking for 'Connection: close' */
1657             case h_matching_connection_close:
1658               parser->index++;
1659               if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1660                 h_state = h_matching_connection_token;
1661               } else if (parser->index == sizeof(CLOSE)-2) {
1662                 h_state = h_connection_close;
1663               }
1664               break;
1665 
1666             /* looking for 'Connection: upgrade' */
1667             case h_matching_connection_upgrade:
1668               parser->index++;
1669               if (parser->index > sizeof(UPGRADE) - 1 ||
1670                   c != UPGRADE[parser->index]) {
1671                 h_state = h_matching_connection_token;
1672               } else if (parser->index == sizeof(UPGRADE)-2) {
1673                 h_state = h_connection_upgrade;
1674               }
1675               break;
1676 
1677             case h_matching_connection_token:
1678               if (ch == ',') {
1679                 h_state = h_matching_connection_token_start;
1680                 parser->index = 0;
1681               }
1682               break;
1683 
1684             case h_transfer_encoding_chunked:
1685               if (ch != ' ') h_state = h_general;
1686               break;
1687 
1688             case h_connection_keep_alive:
1689             case h_connection_close:
1690             case h_connection_upgrade:
1691               if (ch == ',') {
1692                 if (h_state == h_connection_keep_alive) {
1693                   parser->flags |= F_CONNECTION_KEEP_ALIVE;
1694                 } else if (h_state == h_connection_close) {
1695                   parser->flags |= F_CONNECTION_CLOSE;
1696                 } else if (h_state == h_connection_upgrade) {
1697                   parser->flags |= F_CONNECTION_UPGRADE;
1698                 }
1699                 h_state = h_matching_connection_token_start;
1700                 parser->index = 0;
1701               } else if (ch != ' ') {
1702                 h_state = h_matching_connection_token;
1703               }
1704               break;
1705 
1706             default:
1707               UPDATE_STATE(s_header_value);
1708               h_state = h_general;
1709               break;
1710           }
1711         }
1712         parser->header_state = h_state;
1713 
1714         COUNT_HEADER_SIZE(p - start);
1715 
1716         if (p == data + len)
1717           --p;
1718         break;
1719       }
1720 
1721       case s_header_almost_done:
1722       {
1723         if (UNLIKELY(ch != LF)) {
1724           SET_ERRNO(HPE_LF_EXPECTED);
1725           goto error;
1726         }
1727 
1728         UPDATE_STATE(s_header_value_lws);
1729         break;
1730       }
1731 
1732       case s_header_value_lws:
1733       {
1734         if (ch == ' ' || ch == '\t') {
1735           UPDATE_STATE(s_header_value_start);
1736           REEXECUTE();
1737         }
1738 
1739         /* finished the header */
1740         switch (parser->header_state) {
1741           case h_connection_keep_alive:
1742             parser->flags |= F_CONNECTION_KEEP_ALIVE;
1743             break;
1744           case h_connection_close:
1745             parser->flags |= F_CONNECTION_CLOSE;
1746             break;
1747           case h_transfer_encoding_chunked:
1748             parser->flags |= F_CHUNKED;
1749             break;
1750           case h_connection_upgrade:
1751             parser->flags |= F_CONNECTION_UPGRADE;
1752             break;
1753           default:
1754             break;
1755         }
1756 
1757         UPDATE_STATE(s_header_field_start);
1758         REEXECUTE();
1759       }
1760 
1761       case s_header_value_discard_ws_almost_done:
1762       {
1763         STRICT_CHECK(ch != LF);
1764         UPDATE_STATE(s_header_value_discard_lws);
1765         break;
1766       }
1767 
1768       case s_header_value_discard_lws:
1769       {
1770         if (ch == ' ' || ch == '\t') {
1771           UPDATE_STATE(s_header_value_discard_ws);
1772           break;
1773         } else {
1774           switch (parser->header_state) {
1775             case h_connection_keep_alive:
1776               parser->flags |= F_CONNECTION_KEEP_ALIVE;
1777               break;
1778             case h_connection_close:
1779               parser->flags |= F_CONNECTION_CLOSE;
1780               break;
1781             case h_connection_upgrade:
1782               parser->flags |= F_CONNECTION_UPGRADE;
1783               break;
1784             case h_transfer_encoding_chunked:
1785               parser->flags |= F_CHUNKED;
1786               break;
1787             default:
1788               break;
1789           }
1790 
1791           /* header value was empty */
1792           MARK(header_value);
1793           UPDATE_STATE(s_header_field_start);
1794           CALLBACK_DATA_NOADVANCE(header_value);
1795           REEXECUTE();
1796         }
1797       }
1798 
1799       case s_headers_almost_done:
1800       {
1801         STRICT_CHECK(ch != LF);
1802 
1803         if (parser->flags & F_TRAILING) {
1804           /* End of a chunked request */
1805           UPDATE_STATE(s_message_done);
1806           CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1807           REEXECUTE();
1808         }
1809 
1810         /* Cannot use chunked encoding and a content-length header together
1811            per the HTTP specification. */
1812         if ((parser->flags & F_CHUNKED) &&
1813             (parser->flags & F_CONTENTLENGTH)) {
1814           SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1815           goto error;
1816         }
1817 
1818         UPDATE_STATE(s_headers_done);
1819 
1820         /* Set this here so that on_headers_complete() callbacks can see it */
1821         parser->upgrade =
1822           ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) ==
1823            (F_UPGRADE | F_CONNECTION_UPGRADE) ||
1824            parser->method == HTTP_CONNECT);
1825 
1826         /* Here we call the headers_complete callback. This is somewhat
1827          * different than other callbacks because if the user returns 1, we
1828          * will interpret that as saying that this message has no body. This
         * is needed for the annoying case of receiving a response to a HEAD
1830          * request.
1831          *
1832          * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1833          * we have to simulate it by handling a change in errno below.
1834          */
1835         if (settings->on_headers_complete) {
1836           switch (settings->on_headers_complete(parser)) {
1837             case 0:
1838               break;
1839 
1840             case 2:
1841               parser->upgrade = 1;
1842               /* falls through */
1843             case 1:
1844               parser->flags |= F_SKIPBODY;
1845               break;
1846 
1847             default:
1848               SET_ERRNO(HPE_CB_headers_complete);
1849               RETURN(p - data); /* Error */
1850           }
1851         }
1852 
1853         if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1854           RETURN(p - data);
1855         }
1856 
1857         REEXECUTE();
1858       }
1859 
1860       case s_headers_done:
1861       {
1862         int hasBody;
1863         STRICT_CHECK(ch != LF);
1864 
1865         parser->nread = 0;
1866 
1867         hasBody = parser->flags & F_CHUNKED ||
1868           (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1869         if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1870                                 (parser->flags & F_SKIPBODY) || !hasBody)) {
1871           /* Exit, the rest of the message is in a different protocol. */
1872           UPDATE_STATE(NEW_MESSAGE());
1873           CALLBACK_NOTIFY(message_complete);
1874           RETURN((p - data) + 1);
1875         }
1876 
1877         if (parser->flags & F_SKIPBODY) {
1878           UPDATE_STATE(NEW_MESSAGE());
1879           CALLBACK_NOTIFY(message_complete);
1880         } else if (parser->flags & F_CHUNKED) {
1881           /* chunked encoding - ignore Content-Length header */
1882           UPDATE_STATE(s_chunk_size_start);
1883         } else {
1884           if (parser->content_length == 0) {
1885             /* Content-Length header given but zero: Content-Length: 0\r\n */
1886             UPDATE_STATE(NEW_MESSAGE());
1887             CALLBACK_NOTIFY(message_complete);
1888           } else if (parser->content_length != ULLONG_MAX) {
1889             /* Content-Length header given and non-zero */
1890             UPDATE_STATE(s_body_identity);
1891           } else {
1892             if (!http_message_needs_eof(parser)) {
1893               /* Assume content-length 0 - read the next */
1894               UPDATE_STATE(NEW_MESSAGE());
1895               CALLBACK_NOTIFY(message_complete);
1896             } else {
1897               /* Read body until EOF */
1898               UPDATE_STATE(s_body_identity_eof);
1899             }
1900           }
1901         }
1902 
1903         break;
1904       }
1905 
1906       case s_body_identity:
1907       {
1908         uint64_t to_read = MIN(parser->content_length,
1909                                (uint64_t) ((data + len) - p));
1910 
1911         assert(parser->content_length != 0
1912             && parser->content_length != ULLONG_MAX);
1913 
1914         /* The difference between advancing content_length and p is because
1915          * the latter will automatically advance on the next loop iteration.
1916          * Further, if content_length ends up at 0, we want to see the last
1917          * byte again for our message complete callback.
1918          */
1919         MARK(body);
1920         parser->content_length -= to_read;
1921         p += to_read - 1;
1922 
1923         if (parser->content_length == 0) {
1924           UPDATE_STATE(s_message_done);
1925 
1926           /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1927            *
1928            * The alternative to doing this is to wait for the next byte to
1929            * trigger the data callback, just as in every other case. The
1930            * problem with this is that this makes it difficult for the test
1931            * harness to distinguish between complete-on-EOF and
1932            * complete-on-length. It's not clear that this distinction is
1933            * important for applications, but let's keep it for now.
1934            */
1935           CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1936           REEXECUTE();
1937         }
1938 
1939         break;
1940       }
1941 
1942       /* read until EOF */
1943       case s_body_identity_eof:
1944         MARK(body);
1945         p = data + len - 1;
1946 
1947         break;
1948 
1949       case s_message_done:
1950         UPDATE_STATE(NEW_MESSAGE());
1951         CALLBACK_NOTIFY(message_complete);
1952         if (parser->upgrade) {
1953           /* Exit, the rest of the message is in a different protocol. */
1954           RETURN((p - data) + 1);
1955         }
1956         break;
1957 
1958       case s_chunk_size_start:
1959       {
1960         assert(parser->nread == 1);
1961         assert(parser->flags & F_CHUNKED);
1962 
1963         unhex_val = unhex[(unsigned char)ch];
1964         if (UNLIKELY(unhex_val == -1)) {
1965           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1966           goto error;
1967         }
1968 
1969         parser->content_length = unhex_val;
1970         UPDATE_STATE(s_chunk_size);
1971         break;
1972       }
1973 
1974       case s_chunk_size:
1975       {
1976         uint64_t t;
1977 
1978         assert(parser->flags & F_CHUNKED);
1979 
1980         if (ch == CR) {
1981           UPDATE_STATE(s_chunk_size_almost_done);
1982           break;
1983         }
1984 
1985         unhex_val = unhex[(unsigned char)ch];
1986 
1987         if (unhex_val == -1) {
1988           if (ch == ';' || ch == ' ') {
1989             UPDATE_STATE(s_chunk_parameters);
1990             break;
1991           }
1992 
1993           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1994           goto error;
1995         }
1996 
1997         t = parser->content_length;
1998         t *= 16;
1999         t += unhex_val;
2000 
2001         /* Overflow? Test against a conservative limit for simplicity. */
2002         if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
2003           SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
2004           goto error;
2005         }
2006 
2007         parser->content_length = t;
2008         break;
2009       }
2010 
2011       case s_chunk_parameters:
2012       {
2013         assert(parser->flags & F_CHUNKED);
2014         /* just ignore this shit. TODO check for overflow */
2015         if (ch == CR) {
2016           UPDATE_STATE(s_chunk_size_almost_done);
2017           break;
2018         }
2019         break;
2020       }
2021 
2022       case s_chunk_size_almost_done:
2023       {
2024         assert(parser->flags & F_CHUNKED);
2025         STRICT_CHECK(ch != LF);
2026 
2027         parser->nread = 0;
2028 
2029         if (parser->content_length == 0) {
2030           parser->flags |= F_TRAILING;
2031           UPDATE_STATE(s_header_field_start);
2032         } else {
2033           UPDATE_STATE(s_chunk_data);
2034         }
2035         CALLBACK_NOTIFY(chunk_header);
2036         break;
2037       }
2038 
2039       case s_chunk_data:
2040       {
2041         uint64_t to_read = MIN(parser->content_length,
2042                                (uint64_t) ((data + len) - p));
2043 
2044         assert(parser->flags & F_CHUNKED);
2045         assert(parser->content_length != 0
2046             && parser->content_length != ULLONG_MAX);
2047 
2048         /* See the explanation in s_body_identity for why the content
2049          * length and data pointers are managed this way.
2050          */
2051         MARK(body);
2052         parser->content_length -= to_read;
2053         p += to_read - 1;
2054 
2055         if (parser->content_length == 0) {
2056           UPDATE_STATE(s_chunk_data_almost_done);
2057         }
2058 
2059         break;
2060       }
2061 
2062       case s_chunk_data_almost_done:
2063         assert(parser->flags & F_CHUNKED);
2064         assert(parser->content_length == 0);
2065         STRICT_CHECK(ch != CR);
2066         UPDATE_STATE(s_chunk_data_done);
2067         CALLBACK_DATA(body);
2068         break;
2069 
2070       case s_chunk_data_done:
2071         assert(parser->flags & F_CHUNKED);
2072         STRICT_CHECK(ch != LF);
2073         parser->nread = 0;
2074         UPDATE_STATE(s_chunk_size_start);
2075         CALLBACK_NOTIFY(chunk_complete);
2076         break;
2077 
2078       default:
2079         assert(0 && "unhandled state");
2080         SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2081         goto error;
2082     }
2083   }
2084 
2085   /* Run callbacks for any marks that we have leftover after we ran out of
2086    * bytes. There should be at most one of these set, so it's OK to invoke
2087    * them in series (unset marks will not result in callbacks).
2088    *
2089    * We use the NOADVANCE() variety of callbacks here because 'p' has already
2090    * overflowed 'data' and this allows us to correct for the off-by-one that
2091    * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2092    * value that's in-bounds).
2093    */
2094 
2095   assert(((header_field_mark ? 1 : 0) +
2096           (header_value_mark ? 1 : 0) +
2097           (url_mark ? 1 : 0)  +
2098           (body_mark ? 1 : 0) +
2099           (status_mark ? 1 : 0)) <= 1);
2100 
2101   CALLBACK_DATA_NOADVANCE(header_field);
2102   CALLBACK_DATA_NOADVANCE(header_value);
2103   CALLBACK_DATA_NOADVANCE(url);
2104   CALLBACK_DATA_NOADVANCE(body);
2105   CALLBACK_DATA_NOADVANCE(status);
2106 
2107   RETURN(len);
2108 
2109 error:
2110   if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2111     SET_ERRNO(HPE_UNKNOWN);
2112   }
2113 
2114   RETURN(p - data);
2115 }
2116 
2117 
2118 /* Does the parser need to see an EOF to find the end of the message? */
2119 int
http_message_needs_eof(const http_parser * parser)2120 http_message_needs_eof (const http_parser *parser)
2121 {
2122   if (parser->type == HTTP_REQUEST) {
2123     return 0;
2124   }
2125 
2126   /* See RFC 2616 section 4.4 */
2127   if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2128       parser->status_code == 204 ||     /* No Content */
2129       parser->status_code == 304 ||     /* Not Modified */
2130       parser->flags & F_SKIPBODY) {     /* response to a HEAD request */
2131     return 0;
2132   }
2133 
2134   if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2135     return 0;
2136   }
2137 
2138   return 1;
2139 }
2140 
2141 
2142 int
http_should_keep_alive(const http_parser * parser)2143 http_should_keep_alive (const http_parser *parser)
2144 {
2145   if (parser->http_major > 0 && parser->http_minor > 0) {
2146     /* HTTP/1.1 */
2147     if (parser->flags & F_CONNECTION_CLOSE) {
2148       return 0;
2149     }
2150   } else {
2151     /* HTTP/1.0 or earlier */
2152     if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2153       return 0;
2154     }
2155   }
2156 
2157   return !http_message_needs_eof(parser);
2158 }
2159 
2160 
2161 const char *
http_method_str(enum http_method m)2162 http_method_str (enum http_method m)
2163 {
2164   return ELEM_AT(method_strings, m, "<unknown>");
2165 }
2166 
2167 
2168 void
http_parser_init(http_parser * parser,enum http_parser_type t)2169 http_parser_init (http_parser *parser, enum http_parser_type t)
2170 {
2171   void *data = parser->data; /* preserve application data */
2172   memset(parser, 0, sizeof(*parser));
2173   parser->data = data;
2174   parser->type = t;
2175   parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2176   parser->http_errno = HPE_OK;
2177 }
2178 
2179 void
http_parser_settings_init(http_parser_settings * settings)2180 http_parser_settings_init(http_parser_settings *settings)
2181 {
2182   memset(settings, 0, sizeof(*settings));
2183 }
2184 
2185 const char *
http_errno_name(enum http_errno err)2186 http_errno_name(enum http_errno err) {
2187   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2188   return http_strerror_tab[err].name;
2189 }
2190 
2191 const char *
http_errno_description(enum http_errno err)2192 http_errno_description(enum http_errno err) {
2193   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2194   return http_strerror_tab[err].description;
2195 }
2196 
2197 static enum http_host_state
http_parse_host_char(enum http_host_state s,const char ch)2198 http_parse_host_char(enum http_host_state s, const char ch) {
2199   switch(s) {
2200     case s_http_userinfo:
2201     case s_http_userinfo_start:
2202       if (ch == '@') {
2203         return s_http_host_start;
2204       }
2205 
2206       if (IS_USERINFO_CHAR(ch)) {
2207         return s_http_userinfo;
2208       }
2209       break;
2210 
2211     case s_http_host_start:
2212       if (ch == '[') {
2213         return s_http_host_v6_start;
2214       }
2215 
2216       if (IS_HOST_CHAR(ch)) {
2217         return s_http_host;
2218       }
2219 
2220       break;
2221 
2222     case s_http_host:
2223       if (IS_HOST_CHAR(ch)) {
2224         return s_http_host;
2225       }
2226 
2227     /* FALLTHROUGH */
2228     case s_http_host_v6_end:
2229       if (ch == ':') {
2230         return s_http_host_port_start;
2231       }
2232 
2233       break;
2234 
2235     case s_http_host_v6:
2236       if (ch == ']') {
2237         return s_http_host_v6_end;
2238       }
2239 
2240     /* FALLTHROUGH */
2241     case s_http_host_v6_start:
2242       if (IS_HEX(ch) || ch == ':' || ch == '.') {
2243         return s_http_host_v6;
2244       }
2245 
2246       if (s == s_http_host_v6 && ch == '%') {
2247         return s_http_host_v6_zone_start;
2248       }
2249       break;
2250 
2251     case s_http_host_v6_zone:
2252       if (ch == ']') {
2253         return s_http_host_v6_end;
2254       }
2255 
2256     /* FALLTHROUGH */
2257     case s_http_host_v6_zone_start:
2258       /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2259       if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2260           ch == '~') {
2261         return s_http_host_v6_zone;
2262       }
2263       break;
2264 
2265     case s_http_host_port:
2266     case s_http_host_port_start:
2267       if (IS_NUM(ch)) {
2268         return s_http_host_port;
2269       }
2270 
2271       break;
2272 
2273     default:
2274       break;
2275   }
2276   return s_http_host_dead;
2277 }
2278 
2279 static int
http_parse_host(const char * buf,struct http_parser_url * u,int found_at)2280 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2281   enum http_host_state s;
2282 
2283   const char *p;
2284   size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2285 
2286   assert(u->field_set & (1 << UF_HOST));
2287 
2288   u->field_data[UF_HOST].len = 0;
2289 
2290   s = found_at ? s_http_userinfo_start : s_http_host_start;
2291 
2292   for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2293     enum http_host_state new_s = http_parse_host_char(s, *p);
2294 
2295     if (new_s == s_http_host_dead) {
2296       return 1;
2297     }
2298 
2299     switch(new_s) {
2300       case s_http_host:
2301         if (s != s_http_host) {
2302           u->field_data[UF_HOST].off = p - buf;
2303         }
2304         u->field_data[UF_HOST].len++;
2305         break;
2306 
2307       case s_http_host_v6:
2308         if (s != s_http_host_v6) {
2309           u->field_data[UF_HOST].off = p - buf;
2310         }
2311         u->field_data[UF_HOST].len++;
2312         break;
2313 
2314       case s_http_host_v6_zone_start:
2315       case s_http_host_v6_zone:
2316         u->field_data[UF_HOST].len++;
2317         break;
2318 
2319       case s_http_host_port:
2320         if (s != s_http_host_port) {
2321           u->field_data[UF_PORT].off = p - buf;
2322           u->field_data[UF_PORT].len = 0;
2323           u->field_set |= (1 << UF_PORT);
2324         }
2325         u->field_data[UF_PORT].len++;
2326         break;
2327 
2328       case s_http_userinfo:
2329         if (s != s_http_userinfo) {
2330           u->field_data[UF_USERINFO].off = p - buf ;
2331           u->field_data[UF_USERINFO].len = 0;
2332           u->field_set |= (1 << UF_USERINFO);
2333         }
2334         u->field_data[UF_USERINFO].len++;
2335         break;
2336 
2337       default:
2338         break;
2339     }
2340     s = new_s;
2341   }
2342 
2343   /* Make sure we don't end somewhere unexpected */
2344   switch (s) {
2345     case s_http_host_start:
2346     case s_http_host_v6_start:
2347     case s_http_host_v6:
2348     case s_http_host_v6_zone_start:
2349     case s_http_host_v6_zone:
2350     case s_http_host_port_start:
2351     case s_http_userinfo:
2352     case s_http_userinfo_start:
2353       return 1;
2354     default:
2355       break;
2356   }
2357 
2358   return 0;
2359 }
2360 
2361 void
http_parser_url_init(struct http_parser_url * u)2362 http_parser_url_init(struct http_parser_url *u) {
2363   memset(u, 0, sizeof(*u));
2364 }
2365 
2366 int
http_parser_parse_url(const char * buf,size_t buflen,int is_connect,struct http_parser_url * u)2367 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2368                       struct http_parser_url *u)
2369 {
2370   enum state s;
2371   const char *p;
2372   enum http_parser_url_fields uf, old_uf;
2373   int found_at = 0;
2374 
2375   u->port = u->field_set = 0;
2376   s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2377   old_uf = UF_MAX;
2378 
2379   for (p = buf; p < buf + buflen; p++) {
2380     s = parse_url_char(s, *p);
2381 
2382     /* Figure out the next field that we're operating on */
2383     switch (s) {
2384       case s_dead:
2385         return 1;
2386 
2387       /* Skip delimeters */
2388       case s_req_schema_slash:
2389       case s_req_schema_slash_slash:
2390       case s_req_server_start:
2391       case s_req_query_string_start:
2392       case s_req_fragment_start:
2393         continue;
2394 
2395       case s_req_schema:
2396         uf = UF_SCHEMA;
2397         break;
2398 
2399       case s_req_server_with_at:
2400         found_at = 1;
2401 
2402         /* falls through */
2403       case s_req_server:
2404         uf = UF_HOST;
2405         break;
2406 
2407       case s_req_path:
2408         uf = UF_PATH;
2409         break;
2410 
2411       case s_req_query_string:
2412         uf = UF_QUERY;
2413         break;
2414 
2415       case s_req_fragment:
2416         uf = UF_FRAGMENT;
2417         break;
2418 
2419       default:
2420         assert(!"Unexpected state");
2421         return 1;
2422     }
2423 
2424     /* Nothing's changed; soldier on */
2425     if (uf == old_uf) {
2426       u->field_data[uf].len++;
2427       continue;
2428     }
2429 
2430     u->field_data[uf].off = p - buf;
2431     u->field_data[uf].len = 1;
2432 
2433     u->field_set |= (1 << uf);
2434     old_uf = uf;
2435   }
2436 
2437   /* host must be present if there is a schema */
2438   /* parsing http:///toto will fail */
2439   if ((u->field_set & (1 << UF_SCHEMA)) &&
2440       (u->field_set & (1 << UF_HOST)) == 0) {
2441     return 1;
2442   }
2443 
2444   if (u->field_set & (1 << UF_HOST)) {
2445     if (http_parse_host(buf, u, found_at) != 0) {
2446       return 1;
2447     }
2448   }
2449 
2450   /* CONNECT requests can only contain "hostname:port" */
2451   if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2452     return 1;
2453   }
2454 
2455   if (u->field_set & (1 << UF_PORT)) {
2456     /* Don't bother with endp; we've already validated the string */
2457     unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2458 
2459     /* Ports have a max value of 2^16 */
2460     if (v > 0xffff) {
2461       return 1;
2462     }
2463 
2464     u->port = (uint16_t) v;
2465   }
2466 
2467   return 0;
2468 }
2469 
2470 void
http_parser_pause(http_parser * parser,int paused)2471 http_parser_pause(http_parser *parser, int paused) {
2472   /* Users should only be pausing/unpausing a parser that is not in an error
2473    * state. In non-debug builds, there's not much that we can do about this
2474    * other than ignore it.
2475    */
2476   if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2477       HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2478     SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2479   } else {
2480     assert(0 && "Attempting to pause parser in error state");
2481   }
2482 }
2483 
2484 int
http_body_is_final(const struct http_parser * parser)2485 http_body_is_final(const struct http_parser *parser) {
2486     return parser->state == s_message_done;
2487 }
2488 
2489 unsigned long
http_parser_version(void)2490 http_parser_version(void) {
2491   return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2492          HTTP_PARSER_VERSION_MINOR * 0x00100 |
2493          HTTP_PARSER_VERSION_PATCH * 0x00001;
2494 }
2495