1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 *   http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20/**
21 * Thrift scanner.
22 *
23 * Tokenizes a thrift definition file.
24 */
25
26%{
27
/* This is redundant with some of the flags in Makefile.am, but it works
 * when people override CXXFLAGS without being careful. The pragmas are
 * the 'right' way to do it, but don't work on old-enough GCC (in particular
 * the GCC that ships on Mac OS X 10.6.5, *counter* to what the GNU docs say).
 *
 * We should revert the Makefile.am changes once Apple ships a reasonable
 * GCC.
 */
36#ifdef __GNUC__
37#pragma GCC diagnostic ignored "-Wunused-function"
38#pragma GCC diagnostic ignored "-Wunused-label"
39#endif
40
41#ifdef _MSC_VER
42#pragma warning( push )
43
44// warning C4102: 'find_rule' : unreferenced label
45#pragma warning( disable : 4102 )
46
47// warning C4267: 'argument' : conversion from 'size_t' to 'int', possible loss of data
48#pragma warning( disable : 4267 )
49
50// avoid isatty redefinition
51#define YY_NEVER_INTERACTIVE 1
52
53#define YY_NO_UNISTD_H 1
54#endif
55
56#include <cassert>
57#include <string>
58#include <errno.h>
59#include <stdlib.h>
60
61#ifdef _MSC_VER
62#include "thrift/windows/config.h"
63#endif
64#include "thrift/main.h"
65#include "thrift/common.h"
66#include "thrift/globals.h"
67#include "thrift/parse/t_program.h"
68
69/**
70 * Must be included AFTER parse/t_program.h, but I can't remember why anymore
71 * because I wrote this a while ago.
72 */
73#if defined(BISON_USE_PARSER_H_EXTENSION)
74#include "thrift/thrifty.h"
75#else
76#include "thrift/thrifty.hh"
77#endif
78
79void integer_overflow(const char* text) {
80  yyerror("This integer is too big: \"%s\"\n", text);
81  exit(1);
82}
83
84void unexpected_token(const char* text) {
85  yyerror("Unexpected token in input: \"%s\"\n", text);
86  exit(1);
87}
88
89void error_no_longer_supported(const char* text, const char* replace_with) {
90  yyerror("\"%s\" is no longer supported, use \"%s\" instead. Line %d\n", text, replace_with, yylineno);
91  exit(1);
92}
93
94
95%}
96
97/**
98 * Provides the yylineno global, useful for debugging output
99 */
100%option lex-compat
101
102/**
103 * Our inputs are all single files, so no need for yywrap
104 */
105%option noyywrap
106
107/**
108 * We don't use it, and it fires up warnings at -Wall
109 */
110%option nounput
111
112/**
113 * Helper definitions, comments, constants, and whatnot
114 */
115
/* Optional sign followed by one or more decimal digits. */
intconstant ([+-]?[0-9]+)
/* Optional sign, a lowercase 0x prefix, then one or more hex digits. */
hexconstant ([+-]?"0x"[0-9A-Fa-f]+)
/* Optional sign, integer digits, fraction and exponent; every part is optional. */
dubconstant ([+-]?[0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?)
/* Letter or underscore first; a dot is only accepted when a word character follows it. */
identifier ([a-zA-Z_](\.[a-zA-Z_0-9]|[a-zA-Z_0-9])*)
/* Any run, possibly empty, of spaces, tabs, carriage returns and newlines. */
whitespace ([ \t\r\n]*)
/* A comment opener, any number of extra stars, and a comment closer with nothing in between. */
sillycomm ("/*""*"*"*/")
/* Opener of an ordinary multi-line comment. */
multicm_begin ("/*")
/* Opener of a doc-comment. */
doctext_begin ("/**")
/* Two slashes and everything up to the end of the line. */
comment ("//"[^\n]*)
/* A hash sign and everything up to the end of the line. */
unixcomment ("#"[^\n]*)
/* One punctuation character. */
symbol ([:;\,\{\}\(\)\=<>\[\]])
/* A single or a double quote opening a string literal. */
literal_begin (['\"])
128
129%%
130
{whitespace} { /* whitespace is discarded */ }
{sillycomm} { /* a comment made only of stars is discarded */ }
133
{doctext_begin} {
  /* Doc-comment: the opener has already been matched. The rest is consumed
     by hand up to and including the closing star-slash, and the text is then
     published through the g_doctext globals. */
  std::string parsed("/**");
  /* Terminator state: 0 = normal, 1 = star seen, 2 = star-slash seen. */
  int state = 0;
  while (state < 2)
  {
    const int ch = yyinput();
    /* Depending on the flex release, yyinput() reports end of input as EOF
       or as 0. Both are checked, and before the character is stored, so an
       unterminated comment fails cleanly instead of looping forever. */
    if (ch == EOF || ch == 0) {
      yyerror("Unexpected end of file in doc-comment at %d\n", yylineno);
      exit(1);
    }
    parsed.push_back(static_cast<char>(ch));
    if (ch == '*') {
      state = 1;
    } else if (ch == '/') {
      /* A slash only closes the comment when it directly follows a star. */
      state = (state == 1) ? 2 : 0;
    } else {
      state = 0;
    }
  }
  pdebug("doctext = \"%s\"\n",parsed.c_str());

  /* This does not show up in the parse tree. */
  /* Rather, the parser will grab it out of the global. */
  if (g_parse_mode == PROGRAM) {
    clear_doctext();
    /* Skip the three-character opener, then overwrite the closing
       star-slash: the star becomes a space and the slash the terminator. */
    g_doctext = strdup(parsed.c_str() + 3);
    const size_t doctext_len = strlen(g_doctext);
    assert(doctext_len >= 2);
    g_doctext[doctext_len - 2] = ' ';
    g_doctext[doctext_len - 1] = '\0';
    g_doctext = clean_up_doctext(g_doctext);
    g_doctext_lineno = yylineno;
    /* The first doc-comment seen while no candidate exists is remembered
       as a possible program-level doctext. */
    if( (g_program_doctext_candidate == nullptr) && (g_program_doctext_status == INVALID)){
      g_program_doctext_candidate = strdup(g_doctext);
      g_program_doctext_lineno = g_doctext_lineno;
      g_program_doctext_status = STILL_CANDIDATE;
      pdebug("%s","program doctext set to STILL_CANDIDATE");
    }
  }
}
176
{multicm_begin}  { /* parsed, but thrown away */
  /* The text is collected only so that it can be shown by pdebug(). */
  std::string parsed("/*");
  /* Terminator state: 0 = normal, 1 = star seen, 2 = star-slash seen. */
  int state = 0;
  while (state < 2)
  {
    const int ch = yyinput();
    /* Depending on the flex release, yyinput() reports end of input as EOF
       or as 0. Both are checked, and before the character is stored, so an
       unterminated comment fails cleanly instead of looping forever. */
    if (ch == EOF || ch == 0) {
      yyerror("Unexpected end of file in multiline comment at %d\n", yylineno);
      exit(1);
    }
    parsed.push_back(static_cast<char>(ch));
    if (ch == '*') {
      state = 1;
    } else if (ch == '/') {
      /* A slash only closes the comment when it directly follows a star. */
      state = (state == 1) ? 2 : 0;
    } else {
      state = 0;
    }
  }
  pdebug("multi_comm = \"%s\"\n",parsed.c_str());
}
201
{comment} { /* line comments are discarded */ }
{unixcomment} { /* hash comments are discarded */ }

  /* Punctuation is returned as its own character code. */
{symbol} { return yytext[0]; }
"*" { return yytext[0]; }

  /* Boolean literals are delivered as the integer constants 0 and 1. */
"false" { yylval.iconst = 0; return tok_int_constant; }
"true" { yylval.iconst = 1; return tok_int_constant; }

  /* Header keywords. */
"namespace" { return tok_namespace; }
"cpp_include" { return tok_cpp_include; }
"cpp_type" { return tok_cpp_type; }
"xsd_all" { return tok_xsd_all; }
"xsd_optional" { return tok_xsd_optional; }
"xsd_nillable" { return tok_xsd_nillable; }
"xsd_attrs" { return tok_xsd_attrs; }
"include" { return tok_include; }

  /* Base types; the byte keyword also emits a warning. */
"void" { return tok_void; }
"bool" { return tok_bool; }
"byte" {
  emit_byte_type_warning();
  return tok_byte;
}
"i8" { return tok_i8; }
"i16" { return tok_i16; }
"i32" { return tok_i32; }
"i64" { return tok_i64; }
"double" { return tok_double; }
"string" { return tok_string; }
"binary" { return tok_binary; }
"uuid" { return tok_uuid; }

  /* Container types. */
"map" { return tok_map; }
"list" { return tok_list; }
"set" { return tok_set; }

  /* Definition and field keywords. */
"oneway" { return tok_oneway; }
"typedef" { return tok_typedef; }
"struct" { return tok_struct; }
"union" { return tok_union; }
"exception" { return tok_xception; }
"extends" { return tok_extends; }
"throws" { return tok_throws; }
"service" { return tok_service; }
"enum" { return tok_enum; }
"const" { return tok_const; }
"required" { return tok_required; }
"optional" { return tok_optional; }

  /* Deprecated spelling of oneway: still tokenized, but warned about. */
"async" {
  pwarning(0, "\"async\" is deprecated.  It is called \"oneway\" now.\n");
  return tok_async;
}
"&" { return tok_reference; }
250
{intconstant} {
  /* errno is cleared first so that ERANGE afterwards can only come from
     this conversion. */
  errno = 0;
  const long long decimal_value = strtoll(yytext, nullptr, 10);
  yylval.iconst = decimal_value;
  if (ERANGE == errno) {
    /* Does not return: reports the literal and exits. */
    integer_overflow(yytext);
  }
  return tok_int_constant;
}
259
{hexconstant} {
  /* strtoll() with base 16 already accepts an optional sign followed by an
     optional 0x prefix, so the whole token is converted in one call.
     Converting the sign together with the digits also lets the most
     negative long long value through; converting the magnitude first and
     negating it afterwards rejected that value as out of range. */
  errno = 0;
  yylval.iconst = strtoll(yytext, nullptr, 16);
  if (errno == ERANGE) {
    /* Does not return: reports the literal and exits. */
    integer_overflow(yytext);
  }
  return tok_int_constant;
}
273
{identifier} {
  /* The parser receives a strdup() copy of the matched text. */
  char* const name_copy = strdup(yytext);
  yylval.id = name_copy;
  return tok_identifier;
}
278
{dubconstant} {
  /* Deliberately placed after identifier, since "e10" is NOT a double literal (THRIFT-3477) */
  /* strtod() is used rather than atof() because atof() has undefined
     behaviour when the value cannot be represented, whereas strtod() is
     well defined for such input. Valid input converts identically. */
  yylval.dconst = strtod(yytext, nullptr);
  return tok_dub_constant;
}
284
{literal_begin} {
  /* String literal: the opening quote has been matched. Characters are
     consumed by hand until the same quote character closes the literal;
     backslash escapes are decoded on the way. */
  const char mark = yytext[0];
  std::string result;
  for(;;)
  {
    int ch = yyinput();
    /* Depending on the flex release, yyinput() reports end of input as EOF
       or as 0. Both are checked so that an unterminated literal fails
       cleanly instead of looping forever. */
    if (ch == EOF || ch == 0) {
      yyerror("End of file while read string at %d\n", yylineno);
      exit(1);
    }
    if (ch == '\n') {
      /* The newline has already been consumed, hence the line adjustment. */
      yyerror("End of line while read string at %d\n", yylineno - 1);
      exit(1);
    }
    if (ch == mark) {
      /* Closing quote: the parser receives a strdup() copy of the text. */
      yylval.id = strdup(result.c_str());
      return tok_literal;
    }
    if (ch != '\\') {
      result.push_back(static_cast<char>(ch));
      continue;
    }
    /* Backslash: the next character selects the escape. Anything not listed
       here, end of input included, is reported as a bad escape. */
    ch = yyinput();
    switch (ch) {
      case 'r':
        result.push_back('\r');
        break;
      case 'n':
        result.push_back('\n');
        break;
      case 't':
        result.push_back('\t');
        break;
      case '"':
        result.push_back('"');
        break;
      case '\'':
        result.push_back('\'');
        break;
      case '\\':
        result.push_back('\\');
        break;
      default:
        yyerror("Bad escape character\n");
        return -1;
    }
  }
}
334
335
. {
  /* Reached only when no rule above matched this character; the helper
     reports it and exits. */
  const char* const offending_text = yytext;
  unexpected_token(offending_text);
}
339
340%%
341
342#ifdef _MSC_VER
343#pragma warning( pop )
344#endif
345
346/* vim: filetype=lex
347*/
348