1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  *   http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 /**
21  * thrift - a lightweight cross-language rpc/serialization tool
22  *
23  * This file contains the main compiler engine for Thrift, which invokes the
24  * scanner/parser to build the thrift object tree. The interface generation
25  * code for each language lives in a file by the language name under the
26  * generate/ folder, and all parse structures live in parse/
27  *
28  */
29 
30 #include <cassert>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <stdarg.h>
34 #include <time.h>
35 #include <string>
36 #include <algorithm>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <errno.h>
40 #include <limits.h>
41 
42 #ifdef _WIN32
43 #include <windows.h> /* for GetFullPathName */
44 #endif
45 
46 // Careful: must include globals first for extern definitions
47 #include "thrift/common.h"
48 #include "thrift/globals.h"
49 
50 #include "thrift/platform.h"
51 #include "thrift/main.h"
52 #include "thrift/parse/t_program.h"
53 #include "thrift/parse/t_scope.h"
54 #include "thrift/generate/t_generator.h"
55 #include "thrift/audit/t_audit.h"
56 
57 #include "thrift/version.h"
58 
59 using namespace std;
60 
/**
 * Global program tree. parse() points this at the program whose file is
 * being parsed before each yyparse() pass.
 */
t_program* g_program;

/**
 * Global scope. parse() sets this to the scope of the program being parsed.
 */
t_scope* g_scope;

/**
 * Parent scope to also parse types. parse() sets this to the scope of the
 * including program, or to nullptr when there is no parent program.
 */
t_scope* g_parent_scope;

/**
 * Prefix for putting types in parent scope. parse() sets this to the program
 * name followed by a dot.
 */
string g_parent_prefix;

/**
 * Parsing pass. parse() runs the parser once in INCLUDES mode and then again
 * in PROGRAM mode.
 */
PARSE_MODE g_parse_mode;

/**
 * Current directory of file being parsed. include_file() searches this
 * directory before the entries of g_incl_searchpath.
 */
string g_curdir;

/**
 * Current file being parsed. Used as the location prefix of error, warning
 * and failure messages.
 */
string g_curpath;

/**
 * Search path for inclusions. include_file() tries these directories, in
 * order, for relative include paths.
 */
vector<string> g_incl_searchpath;

/**
 * Global debug state. pdebug() prints only when this is non-zero.
 */
int g_debug = 0;

/**
 * Strictness level
 */
int g_strict = 127;

/**
 * Warning level. pwarning() drops any message whose level is greater than
 * this value.
 */
int g_warn = 1;

/**
 * Verbose output. pverbose() prints only when this is non-zero.
 */
int g_verbose = 0;

/**
 * Global time string
 */
char* g_time_str;

/**
 * The last parsed doctext comment. Released with free() by clear_doctext().
 */
char* g_doctext;

/**
 * The first doctext comment, kept as the candidate for the program-level
 * doctext. Released with free() by reset_program_doctext_info().
 */
char* g_program_doctext_candidate;

/**
 * Whether or not negative field keys are accepted.
 * NOTE(review): presumably set by the --allow-neg-keys option - confirm.
 */
int g_allow_neg_field_keys;

/**
 * Whether or not 64-bit constants will generate a warning.
 * NOTE(review): presumably set by the --allow-64bit-consts option - confirm.
 */
int g_allow_64bit_consts = 0;

/**
 * Flags to control code generation
 * NOTE(review): presumably set by the -r[ecurse] option - confirm.
 */
bool gen_recurse = false;

/**
 * Flags to control thrift audit
 */
bool g_audit = false;

/**
 * Flags to control return status
 */
bool g_return_failure = false;
bool g_audit_fatal = true;
bool g_generator_failure = false;
162 
/**
 * Resolves path to an absolute path and stores it in resolved_path.
 *
 * Win32 has no realpath(), so GetFullPathNameA() is used there instead. If it
 * fails, or its result does not fit in MAX_PATH, the input path is copied
 * unchanged. Backslashes are then converted to forward slashes and
 * resolved_path is always returned. Everywhere else this simply forwards to
 * realpath().
 *
 * @param path          Path to resolve
 * @param resolved_path Caller-provided buffer that receives the result
 * @return resolved_path on Win32; otherwise the result of realpath()
 */
char* saferealpath(const char* path, char* resolved_path) {
#ifdef _WIN32
  char full_path[MAX_PATH];
  char* file_part;
  const DWORD written = GetFullPathNameA(path, MAX_PATH, full_path, &file_part);

  // Zero signals failure; MAX_PATH or more means the buffer was too small.
  const bool resolved = (written != 0) && (written < MAX_PATH);
  strcpy(resolved_path, resolved ? full_path : path);

  // Replace backslashes with forward slashes so the
  // rest of the code behaves correctly.
  std::replace(resolved_path, resolved_path + strlen(resolved_path), '\\', '/');
  return resolved_path;
#else
  return realpath(path, resolved_path);
#endif
}
191 
/**
 * Checks that dir_name exists and is a directory. When it is not usable the
 * reason is reported on stderr.
 *
 * @param dir_name Path of the candidate output directory
 * @return true if dir_name is an existing directory, false otherwise
 */
bool check_is_directory(const char* dir_name) {
#ifdef _WIN32
  const DWORD attrs = ::GetFileAttributesA(dir_name);
  if (attrs == INVALID_FILE_ATTRIBUTES) {
    fprintf(stderr,
            "Output directory %s is unusable: GetLastError() = %ld\n",
            dir_name,
            GetLastError());
    return false;
  }

  const bool is_dir = (attrs & FILE_ATTRIBUTE_DIRECTORY) == FILE_ATTRIBUTE_DIRECTORY;
  if (!is_dir) {
    fprintf(stderr, "Output directory %s exists but is not a directory\n", dir_name);
  }
  return is_dir;
#else
  struct stat info;
  if (stat(dir_name, &info) < 0) {
    fprintf(stderr, "Output directory %s is unusable: %s\n", dir_name, strerror(errno));
    return false;
  }

  const bool is_dir = S_ISDIR(info.st_mode);
  if (!is_dir) {
    fprintf(stderr, "Output directory %s exists but is not a directory\n", dir_name);
  }
  return is_dir;
#endif
}
220 
221 /**
222  * Report an error to the user. This is called yyerror for historical
223  * reasons (lex and yacc expect the error reporting routine to be called
224  * this). Call this function to report any errors to the user.
225  * yyerror takes printf style arguments.
226  *
227  * @param fmt C format string followed by additional arguments
228  */
yyerror(const char * fmt,...)229 void yyerror(const char* fmt, ...) {
230   va_list args;
231   fprintf(stderr, "[ERROR:%s:%d] (last token was '%s')\n", g_curpath.c_str(), yylineno, yytext);
232 
233   va_start(args, fmt);
234   vfprintf(stderr, fmt, args);
235   va_end(args);
236 
237   fprintf(stderr, "\n");
238 }
239 
240 /**
241  * Prints a debug message from the parser.
242  *
243  * @param fmt C format string followed by additional arguments
244  */
pdebug(const char * fmt,...)245 void pdebug(const char* fmt, ...) {
246   if (g_debug == 0) {
247     return;
248   }
249   va_list args;
250   printf("[PARSE:%d] ", yylineno);
251   va_start(args, fmt);
252   vprintf(fmt, args);
253   va_end(args);
254   printf("\n");
255 }
256 
257 /**
258  * Prints a verbose output mode message
259  *
260  * @param fmt C format string followed by additional arguments
261  */
pverbose(const char * fmt,...)262 void pverbose(const char* fmt, ...) {
263   if (g_verbose == 0) {
264     return;
265   }
266   va_list args;
267   va_start(args, fmt);
268   vprintf(fmt, args);
269   va_end(args);
270 }
271 
272 /**
273  * Prints a warning message
274  *
275  * @param fmt C format string followed by additional arguments
276  */
pwarning(int level,const char * fmt,...)277 void pwarning(int level, const char* fmt, ...) {
278   if (g_warn < level) {
279     return;
280   }
281   va_list args;
282   printf("[WARNING:%s:%d] ", g_curpath.c_str(), yylineno);
283   va_start(args, fmt);
284   vprintf(fmt, args);
285   va_end(args);
286   printf("\n");
287 }
288 
289 /**
290  * Prints a failure message and exits
291  *
292  * @param fmt C format string followed by additional arguments
293  */
failure(const char * fmt,...)294 void failure(const char* fmt, ...) {
295   va_list args;
296   fprintf(stderr, "[FAILURE:%s:%d] ", g_curpath.c_str(), yylineno);
297   va_start(args, fmt);
298   vfprintf(stderr, fmt, args);
299   va_end(args);
300   printf("\n");
301   exit(1);
302 }
303 
/**
 * Converts a string filename into a thrift program name: everything up to
 * and including the last '/' is dropped, then everything from the last '.'
 * onwards is dropped.
 *
 * @param filename Path of a thrift file, using '/' as separator
 * @return The file's base name without its final extension
 */
string program_name(string filename) {
  const string::size_type last_slash = filename.rfind('/');
  const string::size_type base_start = (last_slash == string::npos) ? 0 : last_slash + 1;
  string base = filename.substr(base_start);

  const string::size_type last_dot = base.rfind('.');
  if (last_dot != string::npos) {
    base.erase(last_dot);
  }
  return base;
}
318 
/**
 * Gets the directory path of a filename: the part before the last '/'.
 *
 * @param filename Path using '/' as separator
 * @return The directory part, or "." when filename contains no slash
 */
string directory_name(string filename) {
  const string::size_type last_slash = filename.rfind('/');
  // Without a slash the file lives in the current directory
  return (last_slash != string::npos) ? filename.substr(0, last_slash) : string(".");
}
330 
331 /**
332  * Finds the appropriate file path for the given filename
333  */
include_file(string filename)334 string include_file(string filename) {
335   // Absolute path? Just try that
336   if (filename[0] == '/') {
337     // Realpath!
338     char rp[THRIFT_PATH_MAX];
339     // cppcheck-suppress uninitvar
340     if (saferealpath(filename.c_str(), rp) == nullptr) {
341       pwarning(0, "Cannot open include file %s\n", filename.c_str());
342       return std::string();
343     }
344 
345     // Stat this file
346     struct stat finfo;
347     if (stat(rp, &finfo) == 0) {
348       return rp;
349     }
350   } else { // relative path, start searching
351     // new search path with current dir global
352     vector<string> sp = g_incl_searchpath;
353     sp.insert(sp.begin(), g_curdir);
354 
355     // iterate through paths
356     vector<string>::iterator it;
357     for (it = sp.begin(); it != sp.end(); it++) {
358       string sfilename = *(it) + "/" + filename;
359 
360       // Realpath!
361       char rp[THRIFT_PATH_MAX];
362       // cppcheck-suppress uninitvar
363       if (saferealpath(sfilename.c_str(), rp) == nullptr) {
364         continue;
365       }
366 
367       // Stat this files
368       struct stat finfo;
369       if (stat(rp, &finfo) == 0) {
370         return rp;
371       }
372     }
373   }
374 
375   // Uh oh
376   pwarning(0, "Could not find include file %s\n", filename.c_str());
377   return std::string();
378 }
379 
380 /**
381  * Clears any previously stored doctext string.
382  * Also prints a warning if we are discarding information.
383  */
clear_doctext()384 void clear_doctext() {
385   if (g_doctext != nullptr) {
386     pwarning(2, "Uncaptured doctext at on line %d.", g_doctext_lineno);
387   }
388   free(g_doctext);
389   g_doctext = nullptr;
390 }
391 
392 /**
393  * Reset program doctext information after processing a file
394  */
reset_program_doctext_info()395 void reset_program_doctext_info() {
396   if (g_program_doctext_candidate != nullptr) {
397     free(g_program_doctext_candidate);
398     g_program_doctext_candidate = nullptr;
399   }
400   g_program_doctext_lineno = 0;
401   g_program_doctext_status = INVALID;
402   pdebug("%s", "program doctext set to INVALID");
403 }
404 
405 /**
406  * We are sure the program doctext candidate is really the program doctext.
407  */
declare_valid_program_doctext()408 void declare_valid_program_doctext() {
409   if ((g_program_doctext_candidate != nullptr) && (g_program_doctext_status == STILL_CANDIDATE)) {
410     g_program_doctext_status = ABSOLUTELY_SURE;
411     pdebug("%s", "program doctext set to ABSOLUTELY_SURE");
412   } else {
413     g_program_doctext_status = NO_PROGRAM_DOCTEXT;
414     pdebug("%s", "program doctext set to NO_PROGRAM_DOCTEXT");
415   }
416 }
417 
/**
 * Cleans up text commonly found in doxygen-like comments.
 *
 * Carriage returns are removed, a common "<whitespace>*" prefix and the
 * common leading whitespace are stripped from every line after the first,
 * trailing whitespace is removed, and an empty first line is dropped. Every
 * remaining line is terminated with '\n'.
 *
 * Warning: if you mix tabs and spaces in a non-uniform way,
 * you will get what you deserve.
 *
 * Ownership: doctext must have been allocated with malloc()/strdup(). This
 * function takes it over; only the returned pointer may be used (and later
 * freed) by the caller.
 *
 * @param doctext Raw comment text, may be nullptr
 * @return The cleaned text (either doctext itself, rewritten in place, or a
 *         newly allocated buffer), or nullptr if nothing but whitespace
 *         remains or doctext was nullptr
 */
char* clean_up_doctext(char* doctext) {
  // Nothing to clean; std::string must not be constructed from a null pointer.
  if (doctext == nullptr) {
    return nullptr;
  }

  // Convert to C++ string, and remove Windows's carriage returns.
  string docstring = doctext;
  docstring.erase(std::remove(docstring.begin(), docstring.end(), '\r'), docstring.end());

  // Separate into lines.
  vector<string> lines;
  string::size_type pos = string::npos;
  string::size_type last;
  while (true) {
    last = (pos == string::npos) ? 0 : pos + 1;
    pos = docstring.find('\n', last);
    if (pos == string::npos) {
      // First bit of cleaning.  If the last line is only whitespace, drop it.
      string::size_type nonwhite = docstring.find_first_not_of(" \t", last);
      if (nonwhite != string::npos) {
        lines.push_back(docstring.substr(last));
      }
      break;
    }
    lines.push_back(docstring.substr(last, pos - last));
  }

  // A very profound docstring. The buffer is ours, so release it instead of
  // leaking it when there is nothing to hand back.
  if (lines.empty()) {
    free(doctext);
    return nullptr;
  }

  // Clear leading whitespace from the first line.
  pos = lines.front().find_first_not_of(" \t");
  lines.front().erase(0, pos);

  // If every nonblank line after the first has the same number of spaces/tabs,
  // then a star, remove them.
  bool have_prefix = true;
  bool found_prefix = false;
  string::size_type prefix_len = 0;
  vector<string>::iterator l_iter;
  for (l_iter = lines.begin() + 1; l_iter != lines.end(); ++l_iter) {
    if (l_iter->empty()) {
      continue;
    }

    pos = l_iter->find_first_not_of(" \t");
    if (!found_prefix) {
      if (pos != string::npos) {
        if (l_iter->at(pos) == '*') {
          // The first nonblank line defines the prefix length.
          found_prefix = true;
          prefix_len = pos;
        } else {
          have_prefix = false;
          break;
        }
      } else {
        // Whitespace-only line.  Truncate it.
        l_iter->clear();
      }
    } else if (l_iter->size() > pos && l_iter->at(pos) == '*' && pos == prefix_len) {
      // Business as usual.
    } else if (pos == string::npos) {
      // Whitespace-only line.  Let's truncate it for them.
      l_iter->clear();
    } else {
      // The pattern has been broken.
      have_prefix = false;
      break;
    }
  }

  // If our prefix survived, delete it from every line.
  if (have_prefix) {
    // Get the star too.
    prefix_len++;
    for (l_iter = lines.begin() + 1; l_iter != lines.end(); ++l_iter) {
      l_iter->erase(0, prefix_len);
    }
  }

  // Now find the minimum amount of leading whitespace over all nonblank
  // lines after the first.
  prefix_len = string::npos;
  for (l_iter = lines.begin() + 1; l_iter != lines.end(); ++l_iter) {
    if (l_iter->empty()) {
      continue;
    }
    pos = l_iter->find_first_not_of(" \t");
    if (pos != string::npos && (prefix_len == string::npos || pos < prefix_len)) {
      prefix_len = pos;
    }
  }

  // If there is common leading whitespace, delete it from every line.
  if (prefix_len != string::npos) {
    for (l_iter = lines.begin() + 1; l_iter != lines.end(); ++l_iter) {
      l_iter->erase(0, prefix_len);
    }
  }

  // Remove trailing whitespace from every line.
  for (l_iter = lines.begin(); l_iter != lines.end(); ++l_iter) {
    pos = l_iter->find_last_not_of(" \t");
    if (pos != string::npos && pos != l_iter->length() - 1) {
      l_iter->erase(pos + 1);
    }
  }

  // If the first line is empty, remove it.
  // Don't do this earlier because a lot of steps skip the first line.
  if (lines.front().empty()) {
    lines.erase(lines.begin());
  }

  // Now rejoin the lines and copy them back into doctext.
  docstring.clear();
  for (l_iter = lines.begin(); l_iter != lines.end(); ++l_iter) {
    docstring += *l_iter;
    docstring += '\n';
  }

  // The result is usually shorter than the input, but the added trailing
  // newline can make it longer (see THRIFT-1755), so reallocate if needed.
  if (docstring.length() <= strlen(doctext)) {
    strcpy(doctext, docstring.c_str());
  } else {
    free(doctext); // too short
    doctext = strdup(docstring.c_str());
  }
  return doctext;
}
551 
552 /** Set to true to debug docstring parsing */
553 static bool dump_docs = false;
554 
555 /**
556  * Dumps docstrings to stdout
557  * Only works for top-level definitions and the whole program doc
558  * (i.e., not enum constants, struct fields, or functions.
559  */
dump_docstrings(t_program * program)560 void dump_docstrings(t_program* program) {
561   string progdoc = program->get_doc();
562   if (!progdoc.empty()) {
563     printf("Whole program doc:\n%s\n", progdoc.c_str());
564   }
565   const vector<t_typedef*>& typedefs = program->get_typedefs();
566   vector<t_typedef*>::const_iterator t_iter;
567   for (t_iter = typedefs.begin(); t_iter != typedefs.end(); ++t_iter) {
568     t_typedef* td = *t_iter;
569     if (td->has_doc()) {
570       printf("typedef %s:\n%s\n", td->get_name().c_str(), td->get_doc().c_str());
571     }
572   }
573   const vector<t_enum*>& enums = program->get_enums();
574   vector<t_enum*>::const_iterator e_iter;
575   for (e_iter = enums.begin(); e_iter != enums.end(); ++e_iter) {
576     t_enum* en = *e_iter;
577     if (en->has_doc()) {
578       printf("enum %s:\n%s\n", en->get_name().c_str(), en->get_doc().c_str());
579     }
580   }
581   const vector<t_const*>& consts = program->get_consts();
582   vector<t_const*>::const_iterator c_iter;
583   for (c_iter = consts.begin(); c_iter != consts.end(); ++c_iter) {
584     t_const* co = *c_iter;
585     if (co->has_doc()) {
586       printf("const %s:\n%s\n", co->get_name().c_str(), co->get_doc().c_str());
587     }
588   }
589   const vector<t_struct*>& structs = program->get_structs();
590   vector<t_struct*>::const_iterator s_iter;
591   for (s_iter = structs.begin(); s_iter != structs.end(); ++s_iter) {
592     t_struct* st = *s_iter;
593     if (st->has_doc()) {
594       printf("struct %s:\n%s\n", st->get_name().c_str(), st->get_doc().c_str());
595     }
596   }
597   const vector<t_struct*>& xceptions = program->get_xceptions();
598   vector<t_struct*>::const_iterator x_iter;
599   for (x_iter = xceptions.begin(); x_iter != xceptions.end(); ++x_iter) {
600     t_struct* xn = *x_iter;
601     if (xn->has_doc()) {
602       printf("xception %s:\n%s\n", xn->get_name().c_str(), xn->get_doc().c_str());
603     }
604   }
605   const vector<t_service*>& services = program->get_services();
606   vector<t_service*>::const_iterator v_iter;
607   for (v_iter = services.begin(); v_iter != services.end(); ++v_iter) {
608     t_service* sv = *v_iter;
609     if (sv->has_doc()) {
610       printf("service %s:\n%s\n", sv->get_name().c_str(), sv->get_doc().c_str());
611     }
612   }
613 }
614 
615 /**
616  * Emits a warning on list<byte>, binary type is typically a much better choice.
617  */
check_for_list_of_bytes(t_type * list_elem_type)618 void check_for_list_of_bytes(t_type* list_elem_type) {
619   if ((g_parse_mode == PROGRAM) && (list_elem_type != nullptr) && list_elem_type->is_base_type()) {
620     t_base_type* tbase = (t_base_type*)list_elem_type;
621     if (tbase->get_base() == t_base_type::TYPE_I8) {
622       pwarning(1, "Consider using the more efficient \"binary\" type instead of \"list<byte>\".");
623     }
624   }
625 }
626 
627 static bool g_byte_warning_emitted = false;
628 
629 /**
630  * Emits a one-time warning on byte type, promoting the new i8 type instead
631  */
emit_byte_type_warning()632 void emit_byte_type_warning() {
633   if (!g_byte_warning_emitted) {
634     pwarning(1,
635              "The \"byte\" type is a compatibility alias for \"i8\". Use \"i8\" to emphasize the "
636              "signedness of this type.\n");
637     g_byte_warning_emitted = true;
638   }
639 }
640 
641 /**
642  * Prints deprecation notice for old NS declarations that are no longer supported
643  * If new_form is nullptr, old_form is assumed to be a language identifier, such as "cpp"
644  * If new_form is not nullptr, both arguments are used exactly as given
645  */
error_unsupported_namespace_decl(const char * old_form,const char * new_form)646 void error_unsupported_namespace_decl(const char* old_form, const char* new_form) {
647   const char* remainder = "";
648   if( new_form == nullptr) {
649     new_form = old_form;
650     remainder = "_namespace";
651   }
652   failure("Unsupported declaration '%s%s'. Use 'namespace %s' instead.", old_form, remainder, new_form);
653 }
654 
655 /**
656  * Prints the version number
657  */
version()658 void version() {
659   printf("Thrift version %s\n", THRIFT_VERSION);
660 }
661 
/**
 * Displays the short usage message on stderr and then exits with an error
 * code.
 */
void usage() {
  fputs("Usage: thrift [options] file\n\n", stderr);
  fputs("Use thrift -help for a list of options\n", stderr);
  exit(1);
}
670 
671 /**
672  * Diplays the help message and then exits with an error code.
673  */
help()674 void help() {
675   fprintf(stderr, "Usage: thrift [options] file\n");
676   fprintf(stderr, "Options:\n");
677   fprintf(stderr, "  -version    Print the compiler version\n");
678   fprintf(stderr, "  -o dir      Set the output directory for gen-* packages\n");
679   fprintf(stderr, "               (default: current directory)\n");
680   fprintf(stderr, "  -out dir    Set the ouput location for generated files.\n");
681   fprintf(stderr, "               (no gen-* folder will be created)\n");
682   fprintf(stderr, "  -I dir      Add a directory to the list of directories\n");
683   fprintf(stderr, "                searched for include directives\n");
684   fprintf(stderr, "  -nowarn     Suppress all compiler warnings (BAD!)\n");
685   fprintf(stderr, "  -strict     Strict compiler warnings on\n");
686   fprintf(stderr, "  -v[erbose]  Verbose mode\n");
687   fprintf(stderr, "  -r[ecurse]  Also generate included files\n");
688   fprintf(stderr, "  -debug      Parse debug trace to stdout\n");
689   fprintf(stderr,
690           "  --allow-neg-keys  Allow negative field keys (Used to "
691           "preserve protocol\n");
692   fprintf(stderr, "                compatibility with older .thrift files)\n");
693   fprintf(stderr, "  --allow-64bit-consts  Do not print warnings about using 64-bit constants\n");
694   fprintf(stderr, "  --gen STR   Generate code with a dynamically-registered generator.\n");
695   fprintf(stderr, "                STR has the form language[:key1=val1[,key2[,key3=val3]]].\n");
696   fprintf(stderr, "                Keys and values are options passed to the generator.\n");
697   fprintf(stderr, "                Many options will not require values.\n");
698   fprintf(stderr, "\n");
699   fprintf(stderr, "Options related to audit operation\n");
700   fprintf(stderr, "   --audit OldFile   Old Thrift file to be audited with 'file'\n");
701   fprintf(stderr, "  -Iold dir    Add a directory to the list of directories\n");
702   fprintf(stderr, "                searched for include directives for old thrift file\n");
703   fprintf(stderr, "  -Inew dir    Add a directory to the list of directories\n");
704   fprintf(stderr, "                searched for include directives for new thrift file\n");
705   fprintf(stderr, "\n");
706   fprintf(stderr, "Available generators (and options):\n");
707 
708   t_generator_registry::gen_map_t gen_map = t_generator_registry::get_generator_map();
709   t_generator_registry::gen_map_t::iterator iter;
710   for (iter = gen_map.begin(); iter != gen_map.end(); ++iter) {
711     fprintf(stderr,
712             "  %s (%s):\n",
713             iter->second->get_short_name().c_str(),
714             iter->second->get_long_name().c_str());
715     fprintf(stderr, "%s", iter->second->get_documentation().c_str());
716   }
717   exit(1);
718 }
719 
720 /**
721  * You know, when I started working on Thrift I really thought it wasn't going
722  * to become a programming language because it was just a generator and it
723  * wouldn't need runtime type information and all that jazz. But then we
724  * decided to add constants, and all of a sudden that means runtime type
725  * validation and inference, except the "runtime" is the code generator
726  * runtime.
727  */
validate_const_rec(std::string name,t_type * type,t_const_value * value)728 void validate_const_rec(std::string name, t_type* type, t_const_value* value) {
729   if (type->is_void()) {
730     throw "type error: cannot declare a void const: " + name;
731   }
732 
733   if (type->is_base_type()) {
734     t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
735     switch (tbase) {
736     case t_base_type::TYPE_STRING:
737       if (value->get_type() != t_const_value::CV_STRING) {
738         throw "type error: const \"" + name + "\" was declared as string";
739       }
740       break;
741     case t_base_type::TYPE_UUID:
742       if (value->get_type() != t_const_value::CV_STRING) {
743         throw "type error: const \"" + name + "\" was declared as uuid";
744       }
745       value->set_uuid(value->get_uuid()); // validates constant
746       break;
747     case t_base_type::TYPE_BOOL:
748       if (value->get_type() != t_const_value::CV_INTEGER) {
749         throw "type error: const \"" + name + "\" was declared as bool";
750       }
751       break;
752     case t_base_type::TYPE_I8:
753       if (value->get_type() != t_const_value::CV_INTEGER) {
754         throw "type error: const \"" + name + "\" was declared as byte";
755       }
756       break;
757     case t_base_type::TYPE_I16:
758       if (value->get_type() != t_const_value::CV_INTEGER) {
759         throw "type error: const \"" + name + "\" was declared as i16";
760       }
761       break;
762     case t_base_type::TYPE_I32:
763       if (value->get_type() != t_const_value::CV_INTEGER) {
764         throw "type error: const \"" + name + "\" was declared as i32";
765       }
766       break;
767     case t_base_type::TYPE_I64:
768       if (value->get_type() != t_const_value::CV_INTEGER) {
769         throw "type error: const \"" + name + "\" was declared as i64";
770       }
771       break;
772     case t_base_type::TYPE_DOUBLE:
773       if (value->get_type() != t_const_value::CV_INTEGER
774           && value->get_type() != t_const_value::CV_DOUBLE) {
775         throw "type error: const \"" + name + "\" was declared as double";
776       }
777       break;
778     default:
779       throw "compiler error: no const of base type " + t_base_type::t_base_name(tbase) + name;
780     }
781   } else if (type->is_enum()) {
782     if (value->get_type() != t_const_value::CV_IDENTIFIER) {
783       throw "type error: const \"" + name + "\" was declared as enum";
784     }
785 
786     // see if there's a dot in the identifier
787     std::string name_portion = value->get_identifier_name();
788 
789     const vector<t_enum_value*>& enum_values = ((t_enum*)type)->get_constants();
790     vector<t_enum_value*>::const_iterator c_iter;
791     bool found = false;
792 
793     for (c_iter = enum_values.begin(); c_iter != enum_values.end(); ++c_iter) {
794       if ((*c_iter)->get_name() == name_portion) {
795         found = true;
796         break;
797       }
798     }
799     if (!found) {
800       throw "type error: const " + name + " was declared as type " + type->get_name()
801           + " which is an enum, but " + value->get_identifier()
802           + " is not a valid value for that enum";
803     }
804   } else if (type->is_struct() || type->is_xception()) {
805     if (value->get_type() != t_const_value::CV_MAP) {
806       throw "type error: const \"" + name + "\" was declared as struct/xception";
807     }
808     const vector<t_field*>& fields = ((t_struct*)type)->get_members();
809     vector<t_field*>::const_iterator f_iter;
810 
811     const map<t_const_value*, t_const_value*, t_const_value::value_compare>& val = value->get_map();
812     map<t_const_value*, t_const_value*, t_const_value::value_compare>::const_iterator v_iter;
813     for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
814       if (v_iter->first->get_type() != t_const_value::CV_STRING) {
815         throw "type error: " + name + " struct key must be string";
816       }
817       t_type* field_type = nullptr;
818       for (f_iter = fields.begin(); f_iter != fields.end(); ++f_iter) {
819         if ((*f_iter)->get_name() == v_iter->first->get_string()) {
820           field_type = (*f_iter)->get_type();
821         }
822       }
823       if (field_type == nullptr) {
824         throw "type error: " + type->get_name() + " has no field " + v_iter->first->get_string();
825       }
826 
827       validate_const_rec(name + "." + v_iter->first->get_string(), field_type, v_iter->second);
828     }
829   } else if (type->is_map()) {
830     t_type* k_type = ((t_map*)type)->get_key_type();
831     t_type* v_type = ((t_map*)type)->get_val_type();
832     const map<t_const_value*, t_const_value*, t_const_value::value_compare>& val = value->get_map();
833     map<t_const_value*, t_const_value*, t_const_value::value_compare>::const_iterator v_iter;
834     for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
835       validate_const_rec(name + "<key>", k_type, v_iter->first);
836       validate_const_rec(name + "<val>", v_type, v_iter->second);
837     }
838   } else if (type->is_list() || type->is_set()) {
839     t_type* e_type;
840     if (type->is_list()) {
841       e_type = ((t_list*)type)->get_elem_type();
842     } else {
843       e_type = ((t_set*)type)->get_elem_type();
844     }
845     const vector<t_const_value*>& val = value->get_list();
846     vector<t_const_value*>::const_iterator v_iter;
847     for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
848       validate_const_rec(name + "<elem>", e_type, *v_iter);
849     }
850   }
851 }
852 
853 /**
854  * Check simple identifier names
855  * It's easier to do it this way instead of rewriting the whole grammar etc.
856  */
validate_simple_identifier(const char * identifier)857 void validate_simple_identifier(const char* identifier) {
858   string name(identifier);
859   if (name.find(".") != string::npos) {
860     yyerror("Identifier %s can't have a dot.", identifier);
861     exit(1);
862   }
863 }
864 
865 /**
866  * Check the type of the parsed const information against its declared type
867  */
validate_const_type(t_const * c)868 void validate_const_type(t_const* c) {
869   validate_const_rec(c->get_name(), c->get_type(), c->get_value());
870 }
871 
872 /**
873  * Check the type of a default value assigned to a field.
874  */
validate_field_value(t_field * field,t_const_value * cv)875 void validate_field_value(t_field* field, t_const_value* cv) {
876   validate_const_rec(field->get_name(), field->get_type(), cv);
877 }
878 
879 /**
880  * Check that all the elements of a throws block are actually exceptions.
881  */
validate_throws(t_struct * throws)882 bool validate_throws(t_struct* throws) {
883   const vector<t_field*>& members = throws->get_members();
884   vector<t_field*>::const_iterator m_iter;
885   for (m_iter = members.begin(); m_iter != members.end(); ++m_iter) {
886     if (!t_generator::get_true_type((*m_iter)->get_type())->is_xception()) {
887       return false;
888     }
889   }
890   return true;
891 }
892 
/**
 * Skips the UTF-8 byte order mark (EF BB BF) if the stream has one at the
 * current read position.
 *
 * @param f Stream to read from
 * @return true if a BOM was consumed; false otherwise, in which case the
 *         stream has been rewound to its beginning
 */
bool skip_utf8_bom(FILE* f) {
  static const int kBom[] = {0xEF, 0xBB, 0xBF};

  for (int expected : kBom) {
    if (fgetc(f) != expected) {
      // Not a BOM: undo whatever has been consumed so far
      rewind(f);
      return false;
    }
  }
  return true;
}
910 
911 /**
912  * Parses a program
913  */
parse(t_program * program,t_program * parent_program)914 void parse(t_program* program, t_program* parent_program) {
915   // Get scope file path
916   string path = program->get_path();
917 
918   // Set current dir global, which is used in the include_file function
919   g_curdir = directory_name(path);
920   g_curpath = path;
921 
922   // Open the file
923   // skip UTF-8 BOM if there is one
924   yyin = fopen(path.c_str(), "r");
925   if (yyin == 0) {
926     failure("Could not open input file: \"%s\"", path.c_str());
927   }
928   if (skip_utf8_bom(yyin))
929     pverbose("Skipped UTF-8 BOM at %s\n", path.c_str());
930 
931   // Create new scope and scan for includes
932   pverbose("Scanning %s for includes\n", path.c_str());
933   g_parse_mode = INCLUDES;
934   g_program = program;
935   g_scope = program->scope();
936   try {
937     yylineno = 1;
938     if (yyparse() != 0) {
939       failure("Parser error during include pass.");
940     }
941   } catch (string &x) {
942     failure(x.c_str());
943   }
944   fclose(yyin);
945 
946   // Recursively parse all the include programs
947   vector<t_program*>& includes = program->get_includes();
948   vector<t_program*>::iterator iter;
949   for (iter = includes.begin(); iter != includes.end(); ++iter) {
950     parse(*iter, program);
951   }
952 
953   // reset program doctext status before parsing a new file
954   reset_program_doctext_info();
955 
956   // Parse the program file
957   g_parse_mode = PROGRAM;
958   g_program = program;
959   g_scope = program->scope();
960   g_parent_scope = (parent_program != nullptr) ? parent_program->scope() : nullptr;
961   g_parent_prefix = program->get_name() + ".";
962   g_curpath = path;
963 
964   // Open the file
965   // skip UTF-8 BOM if there is one
966   yyin = fopen(path.c_str(), "r");
967   if (yyin == 0) {
968     failure("Could not open input file: \"%s\"", path.c_str());
969   }
970   if (skip_utf8_bom(yyin))
971     pverbose("Skipped UTF-8 BOM at %s\n", path.c_str());
972 
973   pverbose("Parsing %s for types\n", path.c_str());
974   yylineno = 1;
975   try {
976     if (yyparse() != 0) {
977       failure("Parser error during types pass.");
978     }
979   } catch (string &x) {
980     failure(x.c_str());
981   }
982   fclose(yyin);
983 }
984 
985 /**
986  * Generate code
987  */
generate(t_program * program,const vector<string> & generator_strings)988 void generate(t_program* program, const vector<string>& generator_strings) {
989   // Oooohh, recursive code generation, hot!!
990   if (gen_recurse) {
991     program->set_recursive(true);
992     const vector<t_program*>& includes = program->get_includes();
993     for (auto include : includes) {
994       // Propagate output path from parent to child programs
995       include->set_out_path(program->get_out_path(), program->is_out_path_absolute());
996 
997       generate(include, generator_strings);
998     }
999   }
1000 
1001   // Generate code!
1002   try {
1003     pverbose("Program: %s\n", program->get_path().c_str());
1004 
1005     if (dump_docs) {
1006       dump_docstrings(program);
1007     }
1008 
1009     // make sure all symbolic constants are properly resolved
1010     program->scope()->resolve_all_consts();
1011 
1012     vector<string>::const_iterator iter;
1013     for (iter = generator_strings.begin(); iter != generator_strings.end(); ++iter) {
1014       t_generator* generator = t_generator_registry::get_generator(program, *iter);
1015 
1016       if (generator == nullptr) {
1017         pwarning(1, "Unable to get a generator for \"%s\".\n", iter->c_str());
1018         g_generator_failure = true;
1019       } else if (generator) {
1020         generator->validate_input();
1021         pverbose("Generating \"%s\"\n", iter->c_str());
1022         generator->generate_program();
1023         delete generator;
1024       }
1025     }
1026   } catch (string &s) {
1027     failure("Error: %s\n", s.c_str());
1028   } catch (const char* exc) {
1029     failure("Error: %s\n", exc);
1030   } catch (const std::invalid_argument& invalid_argument_exception) {
1031     failure("Error: %s\n", invalid_argument_exception.what());
1032   }
1033 }
1034 
audit(t_program * new_program,t_program * old_program,string new_thrift_include_path,string old_thrift_include_path)1035 void audit(t_program* new_program,
1036            t_program* old_program,
1037            string new_thrift_include_path,
1038            string old_thrift_include_path) {
1039   vector<string> temp_incl_searchpath = g_incl_searchpath;
1040   if (!old_thrift_include_path.empty()) {
1041     g_incl_searchpath.push_back(old_thrift_include_path);
1042   }
1043 
1044   parse(old_program, nullptr);
1045 
1046   g_incl_searchpath = temp_incl_searchpath;
1047   if (!new_thrift_include_path.empty()) {
1048     g_incl_searchpath.push_back(new_thrift_include_path);
1049   }
1050 
1051   parse(new_program, nullptr);
1052 
1053   compare_namespace(new_program, old_program);
1054   compare_services(new_program->get_services(), old_program->get_services());
1055   compare_enums(new_program->get_enums(), old_program->get_enums());
1056   compare_structs(new_program->get_structs(), old_program->get_structs());
1057   compare_structs(new_program->get_xceptions(), old_program->get_xceptions());
1058   compare_consts(new_program->get_consts(), old_program->get_consts());
1059 }
1060 
1061 /**
1062  * Parse it up.. then spit it back out, in pretty much every language. Alright
1063  * not that many languages, but the cool ones that we care about.
1064  */
main(int argc,char ** argv)1065 int main(int argc, char** argv) {
1066   int i;
1067   std::string out_path;
1068   bool out_path_is_absolute = false;
1069 
1070   // Setup time string
1071   time_t now = time(nullptr);
1072   g_time_str = ctime(&now);
1073 
1074   // Check for necessary arguments, you gotta have at least a filename and
1075   // an output language flag
1076   if (argc < 2) {
1077     usage();
1078   }
1079 
1080   vector<string> generator_strings;
1081   string old_thrift_include_path;
1082   string new_thrift_include_path;
1083   string old_input_file;
1084 
1085   // Set the current path to a dummy value to make warning messages clearer.
1086   g_curpath = "arguments";
1087 
1088   // Hacky parameter handling... I didn't feel like using a library sorry!
1089   for (i = 1; i < argc - 1; i++) {
1090     char* arg;
1091 
1092     arg = strtok(argv[i], " ");
1093     while (arg != nullptr) {
1094       // Treat double dashes as single dashes
1095       if (arg[0] == '-' && arg[1] == '-') {
1096         ++arg;
1097       }
1098 
1099       if (strcmp(arg, "-help") == 0) {
1100         help();
1101       } else if (strcmp(arg, "-version") == 0) {
1102         version();
1103         exit(0);
1104       } else if (strcmp(arg, "-debug") == 0) {
1105         g_debug = 1;
1106       } else if (strcmp(arg, "-nowarn") == 0) {
1107         g_warn = 0;
1108       } else if (strcmp(arg, "-strict") == 0) {
1109         g_strict = 255;
1110         g_warn = 2;
1111       } else if (strcmp(arg, "-v") == 0 || strcmp(arg, "-verbose") == 0) {
1112         g_verbose = 1;
1113       } else if (strcmp(arg, "-r") == 0 || strcmp(arg, "-recurse") == 0) {
1114         gen_recurse = true;
1115       } else if (strcmp(arg, "-allow-neg-keys") == 0) {
1116         g_allow_neg_field_keys = true;
1117       } else if (strcmp(arg, "-allow-64bit-consts") == 0) {
1118         g_allow_64bit_consts = true;
1119       } else if (strcmp(arg, "-gen") == 0) {
1120         arg = argv[++i];
1121         if (arg == nullptr) {
1122           fprintf(stderr, "Missing generator specification\n");
1123           usage();
1124         }
1125         generator_strings.emplace_back(arg);
1126       } else if (strcmp(arg, "-I") == 0) {
1127         // An argument of "-I\ asdf" is invalid and has unknown results
1128         arg = argv[++i];
1129 
1130         if (arg == nullptr) {
1131           fprintf(stderr, "Missing Include directory\n");
1132           usage();
1133         }
1134         g_incl_searchpath.emplace_back(arg);
1135       } else if ((strcmp(arg, "-o") == 0) || (strcmp(arg, "-out") == 0)) {
1136         out_path_is_absolute = (strcmp(arg, "-out") == 0) ? true : false;
1137         arg = argv[++i];
1138         if (arg == nullptr) {
1139           fprintf(stderr, "-o: missing output directory\n");
1140           usage();
1141         }
1142         out_path = arg;
1143 
1144 #ifdef _WIN32
1145         // strip out trailing \ on Windows
1146         std::string::size_type last = out_path.length() - 1;
1147         if (out_path[last] == '\\') {
1148           out_path.erase(last);
1149         }
1150 #endif
1151         if (!check_is_directory(out_path.c_str()))
1152           return -1;
1153       } else if (strcmp(arg, "-audit") == 0) {
1154         g_audit = true;
1155         arg = argv[++i];
1156         if (arg == nullptr) {
1157           fprintf(stderr, "Missing old thrift file name for audit operation\n");
1158           usage();
1159         }
1160         char old_thrift_file_rp[THRIFT_PATH_MAX];
1161 
1162         // cppcheck-suppress uninitvar
1163         if (saferealpath(arg, old_thrift_file_rp) == nullptr) {
1164           failure("Could not open input file with realpath: %s", arg);
1165         }
1166         old_input_file = string(old_thrift_file_rp);
1167       } else if (strcmp(arg, "-audit-nofatal") == 0) {
1168         g_audit_fatal = false;
1169       } else if (strcmp(arg, "-Iold") == 0) {
1170         arg = argv[++i];
1171         if (arg == nullptr) {
1172           fprintf(stderr, "Missing Include directory for old thrift file\n");
1173           usage();
1174         }
1175         old_thrift_include_path = string(arg);
1176       } else if (strcmp(arg, "-Inew") == 0) {
1177         arg = argv[++i];
1178         if (arg == nullptr) {
1179           fprintf(stderr, "Missing Include directory for new thrift file\n");
1180           usage();
1181         }
1182         new_thrift_include_path = string(arg);
1183       } else {
1184         fprintf(stderr, "Unrecognized option: %s\n", arg);
1185         usage();
1186       }
1187 
1188       // Tokenize more
1189       arg = strtok(nullptr, " ");
1190     }
1191   }
1192 
1193   // display help
1194   if ((strcmp(argv[argc - 1], "-help") == 0) || (strcmp(argv[argc - 1], "--help") == 0)) {
1195     help();
1196   }
1197 
1198   // if you're asking for version, you have a right not to pass a file
1199   if ((strcmp(argv[argc - 1], "-version") == 0) || (strcmp(argv[argc - 1], "--version") == 0)) {
1200     version();
1201     exit(0);
1202   }
1203 
1204   // Initialize global types
1205   initGlobals();
1206 
1207   if (g_audit) {
1208     // Audit operation
1209 
1210     if (old_input_file.empty()) {
1211       fprintf(stderr, "Missing file name of old thrift file for audit\n");
1212       usage();
1213     }
1214 
1215     char new_thrift_file_rp[THRIFT_PATH_MAX];
1216     if (argv[i] == nullptr) {
1217       fprintf(stderr, "Missing file name of new thrift file for audit\n");
1218       usage();
1219     }
1220     // cppcheck-suppress uninitvar
1221     if (saferealpath(argv[i], new_thrift_file_rp) == nullptr) {
1222       failure("Could not open input file with realpath: %s", argv[i]);
1223     }
1224     string new_input_file(new_thrift_file_rp);
1225 
1226     t_program new_program(new_input_file);
1227     t_program old_program(old_input_file);
1228 
1229     audit(&new_program, &old_program, new_thrift_include_path, old_thrift_include_path);
1230 
1231   } else {
1232     // Generate options
1233 
1234     // You gotta generate something!
1235     if (generator_strings.empty()) {
1236       fprintf(stderr, "No output language(s) specified\n");
1237       usage();
1238     }
1239 
1240     // Real-pathify it
1241     char rp[THRIFT_PATH_MAX];
1242     if (argv[i] == nullptr) {
1243       fprintf(stderr, "Missing file name\n");
1244       usage();
1245     }
1246     // cppcheck-suppress uninitvar
1247     if (saferealpath(argv[i], rp) == nullptr) {
1248       failure("Could not open input file with realpath: %s", argv[i]);
1249     }
1250     string input_file(rp);
1251 
1252     // Instance of the global parse tree
1253     t_program* program = new t_program(input_file);
1254     if (out_path.size()) {
1255       program->set_out_path(out_path, out_path_is_absolute);
1256     }
1257 
1258     // Compute the cpp include prefix.
1259     // infer this from the filename passed in
1260     string input_filename = argv[i];
1261     string include_prefix;
1262 
1263     string::size_type last_slash = string::npos;
1264     if ((last_slash = input_filename.rfind("/")) != string::npos) {
1265       include_prefix = input_filename.substr(0, last_slash);
1266     }
1267 
1268     program->set_include_prefix(include_prefix);
1269 
1270     // Parse it!
1271     parse(program, nullptr);
1272 
1273     // The current path is not really relevant when we are doing generation.
1274     // Reset the variable to make warning messages clearer.
1275     g_curpath = "generation";
1276     // Reset yylineno for the heck of it.  Use 1 instead of 0 because
1277     // That is what shows up during argument parsing.
1278     yylineno = 1;
1279 
1280     // Generate it!
1281     generate(program, generator_strings);
1282     delete program;
1283   }
1284 
1285   // Clean up. Who am I kidding... this program probably orphans heap memory
1286   // all over the place, but who cares because it is about to exit and it is
1287   // all referenced and used by this wacky parse tree up until now anyways.
1288   clearGlobals();
1289 
1290   // Finished
1291   if (g_return_failure && g_audit_fatal) {
1292     exit(2);
1293   }
1294   if (g_generator_failure) {
1295     exit(3);
1296   }
1297   // Finished
1298   return 0;
1299 }
1300