1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20 /**
21 * thrift - a lightweight cross-language rpc/serialization tool
22 *
23 * This file contains the main compiler engine for Thrift, which invokes the
24 * scanner/parser to build the thrift object tree. The interface generation
25 * code for each language lives in a file by the language name under the
26 * generate/ folder, and all parse structures live in parse/
27 *
28 */
29
30 #include <cassert>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <stdarg.h>
34 #include <time.h>
35 #include <string>
36 #include <algorithm>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <errno.h>
40 #include <limits.h>
41
42 #ifdef _WIN32
43 #include <windows.h> /* for GetFullPathName */
44 #endif
45
46 // Careful: must include globals first for extern definitions
47 #include "thrift/common.h"
48 #include "thrift/globals.h"
49
50 #include "thrift/platform.h"
51 #include "thrift/main.h"
52 #include "thrift/parse/t_program.h"
53 #include "thrift/parse/t_scope.h"
54 #include "thrift/generate/t_generator.h"
55 #include "thrift/audit/t_audit.h"
56
57 #include "thrift/version.h"
58
59 using namespace std;
60
/**
 * Global program tree: the program currently being parsed.
 * parse() assigns it before each yyparse() pass.
 */
t_program* g_program;

/**
 * Global scope: the scope of the program currently being parsed
 * (parse() sets it from program->scope()).
 */
t_scope* g_scope;

/**
 * Parent scope to also parse types into. parse() sets it to the scope of the
 * including program, or to nullptr when there is no parent program.
 */
t_scope* g_parent_scope;

/**
 * Prefix for putting types in parent scope. parse() sets it to the name of
 * the program being parsed followed by a dot.
 */
string g_parent_prefix;

/**
 * Parsing pass. parse() runs an INCLUDES pass first, then a PROGRAM pass.
 */
PARSE_MODE g_parse_mode;

/**
 * Directory of the file being parsed. include_file() searches it before the
 * entries of g_incl_searchpath.
 */
string g_curdir;

/**
 * Path of the file being parsed. Used as the location prefix in the
 * diagnostics printed by yyerror(), pwarning() and failure().
 */
string g_curpath;

/**
 * Search path for inclusions
 */
vector<string> g_incl_searchpath;

/**
 * Global debug state. pdebug() prints only when this is non-zero.
 */
int g_debug = 0;

/**
 * Strictness level
 */
int g_strict = 127;

/**
 * Warning level. pwarning(level, ...) is silent when level is greater than
 * this value.
 */
int g_warn = 1;

/**
 * Verbose output. pverbose() prints only when this is non-zero.
 */
int g_verbose = 0;

/**
 * Global time string
 */
char* g_time_str;

/**
 * The last parsed doctext comment. Released with free() by clear_doctext().
 */
char* g_doctext;

/**
 * The first doctext comment, kept as a candidate for the program-level doc.
 * Released with free() by reset_program_doctext_info().
 */
char* g_program_doctext_candidate;

/**
 * Whether or not negative field keys are accepted.
 */
int g_allow_neg_field_keys;

/**
 * Whether or not 64-bit constants will generate a warning.
 */
int g_allow_64bit_consts = 0;

/**
 * Flags to control code generation.
 * When gen_recurse is true, generate() also generates every included program.
 */
bool gen_recurse = false;

/**
 * Flags to control thrift audit
 */
bool g_audit = false;

/**
 * Flags to control return status
 */
bool g_return_failure = false;
bool g_audit_fatal = true;
// Set by generate() when a requested generator cannot be obtained.
bool g_generator_failure = false;
162
163 /**
164 * Win32 doesn't have realpath, so use fallback implementation in that case,
165 * otherwise this just calls through to realpath
166 */
char* saferealpath(const char* path, char* resolved_path) {
#ifndef _WIN32
  // POSIX: the C library already does exactly what we need.
  return realpath(path, resolved_path);
#else
  // Win32 fallback: expand to a full path, falling back to the input verbatim
  // when the expansion fails or does not fit in MAX_PATH.
  char full_path[MAX_PATH];
  char* file_part;
  const DWORD written = GetFullPathNameA(path, MAX_PATH, full_path, &file_part);
  const bool expanded = (written != 0) && !(written > MAX_PATH - 1);
  strcpy(resolved_path, expanded ? full_path : path);

  // Normalize separators to forward slashes so the rest of the code
  // behaves correctly.
  for (char* cursor = resolved_path; *cursor != '\0'; ++cursor) {
    if (*cursor == '\\') {
      *cursor = '/';
    }
  }
  return resolved_path;
#endif
}
191
/**
 * Tells whether dir_name refers to an existing directory.
 *
 * @param dir_name path to inspect
 * @return true if it is a directory; otherwise false, after the reason has
 *         been written to stderr
 */
bool check_is_directory(const char* dir_name) {
#ifdef _WIN32
  const DWORD attrs = ::GetFileAttributesA(dir_name);
  if (attrs == INVALID_FILE_ATTRIBUTES) {
    fprintf(stderr,
            "Output directory %s is unusable: GetLastError() = %ld\n",
            dir_name,
            GetLastError());
    return false;
  }
  const bool is_dir = (attrs & FILE_ATTRIBUTE_DIRECTORY) == FILE_ATTRIBUTE_DIRECTORY;
#else
  struct stat info;
  if (stat(dir_name, &info) < 0) {
    fprintf(stderr, "Output directory %s is unusable: %s\n", dir_name, strerror(errno));
    return false;
  }
  const bool is_dir = S_ISDIR(info.st_mode);
#endif

  // Both platforms report "exists but is the wrong kind of thing" identically.
  if (!is_dir) {
    fprintf(stderr, "Output directory %s exists but is not a directory\n", dir_name);
  }
  return is_dir;
}
220
221 /**
222 * Report an error to the user. This is called yyerror for historical
223 * reasons (lex and yacc expect the error reporting routine to be called
224 * this). Call this function to report any errors to the user.
225 * yyerror takes printf style arguments.
226 *
227 * @param fmt C format string followed by additional arguments
228 */
yyerror(const char * fmt,...)229 void yyerror(const char* fmt, ...) {
230 va_list args;
231 fprintf(stderr, "[ERROR:%s:%d] (last token was '%s')\n", g_curpath.c_str(), yylineno, yytext);
232
233 va_start(args, fmt);
234 vfprintf(stderr, fmt, args);
235 va_end(args);
236
237 fprintf(stderr, "\n");
238 }
239
240 /**
241 * Prints a debug message from the parser.
242 *
243 * @param fmt C format string followed by additional arguments
244 */
pdebug(const char * fmt,...)245 void pdebug(const char* fmt, ...) {
246 if (g_debug == 0) {
247 return;
248 }
249 va_list args;
250 printf("[PARSE:%d] ", yylineno);
251 va_start(args, fmt);
252 vprintf(fmt, args);
253 va_end(args);
254 printf("\n");
255 }
256
257 /**
258 * Prints a verbose output mode message
259 *
260 * @param fmt C format string followed by additional arguments
261 */
pverbose(const char * fmt,...)262 void pverbose(const char* fmt, ...) {
263 if (g_verbose == 0) {
264 return;
265 }
266 va_list args;
267 va_start(args, fmt);
268 vprintf(fmt, args);
269 va_end(args);
270 }
271
272 /**
273 * Prints a warning message
274 *
275 * @param fmt C format string followed by additional arguments
276 */
pwarning(int level,const char * fmt,...)277 void pwarning(int level, const char* fmt, ...) {
278 if (g_warn < level) {
279 return;
280 }
281 va_list args;
282 printf("[WARNING:%s:%d] ", g_curpath.c_str(), yylineno);
283 va_start(args, fmt);
284 vprintf(fmt, args);
285 va_end(args);
286 printf("\n");
287 }
288
289 /**
290 * Prints a failure message and exits
291 *
292 * @param fmt C format string followed by additional arguments
293 */
failure(const char * fmt,...)294 void failure(const char* fmt, ...) {
295 va_list args;
296 fprintf(stderr, "[FAILURE:%s:%d] ", g_curpath.c_str(), yylineno);
297 va_start(args, fmt);
298 vfprintf(stderr, fmt, args);
299 va_end(args);
300 printf("\n");
301 exit(1);
302 }
303
304 /**
305 * Converts a string filename into a thrift program name
306 */
std::string program_name(std::string filename) {
  // Drop everything up to and including the last path separator.
  const std::string::size_type last_slash = filename.rfind('/');
  if (last_slash != std::string::npos) {
    filename.erase(0, last_slash + 1);
  }

  // Drop the last extension (the final dot and whatever follows it).
  const std::string::size_type last_dot = filename.rfind('.');
  if (last_dot != std::string::npos) {
    filename.erase(last_dot);
  }

  return filename;
}
318
319 /**
320 * Gets the directory path of a filename
321 */
std::string directory_name(std::string filename) {
  const std::string::size_type last_slash = filename.find_last_of('/');

  // Without any separator the file lives in the current directory; otherwise
  // the directory is everything before the last separator.
  return (last_slash == std::string::npos) ? std::string(".") : filename.substr(0, last_slash);
}
330
331 /**
332 * Finds the appropriate file path for the given filename
333 */
include_file(string filename)334 string include_file(string filename) {
335 // Absolute path? Just try that
336 if (filename[0] == '/') {
337 // Realpath!
338 char rp[THRIFT_PATH_MAX];
339 // cppcheck-suppress uninitvar
340 if (saferealpath(filename.c_str(), rp) == nullptr) {
341 pwarning(0, "Cannot open include file %s\n", filename.c_str());
342 return std::string();
343 }
344
345 // Stat this file
346 struct stat finfo;
347 if (stat(rp, &finfo) == 0) {
348 return rp;
349 }
350 } else { // relative path, start searching
351 // new search path with current dir global
352 vector<string> sp = g_incl_searchpath;
353 sp.insert(sp.begin(), g_curdir);
354
355 // iterate through paths
356 vector<string>::iterator it;
357 for (it = sp.begin(); it != sp.end(); it++) {
358 string sfilename = *(it) + "/" + filename;
359
360 // Realpath!
361 char rp[THRIFT_PATH_MAX];
362 // cppcheck-suppress uninitvar
363 if (saferealpath(sfilename.c_str(), rp) == nullptr) {
364 continue;
365 }
366
367 // Stat this files
368 struct stat finfo;
369 if (stat(rp, &finfo) == 0) {
370 return rp;
371 }
372 }
373 }
374
375 // Uh oh
376 pwarning(0, "Could not find include file %s\n", filename.c_str());
377 return std::string();
378 }
379
380 /**
381 * Clears any previously stored doctext string.
382 * Also prints a warning if we are discarding information.
383 */
clear_doctext()384 void clear_doctext() {
385 if (g_doctext != nullptr) {
386 pwarning(2, "Uncaptured doctext at on line %d.", g_doctext_lineno);
387 }
388 free(g_doctext);
389 g_doctext = nullptr;
390 }
391
392 /**
393 * Reset program doctext information after processing a file
394 */
reset_program_doctext_info()395 void reset_program_doctext_info() {
396 if (g_program_doctext_candidate != nullptr) {
397 free(g_program_doctext_candidate);
398 g_program_doctext_candidate = nullptr;
399 }
400 g_program_doctext_lineno = 0;
401 g_program_doctext_status = INVALID;
402 pdebug("%s", "program doctext set to INVALID");
403 }
404
405 /**
406 * We are sure the program doctext candidate is really the program doctext.
407 */
declare_valid_program_doctext()408 void declare_valid_program_doctext() {
409 if ((g_program_doctext_candidate != nullptr) && (g_program_doctext_status == STILL_CANDIDATE)) {
410 g_program_doctext_status = ABSOLUTELY_SURE;
411 pdebug("%s", "program doctext set to ABSOLUTELY_SURE");
412 } else {
413 g_program_doctext_status = NO_PROGRAM_DOCTEXT;
414 pdebug("%s", "program doctext set to NO_PROGRAM_DOCTEXT");
415 }
416 }
417
418 /**
419 * Cleans up text commonly found in doxygen-like comments
420 *
421 * Warning: if you mix tabs and spaces in a non-uniform way,
422 * you will get what you deserve.
423 */
char* clean_up_doctext(char* doctext) {
  // Ownership: doctext must be a malloc-family buffer and the caller must use
  // only the returned pointer afterwards. The buffer is either reused in
  // place, replaced by a fresh strdup() copy, or released when nullptr is
  // returned for an empty docstring.

  // Convert to C++ string, and remove Windows's carriage returns.
  std::string docstring = doctext;
  docstring.erase(std::remove(docstring.begin(), docstring.end(), '\r'), docstring.end());

  // Separate into lines.
  std::vector<std::string> lines;
  std::string::size_type pos = std::string::npos;
  std::string::size_type last;
  while (true) {
    last = (pos == std::string::npos) ? 0 : pos + 1;
    pos = docstring.find('\n', last);
    if (pos == std::string::npos) {
      // First bit of cleaning. If the last line is only whitespace, drop it.
      std::string::size_type nonwhite = docstring.find_first_not_of(" \t", last);
      if (nonwhite != std::string::npos) {
        lines.push_back(docstring.substr(last));
      }
      break;
    }
    lines.push_back(docstring.substr(last, pos - last));
  }

  // A very profound docstring.
  if (lines.empty()) {
    // Release the input here: the caller only keeps the (null) result, so
    // the buffer would otherwise be leaked.
    free(doctext);
    return nullptr;
  }

  // Clear leading whitespace from the first line.
  pos = lines.front().find_first_not_of(" \t");
  lines.front().erase(0, pos);

  // If every nonblank line after the first has the same number of spaces/tabs,
  // then a star, remove them.
  bool have_prefix = true;
  bool found_prefix = false;
  std::string::size_type prefix_len = 0;
  std::vector<std::string>::iterator l_iter;
  for (l_iter = lines.begin() + 1; l_iter != lines.end(); ++l_iter) {
    if (l_iter->empty()) {
      continue;
    }

    pos = l_iter->find_first_not_of(" \t");
    if (!found_prefix) {
      if (pos != std::string::npos) {
        if (l_iter->at(pos) == '*') {
          // The first starred line fixes the indentation all others must match.
          found_prefix = true;
          prefix_len = pos;
        } else {
          have_prefix = false;
          break;
        }
      } else {
        // Whitespace-only line. Truncate it.
        l_iter->clear();
      }
    } else if (l_iter->size() > pos && l_iter->at(pos) == '*' && pos == prefix_len) {
      // Business as usual.
    } else if (pos == std::string::npos) {
      // Whitespace-only line. Let's truncate it for them.
      l_iter->clear();
    } else {
      // The pattern has been broken.
      have_prefix = false;
      break;
    }
  }

  // If our prefix survived, delete it from every line.
  if (have_prefix) {
    // Get the star too.
    prefix_len++;
    for (l_iter = lines.begin() + 1; l_iter != lines.end(); ++l_iter) {
      l_iter->erase(0, prefix_len);
    }
  }

  // Now delete the minimum amount of leading whitespace from each line.
  prefix_len = std::string::npos;
  for (l_iter = lines.begin() + 1; l_iter != lines.end(); ++l_iter) {
    if (l_iter->empty()) {
      continue;
    }
    pos = l_iter->find_first_not_of(" \t");
    if (pos != std::string::npos && (prefix_len == std::string::npos || pos < prefix_len)) {
      prefix_len = pos;
    }
  }

  // If a common indentation was found, delete it from every line.
  if (prefix_len != std::string::npos) {
    for (l_iter = lines.begin() + 1; l_iter != lines.end(); ++l_iter) {
      l_iter->erase(0, prefix_len);
    }
  }

  // Remove trailing whitespace from every line.
  for (l_iter = lines.begin(); l_iter != lines.end(); ++l_iter) {
    pos = l_iter->find_last_not_of(" \t");
    if (pos != std::string::npos && pos != l_iter->length() - 1) {
      l_iter->erase(pos + 1);
    }
  }

  // If the first line is empty, remove it.
  // Don't do this earlier because a lot of steps skip the first line.
  if (lines.front().empty()) {
    lines.erase(lines.begin());
  }

  // Now rejoin the lines and copy them back into doctext.
  docstring.clear();
  for (l_iter = lines.begin(); l_iter != lines.end(); ++l_iter) {
    docstring += *l_iter;
    docstring += '\n';
  }

  // The cleaned text can be longer than the input (a newline is appended to
  // a final line that had none), see THRIFT-1755.
  if (docstring.length() <= strlen(doctext)) {
    strcpy(doctext, docstring.c_str());
  } else {
    free(doctext); // too short
    doctext = strdup(docstring.c_str());
  }
  return doctext;
}
551
/**
 * Set to true to debug docstring parsing: generate() then calls
 * dump_docstrings() for every program it processes.
 */
static bool dump_docs = false;
554
555 /**
556 * Dumps docstrings to stdout
557 * Only works for top-level definitions and the whole program doc
 * (i.e., not enum constants, struct fields, or functions).
559 */
dump_docstrings(t_program * program)560 void dump_docstrings(t_program* program) {
561 string progdoc = program->get_doc();
562 if (!progdoc.empty()) {
563 printf("Whole program doc:\n%s\n", progdoc.c_str());
564 }
565 const vector<t_typedef*>& typedefs = program->get_typedefs();
566 vector<t_typedef*>::const_iterator t_iter;
567 for (t_iter = typedefs.begin(); t_iter != typedefs.end(); ++t_iter) {
568 t_typedef* td = *t_iter;
569 if (td->has_doc()) {
570 printf("typedef %s:\n%s\n", td->get_name().c_str(), td->get_doc().c_str());
571 }
572 }
573 const vector<t_enum*>& enums = program->get_enums();
574 vector<t_enum*>::const_iterator e_iter;
575 for (e_iter = enums.begin(); e_iter != enums.end(); ++e_iter) {
576 t_enum* en = *e_iter;
577 if (en->has_doc()) {
578 printf("enum %s:\n%s\n", en->get_name().c_str(), en->get_doc().c_str());
579 }
580 }
581 const vector<t_const*>& consts = program->get_consts();
582 vector<t_const*>::const_iterator c_iter;
583 for (c_iter = consts.begin(); c_iter != consts.end(); ++c_iter) {
584 t_const* co = *c_iter;
585 if (co->has_doc()) {
586 printf("const %s:\n%s\n", co->get_name().c_str(), co->get_doc().c_str());
587 }
588 }
589 const vector<t_struct*>& structs = program->get_structs();
590 vector<t_struct*>::const_iterator s_iter;
591 for (s_iter = structs.begin(); s_iter != structs.end(); ++s_iter) {
592 t_struct* st = *s_iter;
593 if (st->has_doc()) {
594 printf("struct %s:\n%s\n", st->get_name().c_str(), st->get_doc().c_str());
595 }
596 }
597 const vector<t_struct*>& xceptions = program->get_xceptions();
598 vector<t_struct*>::const_iterator x_iter;
599 for (x_iter = xceptions.begin(); x_iter != xceptions.end(); ++x_iter) {
600 t_struct* xn = *x_iter;
601 if (xn->has_doc()) {
602 printf("xception %s:\n%s\n", xn->get_name().c_str(), xn->get_doc().c_str());
603 }
604 }
605 const vector<t_service*>& services = program->get_services();
606 vector<t_service*>::const_iterator v_iter;
607 for (v_iter = services.begin(); v_iter != services.end(); ++v_iter) {
608 t_service* sv = *v_iter;
609 if (sv->has_doc()) {
610 printf("service %s:\n%s\n", sv->get_name().c_str(), sv->get_doc().c_str());
611 }
612 }
613 }
614
615 /**
 * Emits a warning on list<byte>; the binary type is typically a much better choice.
617 */
check_for_list_of_bytes(t_type * list_elem_type)618 void check_for_list_of_bytes(t_type* list_elem_type) {
619 if ((g_parse_mode == PROGRAM) && (list_elem_type != nullptr) && list_elem_type->is_base_type()) {
620 t_base_type* tbase = (t_base_type*)list_elem_type;
621 if (tbase->get_base() == t_base_type::TYPE_I8) {
622 pwarning(1, "Consider using the more efficient \"binary\" type instead of \"list<byte>\".");
623 }
624 }
625 }
626
// Set by emit_byte_type_warning() after its first call, so that the
// notice is requested at most once per run.
static bool g_byte_warning_emitted = false;
628
629 /**
630 * Emits a one-time warning on byte type, promoting the new i8 type instead
631 */
emit_byte_type_warning()632 void emit_byte_type_warning() {
633 if (!g_byte_warning_emitted) {
634 pwarning(1,
635 "The \"byte\" type is a compatibility alias for \"i8\". Use \"i8\" to emphasize the "
636 "signedness of this type.\n");
637 g_byte_warning_emitted = true;
638 }
639 }
640
641 /**
642 * Prints deprecation notice for old NS declarations that are no longer supported
643 * If new_form is nullptr, old_form is assumed to be a language identifier, such as "cpp"
644 * If new_form is not nullptr, both arguments are used exactly as given
645 */
error_unsupported_namespace_decl(const char * old_form,const char * new_form)646 void error_unsupported_namespace_decl(const char* old_form, const char* new_form) {
647 const char* remainder = "";
648 if( new_form == nullptr) {
649 new_form = old_form;
650 remainder = "_namespace";
651 }
652 failure("Unsupported declaration '%s%s'. Use 'namespace %s' instead.", old_form, remainder, new_form);
653 }
654
655 /**
656 * Prints the version number
657 */
version()658 void version() {
659 printf("Thrift version %s\n", THRIFT_VERSION);
660 }
661
662 /**
663 * Display the usage message and then exit with an error code.
664 */
void usage() {
  // Short synopsis plus a pointer to the full option list, then bail out.
  fputs("Usage: thrift [options] file\n\n"
        "Use thrift -help for a list of options\n",
        stderr);
  exit(1);
}
670
671 /**
 * Displays the help message and then exits with an error code.
673 */
help()674 void help() {
675 fprintf(stderr, "Usage: thrift [options] file\n");
676 fprintf(stderr, "Options:\n");
677 fprintf(stderr, " -version Print the compiler version\n");
678 fprintf(stderr, " -o dir Set the output directory for gen-* packages\n");
679 fprintf(stderr, " (default: current directory)\n");
680 fprintf(stderr, " -out dir Set the ouput location for generated files.\n");
681 fprintf(stderr, " (no gen-* folder will be created)\n");
682 fprintf(stderr, " -I dir Add a directory to the list of directories\n");
683 fprintf(stderr, " searched for include directives\n");
684 fprintf(stderr, " -nowarn Suppress all compiler warnings (BAD!)\n");
685 fprintf(stderr, " -strict Strict compiler warnings on\n");
686 fprintf(stderr, " -v[erbose] Verbose mode\n");
687 fprintf(stderr, " -r[ecurse] Also generate included files\n");
688 fprintf(stderr, " -debug Parse debug trace to stdout\n");
689 fprintf(stderr,
690 " --allow-neg-keys Allow negative field keys (Used to "
691 "preserve protocol\n");
692 fprintf(stderr, " compatibility with older .thrift files)\n");
693 fprintf(stderr, " --allow-64bit-consts Do not print warnings about using 64-bit constants\n");
694 fprintf(stderr, " --gen STR Generate code with a dynamically-registered generator.\n");
695 fprintf(stderr, " STR has the form language[:key1=val1[,key2[,key3=val3]]].\n");
696 fprintf(stderr, " Keys and values are options passed to the generator.\n");
697 fprintf(stderr, " Many options will not require values.\n");
698 fprintf(stderr, "\n");
699 fprintf(stderr, "Options related to audit operation\n");
700 fprintf(stderr, " --audit OldFile Old Thrift file to be audited with 'file'\n");
701 fprintf(stderr, " -Iold dir Add a directory to the list of directories\n");
702 fprintf(stderr, " searched for include directives for old thrift file\n");
703 fprintf(stderr, " -Inew dir Add a directory to the list of directories\n");
704 fprintf(stderr, " searched for include directives for new thrift file\n");
705 fprintf(stderr, "\n");
706 fprintf(stderr, "Available generators (and options):\n");
707
708 t_generator_registry::gen_map_t gen_map = t_generator_registry::get_generator_map();
709 t_generator_registry::gen_map_t::iterator iter;
710 for (iter = gen_map.begin(); iter != gen_map.end(); ++iter) {
711 fprintf(stderr,
712 " %s (%s):\n",
713 iter->second->get_short_name().c_str(),
714 iter->second->get_long_name().c_str());
715 fprintf(stderr, "%s", iter->second->get_documentation().c_str());
716 }
717 exit(1);
718 }
719
720 /**
721 * You know, when I started working on Thrift I really thought it wasn't going
722 * to become a programming language because it was just a generator and it
723 * wouldn't need runtime type information and all that jazz. But then we
724 * decided to add constants, and all of a sudden that means runtime type
725 * validation and inference, except the "runtime" is the code generator
726 * runtime.
727 */
validate_const_rec(std::string name,t_type * type,t_const_value * value)728 void validate_const_rec(std::string name, t_type* type, t_const_value* value) {
729 if (type->is_void()) {
730 throw "type error: cannot declare a void const: " + name;
731 }
732
733 if (type->is_base_type()) {
734 t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
735 switch (tbase) {
736 case t_base_type::TYPE_STRING:
737 if (value->get_type() != t_const_value::CV_STRING) {
738 throw "type error: const \"" + name + "\" was declared as string";
739 }
740 break;
741 case t_base_type::TYPE_UUID:
742 if (value->get_type() != t_const_value::CV_STRING) {
743 throw "type error: const \"" + name + "\" was declared as uuid";
744 }
745 value->set_uuid(value->get_uuid()); // validates constant
746 break;
747 case t_base_type::TYPE_BOOL:
748 if (value->get_type() != t_const_value::CV_INTEGER) {
749 throw "type error: const \"" + name + "\" was declared as bool";
750 }
751 break;
752 case t_base_type::TYPE_I8:
753 if (value->get_type() != t_const_value::CV_INTEGER) {
754 throw "type error: const \"" + name + "\" was declared as byte";
755 }
756 break;
757 case t_base_type::TYPE_I16:
758 if (value->get_type() != t_const_value::CV_INTEGER) {
759 throw "type error: const \"" + name + "\" was declared as i16";
760 }
761 break;
762 case t_base_type::TYPE_I32:
763 if (value->get_type() != t_const_value::CV_INTEGER) {
764 throw "type error: const \"" + name + "\" was declared as i32";
765 }
766 break;
767 case t_base_type::TYPE_I64:
768 if (value->get_type() != t_const_value::CV_INTEGER) {
769 throw "type error: const \"" + name + "\" was declared as i64";
770 }
771 break;
772 case t_base_type::TYPE_DOUBLE:
773 if (value->get_type() != t_const_value::CV_INTEGER
774 && value->get_type() != t_const_value::CV_DOUBLE) {
775 throw "type error: const \"" + name + "\" was declared as double";
776 }
777 break;
778 default:
779 throw "compiler error: no const of base type " + t_base_type::t_base_name(tbase) + name;
780 }
781 } else if (type->is_enum()) {
782 if (value->get_type() != t_const_value::CV_IDENTIFIER) {
783 throw "type error: const \"" + name + "\" was declared as enum";
784 }
785
786 // see if there's a dot in the identifier
787 std::string name_portion = value->get_identifier_name();
788
789 const vector<t_enum_value*>& enum_values = ((t_enum*)type)->get_constants();
790 vector<t_enum_value*>::const_iterator c_iter;
791 bool found = false;
792
793 for (c_iter = enum_values.begin(); c_iter != enum_values.end(); ++c_iter) {
794 if ((*c_iter)->get_name() == name_portion) {
795 found = true;
796 break;
797 }
798 }
799 if (!found) {
800 throw "type error: const " + name + " was declared as type " + type->get_name()
801 + " which is an enum, but " + value->get_identifier()
802 + " is not a valid value for that enum";
803 }
804 } else if (type->is_struct() || type->is_xception()) {
805 if (value->get_type() != t_const_value::CV_MAP) {
806 throw "type error: const \"" + name + "\" was declared as struct/xception";
807 }
808 const vector<t_field*>& fields = ((t_struct*)type)->get_members();
809 vector<t_field*>::const_iterator f_iter;
810
811 const map<t_const_value*, t_const_value*, t_const_value::value_compare>& val = value->get_map();
812 map<t_const_value*, t_const_value*, t_const_value::value_compare>::const_iterator v_iter;
813 for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
814 if (v_iter->first->get_type() != t_const_value::CV_STRING) {
815 throw "type error: " + name + " struct key must be string";
816 }
817 t_type* field_type = nullptr;
818 for (f_iter = fields.begin(); f_iter != fields.end(); ++f_iter) {
819 if ((*f_iter)->get_name() == v_iter->first->get_string()) {
820 field_type = (*f_iter)->get_type();
821 }
822 }
823 if (field_type == nullptr) {
824 throw "type error: " + type->get_name() + " has no field " + v_iter->first->get_string();
825 }
826
827 validate_const_rec(name + "." + v_iter->first->get_string(), field_type, v_iter->second);
828 }
829 } else if (type->is_map()) {
830 t_type* k_type = ((t_map*)type)->get_key_type();
831 t_type* v_type = ((t_map*)type)->get_val_type();
832 const map<t_const_value*, t_const_value*, t_const_value::value_compare>& val = value->get_map();
833 map<t_const_value*, t_const_value*, t_const_value::value_compare>::const_iterator v_iter;
834 for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
835 validate_const_rec(name + "<key>", k_type, v_iter->first);
836 validate_const_rec(name + "<val>", v_type, v_iter->second);
837 }
838 } else if (type->is_list() || type->is_set()) {
839 t_type* e_type;
840 if (type->is_list()) {
841 e_type = ((t_list*)type)->get_elem_type();
842 } else {
843 e_type = ((t_set*)type)->get_elem_type();
844 }
845 const vector<t_const_value*>& val = value->get_list();
846 vector<t_const_value*>::const_iterator v_iter;
847 for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
848 validate_const_rec(name + "<elem>", e_type, *v_iter);
849 }
850 }
851 }
852
853 /**
854 * Check simple identifier names
855 * It's easier to do it this way instead of rewriting the whole grammar etc.
856 */
validate_simple_identifier(const char * identifier)857 void validate_simple_identifier(const char* identifier) {
858 string name(identifier);
859 if (name.find(".") != string::npos) {
860 yyerror("Identifier %s can't have a dot.", identifier);
861 exit(1);
862 }
863 }
864
865 /**
866 * Check the type of the parsed const information against its declared type
867 */
validate_const_type(t_const * c)868 void validate_const_type(t_const* c) {
869 validate_const_rec(c->get_name(), c->get_type(), c->get_value());
870 }
871
872 /**
873 * Check the type of a default value assigned to a field.
874 */
validate_field_value(t_field * field,t_const_value * cv)875 void validate_field_value(t_field* field, t_const_value* cv) {
876 validate_const_rec(field->get_name(), field->get_type(), cv);
877 }
878
879 /**
880 * Check that all the elements of a throws block are actually exceptions.
881 */
validate_throws(t_struct * throws)882 bool validate_throws(t_struct* throws) {
883 const vector<t_field*>& members = throws->get_members();
884 vector<t_field*>::const_iterator m_iter;
885 for (m_iter = members.begin(); m_iter != members.end(); ++m_iter) {
886 if (!t_generator::get_true_type((*m_iter)->get_type())->is_xception()) {
887 return false;
888 }
889 }
890 return true;
891 }
892
893 /**
894 * Skips UTF-8 BOM if there is one
895 */
bool skip_utf8_bom(FILE* f) {
  // The UTF-8 byte order mark, in file order.
  static const int kUtf8Bom[] = {0xEF, 0xBB, 0xBF};

  for (int expected : kUtf8Bom) {
    if (fgetc(f) != expected) {
      // Not a BOM (or the file is too short): put the stream back at its start.
      rewind(f);
      return false;
    }
  }

  // All three bytes matched; the stream is now positioned just past the BOM.
  return true;
}
910
911 /**
912 * Parses a program
913 */
parse(t_program * program,t_program * parent_program)914 void parse(t_program* program, t_program* parent_program) {
915 // Get scope file path
916 string path = program->get_path();
917
918 // Set current dir global, which is used in the include_file function
919 g_curdir = directory_name(path);
920 g_curpath = path;
921
922 // Open the file
923 // skip UTF-8 BOM if there is one
924 yyin = fopen(path.c_str(), "r");
925 if (yyin == 0) {
926 failure("Could not open input file: \"%s\"", path.c_str());
927 }
928 if (skip_utf8_bom(yyin))
929 pverbose("Skipped UTF-8 BOM at %s\n", path.c_str());
930
931 // Create new scope and scan for includes
932 pverbose("Scanning %s for includes\n", path.c_str());
933 g_parse_mode = INCLUDES;
934 g_program = program;
935 g_scope = program->scope();
936 try {
937 yylineno = 1;
938 if (yyparse() != 0) {
939 failure("Parser error during include pass.");
940 }
941 } catch (string &x) {
942 failure(x.c_str());
943 }
944 fclose(yyin);
945
946 // Recursively parse all the include programs
947 vector<t_program*>& includes = program->get_includes();
948 vector<t_program*>::iterator iter;
949 for (iter = includes.begin(); iter != includes.end(); ++iter) {
950 parse(*iter, program);
951 }
952
953 // reset program doctext status before parsing a new file
954 reset_program_doctext_info();
955
956 // Parse the program file
957 g_parse_mode = PROGRAM;
958 g_program = program;
959 g_scope = program->scope();
960 g_parent_scope = (parent_program != nullptr) ? parent_program->scope() : nullptr;
961 g_parent_prefix = program->get_name() + ".";
962 g_curpath = path;
963
964 // Open the file
965 // skip UTF-8 BOM if there is one
966 yyin = fopen(path.c_str(), "r");
967 if (yyin == 0) {
968 failure("Could not open input file: \"%s\"", path.c_str());
969 }
970 if (skip_utf8_bom(yyin))
971 pverbose("Skipped UTF-8 BOM at %s\n", path.c_str());
972
973 pverbose("Parsing %s for types\n", path.c_str());
974 yylineno = 1;
975 try {
976 if (yyparse() != 0) {
977 failure("Parser error during types pass.");
978 }
979 } catch (string &x) {
980 failure(x.c_str());
981 }
982 fclose(yyin);
983 }
984
985 /**
986 * Generate code
987 */
generate(t_program * program,const vector<string> & generator_strings)988 void generate(t_program* program, const vector<string>& generator_strings) {
989 // Oooohh, recursive code generation, hot!!
990 if (gen_recurse) {
991 program->set_recursive(true);
992 const vector<t_program*>& includes = program->get_includes();
993 for (auto include : includes) {
994 // Propagate output path from parent to child programs
995 include->set_out_path(program->get_out_path(), program->is_out_path_absolute());
996
997 generate(include, generator_strings);
998 }
999 }
1000
1001 // Generate code!
1002 try {
1003 pverbose("Program: %s\n", program->get_path().c_str());
1004
1005 if (dump_docs) {
1006 dump_docstrings(program);
1007 }
1008
1009 // make sure all symbolic constants are properly resolved
1010 program->scope()->resolve_all_consts();
1011
1012 vector<string>::const_iterator iter;
1013 for (iter = generator_strings.begin(); iter != generator_strings.end(); ++iter) {
1014 t_generator* generator = t_generator_registry::get_generator(program, *iter);
1015
1016 if (generator == nullptr) {
1017 pwarning(1, "Unable to get a generator for \"%s\".\n", iter->c_str());
1018 g_generator_failure = true;
1019 } else if (generator) {
1020 generator->validate_input();
1021 pverbose("Generating \"%s\"\n", iter->c_str());
1022 generator->generate_program();
1023 delete generator;
1024 }
1025 }
1026 } catch (string &s) {
1027 failure("Error: %s\n", s.c_str());
1028 } catch (const char* exc) {
1029 failure("Error: %s\n", exc);
1030 } catch (const std::invalid_argument& invalid_argument_exception) {
1031 failure("Error: %s\n", invalid_argument_exception.what());
1032 }
1033 }
1034
audit(t_program * new_program,t_program * old_program,string new_thrift_include_path,string old_thrift_include_path)1035 void audit(t_program* new_program,
1036 t_program* old_program,
1037 string new_thrift_include_path,
1038 string old_thrift_include_path) {
1039 vector<string> temp_incl_searchpath = g_incl_searchpath;
1040 if (!old_thrift_include_path.empty()) {
1041 g_incl_searchpath.push_back(old_thrift_include_path);
1042 }
1043
1044 parse(old_program, nullptr);
1045
1046 g_incl_searchpath = temp_incl_searchpath;
1047 if (!new_thrift_include_path.empty()) {
1048 g_incl_searchpath.push_back(new_thrift_include_path);
1049 }
1050
1051 parse(new_program, nullptr);
1052
1053 compare_namespace(new_program, old_program);
1054 compare_services(new_program->get_services(), old_program->get_services());
1055 compare_enums(new_program->get_enums(), old_program->get_enums());
1056 compare_structs(new_program->get_structs(), old_program->get_structs());
1057 compare_structs(new_program->get_xceptions(), old_program->get_xceptions());
1058 compare_consts(new_program->get_consts(), old_program->get_consts());
1059 }
1060
1061 /**
1062 * Parse it up.. then spit it back out, in pretty much every language. Alright
1063 * not that many languages, but the cool ones that we care about.
1064 */
main(int argc,char ** argv)1065 int main(int argc, char** argv) {
1066 int i;
1067 std::string out_path;
1068 bool out_path_is_absolute = false;
1069
1070 // Setup time string
1071 time_t now = time(nullptr);
1072 g_time_str = ctime(&now);
1073
1074 // Check for necessary arguments, you gotta have at least a filename and
1075 // an output language flag
1076 if (argc < 2) {
1077 usage();
1078 }
1079
1080 vector<string> generator_strings;
1081 string old_thrift_include_path;
1082 string new_thrift_include_path;
1083 string old_input_file;
1084
1085 // Set the current path to a dummy value to make warning messages clearer.
1086 g_curpath = "arguments";
1087
1088 // Hacky parameter handling... I didn't feel like using a library sorry!
1089 for (i = 1; i < argc - 1; i++) {
1090 char* arg;
1091
1092 arg = strtok(argv[i], " ");
1093 while (arg != nullptr) {
1094 // Treat double dashes as single dashes
1095 if (arg[0] == '-' && arg[1] == '-') {
1096 ++arg;
1097 }
1098
1099 if (strcmp(arg, "-help") == 0) {
1100 help();
1101 } else if (strcmp(arg, "-version") == 0) {
1102 version();
1103 exit(0);
1104 } else if (strcmp(arg, "-debug") == 0) {
1105 g_debug = 1;
1106 } else if (strcmp(arg, "-nowarn") == 0) {
1107 g_warn = 0;
1108 } else if (strcmp(arg, "-strict") == 0) {
1109 g_strict = 255;
1110 g_warn = 2;
1111 } else if (strcmp(arg, "-v") == 0 || strcmp(arg, "-verbose") == 0) {
1112 g_verbose = 1;
1113 } else if (strcmp(arg, "-r") == 0 || strcmp(arg, "-recurse") == 0) {
1114 gen_recurse = true;
1115 } else if (strcmp(arg, "-allow-neg-keys") == 0) {
1116 g_allow_neg_field_keys = true;
1117 } else if (strcmp(arg, "-allow-64bit-consts") == 0) {
1118 g_allow_64bit_consts = true;
1119 } else if (strcmp(arg, "-gen") == 0) {
1120 arg = argv[++i];
1121 if (arg == nullptr) {
1122 fprintf(stderr, "Missing generator specification\n");
1123 usage();
1124 }
1125 generator_strings.emplace_back(arg);
1126 } else if (strcmp(arg, "-I") == 0) {
1127 // An argument of "-I\ asdf" is invalid and has unknown results
1128 arg = argv[++i];
1129
1130 if (arg == nullptr) {
1131 fprintf(stderr, "Missing Include directory\n");
1132 usage();
1133 }
1134 g_incl_searchpath.emplace_back(arg);
1135 } else if ((strcmp(arg, "-o") == 0) || (strcmp(arg, "-out") == 0)) {
1136 out_path_is_absolute = (strcmp(arg, "-out") == 0) ? true : false;
1137 arg = argv[++i];
1138 if (arg == nullptr) {
1139 fprintf(stderr, "-o: missing output directory\n");
1140 usage();
1141 }
1142 out_path = arg;
1143
1144 #ifdef _WIN32
1145 // strip out trailing \ on Windows
1146 std::string::size_type last = out_path.length() - 1;
1147 if (out_path[last] == '\\') {
1148 out_path.erase(last);
1149 }
1150 #endif
1151 if (!check_is_directory(out_path.c_str()))
1152 return -1;
1153 } else if (strcmp(arg, "-audit") == 0) {
1154 g_audit = true;
1155 arg = argv[++i];
1156 if (arg == nullptr) {
1157 fprintf(stderr, "Missing old thrift file name for audit operation\n");
1158 usage();
1159 }
1160 char old_thrift_file_rp[THRIFT_PATH_MAX];
1161
1162 // cppcheck-suppress uninitvar
1163 if (saferealpath(arg, old_thrift_file_rp) == nullptr) {
1164 failure("Could not open input file with realpath: %s", arg);
1165 }
1166 old_input_file = string(old_thrift_file_rp);
1167 } else if (strcmp(arg, "-audit-nofatal") == 0) {
1168 g_audit_fatal = false;
1169 } else if (strcmp(arg, "-Iold") == 0) {
1170 arg = argv[++i];
1171 if (arg == nullptr) {
1172 fprintf(stderr, "Missing Include directory for old thrift file\n");
1173 usage();
1174 }
1175 old_thrift_include_path = string(arg);
1176 } else if (strcmp(arg, "-Inew") == 0) {
1177 arg = argv[++i];
1178 if (arg == nullptr) {
1179 fprintf(stderr, "Missing Include directory for new thrift file\n");
1180 usage();
1181 }
1182 new_thrift_include_path = string(arg);
1183 } else {
1184 fprintf(stderr, "Unrecognized option: %s\n", arg);
1185 usage();
1186 }
1187
1188 // Tokenize more
1189 arg = strtok(nullptr, " ");
1190 }
1191 }
1192
1193 // display help
1194 if ((strcmp(argv[argc - 1], "-help") == 0) || (strcmp(argv[argc - 1], "--help") == 0)) {
1195 help();
1196 }
1197
1198 // if you're asking for version, you have a right not to pass a file
1199 if ((strcmp(argv[argc - 1], "-version") == 0) || (strcmp(argv[argc - 1], "--version") == 0)) {
1200 version();
1201 exit(0);
1202 }
1203
1204 // Initialize global types
1205 initGlobals();
1206
1207 if (g_audit) {
1208 // Audit operation
1209
1210 if (old_input_file.empty()) {
1211 fprintf(stderr, "Missing file name of old thrift file for audit\n");
1212 usage();
1213 }
1214
1215 char new_thrift_file_rp[THRIFT_PATH_MAX];
1216 if (argv[i] == nullptr) {
1217 fprintf(stderr, "Missing file name of new thrift file for audit\n");
1218 usage();
1219 }
1220 // cppcheck-suppress uninitvar
1221 if (saferealpath(argv[i], new_thrift_file_rp) == nullptr) {
1222 failure("Could not open input file with realpath: %s", argv[i]);
1223 }
1224 string new_input_file(new_thrift_file_rp);
1225
1226 t_program new_program(new_input_file);
1227 t_program old_program(old_input_file);
1228
1229 audit(&new_program, &old_program, new_thrift_include_path, old_thrift_include_path);
1230
1231 } else {
1232 // Generate options
1233
1234 // You gotta generate something!
1235 if (generator_strings.empty()) {
1236 fprintf(stderr, "No output language(s) specified\n");
1237 usage();
1238 }
1239
1240 // Real-pathify it
1241 char rp[THRIFT_PATH_MAX];
1242 if (argv[i] == nullptr) {
1243 fprintf(stderr, "Missing file name\n");
1244 usage();
1245 }
1246 // cppcheck-suppress uninitvar
1247 if (saferealpath(argv[i], rp) == nullptr) {
1248 failure("Could not open input file with realpath: %s", argv[i]);
1249 }
1250 string input_file(rp);
1251
1252 // Instance of the global parse tree
1253 t_program* program = new t_program(input_file);
1254 if (out_path.size()) {
1255 program->set_out_path(out_path, out_path_is_absolute);
1256 }
1257
1258 // Compute the cpp include prefix.
1259 // infer this from the filename passed in
1260 string input_filename = argv[i];
1261 string include_prefix;
1262
1263 string::size_type last_slash = string::npos;
1264 if ((last_slash = input_filename.rfind("/")) != string::npos) {
1265 include_prefix = input_filename.substr(0, last_slash);
1266 }
1267
1268 program->set_include_prefix(include_prefix);
1269
1270 // Parse it!
1271 parse(program, nullptr);
1272
1273 // The current path is not really relevant when we are doing generation.
1274 // Reset the variable to make warning messages clearer.
1275 g_curpath = "generation";
1276 // Reset yylineno for the heck of it. Use 1 instead of 0 because
1277 // That is what shows up during argument parsing.
1278 yylineno = 1;
1279
1280 // Generate it!
1281 generate(program, generator_strings);
1282 delete program;
1283 }
1284
1285 // Clean up. Who am I kidding... this program probably orphans heap memory
1286 // all over the place, but who cares because it is about to exit and it is
1287 // all referenced and used by this wacky parse tree up until now anyways.
1288 clearGlobals();
1289
1290 // Finished
1291 if (g_return_failure && g_audit_fatal) {
1292 exit(2);
1293 }
1294 if (g_generator_failure) {
1295 exit(3);
1296 }
1297 // Finished
1298 return 0;
1299 }
1300