1#!/usr/bin/env python3 2# 3# Copyright The Mbed TLS Contributors 4# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 5 6""" 7This script confirms that the naming of all symbols and identifiers in Mbed TLS 8are consistent with the house style and are also self-consistent. It only runs 9on Linux and macOS since it depends on nm. 10 11It contains two major Python classes, CodeParser and NameChecker. They both have 12a comprehensive "run-all" function (comprehensive_parse() and perform_checks()) 13but the individual functions can also be used for specific needs. 14 15CodeParser makes heavy use of regular expressions to parse the code, and is 16dependent on the current code formatting. Many Python C parser libraries require 17preprocessed C code, which means no macro parsing. Compiler tools are also not 18very helpful when we want the exact location in the original source (which 19becomes impossible when e.g. comments are stripped). 20 21NameChecker performs the following checks: 22 23- All exported and available symbols in the library object files, are explicitly 24 declared in the header files. This uses the nm command. 25- All macros, constants, and identifiers (function names, struct names, etc) 26 follow the required regex pattern. 27- Typo checking: All words that begin with MBED|PSA exist as macros or constants. 28 29The script returns 0 on success, 1 on test failure, and 2 if there is a script 30error. It must be run from Mbed TLS root. 31""" 32 33import abc 34import argparse 35import fnmatch 36import glob 37import textwrap 38import os 39import sys 40import traceback 41import re 42import enum 43import shutil 44import subprocess 45import logging 46 47import scripts_path # pylint: disable=unused-import 48from mbedtls_dev import build_tree 49 50 51# Naming patterns to check against. These are defined outside the NameCheck 52# class for ease of modification. 
# Naming patterns enforced by the checks below. Public names must carry the
# MBEDTLS_/PSA_ (or mbedtls_/psa_) namespace prefix; internal macros only need
# a sane character set and a non-underscore final character.
PUBLIC_MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
INTERNAL_MACRO_PATTERN = r"^[0-9A-Za-z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = PUBLIC_MACRO_PATTERN
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"

class Match(): # pylint: disable=too-few-public-methods
    """
    A single occurrence of a name in the source tree, with its location.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line,
        with a gutter showing the line number and a caret underline marking
        the offending name.
        """
        gutter = "{:4d}".format(self.line_no)
        blank_gutter = " " * len(gutter)
        start, end = self.pos
        underline = " " * start + "^" * (end - start)

        # self.line is expected to carry its own trailing newline, so the
        # middle element deliberately has no "\n" of its own.
        return "".join([
            " {0} |\n".format(blank_gutter),
            " {0} | {1}".format(gutter, self.line),
            " {0} | {1}\n".format(blank_gutter, underline),
        ])

class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    Abstract parent class for one static-analysis finding. It cannot be
    instantiated directly; subclasses must provide quiet_output() and
    verbose_output().
    """
    # Class variable toggling terse output for every Problem at once.
    quiet = False

    def __init__(self):
        wrapper = textwrap.TextWrapper()
        wrapper.width = 80
        wrapper.initial_indent = " > "
        wrapper.subsequent_indent = " "
        self.textwrapper = wrapper

    def __str__(self):
        """
        Unified string representation: dispatch to quiet_output() or
        verbose_output() depending on the class-wide quiet flag.
        """
        render = self.quiet_output if self.__class__.quiet else self.verbose_output
        return render()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """

class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when an exported/available symbol in the object file
    is not explicitly declared in header files. Created with
    NameCheck.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        super().__init__()

    def quiet_output(self):
        return str(self.symbol_name)

    def verbose_output(self):
        message = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
            .format(self.symbol_name))
        return self.textwrapper.fill(message)

class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when something doesn't match the expected pattern.
    Created with NameCheck.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        super().__init__()

    def quiet_output(self):
        return "{0}:{1}:{2}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        explanation = (
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(
                self.match.filename,
                self.match.line_no,
                self.match.name,
                self.pattern))
        return self.textwrapper.fill(explanation) + "\n" + str(self.match)

class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when a word using MBED or PSA doesn't
    appear to be defined as constants nor enum values. Created with
    NameCheck.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        self.match = match
        super().__init__()

    def quiet_output(self):
        return "{0}:{1}:{2}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        explanation = (
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name))
        return self.textwrapper.fill(explanation) + "\n" + str(self.match)
class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.

    Parsing is regex-based and highly dependent on the current code
    formatting; it does not preprocess or fully parse C.
    """
    def __init__(self, log):
        self.log = log
        # Fail early if not run from the Mbed TLS root: all glob patterns
        # below are relative to it.
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-2.x.h"]

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches:
        keys are "public_macros", "internal_macros", "private_macros",
        "enum_consts", "identifiers", "excluded_identifiers", "symbols" and
        "mbed_psa_words".
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = {"public": [], "internal": [], "private":[]}
        all_macros["public"] = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        all_macros["internal"] = self.parse_macros([
            "library/*.h",
            "tests/include/test/drivers/*.h",
        ])
        all_macros["private"] = self.parse_macros([
            "library/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "library/*.c",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers, excluded_identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ], ["3rdparty/p256-m/p256-m/p256-m.h"])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ], ["library/psa_crypto_driver_wrappers.h"])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc:
        # a #define whose name is also a declared identifier is an aliasing
        # macro, not a "real" macro, so it is checked under identifier rules.
        identifiers_justname = [x.name for x in identifiers]
        actual_macros = {"public": [], "internal": []}
        for scope in actual_macros:
            for macro in all_macros[scope]:
                if macro.name not in identifiers_justname:
                    actual_macros[scope].append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        for scope in actual_macros:
            self.log.debug("  {:4} Total {} Macros"
                           .format(len(all_macros[scope]), scope))
            self.log.debug("  {:4} {} Non-identifier Macros"
                           .format(len(actual_macros[scope]), scope))
        self.log.debug("  {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug("  {:4} Identifiers".format(len(identifiers)))
        self.log.debug("  {:4} Exported Symbols".format(len(symbols)))
        return {
            "public_macros": actual_macros["public"],
            "internal_macros": actual_macros["internal"],
            "private_macros": all_macros["private"],
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "excluded_identifiers": excluded_identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_all_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards
        and filter them into included and excluded lists.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns:
        * inc_files: A List of relative filepaths for included files.
        * exc_files: A List of relative filepaths for excluded files.
        """
        accumulator = set()
        # Glob the exclude wildcards too, so excluded files are returned in
        # exc_files rather than silently dropped.
        all_wildcards = include_wildcards + (exclude_wildcards or [])
        for wildcard in all_wildcards:
            accumulator = accumulator.union(glob.iglob(wildcard))

        inc_files = []
        exc_files = []
        for path in accumulator:
            if self.is_file_excluded(path, exclude_wildcards):
                exc_files.append(path)
            else:
                inc_files.append(path)
        return (inc_files, exc_files)

    def get_included_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        # Macro-name prefixes that are known non-names (compiler keywords,
        # third-party/CRT defines, bignum assembly helpers).
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        # str.startswith accepts a tuple: skip any of the
                        # excluded prefixes.
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        # Lines carrying a "// no-check-names" opt-out or an #error message
        # are skipped entirely.
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_included_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            # The FSM is reset per file: an enum cannot span two files.
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum " + \
                                    optional_c_identifier + \
                                    r" *{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        # Inside the braces and not a preprocessor line: the
                        # first word on the line (if any) is an enum constant.
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    # Chunks that must be ignored when scanning for identifiers: one-line
    # block comments, line comments, and string literals (the named group
    # "string" distinguishes the literal case in the substitution below).
    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                # Whole line is inside the comment: nothing to scan.
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    # Patterns that capture a declared identifier on one (possibly
    # concatenated) line. Exactly one group matches per alternative.
    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(". Function pointers.
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # Lines that can never contain a declaration of interest.
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            for line_no, line in enumerate(header):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style. Identifiers in excluded files are still
        parsed (and returned separately), since exported symbols may be
        declared there.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns: a Tuple of two Lists of Match objects with identifiers.
        * included_identifiers: A List of Match objects with identifiers from
          included files.
        * excluded_identifiers: A List of Match objects with identifiers from
          excluded files.
        """

        included_files, excluded_files = \
            self.get_all_files(include, exclude)

        self.log.debug("Looking for included identifiers in {} files".format \
            (len(included_files)))

        included_identifiers = []
        excluded_identifiers = []
        for header_file in included_files:
            self.parse_identifiers_in_file(header_file, included_identifiers)
        for header_file in excluded_files:
            self.parse_identifiers_in_file(header_file, excluded_identifiers)

        return (included_identifiers, excluded_identifiers)

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/mbedtls_config.h",
            "include/mbedtls/mbedtls_config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/mbedtls_config.h.bak",
                "include/mbedtls/mbedtls_config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
        # Lines to ignore: undefined ("U") symbols, blank lines, and
        # per-object-file banner lines.
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        # Symbols from the bundled Everest code are third-party and exempt.
        exclusions = ("FStar", "Hacl")

        symbols = []

        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout

        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    # Unexpected nm output format: surface it rather than
                    # silently dropping the line.
                    self.log.error(line)

        return symbols
class NameChecker():
    """
    Representation of the core name checking operation performed by this script.

    Consumes the dict produced by CodeParser.comprehensive_parse() and logs
    PASS/FAIL results plus individual Problem reports.
    """
    def __init__(self, parse_result, log):
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.

        Returns 0 if all checks pass, 1 otherwise (suitable as an exit code).
        """
        self.log.info("=============")
        # Problem.quiet is a class variable: setting it here affects the
        # rendering of every Problem instance created below.
        Problem.quiet = quiet
        problems = 0
        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("public_macros", PUBLIC_MACRO_PATTERN),
            ("internal_macros", INTERNAL_MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1
        else:
            self.log.info("PASS")
            return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []
        # Identifiers from excluded files still count as declarations:
        # exclusion only exempts them from the naming-pattern checks.
        all_identifiers = self.parse_result["identifiers"] + \
            self.parse_result["excluded_identifiers"]

        for symbol in self.parse_result["symbols"]:
            found_symbol_declared = False
            for identifier_match in all_identifiers:
                if symbol == identifier_match.name:
                    found_symbol_declared = True
                    break

            if not found_symbol_declared:
                problems.append(SymbolNotInHeader(symbol))

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes parse_names_in_source() was called before this.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            if re.search(r".*__.*", item_match.name):
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED are
        either defined as macros, or as enum constants.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["public_macros"] +
            self.parse_result["internal_macros"] +
            self.parse_result["private_macros"] +
            self.parse_result["enum_consts"]
            }
        # NOTE(review): in "MBEDTLS_TEST_LIBTESTDRIVER*" the '*' quantifies
        # only the final 'R', so this also matches "...LIBTESTDRIVE";
        # ".*" was possibly intended -- confirm before tightening.
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                    r"MBEDTLS_TEST_LIBTESTDRIVER*|"
                                    r"PSA_CRYPTO_DRIVER_TEST")

        for name_match in self.parse_result["mbed_psa_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))
def main():
    """
    Entry point: parse command-line options, run CodeParser over the source
    tree, then hand the results to NameChecker and exit with its verdict.

    Exits with 0 on success, 1 on check failure, and 2 on a script error
    (e.g. a parsing/compilation exception).
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the Mbed TLS root directory.")
    )
    arg_parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    arg_parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )
    options = arg_parser.parse_args()

    # Configure the root logger, shared by CodeParser and NameChecker.
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG if options.verbose else logging.INFO)
    logger.addHandler(logging.StreamHandler())

    try:
        parse_result = CodeParser(logger).comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        # Any failure during parsing/compilation is a script error (2),
        # distinct from a naming-check failure (1).
        traceback.print_exc()
        sys.exit(2)

    checker = NameChecker(parse_result, logger)
    sys.exit(checker.perform_checks(quiet=options.quiet))

if __name__ == "__main__":
    main()