#!/usr/bin/env python3
#
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This script confirms that the naming of all symbols and identifiers in Mbed TLS
are consistent with the house style and are also self-consistent. It only runs
on Linux and macOS since it depends on nm.

It contains two major Python classes, CodeParser and NameChecker. They both have
a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
but the individual functions can also be used for specific needs.

CodeParser makes heavy use of regular expressions to parse the code, and is
dependent on the current code formatting. Many Python C parser libraries require
preprocessed C code, which means no macro parsing. Compiler tools are also not
very helpful when we want the exact location in the original source (which
becomes impossible when e.g. comments are stripped).

NameChecker performs the following checks:

- All exported and available symbols in the library object files, are explicitly
  declared in the header files. This uses the nm command.
- All macros, constants, and identifiers (function names, struct names, etc)
  follow the required regex pattern.
- Typo checking: All words that begin with MBED|PSA exist as macros or constants.

The script returns 0 on success, 1 on test failure, and 2 if there is a script
error. It must be run from Mbed TLS root.
"""

import abc
import argparse
import fnmatch
import glob
import textwrap
import os
import sys
import traceback
import re
import enum
import shutil
import subprocess
import logging

import scripts_path # pylint: disable=unused-import
from mbedtls_dev import build_tree


# Naming patterns to check against. These are defined outside the NameCheck
# class for ease of modification.
PUBLIC_MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
INTERNAL_MACRO_PATTERN = r"^[0-9A-Za-z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = PUBLIC_MACRO_PATTERN
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"

class Match(): # pylint: disable=too-few-public-methods
    """
    A class representing a match, together with its found position.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the 1-based line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.
        """
        gutter = format(self.line_no, "4d")
        underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"

        return (
            " {0} |\n".format(" " * len(gutter)) +
            " {0} | {1}".format(gutter, self.line) +
            " {0} | {1}\n".format(" " * len(gutter), underline)
        )

class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    An abstract parent class representing a form of static analysis error.
    It extends an Abstract Base Class, which means it is not instantiable, and
    it also mandates certain abstract methods to be implemented in subclasses.
    """
    # Class variable to control the quietness of all problems
    quiet = False
    def __init__(self):
        self.textwrapper = textwrap.TextWrapper()
        self.textwrapper.width = 80
        self.textwrapper.initial_indent = " > "
        self.textwrapper.subsequent_indent = " "

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        if self.__class__.quiet:
            return self.quiet_output()
        return self.verbose_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """
        pass

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
        pass

class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when an exported/available symbol in the object file
    is not explicitly declared in header files. Created with
    NameCheck.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        Problem.__init__(self)

    def quiet_output(self):
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        return self.textwrapper.fill(
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
            .format(self.symbol_name))

class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when something doesn't match the expected pattern.
    Created with NameCheck.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        Problem.__init__(self)


    def quiet_output(self):
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(
                self.match.filename,
                self.match.line_no,
                self.match.name,
                self.pattern
            )
        ) + "\n" + str(self.match)

class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when a word using MBED or PSA doesn't
    appear to be defined as constants nor enum values. Created with
    NameCheck.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        self.match = match
        Problem.__init__(self)

    def quiet_output(self):
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name)
        ) + "\n" + str(self.match)

class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        self.log = log
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-2.x.h"]

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = {"public": [], "internal": [], "private": []}
        all_macros["public"] = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        all_macros["internal"] = self.parse_macros([
            "library/*.h",
            "tests/include/test/drivers/*.h",
        ])
        all_macros["private"] = self.parse_macros([
            "library/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "library/*.c",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers, excluded_identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ], ["3rdparty/p256-m/p256-m/p256-m.h"])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ], ["library/psa_crypto_driver_wrappers.c"])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc
        identifiers_justname = [x.name for x in identifiers]
        actual_macros = {"public": [], "internal": []}
        for scope in actual_macros:
            for macro in all_macros[scope]:
                if macro.name not in identifiers_justname:
                    actual_macros[scope].append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        for scope in actual_macros:
            self.log.debug("  {:4} Total {} Macros"
                           .format(len(all_macros[scope]), scope))
            self.log.debug("  {:4} {} Non-identifier Macros"
                           .format(len(actual_macros[scope]), scope))
        self.log.debug("  {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug("  {:4} Identifiers".format(len(identifiers)))
        self.log.debug("  {:4} Exported Symbols".format(len(symbols)))
        return {
            "public_macros": actual_macros["public"],
            "internal_macros": actual_macros["internal"],
            "private_macros": all_macros["private"],
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "excluded_identifiers": excluded_identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_all_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards
        and filter them into included and excluded lists.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns:
        * inc_files: A List of relative filepaths for included files.
        * exc_files: A List of relative filepaths for excluded files.
        """
        accumulator = set()
        all_wildcards = include_wildcards + (exclude_wildcards or [])
        for wildcard in all_wildcards:
            accumulator = accumulator.union(glob.iglob(wildcard))

        inc_files = []
        exc_files = []
        for path in accumulator:
            if self.is_file_excluded(path, exclude_wildcards):
                exc_files.append(path)
            else:
                inc_files.append(path)
        return (inc_files, exc_files)

    def get_included_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                # Start at 1 so that reported line numbers match editors.
                for line_no, line in enumerate(header, 1):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_included_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                # Start at 1 so that reported line numbers match editors.
                for line_no, line in enumerate(fp, 1):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                # Start at 1 so that reported line numbers match editors.
                for line_no, line in enumerate(header, 1):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum " + \
                                    optional_c_identifier + \
                                    r" *{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            # Start at 1 so that reported line numbers match editors.
            for line_no, line in enumerate(header, 1):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style. Identifiers in excluded files are still
        parsed

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns: a Tuple of two Lists of Match objects with identifiers.
        * included_identifiers: A List of Match objects with identifiers from
          included files.
        * excluded_identifiers: A List of Match objects with identifiers from
          excluded files.
        """

        included_files, excluded_files = \
            self.get_all_files(include, exclude)

        self.log.debug("Looking for included identifiers in {} files"
                       .format(len(included_files)))

        included_identifiers = []
        excluded_identifiers = []
        for header_file in included_files:
            self.parse_identifiers_in_file(header_file, included_identifiers)
        for header_file in excluded_files:
            self.parse_identifiers_in_file(header_file, excluded_identifiers)

        return (included_identifiers, excluded_identifiers)

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/mbedtls_config.h",
            "include/mbedtls/mbedtls_config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            # Bare raise preserves the original traceback.
            raise
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/mbedtls_config.h.bak",
                "include/mbedtls/mbedtls_config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        exclusions = ("FStar", "Hacl")

        symbols = []

        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout

        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    self.log.error(line)

        return symbols

class NameChecker():
    """
    Representation of the core name checking operation performed by this script.
    """
    def __init__(self, parse_result, log):
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.
        """
        self.log.info("=============")
        Problem.quiet = quiet
        problems = 0
        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("public_macros", PUBLIC_MACRO_PATTERN),
            ("internal_macros", INTERNAL_MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1
        else:
            self.log.info("PASS")
            return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []
        all_identifiers = self.parse_result["identifiers"] + \
            self.parse_result["excluded_identifiers"]

        for symbol in self.parse_result["symbols"]:
            found_symbol_declared = False
            for identifier_match in all_identifiers:
                if symbol == identifier_match.name:
                    found_symbol_declared = True
                    break

            if not found_symbol_declared:
                problems.append(SymbolNotInHeader(symbol))

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes parse_names_in_source() was called before this.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            if re.search(r".*__.*", item_match.name):
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED are
        either defined as macros, or as enum constants.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["public_macros"] +
            self.parse_result["internal_macros"] +
            self.parse_result["private_macros"] +
            self.parse_result["enum_consts"]
        }
        # Note: these alternatives are matched with search(), so each entry
        # excludes any name that merely contains it. The previous stray "*"
        # after LIBTESTDRIVER (quantifying the literal R) has been removed;
        # with search() the matching behavior is unchanged.
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                    r"MBEDTLS_TEST_LIBTESTDRIVER|"
                                    r"PSA_CRYPTO_DRIVER_TEST")

        for name_match in self.parse_result["mbed_psa_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))

def main():
    """
    Perform argument parsing, and create an instance of CodeParser and
    NameChecker to begin the core operation.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the MbedTLS root directory.")
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )

    args = parser.parse_args()

    # Configure the global logger, which is then passed to the classes below
    log = logging.getLogger()
    log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    log.addHandler(logging.StreamHandler())

    try:
        code_parser = CodeParser(log)
        parse_result = code_parser.comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    name_checker = NameChecker(parse_result, log)
    return_code = name_checker.perform_checks(quiet=args.quiet)

    sys.exit(return_code)

if __name__ == "__main__":
    main()