1#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
4# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
5
6"""
7This script confirms that the naming of all symbols and identifiers in Mbed TLS
8are consistent with the house style and are also self-consistent. It only runs
9on Linux and macOS since it depends on nm.
10
11It contains two major Python classes, CodeParser and NameChecker. They both have
12a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
13but the individual functions can also be used for specific needs.
14
15CodeParser makes heavy use of regular expressions to parse the code, and is
16dependent on the current code formatting. Many Python C parser libraries require
17preprocessed C code, which means no macro parsing. Compiler tools are also not
18very helpful when we want the exact location in the original source (which
19becomes impossible when e.g. comments are stripped).
20
21NameChecker performs the following checks:
22
23- All exported and available symbols in the library object files, are explicitly
24  declared in the header files. This uses the nm command.
25- All macros, constants, and identifiers (function names, struct names, etc)
26  follow the required regex pattern.
27- Typo checking: All words that begin with MBED|PSA exist as macros or constants.
28
29The script returns 0 on success, 1 on test failure, and 2 if there is a script
30error. It must be run from Mbed TLS root.
31"""
32
33import abc
34import argparse
35import fnmatch
36import glob
37import textwrap
38import os
39import sys
40import traceback
41import re
42import enum
43import shutil
44import subprocess
45import logging
46
47import scripts_path # pylint: disable=unused-import
48from mbedtls_dev import build_tree
49
50
# Naming patterns to check against. These are defined outside the NameCheck
# class for ease of modification.
# Public macros: uppercase MBEDTLS_/PSA_ prefix; the final character must be
# a digit or uppercase letter (i.e. names may not end with an underscore).
PUBLIC_MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
# Internal macros: any case allowed, but must still end in a digit or
# uppercase letter.
INTERNAL_MACRO_PATTERN = r"^[0-9A-Za-z_]*[0-9A-Z]$"
# Enum constants follow the same rule as public macros.
CONSTANTS_PATTERN = PUBLIC_MACRO_PATTERN
# Identifiers (functions, structs, ...): lowercase mbedtls_/psa_ prefix,
# must end in a digit or lowercase letter.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
57
class Match(): # pylint: disable=too-few-public-methods
    """
    A single occurrence of a name found in the source tree, together with
    its exact location.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Render the match as a small code listing, with the matched name
        underlined by carets on the following line.
        """
        gutter = f"{self.line_no:4d}"
        pad = " " * len(gutter)
        underline = " " * self.pos[0] + "^" * (self.pos[1] - self.pos[0])

        return (
            f" {pad} |\n"
            f" {gutter} | {self.line}"
            f" {pad} | {underline}\n"
        )
89
class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    Abstract base class for every kind of static-analysis finding reported
    by this script. Being an Abstract Base Class, it cannot be instantiated
    directly, and it forces subclasses to implement both output methods.
    """
    # Class variable to control the quietness of all problems
    quiet = False

    def __init__(self):
        # Shared wrapper used by subclasses to format verbose messages.
        wrapper = textwrap.TextWrapper()
        wrapper.width = 80
        wrapper.initial_indent = "    > "
        wrapper.subsequent_indent = "      "
        self.textwrapper = wrapper

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        quiet_mode = self.__class__.quiet
        return self.quiet_output() if quiet_mode else self.verbose_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
125
class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a symbol that is exported/available in a library object
    file has no declaration in any header file. Created with
    NameCheck.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        super().__init__()
        self.symbol_name = symbol_name

    def quiet_output(self):
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        message = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
        ).format(self.symbol_name)
        return self.textwrapper.fill(message)
147
class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a name does not conform to the regex pattern expected for
    its category. Created with NameCheck.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        super().__init__()
        self.pattern = pattern
        self.match = match

    def quiet_output(self):
        return "{0}:{1}:{2}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        message = (
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
        ).format(
            self.match.filename,
            self.match.line_no,
            self.match.name,
            self.pattern)
        return self.textwrapper.fill(message) + "\n" + str(self.match)
179
class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a word that starts with MBED or PSA is not defined as a
    macro nor as an enum value, and therefore looks like a typo. Created
    with NameCheck.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        super().__init__()
        self.match = match

    def quiet_output(self):
        return "{0}:{1}:{2}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        message = (
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
        ).format(self.match.filename, self.match.line_no, self.match.name)
        return self.textwrapper.fill(message) + "\n" + str(self.match)
206
class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        # log: logger used for all progress and debug output.
        self.log = log
        # Fail fast if not run from the Mbed TLS root, since every glob
        # expression used below is relative to the repository root.
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-2.x.h"]

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        # Macros are split by visibility: public (installed headers),
        # internal (library/test headers), private (library sources).
        all_macros = {"public": [], "internal": [], "private":[]}
        all_macros["public"] = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        all_macros["internal"] = self.parse_macros([
            "library/*.h",
            "tests/include/test/drivers/*.h",
        ])
        all_macros["private"] = self.parse_macros([
            "library/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "library/*.c",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers, excluded_identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ], ["3rdparty/p256-m/p256-m/p256-m.h"])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ], ["library/psa_crypto_driver_wrappers.h"])
        # This step builds the full-config library and inspects it with nm.
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc
        identifiers_justname = [x.name for x in identifiers]
        actual_macros = {"public": [], "internal": []}
        for scope in actual_macros:
            for macro in all_macros[scope]:
                if macro.name not in identifiers_justname:
                    actual_macros[scope].append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        for scope in actual_macros:
            self.log.debug("  {:4} Total {} Macros"
                           .format(len(all_macros[scope]), scope))
            self.log.debug("  {:4} {} Non-identifier Macros"
                           .format(len(actual_macros[scope]), scope))
        self.log.debug("  {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug("  {:4} Identifiers".format(len(identifiers)))
        self.log.debug("  {:4} Exported Symbols".format(len(symbols)))
        return {
            "public_macros": actual_macros["public"],
            "internal_macros": actual_macros["internal"],
            "private_macros": all_macros["private"],
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "excluded_identifiers": excluded_identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_all_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards
        and filter them into included and excluded lists.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns:
        * inc_files: A List of relative filepaths for included files.
        * exc_files: A List of relative filepaths for excluded files.
        """
        # Collect every candidate path once (a set removes duplicates when
        # a path matches several wildcards), then classify each path.
        accumulator = set()
        all_wildcards = include_wildcards + (exclude_wildcards or [])
        for wildcard in all_wildcards:
            accumulator = accumulator.union(glob.iglob(wildcard))

        inc_files = []
        exc_files = []
        for path in accumulator:
            if self.is_file_excluded(path, exclude_wildcards):
                exc_files.append(path)
            else:
                inc_files.append(path)
        return (inc_files, exc_files)

    def get_included_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        # Macro name prefixes that are never checked (assembly helpers,
        # compiler/SDK names, etc).
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                # NOTE(review): enumerate() is 0-based, so the line_no stored
                # in each Match is one less than the usual editor line number
                # -- confirm this offset is intended in reported locations.
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        # Lines carrying a "//no-check-names" marker or an #error directive
        # are skipped entirely.
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_included_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            # The FSM restarts at the top of each file.
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum " + \
                                    optional_c_identifier + \
                                    r" *{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        # Inside the braces, the first word on a
                        # non-preprocessor line is an enum constant.
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            for line_no, line in enumerate(header):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                # Lines that cannot hold a declaration (extern "C", bare
                # braces, empty lines, comments, preprocessor directives)
                # also reset any pending continuation.
                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style. Identifiers in excluded files are still
        parsed, but are returned separately.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns: a Tuple of two Lists of Match objects with identifiers.
        * included_identifiers: A List of Match objects with identifiers from
          included files.
        * excluded_identifiers: A List of Match objects with identifiers from
          excluded files.
        """

        included_files, excluded_files = \
            self.get_all_files(include, exclude)

        self.log.debug("Looking for included identifiers in {} files".format \
            (len(included_files)))

        included_identifiers = []
        excluded_identifiers = []
        for header_file in included_files:
            self.parse_identifiers_in_file(header_file, included_identifiers)
        for header_file in excluded_files:
            self.parse_identifiers_in_file(header_file, excluded_identifiers)

        return (included_identifiers, excluded_identifiers)

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/mbedtls_config.h",
            "include/mbedtls/mbedtls_config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/mbedtls_config.h.bak",
                "include/mbedtls/mbedtls_config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
        # Lines to ignore: undefined symbols ("U"), empty lines, and the
        # "filename:" header lines present in nm output.
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        # Capture the symbol name from a definition line, skipping any
        # leading underscores in the name.
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        # Symbols with these prefixes come from the bundled 3rdparty code
        # (presumably Everest) and are not subject to naming checks.
        exclusions = ("FStar", "Hacl")

        symbols = []

        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout

        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    # A line that is neither ignorable nor a valid symbol
                    # indicates unexpected nm output; surface it.
                    self.log.error(line)

        return symbols
768
class NameChecker():
    """
    Representation of the core name checking operation performed by this script.
    """
    def __init__(self, parse_result, log):
        """
        Args:
        * parse_result: dict mapping group names (e.g. "symbols",
          "identifiers", "public_macros") to the parsed entries, as produced
          by CodeParser.comprehensive_parse().
        * log: the logging.Logger used to report results and problems.
        """
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.

        Returns 0 if every check passed, 1 if any problems were found.
        """
        self.log.info("=============")
        Problem.quiet = quiet
        problems = 0
        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("public_macros", PUBLIC_MACRO_PATTERN),
            ("internal_macros", INTERNAL_MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(problems))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1
        else:
            self.log.info("PASS")
            return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        # Build the set of declared names once, so each symbol lookup is O(1)
        # instead of a linear scan per symbol (previously O(symbols * names)).
        declared_names = {
            identifier.name
            for identifier in self.parse_result["identifiers"] +
            self.parse_result["excluded_identifiers"]
        }

        # Order of reported problems follows the symbol list, as before.
        problems = [
            SymbolNotInHeader(symbol)
            for symbol in self.parse_result["symbols"]
            if symbol not in declared_names
        ]

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes parse_names_in_source() was called before this.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            # (plain substring test; no regex needed for this).
            if "__" in item_match.name:
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED are
        either defined as macros, or as enum constants.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["public_macros"] +
            self.parse_result["internal_macros"] +
            self.parse_result["private_macros"] +
            self.parse_result["enum_consts"]
            }
        # NOTE(review): in "MBEDTLS_TEST_LIBTESTDRIVER*" the '*' binds to the
        # final 'R' only (zero or more 'R's); since search() is unanchored the
        # practical effect is a prefix match, but ".*" may have been intended.
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                    r"MBEDTLS_TEST_LIBTESTDRIVER*|"
                                    r"PSA_CRYPTO_DRIVER_TEST")

        for name_match in self.parse_result["mbed_psa_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))
920
def main():
    """
    Entry point: parse the command-line options, then run CodeParser and
    NameChecker and exit with the resulting status code.

    Exit status is 0 on success, 1 when naming problems were found, and 2
    when the parsing stage raised an unexpected error.
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the Mbed TLS root directory.")
    )
    arg_parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    arg_parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )
    options = arg_parser.parse_args()

    # One shared root logger for both stages: DEBUG when verbose, else INFO.
    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.DEBUG if options.verbose else logging.INFO)

    # A failure while parsing the source tree is a script error (exit 2),
    # distinct from a naming-check failure (exit 1).
    try:
        parse_result = CodeParser(logger).comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    checker = NameChecker(parse_result, logger)
    sys.exit(checker.perform_checks(quiet=options.quiet))

if __name__ == "__main__":
    main()
966