1#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
4# SPDX-License-Identifier: Apache-2.0
5#
6# Licensed under the Apache License, Version 2.0 (the "License"); you may
7# not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""
19This script confirms that the naming of all symbols and identifiers in Mbed TLS
20are consistent with the house style and are also self-consistent. It only runs
21on Linux and macOS since it depends on nm.
22
23It contains two major Python classes, CodeParser and NameChecker. They both have
24a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
25but the individual functions can also be used for specific needs.
26
27CodeParser makes heavy use of regular expressions to parse the code, and is
28dependent on the current code formatting. Many Python C parser libraries require
29preprocessed C code, which means no macro parsing. Compiler tools are also not
30very helpful when we want the exact location in the original source (which
31becomes impossible when e.g. comments are stripped).
32
33NameChecker performs the following checks:
34
35- All exported and available symbols in the library object files, are explicitly
36  declared in the header files. This uses the nm command.
37- All macros, constants, and identifiers (function names, struct names, etc)
38  follow the required regex pattern.
39- Typo checking: All words that begin with MBED|PSA exist as macros or constants.
40
41The script returns 0 on success, 1 on test failure, and 2 if there is a script
42error. It must be run from Mbed TLS root.
43"""
44
45import abc
46import argparse
47import fnmatch
48import glob
49import textwrap
50import os
51import sys
52import traceback
53import re
54import enum
55import shutil
56import subprocess
57import logging
58
59import scripts_path # pylint: disable=unused-import
60from mbedtls_dev import build_tree
61
62
# Naming patterns to check against. These are defined outside the NameChecker
# class for ease of modification.
# Public macros: MBEDTLS_/PSA_ prefix, upper case, must end in a digit or
# upper-case letter (no trailing underscore).
PUBLIC_MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
# Internal macros: any case allowed, but must end in a digit or upper-case
# letter (no trailing underscore or lower-case letter).
INTERNAL_MACRO_PATTERN = r"^[0-9A-Za-z_]*[0-9A-Z]$"
# Enum constants follow the same naming rule as public macros.
CONSTANTS_PATTERN = PUBLIC_MACRO_PATTERN
# Identifiers: mbedtls_/psa_ prefix, lower case, must end in a digit or
# lower-case letter (no trailing underscore).
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
69
class Match(): # pylint: disable=too-few-public-methods
    """
    A single name occurrence, recording where in the source it was found.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.
        """
        gutter = "{:4d}".format(self.line_no)
        start, end = self.pos
        underline = " " * start + "^" * (end - start)
        pad = " " * len(gutter)

        listing = " {} |\n".format(pad)
        listing += " {} | {}".format(gutter, self.line)
        listing += " {} | {}\n".format(pad, underline)
        return listing
101
class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    Abstract parent class for every kind of static-analysis finding reported
    by this script. Being an Abstract Base Class, it cannot be instantiated
    directly, and it forces subclasses to implement both output formats.
    """
    # Class variable to control the quietness of all problems
    quiet = False

    def __init__(self):
        wrapper = textwrap.TextWrapper()
        wrapper.width = 80
        wrapper.initial_indent = "    > "
        wrapper.subsequent_indent = "      "
        self.textwrapper = wrapper

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        if not self.__class__.quiet:
            return self.verbose_output()
        return self.quiet_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
137
class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    Finding raised when a symbol exported/available in the object files has
    no explicit declaration in any header file. Created with
    NameCheck.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        super().__init__()

    def quiet_output(self):
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        message = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
            .format(self.symbol_name)
        )
        return self.textwrapper.fill(message)
159
class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    Finding raised when a name fails the regex pattern required for its
    category. Created with NameCheck.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        super().__init__()

    def quiet_output(self):
        return "{0}:{1}:{2}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        message = self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(
                self.match.filename,
                self.match.line_no,
                self.match.name,
                self.pattern
            ))
        return message + "\n" + str(self.match)
191
class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    Finding raised when a word using MBED or PSA is defined neither as a
    macro nor as an enum value, suggesting it is misspelt. Created with
    NameCheck.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        self.match = match
        super().__init__()

    def quiet_output(self):
        return "{0}:{1}:{2}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        explanation = (
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name)
        )
        return self.textwrapper.fill(explanation) + "\n" + str(self.match)
218
class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        """
        Args:
        * log: the logging.Logger to use for progress and error output.
        """
        self.log = log
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-2.x.h"]

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = {"public": [], "internal": [], "private":[]}
        all_macros["public"] = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        all_macros["internal"] = self.parse_macros([
            "library/*.h",
            "tests/include/test/drivers/*.h",
        ])
        all_macros["private"] = self.parse_macros([
            "library/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "library/*.c",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers, excluded_identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ], ["3rdparty/p256-m/p256-m/p256-m.h"])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ], ["library/psa_crypto_driver_wrappers.c"])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc.
        # A set gives O(1) membership tests instead of scanning the whole
        # identifier list once per macro.
        identifiers_justname = {x.name for x in identifiers}
        actual_macros = {"public": [], "internal": []}
        for scope in actual_macros:
            for macro in all_macros[scope]:
                if macro.name not in identifiers_justname:
                    actual_macros[scope].append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        for scope in actual_macros:
            self.log.debug("  {:4} Total {} Macros"
                           .format(len(all_macros[scope]), scope))
            self.log.debug("  {:4} {} Non-identifier Macros"
                           .format(len(actual_macros[scope]), scope))
        self.log.debug("  {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug("  {:4} Identifiers".format(len(identifiers)))
        self.log.debug("  {:4} Exported Symbols".format(len(symbols)))
        return {
            "public_macros": actual_macros["public"],
            "internal_macros": actual_macros["internal"],
            "private_macros": all_macros["private"],
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "excluded_identifiers": excluded_identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_all_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards
        and filter them into included and excluded lists.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns:
        * inc_files: A List of relative filepaths for included files.
        * exc_files: A List of relative filepaths for excluded files.
        """
        accumulator = set()
        all_wildcards = include_wildcards + (exclude_wildcards or [])
        for wildcard in all_wildcards:
            accumulator = accumulator.union(glob.iglob(wildcard))

        inc_files = []
        exc_files = []
        for path in accumulator:
            if self.is_file_excluded(path, exclude_wildcards):
                exc_files.append(path)
            else:
                inc_files.append(path)
        return (inc_files, exc_files)

    def get_included_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                # Start at 1 so that reported line numbers match what editors
                # and compilers display.
                for line_no, line in enumerate(header, 1):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_included_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                # 1-based line numbers, to match editor/compiler output.
                for line_no, line in enumerate(fp, 1):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        # Optional C identifier (e.g. an enum tag name). Loop-invariant, so
        # defined once here rather than once per line.
        optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                # 1-based line numbers, to match editor/compiler output.
                for line_no, line in enumerate(header, 1):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum " + \
                                    optional_c_identifier + \
                                    r" *{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            # 1-based line numbers, to match editor/compiler output.
            for line_no, line in enumerate(header, 1):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style. Identifiers in excluded files are still
        parsed.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns: a Tuple of two Lists of Match objects with identifiers.
        * included_identifiers: A List of Match objects with identifiers from
          included files.
        * excluded_identifiers: A List of Match objects with identifiers from
          excluded files.
        """

        included_files, excluded_files = \
            self.get_all_files(include, exclude)

        self.log.debug("Looking for included identifiers in {} files"
                       .format(len(included_files)))

        included_identifiers = []
        excluded_identifiers = []
        for header_file in included_files:
            self.parse_identifiers_in_file(header_file, included_identifiers)
        for header_file in excluded_files:
            self.parse_identifiers_in_file(header_file, excluded_identifiers)

        return (included_identifiers, excluded_identifiers)

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/mbedtls_config.h",
            "include/mbedtls/mbedtls_config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/mbedtls_config.h.bak",
                "include/mbedtls/mbedtls_config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        exclusions = ("FStar", "Hacl")

        symbols = []

        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout

        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    self.log.error(line)

        return symbols
780
781class NameChecker():
782    """
783    Representation of the core name checking operation performed by this script.
784    """
    def __init__(self, parse_result, log):
        """
        Args:
        * parse_result: dict of parsed item key to List of Matches, as
          returned by CodeParser.comprehensive_parse().
        * log: the logging.Logger to use for output.
        """
        self.parse_result = parse_result
        self.log = log
788
789    def perform_checks(self, quiet=False):
790        """
791        A comprehensive checker that performs each check in order, and outputs
792        a final verdict.
793
794        Args:
795        * quiet: whether to hide detailed problem explanation.
796        """
797        self.log.info("=============")
798        Problem.quiet = quiet
799        problems = 0
800        problems += self.check_symbols_declared_in_header()
801
802        pattern_checks = [
803            ("public_macros", PUBLIC_MACRO_PATTERN),
804            ("internal_macros", INTERNAL_MACRO_PATTERN),
805            ("enum_consts", CONSTANTS_PATTERN),
806            ("identifiers", IDENTIFIER_PATTERN)
807        ]
808        for group, check_pattern in pattern_checks:
809            problems += self.check_match_pattern(group, check_pattern)
810
811        problems += self.check_for_typos()
812
813        self.log.info("=============")
814        if problems > 0:
815            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
816            if quiet:
817                self.log.info("Remove --quiet to see explanations.")
818            else:
819                self.log.info("Use --quiet for minimal output.")
820            return 1
821        else:
822            self.log.info("PASS")
823            return 0
824
825    def check_symbols_declared_in_header(self):
826        """
827        Perform a check that all detected symbols in the library object files
828        are properly declared in headers.
829        Assumes parse_names_in_source() was called before this.
830
831        Returns the number of problems that need fixing.
832        """
833        problems = []
834        all_identifiers = self.parse_result["identifiers"] +  \
835            self.parse_result["excluded_identifiers"]
836
837        for symbol in self.parse_result["symbols"]:
838            found_symbol_declared = False
839            for identifier_match in all_identifiers:
840                if symbol == identifier_match.name:
841                    found_symbol_declared = True
842                    break
843
844            if not found_symbol_declared:
845                problems.append(SymbolNotInHeader(symbol))
846
847        self.output_check_result("All symbols in header", problems)
848        return len(problems)
849
850    def check_match_pattern(self, group_to_check, check_pattern):
851        """
852        Perform a check that all items of a group conform to a regex pattern.
853        Assumes parse_names_in_source() was called before this.
854
855        Args:
856        * group_to_check: string key to index into self.parse_result.
857        * check_pattern: the regex to check against.
858
859        Returns the number of problems that need fixing.
860        """
861        problems = []
862
863        for item_match in self.parse_result[group_to_check]:
864            if not re.search(check_pattern, item_match.name):
865                problems.append(PatternMismatch(check_pattern, item_match))
866            # Double underscore should not be used for names
867            if re.search(r".*__.*", item_match.name):
868                problems.append(
869                    PatternMismatch("no double underscore allowed", item_match))
870
871        self.output_check_result(
872            "Naming patterns of {}".format(group_to_check),
873            problems)
874        return len(problems)
875
876    def check_for_typos(self):
877        """
878        Perform a check that all words in the source code beginning with MBED are
879        either defined as macros, or as enum constants.
880        Assumes parse_names_in_source() was called before this.
881
882        Returns the number of problems that need fixing.
883        """
884        problems = []
885
886        # Set comprehension, equivalent to a list comprehension wrapped by set()
887        all_caps_names = {
888            match.name
889            for match
890            in self.parse_result["public_macros"] +
891            self.parse_result["internal_macros"] +
892            self.parse_result["private_macros"] +
893            self.parse_result["enum_consts"]
894            }
895        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
896                                    r"MBEDTLS_TEST_LIBTESTDRIVER*|"
897                                    r"PSA_CRYPTO_DRIVER_TEST")
898
899        for name_match in self.parse_result["mbed_psa_words"]:
900            found = name_match.name in all_caps_names
901
902            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
903            # PSA driver, they will not exist as macros. However, they
904            # should still be checked for typos using the equivalent
905            # BUILTINs that exist.
906            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
907                found = name_match.name.replace(
908                    "MBEDTLS_PSA_ACCEL_",
909                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names
910
911            if not found and not typo_exclusion.search(name_match.name):
912                problems.append(Typo(name_match))
913
914        self.output_check_result("Likely typos", problems)
915        return len(problems)
916
917    def output_check_result(self, name, problems):
918        """
919        Write out the PASS/FAIL status of a performed check depending on whether
920        there were problems.
921
922        Args:
923        * name: the name of the test
924        * problems: a List of encountered Problems
925        """
926        if problems:
927            self.log.info("{}: FAIL\n".format(name))
928            for problem in problems:
929                self.log.warning(str(problem))
930        else:
931            self.log.info("{}: PASS".format(name))
932
def main():
    """
    Perform argument parsing, and create an instance of CodeParser and
    NameChecker to begin the core operation.
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the MbedTLS root directory.")
    )
    arg_parser.add_argument("-v", "--verbose",
                            action="store_true",
                            help="show parse results")
    arg_parser.add_argument("-q", "--quiet",
                            action="store_true",
                            help="hide unnecessary text, explanations, and highlights")
    options = arg_parser.parse_args()

    # Configure the global logger, which is then passed to the classes below.
    log = logging.getLogger()
    log.setLevel(logging.DEBUG if options.verbose else logging.INFO)
    log.addHandler(logging.StreamHandler())

    # Parsing failures are script errors (exit code 2), not check failures.
    try:
        parse_result = CodeParser(log).comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    checker = NameChecker(parse_result, log)
    sys.exit(checker.perform_checks(quiet=options.quiet))

if __name__ == "__main__":
    main()
978