1#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
4# SPDX-License-Identifier: Apache-2.0
5#
6# Licensed under the Apache License, Version 2.0 (the "License"); you may
7# not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""
19This script confirms that the naming of all symbols and identifiers in Mbed TLS
20are consistent with the house style and are also self-consistent. It only runs
21on Linux and macOS since it depends on nm.
22
23It contains two major Python classes, CodeParser and NameChecker. They both have
24a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
25but the individual functions can also be used for specific needs.
26
27CodeParser makes heavy use of regular expressions to parse the code, and is
28dependent on the current code formatting. Many Python C parser libraries require
29preprocessed C code, which means no macro parsing. Compiler tools are also not
30very helpful when we want the exact location in the original source (which
31becomes impossible when e.g. comments are stripped).
32
33NameChecker performs the following checks:
34
35- All exported and available symbols in the library object files, are explicitly
36  declared in the header files. This uses the nm command.
37- All macros, constants, and identifiers (function names, struct names, etc)
38  follow the required regex pattern.
39- Typo checking: All words that begin with MBED exist as macros or constants.
40
41The script returns 0 on success, 1 on test failure, and 2 if there is a script
42error. It must be run from Mbed TLS root.
43"""
44
45import abc
46import argparse
47import fnmatch
48import glob
49import textwrap
50import os
51import sys
52import traceback
53import re
54import enum
55import shutil
56import subprocess
57import logging
58
59import scripts_path # pylint: disable=unused-import
60from mbedtls_dev import build_tree
61
62
# Naming patterns to check against. These are defined outside the NameCheck
# class for ease of modification.
# Public macros: MBEDTLS_ or PSA_ prefix, then upper-case letters, digits and
# underscores, ending in a non-underscore character.
PUBLIC_MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
# Internal macros: any case is allowed in the body, but the name must end in
# an upper-case letter or digit (in particular, not an underscore).
INTERNAL_MACRO_PATTERN = r"^[0-9A-Za-z_]*[0-9A-Z]$"
# Enum constants follow the same convention as public macros.
CONSTANTS_PATTERN = PUBLIC_MACRO_PATTERN
# Identifiers: mbedtls_ or psa_ prefix, lower-case letters, digits and
# underscores, ending in a non-underscore character.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
69
class Match(): # pylint: disable=too-few-public-methods
    """
    A single source-code match, together with the position it was found at.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Render this match as a three-line code listing: an empty gutter row,
        the offending line with its line number, and a caret underline.
        """
        gutter = format(self.line_no, "4d")
        blank_gutter = " " * len(gutter)
        start, end = self.pos
        underline = " " * start + "^" * (end - start)

        listing = [
            " {0} |\n".format(blank_gutter),
            " {0} | {1}".format(gutter, self.line),
            " {0} | {1}\n".format(blank_gutter, underline),
        ]
        return "".join(listing)
101
class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    Abstract parent class for every kind of static-analysis finding.
    Being an Abstract Base Class, it cannot be instantiated directly, and
    subclasses are required to implement the abstract output methods.
    """
    # Class variable to control the quietness of all problems
    quiet = False

    def __init__(self):
        # Shared wrapper used by subclasses to format verbose explanations.
        wrapper = textwrap.TextWrapper()
        wrapper.width = 80
        wrapper.initial_indent = "    > "
        wrapper.subsequent_indent = "      "
        self.textwrapper = wrapper

    def __str__(self):
        """
        Unified string representation: quiet or verbose depending on the
        class-wide quiet flag.
        """
        if self.__class__.quiet:
            return self.quiet_output()
        return self.verbose_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
137
class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    Reports a symbol that is exported/available in the library object files
    but has no declaration in any header file. Created with
    NameCheck.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        super().__init__()

    def quiet_output(self):
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        message = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
        ).format(self.symbol_name)
        return self.textwrapper.fill(message)
159
class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    Reports a name that fails to match the regex pattern it is required to
    follow. Created with NameCheck.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        super().__init__()

    def quiet_output(self):
        return "{0}:{1}:{2}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        explanation = self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(
                self.match.filename,
                self.match.line_no,
                self.match.name,
                self.pattern))
        # Append the code listing produced by Match.__str__.
        return explanation + "\n" + str(self.match)
191
class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    Reports a word using MBED that does not appear to be defined as a macro
    nor as an enum value, and therefore looks like a typo. Created with
    NameCheck.check_for_typos()

    Fields:
    * match: the Match object of the MBED name in question.
    """
    def __init__(self, match):
        self.match = match
        super().__init__()

    def quiet_output(self):
        return "{0}:{1}:{2}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        explanation = self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name))
        # Append the code listing produced by Match.__str__.
        return explanation + "\n" + str(self.match)
217
class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.

    NOTE(review): the line_no stored in each produced Match comes from
    enumerate() with its default start of 0, so reported line numbers are
    0-based (one less than the conventional editor line number) — confirm
    before relying on "file:line" positions in the output.
    """
    def __init__(self, log):
        # log: a logging.Logger used for progress and diagnostic output.
        self.log = log
        # Raises if we are not running from the Mbed TLS root directory.
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-2.x.h"]

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = {"public": [], "internal": []}
        all_macros["public"] = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        all_macros["internal"] = self.parse_macros([
            "library/*.h",
            "tests/include/test/drivers/*.h",
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        mbed_words = self.parse_mbed_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ], ["library/psa_crypto_driver_wrappers.c"])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc
        identifiers_justname = [x.name for x in identifiers]
        actual_macros = {"public": [], "internal": []}
        for scope in actual_macros:
            for macro in all_macros[scope]:
                if macro.name not in identifiers_justname:
                    actual_macros[scope].append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        for scope in actual_macros:
            self.log.debug("  {:4} Total {} Macros"
                           .format(len(all_macros[scope]), scope))
            self.log.debug("  {:4} {} Non-identifier Macros"
                           .format(len(actual_macros[scope]), scope))
        self.log.debug("  {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug("  {:4} Identifiers".format(len(identifiers)))
        self.log.debug("  {:4} Exported Symbols".format(len(symbols)))
        return {
            "public_macros": actual_macros["public"],
            "internal_macros": actual_macros["internal"],
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "symbols": symbols,
            "mbed_words": mbed_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the UNIX-style wildcards. While the
        check_names script is designed only for use on UNIX/macOS (due to nm),
        this function alone would work fine on Windows even with forward slashes
        in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        # A set is used so that a file matched by several include wildcards
        # appears only once in the result.
        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        # Macro names starting with any of these prefixes are exempt from
        # the naming checks.
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        # str.startswith accepts a tuple of prefixes.
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED, in and out of macros,
        comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*")
        # Lines carrying a no-check-names waiver or an #error directive are
        # skipped entirely.
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_files(include, exclude)
        self.log.debug("Looking for MBED words in {} files".format(len(files)))

        mbed_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            # Each file starts outside any enum declaration.
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum +{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        # Inside the braces: the first word on a
                        # non-preprocessor line is an enum constant.
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                # The whole line is inside the comment.
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            for line_no, line in enumerate(header):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects with identifiers.
        """

        files = self.get_files(include, exclude)
        self.log.debug("Looking for identifiers in {} files".format(len(files)))

        identifiers = []
        for header_file in files:
            self.parse_identifiers_in_file(header_file, identifiers)

        return identifiers

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus need to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/mbedtls_config.h",
            "include/mbedtls/mbedtls_config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/mbedtls_config.h.bak",
                "include/mbedtls/mbedtls_config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
        # Lines to ignore: undefined ("U") symbols, empty lines, and the
        # per-object-file banner lines ("file:").
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        # Symbols from the bundled Everest code are exempt.
        exclusions = ("FStar", "Hacl")

        symbols = []

        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout

        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    # Unparseable nm output: log it so it is not silently lost.
                    self.log.error(line)

        return symbols
725
class NameChecker():
    """
    Representation of the core name checking operation performed by this script.
    """
    def __init__(self, parse_result, log):
        # parse_result: dict produced by CodeParser.comprehensive_parse(),
        # mapping group keys (e.g. "symbols", "identifiers") to Match lists.
        self.parse_result = parse_result
        # log: a logging.Logger for check results and the final verdict.
        self.log = log
733
734    def perform_checks(self, quiet=False):
735        """
736        A comprehensive checker that performs each check in order, and outputs
737        a final verdict.
738
739        Args:
740        * quiet: whether to hide detailed problem explanation.
741        """
742        self.log.info("=============")
743        Problem.quiet = quiet
744        problems = 0
745        problems += self.check_symbols_declared_in_header()
746
747        pattern_checks = [
748            ("public_macros", PUBLIC_MACRO_PATTERN),
749            ("internal_macros", INTERNAL_MACRO_PATTERN),
750            ("enum_consts", CONSTANTS_PATTERN),
751            ("identifiers", IDENTIFIER_PATTERN)
752        ]
753        for group, check_pattern in pattern_checks:
754            problems += self.check_match_pattern(group, check_pattern)
755
756        problems += self.check_for_typos()
757
758        self.log.info("=============")
759        if problems > 0:
760            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
761            if quiet:
762                self.log.info("Remove --quiet to see explanations.")
763            else:
764                self.log.info("Use --quiet for minimal output.")
765            return 1
766        else:
767            self.log.info("PASS")
768            return 0
769
770    def check_symbols_declared_in_header(self):
771        """
772        Perform a check that all detected symbols in the library object files
773        are properly declared in headers.
774        Assumes parse_names_in_source() was called before this.
775
776        Returns the number of problems that need fixing.
777        """
778        problems = []
779
780        for symbol in self.parse_result["symbols"]:
781            found_symbol_declared = False
782            for identifier_match in self.parse_result["identifiers"]:
783                if symbol == identifier_match.name:
784                    found_symbol_declared = True
785                    break
786
787            if not found_symbol_declared:
788                problems.append(SymbolNotInHeader(symbol))
789
790        self.output_check_result("All symbols in header", problems)
791        return len(problems)
792
793    def check_match_pattern(self, group_to_check, check_pattern):
794        """
795        Perform a check that all items of a group conform to a regex pattern.
796        Assumes parse_names_in_source() was called before this.
797
798        Args:
799        * group_to_check: string key to index into self.parse_result.
800        * check_pattern: the regex to check against.
801
802        Returns the number of problems that need fixing.
803        """
804        problems = []
805
806        for item_match in self.parse_result[group_to_check]:
807            if not re.search(check_pattern, item_match.name):
808                problems.append(PatternMismatch(check_pattern, item_match))
809            # Double underscore should not be used for names
810            if re.search(r".*__.*", item_match.name):
811                problems.append(
812                    PatternMismatch("no double underscore allowed", item_match))
813
814        self.output_check_result(
815            "Naming patterns of {}".format(group_to_check),
816            problems)
817        return len(problems)
818
819    def check_for_typos(self):
820        """
821        Perform a check that all words in the source code beginning with MBED are
822        either defined as macros, or as enum constants.
823        Assumes parse_names_in_source() was called before this.
824
825        Returns the number of problems that need fixing.
826        """
827        problems = []
828
829        # Set comprehension, equivalent to a list comprehension wrapped by set()
830        all_caps_names = {
831            match.name
832            for match
833            in self.parse_result["public_macros"] +
834            self.parse_result["internal_macros"] +
835            self.parse_result["enum_consts"]
836            }
837        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
838                                    r"MBEDTLS_TEST_LIBTESTDRIVER*")
839
840        for name_match in self.parse_result["mbed_words"]:
841            found = name_match.name in all_caps_names
842
843            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
844            # PSA driver, they will not exist as macros. However, they
845            # should still be checked for typos using the equivalent
846            # BUILTINs that exist.
847            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
848                found = name_match.name.replace(
849                    "MBEDTLS_PSA_ACCEL_",
850                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names
851
852            if not found and not typo_exclusion.search(name_match.name):
853                problems.append(Typo(name_match))
854
855        self.output_check_result("Likely typos", problems)
856        return len(problems)
857
858    def output_check_result(self, name, problems):
859        """
860        Write out the PASS/FAIL status of a performed check depending on whether
861        there were problems.
862
863        Args:
864        * name: the name of the test
865        * problems: a List of encountered Problems
866        """
867        if problems:
868            self.log.info("{}: FAIL\n".format(name))
869            for problem in problems:
870                self.log.warning(str(problem))
871        else:
872            self.log.info("{}: PASS".format(name))
873
def main():
    """
    Perform argument parsing, and create an instance of CodeParser and
    NameChecker to begin the core operation.
    """
    argument_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the MbedTLS root directory.")
    )
    argument_parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    argument_parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )

    options = argument_parser.parse_args()

    # Configure the global logger, which is then passed to the classes below
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG if options.verbose else logging.INFO)
    logger.addHandler(logging.StreamHandler())

    # Parsing failures are script errors rather than check failures:
    # show the traceback and exit with status 2.
    try:
        parse_result = CodeParser(logger).comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    # perform_checks() returns 0 on success and 1 on check failure, which
    # becomes the process exit status.
    checker = NameChecker(parse_result, logger)
    sys.exit(checker.perform_checks(quiet=options.quiet))
916
917if __name__ == "__main__":
918    main()
919