1#!/usr/bin/env python3
2"""This script compares the interfaces of two versions of Mbed TLS, looking
3for backward incompatibilities between two different Git revisions within
4an Mbed TLS repository. It must be run from the root of a Git working tree.
5
6### How the script works ###
7
8For the source (API) and runtime (ABI) interface compatibility, this script
9is a small wrapper around the abi-compliance-checker and abi-dumper tools,
10applying them to compare the header and library files.
11
12For the storage format, this script compares the automatically generated
13storage tests and the manual read tests, and complains if there is a
14reduction in coverage. A change in test data will be signaled as a
15coverage reduction since the old test data is no longer present. A change in
16how test data is presented will be signaled as well; this would be a false
17positive.
18
19The results of the API/ABI comparison are either formatted as HTML and stored
20at a configurable location, or are given as a brief list of problems.
21Returns 0 on success, 1 on non-compliance, and 2 if there is an error
22while running the script.
23
24### How to interpret non-compliance ###
25
26This script has relatively common false positives. In many scenarios, it only
27reports a pass if there is a strict textual match between the old version and
28the new version, and it reports problems where there is a sufficient semantic
29match but not a textual match. This section lists some common false positives.
30This is not an exhaustive list: in the end what matters is whether we are
31breaking a backward compatibility goal.
32
33**API**: the goal is that if an application works with the old version of the
34library, it can be recompiled against the new version and will still work.
35This is normally validated by comparing the declarations in `include/*/*.h`.
36A failure is a declaration that has disappeared or that now has a different
37type.
38
39  * It's ok to change or remove macros and functions that are documented as
40    for internal use only or as experimental.
41  * It's ok to rename function or macro parameters as long as the semantics
42    has not changed.
43  * It's ok to change or remove structure fields that are documented as
44    private.
45  * It's ok to add fields to a structure that already had private fields
46    or was documented as extensible.
47
48**ABI**: the goal is that if an application was built against the old version
49of the library, the same binary will work when linked against the new version.
50This is normally validated by comparing the symbols exported by `libmbed*.so`.
51A failure is a symbol that is no longer exported by the same library or that
52now has a different type.
53
54  * All ABI changes are acceptable if the library version is bumped
55    (see `scripts/bump_version.sh`).
56  * ABI changes that concern functions which are declared only inside the
57    library directory, and not in `include/*/*.h`, are acceptable only if
58    the function was only ever used inside the same library (libmbedcrypto,
59    libmbedx509, libmbedtls). As a counter example, if the old version
60    of libmbedtls calls mbedtls_foo() from libmbedcrypto, and the new version
61    of libmbedcrypto no longer has a compatible mbedtls_foo(), this does
62    require a version bump for libmbedcrypto.
63
64**Storage format**: the goal is to check that persistent keys stored by the
65old version can be read by the new version. This is normally validated by
66comparing the `*read*` test cases in `test_suite*storage_format*.data`.
67A failure is a storage read test case that is no longer present with the same
68function name and parameter list.
69
70  * It's ok if the same test data is present, but its presentation has changed,
71    for example if a test function is renamed or has different parameters.
72  * It's ok if redundant tests are removed.
73
74**Generated test coverage**: the goal is to check that automatically
75generated tests have as much coverage as before. This is normally validated
76by comparing the test cases that are automatically generated by a script.
77A failure is a generated test case that is no longer present with the same
78function name and parameter list.
79
80  * It's ok if the same test data is present, but its presentation has changed,
81    for example if a test function is renamed or has different parameters.
82  * It's ok if redundant tests are removed.
83
84"""
85
86# Copyright The Mbed TLS Contributors
87# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
88
89import glob
90import os
91import re
92import sys
93import traceback
94import shutil
95import subprocess
96import argparse
97import logging
98import tempfile
99import fnmatch
100from types import SimpleNamespace
101
102import xml.etree.ElementTree as ET
103
104from mbedtls_dev import build_tree
105
106
class AbiChecker:
    """API, ABI and storage format compatibility checker.

    Each of the two versions to compare is checked out in a temporary Git
    worktree. Depending on the configuration, the shared libraries are built
    and dumped with abi-dumper, and/or the storage format test cases are
    collected. The information gathered from both versions is then compared.
    """

    def __init__(self, old_version, new_version, configuration):
        """Instantiate the API/ABI checker.

        old_version: namespace with the details of the version to compare
                     against (repository, revision, commit,
                     crypto_repository, crypto_revision, version, modules,
                     abi_dumps, storage_tests)
        new_version: namespace with the details of the version to check
                     (same attributes as old_version)
        configuration.verbose: if true, log at debug level
        configuration.report_dir: directory for output files
        configuration.keep_all_reports: if false, delete old reports
        configuration.brief: if true, output shorter report to stdout
        configuration.check_abi: if true, compare ABIs
        configuration.check_api: if true, compare APIs
        configuration.check_storage: if true, compare storage format tests
        configuration.skip_file: path to file containing symbols and types to skip

        Raise an Exception if check_abi and check_api differ, since the two
        checks cannot be run separately.
        """
        self.repo_path = "."
        self.log = None
        self.verbose = configuration.verbose
        self._setup_logger()
        self.report_dir = os.path.abspath(configuration.report_dir)
        self.keep_all_reports = configuration.keep_all_reports
        # Only remove the report directory at the end if this run is the one
        # that creates it and the user did not ask to keep the reports.
        self.can_remove_report_dir = not (os.path.exists(self.report_dir) or
                                          self.keep_all_reports)
        self.old_version = old_version
        self.new_version = new_version
        self.skip_file = configuration.skip_file
        self.check_abi = configuration.check_abi
        self.check_api = configuration.check_api
        if self.check_abi != self.check_api:
            raise Exception('Checking API without ABI or vice versa is not supported')
        self.check_storage_tests = configuration.check_storage
        self.brief = configuration.brief
        self.git_command = "git"
        self.make_command = "make"

    def _setup_logger(self):
        """Configure the root logger: level from self.verbose, stream output."""
        self.log = logging.getLogger()
        if self.verbose:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)
        self.log.addHandler(logging.StreamHandler())

    @staticmethod
    def check_abi_tools_are_installed():
        """Raise an Exception if abi-dumper or abi-compliance-checker is
        not found in the command search path."""
        for command in ["abi-dumper", "abi-compliance-checker"]:
            if not shutil.which(command):
                raise Exception("{} not installed, aborting".format(command))

    def _get_clean_worktree_for_git_revision(self, version):
        """Make a separate worktree with version.revision checked out.
        Do not modify the current worktree.

        Set version.commit to the commit that was checked out and return
        the path to the new worktree.
        """
        git_worktree_path = tempfile.mkdtemp()
        try:
            if version.repository:
                self.log.debug(
                    "Checking out git worktree for revision {} from {}".format(
                        version.revision, version.repository
                    )
                )
                fetch_output = subprocess.check_output(
                    [self.git_command, "fetch",
                     version.repository, version.revision],
                    cwd=self.repo_path,
                    stderr=subprocess.STDOUT
                )
                self.log.debug(fetch_output.decode("utf-8"))
                worktree_rev = "FETCH_HEAD"
            else:
                self.log.debug(
                    "Checking out git worktree for revision {}".format(
                        version.revision
                    )
                )
                worktree_rev = version.revision
            worktree_output = subprocess.check_output(
                [self.git_command, "worktree", "add", "--detach",
                 git_worktree_path, worktree_rev],
                cwd=self.repo_path,
                stderr=subprocess.STDOUT
            )
            self.log.debug(worktree_output.decode("utf-8"))
            version.commit = subprocess.check_output(
                [self.git_command, "rev-parse", "HEAD"],
                cwd=git_worktree_path,
                stderr=subprocess.STDOUT
            ).decode("ascii").rstrip()
        except Exception:
            # The caller never learns the path when this function fails,
            # so the temporary directory must be removed here.
            shutil.rmtree(git_worktree_path, ignore_errors=True)
            raise
        self.log.debug("Commit is {}".format(version.commit))
        return git_worktree_path

    def _update_git_submodules(self, git_worktree_path, version):
        """Initialize and update the submodules of the given worktree.

        If the worktree has a "crypto" directory and version.crypto_revision
        is set, check out that revision there (fetching it from
        version.crypto_repository first if one is given). Otherwise the
        submodules are left at their default revision.
        """
        update_output = subprocess.check_output(
            [self.git_command, "submodule", "update", "--init", '--recursive'],
            cwd=git_worktree_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(update_output.decode("utf-8"))
        crypto_path = os.path.join(git_worktree_path, "crypto")
        if not (os.path.exists(crypto_path) and version.crypto_revision):
            return

        if version.crypto_repository:
            fetch_output = subprocess.check_output(
                [self.git_command, "fetch", version.crypto_repository,
                 version.crypto_revision],
                cwd=crypto_path,
                stderr=subprocess.STDOUT
            )
            self.log.debug(fetch_output.decode("utf-8"))
            crypto_rev = "FETCH_HEAD"
        else:
            crypto_rev = version.crypto_revision

        checkout_output = subprocess.check_output(
            [self.git_command, "checkout", crypto_rev],
            cwd=crypto_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(checkout_output.decode("utf-8"))

    def _build_shared_libraries(self, git_worktree_path, version):
        """Build the shared libraries in the specified worktree.

        Record every "*.so" file found under the worktree in version.modules,
        keyed by the file name without its extension.
        """
        my_environment = os.environ.copy()
        my_environment["CFLAGS"] = "-g -Og"
        my_environment["SHARED"] = "1"
        if os.path.exists(os.path.join(git_worktree_path, "crypto")):
            my_environment["USE_CRYPTO_SUBMODULE"] = "1"
        make_output = subprocess.check_output(
            [self.make_command, "lib"],
            env=my_environment,
            cwd=git_worktree_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(make_output.decode("utf-8"))
        for root, _dirs, files in os.walk(git_worktree_path):
            for library in fnmatch.filter(files, "*.so"):
                module_name = os.path.splitext(library)[0]
                version.modules[module_name] = os.path.join(root, library)

    @staticmethod
    def _pretty_revision(version):
        """Return the revision name, followed by the commit in parentheses
        if it differs from the revision name."""
        if version.revision == version.commit:
            return version.revision
        return "{} ({})".format(version.revision, version.commit)

    def _get_abi_dumps_from_shared_libraries(self, version):
        """Generate the ABI dumps for the specified git revision.
        The shared libraries must have been built and the module paths
        present in version.modules.

        The path of each dump is recorded in version.abi_dumps.
        """
        for mbed_module, module_path in version.modules.items():
            output_path = os.path.join(
                self.report_dir, "{}-{}-{}.dump".format(
                    mbed_module, version.revision, version.version
                )
            )
            abi_dump_command = [
                "abi-dumper",
                module_path,
                "-o", output_path,
                "-lver", self._pretty_revision(version),
            ]
            abi_dump_output = subprocess.check_output(
                abi_dump_command,
                stderr=subprocess.STDOUT
            )
            self.log.debug(abi_dump_output.decode("utf-8"))
            version.abi_dumps[mbed_module] = output_path

    @staticmethod
    def _normalize_storage_test_case_data(line):
        """Eliminate cosmetic or irrelevant details in storage format test cases.

        Currently this removes all whitespace from the line.
        """
        return re.sub(r'\s+', r'', line)

    def _read_storage_tests(self,
                            directory,
                            filename,
                            is_generated,
                            storage_tests):
        """Record storage tests from the given file.

        Populate the storage_tests dictionary with test cases read from
        filename under directory. The keys are the normalized test case
        data lines; the values hold the file name, line number and
        description of the test case.

        If is_generated is false, only record test cases whose function
        name contains the word "read".
        """
        at_paragraph_start = True
        description = None
        full_path = os.path.join(directory, filename)
        # Read with a fixed encoding so that the result does not depend on
        # the locale of the machine running the script.
        with open(full_path, encoding='utf-8') as fd:
            for line_number, line in enumerate(fd, 1):
                line = line.strip()
                if not line:
                    # A blank line ends the current test case.
                    at_paragraph_start = True
                    continue
                if line.startswith('#'):
                    continue
                if at_paragraph_start:
                    # The first line of a test case is its description.
                    description = line
                    at_paragraph_start = False
                    continue
                if line.startswith('depends_on:'):
                    continue
                # We've reached a test case data line
                test_case_data = self._normalize_storage_test_case_data(line)
                if not is_generated:
                    # In manual test data, only look at read tests.
                    function_name = test_case_data.split(':', 1)[0]
                    if 'read' not in function_name.split('_'):
                        continue
                storage_tests[test_case_data] = SimpleNamespace(
                    filename=filename,
                    line_number=line_number,
                    description=description
                )

    @staticmethod
    def _list_generated_test_data_files(git_worktree_path):
        """List the generated test data files.

        Return the non-empty lines printed by
        "tests/scripts/generate_psa_tests.py --list" run in the worktree.
        """
        output = subprocess.check_output(
            ['tests/scripts/generate_psa_tests.py', '--list'],
            cwd=git_worktree_path,
        ).decode('ascii')
        return [line for line in output.splitlines() if line]

    def _get_storage_format_tests(self, version, git_worktree_path):
        """Record the storage format tests for the specified git version.

        The storage format tests are the test suite data files whose name
        contains "storage_format".

        The version must be checked out at git_worktree_path.

        This function creates or updates the generated data files.
        """
        # Existing test data files. This may be missing some automatically
        # generated files if they haven't been generated yet.
        # Look inside the worktree, not the current directory, because the
        # files are read from the worktree below. Keep the names relative
        # to the worktree, like the names of the generated files.
        pattern = os.path.join(
            glob.escape(git_worktree_path),
            'tests', 'suites', 'test_suite_*storage_format*.data'
        )
        storage_data_files = set(
            os.path.relpath(path, git_worktree_path)
            for path in glob.glob(pattern)
        )
        # Discover and (re)generate automatically generated data files.
        to_be_generated = set()
        for filename in self._list_generated_test_data_files(git_worktree_path):
            if 'storage_format' in filename:
                storage_data_files.add(filename)
                to_be_generated.add(filename)
        subprocess.check_call(
            ['tests/scripts/generate_psa_tests.py'] + sorted(to_be_generated),
            cwd=git_worktree_path,
        )
        for test_file in sorted(storage_data_files):
            self._read_storage_tests(git_worktree_path,
                                     test_file,
                                     test_file in to_be_generated,
                                     version.storage_tests)

    def _cleanup_worktree(self, git_worktree_path):
        """Remove the specified git worktree."""
        shutil.rmtree(git_worktree_path)
        # Make Git forget about the worktree whose directory is now gone.
        worktree_output = subprocess.check_output(
            [self.git_command, "worktree", "prune"],
            cwd=self.repo_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(worktree_output.decode("utf-8"))

    def _get_abi_dump_for_ref(self, version):
        """Generate the interface information for the specified git revision.

        The temporary worktree is removed even if collecting the
        information fails.
        """
        git_worktree_path = self._get_clean_worktree_for_git_revision(version)
        try:
            self._update_git_submodules(git_worktree_path, version)
            if self.check_abi:
                self._build_shared_libraries(git_worktree_path, version)
                self._get_abi_dumps_from_shared_libraries(version)
            if self.check_storage_tests:
                self._get_storage_format_tests(version, git_worktree_path)
        finally:
            self._cleanup_worktree(git_worktree_path)

    def _remove_children_with_tag(self, parent, tag):
        """Remove all descendants of parent that have the given tag."""
        # Iterate over a copy since children are removed during the loop.
        # (Element.getchildren() no longer exists in Python >=3.9.)
        for child in list(parent):
            if child.tag == tag:
                parent.remove(child)
            else:
                self._remove_children_with_tag(child, tag)

    def _remove_extra_detail_from_report(self, report_root):
        """Strip an XML compatibility report down to the list of problems.

        Remove the informational elements anywhere in the tree, then remove
        the elements directly under each report that have no children left.
        """
        for tag in ['test_info', 'test_results', 'problem_summary',
                    'added_symbols', 'affected']:
            self._remove_children_with_tag(report_root, tag)

        for report in report_root:
            for problems in list(report):
                if not list(problems):
                    report.remove(problems)

    def _abi_compliance_command(self, mbed_module, output_path):
        """Build the command to run to analyze the library mbed_module.
        The report will be placed in output_path."""
        abi_compliance_command = [
            "abi-compliance-checker",
            "-l", mbed_module,
            "-old", self.old_version.abi_dumps[mbed_module],
            "-new", self.new_version.abi_dumps[mbed_module],
            "-strict",
            "-report-path", output_path,
        ]
        if self.skip_file:
            abi_compliance_command += ["-skip-symbols", self.skip_file,
                                       "-skip-types", self.skip_file]
        if self.brief:
            abi_compliance_command += ["-report-format", "xml",
                                       "-stdout"]
        return abi_compliance_command

    def _is_library_compatible(self, mbed_module, compatibility_report):
        """Test if the library mbed_module has remained compatible.
        Append a message regarding compatibility to compatibility_report.

        Return True if the library is compatible, False otherwise.
        Raise subprocess.CalledProcessError if the checker fails with an
        exit status other than 1.
        """
        output_path = os.path.join(
            self.report_dir, "{}-{}-{}.html".format(
                mbed_module, self.old_version.revision,
                self.new_version.revision
            )
        )
        try:
            subprocess.check_output(
                self._abi_compliance_command(mbed_module, output_path),
                stderr=subprocess.STDOUT
            )
        except subprocess.CalledProcessError as err:
            # Exit status 1 is treated as "compatibility issues found".
            # Any other failure is an error in running the tool.
            if err.returncode != 1:
                raise
            if self.brief:
                self.log.info(
                    "Compatibility issues found for {}".format(mbed_module)
                )
                report_root = ET.fromstring(err.output.decode("utf-8"))
                self._remove_extra_detail_from_report(report_root)
                self.log.info(ET.tostring(report_root).decode("utf-8"))
            else:
                # The report must remain available to the user.
                self.can_remove_report_dir = False
                compatibility_report.append(
                    "Compatibility issues found for {}, "
                    "for details see {}".format(mbed_module, output_path)
                )
            return False
        compatibility_report.append(
            "No compatibility issues for {}".format(mbed_module)
        )
        if not (self.keep_all_reports or self.brief):
            os.remove(output_path)
        return True

    @staticmethod
    def _is_storage_format_compatible(old_tests, new_tests,
                                      compatibility_report):
        """Check whether all tests present in old_tests are also in new_tests.

        Append a message regarding compatibility to compatibility_report.

        Return True if no test case from old_tests is missing in new_tests.
        """
        missing = frozenset(old_tests.keys()).difference(new_tests.keys())
        for test_data in sorted(missing):
            metadata = old_tests[test_data]
            compatibility_report.append(
                'Test case from {} line {} "{}" has disappeared: {}'.format(
                    metadata.filename, metadata.line_number,
                    metadata.description, test_data
                )
            )
        compatibility_report.append(
            'FAIL: {}/{} storage format test cases have changed or disappeared.'.format(
                len(missing), len(old_tests)
            ) if missing else
            'PASS: All {} storage format test cases are preserved.'.format(
                len(old_tests)
            )
        )
        compatibility_report.append(
            'Info: number of storage format tests cases: {} -> {}.'.format(
                len(old_tests), len(new_tests)
            )
        )
        return not missing

    def get_abi_compatibility_report(self):
        """Generate a report of the differences between the reference ABI
        and the new ABI. ABI dumps from self.old_version and self.new_version
        must be available.

        Log the report and return 0 if no incompatibility was found,
        1 otherwise.
        """
        compatibility_report = ["Checking evolution from {} to {}".format(
            self._pretty_revision(self.old_version),
            self._pretty_revision(self.new_version)
        )]
        compliance_return_code = 0

        if self.check_abi:
            # Only libraries present in both versions can be compared.
            # Sort them so that the report order is reproducible.
            shared_modules = sorted(set(self.old_version.modules) &
                                    set(self.new_version.modules))
            for mbed_module in shared_modules:
                if not self._is_library_compatible(mbed_module,
                                                   compatibility_report):
                    compliance_return_code = 1

        if self.check_storage_tests:
            if not self._is_storage_format_compatible(
                    self.old_version.storage_tests,
                    self.new_version.storage_tests,
                    compatibility_report):
                compliance_return_code = 1

        for version in [self.old_version, self.new_version]:
            for mbed_module_dump in version.abi_dumps.values():
                os.remove(mbed_module_dump)
        # The report directory is only created as a side effect of the
        # API/ABI checks, so it may not exist at this point.
        if self.can_remove_report_dir and os.path.isdir(self.report_dir):
            os.rmdir(self.report_dir)
        self.log.info("\n".join(compatibility_report))
        return compliance_return_code

    def check_for_abi_changes(self):
        """Generate a report of the interface differences
        between self.old_version and self.new_version.

        Return 0 if no incompatibility was found, 1 otherwise.
        """
        build_tree.check_repo_path()
        if self.check_api or self.check_abi:
            self.check_abi_tools_are_installed()
        self._get_abi_dump_for_ref(self.old_version)
        self._get_abi_dump_for_ref(self.new_version)
        return self.get_abi_compatibility_report()
534
535
def _build_argument_parser():
    """Create the command line parser for this script."""
    parser = argparse.ArgumentParser(
        description=__doc__
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true",
        help="set verbosity level",
    )
    parser.add_argument(
        "-r", "--report-dir", type=str, default="reports",
        help="directory where reports are stored, default is reports",
    )
    parser.add_argument(
        "-k", "--keep-all-reports", action="store_true",
        help="keep all reports, even if there are no compatibility issues",
    )
    parser.add_argument(
        "-o", "--old-rev", type=str, help="revision for old version.",
        required=True,
    )
    parser.add_argument(
        "-or", "--old-repo", type=str, help="repository for old version."
    )
    parser.add_argument(
        "-oc", "--old-crypto-rev", type=str,
        help="revision for old crypto submodule."
    )
    parser.add_argument(
        "-ocr", "--old-crypto-repo", type=str,
        help="repository for old crypto submodule."
    )
    parser.add_argument(
        "-n", "--new-rev", type=str, help="revision for new version",
        required=True,
    )
    parser.add_argument(
        "-nr", "--new-repo", type=str, help="repository for new version."
    )
    parser.add_argument(
        "-nc", "--new-crypto-rev", type=str,
        help="revision for new crypto version"
    )
    parser.add_argument(
        "-ncr", "--new-crypto-repo", type=str,
        help="repository for new crypto submodule."
    )
    parser.add_argument(
        "-s", "--skip-file", type=str,
        help=("path to file containing symbols and types to skip "
              "(typically \"-s identifiers\" after running "
              "\"tests/scripts/list-identifiers.sh --internal\")")
    )
    parser.add_argument(
        "--check-abi",
        action='store_true', default=True,
        help="Perform ABI comparison (default: yes)"
    )
    parser.add_argument("--no-check-abi", action='store_false', dest='check_abi')
    parser.add_argument(
        "--check-api",
        action='store_true', default=True,
        help="Perform API comparison (default: yes)"
    )
    parser.add_argument("--no-check-api", action='store_false', dest='check_api')
    parser.add_argument(
        "--check-storage",
        action='store_true', default=True,
        help="Perform storage tests comparison (default: yes)"
    )
    parser.add_argument("--no-check-storage", action='store_false', dest='check_storage')
    parser.add_argument(
        "-b", "--brief", action="store_true",
        help="output only the list of issues to stdout, instead of a full report",
    )
    return parser


def _make_repo_version(label, repository, revision,
                       crypto_repository, crypto_revision):
    """Create the namespace describing one of the two versions to compare.

    The commit and the collected information (ABI dumps, storage tests,
    library modules) start out empty; they are filled in during the check.
    """
    return SimpleNamespace(
        version=label,
        repository=repository,
        revision=revision,
        commit=None,
        crypto_repository=crypto_repository,
        crypto_revision=crypto_revision,
        abi_dumps={},
        storage_tests={},
        modules={}
    )


def run_main():
    """Parse the command line, run the checks and exit.

    The exit status is 0 if no incompatibility was found, 1 if some
    incompatibility was found, and 2 if there was an error while running
    the script (including an invalid command line).
    """
    try:
        parser = _build_argument_parser()
        abi_args = parser.parse_args()
        if os.path.isfile(abi_args.report_dir):
            # This is an error, so report it on stderr and exit with the
            # error status (a bare parser.exit() would exit with status 0).
            parser.exit(
                2, "Error: {} is not a directory\n".format(abi_args.report_dir)
            )
        old_version = _make_repo_version(
            "old",
            abi_args.old_repo, abi_args.old_rev,
            abi_args.old_crypto_repo, abi_args.old_crypto_rev
        )
        new_version = _make_repo_version(
            "new",
            abi_args.new_repo, abi_args.new_rev,
            abi_args.new_crypto_repo, abi_args.new_crypto_rev
        )
        configuration = SimpleNamespace(
            verbose=abi_args.verbose,
            report_dir=abi_args.report_dir,
            keep_all_reports=abi_args.keep_all_reports,
            brief=abi_args.brief,
            check_abi=abi_args.check_abi,
            check_api=abi_args.check_api,
            check_storage=abi_args.check_storage,
            skip_file=abi_args.skip_file
        )
        abi_check = AbiChecker(old_version, new_version, configuration)
        return_code = abi_check.check_for_abi_changes()
        sys.exit(return_code)
    except Exception: # pylint: disable=broad-except
        # Print the backtrace and exit explicitly so as to exit with
        # status 2, not 1.
        traceback.print_exc()
        sys.exit(2)
655
656
# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    run_main()
659