1#!/usr/bin/env python3
2"""This script compares the interfaces of two versions of Mbed TLS, looking
3for backward incompatibilities between two different Git revisions within
4an Mbed TLS repository. It must be run from the root of a Git working tree.
5
6### How the script works ###
7
8For the source (API) and runtime (ABI) interface compatibility, this script
9is a small wrapper around the abi-compliance-checker and abi-dumper tools,
10applying them to compare the header and library files.
11
12For the storage format, this script compares the automatically generated
13storage tests and the manual read tests, and complains if there is a
14reduction in coverage. A change in test data will be signaled as a
15coverage reduction since the old test data is no longer present. A change in
16how test data is presented will be signaled as well; this would be a false
17positive.
18
The results of the API/ABI comparison are either formatted as HTML and stored
at a configurable location, or are given as a brief list of problems.
The script exits with status 0 on success, 1 on non-compliance, and 2 if
there is an error while running the script.
23
24### How to interpret non-compliance ###
25
26This script has relatively common false positives. In many scenarios, it only
27reports a pass if there is a strict textual match between the old version and
28the new version, and it reports problems where there is a sufficient semantic
29match but not a textual match. This section lists some common false positives.
30This is not an exhaustive list: in the end what matters is whether we are
31breaking a backward compatibility goal.
32
33**API**: the goal is that if an application works with the old version of the
34library, it can be recompiled against the new version and will still work.
35This is normally validated by comparing the declarations in `include/*/*.h`.
36A failure is a declaration that has disappeared or that now has a different
37type.
38
39  * It's ok to change or remove macros and functions that are documented as
40    for internal use only or as experimental.
41  * It's ok to rename function or macro parameters as long as the semantics
42    has not changed.
43  * It's ok to change or remove structure fields that are documented as
44    private.
45  * It's ok to add fields to a structure that already had private fields
46    or was documented as extensible.
47
48**ABI**: the goal is that if an application was built against the old version
49of the library, the same binary will work when linked against the new version.
50This is normally validated by comparing the symbols exported by `libmbed*.so`.
51A failure is a symbol that is no longer exported by the same library or that
52now has a different type.
53
54  * All ABI changes are acceptable if the library version is bumped
55    (see `scripts/bump_version.sh`).
56  * ABI changes that concern functions which are declared only inside the
57    library directory, and not in `include/*/*.h`, are acceptable only if
58    the function was only ever used inside the same library (libmbedcrypto,
59    libmbedx509, libmbedtls). As a counter example, if the old version
60    of libmbedtls calls mbedtls_foo() from libmbedcrypto, and the new version
61    of libmbedcrypto no longer has a compatible mbedtls_foo(), this does
62    require a version bump for libmbedcrypto.
63
64**Storage format**: the goal is to check that persistent keys stored by the
65old version can be read by the new version. This is normally validated by
66comparing the `*read*` test cases in `test_suite*storage_format*.data`.
67A failure is a storage read test case that is no longer present with the same
68function name and parameter list.
69
70  * It's ok if the same test data is present, but its presentation has changed,
71    for example if a test function is renamed or has different parameters.
72  * It's ok if redundant tests are removed.
73
74**Generated test coverage**: the goal is to check that automatically
75generated tests have as much coverage as before. This is normally validated
76by comparing the test cases that are automatically generated by a script.
77A failure is a generated test case that is no longer present with the same
78function name and parameter list.
79
80  * It's ok if the same test data is present, but its presentation has changed,
81    for example if a test function is renamed or has different parameters.
82  * It's ok if redundant tests are removed.
83
84"""
85
86# Copyright The Mbed TLS Contributors
87# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
88
89import glob
90import os
91import re
92import sys
93import traceback
94import shutil
95import subprocess
96import argparse
97import logging
98import tempfile
99import fnmatch
100from types import SimpleNamespace
101
102import xml.etree.ElementTree as ET
103
104import framework_scripts_path # pylint: disable=unused-import
105from mbedtls_framework import build_tree
106
107
class AbiChecker:
    """API, ABI and storage format compatibility checker.

    Each of the two versions under comparison is checked out into a
    temporary Git worktree. There, the shared libraries are built and dumped
    with abi-dumper and/or the storage format test cases are collected.
    The collected data is then compared and summarized in a report.
    """

    def __init__(self, old_version, new_version, configuration):
        """Instantiate the API/ABI checker.

        old_version: namespace with the details of the version to compare
                     against (repository, revision, commit, crypto_repository,
                     crypto_revision, abi_dumps, storage_tests, modules)
        new_version: namespace with the details of the version to check
                     (same attributes as old_version)
        configuration.verbose: if true, log at debug level
        configuration.report_dir: directory for output files
        configuration.keep_all_reports: if false, delete old reports
        configuration.brief: if true, output shorter report to stdout
        configuration.check_abi: if true, compare ABIs
        configuration.check_api: if true, compare APIs
        configuration.check_storage: if true, compare storage format tests
        configuration.skip_file: path to file containing symbols and types to skip

        Raise an Exception if check_abi and check_api differ.
        """
        self.repo_path = "."
        self.log = None
        self.verbose = configuration.verbose
        self._setup_logger()
        self.report_dir = os.path.abspath(configuration.report_dir)
        self.keep_all_reports = configuration.keep_all_reports
        # Only remove the report directory at the end if this run is what
        # caused it to exist and the user did not ask to keep reports.
        self.can_remove_report_dir = not (os.path.exists(self.report_dir) or
                                          self.keep_all_reports)
        self.old_version = old_version
        self.new_version = new_version
        self.skip_file = configuration.skip_file
        self.check_abi = configuration.check_abi
        self.check_api = configuration.check_api
        if self.check_abi != self.check_api:
            raise Exception('Checking API without ABI or vice versa is not supported')
        self.check_storage_tests = configuration.check_storage
        self.brief = configuration.brief
        self.git_command = "git"
        self.make_command = "make"

    def _setup_logger(self):
        """Set up self.log as the root logger.

        The level is DEBUG if self.verbose is true and INFO otherwise.
        A new StreamHandler is attached to the logger.
        """
        self.log = logging.getLogger()
        self.log.setLevel(logging.DEBUG if self.verbose else logging.INFO)
        self.log.addHandler(logging.StreamHandler())

    @staticmethod
    def check_abi_tools_are_installed():
        """Raise an Exception if abi-dumper or abi-compliance-checker
        cannot be found on the command search path."""
        for command in ["abi-dumper", "abi-compliance-checker"]:
            if not shutil.which(command):
                raise Exception("{} not installed, aborting".format(command))

    def _get_clean_worktree_for_git_revision(self, version):
        """Make a separate worktree with version.revision checked out.
        Do not modify the current worktree.

        If version.repository is set, the revision is first fetched from
        that repository. Set version.commit to the commit ID that was
        checked out and return the path to the new worktree.
        """
        git_worktree_path = tempfile.mkdtemp()
        if version.repository:
            self.log.debug(
                "Checking out git worktree for revision {} from {}".format(
                    version.revision, version.repository
                )
            )
            fetch_output = subprocess.check_output(
                [self.git_command, "fetch",
                 version.repository, version.revision],
                cwd=self.repo_path,
                stderr=subprocess.STDOUT
            )
            self.log.debug(fetch_output.decode("utf-8"))
            worktree_rev = "FETCH_HEAD"
        else:
            self.log.debug("Checking out git worktree for revision {}".format(
                version.revision
            ))
            worktree_rev = version.revision
        worktree_output = subprocess.check_output(
            [self.git_command, "worktree", "add", "--detach",
             git_worktree_path, worktree_rev],
            cwd=self.repo_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(worktree_output.decode("utf-8"))
        version.commit = subprocess.check_output(
            [self.git_command, "rev-parse", "HEAD"],
            cwd=git_worktree_path,
            stderr=subprocess.STDOUT
        ).decode("ascii").rstrip()
        self.log.debug("Commit is {}".format(version.commit))
        return git_worktree_path

    def _update_git_submodules(self, git_worktree_path, version):
        """Initialize and update the submodules of the worktree.

        If the crypto submodule is present and version.crypto_revision
        is set, check out that revision in the submodule (fetching it from
        version.crypto_repository first if that is set). Otherwise leave
        the submodules at their default revision.
        """
        update_output = subprocess.check_output(
            [self.git_command, "submodule", "update", "--init", '--recursive'],
            cwd=git_worktree_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(update_output.decode("utf-8"))
        crypto_path = os.path.join(git_worktree_path, "crypto")
        if not (os.path.exists(crypto_path) and version.crypto_revision):
            return

        if version.crypto_repository:
            fetch_output = subprocess.check_output(
                [self.git_command, "fetch", version.crypto_repository,
                 version.crypto_revision],
                cwd=crypto_path,
                stderr=subprocess.STDOUT
            )
            self.log.debug(fetch_output.decode("utf-8"))
            crypto_rev = "FETCH_HEAD"
        else:
            crypto_rev = version.crypto_revision

        checkout_output = subprocess.check_output(
            [self.git_command, "checkout", crypto_rev],
            cwd=crypto_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(checkout_output.decode("utf-8"))

    def _build_shared_libraries(self, git_worktree_path, version):
        """Build the shared libraries in the specified worktree.

        Record the path of each resulting *.so file in version.modules,
        keyed by the file name without its extension.
        """
        my_environment = os.environ.copy()
        my_environment["CFLAGS"] = "-g -Og"
        my_environment["SHARED"] = "1"
        if os.path.exists(os.path.join(git_worktree_path, "crypto")):
            my_environment["USE_CRYPTO_SUBMODULE"] = "1"
        make_output = subprocess.check_output(
            [self.make_command, "lib"],
            env=my_environment,
            cwd=git_worktree_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(make_output.decode("utf-8"))
        for root, _dirs, files in os.walk(git_worktree_path):
            for file in fnmatch.filter(files, "*.so"):
                version.modules[os.path.splitext(file)[0]] = (
                    os.path.join(root, file)
                )

    @staticmethod
    def _pretty_revision(version):
        """Return a human-readable name for version.

        This is the revision as given by the user, followed by the commit ID
        in parentheses if it differs from the revision.
        """
        if version.revision == version.commit:
            return version.revision
        return "{} ({})".format(version.revision, version.commit)

    def _get_abi_dumps_from_shared_libraries(self, version):
        """Generate the ABI dumps for the specified git revision.
        The shared libraries must have been built and the module paths
        present in version.modules.

        The dumps are written under self.report_dir and their paths are
        recorded in version.abi_dumps.
        """
        for mbed_module, module_path in version.modules.items():
            output_path = os.path.join(
                self.report_dir, "{}-{}-{}.dump".format(
                    mbed_module, version.revision, version.version
                )
            )
            abi_dump_command = [
                "abi-dumper",
                module_path,
                "-o", output_path,
                "-lver", self._pretty_revision(version),
            ]
            abi_dump_output = subprocess.check_output(
                abi_dump_command,
                stderr=subprocess.STDOUT
            )
            self.log.debug(abi_dump_output.decode("utf-8"))
            version.abi_dumps[mbed_module] = output_path

    @staticmethod
    def _normalize_storage_test_case_data(line):
        """Eliminate cosmetic or irrelevant details in storage format test cases.

        Currently this removes all whitespace.
        """
        return re.sub(r'\s+', r'', line)

    def _read_storage_tests(self,
                            directory,
                            filename,
                            is_generated,
                            storage_tests):
        """Record storage tests from the given file.

        Populate the storage_tests dictionary with test cases read from
        filename under directory. The keys are the normalized test case
        data lines; the values are namespaces with the attributes filename,
        line_number and description.

        If is_generated is false, only record test cases whose function
        name contains the word "read".
        """
        at_paragraph_start = True
        description = None
        full_path = os.path.join(directory, filename)
        with open(full_path) as fd:
            for line_number, line in enumerate(fd, 1):
                line = line.strip()
                if not line:
                    # A blank line ends the current test case.
                    at_paragraph_start = True
                    continue
                if line.startswith('#'):
                    continue
                if at_paragraph_start:
                    # The first line of a test case is its description.
                    description = line
                    at_paragraph_start = False
                    continue
                if line.startswith('depends_on:'):
                    continue
                # We've reached a test case data line
                test_case_data = self._normalize_storage_test_case_data(line)
                if not is_generated:
                    # In manual test data, only look at read tests.
                    function_name = test_case_data.split(':', 1)[0]
                    if 'read' not in function_name.split('_'):
                        continue
                metadata = SimpleNamespace(
                    filename=filename,
                    line_number=line_number,
                    description=description
                )
                storage_tests[test_case_data] = metadata

    @staticmethod
    def _find_generate_psa_tests(git_worktree_path):
        """Return the path to generate_psa_tests.py, relative to the worktree."""
        generate_psa_tests = 'framework/scripts/generate_psa_tests.py'
        if not os.path.isfile(os.path.join(git_worktree_path,
                                           generate_psa_tests)):
            # The checked-out revision is from before generate_psa_tests.py
            # was moved to the framework submodule. Use the old location.
            generate_psa_tests = 'tests/scripts/generate_psa_tests.py'
        return generate_psa_tests

    @staticmethod
    def _list_generated_test_data_files(git_worktree_path):
        """List the generated test data files.

        Return the non-empty lines printed by generate_psa_tests.py --list
        when run from git_worktree_path.
        """
        generate_psa_tests = AbiChecker._find_generate_psa_tests(
            git_worktree_path
        )
        output = subprocess.check_output(
            [generate_psa_tests, '--list'],
            cwd=git_worktree_path,
        ).decode('ascii')
        return [line for line in output.split('\n') if line]

    @staticmethod
    def _list_existing_storage_data_files(git_worktree_path):
        """Return the set of storage format test data files in the worktree.

        The paths in the returned set are relative to git_worktree_path.
        """
        # Look inside the worktree, not the current directory: the two
        # may be at different revisions with different sets of files.
        pattern = os.path.join(
            glob.escape(git_worktree_path),
            'tests', 'suites', 'test_suite_*storage_format*.data'
        )
        return {os.path.relpath(path, git_worktree_path)
                for path in glob.glob(pattern)}

    def _get_storage_format_tests(self, version, git_worktree_path):
        """Record the storage format tests for the specified git version.

        The storage format tests are the test suite data files whose name
        contains "storage_format".

        The version must be checked out at git_worktree_path.

        This function creates or updates the generated data files.
        The test cases are recorded in version.storage_tests.
        """
        # Existing test data files. This may be missing some automatically
        # generated files if they haven't been generated yet.
        storage_data_files = self._list_existing_storage_data_files(
            git_worktree_path
        )
        # Discover and (re)generate automatically generated data files.
        to_be_generated = set()
        for filename in self._list_generated_test_data_files(git_worktree_path):
            if 'storage_format' in filename:
                storage_data_files.add(filename)
                to_be_generated.add(filename)

        generate_psa_tests = self._find_generate_psa_tests(git_worktree_path)
        subprocess.check_call(
            [generate_psa_tests] + sorted(to_be_generated),
            cwd=git_worktree_path,
        )
        for test_file in sorted(storage_data_files):
            self._read_storage_tests(git_worktree_path,
                                     test_file,
                                     test_file in to_be_generated,
                                     version.storage_tests)

    def _cleanup_worktree(self, git_worktree_path):
        """Remove the specified git worktree."""
        shutil.rmtree(git_worktree_path)
        worktree_output = subprocess.check_output(
            [self.git_command, "worktree", "prune"],
            cwd=self.repo_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(worktree_output.decode("utf-8"))

    def _get_abi_dump_for_ref(self, version):
        """Generate the interface information for the specified git revision.

        The temporary worktree is removed afterwards, even if collecting
        the information fails.
        """
        git_worktree_path = self._get_clean_worktree_for_git_revision(version)
        try:
            self._update_git_submodules(git_worktree_path, version)
            if self.check_abi:
                self._build_shared_libraries(git_worktree_path, version)
                self._get_abi_dumps_from_shared_libraries(version)
            if self.check_storage_tests:
                self._get_storage_format_tests(version, git_worktree_path)
        finally:
            # Don't leave a stale worktree behind when a step above fails.
            self._cleanup_worktree(git_worktree_path)

    def _remove_children_with_tag(self, parent, tag):
        """Recursively remove all descendants of parent that have the given tag."""
        # Iterate over a copy since we modify the element while iterating.
        # (Element.getchildren() no longer exists since Python 3.9.)
        for child in list(parent):
            if child.tag == tag:
                parent.remove(child)
            else:
                self._remove_children_with_tag(child, tag)

    def _remove_extra_detail_from_report(self, report_root):
        """Strip an XML compatibility report down to the list of problems.

        Remove the informational elements, then remove the elements
        directly under each report that have no children left.
        """
        for tag in ['test_info', 'test_results', 'problem_summary',
                    'added_symbols', 'affected']:
            self._remove_children_with_tag(report_root, tag)

        for report in report_root:
            for problems in list(report):
                # len() of an element is its number of children.
                if len(problems) == 0:
                    report.remove(problems)

    def _abi_compliance_command(self, mbed_module, output_path):
        """Build the command to run to analyze the library mbed_module.
        The report will be placed in output_path."""
        abi_compliance_command = [
            "abi-compliance-checker",
            "-l", mbed_module,
            "-old", self.old_version.abi_dumps[mbed_module],
            "-new", self.new_version.abi_dumps[mbed_module],
            "-strict",
            "-report-path", output_path,
        ]
        if self.skip_file:
            abi_compliance_command += ["-skip-symbols", self.skip_file,
                                       "-skip-types", self.skip_file]
        if self.brief:
            abi_compliance_command += ["-report-format", "xml",
                                       "-stdout"]
        return abi_compliance_command

    def _is_library_compatible(self, mbed_module, compatibility_report):
        """Test if the library mbed_module has remained compatible.
        Append a message regarding compatibility to compatibility_report.

        Return True if the library is compatible and False otherwise.
        Propagate the exception if abi-compliance-checker fails with a
        status other than 1.
        """
        output_path = os.path.join(
            self.report_dir, "{}-{}-{}.html".format(
                mbed_module, self.old_version.revision,
                self.new_version.revision
            )
        )
        try:
            subprocess.check_output(
                self._abi_compliance_command(mbed_module, output_path),
                stderr=subprocess.STDOUT
            )
        except subprocess.CalledProcessError as err:
            # Exit status 1 is treated as "incompatible"; any other
            # nonzero status is an error in running the tool.
            if err.returncode != 1:
                raise
            if self.brief:
                self.log.info(
                    "Compatibility issues found for {}".format(mbed_module)
                )
                report_root = ET.fromstring(err.output.decode("utf-8"))
                self._remove_extra_detail_from_report(report_root)
                self.log.info(ET.tostring(report_root).decode("utf-8"))
            else:
                self.can_remove_report_dir = False
                compatibility_report.append(
                    "Compatibility issues found for {}, "
                    "for details see {}".format(mbed_module, output_path)
                )
            return False
        compatibility_report.append(
            "No compatibility issues for {}".format(mbed_module)
        )
        if not (self.keep_all_reports or self.brief):
            os.remove(output_path)
        return True

    @staticmethod
    def _is_storage_format_compatible(old_tests, new_tests,
                                      compatibility_report):
        """Check whether all tests present in old_tests are also in new_tests.

        Append a message regarding compatibility to compatibility_report.

        Return True if no test case has disappeared and False otherwise.
        """
        missing = frozenset(old_tests.keys()).difference(new_tests.keys())
        for test_data in sorted(missing):
            metadata = old_tests[test_data]
            compatibility_report.append(
                'Test case from {} line {} "{}" has disappeared: {}'.format(
                    metadata.filename, metadata.line_number,
                    metadata.description, test_data
                )
            )
        if missing:
            summary = (
                'FAIL: {}/{} storage format test cases have changed or disappeared.'
                .format(len(missing), len(old_tests))
            )
        else:
            summary = 'PASS: All {} storage format test cases are preserved.'.format(
                len(old_tests)
            )
        compatibility_report.append(summary)
        compatibility_report.append(
            'Info: number of storage format tests cases: {} -> {}.'.format(
                len(old_tests), len(new_tests)
            )
        )
        return not missing

    def get_abi_compatibility_report(self):
        """Generate a report of the differences between the reference ABI
        and the new ABI. ABI dumps from self.old_version and self.new_version
        must be available.

        Log the report, remove the ABI dumps, and return 0 if no
        incompatibility was found and 1 otherwise.
        """
        compatibility_report = ["Checking evolution from {} to {}".format(
            self._pretty_revision(self.old_version),
            self._pretty_revision(self.new_version)
        )]
        compliance_return_code = 0

        if self.check_abi:
            # Only compare the libraries that exist in both versions.
            # Sort them so that the report order is reproducible.
            shared_modules = sorted(set(self.old_version.modules.keys()) &
                                    set(self.new_version.modules.keys()))
            for mbed_module in shared_modules:
                if not self._is_library_compatible(mbed_module,
                                                   compatibility_report):
                    compliance_return_code = 1

        if self.check_storage_tests:
            if not self._is_storage_format_compatible(
                    self.old_version.storage_tests,
                    self.new_version.storage_tests,
                    compatibility_report):
                compliance_return_code = 1

        for version in [self.old_version, self.new_version]:
            for mbed_module_dump in version.abi_dumps.values():
                os.remove(mbed_module_dump)
        if self.can_remove_report_dir:
            os.rmdir(self.report_dir)
        self.log.info("\n".join(compatibility_report))
        return compliance_return_code

    def check_for_abi_changes(self):
        """Generate a report of the interface differences
        between self.old_version and self.new_version.

        Return 0 if no incompatibility was found and 1 otherwise.
        """
        build_tree.check_repo_path()
        if self.check_api or self.check_abi:
            self.check_abi_tools_are_installed()
        self._get_abi_dump_for_ref(self.old_version)
        self._get_abi_dump_for_ref(self.new_version)
        return self.get_abi_compatibility_report()
547
548
def run_main():
    """Parse the command line, run the compatibility checks and exit.

    The exit status is 0 if no incompatibility was found, 1 if an
    incompatibility was found, and 2 if there was an error while running
    the script (including an invalid report directory).
    """
    try:
        parser = argparse.ArgumentParser(
            description=__doc__
        )
        parser.add_argument(
            "-v", "--verbose", action="store_true",
            help="set verbosity level",
        )
        parser.add_argument(
            "-r", "--report-dir", type=str, default="reports",
            help="directory where reports are stored, default is reports",
        )
        parser.add_argument(
            "-k", "--keep-all-reports", action="store_true",
            help="keep all reports, even if there are no compatibility issues",
        )
        parser.add_argument(
            "-o", "--old-rev", type=str, help="revision for old version.",
            required=True,
        )
        parser.add_argument(
            "-or", "--old-repo", type=str, help="repository for old version."
        )
        parser.add_argument(
            "-oc", "--old-crypto-rev", type=str,
            help="revision for old crypto submodule."
        )
        parser.add_argument(
            "-ocr", "--old-crypto-repo", type=str,
            help="repository for old crypto submodule."
        )
        parser.add_argument(
            "-n", "--new-rev", type=str, help="revision for new version",
            required=True,
        )
        parser.add_argument(
            "-nr", "--new-repo", type=str, help="repository for new version."
        )
        parser.add_argument(
            "-nc", "--new-crypto-rev", type=str,
            help="revision for new crypto version"
        )
        parser.add_argument(
            "-ncr", "--new-crypto-repo", type=str,
            help="repository for new crypto submodule."
        )
        parser.add_argument(
            "-s", "--skip-file", type=str,
            help=("path to file containing symbols and types to skip "
                  "(typically \"-s identifiers\" after running "
                  "\"tests/scripts/list-identifiers.sh --internal\")")
        )
        parser.add_argument(
            "--check-abi",
            action='store_true', default=True,
            help="Perform ABI comparison (default: yes)"
        )
        parser.add_argument("--no-check-abi", action='store_false', dest='check_abi')
        parser.add_argument(
            "--check-api",
            action='store_true', default=True,
            help="Perform API comparison (default: yes)"
        )
        parser.add_argument("--no-check-api", action='store_false', dest='check_api')
        parser.add_argument(
            "--check-storage",
            action='store_true', default=True,
            help="Perform storage tests comparison (default: yes)"
        )
        parser.add_argument("--no-check-storage", action='store_false', dest='check_storage')
        parser.add_argument(
            "-b", "--brief", action="store_true",
            help="output only the list of issues to stdout, instead of a full report",
        )
        abi_args = parser.parse_args()
        if os.path.isfile(abi_args.report_dir):
            # This is an error in running the script, so exit with status 2
            # (parser.exit() defaults to status 0, which would look like
            # a successful check).
            parser.exit(
                status=2,
                message="Error: {} is not a directory\n".format(
                    abi_args.report_dir
                )
            )
        old_version = SimpleNamespace(
            version="old",
            repository=abi_args.old_repo,
            revision=abi_args.old_rev,
            commit=None,
            crypto_repository=abi_args.old_crypto_repo,
            crypto_revision=abi_args.old_crypto_rev,
            abi_dumps={},
            storage_tests={},
            modules={}
        )
        new_version = SimpleNamespace(
            version="new",
            repository=abi_args.new_repo,
            revision=abi_args.new_rev,
            commit=None,
            crypto_repository=abi_args.new_crypto_repo,
            crypto_revision=abi_args.new_crypto_rev,
            abi_dumps={},
            storage_tests={},
            modules={}
        )
        configuration = SimpleNamespace(
            verbose=abi_args.verbose,
            report_dir=abi_args.report_dir,
            keep_all_reports=abi_args.keep_all_reports,
            brief=abi_args.brief,
            check_abi=abi_args.check_abi,
            check_api=abi_args.check_api,
            check_storage=abi_args.check_storage,
            skip_file=abi_args.skip_file
        )
        abi_check = AbiChecker(old_version, new_version, configuration)
        return_code = abi_check.check_for_abi_changes()
        sys.exit(return_code)
    except Exception: # pylint: disable=broad-except
        # Print the backtrace and exit explicitly so as to exit with
        # status 2, not 1. (SystemExit is not an Exception, so the
        # explicit exits above are not intercepted here.)
        traceback.print_exc()
        sys.exit(2)
668
669
# Run the checks only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_main()
672