1#!/usr/bin/env python3
2"""This script compares the interfaces of two versions of Mbed TLS, looking
3for backward incompatibilities between two different Git revisions within
4an Mbed TLS repository. It must be run from the root of a Git working tree.
5
6### How the script works ###
7
8For the source (API) and runtime (ABI) interface compatibility, this script
9is a small wrapper around the abi-compliance-checker and abi-dumper tools,
10applying them to compare the header and library files.
11
12For the storage format, this script compares the automatically generated
13storage tests and the manual read tests, and complains if there is a
14reduction in coverage. A change in test data will be signaled as a
15coverage reduction since the old test data is no longer present. A change in
16how test data is presented will be signaled as well; this would be a false
17positive.
18
19The results of the API/ABI comparison are either formatted as HTML and stored
20at a configurable location, or are given as a brief list of problems.
21Returns 0 on success, 1 on non-compliance, and 2 if there is an error
22while running the script.
23
24### How to interpret non-compliance ###
25
26This script has relatively common false positives. In many scenarios, it only
27reports a pass if there is a strict textual match between the old version and
28the new version, and it reports problems where there is a sufficient semantic
29match but not a textual match. This section lists some common false positives.
30This is not an exhaustive list: in the end what matters is whether we are
31breaking a backward compatibility goal.
32
33**API**: the goal is that if an application works with the old version of the
34library, it can be recompiled against the new version and will still work.
35This is normally validated by comparing the declarations in `include/*/*.h`.
36A failure is a declaration that has disappeared or that now has a different
37type.
38
39  * It's ok to change or remove macros and functions that are documented as
40    for internal use only or as experimental.
41  * It's ok to rename function or macro parameters as long as the semantics
42    has not changed.
43  * It's ok to change or remove structure fields that are documented as
44    private.
45  * It's ok to add fields to a structure that already had private fields
46    or was documented as extensible.
47
48**ABI**: the goal is that if an application was built against the old version
49of the library, the same binary will work when linked against the new version.
50This is normally validated by comparing the symbols exported by `libmbed*.so`.
51A failure is a symbol that is no longer exported by the same library or that
52now has a different type.
53
54  * All ABI changes are acceptable if the library version is bumped
55    (see `scripts/bump_version.sh`).
56  * ABI changes that concern functions which are declared only inside the
57    library directory, and not in `include/*/*.h`, are acceptable only if
58    the function was only ever used inside the same library (libmbedcrypto,
59    libmbedx509, libmbedtls). As a counter example, if the old version
60    of libmbedtls calls mbedtls_foo() from libmbedcrypto, and the new version
61    of libmbedcrypto no longer has a compatible mbedtls_foo(), this does
62    require a version bump for libmbedcrypto.
63
64**Storage format**: the goal is to check that persistent keys stored by the
65old version can be read by the new version. This is normally validated by
66comparing the `*read*` test cases in `test_suite*storage_format*.data`.
67A failure is a storage read test case that is no longer present with the same
68function name and parameter list.
69
70  * It's ok if the same test data is present, but its presentation has changed,
71    for example if a test function is renamed or has different parameters.
72  * It's ok if redundant tests are removed.
73
74**Generated test coverage**: the goal is to check that automatically
75generated tests have as much coverage as before. This is normally validated
76by comparing the test cases that are automatically generated by a script.
77A failure is a generated test case that is no longer present with the same
78function name and parameter list.
79
80  * It's ok if the same test data is present, but its presentation has changed,
81    for example if a test function is renamed or has different parameters.
82  * It's ok if redundant tests are removed.
83
84"""
85
86# Copyright The Mbed TLS Contributors
87# SPDX-License-Identifier: Apache-2.0
88#
89# Licensed under the Apache License, Version 2.0 (the "License"); you may
90# not use this file except in compliance with the License.
91# You may obtain a copy of the License at
92#
93# http://www.apache.org/licenses/LICENSE-2.0
94#
95# Unless required by applicable law or agreed to in writing, software
96# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
97# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
98# See the License for the specific language governing permissions and
99# limitations under the License.
100
101import glob
102import os
103import re
104import sys
105import traceback
106import shutil
107import subprocess
108import argparse
109import logging
110import tempfile
111import fnmatch
112from types import SimpleNamespace
113
114import xml.etree.ElementTree as ET
115
116from mbedtls_dev import build_tree
117
118
class AbiChecker:
    """API, ABI and storage format compatibility checker.

    For the API/ABI check, the shared libraries of each revision are built
    in a temporary Git worktree, dumped with abi-dumper and compared with
    abi-compliance-checker. For the storage format check, the storage format
    test cases of each revision are collected and every test case of the old
    revision must still be present in the new revision.
    """

    def __init__(self, old_version, new_version, configuration):
        """Instantiate the API/ABI checker.

        old_version: namespace containing details to compare against
        new_version: namespace containing details to check
        configuration.verbose: if true, log at debug level
        configuration.report_dir: directory for output files
        configuration.keep_all_reports: if false, delete old reports
        configuration.brief: if true, output shorter report to stdout
        configuration.check_abi: if true, compare ABIs
        configuration.check_api: if true, compare APIs
        configuration.check_storage: if true, compare storage format tests
        configuration.skip_file: path to file containing symbols and types to skip

        Raises Exception if check_abi and check_api differ, since the two
        checks are performed together.
        """
        self.repo_path = "."
        self.log = None
        self.verbose = configuration.verbose
        self._setup_logger()
        self.report_dir = os.path.abspath(configuration.report_dir)
        self.keep_all_reports = configuration.keep_all_reports
        # Only a report directory that did not exist before this run, and
        # that the user did not ask to keep, may be removed at the end.
        self.can_remove_report_dir = not (os.path.exists(self.report_dir) or
                                          self.keep_all_reports)
        self.old_version = old_version
        self.new_version = new_version
        self.skip_file = configuration.skip_file
        self.check_abi = configuration.check_abi
        self.check_api = configuration.check_api
        if self.check_abi != self.check_api:
            raise Exception('Checking API without ABI or vice versa is not supported')
        self.check_storage_tests = configuration.check_storage
        self.brief = configuration.brief
        self.git_command = "git"
        self.make_command = "make"

    def _setup_logger(self):
        """Configure the root logger: level from self.verbose, stream output."""
        self.log = logging.getLogger()
        if self.verbose:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)
        self.log.addHandler(logging.StreamHandler())

    def _run_and_log(self, command, **kwargs):
        """Run command and log its combined stdout/stderr at debug level.

        Extra keyword arguments (e.g. cwd, env) are passed on to
        subprocess.check_output. Raises subprocess.CalledProcessError if the
        command exits with a nonzero status.
        """
        output = subprocess.check_output(
            command, stderr=subprocess.STDOUT, **kwargs
        )
        self.log.debug(output.decode("utf-8"))

    @staticmethod
    def check_abi_tools_are_installed():
        """Raise an Exception if abi-dumper or abi-compliance-checker is
        not found on the command search path."""
        for command in ["abi-dumper", "abi-compliance-checker"]:
            if not shutil.which(command):
                raise Exception("{} not installed, aborting".format(command))

    def _get_clean_worktree_for_git_revision(self, version):
        """Make a separate worktree with version.revision checked out.
        Do not modify the current worktree.

        If version.repository is set, the revision is fetched from that
        repository first. Set version.commit to the commit that was checked
        out and return the path of the new worktree.
        """
        git_worktree_path = tempfile.mkdtemp()
        try:
            if version.repository:
                self.log.debug(
                    "Checking out git worktree for revision {} from {}".format(
                        version.revision, version.repository
                    )
                )
                self._run_and_log(
                    [self.git_command, "fetch",
                     version.repository, version.revision],
                    cwd=self.repo_path
                )
                worktree_rev = "FETCH_HEAD"
            else:
                self.log.debug(
                    "Checking out git worktree for revision {}".format(
                        version.revision
                    )
                )
                worktree_rev = version.revision
            self._run_and_log(
                [self.git_command, "worktree", "add", "--detach",
                 git_worktree_path, worktree_rev],
                cwd=self.repo_path
            )
            version.commit = subprocess.check_output(
                [self.git_command, "rev-parse", "HEAD"],
                cwd=git_worktree_path,
                stderr=subprocess.STDOUT
            ).decode("ascii").rstrip()
        except Exception:
            # The caller never learns the path when this method fails, so
            # the temporary directory has to be removed here.
            shutil.rmtree(git_worktree_path, ignore_errors=True)
            raise
        self.log.debug("Commit is {}".format(version.commit))
        return git_worktree_path

    def _update_git_submodules(self, git_worktree_path, version):
        """If the crypto submodule is present, initialize it.
        if version.crypto_revision exists, update it to that revision,
        otherwise update it to the default revision"""
        self._run_and_log(
            [self.git_command, "submodule", "update", "--init", '--recursive'],
            cwd=git_worktree_path
        )
        crypto_path = os.path.join(git_worktree_path, "crypto")
        if not (os.path.exists(crypto_path) and version.crypto_revision):
            return

        if version.crypto_repository:
            self._run_and_log(
                [self.git_command, "fetch", version.crypto_repository,
                 version.crypto_revision],
                cwd=crypto_path
            )
            crypto_rev = "FETCH_HEAD"
        else:
            crypto_rev = version.crypto_revision

        self._run_and_log(
            [self.git_command, "checkout", crypto_rev],
            cwd=crypto_path
        )

    def _build_shared_libraries(self, git_worktree_path, version):
        """Build the shared libraries in the specified worktree.

        Record every *.so file found under the worktree in version.modules,
        keyed by the file name without its extension.
        """
        my_environment = os.environ.copy()
        my_environment["CFLAGS"] = "-g -Og"
        my_environment["SHARED"] = "1"
        if os.path.exists(os.path.join(git_worktree_path, "crypto")):
            my_environment["USE_CRYPTO_SUBMODULE"] = "1"
        self._run_and_log(
            [self.make_command, "lib"],
            env=my_environment,
            cwd=git_worktree_path
        )
        for root, _dirs, files in os.walk(git_worktree_path):
            for file in fnmatch.filter(files, "*.so"):
                version.modules[os.path.splitext(file)[0]] = (
                    os.path.join(root, file)
                )

    @staticmethod
    def _pretty_revision(version):
        """Return the revision name, followed by the commit in parentheses
        when the two differ."""
        if version.revision == version.commit:
            return version.revision
        return "{} ({})".format(version.revision, version.commit)

    def _get_abi_dumps_from_shared_libraries(self, version):
        """Generate the ABI dumps for the specified git revision.
        The shared libraries must have been built and the module paths
        present in version.modules.

        The path of each dump is recorded in version.abi_dumps.
        """
        for mbed_module, module_path in version.modules.items():
            output_path = os.path.join(
                self.report_dir, "{}-{}-{}.dump".format(
                    mbed_module, version.revision, version.version
                )
            )
            self._run_and_log([
                "abi-dumper",
                module_path,
                "-o", output_path,
                "-lver", self._pretty_revision(version),
            ])
            version.abi_dumps[mbed_module] = output_path

    @staticmethod
    def _normalize_storage_test_case_data(line):
        """Eliminate cosmetic or irrelevant details in storage format test cases."""
        return re.sub(r'\s+', r'', line)

    def _read_storage_tests(self,
                            directory,
                            filename,
                            is_generated,
                            storage_tests):
        """Record storage tests from the given file.

        Populate the storage_tests dictionary with test cases read from
        filename under directory. Keys are the normalized test case data
        lines; values hold the file name, line number and description.

        For files that are not generated (is_generated false), only test
        cases whose function name contains the word "read" are recorded.
        """
        at_paragraph_start = True
        description = None
        full_path = os.path.join(directory, filename)
        with open(full_path, encoding='utf-8') as fd:
            for line_number, line in enumerate(fd, 1):
                line = line.strip()
                if not line:
                    # A blank line ends the current test case.
                    at_paragraph_start = True
                    continue
                if line.startswith('#'):
                    continue
                if at_paragraph_start:
                    # The first line of a paragraph is the description.
                    description = line
                    at_paragraph_start = False
                    continue
                if line.startswith('depends_on:'):
                    continue
                # We've reached a test case data line
                test_case_data = self._normalize_storage_test_case_data(line)
                if not is_generated:
                    # In manual test data, only look at read tests.
                    function_name = test_case_data.split(':', 1)[0]
                    if 'read' not in function_name.split('_'):
                        continue
                storage_tests[test_case_data] = SimpleNamespace(
                    filename=filename,
                    line_number=line_number,
                    description=description
                )

    @staticmethod
    def _list_generated_test_data_files(git_worktree_path):
        """List the generated test data files."""
        output = subprocess.check_output(
            ['tests/scripts/generate_psa_tests.py', '--list'],
            cwd=git_worktree_path,
        ).decode('ascii')
        return [line for line in output.split('\n') if line]

    @staticmethod
    def _generate_test_data_files(git_worktree_path, filenames):
        """Run the test generation script in the worktree for filenames."""
        subprocess.check_call(
            ['tests/scripts/generate_psa_tests.py'] + sorted(filenames),
            cwd=git_worktree_path,
        )

    def _get_storage_format_tests(self, version, git_worktree_path):
        """Record the storage format tests for the specified git version.

        The storage format tests are the test suite data files whose name
        contains "storage_format".

        The version must be checked out at git_worktree_path.

        This function creates or updates the generated data files.
        """
        # Existing test data files. This may be missing some automatically
        # generated files if they haven't been generated yet.
        # Look inside the worktree of the revision being examined, not in
        # the current directory, since the files are read from the worktree.
        suites_pattern = os.path.join(
            glob.escape(git_worktree_path),
            'tests', 'suites', 'test_suite_*storage_format*.data'
        )
        storage_data_files = {
            os.path.relpath(path, git_worktree_path)
            for path in glob.glob(suites_pattern)
        }
        # Discover and (re)generate automatically generated data files.
        to_be_generated = set()
        for filename in self._list_generated_test_data_files(git_worktree_path):
            if 'storage_format' in filename:
                storage_data_files.add(filename)
                to_be_generated.add(filename)
        self._generate_test_data_files(git_worktree_path, to_be_generated)
        for test_file in sorted(storage_data_files):
            self._read_storage_tests(git_worktree_path,
                                     test_file,
                                     test_file in to_be_generated,
                                     version.storage_tests)

    def _cleanup_worktree(self, git_worktree_path):
        """Remove the specified git worktree."""
        shutil.rmtree(git_worktree_path)
        self._run_and_log(
            [self.git_command, "worktree", "prune"],
            cwd=self.repo_path
        )

    def _get_abi_dump_for_ref(self, version):
        """Generate the interface information for the specified git revision.

        The temporary worktree is removed even if a step fails.
        """
        git_worktree_path = self._get_clean_worktree_for_git_revision(version)
        try:
            self._update_git_submodules(git_worktree_path, version)
            if self.check_abi:
                self._build_shared_libraries(git_worktree_path, version)
                self._get_abi_dumps_from_shared_libraries(version)
            if self.check_storage_tests:
                self._get_storage_format_tests(version, git_worktree_path)
        finally:
            self._cleanup_worktree(git_worktree_path)

    def _remove_children_with_tag(self, parent, tag):
        """Recursively remove all descendants of parent that have this tag."""
        # Iterate over a copy: children are removed during the loop.
        # (Element.getchildren() no longer exists since Python 3.9.)
        for child in list(parent):
            if child.tag == tag:
                parent.remove(child)
            else:
                self._remove_children_with_tag(child, tag)

    def _remove_extra_detail_from_report(self, report_root):
        """Strip an XML compatibility report down to the problem lists.

        Remove the informational elements everywhere in the tree, then
        remove the elements directly under each report that have no
        children left.
        """
        for tag in ['test_info', 'test_results', 'problem_summary',
                    'added_symbols', 'affected']:
            self._remove_children_with_tag(report_root, tag)

        for report in report_root:
            for problems in list(report):
                if len(problems) == 0:
                    report.remove(problems)

    def _abi_compliance_command(self, mbed_module, output_path):
        """Build the command to run to analyze the library mbed_module.
        The report will be placed in output_path."""
        abi_compliance_command = [
            "abi-compliance-checker",
            "-l", mbed_module,
            "-old", self.old_version.abi_dumps[mbed_module],
            "-new", self.new_version.abi_dumps[mbed_module],
            "-strict",
            "-report-path", output_path,
        ]
        if self.skip_file:
            abi_compliance_command += ["-skip-symbols", self.skip_file,
                                       "-skip-types", self.skip_file]
        if self.brief:
            abi_compliance_command += ["-report-format", "xml",
                                       "-stdout"]
        return abi_compliance_command

    def _is_library_compatible(self, mbed_module, compatibility_report):
        """Test if the library mbed_module has remained compatible.
        Append a message regarding compatibility to compatibility_report.

        Return True if no compatibility issue was reported, False otherwise.
        """
        output_path = os.path.join(
            self.report_dir, "{}-{}-{}.html".format(
                mbed_module, self.old_version.revision,
                self.new_version.revision
            )
        )
        try:
            subprocess.check_output(
                self._abi_compliance_command(mbed_module, output_path),
                stderr=subprocess.STDOUT
            )
        except subprocess.CalledProcessError as err:
            # Exit status 1 is treated as "compatibility issues found";
            # any other failure of the tool is propagated to the caller.
            if err.returncode != 1:
                raise
            if self.brief:
                self.log.info(
                    "Compatibility issues found for {}".format(mbed_module)
                )
                report_root = ET.fromstring(err.output.decode("utf-8"))
                self._remove_extra_detail_from_report(report_root)
                self.log.info(ET.tostring(report_root).decode("utf-8"))
            else:
                # The report is referenced in the message: keep it.
                self.can_remove_report_dir = False
                compatibility_report.append(
                    "Compatibility issues found for {}, "
                    "for details see {}".format(mbed_module, output_path)
                )
            return False
        compatibility_report.append(
            "No compatibility issues for {}".format(mbed_module)
        )
        if not (self.keep_all_reports or self.brief):
            os.remove(output_path)
        return True

    @staticmethod
    def _is_storage_format_compatible(old_tests, new_tests,
                                      compatibility_report):
        """Check whether all tests present in old_tests are also in new_tests.

        Append a message regarding compatibility to compatibility_report.
        Return True if no old test case is missing, False otherwise.
        """
        missing = frozenset(old_tests.keys()).difference(new_tests.keys())
        for test_data in sorted(missing):
            metadata = old_tests[test_data]
            compatibility_report.append(
                'Test case from {} line {} "{}" has disappeared: {}'.format(
                    metadata.filename, metadata.line_number,
                    metadata.description, test_data
                )
            )
        if missing:
            verdict = (
                'FAIL: {}/{} storage format test cases have changed or disappeared.'.format(
                    len(missing), len(old_tests)
                )
            )
        else:
            verdict = 'PASS: All {} storage format test cases are preserved.'.format(
                len(old_tests)
            )
        compatibility_report.append(verdict)
        compatibility_report.append(
            'Info: number of storage format tests cases: {} -> {}.'.format(
                len(old_tests), len(new_tests)
            )
        )
        return not missing

    def get_abi_compatibility_report(self):
        """Generate a report of the differences between the reference ABI
        and the new ABI. ABI dumps from self.old_version and self.new_version
        must be available.

        Log the report and return 0 if everything is compatible, 1 otherwise.
        The ABI dump files are deleted afterwards.
        """
        compatibility_report = ["Checking evolution from {} to {}".format(
            self._pretty_revision(self.old_version),
            self._pretty_revision(self.new_version)
        )]
        compliance_return_code = 0

        if self.check_abi:
            # Only libraries that exist in both revisions can be compared.
            shared_modules = list(set(self.old_version.modules.keys()) &
                                  set(self.new_version.modules.keys()))
            for mbed_module in shared_modules:
                if not self._is_library_compatible(mbed_module,
                                                   compatibility_report):
                    compliance_return_code = 1

        if self.check_storage_tests:
            if not self._is_storage_format_compatible(
                    self.old_version.storage_tests,
                    self.new_version.storage_tests,
                    compatibility_report):
                compliance_return_code = 1

        for version in [self.old_version, self.new_version]:
            for mbed_module_dump in version.abi_dumps.values():
                os.remove(mbed_module_dump)
        # The report directory only exists if a tool created it, which is
        # not the case when only the storage format tests are compared.
        if self.can_remove_report_dir and os.path.isdir(self.report_dir):
            os.rmdir(self.report_dir)
        self.log.info("\n".join(compatibility_report))
        return compliance_return_code

    def check_for_abi_changes(self):
        """Generate a report of ABI differences
        between self.old_version and self.new_version.

        Return 0 if everything is compatible, 1 otherwise.
        """
        build_tree.check_repo_path()
        if self.check_api or self.check_abi:
            self.check_abi_tools_are_installed()
        self._get_abi_dump_for_ref(self.old_version)
        self._get_abi_dump_for_ref(self.new_version)
        return self.get_abi_compatibility_report()
546
547
def run_main():
    """Parse the command line, run the checks and exit.

    The exit status is 0 if the two revisions are compatible, 1 if an
    incompatibility was found, and 2 if an error occurred (including an
    invalid command line or report directory).
    """
    try:
        parser = argparse.ArgumentParser(
            description=__doc__
        )
        parser.add_argument(
            "-v", "--verbose", action="store_true",
            help="set verbosity level",
        )
        parser.add_argument(
            "-r", "--report-dir", type=str, default="reports",
            help="directory where reports are stored, default is reports",
        )
        parser.add_argument(
            "-k", "--keep-all-reports", action="store_true",
            help="keep all reports, even if there are no compatibility issues",
        )
        parser.add_argument(
            "-o", "--old-rev", type=str, help="revision for old version.",
            required=True,
        )
        parser.add_argument(
            "-or", "--old-repo", type=str, help="repository for old version."
        )
        parser.add_argument(
            "-oc", "--old-crypto-rev", type=str,
            help="revision for old crypto submodule."
        )
        parser.add_argument(
            "-ocr", "--old-crypto-repo", type=str,
            help="repository for old crypto submodule."
        )
        parser.add_argument(
            "-n", "--new-rev", type=str, help="revision for new version",
            required=True,
        )
        parser.add_argument(
            "-nr", "--new-repo", type=str, help="repository for new version."
        )
        parser.add_argument(
            "-nc", "--new-crypto-rev", type=str,
            help="revision for new crypto version"
        )
        parser.add_argument(
            "-ncr", "--new-crypto-repo", type=str,
            help="repository for new crypto submodule."
        )
        parser.add_argument(
            "-s", "--skip-file", type=str,
            help=("path to file containing symbols and types to skip "
                  "(typically \"-s identifiers\" after running "
                  "\"tests/scripts/list-identifiers.sh --internal\")")
        )
        parser.add_argument(
            "--check-abi",
            action='store_true', default=True,
            help="Perform ABI comparison (default: yes)"
        )
        parser.add_argument("--no-check-abi", action='store_false', dest='check_abi')
        parser.add_argument(
            "--check-api",
            action='store_true', default=True,
            help="Perform API comparison (default: yes)"
        )
        parser.add_argument("--no-check-api", action='store_false', dest='check_api')
        parser.add_argument(
            "--check-storage",
            action='store_true', default=True,
            help="Perform storage tests comparison (default: yes)"
        )
        parser.add_argument("--no-check-storage", action='store_false', dest='check_storage')
        parser.add_argument(
            "-b", "--brief", action="store_true",
            help="output only the list of issues to stdout, instead of a full report",
        )
        abi_args = parser.parse_args()
        if os.path.isfile(abi_args.report_dir):
            # This is an error: exit with status 2, not with the default
            # status 0 of parser.exit(). The message goes to stderr.
            parser.exit(
                status=2,
                message="Error: {} is not a directory\n".format(
                    abi_args.report_dir
                )
            )
        old_version = SimpleNamespace(
            version="old",
            repository=abi_args.old_repo,
            revision=abi_args.old_rev,
            commit=None,
            crypto_repository=abi_args.old_crypto_repo,
            crypto_revision=abi_args.old_crypto_rev,
            abi_dumps={},
            storage_tests={},
            modules={}
        )
        new_version = SimpleNamespace(
            version="new",
            repository=abi_args.new_repo,
            revision=abi_args.new_rev,
            commit=None,
            crypto_repository=abi_args.new_crypto_repo,
            crypto_revision=abi_args.new_crypto_rev,
            abi_dumps={},
            storage_tests={},
            modules={}
        )
        configuration = SimpleNamespace(
            verbose=abi_args.verbose,
            report_dir=abi_args.report_dir,
            keep_all_reports=abi_args.keep_all_reports,
            brief=abi_args.brief,
            check_abi=abi_args.check_abi,
            check_api=abi_args.check_api,
            check_storage=abi_args.check_storage,
            skip_file=abi_args.skip_file
        )
        abi_check = AbiChecker(old_version, new_version, configuration)
        return_code = abi_check.check_for_abi_changes()
        sys.exit(return_code)
    except Exception: # pylint: disable=broad-except
        # Print the backtrace and exit explicitly so as to exit with
        # status 2, not 1. SystemExit is not an Exception, so the explicit
        # exits above pass through with their own status.
        traceback.print_exc()
        sys.exit(2)


if __name__ == "__main__":
    run_main()
671