#!/usr/bin/env python3
"""This script compares the interfaces of two versions of Mbed TLS, looking
for backward incompatibilities between two different Git revisions within
an Mbed TLS repository. It must be run from the root of a Git working tree.

### How the script works ###

For the source (API) and runtime (ABI) interface compatibility, this script
is a small wrapper around the abi-compliance-checker and abi-dumper tools,
applying them to compare the header and library files.

For the storage format, this script compares the automatically generated
storage tests and the manual read tests, and complains if there is a
reduction in coverage. A change in test data will be signaled as a
coverage reduction since the old test data is no longer present. A change in
how test data is presented will be signaled as well; this would be a false
positive.

The results of the API/ABI comparison are either formatted as HTML and stored
at a configurable location, or are given as a brief list of problems.
Returns 0 on success, 1 on non-compliance, and 2 if there is an error
while running the script.

### How to interpret non-compliance ###

This script has relatively common false positives. In many scenarios, it only
reports a pass if there is a strict textual match between the old version and
the new version, and it reports problems where there is a sufficient semantic
match but not a textual match. This section lists some common false positives.
This is not an exhaustive list: in the end what matters is whether we are
breaking a backward compatibility goal.

**API**: the goal is that if an application works with the old version of the
library, it can be recompiled against the new version and will still work.
This is normally validated by comparing the declarations in `include/*/*.h`.
A failure is a declaration that has disappeared or that now has a different
type.

 * It's ok to change or remove macros and functions that are documented as
   for internal use only or as experimental.
 * It's ok to rename function or macro parameters as long as the semantics
   has not changed.
 * It's ok to change or remove structure fields that are documented as
   private.
 * It's ok to add fields to a structure that already had private fields
   or was documented as extensible.

**ABI**: the goal is that if an application was built against the old version
of the library, the same binary will work when linked against the new version.
This is normally validated by comparing the symbols exported by `libmbed*.so`.
A failure is a symbol that is no longer exported by the same library or that
now has a different type.

 * All ABI changes are acceptable if the library version is bumped
   (see `scripts/bump_version.sh`).
 * ABI changes that concern functions which are declared only inside the
   library directory, and not in `include/*/*.h`, are acceptable only if
   the function was only ever used inside the same library (libmbedcrypto,
   libmbedx509, libmbedtls). As a counter example, if the old version
   of libmbedtls calls mbedtls_foo() from libmbedcrypto, and the new version
   of libmbedcrypto no longer has a compatible mbedtls_foo(), this does
   require a version bump for libmbedcrypto.

**Storage format**: the goal is to check that persistent keys stored by the
old version can be read by the new version. This is normally validated by
comparing the `*read*` test cases in `test_suite*storage_format*.data`.
A failure is a storage read test case that is no longer present with the same
function name and parameter list.

 * It's ok if the same test data is present, but its presentation has changed,
   for example if a test function is renamed or has different parameters.
 * It's ok if redundant tests are removed.

**Generated test coverage**: the goal is to check that automatically
generated tests have as much coverage as before. This is normally validated
by comparing the test cases that are automatically generated by a script.
A failure is a generated test case that is no longer present with the same
function name and parameter list.

 * It's ok if the same test data is present, but its presentation has changed,
   for example if a test function is renamed or has different parameters.
 * It's ok if redundant tests are removed.

"""

# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later

import glob
import os
import re
import sys
import traceback
import shutil
import subprocess
import argparse
import logging
import tempfile
import fnmatch
from types import SimpleNamespace

import xml.etree.ElementTree as ET

import framework_scripts_path # pylint: disable=unused-import
from mbedtls_framework import build_tree


class AbiChecker:
    """API and ABI checker."""

    def __init__(self, old_version, new_version, configuration):
        """Instantiate the API/ABI checker.

        old_version: RepoVersion containing details to compare against
        new_version: RepoVersion containing details to check
        configuration.verbose: if true, log at debug level
        configuration.report_dir: directory for output files
        configuration.keep_all_reports: if false, delete old reports
        configuration.brief: if true, output shorter report to stdout
        configuration.check_abi: if true, compare ABIs
        configuration.check_api: if true, compare APIs
        configuration.check_storage: if true, compare storage format tests
        configuration.skip_file: path to file containing symbols and types to skip

        Raise an Exception if exactly one of check_abi and check_api is set.
        """
        self.repo_path = "."
        self.log = None
        self.verbose = configuration.verbose
        self._setup_logger()
        self.report_dir = os.path.abspath(configuration.report_dir)
        self.keep_all_reports = configuration.keep_all_reports
        # Only remove the report directory at the end if this run is
        # responsible for its existence and nothing must be kept in it.
        self.can_remove_report_dir = not (os.path.exists(self.report_dir) or
                                          self.keep_all_reports)
        self.old_version = old_version
        self.new_version = new_version
        self.skip_file = configuration.skip_file
        self.check_abi = configuration.check_abi
        self.check_api = configuration.check_api
        if self.check_abi != self.check_api:
            raise Exception('Checking API without ABI or vice versa is not supported')
        self.check_storage_tests = configuration.check_storage
        self.brief = configuration.brief
        self.git_command = "git"
        self.make_command = "make"

    def _setup_logger(self):
        """Configure the root logger according to self.verbose."""
        self.log = logging.getLogger()
        if self.verbose:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)
        self.log.addHandler(logging.StreamHandler())

    @staticmethod
    def check_abi_tools_are_installed():
        """Raise an Exception if abi-dumper or abi-compliance-checker
        cannot be found in the PATH."""
        for command in ["abi-dumper", "abi-compliance-checker"]:
            if not shutil.which(command):
                raise Exception("{} not installed, aborting".format(command))

    def _get_clean_worktree_for_git_revision(self, version):
        """Make a separate worktree with version.revision checked out.
        Do not modify the current worktree.

        Set version.commit to the commit that was checked out and
        return the path to the new worktree."""
        git_worktree_path = tempfile.mkdtemp()
        if version.repository:
            self.log.debug(
                "Checking out git worktree for revision {} from {}".format(
                    version.revision, version.repository
                )
            )
            fetch_output = subprocess.check_output(
                [self.git_command, "fetch",
                 version.repository, version.revision],
                cwd=self.repo_path,
                stderr=subprocess.STDOUT
            )
            self.log.debug(fetch_output.decode("utf-8"))
            worktree_rev = "FETCH_HEAD"
        else:
            self.log.debug("Checking out git worktree for revision {}".format(
                version.revision
            ))
            worktree_rev = version.revision
        worktree_output = subprocess.check_output(
            [self.git_command, "worktree", "add", "--detach",
             git_worktree_path, worktree_rev],
            cwd=self.repo_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(worktree_output.decode("utf-8"))
        version.commit = subprocess.check_output(
            [self.git_command, "rev-parse", "HEAD"],
            cwd=git_worktree_path,
            stderr=subprocess.STDOUT
        ).decode("ascii").rstrip()
        self.log.debug("Commit is {}".format(version.commit))
        return git_worktree_path

    def _update_git_submodules(self, git_worktree_path, version):
        """If the crypto submodule is present, initialize it.
        if version.crypto_revision exists, update it to that revision,
        otherwise update it to the default revision"""
        update_output = subprocess.check_output(
            [self.git_command, "submodule", "update", "--init", '--recursive'],
            cwd=git_worktree_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(update_output.decode("utf-8"))
        crypto_path = os.path.join(git_worktree_path, "crypto")
        if not (os.path.exists(crypto_path) and version.crypto_revision):
            return

        if version.crypto_repository:
            fetch_output = subprocess.check_output(
                [self.git_command, "fetch", version.crypto_repository,
                 version.crypto_revision],
                cwd=crypto_path,
                stderr=subprocess.STDOUT
            )
            self.log.debug(fetch_output.decode("utf-8"))
            crypto_rev = "FETCH_HEAD"
        else:
            crypto_rev = version.crypto_revision

        checkout_output = subprocess.check_output(
            [self.git_command, "checkout", crypto_rev],
            cwd=crypto_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(checkout_output.decode("utf-8"))

    def _build_shared_libraries(self, git_worktree_path, version):
        """Build the shared libraries in the specified worktree.

        Record the path of every *.so file found under the worktree in
        version.modules, keyed by the file name without its extension."""
        my_environment = os.environ.copy()
        my_environment["CFLAGS"] = "-g -Og"
        my_environment["SHARED"] = "1"
        if os.path.exists(os.path.join(git_worktree_path, "crypto")):
            my_environment["USE_CRYPTO_SUBMODULE"] = "1"
        make_output = subprocess.check_output(
            [self.make_command, "lib"],
            env=my_environment,
            cwd=git_worktree_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(make_output.decode("utf-8"))
        for root, _dirs, files in os.walk(git_worktree_path):
            for file in fnmatch.filter(files, "*.so"):
                version.modules[os.path.splitext(file)[0]] = (
                    os.path.join(root, file)
                )

    @staticmethod
    def _pretty_revision(version):
        """Return a human-readable name for the revision, including the
        commit when it differs from the requested revision."""
        if version.revision == version.commit:
            return version.revision
        else:
            return "{} ({})".format(version.revision, version.commit)

    def _get_abi_dumps_from_shared_libraries(self, version):
        """Generate the ABI dumps for the specified git revision.
        The shared libraries must have been built and the module paths
        present in version.modules."""
        for mbed_module, module_path in version.modules.items():
            output_path = os.path.join(
                self.report_dir, "{}-{}-{}.dump".format(
                    mbed_module, version.revision, version.version
                )
            )
            abi_dump_command = [
                "abi-dumper",
                module_path,
                "-o", output_path,
                "-lver", self._pretty_revision(version),
            ]
            abi_dump_output = subprocess.check_output(
                abi_dump_command,
                stderr=subprocess.STDOUT
            )
            self.log.debug(abi_dump_output.decode("utf-8"))
            version.abi_dumps[mbed_module] = output_path

    @staticmethod
    def _normalize_storage_test_case_data(line):
        """Eliminate cosmetic or irrelevant details in storage format test cases."""
        line = re.sub(r'\s+', r'', line)
        return line

    def _read_storage_tests(self,
                            directory,
                            filename,
                            is_generated,
                            storage_tests):
        """Record storage tests from the given file.

        Populate the storage_tests dictionary with test cases read from
        filename under directory. Keys are the normalized test case data
        lines; values carry the file name, line number and description.
        """
        at_paragraph_start = True
        description = None
        full_path = os.path.join(directory, filename)
        with open(full_path, encoding='utf-8') as fd:
            for line_number, line in enumerate(fd, 1):
                line = line.strip()
                if not line:
                    # A blank line separates test cases.
                    at_paragraph_start = True
                    continue
                if line.startswith('#'):
                    continue
                if at_paragraph_start:
                    # The first line of a test case is its description.
                    description = line
                    at_paragraph_start = False
                    continue
                if line.startswith('depends_on:'):
                    continue
                # We've reached a test case data line
                test_case_data = self._normalize_storage_test_case_data(line)
                if not is_generated:
                    # In manual test data, only look at read tests.
                    function_name = test_case_data.split(':', 1)[0]
                    if 'read' not in function_name.split('_'):
                        continue
                metadata = SimpleNamespace(
                    filename=filename,
                    line_number=line_number,
                    description=description
                )
                storage_tests[test_case_data] = metadata

    @staticmethod
    def _generate_psa_tests_script(git_worktree_path):
        """Return the path of generate_psa_tests.py relative to the worktree."""
        generate_psa_tests = 'framework/scripts/generate_psa_tests.py'
        if not os.path.isfile(os.path.join(git_worktree_path,
                                           generate_psa_tests)):
            # The checked-out revision is from before generate_psa_tests.py
            # was moved to the framework submodule. Use the old location.
            generate_psa_tests = 'tests/scripts/generate_psa_tests.py'
        return generate_psa_tests

    @staticmethod
    def _list_generated_test_data_files(git_worktree_path):
        """List the generated test data files."""
        generate_psa_tests = \
            AbiChecker._generate_psa_tests_script(git_worktree_path)
        output = subprocess.check_output(
            [generate_psa_tests, '--list'],
            cwd=git_worktree_path,
        ).decode('ascii')
        return [line for line in output.split('\n') if line]

    def _get_storage_format_tests(self, version, git_worktree_path):
        """Record the storage format tests for the specified git version.

        The storage format tests are the test suite data files whose name
        contains "storage_format".

        The version must be checked out at git_worktree_path.

        This function creates or updates the generated data files.
        """
        # Existing test data files. This may be missing some automatically
        # generated files if they haven't been generated yet.
        # Look inside the checked-out worktree (not the current directory),
        # since that is where the files are read from below. Paths are kept
        # relative to the worktree root.
        pattern = 'tests/suites/test_suite_*storage_format*.data'
        storage_data_files = set(
            os.path.relpath(path, git_worktree_path)
            for path in glob.glob(
                os.path.join(glob.escape(git_worktree_path), pattern)
            )
        )
        # Discover and (re)generate automatically generated data files.
        to_be_generated = set()
        for filename in self._list_generated_test_data_files(git_worktree_path):
            if 'storage_format' in filename:
                storage_data_files.add(filename)
                to_be_generated.add(filename)

        generate_psa_tests = self._generate_psa_tests_script(git_worktree_path)
        subprocess.check_call(
            [generate_psa_tests] + sorted(to_be_generated),
            cwd=git_worktree_path,
        )
        for test_file in sorted(storage_data_files):
            self._read_storage_tests(git_worktree_path,
                                     test_file,
                                     test_file in to_be_generated,
                                     version.storage_tests)

    def _cleanup_worktree(self, git_worktree_path):
        """Remove the specified git worktree."""
        shutil.rmtree(git_worktree_path)
        worktree_output = subprocess.check_output(
            [self.git_command, "worktree", "prune"],
            cwd=self.repo_path,
            stderr=subprocess.STDOUT
        )
        self.log.debug(worktree_output.decode("utf-8"))

    def _get_abi_dump_for_ref(self, version):
        """Generate the interface information for the specified git revision."""
        git_worktree_path = self._get_clean_worktree_for_git_revision(version)
        try:
            self._update_git_submodules(git_worktree_path, version)
            if self.check_abi:
                self._build_shared_libraries(git_worktree_path, version)
                self._get_abi_dumps_from_shared_libraries(version)
            if self.check_storage_tests:
                self._get_storage_format_tests(version, git_worktree_path)
        finally:
            # Always remove the temporary worktree, even if a build or
            # test generation step failed.
            self._cleanup_worktree(git_worktree_path)

    def _remove_children_with_tag(self, parent, tag):
        """Recursively remove all descendants of parent that have this tag."""
        # Iterate over a copy since we remove children while iterating.
        # (Element.getchildren() no longer exists since Python 3.9.)
        for child in list(parent):
            if child.tag == tag:
                parent.remove(child)
            else:
                self._remove_children_with_tag(child, tag)

    def _remove_extra_detail_from_report(self, report_root):
        """Strip informational sections and empty problem lists from
        the XML report, in place."""
        for tag in ['test_info', 'test_results', 'problem_summary',
                    'added_symbols', 'affected']:
            self._remove_children_with_tag(report_root, tag)

        for report in report_root:
            for problems in list(report):
                if len(problems) == 0:
                    report.remove(problems)

    def _abi_compliance_command(self, mbed_module, output_path):
        """Build the command to run to analyze the library mbed_module.
        The report will be placed in output_path."""
        abi_compliance_command = [
            "abi-compliance-checker",
            "-l", mbed_module,
            "-old", self.old_version.abi_dumps[mbed_module],
            "-new", self.new_version.abi_dumps[mbed_module],
            "-strict",
            "-report-path", output_path,
        ]
        if self.skip_file:
            abi_compliance_command += ["-skip-symbols", self.skip_file,
                                       "-skip-types", self.skip_file]
        if self.brief:
            abi_compliance_command += ["-report-format", "xml",
                                       "-stdout"]
        return abi_compliance_command

    def _is_library_compatible(self, mbed_module, compatibility_report):
        """Test if the library mbed_module has remained compatible.
        Append a message regarding compatibility to compatibility_report.

        Return True if the library is compatible, False otherwise.
        Re-raise the CalledProcessError if the checker exits with a status
        other than 1."""
        output_path = os.path.join(
            self.report_dir, "{}-{}-{}.html".format(
                mbed_module, self.old_version.revision,
                self.new_version.revision
            )
        )
        try:
            subprocess.check_output(
                self._abi_compliance_command(mbed_module, output_path),
                stderr=subprocess.STDOUT
            )
        except subprocess.CalledProcessError as err:
            # Exit status 1 is handled as "incompatibilities found";
            # any other status is propagated as an error.
            if err.returncode != 1:
                raise
            if self.brief:
                self.log.info(
                    "Compatibility issues found for {}".format(mbed_module)
                )
                report_root = ET.fromstring(err.output.decode("utf-8"))
                self._remove_extra_detail_from_report(report_root)
                self.log.info(ET.tostring(report_root).decode("utf-8"))
            else:
                self.can_remove_report_dir = False
                compatibility_report.append(
                    "Compatibility issues found for {}, "
                    "for details see {}".format(mbed_module, output_path)
                )
            return False
        compatibility_report.append(
            "No compatibility issues for {}".format(mbed_module)
        )
        if not (self.keep_all_reports or self.brief):
            os.remove(output_path)
        return True

    @staticmethod
    def _is_storage_format_compatible(old_tests, new_tests,
                                      compatibility_report):
        """Check whether all tests present in old_tests are also in new_tests.

        Append a message regarding compatibility to compatibility_report.
        Return True if no old test case is missing from new_tests.
        """
        missing = frozenset(old_tests.keys()).difference(new_tests.keys())
        for test_data in sorted(missing):
            metadata = old_tests[test_data]
            compatibility_report.append(
                'Test case from {} line {} "{}" has disappeared: {}'.format(
                    metadata.filename, metadata.line_number,
                    metadata.description, test_data
                )
            )
        compatibility_report.append(
            'FAIL: {}/{} storage format test cases have changed or disappeared.'.format(
                len(missing), len(old_tests)
            ) if missing else
            'PASS: All {} storage format test cases are preserved.'.format(
                len(old_tests)
            )
        )
        compatibility_report.append(
            'Info: number of storage format tests cases: {} -> {}.'.format(
                len(old_tests), len(new_tests)
            )
        )
        return not missing

    def get_abi_compatibility_report(self):
        """Generate a report of the differences between the reference ABI
        and the new ABI. ABI dumps from self.old_version and self.new_version
        must be available.

        Return 0 if no incompatibility was found, 1 otherwise."""
        compatibility_report = ["Checking evolution from {} to {}".format(
            self._pretty_revision(self.old_version),
            self._pretty_revision(self.new_version)
        )]
        compliance_return_code = 0

        if self.check_abi:
            # Sort so that the report order is deterministic.
            shared_modules = sorted(set(self.old_version.modules.keys()) &
                                    set(self.new_version.modules.keys()))
            for mbed_module in shared_modules:
                if not self._is_library_compatible(mbed_module,
                                                   compatibility_report):
                    compliance_return_code = 1

        if self.check_storage_tests:
            if not self._is_storage_format_compatible(
                    self.old_version.storage_tests,
                    self.new_version.storage_tests,
                    compatibility_report):
                compliance_return_code = 1

        for version in [self.old_version, self.new_version]:
            for mbed_module_dump in version.abi_dumps.values():
                os.remove(mbed_module_dump)
        # The report directory may never have been created, for example
        # when only the storage format tests are checked.
        if self.can_remove_report_dir and os.path.isdir(self.report_dir):
            os.rmdir(self.report_dir)
        self.log.info("\n".join(compatibility_report))
        return compliance_return_code

    def check_for_abi_changes(self):
        """Generate a report of ABI differences
        between self.old_version and self.new_version.

        Return 0 if no incompatibility was found, 1 otherwise."""
        build_tree.check_repo_path()
        if self.check_api or self.check_abi:
            self.check_abi_tools_are_installed()
        self._get_abi_dump_for_ref(self.old_version)
        self._get_abi_dump_for_ref(self.new_version)
        return self.get_abi_compatibility_report()


def run_main():
    """Parse the command line, run the checks and exit.

    The exit status is 0 on success, 1 on non-compliance and 2 if an
    error occurred while running the script."""
    try:
        parser = argparse.ArgumentParser(
            description=__doc__
        )
        parser.add_argument(
            "-v", "--verbose", action="store_true",
            help="set verbosity level",
        )
        parser.add_argument(
            "-r", "--report-dir", type=str, default="reports",
            help="directory where reports are stored, default is reports",
        )
        parser.add_argument(
            "-k", "--keep-all-reports", action="store_true",
            help="keep all reports, even if there are no compatibility issues",
        )
        parser.add_argument(
            "-o", "--old-rev", type=str, help="revision for old version.",
            required=True,
        )
        parser.add_argument(
            "-or", "--old-repo", type=str, help="repository for old version."
        )
        parser.add_argument(
            "-oc", "--old-crypto-rev", type=str,
            help="revision for old crypto submodule."
        )
        parser.add_argument(
            "-ocr", "--old-crypto-repo", type=str,
            help="repository for old crypto submodule."
        )
        parser.add_argument(
            "-n", "--new-rev", type=str, help="revision for new version",
            required=True,
        )
        parser.add_argument(
            "-nr", "--new-repo", type=str, help="repository for new version."
        )
        parser.add_argument(
            "-nc", "--new-crypto-rev", type=str,
            help="revision for new crypto version"
        )
        parser.add_argument(
            "-ncr", "--new-crypto-repo", type=str,
            help="repository for new crypto submodule."
        )
        parser.add_argument(
            "-s", "--skip-file", type=str,
            help=("path to file containing symbols and types to skip "
                  "(typically \"-s identifiers\" after running "
                  "\"tests/scripts/list-identifiers.sh --internal\")")
        )
        parser.add_argument(
            "--check-abi",
            action='store_true', default=True,
            help="Perform ABI comparison (default: yes)"
        )
        parser.add_argument("--no-check-abi", action='store_false', dest='check_abi')
        parser.add_argument(
            "--check-api",
            action='store_true', default=True,
            help="Perform API comparison (default: yes)"
        )
        parser.add_argument("--no-check-api", action='store_false', dest='check_api')
        parser.add_argument(
            "--check-storage",
            action='store_true', default=True,
            help="Perform storage tests comparison (default: yes)"
        )
        parser.add_argument("--no-check-storage", action='store_false', dest='check_storage')
        parser.add_argument(
            "-b", "--brief", action="store_true",
            help="output only the list of issues to stdout, instead of a full report",
        )
        abi_args = parser.parse_args()
        if os.path.isfile(abi_args.report_dir):
            # This is an error while running the script: exit with status 2
            # rather than the default status 0 of parser.exit().
            parser.exit(
                status=2,
                message="Error: {} is not a directory\n".format(
                    abi_args.report_dir
                )
            )
        old_version = SimpleNamespace(
            version="old",
            repository=abi_args.old_repo,
            revision=abi_args.old_rev,
            commit=None,
            crypto_repository=abi_args.old_crypto_repo,
            crypto_revision=abi_args.old_crypto_rev,
            abi_dumps={},
            storage_tests={},
            modules={}
        )
        new_version = SimpleNamespace(
            version="new",
            repository=abi_args.new_repo,
            revision=abi_args.new_rev,
            commit=None,
            crypto_repository=abi_args.new_crypto_repo,
            crypto_revision=abi_args.new_crypto_rev,
            abi_dumps={},
            storage_tests={},
            modules={}
        )
        configuration = SimpleNamespace(
            verbose=abi_args.verbose,
            report_dir=abi_args.report_dir,
            keep_all_reports=abi_args.keep_all_reports,
            brief=abi_args.brief,
            check_abi=abi_args.check_abi,
            check_api=abi_args.check_api,
            check_storage=abi_args.check_storage,
            skip_file=abi_args.skip_file
        )
        abi_check = AbiChecker(old_version, new_version, configuration)
        return_code = abi_check.check_for_abi_changes()
        sys.exit(return_code)
    except Exception: # pylint: disable=broad-except
        # Print the backtrace and exit explicitly so as to exit with
        # status 2, not 1.
        traceback.print_exc()
        sys.exit(2)


if __name__ == "__main__":
    run_main()