1#!/usr/bin/env python3 2 3"""Assemble Mbed TLS change log entries into the change log file. 4 5Add changelog entries to the first level-2 section. 6Create a new level-2 section for unreleased changes if needed. 7Remove the input files unless --keep-entries is specified. 8 9In each level-3 section, entries are sorted in chronological order 10(oldest first). From oldest to newest: 11* Merged entry files are sorted according to their merge date (date of 12 the merge commit that brought the commit that created the file into 13 the target branch). 14* Committed but unmerged entry files are sorted according to the date 15 of the commit that adds them. 16* Uncommitted entry files are sorted according to their modification time. 17 18You must run this program from within a git working directory. 19""" 20 21# Copyright The Mbed TLS Contributors 22# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 23 24import argparse 25from collections import OrderedDict, namedtuple 26import datetime 27import functools 28import glob 29import os 30import re 31import subprocess 32import sys 33 34class InputFormatError(Exception): 35 def __init__(self, filename, line_number, message, *args, **kwargs): 36 message = '{}:{}: {}'.format(filename, line_number, 37 message.format(*args, **kwargs)) 38 super().__init__(message) 39 40class CategoryParseError(Exception): 41 def __init__(self, line_offset, error_message): 42 self.line_offset = line_offset 43 self.error_message = error_message 44 super().__init__('{}: {}'.format(line_offset, error_message)) 45 46class LostContent(Exception): 47 def __init__(self, filename, line): 48 message = ('Lost content from {}: "{}"'.format(filename, line)) 49 super().__init__(message) 50 51# The category names we use in the changelog. 52# If you edit this, update ChangeLog.d/README.md. 53STANDARD_CATEGORIES = ( 54 'API changes', 55 'Default behavior changes', 56 'Requirement changes', 57 'New deprecations', 58 'Removals', 59 'Features', 60 'Security', 61 'Bugfix', 62 'Changes', 63) 64 65# The maximum line length for an entry 66MAX_LINE_LENGTH = 80 67 68CategoryContent = namedtuple('CategoryContent', [ 69 'name', 'title_line', # Title text and line number of the title 70 'body', 'body_line', # Body text and starting line number of the body 71]) 72 73class ChangelogFormat: 74 """Virtual class documenting how to write a changelog format class.""" 75 76 @classmethod 77 def extract_top_version(cls, changelog_file_content): 78 """Split out the top version section. 79 80 If the top version is already released, create a new top 81 version section for an unreleased version. 82 83 Return ``(header, top_version_title, top_version_body, trailer)`` 84 where the "top version" is the existing top version section if it's 85 for unreleased changes, and a newly created section otherwise. 86 To assemble the changelog after modifying top_version_body, 87 concatenate the four pieces. 88 """ 89 raise NotImplementedError 90 91 @classmethod 92 def version_title_text(cls, version_title): 93 """Return the text of a formatted version section title.""" 94 raise NotImplementedError 95 96 @classmethod 97 def split_categories(cls, version_body): 98 """Split a changelog version section body into categories. 99 100 Return a list of `CategoryContent` the name is category title 101 without any formatting. 102 """ 103 raise NotImplementedError 104 105 @classmethod 106 def format_category(cls, title, body): 107 """Construct the text of a category section from its title and body.""" 108 raise NotImplementedError 109 110class TextChangelogFormat(ChangelogFormat): 111 """The traditional Mbed TLS changelog format.""" 112 113 _unreleased_version_text = '= Mbed TLS x.x.x branch released xxxx-xx-xx' 114 @classmethod 115 def is_released_version(cls, title): 116 # Look for an incomplete release date 117 return not re.search(r'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title) 118 119 _top_version_re = re.compile(r'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)', 120 re.DOTALL) 121 @classmethod 122 def extract_top_version(cls, changelog_file_content): 123 """A version section starts with a line starting with '='.""" 124 m = re.search(cls._top_version_re, changelog_file_content) 125 top_version_start = m.start(1) 126 top_version_end = m.end(2) 127 top_version_title = m.group(1) 128 top_version_body = m.group(2) 129 if cls.is_released_version(top_version_title): 130 top_version_end = top_version_start 131 top_version_title = cls._unreleased_version_text + '\n\n' 132 top_version_body = '' 133 return (changelog_file_content[:top_version_start], 134 top_version_title, top_version_body, 135 changelog_file_content[top_version_end:]) 136 137 @classmethod 138 def version_title_text(cls, version_title): 139 return re.sub(r'\n.*', version_title, re.DOTALL) 140 141 _category_title_re = re.compile(r'(^\w.*)\n+', re.MULTILINE) 142 @classmethod 143 def split_categories(cls, version_body): 144 """A category title is a line with the title in column 0.""" 145 if not version_body: 146 return [] 147 title_matches = list(re.finditer(cls._category_title_re, version_body)) 148 if not title_matches or title_matches[0].start() != 0: 149 # There is junk before the first category. 150 raise CategoryParseError(0, 'Junk found where category expected') 151 title_starts = [m.start(1) for m in title_matches] 152 body_starts = [m.end(0) for m in title_matches] 153 body_ends = title_starts[1:] + [len(version_body)] 154 bodies = [version_body[body_start:body_end].rstrip('\n') + '\n' 155 for (body_start, body_end) in zip(body_starts, body_ends)] 156 title_lines = [version_body[:pos].count('\n') for pos in title_starts] 157 body_lines = [version_body[:pos].count('\n') for pos in body_starts] 158 return [CategoryContent(title_match.group(1), title_line, 159 body, body_line) 160 for title_match, title_line, body, body_line 161 in zip(title_matches, title_lines, bodies, body_lines)] 162 163 @classmethod 164 def format_category(cls, title, body): 165 # `split_categories` ensures that each body ends with a newline. 166 # Make sure that there is additionally a blank line between categories. 167 if not body.endswith('\n\n'): 168 body += '\n' 169 return title + '\n' + body 170 171class ChangeLog: 172 """An Mbed TLS changelog. 173 174 A changelog file consists of some header text followed by one or 175 more version sections. The version sections are in reverse 176 chronological order. Each version section consists of a title and a body. 177 178 The body of a version section consists of zero or more category 179 subsections. Each category subsection consists of a title and a body. 180 181 A changelog entry file has the same format as the body of a version section. 182 183 A `ChangelogFormat` object defines the concrete syntax of the changelog. 184 Entry files must have the same format as the changelog file. 185 """ 186 187 # Only accept dotted version numbers (e.g. "3.1", not "3"). 188 # Refuse ".x" in a version number where x is a letter: this indicates 189 # a version that is not yet released. Something like "3.1a" is accepted. 190 _version_number_re = re.compile(r'[0-9]+\.[0-9A-Za-z.]+') 191 _incomplete_version_number_re = re.compile(r'.*\.[A-Za-z]') 192 _only_url_re = re.compile(r'^\s*\w+://\S+\s*$') 193 _has_url_re = re.compile(r'.*://.*') 194 195 def add_categories_from_text(self, filename, line_offset, 196 text, allow_unknown_category): 197 """Parse a version section or entry file.""" 198 try: 199 categories = self.format.split_categories(text) 200 except CategoryParseError as e: 201 raise InputFormatError(filename, line_offset + e.line_offset, 202 e.error_message) 203 for category in categories: 204 if not allow_unknown_category and \ 205 category.name not in self.categories: 206 raise InputFormatError(filename, 207 line_offset + category.title_line, 208 'Unknown category: "{}"', 209 category.name) 210 211 body_split = category.body.splitlines() 212 213 for line_number, line in enumerate(body_split, 1): 214 if not self._only_url_re.match(line) and \ 215 len(line) > MAX_LINE_LENGTH: 216 long_url_msg = '. URL exceeding length limit must be alone in its line.' \ 217 if self._has_url_re.match(line) else "" 218 raise InputFormatError(filename, 219 category.body_line + line_number, 220 'Line is longer than allowed: ' 221 'Length {} (Max {}){}', 222 len(line), MAX_LINE_LENGTH, 223 long_url_msg) 224 225 self.categories[category.name] += category.body 226 227 def __init__(self, input_stream, changelog_format): 228 """Create a changelog object. 229 230 Populate the changelog object from the content of the file 231 input_stream. 232 """ 233 self.format = changelog_format 234 whole_file = input_stream.read() 235 (self.header, 236 self.top_version_title, top_version_body, 237 self.trailer) = self.format.extract_top_version(whole_file) 238 # Split the top version section into categories. 239 self.categories = OrderedDict() 240 for category in STANDARD_CATEGORIES: 241 self.categories[category] = '' 242 offset = (self.header + self.top_version_title).count('\n') + 1 243 self.add_categories_from_text(input_stream.name, offset, 244 top_version_body, True) 245 246 def add_file(self, input_stream): 247 """Add changelog entries from a file. 248 """ 249 self.add_categories_from_text(input_stream.name, 1, 250 input_stream.read(), False) 251 252 def write(self, filename): 253 """Write the changelog to the specified file. 254 """ 255 with open(filename, 'w', encoding='utf-8') as out: 256 out.write(self.header) 257 out.write(self.top_version_title) 258 for title, body in self.categories.items(): 259 if not body: 260 continue 261 out.write(self.format.format_category(title, body)) 262 out.write(self.trailer) 263 264 265@functools.total_ordering 266class EntryFileSortKey: 267 """This classes defines an ordering on changelog entry files: older < newer. 268 269 * Merged entry files are sorted according to their merge date (date of 270 the merge commit that brought the commit that created the file into 271 the target branch). 272 * Committed but unmerged entry files are sorted according to the date 273 of the commit that adds them. 274 * Uncommitted entry files are sorted according to their modification time. 275 276 This class assumes that the file is in a git working directory with 277 the target branch checked out. 278 """ 279 280 # Categories of files. A lower number is considered older. 281 MERGED = 0 282 COMMITTED = 1 283 LOCAL = 2 284 285 @staticmethod 286 def creation_hash(filename): 287 """Return the git commit id at which the given file was created. 288 289 Return None if the file was never checked into git. 290 """ 291 hashes = subprocess.check_output(['git', 'log', '--format=%H', 292 '--follow', 293 '--', filename]) 294 m = re.search('(.+)$', hashes.decode('ascii')) 295 if not m: 296 # The git output is empty. This means that the file was 297 # never checked in. 298 return None 299 # The last commit in the log is the oldest one, which is when the 300 # file was created. 301 return m.group(0) 302 303 @staticmethod 304 def list_merges(some_hash, target, *options): 305 """List merge commits from some_hash to target. 306 307 Pass options to git to select which commits are included. 308 """ 309 text = subprocess.check_output(['git', 'rev-list', 310 '--merges', *options, 311 '..'.join([some_hash, target])]) 312 return text.decode('ascii').rstrip('\n').split('\n') 313 314 @classmethod 315 def merge_hash(cls, some_hash): 316 """Return the git commit id at which the given commit was merged. 317 318 Return None if the given commit was never merged. 319 """ 320 target = 'HEAD' 321 # List the merges from some_hash to the target in two ways. 322 # The ancestry list is the ones that are both descendants of 323 # some_hash and ancestors of the target. 324 ancestry = frozenset(cls.list_merges(some_hash, target, 325 '--ancestry-path')) 326 # The first_parents list only contains merges that are directly 327 # on the target branch. We want it in reverse order (oldest first). 328 first_parents = cls.list_merges(some_hash, target, 329 '--first-parent', '--reverse') 330 # Look for the oldest merge commit that's both on the direct path 331 # and directly on the target branch. That's the place where some_hash 332 # was merged on the target branch. See 333 # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit 334 for commit in first_parents: 335 if commit in ancestry: 336 return commit 337 return None 338 339 @staticmethod 340 def commit_timestamp(commit_id): 341 """Return the timestamp of the given commit.""" 342 text = subprocess.check_output(['git', 'show', '-s', 343 '--format=%ct', 344 commit_id]) 345 return datetime.datetime.utcfromtimestamp(int(text)) 346 347 @staticmethod 348 def file_timestamp(filename): 349 """Return the modification timestamp of the given file.""" 350 mtime = os.stat(filename).st_mtime 351 return datetime.datetime.fromtimestamp(mtime) 352 353 def __init__(self, filename): 354 """Determine position of the file in the changelog entry order. 355 356 This constructor returns an object that can be used with comparison 357 operators, with `sort` and `sorted`, etc. Older entries are sorted 358 before newer entries. 359 """ 360 self.filename = filename 361 creation_hash = self.creation_hash(filename) 362 if not creation_hash: 363 self.category = self.LOCAL 364 self.datetime = self.file_timestamp(filename) 365 return 366 merge_hash = self.merge_hash(creation_hash) 367 if not merge_hash: 368 self.category = self.COMMITTED 369 self.datetime = self.commit_timestamp(creation_hash) 370 return 371 self.category = self.MERGED 372 self.datetime = self.commit_timestamp(merge_hash) 373 374 def sort_key(self): 375 """"Return a concrete sort key for this entry file sort key object. 376 377 ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``. 378 """ 379 return (self.category, self.datetime, self.filename) 380 381 def __eq__(self, other): 382 return self.sort_key() == other.sort_key() 383 384 def __lt__(self, other): 385 return self.sort_key() < other.sort_key() 386 387 388def check_output(generated_output_file, main_input_file, merged_files): 389 """Make sanity checks on the generated output. 390 391 The intent of these sanity checks is to have reasonable confidence 392 that no content has been lost. 393 394 The sanity check is that every line that is present in an input file 395 is also present in an output file. This is not perfect but good enough 396 for now. 397 """ 398 with open(generated_output_file, 'r', encoding='utf-8') as fd: 399 generated_output = set(fd) 400 for line in open(main_input_file, 'r', encoding='utf-8'): 401 if line not in generated_output: 402 raise LostContent('original file', line) 403 for merged_file in merged_files: 404 for line in open(merged_file, 'r', encoding='utf-8'): 405 if line not in generated_output: 406 raise LostContent(merged_file, line) 407 408def finish_output(changelog, output_file, input_file, merged_files): 409 """Write the changelog to the output file. 410 411 The input file and the list of merged files are used only for sanity 412 checks on the output. 413 """ 414 if os.path.exists(output_file) and not os.path.isfile(output_file): 415 # The output is a non-regular file (e.g. pipe). Write to it directly. 416 output_temp = output_file 417 else: 418 # The output is a regular file. Write to a temporary file, 419 # then move it into place atomically. 420 output_temp = output_file + '.tmp' 421 changelog.write(output_temp) 422 check_output(output_temp, input_file, merged_files) 423 if output_temp != output_file: 424 os.rename(output_temp, output_file) 425 426def remove_merged_entries(files_to_remove): 427 for filename in files_to_remove: 428 os.remove(filename) 429 430def list_files_to_merge(options): 431 """List the entry files to merge, oldest first. 432 433 "Oldest" is defined by `EntryFileSortKey`. 434 """ 435 files_to_merge = glob.glob(os.path.join(options.dir, '*.txt')) 436 files_to_merge.sort(key=EntryFileSortKey) 437 return files_to_merge 438 439def merge_entries(options): 440 """Merge changelog entries into the changelog file. 441 442 Read the changelog file from options.input. 443 Read entries to merge from the directory options.dir. 444 Write the new changelog to options.output. 445 Remove the merged entries if options.keep_entries is false. 446 """ 447 with open(options.input, 'r', encoding='utf-8') as input_file: 448 changelog = ChangeLog(input_file, TextChangelogFormat) 449 files_to_merge = list_files_to_merge(options) 450 if not files_to_merge: 451 sys.stderr.write('There are no pending changelog entries.\n') 452 return 453 for filename in files_to_merge: 454 with open(filename, 'r', encoding='utf-8') as input_file: 455 changelog.add_file(input_file) 456 finish_output(changelog, options.output, options.input, files_to_merge) 457 if not options.keep_entries: 458 remove_merged_entries(files_to_merge) 459 460def show_file_timestamps(options): 461 """List the files to merge and their timestamp. 462 463 This is only intended for debugging purposes. 464 """ 465 files = list_files_to_merge(options) 466 for filename in files: 467 ts = EntryFileSortKey(filename) 468 print(ts.category, ts.datetime, filename) 469 470def set_defaults(options): 471 """Add default values for missing options.""" 472 output_file = getattr(options, 'output', None) 473 if output_file is None: 474 options.output = options.input 475 if getattr(options, 'keep_entries', None) is None: 476 options.keep_entries = (output_file is not None) 477 478def main(): 479 """Command line entry point.""" 480 parser = argparse.ArgumentParser(description=__doc__) 481 parser.add_argument('--dir', '-d', metavar='DIR', 482 default='ChangeLog.d', 483 help='Directory to read entries from' 484 ' (default: ChangeLog.d)') 485 parser.add_argument('--input', '-i', metavar='FILE', 486 default='ChangeLog', 487 help='Existing changelog file to read from and augment' 488 ' (default: ChangeLog)') 489 parser.add_argument('--keep-entries', 490 action='store_true', dest='keep_entries', default=None, 491 help='Keep the files containing entries' 492 ' (default: remove them if --output/-o is not specified)') 493 parser.add_argument('--no-keep-entries', 494 action='store_false', dest='keep_entries', 495 help='Remove the files containing entries after they are merged' 496 ' (default: remove them if --output/-o is not specified)') 497 parser.add_argument('--output', '-o', metavar='FILE', 498 help='Output changelog file' 499 ' (default: overwrite the input)') 500 parser.add_argument('--list-files-only', 501 action='store_true', 502 help=('Only list the files that would be processed ' 503 '(with some debugging information)')) 504 options = parser.parse_args() 505 set_defaults(options) 506 if options.list_files_only: 507 show_file_timestamps(options) 508 return 509 merge_entries(options) 510 511if __name__ == '__main__': 512 main() 513