#!/usr/bin/env python3

# Copyright (c) 2019 Nordic Semiconductor ASA
# SPDX-License-Identifier: Apache-2.0

"""
Lists maintainers for files or commits. Similar in function to
scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is
in MAINTAINERS.yml.

The comment at the top of MAINTAINERS.yml in Zephyr documents the file format.

See the help texts for the various subcommands for more information. They can
be viewed with e.g.

    ./get_maintainer.py path --help

This executable doubles as a Python library. Identifiers not prefixed with '_'
are part of the library API. The library documentation can be viewed with this
command:

    $ pydoc get_maintainer
"""

import argparse
import operator
import os
import pathlib
import re
import shlex
import subprocess
import sys

from yaml import load, YAMLError
try:
    # Use the speedier C LibYAML parser if available
    from yaml import CSafeLoader as SafeLoader
except ImportError:
    from yaml import SafeLoader


def _main():
    # Entry point when run as an executable. Parses the command line, loads
    # the maintainers file, and dispatches to the selected subcommand.
    # Maintainers-file and Git errors are reported via _serr() instead of a
    # traceback.

    args = _parse_args()
    try:
        args.cmd_fn(Maintainers(args.maintainers), args)
    except (MaintainersError, GitError) as e:
        _serr(e)


def _parse_args():
    # Parses arguments when run as an executable. Returns the argparse
    # namespace, which always has a 'cmd_fn' attribute holding the Maintainers
    # method that implements the selected subcommand. Exits with the usage
    # text if no subcommand is given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__, allow_abbrev=False)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    path_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    path_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    path_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in "
             "some area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List all areas maintained by maintainer.")
    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    count_parser = subparsers.add_parser(
        "count",
        help="Count areas, unique maintainers, and / or unique collaborators")
    count_parser.add_argument(
        "-a",
        "--count-areas",
        action="store_true",
        help="Count the number of areas")
    count_parser.add_argument(
        "-c",
        "--count-collaborators",
        action="store_true",
        help="Count the number of unique collaborators")
    count_parser.add_argument(
        "-n",
        "--count-maintainers",
        action="store_true",
        help="Count the number of unique maintainers")
    count_parser.add_argument(
        "-o",
        "--count-unmaintained",
        action="store_true",
        help="Count the number of unmaintained areas")
    count_parser.set_defaults(cmd_fn=Maintainers._count_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand
        sys.exit(parser.format_usage().rstrip())

    return args


class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
        A dictionary that maps area names to Area instances, for all areas
        defined in the maintainers file

    filename:
        The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
            Path to the maintainers file to parse. If None, or if the path does
            not exist, MAINTAINERS.yml in the top-level directory of the Git
            repository is used, and must exist.

        Paths in the maintainers file are taken as relative to the directory
        that contains it.

        Raises MaintainersError if the file is malformed, and GitError if the
        Git top-level directory is needed but can't be determined.
        """
        if (filename is not None) and (pathlib.Path(filename).exists()):
            self.filename = pathlib.Path(filename)
            self._toplevel = self.filename.parent
        else:
            self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))
            self.filename = self._toplevel / "MAINTAINERS.yml"

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.tests = area_dict.get("tests", [])
            area.tags = area_dict.get("tags", [])
            area.description = area_dict.get("description")

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = \
                _get_match_fn(area_dict.get("files"),
                              area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = \
                _get_match_fn(area_dict.get("files-exclude"),
                              area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path',
        taken as relative to the current directory
        """
        # Make directory paths end in '/' so that foo/bar matches foo/bar/.
        # Skip this check in _contains() itself, because the isdir() makes it
        # twice as slow in cases where it's not needed.
        is_dir = os.path.isdir(path)

        # Make 'path' relative to the repository root and normalize it.
        # normpath() would remove a trailing '/', so we add it afterwards.
        path = os.path.normpath(os.path.join(
            os.path.relpath(os.getcwd(), self._toplevel),
            path))

        if is_dir:
            path += "/"

        return [area for area in self.areas.values()
                if area._contains(path)]

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit
        """
        # 'git diff --name-only' prints paths relative to the top-level
        # directory of the Git repository, while path2areas() takes paths
        # relative to the current directory. Convert the paths so that the
        # result is the same no matter which directory we are run from.
        git_toplevel = _git("rev-parse", "--show-toplevel")

        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            res.update(self.path2areas(
                os.path.relpath(os.path.join(git_toplevel, path))))
        return res

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    #
    # Command-line subcommands
    #

    def _path_cmd(self, args):
        # 'path' subcommand implementation. Prints the areas for args.paths,
        # followed by any paths that aren't in an area.

        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                print()
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation. Prints the areas touched by
        # the commit ranges in args.commits.

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                      for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation. Prints each area together with
        # its maintainers, limited to the areas maintained by args.maintainer
        # if it is given.

        for area in self.areas.values():
            if args.maintainer and args.maintainer not in area.maintainers:
                continue
            print("{:25}\t{}".format(area.name, ",".join(area.maintainers)))

    def _count_cmd(self, args):
        # 'count' subcommand implementation

        if not (args.count_areas or args.count_collaborators or
                args.count_maintainers or args.count_unmaintained):
            # if no specific count is provided, print them all
            args.count_areas = True
            args.count_collaborators = True
            args.count_maintainers = True
            args.count_unmaintained = True

        unmaintained = 0
        collaborators = set()
        maintainers = set()

        for area in self.areas.values():
            if area.status == 'maintained':
                maintainers.update(area.maintainers)
            elif area.status == 'odd fixes':
                # NOTE(review): only 'odd fixes' areas are counted as
                # unmaintained. Areas with any other status, such as
                # 'unmaintained', are not counted -- confirm this is intended.
                unmaintained += 1
            collaborators.update(area.collaborators)

        if args.count_areas:
            print('{:14}\t{}'.format('areas:', len(self.areas)))
        if args.count_maintainers:
            print('{:14}\t{}'.format('maintainers:', len(maintainers)))
        if args.count_collaborators:
            print('{:14}\t{}'.format('collaborators:', len(collaborators)))
        if args.count_unmaintained:
            print('{:14}\t{}'.format('unmaintained:', unmaintained))

    def _list_cmd(self, args):
        # 'list' subcommand implementation

        if args.area is None:
            # List all files that appear in some area
            for path in _ls_files():
                if any(area._contains(path) for area in self.areas.values()):
                    print(path)
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))

            for path in _ls_files():
                if area._contains(path):
                    print(path)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation. Prints the files (optionally
        # limited to args.path) that don't appear in any area.

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for path in _ls_files(args.path):
            if not any(area._contains(path) for area in self.areas.values()):
                print(path)
class Area:
    """
    Represents an entry for an area in MAINTAINERS.yml.

    These attributes are available:

    name:
        The name of the area, as a string

    status:
        The status of the area, as a string. None if the area has no 'status'
        key. See MAINTAINERS.yml.

    maintainers:
        List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
        List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
        List of people to inform on pull requests. Empty if the area has no
        'inform' key.

    labels:
        List of GitHub labels for the area. Empty if the area has no 'labels'
        key.

    tests:
        List of strings from the 'tests' key. Empty if the area has no 'tests'
        key.

    tags:
        List of strings from the 'tags' key. Empty if the area has no 'tags'
        key.

    description:
        Text from 'description' key, or None if the area has no 'description'
        key
    """
    def _contains(self, path):
        # Returns True if the area contains 'path', and False otherwise.
        #
        # self._match_fn and self._exclude_match_fn are either None (meaning
        # "matches nothing") or functions that return a truthy value for
        # matching paths.

        if not (self._match_fn and self._match_fn(path)):
            return False
        return not (self._exclude_match_fn and self._exclude_match_fn(path))

    def __repr__(self):
        return "<Area {}>".format(self.name)


def _print_areas(areas):
    # Prints the attributes of each Area in 'areas', sorted by area name and
    # with a blank line between areas

    first = True
    for area in sorted(areas, key=operator.attrgetter("name")):
        if not first:
            print()
        first = False

        rows = (
            ("status", area.status),
            ("maintainers", ", ".join(area.maintainers)),
            ("collaborators", ", ".join(area.collaborators)),
            ("inform", ", ".join(area.inform)),
            ("labels", ", ".join(area.labels)),
            ("tests", ", ".join(area.tests)),
            ("tags", ", ".join(area.tags)),
            ("description", area.description or ""),
        )

        print(area.name)
        for label, value in rows:
            print("\t{}: {}".format(label, value))


def _glob_to_regex(glob):
    # Returns a regex string equivalent to the glob 'glob'. '*' and '?' match
    # within a single path component (they never match '/'). Characters that
    # are special in regexes but not in globs are escaped so that they match
    # literally. '[' and ']' are passed through as-is, which makes glob
    # character classes turn into regex character classes.

    parts = []
    for ch in glob:
        if ch == "*":
            parts.append("[^/]*")
        elif ch == "?":
            parts.append("[^/]")
        elif ch in "\\.+(){}^$|":
            parts.append("\\" + ch)
        else:
            parts.append(ch)

    if not glob.endswith("/"):
        # Require a full match for globs that don't end in /
        parts.append("$")

    return "".join(parts)


def _get_match_fn(globs, regexes):
    # Constructs a single regex that tests for matches against the globs in
    # 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
    # Returns the search() method of the compiled regex.
    #
    # Returns None if there are neither globs nor regexes, which should be
    # interpreted as no match.

    if not (globs or regexes):
        return None

    alternatives = []

    if globs:
        # The glob regexes must anchor to the beginning of the path, since we
        # return search(). (?:) is a non-capturing group.
        alternatives.append(
            "^(?:{})".format("|".join(_glob_to_regex(glob) for glob in globs)))

    if regexes:
        alternatives.extend(regexes)

    return re.compile("|".join(alternatives)).search


def _load_maintainers(path):
    # Returns the parsed contents of the maintainers file 'path' (a
    # pathlib.Path instance), also running checks on the contents. The
    # returned format is plain Python dicts/lists/etc., mirroring the
    # structure of the file.
    #
    # Raises MaintainersError if the file isn't valid YAML or fails the checks.

    with open(path, encoding="utf-8") as f:
        try:
            yaml = load(f, Loader=SafeLoader)
        except YAMLError as e:
            raise MaintainersError(
                "{}: YAML error: {}".format(path, e)) from e

    _check_maintainers(path, yaml)
    return yaml


def _check_maintainers(maints_path, yaml):
    # Checks the maintainers data in 'yaml', which comes from the maintainers
    # file at maints_path, which is a pathlib.Path instance.
    #
    # Raises MaintainersError on the first problem found. Glob patterns are
    # checked against the file system, relative to the directory of the
    # maintainers file.

    root = maints_path.parent

    def ferr(msg):
        _err("{}: {}".format(maints_path, msg))  # Prepend the filename

    if not isinstance(yaml, dict):
        ferr("empty or malformed YAML (not a dict)")

    ok_keys = {"status", "maintainers", "collaborators", "inform", "files",
               "files-exclude", "files-regex", "files-regex-exclude",
               "labels", "description", "tests", "tags"}

    # A tuple rather than a set, so that the error message lists the statuses
    # in a fixed order and so that an unhashable 'status' value (e.g. a list)
    # is reported as an error instead of raising TypeError
    ok_status = ("maintained", "odd fixes", "unmaintained", "obsolete")
    ok_status_s = ", ".join('"' + s + '"' for s in ok_status)  # For messages

    # Keys whose values must be lists of strings. The exclude keys are included
    # since a plain string would otherwise be silently iterated character by
    # character below.
    list_keys = ("maintainers", "collaborators", "inform", "files",
                 "files-exclude", "files-regex", "files-regex-exclude",
                 "labels", "tags", "tests")

    for area_name, area_dict in yaml.items():
        if not isinstance(area_dict, dict):
            ferr("malformed entry for area '{}' (not a dict)"
                 .format(area_name))

        for key in area_dict:
            if key not in ok_keys:
                ferr("unknown key '{}' in area '{}'"
                     .format(key, area_name))

        if "status" in area_dict and \
           area_dict["status"] not in ok_status:
            ferr("bad 'status' key on area '{}', should be one of {}"
                 .format(area_name, ok_status_s))

        if not area_dict.keys() & {"files", "files-regex"}:
            ferr("either 'files' or 'files-regex' (or both) must be specified "
                 "for area '{}'".format(area_name))

        if not area_dict.get("maintainers") and \
           area_dict.get("status") == "maintained":
            ferr("maintained area '{}' with no maintainers".format(area_name))

        for list_name in list_keys:
            if list_name in area_dict:
                lst = area_dict[list_name]
                if not (isinstance(lst, list) and
                        all(isinstance(elm, str) for elm in lst)):
                    ferr("malformed '{}' value for area '{}' -- should "
                         "be a list of strings".format(list_name, area_name))

        for files_key in "files", "files-exclude":
            if files_key in area_dict:
                for glob_pattern in area_dict[files_key]:
                    # This could be changed if it turns out to be too slow,
                    # e.g. to only check non-globbing filenames. The tuple() is
                    # needed due to pathlib's glob() returning a generator.
                    paths = tuple(root.glob(glob_pattern))
                    if not paths:
                        ferr("glob pattern '{}' in '{}' in area '{}' does not "
                             "match any files".format(glob_pattern, files_key,
                                                      area_name))
                    if not glob_pattern.endswith("/"):
                        if all(path.is_dir() for path in paths):
                            ferr("glob pattern '{}' in '{}' in area '{}' "
                                 "matches only directories, but has no "
                                 "trailing '/'"
                                 .format(glob_pattern, files_key,
                                         area_name))

        for files_regex_key in "files-regex", "files-regex-exclude":
            if files_regex_key in area_dict:
                for regex in area_dict[files_regex_key]:
                    try:
                        re.compile(regex)
                    except re.error as e:
                        ferr("bad regular expression '{}' in '{}' in "
                             "'{}': {}".format(regex, files_regex_key,
                                               area_name, e.msg))

        if "description" in area_dict and \
           not isinstance(area_dict["description"], str):
            ferr("malformed 'description' value for area '{}' -- should be a "
                 "string".format(area_name))


def _git(*args):
    # Helper for running a Git command. Returns the rstrip()ed stdout output.
    # Called like _git("diff"). Raises GitError if Git can't be run or exits
    # with a non-zero status.

    git_cmd = ("git",) + args
    git_cmd_s = " ".join(shlex.quote(word) for word in git_cmd)  # For errors

    try:
        git_process = subprocess.run(
            git_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's listed in the PATH environment variable"
                .format(git_cmd_s))
    except OSError as e:
        _giterr("error running '{}': {}".format(git_cmd_s, e))

    if git_process.returncode:
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            git_cmd_s,
            git_process.stdout.decode("utf-8"),
            git_process.stderr.decode("utf-8")))

    return git_process.stdout.decode("utf-8").rstrip()


def _ls_files(path=None):
    # Returns the output lines of 'git ls-files', as a list of strings. If
    # 'path' is given, it is passed on to 'git ls-files' to limit the listing.

    cmd = ["ls-files"]
    if path is not None:
        # '--' makes sure 'path' is never mistaken for an option
        cmd += ["--", path]
    return _git(*cmd).splitlines()


def _err(msg):
    # Raises MaintainersError with the message 'msg'
    raise MaintainersError(msg)


def _giterr(msg):
    # Raises GitError with the message 'msg'
    raise GitError(msg)


def _serr(msg):
    # For reporting errors when get_maintainer.py is run as a script.
    # sys.exit() shouldn't be used otherwise.
    sys.exit("{}: error: {}".format(sys.argv[0], msg))


class MaintainersError(Exception):
    "Exception raised for MAINTAINERS.yml-related errors"


class GitError(Exception):
    "Exception raised for Git-related errors"


if __name__ == "__main__":
    _main()