#!/usr/bin/env python3

# Copyright (c) 2019 Nordic Semiconductor ASA
# SPDX-License-Identifier: Apache-2.0

"""
Lists maintainers for files or commits. Similar in function to
scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is
in MAINTAINERS.yml.

The comment at the top of MAINTAINERS.yml in Zephyr documents the file format.

See the help texts for the various subcommands for more information. They can
be viewed with e.g.

    ./get_maintainer.py path --help

This executable doubles as a Python library. Identifiers not prefixed with '_'
are part of the library API. The library documentation can be viewed with this
command:

    $ pydoc get_maintainer
"""

import argparse
import operator
import os
import pathlib
import re
import shlex
import subprocess
import sys

from yaml import load, YAMLError
try:
    # Use the speedier C LibYAML parser if available
    from yaml import CSafeLoader as SafeLoader
except ImportError:
    from yaml import SafeLoader


def _main():
    # Entry point when run as an executable. Parses the command line, loads
    # the maintainers file, and dispatches to the selected subcommand.
    # Maintainers-file and Git errors are turned into a clean error exit
    # instead of a traceback.

    args = _parse_args()
    try:
        args.cmd_fn(Maintainers(args.maintainers), args)
    except (MaintainersError, GitError) as e:
        _serr(e)


def _parse_args():
    # Parses arguments when run as an executable. Returns the
    # argparse.Namespace, which always has a 'cmd_fn' attribute holding the
    # (unbound) Maintainers method that implements the chosen subcommand.
    # Exits with the usage text if no subcommand is given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__, allow_abbrev=False)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    id_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    id_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    id_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in some "
             "area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List all areas maintained by maintainer.")
    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    count_parser = subparsers.add_parser(
        "count",
        help="Count areas, unique maintainers, and / or unique collaborators")
    count_parser.add_argument(
        "-a",
        "--count-areas",
        action="store_true",
        help="Count the number of areas")
    count_parser.add_argument(
        "-c",
        "--count-collaborators",
        action="store_true",
        help="Count the number of unique collaborators")
    count_parser.add_argument(
        "-n",
        "--count-maintainers",
        action="store_true",
        help="Count the number of unique maintainers")
    count_parser.add_argument(
        "-o",
        "--count-unmaintained",
        action="store_true",
        help="Count the number of unmaintained areas")
    count_parser.set_defaults(cmd_fn=Maintainers._count_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand
        sys.exit(parser.format_usage().rstrip())

    return args


class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
        A dictionary that maps area names to Area instances, for all areas
        defined in the maintainers file

    filename:
        The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
            Path to the maintainers file to parse. If None, MAINTAINERS.yml in
            the top-level directory of the Git repository is used, and must
            exist.

        Raises GitError if the Git top-level directory can't be determined,
        and MaintainersError if the maintainers file is malformed.
        """
        self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))

        if filename is None:
            self.filename = self._toplevel / "MAINTAINERS.yml"
        else:
            self.filename = pathlib.Path(filename)

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.description = area_dict.get("description")

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = _get_match_fn(
                area_dict.get("files"),
                area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = _get_match_fn(
                area_dict.get("files-exclude"),
                area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path',
        taken as relative to the current directory
        """
        # Make directory paths end in '/' so that foo/bar matches foo/bar/.
        # Skip this check in _contains() itself, because the isdir() makes it
        # twice as slow in cases where it's not needed.
        is_dir = os.path.isdir(path)

        # Make 'path' relative to the repository root and normalize it.
        # normpath() would remove a trailing '/', so we add it afterwards.
        path = os.path.normpath(os.path.join(
            os.path.relpath(os.getcwd(), self._toplevel),
            path))

        if is_dir:
            path += "/"

        return [area for area in self.areas.values()
                if area._contains(path)]

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit
        """
        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            res.update(self.path2areas(path))
        return res

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    #
    # Command-line subcommands
    #

    def _path_cmd(self, args):
        # 'path' subcommand implementation. Prints the areas that contain any
        # of the given paths, followed by the paths that are in no area.

        # Validate all paths up front, so that nothing is printed if any of
        # them is bad
        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                # Blank line between the area listing and the orphaned paths
                print()
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation. Prints the union of the areas
        # touched by each given commit range.

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                      for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation. Prints one line per area with
        # its maintainers. If a maintainer is given, only the areas that list
        # that maintainer are printed.

        for area in self.areas.values():
            if args.maintainer and args.maintainer not in area.maintainers:
                continue
            print("{:25}\t{}".format(area.name, ",".join(area.maintainers)))

    def _count_cmd(self, args):
        # 'count' subcommand implementation

        if not any((args.count_areas, args.count_collaborators,
                    args.count_maintainers, args.count_unmaintained)):
            # if no specific count is provided, print them all
            args.count_areas = True
            args.count_collaborators = True
            args.count_maintainers = True
            args.count_unmaintained = True

        unmaintained = 0
        collaborators = set()
        maintainers = set()

        for area in self.areas.values():
            # Only maintainers of areas with status 'maintained' are counted
            if area.status == 'maintained':
                maintainers.update(area.maintainers)
            elif area.status == 'odd fixes':
                # NOTE(review): areas with status 'odd fixes' are what is
                # reported as "unmaintained"; areas whose status is literally
                # 'unmaintained' are not counted. Confirm that this is
                # intended.
                unmaintained += 1
            # Collaborators are counted regardless of the area status
            collaborators.update(area.collaborators)

        if args.count_areas:
            print('{:14}\t{}'.format('areas:', len(self.areas)))
        if args.count_maintainers:
            print('{:14}\t{}'.format('maintainers:', len(maintainers)))
        if args.count_collaborators:
            print('{:14}\t{}'.format('collaborators:', len(collaborators)))
        if args.count_unmaintained:
            print('{:14}\t{}'.format('unmaintained:', unmaintained))

    def _list_cmd(self, args):
        # 'list' subcommand implementation

        if args.area is None:
            # List all files that appear in some area
            for path in _ls_files():
                if any(area._contains(path) for area in self.areas.values()):
                    print(path)
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))

            for path in _ls_files():
                if area._contains(path):
                    print(path)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation. Prints the files reported by
        # 'git ls-files' (optionally limited to args.path) that are not
        # contained in any area.

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for path in _ls_files(args.path):
            if not any(area._contains(path) for area in self.areas.values()):
                print(path)


class Area:
    """
    Represents an entry for an area in MAINTAINERS.yml.

    These attributes are available:

    name:
        The name of the area (its key in MAINTAINERS.yml)

    status:
        The status of the area, as a string. None if the area has no 'status'
        key. See MAINTAINERS.yml.

    maintainers:
        List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
        List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
        List of people to inform on pull requests. Empty if the area has no
        'inform' key.

    labels:
        List of GitHub labels for the area. Empty if the area has no 'labels'
        key.

    description:
        Text from 'description' key, or None if the area has no 'description'
        key
    """
    def _contains(self, path):
        # Returns True if the area contains 'path', and False otherwise.
        #
        # self._match_fn and self._exclude_match_fn are either None (nothing
        # to match against) or the search() method of a compiled regex, so
        # the result is coerced to a real bool.

        return bool(
            self._match_fn and self._match_fn(path) and
            not (self._exclude_match_fn and self._exclude_match_fn(path)))

    def __repr__(self):
        return "<Area {}>".format(self.name)


def _print_areas(areas):
    # Prints the Area instances in 'areas', sorted by name and separated by
    # blank lines

    first = True
    for area in sorted(areas, key=operator.attrgetter("name")):
        if not first:
            print()
        first = False

        print("""\
{}
\tstatus: {}
\tmaintainers: {}
\tcollaborators: {}
\tinform: {}
\tlabels: {}
\tdescription: {}""".format(area.name,
                            area.status,
                            ", ".join(area.maintainers),
                            ", ".join(area.collaborators),
                            ", ".join(area.inform),
                            ", ".join(area.labels),
                            area.description or ""))


def _get_match_fn(globs, regexes):
    # Constructs a single regex that tests for matches against the globs in
    # 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
    # Returns the search() method of the compiled regex.
    #
    # Returns None if there are neither globs nor regexes, which should be
    # interpreted as no match.

    if not (globs or regexes):
        return None

    regex = ""

    if globs:
        glob_regexes = []
        for glob in globs:
            # Construct a regex equivalent to the glob. Only '.', '*', and '?'
            # are translated; any other character is passed through to the
            # regex unchanged.
            glob_regex = glob.replace(".", "\\.").replace("*", "[^/]*") \
                             .replace("?", "[^/]")

            if not glob.endswith("/"):
                # Require a full match for globs that don't end in /
                glob_regex += "$"

            glob_regexes.append(glob_regex)

        # The glob regexes must anchor to the beginning of the path, since we
        # return search(). (?:) is a non-capturing group.
        regex += "^(?:{})".format("|".join(glob_regexes))

    if regexes:
        if regex:
            regex += "|"
        regex += "|".join(regexes)

    return re.compile(regex).search


def _load_maintainers(path):
    # Returns the parsed contents of the maintainers file 'path' (a
    # pathlib.Path), also running checks on the contents. The returned format
    # is plain Python dicts/lists/etc., mirroring the structure of the file.
    #
    # Raises MaintainersError on YAML syntax errors and on failed checks.

    with open(path, encoding="utf-8") as f:
        try:
            yaml = load(f, Loader=SafeLoader)
        except YAMLError as e:
            raise MaintainersError(
                "{}: YAML error: {}".format(path, e)) from e

    _check_maintainers(path, yaml)
    return yaml


def _check_maintainers(maints_path, yaml):
    # Checks the maintainers data in 'yaml', which comes from the maintainers
    # file at maints_path, which is a pathlib.Path instance.
    #
    # Raises MaintainersError on the first problem found.

    # Glob patterns are resolved relative to the directory of the file
    root = maints_path.parent

    def ferr(msg):
        _err("{}: {}".format(maints_path, msg))  # Prepend the filename

    if not isinstance(yaml, dict):
        ferr("empty or malformed YAML (not a dict)")

    ok_keys = {"status", "maintainers", "collaborators", "inform", "files",
               "files-exclude", "files-regex", "files-regex-exclude",
               "labels", "description"}

    # A tuple rather than a set, so that the order in error messages is
    # stable and so that unhashable (malformed) 'status' values can be tested
    # for membership
    ok_status = ("maintained", "odd fixes", "unmaintained", "obsolete")
    ok_status_s = ", ".join('"' + s + '"' for s in ok_status)  # For messages

    # Keys whose values must be lists of strings. The exclude keys are
    # included so that a scalar value isn't silently iterated character by
    # character by the glob/regex checks below.
    list_keys = ("maintainers", "collaborators", "inform", "files",
                 "files-exclude", "files-regex", "files-regex-exclude",
                 "labels")

    for area_name, area_dict in yaml.items():
        if not isinstance(area_dict, dict):
            ferr("malformed entry for area '{}' (not a dict)"
                 .format(area_name))

        for key in area_dict:
            if key not in ok_keys:
                ferr("unknown key '{}' in area '{}'"
                     .format(key, area_name))

        if "status" in area_dict and \
           area_dict["status"] not in ok_status:
            ferr("bad 'status' key on area '{}', should be one of {}"
                 .format(area_name, ok_status_s))

        if not area_dict.keys() & {"files", "files-regex"}:
            ferr("either 'files' or 'files-regex' (or both) must be specified "
                 "for area '{}'".format(area_name))

        for list_name in list_keys:
            if list_name in area_dict:
                lst = area_dict[list_name]
                if not (isinstance(lst, list) and
                        all(isinstance(elm, str) for elm in lst)):
                    ferr("malformed '{}' value for area '{}' -- should "
                         "be a list of strings".format(list_name, area_name))

        for files_key in "files", "files-exclude":
            if files_key in area_dict:
                for glob_pattern in area_dict[files_key]:
                    # This could be changed if it turns out to be too slow,
                    # e.g. to only check non-globbing filenames. The tuple() is
                    # needed due to pathlib's glob() returning a generator.
                    paths = tuple(root.glob(glob_pattern))
                    if not paths:
                        ferr("glob pattern '{}' in '{}' in area '{}' does not "
                             "match any files".format(glob_pattern, files_key,
                                                      area_name))
                    if not glob_pattern.endswith("/"):
                        if all(path.is_dir() for path in paths):
                            ferr("glob pattern '{}' in '{}' in area '{}' "
                                 "matches only directories, but has no "
                                 "trailing '/'"
                                 .format(glob_pattern, files_key,
                                         area_name))

        for files_regex_key in "files-regex", "files-regex-exclude":
            if files_regex_key in area_dict:
                for regex in area_dict[files_regex_key]:
                    try:
                        re.compile(regex)
                    except re.error as e:
                        ferr("bad regular expression '{}' in '{}' in "
                             "'{}': {}".format(regex, files_regex_key,
                                               area_name, e.msg))

        if "description" in area_dict and \
           not isinstance(area_dict["description"], str):
            ferr("malformed 'description' value for area '{}' -- should be a "
                 "string".format(area_name))


def _git(*args):
    # Helper for running a Git command. Returns the rstrip()ed stdout output.
    # Called like _git("diff"). Raises GitError if Git can't be run or exits
    # with a non-zero status.

    git_cmd = ("git",) + args
    git_cmd_s = " ".join(shlex.quote(word) for word in git_cmd)  # For errors

    try:
        git_process = subprocess.Popen(
            git_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's listed in the PATH environment variable"
                .format(git_cmd_s))
    except OSError as e:
        _giterr("error running '{}': {}".format(git_cmd_s, e))

    stdout, stderr = git_process.communicate()
    if git_process.returncode:
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            git_cmd_s, stdout.decode("utf-8"), stderr.decode("utf-8")))

    return stdout.decode("utf-8").rstrip()


def _ls_files(path=None):
    # Returns the output lines of 'git ls-files', as a list of paths. If
    # 'path' is given, it is passed as an argument to 'git ls-files'.

    cmd = ["ls-files"]
    if path is not None:
        cmd.append(path)
    return _git(*cmd).splitlines()


def _err(msg):
    # Raises MaintainersError with the message 'msg'
    raise MaintainersError(msg)


def _giterr(msg):
    # Raises GitError with the message 'msg'
    raise GitError(msg)


def _serr(msg):
    # For reporting errors when get_maintainer.py is run as a script.
    # sys.exit() shouldn't be used otherwise.
    sys.exit("{}: error: {}".format(sys.argv[0], msg))


class MaintainersError(Exception):
    "Exception raised for MAINTAINERS.yml-related errors"


class GitError(Exception):
    "Exception raised for Git-related errors"


if __name__ == "__main__":
    _main()