#!/usr/bin/env python3

# Copyright (c) 2019 Nordic Semiconductor ASA
# SPDX-License-Identifier: Apache-2.0

"""
Lists maintainers for files or commits. Similar in function to
scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is
in MAINTAINERS.yml.

The comment at the top of MAINTAINERS.yml in Zephyr documents the file format.

See the help texts for the various subcommands for more information. They can
be viewed with e.g.

    ./get_maintainer.py path --help

This executable doubles as a Python library. Identifiers not prefixed with '_'
are part of the library API. The library documentation can be viewed with this
command:

    $ pydoc get_maintainer
"""

import argparse
import operator
import os
import pathlib
import re
import shlex
import subprocess
import sys

from yaml import load, YAMLError
try:
    # Use the speedier C LibYAML parser if available
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader


def _main():
    # Entry point when run as an executable. Library errors are turned into a
    # short "<script>: error: <message>" exit message instead of a traceback.

    args = _parse_args()
    try:
        args.cmd_fn(Maintainers(args.maintainers), args)
    except (MaintainersError, GitError) as e:
        _serr(e)


def _parse_args():
    # Parses arguments when run as an executable. Each subcommand stores its
    # implementation (an unbound Maintainers method) in 'cmd_fn'. Exits with
    # the usage text if no subcommand is given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    id_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    id_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    id_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in "
             "some area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List all areas maintained by maintainer.")

    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand
        sys.exit(parser.format_usage().rstrip())

    return args


class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
        A dictionary that maps area names to Area instances, for all areas
        defined in the maintainers file

    filename:
        The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
            Path to the maintainers file to parse. If None, MAINTAINERS.yml in
            the top-level directory of the Git repository is used, and must
            exist.

        Raises GitError if the repository top-level directory can't be
        determined, and MaintainersError if the maintainers file is malformed.
        """
        self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))

        if filename is None:
            self.filename = self._toplevel / "MAINTAINERS.yml"
        else:
            self.filename = pathlib.Path(filename)

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.description = area_dict.get("description")

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = \
                _get_match_fn(area_dict.get("files"),
                              area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = \
                _get_match_fn(area_dict.get("files-exclude"),
                              area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path',
        taken as relative to the current directory
        """
        # Make directory paths end in '/' so that foo/bar matches foo/bar/.
        # Skip this check in _contains() itself, because the isdir() makes it
        # twice as slow in cases where it's not needed.
        is_dir = os.path.isdir(path)

        # Make 'path' relative to the repository root and normalize it.
        # normpath() would remove a trailing '/', so we add it afterwards.
        path = os.path.normpath(os.path.join(
            os.path.relpath(os.getcwd(), self._toplevel),
            path))

        if is_dir:
            path += "/"

        return self._areas_containing(path)

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit
        """
        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            # 'git diff --name-only' prints paths relative to the repository
            # root, so they are matched directly instead of going through
            # path2areas(), which would wrongly treat them as relative to the
            # current directory.
            if (self._toplevel / path).is_dir():
                path += "/"
            res.update(self._areas_containing(path))
        return res

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    def _areas_containing(self, path):
        # Returns a list of the areas that contain 'path', which must already
        # be relative to the repository root (with a trailing '/' for
        # directories)

        return [area for area in self.areas.values()
                if area._contains(path)]

    #
    # Command-line subcommands
    #

    def _path_cmd(self, args):
        # 'path' subcommand implementation

        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                print()
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                      for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation. Prints one line per area, limited
        # to areas maintained by args.maintainer if it is given.

        for area in self.areas.values():
            if args.maintainer and args.maintainer not in area.maintainers:
                continue
            print("{:25}\t{}".format(area.name, ",".join(area.maintainers)))

    def _list_cmd(self, args):
        # 'list' subcommand implementation

        if args.area is None:
            # List all files that appear in some area
            for path in _ls_files():
                if any(area._contains(path) for area in self.areas.values()):
                    print(path)
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))

            for path in _ls_files():
                if area._contains(path):
                    print(path)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for path in _ls_files(args.path):
            if not any(area._contains(path) for area in self.areas.values()):
                print(path)


class Area:
    """
    Represents an entry for an area in MAINTAINERS.yml.

    These attributes are available:

    name:
        The name of the area (its key in MAINTAINERS.yml)

    status:
        The status of the area, as a string. None if the area has no 'status'
        key. See MAINTAINERS.yml.

    maintainers:
        List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
        List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
        List of people to inform on pull requests. Empty if the area has no
        'inform' key.

    labels:
        List of GitHub labels for the area. Empty if the area has no 'labels'
        key.

    description:
        Text from 'description' key, or None if the area has no 'description'
        key
    """
    def _contains(self, path):
        # Returns True if the area contains 'path', and False otherwise.
        # 'path' must be relative to the repository root. A match function
        # that is None means "matches nothing".

        if not (self._match_fn and self._match_fn(path)):
            return False
        return not (self._exclude_match_fn and self._exclude_match_fn(path))

    def __repr__(self):
        return "<Area {}>".format(self.name)


def _print_areas(areas):
    # Prints the areas in 'areas' sorted by name, with a blank line between
    # entries

    first = True
    for area in sorted(areas, key=operator.attrgetter("name")):
        if not first:
            print()
        first = False

        print("""\
{}
\tstatus: {}
\tmaintainers: {}
\tcollaborators: {}
\tinform: {}
\tlabels: {}
\tdescription: {}""".format(area.name,
                            area.status,
                            ", ".join(area.maintainers),
                            ", ".join(area.collaborators),
                            ", ".join(area.inform),
                            ", ".join(area.labels),
                            area.description or ""))


def _get_match_fn(globs, regexes):
    # Constructs a single regex that tests for matches against the globs in
    # 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
    # Returns the search() method of the compiled regex.
    #
    # Returns None if there are neither globs nor regexes, which should be
    # interpreted as no match.

    if not (globs or regexes):
        return None

    regex = ""

    if globs:
        glob_regexes = []
        for glob in globs:
            # Construct a regex equivalent to the glob. Only '.', '*' and '?'
            # are translated; '*' and '?' never match across a '/'.
            glob_regex = glob.replace(".", "\\.").replace("*", "[^/]*") \
                             .replace("?", "[^/]")

            if not glob.endswith("/"):
                # Require a full match for globs that don't end in /
                glob_regex += "$"

            glob_regexes.append(glob_regex)

        # The glob regexes must anchor to the beginning of the path, since we
        # return search(). (?:) is a non-capturing group.
        regex += "^(?:{})".format("|".join(glob_regexes))

    if regexes:
        if regex:
            regex += "|"
        regex += "|".join(regexes)

    return re.compile(regex).search


def _load_maintainers(path):
    # Returns the parsed contents of the maintainers file at 'path' (a
    # pathlib.Path), also running checks on the contents. The returned format
    # is plain Python dicts/lists/etc., mirroring the structure of the file.
    #
    # Raises MaintainersError on YAML syntax errors and failed checks.

    with open(path, encoding="utf-8") as f:
        try:
            # NOTE(review): this uses the full (non-safe) YAML loader, which
            # can construct arbitrary Python objects. Consider SafeLoader if
            # maintainers files from untrusted sources might be loaded.
            yaml = load(f, Loader=Loader)
        except YAMLError as e:
            raise MaintainersError(
                "{}: YAML error: {}".format(path, e)) from e

    _check_maintainers(path, yaml)
    return yaml


def _check_maintainers(maints_path, yaml):
    # Checks the maintainers data in 'yaml', which comes from the maintainers
    # file at maints_path, which is a pathlib.Path instance. Raises
    # MaintainersError on the first problem found.
    #
    # Glob patterns are checked relative to the directory that holds the
    # maintainers file.

    root = maints_path.parent

    def ferr(msg):
        _err("{}: {}".format(maints_path, msg))  # Prepend the filename

    if not isinstance(yaml, dict):
        ferr("empty or malformed YAML (not a dict)")

    ok_keys = {"status", "maintainers", "collaborators", "inform", "files",
               "files-exclude", "files-regex", "files-regex-exclude",
               "labels", "description"}

    ok_status = {"maintained", "odd fixes", "orphaned", "obsolete"}
    ok_status_s = ", ".join('"' + s + '"' for s in ok_status)  # For messages

    for area_name, area_dict in yaml.items():
        if not isinstance(area_dict, dict):
            ferr("malformed entry for area '{}' (not a dict)"
                 .format(area_name))

        for key in area_dict:
            if key not in ok_keys:
                ferr("unknown key '{}' in area '{}'"
                     .format(key, area_name))

        if "status" in area_dict and \
           area_dict["status"] not in ok_status:
            ferr("bad 'status' key on area '{}', should be one of {}"
                 .format(area_name, ok_status_s))

        if not area_dict.keys() & {"files", "files-regex"}:
            ferr("either 'files' or 'files-regex' (or both) must be specified "
                 "for area '{}'".format(area_name))

        # The exclude keys are checked here as well. Otherwise a plain string
        # would be iterated character by character by the checks further
        # down, giving a misleading error.
        for list_name in "maintainers", "collaborators", "inform", "files", \
                         "files-exclude", "files-regex", \
                         "files-regex-exclude", "labels":
            if list_name in area_dict:
                lst = area_dict[list_name]
                if not (isinstance(lst, list) and
                        all(isinstance(elm, str) for elm in lst)):
                    ferr("malformed '{}' value for area '{}' -- should "
                         "be a list of strings".format(list_name, area_name))

        for files_key in "files", "files-exclude":
            if files_key in area_dict:
                for glob_pattern in area_dict[files_key]:
                    # This could be changed if it turns out to be too slow,
                    # e.g. to only check non-globbing filenames. The tuple() is
                    # needed due to pathlib's glob() returning a generator.
                    paths = tuple(root.glob(glob_pattern))
                    if not paths:
                        ferr("glob pattern '{}' in '{}' in area '{}' does not "
                             "match any files".format(glob_pattern, files_key,
                                                      area_name))
                    if not glob_pattern.endswith("/"):
                        for path in paths:
                            if path.is_dir():
                                ferr("glob pattern '{}' in '{}' in area '{}' "
                                     "matches a directory, but has no "
                                     "trailing '/'"
                                     .format(glob_pattern, files_key,
                                             area_name))

        for files_regex_key in "files-regex", "files-regex-exclude":
            if files_regex_key in area_dict:
                for regex in area_dict[files_regex_key]:
                    try:
                        re.compile(regex)
                    except re.error as e:
                        ferr("bad regular expression '{}' in '{}' in "
                             "'{}': {}".format(regex, files_regex_key,
                                               area_name, e.msg))

        if "description" in area_dict and \
           not isinstance(area_dict["description"], str):
            ferr("malformed 'description' value for area '{}' -- should be a "
                 "string".format(area_name))


def _git(*args):
    # Helper for running a Git command. Returns the rstrip()ed stdout output.
    # Called like _git("diff"). Raises GitError if Git can't be run or exits
    # with a non-zero status.

    git_cmd = ("git",) + args
    git_cmd_s = " ".join(shlex.quote(word) for word in git_cmd)  # For errors

    try:
        git_process = subprocess.run(
            git_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's listed in the PATH environment variable"
                .format(git_cmd_s))
    except OSError as e:
        _giterr("error running '{}': {}".format(git_cmd_s, e))

    if git_process.returncode:
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            git_cmd_s,
            git_process.stdout.decode("utf-8"),
            git_process.stderr.decode("utf-8")))

    return git_process.stdout.decode("utf-8").rstrip()


def _ls_files(path=None):
    # Returns a list of the files tracked by Git below the current directory,
    # or below 'path' if given.
    #
    # --full-name makes Git print paths relative to the repository root, which
    # is what Area._contains() expects, even when run from a subdirectory.
    # '--' keeps a path that starts with '-' from being taken as an option.

    cmd = ["ls-files", "--full-name"]
    if path is not None:
        cmd += ["--", path]
    return _git(*cmd).splitlines()


def _err(msg):
    raise MaintainersError(msg)


def _giterr(msg):
    raise GitError(msg)


def _serr(msg):
    # For reporting errors when get_maintainer.py is run as a script.
    # sys.exit() shouldn't be used otherwise.
    sys.exit("{}: error: {}".format(sys.argv[0], msg))


class MaintainersError(Exception):
    "Exception raised for MAINTAINERS.yml-related errors"


class GitError(Exception):
    "Exception raised for Git-related errors"


if __name__ == "__main__":
    _main()