1#!/usr/bin/env python3
2
3# Copyright (c) 2019 Nordic Semiconductor ASA
4# SPDX-License-Identifier: Apache-2.0
5
6"""
7Lists maintainers for files or commits. Similar in function to
8scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is
9in MAINTAINERS.yml.
10
11The comment at the top of MAINTAINERS.yml in Zephyr documents the file format.
12
13See the help texts for the various subcommands for more information. They can
14be viewed with e.g.
15
16    ./get_maintainer.py path --help
17
18This executable doubles as a Python library. Identifiers not prefixed with '_'
19are part of the library API. The library documentation can be viewed with this
20command:
21
22    $ pydoc get_maintainer
23"""
24
25import argparse
26import operator
27import os
28import pathlib
29import re
30import shlex
31import subprocess
32import sys
33
34from yaml import load, YAMLError
35try:
36    # Use the speedier C LibYAML parser if available
37    from yaml import CSafeLoader as SafeLoader
38except ImportError:
39    from yaml import SafeLoader
40
41
def _main():
    # Entry point when run as an executable. Parses the command line, loads
    # the maintainers file, and dispatches to the selected subcommand, which
    # is called as cmd_fn(<Maintainers instance>, args).
    #
    # Expected failures are reported as a one-line error via _serr() instead
    # of a traceback. OSError is included so that e.g. a missing or unreadable
    # maintainers file passed with -m gives a readable message as well.

    args = _parse_args()
    try:
        args.cmd_fn(Maintainers(args.maintainers), args)
    except (MaintainersError, GitError, OSError) as e:
        _serr(e)
50
51
def _parse_args():
    # Parses arguments when run as an executable.
    #
    # Returns the argparse.Namespace for the command line. Its 'cmd_fn'
    # attribute is the Maintainers method that implements the selected
    # subcommand. Exits with the usage text if no subcommand was given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__, allow_abbrev=False)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    # 'path' subcommand
    path_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    path_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    path_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    # 'commits' subcommand
    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    # 'list' subcommand
    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        # Non-orphaned files are the ones that DO appear in some area
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in "
             "some area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    # 'areas' subcommand
    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List all areas maintained by maintainer.")
    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    # 'orphaned' subcommand
    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    # 'count' subcommand
    count_parser = subparsers.add_parser(
        "count",
        help="Count areas, unique maintainers, and / or unique collaborators")
    count_parser.add_argument(
        "-a",
        "--count-areas",
        action="store_true",
        help="Count the number of areas")
    count_parser.add_argument(
        "-c",
        "--count-collaborators",
        action="store_true",
        help="Count the number of unique collaborators")
    count_parser.add_argument(
        "-n",
        "--count-maintainers",
        action="store_true",
        help="Count the number of unique maintainers")
    count_parser.add_argument(
        "-o",
        "--count-unmaintained",
        action="store_true",
        help="Count the number of unmaintained areas")
    count_parser.set_defaults(cmd_fn=Maintainers._count_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand
        sys.exit(parser.format_usage().rstrip())

    return args
154
155
class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
        A dictionary that maps area names to Area instances, for all areas
        defined in the maintainers file

    filename:
        The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
            Path to the maintainers file to parse. If None, MAINTAINERS.yml in
            the top-level directory of the Git repository is used, and must
            exist.

        Raises GitError if the top-level directory can't be determined with
        Git, and MaintainersError if the maintainers file is malformed.
        """
        self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))

        if filename is None:
            self.filename = self._toplevel / "MAINTAINERS.yml"
        else:
            self.filename = pathlib.Path(filename)

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.description = area_dict.get("description")

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = \
                _get_match_fn(area_dict.get("files"),
                              area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = \
                _get_match_fn(area_dict.get("files-exclude"),
                              area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path',
        taken as relative to the current directory. Absolute paths are
        accepted as well.
        """
        # Make directory paths end in '/' so that foo/bar matches foo/bar/.
        # Skip this check in _contains() itself, because the isdir() makes it
        # twice as slow in cases where it's not needed.
        is_dir = os.path.isdir(path)

        # Make 'path' relative to the repository root and normalize it.
        # abspath() anchors relative paths at the current directory and leaves
        # absolute paths alone, so both kinds end up root-relative. relpath()
        # never leaves a trailing '/', so we add it afterwards.
        path = os.path.relpath(os.path.abspath(path), self._toplevel)

        if is_dir:
            path += "/"

        return [area for area in self.areas.values()
                if area._contains(path)]

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit
        """
        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            # 'git diff --name-only' prints paths relative to the repository
            # root, while path2areas() takes relative paths as relative to the
            # current directory. Anchor the path at the root so that the
            # result is the same no matter where we are run from.
            res.update(self.path2areas(self._toplevel / path))
        return res

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    #
    # Command-line subcommands
    #

    def _path_cmd(self, args):
        # 'path' subcommand implementation

        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                print()
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                           for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation. Prints one line per area with
        # the area name and its maintainers, optionally limited to the areas
        # maintained by args.maintainer.

        for area in self.areas.values():
            if args.maintainer and args.maintainer not in area.maintainers:
                continue
            print("{:25}\t{}".format(area.name, ",".join(area.maintainers)))

    def _count_cmd(self, args):
        # 'count' subcommand implementation

        if not (args.count_areas or args.count_collaborators or
                args.count_maintainers or args.count_unmaintained):
            # if no specific count is provided, print them all
            args.count_areas = True
            args.count_collaborators = True
            args.count_maintainers = True
            args.count_unmaintained = True

        unmaintained = 0
        collaborators = set()
        maintainers = set()

        for area in self.areas.values():
            if area.status == 'maintained':
                maintainers.update(area.maintainers)
            elif area.status in ('odd fixes', 'unmaintained'):
                # 'odd fixes' areas have always been counted here. Areas that
                # are explicitly marked 'unmaintained' belong in the same
                # count.
                unmaintained += 1
            collaborators.update(area.collaborators)

        if args.count_areas:
            print('{:14}\t{}'.format('areas:', len(self.areas)))
        if args.count_maintainers:
            print('{:14}\t{}'.format('maintainers:', len(maintainers)))
        if args.count_collaborators:
            print('{:14}\t{}'.format('collaborators:', len(collaborators)))
        if args.count_unmaintained:
            print('{:14}\t{}'.format('unmaintained:', unmaintained))

    def _list_cmd(self, args):
        # 'list' subcommand implementation

        if args.area is None:
            # List all files that appear in some area
            for listed, rooted in self._tracked_files():
                if any(area._contains(rooted)
                       for area in self.areas.values()):
                    print(listed)
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))

            for listed, rooted in self._tracked_files():
                if area._contains(rooted):
                    print(listed)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for listed, rooted in self._tracked_files(args.path):
            if not any(area._contains(rooted)
                       for area in self.areas.values()):
                print(listed)

    def _tracked_files(self, path=None):
        # Yields (listed, rooted) pairs for the files reported by
        # _ls_files(path). 'listed' is the path as printed by Git, which is
        # relative to the current directory. 'rooted' is the same path made
        # relative to the repository root, which is what the area patterns
        # are matched against.

        prefix = os.path.relpath(os.getcwd(), self._toplevel)
        at_root = prefix == os.curdir

        for listed in _ls_files(path):
            if at_root:
                # Already relative to the root. Skip the path juggling.
                yield listed, listed
            else:
                yield listed, os.path.normpath(os.path.join(prefix, listed))
352
353
class Area:
    """
    Represents an entry for an area in MAINTAINERS.yml.

    These attributes are available:

    name:
        The name of the area, as a string. This is the key of the area's
        entry in MAINTAINERS.yml.

    status:
        The status of the area, as a string. None if the area has no 'status'
        key. See MAINTAINERS.yml.

    maintainers:
        List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
        List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
        List of people to inform on pull requests. Empty if the area has no
        'inform' key.

    labels:
        List of GitHub labels for the area. Empty if the area has no 'labels'
        key.

    description:
        Text from 'description' key, or None if the area has no 'description'
        key
    """
    def _contains(self, path):
        # Returns True if the area contains 'path', and False otherwise.
        #
        # self._match_fn and self._exclude_match_fn are either None (no
        # patterns, meaning nothing matches) or a function that returns a
        # true value for matching paths. The result is always a real bool,
        # never None.

        if self._match_fn is None or not self._match_fn(path):
            return False

        return not (self._exclude_match_fn and self._exclude_match_fn(path))

    def __repr__(self):
        return "<Area {}>".format(self.name)
390
391
def _print_areas(areas):
    # Prints a summary of each area in 'areas' (an iterable of Area
    # instances), sorted by area name, with a blank line between areas

    template = "\n".join((
        "{}",
        "\tstatus: {}",
        "\tmaintainers: {}",
        "\tcollaborators: {}",
        "\tinform: {}",
        "\tlabels: {}",
        "\tdescription: {}",
    ))

    ordered = sorted(areas, key=lambda area: area.name)
    for index, area in enumerate(ordered):
        if index > 0:
            # Separator between consecutive areas
            print()

        people_and_labels = [", ".join(names) for names in (
            area.maintainers, area.collaborators, area.inform, area.labels)]

        print(template.format(area.name,
                              area.status,
                              *people_and_labels,
                              area.description or ""))
413
414
def _get_match_fn(globs, regexes):
    # Constructs a single regex that tests for matches against the globs in
    # 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
    # Returns the search() method of the compiled regex.
    #
    # Returns None if there are neither globs nor regexes, which should be
    # interpreted as no match.

    if not (globs or regexes):
        return None

    regex = ""

    if globs:
        glob_regexes = []
        for glob in globs:
            glob_regex = _glob_to_regex(glob)

            if not glob.endswith("/"):
                # Require a full match for globs that don't end in /
                glob_regex += "$"

            glob_regexes.append(glob_regex)

        # The glob regexes must anchor to the beginning of the path, since we
        # return search(). (?:) is a non-capturing group.
        regex += "^(?:{})".format("|".join(glob_regexes))

    if regexes:
        if regex:
            regex += "|"
        regex += "|".join(regexes)

    return re.compile(regex).search


def _glob_to_regex(glob):
    # Returns the source of a regex that is equivalent to the glob 'glob'.
    #
    # '*' and '?' match within a single path component only. Bracket
    # expressions like [abc] are passed through unchanged, as before. All
    # other characters are escaped, so that regex metacharacters that appear
    # in file names ('+', '(', '$', ...) are matched literally instead of
    # being interpreted as regex syntax.

    parts = []
    for token in re.findall(r"\[[^\]/]+\]|.", glob, re.DOTALL):
        if token == "*":
            parts.append("[^/]*")
        elif token == "?":
            parts.append("[^/]")
        elif len(token) > 1:
            # Bracket expression
            parts.append(token)
        else:
            parts.append(re.escape(token))

    return "".join(parts)
451
452
def _load_maintainers(path):
    # Parses the maintainers file at 'path' and runs checks on the contents.
    # Returns the parsed data as plain Python dicts/lists/etc., mirroring the
    # structure of the file.
    #
    # Raises MaintainersError if the file is not valid YAML. Errors from
    # _check_maintainers() propagate unchanged.

    try:
        with open(path, encoding="utf-8") as maints_file:
            parsed = load(maints_file, Loader=SafeLoader)
    except YAMLError as e:
        raise MaintainersError("{}: YAML error: {}".format(path, e))

    _check_maintainers(path, parsed)
    return parsed
466
467
def _check_maintainers(maints_path, yaml):
    # Checks the maintainers data in 'yaml', which comes from the maintainers
    # file at maints_path, which is a pathlib.Path instance.
    #
    # Glob patterns are checked against the file system, relative to the
    # directory that contains the maintainers file.
    #
    # Returns nothing. The first problem found is reported through _err(),
    # with the path of the maintainers file prepended to the message.

    root = maints_path.parent

    def ferr(msg):
        _err("{}: {}".format(maints_path, msg))  # Prepend the filename

    if not isinstance(yaml, dict):
        ferr("empty or malformed YAML (not a dict)")

    ok_keys = {"status", "maintainers", "collaborators", "inform", "files",
               "files-exclude", "files-regex", "files-regex-exclude",
               "labels", "description"}

    ok_status = {"maintained", "odd fixes", "unmaintained", "obsolete"}
    # For messages. Sorted so that the message is the same on every run.
    ok_status_s = ", ".join('"' + s + '"' for s in sorted(ok_status))

    # Keys whose values must be lists of strings. The exclude keys are
    # included so that a bare string isn't silently iterated character by
    # character in the pattern checks further down.
    list_keys = ("maintainers", "collaborators", "inform", "files",
                 "files-exclude", "files-regex", "files-regex-exclude",
                 "labels")

    for area_name, area_dict in yaml.items():
        if not isinstance(area_dict, dict):
            ferr("malformed entry for area '{}' (not a dict)"
                 .format(area_name))

        for key in area_dict:
            if key not in ok_keys:
                ferr("unknown key '{}' in area '{}'"
                     .format(key, area_name))

        if "status" in area_dict:
            status = area_dict["status"]
            # The isinstance() check keeps unhashable values (e.g. a list)
            # from raising TypeError in the set lookup
            if not isinstance(status, str) or status not in ok_status:
                ferr("bad 'status' key on area '{}', should be one of {}"
                     .format(area_name, ok_status_s))

        if not area_dict.keys() & {"files", "files-regex"}:
            ferr("either 'files' or 'files-regex' (or both) must be specified "
                 "for area '{}'".format(area_name))

        for list_name in list_keys:
            if list_name in area_dict:
                lst = area_dict[list_name]
                if not (isinstance(lst, list) and
                        all(isinstance(elm, str) for elm in lst)):
                    ferr("malformed '{}' value for area '{}' -- should "
                         "be a list of strings".format(list_name, area_name))

        for files_key in "files", "files-exclude":
            if files_key in area_dict:
                for glob_pattern in area_dict[files_key]:
                    # This could be changed if it turns out to be too slow,
                    # e.g. to only check non-globbing filenames. The tuple() is
                    # needed due to pathlib's glob() returning a generator.
                    paths = tuple(root.glob(glob_pattern))
                    if not paths:
                        ferr("glob pattern '{}' in '{}' in area '{}' does not "
                             "match any files".format(glob_pattern, files_key,
                                                      area_name))
                    if not glob_pattern.endswith("/"):
                        if all(path.is_dir() for path in paths):
                            ferr("glob pattern '{}' in '{}' in area '{}' "
                                 "matches only directories, but has no "
                                 "trailing '/'"
                                 .format(glob_pattern, files_key,
                                         area_name))

        for files_regex_key in "files-regex", "files-regex-exclude":
            if files_regex_key in area_dict:
                for regex in area_dict[files_regex_key]:
                    try:
                        re.compile(regex)
                    except re.error as e:
                        ferr("bad regular expression '{}' in '{}' in "
                             "'{}': {}".format(regex, files_regex_key,
                                               area_name, e.msg))

        if "description" in area_dict and \
           not isinstance(area_dict["description"], str):
            ferr("malformed 'description' value for area '{}' -- should be a "
                 "string".format(area_name))
548
549
def _git(*args):
    # Helper for running a Git command. Returns the rstrip()ed stdout output,
    # decoded as UTF-8. Called like _git("diff").
    #
    # Raises GitError (via _giterr()) if the git executable can't be run, or
    # if the command exits with a nonzero status.

    git_cmd = ("git",) + args
    git_cmd_s = " ".join(shlex.quote(word) for word in git_cmd)  # For errors

    try:
        git_process = subprocess.Popen(
            git_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's listed in the PATH environment variable"
                .format(git_cmd_s))
    except OSError as e:
        _giterr("error running '{}': {}".format(git_cmd_s, e))

    # communicate() reads both pipes to the end and waits for Git to exit
    stdout, stderr = git_process.communicate()
    if git_process.returncode:
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            git_cmd_s, stdout.decode("utf-8"), stderr.decode("utf-8")))

    return stdout.decode("utf-8").rstrip()
574
575
def _ls_files(path=None):
    # Returns the output lines of 'git ls-files' as a list of strings. If
    # 'path' is given, it is passed to 'git ls-files' as an argument.

    if path is None:
        listing = _git("ls-files")
    else:
        listing = _git("ls-files", path)

    return listing.splitlines()
581
582
def _err(msg):
    # Reports a problem with the maintainers file by raising
    # MaintainersError with the message 'msg'

    raise MaintainersError(msg)
585
586
def _giterr(msg):
    # Reports a Git-related problem by raising GitError with the message
    # 'msg'

    raise GitError(msg)
589
590
def _serr(msg):
    # Exits with an error message prefixed by the script name. Only meant
    # for when get_maintainer.py is run as a script; library code should
    # raise exceptions rather than call sys.exit().

    script = sys.argv[0]
    sys.exit("{}: error: {}".format(script, msg))
595
596
class MaintainersError(Exception):
    """
    Exception raised for MAINTAINERS.yml-related errors, such as YAML syntax
    errors and failed checks on the contents of the file
    """
599
600
class GitError(Exception):
    """
    Exception raised for Git-related errors, such as the git executable not
    being found or a Git command exiting with a nonzero status
    """
603
604
# Only run the command-line interface when executed as a script. Importing
# this file as a library must not parse arguments or call sys.exit().
if __name__ == "__main__":
    _main()
607