1#!/usr/bin/env python3
2
3# Copyright (c) 2019 Nordic Semiconductor ASA
4# SPDX-License-Identifier: Apache-2.0
5
6"""
7Lists maintainers for files or commits. Similar in function to
8scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is
9in MAINTAINERS.yml.
10
11The comment at the top of MAINTAINERS.yml in Zephyr documents the file format.
12
13See the help texts for the various subcommands for more information. They can
14be viewed with e.g.
15
16    ./get_maintainer.py path --help
17
18This executable doubles as a Python library. Identifiers not prefixed with '_'
19are part of the library API. The library documentation can be viewed with this
20command:
21
22    $ pydoc get_maintainer
23"""
24
25import argparse
26import operator
27import os
28import pathlib
29import re
30import shlex
31import subprocess
32import sys
33
34from yaml import load, YAMLError
35try:
36    # Use the speedier C LibYAML parser if available
37    from yaml import CSafeLoader as SafeLoader
38except ImportError:
39    from yaml import SafeLoader
40
41
def _main():
    # Script entry point: parses the command line, loads the maintainers file,
    # and dispatches to the handler of the selected subcommand. Maintainers
    # file and Git problems are reported as a plain error message instead of
    # a traceback.

    cli_args = _parse_args()
    try:
        maintainers = Maintainers(cli_args.maintainers)
        cli_args.cmd_fn(maintainers, cli_args)
    except (GitError, MaintainersError) as error:
        _serr(error)
50
51
def _parse_args():
    # Parses arguments when run as an executable.
    #
    # Returns the argparse namespace. Besides the option values, it always
    # has a 'cmd_fn' attribute holding the Maintainers method that implements
    # the selected subcommand. Exits with the usage text if no subcommand was
    # given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__, allow_abbrev=False)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    # 'path' subcommand
    path_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    path_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    path_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    # 'commits' subcommand
    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    # 'list' subcommand. The help text must agree with
    # Maintainers._list_cmd(): without an area, the files that DO appear in
    # some area are listed.
    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in some "
             "area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    # 'areas' subcommand
    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List all areas maintained by maintainer.")
    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    # 'orphaned' subcommand
    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    # 'count' subcommand
    count_parser = subparsers.add_parser(
        "count",
        help="Count areas, unique maintainers, and / or unique collaborators")
    count_parser.add_argument(
        "-a",
        "--count-areas",
        action="store_true",
        help="Count the number of areas")
    count_parser.add_argument(
        "-c",
        "--count-collaborators",
        action="store_true",
        help="Count the number of unique collaborators")
    count_parser.add_argument(
        "-n",
        "--count-maintainers",
        action="store_true",
        help="Count the number of unique maintainers")
    count_parser.add_argument(
        "-o",
        "--count-unmaintained",
        action="store_true",
        help="Count the number of unmaintained areas")
    count_parser.set_defaults(cmd_fn=Maintainers._count_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand, so no set_defaults() was applied
        sys.exit(parser.format_usage().rstrip())

    return args
154
155
class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
        A dictionary that maps area names to Area instances, for all areas
        defined in the maintainers file

    filename:
        The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
            Path to the maintainers file to parse. If None, or if the path
            does not exist, MAINTAINERS.yml in the top-level directory of the
            Git repository is used, and must exist.

        Paths in the maintainers file are taken as relative to the directory
        that contains the maintainers file.
        """
        if (filename is not None) and (pathlib.Path(filename).exists()):
            self.filename = pathlib.Path(filename)
            self._toplevel = self.filename.parent
        else:
            self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))
            self.filename = self._toplevel / "MAINTAINERS.yml"

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.tests = area_dict.get("tests", [])
            area.tags = area_dict.get("tags", [])
            area.description = area_dict.get("description")

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = \
                _get_match_fn(area_dict.get("files"),
                              area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = \
                _get_match_fn(area_dict.get("files-exclude"),
                              area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path',
        taken as relative to the current directory
        """
        # Make directory paths end in '/' so that foo/bar matches foo/bar/.
        # Skip this check in _contains() itself, because the isdir() makes it
        # twice as slow in cases where it's not needed.
        is_dir = os.path.isdir(path)

        # Make 'path' relative to the repository root and normalize it.
        # normpath() would remove a trailing '/', so we add it afterwards.
        path = os.path.normpath(os.path.join(
            os.path.relpath(os.getcwd(), self._toplevel),
            path))

        if is_dir:
            path += "/"

        return self._toplevel_path2areas(path)

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit
        """
        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            # 'git diff --name-only' prints paths relative to the top-level
            # directory, not to the current directory, so they must not go
            # through path2areas() (which would prepend the current
            # directory's offset and break matching when run from a
            # subdirectory). Match them against the areas directly instead.
            if (self._toplevel / path).is_dir():
                # Same convention as path2areas(): directories end in '/'
                path += "/"
            res.update(self._toplevel_path2areas(path))
        return res

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    def _toplevel_path2areas(self, path):
        # Returns a list of the areas that contain 'path'. 'path' must
        # already be relative to the top-level directory and normalized, and
        # must end in '/' if it refers to a directory.

        return [area for area in self.areas.values()
                if area._contains(path)]

    #
    # Command-line subcommands
    #

    def _path_cmd(self, args):
        # 'path' subcommand implementation. Prints the areas that contain
        # any of args.paths, followed by the paths that are in no area.

        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                print()
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation. Prints the areas touched by
        # any of the commit ranges in args.commits.

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                           for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation. Prints one line per area with
        # its maintainers, optionally limited to the areas that list
        # args.maintainer as a maintainer.

        for area in self.areas.values():
            if args.maintainer and args.maintainer not in area.maintainers:
                continue
            print("{:25}\t{}".format(area.name, ",".join(area.maintainers)))

    def _count_cmd(self, args):
        # 'count' subcommand implementation

        if not (args.count_areas or args.count_collaborators or
                args.count_maintainers or args.count_unmaintained):
            # if no specific count is provided, print them all
            args.count_areas = True
            args.count_collaborators = True
            args.count_maintainers = True
            args.count_unmaintained = True

        unmaintained = 0
        collaborators = set()
        maintainers = set()

        for area in self.areas.values():
            if area.status == 'maintained':
                maintainers.update(area.maintainers)
            elif area.status in ('odd fixes', 'unmaintained'):
                # 'odd fixes' areas have always been counted here. Areas
                # whose status is literally 'unmaintained' must be counted
                # as well.
                unmaintained += 1
            collaborators.update(area.collaborators)

        if args.count_areas:
            print('{:14}\t{}'.format('areas:', len(self.areas)))
        if args.count_maintainers:
            print('{:14}\t{}'.format('maintainers:', len(maintainers)))
        if args.count_collaborators:
            print('{:14}\t{}'.format('collaborators:', len(collaborators)))
        if args.count_unmaintained:
            print('{:14}\t{}'.format('unmaintained:', unmaintained))

    def _list_cmd(self, args):
        # 'list' subcommand implementation
        #
        # NOTE(review): the paths from _ls_files() are matched as-is against
        # the areas, so this presumably expects to be run from the top-level
        # directory -- confirm.

        if args.area is None:
            # List all files that appear in some area
            for path in _ls_files():
                if any(area._contains(path) for area in self.areas.values()):
                    print(path)
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))

            for path in _ls_files():
                if area._contains(path):
                    print(path)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation. Prints the files (optionally
        # limited to args.path) that are not contained in any area.
        #
        # NOTE(review): the paths from _ls_files() are matched as-is against
        # the areas, so this presumably expects to be run from the top-level
        # directory -- confirm.

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for path in _ls_files(args.path):
            if not any(area._contains(path) for area in self.areas.values()):
                print(path)
354
355
class Area:
    """
    Represents an entry for an area in MAINTAINERS.yml.

    These attributes are available:

    name:
        The name of the area, i.e. its key in the maintainers file

    status:
        The status of the area, as a string. None if the area has no 'status'
        key. See MAINTAINERS.yml.

    maintainers:
        List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
        List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
        List of people to inform on pull requests. Empty if the area has no
        'inform' key.

    labels:
        List of GitHub labels for the area. Empty if the area has no 'labels'
        key.

    tests:
        List of strings from the 'tests' key. Empty if the area has no
        'tests' key.

    tags:
        List of strings from the 'tags' key. Empty if the area has no 'tags'
        key.

    description:
        Text from 'description' key, or None if the area has no 'description'
        key
    """
    def _contains(self, path):
        # Returns True if the area contains 'path', and False otherwise.
        #
        # A path is contained if it matches files/files-regex and does not
        # match files-exclude/files-regex-exclude. Either match function can
        # be None, meaning "matches nothing". The result is normalized with
        # bool() so that callers get a real True/False rather than None or a
        # regex match object.

        return bool(
            self._match_fn and self._match_fn(path) and not
            (self._exclude_match_fn and self._exclude_match_fn(path)))

    def __repr__(self):
        return "<Area {}>".format(self.name)
392
393
394def _print_areas(areas):
395    first = True
396    for area in sorted(areas, key=operator.attrgetter("name")):
397        if not first:
398            print()
399        first = False
400
401        print("""\
402{}
403\tstatus: {}
404\tmaintainers: {}
405\tcollaborators: {}
406\tinform: {}
407\tlabels: {}
408\ttests: {}
409\ttags: {}
410\tdescription: {}""".format(area.name,
411                            area.status,
412                            ", ".join(area.maintainers),
413                            ", ".join(area.collaborators),
414                            ", ".join(area.inform),
415                            ", ".join(area.labels),
416                            ", ".join(area.tests),
417                            ", ".join(area.tags),
418                            area.description or ""))
419
420
421def _get_match_fn(globs, regexes):
422    # Constructs a single regex that tests for matches against the globs in
423    # 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
424    # Returns the search() method of the compiled regex.
425    #
426    # Returns None if there are neither globs nor regexes, which should be
427    # interpreted as no match.
428
429    if not (globs or regexes):
430        return None
431
432    regex = ""
433
434    if globs:
435        glob_regexes = []
436        for glob in globs:
437            # Construct a regex equivalent to the glob
438            glob_regex = glob.replace(".", "\\.").replace("*", "[^/]*") \
439                             .replace("?", "[^/]")
440
441            if not glob.endswith("/"):
442                # Require a full match for globs that don't end in /
443                glob_regex += "$"
444
445            glob_regexes.append(glob_regex)
446
447        # The glob regexes must anchor to the beginning of the path, since we
448        # return search(). (?:) is a non-capturing group.
449        regex += "^(?:{})".format("|".join(glob_regexes))
450
451    if regexes:
452        if regex:
453            regex += "|"
454        regex += "|".join(regexes)
455
456    return re.compile(regex).search
457
458
def _load_maintainers(path):
    # Returns the parsed contents of the maintainers file at 'path' (a
    # pathlib.Path instance), also running checks on the contents. The
    # returned format is plain Python dicts/lists/etc., mirroring the
    # structure of the file.
    #
    # Raises MaintainersError if the file is not valid YAML or fails the
    # checks in _check_maintainers().

    with open(path, encoding="utf-8") as f:
        try:
            yaml = load(f, Loader=SafeLoader)
        except YAMLError as e:
            # Chain the parser error so that its details stay available
            raise MaintainersError(
                "{}: YAML error: {}".format(path, e)) from e

    # The file is fully parsed at this point, so it does not need to stay
    # open while the contents are being validated
    _check_maintainers(path, yaml)
    return yaml
472
473
474def _check_maintainers(maints_path, yaml):
475    # Checks the maintainers data in 'yaml', which comes from the maintainers
476    # file at maints_path, which is a pathlib.Path instance
477
478    root = maints_path.parent
479
480    def ferr(msg):
481        _err("{}: {}".format(maints_path, msg))  # Prepend the filename
482
483    if not isinstance(yaml, dict):
484        ferr("empty or malformed YAML (not a dict)")
485
486    ok_keys = {"status", "maintainers", "collaborators", "inform", "files",
487               "files-exclude", "files-regex", "files-regex-exclude",
488               "labels", "description", "tests", "tags"}
489
490    ok_status = {"maintained", "odd fixes", "unmaintained", "obsolete"}
491    ok_status_s = ", ".join('"' + s + '"' for s in ok_status)  # For messages
492
493    for area_name, area_dict in yaml.items():
494        if not isinstance(area_dict, dict):
495            ferr("malformed entry for area '{}' (not a dict)"
496                 .format(area_name))
497
498        for key in area_dict:
499            if key not in ok_keys:
500                ferr("unknown key '{}' in area '{}'"
501                     .format(key, area_name))
502
503        if "status" in area_dict and \
504           area_dict["status"] not in ok_status:
505            ferr("bad 'status' key on area '{}', should be one of {}"
506                 .format(area_name, ok_status_s))
507
508        if not area_dict.keys() & {"files", "files-regex"}:
509            ferr("either 'files' or 'files-regex' (or both) must be specified "
510                 "for area '{}'".format(area_name))
511
512        if not area_dict.get("maintainers") and area_dict.get("status") == "maintained":
513            ferr("maintained area '{}' with no maintainers".format(area_name))
514
515        for list_name in "maintainers", "collaborators", "inform", "files", \
516                         "files-regex", "labels", "tags", "tests":
517            if list_name in area_dict:
518                lst = area_dict[list_name]
519                if not (isinstance(lst, list) and
520                        all(isinstance(elm, str) for elm in lst)):
521                    ferr("malformed '{}' value for area '{}' -- should "
522                         "be a list of strings".format(list_name, area_name))
523
524        for files_key in "files", "files-exclude":
525            if files_key in area_dict:
526                for glob_pattern in area_dict[files_key]:
527                    # This could be changed if it turns out to be too slow,
528                    # e.g. to only check non-globbing filenames. The tuple() is
529                    # needed due to pathlib's glob() returning a generator.
530                    paths = tuple(root.glob(glob_pattern))
531                    if not paths:
532                        ferr("glob pattern '{}' in '{}' in area '{}' does not "
533                             "match any files".format(glob_pattern, files_key,
534                                                      area_name))
535                    if not glob_pattern.endswith("/"):
536                        if all(path.is_dir() for path in paths):
537                            ferr("glob pattern '{}' in '{}' in area '{}' "
538                                     "matches only directories, but has no "
539                                     "trailing '/'"
540                                     .format(glob_pattern, files_key,
541                                             area_name))
542
543        for files_regex_key in "files-regex", "files-regex-exclude":
544            if files_regex_key in area_dict:
545                for regex in area_dict[files_regex_key]:
546                    try:
547                        re.compile(regex)
548                    except re.error as e:
549                        ferr("bad regular expression '{}' in '{}' in "
550                             "'{}': {}".format(regex, files_regex_key,
551                                               area_name, e.msg))
552
553        if "description" in area_dict and \
554           not isinstance(area_dict["description"], str):
555            ferr("malformed 'description' value for area '{}' -- should be a "
556                 "string".format(area_name))
557
558
def _git(*args):
    # Helper for running a Git command. Returns the rstrip()ed stdout output,
    # decoded as UTF-8. Called like _git("diff").
    #
    # Raises GitError (via _giterr()) if the git executable can't be run or
    # if the command exits with a nonzero status.

    git_cmd = ("git",) + args
    git_cmd_s = " ".join(shlex.quote(word) for word in git_cmd)  # For errors

    try:
        # run() waits for the command and collects both output streams
        git_process = subprocess.run(
            git_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's listed in the PATH environment variable"
                .format(git_cmd_s))
    except OSError as e:
        _giterr("error running '{}': {}".format(git_cmd_s, e))

    if git_process.returncode:
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            git_cmd_s,
            git_process.stdout.decode("utf-8"),
            git_process.stderr.decode("utf-8")))

    return git_process.stdout.decode("utf-8").rstrip()
583
584
def _ls_files(path=None):
    # Returns the output lines of 'git ls-files' as a list. If 'path' is
    # given, it is passed on to the command as its only argument.

    extra_args = () if path is None else (path,)
    return _git("ls-files", *extra_args).splitlines()
590
591
def _err(msg):
    # Reports a problem with the maintainers file by raising
    # MaintainersError with 'msg' as the message

    error = MaintainersError(msg)
    raise error
594
595
def _giterr(msg):
    # Reports a Git-related problem by raising GitError with 'msg' as the
    # message

    error = GitError(msg)
    raise error
598
599
600def _serr(msg):
601    # For reporting errors when get_maintainer.py is run as a script.
602    # sys.exit() shouldn't be used otherwise.
603    sys.exit("{}: error: {}".format(sys.argv[0], msg))
604
605
class MaintainersError(Exception):
    """
    Raised when something is wrong with MAINTAINERS.yml or its contents
    """
608
609
class GitError(Exception):
    """
    Raised when running a Git command fails
    """
612
613
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a library
if __name__ == "__main__":
    _main()
616