1#!/usr/bin/env python3
2
3# Copyright (c) 2019 Nordic Semiconductor ASA
4# SPDX-License-Identifier: Apache-2.0
5
6"""
7Lists maintainers for files or commits. Similar in function to
8scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is
9in MAINTAINERS.yml.
10
11The comment at the top of MAINTAINERS.yml in Zephyr documents the file format.
12
13See the help texts for the various subcommands for more information. They can
14be viewed with e.g.
15
16    ./get_maintainer.py path --help
17
18This executable doubles as a Python library. Identifiers not prefixed with '_'
19are part of the library API. The library documentation can be viewed with this
20command:
21
22    $ pydoc get_maintainer
23"""
24
25import argparse
26import operator
27import os
28import pathlib
29import re
30import shlex
31import subprocess
32import sys
33
34from yaml import load, YAMLError
35try:
36    # Use the speedier C LibYAML parser if available
37    from yaml import CLoader as Loader
38except ImportError:
39    from yaml import Loader
40
41
def _main():
    # Script entry point: parses the command line, loads the maintainers
    # file, and dispatches to the handler for the selected subcommand.
    # MaintainersError and GitError are reported through _serr() instead of
    # surfacing as tracebacks.

    args = _parse_args()
    try:
        maintainers = Maintainers(args.maintainers)
        args.cmd_fn(maintainers, args)
    except (MaintainersError, GitError) as error:
        _serr(error)
50
51
def _parse_args():
    # Parses the command line when run as an executable and returns the
    # resulting argparse namespace.
    #
    # Each subcommand stores its handler (an unbound Maintainers method) in
    # 'cmd_fn'; _main() calls it as cmd_fn(maintainers, args). Exits with the
    # usage text if no subcommand was given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    path_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    path_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    path_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        # Non-orphaned files are the ones that DO appear in some area
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in at "
             "least one area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List all areas maintained by maintainer.")
    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand, so no set_defaults() was applied
        sys.exit(parser.format_usage().rstrip())

    return args
129
130
class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
        A dictionary that maps area names to Area instances, for all areas
        defined in the maintainers file

    filename:
        The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
            Path to the maintainers file to parse. If None, MAINTAINERS.yml in
            the top-level directory of the Git repository is used, and must
            exist.

        Raises GitError if the top-level directory of the repository can't be
        determined, and MaintainersError if the maintainers file is malformed.
        """
        self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))

        if filename is None:
            self.filename = self._toplevel / "MAINTAINERS.yml"
        else:
            self.filename = pathlib.Path(filename)

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.description = area_dict.get("description")

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = \
                _get_match_fn(area_dict.get("files"),
                              area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = \
                _get_match_fn(area_dict.get("files-exclude"),
                              area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path',
        taken as relative to the current directory
        """
        # Make directory paths end in '/' so that foo/bar matches foo/bar/.
        # Skip this check in _contains() itself, because the isdir() makes it
        # twice as slow in cases where it's not needed.
        is_dir = os.path.isdir(path)

        # Make 'path' relative to the repository root and normalize it.
        # normpath() would remove a trailing '/', so we add it afterwards.
        path = os.path.normpath(os.path.join(
            os.path.relpath(os.getcwd(), self._toplevel),
            path))

        if is_dir:
            path += "/"

        return self._repo_path2areas(path)

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit

        Raises GitError if the 'git diff' command fails.
        """
        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            # 'git diff --name-only' prints paths relative to the top-level
            # directory of the repository, not to the current directory, so
            # they must not go through path2areas(), which would prepend the
            # current directory's position within the repository.
            if (self._toplevel / path).is_dir():
                # Same trailing-'/' convention for directories as in
                # path2areas()
                path += "/"
            res.update(self._repo_path2areas(path))
        return res

    def _repo_path2areas(self, path):
        # Returns a list of the areas that contain 'path', which must already
        # be relative to the top-level directory of the repository (with a
        # trailing '/' for directories)

        return [area for area in self.areas.values()
                if area._contains(path)]

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    #
    # Command-line subcommands
    #

    def _path_cmd(self, args):
        # 'path' subcommand implementation

        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                print()
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                           for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation. Prints one line per area with
        # its maintainers, limited to the areas maintained by args.maintainer
        # if it is given.

        for area in self.areas.values():
            if args.maintainer and args.maintainer not in area.maintainers:
                continue
            print("{:25}\t{}".format(area.name, ",".join(area.maintainers)))

    def _list_cmd(self, args):
        # 'list' subcommand implementation

        if args.area is None:
            # List all files that appear in some area
            areas = self.areas.values()
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))
            areas = (area,)

        for path in _ls_files():
            if any(area._contains(path) for area in areas):
                print(path)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for path in _ls_files(args.path):
            # Orphaned means that no area at all contains the file
            if not any(area._contains(path) for area in self.areas.values()):
                print(path)
297
298
class Area:
    """
    Represents an entry for an area in MAINTAINERS.yml.

    These attributes are available:

    name:
        The name of the area, as a string. None until assigned.

    status:
        The status of the area, as a string. None if the area has no 'status'
        key. See MAINTAINERS.yml.

    maintainers:
        List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
        List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
        List of people to inform on pull requests. Empty if the area has no
        'inform' key.

    labels:
        List of GitHub labels for the area. Empty if the area has no 'labels'
        key.

    description:
        Text from 'description' key, or None if the area has no 'description'
        key
    """
    def __init__(self):
        # Start out as an empty area that contains no paths, so that
        # _contains() and __repr__() are safe to call on a freshly created
        # instance. The attributes are filled in by whoever creates the Area.
        self.name = None
        self.status = None
        self.maintainers = []
        self.collaborators = []
        self.inform = []
        self.labels = []
        self.description = None

        # Callables taking a path and returning a true value on a match, or
        # None for "never matches"
        self._match_fn = None
        self._exclude_match_fn = None

    def _contains(self, path):
        # Returns True if the area contains 'path', and False otherwise.
        #
        # The match functions return arbitrary truthy/falsy objects (or are
        # None), so the result is normalized to a real bool here.

        if not (self._match_fn and self._match_fn(path)):
            return False

        return not (self._exclude_match_fn and self._exclude_match_fn(path))

    def __repr__(self):
        return "<Area {}>".format(self.name)
335
336
337def _print_areas(areas):
338    first = True
339    for area in sorted(areas, key=operator.attrgetter("name")):
340        if not first:
341            print()
342        first = False
343
344        print("""\
345{}
346\tstatus: {}
347\tmaintainers: {}
348\tcollaborators: {}
349\tinform: {}
350\tlabels: {}
351\tdescription: {}""".format(area.name,
352                            area.status,
353                            ", ".join(area.maintainers),
354                            ", ".join(area.collaborators),
355                            ", ".join(area.inform),
356                            ", ".join(area.labels),
357                            area.description or ""))
358
359
360def _get_match_fn(globs, regexes):
361    # Constructs a single regex that tests for matches against the globs in
362    # 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
363    # Returns the search() method of the compiled regex.
364    #
365    # Returns None if there are neither globs nor regexes, which should be
366    # interpreted as no match.
367
368    if not (globs or regexes):
369        return None
370
371    regex = ""
372
373    if globs:
374        glob_regexes = []
375        for glob in globs:
376            # Construct a regex equivalent to the glob
377            glob_regex = glob.replace(".", "\\.").replace("*", "[^/]*") \
378                             .replace("?", "[^/]")
379
380            if not glob.endswith("/"):
381                # Require a full match for globs that don't end in /
382                glob_regex += "$"
383
384            glob_regexes.append(glob_regex)
385
386        # The glob regexes must anchor to the beginning of the path, since we
387        # return search(). (?:) is a non-capturing group.
388        regex += "^(?:{})".format("|".join(glob_regexes))
389
390    if regexes:
391        if regex:
392            regex += "|"
393        regex += "|".join(regexes)
394
395    return re.compile(regex).search
396
397
def _load_maintainers(path):
    # Returns the parsed contents of the maintainers file at 'path', also
    # running checks on the contents. The returned format is plain Python
    # dicts/lists/etc., mirroring the structure of the file.
    #
    # Raises MaintainersError if the file is not valid YAML or fails the
    # checks in _check_maintainers(). Errors from open() propagate unchanged.

    with open(path, encoding="utf-8") as f:
        try:
            # NOTE(review): load() with the full (C)Loader can construct
            # arbitrary Python objects. That is fine for a maintainers file
            # from a trusted repository, but this function must not be
            # pointed at untrusted input.
            yaml = load(f, Loader=Loader)
        except YAMLError as e:
            raise MaintainersError(
                "{}: YAML error: {}".format(path, e)) from e

    # The file is fully parsed at this point, so it doesn't need to stay open
    # while the (filesystem-globbing) checks run. _check_maintainers() needs
    # a pathlib.Path.
    _check_maintainers(pathlib.Path(path), yaml)
    return yaml
411
412
413def _check_maintainers(maints_path, yaml):
414    # Checks the maintainers data in 'yaml', which comes from the maintainers
415    # file at maints_path, which is a pathlib.Path instance
416
417    root = maints_path.parent
418
419    def ferr(msg):
420        _err("{}: {}".format(maints_path, msg))  # Prepend the filename
421
422    if not isinstance(yaml, dict):
423        ferr("empty or malformed YAML (not a dict)")
424
425    ok_keys = {"status", "maintainers", "collaborators", "inform", "files",
426               "files-exclude", "files-regex", "files-regex-exclude",
427               "labels", "description"}
428
429    ok_status = {"maintained", "odd fixes", "orphaned", "obsolete"}
430    ok_status_s = ", ".join('"' + s + '"' for s in ok_status)  # For messages
431
432    for area_name, area_dict in yaml.items():
433        if not isinstance(area_dict, dict):
434            ferr("malformed entry for area '{}' (not a dict)"
435                 .format(area_name))
436
437        for key in area_dict:
438            if key not in ok_keys:
439                ferr("unknown key '{}' in area '{}'"
440                     .format(key, area_name))
441
442        if "status" in area_dict and \
443           area_dict["status"] not in ok_status:
444            ferr("bad 'status' key on area '{}', should be one of {}"
445                 .format(area_name, ok_status_s))
446
447        if not area_dict.keys() & {"files", "files-regex"}:
448            ferr("either 'files' or 'files-regex' (or both) must be specified "
449                 "for area '{}'".format(area_name))
450
451        for list_name in "maintainers", "collaborators", "inform", "files", \
452                         "files-regex", "labels":
453            if list_name in area_dict:
454                lst = area_dict[list_name]
455                if not (isinstance(lst, list) and
456                        all(isinstance(elm, str) for elm in lst)):
457                    ferr("malformed '{}' value for area '{}' -- should "
458                         "be a list of strings".format(list_name, area_name))
459
460        for files_key in "files", "files-exclude":
461            if files_key in area_dict:
462                for glob_pattern in area_dict[files_key]:
463                    # This could be changed if it turns out to be too slow,
464                    # e.g. to only check non-globbing filenames. The tuple() is
465                    # needed due to pathlib's glob() returning a generator.
466                    paths = tuple(root.glob(glob_pattern))
467                    if not paths:
468                        ferr("glob pattern '{}' in '{}' in area '{}' does not "
469                             "match any files".format(glob_pattern, files_key,
470                                                      area_name))
471                    if not glob_pattern.endswith("/"):
472                        for path in paths:
473                            if path.is_dir():
474                                ferr("glob pattern '{}' in '{}' in area '{}' "
475                                     "matches a directory, but has no "
476                                     "trailing '/'"
477                                     .format(glob_pattern, files_key,
478                                             area_name))
479
480        for files_regex_key in "files-regex", "files-regex-exclude":
481            if files_regex_key in area_dict:
482                for regex in area_dict[files_regex_key]:
483                    try:
484                        re.compile(regex)
485                    except re.error as e:
486                        ferr("bad regular expression '{}' in '{}' in "
487                             "'{}': {}".format(regex, files_regex_key,
488                                               area_name, e.msg))
489
490        if "description" in area_dict and \
491           not isinstance(area_dict["description"], str):
492            ferr("malformed 'description' value for area '{}' -- should be a "
493                 "string".format(area_name))
494
495
def _git(*args):
    # Helper for running a Git command. Returns the rstrip()ed stdout output,
    # decoded as UTF-8. Called like _git("diff").
    #
    # Raises GitError (via _giterr()) if Git can't be started or exits with a
    # non-zero status.

    git_cmd = ("git",) + args
    git_cmd_s = " ".join(shlex.quote(word) for word in git_cmd)  # For errors

    try:
        git_process = subprocess.Popen(
            git_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's in a directory listed in the PATH environment variable"
                .format(git_cmd_s))
    except OSError as e:
        _giterr("error running '{}': {}".format(git_cmd_s, e))

    stdout, stderr = git_process.communicate()
    if git_process.returncode:
        # errors="replace" so that undecodable output can't mask the actual
        # Git failure with a UnicodeDecodeError
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            git_cmd_s,
            stdout.decode("utf-8", errors="replace"),
            stderr.decode("utf-8", errors="replace")))

    return stdout.decode("utf-8").rstrip()
520
521
def _ls_files(path=None):
    # Returns a list with the files tracked by Git, as paths relative to the
    # top-level directory of the repository. If 'path' is given (taken as
    # relative to the current directory), only files under it are listed.
    #
    # Raises GitError (via _git()) if the Git command fails.

    # Without --full-name, Git prints paths relative to the current
    # directory. The area patterns are anchored at the top-level directory,
    # so matching would silently go wrong when run from a subdirectory.
    cmd = ["ls-files", "--full-name"]
    if path is not None:
        cmd.append(path)
    return _git(*cmd).splitlines()
527
528
def _err(msg):
    # Raises a MaintainersError carrying the message 'msg'
    error = MaintainersError(msg)
    raise error
531
532
def _giterr(msg):
    # Raises a GitError carrying the message 'msg'
    error = GitError(msg)
    raise error
535
536
537def _serr(msg):
538    # For reporting errors when get_maintainer.py is run as a script.
539    # sys.exit() shouldn't be used otherwise.
540    sys.exit("{}: error: {}".format(sys.argv[0], msg))
541
542
class MaintainersError(Exception):
    """
    Exception raised for MAINTAINERS.yml-related errors
    """
545
546
class GitError(Exception):
    """
    Exception raised for Git-related errors
    """
549
550
# Only run the command-line interface when executed as a script, so that the
# module can also be imported as a library without side effects
if __name__ == "__main__":
    _main()
553