1#!/usr/bin/env python3
2
3# Copyright (c) 2019 Nordic Semiconductor ASA
4# SPDX-License-Identifier: Apache-2.0
5
6"""
7Lists maintainers for files or commits. Similar in function to
8scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is
9in MAINTAINERS.yml.
10
11The comment at the top of MAINTAINERS.yml in Zephyr documents the file format.
12
13See the help texts for the various subcommands for more information. They can
14be viewed with e.g.
15
16    ./get_maintainer.py path --help
17
18This executable doubles as a Python library. Identifiers not prefixed with '_'
19are part of the library API. The library documentation can be viewed with this
20command:
21
22    $ pydoc get_maintainer
23"""
24
25import argparse
26import operator
27import os
28import pathlib
29import re
30import shlex
31import subprocess
32import sys
33from tabulate import tabulate
34
35from yaml import load, YAMLError
36try:
37    # Use the speedier C LibYAML parser if available
38    from yaml import CSafeLoader as SafeLoader
39except ImportError:
40    from yaml import SafeLoader
41
42
def _main():
    # Entry point when run as an executable.
    #
    # Parses the command line, loads the maintainers file, and hands over to
    # the method that implements the selected subcommand. Known failures are
    # reported through _serr() instead of as tracebacks.

    args = _parse_args()
    try:
        maintainers = Maintainers(args.maintainers)
        args.cmd_fn(maintainers, args)
    except (MaintainersError, GitError, OSError) as e:
        # OSError covers e.g. a missing or unreadable maintainers file, which
        # open() reports as FileNotFoundError/PermissionError
        _serr(e)
51
52
def _parse_args():
    # Parses arguments when run as an executable.
    #
    # Returns the argparse namespace. Besides the parsed options, it has a
    # 'cmd_fn' attribute holding the Maintainers method that implements the
    # selected subcommand. Exits with the usage message if no subcommand was
    # given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__, allow_abbrev=False)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    # 'path' subcommand
    path_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    path_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    path_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    # 'commits' subcommand
    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    # 'list' subcommand
    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in at "
             "least one area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    # 'areas' subcommand, with its filtering options
    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List all areas maintained by maintainer.")
    areas_parser.add_argument(
        "--without-maintainers",
        action="store_true",
        help="Exclude areas that have maintainers")
    areas_parser.add_argument(
        "--without-collaborators",
        action="store_true",
        help="Exclude areas that have collaborators")
    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    # 'area' subcommand
    area_parser = subparsers.add_parser(
        "area",
        help="List area(s) by name")
    area_parser.add_argument(
        "name",
        metavar="AREA",
        nargs="?",
        help="List all areas with the given name.")
    area_parser.set_defaults(cmd_fn=Maintainers._area_cmd)

    # 'orphaned' subcommand
    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    # 'count' subcommand
    count_parser = subparsers.add_parser(
        "count",
        help="Count areas, unique maintainers, and / or unique collaborators")
    count_parser.add_argument(
        "-a",
        "--count-areas",
        action="store_true",
        help="Count the number of areas")
    count_parser.add_argument(
        "-c",
        "--count-collaborators",
        action="store_true",
        help="Count the number of unique collaborators")
    count_parser.add_argument(
        "-n",
        "--count-maintainers",
        action="store_true",
        help="Count the number of unique maintainers")
    count_parser.add_argument(
        "-o",
        "--count-unmaintained",
        action="store_true",
        help="Count the number of unmaintained areas")
    count_parser.set_defaults(cmd_fn=Maintainers._count_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand
        sys.exit(parser.format_usage().rstrip())

    return args
177
178
class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
        A dictionary that maps area names to Area instances, for all areas
        defined in the maintainers file

    filename:
        The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
            Path to the maintainers file to parse. If None, or if the path
            does not exist, MAINTAINERS.yml in the top-level directory of the
            Git repository is used, and must exist.

        Paths in the maintainers file are taken as relative to the directory
        that holds the file.
        """
        if (filename is not None) and (pathlib.Path(filename).exists()):
            self.filename = pathlib.Path(filename)
            self._toplevel = self.filename.parent
        else:
            self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))
            self.filename = self._toplevel / "MAINTAINERS.yml"

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.tests = area_dict.get("tests", [])
            area.tags = area_dict.get("tags", [])
            area.description = area_dict.get("description")

            # Initialize file groups if present
            area.file_groups = []
            for group_dict in area_dict.get("file-groups", ()):
                file_group = FileGroup()
                file_group.name = group_dict.get("name", "Unnamed Group")
                file_group.description = group_dict.get("description")
                file_group.collaborators = group_dict.get("collaborators", [])

                # Create match functions for this file group
                file_group._match_fn = \
                    _get_match_fn(group_dict.get("files"),
                                  group_dict.get("files-regex"))

                file_group._exclude_match_fn = \
                    _get_match_fn(group_dict.get("files-exclude"),
                                  group_dict.get("files-regex-exclude"))

                # Store reference to parent area for inheritance
                file_group._parent_area = area

                area.file_groups.append(file_group)

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = \
                _get_match_fn(area_dict.get("files"),
                              area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = \
                _get_match_fn(area_dict.get("files-exclude"),
                              area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def name2areas(self, name):
        """
        Returns a list of Area instances for the areas that match 'name'.
        """
        return [area for area in self.areas.values() if area.name == name]

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path',
        taken as relative to the current directory
        """
        rel_path = self._repo_relative(path)
        return [area for area in self.areas.values()
                if area._contains(rel_path)]

    def path2area_info(self, path):
        """
        Returns a list of tuples (Area, FileGroup) for the areas that contain
        'path', taken as relative to the current directory. FileGroup will be
        None if the path matches the area's general files rather than a
        specific file group.
        """
        rel_path = self._repo_relative(path)
        return [(area, area.get_file_group_for_path(rel_path))
                for area in self.areas.values()
                if area._contains(rel_path)]

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit
        """
        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            res.update(self.path2areas(path))
        return res

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    def _repo_relative(self, path):
        # Returns 'path', given relative to the current directory, rewritten
        # to be relative to the top-level directory and normalized.
        #
        # Directory paths are made to end in '/' so that foo/bar matches
        # foo/bar/. This is done here rather than in _contains(), because the
        # isdir() makes _contains() twice as slow in cases where it's not
        # needed.
        is_dir = os.path.isdir(path)

        path = os.path.normpath(os.path.join(
            os.path.relpath(os.getcwd(), self._toplevel),
            path))

        # normpath() removes a trailing '/', so it is added afterwards
        if is_dir:
            path += "/"

        return path

    #
    # Command-line subcommands
    #

    def _area_cmd(self, args):
        # 'area' subcommand implementation

        _print_areas(set(self.name2areas(args.name)))

    def _path_cmd(self, args):
        # 'path' subcommand implementation

        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                print()
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                           for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation
        def multiline(items):
            # Each item on its own line, empty string if none
            return "\n".join(items) if items else ""

        table = []
        for area in self.areas.values():
            # Filter on --without-maintainers / --without-collaborators
            if getattr(args, "without_maintainers", False) and area.maintainers:
                continue
            if getattr(args, "without_collaborators", False) and area.collaborators:
                continue

            # Filter on the optional MAINTAINER argument
            if args.maintainer and args.maintainer not in area.maintainers:
                continue

            table.append([
                area.name,
                multiline(area.maintainers),
                multiline(area.collaborators)
            ])

        if table:
            print(tabulate(
                table,
                headers=["Area", "Maintainers", "Collaborators"],
                tablefmt="grid",
                stralign="left",
                disable_numparse=True
            ))

    def _count_cmd(self, args):
        # 'count' subcommand implementation

        if not (args.count_areas or args.count_collaborators or
                args.count_maintainers or args.count_unmaintained):
            # if no specific count is provided, print them all
            args.count_areas = True
            args.count_collaborators = True
            args.count_maintainers = True
            args.count_unmaintained = True

        unmaintained = 0
        collaborators = set()
        maintainers = set()

        for area in self.areas.values():
            if area.status == 'maintained':
                maintainers.update(area.maintainers)
            elif area.status in ('odd fixes', 'unmaintained'):
                # 'unmaintained' is an accepted status in the maintainers
                # file and must be counted along with 'odd fixes'
                unmaintained += 1
            collaborators.update(area.collaborators)

        if args.count_areas:
            print('{:14}\t{}'.format('areas:', len(self.areas)))
        if args.count_maintainers:
            print('{:14}\t{}'.format('maintainers:', len(maintainers)))
        if args.count_collaborators:
            print('{:14}\t{}'.format('collaborators:', len(collaborators)))
        if args.count_unmaintained:
            print('{:14}\t{}'.format('unmaintained:', unmaintained))

    def _list_cmd(self, args):
        # 'list' subcommand implementation

        if args.area is None:
            # List all files that appear in some area
            for path in _ls_files():
                if any(area._contains(path) for area in self.areas.values()):
                    print(path)
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))

            for path in _ls_files():
                if area._contains(path):
                    print(path)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for path in _ls_files(args.path):
            # Orphaned means that no area at all contains the file
            if not any(area._contains(path) for area in self.areas.values()):
                print(path)
470
471
class FileGroup:
    """
    A named subset of an area's files, from a 'file-groups' entry in
    MAINTAINERS.yml.

    A file group narrows down its parent area: a path belongs to the group
    only if the parent area's own patterns accept it and the group's patterns
    accept it as well. Groups therefore subdivide files that the area already
    covers.

    These attributes are available:

    name:
        The name of the file group, as specified in the 'name' key

    description:
        Text from 'description' key, or None if the group has no 'description'

    collaborators:
        List of collaborators specific to this file group
    """
    def _parent_area_contains(self, path):
        """
        Returns a true value if the parent area's own patterns accept 'path'
        (matched and not excluded), and a false value otherwise.
        """
        parent = self._parent_area
        include = parent._match_fn
        return (include and include(path) and
                not (parent._exclude_match_fn and
                     parent._exclude_match_fn(path)))

    def _contains(self, path):
        # Returns a true value if the file group contains 'path', and a false
        # value otherwise. The parent area is consulted first, since a group
        # can only hold files that its area's patterns already cover.
        if self._parent_area_contains(path):
            include = self._match_fn
            return include and include(path) and \
                not (self._exclude_match_fn and self._exclude_match_fn(path))

        return False

    def __repr__(self):
        return "<FileGroup {}>".format(self.name)
516
517
class Area:
    """
    One area entry from MAINTAINERS.yml.

    Instances are filled in by Maintainers, which assigns these attributes:

    name:
        The name of the area (its key in the maintainers file)

    status:
        The status of the area, as a string. None if the area has no 'status'
        key. See MAINTAINERS.yml.

    maintainers:
        List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
        List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
        List of people to inform on pull requests. Empty if the area has no
        'inform' key.

    labels:
        List of GitHub labels for the area. Empty if the area has no 'labels'
        key.

    tests:
        List from the 'tests' key. Empty if the area has no 'tests' key.

    tags:
        List from the 'tags' key. Empty if the area has no 'tags' key.

    description:
        Text from 'description' key, or None if the area has no 'description'
        key

    file_groups:
        List of FileGroup instances for any file-groups defined in the area.
        Empty if the area has no 'file-groups' key.
    """
    def _contains(self, path):
        # Returns a true value if the area contains 'path', and a false value
        # otherwise. File groups are asked first; the area-level patterns are
        # only consulted when no group claims the path.
        if any(group._contains(path) for group in self.file_groups):
            return True

        include = self._match_fn
        return include and include(path) and \
            not (self._exclude_match_fn and self._exclude_match_fn(path))

    def get_collaborators_for_path(self, path):
        """
        Returns the list of collaborators responsible for 'path': those of the
        first file group that contains the path, or the area's general
        collaborators if no file group does.
        """
        for group in self.file_groups:
            if group._contains(path):
                return group.collaborators

        return self.collaborators

    def get_file_group_for_path(self, path):
        """
        Returns the first FileGroup instance that contains 'path', or None if
        no file group in the area contains it.
        """
        return next(
            (group for group in self.file_groups if group._contains(path)),
            None)

    def __repr__(self):
        return "<Area {}>".format(self.name)
587
588
589def _print_areas(areas):
590    first = True
591    for area in sorted(areas, key=operator.attrgetter("name")):
592        if not first:
593            print()
594        first = False
595
596        print("""\
597{}
598\tstatus: {}
599\tmaintainers: {}
600\tcollaborators: {}
601\tinform: {}
602\tlabels: {}
603\ttests: {}
604\ttags: {}
605\tdescription: {}""".format(area.name,
606                            area.status,
607                            ", ".join(area.maintainers),
608                            ", ".join(area.collaborators),
609                            ", ".join(area.inform),
610                            ", ".join(area.labels),
611                            ", ".join(area.tests),
612                            ", ".join(area.tags),
613                            area.description or ""))
614
615        # Print file groups if any exist
616        if area.file_groups:
617            print("\tfile-groups:")
618            for file_group in area.file_groups:
619                print("\t\t{}: {}".format(
620                    file_group.name,
621                    ", ".join(file_group.collaborators) if file_group.collaborators else "no collaborators"
622                ))
623                if file_group.description:
624                    print("\t\t  description: {}".format(file_group.description))
625
626
627def _get_match_fn(globs, regexes):
628    # Constructs a single regex that tests for matches against the globs in
629    # 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
630    # Returns the search() method of the compiled regex.
631    #
632    # Returns None if there are neither globs nor regexes, which should be
633    # interpreted as no match.
634
635    if not (globs or regexes):
636        return None
637
638    regex = ""
639
640    if globs:
641        glob_regexes = []
642        for glob in globs:
643            # Construct a regex equivalent to the glob
644            glob_regex = glob.replace(".", "\\.").replace("*", "[^/]*") \
645                             .replace("?", "[^/]")
646
647            if not glob.endswith("/"):
648                # Require a full match for globs that don't end in /
649                glob_regex += "$"
650
651            glob_regexes.append(glob_regex)
652
653        # The glob regexes must anchor to the beginning of the path, since we
654        # return search(). (?:) is a non-capturing group.
655        regex += "^(?:{})".format("|".join(glob_regexes))
656
657    if regexes:
658        if regex:
659            regex += "|"
660        regex += "|".join(regexes)
661
662    return re.compile(regex).search
663
664
def _load_maintainers(path):
    # Returns the parsed contents of the maintainers file at 'path', also
    # running checks on the contents. The returned format is plain Python
    # dicts/lists/etc., mirroring the structure of the file.
    #
    # Raises MaintainersError if the file is not valid YAML. The checks in
    # _check_maintainers() report problems with the contents.

    with open(path, encoding="utf-8") as f:
        try:
            parsed = load(f, Loader=SafeLoader)
        except YAMLError as e:
            # Keep the parser's exception as the cause, for debugging
            raise MaintainersError(
                "{}: YAML error: {}".format(path, e)) from e

    # The file is only needed for parsing, so it is closed before validation
    _check_maintainers(path, parsed)
    return parsed
678
679
680def _check_maintainers(maints_path, yaml):
681    # Checks the maintainers data in 'yaml', which comes from the maintainers
682    # file at maints_path, which is a pathlib.Path instance
683
684    root = maints_path.parent
685
686    def ferr(msg):
687        _err("{}: {}".format(maints_path, msg))  # Prepend the filename
688
689    if not isinstance(yaml, dict):
690        ferr("empty or malformed YAML (not a dict)")
691
692    ok_keys = {"status", "maintainers", "collaborators", "inform", "files",
693               "files-exclude", "files-regex", "files-regex-exclude",
694               "labels", "description", "tests", "tags", "file-groups"}
695
696    ok_status = {"maintained", "odd fixes", "unmaintained", "obsolete"}
697    ok_status_s = ", ".join('"' + s + '"' for s in ok_status)  # For messages
698
699    for area_name, area_dict in yaml.items():
700        if not isinstance(area_dict, dict):
701            ferr("malformed entry for area '{}' (not a dict)"
702                 .format(area_name))
703
704        for key in area_dict:
705            if key not in ok_keys:
706                ferr("unknown key '{}' in area '{}'"
707                     .format(key, area_name))
708
709        if "status" in area_dict and \
710           area_dict["status"] not in ok_status:
711            ferr("bad 'status' key on area '{}', should be one of {}"
712                 .format(area_name, ok_status_s))
713
714        if not area_dict.keys() & {"files", "files-regex", "file-groups"}:
715            ferr("either 'files', 'files-regex', or 'file-groups' (or combinations) must be specified "
716                 "for area '{}'".format(area_name))
717
718        if not area_dict.get("maintainers") and area_dict.get("status") == "maintained":
719            ferr("maintained area '{}' with no maintainers".format(area_name))
720
721        for list_name in "maintainers", "collaborators", "inform", "files", \
722                         "files-regex", "labels", "tags", "tests":
723            if list_name in area_dict:
724                lst = area_dict[list_name]
725                if not (isinstance(lst, list) and
726                        all(isinstance(elm, str) for elm in lst)):
727                    ferr("malformed '{}' value for area '{}' -- should "
728                         "be a list of strings".format(list_name, area_name))
729
730        for files_key in "files", "files-exclude":
731            if files_key in area_dict:
732                for glob_pattern in area_dict[files_key]:
733                    # This could be changed if it turns out to be too slow,
734                    # e.g. to only check non-globbing filenames. The tuple() is
735                    # needed due to pathlib's glob() returning a generator.
736                    paths = tuple(root.glob(glob_pattern))
737                    if not paths:
738                        ferr("glob pattern '{}' in '{}' in area '{}' does not "
739                             "match any files".format(glob_pattern, files_key,
740                                                      area_name))
741                    if not glob_pattern.endswith("/"):
742                        if all(path.is_dir() for path in paths):
743                            ferr("glob pattern '{}' in '{}' in area '{}' "
744                                     "matches only directories, but has no "
745                                     "trailing '/'"
746                                     .format(glob_pattern, files_key,
747                                             area_name))
748
749        for files_regex_key in "files-regex", "files-regex-exclude":
750            if files_regex_key in area_dict:
751                for regex in area_dict[files_regex_key]:
752                    try:
753                        re.compile(regex)
754                    except re.error as e:
755                        ferr("bad regular expression '{}' in '{}' in "
756                             "'{}': {}".format(regex, files_regex_key,
757                                               area_name, e.msg))
758
759        # Validate file-groups structure
760        if "file-groups" in area_dict:
761            file_groups = area_dict["file-groups"]
762            if not isinstance(file_groups, list):
763                ferr("malformed 'file-groups' value for area '{}' -- should be a list"
764                     .format(area_name))
765
766            ok_group_keys = {"name", "description", "collaborators", "files",
767                           "files-exclude", "files-regex", "files-regex-exclude"}
768
769            for i, group_dict in enumerate(file_groups):
770                if not isinstance(group_dict, dict):
771                    ferr("malformed file group {} in area '{}' -- should be a dict"
772                         .format(i, area_name))
773
774                for key in group_dict:
775                    if key not in ok_group_keys:
776                        ferr("unknown key '{}' in file group {} in area '{}'"
777                             .format(key, i, area_name))
778
779                # Each file group must have either files or files-regex
780                if not group_dict.keys() & {"files", "files-regex"}:
781                    ferr("file group {} in area '{}' must specify either 'files' or 'files-regex'"
782                         .format(i, area_name))
783
784                # Validate string fields in file groups
785                for str_field in ["name", "description"]:
786                    if str_field in group_dict and not isinstance(group_dict[str_field], str):
787                        ferr("malformed '{}' in file group {} in area '{}' -- should be a string"
788                             .format(str_field, i, area_name))
789
790                # Validate list fields in file groups
791                for list_field in ["collaborators", "files", "files-exclude", "files-regex", "files-regex-exclude"]:
792                    if list_field in group_dict:
793                        lst = group_dict[list_field]
794                        if not (isinstance(lst, list) and all(isinstance(elm, str) for elm in lst)):
795                            ferr("malformed '{}' in file group {} in area '{}' -- should be a list of strings"
796                                 .format(list_field, i, area_name))
797
798                # Validate file patterns in file groups
799                for files_key in "files", "files-exclude":
800                    if files_key in group_dict:
801                        for glob_pattern in group_dict[files_key]:
802                            paths = tuple(root.glob(glob_pattern))
803                            if not paths:
804                                ferr("glob pattern '{}' in '{}' in file group {} in area '{}' does not "
805                                     "match any files".format(glob_pattern, files_key, i, area_name))
806
807                # Validate regex patterns in file groups
808                for files_regex_key in "files-regex", "files-regex-exclude":
809                    if files_regex_key in group_dict:
810                        for regex in group_dict[files_regex_key]:
811                            try:
812                                re.compile(regex)
813                            except re.error as e:
814                                ferr("bad regular expression '{}' in '{}' in file group {} in area '{}': {}"
815                                     .format(regex, files_regex_key, i, area_name, e.msg))
816
817        if "description" in area_dict and \
818           not isinstance(area_dict["description"], str):
819            ferr("malformed 'description' value for area '{}' -- should be a "
820                 "string".format(area_name))
821
822
def _git(*args):
    # Helper for running a Git command. Returns the rstrip()ed stdout output,
    # decoded as UTF-8. Called like _git("diff").
    #
    # Raises GitError (through _giterr()) if the git executable can't be
    # started or if the command exits with a nonzero status.

    git_cmd = ("git",) + args
    git_cmd_s = " ".join(shlex.quote(word) for word in git_cmd)  # For errors

    try:
        git_process = subprocess.Popen(
            git_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's listed in the PATH environment variable"
                .format(git_cmd_s))
    except OSError as e:
        _giterr("error running '{}': {}".format(git_cmd_s, e))

    # communicate() also waits for the process, which sets 'returncode'
    stdout, stderr = git_process.communicate()
    if git_process.returncode:
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            git_cmd_s, stdout.decode("utf-8"), stderr.decode("utf-8")))

    return stdout.decode("utf-8").rstrip()
847
848
def _ls_files(path=None):
    # Returns the output lines of 'git ls-files' as a list, one path per
    # entry. If 'path' is given, it is passed on to 'git ls-files' as its
    # only argument.
    extra_args = () if path is None else (path,)
    listing = _git("ls-files", *extra_args)
    return listing.splitlines()
854
855
def _err(msg):
    # Reports a problem with the maintainers file by raising MaintainersError
    # with 'msg' as its message
    error = MaintainersError(msg)
    raise error
858
859
def _giterr(msg):
    # Reports a Git-related problem by raising GitError with 'msg' as its
    # message
    error = GitError(msg)
    raise error
862
863
864def _serr(msg):
865    # For reporting errors when get_maintainer.py is run as a script.
866    # sys.exit() shouldn't be used otherwise.
867    sys.exit("{}: error: {}".format(sys.argv[0], msg))
868
869
class MaintainersError(Exception):
    """
    Exception raised for MAINTAINERS.yml-related errors, e.g. invalid YAML or
    contents that fail the checks run when the file is loaded
    """
872
873
class GitError(Exception):
    """
    Exception raised for Git-related errors, e.g. when the git executable is
    missing or a git command exits with a nonzero status
    """
876
877
# Run the command-line interface only when this file is executed as a script.
# Importing it as a library must not parse arguments or print anything.
if __name__ == "__main__":
    _main()
880