1#!/usr/bin/env python3
2
3# Copyright (c) 2022 Intel Corp.
4# SPDX-License-Identifier: Apache-2.0
5
6import argparse
7import datetime
8import os
9import sys
10import time
11from collections import defaultdict
12from pathlib import Path
13
14import yaml
15from github import Auth, Github, GithubException
16from github.GithubException import UnknownObjectException
17from west.manifest import Manifest, ManifestProject
18
# Directory containing this script.
TOP_DIR = os.path.join(os.path.dirname(__file__))
# Make the parent scripts directory importable so get_maintainer can be found.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from get_maintainer import Maintainers  # noqa: E402

# Resolve ZEPHYR_BASE: honor the environment variable when set, otherwise
# derive it from this file's location (two directory levels up).
ZEPHYR_BASE = os.environ.get('ZEPHYR_BASE')
if ZEPHYR_BASE:
    ZEPHYR_BASE = Path(ZEPHYR_BASE)
else:
    ZEPHYR_BASE = Path(__file__).resolve().parents[2]
    # Propagate this decision to child processes.
    os.environ['ZEPHYR_BASE'] = str(ZEPHYR_BASE)
30
31
def log(s):
    """Emit *s* on stdout, but only when --verbose was given."""
    if args.verbose:
        print(s)
35
36
def parse_args():
    """Parse the command line into the module-level ``args`` namespace."""
    global args
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        allow_abbrev=False,
    )

    parser.add_argument(
        "-M", "--maintainer-file", required=False, default="MAINTAINERS.yml",
        help="Maintainer file to be used.",
    )

    # The operating modes are mutually exclusive: one PR, one issue,
    # everything since a date, or module (west project) repositories.
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument(
        "-P", "--pull_request", required=False, default=None, type=int,
        help="Operate on one pull-request only.",
    )
    mode.add_argument(
        "-I", "--issue", required=False, default=None, type=int,
        help="Operate on one issue only.",
    )
    mode.add_argument("-s", "--since", required=False, help="Process pull-requests since date.")
    mode.add_argument("-m", "--modules", action="store_true",
                      help="Process pull-requests from modules.")

    parser.add_argument("-y", "--dry-run", action="store_true", default=False,
                        help="Dry run only.")
    parser.add_argument("-o", "--org", default="zephyrproject-rtos", help="Github organisation")
    parser.add_argument("-r", "--repo", default="zephyr", help="Github repository")
    parser.add_argument("--updated-manifest", default=None,
                        help="Updated manifest file to compare against current west.yml")
    parser.add_argument("--updated-maintainer-file", default=None,
                        help="Updated maintainer file to compare against current MAINTAINERS.yml")
    parser.add_argument("-v", "--verbose", action="count", default=0, help="Verbose Output")

    args = parser.parse_args()
91
92
def load_areas(filename: str):
    """Load a MAINTAINERS-style YAML file and return only its area entries.

    An area entry is a mapping that declares a "files" or "files-regex"
    key; every other top-level entry is dropped.
    """
    with open(filename) as f:
        doc = yaml.safe_load(f)

    areas = {}
    for name, entry in doc.items():
        if isinstance(entry, dict) and ("files" in entry or "files-regex" in entry):
            areas[name] = entry
    return areas
99
100
def process_manifest(old_manifest_file):
    """Diff the current west.yml against *old_manifest_file*.

    Returns a list of maintainer area names ("West project: <name>") for
    every project that was added, removed, or had its revision changed, or
    an empty list when either manifest file is missing.

    :param old_manifest_file: path to the previous manifest to compare with.
    """
    log("Processing manifest changes")
    if not os.path.isfile("west.yml") or not os.path.isfile(old_manifest_file):
        log("No west.yml found, skipping...")
        return []
    old_manifest = Manifest.from_file(old_manifest_file)
    new_manifest = Manifest.from_file("west.yml")
    old_projs = {(p.name, p.revision) for p in old_manifest.projects}
    new_projs = {(p.name, p.revision) for p in new_manifest.projects}
    # Hoist the name sets once instead of rebuilding a list inside a
    # filter() for every element (was O(n^2) and shadowed the loop variable).
    old_names = {name for name, _ in old_projs}
    new_names = {name for name, _ in new_projs}
    # Removed projects: the name disappeared entirely.
    rprojs = {p for p in old_projs - new_projs if p[0] not in new_names}
    # Updated projects: same name, different revision.
    uprojs = {p for p in new_projs - old_projs if p[0] in old_names}
    # Added projects: new entries that are not revision updates.
    aprojs = new_projs - old_projs - uprojs

    # All projs
    projs = rprojs | uprojs | aprojs
    projs_names = [name for name, rev in projs]

    log(f"found modified projects: {projs_names}")
    areas = [f'West project: {p}' for p in projs_names]

    log(f'manifest areas: {areas}')
    return areas
128
129
def set_or_empty(d, key):
    """Return ``d[key]`` as a set; a missing key or None value yields set()."""
    value = d.get(key) or []
    return set(value)
132
133
def _diff_set_field(old_entry, new_entry, key, label):
    """Return report lines for additions/removals in a set-valued area field.

    A missing *key* or an explicit None is treated as the empty set.
    *label* is the human-readable field name used in the messages.
    """
    old_vals = set(old_entry.get(key) or [])
    new_vals = set(new_entry.get(key) or [])
    lines = []
    added = new_vals - old_vals
    removed = old_vals - new_vals
    if added:
        lines.append(f"  {label} added: {', '.join(sorted(added))}")
    if removed:
        lines.append(f"  {label} removed: {', '.join(sorted(removed))}")
    return lines


def compare_areas(old, new, repo_fullname=None, token=None):
    """Compare two area dicts (as returned by load_areas) and report diffs.

    Prints a report of added, removed, and modified areas and returns the
    set of area names that were added, removed, or changed in any field
    (maintainers, collaborators, status, labels, files, files-regex).

    :param old: previous {area-name: entry} mapping.
    :param new: current {area-name: entry} mapping.
    :param repo_fullname: unused; kept for interface compatibility.
    :param token: unused; kept for interface compatibility.
    """
    old_areas = set(old.keys())
    new_areas = set(new.keys())

    changed_areas = set()
    added_areas = new_areas - old_areas
    removed_areas = old_areas - new_areas
    common_areas = old_areas & new_areas

    print("=== Areas Added ===")
    for area in sorted(added_areas):
        print(f"+ {area}")

    print("\n=== Areas Removed ===")
    for area in sorted(removed_areas):
        print(f"- {area}")

    print("\n=== Area Changes ===")
    for area in sorted(common_areas):
        old_entry = old[area]
        new_entry = new[area]

        # Same comparison order as the original hand-written stanzas.
        changes = []
        changes += _diff_set_field(old_entry, new_entry, "maintainers", "Maintainers")
        changes += _diff_set_field(old_entry, new_entry, "collaborators", "Collaborators")

        # Compare status (scalar, not a set).
        old_status = old_entry.get("status")
        new_status = new_entry.get("status")
        if old_status != new_status:
            changes.append(f"  Status changed: {old_status} -> {new_status}")

        changes += _diff_set_field(old_entry, new_entry, "labels", "Labels")
        changes += _diff_set_field(old_entry, new_entry, "files", "Files")
        changes += _diff_set_field(old_entry, new_entry, "files-regex", "files-regex")

        if changes:
            changed_areas.add(area)
            print(f"area changed: {area}")
            # Bug fix: the detail lines were computed and then silently
            # dropped; print them so the report actually shows what changed.
            for line in changes:
                print(line)

    return added_areas | removed_areas | changed_areas
218
219
def process_pr(gh, maintainer_file, number):
    """Triage a single pull request.

    Maps the PR's changed files to maintainer-file areas, then:
    - applies or removes the 'size: XS' label and applies area labels,
    - requests reviews from area maintainers/collaborators,
    - assigns the maintainer(s) of the most affected area.

    Driven by the module-level ``args`` namespace (org/repo, dry-run,
    updated manifest/maintainer files).

    :param gh: authenticated Github client.
    :param maintainer_file: parsed maintainer file (Maintainers instance).
    :param number: pull request number in args.org/args.repo.
    """
    gh_repo = gh.get_repo(f"{args.org}/{args.repo}")
    pr = gh_repo.get_pull(number)

    log(f"working on https://github.com/{args.org}/{args.repo}/pull/{pr.number} : {pr.title}")

    labels = set()
    # Per-area count of matched files; used to rank areas by relevance.
    area_counter = defaultdict(int)
    # Per-maintainer count of matched files; used as a fallback ranking.
    found_maintainers = defaultdict(int)

    num_files = 0
    fn = list(pr.get_files())

    # Check if PR currently has 'size: XS' label
    current_labels = [label.name for label in pr.labels]
    has_size_xs_label = 'size: XS' in current_labels

    # Determine if PR qualifies for 'size: XS' label: one commit touching
    # at most one added and one deleted line.
    qualifies_for_xs = pr.commits == 1 and (pr.additions <= 1 and pr.deletions <= 1)

    if qualifies_for_xs:
        labels = {'size: XS'}
    elif has_size_xs_label and not qualifies_for_xs:
        # Remove 'size: XS' label if PR no longer qualifies
        log(
            f"removing 'size: XS' label (commits: {pr.commits}, "
            f"additions: {pr.additions}, deletions: {pr.deletions})..."
        )
        if not args.dry_run:
            pr.remove_from_labels('size: XS')

    # Very large PRs are not triaged automatically.
    if len(fn) > 500:
        log(f"Too many files changed ({len(fn)}), skipping....")
        return

    # areas where assignment happens if only said areas are affected
    meta_areas = ['Release Notes', 'Documentation', 'Samples', 'Tests']

    collab_per_path = set()
    additional_reviews = set()
    for changed_file in fn:
        num_files += 1
        log(f"file: {changed_file.filename}")

        areas = []
        if changed_file.filename in ['west.yml', 'submanifests/optional.yaml']:
            # Manifest change: derive "West project: <name>" areas from the
            # diff between the updated manifest and the current west.yml.
            if not args.updated_manifest:
                log("No updated manifest, cannot process west.yml changes, skipping...")
                continue
            parsed_areas = process_manifest(old_manifest_file=args.updated_manifest)
            for _area in parsed_areas:
                area_match = maintainer_file.name2areas(_area)
                if area_match:
                    _area_obj = area_match[0]
                    collabs_for_area = _area_obj.get_collaborators_for_path(changed_file.filename)
                    collab_per_path.update(collabs_for_area)
                    areas.extend(area_match)
        elif changed_file.filename in ['MAINTAINERS.yml']:
            areas = maintainer_file.path2areas(changed_file.filename)
            if args.updated_maintainer_file:
                log("cannot process MAINTAINERS.yml changes, skipping...")

                # Compare the old and new maintainer files and queue the
                # maintainers of every changed area as extra reviewers.
                old_areas = load_areas(args.updated_maintainer_file)
                new_areas = load_areas('MAINTAINERS.yml')
                changed_areas = compare_areas(old_areas, new_areas)
                for _area in changed_areas:
                    area_match = maintainer_file.name2areas(_area)
                    if area_match:
                        # get list of maintainers for changed area
                        additional_reviews.update(maintainer_file.areas[_area].maintainers)
                log(f"MAINTAINERS.yml changed, adding reviewrs: {additional_reviews}")
        else:
            areas = maintainer_file.path2areas(changed_file.filename)
            for _area in areas:
                collab_per_path.update(_area.get_collaborators_for_path(changed_file.filename))

        log(f"  areas: {areas}")

        if not areas:
            continue

        # instance of an area, for example a driver or a board, not APIs or subsys code.
        is_instance = False
        # Platform areas are visited first so is_instance is already set
        # when the generic areas of the same file are counted.
        sorted_areas = sorted(areas, key=lambda x: 'Platform' in x.name, reverse=True)
        for area in sorted_areas:
            # do not count cmake file changes, i.e. when there are changes to
            # instances of an area listed in both the subsystem and the
            # platform implementing it
            if 'CMakeLists.txt' in changed_file.filename or area.name in meta_areas:
                c = 0
            else:
                c = 1 if not is_instance else 0

            area_counter[area] += c
            log(f"area counter: {area_counter}")
            labels.update(area.labels)
            # FIXME: Here we count the same file multiple times if it exists in
            # multiple areas with same maintainer
            for area_maintainer in area.maintainers:
                found_maintainers[area_maintainer] += c

            if 'Platform' in area.name:
                is_instance = True

            # NOTE(review): this inner loop re-adds collaborators for every
            # area once per outer iteration — looks redundant with the
            # per-path collection above; confirm before changing.
            for _area in sorted_areas:
                collab_per_path.update(_area.get_collaborators_for_path(changed_file.filename))

    # Rank areas by number of matched files, most affected first.
    area_counter = dict(sorted(area_counter.items(), key=lambda item: item[1], reverse=True))
    log(f"Area matches: {area_counter}")
    log(f"labels: {labels}")

    # Create a list of collaborators ordered by the area match
    collab = list()
    for area in area_counter:
        collab += maintainer_file.areas[area.name].maintainers
        collab += maintainer_file.areas[area.name].collaborators
        collab += collab_per_path

    # Deduplicate while preserving the area-ranked order.
    collab = list(dict.fromkeys(collab))
    # add more reviewers based on maintainer file changes.
    collab += list(additional_reviews)
    log(f"collab: {collab}")

    _all_maintainers = dict(
        sorted(found_maintainers.items(), key=lambda item: item[1], reverse=True)
    )

    log(f"Submitted by: {pr.user.login}")
    log(f"candidate maintainers: {_all_maintainers}")

    ranked_assignees = []
    assignees = None

    # we start with areas with most files changed and pick the maintainer from the first one.
    # if the first area is an implementation, i.e. driver or platform, we
    # continue searching for any other areas involved
    for area, count in area_counter.items():
        # if only meta area is affected, assign one of the maintainers of that area
        if area.name in meta_areas and len(area_counter) == 1:
            assignees = area.maintainers
            break
        # if no maintainers, skip
        if count == 0 or len(area.maintainers) == 0:
            continue
        # if there are maintainers, but no assignees yet, set them
        if len(area.maintainers) > 0:
            if pr.user.login in area.maintainers:
                # If submitter = assignee, try to pick next area and assign
                # someone else other than the submitter, otherwise when there
                # are other maintainers for the area, assign them.
                if len(area.maintainers) > 1:
                    assignees = area.maintainers.copy()
                    assignees.remove(pr.user.login)
                else:
                    continue
            else:
                assignees = area.maintainers

        # found a non-platform area that was changed, pick assignee from this
        # area and put them on top of the list, otherwise just append.
        if 'Platform' not in area.name:
            ranked_assignees.insert(0, area.maintainers)
            break
        else:
            ranked_assignees.append(area.maintainers)

    if ranked_assignees:
        assignees = ranked_assignees[0]

    if assignees:
        # Rough "ownership" metric: share of changed files covered by the
        # areas of the first assignee.
        prop = (found_maintainers[assignees[0]] / num_files) * 100
        log(f"Picked assignees: {assignees} ({prop:.2f}% ownership)")
        log("+++++++++++++++++++++++++")
    elif len(_all_maintainers) > 0:
        # if we have maintainers found, but could not pick one based on area,
        # then pick the one with most changes
        assignees = [next(iter(_all_maintainers))]

    # Set labels
    if labels:
        if len(labels) < 10:
            for label in labels:
                log(f"adding label {label}...")
                if not args.dry_run:
                    pr.add_to_labels(label)
        else:
            log("Too many labels to be applied")

    if collab:
        reviewers = []
        existing_reviewers = set()

        # Users who already submitted a review.
        revs = pr.get_reviews()
        for review in revs:
            existing_reviewers.add(review.user)

        # Users with a pending review request.
        # NOTE(review): get_page(page) per enumerate index is an unusual
        # pagination pattern — confirm it really walks all pages.
        rl = pr.get_review_requests()
        for page, r in enumerate(rl):
            existing_reviewers |= set(r.get_page(page))

        # check for reviewers that remove themselves from list of reviewer and
        # do not attempt to add them again based on MAINTAINERS file.
        self_removal = []
        for event in pr.get_issue_events():
            if event.event == 'review_request_removed' and event.actor == event.requested_reviewer:
                self_removal.append(event.actor)

        for collaborator in collab:
            try:
                gh_user = gh.get_user(collaborator)
                if pr.user == gh_user or gh_user in existing_reviewers:
                    continue
                if not gh_repo.has_in_collaborators(gh_user):
                    log(f"Skip '{collaborator}': not in collaborators")
                    continue
                if gh_user in self_removal:
                    log(f"Skip '{collaborator}': self removed")
                    continue
                reviewers.append(collaborator)
            except UnknownObjectException as e:
                log(f"Can't get user '{collaborator}', account does not exist anymore? ({e})")

        # Cap the total number of requested reviewers at 15.
        if len(existing_reviewers) < 15:
            reviewer_vacancy = 15 - len(existing_reviewers)
            reviewers = reviewers[:reviewer_vacancy]
        else:
            log(
                "not adding reviewers because the existing reviewer count is greater than or "
                "equal to 15. Adding maintainers of all areas as reviewers instead."
            )
            # FIXME: Here we could also add collaborators of the areas most
            # affected, i.e. the one with the final assignee.
            reviewers = list(_all_maintainers.keys())

        if reviewers:
            try:
                log(f"adding reviewers {reviewers}...")
                if not args.dry_run:
                    pr.create_review_request(reviewers=reviewers)
            except GithubException:
                log("can't add reviewer")

    ms = []
    # assignees: only set when the PR has none yet (or on a dry run).
    if assignees and (not pr.assignee or args.dry_run):
        try:
            for assignee in assignees:
                u = gh.get_user(assignee)
                ms.append(u)
        except GithubException:
            log("Error: Unknown user")

        for mm in ms:
            log(f"Adding assignee {mm}...")
            if not args.dry_run:
                pr.add_to_assignees(mm)
    else:
        log("not setting assignee")

    # Be gentle with the GitHub API rate limit between PRs.
    time.sleep(1)
480
481
def process_issue(gh, maintainer_file, number):
    """Assign maintainers to one issue based on its labels.

    Builds a map from (sorted, lower-cased) label tuples to maintainer
    sets out of the maintainer file, then assigns the maintainers whose
    label set matches the issue's recognized labels. Issues that already
    have assignees are left untouched.
    """
    repo = gh.get_repo(f"{args.org}/{args.repo}")
    issue = repo.get_issue(number)

    log(f"Working on {issue.url}: {issue.title}")

    if issue.assignees:
        print(f"Already assigned {issue.assignees}, bailing out")
        return

    # Key: tuple of an area's sorted lower-case labels; value: maintainers.
    label_to_maintainer = defaultdict(set)
    for area in maintainer_file.areas.values():
        if not area.labels:
            continue
        key = tuple(sorted({area_label.lower() for area_label in area.labels}))
        label_to_maintainer[key].update(area.maintainers)

    # Add extra entries for areas with multiple labels so they match with just
    # one label if it's specific enough.
    for key, maintainers in list(label_to_maintainer.items()):
        for single_label in key:
            single_key = (single_label,)
            if single_key not in label_to_maintainer:
                label_to_maintainer[single_key] = maintainers

    # Collect the issue labels that are known to the maintainer file.
    recognized = set()
    for label in issue.labels:
        label_name = label.name.lower()
        if (label_name,) not in label_to_maintainer:
            print(f"Ignoring label: {label}")
            continue
        recognized.add(label_name)
    issue_labels = tuple(sorted(recognized))

    print(f"Using labels: {issue_labels}")

    if issue_labels not in label_to_maintainer:
        print("no match for the label set, not assigning")
        return

    for maintainer in label_to_maintainer[issue_labels]:
        log(f"Adding {maintainer} to {issue.html_url}")
        if not args.dry_run:
            issue.add_to_assignees(maintainer)
531
532
def process_modules(gh, maintainers_file):
    """Triage open, unassigned pull requests in module (west project) repos.

    For every active west project with a "West project: <name>" area that
    has maintainers, searches the project's GitHub repository for open,
    non-draft PRs without assignees, assigns the area maintainers and
    requests reviews from maintainers and collaborators.

    :param gh: authenticated Github client.
    :param maintainers_file: parsed maintainer file (Maintainers instance).
    """
    manifest = Manifest.from_file()

    # Map "org/project" repository names to their maintainer-file area.
    repos = {}
    for project in manifest.get_projects([]):
        if not manifest.is_active(project):
            continue

        # ManifestProject is the manifest repository itself, not a module.
        if isinstance(project, ManifestProject):
            continue

        area = f"West project: {project.name}"
        if area not in maintainers_file.areas:
            log(f"No area for: {area}")
            continue

        maintainers = maintainers_file.areas[area].maintainers
        if not maintainers:
            log(f"No maintainers for: {area}")
            continue

        collaborators = maintainers_file.areas[area].collaborators

        log(f"Found {area}, maintainers={maintainers}, collaborators={collaborators}")

        repo_name = f"{args.org}/{project.name}"
        repos[repo_name] = maintainers_file.areas[area]

    # Bug fix: without any repo: qualifier the search below would cover all
    # of GitHub, so bail out when no module repository has a maintained area.
    if not repos:
        log("No module repositories with maintained areas, nothing to do")
        return

    query = "is:open is:pr no:assignee "
    query += ' '.join(f"repo:{repo}" for repo in repos)

    issues = gh.search_issues(query=query)
    for issue in issues:
        pull = issue.as_pull_request()

        if pull.draft:
            continue

        if pull.assignees:
            log(f"ERROR: {pull.html_url} should have no assignees, found {pull.assignees}")
            continue

        repo_name = f"{args.org}/{issue.repository.name}"
        area = repos[repo_name]

        for maintainer in area.maintainers:
            log(f"Assigning {maintainer} to {pull.html_url}")
            if not args.dry_run:
                pull.add_to_assignees(maintainer)
                # Bug fix: create_review_request() expects a list of logins;
                # a bare string would be iterated character by character.
                pull.create_review_request(reviewers=[maintainer])

        for collaborator in area.collaborators:
            log(f"Adding {collaborator} to {pull.html_url}")
            if not args.dry_run:
                pull.create_review_request(reviewers=[collaborator])
589
590
def main():
    """Entry point: dispatch to PR, issue, or module processing per CLI args."""
    parse_args()

    token = os.environ.get('GITHUB_TOKEN', None)
    if not token:
        sys.exit(
            'Github token not set in environment, please set the '
            'GITHUB_TOKEN environment variable and retry.'
        )

    gh = Github(auth=Auth.Token(token))
    maintainer_file = Maintainers(args.maintainer_file)

    if args.pull_request:
        process_pr(gh, maintainer_file, args.pull_request)
        return
    if args.issue:
        process_issue(gh, maintainer_file, args.issue)
        return
    if args.modules:
        process_modules(gh, maintainer_file)
        return

    # Default mode: sweep recent pull requests, starting yesterday unless
    # --since was given.
    since = args.since or datetime.date.today() - datetime.timedelta(days=1)
    query = (
        f'repo:{args.org}/{args.repo} is:open is:pr base:main '
        f'-is:draft no:assignee created:>{since}'
    )
    for issue in gh.search_issues(query=query):
        process_pr(gh, maintainer_file, issue.number)
625
626
# Allow use both as a script and as an importable module.
if __name__ == "__main__":
    main()
629