#!/usr/bin/env python3

# Copyright (c) 2022 Intel Corp.
# SPDX-License-Identifier: Apache-2.0

"""Assign reviewers, assignees and labels to GitHub pull requests and issues
based on the area information in MAINTAINERS.yml."""

import argparse
import datetime
import os
import sys
import time
from collections import defaultdict
from pathlib import Path

import yaml
from github import Auth, Github, GithubException
from github.GithubException import UnknownObjectException
from west.manifest import Manifest, ManifestProject

TOP_DIR = os.path.join(os.path.dirname(__file__))
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from get_maintainer import Maintainers  # noqa: E402

ZEPHYR_BASE = os.environ.get('ZEPHYR_BASE')
if ZEPHYR_BASE:
    ZEPHYR_BASE = Path(ZEPHYR_BASE)
else:
    ZEPHYR_BASE = Path(__file__).resolve().parents[2]
    # Propagate this decision to child processes.
    os.environ['ZEPHYR_BASE'] = str(ZEPHYR_BASE)


def log(s):
    """Print *s* to stdout, but only when verbose output was requested."""
    if args.verbose > 0:
        print(s, file=sys.stdout)


def parse_args():
    """Parse command-line options into the module-level ``args`` namespace."""
    global args
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        allow_abbrev=False,
    )

    parser.add_argument(
        "-M",
        "--maintainer-file",
        required=False,
        default="MAINTAINERS.yml",
        help="Maintainer file to be used.",
    )

    # The operating modes are mutually exclusive: exactly one of a single PR,
    # a single issue, a date range, or module processing.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "-P",
        "--pull_request",
        required=False,
        default=None,
        type=int,
        help="Operate on one pull-request only.",
    )
    group.add_argument(
        "-I", "--issue", required=False, default=None, type=int, help="Operate on one issue only."
    )
    group.add_argument("-s", "--since", required=False, help="Process pull-requests since date.")
    group.add_argument(
        "-m", "--modules", action="store_true", help="Process pull-requests from modules."
    )

    parser.add_argument("-y", "--dry-run", action="store_true", default=False, help="Dry run only.")

    parser.add_argument("-o", "--org", default="zephyrproject-rtos", help="Github organisation")

    parser.add_argument("-r", "--repo", default="zephyr", help="Github repository")

    parser.add_argument(
        "--updated-manifest",
        default=None,
        help="Updated manifest file to compare against current west.yml",
    )

    parser.add_argument(
        "--updated-maintainer-file",
        default=None,
        help="Updated maintainer file to compare against current MAINTAINERS.yml",
    )

    parser.add_argument("-v", "--verbose", action="count", default=0, help="Verbose Output")

    args = parser.parse_args()


def load_areas(filename: str):
    """Load a maintainers YAML file and return only its area entries.

    Top-level keys whose value is not a dict, or that carry neither a
    ``files`` nor a ``files-regex`` key, are filtered out.
    """
    with open(filename) as f:
        doc = yaml.safe_load(f)
    return {
        k: v for k, v in doc.items() if isinstance(v, dict) and ("files" in v or "files-regex" in v)
    }


def process_manifest(old_manifest_file):
    """Diff west.yml against *old_manifest_file*.

    Returns a list of ``West project: <name>`` area names for every project
    that was added, removed, or had its revision updated.  Returns an empty
    list when either manifest file is missing.
    """
    log("Processing manifest changes")
    if not os.path.isfile("west.yml") or not os.path.isfile(old_manifest_file):
        log("No west.yml found, skipping...")
        return []
    old_manifest = Manifest.from_file(old_manifest_file)
    new_manifest = Manifest.from_file("west.yml")
    old_projs = set((p.name, p.revision) for p in old_manifest.projects)
    new_projs = set((p.name, p.revision) for p in new_manifest.projects)
    # Removed projects: present before, name no longer exists in the new set.
    rprojs = set(filter(lambda p: p[0] not in list(p[0] for p in new_projs), old_projs - new_projs))
    # Updated projects: same name but different revision.
    uprojs = set(filter(lambda p: p[0] in list(p[0] for p in old_projs), new_projs - old_projs))
    # Added projects
    aprojs = new_projs - old_projs - uprojs

    # All projs
    projs = rprojs | uprojs | aprojs
    projs_names = [name for name, rev in projs]

    log(f"found modified projects: {projs_names}")
    areas = []
    for p in projs_names:
        areas.append(f'West project: {p}')

    log(f'manifest areas: {areas}')
    return areas


def set_or_empty(d, key):
    """Return ``d[key]`` as a set, treating a missing or None value as empty."""
    return set(d.get(key, []) or [])


def compare_areas(old, new, repo_fullname=None, token=None):
    """Compare two area dicts (as returned by load_areas) and print a report.

    Returns the set of area names that were added, removed, or whose
    maintainers/collaborators/status/labels/files/files-regex changed.
    ``repo_fullname`` and ``token`` are accepted for interface compatibility
    but currently unused.
    """
    old_areas = set(old.keys())
    new_areas = set(new.keys())

    changed_areas = set()
    added_areas = new_areas - old_areas
    removed_areas = old_areas - new_areas
    common_areas = old_areas & new_areas

    print("=== Areas Added ===")
    for area in sorted(added_areas):
        print(f"+ {area}")

    print("\n=== Areas Removed ===")
    for area in sorted(removed_areas):
        print(f"- {area}")

    print("\n=== Area Changes ===")
    for area in sorted(common_areas):
        changes = []
        old_entry = old[area]
        new_entry = new[area]

        # Compare maintainers
        old_maint = set_or_empty(old_entry, "maintainers")
        new_maint = set_or_empty(new_entry, "maintainers")
        added_maint = new_maint - old_maint
        removed_maint = old_maint - new_maint
        if added_maint:
            changes.append(f"  Maintainers added: {', '.join(sorted(added_maint))}")
        if removed_maint:
            changes.append(f"  Maintainers removed: {', '.join(sorted(removed_maint))}")

        # Compare collaborators
        old_collab = set_or_empty(old_entry, "collaborators")
        new_collab = set_or_empty(new_entry, "collaborators")
        added_collab = new_collab - old_collab
        removed_collab = old_collab - new_collab
        if added_collab:
            changes.append(f"  Collaborators added: {', '.join(sorted(added_collab))}")
        if removed_collab:
            changes.append(f"  Collaborators removed: {', '.join(sorted(removed_collab))}")

        # Compare status
        old_status = old_entry.get("status")
        new_status = new_entry.get("status")
        if old_status != new_status:
            changes.append(f"  Status changed: {old_status} -> {new_status}")

        # Compare labels
        old_labels = set_or_empty(old_entry, "labels")
        new_labels = set_or_empty(new_entry, "labels")
        added_labels = new_labels - old_labels
        removed_labels = old_labels - new_labels
        if added_labels:
            changes.append(f"  Labels added: {', '.join(sorted(added_labels))}")
        if removed_labels:
            changes.append(f"  Labels removed: {', '.join(sorted(removed_labels))}")

        # Compare files
        old_files = set_or_empty(old_entry, "files")
        new_files = set_or_empty(new_entry, "files")
        added_files = new_files - old_files
        removed_files = old_files - new_files
        if added_files:
            changes.append(f"  Files added: {', '.join(sorted(added_files))}")
        if removed_files:
            changes.append(f"  Files removed: {', '.join(sorted(removed_files))}")

        # Compare files-regex
        old_regex = set_or_empty(old_entry, "files-regex")
        new_regex = set_or_empty(new_entry, "files-regex")
        added_regex = new_regex - old_regex
        removed_regex = old_regex - new_regex
        if added_regex:
            changes.append(f"  files-regex added: {', '.join(sorted(added_regex))}")
        if removed_regex:
            changes.append(f"  files-regex removed: {', '.join(sorted(removed_regex))}")

        if changes:
            changed_areas.add(area)
            print(f"area changed: {area}")

    return added_areas | removed_areas | changed_areas


def process_pr(gh, maintainer_file, number):
    """Assign labels, reviewers and assignees to a single pull request.

    Matches the PR's changed files against MAINTAINERS.yml areas, counts
    per-area ownership, then picks assignees and requests reviews.
    """
    gh_repo = gh.get_repo(f"{args.org}/{args.repo}")
    pr = gh_repo.get_pull(number)

    log(f"working on https://github.com/{args.org}/{args.repo}/pull/{pr.number} : {pr.title}")

    labels = set()
    area_counter = defaultdict(int)
    found_maintainers = defaultdict(int)

    num_files = 0
    fn = list(pr.get_files())

    # Check if PR currently has 'size: XS' label
    current_labels = [label.name for label in pr.labels]
    has_size_xs_label = 'size: XS' in current_labels

    # Determine if PR qualifies for 'size: XS' label
    qualifies_for_xs = pr.commits == 1 and (pr.additions <= 1 and pr.deletions <= 1)

    if qualifies_for_xs:
        labels = {'size: XS'}
    elif has_size_xs_label and not qualifies_for_xs:
        # Remove 'size: XS' label if PR no longer qualifies
        log(
            f"removing 'size: XS' label (commits: {pr.commits}, "
            f"additions: {pr.additions}, deletions: {pr.deletions})..."
        )
        if not args.dry_run:
            pr.remove_from_labels('size: XS')

    if len(fn) > 500:
        log(f"Too many files changed ({len(fn)}), skipping....")
        return

    # areas where assignment happens if only said areas are affected
    meta_areas = ['Release Notes', 'Documentation', 'Samples', 'Tests']

    collab_per_path = set()
    additional_reviews = set()
    for changed_file in fn:
        num_files += 1
        log(f"file: {changed_file.filename}")

        areas = []
        if changed_file.filename in ['west.yml', 'submanifests/optional.yaml']:
            if not args.updated_manifest:
                log("No updated manifest, cannot process west.yml changes, skipping...")
                continue
            parsed_areas = process_manifest(old_manifest_file=args.updated_manifest)
            for _area in parsed_areas:
                area_match = maintainer_file.name2areas(_area)
                if area_match:
                    _area_obj = area_match[0]
                    collabs_for_area = _area_obj.get_collaborators_for_path(changed_file.filename)
                    collab_per_path.update(collabs_for_area)
                    areas.extend(area_match)
        elif changed_file.filename in ['MAINTAINERS.yml']:
            areas = maintainer_file.path2areas(changed_file.filename)
            # BUGFIX: the guard was inverted — the "cannot process" message was
            # logged exactly when an updated file WAS available (and processing
            # then went ahead anyway).  Mirror the west.yml branch: skip the
            # diff only when no previous copy of the file was supplied.
            if not args.updated_maintainer_file:
                log("cannot process MAINTAINERS.yml changes, skipping...")
            else:
                old_areas = load_areas(args.updated_maintainer_file)
                new_areas = load_areas('MAINTAINERS.yml')
                changed_areas = compare_areas(old_areas, new_areas)
                for _area in changed_areas:
                    area_match = maintainer_file.name2areas(_area)
                    if area_match:
                        # get list of maintainers for changed area
                        additional_reviews.update(maintainer_file.areas[_area].maintainers)
                        log(f"MAINTAINERS.yml changed, adding reviewers: {additional_reviews}")
        else:
            areas = maintainer_file.path2areas(changed_file.filename)
            for _area in areas:
                collab_per_path.update(_area.get_collaborators_for_path(changed_file.filename))

        log(f"  areas: {areas}")

        if not areas:
            continue

        # instance of an area, for example a driver or a board, not APIs or subsys code.
        is_instance = False
        sorted_areas = sorted(areas, key=lambda x: 'Platform' in x.name, reverse=True)
        for area in sorted_areas:
            # do not count cmake file changes, i.e. when there are changes to
            # instances of an area listed in both the subsystem and the
            # platform implementing it
            if 'CMakeLists.txt' in changed_file.filename or area.name in meta_areas:
                c = 0
            else:
                c = 1 if not is_instance else 0

            area_counter[area] += c
            log(f"area counter: {area_counter}")
            labels.update(area.labels)
            # FIXME: Here we count the same file multiple times if it exists in
            # multiple areas with same maintainer
            for area_maintainer in area.maintainers:
                found_maintainers[area_maintainer] += c

            if 'Platform' in area.name:
                is_instance = True

        for _area in sorted_areas:
            collab_per_path.update(_area.get_collaborators_for_path(changed_file.filename))

    area_counter = dict(sorted(area_counter.items(), key=lambda item: item[1], reverse=True))
    log(f"Area matches: {area_counter}")
    log(f"labels: {labels}")

    # Create a list of collaborators ordered by the area match
    collab = list()
    for area in area_counter:
        collab += maintainer_file.areas[area.name].maintainers
        collab += maintainer_file.areas[area.name].collaborators
    collab += collab_per_path

    # de-duplicate while preserving order
    collab = list(dict.fromkeys(collab))
    # add more reviewers based on maintainer file changes.
    collab += list(additional_reviews)
    log(f"collab: {collab}")

    _all_maintainers = dict(
        sorted(found_maintainers.items(), key=lambda item: item[1], reverse=True)
    )

    log(f"Submitted by: {pr.user.login}")
    log(f"candidate maintainers: {_all_maintainers}")

    ranked_assignees = []
    assignees = None

    # we start with areas with most files changed and pick the maintainer from the first one.
    # if the first area is an implementation, i.e. driver or platform, we
    # continue searching for any other areas involved
    for area, count in area_counter.items():
        # if only meta area is affected, assign one of the maintainers of that area
        if area.name in meta_areas and len(area_counter) == 1:
            assignees = area.maintainers
            break
        # if no maintainers, skip
        if count == 0 or len(area.maintainers) == 0:
            continue
        # if there are maintainers, but no assignees yet, set them
        if len(area.maintainers) > 0:
            if pr.user.login in area.maintainers:
                # If submitter = assignee, try to pick next area and assign
                # someone else other than the submitter, otherwise when there
                # are other maintainers for the area, assign them.
                if len(area.maintainers) > 1:
                    assignees = area.maintainers.copy()
                    assignees.remove(pr.user.login)
                else:
                    continue
            else:
                assignees = area.maintainers

        # found a non-platform area that was changed, pick assignee from this
        # area and put them on top of the list, otherwise just append.
        if 'Platform' not in area.name:
            ranked_assignees.insert(0, area.maintainers)
            break
        else:
            ranked_assignees.append(area.maintainers)

    if ranked_assignees:
        assignees = ranked_assignees[0]

    if assignees:
        prop = (found_maintainers[assignees[0]] / num_files) * 100
        log(f"Picked assignees: {assignees} ({prop:.2f}% ownership)")
        log("+++++++++++++++++++++++++")
    elif len(_all_maintainers) > 0:
        # if we have maintainers found, but could not pick one based on area,
        # then pick the one with most changes
        assignees = [next(iter(_all_maintainers))]

    # Set labels
    if labels:
        if len(labels) < 10:
            for label in labels:
                log(f"adding label {label}...")
                if not args.dry_run:
                    pr.add_to_labels(label)
        else:
            log("Too many labels to be applied")

    if collab:
        reviewers = []
        existing_reviewers = set()

        revs = pr.get_reviews()
        for review in revs:
            existing_reviewers.add(review.user)

        rl = pr.get_review_requests()
        for page, r in enumerate(rl):
            existing_reviewers |= set(r.get_page(page))

        # check for reviewers that remove themselves from list of reviewer and
        # do not attempt to add them again based on MAINTAINERS file.
        self_removal = []
        for event in pr.get_issue_events():
            if event.event == 'review_request_removed' and event.actor == event.requested_reviewer:
                self_removal.append(event.actor)

        for collaborator in collab:
            try:
                gh_user = gh.get_user(collaborator)
                if pr.user == gh_user or gh_user in existing_reviewers:
                    continue
                if not gh_repo.has_in_collaborators(gh_user):
                    log(f"Skip '{collaborator}': not in collaborators")
                    continue
                if gh_user in self_removal:
                    log(f"Skip '{collaborator}': self removed")
                    continue
                reviewers.append(collaborator)
            except UnknownObjectException as e:
                log(f"Can't get user '{collaborator}', account does not exist anymore? ({e})")

        if len(existing_reviewers) < 15:
            reviewer_vacancy = 15 - len(existing_reviewers)
            reviewers = reviewers[:reviewer_vacancy]
        else:
            log(
                "not adding reviewers because the existing reviewer count is greater than or "
                "equal to 15. Adding maintainers of all areas as reviewers instead."
            )
            # FIXME: Here we could also add collaborators of the areas most
            # affected, i.e. the one with the final assigne.
            reviewers = list(_all_maintainers.keys())

        if reviewers:
            try:
                log(f"adding reviewers {reviewers}...")
                if not args.dry_run:
                    pr.create_review_request(reviewers=reviewers)
            except GithubException:
                log("can't add reviewer")

    ms = []
    # assignees
    if assignees and (not pr.assignee or args.dry_run):
        try:
            for assignee in assignees:
                u = gh.get_user(assignee)
                ms.append(u)
        except GithubException:
            log("Error: Unknown user")

        for mm in ms:
            log(f"Adding assignee {mm}...")
            if not args.dry_run:
                pr.add_to_assignees(mm)
    else:
        log("not setting assignee")

    # be gentle with the GitHub API rate limits
    time.sleep(1)


def process_issue(gh, maintainer_file, number):
    """Assign maintainers to an issue based on its labels.

    Builds a mapping from sorted label tuples to maintainers, then matches
    the issue's recognized labels against it.  Issues that already have
    assignees are left untouched.
    """
    gh_repo = gh.get_repo(f"{args.org}/{args.repo}")
    issue = gh_repo.get_issue(number)

    log(f"Working on {issue.url}: {issue.title}")

    if issue.assignees:
        print(f"Already assigned {issue.assignees}, bailing out")
        return

    label_to_maintainer = defaultdict(set)
    for _, area in maintainer_file.areas.items():
        if not area.labels:
            continue

        labels = set()
        for label in area.labels:
            labels.add(label.lower())
        labels = tuple(sorted(labels))

        for maintainer in area.maintainers:
            label_to_maintainer[labels].add(maintainer)

    # Add extra entries for areas with multiple labels so they match with just
    # one label if it's specific enough.
    for areas, maintainers in dict(label_to_maintainer).items():
        for area in areas:
            if tuple([area]) not in label_to_maintainer:
                label_to_maintainer[tuple([area])] = maintainers

    issue_labels = set()
    for label in issue.labels:
        label_name = label.name.lower()
        if tuple([label_name]) not in label_to_maintainer:
            print(f"Ignoring label: {label}")
            continue
        issue_labels.add(label_name)
    issue_labels = tuple(sorted(issue_labels))

    print(f"Using labels: {issue_labels}")

    if issue_labels not in label_to_maintainer:
        print("no match for the label set, not assigning")
        return

    for maintainer in label_to_maintainer[issue_labels]:
        log(f"Adding {maintainer} to {issue.html_url}")
        if not args.dry_run:
            issue.add_to_assignees(maintainer)


def process_modules(gh, maintainers_file):
    """Assign maintainers to unassigned, non-draft PRs in module repos.

    Only active west projects with a ``West project: <name>`` area that has
    maintainers are considered.
    """
    manifest = Manifest.from_file()

    repos = {}
    for project in manifest.get_projects([]):
        if not manifest.is_active(project):
            continue

        if isinstance(project, ManifestProject):
            continue

        area = f"West project: {project.name}"
        if area not in maintainers_file.areas:
            log(f"No area for: {area}")
            continue

        maintainers = maintainers_file.areas[area].maintainers
        if not maintainers:
            log(f"No maintainers for: {area}")
            continue

        collaborators = maintainers_file.areas[area].collaborators

        log(f"Found {area}, maintainers={maintainers}, collaborators={collaborators}")

        repo_name = f"{args.org}/{project.name}"
        repos[repo_name] = maintainers_file.areas[area]

    query = "is:open is:pr no:assignee"
    if repos:
        query += ' ' + ' '.join(f"repo:{repo}" for repo in repos)

    issues = gh.search_issues(query=query)
    for issue in issues:
        pull = issue.as_pull_request()

        if pull.draft:
            continue

        if pull.assignees:
            log(f"ERROR: {pull.html_url} should have no assignees, found {pull.assignees}")
            continue

        repo_name = f"{args.org}/{issue.repository.name}"
        area = repos[repo_name]

        for maintainer in area.maintainers:
            log(f"Assigning {maintainer} to {pull.html_url}")
            if not args.dry_run:
                pull.add_to_assignees(maintainer)
                # BUGFIX: the GitHub API expects an array of logins; a bare
                # string was being passed through as the reviewers parameter.
                pull.create_review_request([maintainer])

        for collaborator in area.collaborators:
            log(f"Adding {collaborator} to {pull.html_url}")
            if not args.dry_run:
                pull.create_review_request([collaborator])


def main():
    """Entry point: dispatch to the selected processing mode."""
    parse_args()

    token = os.environ.get('GITHUB_TOKEN', None)
    if not token:
        sys.exit(
            'Github token not set in environment, please set the '
            'GITHUB_TOKEN environment variable and retry.'
        )

    gh = Github(auth=Auth.Token(token))
    maintainer_file = Maintainers(args.maintainer_file)

    if args.pull_request:
        process_pr(gh, maintainer_file, args.pull_request)
    elif args.issue:
        process_issue(gh, maintainer_file, args.issue)
    elif args.modules:
        process_modules(gh, maintainer_file)
    else:
        if args.since:
            since = args.since
        else:
            # Default: everything created in the last 24 hours.
            today = datetime.date.today()
            since = today - datetime.timedelta(days=1)

        common_prs = (
            f'repo:{args.org}/{args.repo} is:open is:pr base:main '
            f'-is:draft no:assignee created:>{since}'
        )
        pulls = gh.search_issues(query=f'{common_prs}')

        for issue in pulls:
            process_pr(gh, maintainer_file, issue.number)


if __name__ == "__main__":
    main()