#!/usr/bin/env python3
# Copyright (c) 2024 Intel Corp.
# SPDX-License-Identifier: Apache-2.0

# Script that operates on a merged PR and sends data to elasticsearch for
# further inspections using the PR dashboard at
# https://kibana.zephyrproject.io/

import argparse
import os
import pprint
import sys
from datetime import timedelta

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from github import Github

# Timestamp format shared by all date fields uploaded to elasticsearch.
date_format = '%Y-%m-%d %H:%M:%S'


def parse_args() -> argparse.Namespace:
    """Parse and return the command-line arguments."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False)

    parser.add_argument('--pull-request', help='pull request number', type=int)
    parser.add_argument('--range',
                        help='execute based on a date range, for example 2023-01-01..2023-01-05')
    parser.add_argument('--repo', help='github repo', default='zephyrproject-rtos/zephyr')
    parser.add_argument('--es-index', help='Elasticsearch index')
    parser.add_argument('-y', '--dry-run', action="store_true",
                        help='dry run, do not upload data')

    return parser.parse_args()


def gendata(data, index):
    """Yield elasticsearch bulk actions, one per document in *data*.

    Each action targets *index* and carries the document as its source.
    """
    for doc in data:
        yield {
            "_index": index,
            "_source": doc,
        }


def process_pr(pr):
    """Collect review metrics and metadata for a merged pull request.

    Computes the "review rule" (was the PR approved or merged by an
    assignee?) and the "time rule" (was it open long enough per the
    guidelines?) plus general metadata, and returns everything as a dict
    ready for upload to elasticsearch.
    """
    reviews = pr.get_reviews()
    print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees (Updated {pr.updated_at})')

    prj = {}

    labels = [label.name for label in pr.labels]

    # Logins of everyone who left an APPROVED review.
    reviewers = set()
    for review in reviews:
        if review.user and review.state == 'APPROVED':
            reviewers.add(review.user.login)

    assignees = [assignee.login for assignee in pr.assignees]
    # How many assignees approved the PR.
    assignee_reviews = sum(1 for login in assignees if login in reviewers)

    if assignee_reviews > 0 or pr.merged_by.login in assignees:
        # in case of assignee reviews or if PR was merged by an assignee
        prj['review_rule'] = "yes"
    elif not pr.assignees or \
            (pr.user.login in assignees and len(assignees) == 1) or \
            ('Trivial' in labels or 'Hotfix' in labels):
        # in case where no assignees set or if submitter is the only assignee
        # or in case of trivial or hotfixes
        prj['review_rule'] = "na"
    else:
        # everything else
        prj['review_rule'] = "no"

    created = pr.created_at
    # if a PR was made ready for review from draft, calculate based on when it
    # was moved out of draft (the last such event wins).
    for event in pr.get_issue_events():
        if event.event == 'ready_for_review':
            created = event.created_at

    # calculate time the PR was in review, hours and business days.
    delta = pr.closed_at - created
    deltah = delta.total_seconds() / 3600
    prj['hours_open'] = deltah

    # One calendar day per full day the PR was open.
    dates = (created + timedelta(idx + 1) for idx in range(delta.days))

    # Get number of business days per the guidelines, we need at least 2.
    business_days = sum(1 for day in dates if day.weekday() < 5)
    prj['business_days_open'] = business_days

    trivial = 'Trivial' in labels
    hotfix = 'Hotfix' in labels
    min_review_time_rule = "no"
    if hotfix or (trivial and deltah >= 4) or business_days >= 2:
        min_review_time_rule = "yes"
    prj['time_rule'] = min_review_time_rule

    # This is all data we get easily though the Github API and serves as the
    # basis for displaying some trends and metrics.
    # Data can be extended in the future if we find more information that
    # is useful through the API

    prj['nr'] = pr.number
    prj['url'] = pr.url
    prj['title'] = pr.title
    prj['comments'] = pr.comments
    prj['reviews'] = reviews.totalCount
    prj['assignees'] = assignees
    # Use the shared date_format constant so all timestamps agree.
    prj['updated'] = pr.updated_at.strftime(date_format)
    prj['created'] = pr.created_at.strftime(date_format)
    prj['closed'] = pr.closed_at.strftime(date_format)
    prj['merged_by'] = pr.merged_by.login
    prj['submitted_by'] = pr.user.login
    prj['changed_files'] = pr.changed_files
    prj['additions'] = pr.additions
    prj['deletions'] = pr.deletions
    prj['commits'] = pr.commits
    # The branch we are targeting. main vs release branches.
    prj['base'] = pr.base.ref

    # list all reviewers
    prj['reviewers'] = list(reviewers)
    prj['labels'] = labels

    return prj


def main():
    """Entry point: gather PR metrics and upload them to elasticsearch."""
    args = parse_args()
    token = os.environ.get('GITHUB_TOKEN')
    if not token:
        sys.exit('Github token not set in environment, please set the '
                 'GITHUB_TOKEN environment variable and retry.')

    gh = Github(token)
    json_list = []
    gh_repo = gh.get_repo(args.repo)

    if args.pull_request:
        json_list.append(process_pr(gh_repo.get_pull(args.pull_request)))
    elif args.range:
        query = (f'repo:{args.repo} merged:{args.range} is:pr is:closed '
                 'sort:updated-desc base:main')
        for issue in gh.search_issues(query=query):
            json_list.append(process_pr(gh_repo.get_pull(issue.number)))

    if json_list and not args.dry_run:
        # Send data over to elasticsearch. The Elasticsearch() construction
        # is inside the try so that a missing ELASTICSEARCH_SERVER or
        # ELASTICSEARCH_KEY env var is reported instead of raising an
        # unhandled KeyError.
        try:
            es = Elasticsearch(
                [os.environ['ELASTICSEARCH_SERVER']],
                api_key=os.environ['ELASTICSEARCH_KEY'],
                verify_certs=False
            )
            if args.es_index:
                index = args.es_index
            else:
                index = os.environ['PR_STAT_ES_INDEX']
            bulk(es, gendata(json_list, index))
        except KeyError as e:
            print(f"Error: {e} not set.")
            print(json_list)
    if args.dry_run:
        pprint.pprint(json_list)


if __name__ == "__main__":
    main()