1#!/usr/bin/env python3
2# Copyright (c) 2024 Intel Corp.
3# SPDX-License-Identifier: Apache-2.0
4
# Script that operates on a merged PR and sends data to elasticsearch for
# further inspection using the PR dashboard at
# https://kibana.zephyrproject.io/
8
9import sys
10import os
11from github import Github
12import argparse
13from elasticsearch import Elasticsearch
14from elasticsearch.helpers import bulk
15from datetime import timedelta
16import pprint
17
18
# Timestamp format used for dates sent to Elasticsearch.
# NOTE(review): currently unused — the strftime() calls below repeat the
# literal string; consider consolidating them onto this constant.
date_format = '%Y-%m-%d %H:%M:%S'
20
def parse_args() -> argparse.Namespace:
    """Parse and return the command-line arguments for this script."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False)

    parser.add_argument('--pull-request', type=int,
                        help='pull request number')
    parser.add_argument('--range',
                        help='execute based on a date range, for example 2023-01-01..2023-01-05')
    parser.add_argument('--repo', default='zephyrproject-rtos/zephyr',
                        help='github repo')
    parser.add_argument('--es-index',
                        help='Elasticsearch index')
    parser.add_argument('-y', '--dry-run', action="store_true",
                        help='dry run, do not upload data')

    return parser.parse_args()
32
def gendata(data, index):
    """Yield Elasticsearch bulk-indexing actions, one per record in *data*.

    Each yielded action targets *index* and carries the record unchanged
    as the document source.
    """
    for record in data:
        yield {"_index": index, "_source": record}
39
def process_pr(pr):
    """Extract review metrics and metadata from a (closed) pull request.

    Returns a dict ready for Elasticsearch indexing, containing raw PR
    metadata plus two computed policy fields:
      - review_rule: "yes"/"no"/"na" — whether the assignee-review rule held
      - time_rule:   "yes"/"no" — whether the minimum-review-time rule held

    Args:
        pr: a PyGithub PullRequest object; closed_at must be set (the PR
            is expected to be closed/merged).
    """
    reviews = pr.get_reviews()
    print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees (Updated {pr.updated_at})')
    prj = {}

    labels = [label.name for label in pr.labels]

    # Logins of everyone who left an APPROVED review (user can be None for
    # reviews from deleted accounts).
    reviewers = set()
    for review in reviews:
        if review.user and review.state == 'APPROVED':
            reviewers.add(review.user.login)

    assignees = [assignee.login for assignee in pr.assignees]
    # How many assignees approved the PR.
    assignee_reviews = sum(1 for login in assignees if login in reviewers)

    # merged_by is None for a PR that was closed without being merged (can
    # happen when fetched directly via --pull-request); guard instead of
    # crashing with AttributeError.
    merged_by = pr.merged_by.login if pr.merged_by else None

    if assignee_reviews > 0 or merged_by in assignees:
        # in case of assignee reviews or if PR was merged by an assignee
        prj['review_rule'] = "yes"
    elif not pr.assignees or \
            (pr.user.login in assignees and len(assignees) == 1) or \
            ('Trivial' in labels or 'Hotfix' in labels):
        # in case where no assignees set or if submitter is the only assignee
        # or in case of trivial or hotfixes
        prj['review_rule'] = "na"
    else:
        # everything else
        prj['review_rule'] = "no"

    created = pr.created_at
    # if a PR was made ready for review from draft, calculate based on when it
    # was moved out of draft.
    for event in pr.get_issue_events():
        if event.event == 'ready_for_review':
            created = event.created_at

    # calculate time the PR was in review, hours and business days.
    delta = pr.closed_at - created
    deltah = delta.total_seconds() / 3600
    prj['hours_open'] = deltah

    dates = (created + timedelta(days=idx + 1) for idx in range(delta.days))

    # Get number of business days per the guidelines, we need at least 2.
    business_days = sum(1 for day in dates if day.weekday() < 5)
    prj['business_days_open'] = business_days

    trivial = 'Trivial' in labels
    hotfix = 'Hotfix' in labels

    # Minimum review time rule: hotfixes are exempt, trivial changes need at
    # least 4 hours, everything else needs at least 2 business days.
    if hotfix or (trivial and deltah >= 4) or business_days >= 2:
        prj['time_rule'] = "yes"
    else:
        prj['time_rule'] = "no"

    # This is all data we get easily though the Github API and serves as the basis
    # for displaying some trends and metrics.
    # Data can be extended in the future if we find more information that
    # is useful through the API

    prj['nr'] = pr.number
    prj['url'] = pr.url
    prj['title'] = pr.title
    prj['comments'] = pr.comments
    prj['reviews'] = reviews.totalCount
    prj['assignees'] = assignees
    prj['updated'] = pr.updated_at.strftime("%Y-%m-%d %H:%M:%S")
    prj['created'] = pr.created_at.strftime("%Y-%m-%d %H:%M:%S")
    prj['closed'] = pr.closed_at.strftime("%Y-%m-%d %H:%M:%S")
    prj['merged_by'] = merged_by
    prj['submitted_by'] = pr.user.login
    prj['changed_files'] = pr.changed_files
    prj['additions'] = pr.additions
    prj['deletions'] = pr.deletions
    prj['commits'] = pr.commits
    # The branch we are targeting. main vs release branches.
    prj['base'] = pr.base.ref

    # list all reviewers
    prj['reviewers'] = list(reviewers)
    prj['labels'] = labels

    return prj
132
def main():
    """Collect merged-PR statistics and optionally upload them to Elasticsearch.

    Requires the GITHUB_TOKEN environment variable.  When uploading (i.e. not
    --dry-run), ELASTICSEARCH_SERVER and ELASTICSEARCH_KEY must also be set,
    plus PR_STAT_ES_INDEX unless --es-index is given.
    """
    args = parse_args()
    token = os.environ.get('GITHUB_TOKEN')
    if not token:
        sys.exit('Github token not set in environment, please set the '
                 'GITHUB_TOKEN environment variable and retry.')

    gh = Github(token)
    json_list = []
    gh_repo = gh.get_repo(args.repo)

    if args.pull_request:
        pr = gh_repo.get_pull(args.pull_request)
        json_list.append(process_pr(pr))
    elif args.range:
        # Use the search API to find merged PRs in the date range, then fetch
        # each one as a full PR object to get the detailed fields.
        query = f'repo:{args.repo} merged:{args.range} is:pr is:closed sort:updated-desc base:main'
        for issue in gh.search_issues(query=query):
            pr = gh_repo.get_pull(issue.number)
            json_list.append(process_pr(pr))

    if json_list and not args.dry_run:
        # Send data over to elasticsearch.  All environment lookups happen
        # inside the try block so that a missing variable is reported with a
        # friendly message instead of an unhandled KeyError (previously only
        # PR_STAT_ES_INDEX was covered).
        try:
            es = Elasticsearch(
                    [os.environ['ELASTICSEARCH_SERVER']],
                    api_key=os.environ['ELASTICSEARCH_KEY'],
                    # NOTE(review): TLS verification is disabled; confirm this
                    # is intentional for the target ES instance.
                    verify_certs=False
                    )
            index = args.es_index if args.es_index else os.environ['PR_STAT_ES_INDEX']
            bulk(es, gendata(json_list, index))
        except KeyError as e:
            print(f"Error: {e} not set.")
            print(json_list)
    if args.dry_run:
        pprint.pprint(json_list)
175
# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()
178