1#!/usr/bin/env python3
2# Copyright (c) 2021, Facebook
3#
4# SPDX-License-Identifier: Apache-2.0
5
6"""Query the Top-Ten Bug Bashers
7
8This script will query the top-ten Bug Bashers in a specified date window.
9
10Usage:
    ./scripts/bug-bash.py -t ~/.ghtoken -s 2021-07-26 -e 2021-08-07
    GITHUB_TOKEN="..." ./scripts/bug-bash.py -s 2021-07-26 -e 2021-08-07
13"""
14
15import argparse
16from datetime import datetime, timedelta
17import operator
18import os
19
20# Requires PyGithub
21from github import Github
22
23
def parse_args():
    """Parse the command line and resolve the GitHub credential.

    Returns:
        The argparse namespace holding ``all``, ``tokenfile``, ``start``
        and ``end``, extended with a ``token`` attribute. The token is
        read (and stripped) from the token file when one is given,
        otherwise taken from the GITHUB_TOKEN environment variable.

    Raises:
        ValueError: if the end date precedes the start date, or if
            neither a token file nor GITHUB_TOKEN is available.
    """
    parser = argparse.ArgumentParser(allow_abbrev=False)
    parser.add_argument(
        '-a', '--all', dest='all', action='store_true',
        help='Show all bugs squashed')
    parser.add_argument(
        '-t', '--token', dest='tokenfile', metavar='FILE',
        help='File containing GitHub token (alternatively, use GITHUB_TOKEN env variable)')
    parser.add_argument(
        '-s', '--start', dest='start', metavar='START_DATE',
        type=valid_date_type, required=True,
        help='start date (YYYY-mm-dd)')
    parser.add_argument(
        '-e', '--end', dest='end', metavar='END_DATE',
        type=valid_date_type, required=True,
        help='end date (YYYY-mm-dd)')

    args = parser.parse_args()

    # The window is validated before any credential is touched.
    if args.end < args.start:
        raise ValueError(
            f'end date {args.end} is before start date {args.start}')

    # A token file on the command line takes precedence over the
    # environment variable.
    if args.tokenfile:
        with open(args.tokenfile, 'r') as token_fp:
            credential = token_fp.read().strip()
    elif 'GITHUB_TOKEN' in os.environ:
        credential = os.environ['GITHUB_TOKEN']
    else:
        raise ValueError('No credentials specified')

    args.token = credential
    return args
53
54
class BugBashTally:
    """Tally merged bug-fix pull requests per author over a date window.

    The GitHub query runs lazily on first use; its result (even an empty
    one) is cached for the lifetime of the object.
    """

    def __init__(self, gh, start_date, end_date):
        """Create a BugBashTally object.

        Args:
            gh: Github object used to look up the repository.
            start_date: datetime marking the first day of the window.
            end_date: datetime marking the last day of the window; the
                whole of that day is included.
        """
        self._gh = gh
        self._repo = gh.get_repo('zephyrproject-rtos/zephyr')
        self._start_date = start_date
        self._end_date = end_date

        self._issues = []
        self._pulls = []
        # Explicit flag instead of testing the lists for truthiness: an
        # empty result is a valid answer and must not trigger another
        # full query against GitHub.
        self._fetched = False

    @staticmethod
    def _naive_utc(moment):
        """Return moment as a naive datetime expressed in UTC.

        Naive datetimes are returned unchanged; aware ones are shifted by
        their UTC offset and stripped of tzinfo, so both kinds can be
        compared with each other.
        """
        offset = moment.utcoffset()
        if offset is None:
            return moment
        return (moment - offset).replace(tzinfo=None)

    def get_tally(self):
        """Return a dict with (key = user, value = score), where the score
        is the number of qualifying pull requests opened by that user"""
        tally = {}
        for pull in self.get_pulls():
            login = pull.user.login
            tally[login] = tally.get(login, 0) + 1

        return tally

    def get_rev_tally(self):
        """Return a dict with (key = score, value = list<user>) sorted in
        descending order of score"""
        # there may be ties, hence a list of users per score
        by_score = {}
        for user, score in self.get_tally().items():
            by_score.setdefault(score, []).append(user)

        # dicts preserve insertion order, so inserting in sorted order
        # yields a dict that iterates from highest to lowest score
        return dict(
            sorted(by_score.items(), key=operator.itemgetter(0), reverse=True))

    def get_top_ten(self):
        """Return a list of at most ten (score, user) tuples sorted in
        descending order of score"""
        ranked = []
        for score, users in self.get_rev_tally().items():
            # do not sort users by login - hopefully fair-ish
            ranked.extend((score, user) for user in users)

        return ranked[:10]

    def get_pulls(self):
        """Return GitHub pull requests that squash bugs in the provided
        date window, in the same order as get_issues()"""
        self.get_issues()

        return self._pulls

    def get_issues(self):
        """Return GitHub issues representing bugs in the provided date
        window.

        Only closed issues labelled 'bug' that carry a linked, merged pull
        request are kept. The matching pull requests are collected at the
        same time, so get_pulls() lines up index-for-index with this list.
        """
        if self._fetched:
            return self._issues

        # Half-open window [start, end + 1 day): the end date is inclusive
        # but midnight of the following day is not.
        window_start = self._naive_utc(self._start_date)
        window_end = self._naive_utc(self._end_date) + timedelta(days=1)

        candidates = self._repo.get_issues(
            state='closed', labels=['bug'], since=self._start_date)

        # Collect into locals and publish only on success, so a failure
        # part-way through cannot leave a partial result that later looks
        # like a complete cache.
        issues = []
        pulls = []
        for issue in candidates:
            # the PyGithub API and v3 REST API do not facilitate 'until'
            # or 'end date' :-/
            #
            # NOTE(review): newer PyGithub releases are documented to
            # return timezone-aware datetimes; normalising avoids a
            # TypeError when comparing against the naive window bounds.
            closed_at = self._naive_utc(issue.closed_at)
            if closed_at < window_start or closed_at >= window_end:
                continue

            linked = issue.pull_request
            if linked is None:
                # ignore issues without a linked pull request
                continue

            # the pull request number is the last component of its URL
            number = int(linked.html_url.split('/')[-1])
            pull = self._repo.get_pull(number)
            if not pull.merged:
                # pull requests that were not merged do not count
                continue

            pulls.append(pull)
            issues.append(issue)

        self._issues = issues
        self._pulls = pulls
        self._fetched = True

        return self._issues
148
149
150# https://gist.github.com/monkut/e60eea811ef085a6540f
def valid_date_type(arg_date_str):
    """argparse ``type`` callback converting a YYYY-MM-DD string from the
    command line into a datetime.

    Raises argparse.ArgumentTypeError when the string does not match that
    format, so argparse can report the bad value to the user.
    """
    try:
        parsed = datetime.strptime(arg_date_str, "%Y-%m-%d")
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"Given Date ({arg_date_str}) not valid! Expected format, YYYY-MM-DD!")
    return parsed
159
160
def print_top_ten(top_ten):
    """Write each (score, user) pair of top_ten to stdout, one per line."""
    for points, login in top_ten:
        # tab-separated so the output can be redirected straight into a
        # file, e.g. ./script ... > foo.csv
        print(f'{points}\t{login}')
166
167
def main():
    """Run the script: parse arguments, query GitHub and print either
    every squashed bug (with --all) or the top-ten bug bashers."""
    args = parse_args()
    bbt = BugBashTally(Github(args.token), args.start, args.end)

    if not args.all:
        # default mode: print the top ten
        print_top_ten(bbt.get_top_ten())
        return

    # --all: one tab-separated line per issue, paired with its pull request
    issues = bbt.get_issues()
    pulls = bbt.get_pulls()
    assert len(issues) == len(pulls)
    for issue, pull in zip(issues, pulls):
        print(f'{issue.number}\t{pull.user.login}\t{pull.title}')
184
185
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
188