1#!/usr/bin/env python3
2# Copyright (c) 2022, Meta
3#
4# SPDX-License-Identifier: Apache-2.0
5
6"""Query issues in a release branch
7
8This script searches for issues referenced via pull-requests in a release
9branch in order to simplify tracking changes such as automated backports,
10manual backports, security fixes, and stability fixes.
11
12A formatted report is printed to standard output either in JSON or
13reStructuredText.
14
15Since an issue is required for all changes to release branches, merged PRs
16must have at least one instance of the phrase "Fixes #1234" in the body. This
17script will throw an error if a PR has been made without an associated issue.
18
19Usage:
20    ./scripts/release/list_backports.py \
21        -t ~/.ghtoken \
22        -b v2.7-branch \
23        -s 2021-12-15 -e 2022-04-22 \
24        -P 45074 -P 45868 -P 44918 -P 41234 -P 41174 \
25        -j | jq . | tee /tmp/backports.json
26
27    GITHUB_TOKEN="<secret>" \
28    ./scripts/release/list_backports.py \
29        -b v3.0-branch \
30        -p 43381 \
31        -j | jq . | tee /tmp/backports.json
32"""
33
34import argparse
35from datetime import datetime, timedelta
36import io
37import json
38import logging
39import os
40import re
41import sys
42
43# Requires PyGithub
44from github import Github
45
46
47# https://gist.github.com/monkut/e60eea811ef085a6540f
def valid_date_type(arg_date_str):
    """Convert a YYYY-MM-DD command-line string into a datetime.

    Intended to be used as an argparse ``type=`` callable: a string that
    cannot be parsed is reported through argparse.ArgumentTypeError.
    """
    expected_format = "%Y-%m-%d"
    try:
        parsed = datetime.strptime(arg_date_str, expected_format)
    except ValueError:
        raise argparse.ArgumentTypeError(
            "Given Date ({0}) not valid! Expected format, YYYY-MM-DD!".format(arg_date_str))
    return parsed
56
57
def parse_args():
    """Parse the command line and resolve the GitHub token.

    Besides the options declared on the parser, the returned namespace
    carries a ``token`` attribute. It is read from the file given with
    --token or, when no file is given, from the GITHUB_TOKEN environment
    variable.

    Returns:
        The populated argparse namespace, or None (after logging an error)
        when the combination of date and pull request options is invalid.

    Raises:
        ValueError: if neither --token nor GITHUB_TOKEN provides credentials.
        OSError: if the token file cannot be opened.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--token', dest='tokenfile',
                        help='File containing GitHub token (alternatively, use GITHUB_TOKEN env variable)', metavar='FILE')
    parser.add_argument('-b', '--base', dest='base',
                        help='branch (base) for PRs (e.g. v2.7-branch)', metavar='BRANCH', required=True)
    parser.add_argument('-j', '--json', dest='json', action='store_true',
                        help='print output in JSON rather than RST')
    parser.add_argument('-s', '--start', dest='start', help='start date (YYYY-mm-dd)',
                        metavar='START_DATE', type=valid_date_type)
    parser.add_argument('-e', '--end', dest='end', help='end date (YYYY-mm-dd)',
                        metavar='END_DATE', type=valid_date_type)
    parser.add_argument("-o", "--org", default="zephyrproject-rtos",
                        help="Github organisation")
    parser.add_argument('-p', '--include-pull', dest='includes',
                        help='include pull request (can be specified multiple times)',
                        metavar='PR', type=int, action='append', default=[])
    parser.add_argument('-P', '--exclude-pull', dest='excludes',
                        help='exclude pull request (can be specified multiple times, helpful for version bumps and release notes)',
                        metavar='PR', type=int, action='append', default=[])
    parser.add_argument("-r", "--repo", default="zephyr",
                        help="Github repository")

    args = parser.parse_args()

    if args.includes:
        # an explicit list of PRs and a date window are mutually exclusive
        if args.start:
            logging.error(
                'the --start argument should not be used with --include-pull')
            return None
        if args.end:
            logging.error(
                'the --end argument should not be used with --include-pull')
            return None
    else:
        if not args.start:
            logging.error(
                'if --include-pull PR is not used, --start START_DATE is required')
            return None

        if not args.end:
            # an open-ended window runs until the moment the script is invoked
            args.end = datetime.now()

        if args.end < args.start:
            logging.error(
                f'end date {args.end} is before start date {args.start}')
            return None

    if args.tokenfile:
        with open(args.tokenfile, 'r') as file:
            # drop the trailing newline editors usually leave in the file
            token = file.read().strip()
    else:
        token = os.environ.get('GITHUB_TOKEN')
        if token is None:
            raise ValueError('No credentials specified')

    args.token = token

    return args
118
119
class Backport(object):
    """Collect the issues fixed by a set of pull requests.

    Instances are normally created with by_date_range() or by_included_prs().
    The pull request bodies are scanned on the first call to get_issues()
    (or to one of the get_pulls_*() reporting helpers) and the outcome is
    cached for the lifetime of the object.
    """

    def __init__(self, repo, base, pulls):
        """Store the repository, the base branch name and the pulls to scan.

        Args:
            repo: repository object; this class uses its ``name``,
                ``organization.login``, ``get_issue()``, ``get_pull()`` and
                ``get_pulls()`` members.
            base: name of the release branch the pull requests target.
            pulls: list of pull request objects exposing ``number`` and
                ``body``.
        """
        self._base = base
        self._repo = repo
        # None means "not scanned yet"; an empty list is a valid cached result
        self._issues = None
        self._pulls = pulls

        self._pulls_without_an_issue = []
        self._pulls_with_invalid_issues = {}

    @staticmethod
    def _naive_utc(moment):
        """Return moment as a naive datetime expressed in UTC.

        Naive datetimes are returned unchanged. Aware ones are shifted by
        their UTC offset and stripped of tzinfo, so that both kinds can be
        compared with each other without raising TypeError.
        """
        offset = moment.utcoffset()
        if offset is None:
            return moment
        return (moment - offset).replace(tzinfo=None)

    @staticmethod
    def by_date_range(repo, base, start_date, end_date, excludes):
        """Create a Backport from the merged PRs closed within a time window.

        Args:
            repo: repository object providing ``get_pulls()``.
            base: name of the branch the pull requests were merged into.
            start_date: datetime marking the (inclusive) start of the window.
            end_date: datetime marking the end of the window; pull requests
                closed less than one day after it are still accepted.
            excludes: container of pull request numbers to skip.

        Returns:
            A Backport holding the selected pull requests sorted by number.
        """
        window_start = Backport._naive_utc(start_date)
        # computed once: anything closed before this instant is in the window
        window_stop = Backport._naive_utc(end_date) + timedelta(1)

        pulls = []
        for p in repo.get_pulls(base=base, state='closed'):
            if not p.merged:
                # only consider merged backports
                continue

            closed_at = Backport._naive_utc(p.closed_at)
            if closed_at < window_start or closed_at >= window_stop:
                # only concerned with PRs within time window
                continue

            if p.number in excludes:
                # skip PRs that have been explicitly excluded
                continue

            pulls.append(p)

        pulls.sort(key=lambda x: x.number)

        return Backport(repo, base, pulls)

    @staticmethod
    def by_included_prs(repo, base, includes):
        """Create a Backport from an explicit collection of PR numbers.

        Args:
            repo: repository object providing ``get_pull()``.
            base: name of the branch every pull request must target.
            includes: iterable of pull request numbers.

        Returns:
            A Backport holding the pull requests sorted by number, or None
            (after logging an error) if a number cannot be fetched or the
            pull request targets a different base branch.
        """
        pulls = []

        for i in includes:
            try:
                p = repo.get_pull(i)
            except Exception:
                # any lookup failure is reported as an invalid pull request
                p = None

            if not p:
                logging.error(f'{i} is not a valid pull request')
                return None

            if p.base.ref != base:
                logging.error(
                    f'{i} is not a valid pull request for base {base} ({p.base.label})')
                return None

            pulls.append(p)

        pulls.sort(key=lambda x: x.number)

        return Backport(repo, base, pulls)

    @staticmethod
    def sanitize_title(title):
        """Return the title unchanged (placeholder for future clean-up)."""
        # TODO: sanitize titles such that they are suitable for both JSON and ReStructured Text
        # could also automatically fix titles like "Automated backport of PR #1234"
        return title

    def _repo_url(self):
        """Return the github.com URL of the repository, without a trailing slash."""
        return f'https://github.com/{self._repo.organization.login}/{self._repo.name}'

    def _fixes_pattern(self):
        """Compile the regex recognising "Fixes #N" / "Fixes <issue URL>" lines.

        The issue reference is either ``#N`` or the full URL of an issue in
        this repository; the issue number is captured in group 1.
        """
        issues_url = re.escape(f'{self._repo_url()}/issues/')
        return re.compile(r'Fixes:?\s*(?:#|' + issues_url + r')([1-9][0-9]*)')

    def print(self):
        """Print the fixed issues as a reStructuredText bullet list."""
        for i in self.get_issues():
            title = Backport.sanitize_title(i.title)
            # * :github:`38972` - logging: Cleaning references to tracing in logging
            print(f'* :github:`{i.number}` - {title}')

    def print_json(self):
        """Print the fixed issues as a JSON array of id/title/url objects."""
        issue_objects = [
            {
                'id': i.number,
                'title': Backport.sanitize_title(i.title),
                # these are issues, so link to the issue rather than a pull
                'url': f'{self._repo_url()}/issues/{i.number}',
            }
            for i in self.get_issues()
        ]

        print(json.dumps(issue_objects))

    def get_pulls(self):
        """Return the list of pull requests this object was created with."""
        return self._pulls

    def get_issues(self):
        """Return the issues referenced by the pull requests, sorted by number.

        Each line of every pull request body is checked for a leading
        "Fixes #N" or "Fixes <issue URL>" phrase. As a side effect the lists
        returned by get_pulls_without_issues() and
        get_pulls_with_invalid_issues() are populated, and an error is
        logged for every offending pull request. The scan runs only once;
        later calls return the cached list.
        """
        if self._issues is not None:
            return self._issues

        issue_map = {}
        self._pulls_without_an_issue = []
        self._pulls_with_invalid_issues = {}

        # issue number -> issue object, or None when the lookup failed, so
        # that an issue referenced several times is only requested once
        fetched = {}
        # compiled on first use so an empty pull list never touches the repo
        fixes_re = None

        for p in self._pulls:
            if fixes_re is None:
                fixes_re = self._fixes_pattern()

            # check for issues in this pr
            issues_for_this_pr = {}
            # a pull request without a description has a body of None
            for line in (p.body or '').split('\n'):
                match = fixes_re.match(line.strip())
                if not match:
                    continue

                issue_number = int(match[1])
                if issue_number not in fetched:
                    try:
                        fetched[issue_number] = self._repo.get_issue(
                            issue_number)
                    except Exception:
                        # the lookup raises for unknown issues; record the
                        # failure instead of aborting the whole report
                        fetched[issue_number] = None

                issue = fetched[issue_number]
                if not issue:
                    invalid = self._pulls_with_invalid_issues.setdefault(
                        p.number, [])
                    if issue_number not in invalid:
                        invalid.append(issue_number)
                    logging.error(
                        f'{self._repo_url()}/pull/{p.number} references invalid issue number {issue_number}')
                    continue

                issues_for_this_pr[issue_number] = issue

            # report prs missing issues later
            if not issues_for_this_pr:
                logging.error(
                    f'{self._repo_url()}/pull/{p.number} does not have an associated issue')
                self._pulls_without_an_issue.append(p)
                continue

            issue_map.update(issues_for_this_pr)

        self._issues = sorted(issue_map.values(), key=lambda x: x.number)

        return self._issues

    def get_pulls_without_issues(self):
        """Return the pull requests in which no valid issue reference was found."""
        # no-op when the bodies have already been scanned
        self.get_issues()

        return self._pulls_without_an_issue

    def get_pulls_with_invalid_issues(self):
        """Return a dict mapping PR numbers to the invalid issue numbers they cite."""
        # no-op when the bodies have already been scanned
        self.get_issues()

        return self._pulls_with_invalid_issues
284
285
def main():
    """Run the report and return a process exit status.

    Parses the command line, selects the pull requests (either the ones
    given explicitly or the ones merged in the requested time window) and
    prints the issues they fix as JSON or reStructuredText.

    Returns:
        os.EX_OK on success; os.EX_DATAERR when the arguments are invalid,
        the repository cannot be obtained, or a pull request has no issue
        or references an invalid one.
    """
    args = parse_args()

    if not args:
        return os.EX_DATAERR

    try:
        gh = Github(args.token)
    except Exception:
        logging.error('failed to authenticate with GitHub')
        return os.EX_DATAERR

    try:
        repo = gh.get_repo(args.org + '/' + args.repo)
    except Exception:
        logging.error('failed to obtain Github repository')
        return os.EX_DATAERR

    if args.includes:
        bp = Backport.by_included_prs(repo, args.base, set(args.includes))
    else:
        bp = Backport.by_date_range(repo, args.base,
                                    args.start, args.end, set(args.excludes))

    if not bp:
        return os.EX_DATAERR

    pulls_with_invalid_issues = bp.get_pulls_with_invalid_issues()
    if pulls_with_invalid_issues:
        logging.error('The following PRs link to invalid issues:')
        # the mapping is keyed by PR number, with the invalid issue numbers
        # as values
        for (number, lst) in pulls_with_invalid_issues.items():
            logging.error(
                f'\nhttps://github.com/{repo.organization.login}/{repo.name}/pull/{number}: {lst}')
        return os.EX_DATAERR

    pulls_without_issues = bp.get_pulls_without_issues()
    if pulls_without_issues:
        logging.error(
            'Please ensure the body of each PR to a release branch contains "Fixes #1234"')
        logging.error('The following PRs are lacking associated issues:')
        for p in pulls_without_issues:
            logging.error(
                f'https://github.com/{repo.organization.login}/{repo.name}/pull/{p.number}')
        return os.EX_DATAERR

    if args.json:
        bp.print_json()
    else:
        bp.print()

    return os.EX_OK
338
339
if __name__ == '__main__':
    # Hand main()'s return value to the shell as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
342