1#!/bin/sh
2# SPDX-License-Identifier: BSD-3-Clause
3
# Stop at the first command whose failure is not explicitly handled.
set -o errexit
5
# This script fetches Pull Request commits missing from a shallow clone
# and creates a PR_SHAs.txt file. This script has a limit of 500 commits, but
# the GitHub API it uses has a lower limit of 250 commits.
9
10# It does not rely on git merge-bases which basically don't work with
11# shallow clones:
12# https://github.com/thesofproject/linux/issues/2556
13
14# Design goals:
15#
16# - Keep the code short and as simple as possible. No one is interested
17#   in maintaining this sort of script.
18#
19# - Fast and accurate for small Pull Requests
20#
# - For large Pull Requests _with merges_ the only objective is to
#   complete in a reasonable time; say less than 10 minutes. It's very
#   unlikely anyone will look at 250 checkpatch results, and time
#   optimizations should not make this script more complex.
25
26
# Sample usage:
#   $0  thesofproject/linux  2772
#
# Arguments:
#   $1 - GitHub project in "owner/repo" form
#   $2 - Pull Request number (digits only)
# Outputs (all in the current directory):
#   commits_N.json - raw API response for each page requested
#   PR_SHAs.txt    - one commit SHA per line, in the order the API returns them
# Returns:
#   2 on bad arguments, the failing command's status if curl or jq fails,
#   otherwise the status of the final git fetch.
main()
{
    if [ "$#" -ne 2 ]; then
        printf 'Usage: %s <owner/repo> <pr_number>\n' "$0" >&2
        return 2
    fi
    # Reject anything but digits: the value is interpolated into URLs and
    # into the git refspec below.
    case "$2" in
        '' | *[!0-9]*)
            printf '%s: PR number must be numeric, got "%s"\n' "$0" "$2" >&2
            return 2
            ;;
    esac

    local gh_project="$1"
    local pr_number="$2"

    printf '%s: fetching PR %s for project %s\n' "$0" "$pr_number" "$gh_project"

    # Pages left over from a previous run would otherwise be picked up by
    # the commits_?.json glob below and pollute PR_SHAs.txt.
    rm -f commits_?.json

    # As of March 2021, Github's documented limit is 250 commits
    # Let's have another cap at 500.
    # https://docs.github.com/en/rest/reference/pulls#list-commits-on-a-pull-request
    local i pagelen PRlen=0
    for i in 1 2 3 4 5; do
        # --fail: on an HTTP error (rate limit, unknown PR, ...) the response
        # body is a JSON error object, not a commit array; stop right here
        # instead of counting its keys as commits.
        curl --fail -H 'Accept: application/vnd.github.v3+json' \
         "https://api.github.com/repos/$gh_project/pulls/$pr_number/commits?per_page=100&page=$i" \
         > "commits_$i.json" || return
        pagelen=$(jq length < "commits_$i.json") || return
        # An empty page means the previous one was the last.
        if [ "$pagelen" -eq 0 ]; then
            break
        fi
        PRlen=$((PRlen + pagelen))
    done

    # -r prints the raw string, without the JSON double quotes. jq is the
    # only command here so its failure is not hidden behind a pipeline.
    jq -r '.[] | .sha' commits_?.json > PR_SHAs.txt || return

    printf 'Found %d commits, SHA1 list is in PR_SHAs.txt\n' "$PRlen"

    # PRlen+1 gets us the merge base for simple, linear histories. For
    # pull requests with merges, depth=PRLEN goes already much further
    # than needed and +1 makes little difference. It's not clear when
    # and for what sort of PRs git fetching individual commits would be
    # faster so keep a single and simple fetch for now.

    set -x # this command may take a while so show it
    git fetch --depth "$((PRlen+1))" "https://github.com/$gh_project" "pull/$pr_number/head"

}
68
69main "$@"
70