ci/github-script/prepare: identify real base branch

When a contributor mistakenly sets the wrong target branch for a Pull
Request, this can have bad consequences for CI. The most prominent one is
the mass ping of code owners, which is already handled in
`ci/request-reviews/verify-base-branch.sh`. But other things go wrong
as well:
- After eval, a mass ping of maintainers would still be possible, in
  theory. In practice, this doesn't happen, because we have a limit of 10
  reviewer requests at the same time.
- Such a PR will most often contain a change to `ci/pinned.json`, thus the
  full Eval matrix of all Lix/Nix versions will be run, burning a lot of
  resources.
- The PR will be labelled with almost all labels that are available.

We can improve on the current situation with some API calls to determine
the "best" merge-base for the current PR, i.e. the one with the fewest
commits between it and the head of the PR. We then consider the
corresponding branch the "real base". If the current target is not the
real base, we fail the prepare step, which is early enough to prevent all
other CI from running.

Changed files
+147 -142
.github
workflows
ci
-1
.github/workflows/push.yml
··· 2 2 3 3 on: 4 4 push: 5 - # Keep this synced with ci/request-reviews/dev-branches.txt 6 5 branches: 7 6 - master 8 7 - staging
+129 -1
ci/github-script/prepare.js
··· 51 51 throw new Error('The PR targets a channel branch.') 52 52 } 53 53 54 + if (headClassification.type.includes('wip')) { 55 + // In the following, we look at the git history to determine the base branch that 56 + // this Pull Request branched off of. This is *supposed* to be the branch that it 57 + // merges into, but humans make mistakes. Once that happens we want to error out as 58 + // early as possible. 59 + 60 + // To determine the "real base", we are looking at the merge-base of primary development 61 + // branches and the head of the PR. The merge-base which results in the least number of 62 + // commits between that base and head is the real base. We can query for this via GitHub's 63 + // REST API. There can be multiple candidates for the real base with the same number of 64 + // commits. In this case we pick the "best" candidate by a fixed ordering of branches, 65 + // as defined in ci/supportedBranches.js. 66 + // 67 + // These requests take a while, when comparing against the wrong release - they need 68 + // to look at way more than 10k commits in that case. Thus, we try to minimize the 69 + // number of requests across releases: 70 + // - First, we look at the primary development branches only: master and release-xx.yy. 71 + // The branch with the fewest commits gives us the release this PR belongs to. 72 + // - We then compare this number against the relevant staging branches for this release 73 + // to find the exact branch that this belongs to. 74 + 75 + // All potential development branches 76 + const branches = ( 77 + await github.paginate(github.rest.repos.listBranches, { 78 + ...context.repo, 79 + per_page: 100, 80 + }) 81 + ).map(({ name }) => classify(name)) 82 + 83 + // All stable primary development branches from latest to oldest. 
84 + const releases = branches 85 + .filter(({ stable, type }) => type.includes('primary') && stable) 86 + .sort((a, b) => b.version.localeCompare(a.version)) 87 + 88 + async function mergeBase({ branch, order, version }) { 89 + const { data } = await github.rest.repos.compareCommitsWithBasehead({ 90 + ...context.repo, 91 + basehead: `${branch}...${head.sha}`, 92 + // Pagination for this endpoint is about the commits listed, which we don't care about. 93 + per_page: 1, 94 + // Taking the second page skips the list of files of this changeset. 95 + page: 2, 96 + }) 97 + return { 98 + branch, 99 + order, 100 + version, 101 + commits: data.total_commits, 102 + sha: data.merge_base_commit.sha, 103 + } 104 + } 105 + 106 + // Multiple branches can be OK at the same time, if the PR was created of a merge-base, 107 + // thus storing as array. 108 + let candidates = [await mergeBase(classify('master'))] 109 + for (const release of releases) { 110 + const nextCandidate = await mergeBase(release) 111 + if (candidates[0].commits === nextCandidate.commits) 112 + candidates.push(nextCandidate) 113 + if (candidates[0].commits > nextCandidate.commits) 114 + candidates = [nextCandidate] 115 + // The number 10000 is principally arbitrary, but the GitHub API returns this value 116 + // when the number of commits exceeds it in reality. The difference between two stable releases 117 + // is certainly more than 10k commits, thus this works for us as well: If we're targeting 118 + // a wrong release, the number *will* be 10000. 119 + if (candidates[0].commits < 10000) break 120 + } 121 + 122 + core.info(`This PR is for NixOS ${candidates[0].version}.`) 123 + 124 + // Secondary development branches for the selected version only. 125 + const secondary = branches.filter( 126 + ({ branch, type, version }) => 127 + type.includes('secondary') && version === candidates[0].version, 128 + ) 129 + 130 + // Make sure that we always check the current target as well, even if its a WIP branch. 
131 + // If it's not a WIP branch, it was already included in either releases or secondary. 132 + if (classify(base.ref).type.includes('wip')) { 133 + secondary.push(classify(base.ref)) 134 + } 135 + 136 + for (const branch of secondary) { 137 + const nextCandidate = await mergeBase(branch) 138 + if (candidates[0].commits === nextCandidate.commits) 139 + candidates.push(nextCandidate) 140 + if (candidates[0].commits > nextCandidate.commits) 141 + candidates = [nextCandidate] 142 + } 143 + 144 + // If the current branch is among the candidates, this is always better than any other, 145 + // thus sorting at -1. 146 + candidates = candidates 147 + .map((candidate) => 148 + candidate.branch === base.ref 149 + ? { ...candidate, order: -1 } 150 + : candidate, 151 + ) 152 + .sort((a, b) => a.order - b.order) 153 + 154 + const best = candidates.at(0) 155 + 156 + core.info('The base branches for this PR are:') 157 + core.info(`github: ${base.ref}`) 158 + core.info( 159 + `candidates: ${candidates.map(({ branch }) => branch).join(',')}`, 160 + ) 161 + core.info(`best candidate: ${best.branch}`) 162 + 163 + if (best.branch !== base.ref) { 164 + const current = await mergeBase(classify(base.ref)) 165 + const body = [ 166 + `The PR's base branch is set to \`${current.branch}\`, but ${current.commits === 10000 ? 'at least 10000' : current.commits - best.commits} commits from the \`${best.branch}\` branch are included. 
Make sure you know the [right base branch for your changes](https://github.com/NixOS/nixpkgs/blob/master/CONTRIBUTING.md#branch-conventions), then:`, 167 + `- If the changes should go to the \`${best.branch}\` branch, [change the base branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/changing-the-base-branch-of-a-pull-request).`, 168 + `- If the changes should go to the \`${current.branch}\` branch, rebase your PR onto the correct merge-base:`, 169 + ' ```bash', 170 + ` # git rebase --onto $(git merge-base upstream/${current.branch} HEAD) $(git merge-base upstream/${best.branch} HEAD)`, 171 + ` git rebase --onto ${current.sha} ${best.sha}`, 172 + ` git push --force-with-lease`, 173 + ' ```', 174 + ].join('\n') 175 + 176 + await postReview({ github, context, core, dry, body }) 177 + 178 + throw new Error(`The PR contains commits from a different base.`) 179 + } 180 + } 181 + 54 182 let mergedSha, targetSha 55 183 56 184 if (prInfo.mergeable) { ··· 66 194 } else { 67 195 core.warning('The PR has a merge conflict.') 68 196 69 - mergedSha = prInfo.head.sha 197 + mergedSha = head.sha 70 198 targetSha = ( 71 199 await github.rest.repos.compareCommitsWithBasehead({ 72 200 ...context.repo,
+3 -2
ci/github-script/run
··· 46 46 .argument('<owner>', 'Owner of the GitHub repository to check (Example: NixOS)') 47 47 .argument('<repo>', 'Name of the GitHub repository to check (Example: nixpkgs)') 48 48 .argument('<pr>', 'Number of the Pull Request to check') 49 - .action(async (owner, repo, pr) => { 49 + .option('--no-dry', 'Make actual modifications') 50 + .action(async (owner, repo, pr, options) => { 50 51 const prepare = (await import('./prepare.js')).default 51 - run(prepare, owner, repo, pr) 52 + run(prepare, owner, repo, pr, options) 52 53 }) 53 54 54 55 program
-3
ci/request-reviews/default.nix
··· 17 17 ./get-code-owners.sh 18 18 ./request-reviewers.sh 19 19 ./request-code-owner-reviews.sh 20 - ./verify-base-branch.sh 21 - ./dev-branches.txt 22 20 ]; 23 21 }; 24 22 nativeBuildInputs = [ makeWrapper ]; 25 23 dontBuild = true; 26 24 installPhase = '' 27 25 mkdir -p $out/bin 28 - mv dev-branches.txt $out/bin 29 26 for bin in *.sh; do 30 27 mv "$bin" "$out/bin" 31 28 wrapProgram "$out/bin/$bin" \
-8
ci/request-reviews/dev-branches.txt
··· 1 - # Trusted development branches: 2 - # These generally require PRs to update and are built by Hydra. 3 - # Keep this synced with the branches in .github/workflows/eval.yml 4 - master 5 - staging 6 - release-* 7 - staging-* 8 - haskell-updates
+1 -23
ci/request-reviews/request-code-owner-reviews.sh
··· 1 1 #!/usr/bin/env bash 2 2 3 - # Requests reviews for a PR after verifying that the base branch is correct 3 + # Requests reviews for a PR 4 4 5 5 set -euo pipefail 6 6 tmp=$(mktemp -d) ··· 9 9 10 10 log() { 11 11 echo "$@" >&2 12 - } 13 - 14 - effect() { 15 - if [[ -n "${DRY_MODE:-}" ]]; then 16 - log "Skipping in dry mode:" "${@@Q}" 17 - else 18 - "$@" 19 - fi 20 12 } 21 13 22 14 if (( $# < 3 )); then ··· 62 54 63 55 git -C "$tmp/nixpkgs.git" fetch --no-tags fork "$prBranch" 64 56 headRef=$(git -C "$tmp/nixpkgs.git" rev-parse refs/remotes/fork/"$prBranch") 65 - 66 - log "Checking correctness of the base branch" 67 - if ! "$SCRIPT_DIR"/verify-base-branch.sh "$tmp/nixpkgs.git" "$headRef" "$baseRepo" "$baseBranch" "$prRepo" "$prBranch" | tee "$tmp/invalid-base-error" >&2; then 68 - log "Posting error as comment" 69 - if ! response=$(effect gh api \ 70 - --method POST \ 71 - -H "Accept: application/vnd.github+json" \ 72 - -H "X-GitHub-Api-Version: 2022-11-28" \ 73 - "/repos/$baseRepo/issues/$prNumber/comments" \ 74 - -F "body=@$tmp/invalid-base-error"); then 75 - log "Failed to post the comment: $response" 76 - fi 77 - exit 1 78 - fi 79 57 80 58 log "Requesting reviews from code owners" 81 59 "$SCRIPT_DIR"/get-code-owners.sh "$tmp/nixpkgs.git" "$ownersFile" "$baseBranch" "$headRef" | \
-104
ci/request-reviews/verify-base-branch.sh
··· 1 - #!/usr/bin/env bash 2 - 3 - # Check that a PR doesn't include commits from other development branches. 4 - # Fails with next steps if it does 5 - 6 - set -euo pipefail 7 - tmp=$(mktemp -d) 8 - trap 'rm -rf "$tmp"' exit 9 - SCRIPT_DIR=$(dirname "$0") 10 - 11 - log() { 12 - echo "$@" >&2 13 - } 14 - 15 - # Small helper to check whether an element is in a list 16 - # Usage: `elementIn foo "${list[@]}"` 17 - elementIn() { 18 - local e match=$1 19 - shift 20 - for e; do 21 - if [[ "$e" == "$match" ]]; then 22 - return 0 23 - fi 24 - done 25 - return 1 26 - } 27 - 28 - if (( $# < 6 )); then 29 - log "Usage: $0 LOCAL_REPO HEAD_REF BASE_REPO BASE_BRANCH PR_REPO PR_BRANCH" 30 - exit 1 31 - fi 32 - localRepo=$1 33 - headRef=$2 34 - baseRepo=$3 35 - baseBranch=$4 36 - prRepo=$5 37 - prBranch=$6 38 - 39 - # All development branches 40 - devBranchPatterns=() 41 - while read -r pattern; do 42 - if [[ "$pattern" != '#'* ]]; then 43 - devBranchPatterns+=("$pattern") 44 - fi 45 - done < "$SCRIPT_DIR/dev-branches.txt" 46 - 47 - git -C "$localRepo" branch --list --format "%(refname:short)" "${devBranchPatterns[@]}" > "$tmp/dev-branches" 48 - readarray -t devBranches < "$tmp/dev-branches" 49 - 50 - if [[ "$baseRepo" == "$prRepo" ]] && elementIn "$prBranch" "${devBranches[@]}"; then 51 - log "This PR merges $prBranch into $baseBranch, no commit check necessary" 52 - exit 0 53 - fi 54 - 55 - # The current merge base of the PR 56 - prMergeBase=$(git -C "$localRepo" merge-base "$baseBranch" "$headRef") 57 - log "The PR's merge base with the base branch $baseBranch is $prMergeBase" 58 - 59 - # This is purely for debugging 60 - git -C "$localRepo" rev-list --reverse "$baseBranch".."$headRef" > "$tmp/pr-commits" 61 - log "The PR includes these $(wc -l < "$tmp/pr-commits") commits:" 62 - cat <"$tmp/pr-commits" >&2 63 - 64 - for testBranch in "${devBranches[@]}"; do 65 - 66 - if [[ -z "$(git -C "$localRepo" rev-list -1 --since="1 month ago" "$testBranch")" ]]; then 67 - log "Not 
checking $testBranch, was inactive for the last month" 68 - continue 69 - fi 70 - log "Checking if commits from $testBranch are included in the PR" 71 - 72 - # We need to check for any commits that are in the PR which are also in the test branch. 73 - # We could check each commit from the PR individually, but that's unnecessarily slow. 74 - # 75 - # This does _almost_ what we want: `git rev-list --count headRef testBranch ^baseBranch`, 76 - # except that it includes commits that are reachable from _either_ headRef or testBranch, 77 - # instead of restricting it to ones reachable by both 78 - 79 - # Easily fixable though, because we can use `git merge-base testBranch headRef` 80 - # to get the least common ancestor (aka merge base) commit reachable by both. 81 - # If the branch being tested is indeed the right base branch, 82 - # this is then also the commit from that branch that the PR is based on top of. 83 - testMergeBase=$(git -C "$localRepo" merge-base "$testBranch" "$headRef") 84 - 85 - # And then use the `git rev-list --count`, but replacing the non-working 86 - # `headRef testBranch` with the merge base of the two. 87 - extraCommits=$(git -C "$localRepo" rev-list --count "$testMergeBase" ^"$baseBranch") 88 - 89 - if (( extraCommits != 0 )); then 90 - log -e "\e[33m" 91 - echo "The PR's base branch is set to $baseBranch, but $extraCommits commits from the $testBranch branch are included. 
Make sure you know the [right base branch for your changes](https://github.com/NixOS/nixpkgs/blob/master/CONTRIBUTING.md#branch-conventions), then:" 92 - echo "- If the changes should go to the $testBranch branch, [change the base branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/changing-the-base-branch-of-a-pull-request) to $testBranch" 93 - echo "- If the changes should go to the $baseBranch branch, rebase your PR onto the merge base with the $baseBranch branch:" 94 - echo " \`\`\`bash" 95 - echo " # git rebase --onto \$(git merge-base upstream/$baseBranch HEAD) \$(git merge-base upstream/$testBranch HEAD)" 96 - echo " git rebase --onto $prMergeBase $testMergeBase" 97 - echo " git push --force-with-lease" 98 - echo " \`\`\`" 99 - log -e "\e[m" 100 - exit 1 101 - fi 102 - done 103 - 104 - log "Base branch is correct, no commits from development branches are included"
+14
ci/supportedBranches.js
··· 13 13 nixpkgs: ['channel'], 14 14 } 15 15 16 + // "order" ranks the development branches by how likely they are the intended base branch 17 + // when they are an otherwise equally good fit according to ci/github-script/prepare.js. 18 + const orderConfig = { 19 + master: 0, 20 + release: 1, 21 + staging: 2, 22 + 'haskell-updates': 3, 23 + 'staging-next': 4, 24 + } 25 + 16 26 function split(branch) { 17 27 return { 18 28 ...branch.match( ··· 24 34 function classify(branch) { 25 35 const { prefix, version } = split(branch) 26 36 return { 37 + branch, 38 + order: orderConfig[prefix] ?? Infinity, 27 39 stable: (version ?? 'unstable') !== 'unstable', 28 40 type: typeConfig[prefix] ?? ['wip'], 29 41 version: version ?? 'unstable', ··· 40 52 } 41 53 testSplit('master') 42 54 testSplit('release-25.05') 55 + testSplit('staging') 43 56 testSplit('staging-next') 44 57 testSplit('staging-25.05') 45 58 testSplit('staging-next-25.05') ··· 56 69 } 57 70 testClassify('master') 58 71 testClassify('release-25.05') 72 + testClassify('staging') 59 73 testClassify('staging-next') 60 74 testClassify('staging-25.05') 61 75 testClassify('staging-next-25.05')