ci/github-script/labels.js at python-updates · tjh.dev/nixpkgs

tjh.dev / nixpkgs
Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
nixpkgs / ci / github-script / labels.js
at python-updates 16 kB view raw
  1module.exports = async function ({ github, context, core, dry }) {
  2  const path = require('node:path')
  3  const { DefaultArtifactClient } = require('@actions/artifact')
  4  const { readFile, writeFile } = require('node:fs/promises')
  5  const withRateLimit = require('./withRateLimit.js')
  6
  7  const artifactClient = new DefaultArtifactClient()
  8
  /**
   * Computes the desired state of all pull-request-specific labels for one PR.
   *
   * Besides reading the PR and its reviews, this looks up the latest workflow run
   * for the PR's head commit and, if that run still has a downloadable
   * "comparison" artifact, merges the labels stored in it.
   *
   * @param {object} args
   * @param {{ number: number }} args.item - The PR to label; only `number` is read.
   * @param {{ artifacts: number }} args.stats - Counters; `artifacts` is incremented
   *   once for every artifact download.
   * @returns {Promise<Record<string, boolean>>} Label name -> whether it should be set.
   */
  async function handlePullRequest({ item, stats }) {
    const log = (k, v) => core.info(`PR #${item.number} - ${k}: ${v}`)

    const pull_number = item.number

    // This API request is important for the merge-conflict label, because it triggers the
    // creation of a new test merge commit. This is needed to actually determine the state of a PR.
    const { data: pull_request } = await github.rest.pulls.get({
      ...context.repo,
      pull_number,
    })

    const reviews = await github.paginate(github.rest.pulls.listReviews, {
      ...context.repo,
      pull_number,
    })

    // IDs of every user who submitted an approving review.
    // A review without a user contributes `undefined` as its ID.
    const approvals = new Set(
      reviews
        .filter((review) => review.state === 'APPROVED')
        .map((review) => review.user?.id),
    )

    // After creation of a Pull Request, `merge_commit_sha` will be null initially:
    // The very first merge commit will only be calculated after a little while.
    // To avoid labeling the PR as conflicted before that, we wait a few minutes.
    // This is intentionally less than the time that Eval takes, so that the label job
    // running after Eval can indeed label the PR as conflicted if that is the case.
    const merge_commit_sha_valid =
      Date.now() - new Date(pull_request.created_at).getTime() > 3 * 60 * 1000

    const prLabels = {
      // We intentionally don't use the mergeable or mergeable_state attributes.
      // Those have an intermediate state while the test merge commit is created.
      // This doesn't work well for us, because we might have just triggered another
      // test merge commit creation by requesting the pull request via API at the start
      // of this function.
      // The attribute merge_commit_sha keeps the old value of null or the hash *until*
      // the new test merge commit has either successfully been created or failed so.
      // This essentially means we are updating the merge conflict label in two steps:
      // On the first pass of the day, we just fetch the pull request, which triggers
      // the creation. At this stage, the label is likely not updated, yet.
      // The second pass will then read the result from the first pass and set the label.
      '2.status: merge conflict':
        merge_commit_sha_valid && !pull_request.merge_commit_sha,
      '12.approvals: 1': approvals.size === 1,
      '12.approvals: 2': approvals.size === 2,
      '12.approvals: 3+': approvals.size >= 3,
      '12.first-time contribution': [
        'NONE',
        'FIRST_TIMER',
        'FIRST_TIME_CONTRIBUTOR',
      ].includes(pull_request.author_association),
    }

    // Returns the first listed run of a workflow for the PR's head commit, if any.
    const firstWorkflowRun = async (params) =>
      (
        await github.rest.actions.listWorkflowRuns({
          ...context.repo,
          event: 'pull_request_target',
          exclude_pull_requests: true,
          head_sha: pull_request.head.sha,
          ...params,
        })
      ).data.workflow_runs[0]

    // `??` evaluates lazily: eval.yml is only queried when pr.yml has no run.
    const { id: run_id, conclusion } =
      (await firstWorkflowRun({ workflow_id: 'pr.yml' })) ??
      // TODO: Remove this after 2025-09-17, at which point all eval.yml artifacts will have expired.
      // In older PRs, we need eval.yml instead of pr.yml.
      (await firstWorkflowRun({ workflow_id: 'eval.yml', status: 'success' })) ??
      {}

    // Newer PRs might not have run Eval to completion, yet.
    // Older PRs might not have an eval.yml workflow, yet.
    // In either case we continue without fetching an artifact on a best-effort basis.
    log('Last eval run', run_id ?? '<n/a>')

    if (conclusion === 'success') {
      // We only set this label if the latest eval run was successful, because if it was not, it
      // *could* have requested reviewers. We will let the PR author fix CI first, before "escalating"
      // this PR to "needs: reviewer".
      // Since the first Eval run on a PR always sets rebuild labels, the same PR will be "recently
      // updated" for the next scheduled run. Thus, this label will still be set within a few minutes
      // after a PR is created, if required.
      // Note that a "requested reviewer" disappears once they have given a review, so we check
      // existing reviews, too.
      prLabels['9.needs: reviewer'] =
        !pull_request.draft &&
        pull_request.requested_reviewers.length === 0 &&
        reviews.length === 0
    }

    const artifact =
      run_id &&
      (
        await github.rest.actions.listWorkflowRunArtifacts({
          ...context.repo,
          run_id,
          name: 'comparison',
        })
      ).data.artifacts[0]

    // Instead of checking the boolean artifact.expired, we will give us a minute to
    // actually download the artifact in the next step and avoid that race condition.
    // Older PRs, where the workflow run was already eval.yml, but the artifact was not
    // called "comparison", yet, will skip the download.
    const expired =
      !artifact ||
      new Date(artifact?.expires_at ?? 0) < new Date(Date.now() + 60 * 1000)
    log('Artifact expires at', artifact?.expires_at ?? '<n/a>')
    if (expired) return prLabels

    stats.artifacts++

    // The artifact is unpacked into a directory named after the PR number, relative
    // to the current working directory. The same path is used for reading below.
    const artifactDir = path.resolve(pull_number.toString())
    const readJson = async (name) =>
      JSON.parse(await readFile(path.join(artifactDir, name), 'utf-8'))

    await artifactClient.downloadArtifact(artifact.id, {
      findBy: {
        repositoryName: context.repo.repo,
        repositoryOwner: context.repo.owner,
        token: core.getInput('github-token'),
      },
      path: artifactDir,
      expectedHash: artifact.digest,
    })

    // The keys of maintainers.json are parsed as integers and matched against the
    // reviewer IDs collected in `approvals`.
    const maintainers = Object.keys(await readJson('maintainers.json')).map(
      (m) => Number.parseInt(m, 10),
    )

    const evalLabels = (await readJson('changed-paths.json')).labels

    return Object.assign(
      prLabels,
      // Ignore `evalLabels` if it's an array.
      // This can happen for older eval runs, before we switched to objects.
      // The old eval labels would have been set by the eval run,
      // so now they'll be present in `before`.
      // TODO: Simplify once old eval results have expired (~2025-10)
      Array.isArray(evalLabels) ? undefined : evalLabels,
      {
        '12.approved-by: package-maintainer': maintainers.some((m) =>
          approvals.has(m),
        ),
      },
    )
  }
174
175  async function handle({ item, stats }) {
176    try {
177      const log = (k, v, skip) => {
178        core.info(`#${item.number} - ${k}: ${v}` + (skip ? ' (skipped)' : ''))
179        return skip
180      }
181
182      log('Last updated at', item.updated_at)
183      log('URL', item.html_url)
184
185      const issue_number = item.number
186
187      const itemLabels = {}
188
189      if (item.pull_request || context.payload.pull_request) {
190        stats.prs++
191        Object.assign(itemLabels, await handlePullRequest({ item, stats }))
192      } else {
193        stats.issues++
194      }
195
196      const latest_event_at = new Date(
197        (
198          await github.paginate(github.rest.issues.listEventsForTimeline, {
199            ...context.repo,
200            issue_number,
201            per_page: 100,
202          })
203        )
204          .filter(({ event }) =>
205            [
206              // These events are hand-picked from:
207              //   https://docs.github.com/en/rest/using-the-rest-api/issue-event-types?apiVersion=2022-11-28
208              // Each of those causes a PR/issue to *not* be considered as stale anymore.
209              // Most of these use created_at.
210              'assigned',
211              'commented', // uses updated_at, because that could be > created_at
212              'committed', // uses committer.date
213              'head_ref_force_pushed',
214              'milestoned',
215              'pinned',
216              'ready_for_review',
217              'renamed',
218              'reopened',
219              'review_dismissed',
220              'review_requested',
221              'reviewed', // uses submitted_at
222              'unlocked',
223              'unmarked_as_duplicate',
224            ].includes(event),
225          )
226          .map(
227            ({ created_at, updated_at, committer, submitted_at }) =>
228              new Date(
229                updated_at ?? created_at ?? submitted_at ?? committer.date,
230              ),
231          )
232          // Reverse sort by date value. The default sort() sorts by string representation, which is bad for dates.
233          .sort((a, b) => b - a)
234          .at(0) ?? item.created_at,
235      )
236      log('latest_event_at', latest_event_at.toISOString())
237
238      const stale_at = new Date(new Date().setDate(new Date().getDate() - 180))
239
240      // Create a map (Label -> Boolean) of all currently set labels.
241      // Each label is set to True and can be disabled later.
242      const before = Object.fromEntries(
243        (
244          await github.paginate(github.rest.issues.listLabelsOnIssue, {
245            ...context.repo,
246            issue_number,
247          })
248        ).map(({ name }) => [name, true]),
249      )
250
251      Object.assign(itemLabels, {
252        '2.status: stale':
253          !before['1.severity: security'] && latest_event_at < stale_at,
254      })
255
256      const after = Object.assign({}, before, itemLabels)
257
258      // No need for an API request, if all labels are the same.
259      const hasChanges = Object.keys(after).some(
260        (name) => (before[name] ?? false) != after[name],
261      )
262      if (log('Has changes', hasChanges, !hasChanges)) return
263
264      // Skipping labeling on a pull_request event, because we have no privileges.
265      const labels = Object.entries(after)
266        .filter(([, value]) => value)
267        .map(([name]) => name)
268      if (log('Set labels', labels, dry)) return
269
270      await github.rest.issues.setLabels({
271        ...context.repo,
272        issue_number,
273        labels,
274      })
275    } catch (cause) {
276      throw new Error(`Labeling #${item.number} failed.`, { cause })
277    }
278  }
279
280  await withRateLimit({ github, core }, async (stats) => {
281    if (context.payload.pull_request) {
282      await handle({ item: context.payload.pull_request, stats })
283    } else {
284      const lastRun = (
285        await github.rest.actions.listWorkflowRuns({
286          ...context.repo,
287          workflow_id: 'labels.yml',
288          event: 'schedule',
289          status: 'success',
290          exclude_pull_requests: true,
291          per_page: 1,
292        })
293      ).data.workflow_runs[0]
294
295      const cutoff = new Date(
296        Math.max(
297          // Go back as far as the last successful run of this workflow to make sure
298          // we are not leaving anyone behind on GHA failures.
299          // Defaults to go back 1 hour on the first run.
300          new Date(lastRun?.created_at ?? new Date().getTime() - 1 * 60 * 60 * 1000).getTime(),
301          // Go back max. 1 day to prevent hitting all API rate limits immediately,
302          // when GH API returns a wrong workflow by accident.
303          new Date().getTime() - 24 * 60 * 60 * 1000,
304        ),
305      )
306      core.info('cutoff timestamp: ' + cutoff.toISOString())
307
308      const updatedItems = await github.paginate(
309        github.rest.search.issuesAndPullRequests,
310        {
311          q: [
312            `repo:"${context.repo.owner}/${context.repo.repo}"`,
313            'is:open',
314            `updated:>=${cutoff.toISOString()}`,
315          ].join(' AND '),
316          per_page: 100,
317          // TODO: Remove in 2025-10, when it becomes the default.
318          advanced_search: true,
319        },
320      )
321
322      let cursor
323
324      // No workflow run available the first time.
325      if (lastRun) {
326        // The cursor to iterate through the full list of issues and pull requests
327        // is passed between jobs as an artifact.
328        const artifact = (
329          await github.rest.actions.listWorkflowRunArtifacts({
330            ...context.repo,
331            run_id: lastRun.id,
332            name: 'pagination-cursor',
333          })
334        ).data.artifacts[0]
335
336        // If the artifact is not available, the next iteration starts at the beginning.
337        if (artifact) {
338          stats.artifacts++
339
340          const { downloadPath } = await artifactClient.downloadArtifact(
341            artifact.id,
342            {
343              findBy: {
344                repositoryName: context.repo.repo,
345                repositoryOwner: context.repo.owner,
346                token: core.getInput('github-token'),
347              },
348              expectedHash: artifact.digest,
349            },
350          )
351
352          cursor = await readFile(path.resolve(downloadPath, 'cursor'), 'utf-8')
353        }
354      }
355
356      // From GitHub's API docs:
357      //   GitHub's REST API considers every pull request an issue, but not every issue is a pull request.
358      //   For this reason, "Issues" endpoints may return both issues and pull requests in the response.
359      //   You can identify pull requests by the pull_request key.
360      const allItems = await github.rest.issues.listForRepo({
361        ...context.repo,
362        state: 'open',
363        sort: 'created',
364        direction: 'asc',
365        per_page: 100,
366        after: cursor,
367      })
368
369      // Regex taken and comment adjusted from:
370      // https://github.com/octokit/plugin-paginate-rest.js/blob/8e5da25f975d2f31dda6b8b588d71f2c768a8df2/src/iterator.ts#L36-L41
371      // `allItems.headers.link` format:
372      //   <https://api.github.com/repositories/4542716/issues?page=3&per_page=100&after=Y3Vyc29yOnYyOpLPAAABl8qNnYDOvnSJxA%3D%3D>; rel="next",
373      //   <https://api.github.com/repositories/4542716/issues?page=1&per_page=100&before=Y3Vyc29yOnYyOpLPAAABl8xFV9DOvoouJg%3D%3D>; rel="prev"
374      // Sets `next` to undefined if "next" URL is not present or `link` header is not set.
375      const next = ((allItems.headers.link ?? '').match(
376        /<([^<>]+)>;\s*rel="next"/,
377      ) ?? [])[1]
378      if (next) {
379        cursor = new URL(next).searchParams.get('after')
380        const uploadPath = path.resolve('cursor')
381        await writeFile(uploadPath, cursor, 'utf-8')
382        if (dry) {
383          core.info(`pagination-cursor: ${cursor} (upload skipped)`)
384        } else {
385          // No stats.artifacts++, because this does not allow passing a custom token.
386          // Thus, the upload will not happen with the app token, but the default github.token.
387          await artifactClient.uploadArtifact(
388            'pagination-cursor',
389            [uploadPath],
390            path.resolve('.'),
391            {
392              retentionDays: 1,
393            },
394          )
395        }
396      }
397
398      // Some items might be in both search results, so filtering out duplicates as well.
399      const items = []
400        .concat(updatedItems, allItems.data)
401        .filter(
402          (thisItem, idx, arr) =>
403            idx ==
404            arr.findIndex((firstItem) => firstItem.number == thisItem.number),
405        )
406
407      ;(await Promise.allSettled(items.map((item) => handle({ item, stats }))))
408        .filter(({ status }) => status == 'rejected')
409        .map(({ reason }) =>
410          core.setFailed(`${reason.message}\n${reason.cause.stack}`),
411        )
412    }
413  })
414}