#!/usr/bin/env python3

"""
Update a Python package expression by passing in the `.nix` file, or the directory containing it.
You can pass in multiple files or paths.

You'll likely want to use
``
 $ ./update-python-libraries ../../pkgs/development/python-modules/**/default.nix
``
to update all non-pinned libraries in that folder.
"""

import argparse
import json
import logging
import os
import re
import requests
from concurrent.futures import ThreadPoolExecutor as Pool
from packaging.version import Version as _Version
from packaging.version import InvalidVersion
from packaging.specifiers import SpecifierSet
from typing import Optional, Any
import collections
import subprocess

INDEX = "https://pypi.io/pypi"
"""url of PyPI"""

EXTENSIONS = ['tar.gz', 'tar.bz2', 'tar', 'zip', '.whl']
"""Permitted file extensions. These are evaluated from left to right and the first occurrence is returned."""

PRERELEASES = False

BULK_UPDATE = False

GIT = "git"

NIXPKGS_ROOT = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode('utf-8').strip()

logging.basicConfig(level=logging.INFO)


class Version(_Version, collections.abc.Sequence):

    def __init__(self, version):
        super().__init__(version)
        # We cannot use `str(Version("0.04.21"))` because that becomes `0.4.21`
        # https://github.com/avian2/unidecode/issues/13#issuecomment-354538882
        self.raw_version = version

    def __getitem__(self, i):
        return self._version.release[i]

    def __len__(self):
        return len(self._version.release)

    def __iter__(self):
        yield from self._version.release


def _get_values(attribute, text):
    """Match attribute in text and return all matches.

    :returns: List of matches.
    """
    regex = fr'{re.escape(attribute)}\s+=\s+"(.*)";'
    regex = re.compile(regex)
    values = regex.findall(text)
    return values


def _get_attr_value(attr_path: str) -> Optional[Any]:
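    """Evaluate `attr_path` against the nixpkgs top-level with `nix eval --json`.

    Returns the decoded value, or None when evaluation fails.
    """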
    try:
        response = subprocess.check_output([
            "nix",
            "--extra-experimental-features", "nix-command",
            "eval",
            "-f", f"{NIXPKGS_ROOT}/default.nix",
            "--json",
            f"{attr_path}"
        ])
        return json.loads(response.decode())
    except (subprocess.CalledProcessError, ValueError):
        return None


def _get_unique_value(attribute, text):
    """Match attribute in text and return unique match.

    :returns: Single match.
    """
    values = _get_values(attribute, text)
    n = len(values)
    if n > 1:
        raise ValueError("found too many values for {}".format(attribute))
    elif n == 1:
        return values[0]
    else:
        raise ValueError("no value found for {}".format(attribute))


def _get_line_and_value(attribute, text, value=None):
    """Match attribute in text. Return the line and the value of the attribute."""
    if value is None:
        regex = rf'({re.escape(attribute)}\s+=\s+"(.*)";)'
    else:
        regex = rf'({re.escape(attribute)}\s+=\s+"({re.escape(value)})";)'
    regex = re.compile(regex)
    results = regex.findall(text)
    n = len(results)
    if n > 1:
        raise ValueError("found too many values for {}".format(attribute))
    elif n == 1:
        return results[0]
    else:
        raise ValueError("no value found for {}".format(attribute))


def _replace_value(attribute, value, text, oldvalue=None):
    """Search and replace value of attribute in text."""
    if oldvalue is None:
        old_line, old_value = _get_line_and_value(attribute, text)
    else:
        old_line, old_value = _get_line_and_value(attribute, text, oldvalue)
    new_line = old_line.replace(old_value, value)
    new_text = text.replace(old_line, new_line)
    return new_text


def _fetch_page(url):
    r = requests.get(url)
    if r.status_code == requests.codes.ok:
        return r.json()
    else:
        raise ValueError("request for {} failed".format(url))


def _fetch_github(url):
    headers = {}
    token = os.environ.get('GITHUB_API_TOKEN')
    if token:
        headers["Authorization"] = f"token {token}"
    r = requests.get(url, headers=headers)

    if r.status_code == requests.codes.ok:
        return r.json()
    else:
        raise ValueError("request for {} failed".format(url))


def _hash_to_sri(algorithm, value):
    """Convert a hash to its SRI representation"""
    return subprocess.check_output([
        "nix",
        "hash",
        "to-sri",
        "--type", algorithm,
        value
    ]).decode().strip()


def _skip_bulk_update(attr_name: str) -> bool:
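    """Return True when the package sets `skipBulkUpdate`, opting it out of bulk updates."""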
    return bool(_get_attr_value(
        f"{attr_name}.skipBulkUpdate"
    ))


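# Maps the update target to how many leading components of the current version
# form an exclusive upper bound: 'major' imposes no bound, 'minor' stays below
# the next major release, 'patch' below the next minor release.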
SEMVER = {
    'major' : 0,
    'minor' : 1,
    'patch' : 2,
}


def _determine_latest_version(current_version, target, versions):
    """Determine the latest version in `versions` that satisfies `target`,
    relative to `current_version`.
    """
    current_version = Version(current_version)

    def _parse_versions(versions):
        for v in versions:
            try:
                yield Version(v)
            except InvalidVersion:
                pass

    versions = _parse_versions(versions)

    index = SEMVER[target]

    ceiling = list(current_version[0:index])
    if len(ceiling) == 0:
        ceiling = None
    else:
        ceiling[-1] += 1
        ceiling = Version(".".join(map(str, ceiling)))

    # We do not want prereleases
    versions = SpecifierSet(prereleases=PRERELEASES).filter(versions)

    if ceiling is not None:
        versions = SpecifierSet(f"<{ceiling}").filter(versions)

    return (max(sorted(versions))).raw_version


def _get_latest_version_pypi(package, extension, current_version, target):
    """Get latest version and hash from PyPI."""
    url = "{}/{}/json".format(INDEX, package)
    data = _fetch_page(url)

    versions = data['releases'].keys()
    version = _determine_latest_version(current_version, target, versions)

    try:
        releases = data['releases'][version]
    except KeyError as e:
        raise KeyError('Could not find version {} for {}'.format(version, package)) from e
    for release in releases:
        if release['filename'].endswith(extension):
            # TODO: In case of wheel we need to do further checks!
            sha256 = release['digests']['sha256']
            break
    else:
        sha256 = None
    return version, sha256, None


def _get_latest_version_github(package, extension, current_version, target):
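    """Get the latest version, hash and tag prefix of a package hosted on GitHub.

    Only stable (non-prerelease) entries from the repository's releases are considered.
    """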
    def strip_prefix(tag):
        return re.sub("^[^0-9]*", "", tag)

    def get_prefix(string):
        matches = re.findall(r"^([^0-9]*)", string)
        return next(iter(matches), "")

    # when invoked as an updateScript, UPDATE_NIX_ATTR_PATH will be set
    # this allows us to work with packages which live outside of python-modules
    attr_path = os.environ.get("UPDATE_NIX_ATTR_PATH", f"python3Packages.{package}")
    try:
        homepage = subprocess.check_output(
            ["nix", "--extra-experimental-features", "nix-command", "eval",
             "-f", f"{NIXPKGS_ROOT}/default.nix", "--raw", f"{attr_path}.src.meta.homepage"])\
            .decode('utf-8')
    except Exception as e:
        raise ValueError(f"Unable to determine homepage: {e}") from e
    owner_repo = homepage[len("https://github.com/"):]  # remove prefix
    owner, repo = owner_repo.split("/")

    url = f"https://api.github.com/repos/{owner}/{repo}/releases"
    all_releases = _fetch_github(url)
    releases = list(filter(lambda x: not x['prerelease'], all_releases))

    if len(releases) == 0:
        raise ValueError(f"{homepage} does not contain any stable releases")

    versions = map(lambda x: strip_prefix(x['tag_name']), releases)
    version = _determine_latest_version(current_version, target, versions)

    release = next(filter(lambda x: strip_prefix(x['tag_name']) == version, releases))
    prefix = get_prefix(release['tag_name'])

    # some sources require using fetchgit (e.g. submodules, LFS, leaveDotGit)
    git_fetcher_args = []
    if _get_attr_value(f"{attr_path}.src.fetchSubmodules"):
        git_fetcher_args.append("--fetch-submodules")
    if _get_attr_value(f"{attr_path}.src.fetchLFS"):
        git_fetcher_args.append("--fetch-lfs")
    if _get_attr_value(f"{attr_path}.src.leaveDotGit"):
        git_fetcher_args.append("--leave-dotGit")

    if git_fetcher_args:
        algorithm = "sha256"
        cmd = [
            "nix-prefetch-git",
            f"https://github.com/{owner}/{repo}.git",
            "--hash", algorithm,
            "--rev", f"refs/tags/{release['tag_name']}"
        ]
        cmd.extend(git_fetcher_args)
        response = subprocess.check_output(cmd)
        document = json.loads(response.decode())
        hash = _hash_to_sri(algorithm, document[algorithm])
    else:
        try:
            hash = subprocess.check_output([
                "nix-prefetch-url",
                "--type", "sha256",
                "--unpack",
                f"{release['tarball_url']}"
            ], stderr=subprocess.DEVNULL).decode('utf-8').strip()
        except (subprocess.CalledProcessError, UnicodeError):
            # this may fail if the repository has both a branch and a tag of the
            # same name; retry with an explicit tag ref
            tag_url = str(release['tarball_url']).replace("tarball", "tarball/refs/tags")
            hash = subprocess.check_output([
                "nix-prefetch-url",
                "--type", "sha256",
                "--unpack",
                tag_url
            ], stderr=subprocess.DEVNULL).decode('utf-8').strip()

    return version, hash, prefix


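# Routines used to look up the latest version and hash for each supported `src` fetcher.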
FETCHERS = {
    'fetchFromGitHub' : _get_latest_version_github,
    'fetchPypi' : _get_latest_version_pypi,
    'fetchurl' : _get_latest_version_pypi,
}


DEFAULT_SETUPTOOLS_EXTENSION = 'tar.gz'


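# File extension expected on PyPI for each `format` value found in the expression.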
FORMATS = {
    'setuptools' : DEFAULT_SETUPTOOLS_EXTENSION,
    'wheel' : 'whl',
    'pyproject' : 'tar.gz',
    'flit' : 'tar.gz'
}


def _determine_fetcher(text):
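    """Return the single fetcher used for `src` in the expression."""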
    # Count occurrences of fetchers.
    nfetchers = sum(text.count('src = {}'.format(fetcher)) for fetcher in FETCHERS.keys())
    if nfetchers == 0:
        raise ValueError("no fetcher.")
    elif nfetchers > 1:
        raise ValueError("multiple fetchers.")
    else:
        # Then we check which fetcher to use.
        for fetcher in FETCHERS.keys():
            if 'src = {}'.format(fetcher) in text:
                return fetcher


def _determine_extension(text, fetcher):
    """Determine what extension is used in the expression.

    If we use:
    - fetchPypi, we check if format is specified.
    - fetchurl, we determine the extension from the url.
    - fetchFromGitHub, we simply use `.tar.gz`.
    """
    if fetcher == 'fetchPypi':
        try:
            src_format = _get_unique_value('format', text)
        except ValueError:
            src_format = None  # format was not given

        try:
            extension = _get_unique_value('extension', text)
        except ValueError:
            extension = None  # extension was not given

        if extension is None:
            if src_format is None:
                src_format = 'setuptools'
            elif src_format == 'other':
                raise ValueError("Don't know how to update a format='other' package.")
            extension = FORMATS[src_format]

    elif fetcher == 'fetchurl':
        url = _get_unique_value('url', text)
        extension = os.path.splitext(url)[1]
        if 'pypi' not in url:
            raise ValueError('url does not point to PyPI.')

    elif fetcher == 'fetchFromGitHub':
        extension = "tar.gz"

    return extension



def _update_package(path, target):
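    """Update the package expression at `path` in place.

    Returns a result dict describing the update, or False when the package is
    already at the latest version.
    """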

    # Read the expression
    with open(path, 'r') as f:
        text = f.read()

    # Determine pname. Many files have more than one pname
    pnames = _get_values('pname', text)

    # Determine version.
    version = _get_unique_value('version', text)

    # First we check how many fetchers are mentioned.
    fetcher = _determine_fetcher(text)

    extension = _determine_extension(text, fetcher)

    # Attempt a fetch using each pname, e.g. backports-zoneinfo vs backports.zoneinfo
    successful_fetch = False
    for pname in pnames:
        if BULK_UPDATE and _skip_bulk_update(f"python3Packages.{pname}"):
            raise ValueError(f"Bulk update skipped for {pname}")
        try:
            new_version, new_sha256, prefix = FETCHERS[fetcher](pname, extension, version, target)
            successful_fetch = True
            break
        except ValueError:
            continue

    if not successful_fetch:
        raise ValueError(f"Unable to find correct package using these pnames: {pnames}")

    if new_version == version:
        logging.info("Path {}: no update available for {}.".format(path, pname))
        return False
    elif Version(new_version) <= Version(version):
        raise ValueError("downgrade for {}.".format(pname))
    if not new_sha256:
        raise ValueError("no file available for {}.".format(pname))

    text = _replace_value('version', new_version, text)

    # hashes from PyPI are base16 (hex) encoded sha256 hashes; normalize them to SRI to avoid merge conflicts
    # SRI hashes have been the preferred format since Nix 2.4
    sri_hash = _hash_to_sri("sha256", new_sha256)

    # retrieve the old output hash for a more precise match
    if old_hash := _get_attr_value(f"python3Packages.{pname}.src.outputHash"):
        # fetchers can specify a sha256, or an SRI hash
        try:
            text = _replace_value('hash', sri_hash, text, old_hash)
        except ValueError:
            text = _replace_value('sha256', sri_hash, text, old_hash)
    else:
        raise ValueError(f"Unable to retrieve old hash for {pname}")

    if fetcher == 'fetchFromGitHub':
        # in the case of fetchFromGitHub, it's common to see `rev = version;` or `rev = "v${version}";`
        # in which no string value is meant to be substituted. However, we can just overwrite the previous value.
        regex = r'(rev\s+=\s+[^;]*;)'
        regex = re.compile(regex)
        matches = regex.findall(text)
        n = len(matches)

        if n == 0:
            raise ValueError("Unable to find rev value for {}.".format(pname))
        else:
            # forcefully rewrite rev, in case tagging conventions changed for a release
            match = matches[0]
            text = text.replace(match, f'rev = "refs/tags/{prefix}${{version}}";')
            # in case there's no prefix, just rewrite without interpolation
            text = text.replace('"${version}";', 'version;')

    with open(path, 'w') as f:
        f.write(text)

    logging.info("Path {}: updated {} from {} to {}".format(path, pname, version, new_version))

    result = {
        'path' : path,
        'target': target,
        'pname': pname,
        'old_version' : version,
        'new_version' : new_version,
        #'fetcher' : fetcher,
    }

    return result


def _update(path, target):
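    """Resolve `path` to a Nix expression, update it, and return the result, or False on failure."""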

    # We need to read and modify a Nix expression.
    if os.path.isdir(path):
        path = os.path.join(path, 'default.nix')

    # If a default.nix does not exist, we quit.
    if not os.path.isfile(path):
        logging.info("Path {}: does not exist.".format(path))
        return False

    # If file is not a Nix expression, we quit.
    if not path.endswith(".nix"):
        logging.info("Path {}: does not end with `.nix`.".format(path))
        return False

    try:
        return _update_package(path, target)
    except ValueError as e:
        logging.warning("Path {}: {}".format(path, e))
        return False


def _commit(path, pname, old_version, new_version, pkgs_prefix="python: ", **kwargs):
    """Commit result.
    """

    msg = f'{pkgs_prefix}{pname}: {old_version} -> {new_version}'

    try:
        subprocess.check_call([GIT, 'add', path])
        subprocess.check_call([GIT, 'commit', '-m', msg])
    except subprocess.CalledProcessError as e:
        subprocess.check_call([GIT, 'checkout', path])
        raise RuntimeError(f'Could not commit {path}') from e

    return True


def main():

    epilog = """
environment variables:
  GITHUB_API_TOKEN\tGitHub API token used when updating GitHub packages
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, epilog=epilog)
    parser.add_argument('package', type=str, nargs='+')
    parser.add_argument('--target', type=str, choices=SEMVER.keys(), default='major')
    parser.add_argument('--commit', action='store_true', help='Create a commit for each package update')
    parser.add_argument('--use-pkgs-prefix', action='store_true', help='Use python3Packages.${pname}: instead of python: ${pname}: when making commits')

    args = parser.parse_args()
    target = args.target

    packages = list(map(os.path.abspath, args.package))

    if len(packages) > 1:
        global BULK_UPDATE
        BULK_UPDATE = True

    logging.info("Updating packages...")

    # Use threads to update packages concurrently
    with Pool() as p:
        results = list(filter(bool, p.map(lambda pkg: _update(pkg, target), packages)))

    logging.info("Finished updating packages.")

    commit_options = {}
    if args.use_pkgs_prefix:
        logging.info("Using python3Packages. prefix for commits")
        commit_options["pkgs_prefix"] = "python3Packages."

    # Commits are created sequentially.
    if args.commit:
        logging.info("Committing updates...")
        # list forces evaluation
        list(map(lambda x: _commit(**x, **commit_options), results))
        logging.info("Finished committing updates")

    count = len(results)
    logging.info("{} package(s) updated".format(count))


if __name__ == '__main__':
    main()