1#!/usr/bin/env python3
2
3"""
4Update a Python package expression by passing in the `.nix` file, or the directory containing it.
5You can pass in multiple files or paths.
6
7You'll likely want to use
8``
9 $ ./update-python-libraries ../../pkgs/development/python-modules/**/default.nix
10``
11to update all non-pinned libraries in that folder.
12"""
13
14import argparse
15import os
16import pathlib
17import re
18import requests
19from concurrent.futures import ThreadPoolExecutor as Pool
20from packaging.version import Version as _Version
21from packaging.version import InvalidVersion
22from packaging.specifiers import SpecifierSet
23import collections
24import subprocess
25
# Base of the PyPI JSON API; package metadata is requested from
# "<INDEX>/<package>/json".
INDEX = "https://pypi.io/pypi"
"""url of PyPI"""

# NOTE(review): EXTENSIONS is not referenced anywhere else in the visible file,
# and '.whl' carries a leading dot unlike the other entries -- confirm before
# relying on it.
EXTENSIONS = ['tar.gz', 'tar.bz2', 'tar', 'zip', '.whl']
"""Permitted file extensions. These are evaluated from left to right and the first occurrence is returned."""
31
# Passed as `prereleases=` to the SpecifierSet that filters candidate versions;
# with False, pre-release versions are not considered as update targets.
PRERELEASES = False

# Executable used for the `add`, `commit` and `checkout` calls made when
# committing an update.
GIT = "git"

# Top-level directory of the enclosing git checkout, resolved once at import
# time by running `git rev-parse --show-toplevel` (note: this call hard-codes
# "git" rather than using GIT). Importing or running this module outside a git
# work tree therefore fails. The value is used to locate `default.nix` when
# evaluating a package's homepage with `nix eval`.
NIXPGKS_ROOT = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode('utf-8').strip()

import logging
# Configure the root logger so that INFO messages (progress, "no update
# available", ...) are emitted.
logging.basicConfig(level=logging.INFO)
40
41
class Version(_Version, collections.abc.Sequence):
    """PEP 440 version that remembers its original spelling.

    On top of the comparison behaviour inherited from
    ``packaging.version.Version`` this class

    * keeps the string it was constructed from in ``raw_version``, because
      ``str()`` of a parsed version is normalised, and
    * behaves as a read-only sequence over the numeric release segment, so
      that ``Version("1.2.3")[0:2]`` yields ``(1, 2)``.
    """

    def __init__(self, version):
        """Parse `version`; raises `InvalidVersion` when it cannot be parsed."""
        super().__init__(version)
        # We cannot use `str(Version(0.04.21))` because that becomes `0.4.21`
        # https://github.com/avian2/unidecode/issues/13#issuecomment-354538882
        self.raw_version = version

    # The sequence protocol is implemented on top of the public `release`
    # property instead of the private `_version` attribute, so it does not
    # depend on packaging's internal layout.

    def __getitem__(self, i):
        """Return the release component (or slice of components) at `i`."""
        return self.release[i]

    def __len__(self):
        """Return the number of components in the release segment."""
        return len(self.release)

    def __iter__(self):
        """Iterate over the integer components of the release segment."""
        yield from self.release
58
59
def _get_values(attribute, text):
    """Match attribute in text and return all matches.

    Looks for assignments of the form ``attribute = "value";``. The attribute
    name is taken literally (it is escaped, not interpreted as a regex) and
    has to start at a word boundary, so asking for ``version`` does not pick
    up ``min_version = "...";``.

    :param attribute: Name of the attribute to look for.
    :param text: Text of the Nix expression.
    :returns: List of matches (the quoted values, without the quotes).
    """
    # Raw string: `\s` and `\b` must reach the regex engine untouched.
    pattern = re.compile(rf'\b{re.escape(attribute)}\s+=\s+"(.*)";')
    return pattern.findall(text)
69
def _get_unique_value(attribute, text):
    """Return the one and only value assigned to `attribute` in `text`.

    :returns: Single match.
    :raises ValueError: if the attribute is matched more than once, or not
        at all.
    """
    matches = _get_values(attribute, text)
    if len(matches) == 1:
        (only,) = matches
        return only
    if matches:
        raise ValueError("found too many values for {}".format(attribute))
    raise ValueError("no value found for {}".format(attribute))
83
def _find_unique_assignment(attribute, text):
    """Return the single regex match of ``attribute = "value";`` in `text`.

    Group 1 of the match is the whole assignment, group 2 the quoted value.
    The attribute name is matched literally and must start at a word
    boundary.

    :raises ValueError: if there is no match or more than one.
    """
    pattern = re.compile(rf'(\b{re.escape(attribute)}\s+=\s+"(.*)";)')
    found = list(pattern.finditer(text))
    if len(found) > 1:
        raise ValueError("found too many values for {}".format(attribute))
    if not found:
        raise ValueError("no value found for {}".format(attribute))
    return found[0]


def _get_line_and_value(attribute, text):
    """Match attribute in text. Return the line and the value of the attribute.

    :returns: Tuple ``(assignment, value)``, e.g.
        ``('version = "1.0";', '1.0')``.
    :raises ValueError: if the attribute is not assigned exactly once.
    """
    match = _find_unique_assignment(attribute, text)
    return match.group(1), match.group(2)


def _replace_value(attribute, value, text):
    """Search and replace value of attribute in text.

    Only the characters between the quotes of the matched assignment are
    replaced. Splicing at the match position (instead of substituting the old
    value as a string) keeps the attribute name intact when the old value
    happens to be a substring of it, and leaves identical text elsewhere in
    the file untouched.

    :returns: The text with the new value in place.
    :raises ValueError: if the attribute is not assigned exactly once.
    """
    match = _find_unique_assignment(attribute, text)
    start, end = match.span(2)
    return text[:start] + value + text[end:]
104
105
def _fetch_page(url):
    """GET `url` and return the response body decoded as JSON.

    :raises ValueError: if the response status is not OK.
    """
    response = requests.get(url)
    if response.status_code != requests.codes.ok:
        raise ValueError("request for {} failed".format(url))
    return response.json()
112
113
def _fetch_github(url):
    """GET `url` and return the response body decoded as JSON.

    When the environment variable GITHUB_API_TOKEN is set to a non-empty
    value it is sent as an ``Authorization: token ...`` header.

    :raises ValueError: if the response status is not OK.
    """
    token = os.environ.get('GITHUB_API_TOKEN')
    headers = {"Authorization": f"token {token}"} if token else {}

    response = requests.get(url, headers=headers)
    if response.status_code != requests.codes.ok:
        raise ValueError("request for {} failed".format(url))
    return response.json()
125
126
# Update target -> number of leading release components that have to stay
# unchanged when looking for a newer version (0 for 'major', i.e. no limit).
SEMVER = {
    part: position
    for position, part in enumerate(('major', 'minor', 'patch'))
}
132
133
def _determine_latest_version(current_version, target, versions):
    """Determine latest version, given `target`.

    :param current_version: Version string currently used by the package.
    :param target: Key of SEMVER. It limits how far the update may go: the
        first ``SEMVER[target]`` release components of `current_version` act
        as an upper bound, e.g. with ``'minor'`` and a current version of
        ``1.2.3`` only versions ``< 2`` are considered.
    :param versions: Iterable of candidate version strings. Strings that are
        not valid versions are skipped.
    :returns: The newest acceptable version, spelled exactly as it appeared
        in `versions`.
    :raises ValueError: if no candidate is left after filtering.
    """
    current_version = Version(current_version)
    index = SEMVER[target]

    def _parse_versions(versions):
        for v in versions:
            try:
                yield Version(v)
            except InvalidVersion:
                pass

    candidates = _parse_versions(versions)

    # We do not want prereleases
    candidates = SpecifierSet(prereleases=PRERELEASES).filter(candidates)

    # Bump the last pinned component to get an exclusive upper bound.
    pinned = list(current_version[0:index])
    if pinned:
        pinned[-1] += 1
        ceiling = Version(".".join(map(str, pinned)))
        candidates = SpecifierSet(f"<{ceiling}").filter(candidates)

    # max() alone is enough; report an empty candidate set explicitly rather
    # than through max()'s generic "empty sequence" error.
    latest = max(candidates, default=None)
    if latest is None:
        raise ValueError(
            f"no suitable version found for target '{target}' "
            f"(current version {current_version.raw_version})"
        )
    return latest.raw_version
164
165
def _get_latest_version_pypi(package, extension, current_version, target):
    """Get latest version and hash from PyPI.

    :returns: Tuple ``(version, sha256, None)``. ``sha256`` is the digest of
        the first file of that release whose name ends with `extension`, or
        None when the release has no such file. The third element is the tag
        prefix, which does not apply to PyPI.
    :raises KeyError: if the selected version has no entry in the release
        listing.
    """
    metadata = _fetch_page("{}/{}/json".format(INDEX, package))

    version = _determine_latest_version(current_version, target, metadata['releases'].keys())

    try:
        files = metadata['releases'][version]
    except KeyError as e:
        raise KeyError('Could not find version {} for {}'.format(version, package)) from e

    # TODO: In case of wheel we need to do further checks!
    sha256 = next(
        (
            candidate['digests']['sha256']
            for candidate in files
            if candidate['filename'].endswith(extension)
        ),
        None,
    )
    return version, sha256, None
186
187
def _get_latest_version_github(package, extension, current_version, target):
    """Get latest version, hash and tag prefix from a repository's GitHub releases.

    :param package: pname of the package; used to build the attribute path
        ``python3Packages.<package>`` unless UPDATE_NIX_ATTR_PATH is set.
    :param extension: Not used by this fetcher; present so that all fetchers
        share one signature.
    :param current_version: Version string currently used by the package.
    :param target: Key of SEMVER, see `_determine_latest_version`.
    :returns: Tuple ``(version, sha256, prefix)`` where ``sha256`` is the
        output of ``nix-prefetch-url --unpack`` for the release tarball and
        ``prefix`` is whatever precedes the first digit of the release tag.
    :raises ValueError: if the homepage cannot be evaluated or the repository
        has no stable releases.
    """
    def strip_prefix(tag):
        return re.sub("^[^0-9]*", "", tag)

    def get_prefix(string):
        matches = re.findall(r"^([^0-9]*)", string)
        return next(iter(matches), "")

    def prefetch(url):
        return subprocess.check_output(
            ["nix-prefetch-url", "--type", "sha256", "--unpack", url],
            stderr=subprocess.DEVNULL,
        ).decode('utf-8').strip()

    # when invoked as an updateScript, UPDATE_NIX_ATTR_PATH will be set
    # this allows us to work with packages which live outside of python-modules
    attr_path = os.environ.get("UPDATE_NIX_ATTR_PATH", f"python3Packages.{package}")
    try:
        homepage = subprocess.check_output(
            ["nix", "eval", "-f", f"{NIXPGKS_ROOT}/default.nix", "--raw", f"{attr_path}.src.meta.homepage"]
        ).decode('utf-8')
    except Exception as e:
        # Keep the original error attached for debugging.
        raise ValueError(f"Unable to determine homepage: {e}") from e
    owner_repo = homepage[len("https://github.com/"):]  # remove prefix
    owner, repo = owner_repo.split("/")

    url = f"https://api.github.com/repos/{owner}/{repo}/releases"
    all_releases = _fetch_github(url)
    releases = [r for r in all_releases if not r['prerelease']]

    if not releases:
        raise ValueError(f"{homepage} does not contain any stable releases")

    versions = (strip_prefix(r['tag_name']) for r in releases)
    version = _determine_latest_version(current_version, target, versions)

    release = next(r for r in releases if strip_prefix(r['tag_name']) == version)
    prefix = get_prefix(release['tag_name'])
    tarball_url = str(release['tarball_url'])
    try:
        sha256 = prefetch(tarball_url)
    except subprocess.CalledProcessError:
        # this may fail if they have both a branch and a tag of the same name, attempt tag name
        # (only a failed prefetch triggers the retry; KeyboardInterrupt and
        # the like are no longer swallowed)
        sha256 = prefetch(tarball_url.replace("tarball", "tarball/refs/tags"))

    return version, sha256, prefix
231
232
# Fetcher named in the expression's `src = ...` -> function that looks up the
# latest version for it. All of them are called as
# (package, extension, current_version, target).
FETCHERS = dict(
    fetchFromGitHub=_get_latest_version_github,
    fetchPypi=_get_latest_version_pypi,
    fetchurl=_get_latest_version_pypi,
)


DEFAULT_SETUPTOOLS_EXTENSION = 'tar.gz'


# Value of the expression's `format` attribute -> file extension of the
# distribution file to look for.
FORMATS = dict(
    setuptools=DEFAULT_SETUPTOOLS_EXTENSION,
    wheel='whl',
    pyproject='tar.gz',
    flit='tar.gz',
)
249
def _determine_fetcher(text):
    """Return the name of the single fetcher used as `src` in `text`.

    :raises ValueError: if `src = <fetcher>` occurs for none of the known
        fetchers, or more than once in total.
    """
    # Count occurrences of fetchers.
    hits = {name: text.count(f'src = {name}') for name in FETCHERS}
    total = sum(hits.values())

    if total == 0:
        raise ValueError("no fetcher.")
    if total > 1:
        raise ValueError("multiple fetchers.")

    # Exactly one occurrence: report the fetcher it belongs to.
    return next(name for name, count in hits.items() if count)
262
263
def _determine_extension(text, fetcher):
    """Determine what extension is used in the expression.

    If we use:
    - fetchPypi, we use an explicit `extension` if there is one and otherwise
      derive it from `format` (default: setuptools).
    - fetchurl, we determine the extension from the url.
    - fetchFromGitHub we simply use `tar.gz`.

    :param text: Text of the Nix expression.
    :param fetcher: One of the keys of FETCHERS.
    :returns: The extension as a string.
    :raises ValueError: if the format or fetcher is not supported, the url
        does not point to PyPI, or submodules are fetched from GitHub.
    """
    if fetcher == 'fetchPypi':
        try:
            src_format = _get_unique_value('format', text)
        except ValueError:
            src_format = None   # format was not given

        try:
            extension = _get_unique_value('extension', text)
        except ValueError:
            extension = None    # extension was not given

        if extension is None:
            if src_format is None:
                src_format = 'setuptools'
            elif src_format == 'other':
                raise ValueError("Don't know how to update a format='other' package.")
            try:
                extension = FORMATS[src_format]
            except KeyError:
                # Callers skip a package on ValueError; a bare KeyError would
                # abort the whole run.
                raise ValueError(
                    f"Don't know how to update a format='{src_format}' package."
                ) from None

    elif fetcher == 'fetchurl':
        url = _get_unique_value('url', text)
        # Note: splitext keeps the leading dot and only the last suffix.
        extension = os.path.splitext(url)[1]
        if 'pypi' not in url:
            raise ValueError('url does not point to PyPI.')

    elif fetcher == 'fetchFromGitHub':
        if "fetchSubmodules" in text:
            raise ValueError("fetchFromGitHub fetcher doesn't support submodules")
        extension = "tar.gz"

    else:
        raise ValueError(f"Don't know how to determine the extension for fetcher '{fetcher}'.")

    return extension
302
303
def _update_package(path, target):
    """Update version and hash of the package expression at `path` in place.

    :param path: Path of the Nix expression to rewrite.
    :param target: Key of SEMVER limiting how far the update may go.
    :returns: False if the package is already at the latest version,
        otherwise a dict with the keys ``path``, ``target``, ``pname``,
        ``old_version`` and ``new_version``.
    :raises ValueError: if the expression cannot be handled (attributes or
        fetcher not found, no matching release or file, downgrade, ...).
    """
    # Read the expression
    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()

    # Determine pname. Many files have more than one pname
    pnames = _get_values('pname', text)

    # Determine version.
    version = _get_unique_value('version', text)

    # First we check how many fetchers are mentioned.
    fetcher = _determine_fetcher(text)

    extension = _determine_extension(text, fetcher)

    # Attempt a fetch using each pname, e.g. backports-zoneinfo vs backports.zoneinfo
    last_error = None
    for pname in pnames:
        try:
            new_version, new_sha256, prefix = FETCHERS[fetcher](pname, extension, version, target)
        except ValueError as e:
            last_error = e
        else:
            break
    else:
        # No pname worked (or there was none); keep the last failure as the cause.
        raise ValueError(f"Unable to find correct package using these pnames: {pnames}") from last_error

    if new_version == version:
        logging.info("Path {}: no update available for {}.".format(path, pname))
        return False
    elif Version(new_version) <= Version(version):
        raise ValueError("downgrade for {}.".format(pname))
    if not new_sha256:
        raise ValueError("no file available for {}.".format(pname))

    text = _replace_value('version', new_version, text)
    # hashes from pypi are base16 (hex) encoded sha256's, normalize it to sri to avoid merge conflicts
    # sri hashes have been the default format since nix 2.4+
    sri_hash = subprocess.check_output(
        ["nix", "--extra-experimental-features", "nix-command", "hash", "to-sri", "--type", "sha256", new_sha256]
    ).decode('utf-8').strip()

    # fetchers can specify a sha256, or a sri hash
    try:
        text = _replace_value('sha256', sri_hash, text)
    except ValueError:
        text = _replace_value('hash', sri_hash, text)

    if fetcher == 'fetchFromGitHub':
        # in the case of fetchFromGitHub, it's common to see `rev = version;` or `rev = "v${version}";`
        # in which no string value is meant to be substituted. However, we can just overwrite the previous value.
        rev_match = re.search(r'(rev\s+=\s+[^;]*;)', text)
        if rev_match is None:
            raise ValueError("Unable to find rev value for {}.".format(pname))

        # forcefully rewrite rev, incase tagging conventions changed for a release
        text = text.replace(rev_match.group(1), f'rev = "refs/tags/{prefix}${{version}}";')
        # incase there's no prefix, just rewrite without interpolation
        text = text.replace('"${version}";', 'version;')

    with open(path, 'w', encoding='utf-8') as f:
        f.write(text)

    logging.info("Path {}: updated {} from {} to {}".format(path, pname, version, new_version))

    result = {
        'path': path,
        'target': target,
        'pname': pname,
        'old_version': version,
        'new_version': new_version,
    }

    return result
386
387
def _update(path, target):
    """Update the expression at `path`, which may be a file or a directory.

    :returns: The result of `_update_package`, or False when the path is not
        an existing `.nix` file or the update failed with a ValueError (which
        is logged as a warning).
    """
    # We need to read and modify a Nix expression; a directory stands for
    # the default.nix inside it.
    expression = os.path.join(path, 'default.nix') if os.path.isdir(path) else path

    # If a default.nix does not exist, we quit.
    if not os.path.isfile(expression):
        logging.info("Path {}: does not exist.".format(expression))
        return False

    # If file is not a Nix expression, we quit.
    if not expression.endswith(".nix"):
        logging.info("Path {}: does not end with `.nix`.".format(expression))
        return False

    try:
        outcome = _update_package(expression, target)
    except ValueError as e:
        logging.warning("Path {}: {}".format(expression, e))
        outcome = False
    return outcome
409
410
def _commit(path, pname, old_version, new_version, pkgs_prefix="python: ", **kwargs):
    """Commit result.

    Stages `path` and creates a commit with the message
    ``<pkgs_prefix><pname>: <old_version> -> <new_version>``.

    :param kwargs: Ignored; lets callers pass a complete update result dict.
    :returns: True once the commit has been created.
    :raises subprocess.CalledProcessError: if staging or committing fails.
        In that case ``git checkout <path>`` is run before the error is
        re-raised.
    """

    msg = f'{pkgs_prefix}{pname}: {old_version} -> {new_version}'

    try:
        subprocess.check_call([GIT, 'add', path])
        subprocess.check_call([GIT, 'commit', '-m', msg])
    except subprocess.CalledProcessError:
        # NOTE(review): `git checkout <path>` restores the file from the index;
        # if `git add` already succeeded this may leave the change in place --
        # confirm whether a reset to HEAD is intended.
        subprocess.check_call([GIT, 'checkout', path])
        # CalledProcessError needs (returncode, cmd), so it cannot be rebuilt
        # from a message alone; log the message and re-raise the original.
        logging.error(f'Could not commit {path}')
        raise

    return True
425
426
def main():
    """Parse the command line, update the given packages and optionally commit.

    Packages are updated concurrently in a thread pool; commits, when
    requested with --commit, are created one after another afterwards.
    """

    epilog = """
environment variables:
 GITHUB_API_TOKEN\tGitHub API token used when updating github packages
 """
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, epilog=epilog)
    parser.add_argument('package', type=str, nargs='+')
    parser.add_argument('--target', type=str, choices=SEMVER.keys(), default='major')
    parser.add_argument('--commit', action='store_true', help='Create a commit for each package update')
    parser.add_argument('--use-pkgs-prefix', action='store_true', help='Use python3Packages.${pname}: instead of python: ${pname}: when making commits')

    args = parser.parse_args()
    target = args.target

    packages = [os.path.abspath(package) for package in args.package]

    def update_one(package):
        return _update(package, target)

    logging.info("Updating packages...")

    # Use threads to update packages concurrently; failed or skipped updates
    # come back as False and are dropped.
    with Pool() as pool:
        results = [outcome for outcome in pool.map(update_one, packages) if outcome]

    logging.info("Finished updating packages.")

    commit_options = {}
    if args.use_pkgs_prefix:
        logging.info("Using python3Packages. prefix for commits")
        commit_options["pkgs_prefix"] = "python3Packages."

    # Commits are created sequentially.
    if args.commit:
        logging.info("Committing updates...")
        for outcome in results:
            _commit(**outcome, **commit_options)
        logging.info("Finished committing updates")

    logging.info("{} package(s) updated".format(len(results)))
466
467
468
# Run the updater only when this file is executed as a script.
if __name__ == '__main__':
    main()