at 23.11-beta 18 kB view raw
#!/usr/bin/env python3

"""
Update a Python package expression by passing in the `.nix` file, or the directory containing it.
You can pass in multiple files or paths.

You'll likely want to use
``
 $ ./update-python-libraries ../../pkgs/development/python-modules/**/default.nix
``
to update all non-pinned libraries in that folder.
"""

import argparse
import json
import logging
import os
import re
import requests
from concurrent.futures import ThreadPoolExecutor as Pool
from packaging.version import Version as _Version
from packaging.version import InvalidVersion
from packaging.specifiers import SpecifierSet
from typing import Optional, Any
import collections
import subprocess

INDEX = "https://pypi.io/pypi"
"""url of PyPI"""

EXTENSIONS = ['tar.gz', 'tar.bz2', 'tar', 'zip', '.whl']
"""Permitted file extensions. These are evaluated from left to right and the first occurrence is returned."""

PRERELEASES = False

BULK_UPDATE = False

GIT = "git"

# Absolute path of the nixpkgs checkout this script runs inside; every `nix eval`
# below evaluates `{NIXPKGS_ROOT}/default.nix`.
NIXPKGS_ROOT = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode('utf-8').strip()

logging.basicConfig(level=logging.INFO)


class Version(_Version, collections.abc.Sequence):
    """A `packaging` version that is also a sequence over its release tuple.

    Indexing/iterating yields the numeric release components, which
    `_determine_latest_version` uses to build a semver-style ceiling.
    """

    def __init__(self, version):
        super().__init__(version)
        # We cannot use `str(Version(0.04.21))` because that becomes `0.4.21`
        # https://github.com/avian2/unidecode/issues/13#issuecomment-354538882
        self.raw_version = version

    def __getitem__(self, i):
        return self._version.release[i]

    def __len__(self):
        return len(self._version.release)

    def __iter__(self):
        yield from self._version.release


def _get_values(attribute, text):
    """Match attribute in text and return all matches.

    :returns: List of matches.
    """
    regex = fr'{re.escape(attribute)}\s+=\s+"(.*)";'
    regex = re.compile(regex)
    values = regex.findall(text)
    return values


def _get_attr_value(attr_path: str) -> Optional[Any]:
    """Evaluate `attr_path` in nixpkgs and return it as a Python value.

    :returns: The JSON-decoded value, or None when evaluation fails or the
        attribute does not exist.
    """
    try:
        response = subprocess.check_output([
            "nix",
            "--extra-experimental-features", "nix-command",
            "eval",
            "-f", f"{NIXPKGS_ROOT}/default.nix",
            "--json",
            f"{attr_path}"
        ])
        return json.loads(response.decode())
    except (subprocess.CalledProcessError, ValueError):
        return None


def _get_unique_value(attribute, text):
    """Match attribute in text and return unique match.

    :returns: Single match.
    :raises ValueError: if zero or more than one match is found.
    """
    values = _get_values(attribute, text)
    n = len(values)
    if n > 1:
        raise ValueError("found too many values for {}".format(attribute))
    elif n == 1:
        return values[0]
    else:
        raise ValueError("no value found for {}".format(attribute))


def _get_line_and_value(attribute, text, value=None):
    """Match attribute in text. Return the line and the value of the attribute.

    When `value` is given, only match the binding of `attribute` to that exact
    value; otherwise match any value.

    :raises ValueError: if zero or more than one match is found.
    """
    if value is None:
        regex = rf'({re.escape(attribute)}\s+=\s+\"(.*)\";)'
    else:
        regex = rf'({re.escape(attribute)}\s+=\s+\"({re.escape(value)})\";)'
    regex = re.compile(regex)
    results = regex.findall(text)
    n = len(results)
    if n > 1:
        raise ValueError("found too many values for {}".format(attribute))
    elif n == 1:
        return results[0]
    else:
        raise ValueError("no value found for {}".format(attribute))


def _replace_value(attribute, value, text, oldvalue=None):
    """Search and replace value of attribute in text.

    :returns: `text` with the attribute's value replaced by `value`.
    """
    if oldvalue is None:
        old_line, old_value = _get_line_and_value(attribute, text)
    else:
        old_line, old_value = _get_line_and_value(attribute, text, oldvalue)
    new_line = old_line.replace(old_value, value)
    new_text = text.replace(old_line, new_line)
    return new_text


def _fetch_page(url):
    """GET `url` and return the decoded JSON body.

    :raises ValueError: on any non-OK HTTP status.
    """
    r = requests.get(url)
    if r.status_code == requests.codes.ok:
        return r.json()
    else:
        raise ValueError("request for {} failed".format(url))


def _fetch_github(url):
    """GET a GitHub API `url` (authenticated when GITHUB_API_TOKEN is set).

    :raises ValueError: on any non-OK HTTP status (including rate limiting).
    """
    headers = {}
    token = os.environ.get('GITHUB_API_TOKEN')
    if token:
        headers["Authorization"] = f"token {token}"
    r = requests.get(url, headers=headers)

    if r.status_code == requests.codes.ok:
        return r.json()
    else:
        raise ValueError("request for {} failed".format(url))


def _hash_to_sri(algorithm, value):
    """Convert a hash to its SRI representation"""
    return subprocess.check_output([
        "nix",
        "hash",
        "to-sri",
        "--type", algorithm,
        value
    ]).decode().strip()


def _skip_bulk_update(attr_name: str) -> bool:
    """Return True when the package opted out of bulk updates via `skipBulkUpdate`."""
    return bool(_get_attr_value(
        f"{attr_name}.skipBulkUpdate"
    ))


# Maps the --target choice to the index of the release component that must stay
# fixed: "minor" keeps the major component, "patch" keeps major.minor, etc.
SEMVER = {
    'major' : 0,
    'minor' : 1,
    'patch' : 2,
}


def _determine_latest_version(current_version, target, versions):
    """Determine latest version, given `target`.

    Builds an exclusive upper bound ("ceiling") from `current_version` so that
    e.g. target="patch" on 1.2.3 only accepts versions < 1.3, filters out
    prereleases and unparsable versions, and returns the largest remaining
    version as its original (raw) string.

    :raises ValueError: if no candidate version remains.
    """
    current_version = Version(current_version)

    def _parse_versions(versions):
        # Silently drop version strings `packaging` cannot parse.
        for v in versions:
            try:
                yield Version(v)
            except InvalidVersion:
                pass

    versions = _parse_versions(versions)

    index = SEMVER[target]

    ceiling = list(current_version[0:index])
    if len(ceiling) == 0:
        # target == "major": no upper bound, any newer version is acceptable.
        ceiling = None
    else:
        ceiling[-1] += 1
        ceiling = Version(".".join(map(str, ceiling)))

    # We do not want prereleases
    versions = SpecifierSet(prereleases=PRERELEASES).filter(versions)

    if ceiling is not None:
        versions = SpecifierSet(f"<{ceiling}").filter(versions)

    # max() raises ValueError on an empty candidate set, which callers treat as
    # "no update found" (the redundant sorted() pass was dropped).
    return max(versions).raw_version


def _get_latest_version_pypi(package, extension, current_version, target):
    """Get latest version and hash from PyPI.

    :returns: (version, sha256 or None, prefix=None) — the prefix slot only
        matters for the GitHub fetcher.
    """
    url = "{}/{}/json".format(INDEX, package)
    # Renamed from `json` to avoid shadowing the imported json module.
    data = _fetch_page(url)

    versions = data['releases'].keys()
    version = _determine_latest_version(current_version, target, versions)

    try:
        releases = data['releases'][version]
    except KeyError as e:
        raise KeyError('Could not find version {} for {}'.format(version, package)) from e
    for release in releases:
        if release['filename'].endswith(extension):
            # TODO: In case of wheel we need to do further checks!
            sha256 = release['digests']['sha256']
            break
    else:
        sha256 = None
    return version, sha256, None


def _get_latest_version_github(package, extension, current_version, target):
    """Get latest stable release and source hash from GitHub.

    Reads the package's `src.meta.homepage` to discover owner/repo, picks the
    newest non-prerelease tag compatible with `target`, and prefetches the
    source with nix-prefetch-git (when fetchgit features are required) or
    nix-prefetch-url.

    :returns: (version, SRI or base32 hash, tag prefix such as "v").
    """
    def strip_prefix(tag):
        return re.sub("^[^0-9]*", "", tag)

    def get_prefix(string):
        matches = re.findall(r"^([^0-9]*)", string)
        return next(iter(matches), "")

    # when invoked as an updateScript, UPDATE_NIX_ATTR_PATH will be set
    # this allows us to work with packages which live outside of python-modules
    attr_path = os.environ.get("UPDATE_NIX_ATTR_PATH", f"python3Packages.{package}")
    try:
        homepage = subprocess.check_output(
            ["nix", "eval", "-f", f"{NIXPKGS_ROOT}/default.nix", "--raw", f"{attr_path}.src.meta.homepage"])\
            .decode('utf-8')
    except Exception as e:
        raise ValueError(f"Unable to determine homepage: {e}")
    owner_repo = homepage[len("https://github.com/"):]  # remove prefix
    owner, repo = owner_repo.split("/")

    url = f"https://api.github.com/repos/{owner}/{repo}/releases"
    all_releases = _fetch_github(url)
    releases = list(filter(lambda x: not x['prerelease'], all_releases))

    if len(releases) == 0:
        raise ValueError(f"{homepage} does not contain any stable releases")

    versions = map(lambda x: strip_prefix(x['tag_name']), releases)
    version = _determine_latest_version(current_version, target, versions)

    release = next(filter(lambda x: strip_prefix(x['tag_name']) == version, releases))
    prefix = get_prefix(release['tag_name'])

    # some attributes require using the fetchgit
    git_fetcher_args = []
    if (_get_attr_value(f"{attr_path}.src.fetchSubmodules")):
        git_fetcher_args.append("--fetch-submodules")
    if (_get_attr_value(f"{attr_path}.src.fetchLFS")):
        git_fetcher_args.append("--fetch-lfs")
    if (_get_attr_value(f"{attr_path}.src.leaveDotGit")):
        git_fetcher_args.append("--leave-dotGit")

    if git_fetcher_args:
        algorithm = "sha256"
        cmd = [
            "nix-prefetch-git",
            f"https://github.com/{owner}/{repo}.git",
            "--hash", algorithm,
            "--rev", f"refs/tags/{release['tag_name']}"
        ]
        cmd.extend(git_fetcher_args)
        response = subprocess.check_output(cmd)
        document = json.loads(response.decode())
        hash = _hash_to_sri(algorithm, document[algorithm])
    else:
        try:
            hash = subprocess.check_output([
                "nix-prefetch-url",
                "--type", "sha256",
                "--unpack",
                f"{release['tarball_url']}"
            ], stderr=subprocess.DEVNULL).decode('utf-8').strip()
        except (subprocess.CalledProcessError, UnicodeError):
            # this may fail if they have both a branch and a tag of the same name, attempt tag name
            tag_url = str(release['tarball_url']).replace("tarball","tarball/refs/tags")
            hash = subprocess.check_output([
                "nix-prefetch-url",
                "--type", "sha256",
                "--unpack",
                tag_url
            ], stderr=subprocess.DEVNULL).decode('utf-8').strip()

    return version, hash, prefix


# Dispatch table: which src fetcher in the expression maps to which update routine.
FETCHERS = {
    'fetchFromGitHub' : _get_latest_version_github,
    'fetchPypi' : _get_latest_version_pypi,
    'fetchurl' : _get_latest_version_pypi,
}


DEFAULT_SETUPTOOLS_EXTENSION = 'tar.gz'


# Maps the expression's `format` attribute to the sdist/wheel file extension on PyPI.
FORMATS = {
    'setuptools' : DEFAULT_SETUPTOOLS_EXTENSION,
    'wheel' : 'whl',
    'pyproject' : 'tar.gz',
    'flit' : 'tar.gz'
}


def _determine_fetcher(text):
    """Return the single fetcher used by the expression.

    :raises ValueError: when zero or multiple fetchers are mentioned.
    """
    # Count occurrences of fetchers.
    nfetchers = sum(text.count('src = {}'.format(fetcher)) for fetcher in FETCHERS.keys())
    if nfetchers == 0:
        raise ValueError("no fetcher.")
    elif nfetchers > 1:
        raise ValueError("multiple fetchers.")
    else:
        # Then we check which fetcher to use.
        for fetcher in FETCHERS.keys():
            if 'src = {}'.format(fetcher) in text:
                return fetcher


def _determine_extension(text, fetcher):
    """Determine what extension is used in the expression.

    If we use:
    - fetchPypi, we check if format is specified.
    - fetchurl, we determine the extension from the url.
    - fetchFromGitHub we simply use `.tar.gz`.
    """
    if fetcher == 'fetchPypi':
        try:
            src_format = _get_unique_value('format', text)
        except ValueError:
            src_format = None  # format was not given

        try:
            extension = _get_unique_value('extension', text)
        except ValueError:
            extension = None  # extension was not given

        if extension is None:
            if src_format is None:
                src_format = 'setuptools'
            elif src_format == 'other':
                raise ValueError("Don't know how to update a format='other' package.")
            extension = FORMATS[src_format]

    elif fetcher == 'fetchurl':
        url = _get_unique_value('url', text)
        extension = os.path.splitext(url)[1]
        if 'pypi' not in url:
            raise ValueError('url does not point to PyPI.')

    elif fetcher == 'fetchFromGitHub':
        extension = "tar.gz"

    return extension


def _update_package(path, target):
    """Update a single Nix expression file in place.

    :returns: False when no update is available, otherwise a result dict
        consumed by `_commit`.
    :raises ValueError: on downgrades, missing files/hashes, or unfetchable pnames.
    """
    # Read the expression
    with open(path, 'r') as f:
        text = f.read()

    # Determine pname. Many files have more than one pname
    pnames = _get_values('pname', text)

    # Determine version.
    version = _get_unique_value('version', text)

    # First we check how many fetchers are mentioned.
    fetcher = _determine_fetcher(text)

    extension = _determine_extension(text, fetcher)

    # Attempt a fetch using each pname, e.g. backports-zoneinfo vs backports.zoneinfo
    successful_fetch = False
    for pname in pnames:
        if BULK_UPDATE and _skip_bulk_update(f"python3Packages.{pname}"):
            raise ValueError(f"Bulk update skipped for {pname}")
        try:
            new_version, new_sha256, prefix = FETCHERS[fetcher](pname, extension, version, target)
            successful_fetch = True
            break
        except ValueError:
            continue

    if not successful_fetch:
        raise ValueError(f"Unable to find correct package using these pnames: {pnames}")

    if new_version == version:
        logging.info("Path {}: no update available for {}.".format(path, pname))
        return False
    elif Version(new_version) <= Version(version):
        raise ValueError("downgrade for {}.".format(pname))
    if not new_sha256:
        raise ValueError("no file available for {}.".format(pname))

    text = _replace_value('version', new_version, text)

    # hashes from pypi are 16-bit encoded sha256's, normalize it to sri to avoid merge conflicts
    # sri hashes have been the default format since nix 2.4+
    sri_hash = _hash_to_sri("sha256", new_sha256)

    # retrieve the old output hash for a more precise match
    if old_hash := _get_attr_value(f"python3Packages.{pname}.src.outputHash"):
        # fetchers can specify a sha256, or a sri hash
        try:
            text = _replace_value('hash', sri_hash, text, old_hash)
        except ValueError:
            text = _replace_value('sha256', sri_hash, text, old_hash)
    else:
        raise ValueError(f"Unable to retrieve old hash for {pname}")

    if fetcher == 'fetchFromGitHub':
        # in the case of fetchFromGitHub, it's common to see `rev = version;` or `rev = "v${version}";`
        # in which no string value is meant to be substituted. However, we can just overwrite the previous value.
        regex = r'(rev\s+=\s+[^;]*;)'
        regex = re.compile(regex)
        matches = regex.findall(text)
        n = len(matches)

        if n == 0:
            raise ValueError("Unable to find rev value for {}.".format(pname))
        else:
            # forcefully rewrite rev, incase tagging conventions changed for a release
            match = matches[0]
            text = text.replace(match, f'rev = "refs/tags/{prefix}${{version}}";')
            # incase there's no prefix, just rewrite without interpolation
            text = text.replace('"${version}";', 'version;')

    with open(path, 'w') as f:
        f.write(text)

    logging.info("Path {}: updated {} from {} to {}".format(path, pname, version, new_version))

    result = {
        'path' : path,
        'target': target,
        'pname': pname,
        'old_version' : version,
        'new_version' : new_version,
        #'fetcher' : fetcher,
    }

    return result


def _update(path, target):
    """Validate `path` (file or directory), then update it.

    :returns: the `_update_package` result dict, or False when the path is
        invalid or the update failed.
    """
    # We need to read and modify a Nix expression.
    if os.path.isdir(path):
        path = os.path.join(path, 'default.nix')

    # If a default.nix does not exist, we quit.
    if not os.path.isfile(path):
        logging.info("Path {}: does not exist.".format(path))
        return False

    # If file is not a Nix expression, we quit.
    if not path.endswith(".nix"):
        logging.info("Path {}: does not end with `.nix`.".format(path))
        return False

    try:
        return _update_package(path, target)
    except ValueError as e:
        logging.warning("Path {}: {}".format(path, e))
        return False


def _commit(path, pname, old_version, new_version, pkgs_prefix="python: ", **kwargs):
    """Commit result.

    Stages and commits `path`; on failure the file is checked out again so the
    working tree stays clean.

    :raises RuntimeError: when the commit fails.
    """

    msg = f'{pkgs_prefix}{pname}: {old_version} -> {new_version}'

    try:
        subprocess.check_call([GIT, 'add', path])
        subprocess.check_call([GIT, 'commit', '-m', msg])
    except subprocess.CalledProcessError as e:
        subprocess.check_call([GIT, 'checkout', path])
        # BUG FIX: the original raised subprocess.CalledProcessError(f'...'),
        # but that constructor requires (returncode, cmd) and so itself raised
        # TypeError, masking the real failure.
        raise RuntimeError(f'Could not commit {path}') from e

    return True


def main():
    """CLI entry point: update packages concurrently, then optionally commit."""

    epilog = """
environment variables:
  GITHUB_API_TOKEN\tGitHub API token used when updating github packages
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, epilog=epilog)
    parser.add_argument('package', type=str, nargs='+')
    parser.add_argument('--target', type=str, choices=SEMVER.keys(), default='major')
    parser.add_argument('--commit', action='store_true', help='Create a commit for each package update')
    parser.add_argument('--use-pkgs-prefix', action='store_true', help='Use python3Packages.${pname}: instead of python: ${pname}: when making commits')

    args = parser.parse_args()
    target = args.target

    packages = list(map(os.path.abspath, args.package))

    if len(packages) > 1:
        # More than one package implies a bulk run, which honors skipBulkUpdate.
        global BULK_UPDATE
        BULK_UPDATE = True

    logging.info("Updating packages...")

    # Use threads to update packages concurrently
    with Pool() as p:
        results = list(filter(bool, p.map(lambda pkg: _update(pkg, target), packages)))

    logging.info("Finished updating packages.")

    commit_options = {}
    if args.use_pkgs_prefix:
        logging.info("Using python3Packages. prefix for commits")
        commit_options["pkgs_prefix"] = "python3Packages."

    # Commits are created sequentially.
    if args.commit:
        logging.info("Committing updates...")
        # list forces evaluation
        list(map(lambda x: _commit(**x, **commit_options), results))
        logging.info("Finished committing updates")

    count = len(results)
    logging.info("{} package(s) updated".format(count))


if __name__ == '__main__':
    main()