nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at devShellTools-shell 162 lines 5.5 kB view raw
# Some repositories (such as Devpi) expose the Pypi legacy API
# (https://warehouse.pypa.io/api-reference/legacy.html).
#
# Note it is not possible to use pip
# https://discuss.python.org/t/pip-download-just-the-source-packages-no-building-no-metadata-etc/4651/12

import base64
import argparse
import netrc
import os
import shutil
import ssl
import sys
import urllib.error
import urllib.request
from html.parser import HTMLParser
from os.path import normpath
from typing import Optional
from urllib.parse import urlparse, urlunparse


# Parse the legacy index page to extract the href and package names
class Pep503(HTMLParser):
    """Collect ``{anchor text: href}`` pairs from a legacy index page.

    After ``feed()``, ``sources`` maps the text found inside each
    ``<a href=...>`` element to that element's raw ``href`` value.
    """

    def __init__(self) -> None:
        super().__init__()
        self.sources: dict[str, str] = {}
        # href of the anchor currently being parsed; None outside an anchor.
        self.url: Optional[str] = None
        # Last text chunk seen inside the current anchor.
        self.name: Optional[str] = None

    def handle_data(self, data: str) -> None:
        """Remember text as the entry name, but only while inside an anchor."""
        if self.url is not None:
            self.name = data

    def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]) -> None:
        """Start tracking an entry when an ``<a>`` tag carries an ``href``."""
        if tag == "a":
            for name, value in attrs:
                if name == "href":
                    self.url = value

    def handle_endtag(self, tag: str) -> None:
        """Record the pending entry on the first end tag after its ``href``.

        Raises:
            ValueError: if no text was seen for the pending anchor.
        """
        if self.url is None:
            return
        if not self.name:
            raise ValueError("Name not set")

        self.sources[self.name] = self.url
        self.url = None
        # Reset the name too; otherwise a later anchor without text would be
        # stored under the previous anchor's name and overwrite its URL.
        self.name = None


def try_fetch(url: str, package_name: str, package_filename: str) -> None:
    """Download ``package_filename`` from the legacy index rooted at ``url``.

    The index page ``<url>/<package_name>/`` is fetched and parsed, the link
    whose text equals ``package_filename`` is resolved against the index URL,
    and the target is written to ``package_filename`` in the current working
    directory.

    Credentials are read from the file named by the ``NETRC`` environment
    variable (if set and non-empty) and from the userinfo part of ``url``;
    the latter wins. They are sent as HTTP basic authentication.

    Raises:
        urllib.error.HTTPError: propagated from either HTTP request.
        SystemExit: with status 1 when the index does not list the file.
    """
    index_url = url + "/" + package_name + "/"

    # Parse username and password for this host from the netrc file if given.
    username: Optional[str] = None
    password: Optional[str] = None
    if os.environ.get("NETRC", "") != "":
        netrc_obj = netrc.netrc(os.environ["NETRC"])
        # Drop any "user[:password]@" prefix so the lookup uses the host name.
        host = urlparse(index_url).netloc.rpartition("@")[-1]
        # Strip port number if present
        if ":" in host:
            host = host.split(":")[0]
        authenticators = netrc_obj.authenticators(host)
        if authenticators:
            username, _, password = authenticators

    context = ssl.create_default_context()

    # Extract out username/password from index_url, if present.
    parsed_url = urlparse(index_url)
    username = parsed_url.username or username
    password = parsed_url.password or password
    index_url = parsed_url._replace(netloc=parsed_url.netloc.rpartition("@")[-1]).geturl()

    # Logged only after the credentials have been removed from the URL.
    print("Reading index %s" % index_url)

    req = urllib.request.Request(index_url)

    auth_header: Optional[str] = None
    if username and password:  # Add authentication
        credentials_b64 = base64.b64encode(":".join((username, password)).encode()).decode("utf-8")
        auth_header = "Basic {}".format(credentials_b64)
        req.add_header("Authorization", auth_header)
    else:  # If we are not using authentication disable TLS verification for long term reproducibility
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE

    with urllib.request.urlopen(req, context=context) as response:
        index = response.read()

    parser = Pep503()
    parser.feed(str(index, "utf-8"))
    if package_filename not in parser.sources:
        print("The file %s has not be found in the index %s" % (package_filename, index_url))
        sys.exit(1)

    href = parser.sources[package_filename]

    # Sometimes the href is a relative or absolute path within the index's domain.
    indicated_url = urlparse(href)
    if indicated_url.netloc == "":
        parsed_url = urlparse(index_url)

        if indicated_url.path.startswith("/"):
            # An absolute path within the index's domain.
            path = href
        else:
            # A relative path.
            path = parsed_url.path + "/" + href

        package_url = urlunparse(
            (
                parsed_url.scheme,
                parsed_url.netloc,
                path,
                None,
                None,
                None,
            )
        )
    else:
        package_url = href

    # Handle urls containing "../"
    parsed_url = urlparse(package_url)
    real_package_url = urlunparse(
        (
            parsed_url.scheme,
            parsed_url.netloc,
            normpath(parsed_url.path),
            parsed_url.params,
            parsed_url.query,
            parsed_url.fragment,
        )
    )
    print("Downloading %s" % real_package_url)

    req = urllib.request.Request(real_package_url)
    if auth_header is not None:
        # Unredirected: the header is not forwarded if the server redirects.
        req.add_unredirected_header("Authorization", auth_header)

    # Open the output file only once the request has succeeded, so a failing
    # mirror does not leave an empty file behind; both handles are closed on exit.
    with urllib.request.urlopen(req, context=context) as response:
        with open(package_filename, "wb") as package_file:
            shutil.copyfileobj(response, package_file)


argparser = argparse.ArgumentParser(description="Fetch file from legacy pypi API")
argparser.add_argument("--url", action="append", required=True)
argparser.add_argument("--pname", action="store", required=True)
argparser.add_argument("--filename", action="store", required=True)


if __name__ == "__main__":
    args = argparser.parse_args()
    # Try each index in the order given; stop at the first successful fetch.
    for url in args.url:
        try:
            try_fetch(url, args.pname, args.filename)
        except urllib.error.HTTPError as e:
            print("Got exception'", e, "', trying next package index", file=sys.stderr)
            continue
        else:
            break
    else:
        # Reached only when no iteration hit ``break``, i.e. every index failed.
        print(
            f"Could not fetch package '{args.pname}' file '{args.filename}' from any mirrors: {args.url}",
            file=sys.stderr,
        )
        sys.exit(1)