Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at devShellTools-shell 346 lines 9.7 kB view raw
#!/usr/bin/env python3

"""
Converts the LibreOffice `download.lst` file into a Nix expression.

Requires an environment variable named `downloadList` identifying the path
of the input file, and writes the result to stdout.

todo - Ideally we would move as much as possible into derivation dependencies.
"""
import collections
import itertools
import json
import os
import re
import subprocess
import sys
import urllib.error
import urllib.request


def main():
    """
    Print a Nix list with one attribute set per package from `download.lst`.

    The Nix expression is written to stdout; progress and diagnostic
    messages are written to stderr.  Every tarball is fetched through
    `download` and the emitted `sha256` is computed from the fetched file.
    """

    packages = list(get_packages())

    for x in packages:
        print(x, file=sys.stderr)

    print('[')

    for x in packages:

        md5 = x['md5']
        upstream_sha256 = x['sha256']
        # Verify the download against the upstream sha256 when there is one,
        # otherwise fall back to the md5.
        if upstream_sha256:
            expected_hash = upstream_sha256
            hash_type = 'sha256'
        else:
            expected_hash = md5
            hash_type = 'md5'
        tarball = x['tarball']

        url = construct_url(x)
        print('url: {}'.format(url), file=sys.stderr)

        path = download(url, tarball, expected_hash, hash_type)
        print('path: {}'.format(path), file=sys.stderr)

        sha256 = get_sha256(path)
        print('sha256: {}'.format(sha256), file=sys.stderr)

        print(' {')
        print(' name = "{}";'.format(tarball))
        print(' url = "{}";'.format(url))
        print(' sha256 = "{}";'.format(sha256))
        print(' md5 = "{}";'.format(md5))
        print(' md5name = "{}-{}";'.format(md5 or upstream_sha256,
                                            tarball))
        print(' }')

    print(']')


def construct_url(x):
    """
    Return the download URL for the package dict `x`.

    Reads the keys 'brief', 'tarball', 'name', optionally 'subdir', and
    (when 'brief' is false) 'md5'.  Packages whose name starts with
    'FONT_NOTO_' are probed over the network and redirected to an alternative
    host if the default URL does not answer; 'FONT_OPENDYSLEXIC' always uses
    a fixed GitHub release URL.
    """
    if x['brief']:
        url = 'https://dev-www.libreoffice.org/src/{}{}'.format(
            x.get('subdir', ''), x['tarball'])
    else:
        url = 'https://dev-www.libreoffice.org/src/{}{}-{}'.format(
            x.get('subdir', ''), x['md5'], x['tarball'])

    if x['name'].startswith('FONT_NOTO_') and not probe_url(url):
        return 'https://noto-website-2.storage.googleapis.com/pkgs/{}'.format(
            x['tarball'])

    if x['name'] == 'FONT_OPENDYSLEXIC':
        return 'https://github.com/antijingoist/opendyslexic/releases/download/v0.91.12/{}'.format(
            x['tarball'])

    return url


def probe_url(url: str) -> bool:
    """
    Return True if a HEAD request to `url` answers with status 200.

    An HTTP error response yields False.  Other failures (for example an
    unreachable host) are not handled here and propagate to the caller.
    """
    request = urllib.request.Request(url, method='HEAD')

    try:
        with urllib.request.urlopen(request) as response:
            return response.status == 200
    except urllib.error.HTTPError:
        return False


def download(url, name, hash, hashtype):
    """
    Fetch `url` with `nix-prefetch-url` and return the printed path.

    `hash` is the expected hash of type `hashtype` ('md5' or 'sha256' as
    used by `main`); `name` is passed as `--name`.  The second line of the
    command's stdout is returned (with `--print-path` this is presumably the
    store path, after the hash on the first line).  Raises
    `subprocess.CalledProcessError` if the command fails.
    """
    cmd = ['nix-prefetch-url', url, hash, '--print-path',
           '--type', hashtype, '--name', name]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, check=True,
                          universal_newlines=True)
    return proc.stdout.split('\n')[1].strip()


def get_sha256(path):
    """
    Return the first space-separated field printed by `sha256sum path`.

    Raises `subprocess.CalledProcessError` if the command fails.
    """
    cmd = ['sha256sum', path]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, check=True,
                          universal_newlines=True)
    return proc.stdout.split(' ')[0].strip()


def get_packages():
    """
    All of the package data: What's parsed from download.lst,
    plus our additions.
    """
    return apply_additions(get_packages_from_download_list(),
                           get_additions())


def get_additions():
    """
    A mapping from package name (the all-caps identifiers used in
    `download.lst`) to a dict of additional attributes to set on the package.

    Read from `./libreoffice-srcs-additions.json`, relative to the current
    working directory.
    """
    with open('./libreoffice-srcs-additions.json') as f:
        return json.load(f)


def apply_additions(xs, additions):
    """
    Yield each package of `xs` merged with its entry in `additions`.

    Attributes already present on the package take precedence over the
    additions (see `dict_merge`); packages without an entry are yielded
    with unchanged contents.
    """
    for x in xs:
        yield dict_merge([x,
                          additions.get(x['name'], {})])


def get_packages_from_download_list():
    """
    The result of parsing `download.lst`: An iterator of dicts containing
    keys 'name', 'tarball', 'md5', 'sha256', 'brief', ordered by the position
    at which each package first appears in the file.
    """

    def lines():
        # Parsed, variable-substituted lines that could be interpreted as a
        # package attribute; everything else is reported and dropped.
        for x in sub_symbols(parse_lines(get_lines())):

            interpretation = interpret(x)

            if interpretation == 'unrecognized':
                print_skipped_line(x)
            else:
                yield dict_merge([x,
                                  interpretation])

    def cluster(xs):
        """
        Groups lines according to their order within the file, to support
        packages that are listed in `download.lst` more than once.
        """
        keys = ['tarball', 'md5', 'sha256', 'brief']
        a = {k: [x for x in xs if k in x['attrs']] for k in keys}
        # The n-th line providing each key belongs to the n-th occurrence
        # of the package.
        return zip(*[a[k] for k in keys])

    def packages():
        for (name, group) in groupby(lines(), lambda x: x['name']):
            for xs in cluster(group):
                yield {'name': name,
                       'attrs': dict_merge(x['attrs'] for x in xs),
                       'index': min(x['index'] for x in xs)}

    for x in sorted(packages(), key=lambda x: x['index']):
        yield dict_merge([{'name': x['name']},
                          x['attrs']])


def dict_merge(xs):
    """
    Merge an iterable of dicts; for duplicate keys the earliest dict wins.

    >>> dict_merge([{1: 2}, {3: 4}, {3: 5}]) == {1: 2, 3: 4}
    True
    """
    return dict(collections.ChainMap(*xs))


def groupby(xs, f):
    """
    Yield `(key, members)` pairs for `xs` grouped by `f`, in key order.

    Unlike `itertools.groupby` the input does not need to be pre-sorted,
    and each group is a list.

    >>> list(groupby([1, 2, 3, 4], lambda x: x % 2))
    [(0, [2, 4]), (1, [1, 3])]
    """
    for (_, members) in itertools.groupby(sorted(xs, key=f), f):
        group = list(members)
        yield (f(group[0]), group)


def get_lines():
    """
    Return the lines of the file named by the `downloadList` environment
    variable.

    Raises `RuntimeError` if the variable is unset or empty.
    """

    download_list = os.getenv('downloadList')

    if not download_list:
        raise RuntimeError(
            "environment variable 'downloadList' must be set to the path "
            "of download.lst")

    with open(download_list) as f:
        return f.read().splitlines()


def print_skipped_line(x):
    """
    Report on stderr that the line described by `x` (a dict with keys
    'index' and 'original') was skipped.
    """

    print('Skipped line {}: {}'.format(x['index'],
                                       x['original']),
          file=sys.stderr)


def parse_lines(lines):
    """
    Input: List of strings (the lines from `download.lst`)
    Output: Iterator of dicts with keys 'key', 'value', 'index' and
            'original'

    Blank and comment lines are dropped silently; lines that cannot be
    parsed are reported on stderr and dropped.
    """
    for (index, line) in enumerate(lines):

        x = {'index': index, 'original': line}

        result = parse_line(line)

        if result == 'nothing':
            pass
        elif result == 'unrecognized':
            print_skipped_line(x)
        else:
            yield dict_merge([x,
                              result])


def parse_line(line):
    """
    Input: A string
    Output: One of 1. A dict with keys 'key', 'value'
                   2. 'nothing' (if the line contains no information)
                   3. 'unrecognized' (if parsing failed)

    >>> parse_line('FOO_TARBALL := foo-1.0.tar.gz')
    {'key': 'FOO_TARBALL', 'value': 'foo-1.0.tar.gz'}
    """

    if re.match(r'\s*(#.*)?$', line):
        return 'nothing'

    match = re.match(r'([^:\s]+)\s*:=\s*(.*)$', line)

    if match:
        return {
            'key': match.group(1),
            'value': match.group(2).strip()
        }
    else:
        return 'unrecognized'


def sub_symbols(xs):
    """
    Do substitution of variables across all lines.

    Each `$(name)` in a value is replaced by the value of the line whose key
    is `name` (the last such line if there are several); unknown names are
    replaced by the empty string.  Substitution is a single pass.

    >>> list(sub_symbols([{'key': 'a', 'value': 'x'},
    ...                   {'key': 'c', 'value': '$(a)yz'}]))
    [{'key': 'a', 'value': 'x'}, {'key': 'c', 'value': 'xyz'}]
    """

    xs = list(xs)

    symbols = {x['key']: x for x in xs}

    def get_value(k):
        x = symbols.get(k)
        return x['value'] if x is not None else ''

    for x in xs:
        yield dict_merge([{'value': sub_str(x['value'], get_value)},
                          x])


def sub_str(string, func):
    """
    Do substitution of variables in a single line.

    `func` maps a variable name to its replacement; if it returns None the
    bare variable name is substituted instead.

    >>> sub_str("x = $(x)", lambda k: {'x': 'a'}[k])
    'x = a'
    """

    def func2(m):
        x = m.group(1)
        result = func(x)
        return result if result is not None else x

    return re.sub(r'\$\(([^\$\(\)]+)\)', func2, string)


def interpret(x):
    """
    Input: Dict with keys 'key' and 'value'
    Output: One of 1. Dict with keys 'name' and 'attrs'
                   2. 'unrecognized' (if interpretation failed)
    """
    # Order matters: the md5-prefixed tarball form must be tried before the
    # plain tarball/jar forms, which would also match its key.
    for f in [interpret_md5, interpret_sha256, interpret_tarball_with_md5,
              interpret_tarball, interpret_jar]:
        result = f(x)
        if result is not None:
            return result

    return 'unrecognized'


def interpret_md5(x):
    """
    Interpret a `<NAME>_MD5SUM` line; returns None for any other key.

    >>> interpret_md5({'key': 'ODFGEN_MD5SUM',
    ...                'value': '32572ea48d9021bbd6fa317ddb697abc'}) == {
    ...     'name': 'ODFGEN',
    ...     'attrs': {'md5': '32572ea48d9021bbd6fa317ddb697abc',
    ...               'sha256': ''}}
    True
    """

    match = re.match('^(.*)_MD5SUM$', x['key'])

    if match:
        return {'name': match.group(1),
                'attrs': {'md5': x['value'], 'sha256': ''}}


def interpret_sha256(x):
    """
    Interpret a `<NAME>_SHA256SUM` line; returns None for any other key.

    >>> interpret_sha256({'key': 'ODFGEN_SHA256SUM', 'value': 'abc'}) == {
    ...     'name': 'ODFGEN', 'attrs': {'sha256': 'abc', 'md5': ''}}
    True
    """
    match = re.match('^(.*)_SHA256SUM$', x['key'])

    if match:
        return {'name': match.group(1),
                'attrs': {'sha256': x['value'], 'md5': ''}}


def interpret_tarball(x):
    """
    Interpret a `<NAME>_TARBALL` line; returns None for any other key.

    >>> interpret_tarball({'key': 'FREEHAND_TARBALL',
    ...                    'value': 'libfreehand-0.1.1.tar.bz2'}) == {
    ...     'name': 'FREEHAND',
    ...     'attrs': {'tarball': 'libfreehand-0.1.1.tar.bz2', 'brief': True}}
    True
    """

    match = re.match('^(.*)_TARBALL$', x['key'])

    if match:
        return {'name': match.group(1),
                'attrs': {'tarball': x['value'], 'brief': True}}


def interpret_jar(x):
    """
    Interpret a `<NAME>_JAR` line; returns None for any other key.

    >>> interpret_jar({'key': 'FOO_JAR', 'value': 'foo.jar'}) == {
    ...     'name': 'FOO', 'attrs': {'tarball': 'foo.jar', 'brief': True}}
    True
    """
    match = re.match('^(.*)_JAR$', x['key'])

    if match:
        return {'name': match.group(1),
                'attrs': {'tarball': x['value'], 'brief': True}}


def interpret_tarball_with_md5(x):
    """
    Interpret a `<NAME>_TARBALL` or `<NAME>_JAR` line whose value is
    prefixed with a 32-digit hex md5 and a dash; returns None otherwise.

    >>> interpret_tarball_with_md5({
    ...     'key': 'CLUCENE_TARBALL',
    ...     'value': '48d647fbd8ef8889e5a7f422c1bfda94-'
    ...              'clucene-core-2.3.3.4.tar.gz'}) == {
    ...     'name': 'CLUCENE',
    ...     'attrs': {'tarball': 'clucene-core-2.3.3.4.tar.gz',
    ...               'md5': '48d647fbd8ef8889e5a7f422c1bfda94',
    ...               'sha256': '', 'brief': False}}
    True
    """

    match = {'key': re.match('^(.*)_(TARBALL|JAR)$', x['key']),
             'value': re.match('(?P<md5>[0-9a-fA-F]{32})-(?P<tarball>.+)$',
                               x['value'])}

    if match['key'] and match['value']:
        return {'name': match['key'].group(1),
                'attrs': {'tarball': match['value'].group('tarball'),
                          'md5': match['value'].group('md5'),
                          'sha256': '',
                          'brief': False}}


# Only run the conversion when executed as a script, so that importing this
# module (e.g. for testing the helpers) has no side effects.
if __name__ == '__main__':
    main()