pkgs/applications/office/libreoffice/generate-libreoffice-srcs.py at devShellTools-shell · tjh.dev/nixpkgs

tjh.dev / nixpkgs
Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
nixpkgs / pkgs / applications / office / libreoffice / generate-libreoffice-srcs.py
at devShellTools-shell 346 lines 9.7 kB view raw
  1#!/usr/bin/env python3
  2
  3"""
  4Converts the LibreOffice `download.lst` file into a Nix expression.
  5
  6Requires an environment variable named `downloadList` identifying the path
  7of the input file, and writes the result to stdout.
  8
  9todo - Ideally we would move as much as possible into derivation dependencies.
 10"""
 11import collections, itertools, json, re, subprocess, sys, os
 12import urllib.request, urllib.error
 13
 14def main():
 15
 16    packages = list(get_packages())
 17
 18    for x in packages:
 19        print(x, file=sys.stderr)
 20
 21    print('[')
 22
 23    for x in packages:
 24
 25        md5 = x['md5']
 26        upstream_sha256 = x['sha256']
 27        if upstream_sha256:
 28            hash = upstream_sha256
 29            hashtype = 'sha256'
 30        else:
 31            hash = md5
 32            hashtype = 'md5'
 33        tarball = x['tarball']
 34
 35        url = construct_url(x)
 36        print('url: {}'.format(url), file=sys.stderr)
 37
 38        path = download(url, tarball, hash, hashtype)
 39        print('path: {}'.format(path), file=sys.stderr)
 40
 41        sha256 = get_sha256(path)
 42        print('sha256: {}'.format(sha256), file=sys.stderr)
 43
 44        print('  {')
 45        print('    name = "{}";'.format(tarball))
 46        print('    url = "{}";'.format(url))
 47        print('    sha256 = "{}";'.format(sha256))
 48        print('    md5 = "{}";'.format(md5))
 49        print('    md5name = "{}-{}";'.format(md5 or upstream_sha256,tarball))
 50        print('  }')
 51
 52    print(']')
 53
 54
 55def construct_url(x):
 56    if x['brief']:
 57        url = 'https://dev-www.libreoffice.org/src/{}{}'.format(
 58            x.get('subdir', ''), x['tarball'])
 59    else:
 60        url = 'https://dev-www.libreoffice.org/src/{}{}-{}'.format(
 61            x.get('subdir', ''), x['md5'], x['tarball'])
 62
 63    if x['name'].startswith('FONT_NOTO_') and not probe_url(url):
 64        return 'https://noto-website-2.storage.googleapis.com/pkgs/{}'.format(x['tarball'])
 65
 66    if x['name'] == 'FONT_OPENDYSLEXIC':
 67        return 'https://github.com/antijingoist/opendyslexic/releases/download/v0.91.12/{}'.format(x['tarball'])
 68
 69    return url
 70
 71
 72def probe_url(url: str) -> bool:
 73    request = urllib.request.Request(url, method='HEAD')
 74    try:
 75        with urllib.request.urlopen(request) as response:
 76            return response.status == 200
 77    except urllib.error.HTTPError as e:
 78        return False
 79
 80
 81def download(url, name, hash, hashtype):
 82    cmd = ['nix-prefetch-url', url, hash, '--print-path',
 83           '--type', hashtype, '--name', name]
 84    proc = subprocess.run(cmd, stdout=subprocess.PIPE, check=True,
 85                          universal_newlines=True)
 86    return proc.stdout.split('\n')[1].strip()
 87
 88
 89def get_sha256(path):
 90    cmd = ['sha256sum', path]
 91    proc = subprocess.run(cmd, stdout=subprocess.PIPE, check=True,
 92                          universal_newlines=True)
 93    return proc.stdout.split(' ')[0].strip()
 94
 95
 96def get_packages():
 97    """
 98    All of the package data: What's parsed from download.lst,
 99    plus our additions.
100    """
101    return apply_additions(get_packages_from_download_list(),
102                           get_additions())
103
104
def get_additions():
    """
    Loads `./libreoffice-srcs-additions.json` (relative to the current
    working directory): a mapping from package name (the all-caps
    identifiers used in `download.lst`) to a dict of additional attributes
    to set on that package.
    """
    with open('./libreoffice-srcs-additions.json') as f:
        text = f.read()
    return json.loads(text)
112
113
def apply_additions(xs, additions):
    """
    Yields each package dict from `xs` merged with `additions[name]` (if
    any). Keys already present on the package win over the additions,
    because `dict_merge` gives precedence to earlier dicts.
    """
    for package in xs:
        extra = additions.get(package['name'], {})
        yield dict_merge([package, extra])
118
119
def get_packages_from_download_list():
    """
    The result of parsing `download.lst`: yields dicts containing keys
    'name', 'tarball', 'md5', 'sha256', 'brief', ordered by the first line
    on which each package appears in the file.
    """
    attr_keys = ['tarball', 'md5', 'sha256', 'brief']

    def interpreted_lines():
        # Parsed, variable-substituted lines that one of the interpret_*
        # functions understood; the rest are reported and dropped.
        for line in sub_symbols(parse_lines(get_lines())):
            meaning = interpret(line)
            if meaning == 'unrecognized':
                print_skipped_line(line)
                continue
            yield dict_merge([line, meaning])

    def cluster(group):
        """
        Groups lines according to their order within the file, to support
        packages that are listed in `download.lst` more than once: the
        n-th line providing each attribute goes into the n-th cluster.
        """
        per_key = []
        for key in attr_keys:
            per_key.append([line for line in group if key in line['attrs']])
        return zip(*per_key)

    def packages():
        by_name = groupby(interpreted_lines(), lambda line: line['name'])
        for (name, group) in by_name:
            for members in cluster(group):
                yield {
                    'name': name,
                    'attrs': dict_merge(member['attrs'] for member in members),
                    'index': min(member['index'] for member in members),
                }

    ordered = sorted(packages(), key=lambda package: package['index'])
    for package in ordered:
        yield dict_merge([{'name': package['name']},
                          package['attrs']])
156
157
def dict_merge(xs):
    """
    Merges an iterable of dicts into one new dict. When a key occurs in
    several dicts, the value from the earliest dict wins.

    >>> dict_merge([{1: 2}, {3: 4}, {3: 5}]) == {1: 2, 3: 4}
    True
    """
    merged = {}
    # Apply the dicts last-to-first so that earlier ones overwrite later ones.
    for mapping in reversed(list(xs)):
        merged.update(mapping)
    return merged
164
165
def groupby(xs, f):
    """
    Yields `(key, items)` pairs, one per distinct value of `f(x)`, in
    ascending key order; `items` is a list that keeps the input order.

    >>> list(groupby([1, 2, 3, 4], lambda x: x % 2))
    [(0, [2, 4]), (1, [1, 3])]
    """
    # itertools.groupby only merges adjacent items, hence the sort.
    ordered = sorted(xs, key=f)
    for (key, members) in itertools.groupby(ordered, f):
        yield (key, list(members))
174
175
def get_lines():
    """
    Returns the lines of the `download.lst` file (without line endings).

    The file's path is taken from the `downloadList` environment variable.
    Raises `RuntimeError` with an explanatory message if that variable is
    unset or empty, instead of failing inside `open` with an opaque
    `TypeError`. Raises `OSError` if the file cannot be opened.
    """
    download_list = os.getenv('downloadList')

    if not download_list:
        raise RuntimeError(
            'environment variable `downloadList` must be set to the path '
            'of the LibreOffice download.lst file')

    # Decode explicitly so the result does not depend on the locale.
    with open(download_list, encoding='utf-8') as f:
        return f.read().splitlines()
182
183
def print_skipped_line(x):
    """
    Reports on stderr that the line described by `x` (a dict with keys
    'index' and 'original') was skipped.
    """
    message = 'Skipped line {}: {}'.format(x['index'], x['original'])
    print(message, file=sys.stderr)
189
190
def parse_lines(lines):
    """
    Input: List of strings (the lines from `download.lst`)
    Output: Iterator of dicts with keys 'key', 'value', 'index' (0-based
            position in `lines`) and 'original' (the unparsed line)

    Lines without information are dropped silently; lines that cannot be
    parsed are reported via `print_skipped_line` and dropped.
    """
    for (index, line) in enumerate(lines):
        parsed = parse_line(line)

        if parsed == 'nothing':
            continue

        record = {'index': index, 'original': line}

        if parsed == 'unrecognized':
            print_skipped_line(record)
            continue

        yield dict_merge([record, parsed])
209
210
def parse_line(line):
    """
    Input: A string (one line of `download.lst`)
    Output: One of 1. A dict with keys 'key', 'value'
                   2. 'nothing' (if the line contains no information)
                   3. 'unrecognized' (if parsing failed)
    """

    # The patterns are raw strings: `\s` in a plain string literal is an
    # invalid escape sequence, which newer Python versions warn about.

    # Blank lines and `#` comment lines carry no information.
    if re.match(r'\s*(#.*)?$', line):
        return 'nothing'

    # `KEY := value`, where KEY contains neither whitespace nor ':'.
    match = re.match(r'([^:\s]+)\s*:=\s*(.*)$', line)

    if match is None:
        return 'unrecognized'

    return {
        'key': match.group(1),
        'value': match.group(2).strip()
    }
231
232
def sub_symbols(xs):
    """
    Do substitution of variables across all lines.

    Every `$(name)` in a line's 'value' is replaced by the (unsubstituted)
    'value' of the line whose 'key' is `name`, or by the empty string if
    there is no such line. If a key occurs more than once, its last
    occurrence is the one used.

    >>> list(sub_symbols([{'key': 'a', 'value': 'x'},
    ...                   {'key': 'c', 'value': '$(a)yz'}])) == [
    ...     {'key': 'a', 'value': 'x'}, {'key': 'c', 'value': 'xyz'}]
    True
    """
    entries = list(xs)

    by_key = {}
    for entry in entries:
        by_key[entry['key']] = entry

    def lookup(name):
        entry = by_key.get(name)
        if entry is None:
            return ''
        return entry['value']

    for entry in entries:
        substituted = sub_str(entry['value'], lookup)
        yield dict_merge([{'value': substituted}, entry])
253
254
def sub_str(string, func):
    """
    Do substitution of variables in a single line.

    Each `$(name)` in `string` is replaced by `func(name)`; if `func`
    returns None, the bare `name` is used instead.

    >>> sub_str("x = $(x)", lambda k: {'x': 'a'}[k])
    'x = a'
    """
    pattern = re.compile(r'\$\(([^\$\(\)]+)\)')

    def replace(found):
        symbol = found.group(1)
        replacement = func(symbol)
        if replacement is None:
            return symbol
        return replacement

    return pattern.sub(replace, string)
269
270
def interpret(x):
    """
    Input: Dict with keys 'key' and 'value'
    Output: One of 1. Dict with keys 'name' and 'attrs'
                   2. 'unrecognized' (if interpretation failed)

    The interpreters are tried in order and the first result that is not
    None wins, so `interpret_tarball_with_md5` gets its chance before the
    plain tarball/jar interpreters.
    """
    interpreters = (interpret_md5, interpret_sha256,
                    interpret_tarball_with_md5, interpret_tarball,
                    interpret_jar)
    # Lazy: an interpreter only runs if all earlier ones returned None.
    results = (f(x) for f in interpreters)
    return next((r for r in results if r is not None), 'unrecognized')
283
284
def interpret_md5(x):
    """
    Recognizes a `<NAME>_MD5SUM` entry; returns None for any other key.

    >>> interpret_md5({'key': 'ODFGEN_MD5SUM',
    ...                'value': '32572ea48d9021bbd6fa317ddb697abc'})
    {'name': 'ODFGEN', 'attrs': {'md5': '32572ea48d9021bbd6fa317ddb697abc', 'sha256': ''}}
    """
    found = re.match('^(.*)_MD5SUM$', x['key'])

    if found is None:
        return None

    attrs = {'md5': x['value'], 'sha256': ''}
    return {'name': found.group(1), 'attrs': attrs}
296
def interpret_sha256(x):
    """
    Recognizes a `<NAME>_SHA256SUM` entry; returns None for any other key.

    >>> interpret_sha256({'key': 'ODFGEN_SHA256SUM', 'value': 'abc'})
    {'name': 'ODFGEN', 'attrs': {'sha256': 'abc', 'md5': ''}}
    """
    found = re.match('^(.*)_SHA256SUM$', x['key'])

    if found is None:
        return None

    attrs = {'sha256': x['value'], 'md5': ''}
    return {'name': found.group(1), 'attrs': attrs}
303
def interpret_tarball(x):
    """
    Recognizes a `<NAME>_TARBALL` entry whose value is taken as the plain
    tarball name ("brief" form); returns None for any other key.

    >>> interpret_tarball({'key': 'FREEHAND_TARBALL',
    ...                    'value': 'libfreehand-0.1.1.tar.bz2'})
    {'name': 'FREEHAND', 'attrs': {'tarball': 'libfreehand-0.1.1.tar.bz2', 'brief': True}}
    """
    found = re.match('^(.*)_TARBALL$', x['key'])

    if found is None:
        return None

    attrs = {'tarball': x['value'], 'brief': True}
    return {'name': found.group(1), 'attrs': attrs}
316
def interpret_jar(x):
    """
    Recognizes a `<NAME>_JAR` entry whose value is taken as the plain file
    name ("brief" form, stored under 'tarball'); returns None for any
    other key.

    >>> interpret_jar({'key': 'RHINO_JAR', 'value': 'rhino.jar'})
    {'name': 'RHINO', 'attrs': {'tarball': 'rhino.jar', 'brief': True}}
    """
    found = re.match('^(.*)_JAR$', x['key'])

    if found is None:
        return None

    attrs = {'tarball': x['value'], 'brief': True}
    return {'name': found.group(1), 'attrs': attrs}
323
324
def interpret_tarball_with_md5(x):
    """
    Recognizes a `<NAME>_TARBALL` or `<NAME>_JAR` entry whose value has the
    form `<32 hex digits>-<file name>`; returns None otherwise.

    >>> interpret_tarball_with_md5({
    ...     'key': 'CLUCENE_TARBALL',
    ...     'value': '48d647fbd8ef8889e5a7f422c1bfda94-clucene-core-2.3.3.4.tar.gz'}) == {
    ...     'name': 'CLUCENE',
    ...     'attrs': {'tarball': 'clucene-core-2.3.3.4.tar.gz',
    ...               'md5': '48d647fbd8ef8889e5a7f422c1bfda94',
    ...               'sha256': '',
    ...               'brief': False}}
    True
    """
    key_match = re.match('^(.*)_(TARBALL|JAR)$', x['key'])
    value_match = re.match('(?P<md5>[0-9a-fA-F]{32})-(?P<tarball>.+)$',
                           x['value'])

    if not (key_match and value_match):
        return None

    attrs = {'tarball': value_match.group('tarball'),
             'md5': value_match.group('md5'),
             'sha256': '',
             'brief': False}
    return {'name': key_match.group(1), 'attrs': attrs}
344
345
# Only run the generator when executed as a script, so that importing this
# module (e.g. to run its doctests) does not trigger any downloads.
if __name__ == '__main__':
    main()