1#!/usr/bin/env bash
2
3# Usage:
4# ./fetch-language-hashes <tessdataRev> [<language code>…]
5#
6# Fetches all languages if no language codes are given.
7#
8# Example:
9# ./fetch-language-hashes 4.0.0 eng spa
10#
11# Output:
#   eng = "sha256-...";
#   spa = "sha256-...";
14
# Strict mode: abort on command failure, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -euo pipefail

# The tessdata revision is mandatory; everything after it is a language code.
if (( $# < 1 )); then
  printf 'Usage: %s <tessdataRev> [<language code>...]\n' "${0##*/}" >&2
  exit 1
fi
tessdataRev=$1
shift

langCodes=()
if (( $# > 0 )); then
  # Split the remaining arguments on whitespace (so a single quoted argument
  # such as "eng spa" keeps working) without subjecting them to globbing.
  read -r -a langCodes <<<"$*"
else
  # No codes given: scrape the repository listing for every *.traineddata
  # file name. curl -f turns an HTTP error (e.g. unknown revision) into a
  # non-zero exit status.
  repoPage=$(curl -fs "https://github.com/tesseract-ocr/tessdata/tree/$tessdataRev") || {
    echo "Invalid tessdataRev: $tessdataRev" >&2
    exit 1
  }
  # Collect the unique path components that directly precede ".traineddata".
  while IFS= read -r code; do
    langCodes+=("$code")
  done < <(grep -ohP "(?<=/)[^/ ]+?(?=\.traineddata)" <<<"$repoPage" | sort -u)
fi

# Fail loudly instead of printing nothing and exiting successfully.
if (( ${#langCodes[@]} == 0 )); then
  echo "No language codes found for tessdataRev: $tessdataRev" >&2
  exit 1
fi

for lang in "${langCodes[@]}"; do
  url="https://github.com/tesseract-ocr/tessdata/raw/$tessdataRev/$lang.traineddata"
  # nix-prefetch-url's stderr is discarded to keep the generated output clean,
  # so a failed download is reported explicitly here instead of producing an
  # empty hash.
  if ! prefetchedHash=$(nix-prefetch-url "$url" 2>/dev/null); then
    echo "Failed to fetch: $url" >&2
    exit 1
  fi
  # Convert the hash printed by nix-prefetch-url into SRI form.
  hash=$(nix --extra-experimental-features nix-command hash to-sri --type sha256 "$prefetchedHash")
  printf '%s = "%s";\n' "$lang" "$hash"
done