at 25.11-pre 3.7 kB view raw
# Builds one Python package per entry of ./models.json and returns them as an
# attribute set keyed by each entry's `pname`.
{
  lib,
  buildPythonPackage,
  fetchurl,
  protobuf,
  pymorphy3,
  pymorphy3-dicts-uk,
  sentencepiece,
  setuptools,
  spacy,
  spacy-pkuseg,
  spacy-curated-transformers,
  sudachipy,
  sudachidict-core,
  transformers,
  writeScript,
  stdenv,
  jq,
  nix,
  moreutils,
}:
let
  # Build a single spaCy model package.
  #
  # Arguments (the fields of one models.json entry):
  #   pname   - model name, e.g. "fr_dep_news_trf"; its first two characters
  #             are used as the language code when selecting extra inputs
  #   version - release version, used in the download URL
  #   sha256  - hash of the release tarball
  #   license - attribute name looked up in `lib.licenses`
  buildModelPackage =
    {
      pname,
      version,
      sha256,
      license,
    }:

    let
      # Language code: the two leading characters of the model name.
      lang = builtins.substring 0 2 pname;

      # Models whose meta.json has its "protobuf<3.21.0" pin relaxed in
      # postPatch and which get protobuf as a build-time input.
      requires-protobuf = builtins.elem pname [
        "fr_dep_news_trf"
        "sl_core_news_trf"
        "uk_core_news_trf"
      ];

      # Models that additionally depend on sentencepiece.
      requires-sentencepiece = builtins.elem pname [
        "fr_dep_news_trf"
        "sl_core_news_trf"
      ];

      # Models that additionally depend on transformers.
      requires-transformers = pname == "uk_core_news_trf";
    in
    buildPythonPackage {
      inherit pname version;
      pyproject = true;

      src = fetchurl {
        url = "https://github.com/explosion/spacy-models/releases/download/${pname}-${version}/${pname}-${version}.tar.gz";
        inherit sha256;
      };

      # Python build backend.
      build-system = [ setuptools ];

      # NOTE(review): protobuf is only provided at build time, not propagated;
      # presumably this satisfies the requirement declared in meta.json during
      # the build - confirm before moving it into `dependencies`.
      nativeBuildInputs = lib.optionals requires-protobuf [ protobuf ];

      # Runtime dependencies: spacy always, plus extras selected by the
      # model-name suffix, the language code, or the per-model flags above.
      dependencies =
        [ spacy ]
        ++ lib.optionals (lib.hasSuffix "_trf" pname) [ spacy-curated-transformers ]
        ++ lib.optionals requires-transformers [ transformers ]
        ++ lib.optionals (lang == "ja") [
          sudachidict-core
          sudachipy
        ]
        ++ lib.optionals (lang == "ru") [ pymorphy3 ]
        ++ lib.optionals (lang == "uk") [
          pymorphy3
          pymorphy3-dicts-uk
        ]
        ++ lib.optionals (lang == "zh") [ spacy-pkuseg ]
        ++ lib.optionals requires-sentencepiece [ sentencepiece ];

      # Drop version constraints from meta.json. `--replace-fail` makes the
      # build fail if the expected constraint string is no longer present.
      postPatch =
        lib.optionalString requires-protobuf ''
          substituteInPlace meta.json \
            --replace-fail "protobuf<3.21.0" "protobuf"
        ''
        + lib.optionalString (lang == "zh") ''
          # Uses numpy 2.x, while the rest of the dependencies still uses
          # numpy 1.x. Remove once all spaCy packages are updated for
          # numpy 2.x.
          substituteInPlace meta.json \
            --replace-fail "spacy-pkuseg>=1.0.0,<2.0.0" "spacy-pkuseg"
        '';

      # Each model is importable as a Python module named after the package.
      pythonImportsCheck = [ pname ];

      # Update script: derives the target model version "<major>.<minor>.0"
      # from `spacy.version`, then for every model in models.json that is not
      # already at that version, prefetches the new tarball and rewrites its
      # `sha256` and `version` in place, keeping the list sorted by pname.
      # It changes directory to pkgs/development/python-modules/spacy/ using a
      # relative path, so it depends on the directory it is started from.
      passthru.updateScript = writeScript "update-spacy-models" ''
        #!${stdenv.shell}
        set -eou pipefail
        PATH=${
          lib.makeBinPath [
            jq
            nix
            moreutils
          ]
        }

        IFS=. read -r major minor patch <<<"${spacy.version}"
        spacyVersion="$(echo "$major.$minor.0")"

        pushd pkgs/development/python-modules/spacy/ || exit

        jq -r '.[] | .pname' models.json | while IFS= read -r pname; do
          if [ "$(jq --arg pname "$pname" -r '.[] | select(.pname == $pname) | .version' models.json)" == "$spacyVersion" ]; then
            continue
          fi

          newHash="$(nix-prefetch-url "https://github.com/explosion/spacy-models/releases/download/$pname-$spacyVersion/$pname-$spacyVersion.tar.gz")"
          jq --arg newHash "$newHash" --arg pname "$pname" --arg spacyVersion "$spacyVersion" \
            '[(.[] | select(.pname != $pname)), (.[] | select(.pname == $pname) | .sha256 = $newHash | .version = $spacyVersion)] | sort_by(.pname)' \
            models.json | sponge models.json
        done

        popd || exit
      '';

      meta = {
        description = "Models for the spaCy NLP library";
        homepage = "https://github.com/explosion/spacy-models";
        license = lib.licenses.${license};
      };
    };

  # Turn a list of model descriptions into an attribute set that maps each
  # model's pname to its built package.
  makeModelSet =
    models: lib.listToAttrs (map (m: lib.nameValuePair m.pname (buildModelPackage m)) models);
in
makeModelSet (lib.importJSON ./models.json)