Revert "nltk-data: make searchable, add all downloadables" (#409843)

authored by winter.bsky.social and committed by GitHub 34fc0d11 40499761

+45 -224
+1 -1
nixos/modules/services/web-apps/mealie.nix
··· 76 API_PORT = toString cfg.port; 77 BASE_URL = "http://localhost:${toString cfg.port}"; 78 DATA_DIR = "/var/lib/mealie"; 79 - NLTK_DATA = pkgs.nltk-data.averaged-perceptron-tagger-eng; 80 } // (builtins.mapAttrs (_: val: toString val) cfg.settings); 81 82 serviceConfig = {
··· 76 API_PORT = toString cfg.port; 77 BASE_URL = "http://localhost:${toString cfg.port}"; 78 DATA_DIR = "/var/lib/mealie"; 79 + NLTK_DATA = pkgs.nltk-data.averaged_perceptron_tagger_eng; 80 } // (builtins.mapAttrs (_: val: toString val) cfg.settings); 81 82 serviceConfig = {
+1 -1
pkgs/by-name/me/mealie/package.nix
··· 109 110 # Needed for tests 111 preCheck = '' 112 - export NLTK_DATA=${nltk-data.averaged-perceptron-tagger-eng} 113 ''; 114 115 disabledTestPaths = [
··· 109 110 # Needed for tests 111 preCheck = '' 112 + export NLTK_DATA=${nltk-data.averaged_perceptron_tagger_eng} 113 ''; 114 115 disabledTestPaths = [
+2 -2
pkgs/by-name/pa/paperless-ngx/package.nix
··· 305 tesseract5 306 ; 307 nltkData = with nltk-data; [ 308 - punkt-tab 309 - snowball-data 310 stopwords 311 ]; 312 tests = { inherit (nixosTests) paperless; };
··· 305 tesseract5 306 ; 307 nltkData = with nltk-data; [ 308 + punkt_tab 309 + snowball_data 310 stopwords 311 ]; 312 tests = { inherit (nixosTests) paperless; };
+1 -1
pkgs/by-name/un/unstructured-api/package.nix
··· 152 153 paths = [ 154 nltk-data.punkt 155 - nltk-data.averaged-perceptron-tagger 156 ]; 157 }; 158 in
··· 152 153 paths = [ 154 nltk-data.punkt 155 + nltk-data.averaged_perceptron_tagger 156 ]; 157 }; 158 in
+1 -1
pkgs/development/python-modules/aider-chat/default.nix
··· 125 aider-nltk-data = symlinkJoin { 126 name = "aider-nltk-data"; 127 paths = [ 128 - nltk-data.punkt-tab 129 nltk-data.stopwords 130 ]; 131 };
··· 125 aider-nltk-data = symlinkJoin { 126 name = "aider-nltk-data"; 127 paths = [ 128 + nltk-data.punkt_tab 129 nltk-data.stopwords 130 ]; 131 };
+1 -1
pkgs/development/python-modules/ingredient-parser-nlp/default.nix
··· 44 45 # Needed for tests 46 preCheck = '' 47 - export NLTK_DATA=${nltk-data.averaged-perceptron-tagger-eng} 48 ''; 49 50 meta = {
··· 44 45 # Needed for tests 46 preCheck = '' 47 + export NLTK_DATA=${nltk-data.averaged_perceptron_tagger_eng} 48 ''; 49 50 meta = {
+1 -1
pkgs/development/python-modules/type-infer/default.nix
··· 24 name = "nltk-test-data"; 25 paths = [ 26 nltk-data.punkt 27 - nltk-data.punkt-tab 28 nltk-data.stopwords 29 ]; 30 };
··· 24 name = "nltk-test-data"; 25 paths = [ 26 nltk-data.punkt 27 + nltk-data.punkt_tab 28 nltk-data.stopwords 29 ]; 30 };
+36 -211
pkgs/tools/text/nltk-data/default.nix
··· 10 version = "0-unstable-2024-07-29"; 11 nativeBuildInputs = [ unzip ]; 12 dontBuild = true; 13 - dontFixup = true; 14 meta = with lib; { 15 description = "NLTK Data"; 16 homepage = "https://github.com/nltk/nltk_data"; 17 license = licenses.asl20; 18 platforms = platforms.all; 19 - maintainers = with maintainers; [ 20 - bengsparks 21 - happysalada 22 - ]; 23 }; 24 }; 25 makeNltkDataPackage = ··· 54 ''; 55 } 56 ); 57 - 58 - makeChunker = 59 - pname: 60 - makeNltkDataPackage { 61 - inherit pname; 62 - location = "chunkers"; 63 - hash = "sha256-kemjqaCM9hlKAdMw8oVJnp62EAC9rMQ50dKg7wlAwEc="; 64 - }; 65 - 66 - makeCorpus = 67 - pname: 68 - makeNltkDataPackage { 69 - inherit pname; 70 - location = "corpora"; 71 - hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk="; 72 - }; 73 - 74 - makeGrammar = 75 - pname: 76 - makeNltkDataPackage { 77 - inherit pname; 78 - location = "grammars"; 79 - hash = "sha256-pyLEcX3Azv8j1kCGvVYonuiNgVJxtWt7veU0S/yNbIM="; 80 - }; 81 - 82 - makeHelp = 83 - pname: 84 - makeNltkDataPackage { 85 - inherit pname; 86 - location = "help"; 87 - hash = "sha256-97mYLNES5WujLF5gD8Ul4cJ6LqSzz+jDzclUsdBeHNE="; 88 - }; 89 - 90 - makeMisc = 91 - pname: 92 - makeNltkDataPackage { 93 - inherit pname; 94 - location = "misc"; 95 - hash = "sha256-XtizfEsc8TYWqvvC/eSFdha2ClC5/ZiJM8nue0vXLb4="; 96 - }; 97 - 98 - makeModel = 99 - pname: 100 - makeNltkDataPackage { 101 - inherit pname; 102 - location = "models"; 103 - hash = "sha256-iq3weEgCci6rgLW2j28F2eRLprJtInGXKe/awJPSVG4="; 104 - }; 105 - 106 - makeTagger = 107 - pname: 108 - makeNltkDataPackage { 109 - inherit pname; 110 - location = "taggers"; 111 - hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; 112 - }; 113 - 114 - makeTokenizer = 115 - pname: 116 - makeNltkDataPackage { 117 - inherit pname; 118 - location = "tokenizers"; 119 - hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; 120 - }; 121 - 122 - makeStemmer = 123 - pname: 124 - makeNltkDataPackage { 125 - inherit pname; 126 - location = "stemmers"; 127 - hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk="; 128 - }; 129 in 130 lib.makeScope newScope (self: { 131 - ## Chunkers 132 - maxent-ne-chunker = makeChunker "maxent_ne_chunker"; 133 - maxent-ne-chunker-tab = makeChunker "maxent_ne_chunker_tab"; 134 - 135 - ## Corpora 136 - abc = makeCorpus "abc"; 137 - alpino = makeCorpus "alpino"; 138 - bcp47 = makeCorpus "bcp47"; 139 - biocreative-ppi = makeCorpus "biocreative_ppi"; 140 - brown = makeCorpus "brown"; 141 - brown-tei = makeCorpus "brown_tei"; 142 - cess-cat = makeCorpus "cess_cat"; 143 - cess-esp = makeCorpus "cess_esp"; 144 - chat80 = makeCorpus "chat80"; 145 - city-database = makeCorpus "city_database"; 146 - cmudict = makeCorpus "cmudict"; 147 - comparative-sentences = makeCorpus "comparative_sentences"; 148 - comtrans = makeCorpus "comtrans"; 149 - conll2000 = makeCorpus "conll2000"; 150 - conll2002 = makeCorpus "conll2002"; 151 - conll2007 = makeCorpus "conll2007"; 152 - crubadan = makeCorpus "crubadan"; 153 - dependency-treebank = makeCorpus "dependency_treebank"; 154 - dolch = makeCorpus "dolch"; 155 - europarl-raw = makeCorpus "europarl_raw"; 156 - extended-omw = makeCorpus "extended_omw"; 157 - floresta = makeCorpus "floresta"; 158 - framenet-v15 = makeCorpus "framenet_v15"; 159 - framenet-v17 = makeCorpus "framenet_v17"; 160 - gazetteers = makeCorpus "gazetteers"; 161 - genesis = makeCorpus "genesis"; 162 - gutenberg = makeCorpus "gutenberg"; 163 - ieer = makeCorpus "ieer"; 164 - inaugural = makeCorpus "inaugural"; 165 - indian = makeCorpus "indian"; 166 - jeita = makeCorpus "jeita"; 167 - kimmo = makeCorpus "kimmo"; 168 - knbc = makeCorpus "knbc"; 169 - lin-thesaurus = makeCorpus "lin_thesaurus"; 170 - mac-morpho = makeCorpus "mac_morpho"; 171 - machado = makeCorpus "machado"; 172 - masc-tagged = makeCorpus "masc_tagged"; 173 - movie-reviews = makeCorpus "movie_reviews"; 174 - mte-teip5 = makeCorpus "mte_teip5"; 175 - names = makeCorpus "names"; 176 - nombank-1-0 = makeCorpus "nombank.1.0"; 177 - nonbreaking-prefixes = makeCorpus "nonbreaking_prefixes"; 178 - nps-chat = makeCorpus "nps_chat"; 179 - omw = makeCorpus "omw"; 180 - omw-1-4 = makeCorpus "omw-1.4"; 181 - opinion-lexicon = makeCorpus "opinion_lexicon"; 182 - panlex-swadesh = makeCorpus "panlex_swadesh"; 183 - paradigms = makeCorpus "paradigms"; 184 - pe08 = makeCorpus "pe08"; 185 - pil = makeCorpus "pil"; 186 - pl196x = makeCorpus "pl196x"; 187 - ppattach = makeCorpus "ppattach"; 188 - problem-reports = makeCorpus "problem_reports"; 189 - product-reviews-1 = makeCorpus "product_reviews_1"; 190 - product-reviews-2 = makeCorpus "product_reviews_2"; 191 - propbank = makeCorpus "propbank"; 192 - pros-cons = makeCorpus "pros_cons"; 193 - ptb = makeCorpus "ptb"; 194 - qc = makeCorpus "qc"; 195 - reuters = makeCorpus "reuters"; 196 - rte = makeCorpus "rte"; 197 - semcor = makeCorpus "semcor"; 198 - senseval = makeCorpus "senseval"; 199 - sentence-polarity = makeCorpus "sentence_polarity"; 200 - sentiwordnet = makeCorpus "sentiwordnet"; 201 - shakespeare = makeCorpus "shakespeare"; 202 - sinica-treebank = makeCorpus "sinica_treebank"; 203 - smultron = makeCorpus "smultron"; 204 - state-union = makeCorpus "state_union"; 205 - stopwords = makeCorpus "stopwords"; 206 - subjectivity = makeCorpus "subjectivity"; 207 - swadesh = makeCorpus "swadesh"; 208 - switchboard = makeCorpus "switchboard"; 209 - timit = makeCorpus "timit"; 210 - toolbox = makeCorpus "toolbox"; 211 - treebank = makeCorpus "treebank"; 212 - twitter-samples = makeCorpus "twitter_samples"; 213 - udhr = makeCorpus "udhr"; 214 - udhr2 = makeCorpus "udhr2"; 215 - unicode-samples = makeCorpus "unicode_samples"; 216 - universal-treebanks-v20 = makeCorpus "universal_treebanks_v20"; 217 - verbnet = makeCorpus "verbnet"; 218 - verbnet3 = makeCorpus "verbnet3"; 219 - webtext = makeCorpus "webtext"; 220 - wordnet = makeCorpus "wordnet"; 221 - wordnet-ic = makeCorpus "wordnet_ic"; 222 - wordnet2021 = makeCorpus "wordnet2021"; 223 - wordnet2022 = makeCorpus "wordnet2022"; 224 - wordnet31 = makeCorpus "wordnet31"; 225 - words = makeCorpus "words"; 226 - ycoe = makeCorpus "ycoe"; 227 - 228 - ## Grammars 229 - basque-grammars = makeGrammar "basque_grammars"; 230 - book-grammars = makeGrammar "book_grammars"; 231 - large-grammars = makeGrammar "large_grammars"; 232 - sample-grammars = makeGrammar "sample_grammars"; 233 - spanish-grammars = makeGrammar "spanish_grammars"; 234 - 235 - ## Help 236 - tagsets-json = makeHelp "tagsets_json"; 237 - 238 - ## Misc 239 - mwa-ppdb = makeMisc "mwa_ppdb"; 240 - perluniprops = makeMisc "perluniprops"; 241 - 242 - ## Models 243 - bllip-wsj-no-aux = makeModel "bllip_wsj_no_aux"; 244 - moses-sample = makeModel "moses_sample"; 245 - wmt15-eval = makeModel "wmt15_eval"; 246 - word2vec-sample = makeModel "word2vec_sample"; 247 - 248 - ## Taggers 249 - averaged-perceptron-tagger = makeTagger "averaged_perceptron_tagger"; 250 - averaged-perceptron-tagger-eng = makeTagger "averaged_perceptron_tagger_eng"; 251 - averaged-perceptron-tagger-ru = makeTagger "averaged_perceptron_tagger_ru"; 252 - averaged-perceptron-tagger-rus = makeTagger "averaged_perceptron_tagger_rus"; 253 - maxent-treebank-pos-tagger = makeTagger "maxent_treebank_pos_tagger"; 254 - maxent-treebank-pos-tagger-tab = makeTagger "maxent_treebank_pos_tagger_tab"; 255 - universal-tagset = makeTagger "universal_tagset"; 256 - 257 - ## Tokenizers 258 - punkt = makeTokenizer "punkt"; 259 - punkt-tab = makeTokenizer "punkt_tab"; 260 - 261 - ## Stemmers 262 - porter-test = makeStemmer "porter_test"; 263 - rslp = makeStemmer "rslp"; 264 - snowball-data = makeStemmer "snowball_data"; 265 })
··· 10 version = "0-unstable-2024-07-29"; 11 nativeBuildInputs = [ unzip ]; 12 dontBuild = true; 13 meta = with lib; { 14 description = "NLTK Data"; 15 homepage = "https://github.com/nltk/nltk_data"; 16 license = licenses.asl20; 17 platforms = platforms.all; 18 + maintainers = with maintainers; [ happysalada ]; 19 }; 20 }; 21 makeNltkDataPackage = ··· 50 ''; 51 } 52 ); 53 in 54 lib.makeScope newScope (self: { 55 + punkt = makeNltkDataPackage { 56 + pname = "punkt"; 57 + location = "tokenizers"; 58 + hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; 59 + }; 60 + punkt_tab = makeNltkDataPackage { 61 + pname = "punkt_tab"; 62 + location = "tokenizers"; 63 + hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; 64 + }; 65 + averaged_perceptron_tagger = makeNltkDataPackage { 66 + pname = "averaged_perceptron_tagger"; 67 + location = "taggers"; 68 + hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; 69 + }; 70 + averaged_perceptron_tagger_eng = makeNltkDataPackage { 71 + pname = "averaged_perceptron_tagger_eng"; 72 + location = "taggers"; 73 + hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; 74 + }; 75 + snowball_data = makeNltkDataPackage { 76 + pname = "snowball_data"; 77 + location = "stemmers"; 78 + hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk="; 79 + }; 80 + stopwords = makeNltkDataPackage { 81 + pname = "stopwords"; 82 + location = "corpora"; 83 + hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk="; 84 + }; 85 + wordnet = makeNltkDataPackage { 86 + pname = "wordnet"; 87 + location = "corpora"; 88 + hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk="; 89 + }; 90 })
-4
pkgs/top-level/aliases.nix
··· 1354 # When the nixops_unstable alias is removed, nixops_unstable_minimal can be renamed to nixops_unstable. 1355 1356 nixosTest = testers.nixosTest; # Added 2022-05-05 1357 - nltk-data.averaged_perceptron_tagger = nltk-data.averaged-perceptron-tagger; # Added 2025-05-21 1358 - nltk-data.averaged_perceptron_tagger_eng = nltk-data.averaged-perceptron-tagger-eng; # Added 2025-05-21 1359 - nltk-data.punkt_tab = nltk-data.punkt-tab; # Added 2025-05-21 1360 - nltk-data.snowball_data = nltk-data.snowball-data; # Added 2025-05-21 1361 nmap-unfree = throw "'nmap-unfree' has been renamed to/replaced by 'nmap'"; # Converted to throw 2024-10-17 1362 noah = throw "'noah' has been removed because it was broken and its upstream archived"; # Added 2025-05-10 1363 nodejs_18 = throw "Node.js 18.x has reached End-Of-Life and has been removed"; # Added 2025-04-23
··· 1354 # When the nixops_unstable alias is removed, nixops_unstable_minimal can be renamed to nixops_unstable. 1355 1356 nixosTest = testers.nixosTest; # Added 2022-05-05 1357 nmap-unfree = throw "'nmap-unfree' has been renamed to/replaced by 'nmap'"; # Converted to throw 2024-10-17 1358 noah = throw "'noah' has been removed because it was broken and its upstream archived"; # Added 2025-05-10 1359 nodejs_18 = throw "Node.js 18.x has reached End-Of-Life and has been removed"; # Added 2025-04-23
+1 -1
pkgs/top-level/all-packages.nix
··· 2344 2345 mpd-sima = python3Packages.callPackage ../tools/audio/mpd-sima { }; 2346 2347 - nltk-data = lib.recurseIntoAttrs (callPackage ../tools/text/nltk-data { }); 2348 2349 seabios-coreboot = seabios.override { ___build-type = "coreboot"; }; 2350 seabios-csm = seabios.override { ___build-type = "csm"; };
··· 2344 2345 mpd-sima = python3Packages.callPackage ../tools/audio/mpd-sima { }; 2346 2347 + nltk-data = callPackage ../tools/text/nltk-data { }; 2348 2349 seabios-coreboot = seabios.override { ___build-type = "coreboot"; }; 2350 seabios-csm = seabios.override { ___build-type = "csm"; };