# Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
# at devShellTools-shell 265 lines 7.8 kB view raw
# NLTK data packages: one derivation per archive of the nltk/nltk_data
# repository, exposed as a package scope built with lib.makeScope.
#
# Attribute names use hyphens, while the `pname` passed to each helper is the
# archive's base name under packages/<location>/ in the repository.
{
  lib,
  newScope,
  fetchFromGitHub,
  unzip,
  stdenvNoCC,
}:
let
  # Attributes shared by every data package.
  commonAttrs = {
    version = "0-unstable-2024-07-29";
    # unzip is called from installPhase to extract the archive.
    nativeBuildInputs = [ unzip ];
    # Skip the build and fixup phases; only installPhase does any work.
    dontBuild = true;
    dontFixup = true;
    meta = {
      description = "NLTK Data";
      homepage = "https://github.com/nltk/nltk_data";
      license = lib.licenses.asl20;
      platforms = lib.platforms.all;
      maintainers = [
        lib.maintainers.bengsparks
        lib.maintainers.happysalada
      ];
    };
  };

  # Source hash for each location (directory under packages/).
  # NOTE(review): every package of a location is fetched with the same hash,
  # so the sparse checkout presumably yields the same tree for all of them --
  # confirm before changing the sparseCheckout pattern below.
  hashByLocation = {
    chunkers = "sha256-kemjqaCM9hlKAdMw8oVJnp62EAC9rMQ50dKg7wlAwEc=";
    corpora = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk=";
    grammars = "sha256-pyLEcX3Azv8j1kCGvVYonuiNgVJxtWt7veU0S/yNbIM=";
    help = "sha256-97mYLNES5WujLF5gD8Ul4cJ6LqSzz+jDzclUsdBeHNE=";
    misc = "sha256-XtizfEsc8TYWqvvC/eSFdha2ClC5/ZiJM8nue0vXLb4=";
    models = "sha256-iq3weEgCci6rgLW2j28F2eRLprJtInGXKe/awJPSVG4=";
    taggers = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M=";
    tokenizers = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
    stemmers = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk=";
  };

  # Build the derivation for a single archive.
  #
  #   pname    - base name of the archive (packages/<location>/<pname>.zip);
  #              installPhase expects it to unpack to a directory of that name
  #   location - directory under packages/ that holds the archive; also the
  #              directory created under $out
  #   hash     - hash handed to fetchFromGitHub for the sparse checkout
  makeNltkDataPackage =
    {
      pname,
      location,
      hash,
    }:
    let
      src = fetchFromGitHub {
        inherit hash;
        owner = "nltk";
        repo = "nltk_data";
        rev = "cfe82914f3c2d24363687f1db3b05e8b9f687e2b";
        sparseCheckout = [ "packages/${location}/${pname}.zip" ];
      };

      # Extract the archive into the working directory, then copy the
      # resulting <pname> directory to $out/<location>/.
      perPackageAttrs = {
        inherit pname src;
        installPhase = ''
          runHook preInstall

          mkdir -p $out
          unzip ${src}/packages/${location}/${pname}.zip
          mkdir -p $out/${location}
          cp -R ${pname}/ $out/${location}

          runHook postInstall
        '';
      };
    in
    stdenvNoCC.mkDerivation (commonAttrs // perPackageAttrs);

  # Curried helper: fix the location (and thereby its hash), leaving only the
  # pname to be supplied.
  makeFrom =
    location: pname:
    makeNltkDataPackage {
      inherit pname location;
      hash = hashByLocation.${location};
    };

  makeChunker = makeFrom "chunkers";
  makeCorpus = makeFrom "corpora";
  makeGrammar = makeFrom "grammars";
  makeHelp = makeFrom "help";
  makeMisc = makeFrom "misc";
  makeModel = makeFrom "models";
  makeTagger = makeFrom "taggers";
  makeTokenizer = makeFrom "tokenizers";
  makeStemmer = makeFrom "stemmers";
in
lib.makeScope newScope (self: {
  ## Chunkers
  maxent-ne-chunker = makeChunker "maxent_ne_chunker";
  maxent-ne-chunker-tab = makeChunker "maxent_ne_chunker_tab";

  ## Corpora
  abc = makeCorpus "abc";
  alpino = makeCorpus "alpino";
  bcp47 = makeCorpus "bcp47";
  biocreative-ppi = makeCorpus "biocreative_ppi";
  brown = makeCorpus "brown";
  brown-tei = makeCorpus "brown_tei";
  cess-cat = makeCorpus "cess_cat";
  cess-esp = makeCorpus "cess_esp";
  chat80 = makeCorpus "chat80";
  city-database = makeCorpus "city_database";
  cmudict = makeCorpus "cmudict";
  comparative-sentences = makeCorpus "comparative_sentences";
  comtrans = makeCorpus "comtrans";
  conll2000 = makeCorpus "conll2000";
  conll2002 = makeCorpus "conll2002";
  conll2007 = makeCorpus "conll2007";
  crubadan = makeCorpus "crubadan";
  dependency-treebank = makeCorpus "dependency_treebank";
  dolch = makeCorpus "dolch";
  europarl-raw = makeCorpus "europarl_raw";
  extended-omw = makeCorpus "extended_omw";
  floresta = makeCorpus "floresta";
  framenet-v15 = makeCorpus "framenet_v15";
  framenet-v17 = makeCorpus "framenet_v17";
  gazetteers = makeCorpus "gazetteers";
  genesis = makeCorpus "genesis";
  gutenberg = makeCorpus "gutenberg";
  ieer = makeCorpus "ieer";
  inaugural = makeCorpus "inaugural";
  indian = makeCorpus "indian";
  jeita = makeCorpus "jeita";
  kimmo = makeCorpus "kimmo";
  knbc = makeCorpus "knbc";
  lin-thesaurus = makeCorpus "lin_thesaurus";
  mac-morpho = makeCorpus "mac_morpho";
  machado = makeCorpus "machado";
  masc-tagged = makeCorpus "masc_tagged";
  movie-reviews = makeCorpus "movie_reviews";
  mte-teip5 = makeCorpus "mte_teip5";
  names = makeCorpus "names";
  nombank-1-0 = makeCorpus "nombank.1.0";
  nonbreaking-prefixes = makeCorpus "nonbreaking_prefixes";
  nps-chat = makeCorpus "nps_chat";
  omw = makeCorpus "omw";
  omw-1-4 = makeCorpus "omw-1.4";
  opinion-lexicon = makeCorpus "opinion_lexicon";
  panlex-swadesh = makeCorpus "panlex_swadesh";
  paradigms = makeCorpus "paradigms";
  pe08 = makeCorpus "pe08";
  pil = makeCorpus "pil";
  pl196x = makeCorpus "pl196x";
  ppattach = makeCorpus "ppattach";
  problem-reports = makeCorpus "problem_reports";
  product-reviews-1 = makeCorpus "product_reviews_1";
  product-reviews-2 = makeCorpus "product_reviews_2";
  propbank = makeCorpus "propbank";
  pros-cons = makeCorpus "pros_cons";
  ptb = makeCorpus "ptb";
  qc = makeCorpus "qc";
  reuters = makeCorpus "reuters";
  rte = makeCorpus "rte";
  semcor = makeCorpus "semcor";
  senseval = makeCorpus "senseval";
  sentence-polarity = makeCorpus "sentence_polarity";
  sentiwordnet = makeCorpus "sentiwordnet";
  shakespeare = makeCorpus "shakespeare";
  sinica-treebank = makeCorpus "sinica_treebank";
  smultron = makeCorpus "smultron";
  state-union = makeCorpus "state_union";
  stopwords = makeCorpus "stopwords";
  subjectivity = makeCorpus "subjectivity";
  swadesh = makeCorpus "swadesh";
  switchboard = makeCorpus "switchboard";
  timit = makeCorpus "timit";
  toolbox = makeCorpus "toolbox";
  treebank = makeCorpus "treebank";
  twitter-samples = makeCorpus "twitter_samples";
  udhr = makeCorpus "udhr";
  udhr2 = makeCorpus "udhr2";
  unicode-samples = makeCorpus "unicode_samples";
  universal-treebanks-v20 = makeCorpus "universal_treebanks_v20";
  verbnet = makeCorpus "verbnet";
  verbnet3 = makeCorpus "verbnet3";
  webtext = makeCorpus "webtext";
  wordnet = makeCorpus "wordnet";
  wordnet-ic = makeCorpus "wordnet_ic";
  wordnet2021 = makeCorpus "wordnet2021";
  wordnet2022 = makeCorpus "wordnet2022";
  wordnet31 = makeCorpus "wordnet31";
  words = makeCorpus "words";
  ycoe = makeCorpus "ycoe";

  ## Grammars
  basque-grammars = makeGrammar "basque_grammars";
  book-grammars = makeGrammar "book_grammars";
  large-grammars = makeGrammar "large_grammars";
  sample-grammars = makeGrammar "sample_grammars";
  spanish-grammars = makeGrammar "spanish_grammars";

  ## Help
  tagsets-json = makeHelp "tagsets_json";

  ## Misc
  mwa-ppdb = makeMisc "mwa_ppdb";
  perluniprops = makeMisc "perluniprops";

  ## Models
  bllip-wsj-no-aux = makeModel "bllip_wsj_no_aux";
  moses-sample = makeModel "moses_sample";
  wmt15-eval = makeModel "wmt15_eval";
  word2vec-sample = makeModel "word2vec_sample";

  ## Taggers
  averaged-perceptron-tagger = makeTagger "averaged_perceptron_tagger";
  averaged-perceptron-tagger-eng = makeTagger "averaged_perceptron_tagger_eng";
  averaged-perceptron-tagger-ru = makeTagger "averaged_perceptron_tagger_ru";
  averaged-perceptron-tagger-rus = makeTagger "averaged_perceptron_tagger_rus";
  maxent-treebank-pos-tagger = makeTagger "maxent_treebank_pos_tagger";
  maxent-treebank-pos-tagger-tab = makeTagger "maxent_treebank_pos_tagger_tab";
  universal-tagset = makeTagger "universal_tagset";

  ## Tokenizers
  punkt = makeTokenizer "punkt";
  punkt-tab = makeTokenizer "punkt_tab";

  ## Stemmers
  porter-test = makeStemmer "porter_test";
  rslp = makeStemmer "rslp";
  snowball-data = makeStemmer "snowball_data";
})