+111
Cargo.lock
+111
Cargo.lock
···
34
34
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
35
35
36
36
[[package]]
37
+
name = "block-buffer"
38
+
version = "0.10.4"
39
+
source = "registry+https://github.com/rust-lang/crates.io-index"
40
+
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
41
+
dependencies = [
42
+
"generic-array",
43
+
]
44
+
45
+
[[package]]
37
46
name = "bumpalo"
38
47
version = "3.19.0"
39
48
source = "registry+https://github.com/rust-lang/crates.io-index"
···
72
81
version = "0.8.7"
73
82
source = "registry+https://github.com/rust-lang/crates.io-index"
74
83
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
84
+
85
+
[[package]]
86
+
name = "cpufeatures"
87
+
version = "0.2.17"
88
+
source = "registry+https://github.com/rust-lang/crates.io-index"
89
+
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
90
+
dependencies = [
91
+
"libc",
92
+
]
93
+
94
+
[[package]]
95
+
name = "crypto-common"
96
+
version = "0.1.7"
97
+
source = "registry+https://github.com/rust-lang/crates.io-index"
98
+
checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
99
+
dependencies = [
100
+
"generic-array",
101
+
"typenum",
102
+
]
75
103
76
104
[[package]]
77
105
name = "darling"
···
119
147
]
120
148
121
149
[[package]]
150
+
name = "digest"
151
+
version = "0.10.7"
152
+
source = "registry+https://github.com/rust-lang/crates.io-index"
153
+
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
154
+
dependencies = [
155
+
"block-buffer",
156
+
"crypto-common",
157
+
]
158
+
159
+
[[package]]
122
160
name = "find-msvc-tools"
123
161
version = "0.1.5"
124
162
source = "registry+https://github.com/rust-lang/crates.io-index"
···
131
169
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
132
170
133
171
[[package]]
172
+
name = "generic-array"
173
+
version = "0.14.7"
174
+
source = "registry+https://github.com/rust-lang/crates.io-index"
175
+
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
176
+
dependencies = [
177
+
"typenum",
178
+
"version_check",
179
+
]
180
+
181
+
[[package]]
134
182
name = "hashbrown"
135
183
version = "0.12.3"
136
184
source = "registry+https://github.com/rust-lang/crates.io-index"
···
268
316
dependencies = [
269
317
"atrium-lex",
270
318
"heck",
319
+
"hex",
271
320
"pyo3",
272
321
"serde",
273
322
"serde_json",
323
+
"sha2",
274
324
"thiserror",
325
+
"walkdir",
275
326
]
276
327
277
328
[[package]]
···
378
429
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
379
430
380
431
[[package]]
432
+
name = "same-file"
433
+
version = "1.0.6"
434
+
source = "registry+https://github.com/rust-lang/crates.io-index"
435
+
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
436
+
dependencies = [
437
+
"winapi-util",
438
+
]
439
+
440
+
[[package]]
381
441
name = "serde"
382
442
version = "1.0.228"
383
443
source = "registry+https://github.com/rust-lang/crates.io-index"
···
460
520
]
461
521
462
522
[[package]]
523
+
name = "sha2"
524
+
version = "0.10.9"
525
+
source = "registry+https://github.com/rust-lang/crates.io-index"
526
+
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
527
+
dependencies = [
528
+
"cfg-if",
529
+
"cpufeatures",
530
+
"digest",
531
+
]
532
+
533
+
[[package]]
463
534
name = "shlex"
464
535
version = "1.3.0"
465
536
source = "registry+https://github.com/rust-lang/crates.io-index"
···
540
611
]
541
612
542
613
[[package]]
614
+
name = "typenum"
615
+
version = "1.19.0"
616
+
source = "registry+https://github.com/rust-lang/crates.io-index"
617
+
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
618
+
619
+
[[package]]
543
620
name = "unicode-ident"
544
621
version = "1.0.22"
545
622
source = "registry+https://github.com/rust-lang/crates.io-index"
···
552
629
checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
553
630
554
631
[[package]]
632
+
name = "version_check"
633
+
version = "0.9.5"
634
+
source = "registry+https://github.com/rust-lang/crates.io-index"
635
+
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
636
+
637
+
[[package]]
638
+
name = "walkdir"
639
+
version = "2.5.0"
640
+
source = "registry+https://github.com/rust-lang/crates.io-index"
641
+
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
642
+
dependencies = [
643
+
"same-file",
644
+
"winapi-util",
645
+
]
646
+
647
+
[[package]]
555
648
name = "wasm-bindgen"
556
649
version = "0.2.106"
557
650
source = "registry+https://github.com/rust-lang/crates.io-index"
···
597
690
]
598
691
599
692
[[package]]
693
+
name = "winapi-util"
694
+
version = "0.1.11"
695
+
source = "registry+https://github.com/rust-lang/crates.io-index"
696
+
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
697
+
dependencies = [
698
+
"windows-sys",
699
+
]
700
+
701
+
[[package]]
600
702
name = "windows-core"
601
703
version = "0.62.2"
602
704
source = "registry+https://github.com/rust-lang/crates.io-index"
···
654
756
dependencies = [
655
757
"windows-link",
656
758
]
759
+
760
+
[[package]]
761
+
name = "windows-sys"
762
+
version = "0.61.2"
763
+
source = "registry+https://github.com/rust-lang/crates.io-index"
764
+
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
765
+
dependencies = [
766
+
"windows-link",
767
+
]
+3
Cargo.toml
+3
Cargo.toml
+20
-11
justfile
+20
-11
justfile
···
1
-
# build rust extension
1
+
# build rust extension in dev mode
2
+
dev:
3
+
uvx maturin develop
4
+
5
+
# run tests
6
+
test: dev
7
+
uv run pytest -v
8
+
9
+
# build release wheels
2
10
build:
3
-
uv run maturin develop
11
+
uvx maturin build --release
4
12
5
-
# run tests (requires build first)
6
-
test: build
7
-
uv run pytest
8
-
9
-
# lint python
13
+
# lint
10
14
lint:
11
-
uv run ruff check
12
-
uv run ruff format --check
15
+
uv run ruff check .
16
+
uv run ruff format --check .
13
17
14
-
# format python
18
+
# format
15
19
fmt:
16
-
uv run ruff format
20
+
uv run ruff check --fix .
21
+
uv run ruff format .
22
+
23
+
# clean build artifacts
24
+
clean:
25
+
rm -rf target dist *.egg-info
+4
-25
python/pmgfal/__init__.py
+4
-25
python/pmgfal/__init__.py
···
3
3
from __future__ import annotations
4
4
5
5
import argparse
6
-
import hashlib
7
6
import os
8
7
import shutil
9
8
import sys
10
9
from pathlib import Path
11
10
12
-
from pmgfal._pmgfal import __version__, generate
11
+
from pmgfal._pmgfal import __version__, generate, hash_lexicons
13
12
14
-
__all__ = ["__version__", "generate", "main", "get_cache_dir"]
13
+
__all__ = ["__version__", "generate", "hash_lexicons", "main", "get_cache_dir"]
15
14
16
15
17
16
def get_cache_dir() -> Path:
···
23
22
else:
24
23
base = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache"))
25
24
return base / "pmgfal"
26
-
27
-
28
-
def hash_lexicons(lexicon_dir: Path, prefix: str | None = None) -> str:
29
-
"""compute a hash of all lexicon files in a directory."""
30
-
hasher = hashlib.sha256()
31
-
32
-
# include version in hash so cache invalidates on upgrades
33
-
hasher.update(__version__.encode())
34
-
35
-
# include prefix in hash
36
-
if prefix:
37
-
hasher.update(prefix.encode())
38
-
39
-
# hash all json files in sorted order for determinism
40
-
json_files = sorted(lexicon_dir.rglob("*.json"))
41
-
for path in json_files:
42
-
hasher.update(path.name.encode())
43
-
hasher.update(path.read_bytes())
44
-
45
-
return hasher.hexdigest()[:16]
46
25
47
26
48
27
def main(args: list[str] | None = None) -> int:
···
99
78
return 1
100
79
101
80
try:
102
-
# compute hash of lexicons
103
-
lexicon_hash = hash_lexicons(lexicon_dir, parsed.prefix)
81
+
# compute hash of lexicons (in rust)
82
+
lexicon_hash = hash_lexicons(str(lexicon_dir), parsed.prefix)
104
83
cache_dir = get_cache_dir() / lexicon_hash
105
84
106
85
# check cache
+42
src/lib.rs
+42
src/lib.rs
···
5
5
mod parser;
6
6
mod types;
7
7
8
+
use std::fs;
8
9
use std::path::Path;
9
10
10
11
use pyo3::prelude::*;
12
+
use sha2::{Digest, Sha256};
13
+
14
+
/// compute a hash of all lexicon files in a directory
15
+
#[pyfunction]
16
+
#[pyo3(signature = (lexicon_dir, namespace_prefix=None))]
17
+
fn hash_lexicons(lexicon_dir: &str, namespace_prefix: Option<&str>) -> PyResult<String> {
18
+
let lexicon_path = Path::new(lexicon_dir);
19
+
20
+
let mut hasher = Sha256::new();
21
+
22
+
// include version in hash so cache invalidates on upgrades
23
+
hasher.update(env!("CARGO_PKG_VERSION").as_bytes());
24
+
25
+
// include prefix in hash
26
+
if let Some(prefix) = namespace_prefix {
27
+
hasher.update(prefix.as_bytes());
28
+
}
29
+
30
+
// collect and sort json files for deterministic hashing
31
+
let mut json_files: Vec<_> = walkdir::WalkDir::new(lexicon_path)
32
+
.into_iter()
33
+
.filter_map(|e| e.ok())
34
+
.filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
35
+
.collect();
36
+
37
+
json_files.sort_by(|a, b| a.path().cmp(b.path()));
38
+
39
+
for entry in json_files {
40
+
let path = entry.path();
41
+
if let Some(name) = path.file_name() {
42
+
hasher.update(name.as_encoded_bytes());
43
+
}
44
+
if let Ok(content) = fs::read(path) {
45
+
hasher.update(&content);
46
+
}
47
+
}
48
+
49
+
let result = hasher.finalize();
50
+
Ok(hex::encode(&result[..8])) // 16 hex chars
51
+
}
11
52
12
53
/// generate pydantic models from lexicon files
13
54
#[pyfunction]
···
32
73
/// python extension module `_pmgfal`: registers the `generate` and
/// `hash_lexicons` functions and exposes the crate version as `__version__`
#[pymodule]
33
74
fn _pmgfal(m: &Bound<'_, PyModule>) -> PyResult<()> {
34
75
m.add_function(wrap_pyfunction!(generate, m)?)?;
76
+
m.add_function(wrap_pyfunction!(hash_lexicons, m)?)?;
35
77
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
36
78
Ok(())
37
79
}