pydantic model generator for atproto lexicons

feat: add git url support for cloning lexicons

- clone repos from github/tangled/etc. as a lexicon source (usage sketch below)
- auto-detect lexicons/ subdir in cloned repos
- clean up temp dir after generation
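
a minimal usage sketch, with an illustrative repo url (not a real source), assuming the package is installed and git is available on PATH:

    from pmgfal import main

    # a git url as the positional source: pmgfal clones it to a temp dir,
    # prefers a lexicons/ subdir if the repo has one, and removes the clone
    # once generation finishes
    exit_code = main(["https://github.com/example/atproto-lexicons"])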

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Changed files (+50 -18)
python/pmgfal/__init__.py (+42 -16)
···
  import argparse
  import os
  import shutil
+ import subprocess
  import sys
+ import tempfile
  from pathlib import Path

  from pmgfal._pmgfal import __version__, generate, hash_lexicons

- __all__ = ["__version__", "generate", "hash_lexicons", "main", "get_cache_dir"]
+ __all__ = ["__version__", "generate", "get_cache_dir", "hash_lexicons", "main"]


  def get_cache_dir() -> Path:
···
      return base / "pmgfal"


+ def is_git_url(path: str) -> bool:
+     """check if path looks like a git url."""
+     return path.startswith(("https://", "git@", "ssh://", "git://"))
+
+
  def main(args: list[str] | None = None) -> int:
      """cli entry point."""
      parser = argparse.ArgumentParser(
···
          description="pydantic model generator for atproto lexicons",
      )
      parser.add_argument(
-         "lexicon_dir",
+         "lexicon_source",
          nargs="?",
-         type=Path,
-         help="directory containing lexicon json files (default: ./lexicons or .)",
+         help="directory or git url containing lexicon json files (default: ./lexicons or .)",
      )
      parser.add_argument(
          "-o",
···

      parsed = parser.parse_args(args)

-     # auto-detect lexicon directory
-     if parsed.lexicon_dir is None:
-         if Path("./lexicons").is_dir():
-             lexicon_dir = Path("./lexicons")
+     temp_dir = None
+     try:
+         # handle git urls by cloning to temp dir
+         if parsed.lexicon_source and is_git_url(parsed.lexicon_source):
+             temp_dir = tempfile.mkdtemp(prefix="pmgfal-")
+             print(f"cloning {parsed.lexicon_source}...")
+             result = subprocess.run(
+                 ["git", "clone", "--depth=1", parsed.lexicon_source, temp_dir],
+                 capture_output=True,
+                 text=True,
+             )
+             if result.returncode != 0:
+                 print(f"error: git clone failed: {result.stderr}", file=sys.stderr)
+                 return 1
+             # look for lexicons subdir in cloned repo
+             if (Path(temp_dir) / "lexicons").is_dir():
+                 lexicon_dir = Path(temp_dir) / "lexicons"
+             else:
+                 lexicon_dir = Path(temp_dir)
+         # auto-detect lexicon directory
+         elif parsed.lexicon_source is None:
+             if Path("./lexicons").is_dir():
+                 lexicon_dir = Path("./lexicons")
+             else:
+                 lexicon_dir = Path(".")
          else:
-             lexicon_dir = Path(".")
-     else:
-         lexicon_dir = parsed.lexicon_dir
+             lexicon_dir = Path(parsed.lexicon_source)

-     if not lexicon_dir.is_dir():
-         print(f"error: not a directory: {lexicon_dir}", file=sys.stderr)
-         return 1
-
-     try:
+         if not lexicon_dir.is_dir():
+             print(f"error: not a directory: {lexicon_dir}", file=sys.stderr)
+             return 1
          # compute hash of lexicons (in rust)
          lexicon_hash = hash_lexicons(str(lexicon_dir), parsed.prefix)
          cache_dir = get_cache_dir() / lexicon_hash
···
      except Exception as e:
          print(f"error: {e}", file=sys.stderr)
          return 1
+     finally:
+         if temp_dir and Path(temp_dir).exists():
+             shutil.rmtree(temp_dir)


  if __name__ == "__main__":
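
for reference, a quick sketch of how the new is_git_url helper classifies sources (example values are illustrative; the helper is module-level in python/pmgfal/__init__.py and not exported via __all__). anything that does not match one of the four prefixes falls through to the local-directory path:

    from pmgfal import is_git_url

    assert is_git_url("https://tangled.sh/example/lexicons")    # https clone url
    assert is_git_url("git@github.com:example/lexicons.git")    # scp-style ssh url
    assert is_git_url("ssh://git@example.com/lexicons.git")     # explicit ssh scheme
    assert not is_git_url("./lexicons")                         # plain local directory
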
tests/test_generate.py (+8 -2)
···
          "defs": {
              "main": {
                  "type": "record",
-                 "record": {"type": "object", "properties": {"x": {"type": "string"}}},
+                 "record": {
+                     "type": "object",
+                     "properties": {"x": {"type": "string"}},
+                 },
              }
          },
      }
···
          "defs": {
              "main": {
                  "type": "record",
-                 "record": {"type": "object", "properties": {"y": {"type": "string"}}},
+                 "record": {
+                     "type": "object",
+                     "properties": {"y": {"type": "string"}},
+                 },
              }
          },