pydantic model generator for atproto lexicons

feat: shorthand git provider support and integration tests (#2)

* feat: shorthand git provider support and integration tests

- add owner/repo shorthand (tries github first, then tangled; errors if the repo is found on neither)
- add colored log output with ANSI codes (respects NO_COLOR)
- add integration tests for git providers with pytest parametrize
- update help text to mention shorthand support

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* feat: add tangled integration tests and pytest markers

- add pytest.mark.integration for network tests
- add tangled.org tests (full url and shorthand)
- update justfile: `test` (unit), `test-integration`, `test-all`
- fix tangled url to use tangled.org

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* ci: add uv caching and separate integration job

- enable uv cache in setup-uv for faster CI
- run unit tests in build jobs (fast feedback)
- add separate integration job after linux build
- add plyr.fm validation test (checks generated models compile)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* fix(ci): create venv before maturin develop

maturin develop requires an active virtualenv, so run `uv sync` first to create one

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>

authored by zzstoatzz.io (Claude) and committed by GitHub — commits 9a7f5fac, 38bc5727

Changed files
+292 -29
.github
workflows
python
pmgfal
scripts
tests
+28 -6
.github/workflows/publish.yml
··· 37 37 - name: Install uv 38 38 if: matrix.target == 'x86_64' 39 39 uses: astral-sh/setup-uv@v7 40 - - name: Test 40 + with: 41 + enable-cache: true 42 + - name: Unit tests 41 43 if: matrix.target == 'x86_64' 42 44 run: | 43 45 unset RUSTC_WRAPPER 44 46 uv sync 45 - uv run pytest -v 47 + uv run pytest -v -m "not integration" 46 48 47 49 windows: 48 50 runs-on: windows-latest ··· 67 69 path: dist 68 70 - name: Install uv 69 71 uses: astral-sh/setup-uv@v7 70 - - name: Test 72 + with: 73 + enable-cache: true 74 + - name: Unit tests 71 75 shell: pwsh 72 76 run: | 73 77 $env:RUSTC_WRAPPER = $null 74 78 uv sync 75 - uv run pytest -v 79 + uv run pytest -v -m "not integration" 76 80 77 81 macos: 78 82 runs-on: ${{ matrix.runner }} ··· 101 105 path: dist 102 106 - name: Install uv 103 107 uses: astral-sh/setup-uv@v7 104 - - name: Test 108 + with: 109 + enable-cache: true 110 + - name: Unit tests 105 111 run: | 106 112 unset RUSTC_WRAPPER 107 113 uv sync 108 - uv run pytest -v 114 + uv run pytest -v -m "not integration" 115 + 116 + integration: 117 + name: Integration tests 118 + runs-on: ubuntu-latest 119 + needs: [linux] 120 + steps: 121 + - uses: actions/checkout@v4 122 + - uses: astral-sh/setup-uv@v7 123 + with: 124 + enable-cache: true 125 + - name: Build and install 126 + run: | 127 + uv sync 128 + uv run maturin develop --release 129 + - name: Integration tests 130 + run: uv run pytest -v -m integration 109 131 110 132 sdist: 111 133 runs-on: ubuntu-latest
+9 -1
justfile
··· 2 2 dev: 3 3 uvx maturin develop 4 4 5 - # run tests 5 + # run unit tests only 6 6 test: dev 7 + uv run pytest -v -m "not integration" 8 + 9 + # run integration tests (requires network) 10 + test-integration: dev 11 + uv run pytest -v -m integration 12 + 13 + # run all tests 14 + test-all: dev 7 15 uv run pytest -v 8 16 9 17 # build release wheels
+3
pyproject.toml
··· 51 51 [tool.pytest.ini_options] 52 52 pythonpath = ["."] 53 53 testpaths = ["tests"] 54 + markers = [ 55 + "integration: marks tests that require network access (clone from git providers)", 56 + ] 54 57 55 58 [tool.ruff.lint] 56 59 fixable = ["ALL"]
+112 -19
python/pmgfal/__init__.py
··· 4 4 5 5 import argparse 6 6 import os 7 + import re 7 8 import shutil 8 9 import subprocess 9 10 import sys ··· 13 14 from pmgfal._pmgfal import __version__, generate, hash_lexicons 14 15 15 16 __all__ = ["__version__", "generate", "get_cache_dir", "hash_lexicons", "main"] 17 + 18 + # ansi color codes 19 + _RESET = "\033[0m" 20 + _DIM = "\033[2m" 21 + _CYAN = "\033[36m" 22 + _YELLOW = "\033[33m" 23 + _GREEN = "\033[32m" 24 + _RED = "\033[31m" 25 + 26 + # git provider templates (tried in order) 27 + GIT_PROVIDERS = [ 28 + ("github", "https://github.com/{}.git"), 29 + ("tangled", "https://tangled.org/{}.git"), 30 + ] 31 + 32 + 33 + def _supports_color() -> bool: 34 + """check if terminal supports color.""" 35 + if os.environ.get("NO_COLOR"): 36 + return False 37 + if not hasattr(sys.stdout, "isatty"): 38 + return False 39 + return sys.stdout.isatty() 40 + 41 + 42 + def _log(msg: str, color: str = "", dim: bool = False) -> None: 43 + """print a log message with optional color.""" 44 + if _supports_color(): 45 + prefix = _DIM if dim else "" 46 + print(f"{prefix}{color}{msg}{_RESET}") 47 + else: 48 + print(msg) 49 + 50 + 51 + def _log_info(msg: str) -> None: 52 + _log(msg, _CYAN) 53 + 54 + 55 + def _log_warn(msg: str) -> None: 56 + _log(msg, _YELLOW) 57 + 58 + 59 + def _log_success(msg: str) -> None: 60 + _log(msg, _GREEN) 61 + 62 + 63 + def _log_error(msg: str) -> None: 64 + _log(msg, _RED) 65 + 66 + 67 + def _log_dim(msg: str) -> None: 68 + _log(msg, dim=True) 16 69 17 70 18 71 def get_cache_dir() -> Path: ··· 31 84 return path.startswith(("https://", "git@", "ssh://", "git://")) 32 85 33 86 87 + def is_shorthand(path: str) -> bool: 88 + """check if path looks like owner/repo shorthand.""" 89 + return bool(re.match(r"^[\w.-]+/[\w.-]+$", path)) 90 + 91 + 92 + def clone_repo(source: str, dest: str) -> tuple[bool, str]: 93 + """clone a git repo, returns (success, url_used).""" 94 + if is_git_url(source): 95 + # full url - just try it 96 + _log_info(f"cloning 
{source}...") 97 + result = subprocess.run( 98 + ["git", "clone", "--depth=1", source, dest], 99 + capture_output=True, 100 + text=True, 101 + ) 102 + return result.returncode == 0, source 103 + 104 + if is_shorthand(source): 105 + # try providers in order 106 + for provider_name, url_template in GIT_PROVIDERS: 107 + url = url_template.format(source) 108 + _log_info(f"trying {provider_name}: {url}") 109 + result = subprocess.run( 110 + ["git", "clone", "--depth=1", url, dest], 111 + capture_output=True, 112 + text=True, 113 + ) 114 + if result.returncode == 0: 115 + return True, url 116 + _log_dim(f" not found on {provider_name}") 117 + # clean up failed clone attempt 118 + if Path(dest).exists(): 119 + shutil.rmtree(dest) 120 + 121 + _log_error(f"could not find '{source}' on any provider") 122 + _log_error("tried: " + ", ".join(name for name, _ in GIT_PROVIDERS)) 123 + _log_error("use a full git url instead") 124 + return False, "" 125 + 126 + return False, "" 127 + 128 + 34 129 def main(args: list[str] | None = None) -> int: 35 130 """cli entry point.""" 36 131 parser = argparse.ArgumentParser( ··· 40 135 parser.add_argument( 41 136 "lexicon_source", 42 137 nargs="?", 43 - help="directory or git url containing lexicon json files (default: ./lexicons or .)", 138 + help="directory, git url, or owner/repo shorthand (default: ./lexicons or .)", 44 139 ) 45 140 parser.add_argument( 46 141 "-o", ··· 72 167 73 168 temp_dir = None 74 169 try: 75 - # handle git urls by cloning to temp dir 76 - if parsed.lexicon_source and is_git_url(parsed.lexicon_source): 170 + source = parsed.lexicon_source 171 + 172 + # handle git urls or shorthand by cloning to temp dir 173 + if source and (is_git_url(source) or is_shorthand(source)): 77 174 temp_dir = tempfile.mkdtemp(prefix="pmgfal-") 78 - print(f"cloning {parsed.lexicon_source}...") 79 - result = subprocess.run( 80 - ["git", "clone", "--depth=1", parsed.lexicon_source, temp_dir], 81 - capture_output=True, 82 - text=True, 83 - ) 84 
- if result.returncode != 0: 85 - print(f"error: git clone failed: {result.stderr}", file=sys.stderr) 175 + success, _ = clone_repo(source, temp_dir) 176 + if not success: 86 177 return 1 87 178 # look for lexicons subdir in cloned repo 88 179 if (Path(temp_dir) / "lexicons").is_dir(): ··· 90 181 else: 91 182 lexicon_dir = Path(temp_dir) 92 183 # auto-detect lexicon directory 93 - elif parsed.lexicon_source is None: 184 + elif source is None: 94 185 if Path("./lexicons").is_dir(): 95 186 lexicon_dir = Path("./lexicons") 96 187 else: 97 188 lexicon_dir = Path(".") 98 189 else: 99 - lexicon_dir = Path(parsed.lexicon_source) 190 + lexicon_dir = Path(source) 100 191 101 192 if not lexicon_dir.is_dir(): 102 - print(f"error: not a directory: {lexicon_dir}", file=sys.stderr) 193 + _log_error(f"not a directory: {lexicon_dir}") 103 194 return 1 104 195 # compute hash of lexicons (in rust) 105 196 lexicon_hash = hash_lexicons(str(lexicon_dir), parsed.prefix) ··· 113 204 for cached in cached_files: 114 205 dest = parsed.output / cached.name 115 206 shutil.copy2(cached, dest) 116 - print(f"cache hit ({lexicon_hash}) - copied {len(cached_files)} file(s):") 207 + _log_success( 208 + f"cache hit ({lexicon_hash}) - copied {len(cached_files)} file(s):" 209 + ) 117 210 for f in cached_files: 118 - print(f" {parsed.output / f.name}") 211 + _log_dim(f" {parsed.output / f.name}") 119 212 return 0 120 213 121 214 # cache miss - generate ··· 130 223 for f in files: 131 224 shutil.copy2(f, cache_dir / Path(f).name) 132 225 133 - print(f"generated {len(files)} file(s) (cached as {lexicon_hash}):") 226 + _log_success(f"generated {len(files)} file(s) (cached as {lexicon_hash}):") 134 227 for f in files: 135 - print(f" {f}") 228 + _log_dim(f" {f}") 136 229 return 0 137 230 except Exception as e: 138 - print(f"error: {e}", file=sys.stderr) 231 + _log_error(f"error: {e}") 139 232 return 1 140 233 finally: 141 234 if temp_dir and Path(temp_dir).exists():
+17 -3
scripts/bench.py
··· 13 13 # clone atproto 14 14 print("cloning atproto lexicons...") 15 15 subprocess.run( 16 - ["git", "clone", "--depth=1", "https://github.com/bluesky-social/atproto.git", tmp], 16 + [ 17 + "git", 18 + "clone", 19 + "--depth=1", 20 + "https://github.com/bluesky-social/atproto.git", 21 + tmp, 22 + ], 17 23 capture_output=True, 18 24 check=True, 19 25 ) ··· 27 33 # benchmark generation (cold) 28 34 start = time.perf_counter() 29 35 subprocess.run( 30 - ["uv", "run", "pmgfal", str(lexicon_dir), "-o", str(output_dir), "--no-cache"], 36 + [ 37 + "uv", 38 + "run", 39 + "pmgfal", 40 + str(lexicon_dir), 41 + "-o", 42 + str(output_dir), 43 + "--no-cache", 44 + ], 31 45 check=True, 32 46 ) 33 47 cold_time = time.perf_counter() - start ··· 44 58 ) 45 59 cache_time = time.perf_counter() - start 46 60 47 - print(f"\nresults:") 61 + print("\nresults:") 48 62 print(f" lexicons: {len(json_files)}") 49 63 print(f" output: {lines} lines") 50 64 print(f" cold generation: {cold_time:.3f}s")
+123
tests/test_git_providers.py
··· 1 + """integration tests for git provider support.""" 2 + 3 + import subprocess 4 + import tempfile 5 + from pathlib import Path 6 + 7 + import pytest 8 + 9 + pytestmark = pytest.mark.integration 10 + 11 + # repos known to exist on each provider with lexicons 12 + GIT_PROVIDER_REPOS = [ 13 + pytest.param( 14 + "https://github.com/bluesky-social/atproto.git", 15 + id="github-full-url", 16 + ), 17 + pytest.param( 18 + "bluesky-social/atproto", 19 + id="github-shorthand", 20 + ), 21 + pytest.param( 22 + "https://tangled.org/zzstoatzz.io/plyr.fm.git", 23 + id="tangled-full-url", 24 + ), 25 + pytest.param( 26 + "zzstoatzz.io/plyr.fm", 27 + id="tangled-shorthand", 28 + ), 29 + ] 30 + 31 + 32 + @pytest.mark.parametrize("source", GIT_PROVIDER_REPOS) 33 + def test_clone_and_generate(source: str) -> None: 34 + """test cloning and generating from various git sources.""" 35 + with tempfile.TemporaryDirectory() as tmp: 36 + output_dir = Path(tmp) / "output" 37 + result = subprocess.run( 38 + ["uv", "run", "pmgfal", source, "-o", str(output_dir), "--no-cache"], 39 + capture_output=True, 40 + text=True, 41 + timeout=120, 42 + ) 43 + assert result.returncode == 0, f"failed: {result.stderr}" 44 + assert output_dir.exists() 45 + py_files = list(output_dir.glob("*.py")) 46 + assert len(py_files) > 0, "no python files generated" 47 + 48 + 49 + def test_shorthand_fallback_on_nonexistent() -> None: 50 + """test that shorthand tries providers in order and fails gracefully.""" 51 + result = subprocess.run( 52 + [ 53 + "uv", 54 + "run", 55 + "pmgfal", 56 + "nonexistent-owner-12345/nonexistent-repo-67890", 57 + "-o", 58 + "/tmp/test", 59 + ], 60 + capture_output=True, 61 + text=True, 62 + timeout=120, 63 + ) 64 + assert result.returncode == 1 65 + output = result.stdout + result.stderr 66 + assert "trying github" in output 67 + assert "trying tangled" in output 68 + assert "could not find" in output 69 + 70 + 71 + def test_invalid_source_not_directory() -> None: 72 + """test error on 
invalid local path.""" 73 + result = subprocess.run( 74 + ["uv", "run", "pmgfal", "/nonexistent/path/to/lexicons", "-o", "/tmp/test"], 75 + capture_output=True, 76 + text=True, 77 + ) 78 + assert result.returncode == 1 79 + output = result.stdout + result.stderr 80 + assert "not a directory" in output 81 + 82 + 83 + def test_plyr_fm_generates_valid_models() -> None: 84 + """test that plyr.fm lexicons generate importable pydantic models.""" 85 + with tempfile.TemporaryDirectory() as tmp: 86 + output_dir = Path(tmp) / "output" 87 + 88 + # generate models from plyr.fm 89 + result = subprocess.run( 90 + [ 91 + "uv", 92 + "run", 93 + "pmgfal", 94 + "zzstoatzz.io/plyr.fm", 95 + "-o", 96 + str(output_dir), 97 + "-p", 98 + "fm.plyr", 99 + "--no-cache", 100 + ], 101 + capture_output=True, 102 + text=True, 103 + timeout=120, 104 + ) 105 + assert result.returncode == 0, ( 106 + f"generation failed: {result.stdout}{result.stderr}" 107 + ) 108 + 109 + # find the generated file 110 + py_files = list(output_dir.glob("*.py")) 111 + assert len(py_files) == 1, f"expected 1 file, got {py_files}" 112 + 113 + # read and validate the generated code 114 + content = py_files[0].read_text() 115 + 116 + # should have pydantic imports 117 + assert "from pydantic import BaseModel" in content 118 + 119 + # should have fm.plyr models (check for common ones) 120 + assert "class FmPlyr" in content 121 + 122 + # compile the generated code to check for syntax errors 123 + compile(content, py_files[0].name, "exec")