A better Rust ATProto crate

reworking schema stuff

Orual 16691f29 988f0eed

+64 -23
Cargo.lock
```diff
···
 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]

 [[package]]
···
  "libc",
  "percent-encoding",
  "pin-project-lite",
- "socket2 0.5.10",
+ "socket2 0.6.1",
  "system-configuration",
  "tokio",
  "tower-service",
···
  "js-sys",
  "log",
  "wasm-bindgen",
- "windows-core",
+ "windows-core 0.62.2",
 ]

 [[package]]
···
 [[package]]
 name = "jacquard-api"
 version = "0.8.0"
-source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
+source = "git+https://tangled.org/@nonbinary.computer/jacquard#988f0eedfc499d0e2cdd667f6adab086465984e2"
 dependencies = [
  "bon",
  "bytes",
···
 [[package]]
 name = "jacquard-common"
 version = "0.8.0"
-source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
+source = "git+https://tangled.org/@nonbinary.computer/jacquard#988f0eedfc499d0e2cdd667f6adab086465984e2"
 dependencies = [
  "base64 0.22.1",
  "bon",
···
 [[package]]
 name = "jacquard-derive"
 version = "0.8.0"
-source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
+source = "git+https://tangled.org/@nonbinary.computer/jacquard#988f0eedfc499d0e2cdd667f6adab086465984e2"
 dependencies = [
  "heck 0.5.0",
  "jacquard-lexicon 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
···
 [[package]]
 name = "jacquard-identity"
 version = "0.8.0"
-source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
+source = "git+https://tangled.org/@nonbinary.computer/jacquard#988f0eedfc499d0e2cdd667f6adab086465984e2"
 dependencies = [
  "bon",
  "bytes",
···
  "reqwest",
  "serde",
  "serde_json",
+ "syn 2.0.108",
  "tempfile",
  "thiserror 2.0.17",
  "tokio",
+ "toml",
  "walkdir",
 ]
···
 [[package]]
 name = "jacquard-lexicon"
 version = "0.8.0"
-source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
+source = "git+https://tangled.org/@nonbinary.computer/jacquard#988f0eedfc499d0e2cdd667f6adab086465984e2"
 dependencies = [
  "glob",
  "heck 0.5.0",
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
 dependencies = [
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]

 [[package]]
···
  "quinn-udp",
  "rustc-hash",
  "rustls",
- "socket2 0.5.10",
+ "socket2 0.6.1",
  "thiserror 2.0.17",
  "tokio",
  "tracing",
···
  "cfg_aliases",
  "libc",
  "once_cell",
- "socket2 0.5.10",
+ "socket2 0.6.1",
  "tracing",
- "windows-sys 0.59.0",
+ "windows-sys 0.60.2",
 ]

 [[package]]
···
  "errno",
  "libc",
  "linux-raw-sys 0.11.0",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]

 [[package]]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
 dependencies = [
+ "indexmap 2.12.0",
  "itoa",
  "memchr",
  "ryu",
···
  "getrandom 0.3.4",
  "once_cell",
  "rustix 1.1.2",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]

 [[package]]
···
  "serde",
  "serde_spanned",
  "toml_datetime",
+ "toml_write",
  "winnow 0.7.13",
 ]

 [[package]]
+name = "toml_write"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
+
+[[package]]
 name = "tower"
 version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
- "windows-sys 0.48.0",
+ "windows-sys 0.61.2",
 ]

 [[package]]
···
 checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
 dependencies = [
  "windows-collections",
- "windows-core",
+ "windows-core 0.61.2",
  "windows-future",
  "windows-link 0.1.3",
  "windows-numerics",
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
 dependencies = [
- "windows-core",
+ "windows-core 0.61.2",
 ]

 [[package]]
···
  "windows-implement",
  "windows-interface",
  "windows-link 0.1.3",
- "windows-result",
- "windows-strings",
+ "windows-result 0.3.4",
+ "windows-strings 0.4.2",
+]
+
+[[package]]
+name = "windows-core"
+version = "0.62.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-link 0.2.1",
+ "windows-result 0.4.1",
+ "windows-strings 0.5.1",
 ]

 [[package]]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
 dependencies = [
- "windows-core",
+ "windows-core 0.61.2",
  "windows-link 0.1.3",
  "windows-threading",
 ]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
 dependencies = [
- "windows-core",
+ "windows-core 0.61.2",
  "windows-link 0.1.3",
 ]
···
 checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
 dependencies = [
  "windows-link 0.1.3",
- "windows-result",
- "windows-strings",
+ "windows-result 0.3.4",
+ "windows-strings 0.4.2",
 ]

 [[package]]
···
 ]

 [[package]]
+name = "windows-result"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
+dependencies = [
+ "windows-link 0.2.1",
+]
+
+[[package]]
 name = "windows-strings"
 version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
 dependencies = [
  "windows-link 0.1.3",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
+dependencies = [
+ "windows-link 0.2.1",
 ]

 [[package]]
```
+3 -1
crates/jacquard-lexgen/Cargo.toml
```diff
···
 miette = { workspace = true, features = ["fancy"] }
 reqwest = { workspace = true, features = ["json", "http2", "system-proxy", "rustls-tls"] }
 serde.workspace = true
-serde_json.workspace = true
+serde_json = { workspace = true, features = ["preserve_order"] }
+syn.workspace = true
 tempfile = "3.23"
 thiserror.workspace = true
 tokio = { workspace = true, features = ["full"] }
+toml = "0.8"
 walkdir = "2.5"

 [dev-dependencies]
```
+117
crates/jacquard-lexgen/SCHEMA_DISCOVERY.md
# Schema Discovery Approaches

Jacquard provides two complementary approaches for discovering lexicon schemas from Rust types:

## 1. Inventory-Based Discovery (Link-Time)

**Module:** `schema_extraction`

Uses the `inventory` crate to collect schema types at link time.

### Pros

- ✅ Fast - schemas already in memory
- ✅ Works with compiled dependencies
- ✅ No parsing overhead
- ✅ Guaranteed to match compiled code

### Cons

- ❌ Only discovers types that are **linked** into the binary
- ❌ Requires creating a custom binary that imports your types
- ❌ Won't see unused types that the linker removes

### Usage

```rust
// bin/extract_schemas.rs
use jacquard_lexgen::schema_extraction;
use my_app::models::*; // ← Must import to link

fn main() -> miette::Result<()> {
    schema_extraction::run("lexicons", true)
}
```

### Best For

- Extracting schemas from your own crate
- When you already have types imported/used
- Production builds where you want to match exactly what's compiled

## 2. Workspace Discovery (Source Scanning)

**Module:** `schema_discovery`

Parses workspace source files directly using `syn`.

### Pros

- ✅ Discovers **all** types in workspace
- ✅ No linking required
- ✅ Works across workspace members
- ✅ Sees types even if they're not used

### Cons

- ❌ Slower - parses all .rs files
- ❌ Doesn't work with binary dependencies
- ❌ Must re-parse source on every run

### Usage

```rust
use jacquard_lexgen::schema_discovery::WorkspaceDiscovery;

fn main() -> miette::Result<()> {
    let schemas = WorkspaceDiscovery::new()
        .verbose(true)
        .scan()?;

    for schema in schemas {
        println!("{}: {}", schema.nsid, schema.type_name);
    }

    Ok(())
}
```

### Best For

- Workspace-wide schema auditing
- Finding all schema types regardless of usage
- Development workflows where you want comprehensive discovery
- When you don't want to maintain import lists

## Comparison

| Feature | Inventory | Workspace Scan |
|---------|-----------|----------------|
| Speed | Fast (runtime) | Slower (parsing) |
| Coverage | Linked types only | All types in workspace |
| Binary deps | ✅ Yes | ❌ No |
| Unused types | ❌ No | ✅ Yes |
| Workspace-wide | ❌ No | ✅ Yes |
| Setup complexity | Medium (need imports) | Low (just run) |

## Hybrid Approach

For best results, use both:

1. **Development:** Use workspace scan for comprehensive discovery
2. **CI/Production:** Use inventory for fast, exact extraction

```bash
# Development: find all schemas
cargo run --example workspace_discovery

# Production: extract linked schemas
cargo run --bin extract-schemas
```

## Future: Schema Generation

Phase 3 currently only **discovers** schemas. A future enhancement could combine
workspace discovery with the derive macro's schema generation logic to actually
**generate** lexicon JSON without needing to link anything.

This would require:

- Extracting schema generation logic from the derive macro
- Calling it directly from the scanner
- Managing dependencies between schema types

Tracked in: [Issue #TBD]
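One possible shape for that, as a hedged sketch: the `from_ast` builders introduced elsewhere in this diff already return `LexiconDoc` values rather than token streams, so source-level generation is mostly plumbing. The sketch assumes the `from_ast` re-exports shown later in this change and that `LexiconDoc` implements `serde::Serialize`; `generate_from_file` is a hypothetical helper, not an existing API:

```rust
// Hypothetical sketch: generate lexicon JSON straight from scanned source,
// combining workspace scanning with the shared from_ast builders.
use jacquard_lexicon::schema::from_ast::{build_enum_schema, build_struct_schema};
use miette::IntoDiagnostic;
use std::path::Path;

fn generate_from_file(path: &Path, out_dir: &Path) -> miette::Result<()> {
    let src = std::fs::read_to_string(path).into_diagnostic()?;
    let file = syn::parse_file(&src).into_diagnostic()?;

    for item in file.items {
        // Re-wrap top-level items as DeriveInput so the derive-macro
        // builders can run on them outside a proc-macro context.
        let input: syn::DeriveInput = match item {
            syn::Item::Struct(s) => s.into(),
            syn::Item::Enum(e) => e.into(),
            _ => continue,
        };
        let built = match &input.data {
            syn::Data::Struct(_) => build_struct_schema(&input),
            _ => build_enum_schema(&input),
        };
        // Types without #[lexicon(nsid = "...")] fail to build; skip them
        // instead of aborting the whole scan.
        if let Ok(built) = built {
            let json = serde_json::to_string_pretty(&built.doc).into_diagnostic()?;
            let filename = built.nsid.replace('.', "_") + ".json";
            std::fs::write(out_dir.join(filename), json).into_diagnostic()?;
        }
    }
    Ok(())
}
```

Dependency management between schema types (the third bullet above) is the part this sketch does not address.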
+50
crates/jacquard-lexgen/examples/workspace_discovery.rs
```rust
#!/usr/bin/env cargo
//! Example: Discover schemas across the workspace without link-time discovery
//!
//! Run with: cargo run --example workspace_discovery

use jacquard_lexgen::schema_discovery::WorkspaceDiscovery;

fn main() -> miette::Result<()> {
    println!("Workspace Schema Discovery Example\n");

    // Create workspace discovery
    let discovery = WorkspaceDiscovery::new().verbose(true);

    // Scan workspace
    let schemas = discovery.scan()?;

    println!("\n━━━ Results ━━━");
    println!("Discovered {} schema types:\n", schemas.len());

    // Group by crate
    use std::collections::HashMap;
    let mut by_crate: HashMap<String, Vec<_>> = HashMap::new();

    for schema in &schemas {
        let crate_name = schema
            .source_path
            .components()
            .find_map(|c| {
                let s = c.as_os_str().to_str()?;
                if s.starts_with("jacquard-") || s == "jacquard" {
                    Some(s.to_string())
                } else {
                    None
                }
            })
            .unwrap_or_else(|| "unknown".to_string());

        by_crate.entry(crate_name).or_default().push(schema);
    }

    for (crate_name, crate_schemas) in by_crate {
        println!("📦 {} ({} schemas)", crate_name, crate_schemas.len());
        for schema in crate_schemas {
            println!("  • {} ({})", schema.nsid, schema.type_name);
        }
        println!();
    }

    Ok(())
}
```
+20 -1
crates/jacquard-lexgen/src/bin/extract_schemas.rs
```diff
+//! Extract AT Protocol lexicon schemas from compiled Rust types
+//!
+//! This binary discovers types with `#[derive(LexiconSchema)]` via inventory
+//! and generates lexicon JSON files. See the `schema_extraction` module docs
+//! for usage patterns and integration examples.
+
 use clap::Parser;
-use jacquard_lexgen::schema_extraction::{ExtractOptions, SchemaExtractor};
+use jacquard_lexgen::schema_extraction::{self, ExtractOptions, SchemaExtractor};
 use miette::Result;

 /// Extract lexicon schemas from compiled Rust types
 #[derive(Parser, Debug)]
 #[command(name = "extract-schemas")]
 #[command(about = "Extract AT Protocol lexicon schemas from Rust types")]
+#[command(long_about = r#"
+Discovers types implementing LexiconSchema via inventory and generates
+lexicon JSON files. The binary only discovers types that are linked,
+so you need to import your schema types in this binary or a custom one.
+
+See: https://docs.rs/jacquard-lexgen/latest/jacquard_lexgen/schema_extraction/
+"#)]
 struct Args {
     /// Output directory for generated schema files
     #[arg(short, long, default_value = "lexicons")]
···
 fn main() -> Result<()> {
     let args = Args::parse();

+    // Simple case: use convenience function
+    if !args.watch && args.filter.is_none() && args.validate && args.pretty {
+        return schema_extraction::run(&args.output, args.verbose);
+    }
+
+    // Advanced case: use full options
     let options = ExtractOptions {
         output_dir: args.output.into(),
         verbose: args.verbose,
```
+3 -1
crates/jacquard-lexgen/src/lib.rs
```diff
···
 //!
 //! - [`fetch`] - Ingests lexicons from git, atproto, http fetch, and other sources
 //! - [`cli`] - CLI argument parsing utilities
-//! - [`schema_extraction`] - Extract lexicon schemas from Rust types via inventory
+//! - [`schema_extraction`] - Extract lexicon schemas from Rust types via inventory (link-time discovery)
+//! - [`schema_discovery`] - Discover schemas by scanning workspace source files (no linking required)

 pub mod cli;
 pub mod fetch;
+pub mod schema_discovery;
 pub mod schema_extraction;
 #[cfg(any(test, debug_assertions))]
 pub mod test_schemas;
```
+339
crates/jacquard-lexgen/src/schema_discovery.rs
```rust
//! # Workspace Schema Discovery
//!
//! Alternative to inventory-based discovery that scans workspace source files directly.
//! This avoids the link-time limitation and can discover schemas across the entire workspace.
//!
//! ## Usage
//!
//! ```rust,no_run
//! use jacquard_lexgen::schema_discovery::WorkspaceDiscovery;
//!
//! fn main() -> miette::Result<()> {
//!     // Discover all schemas in workspace
//!     let schemas = WorkspaceDiscovery::new()
//!         .scan()?;
//!
//!     println!("Found {} schemas", schemas.len());
//!
//!     for schema in schemas {
//!         println!("  {}: {}", schema.nsid, schema.source_path.display());
//!     }
//!
//!     Ok(())
//! }
//! ```

use miette::{IntoDiagnostic, Result};
use std::path::{Path, PathBuf};
use syn::{Attribute, Item};

/// Discovered schema type
#[derive(Debug, Clone)]
pub struct DiscoveredSchema {
    /// The NSID from the lexicon attribute
    pub nsid: String,
    /// Source file containing this type
    pub source_path: PathBuf,
    /// The type name
    pub type_name: String,
    /// Whether this is a struct or enum
    pub kind: SchemaKind,
    /// Parsed lexicon attributes
    pub attributes: LexiconAttributes,
}

#[derive(Debug, Clone, PartialEq)]
pub enum SchemaKind {
    Struct,
    Enum,
}

/// Parsed lexicon attributes from source
#[derive(Debug, Clone, Default)]
pub struct LexiconAttributes {
    pub nsid: Option<String>,
    pub fragment: Option<String>,
    pub record: bool,
    pub query: bool,
    pub procedure: bool,
    pub key: Option<String>,
}

/// Workspace schema discovery via source scanning
pub struct WorkspaceDiscovery {
    workspace_root: PathBuf,
    verbose: bool,
}

impl WorkspaceDiscovery {
    /// Create a new workspace discovery
    ///
    /// Defaults to current directory as workspace root.
    pub fn new() -> Self {
        Self {
            workspace_root: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
            verbose: false,
        }
    }

    /// Set the workspace root directory
    pub fn workspace_root(mut self, path: impl Into<PathBuf>) -> Self {
        self.workspace_root = path.into();
        self
    }

    /// Enable verbose output
    pub fn verbose(mut self, verbose: bool) -> Self {
        self.verbose = verbose;
        self
    }

    /// Scan workspace for schema types
    pub fn scan(&self) -> Result<Vec<DiscoveredSchema>> {
        if self.verbose {
            println!("Scanning workspace at {}", self.workspace_root.display());
        }

        let members = self.find_workspace_members()?;

        if self.verbose {
            println!("Found {} workspace members", members.len());
        }

        let mut schemas = Vec::new();

        for member in members {
            if self.verbose {
                println!("Scanning member: {}", member.display());
            }

            let member_schemas = self.scan_member(&member)?;
            schemas.extend(member_schemas);
        }

        if self.verbose {
            println!("Discovered {} total schemas", schemas.len());
        }

        Ok(schemas)
    }

    /// Find workspace members by parsing Cargo.toml
    fn find_workspace_members(&self) -> Result<Vec<PathBuf>> {
        let cargo_toml = self.workspace_root.join("Cargo.toml");

        if !cargo_toml.exists() {
            return Err(miette::miette!(
                "Cargo.toml not found at {}",
                cargo_toml.display()
            ));
        }

        let contents = std::fs::read_to_string(&cargo_toml).into_diagnostic()?;
        let manifest: toml::Value = toml::from_str(&contents).into_diagnostic()?;

        let mut members = Vec::new();

        // Check if this is a workspace
        if let Some(workspace) = manifest.get("workspace") {
            if let Some(member_list) = workspace.get("members").and_then(|v| v.as_array()) {
                for member in member_list {
                    if let Some(pattern) = member.as_str() {
                        // Handle glob patterns like "crates/*"
                        let glob_pattern = self.workspace_root.join(pattern).join("Cargo.toml");

                        for entry in glob::glob(glob_pattern.to_str().unwrap()).into_diagnostic()? {
                            let path = entry.into_diagnostic()?;
                            if let Some(dir) = path.parent() {
                                members.push(dir.to_path_buf());
                            }
                        }
                    }
                }
            }
        } else {
            // Not a workspace - just this crate
            members.push(self.workspace_root.clone());
        }

        Ok(members)
    }

    /// Scan a single workspace member
    fn scan_member(&self, member_path: &Path) -> Result<Vec<DiscoveredSchema>> {
        let src_dir = member_path.join("src");

        if !src_dir.exists() {
            return Ok(Vec::new());
        }

        let mut schemas = Vec::new();

        // Recursively scan all .rs files
        for entry in walkdir::WalkDir::new(&src_dir)
            .into_iter()
            .filter_map(|e| e.ok())
        {
            if entry.path().extension().and_then(|s| s.to_str()) == Some("rs") {
                if let Ok(file_schemas) = self.scan_file(entry.path()) {
                    schemas.extend(file_schemas);
                }
            }
        }

        Ok(schemas)
    }

    /// Scan a single Rust source file
    fn scan_file(&self, path: &Path) -> Result<Vec<DiscoveredSchema>> {
        let contents = std::fs::read_to_string(path).into_diagnostic()?;
        let file = syn::parse_file(&contents).into_diagnostic()?;

        let mut schemas = Vec::new();

        for item in file.items {
            match item {
                Item::Struct(item_struct) => {
                    if let Some(schema) = self.extract_schema_from_attrs(
                        &item_struct.attrs,
                        &item_struct.ident.to_string(),
                        SchemaKind::Struct,
                        path,
                    )? {
                        schemas.push(schema);
                    }
                }
                Item::Enum(item_enum) => {
                    if let Some(schema) = self.extract_schema_from_attrs(
                        &item_enum.attrs,
                        &item_enum.ident.to_string(),
                        SchemaKind::Enum,
                        path,
                    )? {
                        schemas.push(schema);
                    }
                }
                _ => {}
            }
        }

        Ok(schemas)
    }

    /// Extract schema info from attributes
    fn extract_schema_from_attrs(
        &self,
        attrs: &[Attribute],
        type_name: &str,
        kind: SchemaKind,
        source_path: &Path,
    ) -> Result<Option<DiscoveredSchema>> {
        // Check if this type has LexiconSchema in its #[derive(...)] list.
        // Parse the derive args as a comma-separated path list so multi-trait
        // derives like #[derive(Debug, Clone, LexiconSchema)] are detected too.
        let has_derive = attrs.iter().any(|attr| {
            if !attr.path().is_ident("derive") {
                return false;
            }
            attr.parse_args_with(
                syn::punctuated::Punctuated::<syn::Path, syn::Token![,]>::parse_terminated,
            )
            .map(|paths| paths.iter().any(|p| p.is_ident("LexiconSchema")))
            .unwrap_or(false)
        });

        if !has_derive {
            return Ok(None);
        }

        // Parse #[lexicon(...)] attributes
        let mut lex_attrs = LexiconAttributes::default();

        for attr in attrs {
            if attr.path().is_ident("lexicon") {
                attr.parse_nested_meta(|meta| {
                    if meta.path.is_ident("nsid") {
                        let value = meta.value()?;
                        let lit: syn::LitStr = value.parse()?;
                        lex_attrs.nsid = Some(lit.value());
                    } else if meta.path.is_ident("fragment") {
                        if meta.input.peek(syn::Token![=]) {
                            let value = meta.value()?;
                            let lit: syn::LitStr = value.parse()?;
                            lex_attrs.fragment = Some(lit.value());
                        } else {
                            lex_attrs.fragment = Some(String::new());
                        }
                    } else if meta.path.is_ident("record") {
                        lex_attrs.record = true;
                    } else if meta.path.is_ident("query") {
                        lex_attrs.query = true;
                    } else if meta.path.is_ident("procedure") {
                        lex_attrs.procedure = true;
                    } else if meta.path.is_ident("key") {
                        let value = meta.value()?;
                        let lit: syn::LitStr = value.parse()?;
                        lex_attrs.key = Some(lit.value());
                    }
                    Ok(())
                })
                .into_diagnostic()?;
            }
        }

        let nsid = lex_attrs.nsid.clone().ok_or_else(|| {
            miette::miette!(
                "Type {} has #[derive(LexiconSchema)] but no nsid attribute",
                type_name
            )
        })?;

        Ok(Some(DiscoveredSchema {
            nsid,
            source_path: source_path.to_path_buf(),
            type_name: type_name.to_string(),
            kind,
            attributes: lex_attrs,
        }))
    }
}

impl Default for WorkspaceDiscovery {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_simple_struct() {
        let code = r#"
            #[derive(LexiconSchema)]
            #[lexicon(nsid = "com.example.test", record)]
            struct Test {
                field: String,
            }
        "#;

        let file = syn::parse_file(code).unwrap();
        let discovery = WorkspaceDiscovery::new();

        if let Item::Struct(item_struct) = &file.items[0] {
            let schema = discovery
                .extract_schema_from_attrs(
                    &item_struct.attrs,
                    "Test",
                    SchemaKind::Struct,
                    Path::new("test.rs"),
                )
                .unwrap()
                .unwrap();

            assert_eq!(schema.nsid, "com.example.test");
            assert_eq!(schema.type_name, "Test");
            assert!(schema.attributes.record);
        } else {
            panic!("Expected struct");
        }
    }
}
```
+169 -7
crates/jacquard-lexgen/src/schema_extraction.rs
```diff
+//! # Schema Extraction
+//!
+//! Extract AT Protocol lexicon schemas from Rust types via `inventory` discovery.
+//!
+//! ## Usage Pattern
+//!
+//! This module provides schema extraction for types implementing `LexiconSchema`.
+//! The extraction binary discovers schemas at **link time** via `inventory`, so you need
+//! to create a binary in your workspace that links your schema types.
+//!
+//! ### Simple Usage
+//!
+//! ```rust,no_run
+//! // bin/extract_schemas.rs
+//! use jacquard_lexgen::schema_extraction;
+//!
+//! // Import your types so they get linked
+//! use my_app::models::*;
+//!
+//! fn main() -> miette::Result<()> {
+//!     schema_extraction::run(
+//!         "lexicons", // output directory
+//!         true,       // verbose
+//!     )
+//! }
+//! ```
+//!
+//! ### Advanced Usage
+//!
+//! ```rust,no_run
+//! use jacquard_lexgen::schema_extraction::{ExtractOptions, SchemaExtractor};
+//! use my_app::models::*; // Your schema types
+//!
+//! fn main() -> miette::Result<()> {
+//!     let options = ExtractOptions {
+//!         output_dir: "lexicons".into(),
+//!         verbose: true,
+//!         filter: Some("app.bsky".into()), // Only extract app.bsky.* schemas
+//!         validate: true,
+//!         pretty: true,
+//!     };
+//!
+//!     SchemaExtractor::new(options).extract_all()
+//! }
+//! ```
+//!
+//! ### Integration with Build Tools
+//!
+//! **Just:**
+//! ```justfile
+//! # Generate lexicon schemas from Rust types
+//! extract-schemas:
+//!     cargo run --bin extract-schemas
+//! ```
+//!
+//! **Cargo xtask:**
+//! ```rust,ignore
+//! // xtask/src/main.rs
+//! match args {
+//!     "codegen" => {
+//!         run_command("cargo", &["run", "--bin", "extract-schemas"])?;
+//!     }
+//! }
+//! ```
+//!
+//! **Pre-commit hook:**
+//! ```bash
+//! #!/bin/bash
+//! # Regenerate schemas when Rust files change
+//! if git diff --cached --name-only | grep -E '\.rs$'; then
+//!     cargo run --bin extract-schemas
+//!     git add lexicons/*.json
+//! fi
+//! ```
+
 use jacquard_lexicon::lexicon::LexiconDoc;
 use jacquard_lexicon::schema::LexiconSchemaRef;
 use miette::{IntoDiagnostic, Result};
 use std::collections::BTreeMap;
 use std::fs;
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};

+/// Options for schema extraction
 pub struct ExtractOptions {
+    /// Output directory for generated schema files
     pub output_dir: PathBuf,
+    /// Enable verbose output
     pub verbose: bool,
+    /// Filter by NSID prefix (e.g., "app.bsky")
     pub filter: Option<String>,
+    /// Validate schemas before writing
     pub validate: bool,
+    /// Pretty-print JSON output
     pub pretty: bool,
 }

+impl Default for ExtractOptions {
+    fn default() -> Self {
+        Self {
+            output_dir: PathBuf::from("lexicons"),
+            verbose: false,
+            filter: None,
+            validate: true,
+            pretty: true,
+        }
+    }
+}
+
+/// Run schema extraction with simple defaults
+///
+/// Convenience function for the common case. For more control, use [`SchemaExtractor`].
+///
+/// # Arguments
+///
+/// * `output_dir` - Directory to write schema files (will be created if needed)
+/// * `verbose` - Print progress information
+///
+/// # Example
+///
+/// ```rust,no_run
+/// use jacquard_lexgen::schema_extraction;
+/// use my_app::models::*; // Your types with #[derive(LexiconSchema)]
+///
+/// fn main() -> miette::Result<()> {
+///     schema_extraction::run("lexicons", true)
+/// }
+/// ```
+pub fn run(output_dir: impl AsRef<Path>, verbose: bool) -> Result<()> {
+    let options = ExtractOptions {
+        output_dir: output_dir.as_ref().to_path_buf(),
+        verbose,
+        ..Default::default()
+    };
+
+    SchemaExtractor::new(options).extract_all()
+}
+
 pub struct SchemaExtractor {
     options: ExtractOptions,
 }
···
             self.validate_schema(&final_doc)?;
         }

-        // Serialize to JSON
-        let json = if self.options.pretty {
-            serde_json::to_string_pretty(&final_doc).into_diagnostic()?
-        } else {
-            serde_json::to_string(&final_doc).into_diagnostic()?
-        };
+        // Serialize to JSON with "main" def first
+        let json = self.serialize_with_main_first(&final_doc)?;

         // Write to file
         let filename = base_nsid.replace('.', "_") + ".json";
···
         println!("Watch mode not yet implemented");
         println!("Run with --help to see available options");
         Ok(())
     }
+
+    /// Serialize a lexicon doc with "main" def first
+    fn serialize_with_main_first(&self, doc: &LexiconDoc) -> Result<String> {
+        use serde_json::{json, Map, Value};
+
+        // Build defs map with main first
+        let mut defs_map = Map::new();
+
+        // Insert main first if it exists
+        if let Some(main_def) = doc.defs.get("main") {
+            let main_value = serde_json::to_value(main_def).into_diagnostic()?;
+            defs_map.insert("main".to_string(), main_value);
+        }
+
+        // Insert all other defs in sorted order
+        for (name, def) in &doc.defs {
+            if name != "main" {
+                let def_value = serde_json::to_value(def).into_diagnostic()?;
+                defs_map.insert(name.to_string(), def_value);
+            }
+        }
+
+        // Build final JSON object
+        let mut obj = Map::new();
+        obj.insert("lexicon".to_string(), json!(1));
+        obj.insert("id".to_string(), json!(doc.id.as_ref()));
+
+        if let Some(rev) = &doc.revision {
+            obj.insert("revision".to_string(), json!(rev));
+        }
+
+        if let Some(desc) = &doc.description {
+            obj.insert("description".to_string(), json!(desc));
+        }
+
+        obj.insert("defs".to_string(), Value::Object(defs_map));
+
+        // Serialize with or without pretty printing
+        if self.options.pretty {
+            serde_json::to_string_pretty(&Value::Object(obj)).into_diagnostic()
+        } else {
+            serde_json::to_string(&Value::Object(obj)).into_diagnostic()
+        }
+    }
 }
```
+5 -286
crates/jacquard-lexicon/src/derive_impl/lexicon_schema.rs
```diff
 //! Implementation of #[derive(LexiconSchema)] macro

+use crate::schema::from_ast::{
+    LexiconFieldAttrs, LexiconTypeAttrs, LexiconTypeKind, RenameRule, SerdeAttrs, determine_nsid,
+    extract_option_inner, parse_field_attrs, parse_serde_attrs, parse_serde_rename_all,
+    parse_type_attrs,
+};
 use crate::schema::type_mapping::{LexiconPrimitiveType, StringFormat, rust_type_to_lexicon_type};
 use heck::{ToKebabCase, ToLowerCamelCase, ToPascalCase, ToShoutySnakeCase, ToSnakeCase};
 use proc_macro2::TokenStream;
···
 }

-/// Parsed lexicon attributes from type
-#[derive(Debug, Default)]
-struct LexiconTypeAttrs {
-    /// NSID for this type (required for primary types)
-    nsid: Option<String>,
-
-    /// Fragment name (None = not a fragment, Some("") = infer from type name)
-    fragment: Option<String>,
-
-    /// Type kind
-    kind: Option<LexiconTypeKind>,
-
-    /// Record key type (for records)
-    key: Option<String>,
-}
-
-#[derive(Debug, Clone, Copy)]
-enum LexiconTypeKind {
-    Record,
-    Query,
-    Procedure,
-    Subscription,
-    Object,
-    Union,
-}
-
-/// Parse type-level lexicon attributes
-fn parse_type_attrs(attrs: &[Attribute]) -> syn::Result<LexiconTypeAttrs> {
-    let mut result = LexiconTypeAttrs::default();
-
-    for attr in attrs {
-        if !attr.path().is_ident("lexicon") {
-            continue;
-        }
-
-        attr.parse_nested_meta(|meta| {
-            if meta.path.is_ident("nsid") {
-                let value = meta.value()?;
-                let lit: LitStr = value.parse()?;
-                result.nsid = Some(lit.value());
-                Ok(())
-            } else if meta.path.is_ident("fragment") {
-                // Two forms: #[lexicon(fragment)] or #[lexicon(fragment = "name")]
-                if meta.input.peek(syn::Token![=]) {
-                    let value = meta.value()?;
-                    let lit: LitStr = value.parse()?;
-                    result.fragment = Some(lit.value());
-                } else {
-                    result.fragment = Some(String::new()); // Infer from type name
-                }
-                Ok(())
-            } else if meta.path.is_ident("record") {
-                result.kind = Some(LexiconTypeKind::Record);
-                Ok(())
-            } else if meta.path.is_ident("query") {
-                result.kind = Some(LexiconTypeKind::Query);
-                Ok(())
-            } else if meta.path.is_ident("procedure") {
-                result.kind = Some(LexiconTypeKind::Procedure);
-                Ok(())
-            } else if meta.path.is_ident("subscription") {
-                result.kind = Some(LexiconTypeKind::Subscription);
-                Ok(())
-            } else if meta.path.is_ident("key") {
-                let value = meta.value()?;
-                let lit: LitStr = value.parse()?;
-                result.key = Some(lit.value());
-                Ok(())
-            } else {
-                Err(meta.error("unknown lexicon attribute"))
-            }
-        })?;
-    }
-
-    Ok(result)
-}
-
-/// Parsed lexicon attributes from field
-#[derive(Debug, Default)]
-struct LexiconFieldAttrs {
-    max_length: Option<usize>,
-    max_graphemes: Option<usize>,
-    min_length: Option<usize>,
-    min_graphemes: Option<usize>,
-    minimum: Option<i64>,
-    maximum: Option<i64>,
-    explicit_ref: Option<String>,
-    format: Option<String>,
-}
-
-/// Parse field-level lexicon attributes
-fn parse_field_attrs(attrs: &[Attribute]) -> syn::Result<LexiconFieldAttrs> {
-    let mut result = LexiconFieldAttrs::default();
-
-    for attr in attrs {
-        if !attr.path().is_ident("lexicon") {
-            continue;
-        }
-
-        attr.parse_nested_meta(|meta| {
-            if meta.path.is_ident("max_length") {
-                let value = meta.value()?;
-                let lit: syn::LitInt = value.parse()?;
-                result.max_length = Some(lit.base10_parse()?);
-                Ok(())
-            } else if meta.path.is_ident("max_graphemes") {
-                let value = meta.value()?;
-                let lit: syn::LitInt = value.parse()?;
-                result.max_graphemes = Some(lit.base10_parse()?);
-                Ok(())
-            } else if meta.path.is_ident("min_length") {
-                let value = meta.value()?;
-                let lit: syn::LitInt = value.parse()?;
-                result.min_length = Some(lit.base10_parse()?);
-                Ok(())
-            } else if meta.path.is_ident("min_graphemes") {
-                let value = meta.value()?;
-                let lit: syn::LitInt = value.parse()?;
-                result.min_graphemes = Some(lit.base10_parse()?);
-                Ok(())
-            } else if meta.path.is_ident("minimum") {
-                let value = meta.value()?;
-                let lit: syn::LitInt = value.parse()?;
-                result.minimum = Some(lit.base10_parse()?);
-                Ok(())
-            } else if meta.path.is_ident("maximum") {
-                let value = meta.value()?;
-                let lit: syn::LitInt = value.parse()?;
-                result.maximum = Some(lit.base10_parse()?);
-                Ok(())
-            } else if meta.path.is_ident("ref") {
-                let value = meta.value()?;
-                let lit: LitStr = value.parse()?;
-                result.explicit_ref = Some(lit.value());
-                Ok(())
-            } else if meta.path.is_ident("format") {
-                let value = meta.value()?;
-                let lit: LitStr = value.parse()?;
-                result.format = Some(lit.value());
-                Ok(())
-            } else {
-                Err(meta.error("unknown lexicon field attribute"))
-            }
-        })?;
-    }
-
-    Ok(result)
-}
-
-/// Parsed serde attributes relevant to lexicon schema
-#[derive(Debug, Default)]
-struct SerdeAttrs {
-    rename: Option<String>,
-    skip: bool,
-}
-
-/// Parse serde attributes for a field
-fn parse_serde_attrs(attrs: &[Attribute]) -> syn::Result<SerdeAttrs> {
-    let mut result = SerdeAttrs::default();
-
-    for attr in attrs {
-        if !attr.path().is_ident("serde") {
-            continue;
-        }
-
-        attr.parse_nested_meta(|meta| {
-            if meta.path.is_ident("rename") {
-                let value = meta.value()?;
-                let lit: LitStr = value.parse()?;
-                result.rename = Some(lit.value());
-                Ok(())
-            } else if meta.path.is_ident("skip") {
-                result.skip = true;
-                Ok(())
-            } else {
-                // Ignore other serde attributes
-                Ok(())
-            }
-        })?;
-    }
-
-    Ok(result)
-}
-
-/// Parse container-level serde rename_all
-fn parse_serde_rename_all(attrs: &[Attribute]) -> syn::Result<Option<RenameRule>> {
-    for attr in attrs {
-        if !attr.path().is_ident("serde") {
-            continue;
-        }
-
-        let mut found_rule = None;
-        attr.parse_nested_meta(|meta| {
-            if meta.path.is_ident("rename_all") {
-                let value = meta.value()?;
-                let lit: LitStr = value.parse()?;
-                found_rule = RenameRule::from_str(&lit.value());
-                Ok(())
-            } else {
-                Ok(())
-            }
-        })?;
-
-        if found_rule.is_some() {
-            return Ok(found_rule);
-        }
-    }
-
-    // Default to camelCase (lexicon standard)
-    Ok(Some(RenameRule::CamelCase))
-}
-
-#[derive(Debug, Clone, Copy)]
-enum RenameRule {
-    CamelCase,
-    SnakeCase,
-    PascalCase,
-    ScreamingSnakeCase,
-    KebabCase,
-}
-
-impl RenameRule {
-    fn from_str(s: &str) -> Option<Self> {
-        match s {
-            "camelCase" => Some(RenameRule::CamelCase),
-            "snake_case" => Some(RenameRule::SnakeCase),
-            "PascalCase" => Some(RenameRule::PascalCase),
-            "SCREAMING_SNAKE_CASE" => Some(RenameRule::ScreamingSnakeCase),
-            "kebab-case" => Some(RenameRule::KebabCase),
-            _ => None,
-        }
-    }
-
-    fn apply(&self, input: &str) -> String {
-        match self {
-            RenameRule::CamelCase => input.to_lower_camel_case(),
-            RenameRule::SnakeCase => input.to_snake_case(),
-            RenameRule::PascalCase => input.to_pascal_case(),
-            RenameRule::ScreamingSnakeCase => input.to_shouty_snake_case(),
-            RenameRule::KebabCase => input.to_kebab_case(),
-        }
-    }
-}
-
-/// Determine NSID from attributes and context
-fn determine_nsid(attrs: &LexiconTypeAttrs, input: &DeriveInput) -> syn::Result<String> {
-    // Explicit NSID in lexicon attribute
-    if let Some(nsid) = &attrs.nsid {
-        return Ok(nsid.clone());
-    }
-
-    // Fragment - need to find module NSID (not implemented yet)
-    if attrs.fragment.is_some() {
-        return Err(syn::Error::new_spanned(
-            input,
-            "fragments require explicit nsid or module-level primary type (not yet implemented)",
-        ));
-    }
-
-    // Check for XrpcRequest derive with NSID
-    if let Some(nsid) = extract_xrpc_nsid(&input.attrs)? {
-        return Ok(nsid);
-    }
-
-    Err(syn::Error::new_spanned(
-        input,
-        "missing required `nsid` attribute (use #[lexicon(nsid = \"...\")] or #[xrpc(nsid = \"...\")])",
-    ))
-}
-
 /// Extract NSID from XrpcRequest attributes (cross-derive coordination)
 fn extract_xrpc_nsid(attrs: &[Attribute]) -> syn::Result<Option<String>> {
     for attr in attrs {
···
     }

     Ok(defs)
 }

-/// Extract T from Option<T>, return (type, is_required)
-fn extract_option_inner(ty: &Type) -> (&Type, bool) {
-    if let Type::Path(type_path) = ty {
-        if let Some(segment) = type_path.path.segments.last() {
-            if segment.ident == "Option" {
-                if let syn::PathArguments::AngleBracketed(args) = &segment.arguments {
-                    if let Some(syn::GenericArgument::Type(inner)) = args.args.first() {
-                        return (inner, false);
-                    }
-                }
-            }
-        }
-    }
-    (ty, true)
-}
-
 /// Generate LexObjectProperty tokens for a field
```
+1
crates/jacquard-lexicon/src/schema.rs
```diff
···
 //! - **Validation**: Runtime constraint checking via `validate()` method

 pub mod builder;
+pub mod from_ast;
 pub mod type_mapping;

 use crate::lexicon::{LexObjectProperty, LexRef, LexUserType, Lexicon, LexiconDoc};
```
+252
crates/jacquard-lexicon/src/schema/from_ast/builders.rs
```rust
//! Top-level builder functions

use super::parse::{
    determine_nsid, extract_variant_ref, has_open_union_attr, parse_serde_rename_all,
    parse_type_attrs,
};
use super::properties::build_object_properties;
use super::types::*;
use crate::lexicon::*;
use heck::ToLowerCamelCase;
use jacquard_common::smol_str::SmolStr;
use std::collections::BTreeMap;
use syn::DeriveInput;

/// Build schema from a struct
pub fn build_struct_schema(input: &DeriveInput) -> syn::Result<BuiltSchema> {
    // Parse type-level attributes
    let type_attrs = parse_type_attrs(&input.attrs)?;

    // Determine NSID
    let nsid = determine_nsid(&type_attrs, input)?;

    // Parse fields based on data type
    let data_struct = match &input.data {
        syn::Data::Struct(data_struct) => data_struct,
        _ => {
            return Err(syn::Error::new_spanned(
                input,
                "build_struct_schema requires a struct",
            ));
        }
    };

    // Parse serde container attributes
    let rename_all = parse_serde_rename_all(&input.attrs)?;

    // Build properties
    let field_properties = build_object_properties(&data_struct.fields, rename_all)?;

    // Extract properties map, required list, and unresolved refs
    let mut properties = BTreeMap::new();
    let mut required = Vec::new();
    let mut all_validations = Vec::new();
    let mut all_unresolved = Vec::new();

    for field_prop in field_properties {
        properties.insert(field_prop.schema_name.clone().into(), field_prop.property);
        if field_prop.required {
            required.push(field_prop.schema_name.into());
        }
        all_validations.extend(field_prop.validations);
        all_unresolved.extend(field_prop.unresolved_refs);
    }

    // Build main def based on kind
    let user_type = build_user_type(&type_attrs, properties, required)?;

    // Build lexicon doc
    let mut defs = BTreeMap::new();
    defs.insert("main".into(), user_type);

    let doc = LexiconDoc {
        lexicon: Lexicon::Lexicon1,
        id: nsid.clone().into(),
        revision: None,
        description: None,
        defs,
    };

    // Determine schema_id (add fragment if needed)
    let schema_id = if let Some(fragment) = &type_attrs.fragment {
        let frag_name = if fragment.is_empty() {
            input.ident.to_string().to_lower_camel_case()
        } else {
            fragment.clone()
        };
        format!("{}#{}", nsid, frag_name)
    } else {
        nsid.clone()
    };

    Ok(BuiltSchema {
        nsid,
        schema_id,
        doc,
        validation_checks: all_validations,
        unresolved_refs: all_unresolved,
    })
}

/// Build LexUserType based on kind
fn build_user_type(
    type_attrs: &LexiconTypeAttrs,
    properties: BTreeMap<SmolStr, LexObjectProperty<'static>>,
    required: Vec<SmolStr>,
) -> syn::Result<LexUserType<'static>> {
    let required_field = if required.is_empty() {
        None
    } else {
        Some(required)
    };

    let obj = LexObject {
        description: None,
        required: required_field,
        nullable: None,
        properties,
    };

    match type_attrs.kind {
        Some(LexiconTypeKind::Record) => Ok(LexUserType::Record(LexRecord {
            description: None,
            key: type_attrs.key.clone().map(Into::into),
            record: LexRecordRecord::Object(obj),
        })),
        Some(LexiconTypeKind::Query) => {
            // Convert properties to parameters
            let params = LexXrpcParameters {
                description: None,
                required: obj.required.clone(),
                properties: obj
                    .properties
                    .into_iter()
                    .map(|(k, v)| (k, convert_object_prop_to_param_prop(v)))
                    .collect(),
            };
            Ok(LexUserType::XrpcQuery(LexXrpcQuery {
                description: None,
                parameters: Some(LexXrpcQueryParameter::Params(params)),
                output: None,
                errors: None,
            }))
        }
        Some(LexiconTypeKind::Procedure) => Ok(LexUserType::XrpcProcedure(LexXrpcProcedure {
            description: None,
            parameters: None,
            input: Some(LexXrpcBody {
                description: None,
                encoding: "application/json".into(),
                schema: Some(LexXrpcBodySchema::Object(obj)),
            }),
            output: None,
            errors: None,
        })),
        Some(LexiconTypeKind::Subscription) => {
            let params = LexXrpcParameters {
                description: None,
                required: obj.required.clone(),
                properties: obj
                    .properties
                    .into_iter()
                    .map(|(k, v)| (k, convert_object_prop_to_param_prop(v)))
                    .collect(),
            };
            Ok(LexUserType::XrpcSubscription(LexXrpcSubscription {
                description: None,
                parameters: Some(LexXrpcSubscriptionParameter::Params(params)),
                message: None,
                infos: None,
                errors: None,
            }))
        }
        _ => Ok(LexUserType::Object(obj)),
    }
}

/// Convert LexObjectProperty to LexXrpcParametersProperty
fn convert_object_prop_to_param_prop(
    prop: LexObjectProperty<'static>,
) -> LexXrpcParametersProperty<'static> {
    match prop {
        LexObjectProperty::Boolean(b) => LexXrpcParametersProperty::Boolean(b),
        LexObjectProperty::Integer(i) => LexXrpcParametersProperty::Integer(i),
        LexObjectProperty::String(s) => LexXrpcParametersProperty::String(s),
        LexObjectProperty::Unknown(u) => LexXrpcParametersProperty::Unknown(u),
        LexObjectProperty::Array(a) => {
            // Convert LexArray to LexPrimitiveArray
            let primitive_item = match a.items {
                LexArrayItem::Boolean(b) => LexPrimitiveArrayItem::Boolean(b),
                LexArrayItem::Integer(i) => LexPrimitiveArrayItem::Integer(i),
                LexArrayItem::String(s) => LexPrimitiveArrayItem::String(s),
                // Non-primitive items become Unknown
                _ => LexPrimitiveArrayItem::Unknown(LexUnknown { description: None }),
            };
            LexXrpcParametersProperty::Array(LexPrimitiveArray {
                description: a.description,
                items: primitive_item,
                min_length: a.min_length,
                max_length: a.max_length,
            })
        }
        // Other types not valid in parameters - shouldn't happen
        _ => LexXrpcParametersProperty::Unknown(LexUnknown { description: None }),
    }
}

/// Build schema from an enum (union)
pub fn build_enum_schema(input: &DeriveInput) -> syn::Result<BuiltSchema> {
    let type_attrs = parse_type_attrs(&input.attrs)?;
    let nsid = determine_nsid(&type_attrs, input)?;

    let data_enum = match &input.data {
        syn::Data::Enum(data_enum) => data_enum,
        _ => {
            return Err(syn::Error::new_spanned(
                input,
                "build_enum_schema requires an enum",
            ));
        }
    };

    // Check if open union
    let is_open = has_open_union_attr(&input.attrs);

    // Extract variant refs
    let mut refs = Vec::new();
    for variant in &data_enum.variants {
        // Skip Unknown variant (added by #[open_union] macro)
        if variant.ident == "Unknown" {
            continue;
        }

        let variant_ref = extract_variant_ref(variant, &nsid)?;
        refs.push(variant_ref.into());
    }

    // Build union
    let user_type = LexUserType::Union(LexRefUnion {
        description: None,
        refs,
        closed: if is_open { None } else { Some(true) },
    });

    let mut defs = BTreeMap::new();
    defs.insert("main".into(), user_type);

    let doc = LexiconDoc {
        lexicon: Lexicon::Lexicon1,
        id: nsid.clone().into(),
        revision: None,
        description: None,
        defs,
    };

    // Unions don't have fragments in typical usage
    let schema_id = nsid.clone();

    Ok(BuiltSchema {
        nsid,
        schema_id,
        doc,
        validation_checks: Vec::new(), // Unions don't have validation
        unresolved_refs: Vec::new(),   // Union variants use explicit refs
    })
}
```
+24
crates/jacquard-lexicon/src/schema/from_ast/mod.rs
```rust
//! Build lexicon schemas from Rust AST
//!
//! This module contains shared logic for building lexicon schemas from Rust syntax trees.
//! It returns actual LexiconDoc structs (not tokens), enabling both:
//! 1. Derive macro path: doc → tokens → compiled code
//! 2. Workspace discovery path: doc directly → JSON

mod builders;
mod parse;
mod properties;
mod types;

// Re-export public API
pub use builders::{build_enum_schema, build_struct_schema};
pub use types::{
    BuiltSchema, ConstraintCheck, FieldProperty, LexiconFieldAttrs, LexiconTypeAttrs,
    LexiconTypeKind, RenameRule, SerdeAttrs, UnresolvedRef, ValidationCheck,
};

// Re-export parsing functions for derive macro
pub use parse::{
    determine_nsid, extract_option_inner, extract_variant_ref, has_open_union_attr,
    parse_field_attrs, parse_serde_attrs, parse_serde_rename_all, parse_type_attrs,
};
```
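To make the second path concrete, a hedged usage sketch of driving `build_struct_schema` outside a proc-macro context; the `Post` type and its attributes are hypothetical, chosen only to exercise the `#[lexicon(...)]` parsing shown in this diff:

```rust
// Hypothetical sketch of the "doc directly" path: build a LexiconDoc from
// parsed source without going through derive macro expansion.
use jacquard_lexicon::schema::from_ast::build_struct_schema;

fn main() -> syn::Result<()> {
    // Illustrative input; any struct carrying #[lexicon(...)] attributes works.
    let src = r#"
        #[derive(LexiconSchema)]
        #[lexicon(nsid = "com.example.post", record, key = "tid")]
        struct Post {
            text: String,
            reply_count: Option<i64>,
        }
    "#;

    let input: syn::DeriveInput = syn::parse_str(src)?;
    let built = build_struct_schema(&input)?;

    // built.doc is a LexiconDoc with a "main" record def; built.schema_id
    // carries the fragment-qualified id when #[lexicon(fragment)] is used.
    println!("{} ({})", built.schema_id, built.nsid);
    Ok(())
}
```

Because the builders take a plain `syn::DeriveInput`, the same call works whether the input came from macro expansion or from `syn::parse_str`/`syn::parse_file`.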
+295
crates/jacquard-lexicon/src/schema/from_ast/parse.rs
```rust
//! Attribute parsing functions

use super::types::*;
use syn::{Attribute, DeriveInput, LitStr};

/// Parse type-level lexicon attributes
pub fn parse_type_attrs(attrs: &[Attribute]) -> syn::Result<LexiconTypeAttrs> {
    let mut result = LexiconTypeAttrs::default();

    for attr in attrs {
        if !attr.path().is_ident("lexicon") {
            continue;
        }

        attr.parse_nested_meta(|meta| {
            if meta.path.is_ident("nsid") {
                let value = meta.value()?;
                let lit: LitStr = value.parse()?;
                result.nsid = Some(lit.value());
                Ok(())
            } else if meta.path.is_ident("fragment") {
                if meta.input.peek(syn::Token![=]) {
                    let value = meta.value()?;
                    let lit: LitStr = value.parse()?;
                    result.fragment = Some(lit.value());
                } else {
                    result.fragment = Some(String::new());
                }
                Ok(())
            } else if meta.path.is_ident("record") {
                result.kind = Some(LexiconTypeKind::Record);
                Ok(())
            } else if meta.path.is_ident("query") {
                result.kind = Some(LexiconTypeKind::Query);
                Ok(())
            } else if meta.path.is_ident("procedure") {
                result.kind = Some(LexiconTypeKind::Procedure);
                Ok(())
            } else if meta.path.is_ident("subscription") {
                result.kind = Some(LexiconTypeKind::Subscription);
                Ok(())
            } else if meta.path.is_ident("key") {
                let value = meta.value()?;
                let lit: LitStr = value.parse()?;
                result.key = Some(lit.value());
                Ok(())
            } else {
                Err(meta.error("unknown lexicon attribute"))
            }
        })?;
    }

    Ok(result)
}

/// Parse field-level lexicon attributes
pub fn parse_field_attrs(attrs: &[Attribute]) -> syn::Result<LexiconFieldAttrs> {
    let mut result = LexiconFieldAttrs::default();

    for attr in attrs {
        if !attr.path().is_ident("lexicon") {
            continue;
        }

        attr.parse_nested_meta(|meta| {
            if meta.path.is_ident("max_length") {
                let value = meta.value()?;
                let lit: syn::LitInt = value.parse()?;
                result.max_length = Some(lit.base10_parse()?);
                Ok(())
            } else if meta.path.is_ident("max_graphemes") {
                let value = meta.value()?;
                let lit: syn::LitInt = value.parse()?;
                result.max_graphemes = Some(lit.base10_parse()?);
                Ok(())
            } else if meta.path.is_ident("min_length") {
                let value = meta.value()?;
                let lit: syn::LitInt = value.parse()?;
                result.min_length = Some(lit.base10_parse()?);
                Ok(())
            } else if meta.path.is_ident("min_graphemes") {
                let value = meta.value()?;
                let lit: syn::LitInt = value.parse()?;
                result.min_graphemes = Some(lit.base10_parse()?);
                Ok(())
            } else if meta.path.is_ident("minimum") {
                let value = meta.value()?;
                let lit: syn::LitInt = value.parse()?;
                result.minimum = Some(lit.base10_parse()?);
                Ok(())
            } else if meta.path.is_ident("maximum") {
                let value = meta.value()?;
                let lit: syn::LitInt = value.parse()?;
                result.maximum = Some(lit.base10_parse()?);
                Ok(())
            } else if meta.path.is_ident("ref") {
                let value = meta.value()?;
                let lit: LitStr = value.parse()?;
                result.explicit_ref = Some(lit.value());
                Ok(())
            } else if meta.path.is_ident("format") {
                let value = meta.value()?;
                let lit: LitStr = value.parse()?;
                result.format = Some(lit.value());
                Ok(())
            } else {
                Err(meta.error("unknown lexicon field attribute"))
            }
        })?;
    }

    Ok(result)
}

/// Parse serde attributes for a field
pub fn parse_serde_attrs(attrs: &[Attribute]) -> syn::Result<SerdeAttrs> {
    let mut result = SerdeAttrs::default();

    for attr in attrs {
        if !attr.path().is_ident("serde") {
            continue;
        }

        attr.parse_nested_meta(|meta| {
            if meta.path.is_ident("rename") {
                let value = meta.value()?;
                let lit: LitStr = value.parse()?;
                result.rename = Some(lit.value());
                Ok(())
            } else if meta.path.is_ident("skip") {
                result.skip = true;
                Ok(())
            } else {
                // Ignore other serde attributes
                Ok(())
            }
        })?;
    }

    Ok(result)
}

/// Parse container-level serde rename_all
pub fn parse_serde_rename_all(attrs: &[Attribute]) -> syn::Result<Option<RenameRule>> {
    for attr in attrs {
        if !attr.path().is_ident("serde") {
            continue;
        }

        let mut found_rule = None;
        attr.parse_nested_meta(|meta| {
            if meta.path.is_ident("rename_all") {
                let value = meta.value()?;
                let lit: LitStr = value.parse()?;
                found_rule = RenameRule::from_str(&lit.value());
                Ok(())
            } else {
                Ok(())
            }
        })?;

        if found_rule.is_some() {
            return Ok(found_rule);
        }
    }

    // Default to camelCase (lexicon standard)
    Ok(Some(RenameRule::CamelCase))
}

/// Determine NSID from attributes and context
pub fn determine_nsid(attrs: &LexiconTypeAttrs, input: &DeriveInput) -> syn::Result<String> {
    if let Some(nsid) = &attrs.nsid {
        return Ok(nsid.clone());
    }

    if attrs.fragment.is_some() {
        return Err(syn::Error::new_spanned(
            input,
            "fragments require explicit nsid or module-level primary type (not yet implemented)",
        ));
    }

    // Check for XrpcRequest derive with NSID
    if let Some(nsid) = extract_xrpc_nsid(&input.attrs)? {
        return Ok(nsid);
    }

    Err(syn::Error::new_spanned(
        input,
        "missing required `nsid` attribute (use #[lexicon(nsid = \"...\")] or #[xrpc(nsid = \"...\")])",
    ))
}

/// Extract NSID from XrpcRequest attributes (cross-derive coordination)
fn extract_xrpc_nsid(attrs: &[Attribute]) -> syn::Result<Option<String>> {
    for attr in attrs {
        if !attr.path().is_ident("xrpc") {
            continue;
        }

        let mut nsid = None;
        attr.parse_nested_meta(|meta| {
            if meta.path.is_ident("nsid") {
                let value = meta.value()?;
                let lit: LitStr = value.parse()?;
                nsid = Some(lit.value());
            }
            Ok(())
        })?;

        if let Some(nsid) = nsid {
            return Ok(Some(nsid));
        }
    }
    Ok(None)
}

/// Extract T from Option<T>, return (type, is_required)
pub fn extract_option_inner(ty: &syn::Type) -> (&syn::Type, bool) {
    if let syn::Type::Path(type_path) = ty {
        if let Some(segment) = type_path.path.segments.last() {
            if segment.ident == "Option" {
                if let syn::PathArguments::AngleBracketed(args) = &segment.arguments {
                    if let Some(syn::GenericArgument::Type(inner)) = args.args.first() {
                        return (inner, false);
                    }
                }
            }
        }
    }
    (ty, true)
}

/// Check if type has #[open_union] attribute
pub fn has_open_union_attr(attrs: &[Attribute]) -> bool {
    attrs.iter().any(|attr| attr.path().is_ident("open_union"))
}

/// Extract NSID ref for a variant
pub fn extract_variant_ref(variant: &syn::Variant, base_nsid: &str) -> syn::Result<String> {
    use heck::ToLowerCamelCase;

    // Priority 1: Check for #[nsid = "..."] attribute
    for attr in &variant.attrs {
        if attr.path().is_ident("nsid") {
            if let syn::Meta::NameValue(meta) = &attr.meta {
                if let syn::Expr::Lit(expr_lit) = &meta.value {
                    if let syn::Lit::Str(lit_str) = &expr_lit.lit {
                        return Ok(lit_str.value());
                    }
                }
            }
        }
    }

    // Priority 2: Check for #[serde(rename = "...")] attribute
    for attr in &variant.attrs {
        if !attr.path().is_ident("serde") {
            continue;
        }

        let mut rename = None;
        let _ = attr.parse_nested_meta(|meta| {
            if meta.path.is_ident("rename") {
                let value = meta.value()?;
                let lit: LitStr = value.parse()?;
                rename = Some(lit.value());
            }
            Ok(())
        });

        if let Some(rename) = rename {
            return Ok(rename);
        }
    }

    // Priority 3: Generate fragment ref for unit variants
    match &variant.fields {
        syn::Fields::Unit => {
            let variant_name = variant.ident.to_string().to_lower_camel_case();
            Ok(format!("{}#{}", base_nsid, variant_name))
        }
        syn::Fields::Unnamed(fields) if fields.unnamed.len() == 1 => Err(syn::Error::new_spanned(
            variant,
            "union variants with non-primitive types must use #[nsid] or #[serde(rename)] attribute to specify the ref",
        )),
        _ => Err(syn::Error::new_spanned(
            variant,
            "union variants must be unit variants or have single unnamed field",
        )),
    }
}
```
+432
crates/jacquard-lexicon/src/schema/from_ast/properties.rs
//! Property building functions

use super::parse::{parse_field_attrs, parse_serde_attrs};
use super::types::*;
use crate::lexicon::*;
use crate::schema::type_mapping::{rust_type_to_lexicon_type, LexiconPrimitiveType};
use heck::ToLowerCamelCase;
use std::collections::BTreeMap;
use syn::Type;

/// Build object properties from struct fields
pub fn build_object_properties(
    fields: &syn::Fields,
    rename_rule: Option<RenameRule>,
) -> syn::Result<Vec<FieldProperty>> {
    let named_fields = match fields {
        syn::Fields::Named(fields) => &fields.named,
        _ => {
            return Err(syn::Error::new_spanned(
                fields,
                "LexiconSchema only supports structs with named fields",
            ));
        }
    };

    let mut properties = Vec::new();

    for field in named_fields {
        let field_name = field.ident.as_ref().unwrap().to_string();

        // Skip the extra_data field (added by the #[lexicon] attribute macro)
        if field_name == "extra_data" {
            continue;
        }

        // Parse attributes
        let serde_attrs = parse_serde_attrs(&field.attrs)?;
        let lex_attrs = parse_field_attrs(&field.attrs)?;

        // Skip if serde(skip)
        if serde_attrs.skip {
            continue;
        }

        // Determine the schema name
        let schema_name = if let Some(rename) = serde_attrs.rename {
            rename
        } else if let Some(rule) = rename_rule {
            rule.apply(&field_name)
        } else {
            field_name.clone()
        };

        // Determine whether the field is required (Option<T> = optional)
        let (inner_type, required) = super::parse::extract_option_inner(&field.ty);

        // Build the property and its validations
        let field_prop =
            build_field_property(&field_name, &schema_name, inner_type, required, &lex_attrs)?;

        properties.push(field_prop);
    }

    Ok(properties)
}

/// Build a single field property
fn build_field_property(
    field_name: &str,
    schema_name: &str,
    rust_type: &Type,
    required: bool,
    constraints: &LexiconFieldAttrs,
) -> syn::Result<FieldProperty> {
    // Build the lexicon property
    let (property, mut unresolved_refs) = build_lex_property(rust_type, constraints)?;

    // Update field paths in unresolved refs
    for uref in &mut unresolved_refs {
        uref.field_path = format!("main.properties.{}", schema_name);
    }

    // Build validation checks
    let validations =
        build_validations(field_name, schema_name, rust_type, required, constraints)?;

    Ok(FieldProperty {
        field_name: field_name.to_string(),
        schema_name: schema_name.to_string(),
        rust_type: rust_type.clone(),
        property,
        required,
        validations,
        unresolved_refs,
    })
}

/// Build a LexObjectProperty from a Rust type and constraints.
/// Returns (property, unresolved_refs).
fn build_lex_property(
    rust_type: &Type,
    constraints: &LexiconFieldAttrs,
) -> syn::Result<(LexObjectProperty<'static>, Vec<UnresolvedRef>)> {
    // Try to detect a primitive type
    let lex_type = rust_type_to_lexicon_type(rust_type);

    match lex_type {
        Some(LexiconPrimitiveType::Boolean) => Ok((
            LexObjectProperty::Boolean(LexBoolean {
                description: None,
                default: None,
                r#const: None,
            }),
            Vec::new(),
        )),
        Some(LexiconPrimitiveType::Integer) => Ok((
            LexObjectProperty::Integer(LexInteger {
                description: None,
                default: None,
                minimum: constraints.minimum,
                maximum: constraints.maximum,
                r#enum: None,
                r#const: None,
            }),
            Vec::new(),
        )),
        Some(LexiconPrimitiveType::String(format)) => Ok((
            LexObjectProperty::String(build_string_property(format, constraints)),
            Vec::new(),
        )),
        Some(LexiconPrimitiveType::Bytes) => Ok((
            LexObjectProperty::Bytes(LexBytes {
                description: None,
                max_length: constraints.max_length,
                min_length: constraints.min_length,
            }),
            Vec::new(),
        )),
        Some(LexiconPrimitiveType::CidLink) => Ok((
            LexObjectProperty::CidLink(LexCidLink { description: None }),
            Vec::new(),
        )),
        Some(LexiconPrimitiveType::Blob) => Ok((
            LexObjectProperty::Blob(LexBlob {
                description: None,
                accept: None,
                max_size: None,
            }),
            Vec::new(),
        )),
        Some(LexiconPrimitiveType::Unknown) => Ok((
            LexObjectProperty::Unknown(LexUnknown { description: None }),
            Vec::new(),
        )),
        Some(LexiconPrimitiveType::Array(item_type)) => {
            let (item_prop, unresolved) = build_array_item(*item_type)?;
            Ok((
                LexObjectProperty::Array(LexArray {
                    description: None,
                    items: item_prop,
                    min_length: constraints.min_length,
                    max_length: constraints.max_length,
                }),
                unresolved,
            ))
        }
        Some(LexiconPrimitiveType::Object) => {
            // Nested inline objects shouldn't typically appear here; fall back to Unknown
            Ok((
                LexObjectProperty::Unknown(LexUnknown { description: None }),
                Vec::new(),
            ))
        }
        Some(LexiconPrimitiveType::Ref(ref_nsid)) => Ok((
            LexObjectProperty::Ref(LexRef {
                description: None,
                r#ref: ref_nsid.into(),
            }),
            Vec::new(),
        )),
        Some(LexiconPrimitiveType::Union(_refs)) => {
            // Union types need dedicated schema generation; use Unknown for now
            Ok((
                LexObjectProperty::Unknown(LexUnknown { description: None }),
                Vec::new(),
            ))
        }
        None => {
            // Not a primitive - check for an explicit ref
            if let Some(ref_nsid) = &constraints.explicit_ref {
                Ok((
                    LexObjectProperty::Ref(LexRef {
                        description: None,
                        r#ref: ref_nsid.clone().into(),
                    }),
                    Vec::new(),
                ))
            } else {
                // No explicit ref - create a placeholder and track it as unresolved
                let type_str = quote::quote!(#rust_type).to_string();
                let placeholder = format!("#unresolved:{}", extract_type_name(&type_str));

                let unresolved = UnresolvedRef {
                    rust_type: type_str,
                    field_path: String::new(), // Will be filled in by the caller
                    placeholder_ref: placeholder.clone(),
                };

                Ok((
                    LexObjectProperty::Ref(LexRef {
                        description: None,
                        r#ref: placeholder.into(),
                    }),
                    vec![unresolved],
                ))
            }
        }
    }
}

/// Extract the simple type name from a type path, lowerCamelCased for use
/// in placeholder refs (e.g., "feedViewPost" from "app::bsky::FeedViewPost")
fn extract_type_name(type_str: &str) -> String {
    type_str
        .split("::")
        .last()
        .unwrap_or(type_str)
        .trim_matches(|c| c == '<' || c == '>' || c == ' ')
        .to_lower_camel_case()
}

/// Build an array item property.
/// Returns (item, unresolved_refs).
fn build_array_item(
    item_type: LexiconPrimitiveType,
) -> syn::Result<(LexArrayItem<'static>, Vec<UnresolvedRef>)> {
    match item_type {
        LexiconPrimitiveType::String(format) => {
            let format_enum = match format {
                crate::schema::type_mapping::StringFormat::Plain => None,
                crate::schema::type_mapping::StringFormat::Did => Some(LexStringFormat::Did),
                crate::schema::type_mapping::StringFormat::Handle => Some(LexStringFormat::Handle),
                crate::schema::type_mapping::StringFormat::AtUri => Some(LexStringFormat::AtUri),
                crate::schema::type_mapping::StringFormat::Nsid => Some(LexStringFormat::Nsid),
                crate::schema::type_mapping::StringFormat::Cid => Some(LexStringFormat::Cid),
                crate::schema::type_mapping::StringFormat::Datetime => {
                    Some(LexStringFormat::Datetime)
                }
                crate::schema::type_mapping::StringFormat::Language => {
                    Some(LexStringFormat::Language)
                }
                crate::schema::type_mapping::StringFormat::Tid => Some(LexStringFormat::Tid),
                crate::schema::type_mapping::StringFormat::RecordKey => {
                    Some(LexStringFormat::RecordKey)
                }
                crate::schema::type_mapping::StringFormat::AtIdentifier => {
                    Some(LexStringFormat::AtIdentifier)
                }
                crate::schema::type_mapping::StringFormat::Uri => Some(LexStringFormat::Uri),
            };
            Ok((
                LexArrayItem::String(LexString {
                    description: None,
                    format: format_enum,
                    default: None,
                    min_length: None,
                    max_length: None,
                    min_graphemes: None,
                    max_graphemes: None,
                    r#enum: None,
                    r#const: None,
                    known_values: None,
                }),
                Vec::new(),
            ))
        }
        LexiconPrimitiveType::Integer => Ok((
            LexArrayItem::Integer(LexInteger {
                description: None,
                default: None,
                minimum: None,
                maximum: None,
                r#enum: None,
                r#const: None,
            }),
            Vec::new(),
        )),
        LexiconPrimitiveType::Boolean => Ok((
            LexArrayItem::Boolean(LexBoolean {
                description: None,
                default: None,
                r#const: None,
            }),
            Vec::new(),
        )),
        LexiconPrimitiveType::Bytes => Ok((
            LexArrayItem::Bytes(LexBytes {
                description: None,
                max_length: None,
                min_length: None,
            }),
            Vec::new(),
        )),
        LexiconPrimitiveType::CidLink => Ok((
            LexArrayItem::CidLink(LexCidLink { description: None }),
            Vec::new(),
        )),
        LexiconPrimitiveType::Blob => Ok((
            LexArrayItem::Blob(LexBlob {
                description: None,
                accept: None,
                max_size: None,
            }),
            Vec::new(),
        )),
        LexiconPrimitiveType::Unknown => Ok((
            LexArrayItem::Unknown(LexUnknown { description: None }),
            Vec::new(),
        )),
        LexiconPrimitiveType::Ref(ref_nsid) => Ok((
            LexArrayItem::Ref(LexRef {
                description: None,
                r#ref: ref_nsid.into(),
            }),
            Vec::new(),
        )),
        LexiconPrimitiveType::Object => {
            // Object in array - return an empty object
            Ok((
                LexArrayItem::Object(LexObject {
                    description: None,
                    required: None,
                    nullable: None,
                    properties: BTreeMap::new(),
                }),
                Vec::new(),
            ))
        }
        LexiconPrimitiveType::Union(refs) => {
            // Union in array - create a union with the refs
            Ok((
                LexArrayItem::Union(LexRefUnion {
                    description: None,
                    refs: refs.into_iter().map(Into::into).collect(),
                    closed: None,
                }),
                Vec::new(),
            ))
        }
        LexiconPrimitiveType::Array(_) => {
            // Nested arrays aren't supported in lexicon - return Unknown
            Ok((
                LexArrayItem::Unknown(LexUnknown { description: None }),
                Vec::new(),
            ))
        }
    }
}

/// Build a string property with its format
fn build_string_property(
    format: crate::schema::type_mapping::StringFormat,
    constraints: &LexiconFieldAttrs,
) -> LexString<'static> {
    use crate::schema::type_mapping::StringFormat;

    let format_enum = match format {
        StringFormat::Plain => None,
        StringFormat::Did => Some(LexStringFormat::Did),
        StringFormat::Handle => Some(LexStringFormat::Handle),
        StringFormat::AtUri => Some(LexStringFormat::AtUri),
        StringFormat::Nsid => Some(LexStringFormat::Nsid),
        StringFormat::Cid => Some(LexStringFormat::Cid),
        StringFormat::Datetime => Some(LexStringFormat::Datetime),
        StringFormat::Language => Some(LexStringFormat::Language),
        StringFormat::Tid => Some(LexStringFormat::Tid),
        StringFormat::RecordKey => Some(LexStringFormat::RecordKey),
        StringFormat::AtIdentifier => Some(LexStringFormat::AtIdentifier),
        StringFormat::Uri => Some(LexStringFormat::Uri),
    };

    LexString {
        description: None,
        format: format_enum,
        default: None,
        min_length: constraints.min_length,
        max_length: constraints.max_length,
        min_graphemes: constraints.min_graphemes,
        max_graphemes: constraints.max_graphemes,
        r#enum: None,
        r#const: None,
        known_values: None,
    }
}

/// Build validation checks for a field
fn build_validations(
    field_name: &str,
    schema_name: &str,
    field_type: &Type,
    is_required: bool,
    constraints: &LexiconFieldAttrs,
) -> syn::Result<Vec<ValidationCheck>> {
    let mut checks = Vec::new();
    let lex_type = rust_type_to_lexicon_type(field_type);

    let field_type_str = quote::quote!(#field_type).to_string();

    match lex_type {
        Some(LexiconPrimitiveType::String(_)) => {
            if let Some(max) = constraints.max_length {
                checks.push(ValidationCheck {
                    field_name: field_name.to_string(),
                    schema_name: schema_name.to_string(),
                    field_type: field_type_str.clone(),
                    is_required,
                    check: ConstraintCheck::MaxLength { max },
                });
            }
            if let Some(max) = constraints.max_graphemes {
                checks.push(ValidationCheck {
                    field_name: field_name.to_string(),
                    schema_name: schema_name.to_string(),
                    field_type: field_type_str.clone(),
                    is_required,
                    check: ConstraintCheck::MaxGraphemes { max },
                });
            }
            if let Some(min) = constraints.min_length {
                checks.push(ValidationCheck {
                    field_name: field_name.to_string(),
                    schema_name: schema_name.to_string(),
                    field_type: field_type_str.clone(),
                    is_required,
                    check: ConstraintCheck::MinLength { min },
                });
            }
            if let Some(min) = constraints.min_graphemes {
                checks.push(ValidationCheck {
                    field_name: field_name.to_string(),
                    schema_name: schema_name.to_string(),
                    field_type: field_type_str,
                    is_required,
                    check: ConstraintCheck::MinGraphemes { min },
                });
            }
        }
        Some(LexiconPrimitiveType::Integer) => {
            if let Some(max) = constraints.maximum {
                checks.push(ValidationCheck {
                    field_name: field_name.to_string(),
                    schema_name: schema_name.to_string(),
                    field_type: field_type_str.clone(),
                    is_required,
                    check: ConstraintCheck::Maximum { max },
                });
            }
            if let Some(min) = constraints.minimum {
                checks.push(ValidationCheck {
                    field_name: field_name.to_string(),
                    schema_name: schema_name.to_string(),
                    field_type: field_type_str,
                    is_required,
                    check: ConstraintCheck::Minimum { min },
                });
            }
        }
        Some(LexiconPrimitiveType::Array(_)) => {
            if let Some(max) = constraints.max_length {
                checks.push(ValidationCheck {
                    field_name: field_name.to_string(),
                    schema_name: schema_name.to_string(),
                    field_type: field_type_str.clone(),
                    is_required,
                    check: ConstraintCheck::MaxLength { max },
                });
            }
            if let Some(min) = constraints.min_length {
                checks.push(ValidationCheck {
                    field_name: field_name.to_string(),
                    schema_name: schema_name.to_string(),
                    field_type: field_type_str,
                    is_required,
                    check: ConstraintCheck::MinLength { min },
                });
            }
        }
        _ => {
            // No validation for other types
        }
    }

    Ok(checks)
}
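A quick sketch of what build_object_properties yields for a small struct, again as a hypothetical test. It assumes rust_type_to_lexicon_type maps String to a plain lexicon string and that parse_field_attrs tolerates fields without #[lexicon] attributes; the field names are invented.

#[cfg(test)]
mod tests {
    use super::*;
    use syn::parse_quote;

    #[test]
    fn renames_skips_and_optionality() {
        let fields = syn::Fields::Named(parse_quote!({
            #[serde(rename = "text")]
            body: String,
            created_at: Option<String>,
            #[serde(skip)]
            cached: bool,
        }));

        let props = build_object_properties(&fields, Some(RenameRule::CamelCase)).unwrap();

        // #[serde(skip)] fields are dropped entirely.
        assert_eq!(props.len(), 2);
        // An explicit rename wins over the container rule.
        assert_eq!(props[0].schema_name, "text");
        assert!(props[0].required);
        // Otherwise the rename rule applies, and Option<T> means optional.
        assert_eq!(props[1].schema_name, "createdAt");
        assert!(!props[1].required);
    }
}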
+150
crates/jacquard-lexicon/src/schema/from_ast/types.rs
//! Type definitions for schema building

use crate::lexicon::*;
use syn::Type;

/// Result of building a schema from the AST
#[derive(Debug, Clone)]
pub struct BuiltSchema {
    /// Base NSID (without fragment)
    pub nsid: String,
    /// Full schema ID (NSID + fragment if applicable)
    pub schema_id: String,
    /// The lexicon document
    pub doc: LexiconDoc<'static>,
    /// Runtime validation checks
    pub validation_checks: Vec<ValidationCheck>,
    /// Unresolved type refs (for two-pass resolution in workspace discovery)
    pub unresolved_refs: Vec<UnresolvedRef>,
}

/// A reference to a type that couldn't be resolved at build time
#[derive(Debug, Clone)]
pub struct UnresolvedRef {
    /// The Rust type that needs resolution
    pub rust_type: String,
    /// Field path where this ref appears (e.g., "main.properties.author")
    pub field_path: String,
    /// Placeholder ref currently in the schema (will be replaced)
    pub placeholder_ref: String,
}

/// A runtime validation requirement
#[derive(Debug, Clone)]
pub struct ValidationCheck {
    /// Field name (Rust identifier)
    pub field_name: String,
    /// Schema field name (JSON name after serde rename)
    pub schema_name: String,
    /// Rust type path (for diagnostic purposes)
    pub field_type: String,
    /// Is this field required (not Option<T>)?
    pub is_required: bool,
    /// The specific constraint to check
    pub check: ConstraintCheck,
}

/// Specific constraint checks
#[derive(Debug, Clone)]
pub enum ConstraintCheck {
    MaxLength { max: usize },
    MaxGraphemes { max: usize },
    MinLength { min: usize },
    MinGraphemes { min: usize },
    Maximum { max: i64 },
    Minimum { min: i64 },
}

/// Parsed lexicon attributes from a type
#[derive(Debug, Default)]
pub struct LexiconTypeAttrs {
    /// NSID for this type (required for primary types)
    pub nsid: Option<String>,
    /// Fragment name (None = not a fragment, Some("") = infer from type name)
    pub fragment: Option<String>,
    /// Type kind
    pub kind: Option<LexiconTypeKind>,
    /// Record key type (for records)
    pub key: Option<String>,
}

#[derive(Debug, Clone, Copy)]
pub enum LexiconTypeKind {
    Record,
    Query,
    Procedure,
    Subscription,
    Object,
    Union,
}

/// Parsed lexicon attributes from a field
#[derive(Debug, Default, Clone)]
pub struct LexiconFieldAttrs {
    pub max_length: Option<usize>,
    pub max_graphemes: Option<usize>,
    pub min_length: Option<usize>,
    pub min_graphemes: Option<usize>,
    pub minimum: Option<i64>,
    pub maximum: Option<i64>,
    pub explicit_ref: Option<String>,
    pub format: Option<String>,
}

/// Parsed serde attributes relevant to lexicon schema
#[derive(Debug, Default)]
pub struct SerdeAttrs {
    pub rename: Option<String>,
    pub skip: bool,
}

#[derive(Debug, Clone, Copy)]
pub enum RenameRule {
    CamelCase,
    SnakeCase,
    PascalCase,
    ScreamingSnakeCase,
    KebabCase,
}

impl RenameRule {
    pub fn from_str(s: &str) -> Option<Self> {
        match s {
            "camelCase" => Some(RenameRule::CamelCase),
            "snake_case" => Some(RenameRule::SnakeCase),
            "PascalCase" => Some(RenameRule::PascalCase),
            "SCREAMING_SNAKE_CASE" => Some(RenameRule::ScreamingSnakeCase),
            "kebab-case" => Some(RenameRule::KebabCase),
            _ => None,
        }
    }

    pub fn apply(&self, input: &str) -> String {
        use heck::*;
        match self {
            RenameRule::CamelCase => input.to_lower_camel_case(),
            RenameRule::SnakeCase => input.to_snake_case(),
            RenameRule::PascalCase => input.to_pascal_case(),
            RenameRule::ScreamingSnakeCase => input.to_shouty_snake_case(),
            RenameRule::KebabCase => input.to_kebab_case(),
        }
    }
}

/// Field property (intermediate representation)
pub struct FieldProperty {
    /// Rust field name
    pub field_name: String,
    /// JSON field name (after serde rename)
    pub schema_name: String,
    /// Rust type
    pub rust_type: Type,
    /// Is the field required?
    pub required: bool,
    /// Lexicon property
    pub property: LexObjectProperty<'static>,
    /// Validation checks
    pub validations: Vec<ValidationCheck>,
    /// Unresolved refs from this field
    pub unresolved_refs: Vec<UnresolvedRef>,
}
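For reference, how the rename rules map a typical Rust identifier (illustrative asserts only; the mappings follow heck's conventions):

// Illustrative only: RenameRule in action.
assert_eq!(RenameRule::CamelCase.apply("created_at"), "createdAt");
assert_eq!(RenameRule::KebabCase.apply("created_at"), "created-at");
assert_eq!(RenameRule::ScreamingSnakeCase.apply("created_at"), "CREATED_AT");
// Unrecognized rename_all values yield None rather than a guess.
assert!(RenameRule::from_str("lowercase").is_none());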