A better Rust ATProto crate

lexicon extraction binary

Orual 988f0eed 4c31392d

Changed files
+514 -78
+35 -70
Cargo.lock
···
 [[package]]
 name = "cc"
-version = "1.2.41"
+version = "1.2.43"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7"
+checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2"
 dependencies = [
  "find-msvc-tools",
  "jobserver",
···
 [[package]]
 name = "deranged"
-version = "0.5.4"
+version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071"
+checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587"
 dependencies = [
  "powerfmt",
  "serde_core",
···
 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys 0.61.2",
+ "windows-sys 0.59.0",
 ]
···
 [[package]]
 name = "flate2"
-version = "1.1.4"
+version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9"
+checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
 dependencies = [
  "crc32fast",
  "miniz_oxide 0.8.9",
···
  "libc",
  "percent-encoding",
  "pin-project-lite",
- "socket2 0.6.1",
+ "socket2 0.5.10",
  "system-configuration",
  "tokio",
  "tower-service",
···
  "js-sys",
  "log",
  "wasm-bindgen",
- "windows-core 0.62.2",
+ "windows-core",
 ]
···
 [[package]]
 name = "jacquard-api"
 version = "0.8.0"
-source = "git+https://tangled.org/@nonbinary.computer/jacquard#b8978f162ed3306519ec3155a5fd01fbda2390a3"
+source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
 dependencies = [
  "bon",
  "bytes",
···
 [[package]]
 name = "jacquard-common"
 version = "0.8.0"
-source = "git+https://tangled.org/@nonbinary.computer/jacquard#b8978f162ed3306519ec3155a5fd01fbda2390a3"
+source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
 dependencies = [
  "base64 0.22.1",
  "bon",
···
 [[package]]
 name = "jacquard-derive"
 version = "0.8.0"
-source = "git+https://tangled.org/@nonbinary.computer/jacquard#b8978f162ed3306519ec3155a5fd01fbda2390a3"
+source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
 dependencies = [
+ "heck 0.5.0",
+ "jacquard-lexicon 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
  "proc-macro2",
  "quote",
  "syn 2.0.108",
···
 [[package]]
 name = "jacquard-identity"
 version = "0.8.0"
-source = "git+https://tangled.org/@nonbinary.computer/jacquard#b8978f162ed3306519ec3155a5fd01fbda2390a3"
+source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
 dependencies = [
  "bon",
  "bytes",
···
  "clap_complete",
  "clap_mangen",
  "glob",
+ "inventory",
  "jacquard-api 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
  "jacquard-common 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
+ "jacquard-derive 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
  "jacquard-identity 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
  "jacquard-lexicon 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
  "kdl",
···
 [[package]]
 name = "jacquard-lexicon"
 version = "0.8.0"
-source = "git+https://tangled.org/@nonbinary.computer/jacquard#b8978f162ed3306519ec3155a5fd01fbda2390a3"
+source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
 dependencies = [
- "async-trait",
- "clap",
- "clap_complete",
- "clap_mangen",
  "glob",
  "heck 0.5.0",
- "jacquard-api 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
+ "inventory",
  "jacquard-common 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
- "jacquard-identity 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
- "kdl",
  "miette",
  "prettyplease",
  "proc-macro2",
  "quote",
- "reqwest",
  "serde",
  "serde_json",
  "serde_repr",
  "serde_with",
  "syn 2.0.108",
- "tempfile",
  "thiserror 2.0.17",
- "tokio",
+ "unicode-segmentation",
  "walkdir",
 ]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
 dependencies = [
- "windows-sys 0.61.2",
+ "windows-sys 0.59.0",
 ]
···
  "quinn-udp",
  "rustc-hash",
  "rustls",
- "socket2 0.6.1",
+ "socket2 0.5.10",
  "thiserror 2.0.17",
  "tokio",
  "tracing",
···
  "cfg_aliases",
  "libc",
  "once_cell",
- "socket2 0.6.1",
+ "socket2 0.5.10",
  "tracing",
- "windows-sys 0.60.2",
+ "windows-sys 0.59.0",
 ]
···
  "errno",
  "libc",
  "linux-raw-sys 0.11.0",
- "windows-sys 0.61.2",
+ "windows-sys 0.59.0",
 ]
···
  "getrandom 0.3.4",
  "once_cell",
  "rustix 1.1.2",
- "windows-sys 0.61.2",
+ "windows-sys 0.59.0",
 ]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
- "windows-sys 0.61.2",
+ "windows-sys 0.48.0",
 ]
···
 checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
 dependencies = [
  "windows-collections",
- "windows-core 0.61.2",
+ "windows-core",
  "windows-future",
  "windows-link 0.1.3",
  "windows-numerics",
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
 dependencies = [
- "windows-core 0.61.2",
+ "windows-core",
 ]
···
  "windows-implement",
  "windows-interface",
  "windows-link 0.1.3",
- "windows-result 0.3.4",
- "windows-strings 0.4.2",
-]
-
-[[package]]
-name = "windows-core"
-version = "0.62.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
-dependencies = [
- "windows-implement",
- "windows-interface",
- "windows-link 0.2.1",
- "windows-result 0.4.1",
- "windows-strings 0.5.1",
+ "windows-result",
+ "windows-strings",
 ]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
 dependencies = [
- "windows-core 0.61.2",
+ "windows-core",
  "windows-link 0.1.3",
  "windows-threading",
 ]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
 dependencies = [
- "windows-core 0.61.2",
+ "windows-core",
  "windows-link 0.1.3",
 ]
···
 checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
 dependencies = [
  "windows-link 0.1.3",
- "windows-result 0.3.4",
- "windows-strings 0.4.2",
+ "windows-result",
+ "windows-strings",
 ]

 [[package]]
···
 ]

 [[package]]
-name = "windows-result"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
-dependencies = [
- "windows-link 0.2.1",
-]
-
-[[package]]
 name = "windows-strings"
 version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
 dependencies = [
  "windows-link 0.1.3",
-]
-
-[[package]]
-name = "windows-strings"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
-dependencies = [
- "windows-link 0.2.1",
 ]

 [[package]]
···
+10
crates/jacquard-derive/tests/lexicon_schema_derive.rs
···
 #[derive(LexiconSchema)]
 #[lexicon(nsid = "com.example.simple", record, key = "tid")]
 struct SimpleRecord<'a> {
+    #[allow(dead_code)]
     pub text: CowStr<'a>,
+    #[allow(dead_code)]
     pub created_at: Datetime,
 }
···
 #[derive(LexiconSchema)]
 #[lexicon(nsid = "com.example.camel", record)]
 struct CamelCaseRecord {
+    #[allow(dead_code)]
     pub field_one: i64,
+    #[allow(dead_code)]
     pub field_two: i64,
 }
···
 #[lexicon(nsid = "com.example.union")]
 enum BasicUnion {
     #[nsid = "com.example.variant.one"]
+    #[allow(dead_code)]
     VariantOne,

     #[nsid = "com.example.variant.two"]
+    #[allow(dead_code)]
     VariantTwo,
 }
···
 #[open_union]
 enum OpenUnion<'a> {
     #[nsid = "com.example.variant"]
+    #[allow(dead_code)]
     Variant,

+    #[allow(dead_code)]
     Unknown(jacquard_common::types::value::Data<'a>),
 }
···
 #[lexicon(nsid = "com.example.fragments")]
 enum FragmentUnion {
     // Should generate com.example.fragments#variantOne
+    #[allow(dead_code)]
     VariantOne,
     // Should generate com.example.fragments#variantTwo
+    #[allow(dead_code)]
     VariantTwo,
 }
···
+6
crates/jacquard-lexgen/Cargo.toml
···
 name = "lex-fetch"
 path = "src/bin/lex_fetch.rs"

+[[bin]]
+name = "extract-schemas"
+path = "src/bin/extract_schemas.rs"
+
 [dependencies]
 async-trait = "0.1"
 clap.workspace = true
 glob = "0.3"
+inventory = "0.3"
 jacquard-api = { version = "0.8", git = "https://tangled.org/@nonbinary.computer/jacquard" }
 jacquard-common = { version = "0.8", features = [ "reqwest-client" ], git = "https://tangled.org/@nonbinary.computer/jacquard" }
+jacquard-derive = { version = "0.8", git = "https://tangled.org/@nonbinary.computer/jacquard" }
 jacquard-identity = { version = "0.8", git = "https://tangled.org/@nonbinary.computer/jacquard" }
 jacquard-lexicon = { version = "0.8", git = "https://tangled.org/@nonbinary.computer/jacquard" }
 kdl = "6"
···
+55
crates/jacquard-lexgen/src/bin/extract_schemas.rs
use clap::Parser;
use jacquard_lexgen::schema_extraction::{ExtractOptions, SchemaExtractor};
use miette::Result;

/// Extract lexicon schemas from compiled Rust types
#[derive(Parser, Debug)]
#[command(name = "extract-schemas")]
#[command(about = "Extract AT Protocol lexicon schemas from Rust types")]
struct Args {
    /// Output directory for generated schema files
    #[arg(short, long, default_value = "lexicons")]
    output: String,

    /// Verbose output
    #[arg(short, long)]
    verbose: bool,

    /// Filter by NSID prefix (e.g., "app.bsky")
    #[arg(short, long)]
    filter: Option<String>,

    /// Validate schemas before writing
    #[arg(short = 'V', long, default_value = "true")]
    validate: bool,

    /// Pretty-print JSON output
    #[arg(short, long, default_value = "true")]
    pretty: bool,

    /// Watch mode - regenerate on changes
    #[arg(short, long)]
    watch: bool,
}

fn main() -> Result<()> {
    let args = Args::parse();

    let options = ExtractOptions {
        output_dir: args.output.into(),
        verbose: args.verbose,
        filter: args.filter,
        validate: args.validate,
        pretty: args.pretty,
    };

    let extractor = SchemaExtractor::new(options);

    if args.watch {
        extractor.watch()?;
    } else {
        extractor.extract_all()?;
    }

    Ok(())
}
+4
crates/jacquard-lexgen/src/lib.rs
···
 //!
 //! - [`fetch`] - Ingests lexicons from git, atproto, http fetch, and other sources
 //! - [`cli`] - CLI argument parsing utilities
+//! - [`schema_extraction`] - Extract lexicon schemas from Rust types via inventory

 pub mod cli;
 pub mod fetch;
+pub mod schema_extraction;
+#[cfg(any(test, debug_assertions))]
+pub mod test_schemas;

 pub use fetch::{Config, Fetcher};
···
+288
crates/jacquard-lexgen/src/schema_extraction.rs
use jacquard_lexicon::lexicon::LexiconDoc;
use jacquard_lexicon::schema::LexiconSchemaRef;
use miette::{IntoDiagnostic, Result};
use std::collections::BTreeMap;
use std::fs;
use std::path::PathBuf;

pub struct ExtractOptions {
    pub output_dir: PathBuf,
    pub verbose: bool,
    pub filter: Option<String>,
    pub validate: bool,
    pub pretty: bool,
}

pub struct SchemaExtractor {
    options: ExtractOptions,
}

impl SchemaExtractor {
    pub fn new(options: ExtractOptions) -> Self {
        Self { options }
    }

    /// Extract all schemas from inventory
    pub fn extract_all(&self) -> Result<()> {
        if self.options.verbose {
            println!("Discovering schemas via inventory...");
        }

        // Collect all schema refs from inventory
        let refs: Vec<&LexiconSchemaRef> = inventory::iter::<LexiconSchemaRef>().collect();

        if self.options.verbose {
            println!("Found {} schema types", refs.len());
        }

        // Group by base NSID
        let grouped = self.group_by_base_nsid(&refs)?;

        // Create output directory
        fs::create_dir_all(&self.options.output_dir).into_diagnostic()?;

        // Process each group
        let mut written = 0;
        for (base_nsid, group_refs) in grouped {
            // Apply filter if specified
            if let Some(filter) = &self.options.filter {
                if !base_nsid.starts_with(filter) {
                    continue;
                }
            }

            if self.options.verbose {
                println!("Processing {} ({} types)", base_nsid, group_refs.len());
            }

            self.write_lexicon(&base_nsid, &group_refs)?;
            written += 1;
        }

        println!(
            "✓ Wrote {} lexicon files to {}",
            written,
            self.options.output_dir.display()
        );

        Ok(())
    }

    /// Group refs by base NSID (strip fragment suffix)
    fn group_by_base_nsid<'a>(
        &self,
        refs: &[&'a LexiconSchemaRef],
    ) -> Result<BTreeMap<String, Vec<&'a LexiconSchemaRef>>> {
        let mut groups: BTreeMap<String, Vec<&'a LexiconSchemaRef>> = BTreeMap::new();

        for schema_ref in refs {
            let nsid = schema_ref.nsid;

            // Split on # to get base NSID
            let base_nsid = if let Some(pos) = nsid.find('#') {
                &nsid[..pos]
            } else {
                nsid
            };

            groups
                .entry(base_nsid.to_string())
                .or_default()
                .push(schema_ref);
        }

        Ok(groups)
    }

    /// Write a single lexicon file
    fn write_lexicon(&self, base_nsid: &str, refs: &[&LexiconSchemaRef]) -> Result<()> {
        // Generate all schemas in this group
        let mut all_defs = BTreeMap::new();
        let mut primary_doc: Option<LexiconDoc> = None;

        for schema_ref in refs {
            let doc = (schema_ref.provider)();

            // Determine if this is the primary def or a fragment
            if schema_ref.nsid.contains('#') {
                // Fragment - extract def name and add to defs
                let fragment_name = schema_ref.nsid.split('#').nth(1).unwrap();

                // Merge defs from fragment doc
                for (def_name, def) in doc.defs {
                    // Use fragment name if def is "main", otherwise use as-is
                    let final_name = if def_name == "main" {
                        fragment_name.to_string()
                    } else {
                        def_name.to_string()
                    };
                    all_defs.insert(final_name, def);
                }
            } else {
                // Primary type - use as base doc
                primary_doc = Some(doc);
            }
        }

        // Build final doc
        let mut final_doc = primary_doc.unwrap_or_else(|| {
            // No primary doc - create one
            use jacquard_lexicon::lexicon::Lexicon;
            LexiconDoc {
                lexicon: Lexicon::Lexicon1,
                id: base_nsid.into(),
                revision: None,
                description: None,
                defs: BTreeMap::new(),
            }
        });

        // Merge in all defs (convert String keys to SmolStr)
        for (k, v) in all_defs {
            final_doc.defs.insert(k.into(), v);
        }

        // Validate if requested
        if self.options.validate {
            self.validate_schema(&final_doc)?;
        }

        // Serialize to JSON
        let json = if self.options.pretty {
            serde_json::to_string_pretty(&final_doc).into_diagnostic()?
        } else {
            serde_json::to_string(&final_doc).into_diagnostic()?
        };

        // Write to file
        let filename = base_nsid.replace('.', "_") + ".json";
        let path = self.options.output_dir.join(&filename);

        fs::write(&path, json).into_diagnostic()?;

        if self.options.verbose {
            println!("  Wrote {} ({} defs)", filename, final_doc.defs.len());
        }

        Ok(())
    }

    /// Validate a schema document
    fn validate_schema(&self, doc: &LexiconDoc) -> Result<()> {
        // Must have at least one def
        if doc.defs.is_empty() {
            return Err(miette::miette!("lexicon {} has no defs", doc.id));
        }

        // Warn if no "main" def and doesn't follow .defs convention
        if !doc.defs.contains_key("main") {
            let id_str = doc.id.as_ref();
            if !id_str.ends_with(".defs") {
                eprintln!(
                    "⚠️  Warning: lexicon {} has no 'main' def - consider naming it {}.defs",
                    id_str, id_str
                );
                if self.options.verbose {
                    eprintln!(
                        "   Lexicons without a primary type should use the .defs suffix (e.g., app.bsky.actor.defs)"
                    );
                }
            }
        }

        // Validate NSID format
        if !is_valid_nsid(&doc.id) {
            return Err(miette::miette!("invalid NSID format: {}", doc.id));
        }

        Ok(())
    }

    /// Watch mode - regenerate on file changes
    pub fn watch(&self) -> Result<()> {
        println!("Watch mode not yet implemented");
        println!("Run with --help to see available options");
        Ok(())
    }
}

/// Validate NSID format: domain.name.record
fn is_valid_nsid(nsid: &str) -> bool {
    let parts: Vec<&str> = nsid.split('.').collect();

    // Must have at least 3 parts
    if parts.len() < 3 {
        return false;
    }

    // Each part must be valid
    for part in parts {
        if part.is_empty() {
            return false;
        }

        // Must be alphanumeric, hyphens, or underscores
        if !part
            .chars()
            .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
        {
            return false;
        }
    }

    true
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_valid_nsid() {
        assert!(is_valid_nsid("com.example.test"));
        assert!(is_valid_nsid("app.bsky.feed.post"));
        assert!(is_valid_nsid("com.example.with_underscore"));
        assert!(is_valid_nsid("com.example.with-hyphen"));

        assert!(!is_valid_nsid("com.example")); // Too short
        assert!(!is_valid_nsid("com")); // Too short
        assert!(!is_valid_nsid("com.example.invalid!")); // Invalid char
        assert!(!is_valid_nsid("com..example")); // Empty segment
    }

    #[test]
    fn test_group_by_base_nsid() {
        let refs = vec![
            LexiconSchemaRef {
                nsid: "com.example.test",
                provider: || todo!(),
            },
            LexiconSchemaRef {
                nsid: "com.example.test#fragment",
                provider: || todo!(),
            },
            LexiconSchemaRef {
                nsid: "com.example.other",
                provider: || todo!(),
            },
        ];

        let ref_ptrs: Vec<&LexiconSchemaRef> = refs.iter().collect();

        let extractor = SchemaExtractor::new(ExtractOptions {
            output_dir: PathBuf::from("test"),
            verbose: false,
            filter: None,
            validate: false,
            pretty: true,
        });

        let grouped = extractor.group_by_base_nsid(&ref_ptrs).unwrap();

        assert_eq!(grouped.len(), 2);
        assert!(grouped.contains_key("com.example.test"));
        assert!(grouped.contains_key("com.example.other"));
        assert_eq!(grouped["com.example.test"].len(), 2);
        assert_eq!(grouped["com.example.other"].len(), 1);
    }
}
+31
crates/jacquard-lexgen/src/test_schemas.rs
// Test schemas for verifying extraction works
// These are only compiled in tests/dev builds

use jacquard_common::CowStr;
use jacquard_derive::LexiconSchema;

#[derive(LexiconSchema)]
#[lexicon(nsid = "com.example.testRecord", record, key = "tid")]
pub struct TestRecord<'a> {
    #[lexicon(max_length = 100)]
    pub text: CowStr<'a>,
    pub count: i64,
}

#[derive(LexiconSchema)]
#[lexicon(nsid = "com.example.testRecord#fragment")]
pub struct TestFragment {
    pub field: i64,
}

#[derive(LexiconSchema)]
#[lexicon(nsid = "com.example.testDefs.defs#defOne")]
pub struct DefOne {
    pub value: String,
}

#[derive(LexiconSchema)]
#[lexicon(nsid = "com.example.testDefs.defs#defTwo")]
pub struct DefTwo {
    pub number: i64,
}
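Reviewer note: with these registrations compiled in, a debug build can drive the extractor directly. A minimal sketch under the defaults above (output file names follow write_lexicon's dot-to-underscore mapping):

use jacquard_lexgen::schema_extraction::{ExtractOptions, SchemaExtractor};

fn main() -> miette::Result<()> {
    let extractor = SchemaExtractor::new(ExtractOptions {
        output_dir: "lexicons".into(),
        verbose: true,
        filter: Some("com.example".into()),
        validate: true,
        pretty: true,
    });
    // Expected output: com_example_testRecord.json (main record plus the
    // #fragment def) and com_example_testDefs_defs.json (#defOne, #defTwo;
    // no "main", but the .defs suffix keeps validate_schema from warning).
    extractor.extract_all()
}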
+82
crates/jacquard-lexgen/tests/schema_extraction.rs
use jacquard_lexgen::schema_extraction::{ExtractOptions, SchemaExtractor};
use tempfile::TempDir;

#[test]
fn test_extract_all_creates_output_dir() {
    let temp_dir = TempDir::new().unwrap();

    let options = ExtractOptions {
        output_dir: temp_dir.path().to_path_buf(),
        verbose: false,
        filter: None,
        validate: true,
        pretty: true,
    };

    let extractor = SchemaExtractor::new(options);

    // This will discover any schemas registered via inventory in the binary
    // In a minimal test environment, this might be 0
    let result = extractor.extract_all();

    // Should succeed even if no schemas found
    assert!(result.is_ok());

    // Directory should exist
    assert!(temp_dir.path().exists());
}

#[test]
fn test_extract_with_filter() {
    let temp_dir = TempDir::new().unwrap();

    let options = ExtractOptions {
        output_dir: temp_dir.path().to_path_buf(),
        verbose: false,
        filter: Some("com.example.nonexistent".into()),
        validate: true,
        pretty: true,
    };

    let extractor = SchemaExtractor::new(options);
    let result = extractor.extract_all();

    // Should succeed (just won't write any files)
    assert!(result.is_ok());
}

#[test]
fn test_extract_with_verbose() {
    let temp_dir = TempDir::new().unwrap();

    let options = ExtractOptions {
        output_dir: temp_dir.path().to_path_buf(),
        verbose: true,
        filter: None,
        validate: true,
        pretty: true,
    };

    let extractor = SchemaExtractor::new(options);
    let result = extractor.extract_all();

    assert!(result.is_ok());
}

#[test]
fn test_extract_compact_json() {
    let temp_dir = TempDir::new().unwrap();

    let options = ExtractOptions {
        output_dir: temp_dir.path().to_path_buf(),
        verbose: false,
        filter: None,
        validate: true,
        pretty: false, // Compact JSON
    };

    let extractor = SchemaExtractor::new(options);
    let result = extractor.extract_all();

    assert!(result.is_ok());
}
+3 -8
crates/jacquard-lexicon/src/derive_impl/lexicon_schema.rs
···
 //! Implementation of #[derive(LexiconSchema)] macro

-use crate::lexicon::{
-    LexArray, LexBlob, LexBoolean, LexBytes, LexCidLink, LexInteger, LexObject, LexObjectProperty,
-    LexRef, LexRefUnion, LexString, LexStringFormat, LexUnknown, LexUserType,
-};
 use crate::schema::type_mapping::{LexiconPrimitiveType, StringFormat, rust_type_to_lexicon_type};
 use heck::{ToKebabCase, ToLowerCamelCase, ToPascalCase, ToShoutySnakeCase, ToSnakeCase};
-use jacquard_common::smol_str::{SmolStr, ToSmolStr};
 use proc_macro2::TokenStream;
-use quote::{ToTokens, quote};
+use quote::quote;
 use syn::{Attribute, Data, DeriveInput, Fields, Ident, LitStr, Type, parse2};

 /// Implementation for the LexiconSchema derive macro
···
             nsid: #nsid,
             provider: || {
                 let mut generator = ::jacquard_lexicon::schema::LexiconGenerator::new(#nsid);
-                #name::lexicon_doc(&mut generator)
+                <#name as ::jacquard_lexicon::schema::LexiconSchema>::lexicon_doc(&mut generator)
             },
         }
     }
···
             nsid: #nsid,
             provider: || {
                 let mut generator = ::jacquard_lexicon::schema::LexiconGenerator::new(#nsid);
-                #name::lexicon_doc(&mut generator)
+                <#name as ::jacquard_lexicon::schema::LexiconSchema>::lexicon_doc(&mut generator)
             },
         }
     }
···
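Reviewer note: the provider change is the substantive part of this hunk. Replacing the bare #name::lexicon_doc(...) with fully qualified syntax makes the generated code resolve to the LexiconSchema trait method even if the deriving type has an inherent method of the same name, and without requiring the trait to be in scope at the expansion site. A standalone illustration of the resolution difference (toy trait, not the real one):

trait Schema {
    fn doc() -> &'static str;
}

struct Post;

impl Schema for Post {
    fn doc() -> &'static str {
        "trait impl"
    }
}

impl Post {
    // An inherent associated function with the same name takes
    // precedence when called as Post::doc().
    fn doc() -> &'static str {
        "inherent"
    }
}

fn main() {
    assert_eq!(Post::doc(), "inherent");
    // Fully qualified syntax pins the call to the trait impl,
    // which is what the regenerated provider now does.
    assert_eq!(<Post as Schema>::doc(), "trait impl");
}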