A better Rust ATProto crate

more cleanup

Orual cc815abb 117b66ad

Changed files
+247 -641
+1 -1
crates/jacquard-api/Cargo.toml
···
  bytes = { workspace = true, features = ["serde"] }
  jacquard-common = { version = "0.8", path = "../jacquard-common" }
  jacquard-derive = { version = "0.8", path = "../jacquard-derive" }
- jacquard-lexicon = { version = "0.8", path = "../jacquard-lexicon" }
+ jacquard-lexicon = { version = "0.8", path = "../jacquard-lexicon", default-features = false }
  miette.workspace = true
  serde.workspace = true
  serde_ipld_dagcbor.workspace = true
+1 -1
crates/jacquard-derive/Cargo.toml
···
  [dependencies]
  heck.workspace = true
- jacquard-lexicon = { version = "0.8", path = "../jacquard-lexicon" }
+ jacquard-lexicon = { version = "0.8", path = "../jacquard-lexicon", features = ["codegen"] }
  proc-macro2.workspace = true
  quote.workspace = true
  syn.workspace = true
+9 -5
crates/jacquard-lexicon/Cargo.toml
···
  exclude.workspace = true
  license.workspace = true

+ [features]
+ default = ["codegen"]
+ codegen = ["dep:prettyplease", "dep:syn", "dep:quote", "dep:proc-macro2", "dep:heck"]
+
  [dependencies]
  cid.workspace = true
  dashmap.workspace = true
- heck.workspace = true
+ heck = { workspace = true, optional = true }
  inventory = "0.3"
  jacquard-common = { version = "0.8", path = "../jacquard-common" }
  miette = { workspace = true }
  multihash.workspace = true
- prettyplease.workspace = true
- proc-macro2.workspace = true
- quote.workspace = true
+ prettyplease = { workspace = true, optional = true }
+ proc-macro2 = { workspace = true, optional = true }
+ quote = { workspace = true, optional = true }
  serde.workspace = true
  serde_ipld_dagcbor.workspace = true
  serde_json.workspace = true
  serde_repr.workspace = true
  serde_with.workspace = true
  sha2.workspace = true
- syn.workspace = true
+ syn = { workspace = true, optional = true }
  thiserror.workspace = true
  unicode-segmentation = "1.12"
+28 -30
crates/jacquard-lexicon/src/codegen.rs
···
  use proc_macro2::TokenStream;
  use quote::quote;

- pub mod builder_heuristics;
- pub mod lifetime;
- pub mod names;
- pub mod nsid_utils;
- pub mod output;
- pub mod schema_impl;
- pub mod structs;
- pub mod types;
- pub mod union_codegen;
- pub mod utils;
- pub mod xrpc;
+ pub(crate) mod builder_heuristics;
+ pub(crate) mod lifetime;
+ pub(crate) mod names;
+ pub(crate) mod nsid_utils;
+ pub(crate) mod output;
+ pub(crate) mod schema_impl;
+ pub(crate) mod structs;
+ pub(crate) mod types;
+ pub(crate) mod union_codegen;
+ pub(crate) mod utils;
+ pub(crate) mod xrpc;

  /// Code generator for lexicon types
  pub struct CodeGenerator<'c> {
···
  let shared_fn = if !generated.contains(nsid) {
      generated.insert(nsid.to_string());
      // Codegen from JSON doesn't have union_fields (those are for Rust -> lexicon derive)
-     let doc_literal = crate::derive_impl::doc_to_tokens::doc_to_tokens(lex_doc, &std::collections::BTreeMap::new());
+     let doc_literal = crate::derive_impl::doc_to_tokens::doc_to_tokens(
+         lex_doc,
+         &std::collections::BTreeMap::new(),
+     );
      Some(quote! {
          fn #shared_fn_ident() -> ::jacquard_lexicon::lexicon::LexiconDoc<'static> {
              #doc_literal
···
  // Extract validation checks for this specific def
  let validation_checks = schema_impl::extract_validation_checks(lex_doc, def_name);
- let validation_code = crate::derive_impl::doc_to_tokens::validations_to_tokens(&validation_checks);
+ let validation_code =
+     crate::derive_impl::doc_to_tokens::validations_to_tokens(&validation_checks);

  let trait_impl = quote! {
      impl #impl_generics ::jacquard_lexicon::schema::LexiconSchema for #type_ident #type_generics {
···
      LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons").expect("load corpus");
  let codegen = CodeGenerator::new(&corpus, "jacquard_api");

- let doc = corpus
-     .get("app.bsky.embed.images")
-     .expect("get images");
+ let doc = corpus.get("app.bsky.embed.images").expect("get images");
  let def = doc.defs.get("viewImage").expect("get viewImage def");

  let tokens = codegen
···
      LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons").expect("load corpus");
  let codegen = CodeGenerator::new(&corpus, "jacquard_api");

- let doc = corpus
-     .get("test.array.types")
-     .expect("get array types");
+ let doc = corpus.get("test.array.types").expect("get array types");
  let def = doc.defs.get("main").expect("get main def");

  let tokens = codegen
···
      LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons").expect("load corpus");
  let codegen = CodeGenerator::new(&corpus, "jacquard_api");

- let doc = corpus
-     .get("test.binary.types")
-     .expect("get binary types");
+ let doc = corpus.get("test.binary.types").expect("get binary types");
  let def = doc.defs.get("main").expect("get main def");

  let tokens = codegen
···
      LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons").expect("load corpus");
  let codegen = CodeGenerator::new(&corpus, "jacquard_api");

- let doc = corpus
-     .get("test.empty.object")
-     .expect("get empty object");
+ let doc = corpus.get("test.empty.object").expect("get empty object");
  let def = doc.defs.get("emptyDef").expect("get emptyDef");

  let tokens = codegen
···
  let vote_file: syn::File = syn::parse2(vote_tokens).expect("parse vote tokens");
  let vote_formatted = prettyplease::unparse(&vote_file);
  println!("\nVote:\n{}\n", vote_formatted);
- assert!(vote_formatted.contains("struct DefinitionVote") || vote_formatted.contains("struct Vote"));
+ assert!(
+     vote_formatted.contains("struct DefinitionVote")
+         || vote_formatted.contains("struct Vote")
+ );
  assert!(vote_formatted.contains("pub poll_ref"));
  assert!(vote_formatted.contains("pub option_index"));
  }
···
  // Local ref #option should resolve to DefinitionOption type (fully qualified or local)
  assert!(
      formatted.contains("Vec<DefinitionOption")
-         || formatted.contains("Vec<jacquard_api::pub_leaflet::poll::definition::DefinitionOption")
+         || formatted
+             .contains("Vec<jacquard_api::pub_leaflet::poll::definition::DefinitionOption")
  );
  }

···
      LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons").expect("load corpus");
  let codegen = CodeGenerator::new(&corpus, "jacquard_api");

- let doc = corpus
-     .get("test.binary.types")
-     .expect("get binary types");
+ let doc = corpus.get("test.binary.types").expect("get binary types");
  let def = doc.defs.get("main").expect("get main def");

  let tokens = codegen
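Not part of the diff: a minimal sketch of how downstream code can consume the LexiconSchema impls this generator emits. The trait path and associated functions come from the impl block above; the generic helpers themselves are hypothetical.

    use jacquard_lexicon::schema::LexiconSchema;

    // Hypothetical helper: works for any type whose impl was emitted by CodeGenerator.
    fn describe<T: LexiconSchema>() -> String {
        // nsid() and def_name() are associated functions on the generated impls.
        format!("{}#{}", T::nsid(), T::def_name())
    }

    fn check<T: LexiconSchema>(value: &T) -> bool {
        // validate() runs the constraint checks emitted from the lexicon doc.
        value.validate().is_ok()
    }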
+3 -187
crates/jacquard-lexicon/src/codegen/nsid_utils.rs
··· 1 1 //! Utilities for parsing and working with NSIDs and refs 2 - 3 - /// Parsed NSID components for easier manipulation 4 - #[derive(Debug, Clone, PartialEq, Eq)] 5 - pub struct NsidPath<'a> { 6 - nsid: &'a str, 7 - segments: Vec<&'a str>, 8 - } 9 - 10 - impl<'a> NsidPath<'a> { 11 - /// Parse an NSID into its component segments 12 - pub fn parse(nsid: &'a str) -> Self { 13 - let segments: Vec<&str> = nsid.split('.').collect(); 14 - Self { nsid, segments } 15 - } 16 - 17 - /// Get the namespace (first two segments joined with '.') 18 - /// Returns "com.atproto" from "com.atproto.repo.strongRef" 19 - pub fn namespace(&self) -> String { 20 - if self.segments.len() >= 2 { 21 - format!("{}.{}", self.segments[0], self.segments[1]) 22 - } else { 23 - self.nsid.to_string() 24 - } 25 - } 26 - 27 - /// Get the last segment of the NSID 28 - pub fn last_segment(&self) -> &str { 29 - self.segments.last().copied().unwrap_or(self.nsid) 30 - } 31 - 32 - /// Get all segments except the last 33 - pub fn parent_segments(&self) -> &[&str] { 34 - if self.segments.is_empty() { 35 - &[] 36 - } else { 37 - &self.segments[..self.segments.len() - 1] 38 - } 39 - } 40 - 41 - /// Check if this is a "defs" NSID (ends with "defs") 42 - pub fn is_defs(&self) -> bool { 43 - self.last_segment() == "defs" 44 - } 45 - 46 - /// Get all segments 47 - pub fn segments(&self) -> &[&str] { 48 - &self.segments 49 - } 50 - 51 - /// Get the original NSID string 52 - pub fn as_str(&self) -> &str { 53 - self.nsid 54 - } 55 - 56 - /// Get number of segments 57 - pub fn len(&self) -> usize { 58 - self.segments.len() 59 - } 60 - 61 - /// Check if empty (should not happen with valid NSIDs) 62 - pub fn is_empty(&self) -> bool { 63 - self.segments.is_empty() 64 - } 65 - } 66 - 67 - /// Parsed reference with NSID and optional fragment 68 - #[derive(Debug, Clone, PartialEq, Eq)] 69 - pub struct RefPath<'a> { 70 - nsid: &'a str, 71 - def: &'a str, 72 - } 73 - 74 - impl<'a> RefPath<'a> { 75 - /// Parse a reference string, normalizing it based on current NSID context 76 - pub fn parse(ref_str: &'a str, current_nsid: Option<&'a str>) -> Self { 77 - if let Some(fragment) = ref_str.strip_prefix('#') { 78 - // Local ref: #option → use current_nsid 79 - let nsid = current_nsid.unwrap_or(""); 80 - Self { 81 - nsid, 82 - def: fragment, 83 - } 84 - } else if let Some((nsid, def)) = ref_str.split_once('#') { 85 - // Full ref with fragment: nsid#def 86 - Self { nsid, def } 87 - } else { 88 - // Full ref without fragment: nsid (implicit "main") 89 - Self { 90 - nsid: ref_str, 91 - def: "main", 92 - } 93 - } 94 - } 95 - 96 - /// Get the NSID portion of the ref 97 - pub fn nsid(&self) -> &str { 98 - self.nsid 99 - } 100 - 101 - /// Get the def name (fragment) portion of the ref 102 - pub fn def(&self) -> &str { 103 - self.def 104 - } 105 - 106 - /// Check if this is a local ref (was parsed from #fragment) 107 - pub fn is_local(&self, current_nsid: &str) -> bool { 108 - self.nsid == current_nsid && self.def != "main" 109 - } 110 - 111 - /// Get the full ref string (nsid#def) 112 - pub fn full_ref(&self) -> String { 113 - if self.def == "main" { 114 - self.nsid.to_string() 115 - } else { 116 - format!("{}#{}", self.nsid, self.def) 117 - } 118 - } 119 - 120 - /// Normalize a local ref by prepending the current NSID if needed 121 - /// Returns the normalized ref string suitable for corpus lookup 122 - pub fn normalize(ref_str: &str, current_nsid: &str) -> String { 123 - if ref_str.starts_with('#') { 124 - format!("{}{}", current_nsid, ref_str) 125 - } else { 
126 - ref_str.to_string() 127 - } 128 - } 129 - } 130 - 131 - #[cfg(test)] 132 - mod tests { 133 - use super::*; 2 + //! 3 + //! Re-exports core ref parsing utilities from `crate::ref_utils`. 134 4 135 - #[test] 136 - fn test_nsid_path_parse() { 137 - let path = NsidPath::parse("com.atproto.repo.strongRef"); 138 - assert_eq!(path.segments(), &["com", "atproto", "repo", "strongRef"]); 139 - assert_eq!(path.namespace(), "com.atproto"); 140 - assert_eq!(path.last_segment(), "strongRef"); 141 - assert_eq!(path.parent_segments(), &["com", "atproto", "repo"]); 142 - assert!(!path.is_defs()); 143 - } 144 - 145 - #[test] 146 - fn test_nsid_path_defs() { 147 - let path = NsidPath::parse("com.atproto.label.defs"); 148 - assert!(path.is_defs()); 149 - assert_eq!(path.last_segment(), "defs"); 150 - } 151 - 152 - #[test] 153 - fn test_ref_path_local() { 154 - let ref_path = RefPath::parse("#option", Some("com.example.foo")); 155 - assert_eq!(ref_path.nsid(), "com.example.foo"); 156 - assert_eq!(ref_path.def(), "option"); 157 - assert!(ref_path.is_local("com.example.foo")); 158 - assert_eq!(ref_path.full_ref(), "com.example.foo#option"); 159 - } 160 - 161 - #[test] 162 - fn test_ref_path_with_fragment() { 163 - let ref_path = RefPath::parse("com.example.foo#bar", None); 164 - assert_eq!(ref_path.nsid(), "com.example.foo"); 165 - assert_eq!(ref_path.def(), "bar"); 166 - assert!(!ref_path.is_local("com.other.baz")); 167 - assert_eq!(ref_path.full_ref(), "com.example.foo#bar"); 168 - } 169 - 170 - #[test] 171 - fn test_ref_path_implicit_main() { 172 - let ref_path = RefPath::parse("com.example.foo", None); 173 - assert_eq!(ref_path.nsid(), "com.example.foo"); 174 - assert_eq!(ref_path.def(), "main"); 175 - assert_eq!(ref_path.full_ref(), "com.example.foo"); 176 - } 177 - 178 - #[test] 179 - fn test_ref_path_normalize() { 180 - assert_eq!( 181 - RefPath::normalize("#option", "com.example.foo"), 182 - "com.example.foo#option" 183 - ); 184 - assert_eq!( 185 - RefPath::normalize("com.other.bar#baz", "com.example.foo"), 186 - "com.other.bar#baz" 187 - ); 188 - } 189 - } 5 + pub use crate::ref_utils::{NsidPath, RefPath};
+5 -77
crates/jacquard-lexicon/src/codegen/schema_impl.rs
···
  //! Generate LexiconSchema trait implementations for generated types

- use crate::derive_impl::doc_to_tokens;
  use crate::lexicon::{
-     LexInteger, LexObject, LexObjectProperty, LexRecordRecord, LexString,
-     LexUserType, LexiconDoc,
+     LexInteger, LexObject, LexObjectProperty, LexRecordRecord, LexString, LexUserType, LexiconDoc,
  };
  use crate::schema::from_ast::{ConstraintCheck, ValidationCheck};
- use proc_macro2::TokenStream;
- use quote::quote;
-
- /// Generate LexiconSchema impl for a generated type
- ///
- /// Takes the original lexicon doc and type metadata to generate a complete
- /// impl with const literal and validation code.
- pub fn generate_schema_impl(
-     type_name: &str,
-     doc: &LexiconDoc,
-     def_name: &str,
-     has_lifetime: bool,
- ) -> TokenStream {
-     let nsid = doc.id.as_ref();
-
-     // Generate lifetime parameter
-     let (impl_generics, type_generics) = if has_lifetime {
-         (quote! { <'a> }, quote! { <'a> })
-     } else {
-         (quote! {}, quote! {})
-     };
-
-     // Generate the lexicon doc literal using existing doc_to_tokens
-     // Codegen from JSON doesn't have union_fields (those are for Rust -> lexicon derive)
-     let doc_literal = doc_to_tokens::doc_to_tokens(doc, &std::collections::BTreeMap::new());
-
-     // Extract validation checks from lexicon doc for the specific def
-     let validation_checks = extract_validation_checks(doc, def_name);
-
-     // Generate validation code using existing validations_to_tokens
-     let validation_code = doc_to_tokens::validations_to_tokens(&validation_checks);
-
-     let type_ident = syn::Ident::new(type_name, proc_macro2::Span::call_site());
-
-     quote! {
-         impl #impl_generics ::jacquard_lexicon::schema::LexiconSchema for #type_ident #type_generics {
-             fn nsid() -> &'static str {
-                 #nsid
-             }
-
-             fn def_name() -> &'static str {
-                 #def_name
-             }
-
-             fn lexicon_doc() -> ::jacquard_lexicon::lexicon::LexiconDoc<'static> {
-                 #doc_literal
-             }
-
-             fn validate(&self) -> ::std::result::Result<(), ::jacquard_lexicon::validation::ConstraintError> {
-                 #validation_code
-             }
-         }
-     }
- }

  /// Extract validation checks from a LexiconDoc
  ///
···
  // Get the specified def
  if let Some(def) = doc.defs.get(def_name) {
      match def {
-         LexUserType::Record(rec) => {
-             match &rec.record {
-                 LexRecordRecord::Object(obj) => {
-                     checks.extend(extract_object_validations(obj));
-                 }
+         LexUserType::Record(rec) => match &rec.record {
+             LexRecordRecord::Object(obj) => {
+                 checks.extend(extract_object_validations(obj));
              }
-         }
+         },
          LexUserType::Object(obj) => {
              checks.extend(extract_object_validations(obj));
          }
···
  use heck::ToSnakeCase;
  schema_name.to_snake_case()
  }
-
- #[cfg(test)]
- mod tests {
-     use super::*;
-
-     #[test]
-     fn test_field_name_from_schema() {
-         assert_eq!(field_name_from_schema("createdAt"), "created_at");
-         assert_eq!(field_name_from_schema("maxLength"), "max_length");
-         assert_eq!(field_name_from_schema("text"), "text");
-         assert_eq!(field_name_from_schema("ref"), "ref"); // r# added by make_ident later
-         assert_eq!(field_name_from_schema("type"), "type"); // r# added by make_ident later
-     }
- }
+1 -1
crates/jacquard-lexicon/src/corpus.rs
···
- use crate::codegen::nsid_utils::RefPath;
+ use crate::ref_utils::RefPath;
  use crate::error::Result;
  use crate::lexicon::{LexUserType, LexiconDoc};
  use jacquard_common::{into_static::IntoStatic, smol_str::SmolStr};
+7 -1
crates/jacquard-lexicon/src/lib.rs
···
  //! - [`derive_impl`] - Implementation functions for derive macros (used by jacquard-derive)
  //! - [`validation`] - Runtime validation of Data against lexicon schemas

+ #[cfg(feature = "codegen")]
  pub mod codegen;
+ #[cfg(feature = "codegen")]
  pub mod corpus;
+ #[cfg(feature = "codegen")]
+ #[doc(hidden)]
  pub mod derive_impl;
+ #[cfg(feature = "codegen")]
  pub mod error;
+ #[cfg(feature = "codegen")]
  pub mod fs;
  pub mod lexicon;
+ pub mod ref_utils;
  pub mod schema;
- pub mod union_registry;
  pub mod validation;
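A sketch of what this gating means for dependents (illustrative only, not from the commit): with default-features = false only the runtime modules compile, and anything touching the corpus or codegen either enables the feature directly (as jacquard-derive does) or forwards it through a codegen feature of its own (codegen = ["jacquard-lexicon/codegen"]).

    // Always compiled: lexicon, ref_utils, schema, validation.
    use jacquard_lexicon::ref_utils::RefPath;

    fn corpus_key(ref_str: &str, current_nsid: &str) -> String {
        // Normalize "#localDef" refs against the current document's NSID.
        RefPath::normalize(ref_str, current_nsid)
    }

    // Hypothetical: a dependent crate that forwards its own `codegen` feature
    // can gate corpus access like this.
    #[cfg(feature = "codegen")]
    fn load_fixtures() {
        use jacquard_lexicon::corpus::LexiconCorpus;
        let corpus =
            LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons").expect("load corpus");
        let _post = corpus.get("app.bsky.feed.post");
    }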
+189
crates/jacquard-lexicon/src/ref_utils.rs
···
+ //! Utilities for parsing and working with NSIDs and refs
+
+ /// Parsed NSID components for easier manipulation
+ #[derive(Debug, Clone, PartialEq, Eq)]
+ pub struct NsidPath<'a> {
+     nsid: &'a str,
+     segments: Vec<&'a str>,
+ }
+
+ impl<'a> NsidPath<'a> {
+     /// Parse an NSID into its component segments
+     pub fn parse(nsid: &'a str) -> Self {
+         let segments: Vec<&str> = nsid.split('.').collect();
+         Self { nsid, segments }
+     }
+
+     /// Get the namespace (first two segments joined with '.')
+     /// Returns "com.atproto" from "com.atproto.repo.strongRef"
+     pub fn namespace(&self) -> String {
+         if self.segments.len() >= 2 {
+             format!("{}.{}", self.segments[0], self.segments[1])
+         } else {
+             self.nsid.to_string()
+         }
+     }
+
+     /// Get the last segment of the NSID
+     pub fn last_segment(&self) -> &str {
+         self.segments.last().copied().unwrap_or(self.nsid)
+     }
+
+     /// Get all segments except the last
+     pub fn parent_segments(&self) -> &[&str] {
+         if self.segments.is_empty() {
+             &[]
+         } else {
+             &self.segments[..self.segments.len() - 1]
+         }
+     }
+
+     /// Check if this is a "defs" NSID (ends with "defs")
+     pub fn is_defs(&self) -> bool {
+         self.last_segment() == "defs"
+     }
+
+     /// Get all segments
+     pub fn segments(&self) -> &[&str] {
+         &self.segments
+     }
+
+     /// Get the original NSID string
+     pub fn as_str(&self) -> &str {
+         self.nsid
+     }
+
+     /// Get number of segments
+     pub fn len(&self) -> usize {
+         self.segments.len()
+     }
+
+     /// Check if empty (should not happen with valid NSIDs)
+     pub fn is_empty(&self) -> bool {
+         self.segments.is_empty()
+     }
+ }
+
+ /// Parsed reference with NSID and optional fragment
+ #[derive(Debug, Clone, PartialEq, Eq)]
+ pub struct RefPath<'a> {
+     nsid: &'a str,
+     def: &'a str,
+ }
+
+ impl<'a> RefPath<'a> {
+     /// Parse a reference string, normalizing it based on current NSID context
+     pub fn parse(ref_str: &'a str, current_nsid: Option<&'a str>) -> Self {
+         if let Some(fragment) = ref_str.strip_prefix('#') {
+             // Local ref: #option → use current_nsid
+             let nsid = current_nsid.unwrap_or("");
+             Self {
+                 nsid,
+                 def: fragment,
+             }
+         } else if let Some((nsid, def)) = ref_str.split_once('#') {
+             // Full ref with fragment: nsid#def
+             Self { nsid, def }
+         } else {
+             // Full ref without fragment: nsid (implicit "main")
+             Self {
+                 nsid: ref_str,
+                 def: "main",
+             }
+         }
+     }
+
+     /// Get the NSID portion of the ref
+     pub fn nsid(&self) -> &str {
+         self.nsid
+     }
+
+     /// Get the def name (fragment) portion of the ref
+     pub fn def(&self) -> &str {
+         self.def
+     }
+
+     /// Check if this is a local ref (was parsed from #fragment)
+     pub fn is_local(&self, current_nsid: &str) -> bool {
+         self.nsid == current_nsid && self.def != "main"
+     }
+
+     /// Get the full ref string (nsid#def)
+     pub fn full_ref(&self) -> String {
+         if self.def == "main" {
+             self.nsid.to_string()
+         } else {
+             format!("{}#{}", self.nsid, self.def)
+         }
+     }
+
+     /// Normalize a local ref by prepending the current NSID if needed
+     /// Returns the normalized ref string suitable for corpus lookup
+     pub fn normalize(ref_str: &str, current_nsid: &str) -> String {
+         if ref_str.starts_with('#') {
+             format!("{}{}", current_nsid, ref_str)
+         } else {
+             ref_str.to_string()
+         }
+     }
+ }
+
+ #[cfg(test)]
+ mod tests {
+     use super::*;
+
+     #[test]
+     fn test_nsid_path_parse() {
+         let path = NsidPath::parse("com.atproto.repo.strongRef");
+         assert_eq!(path.segments(), &["com", "atproto", "repo", "strongRef"]);
+         assert_eq!(path.namespace(), "com.atproto");
+         assert_eq!(path.last_segment(), "strongRef");
+         assert_eq!(path.parent_segments(), &["com", "atproto", "repo"]);
+         assert!(!path.is_defs());
+     }
+
+     #[test]
+     fn test_nsid_path_defs() {
+         let path = NsidPath::parse("com.atproto.label.defs");
+         assert!(path.is_defs());
+         assert_eq!(path.last_segment(), "defs");
+     }
+
+     #[test]
+     fn test_ref_path_local() {
+         let ref_path = RefPath::parse("#option", Some("com.example.foo"));
+         assert_eq!(ref_path.nsid(), "com.example.foo");
+         assert_eq!(ref_path.def(), "option");
+         assert!(ref_path.is_local("com.example.foo"));
+         assert_eq!(ref_path.full_ref(), "com.example.foo#option");
+     }
+
+     #[test]
+     fn test_ref_path_with_fragment() {
+         let ref_path = RefPath::parse("com.example.foo#bar", None);
+         assert_eq!(ref_path.nsid(), "com.example.foo");
+         assert_eq!(ref_path.def(), "bar");
+         assert!(!ref_path.is_local("com.other.baz"));
+         assert_eq!(ref_path.full_ref(), "com.example.foo#bar");
+     }
+
+     #[test]
+     fn test_ref_path_implicit_main() {
+         let ref_path = RefPath::parse("com.example.foo", None);
+         assert_eq!(ref_path.nsid(), "com.example.foo");
+         assert_eq!(ref_path.def(), "main");
+         assert_eq!(ref_path.full_ref(), "com.example.foo");
+     }
+
+     #[test]
+     fn test_ref_path_normalize() {
+         assert_eq!(
+             RefPath::normalize("#option", "com.example.foo"),
+             "com.example.foo#option"
+         );
+         assert_eq!(
+             RefPath::normalize("com.other.bar#baz", "com.example.foo"),
+             "com.other.bar#baz"
+         );
+     }
+ }
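A condensed usage sketch of the relocated helpers, mirroring the module's own tests above (not from the commit; the example NSIDs are taken from elsewhere in this diff):

    use jacquard_lexicon::ref_utils::{NsidPath, RefPath};

    fn main() {
        // NSID introspection
        let path = NsidPath::parse("com.atproto.repo.strongRef");
        assert_eq!(path.namespace(), "com.atproto");
        assert_eq!(path.last_segment(), "strongRef");

        // Local "#option" refs resolve against the current document's NSID
        let local = RefPath::parse("#option", Some("pub.leaflet.poll.definition"));
        assert_eq!(local.full_ref(), "pub.leaflet.poll.definition#option");

        // Refs without a fragment imply the "main" def
        let main_ref = RefPath::parse("app.bsky.embed.images", None);
        assert_eq!(main_ref.def(), "main");
    }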
+2
crates/jacquard-lexicon/src/schema.rs
···
  //! - **Validation**: Runtime constraint checking via `validate()` method

  pub mod builder;
+ #[cfg(feature = "codegen")]
  pub mod from_ast;
+ #[cfg(feature = "codegen")]
  pub mod type_mapping;

  use crate::lexicon::LexiconDoc;
-337
crates/jacquard-lexicon/src/union_registry.rs
··· 1 - use crate::corpus::LexiconCorpus; 2 - use crate::lexicon::{ 3 - LexArrayItem, LexObjectProperty, LexUserType, LexXrpcBodySchema, 4 - LexXrpcSubscriptionMessageSchema, 5 - }; 6 - use jacquard_common::smol_str::{SmolStr, ToSmolStr}; 7 - use jacquard_common::{CowStr, smol_str}; 8 - use std::collections::{BTreeMap, BTreeSet}; 9 - 10 - /// Information about a single union type found in the corpus 11 - #[derive(Debug, Clone)] 12 - pub struct UnionInfo { 13 - /// NSID of the lexicon containing this union 14 - pub lexicon_nsid: SmolStr, 15 - /// Name of the def containing this union (e.g., "main", "replyRef") 16 - pub def_name: SmolStr, 17 - /// Field path within the def (e.g., "embed", "properties.embed") 18 - pub field_path: CowStr<'static>, 19 - /// Refs that exist in the corpus 20 - pub known_refs: Vec<CowStr<'static>>, 21 - /// Refs that don't exist in the corpus 22 - pub unknown_refs: Vec<CowStr<'static>>, 23 - /// Whether the union is closed (default true if not specified) 24 - pub closed: bool, 25 - } 26 - 27 - impl UnionInfo { 28 - /// Get the source text for this union's lexicon from the corpus 29 - pub fn get_source<'c>(&self, corpus: &'c LexiconCorpus) -> Option<&'c str> { 30 - corpus.get_source(&self.lexicon_nsid) 31 - } 32 - 33 - /// Check if this union has any unknown refs 34 - pub fn has_unknown_refs(&self) -> bool { 35 - !self.unknown_refs.is_empty() 36 - } 37 - 38 - /// Get all refs (known + unknown) 39 - pub fn all_refs(&self) -> impl Iterator<Item = &CowStr<'static>> { 40 - self.known_refs.iter().chain(self.unknown_refs.iter()) 41 - } 42 - } 43 - 44 - /// Registry of all union types found in the corpus 45 - #[derive(Debug, Clone)] 46 - pub struct UnionRegistry { 47 - /// Map from union identifier to union info 48 - /// Key is "{lexicon_nsid}#{def_name}:{field_path}" 49 - unions: BTreeMap<SmolStr, UnionInfo>, 50 - } 51 - 52 - impl UnionRegistry { 53 - /// Create a new empty union registry 54 - pub fn new() -> Self { 55 - Self { 56 - unions: BTreeMap::new(), 57 - } 58 - } 59 - 60 - /// Build a union registry from a corpus 61 - pub fn from_corpus(corpus: &LexiconCorpus) -> Self { 62 - let mut registry = Self::new(); 63 - 64 - for (nsid, doc) in corpus.iter() { 65 - for (def_name, def) in &doc.defs { 66 - registry.collect_unions_from_def(corpus, nsid, def_name, def); 67 - } 68 - } 69 - 70 - registry 71 - } 72 - 73 - /// Collect unions from a single def 74 - fn collect_unions_from_def( 75 - &mut self, 76 - corpus: &LexiconCorpus, 77 - nsid: &SmolStr, 78 - def_name: &SmolStr, 79 - def: &LexUserType<'static>, 80 - ) { 81 - match def { 82 - LexUserType::Record(record) => match &record.record { 83 - crate::lexicon::LexRecordRecord::Object(obj) => { 84 - self.collect_unions_from_object(corpus, nsid, def_name, "", obj); 85 - } 86 - }, 87 - LexUserType::Object(obj) => { 88 - self.collect_unions_from_object(corpus, nsid, def_name, "", obj); 89 - } 90 - LexUserType::XrpcQuery(query) => { 91 - if let Some(output) = &query.output { 92 - if let Some(schema) = &output.schema { 93 - self.collect_unions_from_xrpc_body_schema( 94 - corpus, nsid, def_name, "output", schema, 95 - ); 96 - } 97 - } 98 - } 99 - LexUserType::XrpcProcedure(proc) => { 100 - if let Some(input) = &proc.input { 101 - if let Some(schema) = &input.schema { 102 - self.collect_unions_from_xrpc_body_schema( 103 - corpus, nsid, def_name, "input", schema, 104 - ); 105 - } 106 - } 107 - if let Some(output) = &proc.output { 108 - if let Some(schema) = &output.schema { 109 - self.collect_unions_from_xrpc_body_schema( 110 - 
corpus, nsid, def_name, "output", schema, 111 - ); 112 - } 113 - } 114 - } 115 - LexUserType::XrpcSubscription(sub) => { 116 - if let Some(message) = &sub.message { 117 - if let Some(schema) = &message.schema { 118 - self.collect_unions_from_subscription_message_schema( 119 - corpus, nsid, def_name, "message", schema, 120 - ); 121 - } 122 - } 123 - } 124 - _ => {} 125 - } 126 - } 127 - 128 - /// Collect unions from an object's properties 129 - fn collect_unions_from_object( 130 - &mut self, 131 - corpus: &LexiconCorpus, 132 - nsid: &SmolStr, 133 - def_name: &SmolStr, 134 - path_prefix: &str, 135 - obj: &crate::lexicon::LexObject<'static>, 136 - ) { 137 - for (prop_name, prop) in &obj.properties { 138 - let prop_path = if path_prefix.is_empty() { 139 - prop_name.to_smolstr() 140 - } else { 141 - smol_str::format_smolstr!("{}.{}", path_prefix, prop_name) 142 - }; 143 - 144 - match prop { 145 - LexObjectProperty::Union(union) => { 146 - self.register_union( 147 - corpus, 148 - nsid, 149 - def_name, 150 - &prop_path, 151 - &union.refs, 152 - union.closed, 153 - ); 154 - } 155 - LexObjectProperty::Array(array) => { 156 - if let LexArrayItem::Union(union) = &array.items { 157 - let array_path = format!("{}[]", prop_path); 158 - self.register_union( 159 - corpus, 160 - nsid, 161 - def_name, 162 - &array_path, 163 - &union.refs, 164 - union.closed, 165 - ); 166 - } 167 - } 168 - LexObjectProperty::Ref(ref_type) => { 169 - // Check if ref points to a union 170 - if let Some((_, ref_def)) = corpus.resolve_ref(ref_type.r#ref.as_ref()) { 171 - if matches!(ref_def, LexUserType::Object(_)) { 172 - // Recursively check the referenced object 173 - // (we'll handle this in a future iteration if needed) 174 - } 175 - } 176 - } 177 - _ => {} 178 - } 179 - } 180 - } 181 - 182 - /// Collect unions from XRPC body schema 183 - fn collect_unions_from_xrpc_body_schema( 184 - &mut self, 185 - corpus: &LexiconCorpus, 186 - nsid: &SmolStr, 187 - def_name: &SmolStr, 188 - path: &str, 189 - schema: &LexXrpcBodySchema<'static>, 190 - ) { 191 - match schema { 192 - LexXrpcBodySchema::Union(union) => { 193 - self.register_union(corpus, nsid, def_name, path, &union.refs, union.closed); 194 - } 195 - LexXrpcBodySchema::Object(obj) => { 196 - self.collect_unions_from_object(corpus, nsid, def_name, path, obj); 197 - } 198 - _ => {} 199 - } 200 - } 201 - 202 - /// Collect unions from subscription message schema 203 - fn collect_unions_from_subscription_message_schema( 204 - &mut self, 205 - corpus: &LexiconCorpus, 206 - nsid: &SmolStr, 207 - def_name: &SmolStr, 208 - path: &str, 209 - schema: &LexXrpcSubscriptionMessageSchema<'static>, 210 - ) { 211 - match schema { 212 - LexXrpcSubscriptionMessageSchema::Union(union) => { 213 - self.register_union(corpus, nsid, def_name, path, &union.refs, union.closed); 214 - } 215 - LexXrpcSubscriptionMessageSchema::Object(obj) => { 216 - self.collect_unions_from_object(corpus, nsid, def_name, path, obj); 217 - } 218 - _ => {} 219 - } 220 - } 221 - 222 - /// Register a union with the registry 223 - fn register_union( 224 - &mut self, 225 - corpus: &LexiconCorpus, 226 - nsid: &SmolStr, 227 - def_name: &SmolStr, 228 - field_path: &str, 229 - refs: &[jacquard_common::CowStr<'static>], 230 - closed: Option<bool>, 231 - ) { 232 - let mut known_refs = Vec::new(); 233 - let mut unknown_refs = Vec::new(); 234 - 235 - for ref_str in refs { 236 - if corpus.ref_exists(&ref_str) { 237 - known_refs.push(ref_str.clone()); 238 - } else { 239 - unknown_refs.push(ref_str.clone()); 240 - } 241 - } 242 - 243 
- let key = smol_str::format_smolstr!("{}#{}:{}", nsid, def_name, field_path); 244 - self.unions.insert( 245 - key, 246 - UnionInfo { 247 - lexicon_nsid: nsid.clone(), 248 - def_name: def_name.clone(), 249 - field_path: CowStr::Owned(field_path.to_smolstr()), 250 - known_refs, 251 - unknown_refs, 252 - closed: closed.unwrap_or(true), 253 - }, 254 - ); 255 - } 256 - 257 - /// Get all unions 258 - pub fn iter(&self) -> impl Iterator<Item = (&SmolStr, &UnionInfo)> { 259 - self.unions.iter() 260 - } 261 - 262 - /// Get a specific union 263 - pub fn get(&self, key: &str) -> Option<&UnionInfo> { 264 - self.unions.get(key) 265 - } 266 - 267 - /// Number of unions in registry 268 - pub fn len(&self) -> usize { 269 - self.unions.len() 270 - } 271 - 272 - /// Check if registry is empty 273 - pub fn is_empty(&self) -> bool { 274 - self.unions.is_empty() 275 - } 276 - 277 - /// Get all unique refs across all unions 278 - pub fn all_refs(&self) -> BTreeSet<CowStr<'static>> { 279 - let mut refs = BTreeSet::new(); 280 - for union in self.unions.values() { 281 - refs.extend(union.known_refs.iter().cloned()); 282 - refs.extend(union.unknown_refs.iter().cloned()); 283 - } 284 - refs 285 - } 286 - } 287 - 288 - impl Default for UnionRegistry { 289 - fn default() -> Self { 290 - Self::new() 291 - } 292 - } 293 - 294 - #[cfg(test)] 295 - mod tests { 296 - use super::*; 297 - 298 - #[test] 299 - fn test_union_registry_from_corpus() { 300 - let corpus = LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons") 301 - .expect("failed to load lexicons"); 302 - 303 - let registry = UnionRegistry::from_corpus(&corpus); 304 - 305 - assert!(!registry.is_empty()); 306 - 307 - // Check that we found the embed union in post 308 - let post_embed = registry 309 - .iter() 310 - .find(|(_, info)| { 311 - info.lexicon_nsid == "app.bsky.feed.post" 312 - && info.def_name == "main" 313 - && info.field_path.contains("embed") 314 - }) 315 - .expect("should find post embed union"); 316 - 317 - let info = post_embed.1; 318 - assert!(info.known_refs.contains(&"app.bsky.embed.images".into())); 319 - assert!(info.known_refs.contains(&"app.bsky.embed.video".into())); 320 - assert!(info.known_refs.contains(&"app.bsky.embed.external".into())); 321 - } 322 - 323 - #[test] 324 - fn test_union_registry_tracks_unknown_refs() { 325 - let corpus = LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons") 326 - .expect("failed to load lexicons"); 327 - 328 - let registry = UnionRegistry::from_corpus(&corpus); 329 - 330 - // If there are any unknown refs, they should be tracked 331 - for (_, info) in registry.iter() { 332 - for unknown in &info.unknown_refs { 333 - assert!(!corpus.ref_exists(unknown)); 334 - } 335 - } 336 - } 337 - }
+1 -1
crates/jacquard-lexicon/src/validation.rs
···
  //! This module provides infrastructure for validating untyped `Data` values against
  //! lexicon schemas, enabling partial deserialization, debugging, and schema migration.

- use crate::codegen::nsid_utils::RefPath;
  use crate::lexicon::{LexArrayItem, LexObjectProperty};
+ use crate::ref_utils::RefPath;
  use crate::schema::SchemaRegistry;
  use cid::Cid as IpldCid;
  use dashmap::DashMap;