A better Rust ATProto crate

codegen cleanup part 1

Orual 03932ab0 6f0e2f92

Changed files
+385 -259
crates
+1
crates/jacquard-lexicon/src/codegen.rs
··· 6 6 7 7 pub mod lifetime; 8 8 pub mod names; 9 + pub mod nsid_utils; 9 10 pub mod output; 10 11 pub mod schema_impl; 11 12 pub mod structs;
+91 -56
crates/jacquard-lexicon/src/codegen/lifetime.rs
··· 1 1 use super::CodeGenerator; 2 - use crate::lexicon::{LexArrayItem, LexObjectProperty, LexString, LexStringFormat, LexUserType}; 2 + use crate::lexicon::{ 3 + LexArrayItem, LexObjectProperty, LexPrimitiveArrayItem, LexString, LexStringFormat, 4 + LexUserType, LexXrpcParametersProperty, 5 + }; 6 + 7 + /// Trait for lexicon types that can determine lifetime requirements 8 + trait HasLifetime { 9 + /// Check if this type needs a lifetime parameter when generated 10 + fn needs_lifetime(&self, generator: &CodeGenerator) -> bool; 11 + } 3 12 4 - impl<'c> CodeGenerator<'c> { 5 - /// Check if a property type needs a lifetime parameter 6 - pub(super) fn property_needs_lifetime(&self, prop: &LexObjectProperty<'static>) -> bool { 7 - match prop { 13 + impl HasLifetime for LexObjectProperty<'_> { 14 + fn needs_lifetime(&self, generator: &CodeGenerator) -> bool { 15 + match self { 8 16 LexObjectProperty::Boolean(_) | LexObjectProperty::Integer(_) => false, 9 - LexObjectProperty::String(s) => self.string_needs_lifetime(s), 17 + LexObjectProperty::String(s) => s.needs_lifetime(generator), 10 18 LexObjectProperty::Bytes(_) => false, // Bytes is owned 11 19 LexObjectProperty::CidLink(_) 12 20 | LexObjectProperty::Blob(_) 13 21 | LexObjectProperty::Unknown(_) => true, 14 - LexObjectProperty::Array(array) => self.array_item_needs_lifetime(&array.items), 22 + LexObjectProperty::Array(array) => array.items.needs_lifetime(generator), 15 23 LexObjectProperty::Object(_) => true, // Nested objects have lifetimes 16 - LexObjectProperty::Ref(ref_type) => { 17 - // Check if the ref target actually needs a lifetime 18 - self.ref_needs_lifetime(&ref_type.r#ref) 19 - } 24 + LexObjectProperty::Ref(ref_type) => generator.ref_needs_lifetime(&ref_type.r#ref), 20 25 LexObjectProperty::Union(_) => true, // Unions generally have lifetimes 21 26 } 22 27 } 28 + } 23 29 24 - /// Check if an array item type needs a lifetime parameter 25 - pub(super) fn array_item_needs_lifetime(&self, item: 
&LexArrayItem) -> bool { 26 - match item { 30 + impl HasLifetime for LexArrayItem<'_> { 31 + fn needs_lifetime(&self, generator: &CodeGenerator) -> bool { 32 + match self { 27 33 LexArrayItem::Boolean(_) | LexArrayItem::Integer(_) => false, 28 - LexArrayItem::String(s) => self.string_needs_lifetime(s), 34 + LexArrayItem::String(s) => s.needs_lifetime(generator), 29 35 LexArrayItem::Bytes(_) => false, 30 36 LexArrayItem::CidLink(_) | LexArrayItem::Blob(_) | LexArrayItem::Unknown(_) => true, 31 37 LexArrayItem::Object(_) => true, // Nested objects have lifetimes 32 - LexArrayItem::Ref(ref_type) => self.ref_needs_lifetime(&ref_type.r#ref), 38 + LexArrayItem::Ref(ref_type) => generator.ref_needs_lifetime(&ref_type.r#ref), 33 39 LexArrayItem::Union(_) => true, 34 40 } 35 41 } 42 + } 36 43 37 - /// Check if a string type needs a lifetime parameter 38 - pub(super) fn string_needs_lifetime(&self, s: &LexString) -> bool { 39 - match s.format { 44 + impl HasLifetime for LexString<'_> { 45 + fn needs_lifetime(&self, _generator: &CodeGenerator) -> bool { 46 + match self.format { 40 47 Some(LexStringFormat::Datetime) 41 48 | Some(LexStringFormat::Language) 42 49 | Some(LexStringFormat::Tid) => false, 43 50 _ => true, // Most string types borrow 44 51 } 45 52 } 53 + } 46 54 47 - /// Check if a ref needs a lifetime parameter 48 - pub(super) fn ref_needs_lifetime(&self, ref_str: &str) -> bool { 49 - // Try to resolve the ref 50 - if let Some((_doc, def)) = self.corpus.resolve_ref(ref_str) { 51 - self.def_needs_lifetime(def) 52 - } else { 53 - // If we can't resolve it, assume it needs a lifetime (safe default) 54 - true 55 - } 56 - } 57 - 58 - /// Check if a lexicon def needs a lifetime parameter 59 - pub(super) fn def_needs_lifetime(&self, def: &LexUserType<'static>) -> bool { 60 - match def { 55 + impl HasLifetime for LexUserType<'_> { 56 + fn needs_lifetime(&self, generator: &CodeGenerator) -> bool { 57 + match self { 61 58 LexUserType::Record(_) => true, 62 59 
LexUserType::Object(_) => true, 63 60 LexUserType::Token(_) => false, ··· 67 64 // Known values enums have Other(CowStr<'a>) variant 68 65 true 69 66 } else { 70 - self.string_needs_lifetime(s) 67 + s.needs_lifetime(generator) 71 68 } 72 69 } 73 70 LexUserType::Integer(_) => false, 74 71 LexUserType::Boolean(_) => false, 75 72 LexUserType::Bytes(_) => false, 76 73 LexUserType::CidLink(_) | LexUserType::Blob(_) | LexUserType::Unknown(_) => true, 77 - LexUserType::Array(array) => self.array_item_needs_lifetime(&array.items), 74 + LexUserType::Array(array) => array.items.needs_lifetime(generator), 78 75 LexUserType::XrpcQuery(_) 79 76 | LexUserType::XrpcProcedure(_) 80 77 | LexUserType::XrpcSubscription(_) => { ··· 85 82 LexUserType::Union(_) => false, // Unions are just refs, no lifetime needed 86 83 } 87 84 } 85 + } 86 + 87 + impl HasLifetime for LexXrpcParametersProperty<'_> { 88 + fn needs_lifetime(&self, generator: &CodeGenerator) -> bool { 89 + match self { 90 + LexXrpcParametersProperty::Boolean(_) | LexXrpcParametersProperty::Integer(_) => false, 91 + LexXrpcParametersProperty::String(s) => s.needs_lifetime(generator), 92 + LexXrpcParametersProperty::Unknown(_) => true, 93 + LexXrpcParametersProperty::Array(arr) => arr.items.needs_lifetime(generator), 94 + } 95 + } 96 + } 97 + 98 + impl HasLifetime for LexPrimitiveArrayItem<'_> { 99 + fn needs_lifetime(&self, generator: &CodeGenerator) -> bool { 100 + match self { 101 + LexPrimitiveArrayItem::Boolean(_) | LexPrimitiveArrayItem::Integer(_) => false, 102 + LexPrimitiveArrayItem::String(s) => s.needs_lifetime(generator), 103 + LexPrimitiveArrayItem::Unknown(_) => true, 104 + } 105 + } 106 + } 107 + 108 + impl<'c> CodeGenerator<'c> { 109 + /// Check if a property type needs a lifetime parameter 110 + pub(super) fn property_needs_lifetime(&self, prop: &LexObjectProperty<'_>) -> bool { 111 + prop.needs_lifetime(self) 112 + } 113 + 114 + /// Check if an array item type needs a lifetime parameter 115 + pub(super) fn 
array_item_needs_lifetime(&self, item: &LexArrayItem<'_>) -> bool { 116 + item.needs_lifetime(self) 117 + } 118 + 119 + /// Check if a string type needs a lifetime parameter 120 + pub(super) fn string_needs_lifetime(&self, s: &LexString<'_>) -> bool { 121 + s.needs_lifetime(self) 122 + } 123 + 124 + /// Check if a ref needs a lifetime parameter 125 + pub(super) fn ref_needs_lifetime(&self, ref_str: &str) -> bool { 126 + // Try to resolve the ref 127 + if let Some((_doc, def)) = self.corpus.resolve_ref(ref_str) { 128 + def.needs_lifetime(self) 129 + } else { 130 + // If we can't resolve it, assume it needs a lifetime (safe default) 131 + true 132 + } 133 + } 134 + 135 + /// Check if a lexicon def needs a lifetime parameter 136 + pub(super) fn def_needs_lifetime(&self, def: &LexUserType<'_>) -> bool { 137 + def.needs_lifetime(self) 138 + } 88 139 89 140 /// Check if xrpc params need a lifetime parameter 90 141 pub(super) fn params_need_lifetime( 91 142 &self, 92 - params: &crate::lexicon::LexXrpcParameters<'static>, 143 + params: &crate::lexicon::LexXrpcParameters<'_>, 93 144 ) -> bool { 94 - params.properties.values().any(|prop| { 95 - use crate::lexicon::LexXrpcParametersProperty; 96 - match prop { 97 - LexXrpcParametersProperty::Boolean(_) | LexXrpcParametersProperty::Integer(_) => { 98 - false 99 - } 100 - LexXrpcParametersProperty::String(s) => self.string_needs_lifetime(s), 101 - LexXrpcParametersProperty::Unknown(_) => true, 102 - LexXrpcParametersProperty::Array(arr) => { 103 - use crate::lexicon::LexPrimitiveArrayItem; 104 - match &arr.items { 105 - LexPrimitiveArrayItem::Boolean(_) | LexPrimitiveArrayItem::Integer(_) => { 106 - false 107 - } 108 - LexPrimitiveArrayItem::String(s) => self.string_needs_lifetime(s), 109 - LexPrimitiveArrayItem::Unknown(_) => true, 110 - } 111 - } 112 - } 113 - }) 145 + params 146 + .properties 147 + .values() 148 + .any(|prop| prop.needs_lifetime(self)) 114 149 } 115 150 }
+9 -5
crates/jacquard-lexicon/src/codegen/names.rs
··· 1 + use super::nsid_utils::NsidPath; 1 2 use super::utils::sanitize_name; 2 3 use super::CodeGenerator; 3 4 use heck::{ToPascalCase, ToSnakeCase}; ··· 66 67 fn def_to_base_type_name(&self, nsid: &str, def_name: &str) -> String { 67 68 if def_name == "main" { 68 69 // Use last segment of NSID 69 - let base_name = nsid.split('.').last().unwrap().to_pascal_case(); 70 + let nsid_path = NsidPath::parse(nsid); 71 + let base_name = nsid_path.last_segment().to_pascal_case(); 70 72 71 73 // Check if any other def would collide with this name 72 74 if let Some(doc) = self.corpus.get(nsid) { ··· 101 103 // Add contextual prefix to avoid collision 102 104 if def_name == "main" { 103 105 // Use second-to-last NSID segment for main defs 104 - let parts: Vec<_> = nsid.split('.').collect(); 106 + let nsid_path = NsidPath::parse(nsid); 107 + let parts = nsid_path.segments(); 105 108 if parts.len() >= 2 { 106 109 format!("{}{}", parts[parts.len() - 2].to_pascal_case(), base_name) 107 110 } else { ··· 125 128 /// - `app.bsky.feed.post` → `app_bsky/feed/post.rs` 126 129 /// - `com.atproto.label.defs` → `com_atproto/label.rs` (defs go in parent) 127 130 pub(super) fn nsid_to_file_path(&self, nsid: &str) -> std::path::PathBuf { 128 - let parts: Vec<&str> = nsid.split('.').collect(); 131 + let nsid_path = NsidPath::parse(nsid); 132 + let parts = nsid_path.segments(); 129 133 130 134 if parts.len() < 2 { 131 135 // Shouldn't happen with valid NSIDs, but handle gracefully 132 136 return format!("{}.rs", sanitize_name(parts[0])).into(); 133 137 } 134 138 135 - let last = parts.last().unwrap(); 139 + let last = nsid_path.last_segment(); 136 140 137 - if *last == "defs" && parts.len() >= 3 { 141 + if nsid_path.is_defs() && parts.len() >= 3 { 138 142 // defs go in parent module: com.atproto.label.defs → com_atproto/label.rs 139 143 let first_two = format!("{}_{}", sanitize_name(parts[0]), sanitize_name(parts[1])); 140 144 if parts.len() == 3 {
+189
crates/jacquard-lexicon/src/codegen/nsid_utils.rs
//! Utilities for parsing and working with NSIDs and refs

/// Parsed NSID components for easier manipulation.
///
/// The path borrows from the string it was parsed from. Every accessor hands
/// back data with that input lifetime (`'a`) rather than the lifetime of the
/// `NsidPath` value, so results may outlive the parser itself.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NsidPath<'a> {
    nsid: &'a str,
    segments: Vec<&'a str>,
}

impl<'a> NsidPath<'a> {
    /// Parse an NSID into its '.'-separated segments.
    ///
    /// No validation is performed: the input is split on '.' as-is.
    pub fn parse(nsid: &'a str) -> Self {
        Self {
            nsid,
            segments: nsid.split('.').collect(),
        }
    }

    /// Get the namespace (first two segments joined with '.')
    /// Returns "com.atproto" from "com.atproto.repo.strongRef"
    ///
    /// When there are fewer than two segments the whole input is returned.
    pub fn namespace(&self) -> String {
        match self.segments.as_slice() {
            [first, second, ..] => format!("{}.{}", first, second),
            _ => self.nsid.to_string(),
        }
    }

    /// Get the last segment of the NSID
    pub fn last_segment(&self) -> &'a str {
        // `str::split` always yields at least one item, so the fallback is
        // purely defensive.
        self.segments.last().copied().unwrap_or(self.nsid)
    }

    /// Get all segments except the last
    pub fn parent_segments(&self) -> &[&'a str] {
        match self.segments.split_last() {
            Some((_last, parents)) => parents,
            None => &[],
        }
    }

    /// Check if this is a "defs" NSID (ends with "defs")
    pub fn is_defs(&self) -> bool {
        self.last_segment() == "defs"
    }

    /// Get all segments
    pub fn segments(&self) -> &[&'a str] {
        &self.segments
    }

    /// Get the original NSID string
    pub fn as_str(&self) -> &'a str {
        self.nsid
    }

    /// Get number of segments
    pub fn len(&self) -> usize {
        self.segments.len()
    }

    /// Check whether there are no segments.
    ///
    /// Always `false` for a path built by [`NsidPath::parse`]: splitting even
    /// an empty string yields one (empty) segment.
    pub fn is_empty(&self) -> bool {
        self.segments.is_empty()
    }
}

/// Parsed reference with NSID and def name (the fragment, or "main" when the
/// ref has no fragment).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RefPath<'a> {
    nsid: &'a str,
    def: &'a str,
}

impl<'a> RefPath<'a> {
    /// Parse a reference string, normalizing it based on current NSID context
    ///
    /// - `#option` → NSID is `current_nsid`, def is `option`
    /// - `nsid#def` → split at the first '#'
    /// - `nsid` → def is the implicit `main`
    ///
    /// Note: a local ref (`#...`) parsed with `current_nsid == None` yields an
    /// empty NSID rather than an error; pass the current NSID whenever local
    /// refs are possible.
    pub fn parse(ref_str: &'a str, current_nsid: Option<&'a str>) -> Self {
        if let Some(fragment) = ref_str.strip_prefix('#') {
            // Local ref: #option → use current_nsid
            Self {
                nsid: current_nsid.unwrap_or(""),
                def: fragment,
            }
        } else if let Some((nsid, def)) = ref_str.split_once('#') {
            // Full ref with fragment: nsid#def
            Self { nsid, def }
        } else {
            // Full ref without fragment: nsid (implicit "main")
            Self {
                nsid: ref_str,
                def: "main",
            }
        }
    }

    /// Get the NSID portion of the ref
    pub fn nsid(&self) -> &'a str {
        self.nsid
    }

    /// Get the def name (fragment) portion of the ref
    pub fn def(&self) -> &'a str {
        self.def
    }

    /// Check if this ref points at a non-`main` def of `current_nsid`.
    ///
    /// This compares the resolved NSID, not how the ref was spelled: a fully
    /// qualified `nsid#def` whose NSID equals `current_nsid` also counts, and
    /// any ref whose def is `main` never does.
    pub fn is_local(&self, current_nsid: &str) -> bool {
        self.nsid == current_nsid && self.def != "main"
    }

    /// Get the canonical ref string.
    ///
    /// Returns `nsid#def`, except that the implicit `main` def is rendered as
    /// the bare NSID (no `#main` suffix).
    pub fn full_ref(&self) -> String {
        if self.def == "main" {
            self.nsid.to_string()
        } else {
            format!("{}#{}", self.nsid, self.def)
        }
    }

    /// Normalize a local ref by prepending the current NSID if needed
    /// Returns the normalized ref string suitable for corpus lookup
    pub fn normalize(ref_str: &str, current_nsid: &str) -> String {
        if ref_str.starts_with('#') {
            format!("{}{}", current_nsid, ref_str)
        } else {
            ref_str.to_string()
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_nsid_path_parse() {
        let path = NsidPath::parse("com.atproto.repo.strongRef");
        assert_eq!(path.segments(), &["com", "atproto", "repo", "strongRef"]);
        assert_eq!(path.namespace(), "com.atproto");
        assert_eq!(path.last_segment(), "strongRef");
        assert_eq!(path.parent_segments(), &["com", "atproto", "repo"]);
        assert!(!path.is_defs());
    }

    #[test]
    fn test_nsid_path_defs() {
        let path = NsidPath::parse("com.atproto.label.defs");
        assert!(path.is_defs());
        assert_eq!(path.last_segment(), "defs");
    }

    #[test]
    fn test_nsid_path_accessors_outlive_parser() {
        // The returned segment borrows from the input, not from the temporary.
        let last = NsidPath::parse("app.bsky.feed.post").last_segment();
        assert_eq!(last, "post");
    }

    #[test]
    fn test_ref_path_local() {
        let ref_path = RefPath::parse("#option", Some("com.example.foo"));
        assert_eq!(ref_path.nsid(), "com.example.foo");
        assert_eq!(ref_path.def(), "option");
        assert!(ref_path.is_local("com.example.foo"));
        assert_eq!(ref_path.full_ref(), "com.example.foo#option");
    }

    #[test]
    fn test_ref_path_with_fragment() {
        let ref_path = RefPath::parse("com.example.foo#bar", None);
        assert_eq!(ref_path.nsid(), "com.example.foo");
        assert_eq!(ref_path.def(), "bar");
        assert!(!ref_path.is_local("com.other.baz"));
        assert_eq!(ref_path.full_ref(), "com.example.foo#bar");
    }

    #[test]
    fn test_ref_path_implicit_main() {
        let ref_path = RefPath::parse("com.example.foo", None);
        assert_eq!(ref_path.nsid(), "com.example.foo");
        assert_eq!(ref_path.def(), "main");
        assert_eq!(ref_path.full_ref(), "com.example.foo");
    }

    #[test]
    fn test_ref_path_normalize() {
        assert_eq!(
            RefPath::normalize("#option", "com.example.foo"),
            "com.example.foo#option"
        );
        assert_eq!(
            RefPath::normalize("com.other.bar#baz", "com.example.foo"),
            "com.other.bar#baz"
        );
    }
}
+3 -6
crates/jacquard-lexicon/src/codegen/output.rs
··· 3 3 use quote::quote; 4 4 use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; 5 5 6 + use super::nsid_utils::NsidPath; 6 7 use super::utils::{make_ident, sanitize_name}; 7 8 use super::CodeGenerator; 8 9 ··· 249 250 250 251 // Collect all namespaces from the corpus (first two segments of each NSID) 251 252 for (nsid, _doc) in self.corpus.iter() { 252 - let parts: Vec<_> = nsid.as_str().splitn(3, '.').collect(); 253 - let namespace = if parts.len() >= 2 { 254 - format!("{}.{}", parts[0], parts[1]) 255 - } else { 256 - nsid.to_string() 257 - }; 253 + let nsid_path = NsidPath::parse(nsid.as_str()); 254 + let namespace = nsid_path.namespace(); 258 255 all_namespaces.insert(namespace); 259 256 } 260 257
+11 -24
crates/jacquard-lexicon/src/codegen/structs.rs
··· 6 6 use proc_macro2::TokenStream; 7 7 use quote::quote; 8 8 9 + use super::nsid_utils::{NsidPath, RefPath}; 9 10 use super::CodeGenerator; 10 11 use super::utils::{make_ident, value_to_variant_name}; 11 12 ··· 467 468 let enum_ident = syn::Ident::new(union_name, proc_macro2::Span::call_site()); 468 469 469 470 // Extract namespace prefix from current NSID (first two segments: "sh.weaver" from "sh.weaver.embed.recordWithMedia") 470 - let parts: Vec<_> = current_nsid.splitn(3, '.').collect(); 471 - let current_namespace = if parts.len() >= 2 { 472 - format!("{}.{}", parts[0], parts[1]) 473 - } else { 474 - current_nsid.to_string() 475 - }; 471 + let current_nsid_path = NsidPath::parse(current_nsid); 472 + let current_namespace = current_nsid_path.namespace(); 476 473 477 474 // First pass: collect all variant names and detect collisions 478 475 #[derive(Debug)] ··· 486 483 let mut variant_infos = Vec::new(); 487 484 for ref_str in refs { 488 485 // Normalize local refs (starting with #) by prepending current NSID 489 - let normalized_ref = if ref_str.starts_with('#') { 490 - format!("{}{}", current_nsid, ref_str) 491 - } else { 492 - ref_str.to_string() 493 - }; 486 + let normalized_ref = RefPath::normalize(ref_str, current_nsid); 494 487 495 488 // Parse ref to get NSID and def name 496 - let (ref_nsid_str, ref_def) = 497 - if let Some((nsid, fragment)) = normalized_ref.split_once('#') { 498 - (nsid, fragment) 499 - } else { 500 - (normalized_ref.as_str(), "main") 501 - }; 489 + let ref_path = RefPath::parse(&normalized_ref, None); 490 + let ref_nsid_str = ref_path.nsid(); 491 + let ref_def = ref_path.def(); 502 492 503 493 // Skip unknown refs - they'll be handled by Unknown variant 504 494 if !self.corpus.ref_exists(&normalized_ref) { ··· 555 545 556 546 // Track namespace dependency for foreign refs 557 547 if !info.is_current_namespace { 558 - let parts: Vec<_> = info.ref_nsid.splitn(3, '.').collect(); 559 - let foreign_namespace = if parts.len() >= 2 { 560 
- format!("{}.{}", parts[0], parts[1]) 561 - } else { 562 - info.ref_nsid.to_string() 563 - }; 548 + let ref_nsid_path = NsidPath::parse(&info.ref_nsid); 549 + let foreign_namespace = ref_nsid_path.namespace(); 564 550 self.namespace_deps 565 551 .borrow_mut() 566 552 .entry(current_namespace.clone()) ··· 571 557 // Disambiguate: add second NSID segment prefix only to foreign refs when there's a collision 572 558 let variant_name = if has_collision && !info.is_current_namespace { 573 559 // Get second segment (namespace identifier: "bsky" from "app.bsky.embed.images") 574 - let segments: Vec<&str> = info.ref_nsid.split('.').collect(); 560 + let ref_nsid_path = NsidPath::parse(&info.ref_nsid); 561 + let segments = ref_nsid_path.segments(); 575 562 let prefix = if segments.len() >= 2 { 576 563 segments[1].to_pascal_case() 577 564 } else {
+13 -17
crates/jacquard-lexicon/src/codegen/types.rs
··· 4 4 use proc_macro2::TokenStream; 5 5 use quote::quote; 6 6 7 + use super::nsid_utils::{NsidPath, RefPath}; 7 8 use super::CodeGenerator; 8 9 9 10 impl<'c> CodeGenerator<'c> { ··· 90 91 }; 91 92 92 93 // Parse ref to get type name 93 - let (ref_nsid, ref_def) = 94 - if let Some((nsid_part, fragment)) = ref_str.split_once('#') { 95 - (nsid_part, fragment) 96 - } else { 97 - (ref_str.as_str(), "main") 98 - }; 99 - let ref_type_name = self.def_to_type_name(ref_nsid, ref_def); 94 + let ref_path = RefPath::parse(&ref_str, None); 95 + let ref_type_name = self.def_to_type_name(ref_path.nsid(), ref_path.def()); 100 96 101 97 // If self-referential, keep union for indirection (variants are boxed) 102 98 if ref_type_name == parent_type_name { ··· 185 181 use super::utils::sanitize_name; 186 182 use crate::error::CodegenError; 187 183 188 - // Parse NSID and fragment 189 - let (ref_nsid, ref_def) = if let Some((nsid, fragment)) = ref_str.split_once('#') { 190 - (nsid, fragment) 191 - } else { 192 - (ref_str, "main") 193 - }; 184 + // Parse ref to get NSID and def 185 + let ref_path = RefPath::parse(ref_str, None); 186 + let ref_nsid = ref_path.nsid(); 187 + let ref_def = ref_path.def(); 194 188 195 189 // Check if ref exists 196 190 if !self.corpus.ref_exists(ref_str) { ··· 198 192 return Ok(quote! 
{ jacquard_common::types::value::Data<'a> }); 199 193 } 200 194 195 + // Parse NSID into components 196 + let nsid_path = NsidPath::parse(ref_nsid); 197 + let parts = nsid_path.segments(); 198 + let last_segment = nsid_path.last_segment(); 199 + 201 200 // Convert NSID to module path 202 201 // com.atproto.repo.strongRef -> com_atproto::repo::strong_ref::StrongRef 203 202 // app.bsky.richtext.facet -> app_bsky::richtext::facet::Facet 204 203 // app.bsky.actor.defs#nux -> app_bsky::actor::Nux (defs go in parent module) 205 - let parts: Vec<&str> = ref_nsid.split('.').collect(); 206 - let last_segment = parts.last().unwrap(); 207 - 208 204 let type_name = self.def_to_type_name(ref_nsid, ref_def); 209 205 210 - let path_str = if *last_segment == "defs" && parts.len() >= 3 { 206 + let path_str = if nsid_path.is_defs() && parts.len() >= 3 { 211 207 // defs types go in parent module 212 208 let first_two = format!("{}_{}", sanitize_name(parts[0]), sanitize_name(parts[1])); 213 209 if parts.len() == 3 {
+7 -12
crates/jacquard-lexicon/src/codegen/xrpc.rs
··· 7 7 use proc_macro2::TokenStream; 8 8 use quote::quote; 9 9 10 + use super::nsid_utils::{NsidPath, RefPath}; 10 11 use super::CodeGenerator; 11 12 use super::utils::make_ident; 12 13 ··· 230 231 let ref_str_s = ref_str.as_ref(); 231 232 232 233 // Normalize local refs (starting with #) by prepending current NSID 233 - let normalized_ref = if ref_str.starts_with('#') { 234 - format!("{}{}", nsid, ref_str) 235 - } else { 236 - ref_str.to_string() 237 - }; 234 + let normalized_ref = RefPath::normalize(ref_str, nsid); 238 235 239 236 // Parse ref to get NSID and def name 240 - let (ref_nsid, ref_def) = 241 - if let Some((nsid_part, fragment)) = normalized_ref.split_once('#') { 242 - (nsid_part, fragment) 243 - } else { 244 - (normalized_ref.as_str(), "main") 245 - }; 237 + let ref_path = RefPath::parse(&normalized_ref, None); 238 + let ref_nsid = ref_path.nsid(); 239 + let ref_def = ref_path.def(); 246 240 247 241 let variant_name = if ref_def == "main" { 248 - ref_nsid.split('.').last().unwrap().to_pascal_case() 242 + let ref_nsid_path = NsidPath::parse(ref_nsid); 243 + ref_nsid_path.last_segment().to_pascal_case() 249 244 } else { 250 245 ref_def.to_pascal_case() 251 246 };
+4 -8
crates/jacquard-lexicon/src/corpus.rs
··· 1 + use crate::codegen::nsid_utils::RefPath; 1 2 use crate::error::Result; 2 3 use crate::lexicon::{LexUserType, LexiconDoc}; 3 4 use jacquard_common::{into_static::IntoStatic, smol_str::SmolStr}; ··· 64 65 &self, 65 66 ref_str: &str, 66 67 ) -> Option<(&LexiconDoc<'static>, &LexUserType<'static>)> { 67 - let (nsid, def_name) = if let Some((nsid, fragment)) = ref_str.split_once('#') { 68 - (nsid, fragment) 69 - } else { 70 - (ref_str, "main") 71 - }; 72 - 73 - let doc = self.get(nsid)?; 74 - let def = doc.defs.get(def_name)?; 68 + let ref_path = RefPath::parse(ref_str, None); 69 + let doc = self.get(ref_path.nsid())?; 70 + let def = doc.defs.get(ref_path.def())?; 75 71 Some((doc, def)) 76 72 } 77 73
+57 -131
crates/jacquard-lexicon/src/validation.rs
··· 3 3 //! This module provides infrastructure for validating untyped `Data` values against 4 4 //! lexicon schemas, enabling partial deserialization, debugging, and schema migration. 5 5 6 + use crate::codegen::nsid_utils::RefPath; 7 + use crate::lexicon::{LexArrayItem, LexObjectProperty}; 6 8 use crate::schema::SchemaRegistry; 7 9 use cid::Cid as IpldCid; 8 10 use dashmap::DashMap; ··· 255 257 Ok(IpldCid::new_v1(0x71, multihash)) 256 258 } 257 259 260 + /// Trait for converting lexicon types to object properties 261 + /// 262 + /// This enables type-safe conversion between array items and object properties 263 + /// for unified validation logic. 264 + trait IntoObjectProperty<'a> { 265 + /// Convert this type to an equivalent object property 266 + fn into_object_property(self) -> LexObjectProperty<'a>; 267 + } 268 + 269 + impl<'a> IntoObjectProperty<'a> for LexArrayItem<'a> { 270 + fn into_object_property(self) -> LexObjectProperty<'a> { 271 + match self { 272 + LexArrayItem::String(s) => LexObjectProperty::String(s), 273 + LexArrayItem::Integer(i) => LexObjectProperty::Integer(i), 274 + LexArrayItem::Boolean(b) => LexObjectProperty::Boolean(b), 275 + LexArrayItem::Object(o) => LexObjectProperty::Object(o), 276 + LexArrayItem::Unknown(u) => LexObjectProperty::Unknown(u), 277 + LexArrayItem::Bytes(b) => LexObjectProperty::Bytes(b), 278 + LexArrayItem::CidLink(c) => LexObjectProperty::CidLink(c), 279 + LexArrayItem::Blob(b) => LexObjectProperty::Blob(b), 280 + LexArrayItem::Ref(r) => LexObjectProperty::Ref(r), 281 + LexArrayItem::Union(u) => LexObjectProperty::Union(u), 282 + } 283 + } 284 + } 285 + 258 286 /// Result of validating Data against a schema 259 287 /// 260 288 /// Distinguishes between structural errors (type mismatches, missing fields) and ··· 487 515 } 488 516 } 489 517 490 - /// Normalize a ref string to (nsid, def_name) 491 - fn normalize_ref(ref_str: &str, current_nsid: &str) -> (String, String) { 492 - if let Some(fragment) = 
ref_str.strip_prefix('#') { 493 - // #option -> (current_nsid, "option") 494 - (current_nsid.to_string(), fragment.to_string()) 495 - } else if let Some((nsid, def)) = ref_str.split_once('#') { 496 - // com.example.foo#bar -> ("com.example.foo", "bar") 497 - (nsid.to_string(), def.to_string()) 498 - } else { 499 - // com.example.foo -> ("com.example.foo", "main") 500 - (ref_str.to_string(), "main".to_string()) 501 - } 502 - } 503 518 504 519 /// Validate data against a lexicon def 505 520 fn validate_def( ··· 720 735 721 736 // Try to match against refs 722 737 for variant_ref in &u.refs { 723 - let (variant_nsid, variant_def) = 724 - normalize_ref(variant_ref.as_ref(), &ctx.current_nsid); 725 - let full_variant = format!("{}#{}", variant_nsid, variant_def); 738 + let ref_path = RefPath::parse(variant_ref.as_ref(), Some(&ctx.current_nsid)); 739 + let variant_nsid = ref_path.nsid().to_string(); 740 + let variant_def = ref_path.def().to_string(); 741 + let full_variant = ref_path.full_ref(); 726 742 727 743 // Match by full ref or just nsid 728 744 if type_str == full_variant || type_str == variant_nsid { ··· 779 795 } 780 796 781 797 // Normalize ref 782 - let (ref_nsid, ref_def) = normalize_ref(r.r#ref.as_ref(), &ctx.current_nsid); 783 - let full_ref = format!("{}#{}", ref_nsid, ref_def); 798 + let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(&ctx.current_nsid)); 799 + let ref_nsid = ref_path.nsid().to_string(); 800 + let ref_def = ref_path.def().to_string(); 801 + let full_ref = ref_path.full_ref(); 784 802 785 803 // Cycle detection 786 804 if ctx.ref_stack.contains(&full_ref) { ··· 861 879 fn validate_array_item( 862 880 path: &mut ValidationPath, 863 881 data: &Data, 864 - item_schema: &crate::lexicon::LexArrayItem, 882 + item_schema: &LexArrayItem, 865 883 registry: &SchemaRegistry, 866 884 ctx: &mut ValidationContext, 867 885 ) -> Vec<StructuralError> { 868 - use crate::lexicon::LexArrayItem; 869 - 870 - match item_schema { 871 - LexArrayItem::String(s) 
=> validate_property( 872 - path, 873 - data, 874 - &crate::lexicon::LexObjectProperty::String(s.clone()), 875 - registry, 876 - ctx, 877 - ), 878 - LexArrayItem::Integer(i) => validate_property( 879 - path, 880 - data, 881 - &crate::lexicon::LexObjectProperty::Integer(i.clone()), 882 - registry, 883 - ctx, 884 - ), 885 - LexArrayItem::Boolean(b) => validate_property( 886 - path, 887 - data, 888 - &crate::lexicon::LexObjectProperty::Boolean(b.clone()), 889 - registry, 890 - ctx, 891 - ), 892 - LexArrayItem::Object(o) => validate_property( 893 - path, 894 - data, 895 - &crate::lexicon::LexObjectProperty::Object(o.clone()), 896 - registry, 897 - ctx, 898 - ), 899 - LexArrayItem::Unknown(u) => validate_property( 900 - path, 901 - data, 902 - &crate::lexicon::LexObjectProperty::Unknown(u.clone()), 903 - registry, 904 - ctx, 905 - ), 906 - LexArrayItem::Bytes(b) => validate_property( 907 - path, 908 - data, 909 - &crate::lexicon::LexObjectProperty::Bytes(b.clone()), 910 - registry, 911 - ctx, 912 - ), 913 - LexArrayItem::CidLink(c) => validate_property( 914 - path, 915 - data, 916 - &crate::lexicon::LexObjectProperty::CidLink(c.clone()), 917 - registry, 918 - ctx, 919 - ), 920 - LexArrayItem::Blob(b) => validate_property( 921 - path, 922 - data, 923 - &crate::lexicon::LexObjectProperty::Blob(b.clone()), 924 - registry, 925 - ctx, 926 - ), 927 - LexArrayItem::Ref(r) => validate_property( 928 - path, 929 - data, 930 - &crate::lexicon::LexObjectProperty::Ref(r.clone()), 931 - registry, 932 - ctx, 933 - ), 934 - LexArrayItem::Union(u) => validate_property( 935 - path, 936 - data, 937 - &crate::lexicon::LexObjectProperty::Union(u.clone()), 938 - registry, 939 - ctx, 940 - ), 941 - } 886 + validate_property( 887 + path, 888 + data, 889 + &item_schema.clone().into_object_property(), 890 + registry, 891 + ctx, 892 + ) 942 893 } 943 894 944 895 // ============================================================================ ··· 1115 1066 1116 1067 LexObjectProperty::Ref(r) => { 
1117 1068 // Follow ref and check constraints 1118 - let (ref_nsid, ref_def) = normalize_ref(r.r#ref.as_ref(), current_nsid); 1069 + let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(current_nsid)); 1070 + let ref_nsid = ref_path.nsid(); 1071 + let ref_def = ref_path.def(); 1119 1072 1120 - if registry.get_def(&ref_nsid, &ref_def).is_some() { 1121 - validate_constraints_impl(path, data, &ref_nsid, &ref_def, registry) 1073 + if registry.get_def(ref_nsid, ref_def).is_some() { 1074 + validate_constraints_impl(path, data, ref_nsid, ref_def, registry) 1122 1075 } else { 1123 1076 Vec::new() 1124 1077 } ··· 1256 1209 fn check_array_item_constraints( 1257 1210 path: &mut ValidationPath, 1258 1211 data: &Data, 1259 - item_schema: &crate::lexicon::LexArrayItem, 1212 + item_schema: &LexArrayItem, 1260 1213 current_nsid: &str, 1261 1214 registry: &SchemaRegistry, 1262 1215 ) -> Vec<ConstraintError> { 1263 - use crate::lexicon::LexArrayItem; 1264 - 1265 - match item_schema { 1266 - LexArrayItem::String(s) => check_property_constraints( 1267 - path, 1268 - data, 1269 - &crate::lexicon::LexObjectProperty::String(s.clone()), 1270 - current_nsid, 1271 - registry, 1272 - ), 1273 - LexArrayItem::Integer(i) => check_property_constraints( 1274 - path, 1275 - data, 1276 - &crate::lexicon::LexObjectProperty::Integer(i.clone()), 1277 - current_nsid, 1278 - registry, 1279 - ), 1280 - LexArrayItem::Object(o) => check_property_constraints( 1281 - path, 1282 - data, 1283 - &crate::lexicon::LexObjectProperty::Object(o.clone()), 1284 - current_nsid, 1285 - registry, 1286 - ), 1287 - LexArrayItem::Ref(r) => check_property_constraints( 1288 - path, 1289 - data, 1290 - &crate::lexicon::LexObjectProperty::Ref(r.clone()), 1291 - current_nsid, 1292 - registry, 1293 - ), 1294 - // Other array item types don't have constraints 1295 - _ => Vec::new(), 1296 - } 1216 + check_property_constraints( 1217 + path, 1218 + data, 1219 + &item_schema.clone().into_object_property(), 1220 + current_nsid, 1221 + 
registry, 1222 + ) 1297 1223 } 1298 1224 1299 1225 #[cfg(test)]