+1
crates/jacquard-lexicon/src/codegen.rs
+1
crates/jacquard-lexicon/src/codegen.rs
+91
-56
crates/jacquard-lexicon/src/codegen/lifetime.rs
+91
-56
crates/jacquard-lexicon/src/codegen/lifetime.rs
···
1
1
use super::CodeGenerator;
2
-
use crate::lexicon::{LexArrayItem, LexObjectProperty, LexString, LexStringFormat, LexUserType};
2
+
use crate::lexicon::{
3
+
LexArrayItem, LexObjectProperty, LexPrimitiveArrayItem, LexString, LexStringFormat,
4
+
LexUserType, LexXrpcParametersProperty,
5
+
};
6
+
7
+
/// Trait for lexicon types that can determine lifetime requirements
8
+
trait HasLifetime {
9
+
/// Check if this type needs a lifetime parameter when generated
10
+
fn needs_lifetime(&self, generator: &CodeGenerator) -> bool;
11
+
}
3
12
4
-
impl<'c> CodeGenerator<'c> {
5
-
/// Check if a property type needs a lifetime parameter
6
-
pub(super) fn property_needs_lifetime(&self, prop: &LexObjectProperty<'static>) -> bool {
7
-
match prop {
13
+
impl HasLifetime for LexObjectProperty<'_> {
14
+
fn needs_lifetime(&self, generator: &CodeGenerator) -> bool {
15
+
match self {
8
16
LexObjectProperty::Boolean(_) | LexObjectProperty::Integer(_) => false,
9
-
LexObjectProperty::String(s) => self.string_needs_lifetime(s),
17
+
LexObjectProperty::String(s) => s.needs_lifetime(generator),
10
18
LexObjectProperty::Bytes(_) => false, // Bytes is owned
11
19
LexObjectProperty::CidLink(_)
12
20
| LexObjectProperty::Blob(_)
13
21
| LexObjectProperty::Unknown(_) => true,
14
-
LexObjectProperty::Array(array) => self.array_item_needs_lifetime(&array.items),
22
+
LexObjectProperty::Array(array) => array.items.needs_lifetime(generator),
15
23
LexObjectProperty::Object(_) => true, // Nested objects have lifetimes
16
-
LexObjectProperty::Ref(ref_type) => {
17
-
// Check if the ref target actually needs a lifetime
18
-
self.ref_needs_lifetime(&ref_type.r#ref)
19
-
}
24
+
LexObjectProperty::Ref(ref_type) => generator.ref_needs_lifetime(&ref_type.r#ref),
20
25
LexObjectProperty::Union(_) => true, // Unions generally have lifetimes
21
26
}
22
27
}
28
+
}
23
29
24
-
/// Check if an array item type needs a lifetime parameter
25
-
pub(super) fn array_item_needs_lifetime(&self, item: &LexArrayItem) -> bool {
26
-
match item {
30
+
impl HasLifetime for LexArrayItem<'_> {
31
+
fn needs_lifetime(&self, generator: &CodeGenerator) -> bool {
32
+
match self {
27
33
LexArrayItem::Boolean(_) | LexArrayItem::Integer(_) => false,
28
-
LexArrayItem::String(s) => self.string_needs_lifetime(s),
34
+
LexArrayItem::String(s) => s.needs_lifetime(generator),
29
35
LexArrayItem::Bytes(_) => false,
30
36
LexArrayItem::CidLink(_) | LexArrayItem::Blob(_) | LexArrayItem::Unknown(_) => true,
31
37
LexArrayItem::Object(_) => true, // Nested objects have lifetimes
32
-
LexArrayItem::Ref(ref_type) => self.ref_needs_lifetime(&ref_type.r#ref),
38
+
LexArrayItem::Ref(ref_type) => generator.ref_needs_lifetime(&ref_type.r#ref),
33
39
LexArrayItem::Union(_) => true,
34
40
}
35
41
}
42
+
}
36
43
37
-
/// Check if a string type needs a lifetime parameter
38
-
pub(super) fn string_needs_lifetime(&self, s: &LexString) -> bool {
39
-
match s.format {
44
+
impl HasLifetime for LexString<'_> {
45
+
fn needs_lifetime(&self, _generator: &CodeGenerator) -> bool {
46
+
match self.format {
40
47
Some(LexStringFormat::Datetime)
41
48
| Some(LexStringFormat::Language)
42
49
| Some(LexStringFormat::Tid) => false,
43
50
_ => true, // Most string types borrow
44
51
}
45
52
}
53
+
}
46
54
47
-
/// Check if a ref needs a lifetime parameter
48
-
pub(super) fn ref_needs_lifetime(&self, ref_str: &str) -> bool {
49
-
// Try to resolve the ref
50
-
if let Some((_doc, def)) = self.corpus.resolve_ref(ref_str) {
51
-
self.def_needs_lifetime(def)
52
-
} else {
53
-
// If we can't resolve it, assume it needs a lifetime (safe default)
54
-
true
55
-
}
56
-
}
57
-
58
-
/// Check if a lexicon def needs a lifetime parameter
59
-
pub(super) fn def_needs_lifetime(&self, def: &LexUserType<'static>) -> bool {
60
-
match def {
55
+
impl HasLifetime for LexUserType<'_> {
56
+
fn needs_lifetime(&self, generator: &CodeGenerator) -> bool {
57
+
match self {
61
58
LexUserType::Record(_) => true,
62
59
LexUserType::Object(_) => true,
63
60
LexUserType::Token(_) => false,
···
67
64
// Known values enums have Other(CowStr<'a>) variant
68
65
true
69
66
} else {
70
-
self.string_needs_lifetime(s)
67
+
s.needs_lifetime(generator)
71
68
}
72
69
}
73
70
LexUserType::Integer(_) => false,
74
71
LexUserType::Boolean(_) => false,
75
72
LexUserType::Bytes(_) => false,
76
73
LexUserType::CidLink(_) | LexUserType::Blob(_) | LexUserType::Unknown(_) => true,
77
-
LexUserType::Array(array) => self.array_item_needs_lifetime(&array.items),
74
+
LexUserType::Array(array) => array.items.needs_lifetime(generator),
78
75
LexUserType::XrpcQuery(_)
79
76
| LexUserType::XrpcProcedure(_)
80
77
| LexUserType::XrpcSubscription(_) => {
···
85
82
LexUserType::Union(_) => false, // Unions are just refs, no lifetime needed
86
83
}
87
84
}
85
+
}
86
+
87
+
impl HasLifetime for LexXrpcParametersProperty<'_> {
88
+
fn needs_lifetime(&self, generator: &CodeGenerator) -> bool {
89
+
match self {
90
+
LexXrpcParametersProperty::Boolean(_) | LexXrpcParametersProperty::Integer(_) => false,
91
+
LexXrpcParametersProperty::String(s) => s.needs_lifetime(generator),
92
+
LexXrpcParametersProperty::Unknown(_) => true,
93
+
LexXrpcParametersProperty::Array(arr) => arr.items.needs_lifetime(generator),
94
+
}
95
+
}
96
+
}
97
+
98
+
impl HasLifetime for LexPrimitiveArrayItem<'_> {
99
+
fn needs_lifetime(&self, generator: &CodeGenerator) -> bool {
100
+
match self {
101
+
LexPrimitiveArrayItem::Boolean(_) | LexPrimitiveArrayItem::Integer(_) => false,
102
+
LexPrimitiveArrayItem::String(s) => s.needs_lifetime(generator),
103
+
LexPrimitiveArrayItem::Unknown(_) => true,
104
+
}
105
+
}
106
+
}
107
+
108
+
impl<'c> CodeGenerator<'c> {
109
+
/// Check if a property type needs a lifetime parameter
110
+
pub(super) fn property_needs_lifetime(&self, prop: &LexObjectProperty<'_>) -> bool {
111
+
prop.needs_lifetime(self)
112
+
}
113
+
114
+
/// Check if an array item type needs a lifetime parameter
115
+
pub(super) fn array_item_needs_lifetime(&self, item: &LexArrayItem<'_>) -> bool {
116
+
item.needs_lifetime(self)
117
+
}
118
+
119
+
/// Check if a string type needs a lifetime parameter
120
+
pub(super) fn string_needs_lifetime(&self, s: &LexString<'_>) -> bool {
121
+
s.needs_lifetime(self)
122
+
}
123
+
124
+
/// Check if a ref needs a lifetime parameter
125
+
pub(super) fn ref_needs_lifetime(&self, ref_str: &str) -> bool {
126
+
// Try to resolve the ref
127
+
if let Some((_doc, def)) = self.corpus.resolve_ref(ref_str) {
128
+
def.needs_lifetime(self)
129
+
} else {
130
+
// If we can't resolve it, assume it needs a lifetime (safe default)
131
+
true
132
+
}
133
+
}
134
+
135
+
/// Check if a lexicon def needs a lifetime parameter
136
+
pub(super) fn def_needs_lifetime(&self, def: &LexUserType<'_>) -> bool {
137
+
def.needs_lifetime(self)
138
+
}
88
139
89
140
/// Check if xrpc params need a lifetime parameter
90
141
pub(super) fn params_need_lifetime(
91
142
&self,
92
-
params: &crate::lexicon::LexXrpcParameters<'static>,
143
+
params: &crate::lexicon::LexXrpcParameters<'_>,
93
144
) -> bool {
94
-
params.properties.values().any(|prop| {
95
-
use crate::lexicon::LexXrpcParametersProperty;
96
-
match prop {
97
-
LexXrpcParametersProperty::Boolean(_) | LexXrpcParametersProperty::Integer(_) => {
98
-
false
99
-
}
100
-
LexXrpcParametersProperty::String(s) => self.string_needs_lifetime(s),
101
-
LexXrpcParametersProperty::Unknown(_) => true,
102
-
LexXrpcParametersProperty::Array(arr) => {
103
-
use crate::lexicon::LexPrimitiveArrayItem;
104
-
match &arr.items {
105
-
LexPrimitiveArrayItem::Boolean(_) | LexPrimitiveArrayItem::Integer(_) => {
106
-
false
107
-
}
108
-
LexPrimitiveArrayItem::String(s) => self.string_needs_lifetime(s),
109
-
LexPrimitiveArrayItem::Unknown(_) => true,
110
-
}
111
-
}
112
-
}
113
-
})
145
+
params
146
+
.properties
147
+
.values()
148
+
.any(|prop| prop.needs_lifetime(self))
114
149
}
115
150
}
+9
-5
crates/jacquard-lexicon/src/codegen/names.rs
+9
-5
crates/jacquard-lexicon/src/codegen/names.rs
···
1
+
use super::nsid_utils::NsidPath;
1
2
use super::utils::sanitize_name;
2
3
use super::CodeGenerator;
3
4
use heck::{ToPascalCase, ToSnakeCase};
···
66
67
fn def_to_base_type_name(&self, nsid: &str, def_name: &str) -> String {
67
68
if def_name == "main" {
68
69
// Use last segment of NSID
69
-
let base_name = nsid.split('.').last().unwrap().to_pascal_case();
70
+
let nsid_path = NsidPath::parse(nsid);
71
+
let base_name = nsid_path.last_segment().to_pascal_case();
70
72
71
73
// Check if any other def would collide with this name
72
74
if let Some(doc) = self.corpus.get(nsid) {
···
101
103
// Add contextual prefix to avoid collision
102
104
if def_name == "main" {
103
105
// Use second-to-last NSID segment for main defs
104
-
let parts: Vec<_> = nsid.split('.').collect();
106
+
let nsid_path = NsidPath::parse(nsid);
107
+
let parts = nsid_path.segments();
105
108
if parts.len() >= 2 {
106
109
format!("{}{}", parts[parts.len() - 2].to_pascal_case(), base_name)
107
110
} else {
···
125
128
/// - `app.bsky.feed.post` → `app_bsky/feed/post.rs`
126
129
/// - `com.atproto.label.defs` → `com_atproto/label.rs` (defs go in parent)
127
130
pub(super) fn nsid_to_file_path(&self, nsid: &str) -> std::path::PathBuf {
128
-
let parts: Vec<&str> = nsid.split('.').collect();
131
+
let nsid_path = NsidPath::parse(nsid);
132
+
let parts = nsid_path.segments();
129
133
130
134
if parts.len() < 2 {
131
135
// Shouldn't happen with valid NSIDs, but handle gracefully
132
136
return format!("{}.rs", sanitize_name(parts[0])).into();
133
137
}
134
138
135
-
let last = parts.last().unwrap();
139
+
let last = nsid_path.last_segment();
136
140
137
-
if *last == "defs" && parts.len() >= 3 {
141
+
if nsid_path.is_defs() && parts.len() >= 3 {
138
142
// defs go in parent module: com.atproto.label.defs → com_atproto/label.rs
139
143
let first_two = format!("{}_{}", sanitize_name(parts[0]), sanitize_name(parts[1]));
140
144
if parts.len() == 3 {
+189
crates/jacquard-lexicon/src/codegen/nsid_utils.rs
+189
crates/jacquard-lexicon/src/codegen/nsid_utils.rs
···
1
+
//! Utilities for parsing and working with NSIDs and refs
2
+
3
+
/// Parsed NSID components for easier manipulation.
///
/// The path borrows from the string it was parsed from for `'a`. Every accessor that
/// returns string data returns it with that same lifetime, so the results may outlive
/// the `NsidPath` value itself (e.g. `NsidPath::parse(nsid).last_segment()`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NsidPath<'a> {
    nsid: &'a str,
    segments: Vec<&'a str>,
}

impl<'a> NsidPath<'a> {
    /// Parse an NSID into its dot-separated segments.
    ///
    /// No validation is performed. `str::split` always yields at least one item, so an
    /// input without dots (including the empty string) produces exactly one segment.
    pub fn parse(nsid: &'a str) -> Self {
        Self {
            nsid,
            segments: nsid.split('.').collect(),
        }
    }

    /// Get the namespace (first two segments joined with '.')
    /// Returns "com.atproto" from "com.atproto.repo.strongRef"
    ///
    /// With fewer than two segments the whole NSID is returned unchanged.
    pub fn namespace(&self) -> String {
        match self.segments.as_slice() {
            [first, second, ..] => format!("{}.{}", first, second),
            _ => self.nsid.to_string(),
        }
    }

    /// Get the last segment of the NSID
    pub fn last_segment(&self) -> &'a str {
        // `parse` always stores at least one segment; the fallback only keeps this
        // total without panicking.
        self.segments.last().copied().unwrap_or(self.nsid)
    }

    /// Get all segments except the last
    pub fn parent_segments(&self) -> &[&'a str] {
        match self.segments.split_last() {
            Some((_last, parents)) => parents,
            None => &[],
        }
    }

    /// Check if this is a "defs" NSID (ends with "defs")
    pub fn is_defs(&self) -> bool {
        self.last_segment() == "defs"
    }

    /// Get all segments
    pub fn segments(&self) -> &[&'a str] {
        &self.segments
    }

    /// Get the original NSID string
    pub fn as_str(&self) -> &'a str {
        self.nsid
    }

    /// Get number of segments
    pub fn len(&self) -> usize {
        self.segments.len()
    }

    /// Check if there are no segments.
    ///
    /// Paths built by [`NsidPath::parse`] always hold at least one segment, so this is
    /// `false` for them.
    pub fn is_empty(&self) -> bool {
        self.segments.is_empty()
    }
}
66
+
67
+
/// Parsed reference with NSID and optional fragment.
///
/// Both parts borrow from the inputs of [`RefPath::parse`] for `'a`, and the accessors
/// return them with that lifetime so they may outlive the `RefPath` value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RefPath<'a> {
    nsid: &'a str,
    def: &'a str,
}

impl<'a> RefPath<'a> {
    /// Parse a reference string, normalizing it based on current NSID context
    ///
    /// - `#option` is a local ref: the NSID is `current_nsid`, or the empty string when
    ///   no context is given.
    /// - `nsid#def` is split at the first `#`.
    /// - `nsid` alone refers to the implicit `main` def.
    pub fn parse(ref_str: &'a str, current_nsid: Option<&'a str>) -> Self {
        match ref_str.split_once('#') {
            // An empty prefix means the ref started with '#': local ref.
            Some(("", fragment)) => Self {
                nsid: current_nsid.unwrap_or(""),
                def: fragment,
            },
            // Full ref with fragment: nsid#def
            Some((nsid, def)) => Self { nsid, def },
            // Full ref without fragment: nsid (implicit "main")
            None => Self {
                nsid: ref_str,
                def: "main",
            },
        }
    }

    /// Get the NSID portion of the ref
    pub fn nsid(&self) -> &'a str {
        self.nsid
    }

    /// Get the def name (fragment) portion of the ref
    pub fn def(&self) -> &'a str {
        self.def
    }

    /// Check if this ref points at a non-`main` def of `current_nsid`.
    ///
    /// This compares the parsed parts only; it does not remember whether the ref was
    /// written as `#fragment`. A fully qualified `nsid#def` whose NSID equals
    /// `current_nsid` is therefore local, and any ref to `main` is not.
    pub fn is_local(&self, current_nsid: &str) -> bool {
        self.nsid == current_nsid && self.def != "main"
    }

    /// Get the full ref string: `nsid#def`, or just `nsid` when the def is `main`.
    pub fn full_ref(&self) -> String {
        if self.def == "main" {
            self.nsid.to_string()
        } else {
            format!("{}#{}", self.nsid, self.def)
        }
    }

    /// Normalize a local ref by prepending the current NSID if needed
    /// Returns the normalized ref string suitable for corpus lookup
    ///
    /// Refs that do not start with `#` are returned unchanged.
    pub fn normalize(ref_str: &str, current_nsid: &str) -> String {
        if ref_str.starts_with('#') {
            format!("{}{}", current_nsid, ref_str)
        } else {
            ref_str.to_string()
        }
    }
}
130
+
131
+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_nsid_path_parse() {
        let path = NsidPath::parse("com.atproto.repo.strongRef");
        assert_eq!(path.segments(), &["com", "atproto", "repo", "strongRef"]);
        assert_eq!(path.namespace(), "com.atproto");
        assert_eq!(path.last_segment(), "strongRef");
        assert_eq!(path.parent_segments(), &["com", "atproto", "repo"]);
        assert!(!path.is_defs());
    }

    #[test]
    fn test_nsid_path_defs() {
        let path = NsidPath::parse("com.atproto.label.defs");
        assert!(path.is_defs());
        assert_eq!(path.last_segment(), "defs");
    }

    // A string without dots is kept as a single segment that doubles as namespace.
    #[test]
    fn test_nsid_path_single_segment() {
        let path = NsidPath::parse("localhost");
        assert_eq!(path.segments(), &["localhost"]);
        assert_eq!(path.namespace(), "localhost");
        assert_eq!(path.last_segment(), "localhost");
        assert!(path.parent_segments().is_empty());
        assert_eq!(path.as_str(), "localhost");
        assert_eq!(path.len(), 1);
        assert!(!path.is_empty());
    }

    // Splitting the empty string still yields one (empty) segment.
    #[test]
    fn test_nsid_path_empty_input() {
        let path = NsidPath::parse("");
        assert_eq!(path.len(), 1);
        assert!(!path.is_empty());
        assert_eq!(path.last_segment(), "");
        assert_eq!(path.namespace(), "");
    }

    #[test]
    fn test_ref_path_local() {
        let ref_path = RefPath::parse("#option", Some("com.example.foo"));
        assert_eq!(ref_path.nsid(), "com.example.foo");
        assert_eq!(ref_path.def(), "option");
        assert!(ref_path.is_local("com.example.foo"));
        assert_eq!(ref_path.full_ref(), "com.example.foo#option");
    }

    // Without a current NSID a local ref falls back to an empty NSID.
    #[test]
    fn test_ref_path_local_without_context() {
        let ref_path = RefPath::parse("#option", None);
        assert_eq!(ref_path.nsid(), "");
        assert_eq!(ref_path.def(), "option");
        assert_eq!(ref_path.full_ref(), "#option");
    }

    #[test]
    fn test_ref_path_with_fragment() {
        let ref_path = RefPath::parse("com.example.foo#bar", None);
        assert_eq!(ref_path.nsid(), "com.example.foo");
        assert_eq!(ref_path.def(), "bar");
        assert!(!ref_path.is_local("com.other.baz"));
        assert_eq!(ref_path.full_ref(), "com.example.foo#bar");
    }

    #[test]
    fn test_ref_path_implicit_main() {
        let ref_path = RefPath::parse("com.example.foo", None);
        assert_eq!(ref_path.nsid(), "com.example.foo");
        assert_eq!(ref_path.def(), "main");
        assert_eq!(ref_path.full_ref(), "com.example.foo");
    }

    // `is_local` compares parsed parts: a fully qualified ref into the current
    // document counts as local, while a ref to `main` never does.
    #[test]
    fn test_ref_path_is_local_semantics() {
        let same_doc = RefPath::parse("com.example.foo#bar", None);
        assert!(same_doc.is_local("com.example.foo"));

        let local_main = RefPath::parse("#main", Some("com.example.foo"));
        assert!(!local_main.is_local("com.example.foo"));
        assert_eq!(local_main.full_ref(), "com.example.foo");
    }

    #[test]
    fn test_ref_path_normalize() {
        assert_eq!(
            RefPath::normalize("#option", "com.example.foo"),
            "com.example.foo#option"
        );
        assert_eq!(
            RefPath::normalize("com.other.bar#baz", "com.example.foo"),
            "com.other.bar#baz"
        );
    }
}
+3
-6
crates/jacquard-lexicon/src/codegen/output.rs
+3
-6
crates/jacquard-lexicon/src/codegen/output.rs
···
3
3
use quote::quote;
4
4
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
5
5
6
+
use super::nsid_utils::NsidPath;
6
7
use super::utils::{make_ident, sanitize_name};
7
8
use super::CodeGenerator;
8
9
···
249
250
250
251
// Collect all namespaces from the corpus (first two segments of each NSID)
251
252
for (nsid, _doc) in self.corpus.iter() {
252
-
let parts: Vec<_> = nsid.as_str().splitn(3, '.').collect();
253
-
let namespace = if parts.len() >= 2 {
254
-
format!("{}.{}", parts[0], parts[1])
255
-
} else {
256
-
nsid.to_string()
257
-
};
253
+
let nsid_path = NsidPath::parse(nsid.as_str());
254
+
let namespace = nsid_path.namespace();
258
255
all_namespaces.insert(namespace);
259
256
}
260
257
+11
-24
crates/jacquard-lexicon/src/codegen/structs.rs
+11
-24
crates/jacquard-lexicon/src/codegen/structs.rs
···
6
6
use proc_macro2::TokenStream;
7
7
use quote::quote;
8
8
9
+
use super::nsid_utils::{NsidPath, RefPath};
9
10
use super::CodeGenerator;
10
11
use super::utils::{make_ident, value_to_variant_name};
11
12
···
467
468
let enum_ident = syn::Ident::new(union_name, proc_macro2::Span::call_site());
468
469
469
470
// Extract namespace prefix from current NSID (first two segments: "sh.weaver" from "sh.weaver.embed.recordWithMedia")
470
-
let parts: Vec<_> = current_nsid.splitn(3, '.').collect();
471
-
let current_namespace = if parts.len() >= 2 {
472
-
format!("{}.{}", parts[0], parts[1])
473
-
} else {
474
-
current_nsid.to_string()
475
-
};
471
+
let current_nsid_path = NsidPath::parse(current_nsid);
472
+
let current_namespace = current_nsid_path.namespace();
476
473
477
474
// First pass: collect all variant names and detect collisions
478
475
#[derive(Debug)]
···
486
483
let mut variant_infos = Vec::new();
487
484
for ref_str in refs {
488
485
// Normalize local refs (starting with #) by prepending current NSID
489
-
let normalized_ref = if ref_str.starts_with('#') {
490
-
format!("{}{}", current_nsid, ref_str)
491
-
} else {
492
-
ref_str.to_string()
493
-
};
486
+
let normalized_ref = RefPath::normalize(ref_str, current_nsid);
494
487
495
488
// Parse ref to get NSID and def name
496
-
let (ref_nsid_str, ref_def) =
497
-
if let Some((nsid, fragment)) = normalized_ref.split_once('#') {
498
-
(nsid, fragment)
499
-
} else {
500
-
(normalized_ref.as_str(), "main")
501
-
};
489
+
let ref_path = RefPath::parse(&normalized_ref, None);
490
+
let ref_nsid_str = ref_path.nsid();
491
+
let ref_def = ref_path.def();
502
492
503
493
// Skip unknown refs - they'll be handled by Unknown variant
504
494
if !self.corpus.ref_exists(&normalized_ref) {
···
555
545
556
546
// Track namespace dependency for foreign refs
557
547
if !info.is_current_namespace {
558
-
let parts: Vec<_> = info.ref_nsid.splitn(3, '.').collect();
559
-
let foreign_namespace = if parts.len() >= 2 {
560
-
format!("{}.{}", parts[0], parts[1])
561
-
} else {
562
-
info.ref_nsid.to_string()
563
-
};
548
+
let ref_nsid_path = NsidPath::parse(&info.ref_nsid);
549
+
let foreign_namespace = ref_nsid_path.namespace();
564
550
self.namespace_deps
565
551
.borrow_mut()
566
552
.entry(current_namespace.clone())
···
571
557
// Disambiguate: add second NSID segment prefix only to foreign refs when there's a collision
572
558
let variant_name = if has_collision && !info.is_current_namespace {
573
559
// Get second segment (namespace identifier: "bsky" from "app.bsky.embed.images")
574
-
let segments: Vec<&str> = info.ref_nsid.split('.').collect();
560
+
let ref_nsid_path = NsidPath::parse(&info.ref_nsid);
561
+
let segments = ref_nsid_path.segments();
575
562
let prefix = if segments.len() >= 2 {
576
563
segments[1].to_pascal_case()
577
564
} else {
+13
-17
crates/jacquard-lexicon/src/codegen/types.rs
+13
-17
crates/jacquard-lexicon/src/codegen/types.rs
···
4
4
use proc_macro2::TokenStream;
5
5
use quote::quote;
6
6
7
+
use super::nsid_utils::{NsidPath, RefPath};
7
8
use super::CodeGenerator;
8
9
9
10
impl<'c> CodeGenerator<'c> {
···
90
91
};
91
92
92
93
// Parse ref to get type name
93
-
let (ref_nsid, ref_def) =
94
-
if let Some((nsid_part, fragment)) = ref_str.split_once('#') {
95
-
(nsid_part, fragment)
96
-
} else {
97
-
(ref_str.as_str(), "main")
98
-
};
99
-
let ref_type_name = self.def_to_type_name(ref_nsid, ref_def);
94
+
let ref_path = RefPath::parse(&ref_str, None);
95
+
let ref_type_name = self.def_to_type_name(ref_path.nsid(), ref_path.def());
100
96
101
97
// If self-referential, keep union for indirection (variants are boxed)
102
98
if ref_type_name == parent_type_name {
···
185
181
use super::utils::sanitize_name;
186
182
use crate::error::CodegenError;
187
183
188
-
// Parse NSID and fragment
189
-
let (ref_nsid, ref_def) = if let Some((nsid, fragment)) = ref_str.split_once('#') {
190
-
(nsid, fragment)
191
-
} else {
192
-
(ref_str, "main")
193
-
};
184
+
// Parse ref to get NSID and def
185
+
let ref_path = RefPath::parse(ref_str, None);
186
+
let ref_nsid = ref_path.nsid();
187
+
let ref_def = ref_path.def();
194
188
195
189
// Check if ref exists
196
190
if !self.corpus.ref_exists(ref_str) {
···
198
192
return Ok(quote! { jacquard_common::types::value::Data<'a> });
199
193
}
200
194
195
+
// Parse NSID into components
196
+
let nsid_path = NsidPath::parse(ref_nsid);
197
+
let parts = nsid_path.segments();
198
+
let last_segment = nsid_path.last_segment();
199
+
201
200
// Convert NSID to module path
202
201
// com.atproto.repo.strongRef -> com_atproto::repo::strong_ref::StrongRef
203
202
// app.bsky.richtext.facet -> app_bsky::richtext::facet::Facet
204
203
// app.bsky.actor.defs#nux -> app_bsky::actor::Nux (defs go in parent module)
205
-
let parts: Vec<&str> = ref_nsid.split('.').collect();
206
-
let last_segment = parts.last().unwrap();
207
-
208
204
let type_name = self.def_to_type_name(ref_nsid, ref_def);
209
205
210
-
let path_str = if *last_segment == "defs" && parts.len() >= 3 {
206
+
let path_str = if nsid_path.is_defs() && parts.len() >= 3 {
211
207
// defs types go in parent module
212
208
let first_two = format!("{}_{}", sanitize_name(parts[0]), sanitize_name(parts[1]));
213
209
if parts.len() == 3 {
+7
-12
crates/jacquard-lexicon/src/codegen/xrpc.rs
+7
-12
crates/jacquard-lexicon/src/codegen/xrpc.rs
···
7
7
use proc_macro2::TokenStream;
8
8
use quote::quote;
9
9
10
+
use super::nsid_utils::{NsidPath, RefPath};
10
11
use super::CodeGenerator;
11
12
use super::utils::make_ident;
12
13
···
230
231
let ref_str_s = ref_str.as_ref();
231
232
232
233
// Normalize local refs (starting with #) by prepending current NSID
233
-
let normalized_ref = if ref_str.starts_with('#') {
234
-
format!("{}{}", nsid, ref_str)
235
-
} else {
236
-
ref_str.to_string()
237
-
};
234
+
let normalized_ref = RefPath::normalize(ref_str, nsid);
238
235
239
236
// Parse ref to get NSID and def name
240
-
let (ref_nsid, ref_def) =
241
-
if let Some((nsid_part, fragment)) = normalized_ref.split_once('#') {
242
-
(nsid_part, fragment)
243
-
} else {
244
-
(normalized_ref.as_str(), "main")
245
-
};
237
+
let ref_path = RefPath::parse(&normalized_ref, None);
238
+
let ref_nsid = ref_path.nsid();
239
+
let ref_def = ref_path.def();
246
240
247
241
let variant_name = if ref_def == "main" {
248
-
ref_nsid.split('.').last().unwrap().to_pascal_case()
242
+
let ref_nsid_path = NsidPath::parse(ref_nsid);
243
+
ref_nsid_path.last_segment().to_pascal_case()
249
244
} else {
250
245
ref_def.to_pascal_case()
251
246
};
+4
-8
crates/jacquard-lexicon/src/corpus.rs
+4
-8
crates/jacquard-lexicon/src/corpus.rs
···
1
+
use crate::codegen::nsid_utils::RefPath;
1
2
use crate::error::Result;
2
3
use crate::lexicon::{LexUserType, LexiconDoc};
3
4
use jacquard_common::{into_static::IntoStatic, smol_str::SmolStr};
···
64
65
&self,
65
66
ref_str: &str,
66
67
) -> Option<(&LexiconDoc<'static>, &LexUserType<'static>)> {
67
-
let (nsid, def_name) = if let Some((nsid, fragment)) = ref_str.split_once('#') {
68
-
(nsid, fragment)
69
-
} else {
70
-
(ref_str, "main")
71
-
};
72
-
73
-
let doc = self.get(nsid)?;
74
-
let def = doc.defs.get(def_name)?;
68
+
let ref_path = RefPath::parse(ref_str, None);
69
+
let doc = self.get(ref_path.nsid())?;
70
+
let def = doc.defs.get(ref_path.def())?;
75
71
Some((doc, def))
76
72
}
77
73
+57
-131
crates/jacquard-lexicon/src/validation.rs
+57
-131
crates/jacquard-lexicon/src/validation.rs
···
3
3
//! This module provides infrastructure for validating untyped `Data` values against
4
4
//! lexicon schemas, enabling partial deserialization, debugging, and schema migration.
5
5
6
+
use crate::codegen::nsid_utils::RefPath;
7
+
use crate::lexicon::{LexArrayItem, LexObjectProperty};
6
8
use crate::schema::SchemaRegistry;
7
9
use cid::Cid as IpldCid;
8
10
use dashmap::DashMap;
···
255
257
Ok(IpldCid::new_v1(0x71, multihash))
256
258
}
257
259
260
+
/// Trait for converting lexicon types to object properties
261
+
///
262
+
/// This enables type-safe conversion between array items and object properties
263
+
/// for unified validation logic.
264
+
trait IntoObjectProperty<'a> {
265
+
/// Convert this type to an equivalent object property
266
+
fn into_object_property(self) -> LexObjectProperty<'a>;
267
+
}
268
+
269
+
impl<'a> IntoObjectProperty<'a> for LexArrayItem<'a> {
270
+
fn into_object_property(self) -> LexObjectProperty<'a> {
271
+
match self {
272
+
LexArrayItem::String(s) => LexObjectProperty::String(s),
273
+
LexArrayItem::Integer(i) => LexObjectProperty::Integer(i),
274
+
LexArrayItem::Boolean(b) => LexObjectProperty::Boolean(b),
275
+
LexArrayItem::Object(o) => LexObjectProperty::Object(o),
276
+
LexArrayItem::Unknown(u) => LexObjectProperty::Unknown(u),
277
+
LexArrayItem::Bytes(b) => LexObjectProperty::Bytes(b),
278
+
LexArrayItem::CidLink(c) => LexObjectProperty::CidLink(c),
279
+
LexArrayItem::Blob(b) => LexObjectProperty::Blob(b),
280
+
LexArrayItem::Ref(r) => LexObjectProperty::Ref(r),
281
+
LexArrayItem::Union(u) => LexObjectProperty::Union(u),
282
+
}
283
+
}
284
+
}
285
+
258
286
/// Result of validating Data against a schema
259
287
///
260
288
/// Distinguishes between structural errors (type mismatches, missing fields) and
···
487
515
}
488
516
}
489
517
490
-
/// Normalize a ref string to (nsid, def_name)
491
-
fn normalize_ref(ref_str: &str, current_nsid: &str) -> (String, String) {
492
-
if let Some(fragment) = ref_str.strip_prefix('#') {
493
-
// #option -> (current_nsid, "option")
494
-
(current_nsid.to_string(), fragment.to_string())
495
-
} else if let Some((nsid, def)) = ref_str.split_once('#') {
496
-
// com.example.foo#bar -> ("com.example.foo", "bar")
497
-
(nsid.to_string(), def.to_string())
498
-
} else {
499
-
// com.example.foo -> ("com.example.foo", "main")
500
-
(ref_str.to_string(), "main".to_string())
501
-
}
502
-
}
503
518
504
519
/// Validate data against a lexicon def
505
520
fn validate_def(
···
720
735
721
736
// Try to match against refs
722
737
for variant_ref in &u.refs {
723
-
let (variant_nsid, variant_def) =
724
-
normalize_ref(variant_ref.as_ref(), &ctx.current_nsid);
725
-
let full_variant = format!("{}#{}", variant_nsid, variant_def);
738
+
let ref_path = RefPath::parse(variant_ref.as_ref(), Some(&ctx.current_nsid));
739
+
let variant_nsid = ref_path.nsid().to_string();
740
+
let variant_def = ref_path.def().to_string();
741
+
let full_variant = ref_path.full_ref();
726
742
727
743
// Match by full ref or just nsid
728
744
if type_str == full_variant || type_str == variant_nsid {
···
779
795
}
780
796
781
797
// Normalize ref
782
-
let (ref_nsid, ref_def) = normalize_ref(r.r#ref.as_ref(), &ctx.current_nsid);
783
-
let full_ref = format!("{}#{}", ref_nsid, ref_def);
798
+
let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(&ctx.current_nsid));
799
+
let ref_nsid = ref_path.nsid().to_string();
800
+
let ref_def = ref_path.def().to_string();
801
+
let full_ref = ref_path.full_ref();
784
802
785
803
// Cycle detection
786
804
if ctx.ref_stack.contains(&full_ref) {
···
861
879
fn validate_array_item(
862
880
path: &mut ValidationPath,
863
881
data: &Data,
864
-
item_schema: &crate::lexicon::LexArrayItem,
882
+
item_schema: &LexArrayItem,
865
883
registry: &SchemaRegistry,
866
884
ctx: &mut ValidationContext,
867
885
) -> Vec<StructuralError> {
868
-
use crate::lexicon::LexArrayItem;
869
-
870
-
match item_schema {
871
-
LexArrayItem::String(s) => validate_property(
872
-
path,
873
-
data,
874
-
&crate::lexicon::LexObjectProperty::String(s.clone()),
875
-
registry,
876
-
ctx,
877
-
),
878
-
LexArrayItem::Integer(i) => validate_property(
879
-
path,
880
-
data,
881
-
&crate::lexicon::LexObjectProperty::Integer(i.clone()),
882
-
registry,
883
-
ctx,
884
-
),
885
-
LexArrayItem::Boolean(b) => validate_property(
886
-
path,
887
-
data,
888
-
&crate::lexicon::LexObjectProperty::Boolean(b.clone()),
889
-
registry,
890
-
ctx,
891
-
),
892
-
LexArrayItem::Object(o) => validate_property(
893
-
path,
894
-
data,
895
-
&crate::lexicon::LexObjectProperty::Object(o.clone()),
896
-
registry,
897
-
ctx,
898
-
),
899
-
LexArrayItem::Unknown(u) => validate_property(
900
-
path,
901
-
data,
902
-
&crate::lexicon::LexObjectProperty::Unknown(u.clone()),
903
-
registry,
904
-
ctx,
905
-
),
906
-
LexArrayItem::Bytes(b) => validate_property(
907
-
path,
908
-
data,
909
-
&crate::lexicon::LexObjectProperty::Bytes(b.clone()),
910
-
registry,
911
-
ctx,
912
-
),
913
-
LexArrayItem::CidLink(c) => validate_property(
914
-
path,
915
-
data,
916
-
&crate::lexicon::LexObjectProperty::CidLink(c.clone()),
917
-
registry,
918
-
ctx,
919
-
),
920
-
LexArrayItem::Blob(b) => validate_property(
921
-
path,
922
-
data,
923
-
&crate::lexicon::LexObjectProperty::Blob(b.clone()),
924
-
registry,
925
-
ctx,
926
-
),
927
-
LexArrayItem::Ref(r) => validate_property(
928
-
path,
929
-
data,
930
-
&crate::lexicon::LexObjectProperty::Ref(r.clone()),
931
-
registry,
932
-
ctx,
933
-
),
934
-
LexArrayItem::Union(u) => validate_property(
935
-
path,
936
-
data,
937
-
&crate::lexicon::LexObjectProperty::Union(u.clone()),
938
-
registry,
939
-
ctx,
940
-
),
941
-
}
886
+
validate_property(
887
+
path,
888
+
data,
889
+
&item_schema.clone().into_object_property(),
890
+
registry,
891
+
ctx,
892
+
)
942
893
}
943
894
944
895
// ============================================================================
···
1115
1066
1116
1067
LexObjectProperty::Ref(r) => {
1117
1068
// Follow ref and check constraints
1118
-
let (ref_nsid, ref_def) = normalize_ref(r.r#ref.as_ref(), current_nsid);
1069
+
let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(current_nsid));
1070
+
let ref_nsid = ref_path.nsid();
1071
+
let ref_def = ref_path.def();
1119
1072
1120
-
if registry.get_def(&ref_nsid, &ref_def).is_some() {
1121
-
validate_constraints_impl(path, data, &ref_nsid, &ref_def, registry)
1073
+
if registry.get_def(ref_nsid, ref_def).is_some() {
1074
+
validate_constraints_impl(path, data, ref_nsid, ref_def, registry)
1122
1075
} else {
1123
1076
Vec::new()
1124
1077
}
···
1256
1209
fn check_array_item_constraints(
1257
1210
path: &mut ValidationPath,
1258
1211
data: &Data,
1259
-
item_schema: &crate::lexicon::LexArrayItem,
1212
+
item_schema: &LexArrayItem,
1260
1213
current_nsid: &str,
1261
1214
registry: &SchemaRegistry,
1262
1215
) -> Vec<ConstraintError> {
1263
-
use crate::lexicon::LexArrayItem;
1264
-
1265
-
match item_schema {
1266
-
LexArrayItem::String(s) => check_property_constraints(
1267
-
path,
1268
-
data,
1269
-
&crate::lexicon::LexObjectProperty::String(s.clone()),
1270
-
current_nsid,
1271
-
registry,
1272
-
),
1273
-
LexArrayItem::Integer(i) => check_property_constraints(
1274
-
path,
1275
-
data,
1276
-
&crate::lexicon::LexObjectProperty::Integer(i.clone()),
1277
-
current_nsid,
1278
-
registry,
1279
-
),
1280
-
LexArrayItem::Object(o) => check_property_constraints(
1281
-
path,
1282
-
data,
1283
-
&crate::lexicon::LexObjectProperty::Object(o.clone()),
1284
-
current_nsid,
1285
-
registry,
1286
-
),
1287
-
LexArrayItem::Ref(r) => check_property_constraints(
1288
-
path,
1289
-
data,
1290
-
&crate::lexicon::LexObjectProperty::Ref(r.clone()),
1291
-
current_nsid,
1292
-
registry,
1293
-
),
1294
-
// Other array item types don't have constraints
1295
-
_ => Vec::new(),
1296
-
}
1216
+
check_property_constraints(
1217
+
path,
1218
+
data,
1219
+
&item_schema.clone().into_object_property(),
1220
+
current_nsid,
1221
+
registry,
1222
+
)
1297
1223
}
1298
1224
1299
1225
#[cfg(test)]