+1
-1
crates/jacquard-api/Cargo.toml
+1
-1
crates/jacquard-api/Cargo.toml
···
19
19
bytes = { workspace = true, features = ["serde"] }
20
20
jacquard-common = { version = "0.8", path = "../jacquard-common" }
21
21
jacquard-derive = { version = "0.8", path = "../jacquard-derive" }
22
-
jacquard-lexicon = { version = "0.8", path = "../jacquard-lexicon" }
22
+
jacquard-lexicon = { version = "0.8", path = "../jacquard-lexicon", default-features = false }
23
23
miette.workspace = true
24
24
serde.workspace = true
25
25
serde_ipld_dagcbor.workspace = true
+1
-1
crates/jacquard-derive/Cargo.toml
+1
-1
crates/jacquard-derive/Cargo.toml
···
16
16
17
17
[dependencies]
18
18
heck.workspace = true
19
-
jacquard-lexicon = { version = "0.8", path = "../jacquard-lexicon" }
19
+
jacquard-lexicon = { version = "0.8", path = "../jacquard-lexicon", features = ["codegen"] }
20
20
proc-macro2.workspace = true
21
21
quote.workspace = true
22
22
syn.workspace = true
+9
-5
crates/jacquard-lexicon/Cargo.toml
+9
-5
crates/jacquard-lexicon/Cargo.toml
···
11
11
exclude.workspace = true
12
12
license.workspace = true
13
13
14
+
[features]
15
+
default = ["codegen"]
16
+
codegen = ["dep:prettyplease", "dep:syn", "dep:quote", "dep:proc-macro2", "dep:heck"]
17
+
14
18
[dependencies]
15
19
cid.workspace = true
16
20
dashmap.workspace = true
17
-
heck.workspace = true
21
+
heck = { workspace = true, optional = true }
18
22
inventory = "0.3"
19
23
jacquard-common = { version = "0.8", path = "../jacquard-common" }
20
24
miette = { workspace = true }
21
25
multihash.workspace = true
22
-
prettyplease.workspace = true
23
-
proc-macro2.workspace = true
24
-
quote.workspace = true
26
+
prettyplease = { workspace = true, optional = true }
27
+
proc-macro2 = { workspace = true, optional = true }
28
+
quote = { workspace = true, optional = true }
25
29
serde.workspace = true
26
30
serde_ipld_dagcbor.workspace = true
27
31
serde_json.workspace = true
28
32
serde_repr.workspace = true
29
33
serde_with.workspace = true
30
34
sha2.workspace = true
31
-
syn.workspace = true
35
+
syn = { workspace = true, optional = true }
32
36
thiserror.workspace = true
33
37
unicode-segmentation = "1.12"
34
38
+28
-30
crates/jacquard-lexicon/src/codegen.rs
+28
-30
crates/jacquard-lexicon/src/codegen.rs
···
4
4
use proc_macro2::TokenStream;
5
5
use quote::quote;
6
6
7
-
pub mod builder_heuristics;
8
-
pub mod lifetime;
9
-
pub mod names;
10
-
pub mod nsid_utils;
11
-
pub mod output;
12
-
pub mod schema_impl;
13
-
pub mod structs;
14
-
pub mod types;
15
-
pub mod union_codegen;
16
-
pub mod utils;
17
-
pub mod xrpc;
7
+
pub(crate) mod builder_heuristics;
8
+
pub(crate) mod lifetime;
9
+
pub(crate) mod names;
10
+
pub(crate) mod nsid_utils;
11
+
pub(crate) mod output;
12
+
pub(crate) mod schema_impl;
13
+
pub(crate) mod structs;
14
+
pub(crate) mod types;
15
+
pub(crate) mod union_codegen;
16
+
pub(crate) mod utils;
17
+
pub(crate) mod xrpc;
18
18
19
19
/// Code generator for lexicon types
20
20
pub struct CodeGenerator<'c> {
···
66
66
let shared_fn = if !generated.contains(nsid) {
67
67
generated.insert(nsid.to_string());
68
68
// Codegen from JSON doesn't have union_fields (those are for Rust -> lexicon derive)
69
-
let doc_literal = crate::derive_impl::doc_to_tokens::doc_to_tokens(lex_doc, &std::collections::BTreeMap::new());
69
+
let doc_literal = crate::derive_impl::doc_to_tokens::doc_to_tokens(
70
+
lex_doc,
71
+
&std::collections::BTreeMap::new(),
72
+
);
70
73
Some(quote! {
71
74
fn #shared_fn_ident() -> ::jacquard_lexicon::lexicon::LexiconDoc<'static> {
72
75
#doc_literal
···
86
89
87
90
// Extract validation checks for this specific def
88
91
let validation_checks = schema_impl::extract_validation_checks(lex_doc, def_name);
89
-
let validation_code = crate::derive_impl::doc_to_tokens::validations_to_tokens(&validation_checks);
92
+
let validation_code =
93
+
crate::derive_impl::doc_to_tokens::validations_to_tokens(&validation_checks);
90
94
91
95
let trait_impl = quote! {
92
96
impl #impl_generics ::jacquard_lexicon::schema::LexiconSchema for #type_ident #type_generics {
···
514
518
LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons").expect("load corpus");
515
519
let codegen = CodeGenerator::new(&corpus, "jacquard_api");
516
520
517
-
let doc = corpus
518
-
.get("app.bsky.embed.images")
519
-
.expect("get images");
521
+
let doc = corpus.get("app.bsky.embed.images").expect("get images");
520
522
let def = doc.defs.get("viewImage").expect("get viewImage def");
521
523
522
524
let tokens = codegen
···
539
541
LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons").expect("load corpus");
540
542
let codegen = CodeGenerator::new(&corpus, "jacquard_api");
541
543
542
-
let doc = corpus
543
-
.get("test.array.types")
544
-
.expect("get array types");
544
+
let doc = corpus.get("test.array.types").expect("get array types");
545
545
let def = doc.defs.get("main").expect("get main def");
546
546
547
547
let tokens = codegen
···
569
569
LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons").expect("load corpus");
570
570
let codegen = CodeGenerator::new(&corpus, "jacquard_api");
571
571
572
-
let doc = corpus
573
-
.get("test.binary.types")
574
-
.expect("get binary types");
572
+
let doc = corpus.get("test.binary.types").expect("get binary types");
575
573
let def = doc.defs.get("main").expect("get main def");
576
574
577
575
let tokens = codegen
···
597
595
LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons").expect("load corpus");
598
596
let codegen = CodeGenerator::new(&corpus, "jacquard_api");
599
597
600
-
let doc = corpus
601
-
.get("test.empty.object")
602
-
.expect("get empty object");
598
+
let doc = corpus.get("test.empty.object").expect("get empty object");
603
599
let def = doc.defs.get("emptyDef").expect("get emptyDef");
604
600
605
601
let tokens = codegen
···
655
651
let vote_file: syn::File = syn::parse2(vote_tokens).expect("parse vote tokens");
656
652
let vote_formatted = prettyplease::unparse(&vote_file);
657
653
println!("\nVote:\n{}\n", vote_formatted);
658
-
assert!(vote_formatted.contains("struct DefinitionVote") || vote_formatted.contains("struct Vote"));
654
+
assert!(
655
+
vote_formatted.contains("struct DefinitionVote")
656
+
|| vote_formatted.contains("struct Vote")
657
+
);
659
658
assert!(vote_formatted.contains("pub poll_ref"));
660
659
assert!(vote_formatted.contains("pub option_index"));
661
660
}
···
713
712
// Local ref #option should resolve to DefinitionOption type (fully qualified or local)
714
713
assert!(
715
714
formatted.contains("Vec<DefinitionOption")
716
-
|| formatted.contains("Vec<jacquard_api::pub_leaflet::poll::definition::DefinitionOption")
715
+
|| formatted
716
+
.contains("Vec<jacquard_api::pub_leaflet::poll::definition::DefinitionOption")
717
717
);
718
718
}
719
719
···
723
723
LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons").expect("load corpus");
724
724
let codegen = CodeGenerator::new(&corpus, "jacquard_api");
725
725
726
-
let doc = corpus
727
-
.get("test.binary.types")
728
-
.expect("get binary types");
726
+
let doc = corpus.get("test.binary.types").expect("get binary types");
729
727
let def = doc.defs.get("main").expect("get main def");
730
728
731
729
let tokens = codegen
+3
-187
crates/jacquard-lexicon/src/codegen/nsid_utils.rs
+3
-187
crates/jacquard-lexicon/src/codegen/nsid_utils.rs
···
1
1
//! Utilities for parsing and working with NSIDs and refs
2
-
3
-
/// Parsed NSID components for easier manipulation
4
-
#[derive(Debug, Clone, PartialEq, Eq)]
5
-
pub struct NsidPath<'a> {
6
-
nsid: &'a str,
7
-
segments: Vec<&'a str>,
8
-
}
9
-
10
-
impl<'a> NsidPath<'a> {
11
-
/// Parse an NSID into its component segments
12
-
pub fn parse(nsid: &'a str) -> Self {
13
-
let segments: Vec<&str> = nsid.split('.').collect();
14
-
Self { nsid, segments }
15
-
}
16
-
17
-
/// Get the namespace (first two segments joined with '.')
18
-
/// Returns "com.atproto" from "com.atproto.repo.strongRef"
19
-
pub fn namespace(&self) -> String {
20
-
if self.segments.len() >= 2 {
21
-
format!("{}.{}", self.segments[0], self.segments[1])
22
-
} else {
23
-
self.nsid.to_string()
24
-
}
25
-
}
26
-
27
-
/// Get the last segment of the NSID
28
-
pub fn last_segment(&self) -> &str {
29
-
self.segments.last().copied().unwrap_or(self.nsid)
30
-
}
31
-
32
-
/// Get all segments except the last
33
-
pub fn parent_segments(&self) -> &[&str] {
34
-
if self.segments.is_empty() {
35
-
&[]
36
-
} else {
37
-
&self.segments[..self.segments.len() - 1]
38
-
}
39
-
}
40
-
41
-
/// Check if this is a "defs" NSID (ends with "defs")
42
-
pub fn is_defs(&self) -> bool {
43
-
self.last_segment() == "defs"
44
-
}
45
-
46
-
/// Get all segments
47
-
pub fn segments(&self) -> &[&str] {
48
-
&self.segments
49
-
}
50
-
51
-
/// Get the original NSID string
52
-
pub fn as_str(&self) -> &str {
53
-
self.nsid
54
-
}
55
-
56
-
/// Get number of segments
57
-
pub fn len(&self) -> usize {
58
-
self.segments.len()
59
-
}
60
-
61
-
/// Check if empty (should not happen with valid NSIDs)
62
-
pub fn is_empty(&self) -> bool {
63
-
self.segments.is_empty()
64
-
}
65
-
}
66
-
67
-
/// Parsed reference with NSID and optional fragment
68
-
#[derive(Debug, Clone, PartialEq, Eq)]
69
-
pub struct RefPath<'a> {
70
-
nsid: &'a str,
71
-
def: &'a str,
72
-
}
73
-
74
-
impl<'a> RefPath<'a> {
75
-
/// Parse a reference string, normalizing it based on current NSID context
76
-
pub fn parse(ref_str: &'a str, current_nsid: Option<&'a str>) -> Self {
77
-
if let Some(fragment) = ref_str.strip_prefix('#') {
78
-
// Local ref: #option → use current_nsid
79
-
let nsid = current_nsid.unwrap_or("");
80
-
Self {
81
-
nsid,
82
-
def: fragment,
83
-
}
84
-
} else if let Some((nsid, def)) = ref_str.split_once('#') {
85
-
// Full ref with fragment: nsid#def
86
-
Self { nsid, def }
87
-
} else {
88
-
// Full ref without fragment: nsid (implicit "main")
89
-
Self {
90
-
nsid: ref_str,
91
-
def: "main",
92
-
}
93
-
}
94
-
}
95
-
96
-
/// Get the NSID portion of the ref
97
-
pub fn nsid(&self) -> &str {
98
-
self.nsid
99
-
}
100
-
101
-
/// Get the def name (fragment) portion of the ref
102
-
pub fn def(&self) -> &str {
103
-
self.def
104
-
}
105
-
106
-
/// Check if this is a local ref (was parsed from #fragment)
107
-
pub fn is_local(&self, current_nsid: &str) -> bool {
108
-
self.nsid == current_nsid && self.def != "main"
109
-
}
110
-
111
-
/// Get the full ref string (nsid#def)
112
-
pub fn full_ref(&self) -> String {
113
-
if self.def == "main" {
114
-
self.nsid.to_string()
115
-
} else {
116
-
format!("{}#{}", self.nsid, self.def)
117
-
}
118
-
}
119
-
120
-
/// Normalize a local ref by prepending the current NSID if needed
121
-
/// Returns the normalized ref string suitable for corpus lookup
122
-
pub fn normalize(ref_str: &str, current_nsid: &str) -> String {
123
-
if ref_str.starts_with('#') {
124
-
format!("{}{}", current_nsid, ref_str)
125
-
} else {
126
-
ref_str.to_string()
127
-
}
128
-
}
129
-
}
130
-
131
-
#[cfg(test)]
132
-
mod tests {
133
-
use super::*;
2
+
//!
3
+
//! Re-exports core ref parsing utilities from `crate::ref_utils`.
134
4
135
-
#[test]
136
-
fn test_nsid_path_parse() {
137
-
let path = NsidPath::parse("com.atproto.repo.strongRef");
138
-
assert_eq!(path.segments(), &["com", "atproto", "repo", "strongRef"]);
139
-
assert_eq!(path.namespace(), "com.atproto");
140
-
assert_eq!(path.last_segment(), "strongRef");
141
-
assert_eq!(path.parent_segments(), &["com", "atproto", "repo"]);
142
-
assert!(!path.is_defs());
143
-
}
144
-
145
-
#[test]
146
-
fn test_nsid_path_defs() {
147
-
let path = NsidPath::parse("com.atproto.label.defs");
148
-
assert!(path.is_defs());
149
-
assert_eq!(path.last_segment(), "defs");
150
-
}
151
-
152
-
#[test]
153
-
fn test_ref_path_local() {
154
-
let ref_path = RefPath::parse("#option", Some("com.example.foo"));
155
-
assert_eq!(ref_path.nsid(), "com.example.foo");
156
-
assert_eq!(ref_path.def(), "option");
157
-
assert!(ref_path.is_local("com.example.foo"));
158
-
assert_eq!(ref_path.full_ref(), "com.example.foo#option");
159
-
}
160
-
161
-
#[test]
162
-
fn test_ref_path_with_fragment() {
163
-
let ref_path = RefPath::parse("com.example.foo#bar", None);
164
-
assert_eq!(ref_path.nsid(), "com.example.foo");
165
-
assert_eq!(ref_path.def(), "bar");
166
-
assert!(!ref_path.is_local("com.other.baz"));
167
-
assert_eq!(ref_path.full_ref(), "com.example.foo#bar");
168
-
}
169
-
170
-
#[test]
171
-
fn test_ref_path_implicit_main() {
172
-
let ref_path = RefPath::parse("com.example.foo", None);
173
-
assert_eq!(ref_path.nsid(), "com.example.foo");
174
-
assert_eq!(ref_path.def(), "main");
175
-
assert_eq!(ref_path.full_ref(), "com.example.foo");
176
-
}
177
-
178
-
#[test]
179
-
fn test_ref_path_normalize() {
180
-
assert_eq!(
181
-
RefPath::normalize("#option", "com.example.foo"),
182
-
"com.example.foo#option"
183
-
);
184
-
assert_eq!(
185
-
RefPath::normalize("com.other.bar#baz", "com.example.foo"),
186
-
"com.other.bar#baz"
187
-
);
188
-
}
189
-
}
5
+
pub use crate::ref_utils::{NsidPath, RefPath};
+5
-77
crates/jacquard-lexicon/src/codegen/schema_impl.rs
+5
-77
crates/jacquard-lexicon/src/codegen/schema_impl.rs
···
1
1
//! Generate LexiconSchema trait implementations for generated types
2
2
3
-
use crate::derive_impl::doc_to_tokens;
4
3
use crate::lexicon::{
5
-
LexInteger, LexObject, LexObjectProperty, LexRecordRecord, LexString,
6
-
LexUserType, LexiconDoc,
4
+
LexInteger, LexObject, LexObjectProperty, LexRecordRecord, LexString, LexUserType, LexiconDoc,
7
5
};
8
6
use crate::schema::from_ast::{ConstraintCheck, ValidationCheck};
9
-
use proc_macro2::TokenStream;
10
-
use quote::quote;
11
-
12
-
/// Generate LexiconSchema impl for a generated type
13
-
///
14
-
/// Takes the original lexicon doc and type metadata to generate a complete
15
-
/// impl with const literal and validation code.
16
-
pub fn generate_schema_impl(
17
-
type_name: &str,
18
-
doc: &LexiconDoc,
19
-
def_name: &str,
20
-
has_lifetime: bool,
21
-
) -> TokenStream {
22
-
let nsid = doc.id.as_ref();
23
-
24
-
// Generate lifetime parameter
25
-
let (impl_generics, type_generics) = if has_lifetime {
26
-
(quote! { <'a> }, quote! { <'a> })
27
-
} else {
28
-
(quote! {}, quote! {})
29
-
};
30
-
31
-
// Generate the lexicon doc literal using existing doc_to_tokens
32
-
// Codegen from JSON doesn't have union_fields (those are for Rust -> lexicon derive)
33
-
let doc_literal = doc_to_tokens::doc_to_tokens(doc, &std::collections::BTreeMap::new());
34
-
35
-
// Extract validation checks from lexicon doc for the specific def
36
-
let validation_checks = extract_validation_checks(doc, def_name);
37
-
38
-
// Generate validation code using existing validations_to_tokens
39
-
let validation_code = doc_to_tokens::validations_to_tokens(&validation_checks);
40
-
41
-
let type_ident = syn::Ident::new(type_name, proc_macro2::Span::call_site());
42
-
43
-
quote! {
44
-
impl #impl_generics ::jacquard_lexicon::schema::LexiconSchema for #type_ident #type_generics {
45
-
fn nsid() -> &'static str {
46
-
#nsid
47
-
}
48
-
49
-
fn def_name() -> &'static str {
50
-
#def_name
51
-
}
52
-
53
-
fn lexicon_doc() -> ::jacquard_lexicon::lexicon::LexiconDoc<'static> {
54
-
#doc_literal
55
-
}
56
-
57
-
fn validate(&self) -> ::std::result::Result<(), ::jacquard_lexicon::validation::ConstraintError> {
58
-
#validation_code
59
-
}
60
-
}
61
-
}
62
-
}
63
7
64
8
/// Extract validation checks from a LexiconDoc
65
9
///
···
71
15
// Get the specified def
72
16
if let Some(def) = doc.defs.get(def_name) {
73
17
match def {
74
-
LexUserType::Record(rec) => {
75
-
match &rec.record {
76
-
LexRecordRecord::Object(obj) => {
77
-
checks.extend(extract_object_validations(obj));
78
-
}
18
+
LexUserType::Record(rec) => match &rec.record {
19
+
LexRecordRecord::Object(obj) => {
20
+
checks.extend(extract_object_validations(obj));
79
21
}
80
-
}
22
+
},
81
23
LexUserType::Object(obj) => {
82
24
checks.extend(extract_object_validations(obj));
83
25
}
···
270
212
use heck::ToSnakeCase;
271
213
schema_name.to_snake_case()
272
214
}
273
-
274
-
#[cfg(test)]
275
-
mod tests {
276
-
use super::*;
277
-
278
-
#[test]
279
-
fn test_field_name_from_schema() {
280
-
assert_eq!(field_name_from_schema("createdAt"), "created_at");
281
-
assert_eq!(field_name_from_schema("maxLength"), "max_length");
282
-
assert_eq!(field_name_from_schema("text"), "text");
283
-
assert_eq!(field_name_from_schema("ref"), "ref"); // r# added by make_ident later
284
-
assert_eq!(field_name_from_schema("type"), "type"); // r# added by make_ident later
285
-
}
286
-
}
+1
-1
crates/jacquard-lexicon/src/corpus.rs
+1
-1
crates/jacquard-lexicon/src/corpus.rs
+7
-1
crates/jacquard-lexicon/src/lib.rs
+7
-1
crates/jacquard-lexicon/src/lib.rs
···
15
15
//! - [`derive_impl`] - Implementation functions for derive macros (used by jacquard-derive)
16
16
//! - [`validation`] - Runtime validation of Data against lexicon schemas
17
17
18
+
#[cfg(feature = "codegen")]
18
19
pub mod codegen;
20
+
#[cfg(feature = "codegen")]
19
21
pub mod corpus;
22
+
#[cfg(feature = "codegen")]
23
+
#[doc(hidden)]
20
24
pub mod derive_impl;
25
+
#[cfg(feature = "codegen")]
21
26
pub mod error;
27
+
#[cfg(feature = "codegen")]
22
28
pub mod fs;
23
29
pub mod lexicon;
30
+
pub mod ref_utils;
24
31
pub mod schema;
25
-
pub mod union_registry;
26
32
pub mod validation;
+189
crates/jacquard-lexicon/src/ref_utils.rs
+189
crates/jacquard-lexicon/src/ref_utils.rs
···
1
+
//! Utilities for parsing and working with NSIDs and refs
2
+
3
+
/// Parsed NSID components for easier manipulation
4
+
#[derive(Debug, Clone, PartialEq, Eq)]
5
+
pub struct NsidPath<'a> {
6
+
nsid: &'a str,
7
+
segments: Vec<&'a str>,
8
+
}
9
+
10
+
impl<'a> NsidPath<'a> {
11
+
/// Parse an NSID into its component segments
12
+
pub fn parse(nsid: &'a str) -> Self {
13
+
let segments: Vec<&str> = nsid.split('.').collect();
14
+
Self { nsid, segments }
15
+
}
16
+
17
+
/// Get the namespace (first two segments joined with '.')
18
+
/// Returns "com.atproto" from "com.atproto.repo.strongRef"
19
+
pub fn namespace(&self) -> String {
20
+
if self.segments.len() >= 2 {
21
+
format!("{}.{}", self.segments[0], self.segments[1])
22
+
} else {
23
+
self.nsid.to_string()
24
+
}
25
+
}
26
+
27
+
/// Get the last segment of the NSID
28
+
pub fn last_segment(&self) -> &str {
29
+
self.segments.last().copied().unwrap_or(self.nsid)
30
+
}
31
+
32
+
/// Get all segments except the last
33
+
pub fn parent_segments(&self) -> &[&str] {
34
+
if self.segments.is_empty() {
35
+
&[]
36
+
} else {
37
+
&self.segments[..self.segments.len() - 1]
38
+
}
39
+
}
40
+
41
+
/// Check if this is a "defs" NSID (ends with "defs")
42
+
pub fn is_defs(&self) -> bool {
43
+
self.last_segment() == "defs"
44
+
}
45
+
46
+
/// Get all segments
47
+
pub fn segments(&self) -> &[&str] {
48
+
&self.segments
49
+
}
50
+
51
+
/// Get the original NSID string
52
+
pub fn as_str(&self) -> &str {
53
+
self.nsid
54
+
}
55
+
56
+
/// Get number of segments
57
+
pub fn len(&self) -> usize {
58
+
self.segments.len()
59
+
}
60
+
61
+
/// Check if there are no segments (never true after `parse`: splitting always yields at least one segment, even for "")
62
+
pub fn is_empty(&self) -> bool {
63
+
self.segments.is_empty()
64
+
}
65
+
}
66
+
67
+
/// Parsed reference split into its NSID and def name (the def is "main" when the ref has no fragment)
68
+
#[derive(Debug, Clone, PartialEq, Eq)]
69
+
pub struct RefPath<'a> {
70
+
nsid: &'a str,
71
+
def: &'a str,
72
+
}
73
+
74
+
impl<'a> RefPath<'a> {
75
+
/// Parse a reference string, normalizing it based on current NSID context
76
+
pub fn parse(ref_str: &'a str, current_nsid: Option<&'a str>) -> Self {
77
+
if let Some(fragment) = ref_str.strip_prefix('#') {
78
+
// Local ref: #option → use current_nsid
79
+
let nsid = current_nsid.unwrap_or("");
80
+
Self {
81
+
nsid,
82
+
def: fragment,
83
+
}
84
+
} else if let Some((nsid, def)) = ref_str.split_once('#') {
85
+
// Full ref with fragment: nsid#def
86
+
Self { nsid, def }
87
+
} else {
88
+
// Full ref without fragment: nsid (implicit "main")
89
+
Self {
90
+
nsid: ref_str,
91
+
def: "main",
92
+
}
93
+
}
94
+
}
95
+
96
+
/// Get the NSID portion of the ref
97
+
pub fn nsid(&self) -> &str {
98
+
self.nsid
99
+
}
100
+
101
+
/// Get the def name (fragment) portion of the ref
102
+
pub fn def(&self) -> &str {
103
+
self.def
104
+
}
105
+
106
+
/// Check if this ref points at a non-"main" def of `current_nsid` (also true for a full `nsid#def` ref into that same NSID)
107
+
pub fn is_local(&self, current_nsid: &str) -> bool {
108
+
self.nsid == current_nsid && self.def != "main"
109
+
}
110
+
111
+
/// Get the full ref string (nsid#def)
112
+
pub fn full_ref(&self) -> String {
113
+
if self.def == "main" {
114
+
self.nsid.to_string()
115
+
} else {
116
+
format!("{}#{}", self.nsid, self.def)
117
+
}
118
+
}
119
+
120
+
/// Normalize a local ref by prepending the current NSID if needed
121
+
/// Returns the normalized ref string suitable for corpus lookup
122
+
pub fn normalize(ref_str: &str, current_nsid: &str) -> String {
123
+
if ref_str.starts_with('#') {
124
+
format!("{}{}", current_nsid, ref_str)
125
+
} else {
126
+
ref_str.to_string()
127
+
}
128
+
}
129
+
}
130
+
131
+
#[cfg(test)]
132
+
mod tests {
133
+
use super::*;
134
+
135
+
#[test]
136
+
fn test_nsid_path_parse() {
137
+
let path = NsidPath::parse("com.atproto.repo.strongRef");
138
+
assert_eq!(path.segments(), &["com", "atproto", "repo", "strongRef"]);
139
+
assert_eq!(path.namespace(), "com.atproto");
140
+
assert_eq!(path.last_segment(), "strongRef");
141
+
assert_eq!(path.parent_segments(), &["com", "atproto", "repo"]);
142
+
assert!(!path.is_defs());
143
+
}
144
+
145
+
#[test]
146
+
fn test_nsid_path_defs() {
147
+
let path = NsidPath::parse("com.atproto.label.defs");
148
+
assert!(path.is_defs());
149
+
assert_eq!(path.last_segment(), "defs");
150
+
}
151
+
152
+
#[test]
153
+
fn test_ref_path_local() {
154
+
let ref_path = RefPath::parse("#option", Some("com.example.foo"));
155
+
assert_eq!(ref_path.nsid(), "com.example.foo");
156
+
assert_eq!(ref_path.def(), "option");
157
+
assert!(ref_path.is_local("com.example.foo"));
158
+
assert_eq!(ref_path.full_ref(), "com.example.foo#option");
159
+
}
160
+
161
+
#[test]
162
+
fn test_ref_path_with_fragment() {
163
+
let ref_path = RefPath::parse("com.example.foo#bar", None);
164
+
assert_eq!(ref_path.nsid(), "com.example.foo");
165
+
assert_eq!(ref_path.def(), "bar");
166
+
assert!(!ref_path.is_local("com.other.baz"));
167
+
assert_eq!(ref_path.full_ref(), "com.example.foo#bar");
168
+
}
169
+
170
+
#[test]
171
+
fn test_ref_path_implicit_main() {
172
+
let ref_path = RefPath::parse("com.example.foo", None);
173
+
assert_eq!(ref_path.nsid(), "com.example.foo");
174
+
assert_eq!(ref_path.def(), "main");
175
+
assert_eq!(ref_path.full_ref(), "com.example.foo");
176
+
}
177
+
178
+
#[test]
179
+
fn test_ref_path_normalize() {
180
+
assert_eq!(
181
+
RefPath::normalize("#option", "com.example.foo"),
182
+
"com.example.foo#option"
183
+
);
184
+
assert_eq!(
185
+
RefPath::normalize("com.other.bar#baz", "com.example.foo"),
186
+
"com.other.bar#baz"
187
+
);
188
+
}
189
+
}
+2
crates/jacquard-lexicon/src/schema.rs
+2
crates/jacquard-lexicon/src/schema.rs
-337
crates/jacquard-lexicon/src/union_registry.rs
-337
crates/jacquard-lexicon/src/union_registry.rs
···
1
-
use crate::corpus::LexiconCorpus;
2
-
use crate::lexicon::{
3
-
LexArrayItem, LexObjectProperty, LexUserType, LexXrpcBodySchema,
4
-
LexXrpcSubscriptionMessageSchema,
5
-
};
6
-
use jacquard_common::smol_str::{SmolStr, ToSmolStr};
7
-
use jacquard_common::{CowStr, smol_str};
8
-
use std::collections::{BTreeMap, BTreeSet};
9
-
10
-
/// Information about a single union type found in the corpus
11
-
#[derive(Debug, Clone)]
12
-
pub struct UnionInfo {
13
-
/// NSID of the lexicon containing this union
14
-
pub lexicon_nsid: SmolStr,
15
-
/// Name of the def containing this union (e.g., "main", "replyRef")
16
-
pub def_name: SmolStr,
17
-
/// Field path within the def (e.g., "embed", "properties.embed")
18
-
pub field_path: CowStr<'static>,
19
-
/// Refs that exist in the corpus
20
-
pub known_refs: Vec<CowStr<'static>>,
21
-
/// Refs that don't exist in the corpus
22
-
pub unknown_refs: Vec<CowStr<'static>>,
23
-
/// Whether the union is closed (default true if not specified)
24
-
pub closed: bool,
25
-
}
26
-
27
-
impl UnionInfo {
28
-
/// Get the source text for this union's lexicon from the corpus
29
-
pub fn get_source<'c>(&self, corpus: &'c LexiconCorpus) -> Option<&'c str> {
30
-
corpus.get_source(&self.lexicon_nsid)
31
-
}
32
-
33
-
/// Check if this union has any unknown refs
34
-
pub fn has_unknown_refs(&self) -> bool {
35
-
!self.unknown_refs.is_empty()
36
-
}
37
-
38
-
/// Get all refs (known + unknown)
39
-
pub fn all_refs(&self) -> impl Iterator<Item = &CowStr<'static>> {
40
-
self.known_refs.iter().chain(self.unknown_refs.iter())
41
-
}
42
-
}
43
-
44
-
/// Registry of all union types found in the corpus
45
-
#[derive(Debug, Clone)]
46
-
pub struct UnionRegistry {
47
-
/// Map from union identifier to union info
48
-
/// Key is "{lexicon_nsid}#{def_name}:{field_path}"
49
-
unions: BTreeMap<SmolStr, UnionInfo>,
50
-
}
51
-
52
-
impl UnionRegistry {
53
-
/// Create a new empty union registry
54
-
pub fn new() -> Self {
55
-
Self {
56
-
unions: BTreeMap::new(),
57
-
}
58
-
}
59
-
60
-
/// Build a union registry from a corpus
61
-
pub fn from_corpus(corpus: &LexiconCorpus) -> Self {
62
-
let mut registry = Self::new();
63
-
64
-
for (nsid, doc) in corpus.iter() {
65
-
for (def_name, def) in &doc.defs {
66
-
registry.collect_unions_from_def(corpus, nsid, def_name, def);
67
-
}
68
-
}
69
-
70
-
registry
71
-
}
72
-
73
-
/// Collect unions from a single def
74
-
fn collect_unions_from_def(
75
-
&mut self,
76
-
corpus: &LexiconCorpus,
77
-
nsid: &SmolStr,
78
-
def_name: &SmolStr,
79
-
def: &LexUserType<'static>,
80
-
) {
81
-
match def {
82
-
LexUserType::Record(record) => match &record.record {
83
-
crate::lexicon::LexRecordRecord::Object(obj) => {
84
-
self.collect_unions_from_object(corpus, nsid, def_name, "", obj);
85
-
}
86
-
},
87
-
LexUserType::Object(obj) => {
88
-
self.collect_unions_from_object(corpus, nsid, def_name, "", obj);
89
-
}
90
-
LexUserType::XrpcQuery(query) => {
91
-
if let Some(output) = &query.output {
92
-
if let Some(schema) = &output.schema {
93
-
self.collect_unions_from_xrpc_body_schema(
94
-
corpus, nsid, def_name, "output", schema,
95
-
);
96
-
}
97
-
}
98
-
}
99
-
LexUserType::XrpcProcedure(proc) => {
100
-
if let Some(input) = &proc.input {
101
-
if let Some(schema) = &input.schema {
102
-
self.collect_unions_from_xrpc_body_schema(
103
-
corpus, nsid, def_name, "input", schema,
104
-
);
105
-
}
106
-
}
107
-
if let Some(output) = &proc.output {
108
-
if let Some(schema) = &output.schema {
109
-
self.collect_unions_from_xrpc_body_schema(
110
-
corpus, nsid, def_name, "output", schema,
111
-
);
112
-
}
113
-
}
114
-
}
115
-
LexUserType::XrpcSubscription(sub) => {
116
-
if let Some(message) = &sub.message {
117
-
if let Some(schema) = &message.schema {
118
-
self.collect_unions_from_subscription_message_schema(
119
-
corpus, nsid, def_name, "message", schema,
120
-
);
121
-
}
122
-
}
123
-
}
124
-
_ => {}
125
-
}
126
-
}
127
-
128
-
/// Collect unions from an object's properties
129
-
fn collect_unions_from_object(
130
-
&mut self,
131
-
corpus: &LexiconCorpus,
132
-
nsid: &SmolStr,
133
-
def_name: &SmolStr,
134
-
path_prefix: &str,
135
-
obj: &crate::lexicon::LexObject<'static>,
136
-
) {
137
-
for (prop_name, prop) in &obj.properties {
138
-
let prop_path = if path_prefix.is_empty() {
139
-
prop_name.to_smolstr()
140
-
} else {
141
-
smol_str::format_smolstr!("{}.{}", path_prefix, prop_name)
142
-
};
143
-
144
-
match prop {
145
-
LexObjectProperty::Union(union) => {
146
-
self.register_union(
147
-
corpus,
148
-
nsid,
149
-
def_name,
150
-
&prop_path,
151
-
&union.refs,
152
-
union.closed,
153
-
);
154
-
}
155
-
LexObjectProperty::Array(array) => {
156
-
if let LexArrayItem::Union(union) = &array.items {
157
-
let array_path = format!("{}[]", prop_path);
158
-
self.register_union(
159
-
corpus,
160
-
nsid,
161
-
def_name,
162
-
&array_path,
163
-
&union.refs,
164
-
union.closed,
165
-
);
166
-
}
167
-
}
168
-
LexObjectProperty::Ref(ref_type) => {
169
-
// Check if ref points to a union
170
-
if let Some((_, ref_def)) = corpus.resolve_ref(ref_type.r#ref.as_ref()) {
171
-
if matches!(ref_def, LexUserType::Object(_)) {
172
-
// Recursively check the referenced object
173
-
// (we'll handle this in a future iteration if needed)
174
-
}
175
-
}
176
-
}
177
-
_ => {}
178
-
}
179
-
}
180
-
}
181
-
182
-
/// Collect unions from XRPC body schema
183
-
fn collect_unions_from_xrpc_body_schema(
184
-
&mut self,
185
-
corpus: &LexiconCorpus,
186
-
nsid: &SmolStr,
187
-
def_name: &SmolStr,
188
-
path: &str,
189
-
schema: &LexXrpcBodySchema<'static>,
190
-
) {
191
-
match schema {
192
-
LexXrpcBodySchema::Union(union) => {
193
-
self.register_union(corpus, nsid, def_name, path, &union.refs, union.closed);
194
-
}
195
-
LexXrpcBodySchema::Object(obj) => {
196
-
self.collect_unions_from_object(corpus, nsid, def_name, path, obj);
197
-
}
198
-
_ => {}
199
-
}
200
-
}
201
-
202
-
/// Collect unions from subscription message schema
203
-
fn collect_unions_from_subscription_message_schema(
204
-
&mut self,
205
-
corpus: &LexiconCorpus,
206
-
nsid: &SmolStr,
207
-
def_name: &SmolStr,
208
-
path: &str,
209
-
schema: &LexXrpcSubscriptionMessageSchema<'static>,
210
-
) {
211
-
match schema {
212
-
LexXrpcSubscriptionMessageSchema::Union(union) => {
213
-
self.register_union(corpus, nsid, def_name, path, &union.refs, union.closed);
214
-
}
215
-
LexXrpcSubscriptionMessageSchema::Object(obj) => {
216
-
self.collect_unions_from_object(corpus, nsid, def_name, path, obj);
217
-
}
218
-
_ => {}
219
-
}
220
-
}
221
-
222
-
/// Register a union with the registry
223
-
fn register_union(
224
-
&mut self,
225
-
corpus: &LexiconCorpus,
226
-
nsid: &SmolStr,
227
-
def_name: &SmolStr,
228
-
field_path: &str,
229
-
refs: &[jacquard_common::CowStr<'static>],
230
-
closed: Option<bool>,
231
-
) {
232
-
let mut known_refs = Vec::new();
233
-
let mut unknown_refs = Vec::new();
234
-
235
-
for ref_str in refs {
236
-
if corpus.ref_exists(&ref_str) {
237
-
known_refs.push(ref_str.clone());
238
-
} else {
239
-
unknown_refs.push(ref_str.clone());
240
-
}
241
-
}
242
-
243
-
let key = smol_str::format_smolstr!("{}#{}:{}", nsid, def_name, field_path);
244
-
self.unions.insert(
245
-
key,
246
-
UnionInfo {
247
-
lexicon_nsid: nsid.clone(),
248
-
def_name: def_name.clone(),
249
-
field_path: CowStr::Owned(field_path.to_smolstr()),
250
-
known_refs,
251
-
unknown_refs,
252
-
closed: closed.unwrap_or(true),
253
-
},
254
-
);
255
-
}
256
-
257
-
/// Get all unions
258
-
pub fn iter(&self) -> impl Iterator<Item = (&SmolStr, &UnionInfo)> {
259
-
self.unions.iter()
260
-
}
261
-
262
-
/// Get a specific union
263
-
pub fn get(&self, key: &str) -> Option<&UnionInfo> {
264
-
self.unions.get(key)
265
-
}
266
-
267
-
/// Number of unions in registry
268
-
pub fn len(&self) -> usize {
269
-
self.unions.len()
270
-
}
271
-
272
-
/// Check if registry is empty
273
-
pub fn is_empty(&self) -> bool {
274
-
self.unions.is_empty()
275
-
}
276
-
277
-
/// Get all unique refs across all unions
278
-
pub fn all_refs(&self) -> BTreeSet<CowStr<'static>> {
279
-
let mut refs = BTreeSet::new();
280
-
for union in self.unions.values() {
281
-
refs.extend(union.known_refs.iter().cloned());
282
-
refs.extend(union.unknown_refs.iter().cloned());
283
-
}
284
-
refs
285
-
}
286
-
}
287
-
288
-
impl Default for UnionRegistry {
289
-
fn default() -> Self {
290
-
Self::new()
291
-
}
292
-
}
293
-
294
-
#[cfg(test)]
295
-
mod tests {
296
-
use super::*;
297
-
298
-
#[test]
299
-
fn test_union_registry_from_corpus() {
300
-
let corpus = LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons")
301
-
.expect("failed to load lexicons");
302
-
303
-
let registry = UnionRegistry::from_corpus(&corpus);
304
-
305
-
assert!(!registry.is_empty());
306
-
307
-
// Check that we found the embed union in post
308
-
let post_embed = registry
309
-
.iter()
310
-
.find(|(_, info)| {
311
-
info.lexicon_nsid == "app.bsky.feed.post"
312
-
&& info.def_name == "main"
313
-
&& info.field_path.contains("embed")
314
-
})
315
-
.expect("should find post embed union");
316
-
317
-
let info = post_embed.1;
318
-
assert!(info.known_refs.contains(&"app.bsky.embed.images".into()));
319
-
assert!(info.known_refs.contains(&"app.bsky.embed.video".into()));
320
-
assert!(info.known_refs.contains(&"app.bsky.embed.external".into()));
321
-
}
322
-
323
-
#[test]
324
-
fn test_union_registry_tracks_unknown_refs() {
325
-
let corpus = LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons")
326
-
.expect("failed to load lexicons");
327
-
328
-
let registry = UnionRegistry::from_corpus(&corpus);
329
-
330
-
// If there are any unknown refs, they should be tracked
331
-
for (_, info) in registry.iter() {
332
-
for unknown in &info.unknown_refs {
333
-
assert!(!corpus.ref_exists(unknown));
334
-
}
335
-
}
336
-
}
337
-
}
+1
-1
crates/jacquard-lexicon/src/validation.rs
+1
-1
crates/jacquard-lexicon/src/validation.rs
···
3
3
//! This module provides infrastructure for validating untyped `Data` values against
4
4
//! lexicon schemas, enabling partial deserialization, debugging, and schema migration.
5
5
6
-
use crate::codegen::nsid_utils::RefPath;
7
6
use crate::lexicon::{LexArrayItem, LexObjectProperty};
7
+
use crate::ref_utils::RefPath;
8
8
use crate::schema::SchemaRegistry;
9
9
use cid::Cid as IpldCid;
10
10
use dashmap::DashMap;