+35
-70
Cargo.lock
+35
-70
Cargo.lock
···
577
577
578
578
[[package]]
579
579
name = "cc"
580
-
version = "1.2.41"
580
+
version = "1.2.43"
581
581
source = "registry+https://github.com/rust-lang/crates.io-index"
582
-
checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7"
582
+
checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2"
583
583
dependencies = [
584
584
"find-msvc-tools",
585
585
"jobserver",
···
1080
1080
1081
1081
[[package]]
1082
1082
name = "deranged"
1083
-
version = "0.5.4"
1083
+
version = "0.5.5"
1084
1084
source = "registry+https://github.com/rust-lang/crates.io-index"
1085
-
checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071"
1085
+
checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587"
1086
1086
dependencies = [
1087
1087
"powerfmt",
1088
1088
"serde_core",
···
1295
1295
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
1296
1296
dependencies = [
1297
1297
"libc",
1298
-
"windows-sys 0.61.2",
1298
+
"windows-sys 0.59.0",
1299
1299
]
1300
1300
1301
1301
[[package]]
···
1412
1412
1413
1413
[[package]]
1414
1414
name = "flate2"
1415
-
version = "1.1.4"
1415
+
version = "1.1.5"
1416
1416
source = "registry+https://github.com/rust-lang/crates.io-index"
1417
-
checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9"
1417
+
checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
1418
1418
dependencies = [
1419
1419
"crc32fast",
1420
1420
"miniz_oxide 0.8.9",
···
1913
1913
"libc",
1914
1914
"percent-encoding",
1915
1915
"pin-project-lite",
1916
-
"socket2 0.6.1",
1916
+
"socket2 0.5.10",
1917
1917
"system-configuration",
1918
1918
"tokio",
1919
1919
"tower-service",
···
1933
1933
"js-sys",
1934
1934
"log",
1935
1935
"wasm-bindgen",
1936
-
"windows-core 0.62.2",
1936
+
"windows-core",
1937
1937
]
1938
1938
1939
1939
[[package]]
···
2288
2288
[[package]]
2289
2289
name = "jacquard-api"
2290
2290
version = "0.8.0"
2291
-
source = "git+https://tangled.org/@nonbinary.computer/jacquard#b8978f162ed3306519ec3155a5fd01fbda2390a3"
2291
+
source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
2292
2292
dependencies = [
2293
2293
"bon",
2294
2294
"bytes",
···
2378
2378
[[package]]
2379
2379
name = "jacquard-common"
2380
2380
version = "0.8.0"
2381
-
source = "git+https://tangled.org/@nonbinary.computer/jacquard#b8978f162ed3306519ec3155a5fd01fbda2390a3"
2381
+
source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
2382
2382
dependencies = [
2383
2383
"base64 0.22.1",
2384
2384
"bon",
···
2431
2431
[[package]]
2432
2432
name = "jacquard-derive"
2433
2433
version = "0.8.0"
2434
-
source = "git+https://tangled.org/@nonbinary.computer/jacquard#b8978f162ed3306519ec3155a5fd01fbda2390a3"
2434
+
source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
2435
2435
dependencies = [
2436
+
"heck 0.5.0",
2437
+
"jacquard-lexicon 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
2436
2438
"proc-macro2",
2437
2439
"quote",
2438
2440
"syn 2.0.108",
···
2466
2468
[[package]]
2467
2469
name = "jacquard-identity"
2468
2470
version = "0.8.0"
2469
-
source = "git+https://tangled.org/@nonbinary.computer/jacquard#b8978f162ed3306519ec3155a5fd01fbda2390a3"
2471
+
source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
2470
2472
dependencies = [
2471
2473
"bon",
2472
2474
"bytes",
···
2495
2497
"clap_complete",
2496
2498
"clap_mangen",
2497
2499
"glob",
2500
+
"inventory",
2498
2501
"jacquard-api 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
2499
2502
"jacquard-common 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
2503
+
"jacquard-derive 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
2500
2504
"jacquard-identity 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
2501
2505
"jacquard-lexicon 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
2502
2506
"kdl",
···
2536
2540
[[package]]
2537
2541
name = "jacquard-lexicon"
2538
2542
version = "0.8.0"
2539
-
source = "git+https://tangled.org/@nonbinary.computer/jacquard#b8978f162ed3306519ec3155a5fd01fbda2390a3"
2543
+
source = "git+https://tangled.org/@nonbinary.computer/jacquard#4c31392df7f719950dabd64ef8e7c9cf84ba4212"
2540
2544
dependencies = [
2541
-
"async-trait",
2542
-
"clap",
2543
-
"clap_complete",
2544
-
"clap_mangen",
2545
2545
"glob",
2546
2546
"heck 0.5.0",
2547
-
"jacquard-api 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
2547
+
"inventory",
2548
2548
"jacquard-common 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
2549
-
"jacquard-identity 0.8.0 (git+https://tangled.org/@nonbinary.computer/jacquard)",
2550
-
"kdl",
2551
2549
"miette",
2552
2550
"prettyplease",
2553
2551
"proc-macro2",
2554
2552
"quote",
2555
-
"reqwest",
2556
2553
"serde",
2557
2554
"serde_json",
2558
2555
"serde_repr",
2559
2556
"serde_with",
2560
2557
"syn 2.0.108",
2561
-
"tempfile",
2562
2558
"thiserror 2.0.17",
2563
-
"tokio",
2559
+
"unicode-segmentation",
2564
2560
"walkdir",
2565
2561
]
2566
2562
···
3156
3152
source = "registry+https://github.com/rust-lang/crates.io-index"
3157
3153
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
3158
3154
dependencies = [
3159
-
"windows-sys 0.61.2",
3155
+
"windows-sys 0.59.0",
3160
3156
]
3161
3157
3162
3158
[[package]]
···
3701
3697
"quinn-udp",
3702
3698
"rustc-hash",
3703
3699
"rustls",
3704
-
"socket2 0.6.1",
3700
+
"socket2 0.5.10",
3705
3701
"thiserror 2.0.17",
3706
3702
"tokio",
3707
3703
"tracing",
···
3738
3734
"cfg_aliases",
3739
3735
"libc",
3740
3736
"once_cell",
3741
-
"socket2 0.6.1",
3737
+
"socket2 0.5.10",
3742
3738
"tracing",
3743
-
"windows-sys 0.60.2",
3739
+
"windows-sys 0.59.0",
3744
3740
]
3745
3741
3746
3742
[[package]]
···
4159
4155
"errno",
4160
4156
"libc",
4161
4157
"linux-raw-sys 0.11.0",
4162
-
"windows-sys 0.61.2",
4158
+
"windows-sys 0.59.0",
4163
4159
]
4164
4160
4165
4161
[[package]]
···
4822
4818
"getrandom 0.3.4",
4823
4819
"once_cell",
4824
4820
"rustix 1.1.2",
4825
-
"windows-sys 0.61.2",
4821
+
"windows-sys 0.59.0",
4826
4822
]
4827
4823
4828
4824
[[package]]
···
5693
5689
source = "registry+https://github.com/rust-lang/crates.io-index"
5694
5690
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
5695
5691
dependencies = [
5696
-
"windows-sys 0.61.2",
5692
+
"windows-sys 0.48.0",
5697
5693
]
5698
5694
5699
5695
[[package]]
···
5709
5705
checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
5710
5706
dependencies = [
5711
5707
"windows-collections",
5712
-
"windows-core 0.61.2",
5708
+
"windows-core",
5713
5709
"windows-future",
5714
5710
"windows-link 0.1.3",
5715
5711
"windows-numerics",
···
5721
5717
source = "registry+https://github.com/rust-lang/crates.io-index"
5722
5718
checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
5723
5719
dependencies = [
5724
-
"windows-core 0.61.2",
5720
+
"windows-core",
5725
5721
]
5726
5722
5727
5723
[[package]]
···
5733
5729
"windows-implement",
5734
5730
"windows-interface",
5735
5731
"windows-link 0.1.3",
5736
-
"windows-result 0.3.4",
5737
-
"windows-strings 0.4.2",
5738
-
]
5739
-
5740
-
[[package]]
5741
-
name = "windows-core"
5742
-
version = "0.62.2"
5743
-
source = "registry+https://github.com/rust-lang/crates.io-index"
5744
-
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
5745
-
dependencies = [
5746
-
"windows-implement",
5747
-
"windows-interface",
5748
-
"windows-link 0.2.1",
5749
-
"windows-result 0.4.1",
5750
-
"windows-strings 0.5.1",
5732
+
"windows-result",
5733
+
"windows-strings",
5751
5734
]
5752
5735
5753
5736
[[package]]
···
5756
5739
source = "registry+https://github.com/rust-lang/crates.io-index"
5757
5740
checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
5758
5741
dependencies = [
5759
-
"windows-core 0.61.2",
5742
+
"windows-core",
5760
5743
"windows-link 0.1.3",
5761
5744
"windows-threading",
5762
5745
]
···
5801
5784
source = "registry+https://github.com/rust-lang/crates.io-index"
5802
5785
checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
5803
5786
dependencies = [
5804
-
"windows-core 0.61.2",
5787
+
"windows-core",
5805
5788
"windows-link 0.1.3",
5806
5789
]
5807
5790
···
5812
5795
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
5813
5796
dependencies = [
5814
5797
"windows-link 0.1.3",
5815
-
"windows-result 0.3.4",
5816
-
"windows-strings 0.4.2",
5798
+
"windows-result",
5799
+
"windows-strings",
5817
5800
]
5818
5801
5819
5802
[[package]]
···
5826
5809
]
5827
5810
5828
5811
[[package]]
5829
-
name = "windows-result"
5830
-
version = "0.4.1"
5831
-
source = "registry+https://github.com/rust-lang/crates.io-index"
5832
-
checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
5833
-
dependencies = [
5834
-
"windows-link 0.2.1",
5835
-
]
5836
-
5837
-
[[package]]
5838
5812
name = "windows-strings"
5839
5813
version = "0.4.2"
5840
5814
source = "registry+https://github.com/rust-lang/crates.io-index"
5841
5815
checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
5842
5816
dependencies = [
5843
5817
"windows-link 0.1.3",
5844
-
]
5845
-
5846
-
[[package]]
5847
-
name = "windows-strings"
5848
-
version = "0.5.1"
5849
-
source = "registry+https://github.com/rust-lang/crates.io-index"
5850
-
checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
5851
-
dependencies = [
5852
-
"windows-link 0.2.1",
5853
5818
]
5854
5819
5855
5820
[[package]]
+10
crates/jacquard-derive/tests/lexicon_schema_derive.rs
+10
crates/jacquard-derive/tests/lexicon_schema_derive.rs
···
9
9
#[derive(LexiconSchema)]
10
10
#[lexicon(nsid = "com.example.simple", record, key = "tid")]
11
11
struct SimpleRecord<'a> {
12
+
#[allow(dead_code)]
12
13
pub text: CowStr<'a>,
14
+
#[allow(dead_code)]
13
15
pub created_at: Datetime,
14
16
}
15
17
···
124
126
#[derive(LexiconSchema)]
125
127
#[lexicon(nsid = "com.example.camel", record)]
126
128
struct CamelCaseRecord {
129
+
#[allow(dead_code)]
127
130
pub field_one: i64,
131
+
#[allow(dead_code)]
128
132
pub field_two: i64,
129
133
}
130
134
···
145
149
#[lexicon(nsid = "com.example.union")]
146
150
enum BasicUnion {
147
151
#[nsid = "com.example.variant.one"]
152
+
#[allow(dead_code)]
148
153
VariantOne,
149
154
150
155
#[nsid = "com.example.variant.two"]
156
+
#[allow(dead_code)]
151
157
VariantTwo,
152
158
}
153
159
···
173
179
#[open_union]
174
180
enum OpenUnion<'a> {
175
181
#[nsid = "com.example.variant"]
182
+
#[allow(dead_code)]
176
183
Variant,
177
184
185
+
#[allow(dead_code)]
178
186
Unknown(jacquard_common::types::value::Data<'a>),
179
187
}
180
188
···
217
225
#[lexicon(nsid = "com.example.fragments")]
218
226
enum FragmentUnion {
219
227
// Should generate com.example.fragments#variantOne
228
+
#[allow(dead_code)]
220
229
VariantOne,
221
230
// Should generate com.example.fragments#variantTwo
231
+
#[allow(dead_code)]
222
232
VariantTwo,
223
233
}
224
234
+6
crates/jacquard-lexgen/Cargo.toml
+6
crates/jacquard-lexgen/Cargo.toml
···
19
19
name = "lex-fetch"
20
20
path = "src/bin/lex_fetch.rs"
21
21
22
+
[[bin]]
23
+
name = "extract-schemas"
24
+
path = "src/bin/extract_schemas.rs"
25
+
22
26
[dependencies]
23
27
async-trait = "0.1"
24
28
clap.workspace = true
25
29
glob = "0.3"
30
+
inventory = "0.3"
26
31
jacquard-api = { version = "0.8", git = "https://tangled.org/@nonbinary.computer/jacquard" }
27
32
jacquard-common = { version = "0.8", features = [ "reqwest-client" ], git = "https://tangled.org/@nonbinary.computer/jacquard" }
33
+
jacquard-derive = { version = "0.8", git = "https://tangled.org/@nonbinary.computer/jacquard" }
28
34
jacquard-identity = { version = "0.8", git = "https://tangled.org/@nonbinary.computer/jacquard" }
29
35
jacquard-lexicon = { version = "0.8", git = "https://tangled.org/@nonbinary.computer/jacquard" }
30
36
kdl = "6"
+55
crates/jacquard-lexgen/src/bin/extract_schemas.rs
+55
crates/jacquard-lexgen/src/bin/extract_schemas.rs
···
1
+
use clap::Parser;
2
+
use jacquard_lexgen::schema_extraction::{ExtractOptions, SchemaExtractor};
3
+
use miette::Result;
4
+
5
+
/// Extract lexicon schemas from compiled Rust types
6
+
#[derive(Parser, Debug)]
7
+
#[command(name = "extract-schemas")]
8
+
#[command(about = "Extract AT Protocol lexicon schemas from Rust types")]
9
+
struct Args {
10
+
/// Output directory for generated schema files
11
+
#[arg(short, long, default_value = "lexicons")]
12
+
output: String,
13
+
14
+
/// Verbose output
15
+
#[arg(short, long)]
16
+
verbose: bool,
17
+
18
+
/// Filter by NSID prefix (e.g., "app.bsky")
19
+
#[arg(short, long)]
20
+
filter: Option<String>,
21
+
22
+
/// Validate schemas before writing
23
+
#[arg(short = 'V', long, default_value = "true")]
24
+
validate: bool,
25
+
26
+
/// Pretty-print JSON output
27
+
#[arg(short, long, default_value = "true")]
28
+
pretty: bool,
29
+
30
+
/// Watch mode - regenerate on changes
31
+
#[arg(short, long)]
32
+
watch: bool,
33
+
}
34
+
35
+
fn main() -> Result<()> {
36
+
let args = Args::parse();
37
+
38
+
let options = ExtractOptions {
39
+
output_dir: args.output.into(),
40
+
verbose: args.verbose,
41
+
filter: args.filter,
42
+
validate: args.validate,
43
+
pretty: args.pretty,
44
+
};
45
+
46
+
let extractor = SchemaExtractor::new(options);
47
+
48
+
if args.watch {
49
+
extractor.watch()?;
50
+
} else {
51
+
extractor.extract_all()?;
52
+
}
53
+
54
+
Ok(())
55
+
}
+4
crates/jacquard-lexgen/src/lib.rs
+4
crates/jacquard-lexgen/src/lib.rs
···
29
29
//!
30
30
//! - [`fetch`] - Ingests lexicons from git, atproto, http fetch, and other sources
31
31
//! - [`cli`] - CLI argument parsing utilities
32
+
//! - [`schema_extraction`] - Extract lexicon schemas from Rust types via inventory
32
33
33
34
pub mod cli;
34
35
pub mod fetch;
36
+
pub mod schema_extraction;
37
+
#[cfg(any(test, debug_assertions))]
38
+
pub mod test_schemas;
35
39
36
40
pub use fetch::{Config, Fetcher};
+288
crates/jacquard-lexgen/src/schema_extraction.rs
+288
crates/jacquard-lexgen/src/schema_extraction.rs
···
1
+
use jacquard_lexicon::lexicon::LexiconDoc;
2
+
use jacquard_lexicon::schema::LexiconSchemaRef;
3
+
use miette::{IntoDiagnostic, Result};
4
+
use std::collections::BTreeMap;
5
+
use std::fs;
6
+
use std::path::PathBuf;
7
+
8
+
/// Settings that control a schema extraction run.
#[derive(Debug, Clone)]
pub struct ExtractOptions {
    /// Directory the generated lexicon JSON files are written into.
    pub output_dir: PathBuf,
    /// Print progress information while extracting.
    pub verbose: bool,
    /// When set, only lexicons whose base NSID starts with this prefix are written.
    pub filter: Option<String>,
    /// Validate each assembled lexicon document before writing it.
    pub validate: bool,
    /// Pretty-print the JSON output instead of emitting compact JSON.
    pub pretty: bool,
}

/// Writes lexicon JSON files according to a set of [`ExtractOptions`].
#[derive(Debug, Clone)]
pub struct SchemaExtractor {
    options: ExtractOptions,
}
19
+
20
+
impl SchemaExtractor {
21
+
pub fn new(options: ExtractOptions) -> Self {
22
+
Self { options }
23
+
}
24
+
25
+
/// Extract all schemas from inventory
26
+
pub fn extract_all(&self) -> Result<()> {
27
+
if self.options.verbose {
28
+
println!("Discovering schemas via inventory...");
29
+
}
30
+
31
+
// Collect all schema refs from inventory
32
+
let refs: Vec<&LexiconSchemaRef> = inventory::iter::<LexiconSchemaRef>().collect();
33
+
34
+
if self.options.verbose {
35
+
println!("Found {} schema types", refs.len());
36
+
}
37
+
38
+
// Group by base NSID
39
+
let grouped = self.group_by_base_nsid(&refs)?;
40
+
41
+
// Create output directory
42
+
fs::create_dir_all(&self.options.output_dir).into_diagnostic()?;
43
+
44
+
// Process each group
45
+
let mut written = 0;
46
+
for (base_nsid, group_refs) in grouped {
47
+
// Apply filter if specified
48
+
if let Some(filter) = &self.options.filter {
49
+
if !base_nsid.starts_with(filter) {
50
+
continue;
51
+
}
52
+
}
53
+
54
+
if self.options.verbose {
55
+
println!("Processing {} ({} types)", base_nsid, group_refs.len());
56
+
}
57
+
58
+
self.write_lexicon(&base_nsid, &group_refs)?;
59
+
written += 1;
60
+
}
61
+
62
+
println!(
63
+
"✓ Wrote {} lexicon files to {}",
64
+
written,
65
+
self.options.output_dir.display()
66
+
);
67
+
68
+
Ok(())
69
+
}
70
+
71
+
/// Group refs by base NSID (strip fragment suffix)
72
+
fn group_by_base_nsid<'a>(
73
+
&self,
74
+
refs: &[&'a LexiconSchemaRef],
75
+
) -> Result<BTreeMap<String, Vec<&'a LexiconSchemaRef>>> {
76
+
let mut groups: BTreeMap<String, Vec<&'a LexiconSchemaRef>> = BTreeMap::new();
77
+
78
+
for schema_ref in refs {
79
+
let nsid = schema_ref.nsid;
80
+
81
+
// Split on # to get base NSID
82
+
let base_nsid = if let Some(pos) = nsid.find('#') {
83
+
&nsid[..pos]
84
+
} else {
85
+
nsid
86
+
};
87
+
88
+
groups
89
+
.entry(base_nsid.to_string())
90
+
.or_default()
91
+
.push(schema_ref);
92
+
}
93
+
94
+
Ok(groups)
95
+
}
96
+
97
+
/// Write a single lexicon file
98
+
fn write_lexicon(&self, base_nsid: &str, refs: &[&LexiconSchemaRef]) -> Result<()> {
99
+
// Generate all schemas in this group
100
+
let mut all_defs = BTreeMap::new();
101
+
let mut primary_doc: Option<LexiconDoc> = None;
102
+
103
+
for schema_ref in refs {
104
+
let doc = (schema_ref.provider)();
105
+
106
+
// Determine if this is the primary def or a fragment
107
+
if schema_ref.nsid.contains('#') {
108
+
// Fragment - extract def name and add to defs
109
+
let fragment_name = schema_ref.nsid.split('#').nth(1).unwrap();
110
+
111
+
// Merge defs from fragment doc
112
+
for (def_name, def) in doc.defs {
113
+
// Use fragment name if def is "main", otherwise use as-is
114
+
let final_name = if def_name == "main" {
115
+
fragment_name.to_string()
116
+
} else {
117
+
def_name.to_string()
118
+
};
119
+
all_defs.insert(final_name, def);
120
+
}
121
+
} else {
122
+
// Primary type - use as base doc
123
+
primary_doc = Some(doc);
124
+
}
125
+
}
126
+
127
+
// Build final doc
128
+
let mut final_doc = primary_doc.unwrap_or_else(|| {
129
+
// No primary doc - create one
130
+
use jacquard_lexicon::lexicon::Lexicon;
131
+
LexiconDoc {
132
+
lexicon: Lexicon::Lexicon1,
133
+
id: base_nsid.into(),
134
+
revision: None,
135
+
description: None,
136
+
defs: BTreeMap::new(),
137
+
}
138
+
});
139
+
140
+
// Merge in all defs (convert String keys to SmolStr)
141
+
for (k, v) in all_defs {
142
+
final_doc.defs.insert(k.into(), v);
143
+
}
144
+
145
+
// Validate if requested
146
+
if self.options.validate {
147
+
self.validate_schema(&final_doc)?;
148
+
}
149
+
150
+
// Serialize to JSON
151
+
let json = if self.options.pretty {
152
+
serde_json::to_string_pretty(&final_doc).into_diagnostic()?
153
+
} else {
154
+
serde_json::to_string(&final_doc).into_diagnostic()?
155
+
};
156
+
157
+
// Write to file
158
+
let filename = base_nsid.replace('.', "_") + ".json";
159
+
let path = self.options.output_dir.join(&filename);
160
+
161
+
fs::write(&path, json).into_diagnostic()?;
162
+
163
+
if self.options.verbose {
164
+
println!(" Wrote {} ({} defs)", filename, final_doc.defs.len());
165
+
}
166
+
167
+
Ok(())
168
+
}
169
+
170
+
/// Validate a schema document
171
+
fn validate_schema(&self, doc: &LexiconDoc) -> Result<()> {
172
+
// Must have at least one def
173
+
if doc.defs.is_empty() {
174
+
return Err(miette::miette!("lexicon {} has no defs", doc.id));
175
+
}
176
+
177
+
// Warn if no "main" def and doesn't follow .defs convention
178
+
if !doc.defs.contains_key("main") {
179
+
let id_str = doc.id.as_ref();
180
+
if !id_str.ends_with(".defs") {
181
+
eprintln!(
182
+
"⚠️ Warning: lexicon {} has no 'main' def - consider naming it {}.defs",
183
+
id_str, id_str
184
+
);
185
+
if self.options.verbose {
186
+
eprintln!(
187
+
" Lexicons without a primary type should use the .defs suffix (e.g., app.bsky.actor.defs)"
188
+
);
189
+
}
190
+
}
191
+
}
192
+
193
+
// Validate NSID format
194
+
if !is_valid_nsid(&doc.id) {
195
+
return Err(miette::miette!("invalid NSID format: {}", doc.id));
196
+
}
197
+
198
+
Ok(())
199
+
}
200
+
201
+
/// Watch mode - regenerate on file changes
202
+
pub fn watch(&self) -> Result<()> {
203
+
println!("Watch mode not yet implemented");
204
+
println!("Run with --help to see available options");
205
+
Ok(())
206
+
}
207
+
}
208
+
209
+
/// Validate NSID format: domain.name.record
///
/// Returns `true` when `nsid` consists of at least three `.`-separated
/// segments, each of which is non-empty and made up solely of ASCII letters,
/// ASCII digits, `-` or `_`.
///
/// Non-ASCII letters and digits are rejected: NSIDs are ASCII-only, whereas
/// `char::is_alphanumeric` would accept any Unicode alphanumeric character.
fn is_valid_nsid(nsid: &str) -> bool {
    let mut segment_count = 0usize;

    for segment in nsid.split('.') {
        let well_formed = !segment.is_empty()
            && segment
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_');
        if !well_formed {
            return false;
        }
        segment_count += 1;
    }

    // Must have at least 3 parts
    segment_count >= 3
}
235
+
236
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Accepts well-formed NSIDs and rejects short, empty-segment or
    /// badly-charactered ones.
    #[test]
    fn test_is_valid_nsid() {
        let accepted = [
            "com.example.test",
            "app.bsky.feed.post",
            "com.example.with_underscore",
            "com.example.with-hyphen",
        ];
        for nsid in accepted {
            assert!(is_valid_nsid(nsid));
        }

        let rejected = [
            "com.example",          // Too short
            "com",                  // Too short
            "com.example.invalid!", // Invalid char
            "com..example",         // Empty segment
        ];
        for nsid in rejected {
            assert!(!is_valid_nsid(nsid));
        }
    }

    /// A primary NSID and its `#fragment` end up in the same group.
    #[test]
    fn test_group_by_base_nsid() {
        // Providers are never invoked by the grouping step.
        let refs = [
            "com.example.test",
            "com.example.test#fragment",
            "com.example.other",
        ]
        .map(|nsid| LexiconSchemaRef {
            nsid,
            provider: || todo!(),
        });
        let ref_ptrs: Vec<&LexiconSchemaRef> = refs.iter().collect();

        let extractor = SchemaExtractor::new(ExtractOptions {
            output_dir: PathBuf::from("test"),
            verbose: false,
            filter: None,
            validate: false,
            pretty: true,
        });

        let grouped = extractor.group_by_base_nsid(&ref_ptrs).unwrap();

        assert_eq!(grouped.len(), 2);
        for (base, expected_len) in [("com.example.test", 2), ("com.example.other", 1)] {
            assert!(grouped.contains_key(base));
            assert_eq!(grouped[base].len(), expected_len);
        }
    }
}
+31
crates/jacquard-lexgen/src/test_schemas.rs
+31
crates/jacquard-lexgen/src/test_schemas.rs
···
1
+
// Test schemas for verifying extraction works
2
+
// These are only compiled in tests/dev builds
3
+
4
+
use jacquard_common::CowStr;
5
+
use jacquard_derive::LexiconSchema;
6
+
7
+
#[derive(LexiconSchema)]
8
+
#[lexicon(nsid = "com.example.testRecord", record, key = "tid")]
9
+
pub struct TestRecord<'a> {
10
+
#[lexicon(max_length = 100)]
11
+
pub text: CowStr<'a>,
12
+
pub count: i64,
13
+
}
14
+
15
+
#[derive(LexiconSchema)]
16
+
#[lexicon(nsid = "com.example.testRecord#fragment")]
17
+
pub struct TestFragment {
18
+
pub field: i64,
19
+
}
20
+
21
+
#[derive(LexiconSchema)]
22
+
#[lexicon(nsid = "com.example.testDefs.defs#defOne")]
23
+
pub struct DefOne {
24
+
pub value: String,
25
+
}
26
+
27
+
#[derive(LexiconSchema)]
28
+
#[lexicon(nsid = "com.example.testDefs.defs#defTwo")]
29
+
pub struct DefTwo {
30
+
pub number: i64,
31
+
}
+82
crates/jacquard-lexgen/tests/schema_extraction.rs
+82
crates/jacquard-lexgen/tests/schema_extraction.rs
···
1
+
use jacquard_lexgen::schema_extraction::{ExtractOptions, SchemaExtractor};
2
+
use tempfile::TempDir;
3
+
4
+
/// `extract_all` must create the output directory when it does not exist yet.
#[test]
fn test_extract_all_creates_output_dir() {
    let temp_dir = TempDir::new().unwrap();

    // `TempDir` itself already exists, so target a nested path that does not;
    // otherwise the existence check below would pass without any work done.
    let output_dir = temp_dir.path().join("nested").join("lexicons");
    assert!(!output_dir.exists());

    let options = ExtractOptions {
        output_dir: output_dir.clone(),
        verbose: false,
        filter: None,
        validate: true,
        pretty: true,
    };

    let extractor = SchemaExtractor::new(options);

    // This will discover any schemas registered via inventory in the binary
    // In a minimal test environment, this might be 0
    let result = extractor.extract_all();

    // Should succeed even if no schemas found
    assert!(result.is_ok(), "extract_all failed: {:?}", result.err());

    // Directory should have been created by the extractor
    assert!(output_dir.is_dir());
}
28
+
29
+
/// A filter that matches no registered NSID must succeed and write nothing.
#[test]
fn test_extract_with_filter() {
    let temp_dir = TempDir::new().unwrap();

    let options = ExtractOptions {
        output_dir: temp_dir.path().to_path_buf(),
        verbose: false,
        filter: Some("com.example.nonexistent".into()),
        validate: true,
        pretty: true,
    };

    let extractor = SchemaExtractor::new(options);
    let result = extractor.extract_all();

    // Should succeed (just won't write any files)
    assert!(result.is_ok(), "extract_all failed: {:?}", result.err());

    // Actually verify that nothing was written for the non-matching prefix.
    let written = std::fs::read_dir(temp_dir.path()).unwrap().count();
    assert_eq!(written, 0, "filter should have excluded every lexicon");
}
47
+
48
+
/// Extraction with verbose logging enabled completes successfully.
#[test]
fn test_extract_with_verbose() {
    let scratch = TempDir::new().unwrap();

    let extractor = SchemaExtractor::new(ExtractOptions {
        output_dir: scratch.path().to_path_buf(),
        verbose: true,
        filter: None,
        validate: true,
        pretty: true,
    });

    let outcome = extractor.extract_all();
    assert!(outcome.is_ok());
}
65
+
66
+
/// With `pretty: false` every written file must be single-line (compact) JSON.
#[test]
fn test_extract_compact_json() {
    let temp_dir = TempDir::new().unwrap();

    let options = ExtractOptions {
        output_dir: temp_dir.path().to_path_buf(),
        verbose: false,
        filter: None,
        validate: true,
        pretty: false, // Compact JSON
    };

    let extractor = SchemaExtractor::new(options);
    let result = extractor.extract_all();

    assert!(result.is_ok(), "extract_all failed: {:?}", result.err());

    // Compact serialization never emits raw newlines, so any newline means
    // the `pretty` flag was ignored. Passes trivially if nothing was written.
    for entry in std::fs::read_dir(temp_dir.path()).unwrap() {
        let path = entry.unwrap().path();
        let json = std::fs::read_to_string(&path).unwrap();
        assert!(!json.contains('\n'), "{} is not compact JSON", path.display());
    }
}
+3
-8
crates/jacquard-lexicon/src/derive_impl/lexicon_schema.rs
+3
-8
crates/jacquard-lexicon/src/derive_impl/lexicon_schema.rs
···
1
1
//! Implementation of #[derive(LexiconSchema)] macro
2
2
3
-
use crate::lexicon::{
4
-
LexArray, LexBlob, LexBoolean, LexBytes, LexCidLink, LexInteger, LexObject, LexObjectProperty,
5
-
LexRef, LexRefUnion, LexString, LexStringFormat, LexUnknown, LexUserType,
6
-
};
7
3
use crate::schema::type_mapping::{LexiconPrimitiveType, StringFormat, rust_type_to_lexicon_type};
8
4
use heck::{ToKebabCase, ToLowerCamelCase, ToPascalCase, ToShoutySnakeCase, ToSnakeCase};
9
-
use jacquard_common::smol_str::{SmolStr, ToSmolStr};
10
5
use proc_macro2::TokenStream;
11
-
use quote::{ToTokens, quote};
6
+
use quote::quote;
12
7
use syn::{Attribute, Data, DeriveInput, Fields, Ident, LitStr, Type, parse2};
13
8
14
9
/// Implementation for the LexiconSchema derive macro
···
422
417
nsid: #nsid,
423
418
provider: || {
424
419
let mut generator = ::jacquard_lexicon::schema::LexiconGenerator::new(#nsid);
425
-
#name::lexicon_doc(&mut generator)
420
+
<#name as ::jacquard_lexicon::schema::LexiconSchema>::lexicon_doc(&mut generator)
426
421
},
427
422
}
428
423
}
···
1202
1197
nsid: #nsid,
1203
1198
provider: || {
1204
1199
let mut generator = ::jacquard_lexicon::schema::LexiconGenerator::new(#nsid);
1205
-
#name::lexicon_doc(&mut generator)
1200
+
<#name as ::jacquard_lexicon::schema::LexiconSchema>::lexicon_doc(&mut generator)
1206
1201
},
1207
1202
}
1208
1203
}