A library for ATProtocol identities.
1//! Recursive lexicon resolution functionality for AT Protocol.
2//!
3//! This module provides recursive resolution of lexicons, following references
4//! within lexicon schemas to resolve all dependent lexicons up to a specified depth.
5
6use std::collections::{HashMap, HashSet};
7
8use anyhow::Result;
9use serde_json::Value;
10use tracing::instrument;
11
12use crate::errors::LexiconRecursiveError;
13use crate::resolve::LexiconResolver;
14use crate::validation::{absolute, extract_nsid_from_ref_object};
15
/// Configuration for recursive lexicon resolution.
///
/// Instances are plain values: they can be cloned and compared for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RecursiveResolverConfig {
    /// Maximum number of reference levels to follow below the entry lexicon.
    ///
    /// `0` resolves only the entry lexicon, `1` additionally resolves the
    /// lexicons it references directly, and so on.
    pub max_depth: usize,
    /// Whether to include the entry lexicon in the results.
    ///
    /// When `false`, the entry lexicon is still resolved so that its
    /// references can be followed, but it is left out of the returned map.
    pub include_entry: bool,
}
24
25impl Default for RecursiveResolverConfig {
26 fn default() -> Self {
27 Self {
28 max_depth: 10,
29 include_entry: true,
30 }
31 }
32}
33
34/// A lexicon resolver that recursively resolves referenced lexicons.
35pub struct RecursiveLexiconResolver<R> {
36 /// The underlying lexicon resolver.
37 resolver: R,
38 /// Configuration for recursive resolution.
39 config: RecursiveResolverConfig,
40}
41
42impl<R> RecursiveLexiconResolver<R> {
43 /// Create a new recursive lexicon resolver with default configuration.
44 pub fn new(resolver: R) -> Self {
45 Self {
46 resolver,
47 config: RecursiveResolverConfig::default(),
48 }
49 }
50
51 /// Create a new recursive lexicon resolver with custom configuration.
52 pub fn with_config(resolver: R, config: RecursiveResolverConfig) -> Self {
53 Self { resolver, config }
54 }
55
56 /// Set the maximum depth for recursive resolution.
57 pub fn set_max_depth(&mut self, max_depth: usize) {
58 self.config.max_depth = max_depth;
59 }
60
61 /// Set whether to include the entry lexicon in the results.
62 pub fn set_include_entry(&mut self, include_entry: bool) {
63 self.config.include_entry = include_entry;
64 }
65}
66
67impl<R> RecursiveLexiconResolver<R>
68where
69 R: LexiconResolver,
70{
71 /// Recursively resolve a lexicon and all its referenced lexicons.
72 ///
73 /// Returns a HashMap where keys are NSIDs and values are the resolved lexicon schemas.
74 #[instrument(skip(self), err)]
75 pub async fn resolve_recursive(&self, entry_nsid: &str) -> Result<HashMap<String, Value>> {
76 let mut resolved = HashMap::new();
77 let mut visited = HashSet::new();
78 let mut to_resolve = HashSet::new();
79
80 // Start with the entry lexicon
81 to_resolve.insert(entry_nsid.to_string());
82
83 // Resolve lexicons level by level
84 for depth in 0..=self.config.max_depth {
85 if to_resolve.is_empty() {
86 break;
87 }
88
89 let current_batch = to_resolve.clone();
90 to_resolve.clear();
91
92 for nsid in current_batch {
93 // Skip if already visited
94 if visited.contains(&nsid) {
95 continue;
96 }
97 visited.insert(nsid.clone());
98
99 // Skip the entry lexicon if configured to exclude it
100 if !self.config.include_entry && nsid == entry_nsid && depth == 0 {
101 // Still need to extract references from it
102 match self.resolver.resolve(&nsid).await {
103 Ok(lexicon) => {
104 let refs = extract_lexicon_references(&lexicon);
105 to_resolve.extend(refs);
106 }
107 Err(e) => {
108 tracing::warn!(error = ?e, nsid = %nsid, "Failed to resolve lexicon");
109 continue;
110 }
111 }
112 continue;
113 }
114
115 // Resolve the lexicon
116 match self.resolver.resolve(&nsid).await {
117 Ok(lexicon) => {
118 // Extract references for next level
119 if depth < self.config.max_depth {
120 let refs = extract_lexicon_references(&lexicon);
121 to_resolve.extend(refs);
122 }
123
124 // Store the resolved lexicon
125 resolved.insert(nsid.clone(), lexicon);
126 }
127 Err(e) => {
128 tracing::warn!(error = ?e, nsid = %nsid, "Failed to resolve lexicon");
129 continue;
130 }
131 }
132 }
133 }
134
135 if resolved.is_empty() && self.config.include_entry {
136 return Err(LexiconRecursiveError::NoLexiconsResolved.into());
137 }
138
139 Ok(resolved)
140 }
141
142 /// Resolve a lexicon and return only its direct references.
143 #[instrument(skip(self), err)]
144 pub async fn get_direct_references(&self, nsid: &str) -> Result<HashSet<String>> {
145 let lexicon = self.resolver.resolve(nsid).await?;
146 Ok(extract_lexicon_references(&lexicon))
147 }
148}
149
150/// Extract all lexicon references from a lexicon schema.
151///
152/// Looks for:
153/// - Objects with `"type": "ref"` and extracts the `"ref"` field value
154/// - Objects with `"type": "union"` and extracts NSIDs from the `"refs"` array
155/// - Handles fragment-only references using the lexicon's `id` field as context
156#[instrument(skip(value))]
157pub fn extract_lexicon_references(value: &Value) -> HashSet<String> {
158 // Extract the lexicon's ID to use as context for fragment-only references
159 let context = value
160 .as_object()
161 .and_then(|obj| obj.get("id"))
162 .and_then(|id| id.as_str())
163 .map(|s| s.to_string());
164
165 let mut references = HashSet::new();
166 extract_references_recursive(value, &mut references, context.as_deref());
167 references
168}
169
170/// Recursively extract references from a JSON value with optional context.
171fn extract_references_recursive(
172 value: &Value,
173 references: &mut HashSet<String>,
174 context: Option<&str>,
175) {
176 match value {
177 Value::Object(map) => {
178 // Check if this is a reference object
179 if let Some(type_val) = map.get("type")
180 && let Some(type_str) = type_val.as_str()
181 {
182 if type_str == "ref" {
183 // Handle ref objects with context for fragment-only refs
184 if let Some(ref_val) = map.get("ref").and_then(|v| v.as_str()) {
185 let absolute_ref = if let Some(ctx) = context {
186 absolute(ctx, ref_val)
187 } else {
188 ref_val.to_string()
189 };
190
191 // Now extract the NSID from the absolute reference
192 if let Some(nsid) = extract_nsid_from_ref_object(
193 serde_json::json!({
194 "type": "ref",
195 "ref": absolute_ref
196 })
197 .as_object()
198 .unwrap(),
199 ) {
200 references.insert(nsid);
201 }
202 }
203 return; // Don't recurse further into ref objects
204 } else if type_str == "union" {
205 // Handle union objects with context for fragment-only refs
206 if let Some(refs_val) = map.get("refs")
207 && let Some(refs_array) = refs_val.as_array()
208 {
209 for ref_item in refs_array {
210 let ref_str = if let Some(s) = ref_item.as_str() {
211 s
212 } else if let Some(obj) = ref_item.as_object() {
213 if let Some(ref_val) = obj.get("ref").and_then(|v| v.as_str()) {
214 ref_val
215 } else {
216 continue;
217 }
218 } else {
219 continue;
220 };
221
222 // Make fragment-only references absolute
223 let absolute_ref = if let Some(ctx) = context {
224 absolute(ctx, ref_str)
225 } else {
226 ref_str.to_string()
227 };
228
229 // Extract NSID from the absolute reference (stripping fragment)
230 let nsid = if let Some(hash_pos) = absolute_ref.find('#') {
231 &absolute_ref[..hash_pos]
232 } else {
233 &absolute_ref
234 };
235
236 // Validate it's a proper NSID
237 if nsid.contains('.') && !nsid.is_empty() {
238 references.insert(nsid.to_string());
239 }
240 }
241 }
242 return; // Don't recurse further into union objects
243 }
244 }
245
246 // Otherwise, recursively check all values in the object
247 for (_key, val) in map.iter() {
248 extract_references_recursive(val, references, context);
249 }
250 }
251 Value::Array(arr) => {
252 // Recursively check all elements in the array
253 for val in arr {
254 extract_references_recursive(val, references, context);
255 }
256 }
257 _ => {
258 // Primitive values don't contain references
259 }
260 }
261}
262
263/// Result of recursive lexicon resolution.
264#[derive(Debug, Clone)]
265pub struct RecursiveResolutionResult {
266 /// The resolved lexicons, keyed by NSID.
267 pub lexicons: HashMap<String, Value>,
268 /// NSIDs that were referenced but could not be resolved.
269 pub failed: HashSet<String>,
270 /// The dependency graph showing which lexicons reference which.
271 pub dependencies: HashMap<String, HashSet<String>>,
272}
273
274impl<R> RecursiveLexiconResolver<R>
275where
276 R: LexiconResolver,
277{
278 /// Recursively resolve a lexicon with detailed results.
279 ///
280 /// This provides more information than `resolve_recursive`, including
281 /// failed resolutions and the dependency graph.
282 #[instrument(skip(self), err)]
283 pub async fn resolve_with_details(
284 &self,
285 entry_nsid: &str,
286 ) -> Result<RecursiveResolutionResult> {
287 let mut lexicons = HashMap::new();
288 let mut failed = HashSet::new();
289 let mut dependencies = HashMap::new();
290 let mut visited = HashSet::new();
291 let mut to_resolve = HashSet::new();
292
293 // Start with the entry lexicon
294 to_resolve.insert(entry_nsid.to_string());
295
296 // Resolve lexicons level by level
297 for depth in 0..=self.config.max_depth {
298 if to_resolve.is_empty() {
299 break;
300 }
301
302 let current_batch = to_resolve.clone();
303 to_resolve.clear();
304
305 for nsid in current_batch {
306 // Skip if already visited
307 if visited.contains(&nsid) {
308 continue;
309 }
310 visited.insert(nsid.clone());
311
312 // Resolve the lexicon
313 match self.resolver.resolve(&nsid).await {
314 Ok(lexicon) => {
315 // Extract references
316 let refs = extract_lexicon_references(&lexicon);
317
318 // Record dependencies
319 if !refs.is_empty() {
320 dependencies.insert(nsid.clone(), refs.clone());
321 }
322
323 // Add references to resolve queue (if within depth limit)
324 if depth < self.config.max_depth {
325 to_resolve.extend(refs);
326 }
327
328 // Store the resolved lexicon (if configured to include it)
329 if self.config.include_entry || nsid != entry_nsid || depth > 0 {
330 lexicons.insert(nsid.clone(), lexicon);
331 }
332 }
333 Err(e) => {
334 tracing::warn!(error = ?e, nsid = %nsid, "Failed to resolve lexicon");
335 failed.insert(nsid.clone());
336 continue;
337 }
338 }
339 }
340 }
341
342 Ok(RecursiveResolutionResult {
343 lexicons,
344 failed,
345 dependencies,
346 })
347 }
348}
349
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `schema` yields exactly the `expected` NSIDs — every one
    /// of them and nothing else. Both sides are sorted so that a failure
    /// prints a stable, readable diff.
    fn assert_references(schema: &serde_json::Value, expected: &[&str]) {
        let mut actual: Vec<String> = extract_lexicon_references(schema).into_iter().collect();
        actual.sort();

        let mut wanted: Vec<String> = expected.iter().map(|nsid| (*nsid).to_owned()).collect();
        wanted.sort();

        assert_eq!(actual, wanted);
    }

    #[test]
    fn test_extract_references() {
        let schema = serde_json::json!({
            "lexicon": 1,
            "id": "app.bsky.feed.post",
            "defs": {
                "main": {
                    "type": "record",
                    "record": {
                        "type": "object",
                        "properties": {
                            "text": {
                                "type": "string"
                            },
                            "embed": {
                                "type": "union",
                                "refs": [
                                    { "type": "ref", "ref": "app.bsky.embed.images" },
                                    { "type": "ref", "ref": "app.bsky.embed.external" },
                                    { "type": "ref", "ref": "#localref" }
                                ]
                            }
                        }
                    }
                }
            }
        });

        // The fragment-only `#localref` resolves to the lexicon's own id,
        // app.bsky.feed.post, which therefore shows up as a reference.
        assert_references(
            &schema,
            &[
                "app.bsky.embed.images",
                "app.bsky.embed.external",
                "app.bsky.feed.post",
            ],
        );
    }

    #[test]
    fn test_extract_nested_references() {
        let schema = serde_json::json!({
            "defs": {
                "main": {
                    "type": "object",
                    "properties": {
                        "nested": {
                            "type": "object",
                            "properties": {
                                "ref1": { "type": "ref", "ref": "com.example.schema1" },
                                "array": {
                                    "type": "array",
                                    "items": {
                                        "type": "union",
                                        "refs": [
                                            { "type": "ref", "ref": "#localref" },
                                            { "type": "ref", "ref": "com.example.schema3" }
                                        ]
                                    }
                                }
                            }
                        }
                    }
                }
            }
        });

        // There is no `id` field, so the fragment-only reference is dropped.
        assert_references(&schema, &["com.example.schema1", "com.example.schema3"]);
    }

    #[test]
    fn test_fragment_only_with_context() {
        // Fragment-only references must be resolved when the lexicon has an id.
        let schema = serde_json::json!({
            "lexicon": 1,
            "id": "com.example.myschema",
            "defs": {
                "main": {
                    "type": "object",
                    "properties": {
                        "directRef": { "type": "ref", "ref": "#localDefinition" },
                        "unionRefs": {
                            "type": "union",
                            "refs": [
                                "#main",
                                "#otherDef",
                                "external.schema.type"
                            ]
                        },
                        "nestedRef": {
                            "type": "object",
                            "properties": {
                                "field": { "type": "ref", "ref": "#nested" }
                            }
                        }
                    }
                }
            }
        });

        // All fragment-only references collapse onto com.example.myschema.
        assert_references(&schema, &["com.example.myschema", "external.schema.type"]);
    }

    #[test]
    fn test_skip_invalid_references() {
        let schema = serde_json::json!({
            "defs": {
                "main": {
                    "refs": [
                        { "type": "ref", "ref": "valid.schema.name" },
                        { "type": "ref", "ref": "invalid" }, // No dots - should be skipped
                        { "type": "ref", "ref": "#localref" }, // Fragment-only, no ID context - should be skipped
                        { "type": "string", "ref": "not.a.ref" }, // Wrong type - should be skipped
                    ]
                }
            }
        });

        // Only valid.schema.name survives (no id field, so #localref is skipped).
        assert_references(&schema, &["valid.schema.name"]);
    }

    #[test]
    fn test_extract_union_references() {
        let schema = serde_json::json!({
            "defs": {
                "main": {
                    "type": "union",
                    "refs": [
                        "community.lexicon.calendar.event#uri",
                        "community.lexicon.location.address",
                        "community.lexicon.location.fsq",
                        "community.lexicon.location.geo",
                        "community.lexicon.location.hthree"
                    ]
                }
            }
        });

        // NSIDs are reported without their fragment identifiers.
        assert_references(
            &schema,
            &[
                "community.lexicon.calendar.event",
                "community.lexicon.location.address",
                "community.lexicon.location.fsq",
                "community.lexicon.location.geo",
                "community.lexicon.location.hthree",
            ],
        );
    }

    #[test]
    fn test_extract_mixed_union_references() {
        let schema = serde_json::json!({
            "defs": {
                "main": {
                    "type": "union",
                    "refs": [
                        "app.bsky.feed.post",
                        { "type": "ref", "ref": "app.bsky.actor.profile" },
                        "#app.bsky.graph.follow", // Fragment-only, no ID context - should be skipped
                        "invalid", // No dots - should be skipped
                    ]
                },
                "other": {
                    "type": "ref",
                    "ref": "app.bsky.embed.images"
                }
            }
        });

        // The exact-set comparison also proves that neither
        // `app.bsky.graph.follow` (fragment-only, no id context) nor `invalid`
        // is reported.
        assert_references(
            &schema,
            &[
                "app.bsky.feed.post",
                "app.bsky.actor.profile",
                "app.bsky.embed.images",
            ],
        );
    }
}