A library for ATProtocol identities.
at main 20 kB view raw
1//! Recursive lexicon resolution functionality for AT Protocol. 2//! 3//! This module provides recursive resolution of lexicons, following references 4//! within lexicon schemas to resolve all dependent lexicons up to a specified depth. 5 6use std::collections::{HashMap, HashSet}; 7 8use anyhow::Result; 9use serde_json::Value; 10use tracing::instrument; 11 12use crate::errors::LexiconRecursiveError; 13use crate::resolve::LexiconResolver; 14use crate::validation::{absolute, extract_nsid_from_ref_object}; 15 16/// Configuration for recursive lexicon resolution. 17#[derive(Debug, Clone)] 18pub struct RecursiveResolverConfig { 19 /// Maximum depth for recursive resolution (0 = only resolve the entry lexicon). 20 pub max_depth: usize, 21 /// Whether to include the entry lexicon in the results. 22 pub include_entry: bool, 23} 24 25impl Default for RecursiveResolverConfig { 26 fn default() -> Self { 27 Self { 28 max_depth: 10, 29 include_entry: true, 30 } 31 } 32} 33 34/// A lexicon resolver that recursively resolves referenced lexicons. 35pub struct RecursiveLexiconResolver<R> { 36 /// The underlying lexicon resolver. 37 resolver: R, 38 /// Configuration for recursive resolution. 39 config: RecursiveResolverConfig, 40} 41 42impl<R> RecursiveLexiconResolver<R> { 43 /// Create a new recursive lexicon resolver with default configuration. 44 pub fn new(resolver: R) -> Self { 45 Self { 46 resolver, 47 config: RecursiveResolverConfig::default(), 48 } 49 } 50 51 /// Create a new recursive lexicon resolver with custom configuration. 52 pub fn with_config(resolver: R, config: RecursiveResolverConfig) -> Self { 53 Self { resolver, config } 54 } 55 56 /// Set the maximum depth for recursive resolution. 57 pub fn set_max_depth(&mut self, max_depth: usize) { 58 self.config.max_depth = max_depth; 59 } 60 61 /// Set whether to include the entry lexicon in the results. 62 pub fn set_include_entry(&mut self, include_entry: bool) { 63 self.config.include_entry = include_entry; 64 } 65} 66 67impl<R> RecursiveLexiconResolver<R> 68where 69 R: LexiconResolver, 70{ 71 /// Recursively resolve a lexicon and all its referenced lexicons. 72 /// 73 /// Returns a HashMap where keys are NSIDs and values are the resolved lexicon schemas. 74 #[instrument(skip(self), err)] 75 pub async fn resolve_recursive(&self, entry_nsid: &str) -> Result<HashMap<String, Value>> { 76 let mut resolved = HashMap::new(); 77 let mut visited = HashSet::new(); 78 let mut to_resolve = HashSet::new(); 79 80 // Start with the entry lexicon 81 to_resolve.insert(entry_nsid.to_string()); 82 83 // Resolve lexicons level by level 84 for depth in 0..=self.config.max_depth { 85 if to_resolve.is_empty() { 86 break; 87 } 88 89 let current_batch = to_resolve.clone(); 90 to_resolve.clear(); 91 92 for nsid in current_batch { 93 // Skip if already visited 94 if visited.contains(&nsid) { 95 continue; 96 } 97 visited.insert(nsid.clone()); 98 99 // Skip the entry lexicon if configured to exclude it 100 if !self.config.include_entry && nsid == entry_nsid && depth == 0 { 101 // Still need to extract references from it 102 match self.resolver.resolve(&nsid).await { 103 Ok(lexicon) => { 104 let refs = extract_lexicon_references(&lexicon); 105 to_resolve.extend(refs); 106 } 107 Err(e) => { 108 tracing::warn!(error = ?e, nsid = %nsid, "Failed to resolve lexicon"); 109 continue; 110 } 111 } 112 continue; 113 } 114 115 // Resolve the lexicon 116 match self.resolver.resolve(&nsid).await { 117 Ok(lexicon) => { 118 // Extract references for next level 119 if depth < self.config.max_depth { 120 let refs = extract_lexicon_references(&lexicon); 121 to_resolve.extend(refs); 122 } 123 124 // Store the resolved lexicon 125 resolved.insert(nsid.clone(), lexicon); 126 } 127 Err(e) => { 128 tracing::warn!(error = ?e, nsid = %nsid, "Failed to resolve lexicon"); 129 continue; 130 } 131 } 132 } 133 } 134 135 if resolved.is_empty() && self.config.include_entry { 136 return Err(LexiconRecursiveError::NoLexiconsResolved.into()); 137 } 138 139 Ok(resolved) 140 } 141 142 /// Resolve a lexicon and return only its direct references. 143 #[instrument(skip(self), err)] 144 pub async fn get_direct_references(&self, nsid: &str) -> Result<HashSet<String>> { 145 let lexicon = self.resolver.resolve(nsid).await?; 146 Ok(extract_lexicon_references(&lexicon)) 147 } 148} 149 150/// Extract all lexicon references from a lexicon schema. 151/// 152/// Looks for: 153/// - Objects with `"type": "ref"` and extracts the `"ref"` field value 154/// - Objects with `"type": "union"` and extracts NSIDs from the `"refs"` array 155/// - Handles fragment-only references using the lexicon's `id` field as context 156#[instrument(skip(value))] 157pub fn extract_lexicon_references(value: &Value) -> HashSet<String> { 158 // Extract the lexicon's ID to use as context for fragment-only references 159 let context = value 160 .as_object() 161 .and_then(|obj| obj.get("id")) 162 .and_then(|id| id.as_str()) 163 .map(|s| s.to_string()); 164 165 let mut references = HashSet::new(); 166 extract_references_recursive(value, &mut references, context.as_deref()); 167 references 168} 169 170/// Recursively extract references from a JSON value with optional context. 171fn extract_references_recursive( 172 value: &Value, 173 references: &mut HashSet<String>, 174 context: Option<&str>, 175) { 176 match value { 177 Value::Object(map) => { 178 // Check if this is a reference object 179 if let Some(type_val) = map.get("type") 180 && let Some(type_str) = type_val.as_str() 181 { 182 if type_str == "ref" { 183 // Handle ref objects with context for fragment-only refs 184 if let Some(ref_val) = map.get("ref").and_then(|v| v.as_str()) { 185 let absolute_ref = if let Some(ctx) = context { 186 absolute(ctx, ref_val) 187 } else { 188 ref_val.to_string() 189 }; 190 191 // Now extract the NSID from the absolute reference 192 if let Some(nsid) = extract_nsid_from_ref_object( 193 serde_json::json!({ 194 "type": "ref", 195 "ref": absolute_ref 196 }) 197 .as_object() 198 .unwrap(), 199 ) { 200 references.insert(nsid); 201 } 202 } 203 return; // Don't recurse further into ref objects 204 } else if type_str == "union" { 205 // Handle union objects with context for fragment-only refs 206 if let Some(refs_val) = map.get("refs") 207 && let Some(refs_array) = refs_val.as_array() 208 { 209 for ref_item in refs_array { 210 let ref_str = if let Some(s) = ref_item.as_str() { 211 s 212 } else if let Some(obj) = ref_item.as_object() { 213 if let Some(ref_val) = obj.get("ref").and_then(|v| v.as_str()) { 214 ref_val 215 } else { 216 continue; 217 } 218 } else { 219 continue; 220 }; 221 222 // Make fragment-only references absolute 223 let absolute_ref = if let Some(ctx) = context { 224 absolute(ctx, ref_str) 225 } else { 226 ref_str.to_string() 227 }; 228 229 // Extract NSID from the absolute reference (stripping fragment) 230 let nsid = if let Some(hash_pos) = absolute_ref.find('#') { 231 &absolute_ref[..hash_pos] 232 } else { 233 &absolute_ref 234 }; 235 236 // Validate it's a proper NSID 237 if nsid.contains('.') && !nsid.is_empty() { 238 references.insert(nsid.to_string()); 239 } 240 } 241 } 242 return; // Don't recurse further into union objects 243 } 244 } 245 246 // Otherwise, recursively check all values in the object 247 for (_key, val) in map.iter() { 248 extract_references_recursive(val, references, context); 249 } 250 } 251 Value::Array(arr) => { 252 // Recursively check all elements in the array 253 for val in arr { 254 extract_references_recursive(val, references, context); 255 } 256 } 257 _ => { 258 // Primitive values don't contain references 259 } 260 } 261} 262 263/// Result of recursive lexicon resolution. 264#[derive(Debug, Clone)] 265pub struct RecursiveResolutionResult { 266 /// The resolved lexicons, keyed by NSID. 267 pub lexicons: HashMap<String, Value>, 268 /// NSIDs that were referenced but could not be resolved. 269 pub failed: HashSet<String>, 270 /// The dependency graph showing which lexicons reference which. 271 pub dependencies: HashMap<String, HashSet<String>>, 272} 273 274impl<R> RecursiveLexiconResolver<R> 275where 276 R: LexiconResolver, 277{ 278 /// Recursively resolve a lexicon with detailed results. 279 /// 280 /// This provides more information than `resolve_recursive`, including 281 /// failed resolutions and the dependency graph. 282 #[instrument(skip(self), err)] 283 pub async fn resolve_with_details( 284 &self, 285 entry_nsid: &str, 286 ) -> Result<RecursiveResolutionResult> { 287 let mut lexicons = HashMap::new(); 288 let mut failed = HashSet::new(); 289 let mut dependencies = HashMap::new(); 290 let mut visited = HashSet::new(); 291 let mut to_resolve = HashSet::new(); 292 293 // Start with the entry lexicon 294 to_resolve.insert(entry_nsid.to_string()); 295 296 // Resolve lexicons level by level 297 for depth in 0..=self.config.max_depth { 298 if to_resolve.is_empty() { 299 break; 300 } 301 302 let current_batch = to_resolve.clone(); 303 to_resolve.clear(); 304 305 for nsid in current_batch { 306 // Skip if already visited 307 if visited.contains(&nsid) { 308 continue; 309 } 310 visited.insert(nsid.clone()); 311 312 // Resolve the lexicon 313 match self.resolver.resolve(&nsid).await { 314 Ok(lexicon) => { 315 // Extract references 316 let refs = extract_lexicon_references(&lexicon); 317 318 // Record dependencies 319 if !refs.is_empty() { 320 dependencies.insert(nsid.clone(), refs.clone()); 321 } 322 323 // Add references to resolve queue (if within depth limit) 324 if depth < self.config.max_depth { 325 to_resolve.extend(refs); 326 } 327 328 // Store the resolved lexicon (if configured to include it) 329 if self.config.include_entry || nsid != entry_nsid || depth > 0 { 330 lexicons.insert(nsid.clone(), lexicon); 331 } 332 } 333 Err(e) => { 334 tracing::warn!(error = ?e, nsid = %nsid, "Failed to resolve lexicon"); 335 failed.insert(nsid.clone()); 336 continue; 337 } 338 } 339 } 340 } 341 342 Ok(RecursiveResolutionResult { 343 lexicons, 344 failed, 345 dependencies, 346 }) 347 } 348} 349 350#[cfg(test)] 351mod tests { 352 use super::*; 353 354 #[test] 355 fn test_extract_references() { 356 let schema = serde_json::json!({ 357 "lexicon": 1, 358 "id": "app.bsky.feed.post", 359 "defs": { 360 "main": { 361 "type": "record", 362 "record": { 363 "type": "object", 364 "properties": { 365 "text": { 366 "type": "string" 367 }, 368 "embed": { 369 "type": "union", 370 "refs": [ 371 { "type": "ref", "ref": "app.bsky.embed.images" }, 372 { "type": "ref", "ref": "app.bsky.embed.external" }, 373 { "type": "ref", "ref": "#localref" } 374 ] 375 } 376 } 377 } 378 } 379 } 380 }); 381 382 let refs = extract_lexicon_references(&schema); 383 384 assert!(refs.contains("app.bsky.embed.images")); 385 assert!(refs.contains("app.bsky.embed.external")); 386 // Fragment-only reference #localref should be resolved to app.bsky.feed.post 387 // (using the lexicon's id as context) 388 assert!(refs.contains("app.bsky.feed.post")); 389 assert_eq!(refs.len(), 3); 390 } 391 392 #[test] 393 fn test_extract_nested_references() { 394 let schema = serde_json::json!({ 395 "defs": { 396 "main": { 397 "type": "object", 398 "properties": { 399 "nested": { 400 "type": "object", 401 "properties": { 402 "ref1": { "type": "ref", "ref": "com.example.schema1" }, 403 "array": { 404 "type": "array", 405 "items": { 406 "type": "union", 407 "refs": [ 408 { "type": "ref", "ref": "#localref" }, 409 { "type": "ref", "ref": "com.example.schema3" } 410 ] 411 } 412 } 413 } 414 } 415 } 416 } 417 } 418 }); 419 420 let refs = extract_lexicon_references(&schema); 421 422 assert!(refs.contains("com.example.schema1")); 423 assert!(refs.contains("com.example.schema3")); 424 // Without an id field, fragment-only references cannot be resolved 425 assert_eq!(refs.len(), 2); 426 } 427 428 #[test] 429 fn test_fragment_only_with_context() { 430 // Test that fragment-only references are properly resolved when lexicon has an ID 431 let schema = serde_json::json!({ 432 "lexicon": 1, 433 "id": "com.example.myschema", 434 "defs": { 435 "main": { 436 "type": "object", 437 "properties": { 438 "directRef": { "type": "ref", "ref": "#localDefinition" }, 439 "unionRefs": { 440 "type": "union", 441 "refs": [ 442 "#main", 443 "#otherDef", 444 "external.schema.type" 445 ] 446 }, 447 "nestedRef": { 448 "type": "object", 449 "properties": { 450 "field": { "type": "ref", "ref": "#nested" } 451 } 452 } 453 } 454 } 455 } 456 }); 457 458 let refs = extract_lexicon_references(&schema); 459 460 // Fragment-only references should all resolve to com.example.myschema 461 assert!(refs.contains("com.example.myschema")); 462 assert!(refs.contains("external.schema.type")); 463 assert_eq!(refs.len(), 2); 464 } 465 466 #[test] 467 fn test_skip_invalid_references() { 468 let schema = serde_json::json!({ 469 "defs": { 470 "main": { 471 "refs": [ 472 { "type": "ref", "ref": "valid.schema.name" }, 473 { "type": "ref", "ref": "invalid" }, // No dots - should be skipped 474 { "type": "ref", "ref": "#localref" }, // Fragment-only, no ID context - should be skipped 475 { "type": "string", "ref": "not.a.ref" }, // Wrong type - should be skipped 476 ] 477 } 478 } 479 }); 480 481 let refs = extract_lexicon_references(&schema); 482 483 assert!(refs.contains("valid.schema.name")); 484 // Only valid.schema.name should be extracted (no ID field, so #localref is skipped) 485 assert_eq!(refs.len(), 1); 486 } 487 488 #[test] 489 fn test_extract_union_references() { 490 let schema = serde_json::json!({ 491 "defs": { 492 "main": { 493 "type": "union", 494 "refs": [ 495 "community.lexicon.calendar.event#uri", 496 "community.lexicon.location.address", 497 "community.lexicon.location.fsq", 498 "community.lexicon.location.geo", 499 "community.lexicon.location.hthree" 500 ] 501 } 502 } 503 }); 504 505 let refs = extract_lexicon_references(&schema); 506 507 // NSIDs should be extracted without fragment identifiers 508 assert!(refs.contains("community.lexicon.calendar.event")); 509 assert!(refs.contains("community.lexicon.location.address")); 510 assert!(refs.contains("community.lexicon.location.fsq")); 511 assert!(refs.contains("community.lexicon.location.geo")); 512 assert!(refs.contains("community.lexicon.location.hthree")); 513 assert_eq!(refs.len(), 5); 514 } 515 516 #[test] 517 fn test_extract_mixed_union_references() { 518 let schema = serde_json::json!({ 519 "defs": { 520 "main": { 521 "type": "union", 522 "refs": [ 523 "app.bsky.feed.post", 524 { "type": "ref", "ref": "app.bsky.actor.profile" }, 525 "#app.bsky.graph.follow", // Fragment-only, no ID context - should be skipped 526 "invalid", // No dots - should be skipped 527 ] 528 }, 529 "other": { 530 "type": "ref", 531 "ref": "app.bsky.embed.images" 532 } 533 } 534 }); 535 536 let refs = extract_lexicon_references(&schema); 537 538 assert!(refs.contains("app.bsky.feed.post")); 539 assert!(refs.contains("app.bsky.actor.profile")); 540 assert!(refs.contains("app.bsky.embed.images")); 541 // #app.bsky.graph.follow is fragment-only with no ID context, should not be included 542 assert!(!refs.contains("app.bsky.graph.follow")); 543 assert!(!refs.contains("invalid")); 544 assert_eq!(refs.len(), 3); 545 } 546}