A better Rust ATProto crate

Compare changes

Choose any two refs to compare.

Changed files
+418 -38
crates
jacquard-common
src
jacquard-lexicon
src
tests
fixtures
error_cases
test_lexicons
+1
Cargo.lock
··· 2506 2506 "serde", 2507 2507 "serde_ipld_dagcbor", 2508 2508 "serde_json", 2509 + "serde_path_to_error", 2509 2510 "serde_repr", 2510 2511 "serde_with", 2511 2512 "sha2",
+49 -11
crates/jacquard-common/src/xrpc.rs
··· 48 48 }; 49 49 use serde::{Deserialize, Serialize}; 50 50 use smol_str::SmolStr; 51 + 51 52 #[cfg(feature = "websocket")] 52 53 pub use subscription::{ 53 54 BasicSubscriptionClient, MessageEncoding, SubscriptionCall, SubscriptionClient, ··· 485 486 Resp: XrpcResp, 486 487 { 487 488 let status = http_response.status(); 489 + 488 490 // If the server returned 401 with a WWW-Authenticate header, expose it so higher layers 489 491 // (e.g., DPoP handling) can detect `error="invalid_token"` and trigger refresh. 490 492 #[allow(deprecated)] 491 493 if status.as_u16() == 401 { 492 494 if let Some(hv) = http_response.headers().get(http::header::WWW_AUTHENTICATE) { 493 - return Err(crate::error::ClientError::auth( 494 - crate::error::AuthError::Other(hv.clone()), 495 - ) 496 - .for_nsid(Resp::NSID)); 495 + return Err( 496 + crate::error::ClientError::auth(crate::error::AuthError::Other(hv.clone())) 497 + .for_nsid(Resp::NSID), 498 + ); 497 499 } 498 500 } 499 501 let buffer = Bytes::from(http_response.into_body()); ··· 670 672 } 671 673 // 400: try typed XRPC error, fallback to generic error 672 674 } else if self.status.as_u16() == 400 { 673 - match serde_json::from_slice::<_>(&self.buffer) { 674 - Ok(error) => Err(XrpcError::Xrpc(error)), 675 + match serde_json::from_slice::<R::Err<'_>>(&self.buffer) { 676 + Ok(error) => { 677 + use alloc::string::ToString; 678 + if error.to_string().contains("InvalidToken") { 679 + Err(XrpcError::Auth(AuthError::InvalidToken)) 680 + } else if error.to_string().contains("ExpiredToken") { 681 + Err(XrpcError::Auth(AuthError::TokenExpired)) 682 + } else { 683 + Err(XrpcError::Xrpc(error)) 684 + } 685 + } 675 686 Err(_) => { 676 687 // Fallback to generic error (InvalidRequest, ExpiredToken, etc.) 
677 688 match serde_json::from_slice::<GenericXrpcError>(&self.buffer) { ··· 730 741 } 731 742 // 400: try typed XRPC error, fallback to generic error 732 743 } else if self.status.as_u16() == 400 { 733 - match serde_json::from_slice::<_>(&self.buffer) { 734 - Ok(error) => Err(XrpcError::Xrpc(error)), 744 + match serde_json::from_slice::<R::Err<'_>>(&self.buffer) { 745 + Ok(error) => { 746 + use alloc::string::ToString; 747 + if error.to_string().contains("InvalidToken") { 748 + Err(XrpcError::Auth(AuthError::InvalidToken)) 749 + } else if error.to_string().contains("ExpiredToken") { 750 + Err(XrpcError::Auth(AuthError::TokenExpired)) 751 + } else { 752 + Err(XrpcError::Xrpc(error)) 753 + } 754 + } 735 755 Err(_) => { 736 756 // Fallback to generic error (InvalidRequest, ExpiredToken, etc.) 737 757 match serde_json::from_slice::<GenericXrpcError>(&self.buffer) { ··· 790 810 } 791 811 // 400: try typed XRPC error, fallback to generic error 792 812 } else if self.status.as_u16() == 400 { 793 - match serde_json::from_slice::<_>(&self.buffer) { 794 - Ok(error) => Err(XrpcError::Xrpc(error)), 813 + match serde_json::from_slice::<R::Err<'_>>(&self.buffer) { 814 + Ok(error) => { 815 + use alloc::string::ToString; 816 + if error.to_string().contains("InvalidToken") { 817 + Err(XrpcError::Auth(AuthError::InvalidToken)) 818 + } else if error.to_string().contains("ExpiredToken") { 819 + Err(XrpcError::Auth(AuthError::TokenExpired)) 820 + } else { 821 + Err(XrpcError::Xrpc(error)) 822 + } 823 + } 795 824 Err(_) => { 796 825 // Fallback to generic error (InvalidRequest, ExpiredToken, etc.) 
797 826 match serde_json::from_slice::<GenericXrpcError>(&self.buffer) { ··· 876 905 // 400: try typed XRPC error, fallback to generic error 877 906 } else if self.status.as_u16() == 400 { 878 907 let error = match parse_error::<R>(&self.buffer) { 879 - Ok(error) => XrpcError::Xrpc(error), 908 + Ok(error) => { 909 + use alloc::string::ToString; 910 + if error.to_string().contains("InvalidToken") { 911 + XrpcError::Auth(AuthError::InvalidToken) 912 + } else if error.to_string().contains("ExpiredToken") { 913 + XrpcError::Auth(AuthError::TokenExpired) 914 + } else { 915 + XrpcError::Xrpc(error) 916 + } 917 + } 880 918 Err(_) => { 881 919 // Fallback to generic error (InvalidRequest, ExpiredToken, etc.) 882 920 match serde_json::from_slice::<GenericXrpcError>(&self.buffer) {
+1
crates/jacquard-lexicon/Cargo.toml
··· 27 27 proc-macro2 = { workspace = true, optional = true } 28 28 quote = { workspace = true, optional = true } 29 29 serde.workspace = true 30 + serde_path_to_error = "0.1" 30 31 serde_ipld_dagcbor.workspace = true 31 32 serde_json.workspace = true 32 33 serde_repr.workspace = true
+321 -8
crates/jacquard-lexicon/src/corpus.rs
··· 1 - use crate::ref_utils::RefPath; 2 - use crate::error::Result; 1 + use crate::error::{CodegenError, Result}; 3 2 use crate::lexicon::{LexUserType, LexiconDoc}; 3 + use crate::ref_utils::RefPath; 4 4 use jacquard_common::{into_static::IntoStatic, smol_str::SmolStr}; 5 5 use std::collections::BTreeMap; 6 6 use std::fs; 7 7 use std::path::Path; 8 8 9 + /// Check if content looks like a lexicon file. 10 + /// 11 + /// A file is considered a lexicon if it contains a `"lexicon"` key at the top level 12 + /// or one level down (for some wrapper formats). This allows us to distinguish 13 + /// "not a lexicon at all" (skip silently) from "broken lexicon" (report error). 14 + fn is_lexicon_content(content: &str) -> bool { 15 + // Quick string scan first (fast path for non-JSON or unrelated JSON) 16 + if !content.contains("\"lexicon\"") { 17 + return false; 18 + } 19 + 20 + // Parse to Value and check structure 21 + if let Ok(value) = serde_json::from_str::<serde_json::Value>(content) { 22 + // Top-level lexicon field 23 + if value.get("lexicon").is_some() { 24 + return true; 25 + } 26 + // One level down (some wrapper formats) 27 + if let Some(obj) = value.as_object() { 28 + for v in obj.values() { 29 + if v.get("lexicon").is_some() { 30 + return true; 31 + } 32 + } 33 + } 34 + } 35 + false 36 + } 37 + 38 + /// Raw lexicon doc for two-phase parsing - defs are kept as raw JSON Values 39 + /// so we can deserialize each separately with better error tracking. 40 + #[derive(Debug, serde::Deserialize)] 41 + struct RawLexiconDoc<'s> { 42 + pub lexicon: crate::lexicon::Lexicon, 43 + #[serde(borrow)] 44 + pub id: jacquard_common::CowStr<'s>, 45 + pub revision: Option<u32>, 46 + #[serde(borrow)] 47 + pub description: Option<jacquard_common::CowStr<'s>>, 48 + pub defs: BTreeMap<SmolStr, serde_json::Value>, 49 + } 50 + 51 + /// Helper to create a parse error with path context. 
52 + fn make_parse_error( 53 + file_path: &Path, 54 + json_path: &str, 55 + message: String, 56 + content: &str, 57 + ) -> CodegenError { 58 + CodegenError::ParseError { 59 + path: file_path.to_path_buf(), 60 + json_path: Some(json_path.to_string()), 61 + message, 62 + src: Some(content.to_string()), 63 + span: None, 64 + } 65 + } 66 + 67 + /// Recursively parse properties with path tracking. 68 + /// Returns parsed properties or an error with the full path. 69 + fn parse_properties_deep( 70 + props_value: &serde_json::Value, 71 + base_path: &str, 72 + file_path: &Path, 73 + content: &str, 74 + ) -> std::result::Result<BTreeMap<SmolStr, crate::lexicon::LexObjectProperty<'static>>, CodegenError> 75 + { 76 + let props_obj = props_value.as_object().ok_or_else(|| { 77 + make_parse_error( 78 + file_path, 79 + base_path, 80 + "expected object for properties".to_string(), 81 + content, 82 + ) 83 + })?; 84 + 85 + let mut parsed_props = BTreeMap::new(); 86 + for (prop_name, prop_value) in props_obj { 87 + let prop_path = format!("{}.{}", base_path, prop_name); 88 + 89 + // Try to parse this property 90 + let parsed: crate::lexicon::LexObjectProperty = 91 + serde_path_to_error::deserialize(prop_value).map_err(|e| { 92 + let inner_path = e.path().to_string(); 93 + let full_path = if inner_path.is_empty() { 94 + prop_path.clone() 95 + } else { 96 + format!("{}.{}", prop_path, inner_path) 97 + }; 98 + make_parse_error(file_path, &full_path, e.inner().to_string(), content) 99 + })?; 100 + 101 + parsed_props.insert(SmolStr::new(prop_name), parsed.into_static()); 102 + } 103 + 104 + Ok(parsed_props) 105 + } 106 + 107 + /// Parse an object-like def with deep property tracking. 
108 + fn parse_object_deep( 109 + value: &serde_json::Value, 110 + base_path: &str, 111 + file_path: &Path, 112 + content: &str, 113 + ) -> std::result::Result<crate::lexicon::LexObject<'static>, CodegenError> { 114 + use crate::lexicon::LexObject; 115 + 116 + let obj = value.as_object().ok_or_else(|| { 117 + make_parse_error(file_path, base_path, "expected object".to_string(), content) 118 + })?; 119 + 120 + // Parse properties deeply if present 121 + let properties = if let Some(props) = obj.get("properties") { 122 + let props_path = format!("{}.properties", base_path); 123 + parse_properties_deep(props, &props_path, file_path, content)? 124 + } else { 125 + BTreeMap::new() 126 + }; 127 + 128 + // Parse the rest of the object normally 129 + let description = obj 130 + .get("description") 131 + .and_then(|v| v.as_str()) 132 + .map(|s| jacquard_common::CowStr::copy_from_str(s)); 133 + let required: Option<Vec<SmolStr>> = obj 134 + .get("required") 135 + .map(|v| serde_json::from_value(v.clone())) 136 + .transpose() 137 + .map_err(|e| make_parse_error(file_path, &format!("{}.required", base_path), e.to_string(), content))?; 138 + let nullable: Option<Vec<SmolStr>> = obj 139 + .get("nullable") 140 + .map(|v| serde_json::from_value(v.clone())) 141 + .transpose() 142 + .map_err(|e| make_parse_error(file_path, &format!("{}.nullable", base_path), e.to_string(), content))?; 143 + 144 + Ok(LexObject { 145 + description, 146 + required, 147 + nullable, 148 + properties, 149 + }) 150 + } 151 + 152 + /// Parse a def with deep path tracking for nested structures. 
153 + fn parse_def_deep( 154 + def_name: &str, 155 + value: &serde_json::Value, 156 + file_path: &Path, 157 + content: &str, 158 + ) -> std::result::Result<LexUserType<'static>, CodegenError> { 159 + let base_path = format!("defs.{}", def_name); 160 + 161 + // Check the type field to determine how to parse 162 + let type_str = value 163 + .get("type") 164 + .and_then(|v| v.as_str()) 165 + .unwrap_or("object"); 166 + 167 + match type_str { 168 + "object" => { 169 + let obj = parse_object_deep(value, &base_path, file_path, content)?; 170 + Ok(LexUserType::Object(obj)) 171 + } 172 + "record" => { 173 + // Records have a nested record.properties structure 174 + if let Some(record_value) = value.get("record") { 175 + let record_path = format!("{}.record", base_path); 176 + let inner_obj = parse_object_deep(record_value, &record_path, file_path, content)?; 177 + 178 + // Parse the rest of the record 179 + let obj = value.as_object().ok_or_else(|| { 180 + make_parse_error(file_path, &base_path, "expected object".to_string(), content) 181 + })?; 182 + 183 + let description = obj 184 + .get("description") 185 + .and_then(|v| v.as_str()) 186 + .map(|s| jacquard_common::CowStr::copy_from_str(s)); 187 + let key: Option<jacquard_common::CowStr<'static>> = obj 188 + .get("key") 189 + .and_then(|v| v.as_str()) 190 + .map(|s| jacquard_common::CowStr::copy_from_str(s)); 191 + 192 + Ok(LexUserType::Record(crate::lexicon::LexRecord { 193 + description, 194 + key, 195 + record: crate::lexicon::LexRecordRecord::Object(inner_obj), 196 + })) 197 + } else { 198 + // Fallback to normal parsing if no record field 199 + serde_path_to_error::deserialize(value) 200 + .map(|v: LexUserType| v.into_static()) 201 + .map_err(|e| make_parse_error(file_path, &base_path, e.inner().to_string(), content)) 202 + } 203 + } 204 + // For other types (query, procedure, etc.), use the simpler approach for now 205 + // Could be extended later 206 + _ => serde_path_to_error::deserialize(value) 207 + .map(|v: 
LexUserType| v.into_static()) 208 + .map_err(|e| { 209 + let inner_path = e.path().to_string(); 210 + let full_path = if inner_path.is_empty() { 211 + base_path 212 + } else { 213 + format!("{}.{}", base_path, inner_path) 214 + }; 215 + make_parse_error(file_path, &full_path, e.inner().to_string(), content) 216 + }), 217 + } 218 + } 219 + 220 + /// Parse a lexicon with rich error context using deep recursive parsing. 221 + /// 222 + /// This parses the document structure recursively, tracking paths through: 223 + /// - defs → def_name → properties → prop_name → nested fields 224 + /// 225 + /// This gives us detailed error paths like "defs.main.properties.count.default" 226 + fn parse_lexicon_with_context( 227 + content: &str, 228 + path: &Path, 229 + ) -> std::result::Result<LexiconDoc<'static>, CodegenError> { 230 + // Phase 1: Parse the top-level structure with defs as raw Values 231 + let raw_doc: RawLexiconDoc = serde_json::from_str(content).map_err(|e| { 232 + CodegenError::ParseError { 233 + path: path.to_path_buf(), 234 + json_path: None, 235 + message: e.to_string(), 236 + src: Some(content.to_string()), 237 + span: None, 238 + } 239 + })?; 240 + 241 + // Phase 2: Parse each def with deep path tracking 242 + let mut parsed_defs = BTreeMap::new(); 243 + for (def_name, def_value) in raw_doc.defs { 244 + let parsed_def = parse_def_deep(&def_name, &def_value, path, content)?; 245 + parsed_defs.insert(def_name, parsed_def); 246 + } 247 + 248 + // Reconstruct the full LexiconDoc 249 + Ok(LexiconDoc { 250 + lexicon: raw_doc.lexicon, 251 + id: raw_doc.id.into_static(), 252 + revision: raw_doc.revision, 253 + description: raw_doc.description.map(|d| d.into_static()), 254 + defs: parsed_defs, 255 + }) 256 + } 257 + 9 258 /// Registry of all loaded lexicons for reference resolution 10 259 #[derive(Debug, Clone)] 11 260 pub struct LexiconCorpus { ··· 32 281 for schema_path in schemas { 33 282 let content = fs::read_to_string(schema_path.as_ref())?; 34 283 35 - 
// Try to parse as lexicon doc - skip files that aren't lexicon schemas 36 - let doc: LexiconDoc = match serde_json::from_str(&content) { 37 - Ok(doc) => doc, 38 - Err(_) => continue, // Skip non-lexicon JSON files 39 - }; 284 + // Check if this file is trying to be a lexicon 285 + if !is_lexicon_content(&content) { 286 + // Not a lexicon, skip silently 287 + continue; 288 + } 289 + 290 + // This IS a lexicon - parse with good error reporting 291 + let doc = parse_lexicon_with_context(&content, schema_path.as_ref())?; 40 292 41 293 let nsid = SmolStr::from(doc.id.to_string()); 42 - corpus.docs.insert(nsid.clone(), doc.into_static()); 294 + corpus.docs.insert(nsid.clone(), doc); 43 295 corpus.sources.insert(nsid, content); 44 296 } 45 297 ··· 166 418 // Non-existing refs 167 419 assert!(!corpus.ref_exists("com.example.fake")); 168 420 assert!(!corpus.ref_exists("app.bsky.feed.post#nonexistent")); 421 + } 422 + 423 + #[test] 424 + fn test_non_lexicon_json_skipped_silently() { 425 + // The test_lexicons directory contains not_a_lexicon.json which should be skipped 426 + let corpus = LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons") 427 + .expect("should succeed even with non-lexicon JSON files"); 428 + 429 + // The non-lexicon file should not be in the corpus 430 + assert!(corpus.get("some random config").is_none()); 431 + 432 + // But valid lexicons should still load 433 + assert!(corpus.get("app.bsky.feed.post").is_some()); 434 + } 435 + 436 + #[test] 437 + fn test_is_lexicon_content_detection() { 438 + // Not a lexicon - no "lexicon" key 439 + assert!(!is_lexicon_content(r#"{"name": "test", "version": "1.0"}"#)); 440 + 441 + // Not a lexicon - invalid JSON 442 + assert!(!is_lexicon_content("not json at all")); 443 + 444 + // Is a lexicon - has "lexicon" at top level 445 + assert!(is_lexicon_content(r#"{"lexicon": 1, "id": "test.foo"}"#)); 446 + 447 + // Is a lexicon - has "lexicon" one level down 448 + assert!(is_lexicon_content( 449 + r#"{"wrapper": 
{"lexicon": 1, "id": "test.foo"}}"# 450 + )); 451 + } 452 + 453 + #[test] 454 + fn test_broken_lexicon_returns_error_with_path() { 455 + let result = LexiconCorpus::load_from_dir("tests/fixtures/error_cases"); 456 + 457 + // Should fail because broken_lexicon.json is a lexicon (has "lexicon" key) 458 + // but has invalid structure 459 + let err = result.expect_err("should fail on broken lexicon"); 460 + let err_str = err.to_string(); 461 + 462 + // Error should include the full path to the broken property 463 + assert!( 464 + err_str.contains("defs.main.properties.count"), 465 + "error should contain path to the broken property, got: {}", 466 + err_str 467 + ); 468 + 469 + // Error should also include the actual error message 470 + assert!( 471 + err_str.contains("expected i64"), 472 + "error should describe the type mismatch, got: {}", 473 + err_str 474 + ); 475 + 476 + // Error should mention the file 477 + assert!( 478 + err_str.contains("broken_lexicon.json"), 479 + "error should mention the file, got: {}", 480 + err_str 481 + ); 169 482 } 170 483 }
+24 -19
crates/jacquard-lexicon/src/error.rs
··· 3 3 use std::path::PathBuf; 4 4 use thiserror::Error; 5 5 6 + fn format_parse_error(path: &PathBuf, json_path: Option<&str>, message: &str) -> String { 7 + match json_path { 8 + Some(jp) if !jp.is_empty() => { 9 + format!("failed to parse lexicon {}: at {}: {}", path.display(), jp, message) 10 + } 11 + _ => format!("failed to parse lexicon {}: {}", path.display(), message), 12 + } 13 + } 14 + 6 15 /// Errors that can occur during lexicon code generation 7 16 #[derive(Debug, Error, Diagnostic)] 8 17 #[non_exhaustive] ··· 12 21 Io(#[from] io::Error), 13 22 14 23 /// Failed to parse lexicon JSON 15 - #[error("Failed to parse lexicon JSON in {}", path.display())] 24 + #[error("{}", format_parse_error(path, json_path.as_deref(), message))] 16 25 #[diagnostic( 17 26 code(lexicon::parse_error), 18 27 help("Check that the lexicon file is valid JSON and follows the lexicon schema") 19 28 )] 20 29 ParseError { 21 - #[source] 22 - source: serde_json::Error, 23 30 /// Path to the file that failed to parse 24 31 path: PathBuf, 32 + /// JSON path where the error occurred (from serde_path_to_error) 33 + json_path: Option<String>, 34 + /// The underlying error message 35 + message: String, 25 36 /// Source text that failed to parse 26 37 #[source_code] 27 38 src: Option<String>, ··· 90 101 91 102 impl CodegenError { 92 103 /// Create a parse error with context 93 - pub fn parse_error(source: serde_json::Error, path: impl Into<PathBuf>) -> Self { 104 + pub fn parse_error(message: impl Into<String>, path: impl Into<PathBuf>) -> Self { 94 105 Self::ParseError { 95 - source, 96 106 path: path.into(), 107 + json_path: None, 108 + message: message.into(), 97 109 src: None, 98 110 span: None, 99 111 } 100 112 } 101 113 102 - /// Create a parse error with source text 103 - pub fn parse_error_with_source( 104 - source: serde_json::Error, 114 + /// Create a parse error with source text and JSON path 115 + pub fn parse_error_with_context( 116 + message: impl Into<String>, 105 117 path: 
impl Into<PathBuf>, 118 + json_path: Option<String>, 106 119 src: String, 107 120 ) -> Self { 108 - // Try to extract error location from serde_json error 109 - let span = if let Some(line) = source.line().checked_sub(1) { 110 - let col = source.column().saturating_sub(1); 111 - // Approximate byte offset (not perfect but good enough for display) 112 - Some((line * 80 + col, 1).into()) 113 - } else { 114 - None 115 - }; 116 - 117 121 Self::ParseError { 118 - source, 119 122 path: path.into(), 123 + json_path, 124 + message: message.into(), 120 125 src: Some(src), 121 - span, 126 + span: None, 122 127 } 123 128 } 124 129
+15
crates/jacquard-lexicon/tests/fixtures/error_cases/broken_lexicon.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "test.broken.lexicon", 4 + "defs": { 5 + "main": { 6 + "type": "object", 7 + "properties": { 8 + "count": { 9 + "type": "integer", 10 + "default": "not_a_number" 11 + } 12 + } 13 + } 14 + } 15 + }
+7
crates/jacquard-lexicon/tests/fixtures/test_lexicons/not_a_lexicon.json
··· 1 + { 2 + "name": "some random config", 3 + "version": "1.0.0", 4 + "settings": { 5 + "enabled": true 6 + } 7 + }