A better Rust ATProto crate

level 1 raw value enum + serde impl; atproto!() macro (behaves like json!()); a few little fixes

Orual 2760ad16 429fb2f9

Changed files
+1044 -15
crates
+2
crates/jacquard-common/Cargo.toml
··· 11 11 exclude.workspace = true 12 12 description.workspace = true 13 13 14 + 15 + 14 16 [dependencies] 15 17 base64 = "0.22.1" 16 18 bytes = "1.10.1"
+1 -2
crates/jacquard-common/src/lib.rs
··· 2 2 pub mod cowstr; 3 3 #[macro_use] 4 4 pub mod into_static; 5 - 5 + pub mod macros; 6 6 pub mod types; 7 7 8 8 pub use cowstr::CowStr; 9 9 pub use into_static::IntoStatic; 10 - 11 10 pub use smol_str; 12 11 pub use url;
+288
crates/jacquard-common/src/macros.rs
//! `atproto!` macro: build a `Data` value from JSON-like literal syntax.

/// Construct an atproto `Data<'_>` value from a literal.
///
/// ```
/// # use jacquard_common::atproto;
/// #
/// let value = atproto!({
///     "code": 200,
///     "success": true,
///     "payload": {
///         "features": [
///             "serde",
///             "json"
///         ]
///     }
/// });
/// ```
///
/// Variables or expressions can be interpolated into the ATProto literal. Any
/// value interpolated into an array element or object value is converted with
/// `Data::from`, so its type must have a `From` conversion into `Data`. Any
/// value interpolated into an object key is converted with `.into()` into the
/// object's key type. A missing conversion is a compile-time error at the
/// macro call site.
///
/// ```
/// # use jacquard_common::atproto;
/// #
/// let code = 200;
/// let features = vec!["serde", "json"];
///
/// let value = atproto!({
///     "code": code,
///     "success": code == 200,
///     "payload": {
///         features[0]: features[1]
///     }
/// });
/// ```
///
/// Trailing commas are allowed inside both arrays and objects.
///
/// ```
/// # use jacquard_common::atproto;
/// #
/// let value = atproto!([
///     "notice",
///     "the",
///     "trailing",
///     "comma -->",
/// ]);
/// ```
#[macro_export(local_inner_macros)]
macro_rules! atproto {
    // Hide distracting implementation details from the generated rustdoc.
    ($($atproto:tt)+) => {
        atproto_internal!($($atproto)+)
    };
}

// Implementation of `atproto!`. Rule order matters: the `@array` and `@object`
// munchers and the literal rules must all come before the final catch-all
// `$other:expr` rule.
#[macro_export(local_inner_macros)]
#[doc(hidden)]
macro_rules! atproto_internal {
    //////////////////////////////////////////////////////////////////////////
    // TT muncher for parsing the inside of an array [...]. Produces a vec![...]
    // of the elements.
    //
    // Must be invoked as: atproto_internal!(@array [] $($tt)*)
    //////////////////////////////////////////////////////////////////////////

    // Done with trailing comma.
    (@array [$($elems:expr,)*]) => {
        atproto_internal_vec![$($elems,)*]
    };

    // Done without trailing comma.
    (@array [$($elems:expr),*]) => {
        atproto_internal_vec![$($elems),*]
    };

    // Next element is `null`.
    (@array [$($elems:expr,)*] null $($rest:tt)*) => {
        atproto_internal!(@array [$($elems,)* atproto_internal!(null)] $($rest)*)
    };

    // Next element is `true`.
    (@array [$($elems:expr,)*] true $($rest:tt)*) => {
        atproto_internal!(@array [$($elems,)* atproto_internal!(true)] $($rest)*)
    };

    // Next element is `false`.
    (@array [$($elems:expr,)*] false $($rest:tt)*) => {
        atproto_internal!(@array [$($elems,)* atproto_internal!(false)] $($rest)*)
    };

    // Next element is an array.
    (@array [$($elems:expr,)*] [$($array:tt)*] $($rest:tt)*) => {
        atproto_internal!(@array [$($elems,)* atproto_internal!([$($array)*])] $($rest)*)
    };

    // Next element is a map.
    (@array [$($elems:expr,)*] {$($map:tt)*} $($rest:tt)*) => {
        atproto_internal!(@array [$($elems,)* atproto_internal!({$($map)*})] $($rest)*)
    };

    // Next element is an expression followed by comma.
    (@array [$($elems:expr,)*] $next:expr, $($rest:tt)*) => {
        atproto_internal!(@array [$($elems,)* atproto_internal!($next),] $($rest)*)
    };

    // Last element is an expression with no trailing comma.
    (@array [$($elems:expr,)*] $last:expr) => {
        atproto_internal!(@array [$($elems,)* atproto_internal!($last)])
    };

    // Comma after the most recent element.
    (@array [$($elems:expr),*] , $($rest:tt)*) => {
        atproto_internal!(@array [$($elems,)*] $($rest)*)
    };

    // Unexpected token after most recent element.
    (@array [$($elems:expr),*] $unexpected:tt $($rest:tt)*) => {
        atproto_unexpected!($unexpected)
    };

    //////////////////////////////////////////////////////////////////////////
    // TT muncher for parsing the inside of an object {...}. Each entry is
    // inserted into the given map variable.
    //
    // Must be invoked as: atproto_internal!(@object $map () ($($tt)*) ($($tt)*))
    //
    // We require two copies of the input tokens so that we can match on one
    // copy and trigger errors on the other copy.
    //////////////////////////////////////////////////////////////////////////

    // Done.
    (@object $object:ident () () ()) => {};

    // Insert the current entry followed by trailing comma.
    (@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => {
        let _ = $object.insert(($($key)+).into(), $value);
        atproto_internal!(@object $object () ($($rest)*) ($($rest)*));
    };

    // Current entry followed by unexpected token.
    (@object $object:ident [$($key:tt)+] ($value:expr) $unexpected:tt $($rest:tt)*) => {
        atproto_unexpected!($unexpected);
    };

    // Insert the last entry without trailing comma.
    (@object $object:ident [$($key:tt)+] ($value:expr)) => {
        let _ = $object.insert(($($key)+).into(), $value);
    };

    // Next value is `null`.
    (@object $object:ident ($($key:tt)+) (: null $($rest:tt)*) $copy:tt) => {
        atproto_internal!(@object $object [$($key)+] (atproto_internal!(null)) $($rest)*);
    };

    // Next value is `true`.
    (@object $object:ident ($($key:tt)+) (: true $($rest:tt)*) $copy:tt) => {
        atproto_internal!(@object $object [$($key)+] (atproto_internal!(true)) $($rest)*);
    };

    // Next value is `false`.
    (@object $object:ident ($($key:tt)+) (: false $($rest:tt)*) $copy:tt) => {
        atproto_internal!(@object $object [$($key)+] (atproto_internal!(false)) $($rest)*);
    };

    // Next value is an array.
    (@object $object:ident ($($key:tt)+) (: [$($array:tt)*] $($rest:tt)*) $copy:tt) => {
        atproto_internal!(@object $object [$($key)+] (atproto_internal!([$($array)*])) $($rest)*);
    };

    // Next value is a map.
    (@object $object:ident ($($key:tt)+) (: {$($map:tt)*} $($rest:tt)*) $copy:tt) => {
        atproto_internal!(@object $object [$($key)+] (atproto_internal!({$($map)*})) $($rest)*);
    };

    // Next value is an expression followed by comma.
    (@object $object:ident ($($key:tt)+) (: $value:expr , $($rest:tt)*) $copy:tt) => {
        atproto_internal!(@object $object [$($key)+] (atproto_internal!($value)) , $($rest)*);
    };

    // Last value is an expression with no trailing comma.
    (@object $object:ident ($($key:tt)+) (: $value:expr) $copy:tt) => {
        atproto_internal!(@object $object [$($key)+] (atproto_internal!($value)));
    };

    // Missing value for last entry. Trigger a reasonable error message.
    (@object $object:ident ($($key:tt)+) (:) $copy:tt) => {
        // "unexpected end of macro invocation"
        atproto_internal!();
    };

    // Missing colon and value for last entry. Trigger a reasonable error
    // message.
    (@object $object:ident ($($key:tt)+) () $copy:tt) => {
        // "unexpected end of macro invocation"
        atproto_internal!();
    };

    // Misplaced colon. Trigger a reasonable error message.
    (@object $object:ident () (: $($rest:tt)*) ($colon:tt $($copy:tt)*)) => {
        // Takes no arguments so "no rules expected the token `:`".
        atproto_unexpected!($colon);
    };

    // Found a comma inside a key. Trigger a reasonable error message.
    (@object $object:ident ($($key:tt)*) (, $($rest:tt)*) ($comma:tt $($copy:tt)*)) => {
        // Takes no arguments so "no rules expected the token `,`".
        atproto_unexpected!($comma);
    };

    // Key is fully parenthesized. This avoids clippy double_parens false
    // positives because the parenthesization may be necessary here.
    (@object $object:ident () (($key:expr) : $($rest:tt)*) $copy:tt) => {
        atproto_internal!(@object $object ($key) (: $($rest)*) (: $($rest)*));
    };

    // Munch a token into the current key.
    (@object $object:ident ($($key:tt)*) ($tt:tt $($rest:tt)*) $copy:tt) => {
        atproto_internal!(@object $object ($($key)* $tt) ($($rest)*) ($($rest)*));
    };

    //////////////////////////////////////////////////////////////////////////
    // The main implementation.
    //
    // Must be invoked as: atproto_internal!($($atproto)+)
    //////////////////////////////////////////////////////////////////////////

    (null) => {
        $crate::types::value::Data::Null
    };

    (true) => {
        $crate::types::value::Data::Boolean(true)
    };

    (false) => {
        $crate::types::value::Data::Boolean(false)
    };

    ([]) => {
        $crate::types::value::Data::Array($crate::types::value::Array(atproto_internal_vec![]))
    };

    ([ $($tt:tt)+ ]) => {
        $crate::types::value::Data::Array($crate::types::value::Array(atproto_internal!(@array [] $($tt)+)))
    };

    ({}) => {
        $crate::types::value::Data::Object($crate::types::value::Object(::std::collections::BTreeMap::new()))
    };

    ({ $($tt:tt)+ }) => {
        $crate::types::value::Data::Object($crate::types::value::Object({
            let mut object = ::std::collections::BTreeMap::new();
            atproto_internal!(@object object () ($($tt)+) ($($tt)+));
            object
        }))
    };

    // Any expression whose type has a `From` conversion into `Data`:
    // literals, variables, arbitrary expressions.
    // Must be below every other rule.
    ($other:expr) => {
        {
            $crate::types::value::Data::from($other)
        }
    };
}

// The atproto_internal macro above cannot invoke vec directly because it uses
// local_inner_macros. A vec invocation there would resolve to $crate::vec.
// Instead invoke vec here outside of local_inner_macros.
#[macro_export]
#[doc(hidden)]
macro_rules! atproto_internal_vec {
    ($($content:tt)*) => {
        ::std::vec![$($content)*]
    };
}

// Error-reporting helper: it only accepts an empty invocation, so passing it
// an offending token makes the compiler report "no rules expected the token"
// pointing at that token.
#[macro_export]
#[doc(hidden)]
macro_rules! atproto_unexpected {
    () => {};
}
+4 -1
crates/jacquard-common/src/types/cid.rs
··· 414 414 fn cidlink_serialize_json() { 415 415 let link = CidLink::str(TEST_CID); 416 416 let json = serde_json::to_string(&link).unwrap(); 417 - assert_eq!(json, r#"{"$link":"bafyreih4g7bvo6hdq2juolev5bfzpbo4ewkxh5mzxwgvkjp3kitc6hqkha"}"#); 417 + assert_eq!( 418 + json, 419 + r#"{"$link":"bafyreih4g7bvo6hdq2juolev5bfzpbo4ewkxh5mzxwgvkjp3kitc6hqkha"}"# 420 + ); 418 421 } 419 422 420 423 #[test]
+4 -1
crates/jacquard-common/src/types/did.rs
··· 218 218 219 219 #[test] 220 220 fn prefix_stripping() { 221 - assert_eq!(Did::new("at://did:plc:foo").unwrap().as_str(), "did:plc:foo"); 221 + assert_eq!( 222 + Did::new("at://did:plc:foo").unwrap().as_str(), 223 + "did:plc:foo" 224 + ); 222 225 assert_eq!(Did::new("did:plc:foo").unwrap().as_str(), "did:plc:foo"); 223 226 } 224 227
+10 -2
crates/jacquard-common/src/types/handle.rs
··· 29 29 .unwrap_or(handle); 30 30 31 31 if stripped.len() > 253 { 32 - Err(AtStrError::too_long("handle", stripped, 253, stripped.len())) 32 + Err(AtStrError::too_long( 33 + "handle", 34 + stripped, 35 + 253, 36 + stripped.len(), 37 + )) 33 38 } else if !HANDLE_REGEX.is_match(stripped) { 34 39 Err(AtStrError::regex( 35 40 "handle", ··· 224 229 #[test] 225 230 fn prefix_stripping() { 226 231 assert_eq!(Handle::new("@alice.test").unwrap().as_str(), "alice.test"); 227 - assert_eq!(Handle::new("at://alice.test").unwrap().as_str(), "alice.test"); 232 + assert_eq!( 233 + Handle::new("at://alice.test").unwrap().as_str(), 234 + "alice.test" 235 + ); 228 236 assert_eq!(Handle::new("alice.test").unwrap().as_str(), "alice.test"); 229 237 } 230 238
+26
crates/jacquard-common/src/types/string.rs
··· 195 195 } 196 196 } 197 197 198 + impl From<AtprotoStr<'_>> for String { 199 + fn from(value: AtprotoStr<'_>) -> Self { 200 + match value { 201 + AtprotoStr::AtIdentifier(ident) => ident.to_string(), 202 + AtprotoStr::AtUri(at_uri) => at_uri.to_string(), 203 + AtprotoStr::Uri(uri) => match uri { 204 + Uri::At(at_uri) => at_uri.to_string(), 205 + Uri::Cid(cid) => cid.to_string(), 206 + Uri::Did(did) => did.to_string(), 207 + Uri::Https(url) => url.to_string(), 208 + Uri::Wss(url) => url.to_string(), 209 + Uri::Any(cow_str) => cow_str.to_string(), 210 + }, 211 + AtprotoStr::Cid(cid) => cid.to_string(), 212 + AtprotoStr::RecordKey(record_key) => record_key.as_ref().to_string(), 213 + AtprotoStr::String(cow_str) => cow_str.to_string(), 214 + AtprotoStr::Datetime(datetime) => datetime.to_string(), 215 + AtprotoStr::Language(language) => language.to_string(), 216 + AtprotoStr::Tid(tid) => tid.to_string(), 217 + AtprotoStr::Nsid(nsid) => nsid.to_string(), 218 + AtprotoStr::Did(did) => did.to_string(), 219 + AtprotoStr::Handle(handle) => handle.to_string(), 220 + } 221 + } 222 + } 223 + 198 224 /// Parsing Error for atproto string types which don't have third-party specs 199 225 /// (e.g. datetime, CIDs, language tags). 200 226 ///
+61 -3
crates/jacquard-common/src/types/value.rs
··· 1 - use crate::types::{DataModelType, blob::Blob, string::*}; 1 + use crate::types::{DataModelType, LexiconStringType, UriType, blob::Blob, string::*}; 2 2 use bytes::Bytes; 3 3 use ipld_core::ipld::Ipld; 4 4 use smol_str::{SmolStr, ToSmolStr}; 5 5 use std::collections::BTreeMap; 6 6 7 + pub mod convert; 7 8 pub mod parsing; 8 9 pub mod serde_impl; 9 10 ··· 30 31 } 31 32 32 33 impl<'s> Data<'s> { 34 + pub fn data_type(&self) -> DataModelType { 35 + match self { 36 + Data::Null => DataModelType::Null, 37 + Data::Boolean(_) => DataModelType::Boolean, 38 + Data::Integer(_) => DataModelType::Integer, 39 + Data::String(s) => match s { 40 + AtprotoStr::Datetime(_) => DataModelType::String(LexiconStringType::Datetime), 41 + AtprotoStr::Language(_) => DataModelType::String(LexiconStringType::Language), 42 + AtprotoStr::Tid(_) => DataModelType::String(LexiconStringType::Tid), 43 + AtprotoStr::Nsid(_) => DataModelType::String(LexiconStringType::Nsid), 44 + AtprotoStr::Did(_) => DataModelType::String(LexiconStringType::Did), 45 + AtprotoStr::Handle(_) => DataModelType::String(LexiconStringType::Handle), 46 + AtprotoStr::AtIdentifier(_) => { 47 + DataModelType::String(LexiconStringType::AtIdentifier) 48 + } 49 + AtprotoStr::AtUri(_) => DataModelType::String(LexiconStringType::AtUri), 50 + AtprotoStr::Uri(uri) => match uri { 51 + Uri::Did(_) => DataModelType::String(LexiconStringType::Uri(UriType::Did)), 52 + Uri::At(_) => DataModelType::String(LexiconStringType::Uri(UriType::At)), 53 + Uri::Https(_) => DataModelType::String(LexiconStringType::Uri(UriType::Https)), 54 + Uri::Wss(_) => DataModelType::String(LexiconStringType::Uri(UriType::Wss)), 55 + Uri::Cid(_) => DataModelType::String(LexiconStringType::Uri(UriType::Cid)), 56 + Uri::Any(_) => DataModelType::String(LexiconStringType::Uri(UriType::Any)), 57 + }, 58 + AtprotoStr::Cid(_) => DataModelType::String(LexiconStringType::Cid), 59 + AtprotoStr::RecordKey(_) => DataModelType::String(LexiconStringType::RecordKey), 60 + 
AtprotoStr::String(_) => DataModelType::String(LexiconStringType::String), 61 + }, 62 + Data::Bytes(_) => DataModelType::Bytes, 63 + Data::CidLink(_) => DataModelType::CidLink, 64 + Data::Array(_) => DataModelType::Array, 65 + Data::Object(_) => DataModelType::Object, 66 + Data::Blob(_) => DataModelType::Blob, 67 + } 68 + } 33 69 pub fn from_json(json: &'s serde_json::Value) -> Result<Self, AtDataError> { 34 70 Ok(if let Some(value) = json.as_bool() { 35 71 Self::Boolean(value) 36 72 } else if let Some(value) = json.as_i64() { 37 73 Self::Integer(value) 38 74 } else if let Some(value) = json.as_str() { 39 - Self::String(AtprotoStr::new(value)) 75 + Self::String(parsing::parse_string(value)) 40 76 } else if let Some(value) = json.as_array() { 41 77 Self::Array(Array::from_json(value)?) 42 78 } else if let Some(value) = json.as_object() { ··· 56 92 Ipld::Float(_) => { 57 93 return Err(AtDataError::FloatNotAllowed); 58 94 } 59 - Ipld::String(string) => Self::String(AtprotoStr::new(string)), 95 + Ipld::String(string) => Self::String(parsing::parse_string(string)), 60 96 Ipld::Bytes(items) => Self::Bytes(Bytes::copy_from_slice(items.as_slice())), 61 97 Ipld::List(iplds) => Self::Array(Array::from_cbor(iplds)?), 62 98 Ipld::Map(btree_map) => Object::from_cbor(btree_map)?, ··· 210 246 Ok(Data::Object(Object(map))) 211 247 } 212 248 } 249 + 250 + /// Level 1 deserialization of raw atproto data 251 + /// 252 + /// Maximally permissive with zero inference for cases where you just want to pass through the data 253 + /// and don't necessarily care if it's totally valid, or you want to validate later. 254 + /// E.g. lower-level services, PDS implementations, firehose indexers, relay implementations. 
255 + #[derive(Debug, Clone, PartialEq, Eq)] 256 + pub enum RawData<'s> { 257 + Null, 258 + Boolean(bool), 259 + SignedInt(i64), 260 + UnsignedInt(u64), 261 + String(CowStr<'s>), 262 + Bytes(Bytes), 263 + CidLink(Cid<'s>), 264 + Array(Vec<RawData<'s>>), 265 + Object(BTreeMap<SmolStr, RawData<'s>>), 266 + Blob(Blob<'s>), 267 + InvalidBlob(Box<RawData<'s>>), 268 + InvalidNumber(Bytes), 269 + InvalidData(Bytes), 270 + }
+269
crates/jacquard-common/src/types/value/convert.rs
··· 1 + use core::{any::TypeId, fmt}; 2 + use std::{borrow::ToOwned, boxed::Box, collections::BTreeMap, vec::Vec}; 3 + 4 + use crate::{ 5 + CowStr, 6 + types::{ 7 + DataModelType, 8 + cid::Cid, 9 + string::AtprotoStr, 10 + value::{Array, Data, Object}, 11 + }, 12 + }; 13 + use bytes::Bytes; 14 + use smol_str::SmolStr; 15 + 16 + /// Error used for converting from and into [`crate::types::value::Data`]. 17 + #[derive(Clone, Debug)] 18 + #[non_exhaustive] 19 + pub enum ConversionError { 20 + /// Error when the Atproto data type wasn't the one we expected. 21 + WrongAtprotoType { 22 + /// The expected type. 23 + expected: DataModelType, 24 + /// The actual type. 25 + found: DataModelType, 26 + }, 27 + /// Error when the given Atproto data type cannot be converted into a certain value type. 28 + FromAtprotoData { 29 + /// The Atproto data type trying to convert from. 30 + from: DataModelType, 31 + /// The type trying to convert into. 32 + into: TypeId, 33 + }, 34 + } 35 + 36 + impl fmt::Display for ConversionError { 37 + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 38 + match self { 39 + Self::WrongAtprotoType { expected, found } => { 40 + write!( 41 + formatter, 42 + "kind error: expected {:?} but found {:?}", 43 + expected, found 44 + ) 45 + } 46 + Self::FromAtprotoData { from, into } => { 47 + write!( 48 + formatter, 49 + "conversion error: cannot convert {:?} into {:?}", 50 + from, into 51 + ) 52 + } 53 + } 54 + } 55 + } 56 + 57 + impl std::error::Error for ConversionError {} 58 + 59 + impl TryFrom<Data<'_>> for () { 60 + type Error = ConversionError; 61 + 62 + fn try_from(ipld: Data) -> Result<Self, Self::Error> { 63 + match ipld { 64 + Data::Null => Ok(()), 65 + _ => Err(ConversionError::WrongAtprotoType { 66 + expected: DataModelType::Null, 67 + found: ipld.data_type(), 68 + }), 69 + } 70 + } 71 + } 72 + 73 + macro_rules! 
derive_try_from_atproto_option { 74 + ($enum:ident, $ty:ty) => { 75 + impl TryFrom<Data<'static>> for Option<$ty> { 76 + type Error = ConversionError; 77 + 78 + fn try_from(ipld: Data<'static>) -> Result<Self, Self::Error> { 79 + match ipld { 80 + Data::Null => Ok(None), 81 + Data::$enum(value) => Ok(Some(value.try_into().map_err(|_| { 82 + ConversionError::FromAtprotoData { 83 + from: DataModelType::$enum, 84 + into: TypeId::of::<$ty>(), 85 + } 86 + })?)), 87 + _ => Err(ConversionError::WrongAtprotoType { 88 + expected: DataModelType::$enum, 89 + found: ipld.data_type(), 90 + }), 91 + } 92 + } 93 + } 94 + }; 95 + } 96 + 97 + macro_rules! derive_try_from_atproto { 98 + ($enum:ident, $ty:ty) => { 99 + impl TryFrom<Data<'static>> for $ty { 100 + type Error = ConversionError; 101 + 102 + fn try_from(ipld: Data<'static>) -> Result<Self, Self::Error> { 103 + match ipld { 104 + Data::$enum(value) => { 105 + Ok(value 106 + .try_into() 107 + .map_err(|_| ConversionError::FromAtprotoData { 108 + from: DataModelType::$enum, 109 + into: TypeId::of::<$ty>(), 110 + })?) 111 + } 112 + 113 + _ => Err(ConversionError::WrongAtprotoType { 114 + expected: DataModelType::$enum, 115 + found: ipld.data_type(), 116 + }), 117 + } 118 + } 119 + } 120 + }; 121 + } 122 + 123 + macro_rules! derive_into_atproto_prim { 124 + ($enum:ident, $ty:ty, $fn:ident) => { 125 + impl<'s> From<$ty> for Data<'s> { 126 + fn from(t: $ty) -> Self { 127 + Data::$enum(t.$fn() as _) 128 + } 129 + } 130 + }; 131 + } 132 + 133 + macro_rules! 
derive_into_atproto { 134 + ($enum:ident, $ty:ty, $($fn:ident),*) => { 135 + impl<'s> From<$ty> for Data<'s> { 136 + fn from(t: $ty) -> Self { 137 + Data::$enum(t$(.$fn())*) 138 + } 139 + } 140 + }; 141 + } 142 + 143 + impl From<String> for Data<'_> { 144 + fn from(t: String) -> Self { 145 + Data::String(AtprotoStr::new_owned(t)) 146 + } 147 + } 148 + 149 + impl From<&str> for Data<'_> { 150 + fn from(t: &str) -> Self { 151 + Data::String(AtprotoStr::new_owned(t)) 152 + } 153 + } 154 + 155 + impl From<&[u8]> for Data<'_> { 156 + fn from(t: &[u8]) -> Self { 157 + Data::Bytes(Bytes::copy_from_slice(t)) 158 + } 159 + } 160 + 161 + impl<'s> TryFrom<Data<'s>> for Option<String> { 162 + type Error = ConversionError; 163 + 164 + fn try_from(ipld: Data<'s>) -> Result<Self, Self::Error> { 165 + match ipld { 166 + Data::Null => Ok(None), 167 + Data::String(value) => Ok(Some(value.try_into().map_err(|_| { 168 + ConversionError::FromAtprotoData { 169 + from: DataModelType::String(crate::types::LexiconStringType::String), 170 + into: TypeId::of::<String>(), 171 + } 172 + })?)), 173 + _ => Err(ConversionError::WrongAtprotoType { 174 + expected: DataModelType::String(crate::types::LexiconStringType::String), 175 + found: ipld.data_type(), 176 + }), 177 + } 178 + } 179 + } 180 + 181 + impl<'s> TryFrom<Data<'s>> for String { 182 + type Error = ConversionError; 183 + 184 + fn try_from(ipld: Data<'s>) -> Result<Self, Self::Error> { 185 + match ipld { 186 + Data::String(value) => { 187 + Ok(value 188 + .try_into() 189 + .map_err(|_| ConversionError::FromAtprotoData { 190 + from: DataModelType::String(crate::types::LexiconStringType::String), 191 + into: TypeId::of::<String>(), 192 + })?) 
193 + } 194 + 195 + _ => Err(ConversionError::WrongAtprotoType { 196 + expected: DataModelType::String(crate::types::LexiconStringType::String), 197 + found: ipld.data_type(), 198 + }), 199 + } 200 + } 201 + } 202 + 203 + impl<'s> From<Vec<Data<'s>>> for Array<'s> { 204 + fn from(value: Vec<Data<'s>>) -> Self { 205 + Array(value) 206 + } 207 + } 208 + 209 + impl<'s> From<BTreeMap<SmolStr, Data<'s>>> for Object<'s> { 210 + fn from(value: BTreeMap<SmolStr, Data<'s>>) -> Self { 211 + Object(value) 212 + } 213 + } 214 + 215 + derive_into_atproto!(Boolean, bool, clone); 216 + derive_into_atproto_prim!(Integer, i8, clone); 217 + derive_into_atproto_prim!(Integer, i16, clone); 218 + derive_into_atproto_prim!(Integer, i32, clone); 219 + derive_into_atproto_prim!(Integer, i64, clone); 220 + derive_into_atproto_prim!(Integer, i128, clone); 221 + derive_into_atproto_prim!(Integer, isize, clone); 222 + derive_into_atproto_prim!(Integer, u8, clone); 223 + derive_into_atproto_prim!(Integer, u16, clone); 224 + derive_into_atproto_prim!(Integer, u32, clone); 225 + derive_into_atproto_prim!(Integer, u64, clone); 226 + derive_into_atproto_prim!(Integer, usize, clone); 227 + derive_into_atproto!(Bytes, Box<[u8]>, into); 228 + derive_into_atproto!(Bytes, Vec<u8>, into); 229 + derive_into_atproto!(Array, Array<'s>, into); 230 + derive_into_atproto!(Object, Object<'s>, to_owned); 231 + 232 + derive_into_atproto!(CidLink, Cid<'s>, clone); 233 + derive_into_atproto!(CidLink, &Cid<'s>, to_owned); 234 + 235 + derive_try_from_atproto!(Boolean, bool); 236 + derive_try_from_atproto!(Integer, i8); 237 + derive_try_from_atproto!(Integer, i16); 238 + derive_try_from_atproto!(Integer, i32); 239 + derive_try_from_atproto!(Integer, i64); 240 + derive_try_from_atproto!(Integer, i128); 241 + derive_try_from_atproto!(Integer, isize); 242 + derive_try_from_atproto!(Integer, u8); 243 + derive_try_from_atproto!(Integer, u16); 244 + derive_try_from_atproto!(Integer, u32); 245 + 
derive_try_from_atproto!(Integer, u64); 246 + derive_try_from_atproto!(Integer, u128); 247 + derive_try_from_atproto!(Integer, usize); 248 + derive_try_from_atproto!(Bytes, Vec<u8>); 249 + derive_try_from_atproto!(Object, Object<'static>); 250 + derive_try_from_atproto!(CidLink, Cid<'static>); 251 + 252 + derive_try_from_atproto_option!(Boolean, bool); 253 + derive_try_from_atproto_option!(Integer, i8); 254 + derive_try_from_atproto_option!(Integer, i16); 255 + derive_try_from_atproto_option!(Integer, i32); 256 + derive_try_from_atproto_option!(Integer, i64); 257 + derive_try_from_atproto_option!(Integer, i128); 258 + derive_try_from_atproto_option!(Integer, isize); 259 + derive_try_from_atproto_option!(Integer, u8); 260 + derive_try_from_atproto_option!(Integer, u16); 261 + derive_try_from_atproto_option!(Integer, u32); 262 + derive_try_from_atproto_option!(Integer, u64); 263 + derive_try_from_atproto_option!(Integer, u128); 264 + derive_try_from_atproto_option!(Integer, usize); 265 + 266 + derive_try_from_atproto_option!(Bytes, Vec<u8>); 267 + derive_try_from_atproto_option!(Array, Array<'static>); 268 + derive_try_from_atproto_option!(Object, Object<'static>); 269 + derive_try_from_atproto_option!(CidLink, Cid<'static>);
+16 -1
crates/jacquard-common/src/types/value/parsing.rs
··· 4 4 DataModelType, LexiconStringType, UriType, 5 5 blob::{Blob, MimeType}, 6 6 string::*, 7 - value::{AtDataError, Data}, 7 + value::{AtDataError, Data, RawData}, 8 8 }, 9 9 }; 10 10 use base64::{ ··· 318 318 Data::String(AtprotoStr::String(CowStr::Borrowed(bytes).into_static())) 319 319 } 320 320 } 321 + 322 + pub fn decode_raw_bytes<'s>(bytes: &str) -> RawData<'s> { 323 + // First one should just work. rest are insurance. 324 + if let Ok(bytes) = BASE64_STANDARD.decode(bytes) { 325 + RawData::Bytes(Bytes::from_owner(bytes)) 326 + } else if let Ok(bytes) = BASE64_STANDARD_NO_PAD.decode(bytes) { 327 + RawData::Bytes(Bytes::from_owner(bytes)) 328 + } else if let Ok(bytes) = BASE64_URL_SAFE.decode(bytes) { 329 + RawData::Bytes(Bytes::from_owner(bytes)) 330 + } else if let Ok(bytes) = BASE64_URL_SAFE_NO_PAD.decode(bytes) { 331 + RawData::Bytes(Bytes::from_owner(bytes)) 332 + } else { 333 + RawData::String(CowStr::Borrowed(bytes).into_static()) 334 + } 335 + }
+363 -5
crates/jacquard-common/src/types/value/serde_impl.rs
··· 3 3 4 4 use base64::{Engine, prelude::BASE64_STANDARD}; 5 5 use bytes::Bytes; 6 - use serde::{Deserialize, Deserializer, Serialize, Serializer}; 6 + use serde::{Deserialize, Deserializer, Serialize, Serializer, de::VariantAccess}; 7 7 use smol_str::SmolStr; 8 8 9 9 use crate::{ ··· 13 13 blob::{Blob, MimeType}, 14 14 string::*, 15 15 value::{ 16 - Array, AtDataError, Data, Object, 17 - parsing::{decode_bytes, infer_from_type, parse_string, string_key_type_guess}, 16 + Array, AtDataError, Data, Object, RawData, 17 + parsing::{ 18 + decode_bytes, decode_raw_bytes, infer_from_type, parse_string, 19 + string_key_type_guess, 20 + }, 18 21 }, 19 22 }, 20 23 }; ··· 61 64 } 62 65 63 66 impl<'de> Deserialize<'de> for Data<'de> { 67 + /// Currently only works for self-describing formats 68 + /// Thankfully the supported atproto data formats are both self-describing (json and dag-cbor). 69 + /// TODO: see if there's any way to make this work with Postcard. 64 70 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 65 71 where 66 72 D: Deserializer<'de>, ··· 85 91 Ok(Data::Null) 86 92 } 87 93 94 + fn visit_some<D>(self, deserializer: D) -> Result<Self::Value, D::Error> 95 + where 96 + D: Deserializer<'v>, 97 + { 98 + Ok(deserializer.deserialize_any(self)?) 
99 + } 100 + 88 101 fn visit_unit<E>(self) -> Result<Self::Value, E> 89 102 where 90 103 E: serde::de::Error, ··· 110 123 where 111 124 E: serde::de::Error, 112 125 { 113 - Ok(Data::Integer(v as i64)) 126 + Ok(Data::Integer((v % (i64::MAX as u64)) as i64)) 114 127 } 115 128 116 129 fn visit_f64<E>(self, _v: f64) -> Result<Self::Value, E> ··· 154 167 Ok(Data::Bytes(Bytes::copy_from_slice(v))) 155 168 } 156 169 170 + fn visit_borrowed_bytes<E>(self, v: &'v [u8]) -> Result<Self::Value, E> 171 + where 172 + E: serde::de::Error, 173 + { 174 + Ok(Data::Bytes(Bytes::copy_from_slice(v))) 175 + } 176 + 177 + fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E> 178 + where 179 + E: serde::de::Error, 180 + { 181 + Ok(Data::Bytes(Bytes::from_owner(v))) 182 + } 183 + 184 + fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error> 185 + where 186 + A: serde::de::EnumAccess<'v>, 187 + { 188 + match data.variant::<SmolStr>() { 189 + Ok((key, value)) => { 190 + let mut map = BTreeMap::new(); 191 + if let Ok(variant) = value.newtype_variant::<Data>() { 192 + map.insert(key, variant); 193 + } 194 + Ok(Data::Object(Object(map))) 195 + } 196 + Err(e) => Err(e), 197 + } 198 + } 199 + 157 200 fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> 158 201 where 159 202 A: serde::de::SeqAccess<'v>, ··· 163 206 array.push(elem); 164 207 } 165 208 Ok(Data::Array(Array(array))) 209 + } 210 + 211 + fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error> 212 + where 213 + D: Deserializer<'v>, 214 + { 215 + deserializer.deserialize_map(self) 166 216 } 167 217 168 218 fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> ··· 236 286 237 287 // Check for blob 238 288 if let Some(type_str) = type_field { 239 - if type_str == "blob" && infer_from_type(type_str) == DataModelType::Blob { 289 + if infer_from_type(type_str) == DataModelType::Blob { 240 290 // Try to construct blob from the collected data 241 291 let ref_cid = 
map.get("ref").and_then(|v| { 242 292 if let Data::CidLink(cid) = v { ··· 388 438 } 389 439 } 390 440 } 441 + 442 + impl Serialize for RawData<'_> { 443 + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> 444 + where 445 + S: Serializer, 446 + { 447 + match self { 448 + RawData::Null => serializer.serialize_none(), 449 + RawData::Boolean(b) => serializer.serialize_bool(*b), 450 + RawData::SignedInt(i) => serializer.serialize_i64(*i), 451 + RawData::UnsignedInt(u) => serializer.serialize_u64(*u), 452 + RawData::String(s) => serializer.serialize_str(&s), 453 + RawData::Bytes(bytes) => { 454 + if serializer.is_human_readable() { 455 + // JSON: {"$bytes": "base64 string"} 456 + use serde::ser::SerializeMap; 457 + let mut map = serializer.serialize_map(Some(1))?; 458 + map.serialize_entry("$bytes", &BASE64_STANDARD.encode(bytes))?; 459 + map.end() 460 + } else { 461 + // CBOR: raw bytes 462 + serializer.serialize_bytes(bytes) 463 + } 464 + } 465 + RawData::CidLink(cid) => { 466 + if serializer.is_human_readable() { 467 + // JSON: {"$link": "cid_string"} 468 + use serde::ser::SerializeMap; 469 + let mut map = serializer.serialize_map(Some(1))?; 470 + map.serialize_entry("$link", cid.as_str())?; 471 + map.end() 472 + } else { 473 + // CBOR: raw cid (Cid's serialize handles this) 474 + cid.serialize(serializer) 475 + } 476 + } 477 + RawData::Array(arr) => arr.serialize(serializer), 478 + RawData::Object(obj) => obj.serialize(serializer), 479 + RawData::Blob(blob) => blob.serialize(serializer), 480 + RawData::InvalidBlob(raw_data) => raw_data.serialize(serializer), 481 + RawData::InvalidNumber(bytes) => serializer.serialize_bytes(bytes), 482 + RawData::InvalidData(bytes) => serializer.serialize_bytes(bytes), 483 + } 484 + } 485 + } 486 + 487 + impl<'de> Deserialize<'de> for RawData<'de> { 488 + /// Currently only works for self-describing formats 489 + /// Thankfully the supported atproto data formats are both self-describing (json and dag-cbor). 
490 + /// TODO: see if there's any way to make this work with Postcard. 491 + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 492 + where 493 + D: Deserializer<'de>, 494 + { 495 + deserializer.deserialize_any(RawDataVisitor) 496 + } 497 + } 498 + 499 + struct RawDataVisitor; 500 + 501 + impl<'de: 'v, 'v> serde::de::Visitor<'v> for RawDataVisitor { 502 + type Value = RawData<'v>; 503 + 504 + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 505 + formatter.write_str("any valid AT Protocol data value") 506 + } 507 + 508 + fn visit_none<E>(self) -> Result<Self::Value, E> 509 + where 510 + E: serde::de::Error, 511 + { 512 + Ok(RawData::Null) 513 + } 514 + 515 + fn visit_some<D>(self, deserializer: D) -> Result<Self::Value, D::Error> 516 + where 517 + D: Deserializer<'v>, 518 + { 519 + Ok(deserializer.deserialize_option(self)?) 520 + } 521 + 522 + fn visit_unit<E>(self) -> Result<Self::Value, E> 523 + where 524 + E: serde::de::Error, 525 + { 526 + Ok(RawData::Null) 527 + } 528 + 529 + fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E> 530 + where 531 + E: serde::de::Error, 532 + { 533 + Ok(RawData::Boolean(v)) 534 + } 535 + 536 + fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E> 537 + where 538 + E: serde::de::Error, 539 + { 540 + Ok(RawData::SignedInt(v)) 541 + } 542 + 543 + fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E> 544 + where 545 + E: serde::de::Error, 546 + { 547 + Ok(RawData::UnsignedInt(v)) 548 + } 549 + 550 + fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E> 551 + where 552 + E: serde::de::Error, 553 + { 554 + Ok(RawData::InvalidNumber(Bytes::from_owner(v.to_be_bytes()))) 555 + } 556 + 557 + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> 558 + where 559 + E: serde::de::Error, 560 + { 561 + Ok(RawData::String(CowStr::Borrowed(v).into_static())) 562 + } 563 + 564 + fn visit_borrowed_str<E>(self, v: &'v str) -> Result<Self::Value, E> 565 + where 566 + E: serde::de::Error, 567 + { 568 + 
Ok(RawData::String(v.into())) 569 + } 570 + 571 + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> 572 + where 573 + E: serde::de::Error, 574 + { 575 + Ok(RawData::String(v.into())) 576 + } 577 + 578 + fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E> 579 + where 580 + E: serde::de::Error, 581 + { 582 + Ok(RawData::Bytes(Bytes::copy_from_slice(v))) 583 + } 584 + 585 + fn visit_borrowed_bytes<E>(self, v: &'v [u8]) -> Result<Self::Value, E> 586 + where 587 + E: serde::de::Error, 588 + { 589 + Ok(RawData::Bytes(Bytes::copy_from_slice(v))) 590 + } 591 + 592 + fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E> 593 + where 594 + E: serde::de::Error, 595 + { 596 + Ok(RawData::Bytes(Bytes::from_owner(v))) 597 + } 598 + 599 + // check on this, feels weird 600 + fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error> 601 + where 602 + A: serde::de::EnumAccess<'v>, 603 + { 604 + match data.variant::<SmolStr>() { 605 + Ok((key, value)) => { 606 + let mut map = BTreeMap::new(); 607 + if let Ok(variant) = value.newtype_variant::<RawData>() { 608 + map.insert(key, variant); 609 + } 610 + Ok(RawData::Object(map)) 611 + } 612 + Err(e) => Err(e), 613 + } 614 + } 615 + 616 + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> 617 + where 618 + A: serde::de::SeqAccess<'v>, 619 + { 620 + let mut array = Vec::new(); 621 + while let Some(elem) = seq.next_element()? 
{ 622 + array.push(elem); 623 + } 624 + Ok(RawData::Array(array)) 625 + } 626 + 627 + fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error> 628 + where 629 + D: Deserializer<'v>, 630 + { 631 + deserializer.deserialize_map(self) 632 + } 633 + 634 + fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> 635 + where 636 + A: serde::de::MapAccess<'v>, 637 + { 638 + use serde::de::Error; 639 + 640 + // Peek at first key to check for special single-key patterns 641 + let mut temp_map: BTreeMap<SmolStr, RawData<'v>> = BTreeMap::new(); 642 + 643 + while let Some(key) = map.next_key::<SmolStr>()? { 644 + // Check for special patterns on single-key maps 645 + if temp_map.is_empty() { 646 + if key.as_str() == "$link" { 647 + // {"$link": "cid_string"} pattern 648 + let cid_str: String = map.next_value()?; 649 + // Check if there are more keys 650 + if let Some(next_key) = map.next_key::<SmolStr>()? { 651 + // More keys, treat as regular object 652 + temp_map.insert(key, RawData::String(cid_str.into())); 653 + let next_value: RawData = map.next_value()?; 654 + temp_map.insert(next_key, next_value); 655 + continue; 656 + } else { 657 + // Only key, return CidLink 658 + return Ok(RawData::CidLink(Cid::from(cid_str))); 659 + } 660 + } else if key.as_str() == "$bytes" { 661 + // {"$bytes": "base64_string"} pattern 662 + let bytes_str: String = map.next_value()?; 663 + // Check if there are more keys 664 + if map.next_key::<SmolStr>()?.is_some() { 665 + // More keys, treat as regular object - shouldn't happen but handle it 666 + temp_map.insert(key, RawData::String(bytes_str.into())); 667 + continue; 668 + } else { 669 + // Only key, decode and return bytes 670 + return Ok(decode_raw_bytes(&bytes_str)); 671 + } 672 + } 673 + } 674 + 675 + let value: RawData = map.next_value()?; 676 + temp_map.insert(key, value); 677 + } 678 + 679 + // Second pass: apply type inference and check for special patterns 680 + 
apply_raw_type_inference(temp_map).map_err(A::Error::custom) 681 + } 682 + } 683 + 684 + fn apply_raw_type_inference<'s>( 685 + map: BTreeMap<SmolStr, RawData<'s>>, 686 + ) -> Result<RawData<'s>, AtDataError> { 687 + // Check for CID link pattern first: {"$link": "cid_string"} 688 + if map.len() == 1 { 689 + if let Some(RawData::String(link)) = map.get("$link") { 690 + // Need to extract ownership, can't borrow from map we're about to consume 691 + let link_owned = link.clone(); 692 + return Ok(RawData::CidLink(Cid::cow_str(link_owned))); 693 + } 694 + } 695 + 696 + // Check for $type field to detect special structures 697 + let type_field = map.get("$type").and_then(|v| { 698 + if let RawData::String(s) = v { 699 + Some(s.as_ref()) 700 + } else { 701 + None 702 + } 703 + }); 704 + 705 + // Check for blob 706 + if let Some(type_str) = type_field { 707 + if infer_from_type(type_str) == DataModelType::Blob { 708 + // Try to construct blob from the collected data 709 + let ref_cid = map.get("ref").and_then(|v| { 710 + if let RawData::CidLink(cid) = v { 711 + Some(cid.clone()) 712 + } else { 713 + None 714 + } 715 + }); 716 + 717 + let mime_type = map.get("mimeType").and_then(|v| { 718 + if let RawData::String(s) = v { 719 + Some(s.clone()) 720 + } else { 721 + None 722 + } 723 + }); 724 + 725 + let size = map.get("size").and_then(|v| { 726 + if let RawData::UnsignedInt(i) = v { 727 + Some(*i as usize) 728 + } else if let RawData::SignedInt(i) = v { 729 + Some(*i as usize) 730 + } else { 731 + None 732 + } 733 + }); 734 + 735 + if let (Some(ref_cid), Some(mime_cowstr), Some(size)) = (ref_cid, mime_type, size) { 736 + return Ok(RawData::Blob(Blob { 737 + r#ref: ref_cid, 738 + mime_type: MimeType::from(mime_cowstr), 739 + size, 740 + })); 741 + } else { 742 + return Ok(RawData::InvalidBlob(Box::new(RawData::Object(map)))); 743 + } 744 + } 745 + } 746 + 747 + Ok(RawData::Object(map)) 748 + }