A better Rust ATProto crate

atproto data model value enum first pass serde_json::Value -> Data

Orual da4dcc67 286d3ec3

Changed files
+430 -3
crates
jacquard-common
+13
Cargo.lock
··· 699 699 checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" 700 700 701 701 [[package]] 702 + name = "ipld-core" 703 + version = "0.4.2" 704 + source = "registry+https://github.com/rust-lang/crates.io-index" 705 + checksum = "104718b1cc124d92a6d01ca9c9258a7df311405debb3408c445a36452f9bf8db" 706 + dependencies = [ 707 + "cid", 708 + "serde", 709 + "serde_bytes", 710 + ] 711 + 712 + [[package]] 702 713 name = "is_terminal_polyfill" 703 714 version = "1.70.1" 704 715 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 722 733 name = "jacquard-common" 723 734 version = "0.1.0" 724 735 dependencies = [ 736 + "base64", 725 737 "bytes", 726 738 "chrono", 727 739 "cid", 728 740 "enum_dispatch", 741 + "ipld-core", 729 742 "langtag", 730 743 "miette", 731 744 "multibase",
+2
crates/jacquard-common/Cargo.toml
··· 12 12 description.workspace = true 13 13 14 14 [dependencies] 15 + base64 = "0.22.1" 15 16 bytes = "1.10.1" 16 17 chrono = "0.4.42" 17 18 cid = { version = "0.11.1", features = ["serde", "std"] } 18 19 enum_dispatch = "0.3.13" 20 + ipld-core = { version = "0.4.2", features = ["serde"] } 19 21 langtag = { version = "0.4.0", features = ["serde"] } 20 22 miette = "7.6.0" 21 23 multibase = "0.9.1"
+2
crates/jacquard-common/src/types.rs
··· 1 1 use serde::{Deserialize, Serialize}; 2 2 3 + use crate::types::nsid::Nsid; 4 + 3 5 pub mod aturi; 4 6 pub mod blob; 5 7 pub mod cid;
+413 -3
crates/jacquard-common/src/types/value.rs
··· 1 + use base64::{ 2 + Engine, 3 + prelude::{BASE64_STANDARD, BASE64_STANDARD_NO_PAD, BASE64_URL_SAFE, BASE64_URL_SAFE_NO_PAD}, 4 + }; 1 5 use bytes::Bytes; 2 6 use serde::{Deserialize, Deserializer, Serialize, Serializer}; 3 - use smol_str::SmolStr; 4 - use std::collections::BTreeMap; 7 + use smol_str::{SmolStr, ToSmolStr}; 8 + use std::{collections::BTreeMap, str::FromStr}; 9 + use url::Url; 5 10 6 - use crate::types::{blob::Blob, string::*}; 11 + use crate::types::{ 12 + DataModelType, LexiconStringType, 13 + blob::{Blob, MimeType}, 14 + string::*, 15 + }; 7 16 8 17 #[derive(Debug, Clone, PartialEq, Eq)] 9 18 pub enum Data<'s> { ··· 18 27 Blob(Blob<'s>), 19 28 } 20 29 30 + impl<'s> Data<'s> { 31 + pub fn from_json(json: &'s serde_json::Value) -> Self { 32 + if let Some(value) = json.as_bool() { 33 + Self::Boolean(value) 34 + } else if let Some(value) = json.as_i64() { 35 + Self::Integer(value) 36 + } else if let Some(value) = json.as_str() { 37 + Self::String(AtprotoStr::new(value)) 38 + } else if let Some(value) = json.as_array() { 39 + Self::Array(Array::from_json(value)) 40 + } else if let Some(value) = json.as_object() { 41 + Object::from_json(value) 42 + } else if let Some(num) = json.as_number() { 43 + // deliberately permissive here, just in case. 44 + Self::String(AtprotoStr::new_owned(num.to_smolstr())) 45 + } else { 46 + Self::Null 47 + } 48 + } 49 + } 50 + 21 51 #[derive(Debug, Clone, PartialEq, Eq)] 22 52 pub struct Array<'s>(pub Vec<Data<'s>>); 23 53 54 + impl<'s> Array<'s> { 55 + pub fn from_json(json: &'s Vec<serde_json::Value>) -> Self { 56 + let mut array = Vec::with_capacity(json.len()); 57 + for item in json { 58 + array.push(Data::from_json(item)); 59 + } 60 + Self(array) 61 + } 62 + } 63 + 24 64 #[derive(Debug, Clone, PartialEq, Eq)] 25 65 pub struct Object<'s>(pub BTreeMap<SmolStr, Data<'s>>); 66 + 67 + impl<'s> Object<'s> { 68 + pub fn from_json(json: &'s serde_json::Map<String, serde_json::Value>) -> Data<'s> { 69 + if let Some(type_field) = json.get("$type").and_then(|v| v.as_str()) { 70 + if infer_from_type(type_field) == DataModelType::Blob { 71 + if let Some(blob) = json_to_blob(json) { 72 + return Data::Blob(blob); 73 + } 74 + } 75 + } 76 + let mut map = BTreeMap::new(); 77 + 78 + for (key, value) in json { 79 + if key == "$type" { 80 + continue; // skip, because we've already handled it 81 + } 82 + match string_key_type_guess(key) { 83 + DataModelType::Null => { 84 + if value.is_null() { 85 + map.insert(key.to_smolstr(), Data::Null); 86 + } else { 87 + map.insert(key.to_smolstr(), Data::from_json(value)); 88 + } 89 + } 90 + DataModelType::Boolean => { 91 + if let Some(value) = value.as_bool() { 92 + map.insert(key.to_smolstr(), Data::Boolean(value)); 93 + } else { 94 + map.insert(key.to_smolstr(), Data::from_json(value)); 95 + } 96 + } 97 + DataModelType::Integer => { 98 + if let Some(int) = value.as_i64() { 99 + map.insert(key.to_smolstr(), Data::Integer(int)); 100 + } else { 101 + map.insert(key.to_smolstr(), Data::from_json(value)); 102 + } 103 + } 104 + DataModelType::Bytes => { 105 + if let Some(value) = value.as_str() { 106 + map.insert(key.to_smolstr(), decode_bytes(value)); 107 + } else { 108 + map.insert(key.to_smolstr(), Data::from_json(value)); 109 + } 110 + } 111 + DataModelType::CidLink => { 112 + if let Some(value) = value.as_str() { 113 + map.insert( 114 + key.to_smolstr(), 115 + Data::String(AtprotoStr::Cid(Cid::Str(value.into()))), 116 + ); 117 + } else { 118 + map.insert(key.to_smolstr(), Data::from_json(value)); 119 + } 120 + } 121 + DataModelType::Blob => { 122 + if let Some(value) = value.as_object() { 123 + map.insert(key.to_smolstr(), Object::from_json(value)); 124 + } else { 125 + map.insert(key.to_smolstr(), Data::from_json(value)); 126 + } 127 + } 128 + DataModelType::Array => { 129 + if let Some(value) = value.as_array() { 130 + map.insert(key.to_smolstr(), Data::Array(Array::from_json(value))); 131 + } else { 132 + map.insert(key.to_smolstr(), Data::from_json(value)); 133 + } 134 + } 135 + DataModelType::Object => { 136 + if let Some(value) = value.as_object() { 137 + map.insert(key.to_smolstr(), Object::from_json(value)); 138 + } else { 139 + map.insert(key.to_smolstr(), Data::from_json(value)); 140 + } 141 + } 142 + DataModelType::String(string_type) => { 143 + if let Some(value) = value.as_str() { 144 + match string_type { 145 + LexiconStringType::Datetime => { 146 + if let Ok(datetime) = Datetime::from_str(value) { 147 + map.insert( 148 + key.to_smolstr(), 149 + Data::String(AtprotoStr::Datetime(datetime)), 150 + ); 151 + } else { 152 + map.insert( 153 + key.to_smolstr(), 154 + Data::String(AtprotoStr::String(value.into())), 155 + ); 156 + } 157 + } 158 + LexiconStringType::AtUri => { 159 + if let Ok(value) = AtUri::new(value) { 160 + map.insert( 161 + key.to_smolstr(), 162 + Data::String(AtprotoStr::AtUri(value)), 163 + ); 164 + } else { 165 + map.insert( 166 + key.to_smolstr(), 167 + Data::String(AtprotoStr::String(value.into())), 168 + ); 169 + } 170 + } 171 + LexiconStringType::Did => { 172 + if let Ok(value) = Did::new(value) { 173 + map.insert( 174 + key.to_smolstr(), 175 + Data::String(AtprotoStr::Did(value)), 176 + ); 177 + } else { 178 + map.insert( 179 + key.to_smolstr(), 180 + Data::String(AtprotoStr::String(value.into())), 181 + ); 182 + } 183 + } 184 + LexiconStringType::Handle => { 185 + if let Ok(value) = Handle::new(value) { 186 + map.insert( 187 + key.to_smolstr(), 188 + Data::String(AtprotoStr::Handle(value)), 189 + ); 190 + } else { 191 + map.insert( 192 + key.to_smolstr(), 193 + Data::String(AtprotoStr::String(value.into())), 194 + ); 195 + } 196 + } 197 + LexiconStringType::AtIdentifier => { 198 + if let Ok(value) = AtIdentifier::new(value) { 199 + map.insert( 200 + key.to_smolstr(), 201 + Data::String(AtprotoStr::AtIdentifier(value)), 202 + ); 203 + } else { 204 + map.insert( 205 + key.to_smolstr(), 206 + Data::String(AtprotoStr::String(value.into())), 207 + ); 208 + } 209 + } 210 + LexiconStringType::Nsid => { 211 + if let Ok(value) = Nsid::new(value) { 212 + map.insert( 213 + key.to_smolstr(), 214 + Data::String(AtprotoStr::Nsid(value)), 215 + ); 216 + } else { 217 + map.insert( 218 + key.to_smolstr(), 219 + Data::String(AtprotoStr::String(value.into())), 220 + ); 221 + } 222 + } 223 + LexiconStringType::Cid => { 224 + if let Ok(value) = Cid::new(value.as_bytes()) { 225 + map.insert( 226 + key.to_smolstr(), 227 + Data::String(AtprotoStr::Cid(value)), 228 + ); 229 + } else { 230 + map.insert( 231 + key.to_smolstr(), 232 + Data::String(AtprotoStr::String(value.into())), 233 + ); 234 + } 235 + } 236 + LexiconStringType::Language => { 237 + if let Ok(value) = Language::new(value) { 238 + map.insert( 239 + key.to_smolstr(), 240 + Data::String(AtprotoStr::Language(value)), 241 + ); 242 + } else { 243 + map.insert( 244 + key.to_smolstr(), 245 + Data::String(AtprotoStr::String(value.into())), 246 + ); 247 + } 248 + } 249 + LexiconStringType::Tid => { 250 + if let Ok(value) = Tid::new(value) { 251 + map.insert( 252 + key.to_smolstr(), 253 + Data::String(AtprotoStr::Tid(value)), 254 + ); 255 + } else { 256 + map.insert( 257 + key.to_smolstr(), 258 + Data::String(AtprotoStr::String(value.into())), 259 + ); 260 + } 261 + } 262 + LexiconStringType::RecordKey => { 263 + if let Ok(value) = Rkey::new(value) { 264 + map.insert( 265 + key.to_smolstr(), 266 + Data::String(AtprotoStr::RecordKey(RecordKey::from(value))), 267 + ); 268 + } else { 269 + map.insert( 270 + key.to_smolstr(), 271 + Data::String(AtprotoStr::String(value.into())), 272 + ); 273 + } 274 + } 275 + LexiconStringType::Uri(_) => { 276 + if let Ok(uri) = Uri::new(value) { 277 + map.insert( 278 + key.to_smolstr(), 279 + Data::String(AtprotoStr::Uri(uri)), 280 + ); 281 + } else { 282 + map.insert( 283 + key.to_smolstr(), 284 + Data::String(AtprotoStr::String(value.into())), 285 + ); 286 + } 287 + } 288 + LexiconStringType::String => { 289 + map.insert(key.to_smolstr(), Data::String(parse_string(value))); 290 + } 291 + } 292 + } else { 293 + map.insert(key.to_smolstr(), Data::from_json(value)); 294 + } 295 + } 296 + } 297 + } 298 + 299 + Data::Object(Object(map)) 300 + } 301 + 302 + //pub fn from_cbor(cbor: BTreeMap<String, ipld_core::ipld::Ipld>) -> Self {} 303 + } 304 + 305 + /// smarter parsing to avoid trying as many posibilities. 306 + pub fn parse_string<'s>(string: &'s str) -> AtprotoStr<'s> { 307 + if string.len() < 2048 && string.starts_with("did:") { 308 + if let Ok(did) = Did::new(string) { 309 + return AtprotoStr::Did(did); 310 + } 311 + } else if string.starts_with("20") && string.ends_with("Z") { 312 + // probably a date (for the next 75 years) 313 + if let Ok(datetime) = Datetime::from_str(string) { 314 + return AtprotoStr::Datetime(datetime); 315 + } 316 + } else if string.starts_with("at://") { 317 + if let Ok(uri) = AtUri::new(string) { 318 + return AtprotoStr::AtUri(uri); 319 + } 320 + } else if string.starts_with("https://") { 321 + if let Ok(uri) = Url::parse(string) { 322 + return AtprotoStr::Uri(Uri::Https(uri)); 323 + } 324 + } else if string.starts_with("wss://") { 325 + if let Ok(uri) = Url::parse(string) { 326 + return AtprotoStr::Uri(Uri::Https(uri)); 327 + } 328 + } else if string.starts_with("ipfs://") { 329 + return AtprotoStr::Uri(Uri::Cid(Cid::str(string))); 330 + } else if string.contains('.') && !string.contains([' ', '\n']) { 331 + if string.len() < 253 && Url::parse(string).is_ok() { 332 + // probably a handle 333 + if let Ok(handle) = AtIdentifier::new(string) { 334 + return AtprotoStr::AtIdentifier(handle); 335 + } else { 336 + return AtprotoStr::Uri(Uri::Any(string.into())); 337 + } 338 + } else if let Ok(nsid) = Nsid::new(string) { 339 + return AtprotoStr::Nsid(nsid); 340 + } 341 + } else if string.len() == 13 { 342 + if let Ok(tid) = Tid::new(string) { 343 + return AtprotoStr::Tid(tid); 344 + } 345 + } else if !string.contains([' ', '\n']) { 346 + // cid? 347 + if let Ok(cid) = Cid::new(string.as_bytes()) { 348 + return AtprotoStr::Cid(cid); 349 + } 350 + } 351 + 352 + AtprotoStr::String(string.into()) 353 + } 354 + 355 + /// First-level guess at what we should parse the corresponding value as 356 + /// Helps speed up parsing, avoids some ambiguities. 357 + pub fn string_key_type_guess(key: &str) -> DataModelType { 358 + match key { 359 + "cid" => DataModelType::String(LexiconStringType::Cid), 360 + "uri" => DataModelType::String(LexiconStringType::Uri(super::UriType::Any)), 361 + "did" => DataModelType::String(LexiconStringType::Did), 362 + "handle" => DataModelType::String(LexiconStringType::AtIdentifier), 363 + "ref" => DataModelType::CidLink, 364 + "list" => DataModelType::String(LexiconStringType::AtUri), 365 + "blobref" => DataModelType::Blob, 366 + "createdAt" | "created" | "indexedAt" | "issuedAt" | "updatedAt" | "playedTime" => { 367 + DataModelType::String(LexiconStringType::Datetime) 368 + } 369 + "size" | "width" | "height" => DataModelType::Integer, 370 + "value" | "record" | "embed" => DataModelType::Object, 371 + "text" | "displayName" | "alt" | "name" | "description" => { 372 + DataModelType::String(LexiconStringType::String) 373 + } 374 + "langs" | "blobs" | "images" | "labels" => DataModelType::Array, 375 + "$bytes" => DataModelType::Bytes, 376 + "$link" => DataModelType::String(LexiconStringType::Cid), 377 + "$type" => DataModelType::String(LexiconStringType::String), 378 + 379 + // we assume others are strings speficially because it's easy to check if a serde_json::Value 380 + // or Ipld value is at least a string, and then we fall back to Object/Map. 381 + _ => DataModelType::String(LexiconStringType::String), 382 + } 383 + } 384 + 385 + pub fn json_to_blob<'b>(blob: &'b serde_json::Map<String, serde_json::Value>) -> Option<Blob<'b>> { 386 + let mime_type = blob.get("mimeType").and_then(|v| v.as_str()); 387 + if let Some(value) = blob.get("ref") { 388 + if let Some(value) = value 389 + .as_object() 390 + .and_then(|o| o.get("$link")) 391 + .and_then(|v| v.as_str()) 392 + { 393 + let size = blob.get("size").and_then(|v| v.as_u64()); 394 + if let (Some(mime_type), Some(size)) = (mime_type, size) { 395 + return Some(Blob { 396 + r#ref: Cid::str(value), 397 + mime_type: MimeType::raw(mime_type), 398 + size: size as usize, 399 + }); 400 + } 401 + } 402 + } else if let Some(value) = blob.get("cid").and_then(|v| v.as_str()) { 403 + if let Some(mime_type) = mime_type { 404 + return Some(Blob { 405 + r#ref: Cid::str(value), 406 + mime_type: MimeType::raw(mime_type), 407 + size: 0, 408 + }); 409 + } 410 + } 411 + 412 + None 413 + } 414 + 415 + pub fn infer_from_type(type_field: &str) -> DataModelType { 416 + match type_field { 417 + "blob" => DataModelType::Blob, 418 + _ => DataModelType::Object, 419 + } 420 + } 421 + 422 + pub fn decode_bytes<'s>(bytes: &'s str) -> Data<'s> { 423 + // First one should just work. rest are insurance. 424 + if let Ok(bytes) = BASE64_STANDARD.decode(bytes) { 425 + Data::Bytes(Bytes::from_owner(bytes)) 426 + } else if let Ok(bytes) = BASE64_STANDARD_NO_PAD.decode(bytes) { 427 + Data::Bytes(Bytes::from_owner(bytes)) 428 + } else if let Ok(bytes) = BASE64_URL_SAFE.decode(bytes) { 429 + Data::Bytes(Bytes::from_owner(bytes)) 430 + } else if let Ok(bytes) = BASE64_URL_SAFE_NO_PAD.decode(bytes) { 431 + Data::Bytes(Bytes::from_owner(bytes)) 432 + } else { 433 + Data::String(AtprotoStr::String(bytes.into())) 434 + } 435 + }