A better Rust ATProto crate

ipld value type to atproto data model first pass

Orual 9ced24d4 47857bff

Changed files
+493 -4
crates
jacquard-common
+101
Cargo.lock
··· 487 487 ] 488 488 489 489 [[package]] 490 + name = "getrandom" 491 + version = "0.3.3" 492 + source = "registry+https://github.com/rust-lang/crates.io-index" 493 + checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" 494 + dependencies = [ 495 + "cfg-if", 496 + "libc", 497 + "r-efi", 498 + "wasi", 499 + ] 500 + 501 + [[package]] 490 502 name = "half" 491 503 version = "2.6.0" 492 504 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 744 756 "multibase", 745 757 "multihash", 746 758 "ouroboros", 759 + "rand", 747 760 "regex", 748 761 "serde", 749 762 "serde_html_form", ··· 936 949 checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" 937 950 938 951 [[package]] 952 + name = "ppv-lite86" 953 + version = "0.2.21" 954 + source = "registry+https://github.com/rust-lang/crates.io-index" 955 + checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" 956 + dependencies = [ 957 + "zerocopy", 958 + ] 959 + 960 + [[package]] 939 961 name = "proc-macro-error" 940 962 version = "1.0.4" 941 963 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 991 1013 ] 992 1014 993 1015 [[package]] 1016 + name = "r-efi" 1017 + version = "5.3.0" 1018 + source = "registry+https://github.com/rust-lang/crates.io-index" 1019 + checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" 1020 + 1021 + [[package]] 1022 + name = "rand" 1023 + version = "0.9.2" 1024 + source = "registry+https://github.com/rust-lang/crates.io-index" 1025 + checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" 1026 + dependencies = [ 1027 + "rand_chacha", 1028 + "rand_core", 1029 + ] 1030 + 1031 + [[package]] 1032 + name = "rand_chacha" 1033 + version = "0.9.0" 1034 + source = "registry+https://github.com/rust-lang/crates.io-index" 1035 + checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" 1036 + dependencies = [ 1037 + "ppv-lite86", 1038 + "rand_core", 1039 + ] 1040 + 1041 + [[package]] 1042 + name = "rand_core" 1043 + version = "0.9.3" 1044 + source = "registry+https://github.com/rust-lang/crates.io-index" 1045 + checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" 1046 + dependencies = [ 1047 + "getrandom", 1048 + ] 1049 + 1050 + [[package]] 994 1051 name = "range-traits" 995 1052 version = "0.3.2" 996 1053 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1423 1480 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 1424 1481 1425 1482 [[package]] 1483 + name = "wasi" 1484 + version = "0.14.7+wasi-0.2.4" 1485 + source = "registry+https://github.com/rust-lang/crates.io-index" 1486 + checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" 1487 + dependencies = [ 1488 + "wasip2", 1489 + ] 1490 + 1491 + [[package]] 1492 + name = "wasip2" 1493 + version = "1.0.1+wasi-0.2.4" 1494 + source = "registry+https://github.com/rust-lang/crates.io-index" 1495 + checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" 1496 + dependencies = [ 1497 + "wit-bindgen", 1498 + ] 1499 + 1500 + [[package]] 1426 1501 name = "wasm-bindgen" 1427 1502 version = "0.2.104" 1428 1503 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1615 1690 checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" 1616 1691 1617 1692 [[package]] 1693 + name = "wit-bindgen" 1694 + version = "0.46.0" 1695 + source = "registry+https://github.com/rust-lang/crates.io-index" 1696 + checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" 1697 + 1698 + [[package]] 1618 1699 name = "writeable" 1619 1700 version = "0.6.1" 1620 1701 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1648 1729 "quote", 1649 1730 "syn 2.0.106", 1650 1731 "synstructure", 1732 + ] 1733 + 1734 + [[package]] 1735 + name = "zerocopy" 1736 + version = "0.8.27" 1737 + source = "registry+https://github.com/rust-lang/crates.io-index" 1738 + checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" 1739 + dependencies = [ 1740 + "zerocopy-derive", 1741 + ] 1742 + 1743 + [[package]] 1744 + name = "zerocopy-derive" 1745 + version = "0.8.27" 1746 + source = "registry+https://github.com/rust-lang/crates.io-index" 1747 + checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" 1748 + dependencies = [ 1749 + "proc-macro2", 1750 + "quote", 1751 + "syn 2.0.106", 1651 1752 ] 1652 1753 1653 1754 [[package]]
+1
crates/jacquard-common/Cargo.toml
··· 23 23 multibase = "0.9.1" 24 24 multihash = "0.19.3" 25 25 ouroboros = "0.18.5" 26 + rand = "0.9.2" 26 27 regex = "1.11.3" 27 28 serde = { version = "1.0.227", features = ["derive"] } 28 29 serde_html_form = "0.2.8"
+103 -3
crates/jacquard-common/src/types/tid.rs
··· 2 2 use smol_str::{SmolStr, SmolStrBuilder}; 3 3 use std::fmt; 4 4 use std::sync::LazyLock; 5 + use std::time::SystemTime; 5 6 use std::{ops::Deref, str::FromStr}; 6 7 7 8 use crate::CowStr; ··· 9 10 use crate::types::string::{AtStrError, StrParseKind}; 10 11 use regex::Regex; 11 12 13 + const S32_CHAR: &str = "234567abcdefghijklmnopqrstuvwxyz"; 14 + 12 15 fn s32_encode(mut i: u64) -> SmolStr { 13 - const S32_CHAR: &[u8] = b"234567abcdefghijklmnopqrstuvwxyz"; 14 - 15 16 let mut s = SmolStrBuilder::new(); 16 17 for _ in 0..13 { 17 18 let c = i & 0x1F; 18 - s.push(S32_CHAR[c as usize] as char); 19 + s.push(S32_CHAR.chars().nth(c as usize).unwrap()); 19 20 20 21 i >>= 5; 21 22 } ··· 104 105 Self(s32_encode(tid)) 105 106 } 106 107 108 + pub fn from_time(timestamp: usize, clkid: u32) -> Self { 109 + let str = smol_str::format_smolstr!( 110 + "{0}{1:2>2}", 111 + s32_encode(timestamp as u64), 112 + s32_encode(Into::<u32>::into(clkid) as u64) 113 + ); 114 + Self(str) 115 + } 116 + 117 + pub fn timestamp(&self) -> usize { 118 + s32decode(self.0[0..11].to_owned()) 119 + } 120 + 121 + // newer > older 122 + pub fn compare_to(&self, other: &Tid) -> i8 { 123 + if self.0 > other.0 { 124 + return 1; 125 + } 126 + if self.0 < other.0 { 127 + return -1; 128 + } 129 + 0 130 + } 131 + 132 + pub fn newer_than(&self, other: &Tid) -> bool { 133 + self.compare_to(other) > 0 134 + } 135 + 136 + pub fn older_than(&self, other: &Tid) -> bool { 137 + self.compare_to(other) < 0 138 + } 139 + 140 + pub fn next_str(prev: Option<Tid>) -> Result<Self, AtStrError> { 141 + let prev = match prev { 142 + None => None, 143 + Some(prev) => Some(Tid::new(prev)?), 144 + }; 145 + Ok(Ticker::new().next(prev)) 146 + } 147 + 107 148 /// Construct a new [Tid] that represents the current time. 108 149 /// 109 150 /// If you have multiple clock sources, you can use `clkid` to distinguish between them ··· 132 173 } 133 174 } 134 175 176 + pub fn s32decode(s: String) -> usize { 177 + let mut i: usize = 0; 178 + for c in s.chars() { 179 + i = i * 32 + S32_CHAR.chars().position(|x| x == c).unwrap(); 180 + } 181 + i 182 + } 183 + 135 184 impl FromStr for Tid { 136 185 type Err = AtStrError; 137 186 ··· 207 256 self.as_str() 208 257 } 209 258 } 259 + 260 + /// Based on adenosine/adenosine/src/identifiers.rs 261 + /// TODO: clean up and normalize stuff between this and the stuff pulled from atrium 262 + pub struct Ticker { 263 + last_timestamp: usize, 264 + clock_id: u32, 265 + } 266 + 267 + impl Ticker { 268 + pub fn new() -> Self { 269 + let mut ticker = Self { 270 + last_timestamp: 0, 271 + // mask to 10 bits 272 + clock_id: rand::random::<u32>() & 0x03FF, 273 + }; 274 + // prime the pump 275 + ticker.next(None); 276 + ticker 277 + } 278 + 279 + pub fn next(&mut self, prev: Option<Tid>) -> Tid { 280 + let now = SystemTime::now() 281 + .duration_since(SystemTime::UNIX_EPOCH) 282 + .expect("timestamp in micros since UNIX epoch") 283 + .as_micros() as usize; 284 + // mask to 53 bits 285 + let now = now & 0x001FFFFFFFFFFFFF; 286 + if now > self.last_timestamp { 287 + self.last_timestamp = now; 288 + } else { 289 + self.last_timestamp += 1; 290 + } 291 + // 53 bits of millis 292 + let micros = self.last_timestamp & 0x001FFFFFFFFFFFFF; 293 + // 10 bits of clock ID 294 + let clock_id = self.clock_id & 0x03FF; 295 + 296 + let tid = Tid::from_time(micros, clock_id as u32); 297 + match prev { 298 + Some(ref prev) if tid.newer_than(prev) => tid, 299 + Some(prev) => Tid::from_time(prev.timestamp() + 1, clock_id as u32), 300 + None => tid, 301 + } 302 + } 303 + } 304 + 305 + impl Default for Ticker { 306 + fn default() -> Self { 307 + Self::new() 308 + } 309 + }
+288 -1
crates/jacquard-common/src/types/value.rs
··· 3 3 prelude::{BASE64_STANDARD, BASE64_STANDARD_NO_PAD, BASE64_URL_SAFE, BASE64_URL_SAFE_NO_PAD}, 4 4 }; 5 5 use bytes::Bytes; 6 + use ipld_core::ipld::Ipld; 6 7 use serde::{Deserialize, Deserializer, Serialize, Serializer}; 7 8 use smol_str::{SmolStr, ToSmolStr}; 8 9 use std::{collections::BTreeMap, str::FromStr}; ··· 46 47 Self::Null 47 48 } 48 49 } 50 + 51 + pub fn from_cbor(cbor: &'s Ipld) -> Self { 52 + match cbor { 53 + Ipld::Null => Data::Null, 54 + Ipld::Bool(bool) => Data::Boolean(*bool), 55 + Ipld::Integer(int) => Data::Integer(*int as i64), 56 + Ipld::Float(_) => todo!(), 57 + Ipld::String(string) => Self::String(AtprotoStr::new(string)), 58 + Ipld::Bytes(items) => Self::Bytes(Bytes::copy_from_slice(items.as_slice())), 59 + Ipld::List(iplds) => Self::Array(Array::from_cbor(iplds)), 60 + Ipld::Map(btree_map) => Object::from_cbor(btree_map), 61 + Ipld::Link(cid) => Self::CidLink(Cid::ipld(*cid)), 62 + } 63 + } 49 64 } 50 65 51 66 #[derive(Debug, Clone, PartialEq, Eq)] ··· 56 71 let mut array = Vec::with_capacity(json.len()); 57 72 for item in json { 58 73 array.push(Data::from_json(item)); 74 + } 75 + Self(array) 76 + } 77 + pub fn from_cbor(cbor: &'s Vec<Ipld>) -> Self { 78 + let mut array = Vec::with_capacity(cbor.len()); 79 + for item in cbor { 80 + array.push(Data::from_cbor(item)); 59 81 } 60 82 Self(array) 61 83 } ··· 296 318 Data::Object(Object(map)) 297 319 } 298 320 299 - //pub fn from_cbor(cbor: BTreeMap<String, ipld_core::ipld::Ipld>) -> Self {} 321 + pub fn from_cbor(cbor: &'s BTreeMap<String, Ipld>) -> Data<'s> { 322 + if let Some(Ipld::String(type_field)) = cbor.get("$type") { 323 + if infer_from_type(type_field) == DataModelType::Blob { 324 + if let Some(blob) = cbor_to_blob(cbor) { 325 + return Data::Blob(blob); 326 + } 327 + } 328 + } 329 + let mut map = BTreeMap::new(); 330 + 331 + for (key, value) in cbor { 332 + if key == "$type" { 333 + continue; // skip, because we've already handled it 334 + } 335 + match string_key_type_guess(key) { 336 + DataModelType::Null => { 337 + if *value == Ipld::Null { 338 + map.insert(key.to_smolstr(), Data::Null); 339 + } else { 340 + map.insert(key.to_smolstr(), Data::from_cbor(value)); 341 + } 342 + } 343 + DataModelType::Boolean => { 344 + if let Ipld::Bool(value) = value { 345 + map.insert(key.to_smolstr(), Data::Boolean(*value)); 346 + } else { 347 + map.insert(key.to_smolstr(), Data::from_cbor(value)); 348 + } 349 + } 350 + DataModelType::Integer => { 351 + if let Ipld::Integer(int) = value { 352 + map.insert(key.to_smolstr(), Data::Integer(*int as i64)); 353 + } else { 354 + map.insert(key.to_smolstr(), Data::from_cbor(value)); 355 + } 356 + } 357 + DataModelType::Bytes => { 358 + if let Ipld::Bytes(value) = value { 359 + map.insert(key.to_smolstr(), Data::Bytes(Bytes::copy_from_slice(value))); 360 + } else { 361 + map.insert(key.to_smolstr(), Data::from_cbor(value)); 362 + } 363 + } 364 + DataModelType::Blob => { 365 + if let Ipld::Map(value) = value { 366 + map.insert(key.to_smolstr(), Object::from_cbor(value)); 367 + } else { 368 + map.insert(key.to_smolstr(), Data::from_cbor(value)); 369 + } 370 + } 371 + DataModelType::Array => { 372 + if let Ipld::List(value) = value { 373 + map.insert(key.to_smolstr(), Data::Array(Array::from_cbor(value))); 374 + } else { 375 + map.insert(key.to_smolstr(), Data::from_cbor(value)); 376 + } 377 + } 378 + DataModelType::Object => { 379 + if let Ipld::Map(value) = value { 380 + map.insert(key.to_smolstr(), Object::from_cbor(value)); 381 + } else { 382 + map.insert(key.to_smolstr(), Data::from_cbor(value)); 383 + } 384 + } 385 + DataModelType::String(string_type) => { 386 + if let Ipld::String(value) = value { 387 + match string_type { 388 + LexiconStringType::Datetime => { 389 + if let Ok(datetime) = Datetime::from_str(value) { 390 + map.insert( 391 + key.to_smolstr(), 392 + Data::String(AtprotoStr::Datetime(datetime)), 393 + ); 394 + } else { 395 + map.insert( 396 + key.to_smolstr(), 397 + Data::String(AtprotoStr::String(value.into())), 398 + ); 399 + } 400 + } 401 + LexiconStringType::AtUri => { 402 + if let Ok(value) = AtUri::new(value) { 403 + map.insert( 404 + key.to_smolstr(), 405 + Data::String(AtprotoStr::AtUri(value)), 406 + ); 407 + } else { 408 + map.insert( 409 + key.to_smolstr(), 410 + Data::String(AtprotoStr::String(value.into())), 411 + ); 412 + } 413 + } 414 + LexiconStringType::Did => { 415 + if let Ok(value) = Did::new(value) { 416 + map.insert( 417 + key.to_smolstr(), 418 + Data::String(AtprotoStr::Did(value)), 419 + ); 420 + } else { 421 + map.insert( 422 + key.to_smolstr(), 423 + Data::String(AtprotoStr::String(value.into())), 424 + ); 425 + } 426 + } 427 + LexiconStringType::Handle => { 428 + if let Ok(value) = Handle::new(value) { 429 + map.insert( 430 + key.to_smolstr(), 431 + Data::String(AtprotoStr::Handle(value)), 432 + ); 433 + } else { 434 + map.insert( 435 + key.to_smolstr(), 436 + Data::String(AtprotoStr::String(value.into())), 437 + ); 438 + } 439 + } 440 + LexiconStringType::AtIdentifier => { 441 + if let Ok(value) = AtIdentifier::new(value) { 442 + map.insert( 443 + key.to_smolstr(), 444 + Data::String(AtprotoStr::AtIdentifier(value)), 445 + ); 446 + } else { 447 + map.insert( 448 + key.to_smolstr(), 449 + Data::String(AtprotoStr::String(value.into())), 450 + ); 451 + } 452 + } 453 + LexiconStringType::Nsid => { 454 + if let Ok(value) = Nsid::new(value) { 455 + map.insert( 456 + key.to_smolstr(), 457 + Data::String(AtprotoStr::Nsid(value)), 458 + ); 459 + } else { 460 + map.insert( 461 + key.to_smolstr(), 462 + Data::String(AtprotoStr::String(value.into())), 463 + ); 464 + } 465 + } 466 + LexiconStringType::Cid => { 467 + if let Ok(value) = Cid::new(value.as_bytes()) { 468 + map.insert( 469 + key.to_smolstr(), 470 + Data::String(AtprotoStr::Cid(value)), 471 + ); 472 + } else { 473 + map.insert( 474 + key.to_smolstr(), 475 + Data::String(AtprotoStr::String(value.into())), 476 + ); 477 + } 478 + } 479 + LexiconStringType::Language => { 480 + if let Ok(value) = Language::new(value) { 481 + map.insert( 482 + key.to_smolstr(), 483 + Data::String(AtprotoStr::Language(value)), 484 + ); 485 + } else { 486 + map.insert( 487 + key.to_smolstr(), 488 + Data::String(AtprotoStr::String(value.into())), 489 + ); 490 + } 491 + } 492 + LexiconStringType::Tid => { 493 + if let Ok(value) = Tid::new(value) { 494 + map.insert( 495 + key.to_smolstr(), 496 + Data::String(AtprotoStr::Tid(value)), 497 + ); 498 + } else { 499 + map.insert( 500 + key.to_smolstr(), 501 + Data::String(AtprotoStr::String(value.into())), 502 + ); 503 + } 504 + } 505 + LexiconStringType::RecordKey => { 506 + if let Ok(value) = Rkey::new(value) { 507 + map.insert( 508 + key.to_smolstr(), 509 + Data::String(AtprotoStr::RecordKey(RecordKey::from(value))), 510 + ); 511 + } else { 512 + map.insert( 513 + key.to_smolstr(), 514 + Data::String(AtprotoStr::String(value.into())), 515 + ); 516 + } 517 + } 518 + LexiconStringType::Uri(_) => { 519 + if let Ok(uri) = Uri::new(value) { 520 + map.insert( 521 + key.to_smolstr(), 522 + Data::String(AtprotoStr::Uri(uri)), 523 + ); 524 + } else { 525 + map.insert( 526 + key.to_smolstr(), 527 + Data::String(AtprotoStr::String(value.into())), 528 + ); 529 + } 530 + } 531 + LexiconStringType::String => { 532 + map.insert(key.to_smolstr(), Data::String(parse_string(value))); 533 + } 534 + } 535 + } else { 536 + map.insert(key.to_smolstr(), Data::from_cbor(value)); 537 + } 538 + } 539 + _ => { 540 + map.insert(key.to_smolstr(), Data::from_cbor(value)); 541 + } 542 + } 543 + } 544 + 545 + Data::Object(Object(map)) 546 + } 300 547 } 301 548 302 549 /// smarter parsing to avoid trying as many posibilities. ··· 377 624 // or Ipld value is at least a string, and then we fall back to Object/Map. 378 625 _ => DataModelType::String(LexiconStringType::String), 379 626 } 627 + } 628 + 629 + pub fn cbor_to_blob<'b>(blob: &'b BTreeMap<String, Ipld>) -> Option<Blob<'b>> { 630 + let mime_type = blob.get("mimeType").and_then(|o| { 631 + if let Ipld::String(string) = o { 632 + Some(string) 633 + } else { 634 + None 635 + } 636 + }); 637 + if let Some(value) = blob.get("ref") { 638 + if let Ipld::Map(value) = value { 639 + if let Some(Ipld::String(value)) = value.get("$link") { 640 + let size = blob.get("size").and_then(|o| { 641 + if let Ipld::Integer(i) = o { 642 + Some(*i as i64) 643 + } else { 644 + None 645 + } 646 + }); 647 + if let (Some(mime_type), Some(size)) = (mime_type, size) { 648 + return Some(Blob { 649 + r#ref: Cid::str(value), 650 + mime_type: MimeType::raw(mime_type), 651 + size: size as usize, 652 + }); 653 + } 654 + } 655 + } 656 + } else if let Some(Ipld::String(value)) = blob.get("cid") { 657 + if let Some(mime_type) = mime_type { 658 + return Some(Blob { 659 + r#ref: Cid::str(value), 660 + mime_type: MimeType::raw(mime_type), 661 + size: 0, 662 + }); 663 + } 664 + } 665 + 666 + None 380 667 } 381 668 382 669 pub fn json_to_blob<'b>(blob: &'b serde_json::Map<String, serde_json::Value>) -> Option<Blob<'b>> {