A better Rust ATProto crate

more string types, blobrefs

Opting not to support the old untyped blobrefs, to keep things simple;
I haven't seen any in the wild, as far as I know.
Might switch to a custom Serialize/Deserialize impl later to regain that capability
without a bunch of extra nesting.

Orual 1d282258 7f513e14

Changed files
+576 -15
crates
+62
Cargo.lock
··· 91 91 "core2", 92 92 "multibase", 93 93 "multihash", 94 + "serde", 95 + "serde_bytes", 94 96 "unsigned-varint", 95 97 ] 96 98 ··· 220 222 dependencies = [ 221 223 "cid", 222 224 "compact_str", 225 + "miette", 223 226 "multibase", 224 227 "multihash", 225 228 "regex", 226 229 "serde", 230 + "thiserror", 227 231 ] 228 232 229 233 [[package]] ··· 233 237 checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" 234 238 235 239 [[package]] 240 + name = "miette" 241 + version = "7.6.0" 242 + source = "registry+https://github.com/rust-lang/crates.io-index" 243 + checksum = "5f98efec8807c63c752b5bd61f862c165c115b0a35685bdcfd9238c7aeb592b7" 244 + dependencies = [ 245 + "cfg-if", 246 + "miette-derive", 247 + "unicode-width", 248 + ] 249 + 250 + [[package]] 251 + name = "miette-derive" 252 + version = "7.6.0" 253 + source = "registry+https://github.com/rust-lang/crates.io-index" 254 + checksum = "db5b29714e950dbb20d5e6f74f9dcec4edbcc1067bb7f8ed198c097b8c1a818b" 255 + dependencies = [ 256 + "proc-macro2", 257 + "quote", 258 + "syn", 259 + ] 260 + 261 + [[package]] 236 262 name = "multibase" 237 263 version = "0.9.1" 238 264 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 250 276 checksum = "6b430e7953c29dd6a09afc29ff0bb69c6e306329ee6794700aee27b76a1aea8d" 251 277 dependencies = [ 252 278 "core2", 279 + "serde", 253 280 "unsigned-varint", 254 281 ] 255 282 ··· 329 356 ] 330 357 331 358 [[package]] 359 + name = "serde_bytes" 360 + version = "0.11.17" 361 + source = "registry+https://github.com/rust-lang/crates.io-index" 362 + checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" 363 + dependencies = [ 364 + "serde", 365 + ] 366 + 367 + [[package]] 332 368 name = "serde_core" 333 369 version = "1.0.227" 334 370 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 372 408 ] 373 409 374 410 [[package]] 411 + name = "thiserror" 412 + version = "2.0.16" 413 + source = 
"registry+https://github.com/rust-lang/crates.io-index" 414 + checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" 415 + dependencies = [ 416 + "thiserror-impl", 417 + ] 418 + 419 + [[package]] 420 + name = "thiserror-impl" 421 + version = "2.0.16" 422 + source = "registry+https://github.com/rust-lang/crates.io-index" 423 + checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" 424 + dependencies = [ 425 + "proc-macro2", 426 + "quote", 427 + "syn", 428 + ] 429 + 430 + [[package]] 375 431 name = "unicode-ident" 376 432 version = "1.0.19" 377 433 source = "registry+https://github.com/rust-lang/crates.io-index" 378 434 checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" 435 + 436 + [[package]] 437 + name = "unicode-width" 438 + version = "0.1.14" 439 + source = "registry+https://github.com/rust-lang/crates.io-index" 440 + checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" 379 441 380 442 [[package]] 381 443 name = "unsigned-varint"
+3 -1
crates/jacquard-common/Cargo.toml
··· 6 6 description.workspace = true 7 7 8 8 [dependencies] 9 - cid = "0.11.1" 9 + cid = { version = "0.11.1", features = ["serde", "std"] } 10 10 compact_str = "0.9.0" 11 + miette = "7.6.0" 11 12 multibase = "0.9.1" 12 13 multihash = "0.19.3" 13 14 regex = "1.11.3" 14 15 serde = { version = "1.0.227", features = ["derive"] } 16 + thiserror = "2.0.16"
+129 -2
crates/jacquard-common/src/blob.rs
··· 1 - use crate::CowStr; 1 + use crate::{CowStr, cid::Cid}; 2 + use compact_str::ToCompactString; 3 + #[allow(unused)] 4 + use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Error}; 5 + #[allow(unused)] 6 + use std::{ 7 + borrow::Cow, 8 + fmt, 9 + hash::{Hash, Hasher}, 10 + ops::Deref, 11 + str::FromStr, 12 + }; 13 + 14 + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 15 + #[serde(rename_all = "camelCase")] 16 + pub struct Blob<'b> { 17 + pub r#ref: Cid<'b>, 18 + #[serde(borrow)] 19 + pub mime_type: MimeType<'b>, 20 + pub size: usize, 21 + } 22 + 23 + impl<'r> BlobRef<'r> { 24 + pub fn blob(&self) -> &Blob<'r> { 25 + match self { 26 + BlobRef::Blob(blob) => blob, 27 + } 28 + } 29 + } 30 + 31 + /// Current, typed blob reference. 32 + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 33 + #[serde(tag = "$type", rename_all = "lowercase")] 34 + pub enum BlobRef<'r> { 35 + #[serde(borrow)] 36 + Blob(Blob<'r>), 37 + } 2 38 3 39 /// Wrapper for file type 4 - #[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] 40 + #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)] 41 + #[serde(transparent)] 5 42 pub struct MimeType<'m>(pub CowStr<'m>); 43 + 44 + impl<'m> MimeType<'m> { 45 + /// Fallible constructor, validates, borrows from input 46 + pub fn new(mime_type: &'m str) -> Result<MimeType<'m>, &'static str> { 47 + Ok(Self(CowStr::Borrowed(mime_type))) 48 + } 49 + 50 + /// Fallible constructor from an existing CowStr, borrows 51 + pub fn from_cowstr(mime_type: CowStr<'m>) -> Result<MimeType<'m>, &'static str> { 52 + Ok(Self(mime_type)) 53 + } 54 + 55 + /// Infallible constructor 56 + pub fn raw(mime_type: &'m str) -> Self { 57 + Self(CowStr::Borrowed(mime_type)) 58 + } 59 + 60 + pub fn as_str(&self) -> &str { 61 + { 62 + let this = &self.0; 63 + this 64 + } 65 + } 66 + } 67 + 68 + impl FromStr for MimeType<'_> { 69 + type Err = &'static str; 70 + 71 + /// Has to take ownership due to the lifetime constraints of the 
FromStr trait. 72 + fn from_str(s: &str) -> Result<Self, Self::Err> { 73 + Self::from_cowstr(CowStr::Owned(s.to_compact_string())) 74 + } 75 + } 76 + 77 + impl<'de, 'b> Deserialize<'de> for MimeType<'b> 78 + where 79 + 'de: 'b, 80 + { 81 + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 82 + where 83 + D: Deserializer<'de>, 84 + { 85 + let value = Deserialize::deserialize(deserializer)?; 86 + Self::new(value).map_err(D::Error::custom) 87 + } 88 + } 89 + 90 + impl fmt::Display for MimeType<'_> { 91 + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 92 + f.write_str(&self.0) 93 + } 94 + } 95 + 96 + impl<'m> From<MimeType<'m>> for String { 97 + fn from(value: MimeType<'m>) -> Self { 98 + value.0.to_string() 99 + } 100 + } 101 + 102 + impl<'m> From<MimeType<'m>> for CowStr<'m> { 103 + fn from(value: MimeType<'m>) -> Self { 104 + value.0 105 + } 106 + } 107 + 108 + impl From<String> for MimeType<'static> { 109 + fn from(value: String) -> Self { 110 + Self(CowStr::Owned(value.to_compact_string())) 111 + } 112 + } 113 + 114 + impl<'m> From<CowStr<'m>> for MimeType<'m> { 115 + fn from(value: CowStr<'m>) -> Self { 116 + Self(value) 117 + } 118 + } 119 + 120 + impl AsRef<str> for MimeType<'_> { 121 + fn as_ref(&self) -> &str { 122 + self.as_str() 123 + } 124 + } 125 + 126 + impl Deref for MimeType<'_> { 127 + type Target = str; 128 + 129 + fn deref(&self) -> &Self::Target { 130 + self.as_str() 131 + } 132 + }
+183 -8
crates/jacquard-common/src/cid.rs
··· 1 - use serde::{Deserialize, Serialize}; 2 - use std::str:: 1 + use std::{convert::Infallible, fmt, marker::PhantomData, ops::Deref, str::FromStr}; 2 + 3 + use compact_str::ToCompactString; 4 + use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Visitor}; 3 5 4 6 pub use cid::Cid as IpldCid; 5 7 8 + use crate::CowStr; 9 + 6 10 /// raw 7 11 pub const ATP_CID_CODEC: u64 = 0x55; 8 12 ··· 10 14 pub const ATP_CID_HASH: u64 = 0x12; 11 15 12 16 /// base 32 13 - pub const ATP_CID_BASE: multibase::Base = multibase::Base::Base32; 17 + pub const ATP_CID_BASE: multibase::Base = multibase::Base::Base32Lower; 18 + 19 + #[derive(Debug, Clone, PartialEq, Eq, Hash)] 20 + /// Either the string form of a cid or the ipld form 21 + /// For the IPLD form we also cache the string representation for later use. 22 + /// 23 + /// Default on deserialization matches the format (if we get bytes, we try to decode) 24 + pub enum Cid<'c> { 25 + Ipld { cid: IpldCid, s: CowStr<'c> }, 26 + Str(CowStr<'c>), 27 + } 28 + 29 + #[derive(Debug, thiserror::Error, miette::Diagnostic)] 30 + pub enum Error { 31 + #[error("Invalid IPLD CID {:?}", 0)] 32 + Ipld(#[from] cid::Error), 33 + #[error("{:?}", 0)] 34 + Utf8(#[from] std::str::Utf8Error), 35 + } 36 + 37 + impl<'c> Cid<'c> { 38 + pub fn new(cid: &'c [u8]) -> Result<Self, Error> { 39 + if let Ok(cid) = IpldCid::try_from(cid.as_ref()) { 40 + Ok(Self::ipld(cid)) 41 + } else { 42 + let cid_str = CowStr::from_utf8(cid)?; 43 + Ok(Self::Str(cid_str)) 44 + } 45 + } 46 + pub fn ipld(cid: IpldCid) -> Self { 47 + let s = CowStr::Owned( 48 + cid.to_string_of_base(ATP_CID_BASE) 49 + .unwrap_or_default() 50 + .to_compact_string(), 51 + ); 52 + Self::Ipld { cid, s } 53 + } 54 + 55 + pub fn str(cid: &'c str) -> Self { 56 + Self::Str(CowStr::Borrowed(cid)) 57 + } 58 + 59 + pub fn cow_str(cid: CowStr<'c>) -> Self { 60 + Self::Str(cid) 61 + } 62 + 63 + pub fn to_ipld(&self) -> Result<IpldCid, cid::Error> { 64 + match self { 65 + Cid::Ipld { cid, s: _ } => 
Ok(cid.clone()), 66 + Cid::Str(cow_str) => IpldCid::try_from(cow_str.as_ref()), 67 + } 68 + } 69 + 70 + pub fn as_str(&self) -> &str { 71 + match self { 72 + Cid::Ipld { cid: _, s } => s.as_ref(), 73 + Cid::Str(cow_str) => cow_str.as_ref(), 74 + } 75 + } 76 + } 77 + 78 + impl std::fmt::Display for Cid<'_> { 79 + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 80 + match self { 81 + Cid::Ipld { cid: _, s } => f.write_str(&s), 82 + Cid::Str(cow_str) => f.write_str(&cow_str), 83 + } 84 + } 85 + } 86 + 87 + impl FromStr for Cid<'_> { 88 + type Err = Infallible; 89 + 90 + /// Has to take ownership due to the lifetime constraints of the FromStr trait. 91 + fn from_str(s: &str) -> Result<Self, Self::Err> { 92 + Ok(Cid::Str(CowStr::Owned(s.to_compact_string()))) 93 + } 94 + } 95 + 96 + impl Serialize for Cid<'_> { 97 + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> 98 + where 99 + S: Serializer, 100 + { 101 + match self { 102 + Cid::Ipld { cid, s: _ } => cid.serialize(serializer), 103 + Cid::Str(cow_str) => cow_str.serialize(serializer), 104 + } 105 + } 106 + } 107 + 108 + // TODO: take another look at this, see if we can do more borrowed and such 109 + impl<'de> Deserialize<'de> for Cid<'_> { 110 + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 111 + where 112 + D: Deserializer<'de>, 113 + { 114 + struct StringOrBytes<T>(PhantomData<fn() -> T>); 115 + 116 + impl<'de, T> Visitor<'de> for StringOrBytes<T> 117 + where 118 + T: Deserialize<'de> + FromStr<Err = Infallible> + From<IpldCid>, 119 + { 120 + type Value = T; 121 + 122 + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 123 + formatter.write_str("either valid IPLD CID bytes or a str") 124 + } 125 + 126 + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> 127 + where 128 + E: serde::de::Error, 129 + { 130 + Ok(FromStr::from_str(v).unwrap()) 131 + } 132 + 133 + fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E> 134 + where 135 + E: 
serde::de::Error, 136 + { 137 + let hash = cid::multihash::Multihash::from_bytes(v).map_err(|e| E::custom(e))?; 138 + Ok(T::from(IpldCid::new_v1(ATP_CID_CODEC, hash))) 139 + } 140 + } 141 + 142 + deserializer.deserialize_any(StringOrBytes(PhantomData)) 143 + } 144 + } 145 + 146 + impl From<Cid<'_>> for String { 147 + fn from(value: Cid) -> Self { 148 + let cow_str = match value { 149 + Cid::Ipld { cid: _, s } => s, 150 + Cid::Str(cow_str) => cow_str, 151 + }; 152 + cow_str.to_string() 153 + } 154 + } 155 + 156 + impl<'d> From<Cid<'d>> for CowStr<'d> { 157 + fn from(value: Cid<'d>) -> Self { 158 + match value { 159 + Cid::Ipld { cid: _, s } => s, 160 + Cid::Str(cow_str) => cow_str, 161 + } 162 + } 163 + } 164 + 165 + impl From<String> for Cid<'_> { 166 + fn from(value: String) -> Self { 167 + Cid::Str(CowStr::Owned(value.to_compact_string())) 168 + } 169 + } 170 + 171 + impl<'d> From<CowStr<'d>> for Cid<'d> { 172 + fn from(value: CowStr<'d>) -> Self { 173 + Cid::Str(value) 174 + } 175 + } 176 + 177 + impl From<IpldCid> for Cid<'_> { 178 + fn from(value: IpldCid) -> Self { 179 + Cid::ipld(value) 180 + } 181 + } 182 + 183 + impl AsRef<str> for Cid<'_> { 184 + fn as_ref(&self) -> &str { 185 + self.as_str() 186 + } 187 + } 188 + 189 + impl Deref for Cid<'_> { 190 + type Target = str; 14 191 15 - #[derive(Serialize, Deserialize, Debug, Clone)] 16 - #[serde(untagged)] 17 - pub enum Cid { 18 - Cid(IpldCid), 19 - CidStr(CowStr<'static>), 192 + fn deref(&self) -> &Self::Target { 193 + self.as_str() 194 + } 20 195 }
+52 -3
crates/jacquard-common/src/cowstr.rs
··· 1 + use compact_str::CompactString; 2 + use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Error}; 1 3 use std::{ 2 4 borrow::Cow, 3 5 fmt, 4 6 hash::{Hash, Hasher}, 5 7 ops::Deref, 8 + str::FromStr, 6 9 }; 7 - 8 - use compact_str::CompactString; 9 10 10 11 use crate::IntoStatic; 11 12 ··· 208 209 } 209 210 } 210 211 211 - use serde::{Deserialize, Serialize}; 212 + /// Common trait implementations for Lexicon string formats that are newtype wrappers 213 + /// around `String`. 214 + macro_rules! string_newtype { 215 + ($name:ident) => { 216 + impl FromStr for $name<'_> { 217 + type Err = &'static str; 218 + 219 + fn from_str(s: &str) -> Result<Self, Self::Err> { 220 + Self::new(s) 221 + } 222 + } 223 + 224 + impl<'de> Deserialize<'de> for $name<'de> { 225 + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 226 + where 227 + D: Deserializer<'de>, 228 + { 229 + let value = Deserialize::deserialize(deserializer)?; 230 + Self::new(value).map_err(D::Error::custom) 231 + } 232 + } 233 + 234 + impl From<$name<'_>> for String { 235 + fn from(value: $name) -> Self { 236 + value.0.to_string() 237 + } 238 + } 239 + 240 + impl From<$name> for CowStr<'s> { 241 + fn from(value: $name) -> Self { 242 + value.0 243 + } 244 + } 245 + 246 + impl AsRef<str> for $name<'_> { 247 + fn as_ref(&self) -> &str { 248 + self.as_str() 249 + } 250 + } 251 + 252 + impl Deref for $name<'_> { 253 + type Target = str; 254 + 255 + fn deref(&self) -> &Self::Target { 256 + self.as_str() 257 + } 258 + } 259 + }; 260 + } 212 261 213 262 impl Serialize for CowStr<'_> { 214 263 #[inline]
+141 -1
crates/jacquard-common/src/did.rs
··· 1 - use crate::CowStr; 1 + use std::fmt; 2 + use std::sync::LazyLock; 3 + use std::{ops::Deref, str::FromStr}; 4 + 5 + use compact_str::ToCompactString; 6 + use serde::{Deserialize, Deserializer, Serialize, de::Error}; 2 7 8 + use crate::{CowStr, IntoStatic}; 9 + use regex::Regex; 10 + 11 + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Hash)] 12 + #[serde(transparent)] 3 13 pub struct Did<'d>(CowStr<'d>); 14 + 15 + pub static DID_REGEX: LazyLock<Regex> = 16 + LazyLock::new(|| Regex::new(r"^did:[a-z]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$").unwrap()); 17 + 18 + impl<'d> Did<'d> { 19 + /// Fallible constructor, validates, borrows from input 20 + pub fn new(did: &'d str) -> Result<Self, &'static str> { 21 + if did.len() > 2048 { 22 + Err("DID too long") 23 + } else if !DID_REGEX.is_match(did) { 24 + Err("Invalid DID") 25 + } else { 26 + Ok(Self(CowStr::Borrowed(did))) 27 + } 28 + } 29 + 30 + /// Fallible constructor from an existing CowStr, clones and takes 31 + pub fn from_cowstr(did: CowStr<'d>) -> Result<Did<'d>, &'static str> { 32 + if did.len() > 2048 { 33 + Err("DID too long") 34 + } else if !DID_REGEX.is_match(&did) { 35 + Err("Invalid DID") 36 + } else { 37 + Ok(Self(did.into_static())) 38 + } 39 + } 40 + 41 + /// Infallible constructor for when you *know* the string is a valid DID. 42 + /// Will panic on invalid DIDs. If you're manually decoding atproto records 43 + /// or API values you know are valid (rather than using serde), this is the one to use. 44 + /// The From<String> and From<CowStr> impls use the same logic. 45 + pub fn raw(did: &'d str) -> Self { 46 + if did.len() > 2048 { 47 + panic!("DID too long") 48 + } else if !DID_REGEX.is_match(did) { 49 + panic!("Invalid DID") 50 + } else { 51 + Self(CowStr::Borrowed(did)) 52 + } 53 + } 54 + 55 + /// Infallible constructor for when you *know* the string is a valid DID. 56 + /// Marked unsafe because responsibility for upholding the invariant is on the developer. 
57 + pub unsafe fn unchecked(did: &'d str) -> Self { 58 + Self(CowStr::Borrowed(did)) 59 + } 60 + 61 + pub fn as_str(&self) -> &str { 62 + { 63 + let this = &self.0; 64 + this 65 + } 66 + } 67 + } 68 + 69 + impl FromStr for Did<'_> { 70 + type Err = &'static str; 71 + 72 + /// Has to take ownership due to the lifetime constraints of the FromStr trait. 73 + /// Prefer `Did::new()` or `Did::raw` if you want to borrow. 74 + fn from_str(s: &str) -> Result<Self, Self::Err> { 75 + Self::from_cowstr(CowStr::Owned(s.to_compact_string())) 76 + } 77 + } 78 + 79 + impl<'de> Deserialize<'de> for Did<'de> { 80 + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 81 + where 82 + D: Deserializer<'de>, 83 + { 84 + let value = Deserialize::deserialize(deserializer)?; 85 + Self::new(value).map_err(D::Error::custom) 86 + } 87 + } 88 + 89 + impl fmt::Display for Did<'_> { 90 + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 91 + f.write_str(&self.0) 92 + } 93 + } 94 + 95 + impl<'d> From<Did<'d>> for String { 96 + fn from(value: Did<'d>) -> Self { 97 + value.0.to_string() 98 + } 99 + } 100 + 101 + impl<'d> From<Did<'d>> for CowStr<'d> { 102 + fn from(value: Did<'d>) -> Self { 103 + value.0 104 + } 105 + } 106 + 107 + impl From<String> for Did<'static> { 108 + fn from(value: String) -> Self { 109 + if value.len() > 2048 { 110 + panic!("DID too long") 111 + } else if !DID_REGEX.is_match(&value) { 112 + panic!("Invalid DID") 113 + } else { 114 + Self(CowStr::Owned(value.to_compact_string())) 115 + } 116 + } 117 + } 118 + 119 + impl<'d> From<CowStr<'d>> for Did<'d> { 120 + fn from(value: CowStr<'d>) -> Self { 121 + if value.len() > 2048 { 122 + panic!("DID too long") 123 + } else if !DID_REGEX.is_match(&value) { 124 + panic!("Invalid DID") 125 + } else { 126 + Self(value) 127 + } 128 + } 129 + } 130 + 131 + impl AsRef<str> for Did<'_> { 132 + fn as_ref(&self) -> &str { 133 + self.as_str() 134 + } 135 + } 136 + 137 + impl Deref for Did<'_> { 138 + type Target = str; 139 
+ 140 + fn deref(&self) -> &Self::Target { 141 + self.as_str() 142 + } 143 + }
+6
crates/jacquard-common/src/lib.rs
··· 1 1 pub mod aturi; 2 + #[macro_use] 2 3 pub mod cowstr; 4 + #[macro_use] 5 + pub mod blob; 6 + pub mod cid; 7 + 3 8 pub mod did; 4 9 pub mod handle; 10 + #[macro_use] 5 11 pub mod into_static; 6 12 pub mod link; 7 13 pub mod nsid;