// A highly-optimized library for atproto DIDs.
// (Code-viewer header captured with the source: at main, 483 lines, 18 kB, view raw.)
1#![forbid(unsafe_op_in_unsafe_fn)] 2//! This library implements a memory-efficient container for atproto DIDs. 3//! 4//! The [`Did`] type is guaranteed to take up exactly 16 bytes. 5//! - `did:plc` is represented with just these 16 bytes 6//! - `did:web` uses up to 255 bytes of heap 7//! - other DID methods (not officially supported by atproto) allocate heap space as needed 8//! 9//! --- 10//! 11//! The most common DID in the Atmosphere (the atproto ecosystem) is `did:plc`. 12//! Currently, its identifier is exactly 24 characters of base32. At 5 bits per characters, 13//! that is 120 bits, or 15 bytes. This leaves just enough space for 14//! a single-byte discriminator, making this implementation highly-optimized 15//! for `did:plc` while also allowing it to represent other DID methods. 16//! **The PLC identifier is expected to be exactly 24 characters long.** 17//! 18//! Atproto currently only supports host-level `did:web` DIDs, so the identifier is a web domain. 19//! The maximum length of a web domain is 255 bytes, so a [`Did`] representing a `did:web` allocates 20//! the necessary space on the heap. **Parsing a `did:web` with an unsupported format fails**. 21//! _The only exception is `localhost`, which supports an optional port 22//! (as in `localhost%3A12345`)._ 23//! 24//! Other DID methods are currently unsupported by atproto, but not strictly disallowed. 25//! DID methods that are not `plc` or `web` have their method and identifier strings 26//! allocated on the heap. 27 28mod plc_codec; 29mod web_domain; 30 31use std::{ 32 cmp::Ordering, 33 fmt::{Debug, Display, Formatter}, 34 hash::{Hash, Hasher}, 35 mem::ManuallyDrop, 36 str::FromStr, 37}; 38 39use thiserror::Error; 40 41use crate::web_domain::DidWebDomain; 42 43/// A tightly-packed representation of DIDs for atproto. 44/// 45/// Currently, `did:plc`, the most common kind of DID in the atmosphere, 46/// has a base32 identifier with exactly 24 characters. 
47/// At 5 bits per characters, that is 120 bits, or 15 bytes. 48/// This type is optimized for `did:plc`, and _is guaranteed_ 49/// to take up exactly 16 bytes. 50/// 51/// did:web has its domain part heap-allocated as a string of up to 255 bytes. 52/// Other DID methods (not currently supported by atproto) 53/// allocate both the method and identifier as strings. 54pub struct Did(DidInner); 55 56impl Did { 57 /// Which method does this [`Did`] represent? 58 /// 59 /// See [`Did::method()`] to get the string representation (especially for [`DidKind::Other`]). 60 pub const fn kind(&self) -> DidKind { 61 match &self.0 { 62 DidInner::Plc(_) => DidKind::Plc, 63 DidInner::Web { .. } => DidKind::Web, 64 DidInner::Other(_) => DidKind::Other, 65 } 66 } 67 68 /// The DID method string. 69 pub const fn method(&self) -> &str { 70 match &self.0 { 71 DidInner::Plc(_) => "plc", 72 DidInner::Web { .. } => "web", 73 DidInner::Other(other) => other.method.as_str(), 74 } 75 } 76 77 // TODO: did:plc: will now return the full 32 bytes - patch this to work with that 78 // /// The identifier part of the DID. 79 // /// 80 // /// Allocates a String for `did:plc` (since it is stored as bytes rather than base32). 
81 // pub fn identifier<'a>(&'a self) -> Cow<'a, str> { 82 // match &self.0 { 83 // DidInner::Plc(bytes) => { 84 // let mut string = String::with_capacity(24); 85 // bytes_to_plc_ident_append(bytes, &mut string); 86 // Cow::Owned(string) 87 // } 88 // DidInner::Web { ptr, len } => { 89 // // SAFETY: ptr and len come from an unpacked DidWebDomain 90 // let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) }; 91 // // SAFETY: web is not dropped, so the string slice remains valid 92 // // The string slice is valid for as long as &self 93 // let ident: &'a str = unsafe { transmute::<_, &'a str>(web.as_str()) }; 94 // ident.into() 95 // } 96 // DidInner::Other(other) => other.identifier.as_str().into(), 97 // } 98 // } 99 100 // /// Constructs a [`Did`] from a plain `did:plc` identifier (only the part after `did:plc:`). 101 // pub fn from_plc_ident(_ident: &str) -> Option<Self> { 102 // todo!("Reimplement or remove") 103 // // plc_ident_to_bytes(ident).map(DidInner::Plc).map(Did) 104 // } 105 106 /// Constructs a [`Did`] directly from the identifier's bytes. 107 pub fn from_plc_bytes(bytes: [u8; 15]) -> Self { 108 Did(DidInner::Plc(bytes)) 109 } 110 111 /// Constructs a [`Did`] from a web domain (the identifier after `did:web:`). 112 pub fn from_web_domain(domain_str: &str) -> Option<Self> { 113 // Try to parse the domain, 114 // then unpack the raw data for a more packed repr 115 // Storing the DidWebDomain directly in the enum makes it too large 116 let web = DidWebDomain::from_str(domain_str).ok()?; 117 let (ptr, len) = web.into_raw_parts(); 118 Some(Did(DidInner::Web { len, ptr })) 119 } 120 121 /// Constructs a [`Did`] from an arbitrary method and identifier. 
122 pub fn from_other_method_ident(method: &str, ident: &str) -> Option<Self> { 123 // Method must be lowercase alphanumeric 124 if !method.chars().all(|c: char| c.is_ascii_lowercase() || c.is_ascii_digit()) { 125 return None; 126 } 127 128 // Identifier must be alphanumeric or one of: ._:%- 129 if !ident.chars().all(|c| c.is_ascii_alphanumeric() || "._:%-".contains(c)) { 130 return None; 131 } 132 133 // Identifier cannot end with % or : 134 if ident.ends_with('%') || ident.ends_with(':') { 135 return None; 136 } 137 138 // Identifier cannot be empty 139 if ident.is_empty() { 140 return None; 141 } 142 143 // Max identifier is (currently) 2048 144 if ident.len() > 2048 { 145 return None; 146 } 147 148 // Does not check percent sign encoding! 149 150 Some(Did(DidInner::Other(Box::new(DidOther { 151 method: method.to_owned(), 152 identifier: ident.to_owned(), 153 })))) 154 } 155} 156 157#[derive(Debug, Error)] 158pub enum DidParseError { 159 #[error("missing `did:`")] 160 MissingDid, 161 #[error("missing identifier")] 162 MissingIdent, 163 #[error("invalid identifier")] 164 InvalidIdent, 165} 166 167impl FromStr for Did { 168 type Err = DidParseError; 169 fn from_str(input: &str) -> Result<Self, Self::Err> { 170 // Eagerly try to parse did:plc 171 // This optimizes the code for the happy path 172 if let Some(input_32) = input.as_bytes().as_array::<32>() { 173 let plc_opt = plc_codec::decode_plc(input_32); 174 if let Ok(did_inner) = plc_opt.try_into() { 175 return Ok(Did(did_inner)); 176 } 177 } 178 179 let Some(method_ident) = input.strip_prefix("did:") else { 180 return Err(DidParseError::MissingDid); 181 }; 182 183 let Some((method, ident)) = method_ident.split_once(':') else { 184 return Err(DidParseError::MissingIdent); 185 }; 186 187 match method { 188 "plc" => { 189 // did:plc is parsed eagerly 190 // If we got here, the DID was not 32 bytes long 191 // We already checked for "did:" and the "plc" method, 192 // so the identifier must be wrong 193 
Err(DidParseError::InvalidIdent) 194 } 195 "web" => Self::from_web_domain(ident).ok_or(DidParseError::InvalidIdent), 196 _ => Self::from_other_method_ident(method, ident).ok_or(DidParseError::InvalidIdent), 197 } 198 } 199} 200 201/// Which method does this [`Did`] represent? 202/// 203/// See [`Did::method()`] to get the string representation (especially for [`DidKind::Other`]). 204#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] 205pub enum DidKind { 206 Plc, 207 Web, 208 Other, 209} 210 211#[repr(u8)] 212enum DidInner { 213 /// 1-byte discriminant + 15 identifier bytes 214 Plc([u8; 15]), 215 /// Contains an unpacked [`DidWebDomain`] 216 Web { len: u8, ptr: *const u8 }, 217 /// A single-word pointer to two heap-allocated strings 218 Other(Box<DidOther>), 219} 220 221#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 222struct DidOther { 223 method: String, 224 identifier: String, 225} 226 227impl Drop for DidInner { 228 fn drop(&mut self) { 229 match self { 230 DidInner::Web { len, ptr } => { 231 // SAFETY: ptr and len come from an unpacked DidWebDomain 232 // did:web needs to run its special drop code 233 // This deallocates the memory at ptr! 
234 _ = unsafe { DidWebDomain::from_raw_parts(*ptr, *len) }; 235 } 236 DidInner::Plc(_) | DidInner::Other(_) => { 237 // plc and other don't require special drop logic 238 } 239 } 240 } 241} 242 243impl PartialEq for Did { 244 fn eq(&self, other: &Self) -> bool { 245 match (&self.0, &other.0) { 246 (DidInner::Plc(a), DidInner::Plc(b)) => a == b, 247 ( 248 DidInner::Web { len: len_a, ptr: ptr_a }, 249 DidInner::Web { len: len_b, ptr: ptr_b }, 250 ) => { 251 // SAFETY: ptr and len come from an unpacked DidWebDomain 252 // ManuallyDrop prevents dropping the contents 253 let a = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_a, *len_a)) }; 254 let b = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_b, *len_b)) }; 255 a.eq(&b) 256 } 257 (DidInner::Other(a), DidInner::Other(b)) => a.eq(b), 258 _ => false, 259 } 260 } 261} 262 263impl Eq for Did {} 264 265impl PartialOrd for Did { 266 fn partial_cmp(&self, other: &Self) -> Option<Ordering> { 267 Some(self.cmp(other)) 268 } 269} 270 271impl Ord for Did { 272 fn cmp(&self, other: &Self) -> Ordering { 273 match (self, other) { 274 // plc & plc -> compare bytes 275 (Did(DidInner::Plc(a)), Did(DidInner::Plc(b))) => a.cmp(b), 276 // web & web -> compare domains 277 ( 278 Did(DidInner::Web { len: len_a, ptr: ptr_a }), 279 Did(DidInner::Web { len: len_b, ptr: ptr_b }), 280 ) => { 281 // SAFETY: ptr and len come from an unpacked DidWebDomain 282 // ManuallyDrop prevents dropping the contents 283 let a = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_a, *len_a)) }; 284 let b = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_b, *len_b)) }; 285 a.cmp(&b) 286 } 287 // other & other -> compare methods and identifiers 288 (Did(DidInner::Other(a)), Did(DidInner::Other(b))) => a.cmp(b), 289 // different kinds -> just compare the method 290 (a, b) => { 291 debug_assert_ne!(a.kind(), b.kind()); 292 a.method().cmp(b.method()) 293 } 294 } 295 } 296} 297 298impl Hash for Did { 299 fn 
hash<H: Hasher>(&self, state: &mut H) { 300 match &self.0 { 301 DidInner::Plc(bytes) => { 302 state.write(b"plc"); 303 bytes.hash(state); 304 } 305 DidInner::Web { ptr, len } => { 306 state.write(b"web"); 307 let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) }; 308 web.hash(state); 309 } 310 DidInner::Other(other) => { 311 other.method.hash(state); 312 } 313 } 314 } 315} 316 317impl Clone for Did { 318 fn clone(&self) -> Self { 319 Did(match &self.0 { 320 DidInner::Plc(data) => DidInner::Plc(*data), 321 DidInner::Web { len, ptr } => { 322 // SAFETY: ptr and len come from an unpacked DidWebDomain 323 // ManuallyDrop prevents dropping the contents 324 // The implementation of DidWebDomain::clone is used 325 let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) }; 326 let web = web.clone(); 327 // into_raw_parts doesn't drop the data 328 let (ptr, len) = ManuallyDrop::into_inner(web).into_raw_parts(); 329 DidInner::Web { len, ptr } 330 } 331 DidInner::Other(other) => DidInner::Other(Box::clone(other)), 332 }) 333 } 334} 335 336impl Debug for Did { 337 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 338 match &self.0 { 339 DidInner::Plc(data) => f.debug_tuple("DidInner::Plc").field(data).finish(), 340 DidInner::Web { len, ptr } => { 341 // SAFETY: ptr and len come from an unpacked DidWebDomain 342 // ManuallyDrop prevents dropping the contents 343 let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) }; 344 f.debug_tuple("DidInner::Web").field(&*web).finish() 345 } 346 DidInner::Other(other) => f.debug_tuple("DidInner::Other").field(other).finish(), 347 } 348 } 349} 350 351impl Display for Did { 352 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 353 match &self.0 { 354 DidInner::Plc(data) => { 355 let mut bytes = vec![0u8; 32]; 356 plc_codec::encode_plc(DidInner::Plc(*data), bytes.as_mut_array::<32>().unwrap()); 357 let string = String::from_utf8(bytes).expect("Encoded 
value should be UTF-8"); 358 write!(f, "{string}") 359 } 360 DidInner::Web { len, ptr } => { 361 // SAFETY: ptr and len come from an unpacked DidWebDomain 362 // ManuallyDrop prevents dropping the contents 363 let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) }; 364 write!(f, "did:web:{}", web.as_str()) 365 } 366 DidInner::Other(other) => { 367 write!(f, "did:{}:{}", other.method, other.identifier) 368 } 369 } 370 } 371} 372 373#[cfg(test)] 374mod tests { 375 use super::*; 376 377 #[test] 378 fn did_type_size() { 379 assert_eq!(size_of::<Did>(), 16); 380 } 381 382 fn did_syntax_valid_lines() -> impl Iterator<Item = &'static str> { 383 include_str!("./did_syntax_valid.txt") 384 .lines() 385 .filter(|l| !l.is_empty() && !l.starts_with('#')) 386 } 387 388 fn did_syntax_invalid_lines() -> impl Iterator<Item = &'static str> { 389 include_str!("./did_syntax_invalid.txt") 390 .lines() 391 .filter(|l| !l.is_empty() && !l.starts_with('#')) 392 } 393 394 #[test_case::test_case("did:plc:c6te24qg5hx54qgegqylpqkx" => [0x17, 0xa6, 0x4d, 0x72, 0x06, 0xe9, 0xef, 0xde, 0x40, 0xc4, 0x34, 0x30, 0xb7, 0xc1, 0x57] 395 )] 396 #[test_case::test_case("did:plc:abcdefghijklmnopqrstuvwx" => [0x00, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6, 0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56, 0xd7] 397 )] 398 #[test_case::test_case("did:plc:abcdefghabcdefghabcdefgh" => [0x00, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7] 399 )] 400 #[test_case::test_case("did:plc:234567234567234567234567" => [0xd6, 0xf9, 0xdf, 0x7f, 0x5b, 0xe7, 0x7d, 0xfd, 0x6f, 0x9d, 0xf7, 0xf5, 0xbe, 0x77, 0xdf] 401 )] 402 fn plc_str_to_bytes_ok(ident: &str) -> [u8; 15] { 403 let DidInner::Plc(bytes) = Did::from_str(ident).unwrap().0 else { panic!() }; 404 bytes 405 } 406 407 #[test_case::test_case(&[0x17u8, 0xa6, 0x4d, 0x72, 0x06, 0xe9, 0xef, 0xde, 0x40, 0xc4, 0x34, 0x30, 0xb7, 0xc1, 0x57] => "did:plc:c6te24qg5hx54qgegqylpqkx" 408 )] 409 #[test_case::test_case(&[0x00u8, 
0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6, 0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56, 0xd7] => "did:plc:abcdefghijklmnopqrstuvwx" 410 )] 411 #[test_case::test_case(&[0x00u8, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7] => "did:plc:abcdefghabcdefghabcdefgh" 412 )] 413 #[test_case::test_case(&[0xd6u8, 0xf9, 0xdf, 0x7f, 0x5b, 0xe7, 0x7d, 0xfd, 0x6f, 0x9d, 0xf7, 0xf5, 0xbe, 0x77, 0xdf] => "did:plc:234567234567234567234567" 414 )] 415 fn bytes_to_plc_str_ok(bytes: &[u8; 15]) -> String { 416 Did(DidInner::Plc(bytes.to_owned())).to_string() 417 } 418 419 #[test] 420 fn did_syntax_valid() { 421 for l in did_syntax_valid_lines() { 422 Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); 423 } 424 } 425 426 #[test] 427 fn did_syntax_invalid() { 428 for l in did_syntax_invalid_lines() { 429 Did::from_str(l).expect_err(&format!("Parsing {l} should have failed")); 430 } 431 } 432 433 #[test] 434 fn did_equals() { 435 for l in did_syntax_valid_lines() { 436 let a = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); 437 let b = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); 438 assert_eq!(a, b); 439 } 440 } 441 442 #[test] 443 fn did_clone_equals() { 444 for l in did_syntax_valid_lines() { 445 let a = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); 446 let b = a.clone(); 447 assert_eq!(a, b, "cloned item should be equal"); 448 } 449 } 450 451 #[test] 452 fn did_clone_drop_safe() { 453 for l in did_syntax_valid_lines() { 454 let a = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); 455 let b = a.clone(); 456 drop(a); 457 assert_eq!(b.to_string(), l, "cloned item roundtrip should work after drop"); 458 459 let a = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); 460 let b = a.clone(); 461 drop(b); 462 assert_eq!(a.to_string(), l, "original item roundtrip should work after drop"); 463 } 464 } 465 466 #[test] 467 fn did_str_roundtrip() { 468 for l in 
did_syntax_valid_lines() { 469 let did = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); 470 assert_eq!(did.to_string(), l); 471 } 472 } 473 474 #[test] 475 #[ignore] 476 fn did_parts_roundtrip() { 477 // TODO Fix once identifier() is fixed 478 // for l in did_syntax_valid_lines() { 479 // let did = Did::from_str(l).expect(&format!("Parsing {l} failed")); 480 // assert_eq!(format!("did:{}:{}", did.method(), did.identifier()), l); 481 // } 482 } 483}