#![forbid(unsafe_op_in_unsafe_fn)] //! This library implements a memory-efficient container for atproto DIDs. //! //! The [`Did`] type is guaranteed to take up exactly 16 bytes. //! - `did:plc` is represented with just these 16 bytes //! - `did:web` uses up to 255 bytes of heap //! - other DID methods (not officially supported by atproto) allocate heap space as needed //! //! --- //! //! The most common DID in the Atmosphere (the atproto ecosystem) is `did:plc`. //! Currently, its identifier is exactly 24 characters of base32. At 5 bits per characters, //! that is 120 bits, or 15 bytes. This leaves just enough space for //! a single-byte discriminator, making this implementation highly-optimized //! for `did:plc` while also allowing it to represent other DID methods. //! **The PLC identifier is expected to be exactly 24 characters long.** //! //! Atproto currently only supports host-level `did:web` DIDs, so the identifier is a web domain. //! The maximum length of a web domain is 255 bytes, so a [`Did`] representing a `did:web` allocates //! the necessary space on the heap. **Parsing a `did:web` with an unsupported format fails**. //! _The only exception is `localhost`, which supports an optional port //! (as in `localhost%3A12345`)._ //! //! Other DID methods are currently unsupported by atproto, but not strictly disallowed. //! DID methods that are not `plc` or `web` have their method and identifier strings //! allocated on the heap. //! //! --- //! //! [`Did`] supports `serde` using an optional feature. //! Values are serialized and deserialized using the standard string representation. mod plc_codec; mod web_domain; #[cfg(feature = "serde")] mod serde; use std::{ cmp::Ordering, fmt::{Debug, Display, Formatter}, hash::{Hash, Hasher}, mem::ManuallyDrop, str::FromStr, }; use thiserror::Error; use crate::web_domain::DidWebDomain; /// A tightly-packed representation of DIDs for atproto. /// /// Currently, `did:plc`, the most common kind of DID in the atmosphere, /// has a base32 identifier with exactly 24 characters. /// At 5 bits per characters, that is 120 bits, or 15 bytes. /// This type is optimized for `did:plc`, and _is guaranteed_ /// to take up exactly 16 bytes. /// /// did:web has its domain part heap-allocated as a string of up to 255 bytes. /// Other DID methods (not currently supported by atproto) /// allocate both the method and identifier as strings. pub struct Did(DidInner); impl Did { /// Which method does this [`Did`] represent? /// /// See [`Did::method()`] to get the string representation (especially for [`DidKind::Other`]). pub fn kind(&self) -> DidKind { match &self.0 { DidInner::Plc(_) => DidKind::Plc, DidInner::Web { .. } => DidKind::Web, DidInner::Other(_) => DidKind::Other, } } /// The DID method string. pub fn method(&self) -> &str { match &self.0 { DidInner::Plc(_) => "plc", DidInner::Web { .. } => "web", DidInner::Other(other) => other.method_ident().0, } } /// Constructs a [`Did`] directly from the identifier's bytes. pub fn from_plc_bytes(bytes: [u8; 15]) -> Self { Did(DidInner::Plc(bytes)) } /// Constructs a [`Did`] from a web domain (the identifier after `did:web:`). pub fn from_web_domain(domain_str: &str) -> Option { // Try to parse the domain, // then unpack the raw data for a more packed repr // Storing the DidWebDomain directly in the enum makes it too large let web = DidWebDomain::from_str(domain_str).ok()?; let (ptr, len) = web.into_raw_parts(); Some(Did(DidInner::Web { len, ptr })) } } /// Validates a DID `method:ident` string fn validate_method_ident(method_ident: &str) -> Result<(), ParseDidError> { fn validate_method(method: &str) -> bool { // Method must be lowercase alphanumeric method.chars().all(|c: char| c.is_ascii_lowercase() || c.is_ascii_digit()) } fn validate_ident(ident: &str) -> bool { // Identifier must be alphanumeric or one of: ._:%- // Identifier cannot end with % or : // Identifier cannot be empty // Max identifier is (currently) 2048 // Does not currently check percent sign encoding! ident.chars().all(|c| c.is_ascii_alphanumeric() || "._:%-".contains(c)) && !ident.ends_with('%') && !ident.ends_with(':') && !ident.is_empty() && ident.len() <= 2048 } // A missing `:` is considered an invalid (nonexistent) identifier let (method, ident) = method_ident.split_once(':').ok_or(DidErrorKind::InvalidIdent)?; validate_method(method).then_some(()).ok_or(DidErrorKind::InvalidMethod)?; validate_ident(ident).then_some(()).ok_or(DidErrorKind::InvalidIdent)?; Ok(()) } /// An error which can be returned when parsing a DID. /// /// Use [`ParseDidError::kind()`] to get error details. #[derive(Clone, Debug, Error, Eq, PartialEq)] #[error(transparent)] pub struct ParseDidError { #[from] kind: DidErrorKind, } impl ParseDidError { pub const fn kind(&self) -> &DidErrorKind { &self.kind } } /// Details about why a DID failed to parse. #[derive(Clone, Debug, Error, Eq, PartialEq)] #[non_exhaustive] pub enum DidErrorKind { #[error("missing `did:`")] MissingDid, #[error("invalid DID method")] InvalidMethod, #[error("invalid identifier")] InvalidIdent, } impl FromStr for Did { type Err = ParseDidError; fn from_str(input: &str) -> Result { // Eagerly try to parse did:plc // This optimizes the code for the happy path if let Some(input_32) = input.as_bytes().as_array::<32>() { let plc_opt = plc_codec::decode_plc(input_32); if let Ok(did_inner) = plc_opt.try_into() { return Ok(Did(did_inner)); } } let Some(method_ident) = input.strip_prefix("did:") else { return Err(DidErrorKind::MissingDid.into()); }; let Some((method, ident)) = method_ident.split_once(':') else { return Err(DidErrorKind::InvalidIdent.into()); }; match method { "plc" => { // did:plc is parsed eagerly // If we got here, the DID was not 32 bytes long // We already checked for "did:" and the "plc" method, // so the identifier must be wrong Err(DidErrorKind::InvalidIdent.into()) } "web" => Self::from_web_domain(ident).ok_or(DidErrorKind::InvalidIdent.into()), _ => validate_method_ident(method_ident) .map(|_| Did(DidInner::Other(Box::new(DidOther(method_ident.to_owned()))))), } } } /// Which method does this [`Did`] represent? /// /// See [`Did::method()`] to get the string representation (especially for [`DidKind::Other`]). /// /// Atproto currently officially supports `did:plc` and `did:web`, /// but more methods may be supported in the future. #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] #[non_exhaustive] pub enum DidKind { Plc, Web, Other, } #[repr(u8)] enum DidInner { /// 1-byte discriminant + 15 identifier bytes Plc([u8; 15]), /// Contains an unpacked [`DidWebDomain`] Web { len: u8, ptr: *const u8 }, /// A single-word pointer to a heap-allocated string Other(Box), } /// A simple String wrapper. Contains at least one `:` character /// (between the method and the identifier). #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] struct DidOther(String); impl DidOther { /// DID method, the part before the `:` #[inline] pub fn method_ident(&self) -> (&str, &str) { self.0.split_once(':').unwrap() } /// The DID string without the `did:` prefix. #[inline] pub fn as_str(&self) -> &str { self.as_ref() } } impl AsRef for DidOther { fn as_ref(&self) -> &str { &self.0 } } impl Drop for DidInner { fn drop(&mut self) { match self { DidInner::Web { len, ptr } => { // SAFETY: ptr and len come from an unpacked DidWebDomain // did:web needs to run its special drop code // This deallocates the memory at ptr! _ = unsafe { DidWebDomain::from_raw_parts(*ptr, *len) }; } DidInner::Plc(_) | DidInner::Other(_) => { // plc and other don't require special drop logic } } } } impl PartialEq for Did { fn eq(&self, other: &Self) -> bool { match (&self.0, &other.0) { (DidInner::Plc(a), DidInner::Plc(b)) => a == b, ( DidInner::Web { len: len_a, ptr: ptr_a }, DidInner::Web { len: len_b, ptr: ptr_b }, ) => { // SAFETY: ptr and len come from an unpacked DidWebDomain // ManuallyDrop prevents dropping the contents let a = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_a, *len_a)) }; let b = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_b, *len_b)) }; a.eq(&b) } (DidInner::Other(a), DidInner::Other(b)) => a.eq(b), _ => false, } } } impl Eq for Did {} impl PartialOrd for Did { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for Did { fn cmp(&self, other: &Self) -> Ordering { match (self, other) { // plc & plc -> compare bytes (Did(DidInner::Plc(a)), Did(DidInner::Plc(b))) => a.cmp(b), // web & web -> compare domains ( Did(DidInner::Web { len: len_a, ptr: ptr_a }), Did(DidInner::Web { len: len_b, ptr: ptr_b }), ) => { // SAFETY: ptr and len come from an unpacked DidWebDomain // ManuallyDrop prevents dropping the contents let a = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_a, *len_a)) }; let b = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_b, *len_b)) }; a.cmp(&b) } // other & other -> compare methods and identifiers (Did(DidInner::Other(a)), Did(DidInner::Other(b))) => a.cmp(b), // different kinds -> just compare the method (a, b) => { debug_assert_ne!(a.kind(), b.kind()); a.method().cmp(b.method()) } } } } impl Hash for Did { fn hash(&self, state: &mut H) { match &self.0 { DidInner::Plc(bytes) => { state.write(b"plc"); bytes.hash(state); } DidInner::Web { ptr, len } => { state.write(b"web"); let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) }; web.hash(state); } DidInner::Other(other) => { other.hash(state); } } } } impl Clone for Did { fn clone(&self) -> Self { Did(match &self.0 { DidInner::Plc(data) => DidInner::Plc(*data), DidInner::Web { len, ptr } => { // SAFETY: ptr and len come from an unpacked DidWebDomain // ManuallyDrop prevents dropping the contents // The implementation of DidWebDomain::clone is used let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) }; let web = web.clone(); // into_raw_parts doesn't drop the data let (ptr, len) = ManuallyDrop::into_inner(web).into_raw_parts(); DidInner::Web { len, ptr } } DidInner::Other(other) => DidInner::Other(Box::clone(other)), }) } } impl Debug for Did { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match &self.0 { DidInner::Plc(data) => f.debug_tuple("DidInner::Plc").field(data).finish(), DidInner::Web { len, ptr } => { // SAFETY: ptr and len come from an unpacked DidWebDomain // ManuallyDrop prevents dropping the contents let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) }; f.debug_tuple("DidInner::Web").field(&*web).finish() } DidInner::Other(other) => f.debug_tuple("DidInner::Other").field(other).finish(), } } } impl Display for Did { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match &self.0 { DidInner::Plc(data) => { // A custom ToString impl is currently not possible // trying to introduce more unsafe here didn't yield any significant perf+ // Adding more unsafe for negligible gains doesn't feel worth it let mut bytes = vec![0u8; 32]; plc_codec::encode_plc(DidInner::Plc(*data), bytes.as_mut_array::<32>().unwrap()); let string = String::from_utf8(bytes).expect("Encoded value should be UTF-8"); write!(f, "{string}") } DidInner::Web { len, ptr } => { // SAFETY: ptr and len come from an unpacked DidWebDomain // ManuallyDrop prevents dropping the contents let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) }; write!(f, "did:web:{}", web.as_str()) } DidInner::Other(other) => { write!(f, "did:{}", other.as_str()) } } } } #[cfg(test)] mod tests { use super::*; #[test] fn did_type_size() { assert_eq!(size_of::(), 16); } fn did_syntax_valid_lines() -> impl Iterator { include_str!("./did_syntax_valid.txt") .lines() .filter(|l| !l.is_empty() && !l.starts_with('#')) } fn did_syntax_invalid_lines() -> impl Iterator { include_str!("./did_syntax_invalid.txt") .lines() .filter(|l| !l.is_empty() && !l.starts_with('#')) } #[test_case::test_case("did:plc:c6te24qg5hx54qgegqylpqkx" => [0x17, 0xa6, 0x4d, 0x72, 0x06, 0xe9, 0xef, 0xde, 0x40, 0xc4, 0x34, 0x30, 0xb7, 0xc1, 0x57] )] #[test_case::test_case("did:plc:abcdefghijklmnopqrstuvwx" => [0x00, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6, 0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56, 0xd7] )] #[test_case::test_case("did:plc:abcdefghabcdefghabcdefgh" => [0x00, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7] )] #[test_case::test_case("did:plc:234567234567234567234567" => [0xd6, 0xf9, 0xdf, 0x7f, 0x5b, 0xe7, 0x7d, 0xfd, 0x6f, 0x9d, 0xf7, 0xf5, 0xbe, 0x77, 0xdf] )] fn plc_str_to_bytes_ok(ident: &str) -> [u8; 15] { let DidInner::Plc(bytes) = Did::from_str(ident).unwrap().0 else { panic!() }; bytes } #[test_case::test_case(&[0x17u8, 0xa6, 0x4d, 0x72, 0x06, 0xe9, 0xef, 0xde, 0x40, 0xc4, 0x34, 0x30, 0xb7, 0xc1, 0x57] => "did:plc:c6te24qg5hx54qgegqylpqkx" )] #[test_case::test_case(&[0x00u8, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6, 0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56, 0xd7] => "did:plc:abcdefghijklmnopqrstuvwx" )] #[test_case::test_case(&[0x00u8, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7] => "did:plc:abcdefghabcdefghabcdefgh" )] #[test_case::test_case(&[0xd6u8, 0xf9, 0xdf, 0x7f, 0x5b, 0xe7, 0x7d, 0xfd, 0x6f, 0x9d, 0xf7, 0xf5, 0xbe, 0x77, 0xdf] => "did:plc:234567234567234567234567" )] fn bytes_to_plc_str_ok(bytes: &[u8; 15]) -> String { Did(DidInner::Plc(bytes.to_owned())).to_string() } #[test] fn did_syntax_valid() { for l in did_syntax_valid_lines() { Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); } } #[test] fn did_syntax_invalid() { for l in did_syntax_invalid_lines() { Did::from_str(l).expect_err(&format!("Parsing {l} should have failed")); } } #[test] fn did_equals() { for l in did_syntax_valid_lines() { let a = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); let b = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); assert_eq!(a, b); } } #[test] fn did_clone_equals() { for l in did_syntax_valid_lines() { let a = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); let b = a.clone(); assert_eq!(a, b, "cloned item should be equal"); } } #[test] fn did_clone_drop_safe() { for l in did_syntax_valid_lines() { let a = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); let b = a.clone(); drop(a); assert_eq!(b.to_string(), l, "cloned item roundtrip should work after drop"); let a = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); let b = a.clone(); drop(b); assert_eq!(a.to_string(), l, "original item roundtrip should work after drop"); } } #[test] fn did_str_roundtrip() { for l in did_syntax_valid_lines() { let did = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed")); assert_eq!(did.to_string(), l); } } }