A highly-optimized library for atproto DIDs.
1#![forbid(unsafe_op_in_unsafe_fn)]
2//! This library implements a memory-efficient container for atproto DIDs.
3//!
4//! The [`Did`] type is guaranteed to take up exactly 16 bytes.
5//! - `did:plc` is represented with just these 16 bytes
6//! - `did:web` uses up to 255 bytes of heap
7//! - other DID methods (not officially supported by atproto) allocate heap space as needed
8//!
9//! ---
10//!
11//! The most common DID in the Atmosphere (the atproto ecosystem) is `did:plc`.
//! Currently, its identifier is exactly 24 characters of base32. At 5 bits per character,
13//! that is 120 bits, or 15 bytes. This leaves just enough space for
14//! a single-byte discriminator, making this implementation highly-optimized
15//! for `did:plc` while also allowing it to represent other DID methods.
16//! **The PLC identifier is expected to be exactly 24 characters long.**
17//!
18//! Atproto currently only supports host-level `did:web` DIDs, so the identifier is a web domain.
19//! The maximum length of a web domain is 255 bytes, so a [`Did`] representing a `did:web` allocates
20//! the necessary space on the heap. **Parsing a `did:web` with an unsupported format fails**.
21//! _The only exception is `localhost`, which supports an optional port
22//! (as in `localhost%3A12345`)._
23//!
24//! Other DID methods are currently unsupported by atproto, but not strictly disallowed.
25//! DID methods that are not `plc` or `web` have their method and identifier strings
26//! allocated on the heap.
27
28mod plc_codec;
29mod web_domain;
30
31use std::{
32 cmp::Ordering,
33 fmt::{Debug, Display, Formatter},
34 hash::{Hash, Hasher},
35 mem::ManuallyDrop,
36 str::FromStr,
37};
38
39use thiserror::Error;
40
41use crate::web_domain::DidWebDomain;
42
/// A tightly-packed representation of DIDs for atproto.
///
/// Currently, `did:plc`, the most common kind of DID in the atmosphere,
/// has a base32 identifier with exactly 24 characters.
/// At 5 bits per character, that is 120 bits, or 15 bytes.
/// This type is optimized for `did:plc`, and _is guaranteed_
/// to take up exactly 16 bytes.
///
/// `did:web` has its domain part heap-allocated as a string of up to 255 bytes.
/// Other DID methods (not currently supported by atproto)
/// allocate both the method and identifier as strings.
///
/// Values are obtained by parsing a full DID string through [`FromStr`],
/// or through the `from_*` constructors on this type.
pub struct Did(DidInner);
55
56impl Did {
57 /// Which method does this [`Did`] represent?
58 ///
59 /// See [`Did::method()`] to get the string representation (especially for [`DidKind::Other`]).
60 pub const fn kind(&self) -> DidKind {
61 match &self.0 {
62 DidInner::Plc(_) => DidKind::Plc,
63 DidInner::Web { .. } => DidKind::Web,
64 DidInner::Other(_) => DidKind::Other,
65 }
66 }
67
68 /// The DID method string.
69 pub const fn method(&self) -> &str {
70 match &self.0 {
71 DidInner::Plc(_) => "plc",
72 DidInner::Web { .. } => "web",
73 DidInner::Other(other) => other.method.as_str(),
74 }
75 }
76
77 // TODO: did:plc: will now return the full 32 bytes - patch this to work with that
78 // /// The identifier part of the DID.
79 // ///
80 // /// Allocates a String for `did:plc` (since it is stored as bytes rather than base32).
81 // pub fn identifier<'a>(&'a self) -> Cow<'a, str> {
82 // match &self.0 {
83 // DidInner::Plc(bytes) => {
84 // let mut string = String::with_capacity(24);
85 // bytes_to_plc_ident_append(bytes, &mut string);
86 // Cow::Owned(string)
87 // }
88 // DidInner::Web { ptr, len } => {
89 // // SAFETY: ptr and len come from an unpacked DidWebDomain
90 // let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) };
91 // // SAFETY: web is not dropped, so the string slice remains valid
92 // // The string slice is valid for as long as &self
93 // let ident: &'a str = unsafe { transmute::<_, &'a str>(web.as_str()) };
94 // ident.into()
95 // }
96 // DidInner::Other(other) => other.identifier.as_str().into(),
97 // }
98 // }
99
100 // /// Constructs a [`Did`] from a plain `did:plc` identifier (only the part after `did:plc:`).
101 // pub fn from_plc_ident(_ident: &str) -> Option<Self> {
102 // todo!("Reimplement or remove")
103 // // plc_ident_to_bytes(ident).map(DidInner::Plc).map(Did)
104 // }
105
106 /// Constructs a [`Did`] directly from the identifier's bytes.
107 pub fn from_plc_bytes(bytes: [u8; 15]) -> Self {
108 Did(DidInner::Plc(bytes))
109 }
110
111 /// Constructs a [`Did`] from a web domain (the identifier after `did:web:`).
112 pub fn from_web_domain(domain_str: &str) -> Option<Self> {
113 // Try to parse the domain,
114 // then unpack the raw data for a more packed repr
115 // Storing the DidWebDomain directly in the enum makes it too large
116 let web = DidWebDomain::from_str(domain_str).ok()?;
117 let (ptr, len) = web.into_raw_parts();
118 Some(Did(DidInner::Web { len, ptr }))
119 }
120
121 /// Constructs a [`Did`] from an arbitrary method and identifier.
122 pub fn from_other_method_ident(method: &str, ident: &str) -> Option<Self> {
123 // Method must be lowercase alphanumeric
124 if !method.chars().all(|c: char| c.is_ascii_lowercase() || c.is_ascii_digit()) {
125 return None;
126 }
127
128 // Identifier must be alphanumeric or one of: ._:%-
129 if !ident.chars().all(|c| c.is_ascii_alphanumeric() || "._:%-".contains(c)) {
130 return None;
131 }
132
133 // Identifier cannot end with % or :
134 if ident.ends_with('%') || ident.ends_with(':') {
135 return None;
136 }
137
138 // Identifier cannot be empty
139 if ident.is_empty() {
140 return None;
141 }
142
143 // Max identifier is (currently) 2048
144 if ident.len() > 2048 {
145 return None;
146 }
147
148 // Does not check percent sign encoding!
149
150 Some(Did(DidInner::Other(Box::new(DidOther {
151 method: method.to_owned(),
152 identifier: ident.to_owned(),
153 }))))
154 }
155}
156
/// The reasons parsing a [`Did`] from a string can fail.
#[derive(Debug, Error)]
pub enum DidParseError {
    /// The input does not start with the `did:` prefix.
    #[error("missing `did:`")]
    MissingDid,
    /// No `:` separates the method from the identifier after the `did:` prefix.
    #[error("missing identifier")]
    MissingIdent,
    /// The method-specific constructor rejected the input
    /// (this is also what a malformed `did:plc` reports).
    #[error("invalid identifier")]
    InvalidIdent,
}
166
167impl FromStr for Did {
168 type Err = DidParseError;
169 fn from_str(input: &str) -> Result<Self, Self::Err> {
170 // Eagerly try to parse did:plc
171 // This optimizes the code for the happy path
172 if let Some(input_32) = input.as_bytes().as_array::<32>() {
173 let plc_opt = plc_codec::decode_plc(input_32);
174 if let Ok(did_inner) = plc_opt.try_into() {
175 return Ok(Did(did_inner));
176 }
177 }
178
179 let Some(method_ident) = input.strip_prefix("did:") else {
180 return Err(DidParseError::MissingDid);
181 };
182
183 let Some((method, ident)) = method_ident.split_once(':') else {
184 return Err(DidParseError::MissingIdent);
185 };
186
187 match method {
188 "plc" => {
189 // did:plc is parsed eagerly
190 // If we got here, the DID was not 32 bytes long
191 // We already checked for "did:" and the "plc" method,
192 // so the identifier must be wrong
193 Err(DidParseError::InvalidIdent)
194 }
195 "web" => Self::from_web_domain(ident).ok_or(DidParseError::InvalidIdent),
196 _ => Self::from_other_method_ident(method, ident).ok_or(DidParseError::InvalidIdent),
197 }
198 }
199}
200
/// The family of DID method a [`Did`] is stored as.
///
/// When the actual method string is needed (mostly relevant for
/// [`DidKind::Other`]), use [`Did::method()`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DidKind {
    /// A `did:plc` DID.
    Plc,
    /// A `did:web` DID.
    Web,
    /// A DID stored together with its own method string.
    Other,
}
210
// Internal storage behind `Did`.
//
// The unit test `did_type_size` pins the size of `Did` (and therefore of this
// enum) to 16 bytes, so any change to the variants has to keep that intact.
#[repr(u8)]
enum DidInner {
    /// 1-byte discriminant + 15 identifier bytes
    Plc([u8; 15]),
    /// Contains an unpacked [`DidWebDomain`]
    ///
    /// `ptr` and `len` are the pair returned by `DidWebDomain::into_raw_parts`.
    /// The allocation is owned by this variant: `Drop for DidInner` passes the
    /// pair back to `DidWebDomain::from_raw_parts` to release it.
    Web { len: u8, ptr: *const u8 },
    /// A single-word pointer to two heap-allocated strings
    Other(Box<DidOther>),
}
220
/// Heap-allocated payload for DIDs that are neither `did:plc` nor `did:web`.
///
/// The derived ordering compares `method` first and `identifier` second
/// (field declaration order).
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
struct DidOther {
    /// The DID method (the part between `did:` and the next `:`).
    method: String,
    /// Everything after `did:<method>:`.
    identifier: String,
}
226
227impl Drop for DidInner {
228 fn drop(&mut self) {
229 match self {
230 DidInner::Web { len, ptr } => {
231 // SAFETY: ptr and len come from an unpacked DidWebDomain
232 // did:web needs to run its special drop code
233 // This deallocates the memory at ptr!
234 _ = unsafe { DidWebDomain::from_raw_parts(*ptr, *len) };
235 }
236 DidInner::Plc(_) | DidInner::Other(_) => {
237 // plc and other don't require special drop logic
238 }
239 }
240 }
241}
242
243impl PartialEq for Did {
244 fn eq(&self, other: &Self) -> bool {
245 match (&self.0, &other.0) {
246 (DidInner::Plc(a), DidInner::Plc(b)) => a == b,
247 (
248 DidInner::Web { len: len_a, ptr: ptr_a },
249 DidInner::Web { len: len_b, ptr: ptr_b },
250 ) => {
251 // SAFETY: ptr and len come from an unpacked DidWebDomain
252 // ManuallyDrop prevents dropping the contents
253 let a = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_a, *len_a)) };
254 let b = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_b, *len_b)) };
255 a.eq(&b)
256 }
257 (DidInner::Other(a), DidInner::Other(b)) => a.eq(b),
258 _ => false,
259 }
260 }
261}
262
// Marker impl: `PartialEq for Did` delegates to byte arrays, `DidOther`
// (which derives `Eq`) and `DidWebDomain`.
// NOTE(review): assumes `DidWebDomain` equality is reflexive - confirm in `web_domain`.
impl Eq for Did {}
264
impl PartialOrd for Did {
    /// Always returns `Some`: the ordering is total and defined by [`Ord::cmp`].
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
270
271impl Ord for Did {
272 fn cmp(&self, other: &Self) -> Ordering {
273 match (self, other) {
274 // plc & plc -> compare bytes
275 (Did(DidInner::Plc(a)), Did(DidInner::Plc(b))) => a.cmp(b),
276 // web & web -> compare domains
277 (
278 Did(DidInner::Web { len: len_a, ptr: ptr_a }),
279 Did(DidInner::Web { len: len_b, ptr: ptr_b }),
280 ) => {
281 // SAFETY: ptr and len come from an unpacked DidWebDomain
282 // ManuallyDrop prevents dropping the contents
283 let a = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_a, *len_a)) };
284 let b = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_b, *len_b)) };
285 a.cmp(&b)
286 }
287 // other & other -> compare methods and identifiers
288 (Did(DidInner::Other(a)), Did(DidInner::Other(b))) => a.cmp(b),
289 // different kinds -> just compare the method
290 (a, b) => {
291 debug_assert_ne!(a.kind(), b.kind());
292 a.method().cmp(b.method())
293 }
294 }
295 }
296}
297
298impl Hash for Did {
299 fn hash<H: Hasher>(&self, state: &mut H) {
300 match &self.0 {
301 DidInner::Plc(bytes) => {
302 state.write(b"plc");
303 bytes.hash(state);
304 }
305 DidInner::Web { ptr, len } => {
306 state.write(b"web");
307 let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) };
308 web.hash(state);
309 }
310 DidInner::Other(other) => {
311 other.method.hash(state);
312 }
313 }
314 }
315}
316
317impl Clone for Did {
318 fn clone(&self) -> Self {
319 Did(match &self.0 {
320 DidInner::Plc(data) => DidInner::Plc(*data),
321 DidInner::Web { len, ptr } => {
322 // SAFETY: ptr and len come from an unpacked DidWebDomain
323 // ManuallyDrop prevents dropping the contents
324 // The implementation of DidWebDomain::clone is used
325 let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) };
326 let web = web.clone();
327 // into_raw_parts doesn't drop the data
328 let (ptr, len) = ManuallyDrop::into_inner(web).into_raw_parts();
329 DidInner::Web { len, ptr }
330 }
331 DidInner::Other(other) => DidInner::Other(Box::clone(other)),
332 })
333 }
334}
335
336impl Debug for Did {
337 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
338 match &self.0 {
339 DidInner::Plc(data) => f.debug_tuple("DidInner::Plc").field(data).finish(),
340 DidInner::Web { len, ptr } => {
341 // SAFETY: ptr and len come from an unpacked DidWebDomain
342 // ManuallyDrop prevents dropping the contents
343 let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) };
344 f.debug_tuple("DidInner::Web").field(&*web).finish()
345 }
346 DidInner::Other(other) => f.debug_tuple("DidInner::Other").field(other).finish(),
347 }
348 }
349}
350
351impl Display for Did {
352 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
353 match &self.0 {
354 DidInner::Plc(data) => {
355 let mut bytes = vec![0u8; 32];
356 plc_codec::encode_plc(DidInner::Plc(*data), bytes.as_mut_array::<32>().unwrap());
357 let string = String::from_utf8(bytes).expect("Encoded value should be UTF-8");
358 write!(f, "{string}")
359 }
360 DidInner::Web { len, ptr } => {
361 // SAFETY: ptr and len come from an unpacked DidWebDomain
362 // ManuallyDrop prevents dropping the contents
363 let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) };
364 write!(f, "did:web:{}", web.as_str())
365 }
366 DidInner::Other(other) => {
367 write!(f, "did:{}:{}", other.method, other.identifier)
368 }
369 }
370 }
371}
372
#[cfg(test)]
mod tests {
    use super::*;

    /// Yields the lines of a fixture file, skipping blank lines and `#` comments.
    fn fixture_lines(contents: &'static str) -> impl Iterator<Item = &'static str> {
        contents.lines().filter(|line| !(line.is_empty() || line.starts_with('#')))
    }

    /// DID strings that must parse successfully.
    fn did_syntax_valid_lines() -> impl Iterator<Item = &'static str> {
        fixture_lines(include_str!("./did_syntax_valid.txt"))
    }

    /// DID strings that must be rejected.
    fn did_syntax_invalid_lines() -> impl Iterator<Item = &'static str> {
        fixture_lines(include_str!("./did_syntax_invalid.txt"))
    }

    /// Parses a line that is expected to be a valid DID, panicking with the
    /// offending line otherwise.
    fn parse_valid(line: &str) -> Did {
        Did::from_str(line).unwrap_or_else(|_| panic!("Parsing {line} failed"))
    }

    /// The 16-byte size guarantee from the crate docs.
    #[test]
    fn did_type_size() {
        assert_eq!(size_of::<Did>(), 16);
    }

    #[test_case::test_case("did:plc:c6te24qg5hx54qgegqylpqkx" => [0x17, 0xa6, 0x4d, 0x72, 0x06, 0xe9, 0xef, 0xde, 0x40, 0xc4, 0x34, 0x30, 0xb7, 0xc1, 0x57]
    )]
    #[test_case::test_case("did:plc:abcdefghijklmnopqrstuvwx" => [0x00, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6, 0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56, 0xd7]
    )]
    #[test_case::test_case("did:plc:abcdefghabcdefghabcdefgh" => [0x00, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7]
    )]
    #[test_case::test_case("did:plc:234567234567234567234567" => [0xd6, 0xf9, 0xdf, 0x7f, 0x5b, 0xe7, 0x7d, 0xfd, 0x6f, 0x9d, 0xf7, 0xf5, 0xbe, 0x77, 0xdf]
    )]
    fn plc_str_to_bytes_ok(ident: &str) -> [u8; 15] {
        // A did:plc string must decode into the packed 15-byte representation.
        let DidInner::Plc(bytes) = Did::from_str(ident).unwrap().0 else { panic!() };
        bytes
    }

    #[test_case::test_case(&[0x17u8, 0xa6, 0x4d, 0x72, 0x06, 0xe9, 0xef, 0xde, 0x40, 0xc4, 0x34, 0x30, 0xb7, 0xc1, 0x57] => "did:plc:c6te24qg5hx54qgegqylpqkx"
    )]
    #[test_case::test_case(&[0x00u8, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6, 0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56, 0xd7] => "did:plc:abcdefghijklmnopqrstuvwx"
    )]
    #[test_case::test_case(&[0x00u8, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7] => "did:plc:abcdefghabcdefghabcdefgh"
    )]
    #[test_case::test_case(&[0xd6u8, 0xf9, 0xdf, 0x7f, 0x5b, 0xe7, 0x7d, 0xfd, 0x6f, 0x9d, 0xf7, 0xf5, 0xbe, 0x77, 0xdf] => "did:plc:234567234567234567234567"
    )]
    fn bytes_to_plc_str_ok(bytes: &[u8; 15]) -> String {
        // The packed bytes must format back into the full did:plc string.
        let did = Did(DidInner::Plc(*bytes));
        did.to_string()
    }

    /// Every line of the valid fixture parses.
    #[test]
    fn did_syntax_valid() {
        for line in did_syntax_valid_lines() {
            parse_valid(line);
        }
    }

    /// Every line of the invalid fixture is rejected.
    #[test]
    fn did_syntax_invalid() {
        for l in did_syntax_invalid_lines() {
            Did::from_str(l).expect_err(&format!("Parsing {l} should have failed"));
        }
    }

    /// Parsing the same string twice yields equal values.
    #[test]
    fn did_equals() {
        for line in did_syntax_valid_lines() {
            let first = parse_valid(line);
            let second = parse_valid(line);
            assert_eq!(first, second);
        }
    }

    /// A clone compares equal to its source.
    #[test]
    fn did_clone_equals() {
        for line in did_syntax_valid_lines() {
            let original = parse_valid(line);
            let copy = original.clone();
            assert_eq!(original, copy, "cloned item should be equal");
        }
    }

    /// Dropping either the source or the clone leaves the other one usable.
    #[test]
    fn did_clone_drop_safe() {
        for line in did_syntax_valid_lines() {
            let original = parse_valid(line);
            let copy = original.clone();
            drop(original);
            assert_eq!(copy.to_string(), line, "cloned item roundtrip should work after drop");

            let original = parse_valid(line);
            let copy = original.clone();
            drop(copy);
            assert_eq!(
                original.to_string(),
                line,
                "original item roundtrip should work after drop"
            );
        }
    }

    /// Parsing and formatting returns the input string unchanged.
    #[test]
    fn did_str_roundtrip() {
        for line in did_syntax_valid_lines() {
            assert_eq!(parse_valid(line).to_string(), line);
        }
    }

    #[test]
    #[ignore]
    fn did_parts_roundtrip() {
        // TODO Fix once identifier() is fixed
        // for l in did_syntax_valid_lines() {
        //     let did = Did::from_str(l).expect(&format!("Parsing {l} failed"));
        //     assert_eq!(format!("did:{}:{}", did.method(), did.identifier()), l);
        // }
    }
}
483}