forked from
smokesignal.events/atproto-plc
Rust and WASM did-method-plc tools and structures
1//! Encoding utilities for base32, base64url, and DAG-CBOR
2
3use crate::error::{PlcError, Result};
4use base64::engine::general_purpose::URL_SAFE_NO_PAD;
5use base64::Engine;
6use cid::Cid;
7use data_encoding::BASE32_NOPAD;
8use multihash::Multihash;
9use serde::{Deserialize, Serialize};
10use sha2::{Digest, Sha256};
11
/// Base32 alphabet used for did:plc identifiers.
///
/// Consists of the lowercase letters `a`-`z` followed by the digits `2`-`7`;
/// the digits `0`, `1`, `8` and `9` are not part of the alphabet.
const BASE32_ALPHABET: &str = "abcdefghijklmnopqrstuvwxyz234567";
15
/// Maximum size for an operation in bytes.
///
/// `dag_cbor_encode` compares the length of its encoded output against this
/// limit and fails when the output is longer.
pub const MAX_OPERATION_SIZE: usize = 7500;
18
19/// Encode bytes to base32 using the lowercase alphabet
20///
21/// # Examples
22///
23/// ```
24/// use atproto_plc::encoding::base32_encode;
25///
26/// let data = b"hello world";
27/// let encoded = base32_encode(data);
28/// assert!(!encoded.is_empty());
29/// ```
30pub fn base32_encode(data: &[u8]) -> String {
31 BASE32_NOPAD.encode(data).to_lowercase()
32}
33
34/// Decode base32 string to bytes
35///
36/// # Errors
37///
38/// Returns `PlcError::InvalidBase32` if the input contains invalid characters
39pub fn base32_decode(s: &str) -> Result<Vec<u8>> {
40 // Validate that all characters are in the allowed alphabet
41 if !s.chars().all(|c| BASE32_ALPHABET.contains(c)) {
42 return Err(PlcError::InvalidBase32(format!(
43 "String contains invalid characters. Allowed: {}",
44 BASE32_ALPHABET
45 )));
46 }
47
48 BASE32_NOPAD
49 .decode(s.to_uppercase().as_bytes())
50 .map_err(|e| PlcError::InvalidBase32(e.to_string()))
51}
52
53/// Encode bytes to base64url without padding
54///
55/// # Examples
56///
57/// ```
58/// use atproto_plc::encoding::base64url_encode;
59///
60/// let data = b"hello world";
61/// let encoded = base64url_encode(data);
62/// assert!(!encoded.contains('='));
63/// ```
64pub fn base64url_encode(data: &[u8]) -> String {
65 URL_SAFE_NO_PAD.encode(data)
66}
67
68/// Decode base64url string to bytes
69///
70/// # Errors
71///
72/// Returns `PlcError::InvalidBase64Url` if the input is not valid base64url
73pub fn base64url_decode(s: &str) -> Result<Vec<u8>> {
74 URL_SAFE_NO_PAD
75 .decode(s.as_bytes())
76 .map_err(|e| PlcError::InvalidBase64Url(e.to_string()))
77}
78
79/// Encode a value to DAG-CBOR format
80///
81/// # Errors
82///
83/// Returns `PlcError::DagCborError` if serialization fails or the result exceeds MAX_OPERATION_SIZE
84pub fn dag_cbor_encode<T: Serialize>(value: &T) -> Result<Vec<u8>> {
85 let bytes = serde_ipld_dagcbor::to_vec(value)
86 .map_err(|e| PlcError::DagCborError(e.to_string()))?;
87
88 if bytes.len() > MAX_OPERATION_SIZE {
89 return Err(PlcError::OperationTooLarge(bytes.len()));
90 }
91
92 Ok(bytes)
93}
94
95/// Decode a value from DAG-CBOR format
96///
97/// # Errors
98///
99/// Returns `PlcError::DagCborDecodeError` if deserialization fails
100pub fn dag_cbor_decode<T: for<'de> Deserialize<'de>>(data: &[u8]) -> Result<T> {
101 serde_ipld_dagcbor::from_slice(data)
102 .map_err(|e| PlcError::DagCborDecodeError(e.to_string()))
103}
104
105/// Compute the CID (Content Identifier) of data using SHA-256 and dag-cbor codec
106///
107/// The CID is computed as:
108/// 1. Hash the data with SHA-256
109/// 2. Create a multihash with the hash
110/// 3. Create a CIDv1 with dag-cbor codec
111/// 4. Encode as base32
112///
113/// # Examples
114///
115/// ```
116/// use atproto_plc::encoding::compute_cid;
117///
118/// let data = b"hello world";
119/// let cid = compute_cid(data).unwrap();
120/// assert!(cid.starts_with("bafy"));
121/// ```
122pub fn compute_cid(data: &[u8]) -> Result<String> {
123 // Hash the data with SHA-256
124 let hash_bytes = sha256(data);
125
126 // Create multihash (0x12 = SHA-256, followed by length and hash)
127 let mut multihash_bytes = Vec::with_capacity(34); // 2 bytes header + 32 bytes hash
128 multihash_bytes.push(0x12); // SHA-256 code
129 multihash_bytes.push(32); // Hash length
130 multihash_bytes.extend_from_slice(&hash_bytes);
131
132 // Create multihash
133 let multihash = Multihash::from_bytes(&multihash_bytes)
134 .map_err(|e| PlcError::InvalidCid(format!("Failed to create multihash: {:?}", e)))?;
135
136 // Create CIDv1 with dag-cbor codec (0x71)
137 let cid = Cid::new_v1(0x71, multihash);
138
139 Ok(cid.to_string())
140}
141
142/// Hash data with SHA-256 and return the digest
143pub fn sha256(data: &[u8]) -> [u8; 32] {
144 let mut hasher = Sha256::new();
145 hasher.update(data);
146 hasher.finalize().into()
147}
148
149/// Validate that a string is a valid base32 encoding
150///
151/// Returns `true` if all characters are in the allowed alphabet
152pub fn is_valid_base32(s: &str) -> bool {
153 !s.is_empty() && s.chars().all(|c| BASE32_ALPHABET.contains(c))
154}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// Renders bytes as lowercase hex so digests can be compared to published vectors.
    fn to_hex(bytes: &[u8]) -> String {
        bytes.iter().map(|b| format!("{:02x}", b)).collect()
    }

    #[test]
    fn test_base32_roundtrip() {
        let data = b"hello world";
        let encoded = base32_encode(data);
        // Known answer: RFC 4648 base32 of "hello world", lowercased, no padding.
        assert_eq!(encoded, "nbswy3dpeb3w64tmmq");
        let decoded = base32_decode(&encoded).unwrap();
        assert_eq!(data, decoded.as_slice());
    }

    #[test]
    fn test_base32_invalid_chars() {
        assert!(base32_decode("0189").is_err()); // Invalid chars: 0, 1, 8, 9
        assert!(base32_decode("ABCD").is_err()); // Uppercase not allowed
    }

    #[test]
    fn test_base64url_roundtrip() {
        let data = b"hello world";
        let encoded = base64url_encode(data);
        // Known answer: base64 of "hello world" with the padding stripped.
        assert_eq!(encoded, "aGVsbG8gd29ybGQ");
        assert!(!encoded.contains('='));
        let decoded = base64url_decode(&encoded).unwrap();
        assert_eq!(data, decoded.as_slice());
    }

    #[test]
    fn test_base64url_invalid_input() {
        // Space and '!' are not part of the base64url alphabet.
        assert!(base64url_decode("not base64!").is_err());
    }

    #[test]
    fn test_is_valid_base32() {
        assert!(is_valid_base32("abcdefghijklmnopqrstuvwxyz234567"));
        assert!(!is_valid_base32("0189"));
        assert!(!is_valid_base32("ABCD"));
        assert!(!is_valid_base32(""));
    }

    #[test]
    fn test_sha256() {
        // Known-answer vectors; a length check alone is vacuous because the
        // return type is already `[u8; 32]`.
        assert_eq!(
            to_hex(&sha256(b"hello world")),
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
        assert_eq!(
            to_hex(&sha256(b"")),
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }

    #[test]
    fn test_compute_cid() {
        let data = b"hello world";
        let cid = compute_cid(data).unwrap();

        // 'b' is the base32 multibase prefix; "afyrei" is the base32 rendering
        // of the header bytes 0x01 (CIDv1), 0x71 (dag-cbor), 0x12 0x20 (sha2-256, 32 bytes).
        assert!(cid.starts_with("bafyrei"));

        // Decode the payload after the multibase prefix and check its structure.
        let raw = base32_decode(&cid[1..]).unwrap();
        assert_eq!(raw.len(), 36);
        assert_eq!(&raw[..4], &[0x01u8, 0x71, 0x12, 0x20][..]);
        assert_eq!(&raw[4..], &sha256(data)[..]);

        // Same input gives the same CID; different input gives a different one.
        assert_eq!(cid, compute_cid(data).unwrap());
        assert_ne!(cid, compute_cid(b"hello worlds").unwrap());
    }

    #[test]
    fn test_dag_cbor_roundtrip() {
        let value = String::from("hello");
        let bytes = dag_cbor_encode(&value).unwrap();
        let decoded: String = dag_cbor_decode(&bytes).unwrap();
        assert_eq!(decoded, value);
    }

    #[test]
    fn test_dag_cbor_encode_rejects_oversized() {
        // The payload alone already exceeds the limit, before any CBOR framing.
        let oversized = "a".repeat(MAX_OPERATION_SIZE + 1);
        assert!(matches!(
            dag_cbor_encode(&oversized),
            Err(PlcError::OperationTooLarge(_))
        ));
    }
}