A highly-optimized library for atproto DIDs.
at main 673 lines 28 kB view raw
1#![doc = include_str!("plc_codec.md")] 2 3use std::{arch::x86_64, mem}; 4 5use crate::DidInner; 6 7/// A helper type representing an `Option<DidInner::Plc>`, in a sense. 8/// 9/// Can be turned into a `DidInner::Plc` via `try_into` only if the first byte is 0. 10#[repr(transparent)] 11pub struct OptionDidPlc([u8; 16]); 12 13impl OptionDidPlc { 14 pub const INVALID: OptionDidPlc = { 15 let mut val = OptionDidPlc([0; 16]); 16 val.0[0] = 1; 17 val 18 }; 19} 20 21impl TryFrom<OptionDidPlc> for DidInner { 22 type Error = (); 23 24 fn try_from(val: OptionDidPlc) -> Result<Self, Self::Error> { 25 // Compile-time check that the first byte of `DidInner::Plc` is a discriminant 26 // with a value of 0 27 const { 28 let plc_val = DidInner::Plc([0xff; 15]); 29 // SAFETY: DidInner is 16 bytes 30 let bytes = unsafe { mem::transmute::<DidInner, [u8; 16]>(plc_val) }; 31 assert!(bytes[0] == 0, "The discriminant of `DidInner::Plc` should be 0"); 32 } 33 34 if val.0[0] == 0 { 35 // SAFETY: The discriminant of `DidInner::Plc` is 0 36 unsafe { Ok(mem::transmute::<OptionDidPlc, DidInner>(val)) } 37 } else { 38 Err(()) 39 } 40 } 41} 42 43/// Validates and decodes a `did:plc:` string, producing a `DidInner::Plc` if successful. 44/// 45/// [`OptionDidPlc::try_into()`] produces an `Ok(DidInner)` if successful. 46#[inline] 47pub fn decode_plc(plc_str: &[u8; 32]) -> OptionDidPlc { 48 if is_x86_feature_detected!("avx2") { 49 // SAFETY: avx2 is detected 50 unsafe { decode_plc_avx2(plc_str) } 51 } else { 52 decode_plc_non_avx(plc_str) 53 } 54} 55 56/// Validates and decodes a `did:plc:` string, producing a `DidInner::Plc` if successful. 57/// 58/// Uses AVX2 SIMD instructions. 59/// 60/// [`OptionDidPlc::try_into()`] produces an `Ok(DidInner)` if successful. 61#[target_feature(enable = "avx2")] 62#[inline] 63fn decode_plc_avx2(plc_str: &[u8; 32]) -> OptionDidPlc { 64 // SAFETY: plc_str is 32 bytes (256 bits) 65 let data = unsafe { x86_64::_mm256_loadu_si256(plc_str.as_ptr() as _) }; 66 67 // For "did:plc:abcdefghijklmnopqrstuvwx", the debugger shows `data` as: 68 // [0] = {i64} 4207325706165971300 [0x3a636c703a646964] 69 // [1] = {i64} 7523094288207667809 [0x6867666564636261] 70 // [2] = {i64} 8101815670912281193 [0x706f6e6d6c6b6a69] 71 // [3] = {i64} 8680537053616894577 [0x7877767574737271] 72 // The did:plc string is loaded into the SIMD register from LSB to MSB 73 74 // let did_plc_eq_mask = x86_64::_mm256_cmpeq_epi8( 75 // data, 76 // x86_64::_mm256_set_epi64x( 77 // 0, // The others can be 0 78 // 0, 79 // 0, 80 // 0x3a636c703a646964, // "did:plc:" with bytes in reverse order 81 // ), 82 // ); 83 84 // TODO: rewrite to be more readable (macros, const eval) 85 86 let alpha_mask = { 87 x86_64::_mm256_andnot_si256( 88 x86_64::_mm256_cmpgt_epi8( 89 data, 90 x86_64::_mm256_set_epi64x( 91 0x7a7a7a7a7a7a7a7a, // "z" repeated 92 0x7a7a7a7a7a7a7a7a, 93 0x7a7a7a7a7a7a7a7a, 94 0x3a636c703a646964, // "did:plc:" with bytes in reverse order 95 ), 96 ), 97 x86_64::_mm256_cmpgt_epi8( 98 data, 99 x86_64::_mm256_set_epi64x( 100 0x6060606060606060, // "a" - 1 repeated 101 0x6060606060606060, 102 0x6060606060606060, 103 0x39626b6f39636863, // "did:plc:" as above, just 1 bit lower 104 ), 105 ), 106 ) 107 }; 108 // let alpha_mask = x86_64::_mm256_cmpgt_epi8(data, x86_64::_mm256_set1_epi8((b'a' - 1) as _)); 109 110 let num_mask = { 111 x86_64::_mm256_andnot_si256( 112 x86_64::_mm256_cmpgt_epi8( 113 data, 114 x86_64::_mm256_set_epi64x( 115 0x3737373737373737, // "7" repeated 116 0x3737373737373737, 117 0x3737373737373737, 118 0x3a636c703a646964, // "did:plc:" with bytes in reverse order 119 ), 120 ), 121 x86_64::_mm256_cmpgt_epi8( 122 data, 123 x86_64::_mm256_set_epi64x( 124 0x3131313131313131, // "2" - 1 repeated 125 0x3131313131313131, 126 0x3131313131313131, 127 0x39626b6f39636863, // "did:plc:" as above, just 1 bit lower 128 ), 129 ), 130 ) 131 }; 132 133 let char_to_val = x86_64::_mm256_blendv_epi8( 134 x86_64::_mm256_set1_epi8((b'2' - 26) as i8), 135 x86_64::_mm256_set1_epi8(b'a' as i8), 136 alpha_mask, 137 ); 138 139 let values = x86_64::_mm256_sub_epi8(data, char_to_val); 140 141 let is_valid = { 142 // alpha and num are masks for a..=z and 2..=7 respectively 143 // In addition, they also both check for the "did:plc:" prefix 144 let alpha = x86_64::_mm256_movemask_epi8(alpha_mask) as u32; 145 let num = x86_64::_mm256_movemask_epi8(num_mask) as u32; 146 let base32 = alpha | num; 147 148 base32 == !0 // all ones 149 }; 150 151 // Current register layout: 152 // MSB ____________________________________________________________________________________ 153 // | 000xxxxx | 000wwwww | 000vvvvv | 000uuuuu | 000ttttt | 000sssss | 000rrrrr | 000qqqqq | 154 // | 000ppppp | 000ooooo | 000nnnnn | 000mmmmm | 000lllll | 000kkkkk | 000jjjjj | 000iiiii | 155 // | 000hhhhh | 000ggggg | 000fffff | 000eeeee | 000ddddd | 000ccccc | 000bbbbb | 000aaaaa | 156 // | 00111010 | 01100011 | 01101100 | 01110000 | 00111010 | 01100100 | 01101001 | 01100100 | 157 // ____________________________________________________________________________________ LSB 158 // If the identifier was valid base32, all bytes (a-x) have been converted to 5-bit values 159 // (If not, the bytes will contain garbage, but `is_valid` will be set to 0) 160 161 // Permute in order to bring some values to the lower half 162 let reg1 = x86_64::_mm256_permute4x64_epi64::<0b11100110>(values); // 3, 2, 1, 2 163 164 // Swizzle to allow for u32 bit shifts 165 #[rustfmt::skip] 166 let reg1 = x86_64::_mm256_shuffle_epi8( 167 reg1, 168 x86_64::_mm256_set_epi8( 169 15, 14, 7, 6, 170 13, 12, 5, 4, 171 11, 10, 3, 2, 172 9, 8, 1, 0, 173 174 15, 14, 7, 6, 175 13, 12, 5, 4, 176 11, 10, 3, 2, 177 9, 8, 1, 0, 178 ), 179 ); 180 181 // Split u16 halves 182 let reg2 = x86_64::_mm256_and_si256(reg1, x86_64::_mm256_set1_epi16(0xff00u16 as i16)); 183 let reg3 = x86_64::_mm256_and_si256(reg1, x86_64::_mm256_set1_epi16(0x00ffu16 as i16)); 184 185 // Bit-shift 186 let reg2 = x86_64::_mm256_srlv_epi32( 187 reg2, 188 x86_64::_mm256_set_epi32( 189 8, 6, 4, 2, // (x, p), (v, n), (t, l), (r, j) 190 8, 6, 4, 2, // (h, p), (f, n), (d, l), (b, j) 191 ), 192 ); 193 let reg3 = x86_64::_mm256_sllv_epi32( 194 reg3, 195 x86_64::_mm256_set_epi32( 196 5, 7, 1, 3, // (w, o), (u, m), (s, k), (q, i) 197 5, 7, 1, 3, // (g, o), (e, m), (c, k), (a, i) 198 ), 199 ); 200 201 // Shuffle 202 #[rustfmt::skip] 203 let reg2 = x86_64::_mm256_shuffle_epi8( 204 reg2, 205 x86_64::_mm256_set_epi8( 206 14, 10, 6, 2, 3, 12, 8, 4, // x, v, t, r, r, p, n, l 207 -1, -1, -1, 7, -1, -1, -1, -1, // 0, 0, 0, t, 0, 0, 0, 0 208 5, -1, -1, -1, -1, 7, -1, -1, // l, 0, 0, 0, 0, d, 0, 0 209 0, 1, 14, 10, 6, 2, 3, -1, // j, j, h, f, d, b, b, 0 210 ), 211 ); 212 213 #[rustfmt::skip] 214 let reg3 = x86_64::_mm256_shuffle_epi8( 215 reg3, 216 x86_64::_mm256_set_epi8( 217 14, 15, 11, 6, 2, 12, 13, 9, // w, w, u, s, q, o, o, m 218 -1, 10, -1, -1, -1, -1, 8, -1, // 0, u, 0, 0, 0, 0, m, 0 219 -1, -1, -1, 10, -1, -1, -1, -1, // 0, 0, 0, e, 0, 0, 0, 0 220 4, 0, 14, 15, 11, 6, 2, -1, // k, i, g, g, e, c, a, 0 221 ), 222 ); 223 224 // OR-Reduce 225 let reduce1 = x86_64::_mm256_or_si256(reg2, reg3); 226 let reduce_hi = 227 x86_64::_mm256_castsi256_si128(x86_64::_mm256_permute4x64_epi64::<0b1101>(reduce1)); 228 let reduce_lo = 229 x86_64::_mm256_castsi256_si128(x86_64::_mm256_permute4x64_epi64::<0b1000>(reduce1)); 230 231 let reduce2 = x86_64::_mm_or_si128(reduce_hi, reduce_lo); 232 233 // Current register layout: 234 // MSB ____________________________________________________________________________________ 235 // | wwwxxxxx | uvvvvvww | ttttuuuu | rrssssst | qqqqqrrr | oooppppp | mnnnnnoo | llllmmmm | 236 // | jjkkkkkl | iiiiijjj | ggghhhhh | efffffgg | ddddeeee | bbcccccd | aaaaabbb | 00000000 | 237 // ____________________________________________________________________________________ LSB 238 // This is then written in reverse order, so: 239 // MSB ____________________________________________________________________________________ 240 // | 00000000 | aaaaabbb | bbcccccd | ddddeeee | efffffgg | ggghhhhh | iiiiijjj | jjkkkkkl | 241 // | llllmmmm | mnnnnnoo | oooppppp | qqqqqrrr | rrssssst | ttttuuuu | uvvvvvww | wwwxxxxx | 242 // ____________________________________________________________________________________ LSB 243 244 let mut out = OptionDidPlc([0; 16]); 245 246 // SAFETY: `out` is 16 bytes (128 bits) 247 unsafe { x86_64::_mm_storeu_si128(out.0.as_mut_ptr() as _, reduce2) }; 248 249 out.0[0] = if is_valid { 0 } else { 1 }; 250 251 out 252} 253 254/// Validates and decodes a `did:plc:` string, producing a `DidInner::Plc` if successful. 255/// 256/// Avoids using AVX instructions, but uses BMI2 if able. 257/// 258/// [`OptionDidPlc::try_into()`] produces an `Ok(DidInner)` if successful. 259#[inline] 260fn decode_plc_non_avx(plc_str: &[u8; 32]) -> OptionDidPlc { 261 let Some(ident) = plc_str.strip_prefix(b"did:plc:") else { 262 return OptionDidPlc::INVALID; 263 }; 264 265 if !ident.iter().all(|&b| matches!(b, b'a'..=b'z' | b'2'..=b'7')) { 266 return OptionDidPlc::INVALID; 267 } 268 269 let mut out = OptionDidPlc([0u8; 16]); 270 271 #[inline] 272 fn pack_bytes(ident_bytes: &[u8]) -> u64 { 273 // Note: all ident_bytes must be valid base32 chars! ('a'..='z', '2'..='7') 274 debug_assert_eq!(ident_bytes.len(), 8); 275 276 let bytes = u64::from_le_bytes([ 277 ident_bytes[7], 278 ident_bytes[6], 279 ident_bytes[5], 280 ident_bytes[4], 281 ident_bytes[3], 282 ident_bytes[2], 283 ident_bytes[1], 284 ident_bytes[0], 285 ]); 286 287 // Here we treat the u64 as packed u8 values 288 // There are some add/sub ops, but none of them should overflow within their u8 289 // All bytes are already validated when this function is used 290 291 // For reference: 292 // b'2' = 50 = 0x32 293 // b'7' = 55 = 0x37 294 // b'a' = 97 = 0x61 295 // b'z' = 122 = 0x7a 296 297 // Chars 'a'..='z' have the 0x40 byte set, while '2'..='7' don't 298 let alpha_mask = 0x4040404040404040_u64; 299 300 // alpha_flags has a 0x01 byte for every char that is 'a'..='z' 301 let alpha_flags = (bytes & alpha_mask) >> 6; 302 303 // Bring values from alpha chars right "behind" numeric chars 304 // That is, if a char was b'z', it should end up as (b'2' - 1) 305 // This should not underflow any 8-bit part of the 64bit value 306 // In other words, b'z' (0x7a) maps to b'2' - 1 (0x31), so -73 (-0x49) 307 let values = bytes - alpha_flags * (b'z' - b'2' + 1) as u64; 308 309 // The character values now represent a contiguous range, but it's not yet zero-based 310 // b'z' (0x7a) ended up at 0x31, so b'a' (0x61) is now at 0x18 311 let values = values - 0x1818181818181818_u64; 312 313 // At this point, the base32 chars should have all been converted appropriately 314 // The value ranges 'a'..='z' and '2'..='7' have been made contiguous, 315 // and then shifted to make the range start at 0. 316 317 // Finally, the bits need to be packed. 318 // Every 8 bits actually represent only 5 bits. 319 320 // Compile-time detection 321 if is_x86_feature_detected!("bmi2") { 322 // SAFETY: bmi2 is active 323 unsafe { x86_64::_pext_u64(values, 0x1f1f1f1f1f1f1f1f) } 324 } else { 325 let [h, g, f, e, d, c, b, a] = values.to_le_bytes(); 326 327 ((a as u64) << 35) 328 | ((b as u64) << 30) 329 | ((c as u64) << 25) 330 | ((d as u64) << 20) 331 | ((e as u64) << 15) 332 | ((f as u64) << 10) 333 | ((g as u64) << 5) 334 | (h as u64) 335 } 336 } 337 338 debug_assert_eq!(ident.len(), 24); 339 340 for i in 0..3 { 341 let from = i * 8; 342 // The compiler is not convinced that a regular index access is safe 343 // This is a minor optimization, and just gets rid of a few calls to slice_index_fail 344 // SAFETY: this is in bounds 345 // `from` is 0, 8, 16 346 // The indexed ranges are 0..8, 8..16, 16..24 347 let ident_slice = unsafe { ident.get_unchecked(from..from + 8) }; 348 let bytes = pack_bytes(ident_slice).to_le_bytes(); 349 out.0[i * 5 + 1] = bytes[4]; 350 out.0[i * 5 + 2] = bytes[3]; 351 out.0[i * 5 + 3] = bytes[2]; 352 out.0[i * 5 + 4] = bytes[1]; 353 out.0[i * 5 + 5] = bytes[0]; 354 } 355 356 out 357} 358 359/// Encodes a [`DidInner::Plc`] into a `did:plc:` string. 360/// 361/// Precondition: `val` must be a [`DidInner::Plc`] 362#[allow(dead_code)] // while still WIP 363pub fn encode_plc(val: DidInner, out: &mut [u8; 32]) { 364 debug_assert!(matches!(val, DidInner::Plc(_)), "Input should be `DidInner::Plc`"); 365 366 // SAFETY: DidInner is 16 bytes, and known to be the PLC variant 367 // The latter is only debug-asserted locally, but the function is not public API 368 let bytes: [u8; 16] = unsafe { mem::transmute::<DidInner, [u8; 16]>(val) }; 369 370 if is_x86_feature_detected!("avx2") { 371 // SAFETY: avx2 is detected 372 unsafe { 373 encode_plc_avx2(bytes, out); 374 } 375 } else { 376 encode_plc_non_avx(bytes, out); 377 } 378} 379 380#[target_feature(enable = "avx2")] 381#[inline] 382fn encode_plc_avx2(bytes_with_discr: [u8; 16], out: &mut [u8; 32]) { 383 // SAFETY: bytes_with_discr is 16 bytes (128 bits) 384 let data = unsafe { x86_64::_mm_loadu_si128(bytes_with_discr.as_ptr() as _) }; 385 let data_x2 = x86_64::_mm256_broadcastsi128_si256(data); 386 387 // Data is loaded in little-endian format (so it gets reversed) 388 // __ 0x00 _______________________________________________________________________________ 389 // | 00000000 | aaaaabbb | bbcccccd | ddddeeee | efffffgg | ggghhhhh | iiiiijjj | jjkkkkkl | 390 // | llllmmmm | mnnnnnoo | oooppppp | qqqqqrrr | rrssssst | ttttuuuu | uvvvvvww | wwwxxxxx | 391 // _______________________________________________________________________________ 0x0f __ 392 // This is loaded into the register as: 393 // MSB ____________________________________________________________________________________ 394 // | wwwxxxxx | uvvvvvww | ttttuuuu | rrssssst | qqqqqrrr | oooppppp | mnnnnnoo | llllmmmm | 395 // | jjkkkkkl | iiiiijjj | ggghhhhh | efffffgg | ddddeeee | bbcccccd | aaaaabbb | 00000000 | 396 // ____________________________________________________________________________________ LSB 397 // This is subsequently duplicated into the other 128-bit lane 398 399 // Register layout goal: 400 // MSB ____________________________________________________________________________________ 401 // | ...xxxxx | ...wwwww | ...vvvvv | ...uuuuu | ...ttttt | ...sssss | ...rrrrr | ...qqqqq | 402 // | ...ppppp | ...ooooo | ...nnnnn | ...mmmmm | ...lllll | ...kkkkk | ...jjjjj | ...iiiii | 403 // | ...hhhhh | ...ggggg | ...fffff | ...eeeee | ...ddddd | ...ccccc | ...bbbbb | ...aaaaa | 404 // | 00111010 | 01100011 | 01101100 | 01110000 | 00111010 | 01100100 | 01101001 | 01100100 | 405 // ____________________________________________________________________________________ LSB 406 407 // The result will be assembled from two 256-bit registers, which will hold alternating columns: 408 // MSB ____________________________________________________________________________________ 409 // | ...xxxxx | ........ | ...vvvvv | ........ | ...ttttt | ........ | ...rrrrr | ........ | 410 // | ...ppppp | ........ | ...nnnnn | ........ | ...lllll | ........ | ...jjjjj | ........ | 411 // | ...hhhhh | ........ | ...fffff | ........ | ...ddddd | ........ | ...bbbbb | ........ | 412 // | ........ | ........ | ........ | ........ | ........ | ........ | ........ | ........ | 413 // ____________________________________________________________________________________ LSB 414 // MSB ____________________________________________________________________________________ 415 // | ........ | ...wwwww | ........ | ...uuuuu | ........ | ...sssss | ........ | ...qqqqq | 416 // | ........ | ...ooooo | ........ | ...mmmmm | ........ | ...kkkkk | ........ | ...iiiii | 417 // | ........ | ...ggggg | ........ | ...eeeee | ........ | ...ccccc | ........ | ...aaaaa | 418 // | ........ | ........ | ........ | ........ | ........ | ........ | ........ | ........ | 419 // ____________________________________________________________________________________ LSB 420 421 // However, because AVX2 only supports shifts of packed 32-bit integers (and not 16), 422 // Values have to first be grouped up appropriately. 423 424 // Steps: 425 // MSB ____________________________________________________________________________________ 426 // | ........ | wwwxxxxx | ........ | oooppppp | rrssssst | ttttuuuu | jjkkkkkl | llllmmmm | 427 // | ........ | uvvvvvww | ........ | mnnnnnoo | qqqqqrrr | rrssssst | iiiiijjj | jjkkkkkl | 428 // | ........ | ggghhhhh | ........ | ........ | bbcccccd | ddddeeee | ........ | ........ | 429 // | ........ | efffffgg | ........ | ........ | aaaaabbb | bbcccccd | ........ | ........ | 430 // ____________________________________________________________________________________ LSB 431 // MSB ____________________________________________________________________________________ 432 // | ........ | ...xxxxx | ........ | ...ppppp | .......t | tttt.... | .......l | llll.... | 433 // | ........ | .vvvvv.. | ........ | .nnnnn.. | .....rrr | rr...... | .....jjj | jj...... | 434 // | ........ | ...hhhhh | ........ | ........ | .......d | dddd.... | ........ | ........ | 435 // | ........ | .fffff.. | ........ | ........ | .....bbb | bb...... | ........ | ........ | 436 // ____________________________________________________________________________________ LSB 437 // MSB ____________________________________________________________________________________ 438 // | ...xxxxx | ........ | ...ppppp | ........ | ...ttttt | ........ | ...lllll | ........ | 439 // | ...vvvvv | ........ | ...nnnnn | ........ | ...rrrrr | ........ | ...jjjjj | ........ | 440 // | ...hhhhh | ........ | ........ | ........ | ...ddddd | ........ | ........ | ........ | 441 // | ...fffff | ........ | ........ | ........ | ...bbbbb | ........ | ........ | ........ | 442 // ____________________________________________________________________________________ LSB 443 // MSB ____________________________________________________________________________________ 444 // | ...xxxxx | ........ | ...vvvvv | ........ | ...ttttt | ........ | ...rrrrr | ........ | 445 // | ...ppppp | ........ | ...nnnnn | ........ | ...lllll | ........ | ...jjjjj | ........ | 446 // | ...hhhhh | ........ | ...fffff | ........ | ...ddddd | ........ | ...bbbbb | ........ | 447 // | ........ | ........ | ........ | ........ | ........ | ........ | ........ | ........ | 448 // ____________________________________________________________________________________ LSB 449 #[rustfmt::skip] 450 let half1 = x86_64::_mm256_shuffle_epi8( 451 data_x2, 452 x86_64::_mm256_set_epi8( 453 -1, 15, -1, 10, 12, 13, 7, 8, 454 -1, 14, -1, 9, 11, 12, 6, 7, 455 456 -1, 5, -1, -1, 2, 3, -1, -1, 457 -1, 4, -1, -1, 1, 2, -1, -1, 458 ) 459 ); 460 461 #[rustfmt::skip] 462 let half1 = x86_64::_mm256_sllv_epi32(half1, x86_64::_mm256_set_epi32( 463 8, 4, 464 6, 2, 465 466 8, 4, 467 6, 2, 468 )); 469 470 #[rustfmt::skip] 471 let half1 = x86_64::_mm256_shuffle_epi8( 472 half1, 473 x86_64::_mm256_set_epi8( 474 15, -1, 7, -1, 11, -1, 3, -1, 475 13, -1, 5, -1, 9, -1, 1, -1, 476 477 15, -1, 7, -1, 11, -1, 3, -1, 478 -1, -1, -1, -1, -1, -1, -1, -1, 479 ) 480 ); 481 482 // MSB ____________________________________________________________________________________ 483 // | ......ww | www..... | ......oo | ooo..... | ..sssss. | ........ | ..kkkkk. | ........ | 484 // | ....uuuu | u....... | ....mmmm | m....... | qqqqq... | ........ | iiiii... | ........ | 485 // | ......gg | ggg..... | ........ | ........ | ..ccccc. | ........ | ........ | ........ | 486 // | ....eeee | e....... | ........ | ........ | aaaaa... | ........ | ........ | ........ | 487 // ____________________________________________________________________________________ LSB 488 // MSB ____________________________________________________________________________________ 489 // | ........ | ...wwwww | ........ | ...ooooo | ........ | ...sssss | ........ | ...kkkkk | 490 // | ........ | ...uuuuu | ........ | ...mmmmm | ........ | ...qqqqq | ........ | ...iiiii | 491 // | ........ | ...ggggg | ........ | ........ | ........ | ...ccccc | ........ | ........ | 492 // | ........ | ...eeeee | ........ | ........ | ........ | ...aaaaa | ........ | ........ | 493 // ____________________________________________________________________________________ LSB 494 // MSB ____________________________________________________________________________________ 495 // | ........ | ...wwwww | ........ | ...uuuuu | ........ | ...sssss | ........ | ...qqqqq | 496 // | ........ | ...ooooo | ........ | ...mmmmm | ........ | ...kkkkk | ........ | ...iiiii | 497 // | ........ | ...ggggg | ........ | ...eeeee | ........ | ...ccccc | ........ | ...aaaaa | 498 // | ........ | ........ | ........ | ........ | ........ | ........ | ........ | ........ | 499 // ____________________________________________________________________________________ LSB 500 #[rustfmt::skip] 501 let half2 = x86_64::_mm256_shuffle_epi8( 502 data_x2, 503 x86_64::_mm256_set_epi8( 504 14, 15, 9, 10, 12, -1, 7, -1, 505 13, 14, 8, 9, 11, -1, 6, -1, 506 507 4, 5, -1, -1, 2, -1, -1, -1, 508 3, 4, -1, -1, 1, -1, -1, -1, 509 ), 510 ); 511 #[rustfmt::skip] 512 let half2 = x86_64::_mm256_srlv_epi32(half2, x86_64::_mm256_set_epi32( 513 5, 9, 514 7, 11, 515 516 5, 9, 517 7, 11, 518 )); 519 520 #[rustfmt::skip] 521 let half2 = x86_64::_mm256_shuffle_epi8( 522 half2, 523 x86_64::_mm256_set_epi8( 524 -1, 14, -1, 6, -1, 10, -1, 2, 525 -1, 12, -1, 4, -1, 8, -1, 0, 526 527 -1, 14, -1, 6, -1, 10, -1, 2, 528 -1, -1, -1, -1, -1, -1, -1, -1, 529 ), 530 ); 531 532 let combined = x86_64::_mm256_or_si256(half1, half2); 533 let combined = x86_64::_mm256_and_si256(combined, x86_64::_mm256_set1_epi16(0x1f1f)); 534 535 let alpha_or_num_mask = 536 x86_64::_mm256_cmpgt_epi8(combined, x86_64::_mm256_set1_epi8((b'z' - b'a') as i8)); 537 538 let add_vec = x86_64::_mm256_blendv_epi8( 539 x86_64::_mm256_set1_epi8(b'a' as i8), 540 x86_64::_mm256_set1_epi8((b'2' - (b'z' - b'a') - 1) as i8), 541 alpha_or_num_mask, 542 ); 543 544 let chars = x86_64::_mm256_add_epi8(combined, add_vec); 545 546 // SAFETY: `out` is 32 bytes (256 bits) 547 unsafe { 548 x86_64::_mm256_storeu_si256(out.as_mut_ptr() as _, chars); 549 } 550 551 // TODO: is writing the prefix from the vector register faster? 552 out[..8].copy_from_slice(b"did:plc:"); 553 // dbg!(chars); 554 // dbg!(out); 555} 556 557#[inline] 558fn encode_plc_non_avx(bytes_with_discr: [u8; 16], out: &mut [u8; 32]) { 559 // Note: bytes_with_discr includes the zero-byte at the start! 560 let bytes = &bytes_with_discr[1..]; 561 562 fn byte_to_base32(val: u8) -> u8 { 563 match val { 564 0..26 => val + b'a', 565 26..32 => val - 26 + b'2', 566 _ => unreachable!(), 567 } 568 } 569 570 out[..8].copy_from_slice(b"did:plc:"); 571 572 for i in 0..3 { 573 let bytes_pos = i * 5; 574 let packed = usize::from_le_bytes([ 575 bytes[bytes_pos + 4], 576 bytes[bytes_pos + 3], 577 bytes[bytes_pos + 2], 578 bytes[bytes_pos + 1], 579 bytes[bytes_pos], 580 0, 581 0, 582 0, 583 ]); 584 585 let a = byte_to_base32((packed >> 35) as u8 & 0x1f); 586 let b = byte_to_base32((packed >> 30) as u8 & 0x1f); 587 let c = byte_to_base32((packed >> 25) as u8 & 0x1f); 588 let d = byte_to_base32((packed >> 20) as u8 & 0x1f); 589 let e = byte_to_base32((packed >> 15) as u8 & 0x1f); 590 let f = byte_to_base32((packed >> 10) as u8 & 0x1f); 591 let g = byte_to_base32((packed >> 5) as u8 & 0x1f); 592 let h = byte_to_base32(packed as u8 & 0x1f); 593 594 let start = 8 + i * 8; 595 let end = start + 8; 596 out[start..end].copy_from_slice(&[a, b, c, d, e, f, g, h]); 597 } 598} 599 600#[cfg(test)] 601mod tests { 602 use super::*; 603 604 #[test] 605 #[cfg(target_feature = "avx2")] 606 fn individual_bytes_decode_ok_avx2() { 607 if !is_x86_feature_detected!("avx2") { 608 panic!("AVX2 feature not detected"); 609 } 610 test_individual_bytes_decode(|x| unsafe { decode_plc_avx2(x) }); 611 } 612 613 #[test] 614 fn individual_bytes_decode_ok_non_avx() { 615 // TODO: test both the BMI and non-BMI impls 616 test_individual_bytes_decode(decode_plc_non_avx); 617 } 618 619 /// Tests parsing for every base32 character at every individual position. 620 /// 621 /// All errors are reported together at the end. 622 fn test_individual_bytes_decode<F: Fn(&[u8; 32]) -> OptionDidPlc>(decoder: F) { 623 let mut did = "did:plc:aaaaaaaaaaaaaaaaaaaaaaaa".to_string(); 624 625 let mut bad_results = vec![]; 626 627 for i in 8..32 { 628 let base32_alphabet = b"abcdefghijklmnopqrstuvwxyz234567"; 629 // Test every char except 'a' 630 for c in &base32_alphabet[1..] { 631 unsafe { did.as_bytes_mut()[i] = *c }; 632 633 let result: DidInner = decoder(did.as_bytes().as_array().unwrap()) 634 .try_into() 635 .unwrap_or_else(|_| panic!("Decoder failed on {did}")); 636 637 let mut expected_bytes = 638 base32::decode(base32::Alphabet::Rfc4648Lower { padding: false }, &did[8..]) 639 .unwrap(); 640 641 // Prefix 0 642 expected_bytes.insert(0, 0); 643 644 let result_bytes = unsafe { mem::transmute::<DidInner, [u8; 16]>(result) }; 645 646 if result_bytes != expected_bytes[..] { 647 bad_results.push((did.to_owned(), result_bytes, expected_bytes)); 648 } 649 } 650 651 unsafe { did.as_bytes_mut()[i] = b'a' }; // reset again 652 } 653 654 if !bad_results.is_empty() { 655 let mut out = format!("{} error(s):\n", bad_results.len()); 656 out.push_str(" "); 657 658 // Byte indices 659 let byte_indices = 660 (00..16).map(|i| format!("{i:02x}")).collect::<Vec<_>>().as_slice().join(", "); 661 let ref_did = "did:plc:abcdefghijklmnopqrstuvwx"; 662 out.push('\n'); 663 for (did, result, expected) in bad_results { 664 out.push_str(&format!("Ref DID: {ref_did}\n")); 665 out.push_str(&format!("DID: {did}\n")); 666 out.push_str(&format!("Indices: {byte_indices}\n")); 667 out.push_str(&format!("Result: {result:02x?}\n")); 668 out.push_str(&format!("Expected: {expected:02x?}\n\n")); 669 } 670 panic!("{out}"); 671 } 672 } 673}