A highly-optimized library for atproto DIDs.
1#![doc = include_str!("plc_codec.md")]
2
3use std::{arch::x86_64, mem};
4
5use crate::DidInner;
6
/// Outcome of decoding a `did:plc:` string; conceptually an `Option<DidInner::Plc>`.
///
/// The first byte is the validity flag: conversion into a `DidInner::Plc` via `try_into`
/// succeeds only if the first byte is 0.
#[repr(transparent)]
pub struct OptionDidPlc([u8; 16]);

impl OptionDidPlc {
    /// The "decoding failed" value: first byte set to 1, all remaining bytes 0.
    pub const INVALID: OptionDidPlc =
        OptionDidPlc([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
}
20
21impl TryFrom<OptionDidPlc> for DidInner {
22 type Error = ();
23
24 fn try_from(val: OptionDidPlc) -> Result<Self, Self::Error> {
25 // Compile-time check that the first byte of `DidInner::Plc` is a discriminant
26 // with a value of 0
27 const {
28 let plc_val = DidInner::Plc([0xff; 15]);
29 // SAFETY: DidInner is 16 bytes
30 let bytes = unsafe { mem::transmute::<DidInner, [u8; 16]>(plc_val) };
31 assert!(bytes[0] == 0, "The discriminant of `DidInner::Plc` should be 0");
32 }
33
34 if val.0[0] == 0 {
35 // SAFETY: The discriminant of `DidInner::Plc` is 0
36 unsafe { Ok(mem::transmute::<OptionDidPlc, DidInner>(val)) }
37 } else {
38 Err(())
39 }
40 }
41}
42
43/// Validates and decodes a `did:plc:` string, producing a `DidInner::Plc` if successful.
44///
45/// [`OptionDidPlc::try_into()`] produces an `Ok(DidInner)` if successful.
46#[inline]
47pub fn decode_plc(plc_str: &[u8; 32]) -> OptionDidPlc {
48 if is_x86_feature_detected!("avx2") {
49 // SAFETY: avx2 is detected
50 unsafe { decode_plc_avx2(plc_str) }
51 } else {
52 decode_plc_non_avx(plc_str)
53 }
54}
55
/// Validates and decodes a `did:plc:` string, producing a `DidInner::Plc` if successful.
///
/// Uses AVX2 SIMD instructions.
///
/// The first byte of the returned value is 0 if the input was a valid `did:plc:` string
/// and 1 otherwise; in the latter case the remaining bytes are unspecified garbage.
///
/// [`OptionDidPlc::try_into()`] produces an `Ok(DidInner)` if successful.
#[target_feature(enable = "avx2")]
#[inline]
fn decode_plc_avx2(plc_str: &[u8; 32]) -> OptionDidPlc {
    // SAFETY: plc_str is 32 bytes (256 bits)
    let data = unsafe { x86_64::_mm256_loadu_si256(plc_str.as_ptr() as _) };

    // For "did:plc:abcdefghijklmnopqrstuvwx", the debugger shows `data` as:
    // [0] = {i64} 4207325706165971300 [0x3a636c703a646964]
    // [1] = {i64} 7523094288207667809 [0x6867666564636261]
    // [2] = {i64} 8101815670912281193 [0x706f6e6d6c6b6a69]
    // [3] = {i64} 8680537053616894577 [0x7877767574737271]
    // The did:plc string is loaded into the SIMD register from LSB to MSB

    // let did_plc_eq_mask = x86_64::_mm256_cmpeq_epi8(
    // data,
    // x86_64::_mm256_set_epi64x(
    // 0, // The others can be 0
    // 0,
    // 0,
    // 0x3a636c703a646964, // "did:plc:" with bytes in reverse order
    // ),
    // );

    // TODO: rewrite to be more readable (macros, const eval)

    // Range check per byte: `andnot(a, b)` is `!a & b`, so a byte passes when
    // `lower < byte` and not `byte > upper`. The comparisons are signed.
    // The lowest 8 bytes use the exact prefix as the upper bound and (prefix - 1) as the
    // lower bound, so they only pass if they equal "did:plc:".
    let alpha_mask = {
        x86_64::_mm256_andnot_si256(
            x86_64::_mm256_cmpgt_epi8(
                data,
                x86_64::_mm256_set_epi64x(
                    0x7a7a7a7a7a7a7a7a, // "z" repeated
                    0x7a7a7a7a7a7a7a7a,
                    0x7a7a7a7a7a7a7a7a,
                    0x3a636c703a646964, // "did:plc:" with bytes in reverse order
                ),
            ),
            x86_64::_mm256_cmpgt_epi8(
                data,
                x86_64::_mm256_set_epi64x(
                    0x6060606060606060, // "a" - 1 repeated
                    0x6060606060606060,
                    0x6060606060606060,
                    0x39626b6f39636863, // "did:plc:" as above, with every byte lowered by 1
                ),
            ),
        )
    };
    // let alpha_mask = x86_64::_mm256_cmpgt_epi8(data, x86_64::_mm256_set1_epi8((b'a' - 1) as _));

    // Same construction as `alpha_mask`, but for the digit range '2'..='7'
    let num_mask = {
        x86_64::_mm256_andnot_si256(
            x86_64::_mm256_cmpgt_epi8(
                data,
                x86_64::_mm256_set_epi64x(
                    0x3737373737373737, // "7" repeated
                    0x3737373737373737,
                    0x3737373737373737,
                    0x3a636c703a646964, // "did:plc:" with bytes in reverse order
                ),
            ),
            x86_64::_mm256_cmpgt_epi8(
                data,
                x86_64::_mm256_set_epi64x(
                    0x3131313131313131, // "2" - 1 repeated
                    0x3131313131313131,
                    0x3131313131313131,
                    0x39626b6f39636863, // "did:plc:" as above, with every byte lowered by 1
                ),
            ),
        )
    };

    // Per-byte offset to subtract: b'a' where `alpha_mask` is set (so 'a' becomes 0),
    // otherwise b'2' - 26 (so '2' becomes 26)
    let char_to_val = x86_64::_mm256_blendv_epi8(
        x86_64::_mm256_set1_epi8((b'2' - 26) as i8),
        x86_64::_mm256_set1_epi8(b'a' as i8),
        alpha_mask,
    );

    let values = x86_64::_mm256_sub_epi8(data, char_to_val);

    let is_valid = {
        // alpha and num are masks for a..=z and 2..=7 respectively
        // In addition, they also both check for the "did:plc:" prefix
        let alpha = x86_64::_mm256_movemask_epi8(alpha_mask) as u32;
        let num = x86_64::_mm256_movemask_epi8(num_mask) as u32;
        let base32 = alpha | num;

        base32 == !0 // all ones
    };

    // Current register layout:
    // MSB ____________________________________________________________________________________
    // | 000xxxxx | 000wwwww | 000vvvvv | 000uuuuu | 000ttttt | 000sssss | 000rrrrr | 000qqqqq |
    // | 000ppppp | 000ooooo | 000nnnnn | 000mmmmm | 000lllll | 000kkkkk | 000jjjjj | 000iiiii |
    // | 000hhhhh | 000ggggg | 000fffff | 000eeeee | 000ddddd | 000ccccc | 000bbbbb | 000aaaaa |
    // | 00111010 | 01100011 | 01101100 | 01110000 | 00111010 | 01100100 | 01101001 | 01100100 |
    // ____________________________________________________________________________________ LSB
    // If the identifier was valid base32, all bytes (a-x) have been converted to 5-bit values
    // (If not, the bytes will contain garbage, but `is_valid` will be set to 0)
    // Note: the bottom row is drawn as the untouched prefix for orientation only. The
    // subtraction above was applied to those bytes too; they are not selected by the
    // permute below, so their contents do not matter.

    // Permute in order to bring some values to the lower half
    let reg1 = x86_64::_mm256_permute4x64_epi64::<0b11100110>(values); // 3, 2, 1, 2

    // Swizzle to allow for u32 bit shifts
    #[rustfmt::skip]
    let reg1 = x86_64::_mm256_shuffle_epi8(
        reg1,
        x86_64::_mm256_set_epi8(
            15, 14, 7, 6,
            13, 12, 5, 4,
            11, 10, 3, 2,
            9, 8, 1, 0,

            15, 14, 7, 6,
            13, 12, 5, 4,
            11, 10, 3, 2,
            9, 8, 1, 0,
        ),
    );

    // Split u16 halves
    let reg2 = x86_64::_mm256_and_si256(reg1, x86_64::_mm256_set1_epi16(0xff00u16 as i16));
    let reg3 = x86_64::_mm256_and_si256(reg1, x86_64::_mm256_set1_epi16(0x00ffu16 as i16));

    // Bit-shift
    let reg2 = x86_64::_mm256_srlv_epi32(
        reg2,
        x86_64::_mm256_set_epi32(
            8, 6, 4, 2, // (x, p), (v, n), (t, l), (r, j)
            8, 6, 4, 2, // (h, p), (f, n), (d, l), (b, j)
        ),
    );
    let reg3 = x86_64::_mm256_sllv_epi32(
        reg3,
        x86_64::_mm256_set_epi32(
            5, 7, 1, 3, // (w, o), (u, m), (s, k), (q, i)
            5, 7, 1, 3, // (g, o), (e, m), (c, k), (a, i)
        ),
    );

    // Shuffle
    // (an index of -1 zeroes the destination byte)
    #[rustfmt::skip]
    let reg2 = x86_64::_mm256_shuffle_epi8(
        reg2,
        x86_64::_mm256_set_epi8(
            14, 10, 6, 2, 3, 12, 8, 4, // x, v, t, r, r, p, n, l
            -1, -1, -1, 7, -1, -1, -1, -1, // 0, 0, 0, t, 0, 0, 0, 0
            5, -1, -1, -1, -1, 7, -1, -1, // l, 0, 0, 0, 0, d, 0, 0
            0, 1, 14, 10, 6, 2, 3, -1, // j, j, h, f, d, b, b, 0
        ),
    );

    #[rustfmt::skip]
    let reg3 = x86_64::_mm256_shuffle_epi8(
        reg3,
        x86_64::_mm256_set_epi8(
            14, 15, 11, 6, 2, 12, 13, 9, // w, w, u, s, q, o, o, m
            -1, 10, -1, -1, -1, -1, 8, -1, // 0, u, 0, 0, 0, 0, m, 0
            -1, -1, -1, 10, -1, -1, -1, -1, // 0, 0, 0, e, 0, 0, 0, 0
            4, 0, 14, 15, 11, 6, 2, -1, // k, i, g, g, e, c, a, 0
        ),
    );

    // OR-Reduce
    let reduce1 = x86_64::_mm256_or_si256(reg2, reg3);
    // 0b1101 moves 64-bit lanes 1 and 3 into the low 128 bits
    let reduce_hi =
        x86_64::_mm256_castsi256_si128(x86_64::_mm256_permute4x64_epi64::<0b1101>(reduce1));
    // 0b1000 moves 64-bit lanes 0 and 2 into the low 128 bits
    let reduce_lo =
        x86_64::_mm256_castsi256_si128(x86_64::_mm256_permute4x64_epi64::<0b1000>(reduce1));

    let reduce2 = x86_64::_mm_or_si128(reduce_hi, reduce_lo);

    // Current register layout:
    // MSB ____________________________________________________________________________________
    // | wwwxxxxx | uvvvvvww | ttttuuuu | rrssssst | qqqqqrrr | oooppppp | mnnnnnoo | llllmmmm |
    // | jjkkkkkl | iiiiijjj | ggghhhhh | efffffgg | ddddeeee | bbcccccd | aaaaabbb | 00000000 |
    // ____________________________________________________________________________________ LSB
    // This is then written in reverse order, so:
    // MSB ____________________________________________________________________________________
    // | 00000000 | aaaaabbb | bbcccccd | ddddeeee | efffffgg | ggghhhhh | iiiiijjj | jjkkkkkl |
    // | llllmmmm | mnnnnnoo | oooppppp | qqqqqrrr | rrssssst | ttttuuuu | uvvvvvww | wwwxxxxx |
    // ____________________________________________________________________________________ LSB

    let mut out = OptionDidPlc([0; 16]);

    // SAFETY: `out` is 16 bytes (128 bits)
    unsafe { x86_64::_mm_storeu_si128(out.0.as_mut_ptr() as _, reduce2) };

    // The first byte doubles as the validity flag checked by `TryFrom<OptionDidPlc>`
    out.0[0] = if is_valid { 0 } else { 1 };

    out
}
253
254/// Validates and decodes a `did:plc:` string, producing a `DidInner::Plc` if successful.
255///
256/// Avoids using AVX instructions, but uses BMI2 if able.
257///
258/// [`OptionDidPlc::try_into()`] produces an `Ok(DidInner)` if successful.
259#[inline]
260fn decode_plc_non_avx(plc_str: &[u8; 32]) -> OptionDidPlc {
261 let Some(ident) = plc_str.strip_prefix(b"did:plc:") else {
262 return OptionDidPlc::INVALID;
263 };
264
265 if !ident.iter().all(|&b| matches!(b, b'a'..=b'z' | b'2'..=b'7')) {
266 return OptionDidPlc::INVALID;
267 }
268
269 let mut out = OptionDidPlc([0u8; 16]);
270
271 #[inline]
272 fn pack_bytes(ident_bytes: &[u8]) -> u64 {
273 // Note: all ident_bytes must be valid base32 chars! ('a'..='z', '2'..='7')
274 debug_assert_eq!(ident_bytes.len(), 8);
275
276 let bytes = u64::from_le_bytes([
277 ident_bytes[7],
278 ident_bytes[6],
279 ident_bytes[5],
280 ident_bytes[4],
281 ident_bytes[3],
282 ident_bytes[2],
283 ident_bytes[1],
284 ident_bytes[0],
285 ]);
286
287 // Here we treat the u64 as packed u8 values
288 // There are some add/sub ops, but none of them should overflow within their u8
289 // All bytes are already validated when this function is used
290
291 // For reference:
292 // b'2' = 50 = 0x32
293 // b'7' = 55 = 0x37
294 // b'a' = 97 = 0x61
295 // b'z' = 122 = 0x7a
296
297 // Chars 'a'..='z' have the 0x40 byte set, while '2'..='7' don't
298 let alpha_mask = 0x4040404040404040_u64;
299
300 // alpha_flags has a 0x01 byte for every char that is 'a'..='z'
301 let alpha_flags = (bytes & alpha_mask) >> 6;
302
303 // Bring values from alpha chars right "behind" numeric chars
304 // That is, if a char was b'z', it should end up as (b'2' - 1)
305 // This should not underflow any 8-bit part of the 64bit value
306 // In other words, b'z' (0x7a) maps to b'2' - 1 (0x31), so -73 (-0x49)
307 let values = bytes - alpha_flags * (b'z' - b'2' + 1) as u64;
308
309 // The character values now represent a contiguous range, but it's not yet zero-based
310 // b'z' (0x7a) ended up at 0x31, so b'a' (0x61) is now at 0x18
311 let values = values - 0x1818181818181818_u64;
312
313 // At this point, the base32 chars should have all been converted appropriately
314 // The value ranges 'a'..='z' and '2'..='7' have been made contiguous,
315 // and then shifted to make the range start at 0.
316
317 // Finally, the bits need to be packed.
318 // Every 8 bits actually represent only 5 bits.
319
320 // Compile-time detection
321 if is_x86_feature_detected!("bmi2") {
322 // SAFETY: bmi2 is active
323 unsafe { x86_64::_pext_u64(values, 0x1f1f1f1f1f1f1f1f) }
324 } else {
325 let [h, g, f, e, d, c, b, a] = values.to_le_bytes();
326
327 ((a as u64) << 35)
328 | ((b as u64) << 30)
329 | ((c as u64) << 25)
330 | ((d as u64) << 20)
331 | ((e as u64) << 15)
332 | ((f as u64) << 10)
333 | ((g as u64) << 5)
334 | (h as u64)
335 }
336 }
337
338 debug_assert_eq!(ident.len(), 24);
339
340 for i in 0..3 {
341 let from = i * 8;
342 // The compiler is not convinced that a regular index access is safe
343 // This is a minor optimization, and just gets rid of a few calls to slice_index_fail
344 // SAFETY: this is in bounds
345 // `from` is 0, 8, 16
346 // The indexed ranges are 0..8, 8..16, 16..24
347 let ident_slice = unsafe { ident.get_unchecked(from..from + 8) };
348 let bytes = pack_bytes(ident_slice).to_le_bytes();
349 out.0[i * 5 + 1] = bytes[4];
350 out.0[i * 5 + 2] = bytes[3];
351 out.0[i * 5 + 3] = bytes[2];
352 out.0[i * 5 + 4] = bytes[1];
353 out.0[i * 5 + 5] = bytes[0];
354 }
355
356 out
357}
358
359/// Encodes a [`DidInner::Plc`] into a `did:plc:` string.
360///
361/// Precondition: `val` must be a [`DidInner::Plc`]
362#[allow(dead_code)] // while still WIP
363pub fn encode_plc(val: DidInner, out: &mut [u8; 32]) {
364 debug_assert!(matches!(val, DidInner::Plc(_)), "Input should be `DidInner::Plc`");
365
366 // SAFETY: DidInner is 16 bytes, and known to be the PLC variant
367 // The latter is only debug-asserted locally, but the function is not public API
368 let bytes: [u8; 16] = unsafe { mem::transmute::<DidInner, [u8; 16]>(val) };
369
370 if is_x86_feature_detected!("avx2") {
371 // SAFETY: avx2 is detected
372 unsafe {
373 encode_plc_avx2(bytes, out);
374 }
375 } else {
376 encode_plc_non_avx(bytes, out);
377 }
378}
379
/// Encodes the raw bytes of a `DidInner::Plc` into a `did:plc:` string using AVX2.
///
/// `bytes_with_discr` is the full 16-byte representation: byte 0 is the zero discriminant
/// and bytes 1..16 hold the 15 payload bytes. The 32-byte result is written to `out`.
#[target_feature(enable = "avx2")]
#[inline]
fn encode_plc_avx2(bytes_with_discr: [u8; 16], out: &mut [u8; 32]) {
    // SAFETY: bytes_with_discr is 16 bytes (128 bits)
    let data = unsafe { x86_64::_mm_loadu_si128(bytes_with_discr.as_ptr() as _) };
    let data_x2 = x86_64::_mm256_broadcastsi128_si256(data);

    // Data is loaded in little-endian format (so it gets reversed)
    // __ 0x00 _______________________________________________________________________________
    // | 00000000 | aaaaabbb | bbcccccd | ddddeeee | efffffgg | ggghhhhh | iiiiijjj | jjkkkkkl |
    // | llllmmmm | mnnnnnoo | oooppppp | qqqqqrrr | rrssssst | ttttuuuu | uvvvvvww | wwwxxxxx |
    // _______________________________________________________________________________ 0x0f __
    // This is loaded into the register as:
    // MSB ____________________________________________________________________________________
    // | wwwxxxxx | uvvvvvww | ttttuuuu | rrssssst | qqqqqrrr | oooppppp | mnnnnnoo | llllmmmm |
    // | jjkkkkkl | iiiiijjj | ggghhhhh | efffffgg | ddddeeee | bbcccccd | aaaaabbb | 00000000 |
    // ____________________________________________________________________________________ LSB
    // This is subsequently duplicated into the other 128-bit lane

    // Register layout goal:
    // MSB ____________________________________________________________________________________
    // | ...xxxxx | ...wwwww | ...vvvvv | ...uuuuu | ...ttttt | ...sssss | ...rrrrr | ...qqqqq |
    // | ...ppppp | ...ooooo | ...nnnnn | ...mmmmm | ...lllll | ...kkkkk | ...jjjjj | ...iiiii |
    // | ...hhhhh | ...ggggg | ...fffff | ...eeeee | ...ddddd | ...ccccc | ...bbbbb | ...aaaaa |
    // | 00111010 | 01100011 | 01101100 | 01110000 | 00111010 | 01100100 | 01101001 | 01100100 |
    // ____________________________________________________________________________________ LSB

    // The result will be assembled from two 256-bit registers, which will hold alternating columns:
    // MSB ____________________________________________________________________________________
    // | ...xxxxx | ........ | ...vvvvv | ........ | ...ttttt | ........ | ...rrrrr | ........ |
    // | ...ppppp | ........ | ...nnnnn | ........ | ...lllll | ........ | ...jjjjj | ........ |
    // | ...hhhhh | ........ | ...fffff | ........ | ...ddddd | ........ | ...bbbbb | ........ |
    // | ........ | ........ | ........ | ........ | ........ | ........ | ........ | ........ |
    // ____________________________________________________________________________________ LSB
    // MSB ____________________________________________________________________________________
    // | ........ | ...wwwww | ........ | ...uuuuu | ........ | ...sssss | ........ | ...qqqqq |
    // | ........ | ...ooooo | ........ | ...mmmmm | ........ | ...kkkkk | ........ | ...iiiii |
    // | ........ | ...ggggg | ........ | ...eeeee | ........ | ...ccccc | ........ | ...aaaaa |
    // | ........ | ........ | ........ | ........ | ........ | ........ | ........ | ........ |
    // ____________________________________________________________________________________ LSB

    // However, because AVX2 only supports shifts of packed 32-bit integers (and not 16),
    // Values have to first be grouped up appropriately.

    // Steps (for the first register; each diagram is the state after one operation below:
    // shuffle, variable left shift, shuffle - the `...` bits are cleared by the final AND):
    // MSB ____________________________________________________________________________________
    // | ........ | wwwxxxxx | ........ | oooppppp | rrssssst | ttttuuuu | jjkkkkkl | llllmmmm |
    // | ........ | uvvvvvww | ........ | mnnnnnoo | qqqqqrrr | rrssssst | iiiiijjj | jjkkkkkl |
    // | ........ | ggghhhhh | ........ | ........ | bbcccccd | ddddeeee | ........ | ........ |
    // | ........ | efffffgg | ........ | ........ | aaaaabbb | bbcccccd | ........ | ........ |
    // ____________________________________________________________________________________ LSB
    // MSB ____________________________________________________________________________________
    // | ........ | ...xxxxx | ........ | ...ppppp | .......t | tttt.... | .......l | llll.... |
    // | ........ | .vvvvv.. | ........ | .nnnnn.. | .....rrr | rr...... | .....jjj | jj...... |
    // | ........ | ...hhhhh | ........ | ........ | .......d | dddd.... | ........ | ........ |
    // | ........ | .fffff.. | ........ | ........ | .....bbb | bb...... | ........ | ........ |
    // ____________________________________________________________________________________ LSB
    // MSB ____________________________________________________________________________________
    // | ...xxxxx | ........ | ...ppppp | ........ | ...ttttt | ........ | ...lllll | ........ |
    // | ...vvvvv | ........ | ...nnnnn | ........ | ...rrrrr | ........ | ...jjjjj | ........ |
    // | ...hhhhh | ........ | ........ | ........ | ...ddddd | ........ | ........ | ........ |
    // | ...fffff | ........ | ........ | ........ | ...bbbbb | ........ | ........ | ........ |
    // ____________________________________________________________________________________ LSB
    // MSB ____________________________________________________________________________________
    // | ...xxxxx | ........ | ...vvvvv | ........ | ...ttttt | ........ | ...rrrrr | ........ |
    // | ...ppppp | ........ | ...nnnnn | ........ | ...lllll | ........ | ...jjjjj | ........ |
    // | ...hhhhh | ........ | ...fffff | ........ | ...ddddd | ........ | ...bbbbb | ........ |
    // | ........ | ........ | ........ | ........ | ........ | ........ | ........ | ........ |
    // ____________________________________________________________________________________ LSB
    #[rustfmt::skip]
    let half1 = x86_64::_mm256_shuffle_epi8(
        data_x2,
        x86_64::_mm256_set_epi8(
            -1, 15, -1, 10, 12, 13, 7, 8,
            -1, 14, -1, 9, 11, 12, 6, 7,

            -1, 5, -1, -1, 2, 3, -1, -1,
            -1, 4, -1, -1, 1, 2, -1, -1,
        )
    );

    #[rustfmt::skip]
    let half1 = x86_64::_mm256_sllv_epi32(half1, x86_64::_mm256_set_epi32(
        8, 4,
        6, 2,

        8, 4,
        6, 2,
    ));

    #[rustfmt::skip]
    let half1 = x86_64::_mm256_shuffle_epi8(
        half1,
        x86_64::_mm256_set_epi8(
            15, -1, 7, -1, 11, -1, 3, -1,
            13, -1, 5, -1, 9, -1, 1, -1,

            15, -1, 7, -1, 11, -1, 3, -1,
            -1, -1, -1, -1, -1, -1, -1, -1,
        )
    );

    // Same procedure for the second register (shuffle, variable right shift, shuffle):
    // MSB ____________________________________________________________________________________
    // | ......ww | www..... | ......oo | ooo..... | ..sssss. | ........ | ..kkkkk. | ........ |
    // | ....uuuu | u....... | ....mmmm | m....... | qqqqq... | ........ | iiiii... | ........ |
    // | ......gg | ggg..... | ........ | ........ | ..ccccc. | ........ | ........ | ........ |
    // | ....eeee | e....... | ........ | ........ | aaaaa... | ........ | ........ | ........ |
    // ____________________________________________________________________________________ LSB
    // MSB ____________________________________________________________________________________
    // | ........ | ...wwwww | ........ | ...ooooo | ........ | ...sssss | ........ | ...kkkkk |
    // | ........ | ...uuuuu | ........ | ...mmmmm | ........ | ...qqqqq | ........ | ...iiiii |
    // | ........ | ...ggggg | ........ | ........ | ........ | ...ccccc | ........ | ........ |
    // | ........ | ...eeeee | ........ | ........ | ........ | ...aaaaa | ........ | ........ |
    // ____________________________________________________________________________________ LSB
    // MSB ____________________________________________________________________________________
    // | ........ | ...wwwww | ........ | ...uuuuu | ........ | ...sssss | ........ | ...qqqqq |
    // | ........ | ...ooooo | ........ | ...mmmmm | ........ | ...kkkkk | ........ | ...iiiii |
    // | ........ | ...ggggg | ........ | ...eeeee | ........ | ...ccccc | ........ | ...aaaaa |
    // | ........ | ........ | ........ | ........ | ........ | ........ | ........ | ........ |
    // ____________________________________________________________________________________ LSB
    #[rustfmt::skip]
    let half2 = x86_64::_mm256_shuffle_epi8(
        data_x2,
        x86_64::_mm256_set_epi8(
            14, 15, 9, 10, 12, -1, 7, -1,
            13, 14, 8, 9, 11, -1, 6, -1,

            4, 5, -1, -1, 2, -1, -1, -1,
            3, 4, -1, -1, 1, -1, -1, -1,
        ),
    );
    #[rustfmt::skip]
    let half2 = x86_64::_mm256_srlv_epi32(half2, x86_64::_mm256_set_epi32(
        5, 9,
        7, 11,

        5, 9,
        7, 11,
    ));

    #[rustfmt::skip]
    let half2 = x86_64::_mm256_shuffle_epi8(
        half2,
        x86_64::_mm256_set_epi8(
            -1, 14, -1, 6, -1, 10, -1, 2,
            -1, 12, -1, 4, -1, 8, -1, 0,

            -1, 14, -1, 6, -1, 10, -1, 2,
            -1, -1, -1, -1, -1, -1, -1, -1,
        ),
    );

    // Merge the two column sets and keep only the low 5 bits of every byte
    let combined = x86_64::_mm256_or_si256(half1, half2);
    let combined = x86_64::_mm256_and_si256(combined, x86_64::_mm256_set1_epi16(0x1f1f));

    // Set for bytes whose value is above 25, i.e. the ones that map to '2'..='7'
    let alpha_or_num_mask =
        x86_64::_mm256_cmpgt_epi8(combined, x86_64::_mm256_set1_epi8((b'z' - b'a') as i8));

    // Per-byte offset to add: b'a' for values 0..=25, and b'2' - 26 where the mask is set
    // (so 26 becomes '2')
    let add_vec = x86_64::_mm256_blendv_epi8(
        x86_64::_mm256_set1_epi8(b'a' as i8),
        x86_64::_mm256_set1_epi8((b'2' - (b'z' - b'a') - 1) as i8),
        alpha_or_num_mask,
    );

    let chars = x86_64::_mm256_add_epi8(combined, add_vec);

    // SAFETY: `out` is 32 bytes (256 bits)
    unsafe {
        x86_64::_mm256_storeu_si256(out.as_mut_ptr() as _, chars);
    }

    // The first 8 bytes of the vector carry no identifier data; overwrite them with the
    // prefix.
    // TODO: is writing the prefix from the vector register faster?
    out[..8].copy_from_slice(b"did:plc:");
}
556
/// Encodes the raw bytes of a `DidInner::Plc` into a `did:plc:` string without SIMD.
///
/// `bytes_with_discr` is the full 16-byte representation: byte 0 is the discriminant
/// (ignored here) and bytes 1..16 hold the 15 payload bytes. The 32-byte result, the
/// `did:plc:` prefix followed by 24 lowercase base32 characters, is written to `out`.
#[inline]
fn encode_plc_non_avx(bytes_with_discr: [u8; 16], out: &mut [u8; 32]) {
    /// Lowercase base32 alphabet, indexed by 5-bit value.
    const ALPHABET: &[u8; 32] = b"abcdefghijklmnopqrstuvwxyz234567";

    out[..8].copy_from_slice(b"did:plc:");

    // Note: bytes_with_discr includes the zero-byte at the start!
    let payload = &bytes_with_discr[1..];

    // Every 5 payload bytes (40 bits) produce 8 characters of 5 bits each.
    for (group, chars) in payload.chunks_exact(5).zip(out[8..].chunks_exact_mut(8)) {
        // Big-endian assembly into an explicit u64: the value needs 40 bits, so it must
        // not depend on the width of `usize`.
        let packed = group.iter().fold(0_u64, |acc, &byte| (acc << 8) | u64::from(byte));

        for (k, ch) in chars.iter_mut().enumerate() {
            // First character comes from the most significant 5 bits (35..40)
            let shift = 35 - 5 * k;
            // Masking to 5 bits keeps the index within the 32-entry table
            *ch = ALPHABET[((packed >> shift) & 0x1f) as usize];
        }
    }
}
599
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the per-position decode check against the AVX2 decoder.
    #[test]
    #[cfg(target_feature = "avx2")]
    fn individual_bytes_decode_ok_avx2() {
        assert!(is_x86_feature_detected!("avx2"), "AVX2 feature not detected");
        // SAFETY: avx2 was verified by the assertion above
        test_individual_bytes_decode(|input| unsafe { decode_plc_avx2(input) });
    }

    /// Runs the per-position decode check against the scalar decoder.
    #[test]
    fn individual_bytes_decode_ok_non_avx() {
        // TODO: test both the BMI and non-BMI impls
        test_individual_bytes_decode(decode_plc_non_avx);
    }

    /// Tests parsing for every base32 character at every individual position.
    ///
    /// Starting from an all-'a' identifier, one position at a time is replaced by each
    /// other alphabet character and the decoder output is compared with the `base32`
    /// crate's result (prefixed with a zero byte).
    ///
    /// All errors are reported together at the end.
    fn test_individual_bytes_decode<F: Fn(&[u8; 32]) -> OptionDidPlc>(decoder: F) {
        const BASE32_ALPHABET: &[u8; 32] = b"abcdefghijklmnopqrstuvwxyz234567";

        // "did:plc:" followed by 24 'a' characters
        let mut did_bytes = [b'a'; 32];
        did_bytes[..8].copy_from_slice(b"did:plc:");

        let mut failures = Vec::new();

        for pos in 8..32 {
            // Test every char except 'a'
            for &ch in BASE32_ALPHABET.iter().skip(1) {
                did_bytes[pos] = ch;
                let did = std::str::from_utf8(&did_bytes).unwrap();

                let decoded: DidInner = decoder(&did_bytes)
                    .try_into()
                    .unwrap_or_else(|_| panic!("Decoder failed on {did}"));

                let mut expected =
                    base32::decode(base32::Alphabet::Rfc4648Lower { padding: false }, &did[8..])
                        .unwrap();
                // Prefix 0
                expected.insert(0, 0);

                // SAFETY: the file's compile-time check relies on DidInner being 16 bytes,
                // and `decoded` is the Plc variant
                let actual = unsafe { mem::transmute::<DidInner, [u8; 16]>(decoded) };

                if actual != expected[..] {
                    failures.push((did.to_owned(), actual, expected));
                }
            }

            did_bytes[pos] = b'a'; // reset again
        }

        if failures.is_empty() {
            return;
        }

        let mut report = format!("{} error(s):\n", failures.len());
        report.push_str(" ");

        // Byte indices
        let byte_indices = (0..16).map(|i| format!("{i:02x}")).collect::<Vec<_>>().join(", ");
        let ref_did = "did:plc:abcdefghijklmnopqrstuvwx";
        report.push('\n');
        for (did, result, expected) in failures {
            report.push_str(&format!("Ref DID: {ref_did}\n"));
            report.push_str(&format!("DID: {did}\n"));
            report.push_str(&format!("Indices: {byte_indices}\n"));
            report.push_str(&format!("Result: {result:02x?}\n"));
            report.push_str(&format!("Expected: {expected:02x?}\n\n"));
        }
        panic!("{report}");
    }
}