// A bare-bones limbo server in Rust (mirror of https://github.com/xoogware/crawlspace,
// branch `protocol-breakout`).
1/* 2 * Copyright (c) 2024 Andrew Brower. 3 * This file is part of Crawlspace. 4 * 5 * Crawlspace is free software: you can redistribute it and/or 6 * modify it under the terms of the GNU Affero General Public 7 * License as published by the Free Software Foundation, either 8 * version 3 of the License, or (at your option) any later version. 9 * 10 * Crawlspace is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Affero General Public License for more details. 14 * 15 * You should have received a copy of the GNU Affero General Public 16 * License along with Crawlspace. If not, see 17 * <https://www.gnu.org/licenses/>. 18 */ 19 20use std::fmt::Display; 21 22use byteorder::ReadBytesExt; 23use serde::Deserialize; 24 25use crate::{ 26 ErrorKind::{self, InvalidData}, 27 Read, Write, 28}; 29 30pub trait VariableNumber: Sized + Write + Read { 31 const SEGMENT_BITS: u8 = 0b01111111; 32 const CONTINUE_BITS: u8 = 0b10000000; 33 34 const MAX_BYTES: usize; 35 36 fn len(self) -> usize; 37} 38 39macro_rules! 
make_var_num { 40 ($name: ident, $type: ty, $max_bytes: expr) => { 41 #[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Debug, Deserialize)] 42 #[serde(transparent)] 43 pub struct $name(pub $type); 44 45 impl VariableNumber for $name { 46 const MAX_BYTES: usize = $max_bytes; 47 48 fn len(self) -> usize { 49 match self.0 { 50 0 => 1, 51 n => (31 - n.leading_zeros() as usize) / 7 + 1, 52 } 53 } 54 } 55 56 impl Display for $name { 57 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 58 write!(f, "{}", self.0) 59 } 60 } 61 62 impl Read for $name { 63 fn read(r: &mut impl std::io::Read) -> Result<Self, ErrorKind> { 64 let mut v: $type = 0; 65 66 for i in 0..Self::MAX_BYTES { 67 let byte = r 68 .read_u8() 69 .map_err(|_| InvalidData("Incomplete variable number".to_string()))?; 70 v |= <$type>::from(byte & Self::SEGMENT_BITS) << (i * 7); 71 if byte & Self::CONTINUE_BITS == 0 { 72 return Ok(Self(v)); 73 } 74 } 75 76 Err(InvalidData("Malformed variable number".to_string())) 77 } 78 } 79 }; 80} 81 82make_var_num!(VarInt, i32, 5); 83make_var_num!(VarLong, i64, 10); 84 85impl Write for VarInt { 86 // implementation taken from https://github.com/as-com/varint-simd/blob/0f468783da8e181929b01b9c6e9f741c1fe09825/src/encode/mod.rs#L71 87 // only the first branch is done here because we never need to change varint size 88 fn write(&self, w: &mut impl std::io::Write) -> Result<(), ErrorKind> { 89 let x = self.0 as u64; 90 let stage1 = (x & 0x000000000000007f) 91 | ((x & 0x0000000000003f80) << 1) 92 | ((x & 0x00000000001fc000) << 2) 93 | ((x & 0x000000000fe00000) << 3) 94 | ((x & 0x00000000f0000000) << 4); 95 96 let leading = stage1.leading_zeros(); 97 98 let unused_bytes = (leading - 1) / 8; 99 let bytes_needed = 8 - unused_bytes; 100 101 let msbs = 0x8080808080808080; 102 let msbmask = 0xFFFFFFFFFFFFFFFF >> ((8 - bytes_needed + 1) * 8 - 1); 103 104 let merged = stage1 | (msbs & msbmask); 105 let bytes = merged.to_le_bytes(); 106 107 
Ok(w.write_all(unsafe { bytes.get_unchecked(..bytes_needed as usize) })?) 108 } 109} 110 111impl VarLong { 112 // how cute... 113 #[inline(always)] 114 #[cfg(target_feature = "bmi2")] 115 fn num_to_vector_stage1(self) -> [u8; 16] { 116 use std::arch::x86_64::*; 117 let mut res = [0u64; 2]; 118 119 let x = self.0 as u64; 120 121 res[0] = unsafe { _pdep_u64(x, 0x7f7f7f7f7f7f7f7f) }; 122 res[1] = unsafe { _pdep_u64(x >> 56, 0x000000000000017f) }; 123 124 unsafe { core::mem::transmute(res) } 125 } 126 127 #[inline(always)] 128 #[cfg(all(target_feature = "avx2", not(all(target_feature = "bmi2"))))] 129 fn num_to_vector_stage1(self) -> [u8; 16] { 130 use std::arch::x86_64::*; 131 let mut res = [0u64; 2]; 132 let x = self; 133 134 let b = unsafe { _mm_set1_epi64x(self as i64) }; 135 let c = unsafe { 136 _mm_or_si128( 137 _mm_or_si128( 138 _mm_sllv_epi64( 139 _mm_and_si128(b, _mm_set_epi64x(0x00000007f0000000, 0x000003f800000000)), 140 _mm_set_epi64x(4, 5), 141 ), 142 _mm_sllv_epi64( 143 _mm_and_si128(b, _mm_set_epi64x(0x0001fc0000000000, 0x00fe000000000000)), 144 _mm_set_epi64x(6, 7), 145 ), 146 ), 147 _mm_or_si128( 148 _mm_sllv_epi64( 149 _mm_and_si128(b, _mm_set_epi64x(0x000000000000007f, 0x0000000000003f80)), 150 _mm_set_epi64x(0, 1), 151 ), 152 _mm_sllv_epi64( 153 _mm_and_si128(b, _mm_set_epi64x(0x00000000001fc000, 0x000000000fe00000)), 154 _mm_set_epi64x(2, 3), 155 ), 156 ), 157 ) 158 }; 159 let d = unsafe { _mm_or_si128(c, _mm_bsrli_si128(c, 8)) }; 160 161 res[0] = unsafe { _mm_extract_epi64(d, 0) as u64 }; 162 res[1] = ((x & 0x7f00000000000000) >> 56) | ((x & 0x8000000000000000) >> 55); 163 164 unsafe { core::mem::transmute(res) } 165 } 166 167 // TODO: need to confirm this works. 
for now it's just a naive translation of avx2, 168 // but could definitely be improved -- blocking NEON implementation of Encode 169 // 170 // #[inline(always)] 171 // #[cfg(target_feature = "neon")] 172 // fn num_to_vector_stage1(self) -> [u8; 16] { 173 // use std::arch::aarch64::*; 174 // 175 // let mut res = [0u64; 2]; 176 // let x = self; 177 // 178 // let b = unsafe { vdupq_n_s64(self.0 as i64) }; 179 // let c = unsafe { 180 // vorrq_s64( 181 // vorrq_s64( 182 // vshlq_s64( 183 // vandq_s64( 184 // b, 185 // vcombine_s64( 186 // vcreate_s64(0x000003f800000000), 187 // vcreate_s64(0x00000007f0000000), 188 // ), 189 // ), 190 // vcombine_s64(vcreate_s64(5), vcreate_s64(4)), 191 // ), 192 // vshlq_s64( 193 // vandq_s64( 194 // b, 195 // vcombine_s64( 196 // vcreate_s64(0x00fe000000000000), 197 // vcreate_s64(0x0001fc0000000000), 198 // ), 199 // ), 200 // vcombine_s64(vcreate_s64(7), vcreate_s64(6)), 201 // ), 202 // ), 203 // vorrq_s64( 204 // vshlq_s64( 205 // vandq_s64( 206 // b, 207 // vcombine_s64( 208 // vcreate_s64(0x0000000000003f80), 209 // vcreate_s64(0x000000000000007f), 210 // ), 211 // ), 212 // vcombine_s64(vcreate_s64(1), vcreate_s64(0)), 213 // ), 214 // vshlq_s64( 215 // vandq_s64( 216 // b, 217 // vcombine_s64( 218 // vcreate_s64(0x000000000fe00000), 219 // vcreate_s64(0x00000000001fc000), 220 // ), 221 // ), 222 // vcombine_s64(vcreate_s64(3), vcreate_s64(2)), 223 // ), 224 // ), 225 // ) 226 // }; 227 // let d = unsafe { vorrq_s64(c, vshrq_n_s64::<8>(c)) }; 228 // 229 // res[0] = unsafe { vgetq_lane_s64(d, 0) as u64 }; 230 // res[1] = 231 // ((x.0 as u64 & 0x7f00000000000000) >> 56) | ((x.0 as u64 & 0x8000000000000000) >> 55); 232 // 233 // unsafe { core::mem::transmute(res) } 234 // } 235} 236 237impl Write for VarLong { 238 // ...and here's the second branch ^_^ 239 #[cfg(any(target_feature = "bmi2", target_feature = "avx2"))] 240 fn write(&self, w: &mut impl std::io::Write) -> Result<(), ErrorKind> { 241 use std::arch::x86_64::*; 242 unsafe 
{ 243 // Break the number into 7-bit parts and spread them out into a vector 244 let stage1: __m128i = std::mem::transmute(self.num_to_vector_stage1()); 245 246 // Create a mask for where there exist values 247 // This signed comparison works because all MSBs should be cleared at this point 248 // Also handle the special case when num == 0 249 let minimum = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffu8 as i8); 250 let exists = _mm_or_si128(_mm_cmpgt_epi8(stage1, _mm_setzero_si128()), minimum); 251 let bits = _mm_movemask_epi8(exists); 252 253 // Count the number of bytes used 254 let bytes = 32 - bits.leading_zeros() as u8; // lzcnt on supported CPUs 255 256 // Fill that many bytes into a vector 257 let ascend = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 258 let mask = _mm_cmplt_epi8(ascend, _mm_set1_epi8(bytes as i8)); 259 260 // Shift it down 1 byte so the last MSB is the only one set, and make sure only the MSB is set 261 let shift = _mm_bsrli_si128(mask, 1); 262 let msbmask = _mm_and_si128(shift, _mm_set1_epi8(128u8 as i8)); 263 264 // Merge the MSB bits into the vector 265 let merged = _mm_or_si128(stage1, msbmask); 266 267 Ok(w.write_all( 268 std::mem::transmute::<__m128i, [u8; 16]>(merged).get_unchecked(..bytes as usize), 269 )?) 270 } 271 } 272 273 // TODO: implement this using neon? not likely we'll use arm-based servers but maybe nice for 274 // local testing? 275 #[cfg(not(any(target_feature = "bmi2", target_feature = "avx2")))] 276 fn write(&self, w: &mut impl std::io::Write) -> Result<(), ErrorKind> { 277 use byteorder::WriteBytesExt; 278 279 let mut val = self.0 as u64; 280 loop { 281 if val & 0b1111111111111111111111111111111111111111111111111111111110000000 == 0 { 282 w.write_u8(val as u8)?; 283 return Ok(()); 284 } 285 w.write_u8(val as u8 & 0b01111111 | 0b10000000)?; 286 val >>= 7; 287 } 288 } 289} 290 291impl From<VarInt> for i32 { 292 fn from(value: VarInt) -> Self { 293 value.0 294 } 295}