//! A bare-bones limbo server in Rust (mirror of <https://github.com/xoogware/crawlspace>).
1/*
2 * Copyright (c) 2024 Andrew Brower.
3 * This file is part of Crawlspace.
4 *
5 * Crawlspace is free software: you can redistribute it and/or
6 * modify it under the terms of the GNU Affero General Public
7 * License as published by the Free Software Foundation, either
8 * version 3 of the License, or (at your option) any later version.
9 *
10 * Crawlspace is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Affero General Public License for more details.
14 *
15 * You should have received a copy of the GNU Affero General Public
16 * License along with Crawlspace. If not, see
17 * <https://www.gnu.org/licenses/>.
18 */
19
20use std::fmt::Display;
21
22use byteorder::ReadBytesExt;
23use serde::Deserialize;
24
25use crate::{
26 ErrorKind::{self, InvalidData},
27 Read, Write,
28};
29
30pub trait VariableNumber: Sized + Write + Read {
31 const SEGMENT_BITS: u8 = 0b01111111;
32 const CONTINUE_BITS: u8 = 0b10000000;
33
34 const MAX_BYTES: usize;
35
36 fn len(self) -> usize;
37}
38
39macro_rules! make_var_num {
40 ($name: ident, $type: ty, $max_bytes: expr) => {
41 #[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Debug, Deserialize)]
42 #[serde(transparent)]
43 pub struct $name(pub $type);
44
45 impl VariableNumber for $name {
46 const MAX_BYTES: usize = $max_bytes;
47
48 fn len(self) -> usize {
49 match self.0 {
50 0 => 1,
51 n => (31 - n.leading_zeros() as usize) / 7 + 1,
52 }
53 }
54 }
55
56 impl Display for $name {
57 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58 write!(f, "{}", self.0)
59 }
60 }
61
62 impl Read for $name {
63 fn read(r: &mut impl std::io::Read) -> Result<Self, ErrorKind> {
64 let mut v: $type = 0;
65
66 for i in 0..Self::MAX_BYTES {
67 let byte = r
68 .read_u8()
69 .map_err(|_| InvalidData("Incomplete variable number".to_string()))?;
70 v |= <$type>::from(byte & Self::SEGMENT_BITS) << (i * 7);
71 if byte & Self::CONTINUE_BITS == 0 {
72 return Ok(Self(v));
73 }
74 }
75
76 Err(InvalidData("Malformed variable number".to_string()))
77 }
78 }
79 };
80}
81
82make_var_num!(VarInt, i32, 5);
83make_var_num!(VarLong, i64, 10);
84
85impl Write for VarInt {
86 // implementation taken from https://github.com/as-com/varint-simd/blob/0f468783da8e181929b01b9c6e9f741c1fe09825/src/encode/mod.rs#L71
87 // only the first branch is done here because we never need to change varint size
88 fn write(&self, w: &mut impl std::io::Write) -> Result<(), ErrorKind> {
89 let x = self.0 as u64;
90 let stage1 = (x & 0x000000000000007f)
91 | ((x & 0x0000000000003f80) << 1)
92 | ((x & 0x00000000001fc000) << 2)
93 | ((x & 0x000000000fe00000) << 3)
94 | ((x & 0x00000000f0000000) << 4);
95
96 let leading = stage1.leading_zeros();
97
98 let unused_bytes = (leading - 1) / 8;
99 let bytes_needed = 8 - unused_bytes;
100
101 let msbs = 0x8080808080808080;
102 let msbmask = 0xFFFFFFFFFFFFFFFF >> ((8 - bytes_needed + 1) * 8 - 1);
103
104 let merged = stage1 | (msbs & msbmask);
105 let bytes = merged.to_le_bytes();
106
107 Ok(w.write_all(unsafe { bytes.get_unchecked(..bytes_needed as usize) })?)
108 }
109}
110
111impl VarLong {
112 // how cute...
113 #[inline(always)]
114 #[cfg(target_feature = "bmi2")]
115 fn num_to_vector_stage1(self) -> [u8; 16] {
116 use std::arch::x86_64::*;
117 let mut res = [0u64; 2];
118
119 let x = self.0 as u64;
120
121 res[0] = unsafe { _pdep_u64(x, 0x7f7f7f7f7f7f7f7f) };
122 res[1] = unsafe { _pdep_u64(x >> 56, 0x000000000000017f) };
123
124 unsafe { core::mem::transmute(res) }
125 }
126
127 #[inline(always)]
128 #[cfg(all(target_feature = "avx2", not(all(target_feature = "bmi2"))))]
129 fn num_to_vector_stage1(self) -> [u8; 16] {
130 use std::arch::x86_64::*;
131 let mut res = [0u64; 2];
132 let x = self;
133
134 let b = unsafe { _mm_set1_epi64x(self as i64) };
135 let c = unsafe {
136 _mm_or_si128(
137 _mm_or_si128(
138 _mm_sllv_epi64(
139 _mm_and_si128(b, _mm_set_epi64x(0x00000007f0000000, 0x000003f800000000)),
140 _mm_set_epi64x(4, 5),
141 ),
142 _mm_sllv_epi64(
143 _mm_and_si128(b, _mm_set_epi64x(0x0001fc0000000000, 0x00fe000000000000)),
144 _mm_set_epi64x(6, 7),
145 ),
146 ),
147 _mm_or_si128(
148 _mm_sllv_epi64(
149 _mm_and_si128(b, _mm_set_epi64x(0x000000000000007f, 0x0000000000003f80)),
150 _mm_set_epi64x(0, 1),
151 ),
152 _mm_sllv_epi64(
153 _mm_and_si128(b, _mm_set_epi64x(0x00000000001fc000, 0x000000000fe00000)),
154 _mm_set_epi64x(2, 3),
155 ),
156 ),
157 )
158 };
159 let d = unsafe { _mm_or_si128(c, _mm_bsrli_si128(c, 8)) };
160
161 res[0] = unsafe { _mm_extract_epi64(d, 0) as u64 };
162 res[1] = ((x & 0x7f00000000000000) >> 56) | ((x & 0x8000000000000000) >> 55);
163
164 unsafe { core::mem::transmute(res) }
165 }
166
167 // TODO: need to confirm this works. for now it's just a naive translation of avx2,
168 // but could definitely be improved -- blocking NEON implementation of Encode
169 //
170 // #[inline(always)]
171 // #[cfg(target_feature = "neon")]
172 // fn num_to_vector_stage1(self) -> [u8; 16] {
173 // use std::arch::aarch64::*;
174 //
175 // let mut res = [0u64; 2];
176 // let x = self;
177 //
178 // let b = unsafe { vdupq_n_s64(self.0 as i64) };
179 // let c = unsafe {
180 // vorrq_s64(
181 // vorrq_s64(
182 // vshlq_s64(
183 // vandq_s64(
184 // b,
185 // vcombine_s64(
186 // vcreate_s64(0x000003f800000000),
187 // vcreate_s64(0x00000007f0000000),
188 // ),
189 // ),
190 // vcombine_s64(vcreate_s64(5), vcreate_s64(4)),
191 // ),
192 // vshlq_s64(
193 // vandq_s64(
194 // b,
195 // vcombine_s64(
196 // vcreate_s64(0x00fe000000000000),
197 // vcreate_s64(0x0001fc0000000000),
198 // ),
199 // ),
200 // vcombine_s64(vcreate_s64(7), vcreate_s64(6)),
201 // ),
202 // ),
203 // vorrq_s64(
204 // vshlq_s64(
205 // vandq_s64(
206 // b,
207 // vcombine_s64(
208 // vcreate_s64(0x0000000000003f80),
209 // vcreate_s64(0x000000000000007f),
210 // ),
211 // ),
212 // vcombine_s64(vcreate_s64(1), vcreate_s64(0)),
213 // ),
214 // vshlq_s64(
215 // vandq_s64(
216 // b,
217 // vcombine_s64(
218 // vcreate_s64(0x000000000fe00000),
219 // vcreate_s64(0x00000000001fc000),
220 // ),
221 // ),
222 // vcombine_s64(vcreate_s64(3), vcreate_s64(2)),
223 // ),
224 // ),
225 // )
226 // };
227 // let d = unsafe { vorrq_s64(c, vshrq_n_s64::<8>(c)) };
228 //
229 // res[0] = unsafe { vgetq_lane_s64(d, 0) as u64 };
230 // res[1] =
231 // ((x.0 as u64 & 0x7f00000000000000) >> 56) | ((x.0 as u64 & 0x8000000000000000) >> 55);
232 //
233 // unsafe { core::mem::transmute(res) }
234 // }
235}
236
impl Write for VarLong {
    // ...and here's the second branch ^_^
    /// SIMD fast path: writes `self` as 1-10 bytes, with the 7-bit groups
    /// prepared by `num_to_vector_stage1`.
    #[cfg(any(target_feature = "bmi2", target_feature = "avx2"))]
    fn write(&self, w: &mut impl std::io::Write) -> Result<(), ErrorKind> {
        use std::arch::x86_64::*;
        unsafe {
            // Break the number into 7-bit parts and spread them out into a vector
            let stage1: __m128i = std::mem::transmute(self.num_to_vector_stage1());

            // Create a mask for where there exist values
            // This signed comparison works because all MSBs should be cleared at this point
            // Also handle the special case when num == 0
            // (`minimum` forces the lowest lane on so `bits` is never 0 and
            // at least one byte is always emitted.)
            let minimum = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffu8 as i8);
            let exists = _mm_or_si128(_mm_cmpgt_epi8(stage1, _mm_setzero_si128()), minimum);
            let bits = _mm_movemask_epi8(exists);

            // Count the number of bytes used
            let bytes = 32 - bits.leading_zeros() as u8; // lzcnt on supported CPUs

            // Fill that many bytes into a vector
            let ascend = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
            let mask = _mm_cmplt_epi8(ascend, _mm_set1_epi8(bytes as i8));

            // Shift it down 1 byte so the last MSB is the only one set, and make sure only the MSB is set
            let shift = _mm_bsrli_si128(mask, 1);
            let msbmask = _mm_and_si128(shift, _mm_set1_epi8(128u8 as i8));

            // Merge the MSB bits into the vector
            let merged = _mm_or_si128(stage1, msbmask);

            // SAFETY: `bits` comes from a 16-lane movemask, so
            // `bytes = 32 - lzcnt(bits) <= 16` and the unchecked slice of a
            // [u8; 16] is in bounds.
            Ok(w.write_all(
                std::mem::transmute::<__m128i, [u8; 16]>(merged).get_unchecked(..bytes as usize),
            )?)
        }
    }

    // TODO: implement this using neon? not likely we'll use arm-based servers but maybe nice for
    // local testing?
    /// Scalar fallback: classic shift-and-mask variable-length encode loop.
    #[cfg(not(any(target_feature = "bmi2", target_feature = "avx2")))]
    fn write(&self, w: &mut impl std::io::Write) -> Result<(), ErrorKind> {
        use byteorder::WriteBytesExt;

        // Work on the unsigned representation so `>>` is a logical shift and
        // negative values terminate after 10 bytes.
        let mut val = self.0 as u64;
        loop {
            // Mask of everything above the low 7 bits (i.e. `!0x7f`): if
            // nothing is left, this is the final byte (continuation bit clear).
            if val & 0b1111111111111111111111111111111111111111111111111111111110000000 == 0 {
                w.write_u8(val as u8)?;
                return Ok(());
            }
            // More bits remain: emit 7 payload bits plus the continuation flag.
            w.write_u8(val as u8 & 0b01111111 | 0b10000000)?;
            val >>= 7;
        }
    }
}
290
291impl From<VarInt> for i32 {
292 fn from(value: VarInt) -> Self {
293 value.0
294 }
295}