// A bare-bones limbo server in Rust (mirror of https://github.com/xoogware/crawlspace,
// branch `protocol-breakout`).
1/* 2 * Copyright (c) 2024 Andrew Brower. 3 * This file is part of Crawlspace. 4 * 5 * Crawlspace is free software: you can redistribute it and/or 6 * modify it under the terms of the GNU Affero General Public 7 * License as published by the Free Software Foundation, either 8 * version 3 of the License, or (at your option) any later version. 9 * 10 * Crawlspace is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Affero General Public License for more details. 14 * 15 * You should have received a copy of the GNU Affero General Public 16 * License along with Crawlspace. If not, see 17 * <https://www.gnu.org/licenses/>. 18 */ 19 20use std::fmt::Display; 21 22use byteorder::ReadBytesExt; 23use serde::Deserialize; 24 25use crate::{ 26 ErrorKind::{self, InvalidData}, 27 Read, Write, 28}; 29 30pub trait VariableNumber: Sized + Write + Read { 31 const SEGMENT_BITS: u8 = 0b01111111; 32 const CONTINUE_BITS: u8 = 0b10000000; 33 34 const MAX_BYTES: usize; 35 36 fn len(self) -> usize; 37} 38 39macro_rules! 
make_var_num { 40 ($name: ident, $type: ty, $max_bytes: expr) => { 41 #[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Debug, Deserialize)] 42 #[serde(transparent)] 43 pub struct $name(pub $type); 44 45 impl VariableNumber for $name { 46 const MAX_BYTES: usize = $max_bytes; 47 48 fn len(self) -> usize { 49 match self.0 { 50 0 => 1, 51 n => (31 - n.leading_zeros() as usize) / 7 + 1, 52 } 53 } 54 } 55 56 impl Display for $name { 57 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 58 write!(f, "{}", self.0) 59 } 60 } 61 62 impl Read for $name { 63 fn read(r: &mut impl std::io::Read) -> Result<Self, ErrorKind> { 64 let mut v: $type = 0; 65 66 for i in 0..Self::MAX_BYTES { 67 let byte = r 68 .read_u8() 69 .map_err(|_| InvalidData("Incomplete variable number".to_string()))?; 70 v |= <$type>::from(byte & Self::SEGMENT_BITS) << (i * 7); 71 if byte & Self::CONTINUE_BITS == 0 { 72 return Ok(Self(v)); 73 } 74 } 75 76 Err(InvalidData("Malformed variable number".to_string())) 77 } 78 } 79 }; 80} 81 82make_var_num!(VarInt, i32, 5); 83make_var_num!(VarLong, i64, 10); 84 85impl Write for VarInt { 86 // implementation taken from https://github.com/as-com/varint-simd/blob/0f468783da8e181929b01b9c6e9f741c1fe09825/src/encode/mod.rs#L71 87 // only the first branch is done here because we never need to change varint size 88 fn write(&self, w: &mut impl std::io::Write) -> Result<(), ErrorKind> { 89 let x = self.0 as u64; 90 let stage1 = (x & 0x000000000000007f) 91 | ((x & 0x0000000000003f80) << 1) 92 | ((x & 0x00000000001fc000) << 2) 93 | ((x & 0x000000000fe00000) << 3) 94 | ((x & 0x00000000f0000000) << 4); 95 96 let leading = stage1.leading_zeros(); 97 98 let unused_bytes = (leading - 1) / 8; 99 let bytes_needed = 8 - unused_bytes; 100 101 let msbs = 0x8080808080808080; 102 let msbmask = 0xFFFFFFFFFFFFFFFF >> ((8 - bytes_needed + 1) * 8 - 1); 103 104 let merged = stage1 | (msbs & msbmask); 105 let bytes = merged.to_le_bytes(); 106 107 
Ok(w.write_all(unsafe { bytes.get_unchecked(..bytes_needed as usize) })?) 108 } 109} 110 111impl VarLong { 112 // how cute... 113 #[inline(always)] 114 #[cfg(target_feature = "bmi2")] 115 fn num_to_vector_stage1(self) -> [u8; 16] { 116 use std::arch::x86_64::*; 117 let mut res = [0u64; 2]; 118 119 let x = self.0 as u64; 120 121 res[0] = unsafe { _pdep_u64(x, 0x7f7f7f7f7f7f7f7f) }; 122 res[1] = unsafe { _pdep_u64(x >> 56, 0x000000000000017f) }; 123 124 unsafe { core::mem::transmute(res) } 125 } 126 127 #[inline(always)] 128 #[cfg(all(target_feature = "avx2", not(all(target_feature = "bmi2"))))] 129 fn num_to_vector_stage1(self) -> [u8; 16] { 130 use std::arch::x86_64::*; 131 let mut res = [0u64; 2]; 132 let x = self; 133 134 let b = unsafe { _mm_set1_epi64x(self as i64) }; 135 let c = unsafe { 136 _mm_or_si128( 137 _mm_or_si128( 138 _mm_sllv_epi64( 139 _mm_and_si128(b, _mm_set_epi64x(0x00000007f0000000, 0x000003f800000000)), 140 _mm_set_epi64x(4, 5), 141 ), 142 _mm_sllv_epi64( 143 _mm_and_si128(b, _mm_set_epi64x(0x0001fc0000000000, 0x00fe000000000000)), 144 _mm_set_epi64x(6, 7), 145 ), 146 ), 147 _mm_or_si128( 148 _mm_sllv_epi64( 149 _mm_and_si128(b, _mm_set_epi64x(0x000000000000007f, 0x0000000000003f80)), 150 _mm_set_epi64x(0, 1), 151 ), 152 _mm_sllv_epi64( 153 _mm_and_si128(b, _mm_set_epi64x(0x00000000001fc000, 0x000000000fe00000)), 154 _mm_set_epi64x(2, 3), 155 ), 156 ), 157 ) 158 }; 159 let d = unsafe { _mm_or_si128(c, _mm_bsrli_si128(c, 8)) }; 160 161 res[0] = unsafe { _mm_extract_epi64(d, 0) as u64 }; 162 res[1] = ((x & 0x7f00000000000000) >> 56) | ((x & 0x8000000000000000) >> 55); 163 164 unsafe { core::mem::transmute(res) } 165 } 166 167 // TODO: need to confirm this works. 
for now it's just a naive translation of avx2, 168 // but could definitely be improved -- blocking NEON implementation of Encode 169 // 170 // #[inline(always)] 171 // #[cfg(target_feature = "neon")] 172 // fn num_to_vector_stage1(self) -> [u8; 16] { 173 // use std::arch::aarch64::*; 174 // 175 // let mut res = [0u64; 2]; 176 // let x = self; 177 // 178 // let b = unsafe { vdupq_n_s64(self.0 as i64) }; 179 // let c = unsafe { 180 // vorrq_s64( 181 // vorrq_s64( 182 // vshlq_s64( 183 // vandq_s64( 184 // b, 185 // vcombine_s64( 186 // vcreate_s64(0x000003f800000000), 187 // vcreate_s64(0x00000007f0000000), 188 // ), 189 // ), 190 // vcombine_s64(vcreate_s64(5), vcreate_s64(4)), 191 // ), 192 // vshlq_s64( 193 // vandq_s64( 194 // b, 195 // vcombine_s64( 196 // vcreate_s64(0x00fe000000000000), 197 // vcreate_s64(0x0001fc0000000000), 198 // ), 199 // ), 200 // vcombine_s64(vcreate_s64(7), vcreate_s64(6)), 201 // ), 202 // ), 203 // vorrq_s64( 204 // vshlq_s64( 205 // vandq_s64( 206 // b, 207 // vcombine_s64( 208 // vcreate_s64(0x0000000000003f80), 209 // vcreate_s64(0x000000000000007f), 210 // ), 211 // ), 212 // vcombine_s64(vcreate_s64(1), vcreate_s64(0)), 213 // ), 214 // vshlq_s64( 215 // vandq_s64( 216 // b, 217 // vcombine_s64( 218 // vcreate_s64(0x000000000fe00000), 219 // vcreate_s64(0x00000000001fc000), 220 // ), 221 // ), 222 // vcombine_s64(vcreate_s64(3), vcreate_s64(2)), 223 // ), 224 // ), 225 // ) 226 // }; 227 // let d = unsafe { vorrq_s64(c, vshrq_n_s64::<8>(c)) }; 228 // 229 // res[0] = unsafe { vgetq_lane_s64(d, 0) as u64 }; 230 // res[1] = 231 // ((x.0 as u64 & 0x7f00000000000000) >> 56) | ((x.0 as u64 & 0x8000000000000000) >> 55); 232 // 233 // unsafe { core::mem::transmute(res) } 234 // } 235} 236 237impl Write for VarLong { 238 // ...and here's the second branch ^_^ 239 #[cfg(any(target_feature = "bmi2", target_feature = "avx2"))] 240 fn write(&self, w: &mut impl std::io::Write) -> Result<(), ErrorKind> { 241 use std::arch::x86_64::*; 242 unsafe 
{ 243 // Break the number into 7-bit parts and spread them out into a vector 244 let stage1: __m128i = std::mem::transmute(self.num_to_vector_stage1()); 245 246 // Create a mask for where there exist values 247 // This signed comparison works because all MSBs should be cleared at this point 248 // Also handle the special case when num == 0 249 let minimum = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffu8 as i8); 250 let exists = _mm_or_si128(_mm_cmpgt_epi8(stage1, _mm_setzero_si128()), minimum); 251 let bits = _mm_movemask_epi8(exists); 252 253 // Count the number of bytes used 254 let bytes = 32 - bits.leading_zeros() as u8; // lzcnt on supported CPUs 255 256 // Fill that many bytes into a vector 257 let ascend = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 258 let mask = _mm_cmplt_epi8(ascend, _mm_set1_epi8(bytes as i8)); 259 260 // Shift it down 1 byte so the last MSB is the only one set, and make sure only the MSB is set 261 let shift = _mm_bsrli_si128(mask, 1); 262 let msbmask = _mm_and_si128(shift, _mm_set1_epi8(128u8 as i8)); 263 264 // Merge the MSB bits into the vector 265 let merged = _mm_or_si128(stage1, msbmask); 266 267 Ok(w.write_all( 268 std::mem::transmute::<__m128i, [u8; 16]>(merged).get_unchecked(..bytes as usize), 269 )?) 270 } 271 } 272 273 // TODO: implement this using neon? not likely we'll use arm-based servers but maybe nice for 274 // local testing? 275 #[cfg(not(any(target_feature = "bmi2", target_feature = "avx2")))] 276 fn write(&self, w: &mut impl std::io::Write) -> Result<(), ErrorKind> { 277 use byteorder::WriteBytesExt; 278 279 let mut val = self.0 as u64; 280 loop { 281 if val & 0b1111111111111111111111111111111111111111111111111111111110000000 == 0 { 282 w.write_u8(val as u8)?; 283 return Ok(()); 284 } 285 w.write_u8(val as u8 & 0b01111111 | 0b10000000)?; 286 val >>= 7; 287 } 288 } 289} 290 291impl From<VarInt> for i32 { 292 fn from(value: VarInt) -> Self { 293 value.0 294 } 295}