Short (up to 65,535 bytes) immutable strings, e.g. for parsing tokens, implemented in Rust. These are sometimes called "German Strings", because the paper that describes them was written by Germans.

Refactor StringPtr in file

+219 -200
+1 -1
src/builder.rs
··· 5 5 // File: builder.rs 6 6 // Date: 22.Nov.2024 7 7 // ============================================================================= 8 - //! String builder, string concatenation. 8 + //! String builder, concatenation of [`TokenString`]s. 9 9 10 10 use crate::{ 11 11 EMPTY,
+5 -1
src/lib.rs
··· 12 12 13 13 mod builder; 14 14 mod string; 15 + mod string_ptr; 15 16 17 + // Exports. 16 18 pub use builder::{Builder, BuilderIter, Collect, Concat}; 17 19 pub use string::{ 18 20 EMPTY, ··· 23 25 TokenStringIter, 24 26 TokenStringIterOwn, 25 27 }; 26 - pub(crate) use string::{MAX_LENGTH_SMALL_ADD1, PREFIX_LENGTH, StringPtr}; 28 + // Internal use 29 + pub(crate) use string::{MAX_LENGTH_SMALL_ADD1, PREFIX_LENGTH}; 30 + pub(crate) use string_ptr::StringPtr;
+4 -198
src/string.rs
··· 5 5 // File: string.rs 6 6 // Date: 22.Nov.2024 7 7 // ============================================================================= 8 - //! Crate doc. 8 + //! The string type [`TokenString`]. 9 9 10 10 extern crate alloc; 11 11 12 12 use alloc::string::ToString as _; 13 13 use alloc::vec; 14 - use core::{borrow, cmp, error, fmt, hash, mem, ops, panic, ptr, slice, str}; 14 + use core::{borrow, cmp, error, fmt, hash, mem, ops, panic, slice, str}; 15 + 16 + use crate::StringPtr; 15 17 16 18 17 19 #[non_exhaustive] ··· 140 142 /// For bigger strings as [`MAX_LENGTH_SMALL`], this points to the memory 141 143 /// holding the whole string. 142 144 pub(crate) ptr: mem::ManuallyDrop<StringPtr>, 143 - } 144 - 145 - /// The string's data pointer, if allocated on the heap. 146 - /// 147 - /// The data this points to must be a valid UTF-8 string. 148 - /// 149 - /// This uses a trick to get the size to 64 bits. A non statically sized array 150 - /// needs two fields, the pointer and a length, but we already know the length 151 - /// and do not want another useless field holding it. 152 - #[repr(C)] 153 - #[derive(Debug)] 154 - pub struct StringPtr { 155 - /// The pointer to the string's data, allocated on the heap. 156 - ptr: ptr::NonNull<u8>, 157 - } 158 - 159 - const _: () = assert!( 160 - mem::align_of::<StringPtr>() == mem::size_of::<u64>(), 161 - "struct StringPtr is not aligned to 64 bits!" 162 - ); 163 - const _: () = assert!( 164 - mem::size_of::<StringPtr>() == mem::size_of::<u64>(), 165 - "struct StringPtr has size != 64 bits" 166 - ); 167 - 168 - 169 - /// Return a [`Layout`] suitable for a byte array to hold the string's data. 170 - /// 171 - /// Panics if something does not work out creating the layout - which should 172 - /// never happen. 
173 - fn array_layout<T>(len: usize) -> core::alloc::Layout { 174 - core::alloc::Layout::array::<T>(len) 175 - .expect("Error: constructing an array layout for TokenString failed!") 176 - } 177 - 178 - impl From<&[u8]> for StringPtr { 179 - fn from(value: &[u8]) -> Self { 180 - let ptr = if value.is_empty() { 181 - ptr::NonNull::dangling() 182 - } else { 183 - let s = value.len(); 184 - let l = array_layout::<u8>(s); 185 - // SAFETY: 186 - // Well, we must allocate memory for the array, which is guaranteed 187 - // to have a positive size `s`. 188 - let raw = unsafe { alloc::alloc::alloc(l) }; 189 - let Some(not_null) = ptr::NonNull::new(raw) else { 190 - alloc::alloc::handle_alloc_error(l) 191 - }; 192 - // SAFETY: 193 - // `not_null` is a newly allocated pointer, so it must be different 194 - // from `value` and not overlapping. `s` is the size of both `value` 195 - // and `not_null`. 196 - unsafe { 197 - ptr::copy_nonoverlapping(value.as_ptr(), not_null.as_ptr(), s); 198 - } 199 - not_null 200 - }; 201 - Self { ptr } 202 - } 203 - } 204 - 205 - // SAFETY: 206 - // This is an immutable pointer to a non-shared string. 207 - unsafe impl Send for StringPtr {} 208 - 209 - // SAFETY: 210 - // This is an immutable pointer to a non-shared string. 211 - unsafe impl Sync for StringPtr {} 212 - 213 - impl StringPtr { 214 - /// Drop the [`StringPtr`], deallocate its memory. 215 - /// 216 - /// We cannot implement the [`Drop`] trait, as we must explicitly pass the 217 - /// size of the string (the array). 218 - /// 219 - /// `len` must be the correct length of the string, else we get memory 220 - /// corruption. 221 - fn drop_manually(&mut self, len: usize) { 222 - if len > 0 { 223 - // SAFETY: 224 - // The layout is the same as has been used when allocating. 225 - unsafe { 226 - alloc::alloc::dealloc( 227 - self.ptr.as_ptr(), 228 - array_layout::<u8>(len), 229 - ); 230 - } 231 - } 232 - } 233 - 234 - /// Clone the string by copying the array in memory. 
235 - /// 236 - /// We cannot implement the [`Clone`] trait, as we must explicitly pass the 237 - /// size of the string (the array). 238 - /// 239 - /// `len` must be the correct length of the string, else we get memory 240 - /// corruption. 241 - fn clone_manually(&self, len: usize) -> Self { 242 - let ptr = if len == 0 { 243 - ptr::NonNull::dangling() 244 - } else { 245 - let l = array_layout::<u8>(len); 246 - // SAFETY: 247 - // Well, we must allocate memory for the array, which is guaranteed 248 - // to have a positive size `len`. 249 - let raw = unsafe { alloc::alloc::alloc(l) }; 250 - let Some(not_null) = ptr::NonNull::new(raw) else { 251 - alloc::alloc::handle_alloc_error(l) 252 - }; 253 - // SAFETY: 254 - // `not_null` is a newly allocated pointer, so it must be different 255 - // from `self` and not overlapping. `len` is the size of both `self` 256 - // and `not_null`. 257 - unsafe { 258 - ptr::copy_nonoverlapping( 259 - self.ptr.as_ptr(), 260 - not_null.as_ptr(), 261 - len, 262 - ); 263 - } 264 - not_null 265 - }; 266 - Self { ptr } 267 - } 268 - 269 - /// Copy the slice `value` into the string, starting at `index`. 270 - pub(crate) fn copy_manually(&mut self, idx: usize, value: &[u8]) { 271 - // SAFETY: 272 - // `not_null` is a newly allocated pointer, so it must be different 273 - // from `self` and not overlapping. `len` is the size of both `self` 274 - // and `not_null`. 275 - unsafe { 276 - ptr::copy_nonoverlapping( 277 - value.as_ptr(), 278 - self.ptr.as_ptr().add(idx), 279 - value.len(), 280 - ); 281 - } 282 - } 283 - 284 - /// Allocate memory for the string with a size of `len`. 285 - pub(crate) fn alloc_manually(len: usize) -> Self { 286 - debug_assert!(len > 0, "don't allocate an array of length 0!"); 287 - let l = array_layout::<u8>(len); 288 - // SAFETY: 289 - // Well, we must allocate memory for the array, which is guaranteed 290 - // to have a positive size `s`. 
291 - let raw = unsafe { alloc::alloc::alloc(l) }; 292 - let Some(ptr) = ptr::NonNull::new(raw) else { 293 - alloc::alloc::handle_alloc_error(l) 294 - }; 295 - Self { ptr } 296 - } 297 - 298 - /// Return the string as a byte slice. 299 - /// 300 - /// `len` must be the correct length of the string, else we get memory 301 - /// corruption. 302 - pub(crate) fn as_slice_manually(&self, len: usize) -> &[u8] { 303 - if len == 0 { 304 - Default::default() 305 - } else { 306 - // SAFETY: 307 - // `ptr` is not null and properly aligned. 308 - // `len` is the correct length. 309 - unsafe { slice::from_raw_parts(self.ptr.as_ptr(), len) } 310 - } 311 - } 312 - 313 - /// Return the string as a mutable byte slice. 314 - pub(crate) fn as_slice_manually_mut(&mut self, len: usize) -> &mut [u8] { 315 - if len == 0 { 316 - Default::default() 317 - } else { 318 - // SAFETY: 319 - // `ptr` is not null and properly aligned. 320 - // `len` is the correct length. 321 - unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), len) } 322 - } 323 - } 324 - 325 - /// Return the string this pointer holds. 326 - pub(crate) fn as_string_manually(&self, len: usize) -> &str { 327 - let bytes = self.as_slice_manually(len); 328 - // SAFETY: 329 - // Being valid UTF-8 is a precondition of `StringPtr`. 330 - unsafe { str::from_utf8_unchecked(bytes) } 331 - } 332 - 333 - /// Return `true` if the given strings are equal, `false` else. 334 - /// 335 - /// Both strings to compare must have the same length. 336 - fn eq_manually(&self, other: &Self, len: usize) -> bool { 337 - self.as_slice_manually(len) == other.as_slice_manually(len) 338 - } 339 145 } 340 146 341 147 // =============================================================================
+209
src/string_ptr.rs
// SPDX-FileCopyrightText: Copyright (C) 2024 Roland Csaszar
// SPDX-License-Identifier: MPL-2.0
//
// Project: token-string
// File: string_ptr.rs
// Date: 22.Nov.2024
// =============================================================================
//! Pointer to a string, internal struct for [`TokenString`](crate::TokenString).

extern crate alloc;

use core::{mem, ptr, slice, str};


/// The string's data pointer, if allocated on the heap.
///
/// The data this points to must be a valid UTF-8 string.
///
/// This uses a trick to get the size to 64 bits. A non statically sized array
/// needs two fields, the pointer and a length, but we already know the length
/// and do not want another useless field holding it.
///
/// Because the length is not stored here, every method that touches the
/// string's data takes the length as an explicit `len` argument. The caller
/// is responsible for always passing the length the buffer has been allocated
/// with; passing a wrong length leads to memory corruption.
#[repr(C)]
#[derive(Debug)]
pub struct StringPtr {
    /// The pointer to the string's data, allocated on the heap.
    ///
    /// Dangling (but non-null and aligned) if the string has length 0.
    ptr: ptr::NonNull<u8>,
}

const _: () = assert!(
    mem::align_of::<StringPtr>() == mem::size_of::<u64>(),
    "struct StringPtr is not aligned to 64 bits!"
);
const _: () = assert!(
    mem::size_of::<StringPtr>() == mem::size_of::<u64>(),
    "struct StringPtr has size != 64 bits"
);


/// Return a [`core::alloc::Layout`] suitable for an array of `len` elements
/// of type `T` to hold the string's data.
///
/// Panics if something does not work out creating the layout - which should
/// never happen.
fn array_layout<T>(len: usize) -> core::alloc::Layout {
    core::alloc::Layout::array::<T>(len)
        .expect("Error: constructing an array layout for TokenString failed!")
}

/// Allocate an uninitialized buffer of `len` bytes on the heap.
///
/// Returns a dangling pointer without allocating if `len` is 0, because
/// requesting a zero-sized allocation from the global allocator is undefined
/// behavior. Calls [`alloc::alloc::handle_alloc_error`] if the allocation
/// fails.
fn alloc_bytes(len: usize) -> ptr::NonNull<u8> {
    if len == 0 {
        return ptr::NonNull::dangling();
    }
    let layout = array_layout::<u8>(len);
    // SAFETY:
    // `layout` has a non-zero size, because `len > 0` has been checked above
    // and the element type `u8` is one byte wide.
    let raw = unsafe { alloc::alloc::alloc(layout) };
    let Some(not_null) = ptr::NonNull::new(raw) else {
        alloc::alloc::handle_alloc_error(layout)
    };
    not_null
}

impl From<&[u8]> for StringPtr {
    /// Allocate a new buffer of `value.len()` bytes and copy `value` into it.
    ///
    /// An empty slice does not allocate. `value` must be valid UTF-8 if the
    /// result is going to be read as a string.
    fn from(value: &[u8]) -> Self {
        let len = value.len();
        let dst = alloc_bytes(len);
        // SAFETY:
        // `dst` is either a freshly allocated buffer of `len` bytes, so it
        // cannot overlap `value`, or `len` is 0 and nothing is copied (both
        // pointers are non-null and aligned for `u8` in that case too).
        unsafe {
            ptr::copy_nonoverlapping(value.as_ptr(), dst.as_ptr(), len);
        }
        Self { ptr: dst }
    }
}

// SAFETY:
// A `StringPtr` is the only handle to its heap buffer of plain bytes - this
// type never hands out copies of the raw pointer - so moving it to another
// thread is fine.
unsafe impl Send for StringPtr {}

// SAFETY:
// Through a shared reference the buffer can only be read; all mutating methods
// take `&mut self`.
unsafe impl Sync for StringPtr {}

impl StringPtr {
    /// Drop the [`StringPtr`], deallocate its memory.
    ///
    /// We cannot implement the [`Drop`] trait, as we must explicitly pass the
    /// size of the string (the array).
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption. Nothing is deallocated if `len` is 0. The pointer must not
    /// be used any more after calling this.
    pub fn drop_manually(&mut self, len: usize) {
        if len > 0 {
            // SAFETY:
            // The layout is the same as has been used when allocating.
            unsafe {
                alloc::alloc::dealloc(
                    self.ptr.as_ptr(),
                    array_layout::<u8>(len),
                );
            }
        }
    }

    /// Clone the string by copying the array in memory.
    ///
    /// We cannot implement the [`Clone`] trait, as we must explicitly pass the
    /// size of the string (the array).
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption.
    pub fn clone_manually(&self, len: usize) -> Self {
        // Allocating and copying is exactly what the `From<&[u8]>`
        // implementation does, including the "no allocation for length 0"
        // case.
        Self::from(self.as_slice_manually(len))
    }

    /// Copy the slice `value` into the string, starting at index `idx`.
    ///
    /// `idx + value.len()` must not be greater than the length the string
    /// has been allocated with, and `value` must not be a part of this
    /// string's own buffer, else we get memory corruption.
    pub(crate) fn copy_manually(&mut self, idx: usize, value: &[u8]) {
        // SAFETY:
        // The caller guarantees that `idx + value.len()` is within the
        // allocated buffer. `self` is borrowed mutably, so `value` cannot be
        // a safe borrow of the same buffer and the ranges do not overlap.
        unsafe {
            ptr::copy_nonoverlapping(
                value.as_ptr(),
                self.ptr.as_ptr().add(idx),
                value.len(),
            );
        }
    }

    /// Allocate memory for the string with a size of `len`.
    ///
    /// The memory is not initialized, it must be filled (see
    /// [`StringPtr::copy_manually`]) before it is read.
    ///
    /// `len` should be greater than 0, which is checked in debug builds. In
    /// release builds a `len` of 0 yields a dangling pointer without
    /// allocating anything.
    pub(crate) fn alloc_manually(len: usize) -> Self {
        debug_assert!(len > 0, "don't allocate an array of length 0!");
        Self {
            ptr: alloc_bytes(len),
        }
    }

    /// Return the string as a byte slice.
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption.
    pub(crate) fn as_slice_manually(&self, len: usize) -> &[u8] {
        if len == 0 {
            Default::default()
        } else {
            // SAFETY:
            // `ptr` is not null and properly aligned.
            // `len` is the correct length.
            unsafe { slice::from_raw_parts(self.ptr.as_ptr(), len) }
        }
    }

    /// Return the string as a mutable byte slice.
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption.
    pub(crate) fn as_slice_manually_mut(&mut self, len: usize) -> &mut [u8] {
        if len == 0 {
            Default::default()
        } else {
            // SAFETY:
            // `ptr` is not null and properly aligned.
            // `len` is the correct length.
            unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), len) }
        }
    }

    /// Return the string this pointer holds.
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption.
    pub(crate) fn as_string_manually(&self, len: usize) -> &str {
        let bytes = self.as_slice_manually(len);
        // SAFETY:
        // Being valid UTF-8 is a precondition of `StringPtr`.
        unsafe { str::from_utf8_unchecked(bytes) }
    }

    /// Return `true` if the given strings are equal, `false` else.
    ///
    /// Both strings to compare must have the same length `len`.
    pub fn eq_manually(&self, other: &Self, len: usize) -> bool {
        self.as_slice_manually(len) == other.as_slice_manually(len)
    }
}