Short (up to 65,535 bytes) immutable strings for use cases such as parsing tokens, implemented in Rust. These are sometimes called "German Strings", because the paper describing them was written by Germans.
at main 211 lines 6.3 kB view raw
// SPDX-FileCopyrightText: Copyright (C) 2024 Roland Csaszar
// SPDX-License-Identifier: MPL-2.0
//
// Project: token-string
// File: string_ptr.rs
// Date: 22.Nov.2024
// =============================================================================
//! Pointer to a string, an internal, private struct used by the heap allocated
//! strings of [`crate::TokenString`].

extern crate alloc;

use core::{mem, ptr, slice, str};


/// The string's data pointer, if allocated on the heap.
///
/// The data this points to must be a valid UTF-8 string.
///
/// This uses a trick to get the size to 64 bits. A non statically sized array
/// needs two fields, the pointer and a length, but we already know the length
/// and do not want another useless field holding it.
///
/// Because the length is not stored here, every method that touches the
/// pointed-to bytes takes the length as an explicit `len` argument. Passing a
/// `len` other than the one the buffer was created with leads to memory
/// corruption.
#[repr(C)]
#[derive(Debug)]
pub struct StringPtr {
    /// The pointer to the string's data, allocated on the heap.
    ///
    /// Dangling (never dereferenced) for a string of length 0.
    ptr: ptr::NonNull<u8>,
}

const _: () = assert!(
    mem::align_of::<StringPtr>() == mem::size_of::<u64>(),
    "struct StringPtr is not aligned to 64 bits!"
);
const _: () = assert!(
    mem::size_of::<StringPtr>() == mem::size_of::<u64>(),
    "struct StringPtr has size != 64 bits"
);


/// Return a [`core::alloc::Layout`] suitable for an array of `len` elements of
/// type `T`, used to hold the string's data.
///
/// # Panics
///
/// Panics if something does not work out creating the layout - which should
/// never happen.
fn array_layout<T>(len: usize) -> core::alloc::Layout {
    core::alloc::Layout::array::<T>(len)
        .expect("Error: constructing an array layout for TokenString failed!")
}

/// Allocate an uninitialized buffer of `len` bytes on the heap.
///
/// A `len` of 0 does not allocate at all and yields a dangling (but non-null)
/// pointer instead, because calling the global allocator with a zero-sized
/// layout is undefined behavior.
///
/// If the allocator returns a null pointer, `alloc::alloc::handle_alloc_error`
/// is called, which does not return.
fn alloc_bytes(len: usize) -> ptr::NonNull<u8> {
    if len == 0 {
        ptr::NonNull::dangling()
    } else {
        let layout = array_layout::<u8>(len);
        // SAFETY:
        // `len` is greater than 0 in this branch, so `layout` has a non-zero
        // size, which is what the allocator requires.
        let raw = unsafe { alloc::alloc::alloc(layout) };
        let Some(not_null) = ptr::NonNull::new(raw) else {
            alloc::alloc::handle_alloc_error(layout)
        };
        not_null
    }
}

impl From<&[u8]> for StringPtr {
    /// Create a new [`StringPtr`] holding a heap allocated copy of `value`.
    ///
    /// An empty `value` does not allocate, the pointer is dangling then.
    fn from(value: &[u8]) -> Self {
        let len = value.len();
        let ptr = alloc_bytes(len);
        if len > 0 {
            // SAFETY:
            // `ptr` is a newly allocated buffer of `len` bytes, so it must be
            // different from `value` and not overlapping. `len` is the size
            // of both `value` and `ptr`.
            unsafe {
                ptr::copy_nonoverlapping(value.as_ptr(), ptr.as_ptr(), len);
            }
        }
        Self { ptr }
    }
}

// SAFETY:
// This is an immutable pointer to a non-shared string.
unsafe impl Send for StringPtr {}

// SAFETY:
// This is an immutable pointer to a non-shared string.
unsafe impl Sync for StringPtr {}

impl StringPtr {
    /// Drop the [`StringPtr`], deallocate its memory.
    ///
    /// We cannot implement the [`Drop`] trait, as we must explicitly pass the
    /// size of the string (the array).
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption. Nothing is deallocated if `len` is 0, as nothing has been
    /// allocated for an empty string.
    ///
    /// The pointer must not be used to access the string's data after this
    /// call.
    #[cfg_attr(test, mutants::skip)]
    pub fn drop_manually(&mut self, len: usize) {
        if len > 0 {
            // SAFETY:
            // The layout is the same as has been used when allocating.
            unsafe {
                alloc::alloc::dealloc(
                    self.ptr.as_ptr(),
                    array_layout::<u8>(len),
                );
            }
        }
    }

    /// Clone the string by copying the array in memory.
    ///
    /// We cannot implement the [`Clone`] trait, as we must explicitly pass the
    /// size of the string (the array).
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption. A `len` of 0 does not allocate, the returned pointer is
    /// dangling then.
    pub fn clone_manually(&self, len: usize) -> Self {
        let ptr = alloc_bytes(len);
        if len > 0 {
            // SAFETY:
            // `ptr` is a newly allocated buffer of `len` bytes, so it must be
            // different from `self` and not overlapping. `len` is the size of
            // both `self` and `ptr`.
            unsafe {
                ptr::copy_nonoverlapping(self.ptr.as_ptr(), ptr.as_ptr(), len);
            }
        }
        Self { ptr }
    }

    /// Copy the slice `value` into the string, starting at byte offset `idx`.
    ///
    /// `idx + value.len()` must not be greater than the length the string has
    /// been allocated with, else we get memory corruption. This function
    /// cannot check that itself, as the length is not stored in `self`.
    pub(crate) const fn copy_manually(&mut self, idx: usize, value: &[u8]) {
        // SAFETY:
        // The caller guarantees that the `value.len()` bytes starting at
        // offset `idx` are inside of the buffer owned by `self`. `value` is a
        // shared borrow while `self` is borrowed exclusively, so the source
        // and the destination are not overlapping.
        unsafe {
            ptr::copy_nonoverlapping(
                value.as_ptr(),
                self.ptr.as_ptr().add(idx),
                value.len(),
            );
        }
    }

    /// Allocate memory for the string with a size of `len`.
    ///
    /// The allocated memory is not initialized, it must be filled (e.g. using
    /// [`StringPtr::copy_manually`]) before it is read.
    ///
    /// `len` is expected to be greater than 0, which is checked in debug
    /// builds. Without debug assertions a `len` of 0 does not allocate and
    /// returns a dangling pointer, the same representation the other
    /// constructors use for an empty string.
    pub(crate) fn alloc_manually(len: usize) -> Self {
        debug_assert!(len > 0, "don't allocate an array of length 0!");
        Self {
            ptr: alloc_bytes(len),
        }
    }

    /// Return the string as a byte slice.
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption.
    pub(crate) fn as_slice_manually(&self, len: usize) -> &[u8] {
        if len == 0 {
            // The pointer is dangling for empty strings, do not touch it.
            Default::default()
        } else {
            // SAFETY:
            // `ptr` is not null and properly aligned.
            // `len` is the correct length.
            unsafe { slice::from_raw_parts(self.ptr.as_ptr(), len) }
        }
    }

    /// Return the string as a mutable byte slice.
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption.
    pub(crate) fn as_slice_manually_mut(&mut self, len: usize) -> &mut [u8] {
        if len == 0 {
            // The pointer is dangling for empty strings, do not touch it.
            Default::default()
        } else {
            // SAFETY:
            // `ptr` is not null and properly aligned.
            // `len` is the correct length.
            unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), len) }
        }
    }

    /// Return the string this pointer holds.
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption.
    pub(crate) fn as_string_manually(&self, len: usize) -> &str {
        let bytes = self.as_slice_manually(len);
        // SAFETY:
        // Being valid UTF-8 is a precondition of `StringPtr`.
        unsafe { str::from_utf8_unchecked(bytes) }
    }

    /// Return `true` if the given strings are equal, `false` else.
    ///
    /// Both strings to compare must have the same length `len`.
    pub fn eq_manually(&self, other: &Self, len: usize) -> bool {
        self.as_slice_manually(len) == other.as_slice_manually(len)
    }
}