Short (up to 65,535 bytes) immutable strings, e.g. for parsing tokens, implemented in Rust. These are sometimes called "German Strings", because the paper that mentions them was written by Germans.
1// SPDX-FileCopyrightText: Copyright (C) 2024 Roland Csaszar
2// SPDX-License-Identifier: MPL-2.0
3//
4// Project: token-string
5// File: string_ptr.rs
6// Date: 22.Nov.2024
7// =============================================================================
8//! Pointer to a string, an internal, private struct used by the heap allocated
9//! strings of [`crate::TokenString`].
10
11extern crate alloc;
12
13use core::{mem, ptr, slice, str};
14
15
/// Thin, length-less pointer to the heap allocated bytes of a string.
///
/// Invariant: the bytes behind the pointer must form a valid UTF-8 string.
///
/// Only the address is stored. A dynamically sized array would normally need a
/// pointer and a length, but the length is already known to the caller and is
/// passed to every method that needs it, which keeps this struct at 64 bits.
#[derive(Debug)]
#[repr(C)]
pub struct StringPtr {
    /// Address of the first byte of the string's heap allocation.
    ptr: ptr::NonNull<u8>,
}

// Compile-time layout checks: the struct must be exactly as large and as
// aligned as a `u64`.
const _: () = {
    let expected = mem::size_of::<u64>();
    assert!(
        mem::align_of::<StringPtr>() == expected,
        "struct StringPtr is not aligned to 64 bits!"
    );
    assert!(
        mem::size_of::<StringPtr>() == expected,
        "struct StringPtr has size != 64 bits"
    );
};
38
39
/// Build the [`core::alloc::Layout`] of an array holding `len` elements of
/// type `T`.
///
/// This describes the memory block used to store a string's data.
///
/// # Panics
///
/// Panics if [`core::alloc::Layout::array`] reports an error, which is not
/// expected to happen for the sizes used by `TokenString`.
fn array_layout<T>(len: usize) -> core::alloc::Layout {
    let layout = core::alloc::Layout::array::<T>(len);
    layout.expect("Error: constructing an array layout for TokenString failed!")
}
48
49impl From<&[u8]> for StringPtr {
50 fn from(value: &[u8]) -> Self {
51 let ptr = if value.is_empty() {
52 ptr::NonNull::dangling()
53 } else {
54 let s = value.len();
55 let l = array_layout::<u8>(s);
56 // SAFETY:
57 // Well, we must allocate memory for the array, which is guaranteed
58 // to have a positive size `s`.
59 let raw = unsafe { alloc::alloc::alloc(l) };
60 let Some(not_null) = ptr::NonNull::new(raw) else {
61 alloc::alloc::handle_alloc_error(l)
62 };
63 // SAFETY:
64 // `not_null` is a newly allocated pointer, so it must be different
65 // from `value` and not overlapping. `s` is the size of both `value`
66 // and `not_null`.
67 unsafe {
68 ptr::copy_nonoverlapping(value.as_ptr(), not_null.as_ptr(), s);
69 }
70 not_null
71 };
72 Self { ptr }
73 }
74}
75
76// SAFETY:
77// This is an immutable pointer to a non-shared string.
78unsafe impl Send for StringPtr {}
79
80// SAFETY:
81// This is an immutable pointer to a non-shared string.
82unsafe impl Sync for StringPtr {}
83
84impl StringPtr {
85 /// Drop the [`StringPtr`], deallocate its memory.
86 ///
87 /// We cannot implement the [`Drop`] trait, as we must explicitly pass the
88 /// size of the string (the array).
89 ///
90 /// `len` must be the correct length of the string, else we get memory
91 /// corruption.
92 #[cfg_attr(test, mutants::skip)]
93 pub fn drop_manually(&mut self, len: usize) {
94 if len > 0 {
95 // SAFETY:
96 // The layout is the same as has been used when allocating.
97 unsafe {
98 alloc::alloc::dealloc(
99 self.ptr.as_ptr(),
100 array_layout::<u8>(len),
101 );
102 }
103 }
104 }
105
106 /// Clone the string by copying the array in memory.
107 ///
108 /// We cannot implement the [`Clone`] trait, as we must explicitly pass the
109 /// size of the string (the array).
110 ///
111 /// `len` must be the correct length of the string, else we get memory
112 /// corruption.
113 pub fn clone_manually(&self, len: usize) -> Self {
114 let ptr = if len == 0 {
115 ptr::NonNull::dangling()
116 } else {
117 let l = array_layout::<u8>(len);
118 // SAFETY:
119 // Well, we must allocate memory for the array, which is guaranteed
120 // to have a positive size `len`.
121 let raw = unsafe { alloc::alloc::alloc(l) };
122 let Some(not_null) = ptr::NonNull::new(raw) else {
123 alloc::alloc::handle_alloc_error(l)
124 };
125 // SAFETY:
126 // `not_null` is a newly allocated pointer, so it must be different
127 // from `self` and not overlapping. `len` is the size of both `self`
128 // and `not_null`.
129 unsafe {
130 ptr::copy_nonoverlapping(
131 self.ptr.as_ptr(),
132 not_null.as_ptr(),
133 len,
134 );
135 }
136 not_null
137 };
138 Self { ptr }
139 }
140
141 /// Copy the slice `value` into the string, starting at `index`.
142 pub(crate) const fn copy_manually(&mut self, idx: usize, value: &[u8]) {
143 // SAFETY:
144 // `not_null` is a newly allocated pointer, so it must be different
145 // from `self` and not overlapping. `len` is the size of both `self`
146 // and `not_null`.
147 unsafe {
148 ptr::copy_nonoverlapping(
149 value.as_ptr(),
150 self.ptr.as_ptr().add(idx),
151 value.len(),
152 );
153 }
154 }
155
156 /// Allocate memory for the string with a size of `len`.
157 pub(crate) fn alloc_manually(len: usize) -> Self {
158 debug_assert!(len > 0, "don't allocate an array of length 0!");
159 let l = array_layout::<u8>(len);
160 // SAFETY:
161 // Well, we must allocate memory for the array, which is guaranteed
162 // to have a positive size `s`.
163 let raw = unsafe { alloc::alloc::alloc(l) };
164 let Some(ptr) = ptr::NonNull::new(raw) else {
165 alloc::alloc::handle_alloc_error(l)
166 };
167 Self { ptr }
168 }
169
170 /// Return the string as a byte slice.
171 ///
172 /// `len` must be the correct length of the string, else we get memory
173 /// corruption.
174 pub(crate) fn as_slice_manually(&self, len: usize) -> &[u8] {
175 if len == 0 {
176 Default::default()
177 } else {
178 // SAFETY:
179 // `ptr` is not null and properly aligned.
180 // `len` is the correct length.
181 unsafe { slice::from_raw_parts(self.ptr.as_ptr(), len) }
182 }
183 }
184
185 /// Return the string as a mutable byte slice.
186 pub(crate) fn as_slice_manually_mut(&mut self, len: usize) -> &mut [u8] {
187 if len == 0 {
188 Default::default()
189 } else {
190 // SAFETY:
191 // `ptr` is not null and properly aligned.
192 // `len` is the correct length.
193 unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), len) }
194 }
195 }
196
197 /// Return the string this pointer holds.
198 pub(crate) fn as_string_manually(&self, len: usize) -> &str {
199 let bytes = self.as_slice_manually(len);
200 // SAFETY:
201 // Being valid UTF-8 is a precondition of `StringPtr`.
202 unsafe { str::from_utf8_unchecked(bytes) }
203 }
204
205 /// Return `true` if the given strings are equal, `false` else.
206 ///
207 /// Both strings to compare must have the same length.
208 pub fn eq_manually(&self, other: &Self, len: usize) -> bool {
209 self.as_slice_manually(len) == other.as_slice_manually(len)
210 }
211}