Short (up to 65,535 bytes) immutable strings, e.g. for parsing tokens, implemented in Rust. These are sometimes called "German Strings", because the paper describing them was written by Germans.
···12121313mod builder;
1414mod string;
1515+mod string_ptr;
15161717+// Exports.
1618pub use builder::{Builder, BuilderIter, Collect, Concat};
1719pub use string::{
1820 EMPTY,
···2325 TokenStringIter,
2426 TokenStringIterOwn,
2527};
2626-pub(crate) use string::{MAX_LENGTH_SMALL_ADD1, PREFIX_LENGTH, StringPtr};
2828+// Internal use
2929+pub(crate) use string::{MAX_LENGTH_SMALL_ADD1, PREFIX_LENGTH};
3030+pub(crate) use string_ptr::StringPtr;
+4-198
src/string.rs
···55// File: string.rs
66// Date: 22.Nov.2024
77// =============================================================================
88-//! Crate doc.
88+//! The string type [`TokenString`].
991010extern crate alloc;
11111212use alloc::string::ToString as _;
1313use alloc::vec;
1414-use core::{borrow, cmp, error, fmt, hash, mem, ops, panic, ptr, slice, str};
1414+use core::{borrow, cmp, error, fmt, hash, mem, ops, panic, slice, str};
1515+1616+use crate::StringPtr;
151716181719#[non_exhaustive]
···140142 /// For bigger strings as [`MAX_LENGTH_SMALL`], this points to the memory
141143 /// holding the whole string.
142144 pub(crate) ptr: mem::ManuallyDrop<StringPtr>,
143143-}
144144-145145-/// The string's data pointer, if allocated on the heap.
146146-///
147147-/// The data this points to must be a valid UTF-8 string.
148148-///
149149-/// This uses a trick to get the size to 64 bits. A non statically sized array
150150-/// needs two fields, the pointer and a length, but we already know the length
151151-/// and do not want another useless field holding it.
152152-#[repr(C)]
153153-#[derive(Debug)]
154154-pub struct StringPtr {
155155- /// The pointer to the string's data, allocated on the heap.
156156- ptr: ptr::NonNull<u8>,
157157-}
158158-159159-const _: () = assert!(
160160- mem::align_of::<StringPtr>() == mem::size_of::<u64>(),
161161- "struct StringPtr is not aligned to 64 bits!"
162162-);
163163-const _: () = assert!(
164164- mem::size_of::<StringPtr>() == mem::size_of::<u64>(),
165165- "struct StringPtr has size != 64 bits"
166166-);
167167-168168-169169-/// Return a [`Layout`] suitable for a byte array to hold the string's data.
170170-///
171171-/// Panics if something does not work out creating the layout - which should
172172-/// never happen.
173173-fn array_layout<T>(len: usize) -> core::alloc::Layout {
174174- core::alloc::Layout::array::<T>(len)
175175- .expect("Error: constructing an array layout for TokenString failed!")
176176-}
177177-178178-impl From<&[u8]> for StringPtr {
179179- fn from(value: &[u8]) -> Self {
180180- let ptr = if value.is_empty() {
181181- ptr::NonNull::dangling()
182182- } else {
183183- let s = value.len();
184184- let l = array_layout::<u8>(s);
185185- // SAFETY:
186186- // Well, we must allocate memory for the array, which is guaranteed
187187- // to have a positive size `s`.
188188- let raw = unsafe { alloc::alloc::alloc(l) };
189189- let Some(not_null) = ptr::NonNull::new(raw) else {
190190- alloc::alloc::handle_alloc_error(l)
191191- };
192192- // SAFETY:
193193- // `not_null` is a newly allocated pointer, so it must be different
194194- // from `value` and not overlapping. `s` is the size of both `value`
195195- // and `not_null`.
196196- unsafe {
197197- ptr::copy_nonoverlapping(value.as_ptr(), not_null.as_ptr(), s);
198198- }
199199- not_null
200200- };
201201- Self { ptr }
202202- }
203203-}
204204-205205-// SAFETY:
206206-// This is an immutable pointer to a non-shared string.
207207-unsafe impl Send for StringPtr {}
208208-209209-// SAFETY:
210210-// This is an immutable pointer to a non-shared string.
211211-unsafe impl Sync for StringPtr {}
212212-213213-impl StringPtr {
214214- /// Drop the [`StringPtr`], deallocate its memory.
215215- ///
216216- /// We cannot implement the [`Drop`] trait, as we must explicitly pass the
217217- /// size of the string (the array).
218218- ///
219219- /// `len` must be the correct length of the string, else we get memory
220220- /// corruption.
221221- fn drop_manually(&mut self, len: usize) {
222222- if len > 0 {
223223- // SAFETY:
224224- // The layout is the same as has been used when allocating.
225225- unsafe {
226226- alloc::alloc::dealloc(
227227- self.ptr.as_ptr(),
228228- array_layout::<u8>(len),
229229- );
230230- }
231231- }
232232- }
233233-234234- /// Clone the string by copying the array in memory.
235235- ///
236236- /// We cannot implement the [`Clone`] trait, as we must explicitly pass the
237237- /// size of the string (the array).
238238- ///
239239- /// `len` must be the correct length of the string, else we get memory
240240- /// corruption.
241241- fn clone_manually(&self, len: usize) -> Self {
242242- let ptr = if len == 0 {
243243- ptr::NonNull::dangling()
244244- } else {
245245- let l = array_layout::<u8>(len);
246246- // SAFETY:
247247- // Well, we must allocate memory for the array, which is guaranteed
248248- // to have a positive size `len`.
249249- let raw = unsafe { alloc::alloc::alloc(l) };
250250- let Some(not_null) = ptr::NonNull::new(raw) else {
251251- alloc::alloc::handle_alloc_error(l)
252252- };
253253- // SAFETY:
254254- // `not_null` is a newly allocated pointer, so it must be different
255255- // from `self` and not overlapping. `len` is the size of both `self`
256256- // and `not_null`.
257257- unsafe {
258258- ptr::copy_nonoverlapping(
259259- self.ptr.as_ptr(),
260260- not_null.as_ptr(),
261261- len,
262262- );
263263- }
264264- not_null
265265- };
266266- Self { ptr }
267267- }
268268-269269- /// Copy the slice `value` into the string, starting at `index`.
270270- pub(crate) fn copy_manually(&mut self, idx: usize, value: &[u8]) {
271271- // SAFETY:
272272- // `not_null` is a newly allocated pointer, so it must be different
273273- // from `self` and not overlapping. `len` is the size of both `self`
274274- // and `not_null`.
275275- unsafe {
276276- ptr::copy_nonoverlapping(
277277- value.as_ptr(),
278278- self.ptr.as_ptr().add(idx),
279279- value.len(),
280280- );
281281- }
282282- }
283283-284284- /// Allocate memory for the string with a size of `len`.
285285- pub(crate) fn alloc_manually(len: usize) -> Self {
286286- debug_assert!(len > 0, "don't allocate an array of length 0!");
287287- let l = array_layout::<u8>(len);
288288- // SAFETY:
289289- // Well, we must allocate memory for the array, which is guaranteed
290290- // to have a positive size `s`.
291291- let raw = unsafe { alloc::alloc::alloc(l) };
292292- let Some(ptr) = ptr::NonNull::new(raw) else {
293293- alloc::alloc::handle_alloc_error(l)
294294- };
295295- Self { ptr }
296296- }
297297-298298- /// Return the string as a byte slice.
299299- ///
300300- /// `len` must be the correct length of the string, else we get memory
301301- /// corruption.
302302- pub(crate) fn as_slice_manually(&self, len: usize) -> &[u8] {
303303- if len == 0 {
304304- Default::default()
305305- } else {
306306- // SAFETY:
307307- // `ptr` is not null and properly aligned.
308308- // `len` is the correct length.
309309- unsafe { slice::from_raw_parts(self.ptr.as_ptr(), len) }
310310- }
311311- }
312312-313313- /// Return the string as a mutable byte slice.
314314- pub(crate) fn as_slice_manually_mut(&mut self, len: usize) -> &mut [u8] {
315315- if len == 0 {
316316- Default::default()
317317- } else {
318318- // SAFETY:
319319- // `ptr` is not null and properly aligned.
320320- // `len` is the correct length.
321321- unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), len) }
322322- }
323323- }
324324-325325- /// Return the string this pointer holds.
326326- pub(crate) fn as_string_manually(&self, len: usize) -> &str {
327327- let bytes = self.as_slice_manually(len);
328328- // SAFETY:
329329- // Being valid UTF-8 is a precondition of `StringPtr`.
330330- unsafe { str::from_utf8_unchecked(bytes) }
331331- }
332332-333333- /// Return `true` if the given strings are equal, `false` else.
334334- ///
335335- /// Both strings to compare must have the same length.
336336- fn eq_manually(&self, other: &Self, len: usize) -> bool {
337337- self.as_slice_manually(len) == other.as_slice_manually(len)
338338- }
339145}
340146341147// =============================================================================
+209
src/string_ptr.rs
···11+// SPDX-FileCopyrightText: Copyright (C) 2024 Roland Csaszar
22+// SPDX-License-Identifier: MPL-2.0
33+//
44+// Project: token-string
55+// File: string_ptr.rs
66+// Date: 22.Nov.2024
77+// =============================================================================
88+//! Pointer to a string, internal struct for [`TokenString`].
99+1010+extern crate alloc;
1111+1212+use core::{mem, ptr, slice, str};
/// The string's data pointer, if allocated on the heap.
///
/// The data this points to must be a valid UTF-8 string.
///
/// This uses a trick to get the size to 64 bits. A non statically sized array
/// needs two fields, the pointer and a length, but we already know the length
/// and do not want another useless field holding it. As a consequence every
/// method that touches the data takes the length as an explicit `len`
/// argument, and the caller is responsible for passing the correct value.
#[repr(C)]
#[derive(Debug)]
pub struct StringPtr {
    /// The pointer to the string's data, allocated on the heap.
    ///
    /// Dangling (never dereferenced, never deallocated) for empty strings.
    ptr: ptr::NonNull<u8>,
}

// Compile-time layout checks: `StringPtr` must be exactly as large and as
// aligned as a `u64`.
const _: () = assert!(
    mem::align_of::<StringPtr>() == mem::size_of::<u64>(),
    "struct StringPtr is not aligned to 64 bits!"
);
const _: () = assert!(
    mem::size_of::<StringPtr>() == mem::size_of::<u64>(),
    "struct StringPtr has size != 64 bits"
);


/// Return a [`Layout`](core::alloc::Layout) suitable for an array of `len`
/// elements of type `T`, used to hold the string's data.
///
/// Panics if something does not work out creating the layout - which should
/// never happen.
fn array_layout<T>(len: usize) -> core::alloc::Layout {
    core::alloc::Layout::array::<T>(len)
        .expect("Error: constructing an array layout for TokenString failed!")
}

impl From<&[u8]> for StringPtr {
    /// Copy `value` into a freshly allocated buffer of the same length.
    ///
    /// An empty `value` does not allocate; the resulting pointer is dangling.
    /// The bytes are copied verbatim, no UTF-8 validation is performed here.
    fn from(value: &[u8]) -> Self {
        if value.is_empty() {
            return Self {
                ptr: ptr::NonNull::dangling(),
            };
        }
        let mut copy = Self::alloc_manually(value.len());
        // The buffer has exactly `value.len()` bytes, so copying the whole
        // slice to offset 0 stays in bounds.
        copy.copy_manually(0, value);
        copy
    }
}

// SAFETY:
// This is an immutable pointer to a non-shared string: the allocation is
// created by this type and handed out only through `&self`/`&mut self`
// borrows, so moving the owner to another thread is fine.
unsafe impl Send for StringPtr {}

// SAFETY:
// This is an immutable pointer to a non-shared string: all methods taking
// `&self` only read the data, writes need `&mut self`.
unsafe impl Sync for StringPtr {}

impl StringPtr {
    /// Drop the [`StringPtr`], deallocate its memory.
    ///
    /// We cannot implement the [`Drop`] trait, as we must explicitly pass the
    /// size of the string (the array).
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption. Nothing is deallocated if `len` is 0. The pointer must not
    /// be used after this call, and calling this twice is a double free.
    pub fn drop_manually(&mut self, len: usize) {
        if len > 0 {
            // SAFETY:
            // The layout is the same as has been used when allocating.
            unsafe {
                alloc::alloc::dealloc(
                    self.ptr.as_ptr(),
                    array_layout::<u8>(len),
                );
            }
        }
    }

    /// Clone the string by copying the array in memory.
    ///
    /// We cannot implement the [`Clone`] trait, as we must explicitly pass the
    /// size of the string (the array).
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption.
    pub fn clone_manually(&self, len: usize) -> Self {
        // `From<&[u8]>` handles the empty case and does the allocation and
        // the copy for everything else.
        Self::from(self.as_slice_manually(len))
    }

    /// Copy the slice `value` into the string, starting at `idx`.
    ///
    /// `idx + value.len()` must not exceed the length the string has been
    /// allocated with, else we get memory corruption. Copying an empty slice
    /// is a no-op.
    pub(crate) fn copy_manually(&mut self, idx: usize, value: &[u8]) {
        if value.is_empty() {
            // Nothing to copy; also avoids offsetting a possibly dangling
            // pointer.
            return;
        }
        // SAFETY:
        // The caller guarantees that `idx + value.len()` is within the
        // allocation of `self`. `value` is a shared borrow while `self` is
        // borrowed exclusively, so the two regions cannot overlap.
        unsafe {
            ptr::copy_nonoverlapping(
                value.as_ptr(),
                self.ptr.as_ptr().add(idx),
                value.len(),
            );
        }
    }

    /// Allocate memory for the string with a size of `len`.
    ///
    /// The memory is uninitialized and must be filled (see
    /// [`Self::copy_manually`]) before it is read.
    ///
    /// `len` should be greater than 0; this is checked by a debug assertion.
    /// In builds without debug assertions a `len` of 0 yields a dangling
    /// pointer without allocating, which is what [`Self::drop_manually`]
    /// expects for empty strings.
    pub(crate) fn alloc_manually(len: usize) -> Self {
        debug_assert!(len > 0, "don't allocate an array of length 0!");
        if len == 0 {
            // Calling the allocator with a zero-sized layout is undefined
            // behaviour, so never do that.
            return Self {
                ptr: ptr::NonNull::dangling(),
            };
        }
        let layout = array_layout::<u8>(len);
        // SAFETY:
        // `len` is greater than 0, so the layout has a non-zero size.
        let raw = unsafe { alloc::alloc::alloc(layout) };
        let Some(ptr) = ptr::NonNull::new(raw) else {
            alloc::alloc::handle_alloc_error(layout)
        };
        Self { ptr }
    }

    /// Return the string as a byte slice.
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption.
    pub(crate) fn as_slice_manually(&self, len: usize) -> &[u8] {
        if len == 0 {
            &[]
        } else {
            // SAFETY:
            // `ptr` is not null and properly aligned.
            // `len` is the correct length.
            unsafe { slice::from_raw_parts(self.ptr.as_ptr(), len) }
        }
    }

    /// Return the string as a mutable byte slice.
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption.
    pub(crate) fn as_slice_manually_mut(&mut self, len: usize) -> &mut [u8] {
        if len == 0 {
            &mut []
        } else {
            // SAFETY:
            // `ptr` is not null and properly aligned.
            // `len` is the correct length.
            unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), len) }
        }
    }

    /// Return the string this pointer holds.
    ///
    /// `len` must be the correct length of the string, else we get memory
    /// corruption.
    pub(crate) fn as_string_manually(&self, len: usize) -> &str {
        let bytes = self.as_slice_manually(len);
        // SAFETY:
        // Being valid UTF-8 is a precondition of `StringPtr`.
        unsafe { str::from_utf8_unchecked(bytes) }
    }

    /// Return `true` if the given strings are equal, `false` else.
    ///
    /// Both strings to compare must have the same length `len`.
    pub fn eq_manually(&self, other: &Self, len: usize) -> bool {
        self.as_slice_manually(len) == other.as_slice_manually(len)
    }
}