Short (at most 65,535 bytes) immutable strings, e.g. for parsing tokens, implemented in Rust. These are sometimes called "German Strings", because the paper describing them was written by Germans.
1// SPDX-FileCopyrightText: Copyright (C) 2024 Roland Csaszar
2// SPDX-License-Identifier: MPL-2.0
3//
4// Project: token-string
5// File: string.rs
6// Date: 22.Nov.2024
7// =============================================================================
8//! The string type [`TokenString`].
9
10extern crate alloc;
11
12use alloc::string::ToString as _;
13use alloc::vec;
14use core::{borrow, cmp, fmt, hash, mem, ops, panic, slice, str};
15
16use crate::{StringPtr, TkStrError};
17
/// The length of the prefix of the string, that is, the first bytes stored
/// in the field `prefix` for comparisons.
///
/// Computed as the size of a `u64` minus the size of the `u16` length field,
/// so that `len` and `prefix` together occupy exactly 64 bits.
pub const PREFIX_LENGTH: usize = mem::size_of::<u64>() - mem::size_of::<u16>();

/// Helper constant for matching intervals: the smallest length that no longer
/// fits into the prefix alone.
const PREFIX_LENGTH_ADD1: usize = PREFIX_LENGTH + 1;

/// The length of the non-prefix part of a "small string", 8 bytes. The content
/// of the field `u.small`.
pub const SMALL_DATA_LENGTH: usize = mem::size_of::<u64>();

/// The maximum length in bytes, not Unicode scalar values, of a "small" string
/// that is saved in the struct [`TokenString`] itself and not on the heap.
pub const MAX_LENGTH_SMALL: usize = PREFIX_LENGTH + SMALL_DATA_LENGTH;

/// Helper constant for matching intervals: the smallest length that is stored
/// on the heap.
pub const MAX_LENGTH_SMALL_ADD1: usize = MAX_LENGTH_SMALL + 1;

/// The maximum length in bytes, not Unicode scalar values, of a
/// [`TokenString`].
///
/// This is the biggest value the `u16` length field can hold.
pub const MAX_LENGTH: usize = u16::MAX as usize;
39
/// A string which can hold at most [`MAX_LENGTH`] bytes (not Unicode scalar
/// values).
///
/// This holds valid UTF-8 encoded strings only.
/// Strings that are short enough, which need at most [`MAX_LENGTH_SMALL`]
/// bytes, are stored in the struct itself, bigger ones use the heap.
///
/// # Invariant
///
/// - [`TokenString`] must be a UTF-8 string (like &[`prim@str`] and
///   [`alloc::string::String`]).
/// - The length of a [`TokenString`] is at most [`MAX_LENGTH`] and at least 0 -
///   the empty string.
/// - Unused bytes of `prefix` and of the inline data are zero; the
///   constructors in this file always start from zeroed arrays.
#[repr(C)]
pub struct TokenString {
    /// The length of the string in bytes.
    ///
    /// Maximum: [`MAX_LENGTH`]. The length also decides which field of `u` is
    /// active: inline data up to [`MAX_LENGTH_SMALL`], a pointer above.
    pub(crate) len: u16,
    /// The first [`PREFIX_LENGTH`] bytes of the string, zero padded if the
    /// string is shorter.
    pub(crate) prefix: [u8; PREFIX_LENGTH],
    /// The data (see [`Data`]).
    ///
    /// If the string is at most [`MAX_LENGTH_SMALL`] bytes, this holds the
    /// other bytes of the string, else this is a pointer to the heap.
    pub(crate) u: Data,
}
67
68
// Invariants: [`TokenString`] must be aligned to 64 bits and its size must be
// 128 bits. That means that `sizeof len + prefix == 64 bit` and
// `sizeof u == 64 bit`. So there is no padding.
//
// The code that views `prefix` and `u.small` as one contiguous byte range
// relies on this layout, so it is checked at compile time.

// The whole struct is aligned like a `u64`.
const _: () = assert!(
    mem::align_of::<TokenString>() == mem::size_of::<u64>(),
    "struct TokenString is not aligned to 64 bits!"
);
// The whole struct is exactly two `u64`s big.
const _: () = assert!(
    mem::size_of::<TokenString>() == 2 * mem::size_of::<u64>(),
    "struct TokenString has size != 128 bits"
);
// The union is aligned like a `u64`.
const _: () = assert!(
    mem::align_of::<Data>() == mem::size_of::<u64>(),
    "struct Data is not aligned to 64 bits!"
);
// The union is exactly one `u64` big.
const _: () = assert!(
    mem::size_of::<Data>() == mem::size_of::<u64>(),
    "union Data has size != 64 bits"
);
89
90// =============================================================================
91// Inner types of `TokenString`.
92
/// This is either a pointer to the whole string, if the string is bigger than
/// [`MAX_LENGTH_SMALL`] bytes, or the bytes of the string that follow the
/// prefix, stored inline as an array of bytes.
///
/// Which field is active is not stored in the union itself, it is derived
/// from the length of the owning [`TokenString`].
///
/// See [`StringPtr`]
#[repr(C)]
pub union Data {
    /// If the string is small enough (at most [`MAX_LENGTH_SMALL`]), its data
    /// after the prefix is here.
    pub(crate) small: [u8; SMALL_DATA_LENGTH],
    /// For strings bigger than [`MAX_LENGTH_SMALL`], this points to the memory
    /// holding the whole string.
    ///
    /// Wrapped in [`mem::ManuallyDrop`], so the pointer has to be released
    /// explicitly.
    pub(crate) ptr: mem::ManuallyDrop<StringPtr>,
}
106
107// =============================================================================
108// `TokenString` itself
109
110/// The empty string.
111///
112/// Has a length of zero.
113pub const EMPTY: TokenString = TokenString {
114 len: 0,
115 prefix: [0_u8; PREFIX_LENGTH],
116 u: Data {
117 small: [0_u8; SMALL_DATA_LENGTH],
118 },
119};
120
121// =============================================================================
122// Traits
123
124impl TryFrom<&str> for TokenString {
125 type Error = TkStrError;
126
127 /// Create a [`TokenString`] from a &[`prim@str`].
128 ///
129 /// Return [`TkStrError::TooBig`] if the argument is greater than
130 /// [`MAX_LENGTH`].
131 ///
132 /// Memory:
133 ///
134 /// Allocates if and only if the length of `value` is bigger than
135 /// [`MAX_LENGTH_SMALL`].
136 fn try_from(value: &str) -> Result<Self, Self::Error> {
137 let bytes = value.as_bytes();
138 match value.len() {
139 | 0 => Ok(Self {
140 len: 0,
141 prefix: [0_u8; PREFIX_LENGTH],
142 u: Data {
143 small: [0_u8; SMALL_DATA_LENGTH],
144 },
145 }),
146 | 1 ..= PREFIX_LENGTH => {
147 let s = value.len();
148 let mut prefix = [0_u8; PREFIX_LENGTH];
149 prefix[.. s].copy_from_slice(&bytes[.. s]);
150 Ok(Self {
151 #[expect(
152 clippy::cast_possible_truncation,
153 reason = "Length has been checked above"
154 )]
155 len: s as u16,
156 prefix,
157 u: Data {
158 small: [0_u8; SMALL_DATA_LENGTH],
159 },
160 })
161 }
162 | PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => {
163 let s = value.len();
164 let mut prefix = [0_u8; PREFIX_LENGTH];
165 prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
166 let mut small = [0_u8; SMALL_DATA_LENGTH];
167 small[.. s - PREFIX_LENGTH]
168 .copy_from_slice(&bytes[PREFIX_LENGTH .. s]);
169 Ok(Self {
170 #[expect(
171 clippy::cast_possible_truncation,
172 reason = "Length has been checked above"
173 )]
174 len: s as u16,
175 prefix,
176 u: Data { small },
177 })
178 }
179 | MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => {
180 let ptr = StringPtr::from(bytes);
181 let u = Data {
182 ptr: mem::ManuallyDrop::new(ptr),
183 };
184 let mut prefix = [0_u8; PREFIX_LENGTH];
185 prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
186 Ok(Self {
187 #[expect(
188 clippy::cast_possible_truncation,
189 reason = "Length has been checked above"
190 )]
191 len: value.len() as u16,
192 prefix,
193 u,
194 })
195 }
196 | _ => Err(TkStrError::TooBig(value.len())),
197 }
198 }
199}
200
201impl TryFrom<&[u8]> for TokenString {
202 type Error = TkStrError;
203
204 /// Try to create a [`TokenString`] from the given slice.
205 ///
206 /// Return [`TkStrError::TooBig`] if the given slice is too big, greater
207 /// than [`MAX_LENGTH`].
208 /// Return [`TkStrError::UnicodeError`]
209 ///
210 /// Memory:
211 ///
212 /// Allocates if and only if the length of `value` is bigger than
213 /// [`MAX_LENGTH_SMALL`].
214 #[inline]
215 fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
216 match str::from_utf8(value) {
217 | Ok(str) => Self::try_from(str),
218 | Err(utf_err) => Err(TkStrError::UnicodeError(utf_err)),
219 }
220 }
221}
222
223impl TryFrom<&[char]> for TokenString {
224 type Error = TkStrError;
225
226 /// Try to create a [`TokenString`] from the given slice.
227 ///
228 /// Return [`TkStrError::TooBig`] if the given slice is too big, greater
229 /// than [`MAX_LENGTH`].
230 ///
231 /// Memory
232 ///
233 /// Allocates and deallocates a temporary [`alloc::string::String`]
234 /// collecting the converted bytes.
235 #[inline]
236 fn try_from(value: &[char]) -> Result<Self, Self::Error> {
237 let i = value.iter();
238 Self::try_from(i.collect::<alloc::string::String>())
239 }
240}
241
242impl TryFrom<&alloc::string::String> for TokenString {
243 type Error = TkStrError;
244
245 /// Create a `TokenString` from a &[`alloc::string::String`].
246 ///
247 /// Return [`TkStrError::TooBig`] if the argument is greater than
248 /// [`MAX_LENGTH`].
249 ///
250 /// Memory:
251 ///
252 /// Allocates if and only if the length of `value` is bigger than
253 /// [`MAX_LENGTH_SMALL`].
254 #[inline]
255 fn try_from(value: &alloc::string::String) -> Result<Self, Self::Error> {
256 let str = value.as_str();
257 Self::try_from(str)
258 }
259}
260
261impl TryFrom<alloc::string::String> for TokenString {
262 type Error = TkStrError;
263
264 /// Create a [`TokenString`] from a [`alloc::string::String`].
265 ///
266 /// Return [`TkStrError::TooBig`] if the argument is greater than
267 /// [`MAX_LENGTH`].
268 ///
269 /// Memory:
270 ///
271 /// Allocates if and only if the length of `value` is bigger than
272 /// [`MAX_LENGTH_SMALL`].
273 #[inline]
274 fn try_from(value: alloc::string::String) -> Result<Self, Self::Error> {
275 // Sadly we can't use the string's data directly, as a [`String`] has a
276 // capacity which is to be known when deallocating the data.
277 // See [`String::into_raw_parts`].
278 let str = value.as_str();
279 Self::try_from(str)
280 }
281}
282
283impl Drop for TokenString {
284 #[cfg_attr(test, mutants::skip)]
285 #[inline]
286 fn drop(&mut self) {
287 if usize::from(self.len) > MAX_LENGTH_SMALL {
288 // SAFETY:
289 // We know that there is a pointer saved in the union.
290 // The whole string is being dropped, so taking a mutable
291 // reference of the pointer is legal.
292 let mut m_ptr = unsafe { mem::ManuallyDrop::take(&mut self.u.ptr) };
293 m_ptr.drop_manually(self.len.into());
294 }
295 }
296}
297
298impl Clone for TokenString {
299 /// Return a clone of the [`TokenString`].
300 ///
301 /// Memory:
302 ///
303 /// Allocates if and only if the length of `value` is bigger than
304 /// [`MAX_LENGTH_SMALL`].
305 #[inline]
306 fn clone(&self) -> Self {
307 let u = if self.len as usize > MAX_LENGTH_SMALL {
308 Data {
309 // SAFETY:
310 // We check, that there is an allocated pointer saved in the
311 // union.
312 ptr: mem::ManuallyDrop::new(unsafe {
313 self.u.ptr.clone_manually(self.len.into())
314 }),
315 }
316 } else {
317 Data {
318 // SAFETY:
319 // We check, that there is a small string in the union.
320 small: unsafe { self.u.small },
321 }
322 };
323 Self {
324 len: self.len,
325 prefix: self.prefix,
326 u,
327 }
328 }
329}
330
331impl Default for TokenString {
332 /// Return the empty string.
333 #[inline]
334 fn default() -> Self {
335 EMPTY
336 }
337}
338
339impl Eq for TokenString {}
340
341impl PartialEq for TokenString {
342 #[inline]
343 fn eq(&self, other: &Self) -> bool {
344 if self.len != other.len || self.prefix != other.prefix {
345 return false;
346 }
347
348 if self.len as usize <= MAX_LENGTH_SMALL {
349 // SAFETY:
350 // We know we have two small strings to compare.
351 unsafe { self.u.small == other.u.small }
352 } else {
353 // SAFETY:
354 // We know we have two string pointers to compare.
355 unsafe { self.u.ptr.eq_manually(&other.u.ptr, self.len.into()) }
356 }
357 }
358}
359
360impl PartialEq<[u8]> for TokenString {
361 fn eq(&self, other: &[u8]) -> bool {
362 if self.len as usize != other.len() {
363 return false;
364 }
365 let len = self.len as usize;
366 match len {
367 | 0 => true,
368 | 1 ..= PREFIX_LENGTH => self.prefix[.. len] == other[.. len],
369 | PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => {
370 // SAFETY:
371 // Use the whole memory region of self.`prefix` and
372 // `self.u.small` as a single array. This is not UB, as the
373 // whole memory `TokenString` has been allocated at once and
374 // is guaranteed to be continuous in memory. If Miri
375 // complains about this, use the flag `MIRIFLAGS="
376 // -Zmiri-tree-borrows"` to use "tree borrows" instead of
377 // "stacked borrows".
378 let bytes =
379 unsafe { slice::from_raw_parts(self.prefix.as_ptr(), len) };
380 bytes == other
381 }
382 // SAFETY:
383 // We know that the pointer actually points to allocated memory.
384 | MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => unsafe {
385 self.u.ptr.as_slice_manually(len) == other
386 },
387 | _ => panic!("The TokenString is bigger than MAX_LENGTH!"),
388 }
389 }
390}
391
392impl PartialEq<str> for TokenString {
393 #[inline]
394 fn eq(&self, other: &str) -> bool {
395 self == other.as_bytes()
396 }
397}
398
399impl PartialEq<alloc::string::String> for TokenString {
400 #[inline]
401 fn eq(&self, other: &alloc::string::String) -> bool {
402 self == other.as_bytes()
403 }
404}
405
406
407impl Ord for TokenString {
408 /// Compare two [`TokenString`]s byte-wise.
409 ///
410 /// This is not a sensible alphabetical comparison for anything that isn't
411 /// ASCII.
412 #[inline]
413 fn cmp(&self, other: &Self) -> cmp::Ordering {
414 let pref_ord = self.prefix.cmp(&other.prefix);
415 if pref_ord != cmp::Ordering::Equal {
416 return pref_ord;
417 }
418
419 self.suffix().cmp(other.suffix())
420 }
421}
422
423impl PartialOrd for TokenString {
424 /// Compare two [`TokenString`]s byte-wise.
425 ///
426 /// This is not a sensible alphabetical comparison for anything that isn't
427 /// ASCII.
428 #[inline]
429 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
430 Some(self.cmp(other))
431 }
432}
433
434impl fmt::Display for TokenString {
435 #[inline]
436 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
437 write!(f, "{}", self.as_str())
438 }
439}
440
441impl fmt::Debug for TokenString {
442 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
443 if self.len as usize > MAX_LENGTH_SMALL {
444 let string =
445 // SAFETY:
446 // We know that the pointer points to a string.
447 unsafe { self.u.ptr.as_string_manually(self.len.into()) };
448 // SAFETY:
449 // We know that the pointer points to a string.
450 let ptr = unsafe { &self.u.ptr };
451 f.debug_struct("TokenString")
452 .field("len", &self.len)
453 .field("prefix", &self.prefix_str())
454 .field("ptr", ptr)
455 .field("string", &string)
456 .finish()
457 } else {
458 // SAFETY:
459 // We've checked that this is a small string.
460 unsafe {
461 f.debug_struct("TokenString")
462 .field("len", &self.len)
463 .field("prefix", &self.prefix_str())
464 .field("small", &self.small_str())
465 .field("string", &self.as_str())
466 .finish()
467 }
468 }
469 }
470}
471
472impl<Idx> ops::Index<Idx> for TokenString
473where
474 Idx: slice::SliceIndex<str>,
475{
476 type Output = Idx::Output;
477
478 #[inline]
479 fn index(&self, index: Idx) -> &Self::Output {
480 self.as_str().index(index)
481 }
482}
483
484impl borrow::Borrow<str> for TokenString {
485 #[inline]
486 fn borrow(&self) -> &str {
487 self.as_str()
488 }
489}
490
491impl AsRef<str> for TokenString {
492 #[inline]
493 fn as_ref(&self) -> &str {
494 self.as_str()
495 }
496}
497
498impl hash::Hash for TokenString {
499 #[inline]
500 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
501 self.as_str().hash(state);
502 }
503}
504
// SAFETY:
// A `TokenString` stores its bytes either inline or behind its own pointer,
// `Clone` makes a new copy of the data instead of sharing it.
// NOTE(review): this assumes that `StringPtr` is an exclusively owned
// allocation without thread affinity - confirm against `StringPtr`.
unsafe impl Send for TokenString {}

// SAFETY:
// `TokenString` is immutable: no method visible here changes the string
// through a shared reference.
unsafe impl Sync for TokenString {}
512
513// =============================================================================
514// Non trait methods
515
516impl TokenString {
517 /// Return the prefix as a `&[u8]`.
518 fn prefix_str(&self) -> &[u8] {
519 let l = cmp::min(self.len as usize, PREFIX_LENGTH);
520 &self.prefix[.. l]
521 }
522
523 /// Return the suffix of a small string as a `&[u8]`.
524 ///
525 /// # Safety
526 ///
527 /// Must be called with a small string only!
528 unsafe fn small_str(&self) -> &[u8] {
529 let l = (self.len as usize).saturating_sub(PREFIX_LENGTH);
530 // SAFETY:
531 // We know that the union contains a small string.
532 unsafe { &self.u.small[.. l] }
533 }
534
535 /// Return the length of the string in bytes.
536 ///
537 /// This is the length of the string in bytes, not Unicode scalar values and
538 /// not grapheme clusters.
539 #[must_use]
540 #[inline]
541 pub const fn len(&self) -> usize {
542 self.len as usize
543 }
544
545 /// Return `true` if the string is a "small string", that is, it is saved in
546 /// the [`TokenString`] struct itself.
547 ///
548 /// If this returns `false`, the string is allocated on the heap.
549 #[must_use]
550 #[inline]
551 pub const fn is_small(&self) -> bool {
552 self.len as usize <= MAX_LENGTH_SMALL
553 }
554
555 /// Return `true`, if this is the empty string.
556 ///
557 /// Returns `false` else.
558 #[must_use]
559 #[inline]
560 pub const fn is_empty(&self) -> bool {
561 self.len == 0
562 }
563
564 /// Convert to a [`TokenString`].
565 ///
566 /// `bytes` must be valid UTF-8, use [`TokenString::try_from`] if you are
567 /// not sure that it is valid. If the given byte slice is bigger than
568 /// [`MAX_LENGTH`], this panics.
569 ///
570 /// Memory:
571 ///
572 /// Allocates if and only if the length of `bytes` is bigger than
573 /// [`MAX_LENGTH_SMALL`].
574 ///
575 /// # Panics
576 ///
577 /// Panics if `bytes` is bigger than [`MAX_LENGTH`].
578 ///
579 /// # Safety
580 ///
581 /// `bytes` must be valid UTF-8, if not, all bets are off - UB!
582 #[must_use]
583 pub unsafe fn from_bytes_unchecked(bytes: &[u8]) -> Self {
584 match bytes.len() {
585 | 0 => Self {
586 len: 0,
587 prefix: [0_u8; PREFIX_LENGTH],
588 u: Data {
589 small: [0_u8; SMALL_DATA_LENGTH],
590 },
591 },
592 | 1 ..= PREFIX_LENGTH => {
593 let s = bytes.len();
594 let mut prefix = [0_u8; PREFIX_LENGTH];
595 prefix[.. s].copy_from_slice(&bytes[.. s]);
596 Self {
597 #[expect(
598 clippy::cast_possible_truncation,
599 reason = "Length has been checked above"
600 )]
601 len: s as u16,
602 prefix,
603 u: Data {
604 small: [0_u8; SMALL_DATA_LENGTH],
605 },
606 }
607 }
608 | PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => {
609 let s = bytes.len();
610 let mut prefix = [0_u8; PREFIX_LENGTH];
611 prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
612 let mut small = [0_u8; SMALL_DATA_LENGTH];
613 small[.. s - PREFIX_LENGTH]
614 .copy_from_slice(&bytes[PREFIX_LENGTH .. s]);
615 Self {
616 #[expect(
617 clippy::cast_possible_truncation,
618 reason = "Length has been checked above"
619 )]
620 len: s as u16,
621 prefix,
622 u: Data { small },
623 }
624 }
625 | MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => {
626 let ptr = StringPtr::from(bytes);
627 let u = Data {
628 ptr: mem::ManuallyDrop::new(ptr),
629 };
630 let mut prefix = [0_u8; PREFIX_LENGTH];
631 prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
632 Self {
633 #[expect(
634 clippy::cast_possible_truncation,
635 reason = "Length has been checked above"
636 )]
637 len: bytes.len() as u16,
638 prefix,
639 u,
640 }
641 }
642 | _ => panic!(
643 "This byte slice is too big for a TokenString, {} > \
644 {MAX_LENGTH}",
645 bytes.len()
646 ),
647 }
648 }
649
650 /// Convert to a [`TokenString`].
651 ///
652 /// If the given string `s` is bigger than [`MAX_LENGTH`], this panics. Use
653 /// [`TokenString::try_from`] for a function that does not panic. The string
654 /// `s` must be valid UTF-8 too, but it has already been UB if it isn't.
655 ///
656 /// Memory:
657 ///
658 /// Allocates if and only if the length of `s` is bigger than
659 /// [`MAX_LENGTH_SMALL`].
660 ///
661 /// # Panics
662 ///
663 /// Panics if `s` is bigger than [`MAX_LENGTH`].
664 #[must_use]
665 #[inline]
666 pub fn from_str_unchecked(s: &str) -> Self {
667 // SAFETY:
668 // The unsafe part of `from_bytes_unchecked` is the possibility of the
669 // byte slice not being valid UTF-8. We are processing an UTF-8 string
670 // here.
671 unsafe { Self::from_bytes_unchecked(s.as_bytes()) }
672 }
673
674 /// Convert to a [`TokenString`].
675 ///
676 /// If the given string `s` is bigger than [`MAX_LENGTH`], this panics. Use
677 /// [`TokenString::try_from`] for a function that does not panic. The string
678 /// `s` must be valid UTF-8 too, but it has already been UB if it isn't.
679 ///
680 /// Memory:
681 ///
682 /// Allocates if and only if the length of `s` is bigger than
683 /// [`MAX_LENGTH_SMALL`].
684 ///
685 /// # Panics
686 ///
687 /// Panics if `s` is bigger than [`MAX_LENGTH`].
688 #[must_use]
689 #[inline]
690 pub fn from_string_unchecked(s: &alloc::string::String) -> Self {
691 // SAFETY:
692 // The unsafe part of `from_bytes_unchecked` is the possibility of the
693 // byte slice not being valid UTF-8. We are processing an UTF-8 string
694 // here.
695 unsafe { Self::from_bytes_unchecked(s.as_bytes()) }
696 }
697
698 /// Return the string as a &[`prim@str`].
699 #[must_use]
700 #[inline]
701 pub fn as_str(&self) -> &str {
702 if self.len == 0 {
703 ""
704 } else if self.len as usize > MAX_LENGTH_SMALL {
705 // SAFETY:
706 // We know, that in the union must be a valid pointer.
707 unsafe { self.u.ptr.as_string_manually(self.len.into()) }
708 } else {
709 // SAFETY:
710 // Use the whole memory region of self.`prefix` and `self.u.small`
711 // as a single array. This is not UB, as the whole memory
712 // `TokenString` has been allocated at once and is guaranteed to be
713 // continuous in memory. If Miri complains about this, use the
714 // flag `MIRIFLAGS="-Zmiri-tree-borrows"` to use "tree borrows"
715 // instead of "stacked borrows".
716 let bytes = unsafe {
717 slice::from_raw_parts(self.prefix.as_ptr(), self.len.into())
718 };
719 // SAFETY:
720 // The precondition of `TokenString` is that the string is a valid
721 // UTF-8 byte sequence.
722 unsafe { str::from_utf8_unchecked(bytes) }
723 }
724 }
725
726 /// Return the string as a byte slice.
727 #[must_use]
728 #[inline]
729 pub fn as_bytes(&self) -> &[u8] {
730 if self.len == 0 {
731 Default::default()
732 } else if self.len as usize > MAX_LENGTH_SMALL {
733 // SAFETY:
734 // We know, that in the union must be a valid pointer.
735 unsafe { self.u.ptr.as_slice_manually(self.len.into()) }
736 } else {
737 // SAFETY:
738 // Use the whole memory region of self.`prefix` and `self.u.small`
739 // as a single array. This is not UB, as the whole memory
740 // `TokenString` has been allocated at once and is guaranteed to be
741 // continuous in memory. If Miri complains about this, use the
742 // flag `MIRIFLAGS="-Zmiri-tree-borrows"` to use "tree borrows"
743 // instead of "stacked borrows".
744 unsafe {
745 slice::from_raw_parts(self.prefix.as_ptr(), self.len.into())
746 }
747 }
748 }
749
750 /// Return the string as a new [`alloc::string::String`].
751 ///
752 /// Memory:
753 ///
754 /// Allocates a new [`alloc::string::String`].
755 #[must_use]
756 #[inline]
757 pub fn as_string(&self) -> alloc::string::String {
758 self.to_string()
759 }
760
761 /// Return the string as a new vector of [`char`]s.
762 ///
763 /// Memory:
764 ///
765 /// Allocates a new [`vec::Vec`].
766 #[must_use]
767 #[inline]
768 pub fn as_chars(&self) -> vec::Vec<char> {
769 self.as_str().chars().collect()
770 }
771
772 /// Return the part of the string which is not stored in `self.prefix`.
773 ///
774 /// If the string is <= [`PREFIX_LENGTH`], the empty slice is returned.
775 fn suffix(&self) -> &[u8] {
776 match self.len as usize {
777 | 0 ..= PREFIX_LENGTH => Default::default(),
778 | PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL =>
779 // SAFETY:
780 // We checked and know that this is a small string.
781 unsafe { &self.u.small },
782 | MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH =>
783 // SAFETY:
784 // We checked and know that this string is allocated on the heap.
785 unsafe {
786 &self.u.ptr.as_slice_manually(self.len.into())[PREFIX_LENGTH ..]
787 },
788 | _ => panic!(
789 "Error: this TokenString is bigger than \
790 TokenString::MAX_LENGTH!"
791 ),
792 }
793 }
794
795 /// Return the byte at index `idx`, check bounds.
796 ///
797 /// Returns [`TkStrError::OutOfBounds`] if the index is bigger than the
798 /// string's length.
799 ///
800 /// # Errors
801 /// [`TkStrError::OutOfBounds`] if `idx` is bigger than the string's length.
802 #[inline]
803 pub fn get(&self, idx: u16) -> Result<u8, TkStrError> {
804 if idx >= self.len {
805 return Err(TkStrError::OutOfBounds(idx as usize));
806 }
807 // SAFETY:
808 // We check above that the index is in bounds.
809 unsafe { Ok(*self.as_bytes().get_unchecked(idx as usize)) }
810 }
811
812 /// Return the byte at index `idx`, don't check bounds.
813 ///
814 /// Panics if the index is bigger than the
815 /// string's length.
816 ///
817 /// # Panics
818 ///
819 /// if `idx` is bigger than the string's length.
820 #[must_use]
821 #[inline]
822 pub fn get_unchecked(&self, idx: u16) -> u8 {
823 assert!((idx < self.len), "index {idx} out of bounds");
824 // SAFETY:
825 // We check above that the index is in bounds.
826 unsafe { *self.as_bytes().get_unchecked(idx as usize) }
827 }
828
829 /// Return an iterator over the `[char]`s of a string.
830 ///
831 /// That is, an iterator over the Unicode scalar values of the
832 /// `TokenString`.
833 #[inline]
834 pub fn chars(&'_ self) -> str::Chars<'_> {
835 self.as_str().chars()
836 }
837
838 /// Get a reference iterator.
839 #[must_use]
840 #[inline]
841 pub fn iter(&self) -> TokenStringIter<'_> {
842 <&Self as IntoIterator>::into_iter(self)
843 }
844
845 /// Return `true`, if the first byte is an uppercase ASCII character.
846 #[must_use]
847 #[inline]
848 pub const fn starts_ascii_uppercase(&self) -> bool {
849 self.prefix[0].is_ascii_uppercase()
850 }
851
852 /// Return `true`, if the first byte is an lowercase ASCII character.
853 #[must_use]
854 #[inline]
855 pub const fn starts_ascii_lowercase(&self) -> bool {
856 self.prefix[0].is_ascii_lowercase()
857 }
858
859 /// Return `true`, if the string contains only ASCII characters.
860 #[must_use]
861 #[inline]
862 pub fn is_ascii(&self) -> bool {
863 self.as_bytes().is_ascii()
864 }
865
866 /// Return `true`, if the string starts with `needle`.
867 ///
868 /// Returns `true` too if the string is `needle`.
869 #[must_use]
870 #[inline]
871 pub fn starts_with(&self, needle: &Self) -> bool {
872 self.as_bytes().starts_with(needle.as_bytes())
873 }
874
875 /// Return `true`, if the string starts with `needle`.
876 ///
877 /// Returns `true` too if the string is `needle`.
878 #[must_use]
879 #[inline]
880 pub fn starts_with_bytes(&self, needle: &[u8]) -> bool {
881 self.as_bytes().starts_with(needle)
882 }
883
884 /// Return `true`, if the string starts with `needle`.
885 ///
886 /// Returns `true` too if the string is `needle`.
887 #[must_use]
888 #[inline]
889 pub fn starts_with_str(&self, needle: &str) -> bool {
890 self.as_str().starts_with(needle)
891 }
892
893 /// Return `true`, if the string ends with `needle`.
894 ///
895 /// Returns `true` too if the string is `needle`.
896 #[must_use]
897 #[inline]
898 pub fn ends_with(&self, needle: &Self) -> bool {
899 self.as_bytes().ends_with(needle.as_bytes())
900 }
901
902 /// Return `true`, if the string ends with `needle`.
903 ///
904 /// Returns `true` too if the string is `needle`.
905 #[must_use]
906 #[inline]
907 pub fn ends_with_bytes(&self, needle: &[u8]) -> bool {
908 self.as_bytes().ends_with(needle)
909 }
910
911 /// Return `true`, if the string ends with `needle`.
912 ///
913 /// Returns `true` too if the string is `needle`.
914 #[must_use]
915 #[inline]
916 pub fn ends_with_str(&self, needle: &str) -> bool {
917 self.as_str().ends_with(needle)
918 }
919
920 /// Map the given function `f` over the bytes of the string, mutating it.
921 fn map_bytes_mut(&mut self, f: fn(&mut [u8]) -> ()) {
922 if self.len as usize > MAX_LENGTH_SMALL {
923 // SAFETY:
924 // We check, that we actually have a valid pointer.
925 unsafe {
926 f((*self.u.ptr).as_slice_manually_mut(self.len as usize));
927 }
928 } else {
929 // SAFETY:
930 // The two arrays, `prefix` and `small`, are guaranteed to be
931 // continuous in memory.
932 unsafe {
933 f(slice::from_raw_parts_mut(
934 self.prefix.as_mut_ptr(),
935 self.len as usize,
936 ));
937 }
938 }
939 }
940
941 /// Return a new string with all uppercase ASCII characters changed to
942 /// lowercase.
943 #[must_use]
944 #[inline]
945 pub fn to_ascii_lowercase(&self) -> Self {
946 let mut ret_val = self.clone();
947 ret_val.map_bytes_mut(<[u8]>::make_ascii_lowercase);
948 ret_val
949 }
950
951 /// Return a new string with all lowercase ASCII characters changed to
952 /// uppercase.
953 #[must_use]
954 #[inline]
955 pub fn to_ascii_uppercase(&self) -> Self {
956 let mut ret_val = self.clone();
957 ret_val.map_bytes_mut(<[u8]>::make_ascii_uppercase);
958 ret_val
959 }
960
961 /// Return a new string with all ASCII whitespace removed from the start and
962 /// end.
963 #[must_use]
964 #[inline]
965 pub fn trim_ascii(&self) -> Self {
966 // SAFETY:
967 // We copy the current string, so the invariants should hold for the
968 // copy too. The string does not get longer, so cannot be greater than
969 // `MAX_LENGTH`.
970 unsafe { Self::from_bytes_unchecked(self.as_bytes().trim_ascii()) }
971 }
972
973 /// Return a new string with all ASCII whitespace removed from the start.
974 #[must_use]
975 #[inline]
976 pub fn trim_ascii_start(&self) -> Self {
977 // SAFETY:
978 // We copy the current string, so the invariants should hold for the
979 // copy too:
980 // - The string does not get longer, so cannot be greater than
981 // `MAX_LENGTH`.
982 // - if the string is valid UTF-8, removing ASCII characters does not
983 // change that.
984 unsafe {
985 Self::from_bytes_unchecked(self.as_bytes().trim_ascii_start())
986 }
987 }
988
989 /// Return a new string with all ASCII whitespace removed from the end.
990 #[must_use]
991 #[inline]
992 pub fn trim_ascii_end(&self) -> Self {
993 // SAFETY:
994 // We copy the current string, so the invariants should hold for the
995 // copy too:
996 // - The string does not get longer, so cannot be greater than
997 // `MAX_LENGTH`.
998 // - if the string is valid UTF-8, removing ASCII characters does not
999 // change that.
1000 unsafe { Self::from_bytes_unchecked(self.as_bytes().trim_ascii_end()) }
1001 }
1002
1003 /// Return a new string with `prefix` removed from the start.
1004 #[cfg(feature = "pattern")]
1005 #[doc(cfg(pattern))]
1006 #[inline]
1007 pub fn strip_prefix<P: str::pattern::Pattern>(
1008 &self,
1009 prefix: P,
1010 ) -> Option<Self> {
1011 self.as_str()
1012 .strip_prefix(prefix)
1013 // stripping a prefix should not make the string invalid UTF-8, and
1014 // does shorten it.
1015 .map(Self::from_str_unchecked)
1016 }
1017
1018 /// Return a new string with `suffix` removed from the end.
1019 #[cfg(feature = "pattern")]
1020 #[doc(cfg(pattern))]
1021 #[inline]
1022 pub fn strip_suffix<P>(&self, suffix: P) -> Option<Self>
1023 where
1024 P: str::pattern::Pattern,
1025 for<'a> P::Searcher<'a>: str::pattern::ReverseSearcher<'a>,
1026 {
1027 self.as_str()
1028 .strip_suffix(suffix)
1029 // stripping a suffix should not make the string invalid UTF-8, and
1030 // does shorten it.
1031 .map(Self::from_str_unchecked)
1032 }
1033
1034 /// Return `true` if the string contains the pattern `pat`.
1035 ///
1036 /// Returns `false` else.
1037 ///
1038 /// The feature
1039 #[cfg(feature = "pattern")]
1040 #[doc(cfg(pattern))]
1041 #[inline]
1042 pub fn contains<P: str::pattern::Pattern>(&self, pat: P) -> bool {
1043 self.as_str().contains(pat)
1044 }
1045}
1046
1047
1048//==============================================================================
1049// Iterating by reference
1050
/// Iterator struct for a `&TokenString`.
///
/// Iterator items are single bytes, `u8`, not [`char`]s.
pub struct TokenStringIter<'a> {
    /// The [`TokenString`] to iterate over.
    string: &'a TokenString,
    /// The current index in the string, that is, the index of the byte that
    /// is returned next. Equal to the string's length when the iterator is
    /// exhausted.
    idx: usize,
}
1060
1061impl<'a> TokenStringIter<'a> {
1062 /// Generate a reference iterator for the given [`TokenString`].
1063 #[must_use]
1064 #[inline]
1065 pub const fn new(s: &'a TokenString) -> Self {
1066 TokenStringIter { string: s, idx: 0 }
1067 }
1068}
1069
1070impl Iterator for TokenStringIter<'_> {
1071 type Item = u8;
1072
1073 /// Return either the next byte, [`u8`], or [`None`] if we are at the end of
1074 /// the string.
1075 fn next(&mut self) -> Option<Self::Item> {
1076 debug_assert!(
1077 self.idx <= self.string.len.into(),
1078 "The iterator index '{0}' is greater than the string length '{1}'!",
1079 self.idx,
1080 self.string.len
1081 );
1082 if self.idx == self.string.len.into() {
1083 None
1084 } else if self.string.len as usize > MAX_LENGTH_SMALL {
1085 self.idx += 1;
1086 Some(self.string.as_bytes()[self.idx - 1])
1087 } else {
1088 self.idx += 1;
1089 Some(
1090 // SAFETY:
1091 // The two arrays, `prefix` and `u.small`, are guaranteed to be
1092 // consecutive in memory and allocated at the same time.
1093 unsafe {
1094 slice::from_raw_parts(
1095 self.string.prefix.as_ptr(),
1096 self.string.len as usize,
1097 )
1098 }[self.idx - 1],
1099 )
1100 }
1101 }
1102}
1103
1104impl<'a> IntoIterator for &'a TokenString {
1105 type IntoIter = TokenStringIter<'a>;
1106 type Item = u8;
1107
1108 #[inline]
1109 fn into_iter(self) -> Self::IntoIter {
1110 Self::IntoIter::new(self)
1111 }
1112}
1113
1114//==============================================================================
1115// Iterating an owned `TokenString`.
1116
1117/// Iterator struct for an owned [`TokenString`].
1118///
1119/// Iterator items are single bytes, [`u8`].
1120pub struct TokenStringIterOwn {
1121 /// The [`TokenString`] to iterate over.
1122 string: TokenString,
1123 /// The current index in the string.
1124 idx: usize,
1125}
1126
1127impl TokenStringIterOwn {
1128 /// Generate an owned iterator for the given [`TokenString`].
1129 #[must_use]
1130 #[inline]
1131 pub const fn new(s: TokenString) -> Self {
1132 Self { string: s, idx: 0 }
1133 }
1134}
1135
1136impl Iterator for TokenStringIterOwn {
1137 type Item = u8;
1138
1139 /// Return either the next byte, [`u8`], or [`None`] if we are at the end of
1140 /// the string.
1141 fn next(&mut self) -> Option<Self::Item> {
1142 debug_assert!(
1143 self.idx <= self.string.len.into(),
1144 "The iterator index '{0}' is greater than the string length '{1}'!",
1145 self.idx,
1146 self.string.len
1147 );
1148 if self.idx == self.string.len.into() {
1149 None
1150 } else if self.string.len as usize > MAX_LENGTH_SMALL {
1151 self.idx += 1;
1152 Some(self.string.as_bytes()[self.idx - 1])
1153 } else {
1154 self.idx += 1;
1155 Some(
1156 // SAFETY:
1157 // The two arrays, `prefix` and `u.small`, are guaranteed to be
1158 // consecutive in memory and allocated at the same time.
1159 unsafe {
1160 slice::from_raw_parts(
1161 self.string.prefix.as_ptr(),
1162 self.string.len as usize,
1163 )
1164 }[self.idx - 1],
1165 )
1166 }
1167 }
1168}
1169
1170impl IntoIterator for TokenString {
1171 type IntoIter = TokenStringIterOwn;
1172 type Item = u8;
1173
1174 #[inline]
1175 fn into_iter(self) -> Self::IntoIter {
1176 Self::IntoIter::new(self)
1177 }
1178}
1179
1180
1181// =============================================================================
1182// Tests
1183// =============================================================================
1184
/// Tests with strings that fit completely into the `prefix` field.
#[cfg(test)]
mod prefix {
    extern crate std;
    use assert2::{check, let_assert};

    use crate::TokenString;

    /// The six byte test input, exactly as long as the `prefix` field.
    const INPUT: &str = "123456";

    /// Check that `res` is a small string that holds exactly `INPUT`.
    ///
    /// Every constructor test uses this, so that none of them can miss one of
    /// the checks.
    fn check_is_input(res: &TokenString) {
        check!(&res.prefix[0 .. 6] == b"123456");
        check!(res.len == 6);
        check!(res.is_small() == true);
    }

    #[test]
    fn empty_is_empty() {
        let_assert!(Ok(res) = TokenString::try_from(""));
        check!(res.prefix[0] == 0);
        check!(res.len == 0);
        check!(res.is_small() == true);
    }

    #[test]
    fn clone_empty() {
        let_assert!(Ok(s1) = TokenString::try_from(""));
        let res = s1.clone();
        check!(res.prefix[0] == s1.prefix[0]);
        check!(res.len == s1.len);
        check!(res.is_small() == true);
    }

    #[test]
    fn try_from_str() {
        let_assert!(Ok(res) = TokenString::try_from(INPUT));
        check_is_input(&res);
    }

    #[test]
    fn clone() {
        let_assert!(Ok(s1) = TokenString::try_from(INPUT));
        let res = s1.clone();
        check!(&res.prefix[0 .. 6] == &s1.prefix[0 .. 6]);
        check!(res.len == s1.len);
        check!(res.is_small() == true);
    }

    #[test]
    fn try_from_bytes() {
        let s1: &[u8] = INPUT.as_bytes();
        let_assert!(Ok(res) = TokenString::try_from(s1));
        check_is_input(&res);
    }

    #[test]
    fn try_from_chars() {
        #[expect(
            clippy::std_instead_of_alloc,
            reason = "We are testing, this needs std"
        )]
        let s1: std::vec::Vec<char> = INPUT.chars().collect();
        let_assert!(Ok(res) = TokenString::try_from(s1.as_slice()));
        check_is_input(&res);
    }

    #[test]
    fn try_from_string() {
        #[expect(
            clippy::std_instead_of_alloc,
            reason = "We are testing, this needs std"
        )]
        let s1: std::string::String = INPUT.into();
        let_assert!(Ok(res) = TokenString::try_from(s1));
        check_is_input(&res);
    }

    #[test]
    fn try_from_stringref() {
        #[expect(
            clippy::std_instead_of_alloc,
            reason = "We are testing, this needs std"
        )]
        let s1: std::string::String = INPUT.into();
        let_assert!(Ok(res) = TokenString::try_from(&s1));
        check_is_input(&res);
    }

    #[test]
    fn from_str_unchecked() {
        let res = TokenString::from_str_unchecked(INPUT);
        // This now also checks `is_small`, like all the other constructors.
        check_is_input(&res);
    }

    #[test]
    fn from_bytes_unchecked() {
        let s1: &[u8] = INPUT.as_bytes();
        // SAFETY:
        // We know that the string is valid UTF-8.
        let res = unsafe { TokenString::from_bytes_unchecked(s1) };
        check_is_input(&res);
    }

    #[test]
    fn from_stringref_unchecked() {
        #[expect(
            clippy::std_instead_of_alloc,
            reason = "We are testing, this needs std"
        )]
        let s1: std::string::String = INPUT.into();
        let res = TokenString::from_string_unchecked(&s1);
        check_is_input(&res);
    }
}
1306
/// Tests with strings that are longer than the `prefix` field, but still
/// short enough to be stored in the struct itself.
#[cfg(test)]
mod small {
    extern crate std;
    use assert2::{check, let_assert};

    use crate::TokenString;

    /// The seven byte test input: six bytes for the `prefix` field, the
    /// seventh byte is checked in `u.small`.
    const INPUT: &str = "1234567";

    /// Check that `res` is a small string that holds exactly `INPUT`.
    fn check_is_input(res: &TokenString) {
        // SAFETY:
        // Every caller builds `res` from `INPUT`, so we know there is a small
        // string in the union.
        let first_small = unsafe { res.u.small[0] };
        check!(&res.prefix[0 .. 6] == b"123456");
        check!(first_small == b'7');
        check!(res.len == 7);
        check!(res.is_small() == true);
    }

    #[test]
    fn try_from_str() {
        let_assert!(Ok(res) = TokenString::try_from(INPUT));
        check_is_input(&res);
    }

    #[test]
    fn clone() {
        let_assert!(Ok(original) = TokenString::try_from(INPUT));
        let copy = original.clone();
        // SAFETY:
        // We know there is a small string in the union of both strings.
        let (copy_first, original_first) =
            unsafe { (copy.u.small[0], original.u.small[0]) };
        check!(&copy.prefix[0 .. 6] == &original.prefix[0 .. 6]);
        check!(copy_first == original_first);
        check!(copy.len == original.len);
        check!(copy.is_small() == true);
    }

    #[test]
    fn try_from_bytes() {
        let bytes: &[u8] = INPUT.as_bytes();
        let_assert!(Ok(res) = TokenString::try_from(bytes));
        check_is_input(&res);
    }

    #[test]
    fn try_from_chars() {
        #[expect(
            clippy::std_instead_of_alloc,
            reason = "We are testing, this needs std"
        )]
        let chars: std::vec::Vec<char> = INPUT.chars().collect();
        let_assert!(Ok(res) = TokenString::try_from(chars.as_slice()));
        check_is_input(&res);
    }

    #[test]
    fn try_from_string() {
        #[expect(
            clippy::std_instead_of_alloc,
            reason = "We are testing, this needs std"
        )]
        let owned: std::string::String = INPUT.into();
        let_assert!(Ok(res) = TokenString::try_from(owned));
        check_is_input(&res);
    }

    #[test]
    fn try_from_stringref() {
        #[expect(
            clippy::std_instead_of_alloc,
            reason = "We are testing, this needs std"
        )]
        let owned: std::string::String = INPUT.into();
        let_assert!(Ok(res) = TokenString::try_from(&owned));
        check_is_input(&res);
    }

    #[test]
    fn from_str_unchecked() {
        let res = TokenString::from_str_unchecked(INPUT);
        check_is_input(&res);
    }

    #[test]
    fn from_bytes_unchecked() {
        let bytes: &[u8] = INPUT.as_bytes();
        // SAFETY:
        // We know that the string is valid UTF-8.
        let res = unsafe { TokenString::from_bytes_unchecked(bytes) };
        check_is_input(&res);
    }

    #[test]
    fn from_stringref_unchecked() {
        #[expect(
            clippy::std_instead_of_alloc,
            reason = "We are testing, this needs std"
        )]
        let owned: std::string::String = INPUT.into();
        let res = TokenString::from_string_unchecked(&owned);
        check_is_input(&res);
    }
}
1439
/// Tests with strings that are too long to be stored in the struct itself
/// and are therefore allocated on the heap.
#[cfg(test)]
mod heap {
    extern crate std;
    use assert2::{check, let_assert};

    use crate::TokenString;

    /// The 15 byte test input, one byte longer than the maximum length of a
    /// small string.
    const INPUT: &str = "1234567890ABCDE";

    /// Check that `res` is a heap allocated string that holds exactly
    /// `INPUT`.
    fn check_is_input(res: &TokenString) {
        check!(&res.prefix[0 .. 6] == b"123456");
        check!(
            // SAFETY:
            // Every caller builds `res` from `INPUT`, so we know there is a
            // large string in the union.
            unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] }
                == b"1234567890ABCDE"
        );
        check!(res.len == 15);
        check!(res.is_small() == false);
    }

    #[test]
    fn try_from_str() {
        let_assert!(Ok(res) = TokenString::try_from(INPUT));
        check_is_input(&res);
    }

    #[test]
    fn clone() {
        let_assert!(Ok(s1) = TokenString::try_from(INPUT));
        let res = s1.clone();
        check!(&res.prefix[0 .. 6] == &s1.prefix[0 .. 6]);
        check!(
            // SAFETY:
            // We know there is a large string in the union of both strings.
            // Each buffer is read with the length of its own string, so a
            // wrong length in the clone cannot cause a read past the end of
            // the original's buffer.
            unsafe {
                res.u.ptr.as_slice_manually(res.len as usize)[.. 15]
                    == s1.u.ptr.as_slice_manually(s1.len as usize)[.. 15]
            }
        );
        check!(res.len == s1.len);
        check!(res.is_small() == false);
    }

    #[test]
    fn try_from_bytes() {
        let s1: &[u8] = INPUT.as_bytes();
        let_assert!(Ok(res) = TokenString::try_from(s1));
        check_is_input(&res);
    }

    #[test]
    fn try_from_chars() {
        #[expect(
            clippy::std_instead_of_alloc,
            reason = "We are testing, this needs std"
        )]
        let s1: std::vec::Vec<char> = INPUT.chars().collect();
        let_assert!(Ok(res) = TokenString::try_from(s1.as_slice()));
        check_is_input(&res);
    }

    #[test]
    fn try_from_string() {
        #[expect(
            clippy::std_instead_of_alloc,
            reason = "We are testing, this needs std"
        )]
        let s1: std::string::String = INPUT.into();
        let_assert!(Ok(res) = TokenString::try_from(s1));
        check_is_input(&res);
    }

    #[test]
    fn try_from_stringref() {
        #[expect(
            clippy::std_instead_of_alloc,
            reason = "We are testing, this needs std"
        )]
        let s1: std::string::String = INPUT.into();
        let_assert!(Ok(res) = TokenString::try_from(&s1));
        check_is_input(&res);
    }

    #[test]
    fn from_str_unchecked() {
        let res = TokenString::from_str_unchecked(INPUT);
        check_is_input(&res);
    }

    #[test]
    fn from_bytes_unchecked() {
        let s1: &[u8] = INPUT.as_bytes();
        // SAFETY:
        // We know that the string is valid UTF-8.
        let res = unsafe { TokenString::from_bytes_unchecked(s1) };
        check_is_input(&res);
    }

    #[test]
    fn from_stringref_unchecked() {
        #[expect(
            clippy::std_instead_of_alloc,
            reason = "We are testing, this needs std"
        )]
        let s1: std::string::String = INPUT.into();
        let res = TokenString::from_string_unchecked(&s1);
        check_is_input(&res);
    }
}