src/string.rs at main · releasecandidate.bsky.social/token-string

releasecandidate.bsky.social / token-string
Short (up to 65,535 bytes) immutable strings to e.g. parse tokens, implemented in Rust. These are sometimes called "German Strings", because Germans have written the paper mentioning them
fork atom
token-string / src / string.rs
at main 1601 lines 43 kB view raw
wrap content
releasecandidate.bsky.social string.rs: add lifetime annotation in function `chars`, fix various clippy warnings of clippy 0.1.90 8mo ago
43f88899
   1// SPDX-FileCopyrightText: Copyright (C) 2024 Roland Csaszar
   2// SPDX-License-Identifier: MPL-2.0
   3//
   4// Project:  token-string
   5// File:     string.rs
   6// Date:     22.Nov.2024
   7// =============================================================================
   8//! The string type [`TokenString`].
   9
  10extern crate alloc;
  11
  12use alloc::string::ToString as _;
  13use alloc::vec;
  14use core::{borrow, cmp, fmt, hash, mem, ops, panic, slice, str};
  15
  16use crate::{StringPtr, TkStrError};
  17
  18/// The length of the prefix of the string, that is, the first bytes stored
  19/// in the field `prefix` for comparisons.
  20pub const PREFIX_LENGTH: usize = mem::size_of::<u64>() - mem::size_of::<u16>();
  21
  22/// Helper constant for matching intervals.
  23const PREFIX_LENGTH_ADD1: usize = PREFIX_LENGTH + 1;
  24
  25/// The length of the non-prefix part of a "small string", 8 bytes. The content
  26/// of the field `_d.small`.
  27pub const SMALL_DATA_LENGTH: usize = mem::size_of::<u64>();
  28
  29/// The maximum length in bytes, not Unicode scalar values, of a "small" string
  30/// that is saved in the struct [`TokenString`] itself and not on the heap.
  31pub const MAX_LENGTH_SMALL: usize = PREFIX_LENGTH + SMALL_DATA_LENGTH;
  32
  33/// Helper constant for matching intervals.
  34pub const MAX_LENGTH_SMALL_ADD1: usize = MAX_LENGTH_SMALL + 1;
  35
  36/// The maximum length in bytes, not Unicode scalar values, of a
  37/// [`TokenString`].
  38pub const MAX_LENGTH: usize = u16::MAX as usize;
  39
  40/// A string which can hold at most [`MAX_LENGTH`] bytes (not Unicode scalar
  41/// values).
  42///
  43/// This holds valid UTF-8 encoded strings only.
  44/// Strings that are short enough, which need at most [`MAX_LENGTH_SMALL`]
  45/// bytes, are stored in the struct itself, bigger ones use the heap.
  46///
  47/// # Invariant
  48///
  49/// - [`TokenString`] must be a UTF-8 string (like &[`prim@str`] and
  50///   [`alloc::string::String`]).
  51/// - The length of a [`TokenString`] is at most [`MAX_LENGTH`] and at least 0 -
  52///   the empty string.
  53#[repr(C)]
  54pub struct TokenString {
  55	/// The length of the string.
  56	///
  57	/// Maximum: [`MAX_LENGTH`].
  58	pub(crate) len: u16,
  59	/// The first [`PREFIX_LENGTH`] bytes of the string.
  60	pub(crate) prefix: [u8; PREFIX_LENGTH],
  61	/// The data (see [`Data`]).
  62	///
  63	/// If the string is at most [`MAX_LENGTH_SMALL`] bytes, this holds the
  64	/// other bytes of the string, else this is a pointer to the heap.
  65	pub(crate) u: Data,
  66}
  67
  68
  69// Invariants: [`TokenString`] must be aligned to 64 bits and its size must be
  70// 128 bits. That means that `sizeof len + prefix == 64 bit` and
  71// `sizeof u == 64 bit`. So there is no padding.
  72
  73const _: () = assert!(
  74	mem::align_of::<TokenString>() == mem::size_of::<u64>(),
  75	"struct TokenString is not aligned to 64 bits!"
  76);
  77const _: () = assert!(
  78	mem::size_of::<TokenString>() == 2 * mem::size_of::<u64>(),
  79	"struct TokenString has size != 128 bits"
  80);
  81const _: () = assert!(
  82	mem::align_of::<Data>() == mem::size_of::<u64>(),
  83	"struct Data is not aligned to 64 bits!"
  84);
  85const _: () = assert!(
  86	mem::size_of::<Data>() == mem::size_of::<u64>(),
  87	"union Data has size != 64 bits"
  88);
  89
  90// =============================================================================
  91// Inner types of `TokenString`.
  92
  93/// This is either a pointer to the string, if the string is bigger than
  94/// [`SMALL_DATA_LENGTH`] bytes, or a pointer to a string as an array of bytes.
  95///
  96/// See [`StringPtr`]
  97#[repr(C)]
  98pub union Data {
  99	/// If the string is small enough (at most [`MAX_LENGTH_SMALL`]), its data
 100	/// after the prefix is here.
 101	pub(crate) small: [u8; SMALL_DATA_LENGTH],
 102	/// For bigger strings as [`MAX_LENGTH_SMALL`], this points to the memory
 103	/// holding the whole string.
 104	pub(crate) ptr: mem::ManuallyDrop<StringPtr>,
 105}
 106
 107// =============================================================================
 108// `TokenString` itself
 109
 110/// The empty string.
 111///
 112/// Has a length of zero.
 113pub const EMPTY: TokenString = TokenString {
 114	len: 0,
 115	prefix: [0_u8; PREFIX_LENGTH],
 116	u: Data {
 117		small: [0_u8; SMALL_DATA_LENGTH],
 118	},
 119};
 120
 121// =============================================================================
 122// Traits
 123
 124impl TryFrom<&str> for TokenString {
 125	type Error = TkStrError;
 126
 127	/// Create a [`TokenString`] from a &[`prim@str`].
 128	///
 129	/// Return [`TkStrError::TooBig`] if the argument is greater than
 130	/// [`MAX_LENGTH`].
 131	///
 132	/// Memory:
 133	///
 134	/// Allocates if and only if the length of `value` is bigger than
 135	/// [`MAX_LENGTH_SMALL`].
 136	fn try_from(value: &str) -> Result<Self, Self::Error> {
 137		let bytes = value.as_bytes();
 138		match value.len() {
 139			| 0 => Ok(Self {
 140				len: 0,
 141				prefix: [0_u8; PREFIX_LENGTH],
 142				u: Data {
 143					small: [0_u8; SMALL_DATA_LENGTH],
 144				},
 145			}),
 146			| 1 ..= PREFIX_LENGTH => {
 147				let s = value.len();
 148				let mut prefix = [0_u8; PREFIX_LENGTH];
 149				prefix[.. s].copy_from_slice(&bytes[.. s]);
 150				Ok(Self {
 151					#[expect(
 152						clippy::cast_possible_truncation,
 153						reason = "Length has been checked above"
 154					)]
 155					len: s as u16,
 156					prefix,
 157					u: Data {
 158						small: [0_u8; SMALL_DATA_LENGTH],
 159					},
 160				})
 161			}
 162			| PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => {
 163				let s = value.len();
 164				let mut prefix = [0_u8; PREFIX_LENGTH];
 165				prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
 166				let mut small = [0_u8; SMALL_DATA_LENGTH];
 167				small[.. s - PREFIX_LENGTH]
 168					.copy_from_slice(&bytes[PREFIX_LENGTH .. s]);
 169				Ok(Self {
 170					#[expect(
 171						clippy::cast_possible_truncation,
 172						reason = "Length has been checked above"
 173					)]
 174					len: s as u16,
 175					prefix,
 176					u: Data { small },
 177				})
 178			}
 179			| MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => {
 180				let ptr = StringPtr::from(bytes);
 181				let u = Data {
 182					ptr: mem::ManuallyDrop::new(ptr),
 183				};
 184				let mut prefix = [0_u8; PREFIX_LENGTH];
 185				prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
 186				Ok(Self {
 187					#[expect(
 188						clippy::cast_possible_truncation,
 189						reason = "Length has been checked above"
 190					)]
 191					len: value.len() as u16,
 192					prefix,
 193					u,
 194				})
 195			}
 196			| _ => Err(TkStrError::TooBig(value.len())),
 197		}
 198	}
 199}
 200
 201impl TryFrom<&[u8]> for TokenString {
 202	type Error = TkStrError;
 203
 204	/// Try to create a [`TokenString`] from the given slice.
 205	///
 206	/// Return [`TkStrError::TooBig`] if the given slice is too big, greater
 207	/// than [`MAX_LENGTH`].
 208	/// Return [`TkStrError::UnicodeError`]
 209	///
 210	/// Memory:
 211	///
 212	/// Allocates if and only if the length of `value` is bigger than
 213	/// [`MAX_LENGTH_SMALL`].
 214	#[inline]
 215	fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
 216		match str::from_utf8(value) {
 217			| Ok(str) => Self::try_from(str),
 218			| Err(utf_err) => Err(TkStrError::UnicodeError(utf_err)),
 219		}
 220	}
 221}
 222
 223impl TryFrom<&[char]> for TokenString {
 224	type Error = TkStrError;
 225
 226	/// Try to create a [`TokenString`] from the given slice.
 227	///
 228	/// Return [`TkStrError::TooBig`] if the given slice is too big, greater
 229	/// than [`MAX_LENGTH`].
 230	///
 231	/// Memory
 232	///
 233	/// Allocates and deallocates a temporary [`alloc::string::String`]
 234	/// collecting the converted bytes.
 235	#[inline]
 236	fn try_from(value: &[char]) -> Result<Self, Self::Error> {
 237		let i = value.iter();
 238		Self::try_from(i.collect::<alloc::string::String>())
 239	}
 240}
 241
 242impl TryFrom<&alloc::string::String> for TokenString {
 243	type Error = TkStrError;
 244
 245	/// Create a `TokenString` from a &[`alloc::string::String`].
 246	///
 247	/// Return [`TkStrError::TooBig`] if the argument is greater than
 248	/// [`MAX_LENGTH`].
 249	///
 250	/// Memory:
 251	///
 252	/// Allocates if and only if the length of `value` is bigger than
 253	/// [`MAX_LENGTH_SMALL`].
 254	#[inline]
 255	fn try_from(value: &alloc::string::String) -> Result<Self, Self::Error> {
 256		let str = value.as_str();
 257		Self::try_from(str)
 258	}
 259}
 260
 261impl TryFrom<alloc::string::String> for TokenString {
 262	type Error = TkStrError;
 263
 264	/// Create a [`TokenString`] from a [`alloc::string::String`].
 265	///
 266	/// Return [`TkStrError::TooBig`] if the argument is greater than
 267	/// [`MAX_LENGTH`].
 268	///
 269	/// Memory:
 270	///
 271	/// Allocates if and only if the length of `value` is bigger than
 272	/// [`MAX_LENGTH_SMALL`].
 273	#[inline]
 274	fn try_from(value: alloc::string::String) -> Result<Self, Self::Error> {
 275		// Sadly we can't use the string's data directly, as a [`String`] has a
 276		// capacity which is to be known when deallocating the data.
 277		// See [`String::into_raw_parts`].
 278		let str = value.as_str();
 279		Self::try_from(str)
 280	}
 281}
 282
 283impl Drop for TokenString {
 284	#[cfg_attr(test, mutants::skip)]
 285	#[inline]
 286	fn drop(&mut self) {
 287		if usize::from(self.len) > MAX_LENGTH_SMALL {
 288			// SAFETY:
 289			// We know that there is a pointer saved in the union.
 290			// The whole string is being dropped, so taking a mutable
 291			// reference of the pointer is legal.
 292			let mut m_ptr = unsafe { mem::ManuallyDrop::take(&mut self.u.ptr) };
 293			m_ptr.drop_manually(self.len.into());
 294		}
 295	}
 296}
 297
 298impl Clone for TokenString {
 299	/// Return a clone of the [`TokenString`].
 300	///
 301	/// Memory:
 302	///
 303	/// Allocates if and only if the length of `value` is bigger than
 304	/// [`MAX_LENGTH_SMALL`].
 305	#[inline]
 306	fn clone(&self) -> Self {
 307		let u = if self.len as usize > MAX_LENGTH_SMALL {
 308			Data {
 309				// SAFETY:
 310				// We check, that there is an allocated pointer saved in the
 311				// union.
 312				ptr: mem::ManuallyDrop::new(unsafe {
 313					self.u.ptr.clone_manually(self.len.into())
 314				}),
 315			}
 316		} else {
 317			Data {
 318				// SAFETY:
 319				// We check, that there is a small string in the union.
 320				small: unsafe { self.u.small },
 321			}
 322		};
 323		Self {
 324			len: self.len,
 325			prefix: self.prefix,
 326			u,
 327		}
 328	}
 329}
 330
 331impl Default for TokenString {
 332	/// Return the empty string.
 333	#[inline]
 334	fn default() -> Self {
 335		EMPTY
 336	}
 337}
 338
 339impl Eq for TokenString {}
 340
 341impl PartialEq for TokenString {
 342	#[inline]
 343	fn eq(&self, other: &Self) -> bool {
 344		if self.len != other.len || self.prefix != other.prefix {
 345			return false;
 346		}
 347
 348		if self.len as usize <= MAX_LENGTH_SMALL {
 349			// SAFETY:
 350			// We know we have two small strings to compare.
 351			unsafe { self.u.small == other.u.small }
 352		} else {
 353			// SAFETY:
 354			// We know we have two string pointers to compare.
 355			unsafe { self.u.ptr.eq_manually(&other.u.ptr, self.len.into()) }
 356		}
 357	}
 358}
 359
 360impl PartialEq<[u8]> for TokenString {
 361	fn eq(&self, other: &[u8]) -> bool {
 362		if self.len as usize != other.len() {
 363			return false;
 364		}
 365		let len = self.len as usize;
 366		match len {
 367			| 0 => true,
 368			| 1 ..= PREFIX_LENGTH => self.prefix[.. len] == other[.. len],
 369			| PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => {
 370				// SAFETY:
 371				// Use the whole memory region of self.`prefix` and
 372				// `self.u.small` as a single array. This is not UB, as the
 373				// whole memory `TokenString` has been allocated at once and
 374				// is guaranteed to be continuous in memory. If Miri
 375				// complains about this, use the flag `MIRIFLAGS="
 376				// -Zmiri-tree-borrows"` to use "tree borrows" instead of
 377				// "stacked borrows".
 378				let bytes =
 379					unsafe { slice::from_raw_parts(self.prefix.as_ptr(), len) };
 380				bytes == other
 381			}
 382			// SAFETY:
 383			// We know that the pointer actually points to allocated memory.
 384			| MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => unsafe {
 385				self.u.ptr.as_slice_manually(len) == other
 386			},
 387			| _ => panic!("The TokenString is bigger than MAX_LENGTH!"),
 388		}
 389	}
 390}
 391
 392impl PartialEq<str> for TokenString {
 393	#[inline]
 394	fn eq(&self, other: &str) -> bool {
 395		self == other.as_bytes()
 396	}
 397}
 398
 399impl PartialEq<alloc::string::String> for TokenString {
 400	#[inline]
 401	fn eq(&self, other: &alloc::string::String) -> bool {
 402		self == other.as_bytes()
 403	}
 404}
 405
 406
 407impl Ord for TokenString {
 408	/// Compare two [`TokenString`]s byte-wise.
 409	///
 410	/// This is not a sensible alphabetical comparison for anything that isn't
 411	/// ASCII.
 412	#[inline]
 413	fn cmp(&self, other: &Self) -> cmp::Ordering {
 414		let pref_ord = self.prefix.cmp(&other.prefix);
 415		if pref_ord != cmp::Ordering::Equal {
 416			return pref_ord;
 417		}
 418
 419		self.suffix().cmp(other.suffix())
 420	}
 421}
 422
 423impl PartialOrd for TokenString {
 424	/// Compare two [`TokenString`]s byte-wise.
 425	///
 426	/// This is not a sensible alphabetical comparison for anything that isn't
 427	/// ASCII.
 428	#[inline]
 429	fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
 430		Some(self.cmp(other))
 431	}
 432}
 433
 434impl fmt::Display for TokenString {
 435	#[inline]
 436	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 437		write!(f, "{}", self.as_str())
 438	}
 439}
 440
 441impl fmt::Debug for TokenString {
 442	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 443		if self.len as usize > MAX_LENGTH_SMALL {
 444			let string =
 445			// SAFETY:
 446			// We know that the pointer points to a string.
 447				unsafe { self.u.ptr.as_string_manually(self.len.into()) };
 448			// SAFETY:
 449			// We know that the pointer points to a string.
 450			let ptr = unsafe { &self.u.ptr };
 451			f.debug_struct("TokenString")
 452				.field("len", &self.len)
 453				.field("prefix", &self.prefix_str())
 454				.field("ptr", ptr)
 455				.field("string", &string)
 456				.finish()
 457		} else {
 458			// SAFETY:
 459			// We've checked that this is a small string.
 460			unsafe {
 461				f.debug_struct("TokenString")
 462					.field("len", &self.len)
 463					.field("prefix", &self.prefix_str())
 464					.field("small", &self.small_str())
 465					.field("string", &self.as_str())
 466					.finish()
 467			}
 468		}
 469	}
 470}
 471
 472impl<Idx> ops::Index<Idx> for TokenString
 473where
 474	Idx: slice::SliceIndex<str>,
 475{
 476	type Output = Idx::Output;
 477
 478	#[inline]
 479	fn index(&self, index: Idx) -> &Self::Output {
 480		self.as_str().index(index)
 481	}
 482}
 483
 484impl borrow::Borrow<str> for TokenString {
 485	#[inline]
 486	fn borrow(&self) -> &str {
 487		self.as_str()
 488	}
 489}
 490
 491impl AsRef<str> for TokenString {
 492	#[inline]
 493	fn as_ref(&self) -> &str {
 494		self.as_str()
 495	}
 496}
 497
 498impl hash::Hash for TokenString {
 499	#[inline]
 500	fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
 501		self.as_str().hash(state);
 502	}
 503}
 504
 505// SAFETY:
 506// There can be no shared references of a `TokenString`.
 507unsafe impl Send for TokenString {}
 508
 509// SAFETY:
 510// `TokenString` is immutable.
 511unsafe impl Sync for TokenString {}
 512
 513// =============================================================================
 514// Non trait methods
 515
 516impl TokenString {
 517	/// Return the prefix as a `&[u8]`.
 518	fn prefix_str(&self) -> &[u8] {
 519		let l = cmp::min(self.len as usize, PREFIX_LENGTH);
 520		&self.prefix[.. l]
 521	}
 522
 523	/// Return the suffix of a small string as a `&[u8]`.
 524	///
 525	/// # Safety
 526	///
 527	/// Must be called with a small string only!
 528	unsafe fn small_str(&self) -> &[u8] {
 529		let l = (self.len as usize).saturating_sub(PREFIX_LENGTH);
 530		// SAFETY:
 531		// We know that the union contains a small string.
 532		unsafe { &self.u.small[.. l] }
 533	}
 534
 535	/// Return the length of the string in bytes.
 536	///
 537	/// This is the length of the string in bytes, not Unicode scalar values and
 538	/// not grapheme clusters.
 539	#[must_use]
 540	#[inline]
 541	pub const fn len(&self) -> usize {
 542		self.len as usize
 543	}
 544
 545	/// Return `true` if the string is a "small string", that is, it is saved in
 546	/// the [`TokenString`] struct itself.
 547	///
 548	/// If this returns `false`, the string is allocated on the heap.
 549	#[must_use]
 550	#[inline]
 551	pub const fn is_small(&self) -> bool {
 552		self.len as usize <= MAX_LENGTH_SMALL
 553	}
 554
 555	/// Return `true`, if this is the empty string.
 556	///
 557	/// Returns `false` else.
 558	#[must_use]
 559	#[inline]
 560	pub const fn is_empty(&self) -> bool {
 561		self.len == 0
 562	}
 563
 564	/// Convert to a [`TokenString`].
 565	///
 566	/// `bytes` must be valid UTF-8, use [`TokenString::try_from`] if you are
 567	/// not sure that it is valid. If the given byte slice is bigger than
 568	/// [`MAX_LENGTH`], this panics.
 569	///
 570	/// Memory:
 571	///
 572	/// Allocates if and only if the length of `bytes` is bigger than
 573	/// [`MAX_LENGTH_SMALL`].
 574	///
 575	/// # Panics
 576	///
 577	/// Panics if `bytes` is bigger than [`MAX_LENGTH`].
 578	///
 579	/// # Safety
 580	///
 581	/// `bytes` must be valid UTF-8, if not, all bets are off - UB!
 582	#[must_use]
 583	pub unsafe fn from_bytes_unchecked(bytes: &[u8]) -> Self {
 584		match bytes.len() {
 585			| 0 => Self {
 586				len: 0,
 587				prefix: [0_u8; PREFIX_LENGTH],
 588				u: Data {
 589					small: [0_u8; SMALL_DATA_LENGTH],
 590				},
 591			},
 592			| 1 ..= PREFIX_LENGTH => {
 593				let s = bytes.len();
 594				let mut prefix = [0_u8; PREFIX_LENGTH];
 595				prefix[.. s].copy_from_slice(&bytes[.. s]);
 596				Self {
 597					#[expect(
 598						clippy::cast_possible_truncation,
 599						reason = "Length has been checked above"
 600					)]
 601					len: s as u16,
 602					prefix,
 603					u: Data {
 604						small: [0_u8; SMALL_DATA_LENGTH],
 605					},
 606				}
 607			}
 608			| PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => {
 609				let s = bytes.len();
 610				let mut prefix = [0_u8; PREFIX_LENGTH];
 611				prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
 612				let mut small = [0_u8; SMALL_DATA_LENGTH];
 613				small[.. s - PREFIX_LENGTH]
 614					.copy_from_slice(&bytes[PREFIX_LENGTH .. s]);
 615				Self {
 616					#[expect(
 617						clippy::cast_possible_truncation,
 618						reason = "Length has been checked above"
 619					)]
 620					len: s as u16,
 621					prefix,
 622					u: Data { small },
 623				}
 624			}
 625			| MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => {
 626				let ptr = StringPtr::from(bytes);
 627				let u = Data {
 628					ptr: mem::ManuallyDrop::new(ptr),
 629				};
 630				let mut prefix = [0_u8; PREFIX_LENGTH];
 631				prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
 632				Self {
 633					#[expect(
 634						clippy::cast_possible_truncation,
 635						reason = "Length has been checked above"
 636					)]
 637					len: bytes.len() as u16,
 638					prefix,
 639					u,
 640				}
 641			}
 642			| _ => panic!(
 643				"This byte slice is too big for a TokenString, {} > \
 644				 {MAX_LENGTH}",
 645				bytes.len()
 646			),
 647		}
 648	}
 649
 650	/// Convert to a [`TokenString`].
 651	///
 652	/// If the given string `s` is bigger than [`MAX_LENGTH`], this panics. Use
 653	/// [`TokenString::try_from`] for a function that does not panic. The string
 654	/// `s` must be valid UTF-8 too, but it has already been UB if it isn't.
 655	///
 656	/// Memory:
 657	///
 658	/// Allocates if and only if the length of `s` is bigger than
 659	/// [`MAX_LENGTH_SMALL`].
 660	///
 661	/// # Panics
 662	///
 663	/// Panics if `s` is bigger than [`MAX_LENGTH`].
 664	#[must_use]
 665	#[inline]
 666	pub fn from_str_unchecked(s: &str) -> Self {
 667		// SAFETY:
 668		// The unsafe part of `from_bytes_unchecked` is the possibility of the
 669		// byte slice not being valid UTF-8. We are processing an UTF-8 string
 670		// here.
 671		unsafe { Self::from_bytes_unchecked(s.as_bytes()) }
 672	}
 673
 674	/// Convert to a [`TokenString`].
 675	///
 676	/// If the given string `s` is bigger than [`MAX_LENGTH`], this panics. Use
 677	/// [`TokenString::try_from`] for a function that does not panic. The string
 678	/// `s` must be valid UTF-8 too, but it has already been UB if it isn't.
 679	///
 680	/// Memory:
 681	///
 682	/// Allocates if and only if the length of `s` is bigger than
 683	/// [`MAX_LENGTH_SMALL`].
 684	///
 685	/// # Panics
 686	///
 687	/// Panics if `s` is bigger than [`MAX_LENGTH`].
 688	#[must_use]
 689	#[inline]
 690	pub fn from_string_unchecked(s: &alloc::string::String) -> Self {
 691		// SAFETY:
 692		// The unsafe part of `from_bytes_unchecked` is the possibility of the
 693		// byte slice not being valid UTF-8. We are processing an UTF-8 string
 694		// here.
 695		unsafe { Self::from_bytes_unchecked(s.as_bytes()) }
 696	}
 697
 698	/// Return the string as a &[`prim@str`].
 699	#[must_use]
 700	#[inline]
 701	pub fn as_str(&self) -> &str {
 702		if self.len == 0 {
 703			""
 704		} else if self.len as usize > MAX_LENGTH_SMALL {
 705			// SAFETY:
 706			// We know, that in the union must be a valid pointer.
 707			unsafe { self.u.ptr.as_string_manually(self.len.into()) }
 708		} else {
 709			// SAFETY:
 710			// Use the whole memory region of self.`prefix` and `self.u.small`
 711			// as a single array. This is not UB, as the whole memory
 712			// `TokenString` has been allocated at once and is guaranteed to be
 713			// continuous in memory. If Miri complains about this, use the
 714			// flag `MIRIFLAGS="-Zmiri-tree-borrows"` to use "tree borrows"
 715			// instead of "stacked borrows".
 716			let bytes = unsafe {
 717				slice::from_raw_parts(self.prefix.as_ptr(), self.len.into())
 718			};
 719			// SAFETY:
 720			// The precondition of `TokenString` is that the string is a valid
 721			// UTF-8 byte sequence.
 722			unsafe { str::from_utf8_unchecked(bytes) }
 723		}
 724	}
 725
 726	/// Return the string as a byte slice.
 727	#[must_use]
 728	#[inline]
 729	pub fn as_bytes(&self) -> &[u8] {
 730		if self.len == 0 {
 731			Default::default()
 732		} else if self.len as usize > MAX_LENGTH_SMALL {
 733			// SAFETY:
 734			// We know, that in the union must be a valid pointer.
 735			unsafe { self.u.ptr.as_slice_manually(self.len.into()) }
 736		} else {
 737			// SAFETY:
 738			// Use the whole memory region of self.`prefix` and `self.u.small`
 739			// as a single array. This is not UB, as the whole memory
 740			// `TokenString` has been allocated at once and is guaranteed to be
 741			// continuous in memory. If Miri complains about this, use the
 742			// flag `MIRIFLAGS="-Zmiri-tree-borrows"` to use "tree borrows"
 743			// instead of "stacked borrows".
 744			unsafe {
 745				slice::from_raw_parts(self.prefix.as_ptr(), self.len.into())
 746			}
 747		}
 748	}
 749
 750	/// Return the string as a new [`alloc::string::String`].
 751	///
 752	/// Memory:
 753	///
 754	/// Allocates a new [`alloc::string::String`].
 755	#[must_use]
 756	#[inline]
 757	pub fn as_string(&self) -> alloc::string::String {
 758		self.to_string()
 759	}
 760
 761	/// Return the string as a new vector of [`char`]s.
 762	///
 763	/// Memory:
 764	///
 765	/// Allocates a new [`vec::Vec`].
 766	#[must_use]
 767	#[inline]
 768	pub fn as_chars(&self) -> vec::Vec<char> {
 769		self.as_str().chars().collect()
 770	}
 771
 772	/// Return the part of the string which is not stored in `self.prefix`.
 773	///
 774	/// If the string is <= [`PREFIX_LENGTH`], the empty slice is returned.
 775	fn suffix(&self) -> &[u8] {
 776		match self.len as usize {
 777			| 0 ..= PREFIX_LENGTH => Default::default(),
 778			| PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL =>
 779			// SAFETY:
 780			// We checked and know that this is a small string.
 781			unsafe { &self.u.small },
 782			| MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH =>
 783			// SAFETY:
 784			// We checked and know that this string is allocated on the heap.
 785			unsafe {
 786				&self.u.ptr.as_slice_manually(self.len.into())[PREFIX_LENGTH ..]
 787			},
 788			| _ => panic!(
 789				"Error: this TokenString is bigger than \
 790				 TokenString::MAX_LENGTH!"
 791			),
 792		}
 793	}
 794
 795	/// Return the byte at index `idx`, check bounds.
 796	///
 797	/// Returns [`TkStrError::OutOfBounds`] if the index is bigger than the
 798	/// string's length.
 799	///
 800	/// # Errors
 801	/// [`TkStrError::OutOfBounds`] if `idx` is bigger than the string's length.
 802	#[inline]
 803	pub fn get(&self, idx: u16) -> Result<u8, TkStrError> {
 804		if idx >= self.len {
 805			return Err(TkStrError::OutOfBounds(idx as usize));
 806		}
 807		// SAFETY:
 808		// We check above that the index is in bounds.
 809		unsafe { Ok(*self.as_bytes().get_unchecked(idx as usize)) }
 810	}
 811
 812	/// Return the byte at index `idx`, don't check bounds.
 813	///
 814	/// Panics if the index is bigger than the
 815	/// string's length.
 816	///
 817	/// # Panics
 818	///
 819	/// if `idx` is bigger than the string's length.
 820	#[must_use]
 821	#[inline]
 822	pub fn get_unchecked(&self, idx: u16) -> u8 {
 823		assert!((idx < self.len), "index {idx} out of bounds");
 824		// SAFETY:
 825		// We check above that the index is in bounds.
 826		unsafe { *self.as_bytes().get_unchecked(idx as usize) }
 827	}
 828
 829	/// Return an iterator over the `[char]`s of a string.
 830	///
 831	/// That is, an iterator over the Unicode scalar values of the
 832	/// `TokenString`.
 833	#[inline]
 834	pub fn chars(&'_ self) -> str::Chars<'_> {
 835		self.as_str().chars()
 836	}
 837
 838	/// Get a reference iterator.
 839	#[must_use]
 840	#[inline]
 841	pub fn iter(&self) -> TokenStringIter<'_> {
 842		<&Self as IntoIterator>::into_iter(self)
 843	}
 844
 845	/// Return `true`, if the first byte is an uppercase ASCII character.
 846	#[must_use]
 847	#[inline]
 848	pub const fn starts_ascii_uppercase(&self) -> bool {
 849		self.prefix[0].is_ascii_uppercase()
 850	}
 851
 852	/// Return `true`, if the first byte is an lowercase ASCII character.
 853	#[must_use]
 854	#[inline]
 855	pub const fn starts_ascii_lowercase(&self) -> bool {
 856		self.prefix[0].is_ascii_lowercase()
 857	}
 858
 859	/// Return `true`, if the string contains only ASCII characters.
 860	#[must_use]
 861	#[inline]
 862	pub fn is_ascii(&self) -> bool {
 863		self.as_bytes().is_ascii()
 864	}
 865
 866	/// Return `true`, if the string starts with `needle`.
 867	///
 868	/// Returns `true` too if the string is `needle`.
 869	#[must_use]
 870	#[inline]
 871	pub fn starts_with(&self, needle: &Self) -> bool {
 872		self.as_bytes().starts_with(needle.as_bytes())
 873	}
 874
 875	/// Return `true`, if the string starts with `needle`.
 876	///
 877	/// Returns `true` too if the string is `needle`.
 878	#[must_use]
 879	#[inline]
 880	pub fn starts_with_bytes(&self, needle: &[u8]) -> bool {
 881		self.as_bytes().starts_with(needle)
 882	}
 883
 884	/// Return `true`, if the string starts with `needle`.
 885	///
 886	/// Returns `true` too if the string is `needle`.
 887	#[must_use]
 888	#[inline]
 889	pub fn starts_with_str(&self, needle: &str) -> bool {
 890		self.as_str().starts_with(needle)
 891	}
 892
 893	/// Return `true`, if the string ends with `needle`.
 894	///
 895	/// Returns `true` too if the string is `needle`.
 896	#[must_use]
 897	#[inline]
 898	pub fn ends_with(&self, needle: &Self) -> bool {
 899		self.as_bytes().ends_with(needle.as_bytes())
 900	}
 901
 902	/// Return `true`, if the string ends with `needle`.
 903	///
 904	/// Returns `true` too if the string is `needle`.
 905	#[must_use]
 906	#[inline]
 907	pub fn ends_with_bytes(&self, needle: &[u8]) -> bool {
 908		self.as_bytes().ends_with(needle)
 909	}
 910
 911	/// Return `true`, if the string ends with `needle`.
 912	///
 913	/// Returns `true` too if the string is `needle`.
 914	#[must_use]
 915	#[inline]
 916	pub fn ends_with_str(&self, needle: &str) -> bool {
 917		self.as_str().ends_with(needle)
 918	}
 919
 920	/// Map the given function `f` over the bytes of the string, mutating it.
 921	fn map_bytes_mut(&mut self, f: fn(&mut [u8]) -> ()) {
 922		if self.len as usize > MAX_LENGTH_SMALL {
 923			// SAFETY:
 924			// We check, that we actually have a valid pointer.
 925			unsafe {
 926				f((*self.u.ptr).as_slice_manually_mut(self.len as usize));
 927			}
 928		} else {
 929			// SAFETY:
 930			// The two arrays, `prefix` and `small`, are guaranteed to be
 931			// continuous in memory.
 932			unsafe {
 933				f(slice::from_raw_parts_mut(
 934					self.prefix.as_mut_ptr(),
 935					self.len as usize,
 936				));
 937			}
 938		}
 939	}
 940
 941	/// Return a new string with all uppercase ASCII characters changed to
 942	/// lowercase.
 943	#[must_use]
 944	#[inline]
 945	pub fn to_ascii_lowercase(&self) -> Self {
 946		let mut ret_val = self.clone();
 947		ret_val.map_bytes_mut(<[u8]>::make_ascii_lowercase);
 948		ret_val
 949	}
 950
 951	/// Return a new string with all lowercase ASCII characters changed to
 952	/// uppercase.
 953	#[must_use]
 954	#[inline]
 955	pub fn to_ascii_uppercase(&self) -> Self {
 956		let mut ret_val = self.clone();
 957		ret_val.map_bytes_mut(<[u8]>::make_ascii_uppercase);
 958		ret_val
 959	}
 960
 961	/// Return a new string with all ASCII whitespace removed from the start and
 962	/// end.
 963	#[must_use]
 964	#[inline]
 965	pub fn trim_ascii(&self) -> Self {
 966		// SAFETY:
 967		// We copy the current string, so the invariants should hold for the
 968		// copy too. The string does not get longer, so cannot be greater than
 969		// `MAX_LENGTH`.
 970		unsafe { Self::from_bytes_unchecked(self.as_bytes().trim_ascii()) }
 971	}
 972
 973	/// Return a new string with all ASCII whitespace removed from the start.
 974	#[must_use]
 975	#[inline]
 976	pub fn trim_ascii_start(&self) -> Self {
 977		// SAFETY:
 978		// We copy the current string, so the invariants should hold for the
 979		// copy too:
 980		// - The string does not get longer, so cannot be greater than
 981		// `MAX_LENGTH`.
 982		// - if the string is valid UTF-8, removing ASCII characters does not
 983		//   change that.
 984		unsafe {
 985			Self::from_bytes_unchecked(self.as_bytes().trim_ascii_start())
 986		}
 987	}
 988
 989	/// Return a new string with all ASCII whitespace removed from the end.
 990	#[must_use]
 991	#[inline]
 992	pub fn trim_ascii_end(&self) -> Self {
 993		// SAFETY:
 994		// We copy the current string, so the invariants should hold for the
 995		// copy too:
 996		// - The string does not get longer, so cannot be greater than
 997		// `MAX_LENGTH`.
 998		// - if the string is valid UTF-8, removing ASCII characters does not
 999		//   change that.
1000		unsafe { Self::from_bytes_unchecked(self.as_bytes().trim_ascii_end()) }
1001	}
1002
1003	/// Return a new string with `prefix` removed from the start.
1004	#[cfg(feature = "pattern")]
1005	#[doc(cfg(pattern))]
1006	#[inline]
1007	pub fn strip_prefix<P: str::pattern::Pattern>(
1008		&self,
1009		prefix: P,
1010	) -> Option<Self> {
1011		self.as_str()
1012			.strip_prefix(prefix)
1013			// stripping a prefix should not make the string invalid UTF-8, and
1014			// does shorten it.
1015			.map(Self::from_str_unchecked)
1016	}
1017
1018	/// Return a new string with `suffix` removed from the end.
1019	#[cfg(feature = "pattern")]
1020	#[doc(cfg(pattern))]
1021	#[inline]
1022	pub fn strip_suffix<P>(&self, suffix: P) -> Option<Self>
1023	where
1024		P: str::pattern::Pattern,
1025		for<'a> P::Searcher<'a>: str::pattern::ReverseSearcher<'a>,
1026	{
1027		self.as_str()
1028			.strip_suffix(suffix)
1029			// stripping a suffix should not make the string invalid UTF-8, and
1030			// does shorten it.
1031			.map(Self::from_str_unchecked)
1032	}
1033
1034	/// Return `true` if the string contains the pattern `pat`.
1035	///
1036	/// Returns `false` else.
1037	///
1038	/// The feature
1039	#[cfg(feature = "pattern")]
1040	#[doc(cfg(pattern))]
1041	#[inline]
1042	pub fn contains<P: str::pattern::Pattern>(&self, pat: P) -> bool {
1043		self.as_str().contains(pat)
1044	}
1045}
1046
1047
1048//==============================================================================
1049// Iterating by reference
1050
1051/// Iterator struct for a `&TokenString`.
1052///
1053/// Iterator items are single bytes, `u8`.
1054pub struct TokenStringIter<'a> {
1055	/// The [`TokenString`] to iterate over.
1056	string: &'a TokenString,
1057	/// The current index in the string.
1058	idx: usize,
1059}
1060
1061impl<'a> TokenStringIter<'a> {
1062	/// Generate a reference iterator for the given [`TokenString`].
1063	#[must_use]
1064	#[inline]
1065	pub const fn new(s: &'a TokenString) -> Self {
1066		TokenStringIter { string: s, idx: 0 }
1067	}
1068}
1069
1070impl Iterator for TokenStringIter<'_> {
1071	type Item = u8;
1072
1073	/// Return either the next byte, [`u8`], or [`None`] if we are at the end of
1074	/// the string.
1075	fn next(&mut self) -> Option<Self::Item> {
1076		debug_assert!(
1077			self.idx <= self.string.len.into(),
1078			"The iterator index '{0}' is greater than the string length '{1}'!",
1079			self.idx,
1080			self.string.len
1081		);
1082		if self.idx == self.string.len.into() {
1083			None
1084		} else if self.string.len as usize > MAX_LENGTH_SMALL {
1085			self.idx += 1;
1086			Some(self.string.as_bytes()[self.idx - 1])
1087		} else {
1088			self.idx += 1;
1089			Some(
1090				// SAFETY:
1091				// The two arrays, `prefix` and `u.small`, are guaranteed to be
1092				// consecutive in memory and allocated at the same time.
1093				unsafe {
1094					slice::from_raw_parts(
1095						self.string.prefix.as_ptr(),
1096						self.string.len as usize,
1097					)
1098				}[self.idx - 1],
1099			)
1100		}
1101	}
1102}
1103
1104impl<'a> IntoIterator for &'a TokenString {
1105	type IntoIter = TokenStringIter<'a>;
1106	type Item = u8;
1107
1108	#[inline]
1109	fn into_iter(self) -> Self::IntoIter {
1110		Self::IntoIter::new(self)
1111	}
1112}
1113
1114//==============================================================================
1115// Iterating an owned `TokenString`.
1116
1117/// Iterator struct for an owned [`TokenString`].
1118///
1119/// Iterator items are single bytes, [`u8`].
1120pub struct TokenStringIterOwn {
1121	/// The [`TokenString`] to iterate over.
1122	string: TokenString,
1123	/// The current index in the string.
1124	idx: usize,
1125}
1126
1127impl TokenStringIterOwn {
1128	/// Generate an owned iterator for the given [`TokenString`].
1129	#[must_use]
1130	#[inline]
1131	pub const fn new(s: TokenString) -> Self {
1132		Self { string: s, idx: 0 }
1133	}
1134}
1135
1136impl Iterator for TokenStringIterOwn {
1137	type Item = u8;
1138
1139	/// Return either the next byte, [`u8`], or [`None`] if we are at the end of
1140	/// the string.
1141	fn next(&mut self) -> Option<Self::Item> {
1142		debug_assert!(
1143			self.idx <= self.string.len.into(),
1144			"The iterator index '{0}' is greater than the string length '{1}'!",
1145			self.idx,
1146			self.string.len
1147		);
1148		if self.idx == self.string.len.into() {
1149			None
1150		} else if self.string.len as usize > MAX_LENGTH_SMALL {
1151			self.idx += 1;
1152			Some(self.string.as_bytes()[self.idx - 1])
1153		} else {
1154			self.idx += 1;
1155			Some(
1156				// SAFETY:
1157				// The two arrays, `prefix` and `u.small`, are guaranteed to be
1158				// consecutive in memory and allocated at the same time.
1159				unsafe {
1160					slice::from_raw_parts(
1161						self.string.prefix.as_ptr(),
1162						self.string.len as usize,
1163					)
1164				}[self.idx - 1],
1165			)
1166		}
1167	}
1168}
1169
1170impl IntoIterator for TokenString {
1171	type IntoIter = TokenStringIterOwn;
1172	type Item = u8;
1173
1174	#[inline]
1175	fn into_iter(self) -> Self::IntoIter {
1176		Self::IntoIter::new(self)
1177	}
1178}
1179
1180
1181// =============================================================================
1182//                                  Tests
1183// =============================================================================
1184
/// Tests with strings of at most 6 bytes, which are checked against the
/// `prefix` field only.
#[cfg(test)]
mod prefix {
	extern crate std;
	use assert2::{check, let_assert};

	use crate::TokenString;


	// The empty string has a length of zero and is a small string.
	#[test]
	fn empty_is_empty() {
		let_assert!(Ok(res) = TokenString::try_from(""));
		check!(res.len == 0);
		check!(res.prefix[0] == 0);
		check!(res.is_small() == true);
	}

	// Cloning the empty string yields the same prefix and length.
	#[test]
	fn clone_empty() {
		let_assert!(Ok(s1) = TokenString::try_from(""));
		let res = s1.clone();
		check!(res.len == s1.len);
		check!(res.prefix[0] == s1.prefix[0]);
		check!(res.is_small() == true);
	}

	// Conversion from a `&str`.
	#[test]
	fn try_from_str() {
		let_assert!(Ok(res) = TokenString::try_from("123456"));
		check!(res.len == 6);
		check!(&res.prefix[.. 6] == b"123456");
		check!(res.is_small() == true);
	}

	// Cloning yields the same prefix and length.
	#[test]
	fn clone() {
		let_assert!(Ok(s1) = TokenString::try_from("123456"));
		let res = s1.clone();
		check!(res.len == s1.len);
		check!(&res.prefix[.. 6] == &s1.prefix[.. 6]);
		check!(res.is_small() == true);
	}

	// Conversion from a byte slice.
	#[test]
	fn try_from_bytes() {
		let s1: &[u8] = b"123456";
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check!(res.len == 6);
		check!(&res.prefix[.. 6] == b"123456");
		check!(res.is_small() == true);
	}

	// Conversion from a slice of `char`s.
	#[test]
	fn try_from_chars() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1 = "123456".chars().collect::<std::vec::Vec<char>>();
		let_assert!(Ok(res) = TokenString::try_from(s1.as_slice()));
		check!(res.len == 6);
		check!(&res.prefix[.. 6] == b"123456");
		check!(res.is_small() == true);
	}

	// Conversion from an owned `String`.
	#[test]
	fn try_from_string() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1 = std::string::String::from("123456");
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check!(res.len == 6);
		check!(&res.prefix[.. 6] == b"123456");
		check!(res.is_small() == true);
	}

	// Conversion from a reference to a `String`.
	#[test]
	fn try_from_stringref() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1 = std::string::String::from("123456");
		let_assert!(Ok(res) = TokenString::try_from(&s1));
		check!(res.len == 6);
		check!(&res.prefix[.. 6] == b"123456");
		check!(res.is_small() == true);
	}

	// Unchecked construction from a `&str`.
	#[test]
	fn from_str_unchecked() {
		let res = TokenString::from_str_unchecked("123456");
		check!(res.len == 6);
		check!(&res.prefix[.. 6] == b"123456");
		// Same small-string check as in all the other tests of this module.
		check!(res.is_small() == true);
	}

	// Unchecked construction from a byte slice.
	#[test]
	fn from_bytes_unchecked() {
		let s1: &[u8] = b"123456";
		// SAFETY:
		// We know that the string is valid UTF-8.
		let res = unsafe { TokenString::from_bytes_unchecked(s1) };
		check!(res.len == 6);
		check!(&res.prefix[.. 6] == b"123456");
		check!(res.is_small() == true);
	}

	// Unchecked construction from a reference to a `String`.
	#[test]
	fn from_stringref_unchecked() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1 = std::string::String::from("123456");
		let res = TokenString::from_string_unchecked(&s1);
		check!(res.len == 6);
		check!(&res.prefix[.. 6] == b"123456");
		check!(res.is_small() == true);
	}
}
1306
/// Tests with a 7 byte string: the first 6 bytes are checked in the field
/// `prefix`, the seventh byte in the union field `u.small`.
#[cfg(test)]
mod small {
	extern crate std;
	use assert2::{check, let_assert};

	use crate::TokenString;


	// Conversion from a `&str`.
	#[test]
	fn try_from_str() {
		let_assert!(Ok(res) = TokenString::try_from("1234567"));
		check!(res.len == 7);
		check!(&res.prefix[.. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.is_small() == true);
	}

	// Cloning yields the same prefix, small data and length.
	#[test]
	fn clone() {
		let_assert!(Ok(s1) = TokenString::try_from("1234567"));
		let res = s1.clone();
		check!(res.len == s1.len);
		check!(&res.prefix[.. 6] == &s1.prefix[.. 6]);
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] == s1.u.small[0] });
		check!(res.is_small() == true);
	}

	// Conversion from a byte slice.
	#[test]
	fn try_from_bytes() {
		let s1: &[u8] = b"1234567";
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check!(res.len == 7);
		check!(&res.prefix[.. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.is_small() == true);
	}

	// Conversion from a slice of `char`s.
	#[test]
	fn try_from_chars() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1 = "1234567".chars().collect::<std::vec::Vec<char>>();
		let_assert!(Ok(res) = TokenString::try_from(s1.as_slice()));
		check!(res.len == 7);
		check!(&res.prefix[.. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.is_small() == true);
	}

	// Conversion from an owned `String`.
	#[test]
	fn try_from_string() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1 = std::string::String::from("1234567");
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check!(res.len == 7);
		check!(&res.prefix[.. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.is_small() == true);
	}

	// Conversion from a reference to a `String`.
	#[test]
	fn try_from_stringref() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1 = std::string::String::from("1234567");
		let_assert!(Ok(res) = TokenString::try_from(&s1));
		check!(res.len == 7);
		check!(&res.prefix[.. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.is_small() == true);
	}

	// Unchecked construction from a `&str`.
	#[test]
	fn from_str_unchecked() {
		let res = TokenString::from_str_unchecked("1234567");
		check!(res.len == 7);
		check!(&res.prefix[.. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.is_small() == true);
	}

	// Unchecked construction from a byte slice.
	#[test]
	fn from_bytes_unchecked() {
		let s1: &[u8] = b"1234567";
		// SAFETY:
		// We know that the string is valid UTF-8.
		let res = unsafe { TokenString::from_bytes_unchecked(s1) };
		check!(res.len == 7);
		check!(&res.prefix[.. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.is_small() == true);
	}

	// Unchecked construction from a reference to a `String`.
	#[test]
	fn from_stringref_unchecked() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1 = std::string::String::from("1234567");
		let res = TokenString::from_string_unchecked(&s1);
		check!(res.len == 7);
		check!(&res.prefix[.. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.is_small() == true);
	}
}
1439
/// Tests with a 15 byte string, for which `is_small()` is `false` and the
/// whole content is read through the union field `u.ptr`.
#[cfg(test)]
mod heap {
	extern crate std;
	use assert2::{check, let_assert};

	use crate::TokenString;


	// Conversion from a `&str`.
	#[test]
	fn try_from_str() {
		let_assert!(Ok(res) = TokenString::try_from("1234567890ABCDE"));
		check!(res.len == 15);
		check!(&res.prefix[.. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(usize::from(res.len))[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.is_small() == false);
	}


	// Cloning yields the same prefix, heap content and length.
	#[test]
	fn clone() {
		let_assert!(Ok(s1) = TokenString::try_from("1234567890ABCDE"));
		let res = s1.clone();
		let len = usize::from(res.len);
		check!(res.len == s1.len);
		check!(&res.prefix[.. 6] == &s1.prefix[.. 6]);
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe {
				res.u.ptr.as_slice_manually(len)[.. 15]
					== s1.u.ptr.as_slice_manually(len)[.. 15]
			}
		);
		check!(res.is_small() == false);
	}

	// Conversion from a byte slice.
	#[test]
	fn try_from_bytes() {
		let s1: &[u8] = b"1234567890ABCDE";
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check!(res.len == 15);
		check!(&res.prefix[.. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(usize::from(res.len))[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.is_small() == false);
	}

	// Conversion from a slice of `char`s.
	#[test]
	fn try_from_chars() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1 = "1234567890ABCDE".chars().collect::<std::vec::Vec<char>>();
		let_assert!(Ok(res) = TokenString::try_from(s1.as_slice()));
		check!(res.len == 15);
		check!(&res.prefix[.. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(usize::from(res.len))[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.is_small() == false);
	}

	// Conversion from an owned `String`.
	#[test]
	fn try_from_string() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1 = std::string::String::from("1234567890ABCDE");
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check!(res.len == 15);
		check!(&res.prefix[.. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(usize::from(res.len))[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.is_small() == false);
	}

	// Conversion from a reference to a `String`.
	#[test]
	fn try_from_stringref() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1 = std::string::String::from("1234567890ABCDE");
		let_assert!(Ok(res) = TokenString::try_from(&s1));
		check!(res.len == 15);
		check!(&res.prefix[.. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(usize::from(res.len))[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.is_small() == false);
	}

	// Unchecked construction from a `&str`.
	#[test]
	fn from_str_unchecked() {
		let res = TokenString::from_str_unchecked("1234567890ABCDE");
		check!(res.len == 15);
		check!(&res.prefix[.. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(usize::from(res.len))[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.is_small() == false);
	}

	// Unchecked construction from a byte slice.
	#[test]
	fn from_bytes_unchecked() {
		let s1: &[u8] = b"1234567890ABCDE";
		// SAFETY:
		// We know that the string is valid UTF-8.
		let res = unsafe { TokenString::from_bytes_unchecked(s1) };
		check!(res.len == 15);
		check!(&res.prefix[.. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(usize::from(res.len))[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.is_small() == false);
	}

	// Unchecked construction from a reference to a `String`.
	#[test]
	fn from_stringref_unchecked() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1 = std::string::String::from("1234567890ABCDE");
		let res = TokenString::from_string_unchecked(&s1);
		check!(res.len == 15);
		check!(&res.prefix[.. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(usize::from(res.len))[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.is_small() == false);
	}
}