Short (up to 65,535 bytes) immutable strings, e.g. for parsed tokens, implemented in Rust. These are sometimes called "German Strings", because the paper that describes them was written by Germans.
at main 1601 lines 43 kB view raw
1// SPDX-FileCopyrightText: Copyright (C) 2024 Roland Csaszar 2// SPDX-License-Identifier: MPL-2.0 3// 4// Project: token-string 5// File: string.rs 6// Date: 22.Nov.2024 7// ============================================================================= 8//! The string type [`TokenString`]. 9 10extern crate alloc; 11 12use alloc::string::ToString as _; 13use alloc::vec; 14use core::{borrow, cmp, fmt, hash, mem, ops, panic, slice, str}; 15 16use crate::{StringPtr, TkStrError}; 17 18/// The length of the prefix of the string, that is, the first bytes stored 19/// in the field `prefix` for comparisons. 20pub const PREFIX_LENGTH: usize = mem::size_of::<u64>() - mem::size_of::<u16>(); 21 22/// Helper constant for matching intervals. 23const PREFIX_LENGTH_ADD1: usize = PREFIX_LENGTH + 1; 24 25/// The length of the non-prefix part of a "small string", 8 bytes. The content 26/// of the field `_d.small`. 27pub const SMALL_DATA_LENGTH: usize = mem::size_of::<u64>(); 28 29/// The maximum length in bytes, not Unicode scalar values, of a "small" string 30/// that is saved in the struct [`TokenString`] itself and not on the heap. 31pub const MAX_LENGTH_SMALL: usize = PREFIX_LENGTH + SMALL_DATA_LENGTH; 32 33/// Helper constant for matching intervals. 34pub const MAX_LENGTH_SMALL_ADD1: usize = MAX_LENGTH_SMALL + 1; 35 36/// The maximum length in bytes, not Unicode scalar values, of a 37/// [`TokenString`]. 38pub const MAX_LENGTH: usize = u16::MAX as usize; 39 40/// A string which can hold at most [`MAX_LENGTH`] bytes (not Unicode scalar 41/// values). 42/// 43/// This holds valid UTF-8 encoded strings only. 44/// Strings that are short enough, which need at most [`MAX_LENGTH_SMALL`] 45/// bytes, are stored in the struct itself, bigger ones use the heap. 46/// 47/// # Invariant 48/// 49/// - [`TokenString`] must be a UTF-8 string (like &[`prim@str`] and 50/// [`alloc::string::String`]). 
51/// - The length of a [`TokenString`] is at most [`MAX_LENGTH`] and at least 0 - 52/// the empty string. 53#[repr(C)] 54pub struct TokenString { 55 /// The length of the string. 56 /// 57 /// Maximum: [`MAX_LENGTH`]. 58 pub(crate) len: u16, 59 /// The first [`PREFIX_LENGTH`] bytes of the string. 60 pub(crate) prefix: [u8; PREFIX_LENGTH], 61 /// The data (see [`Data`]). 62 /// 63 /// If the string is at most [`MAX_LENGTH_SMALL`] bytes, this holds the 64 /// other bytes of the string, else this is a pointer to the heap. 65 pub(crate) u: Data, 66} 67 68 69// Invariants: [`TokenString`] must be aligned to 64 bits and its size must be 70// 128 bits. That means that `sizeof len + prefix == 64 bit` and 71// `sizeof u == 64 bit`. So there is no padding. 72 73const _: () = assert!( 74 mem::align_of::<TokenString>() == mem::size_of::<u64>(), 75 "struct TokenString is not aligned to 64 bits!" 76); 77const _: () = assert!( 78 mem::size_of::<TokenString>() == 2 * mem::size_of::<u64>(), 79 "struct TokenString has size != 128 bits" 80); 81const _: () = assert!( 82 mem::align_of::<Data>() == mem::size_of::<u64>(), 83 "struct Data is not aligned to 64 bits!" 84); 85const _: () = assert!( 86 mem::size_of::<Data>() == mem::size_of::<u64>(), 87 "union Data has size != 64 bits" 88); 89 90// ============================================================================= 91// Inner types of `TokenString`. 92 93/// This is either a pointer to the string, if the string is bigger than 94/// [`SMALL_DATA_LENGTH`] bytes, or a pointer to a string as an array of bytes. 95/// 96/// See [`StringPtr`] 97#[repr(C)] 98pub union Data { 99 /// If the string is small enough (at most [`MAX_LENGTH_SMALL`]), its data 100 /// after the prefix is here. 101 pub(crate) small: [u8; SMALL_DATA_LENGTH], 102 /// For bigger strings as [`MAX_LENGTH_SMALL`], this points to the memory 103 /// holding the whole string. 
104 pub(crate) ptr: mem::ManuallyDrop<StringPtr>, 105} 106 107// ============================================================================= 108// `TokenString` itself 109 110/// The empty string. 111/// 112/// Has a length of zero. 113pub const EMPTY: TokenString = TokenString { 114 len: 0, 115 prefix: [0_u8; PREFIX_LENGTH], 116 u: Data { 117 small: [0_u8; SMALL_DATA_LENGTH], 118 }, 119}; 120 121// ============================================================================= 122// Traits 123 124impl TryFrom<&str> for TokenString { 125 type Error = TkStrError; 126 127 /// Create a [`TokenString`] from a &[`prim@str`]. 128 /// 129 /// Return [`TkStrError::TooBig`] if the argument is greater than 130 /// [`MAX_LENGTH`]. 131 /// 132 /// Memory: 133 /// 134 /// Allocates if and only if the length of `value` is bigger than 135 /// [`MAX_LENGTH_SMALL`]. 136 fn try_from(value: &str) -> Result<Self, Self::Error> { 137 let bytes = value.as_bytes(); 138 match value.len() { 139 | 0 => Ok(Self { 140 len: 0, 141 prefix: [0_u8; PREFIX_LENGTH], 142 u: Data { 143 small: [0_u8; SMALL_DATA_LENGTH], 144 }, 145 }), 146 | 1 ..= PREFIX_LENGTH => { 147 let s = value.len(); 148 let mut prefix = [0_u8; PREFIX_LENGTH]; 149 prefix[.. s].copy_from_slice(&bytes[.. s]); 150 Ok(Self { 151 #[expect( 152 clippy::cast_possible_truncation, 153 reason = "Length has been checked above" 154 )] 155 len: s as u16, 156 prefix, 157 u: Data { 158 small: [0_u8; SMALL_DATA_LENGTH], 159 }, 160 }) 161 } 162 | PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => { 163 let s = value.len(); 164 let mut prefix = [0_u8; PREFIX_LENGTH]; 165 prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]); 166 let mut small = [0_u8; SMALL_DATA_LENGTH]; 167 small[.. s - PREFIX_LENGTH] 168 .copy_from_slice(&bytes[PREFIX_LENGTH .. 
s]); 169 Ok(Self { 170 #[expect( 171 clippy::cast_possible_truncation, 172 reason = "Length has been checked above" 173 )] 174 len: s as u16, 175 prefix, 176 u: Data { small }, 177 }) 178 } 179 | MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => { 180 let ptr = StringPtr::from(bytes); 181 let u = Data { 182 ptr: mem::ManuallyDrop::new(ptr), 183 }; 184 let mut prefix = [0_u8; PREFIX_LENGTH]; 185 prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]); 186 Ok(Self { 187 #[expect( 188 clippy::cast_possible_truncation, 189 reason = "Length has been checked above" 190 )] 191 len: value.len() as u16, 192 prefix, 193 u, 194 }) 195 } 196 | _ => Err(TkStrError::TooBig(value.len())), 197 } 198 } 199} 200 201impl TryFrom<&[u8]> for TokenString { 202 type Error = TkStrError; 203 204 /// Try to create a [`TokenString`] from the given slice. 205 /// 206 /// Return [`TkStrError::TooBig`] if the given slice is too big, greater 207 /// than [`MAX_LENGTH`]. 208 /// Return [`TkStrError::UnicodeError`] 209 /// 210 /// Memory: 211 /// 212 /// Allocates if and only if the length of `value` is bigger than 213 /// [`MAX_LENGTH_SMALL`]. 214 #[inline] 215 fn try_from(value: &[u8]) -> Result<Self, Self::Error> { 216 match str::from_utf8(value) { 217 | Ok(str) => Self::try_from(str), 218 | Err(utf_err) => Err(TkStrError::UnicodeError(utf_err)), 219 } 220 } 221} 222 223impl TryFrom<&[char]> for TokenString { 224 type Error = TkStrError; 225 226 /// Try to create a [`TokenString`] from the given slice. 227 /// 228 /// Return [`TkStrError::TooBig`] if the given slice is too big, greater 229 /// than [`MAX_LENGTH`]. 230 /// 231 /// Memory 232 /// 233 /// Allocates and deallocates a temporary [`alloc::string::String`] 234 /// collecting the converted bytes. 
235 #[inline] 236 fn try_from(value: &[char]) -> Result<Self, Self::Error> { 237 let i = value.iter(); 238 Self::try_from(i.collect::<alloc::string::String>()) 239 } 240} 241 242impl TryFrom<&alloc::string::String> for TokenString { 243 type Error = TkStrError; 244 245 /// Create a `TokenString` from a &[`alloc::string::String`]. 246 /// 247 /// Return [`TkStrError::TooBig`] if the argument is greater than 248 /// [`MAX_LENGTH`]. 249 /// 250 /// Memory: 251 /// 252 /// Allocates if and only if the length of `value` is bigger than 253 /// [`MAX_LENGTH_SMALL`]. 254 #[inline] 255 fn try_from(value: &alloc::string::String) -> Result<Self, Self::Error> { 256 let str = value.as_str(); 257 Self::try_from(str) 258 } 259} 260 261impl TryFrom<alloc::string::String> for TokenString { 262 type Error = TkStrError; 263 264 /// Create a [`TokenString`] from a [`alloc::string::String`]. 265 /// 266 /// Return [`TkStrError::TooBig`] if the argument is greater than 267 /// [`MAX_LENGTH`]. 268 /// 269 /// Memory: 270 /// 271 /// Allocates if and only if the length of `value` is bigger than 272 /// [`MAX_LENGTH_SMALL`]. 273 #[inline] 274 fn try_from(value: alloc::string::String) -> Result<Self, Self::Error> { 275 // Sadly we can't use the string's data directly, as a [`String`] has a 276 // capacity which is to be known when deallocating the data. 277 // See [`String::into_raw_parts`]. 278 let str = value.as_str(); 279 Self::try_from(str) 280 } 281} 282 283impl Drop for TokenString { 284 #[cfg_attr(test, mutants::skip)] 285 #[inline] 286 fn drop(&mut self) { 287 if usize::from(self.len) > MAX_LENGTH_SMALL { 288 // SAFETY: 289 // We know that there is a pointer saved in the union. 290 // The whole string is being dropped, so taking a mutable 291 // reference of the pointer is legal. 
292 let mut m_ptr = unsafe { mem::ManuallyDrop::take(&mut self.u.ptr) }; 293 m_ptr.drop_manually(self.len.into()); 294 } 295 } 296} 297 298impl Clone for TokenString { 299 /// Return a clone of the [`TokenString`]. 300 /// 301 /// Memory: 302 /// 303 /// Allocates if and only if the length of `value` is bigger than 304 /// [`MAX_LENGTH_SMALL`]. 305 #[inline] 306 fn clone(&self) -> Self { 307 let u = if self.len as usize > MAX_LENGTH_SMALL { 308 Data { 309 // SAFETY: 310 // We check, that there is an allocated pointer saved in the 311 // union. 312 ptr: mem::ManuallyDrop::new(unsafe { 313 self.u.ptr.clone_manually(self.len.into()) 314 }), 315 } 316 } else { 317 Data { 318 // SAFETY: 319 // We check, that there is a small string in the union. 320 small: unsafe { self.u.small }, 321 } 322 }; 323 Self { 324 len: self.len, 325 prefix: self.prefix, 326 u, 327 } 328 } 329} 330 331impl Default for TokenString { 332 /// Return the empty string. 333 #[inline] 334 fn default() -> Self { 335 EMPTY 336 } 337} 338 339impl Eq for TokenString {} 340 341impl PartialEq for TokenString { 342 #[inline] 343 fn eq(&self, other: &Self) -> bool { 344 if self.len != other.len || self.prefix != other.prefix { 345 return false; 346 } 347 348 if self.len as usize <= MAX_LENGTH_SMALL { 349 // SAFETY: 350 // We know we have two small strings to compare. 351 unsafe { self.u.small == other.u.small } 352 } else { 353 // SAFETY: 354 // We know we have two string pointers to compare. 355 unsafe { self.u.ptr.eq_manually(&other.u.ptr, self.len.into()) } 356 } 357 } 358} 359 360impl PartialEq<[u8]> for TokenString { 361 fn eq(&self, other: &[u8]) -> bool { 362 if self.len as usize != other.len() { 363 return false; 364 } 365 let len = self.len as usize; 366 match len { 367 | 0 => true, 368 | 1 ..= PREFIX_LENGTH => self.prefix[.. len] == other[.. 
len], 369 | PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => { 370 // SAFETY: 371 // Use the whole memory region of self.`prefix` and 372 // `self.u.small` as a single array. This is not UB, as the 373 // whole memory `TokenString` has been allocated at once and 374 // is guaranteed to be continuous in memory. If Miri 375 // complains about this, use the flag `MIRIFLAGS=" 376 // -Zmiri-tree-borrows"` to use "tree borrows" instead of 377 // "stacked borrows". 378 let bytes = 379 unsafe { slice::from_raw_parts(self.prefix.as_ptr(), len) }; 380 bytes == other 381 } 382 // SAFETY: 383 // We know that the pointer actually points to allocated memory. 384 | MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => unsafe { 385 self.u.ptr.as_slice_manually(len) == other 386 }, 387 | _ => panic!("The TokenString is bigger than MAX_LENGTH!"), 388 } 389 } 390} 391 392impl PartialEq<str> for TokenString { 393 #[inline] 394 fn eq(&self, other: &str) -> bool { 395 self == other.as_bytes() 396 } 397} 398 399impl PartialEq<alloc::string::String> for TokenString { 400 #[inline] 401 fn eq(&self, other: &alloc::string::String) -> bool { 402 self == other.as_bytes() 403 } 404} 405 406 407impl Ord for TokenString { 408 /// Compare two [`TokenString`]s byte-wise. 409 /// 410 /// This is not a sensible alphabetical comparison for anything that isn't 411 /// ASCII. 412 #[inline] 413 fn cmp(&self, other: &Self) -> cmp::Ordering { 414 let pref_ord = self.prefix.cmp(&other.prefix); 415 if pref_ord != cmp::Ordering::Equal { 416 return pref_ord; 417 } 418 419 self.suffix().cmp(other.suffix()) 420 } 421} 422 423impl PartialOrd for TokenString { 424 /// Compare two [`TokenString`]s byte-wise. 425 /// 426 /// This is not a sensible alphabetical comparison for anything that isn't 427 /// ASCII. 
428 #[inline] 429 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { 430 Some(self.cmp(other)) 431 } 432} 433 434impl fmt::Display for TokenString { 435 #[inline] 436 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 437 write!(f, "{}", self.as_str()) 438 } 439} 440 441impl fmt::Debug for TokenString { 442 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 443 if self.len as usize > MAX_LENGTH_SMALL { 444 let string = 445 // SAFETY: 446 // We know that the pointer points to a string. 447 unsafe { self.u.ptr.as_string_manually(self.len.into()) }; 448 // SAFETY: 449 // We know that the pointer points to a string. 450 let ptr = unsafe { &self.u.ptr }; 451 f.debug_struct("TokenString") 452 .field("len", &self.len) 453 .field("prefix", &self.prefix_str()) 454 .field("ptr", ptr) 455 .field("string", &string) 456 .finish() 457 } else { 458 // SAFETY: 459 // We've checked that this is a small string. 460 unsafe { 461 f.debug_struct("TokenString") 462 .field("len", &self.len) 463 .field("prefix", &self.prefix_str()) 464 .field("small", &self.small_str()) 465 .field("string", &self.as_str()) 466 .finish() 467 } 468 } 469 } 470} 471 472impl<Idx> ops::Index<Idx> for TokenString 473where 474 Idx: slice::SliceIndex<str>, 475{ 476 type Output = Idx::Output; 477 478 #[inline] 479 fn index(&self, index: Idx) -> &Self::Output { 480 self.as_str().index(index) 481 } 482} 483 484impl borrow::Borrow<str> for TokenString { 485 #[inline] 486 fn borrow(&self) -> &str { 487 self.as_str() 488 } 489} 490 491impl AsRef<str> for TokenString { 492 #[inline] 493 fn as_ref(&self) -> &str { 494 self.as_str() 495 } 496} 497 498impl hash::Hash for TokenString { 499 #[inline] 500 fn hash<H: core::hash::Hasher>(&self, state: &mut H) { 501 self.as_str().hash(state); 502 } 503} 504 505// SAFETY: 506// There can be no shared references of a `TokenString`. 507unsafe impl Send for TokenString {} 508 509// SAFETY: 510// `TokenString` is immutable. 
511unsafe impl Sync for TokenString {} 512 513// ============================================================================= 514// Non trait methods 515 516impl TokenString { 517 /// Return the prefix as a `&[u8]`. 518 fn prefix_str(&self) -> &[u8] { 519 let l = cmp::min(self.len as usize, PREFIX_LENGTH); 520 &self.prefix[.. l] 521 } 522 523 /// Return the suffix of a small string as a `&[u8]`. 524 /// 525 /// # Safety 526 /// 527 /// Must be called with a small string only! 528 unsafe fn small_str(&self) -> &[u8] { 529 let l = (self.len as usize).saturating_sub(PREFIX_LENGTH); 530 // SAFETY: 531 // We know that the union contains a small string. 532 unsafe { &self.u.small[.. l] } 533 } 534 535 /// Return the length of the string in bytes. 536 /// 537 /// This is the length of the string in bytes, not Unicode scalar values and 538 /// not grapheme clusters. 539 #[must_use] 540 #[inline] 541 pub const fn len(&self) -> usize { 542 self.len as usize 543 } 544 545 /// Return `true` if the string is a "small string", that is, it is saved in 546 /// the [`TokenString`] struct itself. 547 /// 548 /// If this returns `false`, the string is allocated on the heap. 549 #[must_use] 550 #[inline] 551 pub const fn is_small(&self) -> bool { 552 self.len as usize <= MAX_LENGTH_SMALL 553 } 554 555 /// Return `true`, if this is the empty string. 556 /// 557 /// Returns `false` else. 558 #[must_use] 559 #[inline] 560 pub const fn is_empty(&self) -> bool { 561 self.len == 0 562 } 563 564 /// Convert to a [`TokenString`]. 565 /// 566 /// `bytes` must be valid UTF-8, use [`TokenString::try_from`] if you are 567 /// not sure that it is valid. If the given byte slice is bigger than 568 /// [`MAX_LENGTH`], this panics. 569 /// 570 /// Memory: 571 /// 572 /// Allocates if and only if the length of `bytes` is bigger than 573 /// [`MAX_LENGTH_SMALL`]. 574 /// 575 /// # Panics 576 /// 577 /// Panics if `bytes` is bigger than [`MAX_LENGTH`]. 
578 /// 579 /// # Safety 580 /// 581 /// `bytes` must be valid UTF-8, if not, all bets are off - UB! 582 #[must_use] 583 pub unsafe fn from_bytes_unchecked(bytes: &[u8]) -> Self { 584 match bytes.len() { 585 | 0 => Self { 586 len: 0, 587 prefix: [0_u8; PREFIX_LENGTH], 588 u: Data { 589 small: [0_u8; SMALL_DATA_LENGTH], 590 }, 591 }, 592 | 1 ..= PREFIX_LENGTH => { 593 let s = bytes.len(); 594 let mut prefix = [0_u8; PREFIX_LENGTH]; 595 prefix[.. s].copy_from_slice(&bytes[.. s]); 596 Self { 597 #[expect( 598 clippy::cast_possible_truncation, 599 reason = "Length has been checked above" 600 )] 601 len: s as u16, 602 prefix, 603 u: Data { 604 small: [0_u8; SMALL_DATA_LENGTH], 605 }, 606 } 607 } 608 | PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => { 609 let s = bytes.len(); 610 let mut prefix = [0_u8; PREFIX_LENGTH]; 611 prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]); 612 let mut small = [0_u8; SMALL_DATA_LENGTH]; 613 small[.. s - PREFIX_LENGTH] 614 .copy_from_slice(&bytes[PREFIX_LENGTH .. s]); 615 Self { 616 #[expect( 617 clippy::cast_possible_truncation, 618 reason = "Length has been checked above" 619 )] 620 len: s as u16, 621 prefix, 622 u: Data { small }, 623 } 624 } 625 | MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => { 626 let ptr = StringPtr::from(bytes); 627 let u = Data { 628 ptr: mem::ManuallyDrop::new(ptr), 629 }; 630 let mut prefix = [0_u8; PREFIX_LENGTH]; 631 prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]); 632 Self { 633 #[expect( 634 clippy::cast_possible_truncation, 635 reason = "Length has been checked above" 636 )] 637 len: bytes.len() as u16, 638 prefix, 639 u, 640 } 641 } 642 | _ => panic!( 643 "This byte slice is too big for a TokenString, {} > \ 644 {MAX_LENGTH}", 645 bytes.len() 646 ), 647 } 648 } 649 650 /// Convert to a [`TokenString`]. 651 /// 652 /// If the given string `s` is bigger than [`MAX_LENGTH`], this panics. Use 653 /// [`TokenString::try_from`] for a function that does not panic. 
The string 654 /// `s` must be valid UTF-8 too, but it has already been UB if it isn't. 655 /// 656 /// Memory: 657 /// 658 /// Allocates if and only if the length of `s` is bigger than 659 /// [`MAX_LENGTH_SMALL`]. 660 /// 661 /// # Panics 662 /// 663 /// Panics if `s` is bigger than [`MAX_LENGTH`]. 664 #[must_use] 665 #[inline] 666 pub fn from_str_unchecked(s: &str) -> Self { 667 // SAFETY: 668 // The unsafe part of `from_bytes_unchecked` is the possibility of the 669 // byte slice not being valid UTF-8. We are processing an UTF-8 string 670 // here. 671 unsafe { Self::from_bytes_unchecked(s.as_bytes()) } 672 } 673 674 /// Convert to a [`TokenString`]. 675 /// 676 /// If the given string `s` is bigger than [`MAX_LENGTH`], this panics. Use 677 /// [`TokenString::try_from`] for a function that does not panic. The string 678 /// `s` must be valid UTF-8 too, but it has already been UB if it isn't. 679 /// 680 /// Memory: 681 /// 682 /// Allocates if and only if the length of `s` is bigger than 683 /// [`MAX_LENGTH_SMALL`]. 684 /// 685 /// # Panics 686 /// 687 /// Panics if `s` is bigger than [`MAX_LENGTH`]. 688 #[must_use] 689 #[inline] 690 pub fn from_string_unchecked(s: &alloc::string::String) -> Self { 691 // SAFETY: 692 // The unsafe part of `from_bytes_unchecked` is the possibility of the 693 // byte slice not being valid UTF-8. We are processing an UTF-8 string 694 // here. 695 unsafe { Self::from_bytes_unchecked(s.as_bytes()) } 696 } 697 698 /// Return the string as a &[`prim@str`]. 699 #[must_use] 700 #[inline] 701 pub fn as_str(&self) -> &str { 702 if self.len == 0 { 703 "" 704 } else if self.len as usize > MAX_LENGTH_SMALL { 705 // SAFETY: 706 // We know, that in the union must be a valid pointer. 707 unsafe { self.u.ptr.as_string_manually(self.len.into()) } 708 } else { 709 // SAFETY: 710 // Use the whole memory region of self.`prefix` and `self.u.small` 711 // as a single array. 
This is not UB, as the whole memory 712 // `TokenString` has been allocated at once and is guaranteed to be 713 // continuous in memory. If Miri complains about this, use the 714 // flag `MIRIFLAGS="-Zmiri-tree-borrows"` to use "tree borrows" 715 // instead of "stacked borrows". 716 let bytes = unsafe { 717 slice::from_raw_parts(self.prefix.as_ptr(), self.len.into()) 718 }; 719 // SAFETY: 720 // The precondition of `TokenString` is that the string is a valid 721 // UTF-8 byte sequence. 722 unsafe { str::from_utf8_unchecked(bytes) } 723 } 724 } 725 726 /// Return the string as a byte slice. 727 #[must_use] 728 #[inline] 729 pub fn as_bytes(&self) -> &[u8] { 730 if self.len == 0 { 731 Default::default() 732 } else if self.len as usize > MAX_LENGTH_SMALL { 733 // SAFETY: 734 // We know, that in the union must be a valid pointer. 735 unsafe { self.u.ptr.as_slice_manually(self.len.into()) } 736 } else { 737 // SAFETY: 738 // Use the whole memory region of self.`prefix` and `self.u.small` 739 // as a single array. This is not UB, as the whole memory 740 // `TokenString` has been allocated at once and is guaranteed to be 741 // continuous in memory. If Miri complains about this, use the 742 // flag `MIRIFLAGS="-Zmiri-tree-borrows"` to use "tree borrows" 743 // instead of "stacked borrows". 744 unsafe { 745 slice::from_raw_parts(self.prefix.as_ptr(), self.len.into()) 746 } 747 } 748 } 749 750 /// Return the string as a new [`alloc::string::String`]. 751 /// 752 /// Memory: 753 /// 754 /// Allocates a new [`alloc::string::String`]. 755 #[must_use] 756 #[inline] 757 pub fn as_string(&self) -> alloc::string::String { 758 self.to_string() 759 } 760 761 /// Return the string as a new vector of [`char`]s. 762 /// 763 /// Memory: 764 /// 765 /// Allocates a new [`vec::Vec`]. 766 #[must_use] 767 #[inline] 768 pub fn as_chars(&self) -> vec::Vec<char> { 769 self.as_str().chars().collect() 770 } 771 772 /// Return the part of the string which is not stored in `self.prefix`. 
773 /// 774 /// If the string is <= [`PREFIX_LENGTH`], the empty slice is returned. 775 fn suffix(&self) -> &[u8] { 776 match self.len as usize { 777 | 0 ..= PREFIX_LENGTH => Default::default(), 778 | PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => 779 // SAFETY: 780 // We checked and know that this is a small string. 781 unsafe { &self.u.small }, 782 | MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => 783 // SAFETY: 784 // We checked and know that this string is allocated on the heap. 785 unsafe { 786 &self.u.ptr.as_slice_manually(self.len.into())[PREFIX_LENGTH ..] 787 }, 788 | _ => panic!( 789 "Error: this TokenString is bigger than \ 790 TokenString::MAX_LENGTH!" 791 ), 792 } 793 } 794 795 /// Return the byte at index `idx`, check bounds. 796 /// 797 /// Returns [`TkStrError::OutOfBounds`] if the index is bigger than the 798 /// string's length. 799 /// 800 /// # Errors 801 /// [`TkStrError::OutOfBounds`] if `idx` is bigger than the string's length. 802 #[inline] 803 pub fn get(&self, idx: u16) -> Result<u8, TkStrError> { 804 if idx >= self.len { 805 return Err(TkStrError::OutOfBounds(idx as usize)); 806 } 807 // SAFETY: 808 // We check above that the index is in bounds. 809 unsafe { Ok(*self.as_bytes().get_unchecked(idx as usize)) } 810 } 811 812 /// Return the byte at index `idx`, don't check bounds. 813 /// 814 /// Panics if the index is bigger than the 815 /// string's length. 816 /// 817 /// # Panics 818 /// 819 /// if `idx` is bigger than the string's length. 820 #[must_use] 821 #[inline] 822 pub fn get_unchecked(&self, idx: u16) -> u8 { 823 assert!((idx < self.len), "index {idx} out of bounds"); 824 // SAFETY: 825 // We check above that the index is in bounds. 826 unsafe { *self.as_bytes().get_unchecked(idx as usize) } 827 } 828 829 /// Return an iterator over the `[char]`s of a string. 830 /// 831 /// That is, an iterator over the Unicode scalar values of the 832 /// `TokenString`. 
833 #[inline] 834 pub fn chars(&'_ self) -> str::Chars<'_> { 835 self.as_str().chars() 836 } 837 838 /// Get a reference iterator. 839 #[must_use] 840 #[inline] 841 pub fn iter(&self) -> TokenStringIter<'_> { 842 <&Self as IntoIterator>::into_iter(self) 843 } 844 845 /// Return `true`, if the first byte is an uppercase ASCII character. 846 #[must_use] 847 #[inline] 848 pub const fn starts_ascii_uppercase(&self) -> bool { 849 self.prefix[0].is_ascii_uppercase() 850 } 851 852 /// Return `true`, if the first byte is an lowercase ASCII character. 853 #[must_use] 854 #[inline] 855 pub const fn starts_ascii_lowercase(&self) -> bool { 856 self.prefix[0].is_ascii_lowercase() 857 } 858 859 /// Return `true`, if the string contains only ASCII characters. 860 #[must_use] 861 #[inline] 862 pub fn is_ascii(&self) -> bool { 863 self.as_bytes().is_ascii() 864 } 865 866 /// Return `true`, if the string starts with `needle`. 867 /// 868 /// Returns `true` too if the string is `needle`. 869 #[must_use] 870 #[inline] 871 pub fn starts_with(&self, needle: &Self) -> bool { 872 self.as_bytes().starts_with(needle.as_bytes()) 873 } 874 875 /// Return `true`, if the string starts with `needle`. 876 /// 877 /// Returns `true` too if the string is `needle`. 878 #[must_use] 879 #[inline] 880 pub fn starts_with_bytes(&self, needle: &[u8]) -> bool { 881 self.as_bytes().starts_with(needle) 882 } 883 884 /// Return `true`, if the string starts with `needle`. 885 /// 886 /// Returns `true` too if the string is `needle`. 887 #[must_use] 888 #[inline] 889 pub fn starts_with_str(&self, needle: &str) -> bool { 890 self.as_str().starts_with(needle) 891 } 892 893 /// Return `true`, if the string ends with `needle`. 894 /// 895 /// Returns `true` too if the string is `needle`. 896 #[must_use] 897 #[inline] 898 pub fn ends_with(&self, needle: &Self) -> bool { 899 self.as_bytes().ends_with(needle.as_bytes()) 900 } 901 902 /// Return `true`, if the string ends with `needle`. 
903 /// 904 /// Returns `true` too if the string is `needle`. 905 #[must_use] 906 #[inline] 907 pub fn ends_with_bytes(&self, needle: &[u8]) -> bool { 908 self.as_bytes().ends_with(needle) 909 } 910 911 /// Return `true`, if the string ends with `needle`. 912 /// 913 /// Returns `true` too if the string is `needle`. 914 #[must_use] 915 #[inline] 916 pub fn ends_with_str(&self, needle: &str) -> bool { 917 self.as_str().ends_with(needle) 918 } 919 920 /// Map the given function `f` over the bytes of the string, mutating it. 921 fn map_bytes_mut(&mut self, f: fn(&mut [u8]) -> ()) { 922 if self.len as usize > MAX_LENGTH_SMALL { 923 // SAFETY: 924 // We check, that we actually have a valid pointer. 925 unsafe { 926 f((*self.u.ptr).as_slice_manually_mut(self.len as usize)); 927 } 928 } else { 929 // SAFETY: 930 // The two arrays, `prefix` and `small`, are guaranteed to be 931 // continuous in memory. 932 unsafe { 933 f(slice::from_raw_parts_mut( 934 self.prefix.as_mut_ptr(), 935 self.len as usize, 936 )); 937 } 938 } 939 } 940 941 /// Return a new string with all uppercase ASCII characters changed to 942 /// lowercase. 943 #[must_use] 944 #[inline] 945 pub fn to_ascii_lowercase(&self) -> Self { 946 let mut ret_val = self.clone(); 947 ret_val.map_bytes_mut(<[u8]>::make_ascii_lowercase); 948 ret_val 949 } 950 951 /// Return a new string with all lowercase ASCII characters changed to 952 /// uppercase. 953 #[must_use] 954 #[inline] 955 pub fn to_ascii_uppercase(&self) -> Self { 956 let mut ret_val = self.clone(); 957 ret_val.map_bytes_mut(<[u8]>::make_ascii_uppercase); 958 ret_val 959 } 960 961 /// Return a new string with all ASCII whitespace removed from the start and 962 /// end. 963 #[must_use] 964 #[inline] 965 pub fn trim_ascii(&self) -> Self { 966 // SAFETY: 967 // We copy the current string, so the invariants should hold for the 968 // copy too. The string does not get longer, so cannot be greater than 969 // `MAX_LENGTH`. 
970 unsafe { Self::from_bytes_unchecked(self.as_bytes().trim_ascii()) } 971 } 972 973 /// Return a new string with all ASCII whitespace removed from the start. 974 #[must_use] 975 #[inline] 976 pub fn trim_ascii_start(&self) -> Self { 977 // SAFETY: 978 // We copy the current string, so the invariants should hold for the 979 // copy too: 980 // - The string does not get longer, so cannot be greater than 981 // `MAX_LENGTH`. 982 // - if the string is valid UTF-8, removing ASCII characters does not 983 // change that. 984 unsafe { 985 Self::from_bytes_unchecked(self.as_bytes().trim_ascii_start()) 986 } 987 } 988 989 /// Return a new string with all ASCII whitespace removed from the end. 990 #[must_use] 991 #[inline] 992 pub fn trim_ascii_end(&self) -> Self { 993 // SAFETY: 994 // We copy the current string, so the invariants should hold for the 995 // copy too: 996 // - The string does not get longer, so cannot be greater than 997 // `MAX_LENGTH`. 998 // - if the string is valid UTF-8, removing ASCII characters does not 999 // change that. 1000 unsafe { Self::from_bytes_unchecked(self.as_bytes().trim_ascii_end()) } 1001 } 1002 1003 /// Return a new string with `prefix` removed from the start. 1004 #[cfg(feature = "pattern")] 1005 #[doc(cfg(pattern))] 1006 #[inline] 1007 pub fn strip_prefix<P: str::pattern::Pattern>( 1008 &self, 1009 prefix: P, 1010 ) -> Option<Self> { 1011 self.as_str() 1012 .strip_prefix(prefix) 1013 // stripping a prefix should not make the string invalid UTF-8, and 1014 // does shorten it. 1015 .map(Self::from_str_unchecked) 1016 } 1017 1018 /// Return a new string with `suffix` removed from the end. 
1019 #[cfg(feature = "pattern")] 1020 #[doc(cfg(pattern))] 1021 #[inline] 1022 pub fn strip_suffix<P>(&self, suffix: P) -> Option<Self> 1023 where 1024 P: str::pattern::Pattern, 1025 for<'a> P::Searcher<'a>: str::pattern::ReverseSearcher<'a>, 1026 { 1027 self.as_str() 1028 .strip_suffix(suffix) 1029 // stripping a suffix should not make the string invalid UTF-8, and 1030 // does shorten it. 1031 .map(Self::from_str_unchecked) 1032 } 1033 1034 /// Return `true` if the string contains the pattern `pat`. 1035 /// 1036 /// Returns `false` else. 1037 /// 1038 /// The feature 1039 #[cfg(feature = "pattern")] 1040 #[doc(cfg(pattern))] 1041 #[inline] 1042 pub fn contains<P: str::pattern::Pattern>(&self, pat: P) -> bool { 1043 self.as_str().contains(pat) 1044 } 1045} 1046 1047 1048//============================================================================== 1049// Iterating by reference 1050 1051/// Iterator struct for a `&TokenString`. 1052/// 1053/// Iterator items are single bytes, `u8`. 1054pub struct TokenStringIter<'a> { 1055 /// The [`TokenString`] to iterate over. 1056 string: &'a TokenString, 1057 /// The current index in the string. 1058 idx: usize, 1059} 1060 1061impl<'a> TokenStringIter<'a> { 1062 /// Generate a reference iterator for the given [`TokenString`]. 1063 #[must_use] 1064 #[inline] 1065 pub const fn new(s: &'a TokenString) -> Self { 1066 TokenStringIter { string: s, idx: 0 } 1067 } 1068} 1069 1070impl Iterator for TokenStringIter<'_> { 1071 type Item = u8; 1072 1073 /// Return either the next byte, [`u8`], or [`None`] if we are at the end of 1074 /// the string. 
1075 fn next(&mut self) -> Option<Self::Item> { 1076 debug_assert!( 1077 self.idx <= self.string.len.into(), 1078 "The iterator index '{0}' is greater than the string length '{1}'!", 1079 self.idx, 1080 self.string.len 1081 ); 1082 if self.idx == self.string.len.into() { 1083 None 1084 } else if self.string.len as usize > MAX_LENGTH_SMALL { 1085 self.idx += 1; 1086 Some(self.string.as_bytes()[self.idx - 1]) 1087 } else { 1088 self.idx += 1; 1089 Some( 1090 // SAFETY: 1091 // The two arrays, `prefix` and `u.small`, are guaranteed to be 1092 // consecutive in memory and allocated at the same time. 1093 unsafe { 1094 slice::from_raw_parts( 1095 self.string.prefix.as_ptr(), 1096 self.string.len as usize, 1097 ) 1098 }[self.idx - 1], 1099 ) 1100 } 1101 } 1102} 1103 1104impl<'a> IntoIterator for &'a TokenString { 1105 type IntoIter = TokenStringIter<'a>; 1106 type Item = u8; 1107 1108 #[inline] 1109 fn into_iter(self) -> Self::IntoIter { 1110 Self::IntoIter::new(self) 1111 } 1112} 1113 1114//============================================================================== 1115// Iterating an owned `TokenString`. 1116 1117/// Iterator struct for an owned [`TokenString`]. 1118/// 1119/// Iterator items are single bytes, [`u8`]. 1120pub struct TokenStringIterOwn { 1121 /// The [`TokenString`] to iterate over. 1122 string: TokenString, 1123 /// The current index in the string. 1124 idx: usize, 1125} 1126 1127impl TokenStringIterOwn { 1128 /// Generate an owned iterator for the given [`TokenString`]. 1129 #[must_use] 1130 #[inline] 1131 pub const fn new(s: TokenString) -> Self { 1132 Self { string: s, idx: 0 } 1133 } 1134} 1135 1136impl Iterator for TokenStringIterOwn { 1137 type Item = u8; 1138 1139 /// Return either the next byte, [`u8`], or [`None`] if we are at the end of 1140 /// the string. 
1141 fn next(&mut self) -> Option<Self::Item> { 1142 debug_assert!( 1143 self.idx <= self.string.len.into(), 1144 "The iterator index '{0}' is greater than the string length '{1}'!", 1145 self.idx, 1146 self.string.len 1147 ); 1148 if self.idx == self.string.len.into() { 1149 None 1150 } else if self.string.len as usize > MAX_LENGTH_SMALL { 1151 self.idx += 1; 1152 Some(self.string.as_bytes()[self.idx - 1]) 1153 } else { 1154 self.idx += 1; 1155 Some( 1156 // SAFETY: 1157 // The two arrays, `prefix` and `u.small`, are guaranteed to be 1158 // consecutive in memory and allocated at the same time. 1159 unsafe { 1160 slice::from_raw_parts( 1161 self.string.prefix.as_ptr(), 1162 self.string.len as usize, 1163 ) 1164 }[self.idx - 1], 1165 ) 1166 } 1167 } 1168} 1169 1170impl IntoIterator for TokenString { 1171 type IntoIter = TokenStringIterOwn; 1172 type Item = u8; 1173 1174 #[inline] 1175 fn into_iter(self) -> Self::IntoIter { 1176 Self::IntoIter::new(self) 1177 } 1178} 1179 1180 1181// ============================================================================= 1182// Tests 1183// ============================================================================= 1184 1185#[cfg(test)] 1186mod prefix { 1187 extern crate std; 1188 use assert2::{check, let_assert}; 1189 1190 use crate::TokenString; 1191 1192 1193 #[test] 1194 fn empty_is_empty() { 1195 let_assert!(Ok(res) = TokenString::try_from("")); 1196 check!(res.prefix[0] == 0); 1197 check!(res.len == 0); 1198 check!(res.is_small() == true); 1199 } 1200 1201 #[test] 1202 fn clone_empty() { 1203 let_assert!(Ok(s1) = TokenString::try_from("")); 1204 let res = s1.clone(); 1205 check!(res.prefix[0] == s1.prefix[0]); 1206 check!(res.len == s1.len); 1207 check!(res.is_small() == true); 1208 } 1209 1210 #[test] 1211 fn try_from_str() { 1212 let_assert!(Ok(res) = TokenString::try_from("123456")); 1213 check!(&res.prefix[0 .. 
6] == b"123456"); 1214 check!(res.len == 6); 1215 check!(res.is_small() == true); 1216 } 1217 1218 #[test] 1219 fn clone() { 1220 let_assert!(Ok(s1) = TokenString::try_from("123456")); 1221 let res = s1.clone(); 1222 check!(&res.prefix[0 .. 6] == &s1.prefix[0 .. 6]); 1223 check!(res.len == s1.len); 1224 check!(res.is_small() == true); 1225 } 1226 1227 #[test] 1228 fn try_from_bytes() { 1229 let s1: &[u8] = b"123456"; 1230 let_assert!(Ok(res) = TokenString::try_from(s1)); 1231 check!(&res.prefix[0 .. 6] == b"123456"); 1232 check!(res.len == 6); 1233 check!(res.is_small() == true); 1234 } 1235 1236 #[test] 1237 fn try_from_chars() { 1238 #[expect( 1239 clippy::std_instead_of_alloc, 1240 reason = "We are testing, this needs std" 1241 )] 1242 let s1: std::vec::Vec<char> = "123456".chars().collect(); 1243 let_assert!(Ok(res) = TokenString::try_from(s1.as_slice())); 1244 check!(&res.prefix[0 .. 6] == b"123456"); 1245 check!(res.len == 6); 1246 check!(res.is_small() == true); 1247 } 1248 1249 #[test] 1250 fn try_from_string() { 1251 #[expect( 1252 clippy::std_instead_of_alloc, 1253 reason = "We are testing, this needs std" 1254 )] 1255 let s1: std::string::String = "123456".into(); 1256 let_assert!(Ok(res) = TokenString::try_from(s1)); 1257 check!(&res.prefix[0 .. 6] == b"123456"); 1258 check!(res.len == 6); 1259 check!(res.is_small() == true); 1260 } 1261 1262 #[test] 1263 fn try_from_stringref() { 1264 #[expect( 1265 clippy::std_instead_of_alloc, 1266 reason = "We are testing, this needs std" 1267 )] 1268 let s1: std::string::String = "123456".into(); 1269 let_assert!(Ok(res) = TokenString::try_from(&s1)); 1270 check!(&res.prefix[0 .. 6] == b"123456"); 1271 check!(res.len == 6); 1272 check!(res.is_small() == true); 1273 } 1274 1275 #[test] 1276 fn from_str_unchecked() { 1277 let res = TokenString::from_str_unchecked("123456"); 1278 check!(&res.prefix[0 .. 
6] == b"123456"); 1279 check!(res.len == 6); 1280 } 1281 1282 #[test] 1283 fn from_bytes_unchecked() { 1284 let s1: &[u8] = b"123456"; 1285 // SAFETY: 1286 // We know that the string is valid UTF-8. 1287 let res = unsafe { TokenString::from_bytes_unchecked(s1) }; 1288 check!(&res.prefix[0 .. 6] == b"123456"); 1289 check!(res.len == 6); 1290 check!(res.is_small() == true); 1291 } 1292 1293 #[test] 1294 fn from_stringref_unchecked() { 1295 #[expect( 1296 clippy::std_instead_of_alloc, 1297 reason = "We are testing, this needs std" 1298 )] 1299 let s1: std::string::String = "123456".into(); 1300 let res = TokenString::from_string_unchecked(&s1); 1301 check!(&res.prefix[0 .. 6] == b"123456"); 1302 check!(res.len == 6); 1303 check!(res.is_small() == true); 1304 } 1305} 1306 1307#[cfg(test)] 1308mod small { 1309 extern crate std; 1310 use assert2::{check, let_assert}; 1311 1312 use crate::TokenString; 1313 1314 1315 #[test] 1316 fn try_from_str() { 1317 let_assert!(Ok(res) = TokenString::try_from("1234567")); 1318 check!(&res.prefix[0 .. 6] == b"123456"); 1319 // SAFETY: 1320 // We know there is a small string in the union. 1321 check!(unsafe { res.u.small[0] } == b'7'); 1322 check!(res.len == 7); 1323 check!(res.is_small() == true); 1324 } 1325 1326 #[test] 1327 fn clone() { 1328 let_assert!(Ok(s1) = TokenString::try_from("1234567")); 1329 let res = s1.clone(); 1330 check!(&res.prefix[0 .. 6] == &s1.prefix[0 .. 6]); 1331 // SAFETY: 1332 // We know there is a small string in the union. 1333 check!(unsafe { res.u.small[0] == s1.u.small[0] }); 1334 check!(res.len == s1.len); 1335 check!(res.is_small() == true); 1336 } 1337 1338 #[test] 1339 fn try_from_bytes() { 1340 let s1: &[u8] = b"1234567"; 1341 let_assert!(Ok(res) = TokenString::try_from(s1)); 1342 check!(&res.prefix[0 .. 6] == b"123456"); 1343 // SAFETY: 1344 // We know there is a small string in the union. 
1345 check!(unsafe { res.u.small[0] } == b'7'); 1346 check!(res.len == 7); 1347 check!(res.is_small() == true); 1348 } 1349 1350 #[test] 1351 fn try_from_chars() { 1352 #[expect( 1353 clippy::std_instead_of_alloc, 1354 reason = "We are testing, this needs std" 1355 )] 1356 let s1: std::vec::Vec<char> = "1234567".chars().collect(); 1357 let_assert!(Ok(res) = TokenString::try_from(s1.as_slice())); 1358 check!(&res.prefix[0 .. 6] == b"123456"); 1359 // SAFETY: 1360 // We know there is a small string in the union. 1361 check!(unsafe { res.u.small[0] } == b'7'); 1362 check!(res.len == 7); 1363 check!(res.is_small() == true); 1364 } 1365 1366 #[test] 1367 fn try_from_string() { 1368 #[expect( 1369 clippy::std_instead_of_alloc, 1370 reason = "We are testing, this needs std" 1371 )] 1372 let s1: std::string::String = "1234567".into(); 1373 let_assert!(Ok(res) = TokenString::try_from(s1)); 1374 check!(&res.prefix[0 .. 6] == b"123456"); 1375 // SAFETY: 1376 // We know there is a small string in the union. 1377 check!(unsafe { res.u.small[0] } == b'7'); 1378 check!(res.len == 7); 1379 check!(res.is_small() == true); 1380 } 1381 1382 #[test] 1383 fn try_from_stringref() { 1384 #[expect( 1385 clippy::std_instead_of_alloc, 1386 reason = "We are testing, this needs std" 1387 )] 1388 let s1: std::string::String = "1234567".into(); 1389 let_assert!(Ok(res) = TokenString::try_from(&s1)); 1390 check!(&res.prefix[0 .. 6] == b"123456"); 1391 // SAFETY: 1392 // We know there is a small string in the union. 1393 check!(unsafe { res.u.small[0] } == b'7'); 1394 check!(res.len == 7); 1395 check!(res.is_small() == true); 1396 } 1397 1398 #[test] 1399 fn from_str_unchecked() { 1400 let res = TokenString::from_str_unchecked("1234567"); 1401 check!(&res.prefix[0 .. 6] == b"123456"); 1402 // SAFETY: 1403 // We know there is a small string in the union. 
1404 check!(unsafe { res.u.small[0] } == b'7'); 1405 check!(res.len == 7); 1406 check!(res.is_small() == true); 1407 } 1408 1409 #[test] 1410 fn from_bytes_unchecked() { 1411 let s1: &[u8] = b"1234567"; 1412 // SAFETY: 1413 // We know that the string is valid UTF-8. 1414 let res = unsafe { TokenString::from_bytes_unchecked(s1) }; 1415 check!(&res.prefix[0 .. 6] == b"123456"); 1416 // SAFETY: 1417 // We know there is a small string in the union. 1418 check!(unsafe { res.u.small[0] } == b'7'); 1419 check!(res.len == 7); 1420 check!(res.is_small() == true); 1421 } 1422 1423 #[test] 1424 fn from_stringref_unchecked() { 1425 #[expect( 1426 clippy::std_instead_of_alloc, 1427 reason = "We are testing, this needs std" 1428 )] 1429 let s1: std::string::String = "1234567".into(); 1430 let res = TokenString::from_string_unchecked(&s1); 1431 check!(&res.prefix[0 .. 6] == b"123456"); 1432 // SAFETY: 1433 // We know there is a small string in the union. 1434 check!(unsafe { res.u.small[0] } == b'7'); 1435 check!(res.len == 7); 1436 check!(res.is_small() == true); 1437 } 1438} 1439 1440#[cfg(test)] 1441mod heap { 1442 extern crate std; 1443 use assert2::{check, let_assert}; 1444 1445 use crate::TokenString; 1446 1447 1448 #[test] 1449 fn try_from_str() { 1450 let_assert!(Ok(res) = TokenString::try_from("1234567890ABCDE")); 1451 check!(&res.prefix[0 .. 6] == b"123456"); 1452 check!( 1453 // SAFETY: 1454 // We know there is a large string in the union. 1455 unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] } 1456 == b"1234567890ABCDE" 1457 ); 1458 check!(res.len == 15); 1459 check!(res.is_small() == false); 1460 } 1461 1462 1463 #[test] 1464 fn clone() { 1465 let_assert!(Ok(s1) = TokenString::try_from("1234567890ABCDE")); 1466 let res = s1.clone(); 1467 check!(&res.prefix[0 .. 6] == &s1.prefix[0 .. 6]); 1468 check!( 1469 // SAFETY: 1470 // We know there is a large string in the union. 1471 unsafe { 1472 res.u.ptr.as_slice_manually(res.len as usize)[.. 
15] 1473 == s1.u.ptr.as_slice_manually(res.len as usize)[.. 15] 1474 } 1475 ); 1476 check!(res.len == s1.len); 1477 check!(res.is_small() == false); 1478 } 1479 1480 #[test] 1481 fn try_from_bytes() { 1482 let s1: &[u8] = b"1234567890ABCDE"; 1483 let_assert!(Ok(res) = TokenString::try_from(s1)); 1484 check!(&res.prefix[0 .. 6] == b"123456"); 1485 check!( 1486 // SAFETY: 1487 // We know there is a large string in the union. 1488 unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] } 1489 == b"1234567890ABCDE" 1490 ); 1491 check!(res.len == 15); 1492 check!(res.is_small() == false); 1493 } 1494 1495 #[test] 1496 fn try_from_chars() { 1497 #[expect( 1498 clippy::std_instead_of_alloc, 1499 reason = "We are testing, this needs std" 1500 )] 1501 let s1: std::vec::Vec<char> = "1234567890ABCDE".chars().collect(); 1502 let_assert!(Ok(res) = TokenString::try_from(s1.as_slice())); 1503 check!(&res.prefix[0 .. 6] == b"123456"); 1504 check!( 1505 // SAFETY: 1506 // We know there is a large string in the union. 1507 unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] } 1508 == b"1234567890ABCDE" 1509 ); 1510 check!(res.len == 15); 1511 check!(res.is_small() == false); 1512 } 1513 1514 #[test] 1515 fn try_from_string() { 1516 #[expect( 1517 clippy::std_instead_of_alloc, 1518 reason = "We are testing, this needs std" 1519 )] 1520 let s1: std::string::String = "1234567890ABCDE".into(); 1521 let_assert!(Ok(res) = TokenString::try_from(s1)); 1522 check!(&res.prefix[0 .. 6] == b"123456"); 1523 check!( 1524 // SAFETY: 1525 // We know there is a large string in the union. 1526 unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 
15] } 1527 == b"1234567890ABCDE" 1528 ); 1529 check!(res.len == 15); 1530 check!(res.is_small() == false); 1531 } 1532 1533 #[test] 1534 fn try_from_stringref() { 1535 #[expect( 1536 clippy::std_instead_of_alloc, 1537 reason = "We are testing, this needs std" 1538 )] 1539 let s1: std::string::String = "1234567890ABCDE".into(); 1540 let_assert!(Ok(res) = TokenString::try_from(&s1)); 1541 check!(&res.prefix[0 .. 6] == b"123456"); 1542 check!( 1543 // SAFETY: 1544 // We know there is a large string in the union. 1545 unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] } 1546 == b"1234567890ABCDE" 1547 ); 1548 check!(res.len == 15); 1549 check!(res.is_small() == false); 1550 } 1551 1552 #[test] 1553 fn from_str_unchecked() { 1554 let res = TokenString::from_str_unchecked("1234567890ABCDE"); 1555 check!(&res.prefix[0 .. 6] == b"123456"); 1556 check!( 1557 // SAFETY: 1558 // We know there is a large string in the union. 1559 unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] } 1560 == b"1234567890ABCDE" 1561 ); 1562 check!(res.len == 15); 1563 check!(res.is_small() == false); 1564 } 1565 1566 #[test] 1567 fn from_bytes_unchecked() { 1568 let s1: &[u8] = b"1234567890ABCDE"; 1569 // SAFETY: 1570 // We know that the string is valid UTF-8. 1571 let res = unsafe { TokenString::from_bytes_unchecked(s1) }; 1572 check!(&res.prefix[0 .. 6] == b"123456"); 1573 check!( 1574 // SAFETY: 1575 // We know there is a large string in the union. 1576 unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] } 1577 == b"1234567890ABCDE" 1578 ); 1579 check!(res.len == 15); 1580 check!(res.is_small() == false); 1581 } 1582 1583 #[test] 1584 fn from_stringref_unchecked() { 1585 #[expect( 1586 clippy::std_instead_of_alloc, 1587 reason = "We are testing, this needs std" 1588 )] 1589 let s1: std::string::String = "1234567890ABCDE".into(); 1590 let res = TokenString::from_string_unchecked(&s1); 1591 check!(&res.prefix[0 .. 
6] == b"123456"); 1592 check!( 1593 // SAFETY: 1594 // We know there is a large string in the union. 1595 unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] } 1596 == b"1234567890ABCDE" 1597 ); 1598 check!(res.len == 15); 1599 check!(res.is_small() == false); 1600 } 1601}