crates/java_string/src/char.rs at main · avikav.net/valence

avikav.net / valence
this repo has no description
valence / crates / java_string / src / char.rs
at main 1012 lines 28 kB view raw
   1use std::char::ParseCharError;
   2use std::cmp::Ordering;
   3use std::fmt;
   4use std::fmt::{Debug, Display, Formatter, Write};
   5use std::hash::{Hash, Hasher};
   6use std::iter::{once, FusedIterator, Once};
   7use std::ops::Range;
   8use std::str::FromStr;
   9
  10use crate::validations::{TAG_CONT, TAG_FOUR_B, TAG_THREE_B, TAG_TWO_B};
  11
  12// JavaCodePoint is guaranteed to have the same repr as a u32, with valid values
  13// of between 0 and 0x10FFFF, the same as a unicode code point. Surrogate code
  14// points are valid values of this type.
  15#[derive(Copy, Clone, PartialEq, Eq)]
  16#[repr(C)]
  17pub struct JavaCodePoint {
  18    #[cfg(target_endian = "little")]
  19    lower: u16,
  20    upper: SeventeenValues,
  21    #[cfg(target_endian = "big")]
  22    lower: u16,
  23}
  24
  25#[repr(u16)]
  26#[derive(Copy, Clone, PartialEq, Eq)]
  27#[allow(unused)]
  28enum SeventeenValues {
  29    V0,
  30    V1,
  31    V2,
  32    V3,
  33    V4,
  34    V5,
  35    V6,
  36    V7,
  37    V8,
  38    V9,
  39    V10,
  40    V11,
  41    V12,
  42    V13,
  43    V14,
  44    V15,
  45    V16,
  46}
  47
  48impl JavaCodePoint {
  49    pub const MAX: JavaCodePoint = JavaCodePoint::from_char(char::MAX);
  50    pub const REPLACEMENT_CHARACTER: JavaCodePoint =
  51        JavaCodePoint::from_char(char::REPLACEMENT_CHARACTER);
  52
  53    /// See [`char::from_u32`]
  54    ///
  55    /// ```
  56    /// # use java_string::JavaCodePoint;
  57    /// let c = JavaCodePoint::from_u32(0x2764);
  58    /// assert_eq!(Some(JavaCodePoint::from_char('❤')), c);
  59    ///
  60    /// assert_eq!(None, JavaCodePoint::from_u32(0x110000));
  61    /// ```
  62    #[inline]
  63    #[must_use]
  64    pub const fn from_u32(i: u32) -> Option<JavaCodePoint> {
  65        if i <= 0x10ffff {
  66            unsafe { Some(Self::from_u32_unchecked(i)) }
  67        } else {
  68            None
  69        }
  70    }
  71
  72    /// # Safety
  73    /// The argument must be within the valid Unicode code point range of 0 to
  74    /// 0x10FFFF inclusive. Surrogate code points are allowed.
  75    #[inline]
  76    #[must_use]
  77    pub const unsafe fn from_u32_unchecked(i: u32) -> JavaCodePoint {
  78        // SAFETY: the caller checks that the argument can be represented by this type
  79        std::mem::transmute(i)
  80    }
  81
  82    /// Converts a `char` to a code point.
  83    #[inline]
  84    #[must_use]
  85    pub const fn from_char(char: char) -> JavaCodePoint {
  86        unsafe {
  87            // SAFETY: all chars are valid code points
  88            JavaCodePoint::from_u32_unchecked(char as u32)
  89        }
  90    }
  91
  92    /// Converts this code point to a `u32`.
  93    ///
  94    /// ```
  95    /// # use java_string::JavaCodePoint;
  96    /// assert_eq!(65, JavaCodePoint::from_char('A').as_u32());
  97    /// assert_eq!(0xd800, JavaCodePoint::from_u32(0xd800).unwrap().as_u32());
  98    /// ```
  99    #[inline]
 100    #[must_use]
 101    pub const fn as_u32(self) -> u32 {
 102        unsafe {
 103            // SAFETY: JavaCodePoint has the same repr as a u32
 104            let result = std::mem::transmute::<Self, u32>(self);
 105
 106            if result > 0x10ffff {
 107                // SAFETY: JavaCodePoint can never have a value > 0x10FFFF.
 108                // This statement may allow the optimizer to remove branches in the calling code
 109                // associated with out of bounds chars.
 110                std::hint::unreachable_unchecked();
 111            }
 112
 113            result
 114        }
 115    }
 116
 117    /// Converts this code point to a `char`.
 118    ///
 119    /// ```
 120    /// # use java_string::JavaCodePoint;
 121    /// assert_eq!(Some('a'), JavaCodePoint::from_char('a').as_char());
 122    /// assert_eq!(None, JavaCodePoint::from_u32(0xd800).unwrap().as_char());
 123    /// ```
 124    #[inline]
 125    #[must_use]
 126    pub const fn as_char(self) -> Option<char> {
 127        char::from_u32(self.as_u32())
 128    }
 129
 130    /// # Safety
 131    /// The caller must ensure that this code point is not a surrogate code
 132    /// point.
 133    #[inline]
 134    #[must_use]
 135    pub unsafe fn as_char_unchecked(self) -> char {
 136        char::from_u32_unchecked(self.as_u32())
 137    }
 138
 139    /// See [`char::encode_utf16`]
 140    ///
 141    /// ```
 142    /// # use java_string::JavaCodePoint;
 143    /// assert_eq!(
 144    ///     2,
 145    ///     JavaCodePoint::from_char('𝕊')
 146    ///         .encode_utf16(&mut [0; 2])
 147    ///         .len()
 148    /// );
 149    /// assert_eq!(
 150    ///     1,
 151    ///     JavaCodePoint::from_u32(0xd800)
 152    ///         .unwrap()
 153    ///         .encode_utf16(&mut [0; 2])
 154    ///         .len()
 155    /// );
 156    /// ```
 157    /// ```should_panic
 158    /// # use java_string::JavaCodePoint;
 159    /// // Should panic
 160    /// JavaCodePoint::from_char('𝕊').encode_utf16(&mut [0; 1]);
 161    /// ```
 162    #[inline]
 163    pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
 164        if let Some(char) = self.as_char() {
 165            char.encode_utf16(dst)
 166        } else {
 167            dst[0] = self.as_u32() as u16;
 168            &mut dst[..1]
 169        }
 170    }
 171
 172    /// Encodes this `JavaCodePoint` into semi UTF-8, that is, UTF-8 with
 173    /// surrogate code points. See also [`char::encode_utf8`].
 174    ///
 175    /// ```
 176    /// # use java_string::JavaCodePoint;
 177    /// assert_eq!(
 178    ///     2,
 179    ///     JavaCodePoint::from_char('ß')
 180    ///         .encode_semi_utf8(&mut [0; 4])
 181    ///         .len()
 182    /// );
 183    /// assert_eq!(
 184    ///     3,
 185    ///     JavaCodePoint::from_u32(0xd800)
 186    ///         .unwrap()
 187    ///         .encode_semi_utf8(&mut [0; 4])
 188    ///         .len()
 189    /// );
 190    /// ```
 191    /// ```should_panic
 192    /// # use java_string::JavaCodePoint;
 193    /// // Should panic
 194    /// JavaCodePoint::from_char('ß').encode_semi_utf8(&mut [0; 1]);
 195    /// ```
 196    #[inline]
 197    pub fn encode_semi_utf8(self, dst: &mut [u8]) -> &mut [u8] {
 198        let len = self.len_utf8();
 199        let code = self.as_u32();
 200        match (len, &mut dst[..]) {
 201            (1, [a, ..]) => {
 202                *a = code as u8;
 203            }
 204            (2, [a, b, ..]) => {
 205                *a = ((code >> 6) & 0x1f) as u8 | TAG_TWO_B;
 206                *b = (code & 0x3f) as u8 | TAG_CONT;
 207            }
 208            (3, [a, b, c, ..]) => {
 209                *a = ((code >> 12) & 0x0f) as u8 | TAG_THREE_B;
 210                *b = ((code >> 6) & 0x3f) as u8 | TAG_CONT;
 211                *c = (code & 0x3f) as u8 | TAG_CONT;
 212            }
 213            (4, [a, b, c, d, ..]) => {
 214                *a = ((code >> 18) & 0x07) as u8 | TAG_FOUR_B;
 215                *b = ((code >> 12) & 0x3f) as u8 | TAG_CONT;
 216                *c = ((code >> 6) & 0x3f) as u8 | TAG_CONT;
 217                *d = (code & 0x3f) as u8 | TAG_CONT;
 218            }
 219            _ => panic!(
 220                "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
 221                len,
 222                code,
 223                dst.len()
 224            ),
 225        }
 226        &mut dst[..len]
 227    }
 228
 229    /// See [`char::eq_ignore_ascii_case`].
 230    #[inline]
 231    pub fn eq_ignore_ascii_case(&self, other: &JavaCodePoint) -> bool {
 232        match (self.as_char(), other.as_char()) {
 233            (Some(char1), Some(char2)) => char1.eq_ignore_ascii_case(&char2),
 234            (None, None) => self == other,
 235            _ => false,
 236        }
 237    }
 238
 239    /// See [`char::escape_debug`].
 240    ///
 241    /// ```
 242    /// # use java_string::JavaCodePoint;
 243    /// assert_eq!(
 244    ///     "a",
 245    ///     JavaCodePoint::from_char('a').escape_debug().to_string()
 246    /// );
 247    /// assert_eq!(
 248    ///     "\\n",
 249    ///     JavaCodePoint::from_char('\n').escape_debug().to_string()
 250    /// );
 251    /// assert_eq!(
 252    ///     "\\u{d800}",
 253    ///     JavaCodePoint::from_u32(0xd800)
 254    ///         .unwrap()
 255    ///         .escape_debug()
 256    ///         .to_string()
 257    /// );
 258    /// ```
 259    #[inline]
 260    #[must_use]
 261    pub fn escape_debug(self) -> CharEscapeIter {
 262        self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
 263    }
 264
 265    #[inline]
 266    #[must_use]
 267    pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> CharEscapeIter {
 268        const NULL: u32 = '\0' as u32;
 269        const TAB: u32 = '\t' as u32;
 270        const CARRIAGE_RETURN: u32 = '\r' as u32;
 271        const LINE_FEED: u32 = '\n' as u32;
 272        const SINGLE_QUOTE: u32 = '\'' as u32;
 273        const DOUBLE_QUOTE: u32 = '"' as u32;
 274        const BACKSLASH: u32 = '\\' as u32;
 275
 276        unsafe {
 277            // SAFETY: all characters specified are in ascii range
 278            match self.as_u32() {
 279                NULL => CharEscapeIter::new([b'\\', b'0']),
 280                TAB => CharEscapeIter::new([b'\\', b't']),
 281                CARRIAGE_RETURN => CharEscapeIter::new([b'\\', b'r']),
 282                LINE_FEED => CharEscapeIter::new([b'\\', b'n']),
 283                SINGLE_QUOTE if args.escape_single_quote => CharEscapeIter::new([b'\\', b'\'']),
 284                DOUBLE_QUOTE if args.escape_double_quote => CharEscapeIter::new([b'\\', b'"']),
 285                BACKSLASH => CharEscapeIter::new([b'\\', b'\\']),
 286                _ if self.is_printable() => {
 287                    // SAFETY: surrogate code points are not printable
 288                    CharEscapeIter::printable(self.as_char_unchecked())
 289                }
 290                _ => self.escape_unicode(),
 291            }
 292        }
 293    }
 294
 295    #[inline]
 296    fn is_printable(self) -> bool {
 297        let Some(char) = self.as_char() else {
 298            return false;
 299        };
 300        if matches!(char, '\\' | '\'' | '"') {
 301            return true;
 302        }
 303        char.escape_debug().next() != Some('\\')
 304    }
 305
 306    /// See [`char::escape_default`].
 307    ///
 308    /// ```
 309    /// # use java_string::JavaCodePoint;
 310    /// assert_eq!(
 311    ///     "a",
 312    ///     JavaCodePoint::from_char('a').escape_default().to_string()
 313    /// );
 314    /// assert_eq!(
 315    ///     "\\n",
 316    ///     JavaCodePoint::from_char('\n').escape_default().to_string()
 317    /// );
 318    /// assert_eq!(
 319    ///     "\\u{d800}",
 320    ///     JavaCodePoint::from_u32(0xd800)
 321    ///         .unwrap()
 322    ///         .escape_default()
 323    ///         .to_string()
 324    /// );
 325    /// ```
 326    #[inline]
 327    #[must_use]
 328    pub fn escape_default(self) -> CharEscapeIter {
 329        const TAB: u32 = '\t' as u32;
 330        const CARRIAGE_RETURN: u32 = '\r' as u32;
 331        const LINE_FEED: u32 = '\n' as u32;
 332        const SINGLE_QUOTE: u32 = '\'' as u32;
 333        const DOUBLE_QUOTE: u32 = '"' as u32;
 334        const BACKSLASH: u32 = '\\' as u32;
 335
 336        unsafe {
 337            // SAFETY: all characters specified are in ascii range
 338            match self.as_u32() {
 339                TAB => CharEscapeIter::new([b'\\', b't']),
 340                CARRIAGE_RETURN => CharEscapeIter::new([b'\\', b'r']),
 341                LINE_FEED => CharEscapeIter::new([b'\\', b'n']),
 342                SINGLE_QUOTE => CharEscapeIter::new([b'\\', b'\'']),
 343                DOUBLE_QUOTE => CharEscapeIter::new([b'\\', b'"']),
 344                BACKSLASH => CharEscapeIter::new([b'\\', b'\\']),
 345                0x20..=0x7e => CharEscapeIter::new([self.as_u32() as u8]),
 346                _ => self.escape_unicode(),
 347            }
 348        }
 349    }
 350
 351    /// See [`char::escape_unicode`].
 352    ///
 353    /// ```
 354    /// # use java_string::JavaCodePoint;
 355    /// assert_eq!(
 356    ///     "\\u{2764}",
 357    ///     JavaCodePoint::from_char('❤').escape_unicode().to_string()
 358    /// );
 359    /// assert_eq!(
 360    ///     "\\u{d800}",
 361    ///     JavaCodePoint::from_u32(0xd800)
 362    ///         .unwrap()
 363    ///         .escape_unicode()
 364    ///         .to_string()
 365    /// );
 366    /// ```
 367    #[inline]
 368    #[must_use]
 369    pub fn escape_unicode(self) -> CharEscapeIter {
 370        let x = self.as_u32();
 371
 372        let mut arr = [0; 10];
 373        arr[0] = b'\\';
 374        arr[1] = b'u';
 375        arr[2] = b'{';
 376
 377        let number_len = if x == 0 {
 378            1
 379        } else {
 380            ((x.ilog2() >> 2) + 1) as usize
 381        };
 382        arr[3 + number_len] = b'}';
 383        for hexit in 0..number_len {
 384            arr[2 + number_len - hexit] = b"0123456789abcdef"[((x >> (hexit << 2)) & 15) as usize];
 385        }
 386
 387        CharEscapeIter {
 388            inner: EscapeIterInner::Escaped(EscapeIterEscaped {
 389                bytes: arr,
 390                range: 0..number_len + 4,
 391            }),
 392        }
 393    }
 394
 395    /// See [`char::is_alphabetic`].
 396    #[inline]
 397    #[must_use]
 398    pub fn is_alphabetic(self) -> bool {
 399        self.as_char().is_some_and(|char| char.is_alphabetic())
 400    }
 401
 402    /// See [`char::is_alphanumeric`].
 403    #[inline]
 404    #[must_use]
 405    pub fn is_alphanumeric(self) -> bool {
 406        self.as_char().is_some_and(|char| char.is_alphanumeric())
 407    }
 408
 409    /// See [`char::is_ascii`].
 410    #[inline]
 411    #[must_use]
 412    pub fn is_ascii(self) -> bool {
 413        self.as_u32() <= 0x7f
 414    }
 415
 416    /// See [`char::is_ascii_alphabetic`].
 417    #[inline]
 418    #[must_use]
 419    pub const fn is_ascii_alphabetic(self) -> bool {
 420        self.is_ascii_lowercase() || self.is_ascii_uppercase()
 421    }
 422
 423    /// See [`char::is_ascii_alphanumeric`].
 424    #[inline]
 425    #[must_use]
 426    pub const fn is_ascii_alphanumeric(self) -> bool {
 427        self.is_ascii_alphabetic() || self.is_ascii_digit()
 428    }
 429
 430    /// See [`char::is_ascii_control`].
 431    #[inline]
 432    #[must_use]
 433    pub const fn is_ascii_control(self) -> bool {
 434        matches!(self.as_u32(), 0..=0x1f | 0x7f)
 435    }
 436
 437    /// See [`char::is_ascii_digit`].
 438    #[inline]
 439    #[must_use]
 440    pub const fn is_ascii_digit(self) -> bool {
 441        const ZERO: u32 = '0' as u32;
 442        const NINE: u32 = '9' as u32;
 443        matches!(self.as_u32(), ZERO..=NINE)
 444    }
 445
 446    /// See [`char::is_ascii_graphic`].
 447    #[inline]
 448    #[must_use]
 449    pub const fn is_ascii_graphic(self) -> bool {
 450        matches!(self.as_u32(), 0x21..=0x7e)
 451    }
 452
 453    /// See [`char::is_ascii_hexdigit`].
 454    #[inline]
 455    #[must_use]
 456    pub const fn is_ascii_hexdigit(self) -> bool {
 457        const LOWER_A: u32 = 'a' as u32;
 458        const LOWER_F: u32 = 'f' as u32;
 459        const UPPER_A: u32 = 'A' as u32;
 460        const UPPER_F: u32 = 'F' as u32;
 461        self.is_ascii_digit() || matches!(self.as_u32(), (LOWER_A..=LOWER_F) | (UPPER_A..=UPPER_F))
 462    }
 463
 464    /// See [`char::is_ascii_lowercase`].
 465    #[inline]
 466    #[must_use]
 467    pub const fn is_ascii_lowercase(self) -> bool {
 468        const A: u32 = 'a' as u32;
 469        const Z: u32 = 'z' as u32;
 470        matches!(self.as_u32(), A..=Z)
 471    }
 472
 473    /// See [`char::is_ascii_octdigit`].
 474    #[inline]
 475    #[must_use]
 476    pub const fn is_ascii_octdigit(self) -> bool {
 477        const ZERO: u32 = '0' as u32;
 478        const SEVEN: u32 = '7' as u32;
 479        matches!(self.as_u32(), ZERO..=SEVEN)
 480    }
 481
 482    /// See [`char::is_ascii_punctuation`].
 483    #[inline]
 484    #[must_use]
 485    pub const fn is_ascii_punctuation(self) -> bool {
 486        matches!(
 487            self.as_u32(),
 488            (0x21..=0x2f) | (0x3a..=0x40) | (0x5b..=0x60) | (0x7b..=0x7e)
 489        )
 490    }
 491
 492    /// See [`char::is_ascii_uppercase`].
 493    #[inline]
 494    #[must_use]
 495    pub const fn is_ascii_uppercase(self) -> bool {
 496        const A: u32 = 'A' as u32;
 497        const Z: u32 = 'Z' as u32;
 498        matches!(self.as_u32(), A..=Z)
 499    }
 500
 501    /// See [`char::is_ascii_whitespace`].
 502    #[inline]
 503    #[must_use]
 504    pub const fn is_ascii_whitespace(self) -> bool {
 505        const SPACE: u32 = ' ' as u32;
 506        const HORIZONTAL_TAB: u32 = '\t' as u32;
 507        const LINE_FEED: u32 = '\n' as u32;
 508        const FORM_FEED: u32 = 0xc;
 509        const CARRIAGE_RETURN: u32 = '\r' as u32;
 510        matches!(
 511            self.as_u32(),
 512            SPACE | HORIZONTAL_TAB | LINE_FEED | FORM_FEED | CARRIAGE_RETURN
 513        )
 514    }
 515
 516    /// See [`char::is_control`].
 517    #[inline]
 518    #[must_use]
 519    pub fn is_control(self) -> bool {
 520        self.as_char().is_some_and(|char| char.is_control())
 521    }
 522
 523    /// See [`char::is_digit`].
 524    #[inline]
 525    #[must_use]
 526    pub fn is_digit(self, radix: u32) -> bool {
 527        self.to_digit(radix).is_some()
 528    }
 529
 530    /// See [`char::is_lowercase`].
 531    #[inline]
 532    #[must_use]
 533    pub fn is_lowercase(self) -> bool {
 534        self.as_char().is_some_and(|char| char.is_lowercase())
 535    }
 536
 537    /// See [`char::is_numeric`].
 538    #[inline]
 539    #[must_use]
 540    pub fn is_numeric(self) -> bool {
 541        self.as_char().is_some_and(|char| char.is_numeric())
 542    }
 543
 544    /// See [`char::is_uppercase`].
 545    #[inline]
 546    #[must_use]
 547    pub fn is_uppercase(self) -> bool {
 548        self.as_char().is_some_and(|char| char.is_uppercase())
 549    }
 550
 551    /// See [`char::is_whitespace`].
 552    #[inline]
 553    #[must_use]
 554    pub fn is_whitespace(self) -> bool {
 555        self.as_char().is_some_and(|char| char.is_whitespace())
 556    }
 557
 558    /// See [`char::len_utf16`]. Surrogate code points return 1.
 559    ///
 560    /// ```
 561    /// # use java_string::JavaCodePoint;
 562    ///
 563    /// let n = JavaCodePoint::from_char('ß').len_utf16();
 564    /// assert_eq!(n, 1);
 565    ///
 566    /// let len = JavaCodePoint::from_char('💣').len_utf16();
 567    /// assert_eq!(len, 2);
 568    ///
 569    /// assert_eq!(1, JavaCodePoint::from_u32(0xd800).unwrap().len_utf16());
 570    /// ```
 571    #[inline]
 572    #[must_use]
 573    pub const fn len_utf16(self) -> usize {
 574        if let Some(char) = self.as_char() {
 575            char.len_utf16()
 576        } else {
 577            1 // invalid code points are encoded as 1 utf16 code point anyway
 578        }
 579    }
 580
 581    /// See [`char::len_utf8`]. Surrogate code points return 3.
 582    ///
 583    /// ```
 584    /// # use java_string::JavaCodePoint;
 585    ///
 586    /// let len = JavaCodePoint::from_char('A').len_utf8();
 587    /// assert_eq!(len, 1);
 588    ///
 589    /// let len = JavaCodePoint::from_char('ß').len_utf8();
 590    /// assert_eq!(len, 2);
 591    ///
 592    /// let len = JavaCodePoint::from_char('ℝ').len_utf8();
 593    /// assert_eq!(len, 3);
 594    ///
 595    /// let len = JavaCodePoint::from_char('💣').len_utf8();
 596    /// assert_eq!(len, 4);
 597    ///
 598    /// let len = JavaCodePoint::from_u32(0xd800).unwrap().len_utf8();
 599    /// assert_eq!(len, 3);
 600    /// ```
 601    #[inline]
 602    #[must_use]
 603    pub const fn len_utf8(self) -> usize {
 604        if let Some(char) = self.as_char() {
 605            char.len_utf8()
 606        } else {
 607            3 // invalid code points are all length 3 in semi-valid utf8
 608        }
 609    }
 610
 611    /// See [`char::make_ascii_lowercase`].
 612    #[inline]
 613    pub fn make_ascii_lowercase(&mut self) {
 614        *self = self.to_ascii_lowercase();
 615    }
 616
 617    /// See [`char::make_ascii_uppercase`].
 618    #[inline]
 619    pub fn make_ascii_uppercase(&mut self) {
 620        *self = self.to_ascii_uppercase();
 621    }
 622
 623    /// See [`char::to_ascii_lowercase`].
 624    ///
 625    /// ```
 626    /// # use java_string::JavaCodePoint;
 627    ///
 628    /// let ascii = JavaCodePoint::from_char('A');
 629    /// let non_ascii = JavaCodePoint::from_char('❤');
 630    ///
 631    /// assert_eq!('a', ascii.to_ascii_lowercase());
 632    /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
 633    /// ```
 634    #[inline]
 635    #[must_use]
 636    pub const fn to_ascii_lowercase(self) -> JavaCodePoint {
 637        if self.is_ascii_uppercase() {
 638            unsafe {
 639                // SAFETY: all lowercase chars are valid chars
 640                Self::from_u32_unchecked(self.as_u32() + 32)
 641            }
 642        } else {
 643            self
 644        }
 645    }
 646
 647    /// See [`char::to_ascii_uppercase`].
 648    ///
 649    /// ```
 650    /// # use java_string::JavaCodePoint;
 651    ///
 652    /// let ascii = JavaCodePoint::from_char('a');
 653    /// let non_ascii = JavaCodePoint::from_char('❤');
 654    ///
 655    /// assert_eq!('A', ascii.to_ascii_uppercase());
 656    /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
 657    /// ```
 658    #[inline]
 659    #[must_use]
 660    pub const fn to_ascii_uppercase(self) -> JavaCodePoint {
 661        if self.is_ascii_lowercase() {
 662            unsafe {
 663                // SAFETY: all uppercase chars are valid chars
 664                Self::from_u32_unchecked(self.as_u32() - 32)
 665            }
 666        } else {
 667            self
 668        }
 669    }
 670
 671    /// See [`char::to_digit`].
 672    #[inline]
 673    #[must_use]
 674    pub const fn to_digit(self, radix: u32) -> Option<u32> {
 675        if let Some(char) = self.as_char() {
 676            char.to_digit(radix)
 677        } else {
 678            None
 679        }
 680    }
 681
 682    /// See [`char::to_lowercase`].
 683    #[inline]
 684    #[must_use]
 685    pub fn to_lowercase(self) -> ToLowercase {
 686        match self.as_char() {
 687            Some(char) => ToLowercase::char(char.to_lowercase()),
 688            None => ToLowercase::invalid(self),
 689        }
 690    }
 691
 692    /// See [`char::to_uppercase`].
 693    #[inline]
 694    #[must_use]
 695    pub fn to_uppercase(self) -> ToUppercase {
 696        match self.as_char() {
 697            Some(char) => ToUppercase::char(char.to_uppercase()),
 698            None => ToUppercase::invalid(self),
 699        }
 700    }
 701}
 702
 703impl Debug for JavaCodePoint {
 704    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
 705        f.write_char('\'')?;
 706        for c in self.escape_debug_ext(EscapeDebugExtArgs {
 707            escape_single_quote: true,
 708            escape_double_quote: false,
 709        }) {
 710            f.write_char(c)?;
 711        }
 712        f.write_char('\'')
 713    }
 714}
 715
 716impl Default for JavaCodePoint {
 717    #[inline]
 718    fn default() -> Self {
 719        JavaCodePoint::from_char('\0')
 720    }
 721}
 722
 723impl Display for JavaCodePoint {
 724    #[inline]
 725    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
 726        Display::fmt(&self.as_char().unwrap_or(char::REPLACEMENT_CHARACTER), f)
 727    }
 728}
 729
 730impl From<JavaCodePoint> for u32 {
 731    #[inline]
 732    fn from(value: JavaCodePoint) -> Self {
 733        value.as_u32()
 734    }
 735}
 736
 737impl From<u8> for JavaCodePoint {
 738    #[inline]
 739    fn from(value: u8) -> Self {
 740        JavaCodePoint::from_char(char::from(value))
 741    }
 742}
 743
 744impl FromStr for JavaCodePoint {
 745    type Err = ParseCharError;
 746
 747    #[inline]
 748    fn from_str(s: &str) -> Result<Self, Self::Err> {
 749        char::from_str(s).map(JavaCodePoint::from_char)
 750    }
 751}
 752
 753impl Hash for JavaCodePoint {
 754    #[inline]
 755    fn hash<H: Hasher>(&self, state: &mut H) {
 756        self.as_u32().hash(state)
 757    }
 758}
 759
 760impl Ord for JavaCodePoint {
 761    #[inline]
 762    fn cmp(&self, other: &Self) -> Ordering {
 763        self.as_u32().cmp(&other.as_u32())
 764    }
 765}
 766
 767impl PartialOrd for JavaCodePoint {
 768    #[inline]
 769    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
 770        Some(self.cmp(other))
 771    }
 772}
 773
 774impl PartialOrd<char> for JavaCodePoint {
 775    #[inline]
 776    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
 777        self.partial_cmp(&JavaCodePoint::from_char(*other))
 778    }
 779}
 780
 781impl PartialOrd<JavaCodePoint> for char {
 782    #[inline]
 783    fn partial_cmp(&self, other: &JavaCodePoint) -> Option<Ordering> {
 784        JavaCodePoint::from_char(*self).partial_cmp(other)
 785    }
 786}
 787
 788impl PartialEq<char> for JavaCodePoint {
 789    #[inline]
 790    fn eq(&self, other: &char) -> bool {
 791        self == &JavaCodePoint::from_char(*other)
 792    }
 793}
 794
 795impl PartialEq<JavaCodePoint> for char {
 796    #[inline]
 797    fn eq(&self, other: &JavaCodePoint) -> bool {
 798        &JavaCodePoint::from_char(*self) == other
 799    }
 800}
 801
 802pub(crate) struct EscapeDebugExtArgs {
 803    pub(crate) escape_single_quote: bool,
 804    pub(crate) escape_double_quote: bool,
 805}
 806
 807impl EscapeDebugExtArgs {
 808    pub(crate) const ESCAPE_ALL: Self = Self {
 809        escape_single_quote: true,
 810        escape_double_quote: true,
 811    };
 812}
 813
 814#[derive(Clone, Debug)]
 815pub struct CharEscapeIter {
 816    inner: EscapeIterInner,
 817}
 818
 819#[derive(Clone, Debug)]
 820enum EscapeIterInner {
 821    Printable(Once<char>),
 822    Escaped(EscapeIterEscaped),
 823}
 824
 825impl Display for EscapeIterInner {
 826    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
 827        match self {
 828            EscapeIterInner::Printable(char) => char.clone().try_for_each(|ch| f.write_char(ch)),
 829            EscapeIterInner::Escaped(escaped) => Display::fmt(escaped, f),
 830        }
 831    }
 832}
 833
 834impl CharEscapeIter {
 835    #[inline]
 836    fn printable(char: char) -> Self {
 837        CharEscapeIter {
 838            inner: EscapeIterInner::Printable(once(char)),
 839        }
 840    }
 841
 842    /// # Safety
 843    /// Assumes that the input byte array is ASCII
 844    #[inline]
 845    unsafe fn new<const N: usize>(bytes: [u8; N]) -> Self {
 846        assert!(N <= 10, "Too many bytes in escape iter");
 847        let mut ten_bytes = [0; 10];
 848        ten_bytes[..N].copy_from_slice(&bytes);
 849        CharEscapeIter {
 850            inner: EscapeIterInner::Escaped(EscapeIterEscaped {
 851                bytes: ten_bytes,
 852                range: 0..N,
 853            }),
 854        }
 855    }
 856}
 857
 858impl Iterator for CharEscapeIter {
 859    type Item = char;
 860
 861    #[inline]
 862    fn next(&mut self) -> Option<Self::Item> {
 863        match &mut self.inner {
 864            EscapeIterInner::Printable(printable) => printable.next(),
 865            EscapeIterInner::Escaped(escaped) => escaped.next(),
 866        }
 867    }
 868
 869    #[inline]
 870    fn size_hint(&self) -> (usize, Option<usize>) {
 871        match &self.inner {
 872            EscapeIterInner::Printable(printable) => printable.size_hint(),
 873            EscapeIterInner::Escaped(escaped) => escaped.size_hint(),
 874        }
 875    }
 876}
 877
 878impl ExactSizeIterator for CharEscapeIter {
 879    #[inline]
 880    fn len(&self) -> usize {
 881        match &self.inner {
 882            EscapeIterInner::Printable(printable) => printable.len(),
 883            EscapeIterInner::Escaped(escaped) => escaped.len(),
 884        }
 885    }
 886}
 887
 888impl FusedIterator for CharEscapeIter {}
 889
 890impl Display for CharEscapeIter {
 891    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
 892        Display::fmt(&self.inner, f)
 893    }
 894}
 895
 896#[derive(Clone, Debug)]
 897struct EscapeIterEscaped {
 898    // SAFETY: all values must be in the ASCII range
 899    bytes: [u8; 10],
 900    // SAFETY: range must not be out of bounds for length 10
 901    range: Range<usize>,
 902}
 903
 904impl Iterator for EscapeIterEscaped {
 905    type Item = char;
 906
 907    #[inline]
 908    fn next(&mut self) -> Option<Self::Item> {
 909        self.range.next().map(|index| unsafe {
 910            // SAFETY: the range is never out of bounds for length 10
 911            char::from(*self.bytes.get_unchecked(index))
 912        })
 913    }
 914
 915    #[inline]
 916    fn size_hint(&self) -> (usize, Option<usize>) {
 917        self.range.size_hint()
 918    }
 919
 920    #[inline]
 921    fn count(self) -> usize {
 922        self.range.len()
 923    }
 924}
 925
 926impl ExactSizeIterator for EscapeIterEscaped {
 927    #[inline]
 928    fn len(&self) -> usize {
 929        self.range.len()
 930    }
 931}
 932
 933impl FusedIterator for EscapeIterEscaped {}
 934
 935impl Display for EscapeIterEscaped {
 936    #[inline]
 937    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
 938        let str = unsafe {
 939            // SAFETY: all bytes are in ASCII range, and range is in bounds for length 10
 940            std::str::from_utf8_unchecked(self.bytes.get_unchecked(self.range.clone()))
 941        };
 942        f.write_str(str)
 943    }
 944}
 945
 946pub type ToLowercase = CharIterDelegate<std::char::ToLowercase>;
 947pub type ToUppercase = CharIterDelegate<std::char::ToUppercase>;
 948
 949#[derive(Debug, Clone)]
 950pub struct CharIterDelegate<I>(CharIterDelegateInner<I>);
 951
 952impl<I> CharIterDelegate<I> {
 953    #[inline]
 954    fn char(iter: I) -> CharIterDelegate<I> {
 955        CharIterDelegate(CharIterDelegateInner::Char(iter))
 956    }
 957
 958    #[inline]
 959    fn invalid(code_point: JavaCodePoint) -> CharIterDelegate<I> {
 960        CharIterDelegate(CharIterDelegateInner::Invalid(Some(code_point).into_iter()))
 961    }
 962}
 963
 964#[derive(Debug, Clone)]
 965enum CharIterDelegateInner<I> {
 966    Char(I),
 967    Invalid(std::option::IntoIter<JavaCodePoint>),
 968}
 969
 970impl<I> Iterator for CharIterDelegate<I>
 971where
 972    I: Iterator<Item = char>,
 973{
 974    type Item = JavaCodePoint;
 975
 976    #[inline]
 977    fn next(&mut self) -> Option<Self::Item> {
 978        match &mut self.0 {
 979            CharIterDelegateInner::Char(char_iter) => {
 980                char_iter.next().map(JavaCodePoint::from_char)
 981            }
 982            CharIterDelegateInner::Invalid(code_point) => code_point.next(),
 983        }
 984    }
 985
 986    #[inline]
 987    fn size_hint(&self) -> (usize, Option<usize>) {
 988        match &self.0 {
 989            CharIterDelegateInner::Char(char_iter) => char_iter.size_hint(),
 990            CharIterDelegateInner::Invalid(code_point) => code_point.size_hint(),
 991        }
 992    }
 993}
 994
 995impl<I> DoubleEndedIterator for CharIterDelegate<I>
 996where
 997    I: Iterator<Item = char> + DoubleEndedIterator,
 998{
 999    #[inline]
1000    fn next_back(&mut self) -> Option<Self::Item> {
1001        match &mut self.0 {
1002            CharIterDelegateInner::Char(char_iter) => {
1003                char_iter.next_back().map(JavaCodePoint::from_char)
1004            }
1005            CharIterDelegateInner::Invalid(code_point) => code_point.next_back(),
1006        }
1007    }
1008}
1009
1010impl<I> ExactSizeIterator for CharIterDelegate<I> where I: Iterator<Item = char> + ExactSizeIterator {}
1011
1012impl<I> FusedIterator for CharIterDelegate<I> where I: Iterator<Item = char> + FusedIterator {}