// Code-viewer header: this repository has no description.
// Code-viewer header: branch `main`, 1012 lines, 28 kB, raw view.
1use std::char::ParseCharError; 2use std::cmp::Ordering; 3use std::fmt; 4use std::fmt::{Debug, Display, Formatter, Write}; 5use std::hash::{Hash, Hasher}; 6use std::iter::{once, FusedIterator, Once}; 7use std::ops::Range; 8use std::str::FromStr; 9 10use crate::validations::{TAG_CONT, TAG_FOUR_B, TAG_THREE_B, TAG_TWO_B}; 11 12// JavaCodePoint is guaranteed to have the same repr as a u32, with valid values 13// of between 0 and 0x10FFFF, the same as a unicode code point. Surrogate code 14// points are valid values of this type. 15#[derive(Copy, Clone, PartialEq, Eq)] 16#[repr(C)] 17pub struct JavaCodePoint { 18 #[cfg(target_endian = "little")] 19 lower: u16, 20 upper: SeventeenValues, 21 #[cfg(target_endian = "big")] 22 lower: u16, 23} 24 25#[repr(u16)] 26#[derive(Copy, Clone, PartialEq, Eq)] 27#[allow(unused)] 28enum SeventeenValues { 29 V0, 30 V1, 31 V2, 32 V3, 33 V4, 34 V5, 35 V6, 36 V7, 37 V8, 38 V9, 39 V10, 40 V11, 41 V12, 42 V13, 43 V14, 44 V15, 45 V16, 46} 47 48impl JavaCodePoint { 49 pub const MAX: JavaCodePoint = JavaCodePoint::from_char(char::MAX); 50 pub const REPLACEMENT_CHARACTER: JavaCodePoint = 51 JavaCodePoint::from_char(char::REPLACEMENT_CHARACTER); 52 53 /// See [`char::from_u32`] 54 /// 55 /// ``` 56 /// # use java_string::JavaCodePoint; 57 /// let c = JavaCodePoint::from_u32(0x2764); 58 /// assert_eq!(Some(JavaCodePoint::from_char('❤')), c); 59 /// 60 /// assert_eq!(None, JavaCodePoint::from_u32(0x110000)); 61 /// ``` 62 #[inline] 63 #[must_use] 64 pub const fn from_u32(i: u32) -> Option<JavaCodePoint> { 65 if i <= 0x10ffff { 66 unsafe { Some(Self::from_u32_unchecked(i)) } 67 } else { 68 None 69 } 70 } 71 72 /// # Safety 73 /// The argument must be within the valid Unicode code point range of 0 to 74 /// 0x10FFFF inclusive. Surrogate code points are allowed. 
75 #[inline] 76 #[must_use] 77 pub const unsafe fn from_u32_unchecked(i: u32) -> JavaCodePoint { 78 // SAFETY: the caller checks that the argument can be represented by this type 79 std::mem::transmute(i) 80 } 81 82 /// Converts a `char` to a code point. 83 #[inline] 84 #[must_use] 85 pub const fn from_char(char: char) -> JavaCodePoint { 86 unsafe { 87 // SAFETY: all chars are valid code points 88 JavaCodePoint::from_u32_unchecked(char as u32) 89 } 90 } 91 92 /// Converts this code point to a `u32`. 93 /// 94 /// ``` 95 /// # use java_string::JavaCodePoint; 96 /// assert_eq!(65, JavaCodePoint::from_char('A').as_u32()); 97 /// assert_eq!(0xd800, JavaCodePoint::from_u32(0xd800).unwrap().as_u32()); 98 /// ``` 99 #[inline] 100 #[must_use] 101 pub const fn as_u32(self) -> u32 { 102 unsafe { 103 // SAFETY: JavaCodePoint has the same repr as a u32 104 let result = std::mem::transmute::<Self, u32>(self); 105 106 if result > 0x10ffff { 107 // SAFETY: JavaCodePoint can never have a value > 0x10FFFF. 108 // This statement may allow the optimizer to remove branches in the calling code 109 // associated with out of bounds chars. 110 std::hint::unreachable_unchecked(); 111 } 112 113 result 114 } 115 } 116 117 /// Converts this code point to a `char`. 118 /// 119 /// ``` 120 /// # use java_string::JavaCodePoint; 121 /// assert_eq!(Some('a'), JavaCodePoint::from_char('a').as_char()); 122 /// assert_eq!(None, JavaCodePoint::from_u32(0xd800).unwrap().as_char()); 123 /// ``` 124 #[inline] 125 #[must_use] 126 pub const fn as_char(self) -> Option<char> { 127 char::from_u32(self.as_u32()) 128 } 129 130 /// # Safety 131 /// The caller must ensure that this code point is not a surrogate code 132 /// point. 
133 #[inline] 134 #[must_use] 135 pub unsafe fn as_char_unchecked(self) -> char { 136 char::from_u32_unchecked(self.as_u32()) 137 } 138 139 /// See [`char::encode_utf16`] 140 /// 141 /// ``` 142 /// # use java_string::JavaCodePoint; 143 /// assert_eq!( 144 /// 2, 145 /// JavaCodePoint::from_char('𝕊') 146 /// .encode_utf16(&mut [0; 2]) 147 /// .len() 148 /// ); 149 /// assert_eq!( 150 /// 1, 151 /// JavaCodePoint::from_u32(0xd800) 152 /// .unwrap() 153 /// .encode_utf16(&mut [0; 2]) 154 /// .len() 155 /// ); 156 /// ``` 157 /// ```should_panic 158 /// # use java_string::JavaCodePoint; 159 /// // Should panic 160 /// JavaCodePoint::from_char('𝕊').encode_utf16(&mut [0; 1]); 161 /// ``` 162 #[inline] 163 pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { 164 if let Some(char) = self.as_char() { 165 char.encode_utf16(dst) 166 } else { 167 dst[0] = self.as_u32() as u16; 168 &mut dst[..1] 169 } 170 } 171 172 /// Encodes this `JavaCodePoint` into semi UTF-8, that is, UTF-8 with 173 /// surrogate code points. See also [`char::encode_utf8`]. 
174 /// 175 /// ``` 176 /// # use java_string::JavaCodePoint; 177 /// assert_eq!( 178 /// 2, 179 /// JavaCodePoint::from_char('ß') 180 /// .encode_semi_utf8(&mut [0; 4]) 181 /// .len() 182 /// ); 183 /// assert_eq!( 184 /// 3, 185 /// JavaCodePoint::from_u32(0xd800) 186 /// .unwrap() 187 /// .encode_semi_utf8(&mut [0; 4]) 188 /// .len() 189 /// ); 190 /// ``` 191 /// ```should_panic 192 /// # use java_string::JavaCodePoint; 193 /// // Should panic 194 /// JavaCodePoint::from_char('ß').encode_semi_utf8(&mut [0; 1]); 195 /// ``` 196 #[inline] 197 pub fn encode_semi_utf8(self, dst: &mut [u8]) -> &mut [u8] { 198 let len = self.len_utf8(); 199 let code = self.as_u32(); 200 match (len, &mut dst[..]) { 201 (1, [a, ..]) => { 202 *a = code as u8; 203 } 204 (2, [a, b, ..]) => { 205 *a = ((code >> 6) & 0x1f) as u8 | TAG_TWO_B; 206 *b = (code & 0x3f) as u8 | TAG_CONT; 207 } 208 (3, [a, b, c, ..]) => { 209 *a = ((code >> 12) & 0x0f) as u8 | TAG_THREE_B; 210 *b = ((code >> 6) & 0x3f) as u8 | TAG_CONT; 211 *c = (code & 0x3f) as u8 | TAG_CONT; 212 } 213 (4, [a, b, c, d, ..]) => { 214 *a = ((code >> 18) & 0x07) as u8 | TAG_FOUR_B; 215 *b = ((code >> 12) & 0x3f) as u8 | TAG_CONT; 216 *c = ((code >> 6) & 0x3f) as u8 | TAG_CONT; 217 *d = (code & 0x3f) as u8 | TAG_CONT; 218 } 219 _ => panic!( 220 "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", 221 len, 222 code, 223 dst.len() 224 ), 225 } 226 &mut dst[..len] 227 } 228 229 /// See [`char::eq_ignore_ascii_case`]. 230 #[inline] 231 pub fn eq_ignore_ascii_case(&self, other: &JavaCodePoint) -> bool { 232 match (self.as_char(), other.as_char()) { 233 (Some(char1), Some(char2)) => char1.eq_ignore_ascii_case(&char2), 234 (None, None) => self == other, 235 _ => false, 236 } 237 } 238 239 /// See [`char::escape_debug`]. 
240 /// 241 /// ``` 242 /// # use java_string::JavaCodePoint; 243 /// assert_eq!( 244 /// "a", 245 /// JavaCodePoint::from_char('a').escape_debug().to_string() 246 /// ); 247 /// assert_eq!( 248 /// "\\n", 249 /// JavaCodePoint::from_char('\n').escape_debug().to_string() 250 /// ); 251 /// assert_eq!( 252 /// "\\u{d800}", 253 /// JavaCodePoint::from_u32(0xd800) 254 /// .unwrap() 255 /// .escape_debug() 256 /// .to_string() 257 /// ); 258 /// ``` 259 #[inline] 260 #[must_use] 261 pub fn escape_debug(self) -> CharEscapeIter { 262 self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL) 263 } 264 265 #[inline] 266 #[must_use] 267 pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> CharEscapeIter { 268 const NULL: u32 = '\0' as u32; 269 const TAB: u32 = '\t' as u32; 270 const CARRIAGE_RETURN: u32 = '\r' as u32; 271 const LINE_FEED: u32 = '\n' as u32; 272 const SINGLE_QUOTE: u32 = '\'' as u32; 273 const DOUBLE_QUOTE: u32 = '"' as u32; 274 const BACKSLASH: u32 = '\\' as u32; 275 276 unsafe { 277 // SAFETY: all characters specified are in ascii range 278 match self.as_u32() { 279 NULL => CharEscapeIter::new([b'\\', b'0']), 280 TAB => CharEscapeIter::new([b'\\', b't']), 281 CARRIAGE_RETURN => CharEscapeIter::new([b'\\', b'r']), 282 LINE_FEED => CharEscapeIter::new([b'\\', b'n']), 283 SINGLE_QUOTE if args.escape_single_quote => CharEscapeIter::new([b'\\', b'\'']), 284 DOUBLE_QUOTE if args.escape_double_quote => CharEscapeIter::new([b'\\', b'"']), 285 BACKSLASH => CharEscapeIter::new([b'\\', b'\\']), 286 _ if self.is_printable() => { 287 // SAFETY: surrogate code points are not printable 288 CharEscapeIter::printable(self.as_char_unchecked()) 289 } 290 _ => self.escape_unicode(), 291 } 292 } 293 } 294 295 #[inline] 296 fn is_printable(self) -> bool { 297 let Some(char) = self.as_char() else { 298 return false; 299 }; 300 if matches!(char, '\\' | '\'' | '"') { 301 return true; 302 } 303 char.escape_debug().next() != Some('\\') 304 } 305 306 /// See 
[`char::escape_default`]. 307 /// 308 /// ``` 309 /// # use java_string::JavaCodePoint; 310 /// assert_eq!( 311 /// "a", 312 /// JavaCodePoint::from_char('a').escape_default().to_string() 313 /// ); 314 /// assert_eq!( 315 /// "\\n", 316 /// JavaCodePoint::from_char('\n').escape_default().to_string() 317 /// ); 318 /// assert_eq!( 319 /// "\\u{d800}", 320 /// JavaCodePoint::from_u32(0xd800) 321 /// .unwrap() 322 /// .escape_default() 323 /// .to_string() 324 /// ); 325 /// ``` 326 #[inline] 327 #[must_use] 328 pub fn escape_default(self) -> CharEscapeIter { 329 const TAB: u32 = '\t' as u32; 330 const CARRIAGE_RETURN: u32 = '\r' as u32; 331 const LINE_FEED: u32 = '\n' as u32; 332 const SINGLE_QUOTE: u32 = '\'' as u32; 333 const DOUBLE_QUOTE: u32 = '"' as u32; 334 const BACKSLASH: u32 = '\\' as u32; 335 336 unsafe { 337 // SAFETY: all characters specified are in ascii range 338 match self.as_u32() { 339 TAB => CharEscapeIter::new([b'\\', b't']), 340 CARRIAGE_RETURN => CharEscapeIter::new([b'\\', b'r']), 341 LINE_FEED => CharEscapeIter::new([b'\\', b'n']), 342 SINGLE_QUOTE => CharEscapeIter::new([b'\\', b'\'']), 343 DOUBLE_QUOTE => CharEscapeIter::new([b'\\', b'"']), 344 BACKSLASH => CharEscapeIter::new([b'\\', b'\\']), 345 0x20..=0x7e => CharEscapeIter::new([self.as_u32() as u8]), 346 _ => self.escape_unicode(), 347 } 348 } 349 } 350 351 /// See [`char::escape_unicode`]. 
352 /// 353 /// ``` 354 /// # use java_string::JavaCodePoint; 355 /// assert_eq!( 356 /// "\\u{2764}", 357 /// JavaCodePoint::from_char('❤').escape_unicode().to_string() 358 /// ); 359 /// assert_eq!( 360 /// "\\u{d800}", 361 /// JavaCodePoint::from_u32(0xd800) 362 /// .unwrap() 363 /// .escape_unicode() 364 /// .to_string() 365 /// ); 366 /// ``` 367 #[inline] 368 #[must_use] 369 pub fn escape_unicode(self) -> CharEscapeIter { 370 let x = self.as_u32(); 371 372 let mut arr = [0; 10]; 373 arr[0] = b'\\'; 374 arr[1] = b'u'; 375 arr[2] = b'{'; 376 377 let number_len = if x == 0 { 378 1 379 } else { 380 ((x.ilog2() >> 2) + 1) as usize 381 }; 382 arr[3 + number_len] = b'}'; 383 for hexit in 0..number_len { 384 arr[2 + number_len - hexit] = b"0123456789abcdef"[((x >> (hexit << 2)) & 15) as usize]; 385 } 386 387 CharEscapeIter { 388 inner: EscapeIterInner::Escaped(EscapeIterEscaped { 389 bytes: arr, 390 range: 0..number_len + 4, 391 }), 392 } 393 } 394 395 /// See [`char::is_alphabetic`]. 396 #[inline] 397 #[must_use] 398 pub fn is_alphabetic(self) -> bool { 399 self.as_char().is_some_and(|char| char.is_alphabetic()) 400 } 401 402 /// See [`char::is_alphanumeric`]. 403 #[inline] 404 #[must_use] 405 pub fn is_alphanumeric(self) -> bool { 406 self.as_char().is_some_and(|char| char.is_alphanumeric()) 407 } 408 409 /// See [`char::is_ascii`]. 410 #[inline] 411 #[must_use] 412 pub fn is_ascii(self) -> bool { 413 self.as_u32() <= 0x7f 414 } 415 416 /// See [`char::is_ascii_alphabetic`]. 417 #[inline] 418 #[must_use] 419 pub const fn is_ascii_alphabetic(self) -> bool { 420 self.is_ascii_lowercase() || self.is_ascii_uppercase() 421 } 422 423 /// See [`char::is_ascii_alphanumeric`]. 424 #[inline] 425 #[must_use] 426 pub const fn is_ascii_alphanumeric(self) -> bool { 427 self.is_ascii_alphabetic() || self.is_ascii_digit() 428 } 429 430 /// See [`char::is_ascii_control`]. 
431 #[inline] 432 #[must_use] 433 pub const fn is_ascii_control(self) -> bool { 434 matches!(self.as_u32(), 0..=0x1f | 0x7f) 435 } 436 437 /// See [`char::is_ascii_digit`]. 438 #[inline] 439 #[must_use] 440 pub const fn is_ascii_digit(self) -> bool { 441 const ZERO: u32 = '0' as u32; 442 const NINE: u32 = '9' as u32; 443 matches!(self.as_u32(), ZERO..=NINE) 444 } 445 446 /// See [`char::is_ascii_graphic`]. 447 #[inline] 448 #[must_use] 449 pub const fn is_ascii_graphic(self) -> bool { 450 matches!(self.as_u32(), 0x21..=0x7e) 451 } 452 453 /// See [`char::is_ascii_hexdigit`]. 454 #[inline] 455 #[must_use] 456 pub const fn is_ascii_hexdigit(self) -> bool { 457 const LOWER_A: u32 = 'a' as u32; 458 const LOWER_F: u32 = 'f' as u32; 459 const UPPER_A: u32 = 'A' as u32; 460 const UPPER_F: u32 = 'F' as u32; 461 self.is_ascii_digit() || matches!(self.as_u32(), (LOWER_A..=LOWER_F) | (UPPER_A..=UPPER_F)) 462 } 463 464 /// See [`char::is_ascii_lowercase`]. 465 #[inline] 466 #[must_use] 467 pub const fn is_ascii_lowercase(self) -> bool { 468 const A: u32 = 'a' as u32; 469 const Z: u32 = 'z' as u32; 470 matches!(self.as_u32(), A..=Z) 471 } 472 473 /// See [`char::is_ascii_octdigit`]. 474 #[inline] 475 #[must_use] 476 pub const fn is_ascii_octdigit(self) -> bool { 477 const ZERO: u32 = '0' as u32; 478 const SEVEN: u32 = '7' as u32; 479 matches!(self.as_u32(), ZERO..=SEVEN) 480 } 481 482 /// See [`char::is_ascii_punctuation`]. 483 #[inline] 484 #[must_use] 485 pub const fn is_ascii_punctuation(self) -> bool { 486 matches!( 487 self.as_u32(), 488 (0x21..=0x2f) | (0x3a..=0x40) | (0x5b..=0x60) | (0x7b..=0x7e) 489 ) 490 } 491 492 /// See [`char::is_ascii_uppercase`]. 493 #[inline] 494 #[must_use] 495 pub const fn is_ascii_uppercase(self) -> bool { 496 const A: u32 = 'A' as u32; 497 const Z: u32 = 'Z' as u32; 498 matches!(self.as_u32(), A..=Z) 499 } 500 501 /// See [`char::is_ascii_whitespace`]. 
502 #[inline] 503 #[must_use] 504 pub const fn is_ascii_whitespace(self) -> bool { 505 const SPACE: u32 = ' ' as u32; 506 const HORIZONTAL_TAB: u32 = '\t' as u32; 507 const LINE_FEED: u32 = '\n' as u32; 508 const FORM_FEED: u32 = 0xc; 509 const CARRIAGE_RETURN: u32 = '\r' as u32; 510 matches!( 511 self.as_u32(), 512 SPACE | HORIZONTAL_TAB | LINE_FEED | FORM_FEED | CARRIAGE_RETURN 513 ) 514 } 515 516 /// See [`char::is_control`]. 517 #[inline] 518 #[must_use] 519 pub fn is_control(self) -> bool { 520 self.as_char().is_some_and(|char| char.is_control()) 521 } 522 523 /// See [`char::is_digit`]. 524 #[inline] 525 #[must_use] 526 pub fn is_digit(self, radix: u32) -> bool { 527 self.to_digit(radix).is_some() 528 } 529 530 /// See [`char::is_lowercase`]. 531 #[inline] 532 #[must_use] 533 pub fn is_lowercase(self) -> bool { 534 self.as_char().is_some_and(|char| char.is_lowercase()) 535 } 536 537 /// See [`char::is_numeric`]. 538 #[inline] 539 #[must_use] 540 pub fn is_numeric(self) -> bool { 541 self.as_char().is_some_and(|char| char.is_numeric()) 542 } 543 544 /// See [`char::is_uppercase`]. 545 #[inline] 546 #[must_use] 547 pub fn is_uppercase(self) -> bool { 548 self.as_char().is_some_and(|char| char.is_uppercase()) 549 } 550 551 /// See [`char::is_whitespace`]. 552 #[inline] 553 #[must_use] 554 pub fn is_whitespace(self) -> bool { 555 self.as_char().is_some_and(|char| char.is_whitespace()) 556 } 557 558 /// See [`char::len_utf16`]. Surrogate code points return 1. 
559 /// 560 /// ``` 561 /// # use java_string::JavaCodePoint; 562 /// 563 /// let n = JavaCodePoint::from_char('ß').len_utf16(); 564 /// assert_eq!(n, 1); 565 /// 566 /// let len = JavaCodePoint::from_char('💣').len_utf16(); 567 /// assert_eq!(len, 2); 568 /// 569 /// assert_eq!(1, JavaCodePoint::from_u32(0xd800).unwrap().len_utf16()); 570 /// ``` 571 #[inline] 572 #[must_use] 573 pub const fn len_utf16(self) -> usize { 574 if let Some(char) = self.as_char() { 575 char.len_utf16() 576 } else { 577 1 // invalid code points are encoded as 1 utf16 code point anyway 578 } 579 } 580 581 /// See [`char::len_utf8`]. Surrogate code points return 3. 582 /// 583 /// ``` 584 /// # use java_string::JavaCodePoint; 585 /// 586 /// let len = JavaCodePoint::from_char('A').len_utf8(); 587 /// assert_eq!(len, 1); 588 /// 589 /// let len = JavaCodePoint::from_char('ß').len_utf8(); 590 /// assert_eq!(len, 2); 591 /// 592 /// let len = JavaCodePoint::from_char('ℝ').len_utf8(); 593 /// assert_eq!(len, 3); 594 /// 595 /// let len = JavaCodePoint::from_char('💣').len_utf8(); 596 /// assert_eq!(len, 4); 597 /// 598 /// let len = JavaCodePoint::from_u32(0xd800).unwrap().len_utf8(); 599 /// assert_eq!(len, 3); 600 /// ``` 601 #[inline] 602 #[must_use] 603 pub const fn len_utf8(self) -> usize { 604 if let Some(char) = self.as_char() { 605 char.len_utf8() 606 } else { 607 3 // invalid code points are all length 3 in semi-valid utf8 608 } 609 } 610 611 /// See [`char::make_ascii_lowercase`]. 612 #[inline] 613 pub fn make_ascii_lowercase(&mut self) { 614 *self = self.to_ascii_lowercase(); 615 } 616 617 /// See [`char::make_ascii_uppercase`]. 618 #[inline] 619 pub fn make_ascii_uppercase(&mut self) { 620 *self = self.to_ascii_uppercase(); 621 } 622 623 /// See [`char::to_ascii_lowercase`]. 
624 /// 625 /// ``` 626 /// # use java_string::JavaCodePoint; 627 /// 628 /// let ascii = JavaCodePoint::from_char('A'); 629 /// let non_ascii = JavaCodePoint::from_char('❤'); 630 /// 631 /// assert_eq!('a', ascii.to_ascii_lowercase()); 632 /// assert_eq!('❤', non_ascii.to_ascii_lowercase()); 633 /// ``` 634 #[inline] 635 #[must_use] 636 pub const fn to_ascii_lowercase(self) -> JavaCodePoint { 637 if self.is_ascii_uppercase() { 638 unsafe { 639 // SAFETY: all lowercase chars are valid chars 640 Self::from_u32_unchecked(self.as_u32() + 32) 641 } 642 } else { 643 self 644 } 645 } 646 647 /// See [`char::to_ascii_uppercase`]. 648 /// 649 /// ``` 650 /// # use java_string::JavaCodePoint; 651 /// 652 /// let ascii = JavaCodePoint::from_char('a'); 653 /// let non_ascii = JavaCodePoint::from_char('❤'); 654 /// 655 /// assert_eq!('A', ascii.to_ascii_uppercase()); 656 /// assert_eq!('❤', non_ascii.to_ascii_uppercase()); 657 /// ``` 658 #[inline] 659 #[must_use] 660 pub const fn to_ascii_uppercase(self) -> JavaCodePoint { 661 if self.is_ascii_lowercase() { 662 unsafe { 663 // SAFETY: all uppercase chars are valid chars 664 Self::from_u32_unchecked(self.as_u32() - 32) 665 } 666 } else { 667 self 668 } 669 } 670 671 /// See [`char::to_digit`]. 672 #[inline] 673 #[must_use] 674 pub const fn to_digit(self, radix: u32) -> Option<u32> { 675 if let Some(char) = self.as_char() { 676 char.to_digit(radix) 677 } else { 678 None 679 } 680 } 681 682 /// See [`char::to_lowercase`]. 683 #[inline] 684 #[must_use] 685 pub fn to_lowercase(self) -> ToLowercase { 686 match self.as_char() { 687 Some(char) => ToLowercase::char(char.to_lowercase()), 688 None => ToLowercase::invalid(self), 689 } 690 } 691 692 /// See [`char::to_uppercase`]. 
693 #[inline] 694 #[must_use] 695 pub fn to_uppercase(self) -> ToUppercase { 696 match self.as_char() { 697 Some(char) => ToUppercase::char(char.to_uppercase()), 698 None => ToUppercase::invalid(self), 699 } 700 } 701} 702 703impl Debug for JavaCodePoint { 704 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 705 f.write_char('\'')?; 706 for c in self.escape_debug_ext(EscapeDebugExtArgs { 707 escape_single_quote: true, 708 escape_double_quote: false, 709 }) { 710 f.write_char(c)?; 711 } 712 f.write_char('\'') 713 } 714} 715 716impl Default for JavaCodePoint { 717 #[inline] 718 fn default() -> Self { 719 JavaCodePoint::from_char('\0') 720 } 721} 722 723impl Display for JavaCodePoint { 724 #[inline] 725 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 726 Display::fmt(&self.as_char().unwrap_or(char::REPLACEMENT_CHARACTER), f) 727 } 728} 729 730impl From<JavaCodePoint> for u32 { 731 #[inline] 732 fn from(value: JavaCodePoint) -> Self { 733 value.as_u32() 734 } 735} 736 737impl From<u8> for JavaCodePoint { 738 #[inline] 739 fn from(value: u8) -> Self { 740 JavaCodePoint::from_char(char::from(value)) 741 } 742} 743 744impl FromStr for JavaCodePoint { 745 type Err = ParseCharError; 746 747 #[inline] 748 fn from_str(s: &str) -> Result<Self, Self::Err> { 749 char::from_str(s).map(JavaCodePoint::from_char) 750 } 751} 752 753impl Hash for JavaCodePoint { 754 #[inline] 755 fn hash<H: Hasher>(&self, state: &mut H) { 756 self.as_u32().hash(state) 757 } 758} 759 760impl Ord for JavaCodePoint { 761 #[inline] 762 fn cmp(&self, other: &Self) -> Ordering { 763 self.as_u32().cmp(&other.as_u32()) 764 } 765} 766 767impl PartialOrd for JavaCodePoint { 768 #[inline] 769 fn partial_cmp(&self, other: &Self) -> Option<Ordering> { 770 Some(self.cmp(other)) 771 } 772} 773 774impl PartialOrd<char> for JavaCodePoint { 775 #[inline] 776 fn partial_cmp(&self, other: &char) -> Option<Ordering> { 777 self.partial_cmp(&JavaCodePoint::from_char(*other)) 778 } 779} 780 781impl 
PartialOrd<JavaCodePoint> for char { 782 #[inline] 783 fn partial_cmp(&self, other: &JavaCodePoint) -> Option<Ordering> { 784 JavaCodePoint::from_char(*self).partial_cmp(other) 785 } 786} 787 788impl PartialEq<char> for JavaCodePoint { 789 #[inline] 790 fn eq(&self, other: &char) -> bool { 791 self == &JavaCodePoint::from_char(*other) 792 } 793} 794 795impl PartialEq<JavaCodePoint> for char { 796 #[inline] 797 fn eq(&self, other: &JavaCodePoint) -> bool { 798 &JavaCodePoint::from_char(*self) == other 799 } 800} 801 802pub(crate) struct EscapeDebugExtArgs { 803 pub(crate) escape_single_quote: bool, 804 pub(crate) escape_double_quote: bool, 805} 806 807impl EscapeDebugExtArgs { 808 pub(crate) const ESCAPE_ALL: Self = Self { 809 escape_single_quote: true, 810 escape_double_quote: true, 811 }; 812} 813 814#[derive(Clone, Debug)] 815pub struct CharEscapeIter { 816 inner: EscapeIterInner, 817} 818 819#[derive(Clone, Debug)] 820enum EscapeIterInner { 821 Printable(Once<char>), 822 Escaped(EscapeIterEscaped), 823} 824 825impl Display for EscapeIterInner { 826 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 827 match self { 828 EscapeIterInner::Printable(char) => char.clone().try_for_each(|ch| f.write_char(ch)), 829 EscapeIterInner::Escaped(escaped) => Display::fmt(escaped, f), 830 } 831 } 832} 833 834impl CharEscapeIter { 835 #[inline] 836 fn printable(char: char) -> Self { 837 CharEscapeIter { 838 inner: EscapeIterInner::Printable(once(char)), 839 } 840 } 841 842 /// # Safety 843 /// Assumes that the input byte array is ASCII 844 #[inline] 845 unsafe fn new<const N: usize>(bytes: [u8; N]) -> Self { 846 assert!(N <= 10, "Too many bytes in escape iter"); 847 let mut ten_bytes = [0; 10]; 848 ten_bytes[..N].copy_from_slice(&bytes); 849 CharEscapeIter { 850 inner: EscapeIterInner::Escaped(EscapeIterEscaped { 851 bytes: ten_bytes, 852 range: 0..N, 853 }), 854 } 855 } 856} 857 858impl Iterator for CharEscapeIter { 859 type Item = char; 860 861 #[inline] 862 fn next(&mut 
self) -> Option<Self::Item> { 863 match &mut self.inner { 864 EscapeIterInner::Printable(printable) => printable.next(), 865 EscapeIterInner::Escaped(escaped) => escaped.next(), 866 } 867 } 868 869 #[inline] 870 fn size_hint(&self) -> (usize, Option<usize>) { 871 match &self.inner { 872 EscapeIterInner::Printable(printable) => printable.size_hint(), 873 EscapeIterInner::Escaped(escaped) => escaped.size_hint(), 874 } 875 } 876} 877 878impl ExactSizeIterator for CharEscapeIter { 879 #[inline] 880 fn len(&self) -> usize { 881 match &self.inner { 882 EscapeIterInner::Printable(printable) => printable.len(), 883 EscapeIterInner::Escaped(escaped) => escaped.len(), 884 } 885 } 886} 887 888impl FusedIterator for CharEscapeIter {} 889 890impl Display for CharEscapeIter { 891 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 892 Display::fmt(&self.inner, f) 893 } 894} 895 896#[derive(Clone, Debug)] 897struct EscapeIterEscaped { 898 // SAFETY: all values must be in the ASCII range 899 bytes: [u8; 10], 900 // SAFETY: range must not be out of bounds for length 10 901 range: Range<usize>, 902} 903 904impl Iterator for EscapeIterEscaped { 905 type Item = char; 906 907 #[inline] 908 fn next(&mut self) -> Option<Self::Item> { 909 self.range.next().map(|index| unsafe { 910 // SAFETY: the range is never out of bounds for length 10 911 char::from(*self.bytes.get_unchecked(index)) 912 }) 913 } 914 915 #[inline] 916 fn size_hint(&self) -> (usize, Option<usize>) { 917 self.range.size_hint() 918 } 919 920 #[inline] 921 fn count(self) -> usize { 922 self.range.len() 923 } 924} 925 926impl ExactSizeIterator for EscapeIterEscaped { 927 #[inline] 928 fn len(&self) -> usize { 929 self.range.len() 930 } 931} 932 933impl FusedIterator for EscapeIterEscaped {} 934 935impl Display for EscapeIterEscaped { 936 #[inline] 937 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 938 let str = unsafe { 939 // SAFETY: all bytes are in ASCII range, and range is in bounds for length 10 940 
std::str::from_utf8_unchecked(self.bytes.get_unchecked(self.range.clone())) 941 }; 942 f.write_str(str) 943 } 944} 945 946pub type ToLowercase = CharIterDelegate<std::char::ToLowercase>; 947pub type ToUppercase = CharIterDelegate<std::char::ToUppercase>; 948 949#[derive(Debug, Clone)] 950pub struct CharIterDelegate<I>(CharIterDelegateInner<I>); 951 952impl<I> CharIterDelegate<I> { 953 #[inline] 954 fn char(iter: I) -> CharIterDelegate<I> { 955 CharIterDelegate(CharIterDelegateInner::Char(iter)) 956 } 957 958 #[inline] 959 fn invalid(code_point: JavaCodePoint) -> CharIterDelegate<I> { 960 CharIterDelegate(CharIterDelegateInner::Invalid(Some(code_point).into_iter())) 961 } 962} 963 964#[derive(Debug, Clone)] 965enum CharIterDelegateInner<I> { 966 Char(I), 967 Invalid(std::option::IntoIter<JavaCodePoint>), 968} 969 970impl<I> Iterator for CharIterDelegate<I> 971where 972 I: Iterator<Item = char>, 973{ 974 type Item = JavaCodePoint; 975 976 #[inline] 977 fn next(&mut self) -> Option<Self::Item> { 978 match &mut self.0 { 979 CharIterDelegateInner::Char(char_iter) => { 980 char_iter.next().map(JavaCodePoint::from_char) 981 } 982 CharIterDelegateInner::Invalid(code_point) => code_point.next(), 983 } 984 } 985 986 #[inline] 987 fn size_hint(&self) -> (usize, Option<usize>) { 988 match &self.0 { 989 CharIterDelegateInner::Char(char_iter) => char_iter.size_hint(), 990 CharIterDelegateInner::Invalid(code_point) => code_point.size_hint(), 991 } 992 } 993} 994 995impl<I> DoubleEndedIterator for CharIterDelegate<I> 996where 997 I: Iterator<Item = char> + DoubleEndedIterator, 998{ 999 #[inline] 1000 fn next_back(&mut self) -> Option<Self::Item> { 1001 match &mut self.0 { 1002 CharIterDelegateInner::Char(char_iter) => { 1003 char_iter.next_back().map(JavaCodePoint::from_char) 1004 } 1005 CharIterDelegateInner::Invalid(code_point) => code_point.next_back(), 1006 } 1007 } 1008} 1009 1010impl<I> ExactSizeIterator for CharIterDelegate<I> where I: Iterator<Item = char> + 
ExactSizeIterator {} 1011 1012impl<I> FusedIterator for CharIterDelegate<I> where I: Iterator<Item = char> + FusedIterator {}