this repo has no description
1use std::char::ParseCharError;
2use std::cmp::Ordering;
3use std::fmt;
4use std::fmt::{Debug, Display, Formatter, Write};
5use std::hash::{Hash, Hasher};
6use std::iter::{once, FusedIterator, Once};
7use std::ops::Range;
8use std::str::FromStr;
9
10use crate::validations::{TAG_CONT, TAG_FOUR_B, TAG_THREE_B, TAG_TWO_B};
11
/// A Unicode code point that, unlike [`char`], may also be a surrogate code
/// point.
///
/// Valid values lie between 0 and 0x10FFFF inclusive, the same as a Unicode
/// code point.
// The two 16-bit fields together occupy four bytes whose bit pattern matches a
// `u32`: `lower` holds the low 16 bits and `upper` the high 16 bits. The field
// order is swapped depending on the target's endianness so that the in-memory
// bytes line up with the native `u32` encoding, which the transmutes in
// `from_u32_unchecked` and `as_u32` rely on.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct JavaCodePoint {
    #[cfg(target_endian = "little")]
    lower: u16,
    // Only 17 values are representable here, which caps the whole code point
    // at 0x10FFFF.
    upper: SeventeenValues,
    #[cfg(target_endian = "big")]
    lower: u16,
}
24
/// The high 16 bits of a code point. There are exactly 17 variants
/// (discriminants 0 through 16), so the largest code point that can be formed
/// together with a 16-bit low half is 0x10FFFF.
#[repr(u16)]
#[derive(Copy, Clone, PartialEq, Eq)]
// The variants are not named anywhere in the visible code; values of this type
// come into existence through the `transmute` in
// `JavaCodePoint::from_u32_unchecked`, hence the lint suppression.
#[allow(unused)]
enum SeventeenValues {
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
}
47
48impl JavaCodePoint {
    /// The code point corresponding to [`char::MAX`].
    pub const MAX: JavaCodePoint = JavaCodePoint::from_char(char::MAX);
    /// The code point corresponding to [`char::REPLACEMENT_CHARACTER`].
    pub const REPLACEMENT_CHARACTER: JavaCodePoint =
        JavaCodePoint::from_char(char::REPLACEMENT_CHARACTER);
52
53 /// See [`char::from_u32`]
54 ///
55 /// ```
56 /// # use java_string::JavaCodePoint;
57 /// let c = JavaCodePoint::from_u32(0x2764);
58 /// assert_eq!(Some(JavaCodePoint::from_char('❤')), c);
59 ///
60 /// assert_eq!(None, JavaCodePoint::from_u32(0x110000));
61 /// ```
62 #[inline]
63 #[must_use]
64 pub const fn from_u32(i: u32) -> Option<JavaCodePoint> {
65 if i <= 0x10ffff {
66 unsafe { Some(Self::from_u32_unchecked(i)) }
67 } else {
68 None
69 }
70 }
71
72 /// # Safety
73 /// The argument must be within the valid Unicode code point range of 0 to
74 /// 0x10FFFF inclusive. Surrogate code points are allowed.
75 #[inline]
76 #[must_use]
77 pub const unsafe fn from_u32_unchecked(i: u32) -> JavaCodePoint {
78 // SAFETY: the caller checks that the argument can be represented by this type
79 std::mem::transmute(i)
80 }
81
82 /// Converts a `char` to a code point.
83 #[inline]
84 #[must_use]
85 pub const fn from_char(char: char) -> JavaCodePoint {
86 unsafe {
87 // SAFETY: all chars are valid code points
88 JavaCodePoint::from_u32_unchecked(char as u32)
89 }
90 }
91
92 /// Converts this code point to a `u32`.
93 ///
94 /// ```
95 /// # use java_string::JavaCodePoint;
96 /// assert_eq!(65, JavaCodePoint::from_char('A').as_u32());
97 /// assert_eq!(0xd800, JavaCodePoint::from_u32(0xd800).unwrap().as_u32());
98 /// ```
99 #[inline]
100 #[must_use]
101 pub const fn as_u32(self) -> u32 {
102 unsafe {
103 // SAFETY: JavaCodePoint has the same repr as a u32
104 let result = std::mem::transmute::<Self, u32>(self);
105
106 if result > 0x10ffff {
107 // SAFETY: JavaCodePoint can never have a value > 0x10FFFF.
108 // This statement may allow the optimizer to remove branches in the calling code
109 // associated with out of bounds chars.
110 std::hint::unreachable_unchecked();
111 }
112
113 result
114 }
115 }
116
117 /// Converts this code point to a `char`.
118 ///
119 /// ```
120 /// # use java_string::JavaCodePoint;
121 /// assert_eq!(Some('a'), JavaCodePoint::from_char('a').as_char());
122 /// assert_eq!(None, JavaCodePoint::from_u32(0xd800).unwrap().as_char());
123 /// ```
124 #[inline]
125 #[must_use]
126 pub const fn as_char(self) -> Option<char> {
127 char::from_u32(self.as_u32())
128 }
129
130 /// # Safety
131 /// The caller must ensure that this code point is not a surrogate code
132 /// point.
133 #[inline]
134 #[must_use]
135 pub unsafe fn as_char_unchecked(self) -> char {
136 char::from_u32_unchecked(self.as_u32())
137 }
138
139 /// See [`char::encode_utf16`]
140 ///
141 /// ```
142 /// # use java_string::JavaCodePoint;
143 /// assert_eq!(
144 /// 2,
145 /// JavaCodePoint::from_char('𝕊')
146 /// .encode_utf16(&mut [0; 2])
147 /// .len()
148 /// );
149 /// assert_eq!(
150 /// 1,
151 /// JavaCodePoint::from_u32(0xd800)
152 /// .unwrap()
153 /// .encode_utf16(&mut [0; 2])
154 /// .len()
155 /// );
156 /// ```
157 /// ```should_panic
158 /// # use java_string::JavaCodePoint;
159 /// // Should panic
160 /// JavaCodePoint::from_char('𝕊').encode_utf16(&mut [0; 1]);
161 /// ```
162 #[inline]
163 pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
164 if let Some(char) = self.as_char() {
165 char.encode_utf16(dst)
166 } else {
167 dst[0] = self.as_u32() as u16;
168 &mut dst[..1]
169 }
170 }
171
172 /// Encodes this `JavaCodePoint` into semi UTF-8, that is, UTF-8 with
173 /// surrogate code points. See also [`char::encode_utf8`].
174 ///
175 /// ```
176 /// # use java_string::JavaCodePoint;
177 /// assert_eq!(
178 /// 2,
179 /// JavaCodePoint::from_char('ß')
180 /// .encode_semi_utf8(&mut [0; 4])
181 /// .len()
182 /// );
183 /// assert_eq!(
184 /// 3,
185 /// JavaCodePoint::from_u32(0xd800)
186 /// .unwrap()
187 /// .encode_semi_utf8(&mut [0; 4])
188 /// .len()
189 /// );
190 /// ```
191 /// ```should_panic
192 /// # use java_string::JavaCodePoint;
193 /// // Should panic
194 /// JavaCodePoint::from_char('ß').encode_semi_utf8(&mut [0; 1]);
195 /// ```
196 #[inline]
197 pub fn encode_semi_utf8(self, dst: &mut [u8]) -> &mut [u8] {
198 let len = self.len_utf8();
199 let code = self.as_u32();
200 match (len, &mut dst[..]) {
201 (1, [a, ..]) => {
202 *a = code as u8;
203 }
204 (2, [a, b, ..]) => {
205 *a = ((code >> 6) & 0x1f) as u8 | TAG_TWO_B;
206 *b = (code & 0x3f) as u8 | TAG_CONT;
207 }
208 (3, [a, b, c, ..]) => {
209 *a = ((code >> 12) & 0x0f) as u8 | TAG_THREE_B;
210 *b = ((code >> 6) & 0x3f) as u8 | TAG_CONT;
211 *c = (code & 0x3f) as u8 | TAG_CONT;
212 }
213 (4, [a, b, c, d, ..]) => {
214 *a = ((code >> 18) & 0x07) as u8 | TAG_FOUR_B;
215 *b = ((code >> 12) & 0x3f) as u8 | TAG_CONT;
216 *c = ((code >> 6) & 0x3f) as u8 | TAG_CONT;
217 *d = (code & 0x3f) as u8 | TAG_CONT;
218 }
219 _ => panic!(
220 "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
221 len,
222 code,
223 dst.len()
224 ),
225 }
226 &mut dst[..len]
227 }
228
229 /// See [`char::eq_ignore_ascii_case`].
230 #[inline]
231 pub fn eq_ignore_ascii_case(&self, other: &JavaCodePoint) -> bool {
232 match (self.as_char(), other.as_char()) {
233 (Some(char1), Some(char2)) => char1.eq_ignore_ascii_case(&char2),
234 (None, None) => self == other,
235 _ => false,
236 }
237 }
238
239 /// See [`char::escape_debug`].
240 ///
241 /// ```
242 /// # use java_string::JavaCodePoint;
243 /// assert_eq!(
244 /// "a",
245 /// JavaCodePoint::from_char('a').escape_debug().to_string()
246 /// );
247 /// assert_eq!(
248 /// "\\n",
249 /// JavaCodePoint::from_char('\n').escape_debug().to_string()
250 /// );
251 /// assert_eq!(
252 /// "\\u{d800}",
253 /// JavaCodePoint::from_u32(0xd800)
254 /// .unwrap()
255 /// .escape_debug()
256 /// .to_string()
257 /// );
258 /// ```
259 #[inline]
260 #[must_use]
261 pub fn escape_debug(self) -> CharEscapeIter {
262 self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
263 }
264
265 #[inline]
266 #[must_use]
267 pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> CharEscapeIter {
268 const NULL: u32 = '\0' as u32;
269 const TAB: u32 = '\t' as u32;
270 const CARRIAGE_RETURN: u32 = '\r' as u32;
271 const LINE_FEED: u32 = '\n' as u32;
272 const SINGLE_QUOTE: u32 = '\'' as u32;
273 const DOUBLE_QUOTE: u32 = '"' as u32;
274 const BACKSLASH: u32 = '\\' as u32;
275
276 unsafe {
277 // SAFETY: all characters specified are in ascii range
278 match self.as_u32() {
279 NULL => CharEscapeIter::new([b'\\', b'0']),
280 TAB => CharEscapeIter::new([b'\\', b't']),
281 CARRIAGE_RETURN => CharEscapeIter::new([b'\\', b'r']),
282 LINE_FEED => CharEscapeIter::new([b'\\', b'n']),
283 SINGLE_QUOTE if args.escape_single_quote => CharEscapeIter::new([b'\\', b'\'']),
284 DOUBLE_QUOTE if args.escape_double_quote => CharEscapeIter::new([b'\\', b'"']),
285 BACKSLASH => CharEscapeIter::new([b'\\', b'\\']),
286 _ if self.is_printable() => {
287 // SAFETY: surrogate code points are not printable
288 CharEscapeIter::printable(self.as_char_unchecked())
289 }
290 _ => self.escape_unicode(),
291 }
292 }
293 }
294
295 #[inline]
296 fn is_printable(self) -> bool {
297 let Some(char) = self.as_char() else {
298 return false;
299 };
300 if matches!(char, '\\' | '\'' | '"') {
301 return true;
302 }
303 char.escape_debug().next() != Some('\\')
304 }
305
306 /// See [`char::escape_default`].
307 ///
308 /// ```
309 /// # use java_string::JavaCodePoint;
310 /// assert_eq!(
311 /// "a",
312 /// JavaCodePoint::from_char('a').escape_default().to_string()
313 /// );
314 /// assert_eq!(
315 /// "\\n",
316 /// JavaCodePoint::from_char('\n').escape_default().to_string()
317 /// );
318 /// assert_eq!(
319 /// "\\u{d800}",
320 /// JavaCodePoint::from_u32(0xd800)
321 /// .unwrap()
322 /// .escape_default()
323 /// .to_string()
324 /// );
325 /// ```
326 #[inline]
327 #[must_use]
328 pub fn escape_default(self) -> CharEscapeIter {
329 const TAB: u32 = '\t' as u32;
330 const CARRIAGE_RETURN: u32 = '\r' as u32;
331 const LINE_FEED: u32 = '\n' as u32;
332 const SINGLE_QUOTE: u32 = '\'' as u32;
333 const DOUBLE_QUOTE: u32 = '"' as u32;
334 const BACKSLASH: u32 = '\\' as u32;
335
336 unsafe {
337 // SAFETY: all characters specified are in ascii range
338 match self.as_u32() {
339 TAB => CharEscapeIter::new([b'\\', b't']),
340 CARRIAGE_RETURN => CharEscapeIter::new([b'\\', b'r']),
341 LINE_FEED => CharEscapeIter::new([b'\\', b'n']),
342 SINGLE_QUOTE => CharEscapeIter::new([b'\\', b'\'']),
343 DOUBLE_QUOTE => CharEscapeIter::new([b'\\', b'"']),
344 BACKSLASH => CharEscapeIter::new([b'\\', b'\\']),
345 0x20..=0x7e => CharEscapeIter::new([self.as_u32() as u8]),
346 _ => self.escape_unicode(),
347 }
348 }
349 }
350
351 /// See [`char::escape_unicode`].
352 ///
353 /// ```
354 /// # use java_string::JavaCodePoint;
355 /// assert_eq!(
356 /// "\\u{2764}",
357 /// JavaCodePoint::from_char('❤').escape_unicode().to_string()
358 /// );
359 /// assert_eq!(
360 /// "\\u{d800}",
361 /// JavaCodePoint::from_u32(0xd800)
362 /// .unwrap()
363 /// .escape_unicode()
364 /// .to_string()
365 /// );
366 /// ```
367 #[inline]
368 #[must_use]
369 pub fn escape_unicode(self) -> CharEscapeIter {
370 let x = self.as_u32();
371
372 let mut arr = [0; 10];
373 arr[0] = b'\\';
374 arr[1] = b'u';
375 arr[2] = b'{';
376
377 let number_len = if x == 0 {
378 1
379 } else {
380 ((x.ilog2() >> 2) + 1) as usize
381 };
382 arr[3 + number_len] = b'}';
383 for hexit in 0..number_len {
384 arr[2 + number_len - hexit] = b"0123456789abcdef"[((x >> (hexit << 2)) & 15) as usize];
385 }
386
387 CharEscapeIter {
388 inner: EscapeIterInner::Escaped(EscapeIterEscaped {
389 bytes: arr,
390 range: 0..number_len + 4,
391 }),
392 }
393 }
394
395 /// See [`char::is_alphabetic`].
396 #[inline]
397 #[must_use]
398 pub fn is_alphabetic(self) -> bool {
399 self.as_char().is_some_and(|char| char.is_alphabetic())
400 }
401
402 /// See [`char::is_alphanumeric`].
403 #[inline]
404 #[must_use]
405 pub fn is_alphanumeric(self) -> bool {
406 self.as_char().is_some_and(|char| char.is_alphanumeric())
407 }
408
409 /// See [`char::is_ascii`].
410 #[inline]
411 #[must_use]
412 pub fn is_ascii(self) -> bool {
413 self.as_u32() <= 0x7f
414 }
415
416 /// See [`char::is_ascii_alphabetic`].
417 #[inline]
418 #[must_use]
419 pub const fn is_ascii_alphabetic(self) -> bool {
420 self.is_ascii_lowercase() || self.is_ascii_uppercase()
421 }
422
423 /// See [`char::is_ascii_alphanumeric`].
424 #[inline]
425 #[must_use]
426 pub const fn is_ascii_alphanumeric(self) -> bool {
427 self.is_ascii_alphabetic() || self.is_ascii_digit()
428 }
429
430 /// See [`char::is_ascii_control`].
431 #[inline]
432 #[must_use]
433 pub const fn is_ascii_control(self) -> bool {
434 matches!(self.as_u32(), 0..=0x1f | 0x7f)
435 }
436
437 /// See [`char::is_ascii_digit`].
438 #[inline]
439 #[must_use]
440 pub const fn is_ascii_digit(self) -> bool {
441 const ZERO: u32 = '0' as u32;
442 const NINE: u32 = '9' as u32;
443 matches!(self.as_u32(), ZERO..=NINE)
444 }
445
446 /// See [`char::is_ascii_graphic`].
447 #[inline]
448 #[must_use]
449 pub const fn is_ascii_graphic(self) -> bool {
450 matches!(self.as_u32(), 0x21..=0x7e)
451 }
452
453 /// See [`char::is_ascii_hexdigit`].
454 #[inline]
455 #[must_use]
456 pub const fn is_ascii_hexdigit(self) -> bool {
457 const LOWER_A: u32 = 'a' as u32;
458 const LOWER_F: u32 = 'f' as u32;
459 const UPPER_A: u32 = 'A' as u32;
460 const UPPER_F: u32 = 'F' as u32;
461 self.is_ascii_digit() || matches!(self.as_u32(), (LOWER_A..=LOWER_F) | (UPPER_A..=UPPER_F))
462 }
463
464 /// See [`char::is_ascii_lowercase`].
465 #[inline]
466 #[must_use]
467 pub const fn is_ascii_lowercase(self) -> bool {
468 const A: u32 = 'a' as u32;
469 const Z: u32 = 'z' as u32;
470 matches!(self.as_u32(), A..=Z)
471 }
472
473 /// See [`char::is_ascii_octdigit`].
474 #[inline]
475 #[must_use]
476 pub const fn is_ascii_octdigit(self) -> bool {
477 const ZERO: u32 = '0' as u32;
478 const SEVEN: u32 = '7' as u32;
479 matches!(self.as_u32(), ZERO..=SEVEN)
480 }
481
482 /// See [`char::is_ascii_punctuation`].
483 #[inline]
484 #[must_use]
485 pub const fn is_ascii_punctuation(self) -> bool {
486 matches!(
487 self.as_u32(),
488 (0x21..=0x2f) | (0x3a..=0x40) | (0x5b..=0x60) | (0x7b..=0x7e)
489 )
490 }
491
492 /// See [`char::is_ascii_uppercase`].
493 #[inline]
494 #[must_use]
495 pub const fn is_ascii_uppercase(self) -> bool {
496 const A: u32 = 'A' as u32;
497 const Z: u32 = 'Z' as u32;
498 matches!(self.as_u32(), A..=Z)
499 }
500
501 /// See [`char::is_ascii_whitespace`].
502 #[inline]
503 #[must_use]
504 pub const fn is_ascii_whitespace(self) -> bool {
505 const SPACE: u32 = ' ' as u32;
506 const HORIZONTAL_TAB: u32 = '\t' as u32;
507 const LINE_FEED: u32 = '\n' as u32;
508 const FORM_FEED: u32 = 0xc;
509 const CARRIAGE_RETURN: u32 = '\r' as u32;
510 matches!(
511 self.as_u32(),
512 SPACE | HORIZONTAL_TAB | LINE_FEED | FORM_FEED | CARRIAGE_RETURN
513 )
514 }
515
516 /// See [`char::is_control`].
517 #[inline]
518 #[must_use]
519 pub fn is_control(self) -> bool {
520 self.as_char().is_some_and(|char| char.is_control())
521 }
522
523 /// See [`char::is_digit`].
524 #[inline]
525 #[must_use]
526 pub fn is_digit(self, radix: u32) -> bool {
527 self.to_digit(radix).is_some()
528 }
529
530 /// See [`char::is_lowercase`].
531 #[inline]
532 #[must_use]
533 pub fn is_lowercase(self) -> bool {
534 self.as_char().is_some_and(|char| char.is_lowercase())
535 }
536
537 /// See [`char::is_numeric`].
538 #[inline]
539 #[must_use]
540 pub fn is_numeric(self) -> bool {
541 self.as_char().is_some_and(|char| char.is_numeric())
542 }
543
544 /// See [`char::is_uppercase`].
545 #[inline]
546 #[must_use]
547 pub fn is_uppercase(self) -> bool {
548 self.as_char().is_some_and(|char| char.is_uppercase())
549 }
550
551 /// See [`char::is_whitespace`].
552 #[inline]
553 #[must_use]
554 pub fn is_whitespace(self) -> bool {
555 self.as_char().is_some_and(|char| char.is_whitespace())
556 }
557
558 /// See [`char::len_utf16`]. Surrogate code points return 1.
559 ///
560 /// ```
561 /// # use java_string::JavaCodePoint;
562 ///
563 /// let n = JavaCodePoint::from_char('ß').len_utf16();
564 /// assert_eq!(n, 1);
565 ///
566 /// let len = JavaCodePoint::from_char('💣').len_utf16();
567 /// assert_eq!(len, 2);
568 ///
569 /// assert_eq!(1, JavaCodePoint::from_u32(0xd800).unwrap().len_utf16());
570 /// ```
571 #[inline]
572 #[must_use]
573 pub const fn len_utf16(self) -> usize {
574 if let Some(char) = self.as_char() {
575 char.len_utf16()
576 } else {
577 1 // invalid code points are encoded as 1 utf16 code point anyway
578 }
579 }
580
581 /// See [`char::len_utf8`]. Surrogate code points return 3.
582 ///
583 /// ```
584 /// # use java_string::JavaCodePoint;
585 ///
586 /// let len = JavaCodePoint::from_char('A').len_utf8();
587 /// assert_eq!(len, 1);
588 ///
589 /// let len = JavaCodePoint::from_char('ß').len_utf8();
590 /// assert_eq!(len, 2);
591 ///
592 /// let len = JavaCodePoint::from_char('ℝ').len_utf8();
593 /// assert_eq!(len, 3);
594 ///
595 /// let len = JavaCodePoint::from_char('💣').len_utf8();
596 /// assert_eq!(len, 4);
597 ///
598 /// let len = JavaCodePoint::from_u32(0xd800).unwrap().len_utf8();
599 /// assert_eq!(len, 3);
600 /// ```
601 #[inline]
602 #[must_use]
603 pub const fn len_utf8(self) -> usize {
604 if let Some(char) = self.as_char() {
605 char.len_utf8()
606 } else {
607 3 // invalid code points are all length 3 in semi-valid utf8
608 }
609 }
610
611 /// See [`char::make_ascii_lowercase`].
612 #[inline]
613 pub fn make_ascii_lowercase(&mut self) {
614 *self = self.to_ascii_lowercase();
615 }
616
617 /// See [`char::make_ascii_uppercase`].
618 #[inline]
619 pub fn make_ascii_uppercase(&mut self) {
620 *self = self.to_ascii_uppercase();
621 }
622
623 /// See [`char::to_ascii_lowercase`].
624 ///
625 /// ```
626 /// # use java_string::JavaCodePoint;
627 ///
628 /// let ascii = JavaCodePoint::from_char('A');
629 /// let non_ascii = JavaCodePoint::from_char('❤');
630 ///
631 /// assert_eq!('a', ascii.to_ascii_lowercase());
632 /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
633 /// ```
634 #[inline]
635 #[must_use]
636 pub const fn to_ascii_lowercase(self) -> JavaCodePoint {
637 if self.is_ascii_uppercase() {
638 unsafe {
639 // SAFETY: all lowercase chars are valid chars
640 Self::from_u32_unchecked(self.as_u32() + 32)
641 }
642 } else {
643 self
644 }
645 }
646
647 /// See [`char::to_ascii_uppercase`].
648 ///
649 /// ```
650 /// # use java_string::JavaCodePoint;
651 ///
652 /// let ascii = JavaCodePoint::from_char('a');
653 /// let non_ascii = JavaCodePoint::from_char('❤');
654 ///
655 /// assert_eq!('A', ascii.to_ascii_uppercase());
656 /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
657 /// ```
658 #[inline]
659 #[must_use]
660 pub const fn to_ascii_uppercase(self) -> JavaCodePoint {
661 if self.is_ascii_lowercase() {
662 unsafe {
663 // SAFETY: all uppercase chars are valid chars
664 Self::from_u32_unchecked(self.as_u32() - 32)
665 }
666 } else {
667 self
668 }
669 }
670
671 /// See [`char::to_digit`].
672 #[inline]
673 #[must_use]
674 pub const fn to_digit(self, radix: u32) -> Option<u32> {
675 if let Some(char) = self.as_char() {
676 char.to_digit(radix)
677 } else {
678 None
679 }
680 }
681
682 /// See [`char::to_lowercase`].
683 #[inline]
684 #[must_use]
685 pub fn to_lowercase(self) -> ToLowercase {
686 match self.as_char() {
687 Some(char) => ToLowercase::char(char.to_lowercase()),
688 None => ToLowercase::invalid(self),
689 }
690 }
691
692 /// See [`char::to_uppercase`].
693 #[inline]
694 #[must_use]
695 pub fn to_uppercase(self) -> ToUppercase {
696 match self.as_char() {
697 Some(char) => ToUppercase::char(char.to_uppercase()),
698 None => ToUppercase::invalid(self),
699 }
700 }
701}
702
703impl Debug for JavaCodePoint {
704 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
705 f.write_char('\'')?;
706 for c in self.escape_debug_ext(EscapeDebugExtArgs {
707 escape_single_quote: true,
708 escape_double_quote: false,
709 }) {
710 f.write_char(c)?;
711 }
712 f.write_char('\'')
713 }
714}
715
716impl Default for JavaCodePoint {
717 #[inline]
718 fn default() -> Self {
719 JavaCodePoint::from_char('\0')
720 }
721}
722
723impl Display for JavaCodePoint {
724 #[inline]
725 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
726 Display::fmt(&self.as_char().unwrap_or(char::REPLACEMENT_CHARACTER), f)
727 }
728}
729
730impl From<JavaCodePoint> for u32 {
731 #[inline]
732 fn from(value: JavaCodePoint) -> Self {
733 value.as_u32()
734 }
735}
736
737impl From<u8> for JavaCodePoint {
738 #[inline]
739 fn from(value: u8) -> Self {
740 JavaCodePoint::from_char(char::from(value))
741 }
742}
743
744impl FromStr for JavaCodePoint {
745 type Err = ParseCharError;
746
747 #[inline]
748 fn from_str(s: &str) -> Result<Self, Self::Err> {
749 char::from_str(s).map(JavaCodePoint::from_char)
750 }
751}
752
753impl Hash for JavaCodePoint {
754 #[inline]
755 fn hash<H: Hasher>(&self, state: &mut H) {
756 self.as_u32().hash(state)
757 }
758}
759
760impl Ord for JavaCodePoint {
761 #[inline]
762 fn cmp(&self, other: &Self) -> Ordering {
763 self.as_u32().cmp(&other.as_u32())
764 }
765}
766
767impl PartialOrd for JavaCodePoint {
768 #[inline]
769 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
770 Some(self.cmp(other))
771 }
772}
773
774impl PartialOrd<char> for JavaCodePoint {
775 #[inline]
776 fn partial_cmp(&self, other: &char) -> Option<Ordering> {
777 self.partial_cmp(&JavaCodePoint::from_char(*other))
778 }
779}
780
781impl PartialOrd<JavaCodePoint> for char {
782 #[inline]
783 fn partial_cmp(&self, other: &JavaCodePoint) -> Option<Ordering> {
784 JavaCodePoint::from_char(*self).partial_cmp(other)
785 }
786}
787
788impl PartialEq<char> for JavaCodePoint {
789 #[inline]
790 fn eq(&self, other: &char) -> bool {
791 self == &JavaCodePoint::from_char(*other)
792 }
793}
794
795impl PartialEq<JavaCodePoint> for char {
796 #[inline]
797 fn eq(&self, other: &JavaCodePoint) -> bool {
798 &JavaCodePoint::from_char(*self) == other
799 }
800}
801
/// Options for `JavaCodePoint::escape_debug_ext`, choosing which quote
/// characters receive a backslash escape.
pub(crate) struct EscapeDebugExtArgs {
    /// Escape `'` as `\'`.
    pub(crate) escape_single_quote: bool,
    /// Escape `"` as `\"`.
    pub(crate) escape_double_quote: bool,
}
806
impl EscapeDebugExtArgs {
    /// Escapes both single and double quotes; used by
    /// `JavaCodePoint::escape_debug`.
    pub(crate) const ESCAPE_ALL: Self = Self {
        escape_single_quote: true,
        escape_double_quote: true,
    };
}
813
/// Iterator over the characters of an escaped code point, as returned by
/// `JavaCodePoint::escape_debug`, `escape_default` and `escape_unicode`.
/// Its `Display` impl writes the characters that have not been yielded yet.
#[derive(Clone, Debug)]
pub struct CharEscapeIter {
    inner: EscapeIterInner,
}
818
/// The two shapes an escape sequence can take.
#[derive(Clone, Debug)]
enum EscapeIterInner {
    /// The character needs no escaping and is yielded as-is.
    Printable(Once<char>),
    /// An ASCII escape sequence held in a small inline buffer.
    Escaped(EscapeIterEscaped),
}
824
825impl Display for EscapeIterInner {
826 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
827 match self {
828 EscapeIterInner::Printable(char) => char.clone().try_for_each(|ch| f.write_char(ch)),
829 EscapeIterInner::Escaped(escaped) => Display::fmt(escaped, f),
830 }
831 }
832}
833
834impl CharEscapeIter {
835 #[inline]
836 fn printable(char: char) -> Self {
837 CharEscapeIter {
838 inner: EscapeIterInner::Printable(once(char)),
839 }
840 }
841
842 /// # Safety
843 /// Assumes that the input byte array is ASCII
844 #[inline]
845 unsafe fn new<const N: usize>(bytes: [u8; N]) -> Self {
846 assert!(N <= 10, "Too many bytes in escape iter");
847 let mut ten_bytes = [0; 10];
848 ten_bytes[..N].copy_from_slice(&bytes);
849 CharEscapeIter {
850 inner: EscapeIterInner::Escaped(EscapeIterEscaped {
851 bytes: ten_bytes,
852 range: 0..N,
853 }),
854 }
855 }
856}
857
858impl Iterator for CharEscapeIter {
859 type Item = char;
860
861 #[inline]
862 fn next(&mut self) -> Option<Self::Item> {
863 match &mut self.inner {
864 EscapeIterInner::Printable(printable) => printable.next(),
865 EscapeIterInner::Escaped(escaped) => escaped.next(),
866 }
867 }
868
869 #[inline]
870 fn size_hint(&self) -> (usize, Option<usize>) {
871 match &self.inner {
872 EscapeIterInner::Printable(printable) => printable.size_hint(),
873 EscapeIterInner::Escaped(escaped) => escaped.size_hint(),
874 }
875 }
876}
877
878impl ExactSizeIterator for CharEscapeIter {
879 #[inline]
880 fn len(&self) -> usize {
881 match &self.inner {
882 EscapeIterInner::Printable(printable) => printable.len(),
883 EscapeIterInner::Escaped(escaped) => escaped.len(),
884 }
885 }
886}
887
// Both inner iterators (`Once<char>` and `EscapeIterEscaped`) are fused, so
// this iterator keeps returning `None` once exhausted.
impl FusedIterator for CharEscapeIter {}
889
890impl Display for CharEscapeIter {
891 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
892 Display::fmt(&self.inner, f)
893 }
894}
895
/// An ASCII escape sequence stored inline, together with the range of bytes
/// that have not been yielded yet.
#[derive(Clone, Debug)]
struct EscapeIterEscaped {
    // SAFETY: all values must be in the ASCII range
    bytes: [u8; 10],
    // SAFETY: range must not be out of bounds for length 10
    range: Range<usize>,
}

/// Yields the remaining bytes of the sequence as `char`s, front to back.
impl Iterator for EscapeIterEscaped {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let index = self.range.next()?;
        // SAFETY: the range is never out of bounds for length 10
        let byte = unsafe { *self.bytes.get_unchecked(index) };
        Some(char::from(byte))
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.range.size_hint()
    }

    // The remaining length is known up front, so there is no need to step
    // through the sequence.
    #[inline]
    fn count(self) -> usize {
        ExactSizeIterator::len(&self.range)
    }
}

impl ExactSizeIterator for EscapeIterEscaped {
    #[inline]
    fn len(&self) -> usize {
        ExactSizeIterator::len(&self.range)
    }
}

// Once `range` is exhausted it stays exhausted, so `next` keeps returning `None`.
impl FusedIterator for EscapeIterEscaped {}

/// Writes the not-yet-yielded part of the sequence as a string.
impl Display for EscapeIterEscaped {
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let remaining = self.range.clone();
        // SAFETY: range is in bounds for length 10
        let pending = unsafe { self.bytes.get_unchecked(remaining) };
        // SAFETY: all bytes are in ASCII range, hence valid UTF-8
        let text = unsafe { std::str::from_utf8_unchecked(pending) };
        f.write_str(text)
    }
}
945
/// Iterator returned by `JavaCodePoint::to_lowercase`.
pub type ToLowercase = CharIterDelegate<std::char::ToLowercase>;
/// Iterator returned by `JavaCodePoint::to_uppercase`.
pub type ToUppercase = CharIterDelegate<std::char::ToUppercase>;
948
/// Iterator of `JavaCodePoint`s that either wraps an iterator of `char`s or
/// yields one code point that has no `char` representation.
#[derive(Debug, Clone)]
pub struct CharIterDelegate<I>(CharIterDelegateInner<I>);
951
952impl<I> CharIterDelegate<I> {
953 #[inline]
954 fn char(iter: I) -> CharIterDelegate<I> {
955 CharIterDelegate(CharIterDelegateInner::Char(iter))
956 }
957
958 #[inline]
959 fn invalid(code_point: JavaCodePoint) -> CharIterDelegate<I> {
960 CharIterDelegate(CharIterDelegateInner::Invalid(Some(code_point).into_iter()))
961 }
962}
963
/// Backing state of a [`CharIterDelegate`].
#[derive(Debug, Clone)]
enum CharIterDelegateInner<I> {
    /// Delegates to an iterator of `char`s.
    Char(I),
    /// Holds a single code point that is yielded at most once.
    Invalid(std::option::IntoIter<JavaCodePoint>),
}
969
970impl<I> Iterator for CharIterDelegate<I>
971where
972 I: Iterator<Item = char>,
973{
974 type Item = JavaCodePoint;
975
976 #[inline]
977 fn next(&mut self) -> Option<Self::Item> {
978 match &mut self.0 {
979 CharIterDelegateInner::Char(char_iter) => {
980 char_iter.next().map(JavaCodePoint::from_char)
981 }
982 CharIterDelegateInner::Invalid(code_point) => code_point.next(),
983 }
984 }
985
986 #[inline]
987 fn size_hint(&self) -> (usize, Option<usize>) {
988 match &self.0 {
989 CharIterDelegateInner::Char(char_iter) => char_iter.size_hint(),
990 CharIterDelegateInner::Invalid(code_point) => code_point.size_hint(),
991 }
992 }
993}
994
995impl<I> DoubleEndedIterator for CharIterDelegate<I>
996where
997 I: Iterator<Item = char> + DoubleEndedIterator,
998{
999 #[inline]
1000 fn next_back(&mut self) -> Option<Self::Item> {
1001 match &mut self.0 {
1002 CharIterDelegateInner::Char(char_iter) => {
1003 char_iter.next_back().map(JavaCodePoint::from_char)
1004 }
1005 CharIterDelegateInner::Invalid(code_point) => code_point.next_back(),
1006 }
1007 }
1008}
1009
// No `len` override: the trait's default implementation is used, which derives
// the length from `size_hint` as forwarded by the `Iterator` impl above.
impl<I> ExactSizeIterator for CharIterDelegate<I> where I: Iterator<Item = char> + ExactSizeIterator {}
1011
// Fused whenever the wrapped `char` iterator is fused; the single-item
// `option::IntoIter` variant is itself fused.
impl<I> FusedIterator for CharIterDelegate<I> where I: Iterator<Item = char> + FusedIterator {}