A custom OS for the xteink x4 ebook reader
1// UTF-8 decoding utilities for no_std environments
2//
3// provides both iterator-based and single-char decoding interfaces
4// for processing UTF-8 byte slices without std::str
5
/// Decodes the UTF-8 character that starts at `buf[pos]`.
///
/// Returns the decoded character together with the number of bytes it
/// occupied. The returned length is always in `1..=4`, so repeatedly
/// advancing `pos` by it is guaranteed to make progress.
///
/// Ill-formed input never fails: it yields `'\u{FFFD}'`. Only the bytes that
/// actually belong to the ill-formed prefix (the lead byte plus any
/// continuation bytes that were still acceptable) are consumed, so the byte
/// that broke the sequence is decoded on its own by the next call instead of
/// being swallowed. Overlong encodings, UTF-16 surrogates and code points
/// above U+10FFFF are rejected at the first offending byte.
///
/// # Panics
///
/// Panics if `pos >= buf.len()`.
#[inline]
pub fn decode_utf8_char(buf: &[u8], pos: usize) -> (char, usize) {
    const REPLACEMENT: char = '\u{FFFD}';

    let b0 = buf[pos];

    // ASCII fast path.
    if b0 < 0x80 {
        return (b0 as char, 1);
    }

    // Payload bits of the lead byte and total sequence length.
    // 0x80..=0xBF are stray continuation bytes, 0xC0/0xC1 can only start an
    // overlong 2-byte form, and 0xF5..=0xFF would encode values > U+10FFFF.
    let (mut cp, expected): (u32, usize) = match b0 {
        0xC2..=0xDF => (u32::from(b0 & 0x1F), 2),
        0xE0..=0xEF => (u32::from(b0 & 0x0F), 3),
        0xF0..=0xF4 => (u32::from(b0 & 0x07), 4),
        _ => return (REPLACEMENT, 1),
    };

    // The second byte has a narrower valid range for a few lead bytes; this
    // is what excludes overlong forms (E0, F0), surrogates (ED) and values
    // beyond U+10FFFF (F4).
    let (second_lo, second_hi): (u8, u8) = match b0 {
        0xE0 => (0xA0, 0xBF),
        0xED => (0x80, 0x9F),
        0xF0 => (0x90, 0xBF),
        0xF4 => (0x80, 0x8F),
        _ => (0x80, 0xBF),
    };

    // `pos < buf.len()` was established by the index above, so this cannot panic.
    let tail = &buf[pos + 1..];

    for i in 1..expected {
        let (lo, hi) = if i == 1 {
            (second_lo, second_hi)
        } else {
            (0x80, 0xBF)
        };

        match tail.get(i - 1) {
            Some(&cont) if (lo..=hi).contains(&cont) => {
                cp = (cp << 6) | u32::from(cont & 0x3F);
            }
            // Missing (truncated input) or unacceptable byte: report the
            // `i` bytes consumed so far as one replacement character and
            // leave the offending byte for the next call.
            _ => return (REPLACEMENT, i),
        }
    }

    // The range checks above already guarantee a valid scalar value; the
    // fallback only exists to avoid `unsafe`.
    (char::from_u32(cp).unwrap_or(REPLACEMENT), expected)
}
52
/// Cursor that walks a byte slice one UTF-8 character at a time.
///
/// Invalid sequences are reported as U+FFFD rather than as an error.
pub struct Utf8Iter<'a> {
    // bytes being decoded
    data: &'a [u8],
    // byte offset into `data` of the next character to decode
    pos: usize,
}

impl<'a> Utf8Iter<'a> {
    /// Creates an iterator positioned at the first byte of `data`.
    #[inline]
    pub fn new(data: &'a [u8]) -> Self {
        Utf8Iter { pos: 0, data }
    }

    /// Returns the current byte offset into the underlying slice.
    #[inline]
    pub fn position(&self) -> usize {
        self.pos
    }

    /// Returns the part of the underlying slice from the current offset to
    /// the end.
    #[inline]
    pub fn remaining(&self) -> &'a [u8] {
        let (_, rest) = self.data.split_at(self.pos);
        rest
    }
}
76
77impl Iterator for Utf8Iter<'_> {
78 type Item = char;
79
80 fn next(&mut self) -> Option<char> {
81 if self.pos >= self.data.len() {
82 return None;
83 }
84
85 let (ch, len) = decode_utf8_char(self.data, self.pos);
86 self.pos += len;
87 Some(ch)
88 }
89}