kernel/src/util/utf8.rs at main

A custom OS for the xteink x4 ebook reader

plump / kernel / src / util / utf8.rs

at main 89 lines 2.1 kB view raw

wrap content

hansmrtn refactor: code review cleanup, bug fixes, and RTC session persistence rtc session persistence: - add rtc_session module for deep sleep state preservation - save/restore nav stack, reader position, files scroll, home state - cache settings in RTC memory to skip SD reads on wake - keep RTC FAST memory powered during sleep (~1-2µA extra) - add collect_session/apply_session to AppLayer trait - add session state accessors to home, files, reader apps bugs: - fix typos in timing.rs (dookmark -> bookmark, dffset -> offset) - add spine bounds check in images.rs scan_chapter_for_image - add chapter validation after spine load in reader - log errors on pulp dir creation failure in main.rs - fix misleading comment in bitmap.rs (fallback is '?' not space) consolidation: - remove redundant extern crate alloc from files.rs, reader/mod.rs, images.rs - extract hint() helper in build.rs - extract is_power_event() helper in scheduler.rs - extract enter_error() helper in reader/mod.rs - add CONTENT_REGION constant in home.rs - trim unused layout constants clarity: - add rotation comment for SCREEN_W/H swap in board/mod.rs - add window bounds comment in strip.rs - rename cryptic vars in paging.rs (lc->line_count, ls->line_start, etc) - add state machine comment in reader/mod.rs simplification: - use if let instead of empty match in storage.rs - use map_or in app.rs render_ready - use .expect() for spawn failures in main.rs style: - convert /// doc comments to // in rtc_session.rs, layout.rs, utf8.rs 5w ago

2f7b352f

 1// UTF-8 decoding utilities for no_std environments
 2//
 3// provides both iterator-based and single-char decoding interfaces
 4// for processing UTF-8 byte slices without std::str
 5
 6// decode one UTF-8 character at buf[pos]
 7// returns (char, byte_length); malformed sequences yield '\u{FFFD}'
 8// panics if pos >= buf.len()
 9#[inline]
10pub fn decode_utf8_char(buf: &[u8], pos: usize) -> (char, usize) {
11    let b0 = buf[pos];
12
13    // ASCII fast path
14    if b0 < 0x80 {
15        return (b0 as char, 1);
16    }
17
18    // Determine expected sequence length from lead byte
19    let (mut cp, expected) = if b0 < 0xC0 {
20        // Stray continuation byte
21        return ('\u{FFFD}', 1);
22    } else if b0 < 0xE0 {
23        ((b0 as u32) & 0x1F, 2)
24    } else if b0 < 0xF0 {
25        ((b0 as u32) & 0x0F, 3)
26    } else if b0 < 0xF8 {
27        ((b0 as u32) & 0x07, 4)
28    } else {
29        // Invalid lead byte
30        return ('\u{FFFD}', 1);
31    };
32
33    // Check if we have enough bytes
34    let len = buf.len();
35    if pos + expected > len {
36        return ('\u{FFFD}', len - pos);
37    }
38
39    // Decode continuation bytes
40    for i in 1..expected {
41        let cont = buf[pos + i];
42        if cont & 0xC0 != 0x80 {
43            // Invalid continuation byte
44            return ('\u{FFFD}', i);
45        }
46        cp = (cp << 6) | (cont as u32 & 0x3F);
47    }
48
49    let ch = char::from_u32(cp).unwrap_or('\u{FFFD}');
50    (ch, expected)
51}
52
// iterator over UTF-8 characters in a byte slice
// invalid sequences yield U+FFFD
//
// Clone allows a caller to checkpoint the cursor and later resume from the
// copy; Debug allows the iterator to appear in log/assert output
#[derive(Debug, Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Utf8Iter<'a> {
    // bytes being decoded
    data: &'a [u8],
    // byte offset into data of the next character to decode
    pos: usize,
}

impl<'a> Utf8Iter<'a> {
    // create an iterator positioned at the first byte of data
    #[inline]
    pub fn new(data: &'a [u8]) -> Self {
        Self { data, pos: 0 }
    }

    // byte offset (not character count) of the next character to decode
    #[inline]
    #[must_use]
    pub fn position(&self) -> usize {
        self.pos
    }

    // bytes that have not been decoded yet; borrows from the original slice,
    // not from the iterator, so it may outlive self
    #[inline]
    #[must_use]
    pub fn remaining(&self) -> &'a [u8] {
        &self.data[self.pos..]
    }
}
76
77impl Iterator for Utf8Iter<'_> {
78    type Item = char;
79
80    fn next(&mut self) -> Option<char> {
81        if self.pos >= self.data.len() {
82            return None;
83        }
84
85        let (ch, len) = decode_utf8_char(self.data, self.pos);
86        self.pos += len;
87        Some(ch)
88    }
89}