we (web engine): Experimental web browser project to understand the limits of Claude
//! `name` — Naming table.
//!
//! Contains human-readable strings such as the family name, style name, copyright, etc.
//! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/name>
5
6use crate::font::parse::Reader;
7use crate::font::FontError;
8
9/// Parsed `name` table.
10#[derive(Debug)]
11pub struct NameTable {
12 /// All name records extracted from the table.
13 pub records: Vec<NameRecord>,
14}
15
/// A single decoded entry of the `name` table.
///
/// The record is plain data (four `u16` identifiers plus the decoded text), so
/// it derives `Clone`, `PartialEq` and `Eq` in addition to `Debug`; this lets
/// callers copy and compare records directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NameRecord {
    /// Platform ID (0 = Unicode, 1 = Macintosh, 3 = Windows).
    pub platform_id: u16,
    /// Encoding ID; its meaning depends on `platform_id`.
    pub encoding_id: u16,
    /// Language ID; its meaning depends on `platform_id`.
    pub language_id: u16,
    /// Name ID (1 = family, 2 = subfamily, 4 = full name, 6 = PostScript name, etc.).
    pub name_id: u16,
    /// The string value, already decoded to UTF-8.
    pub value: String,
}
30
31impl NameTable {
32 /// Parse the `name` table from raw bytes.
33 pub fn parse(data: &[u8]) -> Result<NameTable, FontError> {
34 let r = Reader::new(data);
35 if r.len() < 6 {
36 return Err(FontError::MalformedTable("name"));
37 }
38
39 // format(2) + count(2) + stringOffset(2)
40 let count = r.u16(2)? as usize;
41 let string_offset = r.u16(4)? as usize;
42
43 let mut records = Vec::with_capacity(count);
44
45 for i in 0..count {
46 let base = 6 + i * 12;
47 if base + 12 > data.len() {
48 break;
49 }
50
51 let platform_id = r.u16(base)?;
52 let encoding_id = r.u16(base + 2)?;
53 let language_id = r.u16(base + 4)?;
54 let name_id = r.u16(base + 6)?;
55 let length = r.u16(base + 8)? as usize;
56 let offset = r.u16(base + 10)? as usize;
57
58 let str_start = string_offset + offset;
59 if str_start + length > data.len() {
60 continue;
61 }
62
63 let raw = r.slice(str_start, length)?;
64 let value = decode_name_string(platform_id, encoding_id, raw);
65
66 records.push(NameRecord {
67 platform_id,
68 encoding_id,
69 language_id,
70 name_id,
71 value,
72 });
73 }
74
75 Ok(NameTable { records })
76 }
77
78 /// Get the font family name (name ID 1).
79 ///
80 /// Prefers Windows/Unicode platform, falls back to any platform.
81 pub fn family_name(&self) -> Option<&str> {
82 self.get_name(1)
83 }
84
85 /// Get the font subfamily/style name (name ID 2, e.g. "Regular", "Bold").
86 pub fn subfamily_name(&self) -> Option<&str> {
87 self.get_name(2)
88 }
89
90 /// Get the full font name (name ID 4).
91 pub fn full_name(&self) -> Option<&str> {
92 self.get_name(4)
93 }
94
95 /// Get a name string by name ID.
96 ///
97 /// Prefers Windows platform (3) with English, then any platform.
98 fn get_name(&self, name_id: u16) -> Option<&str> {
99 // Prefer Windows platform (3), English (language_id 0x0409).
100 let win_en = self
101 .records
102 .iter()
103 .find(|r| r.name_id == name_id && r.platform_id == 3 && r.language_id == 0x0409);
104 if let Some(rec) = win_en {
105 if !rec.value.is_empty() {
106 return Some(&rec.value);
107 }
108 }
109
110 // Fall back to any Windows platform record.
111 let win = self
112 .records
113 .iter()
114 .find(|r| r.name_id == name_id && r.platform_id == 3);
115 if let Some(rec) = win {
116 if !rec.value.is_empty() {
117 return Some(&rec.value);
118 }
119 }
120
121 // Fall back to any record.
122 self.records
123 .iter()
124 .find(|r| r.name_id == name_id && !r.value.is_empty())
125 .map(|r| r.value.as_str())
126 }
127}
128
129/// Decode a name string based on platform/encoding.
130fn decode_name_string(platform_id: u16, encoding_id: u16, data: &[u8]) -> String {
131 match platform_id {
132 0 => {
133 // Unicode platform — always UTF-16BE.
134 decode_utf16be(data)
135 }
136 1 => {
137 // Macintosh platform.
138 if encoding_id == 0 {
139 // Mac Roman.
140 decode_mac_roman(data)
141 } else {
142 // Other Mac encodings — treat as ASCII fallback.
143 String::from_utf8_lossy(data).into_owned()
144 }
145 }
146 3 => {
147 // Windows platform — encoding 1 = UTF-16BE, encoding 10 = UTF-16BE.
148 match encoding_id {
149 1 | 10 => decode_utf16be(data),
150 0 => {
151 // Symbol encoding — treat as UTF-16BE.
152 decode_utf16be(data)
153 }
154 _ => String::from_utf8_lossy(data).into_owned(),
155 }
156 }
157 _ => String::from_utf8_lossy(data).into_owned(),
158 }
159}
160
/// Decode big-endian UTF-16 bytes into a `String`.
///
/// Bytes are consumed two at a time; a trailing odd byte is ignored.
/// Valid surrogate pairs become a single supplementary-plane character, and
/// unpaired surrogates are dropped without emitting a replacement.
fn decode_utf16be(data: &[u8]) -> String {
    let code_units = data
        .chunks_exact(2)
        .map(|pair| u16::from_be_bytes([pair[0], pair[1]]));

    // `decode_utf16` yields `Err` for each unpaired surrogate and then resumes
    // with the following unit, so discarding the errors skips only the lone
    // surrogate itself.
    std::char::decode_utf16(code_units)
        .filter_map(Result::ok)
        .collect()
}
191
/// Decode Mac Roman bytes into a `String`.
///
/// Bytes 0x00-0x7F are taken as ASCII; bytes 0x80-0xFF are translated through
/// a 128-entry lookup table of Unicode code points.
fn decode_mac_roman(data: &[u8]) -> String {
    // Unicode code points for bytes 0x80..=0xFF, indexed by `byte - 0x80`.
    static HIGH_HALF: [u16; 128] = [
        // 0x80
        0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
        // 0x88
        0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
        // 0x90
        0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
        // 0x98
        0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
        // 0xA0
        0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
        // 0xA8
        0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
        // 0xB0
        0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
        // 0xB8
        0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
        // 0xC0
        0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
        // 0xC8
        0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
        // 0xD0
        0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
        // 0xD8
        0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
        // 0xE0
        0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
        // 0xE8
        0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
        // 0xF0
        0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
        // 0xF8
        0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
    ];

    let mut decoded = String::with_capacity(data.len());
    decoded.extend(data.iter().filter_map(|&byte| {
        match byte.checked_sub(0x80) {
            // Below 0x80 the subtraction underflows: plain ASCII.
            None => Some(char::from(byte)),
            Some(index) => char::from_u32(u32::from(HIGH_HALF[usize::from(index)])),
        }
    }));
    decoded
}