we (web engine): Experimental web browser project to understand the limits of Claude
//! Single-byte encoding decoders per the WHATWG Encoding Standard, §9 "Legacy single-byte encodings".
2//!
3//! Each single-byte encoding maps bytes 0x00–0x7F to ASCII and
4//! bytes 0x80–0xFF via a 128-entry lookup table to Unicode codepoints.
5
6use crate::error::{EncodingError, Result};
7use crate::utf8::ErrorMode;
8
9/// Decode bytes using a single-byte encoding lookup table.
10///
11/// Bytes 0x00–0x7F are ASCII. Bytes 0x80–0xFF are looked up in `table`.
12/// Table entries of `0xFFFD` indicate unmapped bytes.
13pub(crate) fn decode_single_byte(
14 bytes: &[u8],
15 table: &[u16; 128],
16 encoding_name: &'static str,
17 mode: ErrorMode,
18) -> Result<String> {
19 let mut output = String::with_capacity(bytes.len());
20 for (i, &byte) in bytes.iter().enumerate() {
21 if byte < 0x80 {
22 output.push(byte as char);
23 } else {
24 let cp = table[(byte - 0x80) as usize];
25 if cp == 0xFFFD {
26 if mode == ErrorMode::Fatal {
27 return Err(EncodingError::InvalidSequence {
28 encoding: encoding_name,
29 position: i,
30 });
31 }
32 output.push('\u{FFFD}');
33 } else {
34 output.push(char::from_u32(cp as u32).unwrap_or('\u{FFFD}'));
35 }
36 }
37 }
38 Ok(output)
39}
40
41// ---------------------------------------------------------------------------
42// WHATWG single-byte encoding index tables
43// Each table maps bytes 0x80–0xFF (index 0–127) to Unicode codepoints.
44// 0xFFFD = unmapped byte position.
45// ---------------------------------------------------------------------------
46
/// windows-1252 (WHATWG index).
///
/// Entry `i` holds the code point for byte `0x80 + i`. This table is also the
/// one used for the labels iso-8859-1, us-ascii, latin1, etc.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const WINDOWS_1252: [u16; 128] = [
    // 0x80–0x8F
    0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
    // 0x90–0x9F
    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178,
    // 0xA0–0xAF
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    // 0xB0–0xBF
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
    // 0xC0–0xCF
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    // 0xD0–0xDF
    0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    // 0xE0–0xEF
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    // 0xF0–0xFF
    0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF,
];
68
/// ISO-8859-2 (Latin-2, Central European).
///
/// Entry `i` holds the code point for byte `0x80 + i`.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const ISO_8859_2: [u16; 128] = [
    // 0x80–0x8F: C1 controls
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F: C1 controls
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
    0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
    // 0xB0–0xBF
    0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
    0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
    // 0xC0–0xCF
    0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
    // 0xD0–0xDF
    0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
    0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
    // 0xE0–0xEF
    0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
    // 0xF0–0xFF
    0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
    0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9,
];
89
/// ISO-8859-3 (Latin-3, South European).
///
/// Entry `i` holds the code point for byte `0x80 + i`; `0xFFFD` marks a byte
/// with no mapping.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const ISO_8859_3: [u16; 128] = [
    // 0x80–0x8F: C1 controls
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F: C1 controls
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,
    0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,
    // 0xB0–0xBF
    0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
    0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,
    // 0xC0–0xCF
    0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    // 0xD0–0xDF
    0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
    0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
    // 0xE0–0xEF
    0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    // 0xF0–0xFF
    0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
    0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9,
];
109
/// ISO-8859-4 (Latin-4, North European).
///
/// Entry `i` holds the code point for byte `0x80 + i`.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const ISO_8859_4: [u16; 128] = [
    // 0x80–0x8F: C1 controls
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F: C1 controls
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
    0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
    // 0xB0–0xBF
    0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
    0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
    // 0xC0–0xCF
    0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
    // 0xD0–0xDF
    0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
    // 0xE0–0xEF
    0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
    // 0xF0–0xFF
    0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9,
];
129
/// ISO-8859-5 (Cyrillic).
///
/// Entry `i` holds the code point for byte `0x80 + i`.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const ISO_8859_5: [u16; 128] = [
    // 0x80–0x8F: C1 controls
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F: C1 controls
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
    // 0xB0–0xBF
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
    // 0xC0–0xCF
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
    // 0xD0–0xDF
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
    // 0xE0–0xEF
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
    // 0xF0–0xFF
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F,
];
149
/// ISO-8859-6 (Arabic).
///
/// Entry `i` holds the code point for byte `0x80 + i`; `0xFFFD` marks a byte
/// with no mapping.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const ISO_8859_6: [u16; 128] = [
    // 0x80–0x8F: C1 controls
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F: C1 controls
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,
    // 0xB0–0xBF
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,
    // 0xC0–0xCF
    0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
    // 0xD0–0xDF
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    // 0xE0–0xEF
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
    // 0xF0–0xFF
    0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
];
169
/// ISO-8859-7 (Greek).
///
/// Entry `i` holds the code point for byte `0x80 + i`; `0xFFFD` marks a byte
/// with no mapping.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const ISO_8859_7: [u16; 128] = [
    // 0x80–0x8F: C1 controls
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F: C1 controls
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0x2018, 0x2019, 0x00A3, 0x20AC, 0x20AF, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x037A, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015,
    // 0xB0–0xBF
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
    0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
    // 0xC0–0xCF
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
    // 0xD0–0xDF
    0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
    0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
    // 0xE0–0xEF
    0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
    0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
    // 0xF0–0xFF
    0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
    0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD,
];
189
/// ISO-8859-8 (Hebrew) — also used for ISO-8859-8-I (logical order).
///
/// Entry `i` holds the code point for byte `0x80 + i`; `0xFFFD` marks a byte
/// with no mapping.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const ISO_8859_8: [u16; 128] = [
    // 0x80–0x8F: C1 controls
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F: C1 controls
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    // 0xB0–0xBF
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,
    // 0xC0–0xCF
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    // 0xD0–0xDF
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,
    // 0xE0–0xEF
    0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
    0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
    // 0xF0–0xFF
    0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
    0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD,
];
209
/// ISO-8859-10 (Latin-6, Nordic).
///
/// Entry `i` holds the code point for byte `0x80 + i`.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const ISO_8859_10: [u16; 128] = [
    // 0x80–0x8F: C1 controls
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F: C1 controls
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
    0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
    // 0xB0–0xBF
    0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
    0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
    // 0xC0–0xCF
    0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
    // 0xD0–0xDF
    0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
    0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    // 0xE0–0xEF
    0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
    // 0xF0–0xFF
    0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
    0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138,
];
229
/// ISO-8859-13 (Latin-7, Baltic Rim).
///
/// Entry `i` holds the code point for byte `0x80 + i`.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const ISO_8859_13: [u16; 128] = [
    // 0x80–0x8F: C1 controls
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F: C1 controls
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
    0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
    // 0xB0–0xBF
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
    0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
    // 0xC0–0xCF
    0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
    0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
    // 0xD0–0xDF
    0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
    0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
    // 0xE0–0xEF
    0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
    0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
    // 0xF0–0xFF
    0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
    0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019,
];
249
/// ISO-8859-14 (Latin-8, Celtic).
///
/// Entry `i` holds the code point for byte `0x80 + i`.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const ISO_8859_14: [u16; 128] = [
    // 0x80–0x8F: C1 controls
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F: C1 controls
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
    0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
    // 0xB0–0xBF
    0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
    0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
    // 0xC0–0xCF
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    // 0xD0–0xDF
    0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
    // 0xE0–0xEF
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    // 0xF0–0xFF
    0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF,
];
269
/// ISO-8859-15 (Latin-9, updated Western European).
///
/// Entry `i` holds the code point for byte `0x80 + i`. Every entry equals its
/// own byte value except at A4, A6, A8, B4, B8, BC, BD and BE.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const ISO_8859_15: [u16; 128] = [
    // 0x80–0x8F: C1 controls
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F: C1 controls
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF: A4, A6, A8 are remapped
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
    0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    // 0xB0–0xBF: B4, B8, BC, BD, BE are remapped
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
    0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
    // 0xC0–0xCF: identity from here through 0xFF
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    // 0xD0–0xDF
    0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    // 0xE0–0xEF
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    // 0xF0–0xFF
    0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF,
];
288
/// ISO-8859-16 (Latin-10, South-Eastern European).
///
/// Entry `i` holds the code point for byte `0x80 + i`.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const ISO_8859_16: [u16; 128] = [
    // 0x80–0x8F: C1 controls
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F: C1 controls
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7,
    0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
    // 0xB0–0xBF
    0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
    0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
    // 0xC0–0xCF
    0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    // 0xD0–0xDF
    0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A,
    0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
    // 0xE0–0xEF
    0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    // 0xF0–0xFF
    0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B,
    0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF,
];
308
/// KOI8-R (Russian).
///
/// Entry `i` holds the code point for byte `0x80 + i`.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const KOI8_R: [u16; 128] = [
    // 0x80–0x8F: box drawing and block elements
    0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
    0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
    // 0x90–0x9F: shades, mathematical symbols, punctuation
    0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
    0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
    // 0xA0–0xAF: double-line box drawing, with U+0451 at A3
    0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
    0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,
    // 0xB0–0xBF: double-line box drawing, with U+0401 at B3 and U+00A9 at BF
    0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
    0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,
    // 0xC0–0xCF: Cyrillic lowercase
    0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
    0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
    // 0xD0–0xDF: Cyrillic lowercase (continued)
    0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
    0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
    // 0xE0–0xEF: Cyrillic uppercase
    0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
    0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
    // 0xF0–0xFF: Cyrillic uppercase (continued)
    0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
    0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A,
];
329
/// KOI8-U (Ukrainian), as defined by the WHATWG `index-koi8-u` table.
///
/// Entry `i` holds the code point for byte `0x80 + i`. The WHATWG index is
/// not identical to RFC 2319: bytes 0xAE and 0xBE map to U+045E and U+040E
/// (Belarusian short U) rather than to box-drawing characters.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const KOI8_U: [u16; 128] = [
    // 0x80–0x8F: box drawing and block elements (same as KOI8-R)
    0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
    0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
    // 0x90–0x9F (same as KOI8-R)
    0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
    0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
    // 0xA0–0xAF: differs from KOI8-R at A4, A6, A7, AD, AE
    0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
    0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x045E, 0x255E,
    // 0xB0–0xBF: differs from KOI8-R at B4, B6, B7, BD, BE
    0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
    0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x040E, 0x00A9,
    // 0xC0–0xCF: Cyrillic lowercase (0xC0–0xFF are the same as KOI8-R)
    0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
    0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
    // 0xD0–0xDF: Cyrillic lowercase (continued)
    0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
    0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
    // 0xE0–0xEF: Cyrillic uppercase
    0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
    0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
    // 0xF0–0xFF: Cyrillic uppercase (continued)
    0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
    0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A,
];
349
/// macintosh (Mac OS Roman), as defined by the WHATWG `index-macintosh` table.
///
/// Entry `i` holds the code point for byte `0x80 + i`. Byte 0xBD maps to
/// U+03A9 (GREEK CAPITAL LETTER OMEGA), not the compatibility character
/// U+2126 (OHM SIGN).
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const MACINTOSH: [u16; 128] = [
    // 0x80–0x8F
    0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
    0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
    // 0x90–0x9F
    0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
    0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
    // 0xA0–0xAF
    0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
    0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
    // 0xB0–0xBF
    0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
    0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
    // 0xC0–0xCF
    0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
    0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
    // 0xD0–0xDF
    0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
    0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
    // 0xE0–0xEF
    0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
    0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
    // 0xF0–0xFF
    0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
    0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
];
370
/// IBM866 (DOS Cyrillic).
///
/// Entry `i` holds the code point for byte `0x80 + i`.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const IBM866: [u16; 128] = [
    // 0x80–0x8F: Cyrillic uppercase А–П
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
    // 0x90–0x9F: Cyrillic uppercase Р–Я
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
    // 0xA0–0xAF: Cyrillic lowercase а–п
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
    // 0xB0–0xBF: shades and box drawing
    0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
    0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
    // 0xC0–0xCF: box drawing
    0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
    0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
    // 0xD0–0xDF: box drawing and block elements
    0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
    0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
    // 0xE0–0xEF: Cyrillic lowercase р–я
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
    // 0xF0–0xFF
    0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E,
    0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0,
];
391
/// windows-874 (Thai), as defined by the WHATWG `index-windows-874` table.
///
/// Entry `i` holds the code point for byte `0x80 + i`. Positions in
/// 0x80–0x9F that the code page leaves unassigned map to the C1 control with
/// the same value, as the WHATWG index specifies. Only 0xDB–0xDE and
/// 0xFC–0xFF are unmapped (`0xFFFD`).
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const WINDOWS_874: [u16; 128] = [
    // 0x80–0x8F
    0x20AC, 0x0081, 0x0082, 0x0083, 0x0084, 0x2026, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F
    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
    0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
    // 0xB0–0xBF
    0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
    0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
    // 0xC0–0xCF
    0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
    0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
    // 0xD0–0xDF
    0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
    0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
    // 0xE0–0xEF
    0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
    0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
    // 0xF0–0xFF
    0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
    0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
];
412
/// windows-1250 (Central European), as defined by the WHATWG
/// `index-windows-1250` table.
///
/// Entry `i` holds the code point for byte `0x80 + i`. Bytes 0x81, 0x83,
/// 0x88, 0x90 and 0x98, which the code page leaves unassigned, map to the C1
/// control with the same value as the WHATWG index specifies, so every byte
/// has a mapping.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const WINDOWS_1250: [u16; 128] = [
    // 0x80–0x8F
    0x20AC, 0x0081, 0x201A, 0x0083, 0x201E, 0x2026, 0x2020, 0x2021,
    0x0088, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,
    // 0x90–0x9F
    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    0x0098, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,
    // 0xA0–0xAF
    0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,
    // 0xB0–0xBF
    0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,
    // 0xC0–0xCF
    0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
    // 0xD0–0xDF
    0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
    0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
    // 0xE0–0xEF
    0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
    // 0xF0–0xFF
    0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
    0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9,
];
433
/// windows-1251 (Cyrillic), as defined by the WHATWG `index-windows-1251`
/// table.
///
/// Entry `i` holds the code point for byte `0x80 + i`. Byte 0x98, which the
/// code page leaves unassigned, maps to the C1 control U+0098 as the WHATWG
/// index specifies, so every byte has a mapping.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const WINDOWS_1251: [u16; 128] = [
    // 0x80–0x8F
    0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
    0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
    // 0x90–0x9F
    0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    0x0098, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
    // 0xA0–0xAF
    0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
    0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
    // 0xB0–0xBF
    0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
    0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
    // 0xC0–0xCF: А–П
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
    // 0xD0–0xDF: Р–Я
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
    // 0xE0–0xEF: а–п
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
    // 0xF0–0xFF: р–я
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
];
454
/// windows-1253 (Greek), as defined by the WHATWG `index-windows-1253` table.
///
/// Entry `i` holds the code point for byte `0x80 + i`. Positions in
/// 0x80–0x9F that the code page leaves unassigned map to the C1 control with
/// the same value, as the WHATWG index specifies. Only 0xAA, 0xD2 and 0xFF
/// are unmapped (`0xFFFD`).
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const WINDOWS_1253: [u16; 128] = [
    // 0x80–0x8F
    0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    0x0088, 0x2030, 0x008A, 0x2039, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F
    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    0x0098, 0x2122, 0x009A, 0x203A, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,
    // 0xB0–0xBF
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,
    0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
    // 0xC0–0xCF
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
    // 0xD0–0xDF
    0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
    0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
    // 0xE0–0xEF
    0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
    0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
    // 0xF0–0xFF
    0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
    0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD,
];
475
/// windows-1254 (Turkish), as defined by the WHATWG `index-windows-1254`
/// table.
///
/// Entry `i` holds the code point for byte `0x80 + i`. Bytes 0x81,
/// 0x8D–0x90, 0x9D and 0x9E, which the code page leaves unassigned, map to
/// the C1 control with the same value as the WHATWG index specifies, so every
/// byte has a mapping.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const WINDOWS_1254: [u16; 128] = [
    // 0x80–0x8F
    0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F
    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x009E, 0x0178,
    // 0xA0–0xAF
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    // 0xB0–0xBF
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
    // 0xC0–0xCF
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    // 0xD0–0xDF: differs from windows-1252 at D0, DD, DE
    0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
    // 0xE0–0xEF
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    // 0xF0–0xFF: differs from windows-1252 at F0, FD, FE
    0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF,
];
496
/// windows-1255 (Hebrew), as defined by the WHATWG `index-windows-1255`
/// table.
///
/// Entry `i` holds the code point for byte `0x80 + i`. Positions in
/// 0x80–0x9F that the code page leaves unassigned map to the C1 control with
/// the same value, as the WHATWG index specifies. Only 0xD9–0xDF, 0xFB, 0xFC
/// and 0xFF are unmapped (`0xFFFD`).
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const WINDOWS_1255: [u16; 128] = [
    // 0x80–0x8F
    0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    0x02C6, 0x2030, 0x008A, 0x2039, 0x008C, 0x008D, 0x008E, 0x008F,
    // 0x90–0x9F
    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    0x02DC, 0x2122, 0x009A, 0x203A, 0x009C, 0x009D, 0x009E, 0x009F,
    // 0xA0–0xAF
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    // 0xB0–0xBF
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
    // 0xC0–0xCF
    0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
    0x05B8, 0x05B9, 0x05BA, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
    // 0xD0–0xDF
    0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,
    0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    // 0xE0–0xEF
    0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
    0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
    // 0xF0–0xFF
    0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
    0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD,
];
517
/// windows-1256 (Arabic).
///
/// Entry `i` holds the code point for byte `0x80 + i`.
// Hand-formatted as one 16-byte row per range comment; skip rustfmt so the layout is preserved.
#[rustfmt::skip]
pub(crate) const WINDOWS_1256: [u16; 128] = [
    // 0x80–0x8F
    0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,
    // 0x90–0x9F
    0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,
    // 0xA0–0xAF
    0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    // 0xB0–0xBF
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,
    // 0xC0–0xCF
    0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
    // 0xD0–0xDF
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,
    0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,
    // 0xE0–0xEF
    0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,
    // 0xF0–0xFF
    0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,
    0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2,
];
538
/// windows-1257 (Baltic), following the WHATWG `index-windows-1257` table.
///
/// Entry `i` is the code point for byte `0x80 + i`. Bytes in 0x80–0x9F that the
/// Windows code page leaves undefined decode to the C1 control code point with
/// the same value (0x81 → U+0081, 0x83 → U+0083, …), exactly as in
/// `WINDOWS_1252`. Only 0xA1 and 0xA5 are unmapped; those hold the `0xFFFD`
/// sentinel that `decode_single_byte` treats as an error.
pub(crate) const WINDOWS_1257: [u16; 128] = [
    // 0x80–0x87
    0x20AC, 0x0081, 0x201A, 0x0083, 0x201E, 0x2026, 0x2020, 0x2021,
    // 0x88–0x8F
    0x0088, 0x2030, 0x008A, 0x2039, 0x008C, 0x00A8, 0x02C7, 0x00B8,
    // 0x90–0x97
    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    // 0x98–0x9F
    0x0098, 0x2122, 0x009A, 0x203A, 0x009C, 0x00AF, 0x02DB, 0x009F,
    // 0xA0–0xA7: 0xA1 and 0xA5 are unmapped
    0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7,
    // 0xA8–0xAF
    0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
    // 0xB0–0xB7
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    // 0xB8–0xBF
    0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
    // 0xC0–0xC7
    0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
    // 0xC8–0xCF
    0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
    // 0xD0–0xD7
    0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
    // 0xD8–0xDF
    0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
    // 0xE0–0xE7
    0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
    // 0xE8–0xEF
    0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
    // 0xF0–0xF7
    0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
    // 0xF8–0xFF
    0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9,
];
559
/// windows-1258 (Vietnamese), following the WHATWG `index-windows-1258` table.
///
/// Entry `i` is the code point for byte `0x80 + i`. Bytes in 0x80–0x9F that the
/// Windows code page leaves undefined decode to the C1 control code point with
/// the same value (0x81 → U+0081, 0x8A → U+008A, …), exactly as in
/// `WINDOWS_1252`. As a result every byte is mapped and the table contains no
/// `0xFFFD` entries.
pub(crate) const WINDOWS_1258: [u16; 128] = [
    // 0x80–0x87
    0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    // 0x88–0x8F
    0x02C6, 0x2030, 0x008A, 0x2039, 0x0152, 0x008D, 0x008E, 0x008F,
    // 0x90–0x97
    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    // 0x98–0x9F
    0x02DC, 0x2122, 0x009A, 0x203A, 0x0153, 0x009D, 0x009E, 0x0178,
    // 0xA0–0xA7
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    // 0xA8–0xAF
    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    // 0xB0–0xB7
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    // 0xB8–0xBF
    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
    // 0xC0–0xC7
    0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    // 0xC8–0xCF (0xCC is COMBINING GRAVE ACCENT)
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF,
    // 0xD0–0xD7 (0xD2 is COMBINING HOOK ABOVE)
    0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,
    // 0xD8–0xDF (0xDE is COMBINING TILDE)
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF,
    // 0xE0–0xE7
    0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    // 0xE8–0xEF (0xEC is COMBINING ACUTE ACCENT)
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF,
    // 0xF0–0xF7 (0xF2 is COMBINING DOT BELOW)
    0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
    // 0xF8–0xFF (0xFE is DONG SIGN)
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF,
];
580
/// x-mac-cyrillic
///
/// Entry `i` is the code point for byte `0x80 + i`. Every byte is mapped; the
/// table contains no `0xFFFD` entries.
pub(crate) const X_MAC_CYRILLIC: [u16; 128] = [
    // 0x80–0x87: Cyrillic capitals U+0410–U+042F fill 0x80–0x9F
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    // 0x88–0x8F
    0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
    // 0x90–0x97
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    // 0x98–0x9F
    0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
    // 0xA0–0xA7
    0x2020, 0x00B0, 0x0490, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x0406,
    // 0xA8–0xAF
    0x00AE, 0x00A9, 0x2122, 0x0402, 0x0452, 0x2260, 0x0403, 0x0453,
    // 0xB0–0xB7
    0x221E, 0x00B1, 0x2264, 0x2265, 0x0456, 0x00B5, 0x0491, 0x0408,
    // 0xB8–0xBF
    0x0404, 0x0454, 0x0407, 0x0457, 0x0409, 0x0459, 0x040A, 0x045A,
    // 0xC0–0xC7
    0x0458, 0x0405, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
    // 0xC8–0xCF
    0x00BB, 0x2026, 0x00A0, 0x040B, 0x045B, 0x040C, 0x045C, 0x0455,
    // 0xD0–0xD7
    0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x201E,
    // 0xD8–0xDF
    0x040E, 0x045E, 0x040F, 0x045F, 0x2116, 0x0401, 0x0451, 0x044F,
    // 0xE0–0xE7: Cyrillic smalls U+0430–U+044E fill 0xE0–0xFE
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    // 0xE8–0xEF
    0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
    // 0xF0–0xF7
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    // 0xF8–0xFF (0xFF is EURO SIGN)
    0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x20AC,
];
601
602/// Return the lookup table for a single-byte encoding variant.
603pub(crate) fn table_for(encoding: &crate::Encoding) -> Option<&'static [u16; 128]> {
604 use crate::Encoding::*;
605 match encoding {
606 Windows1252 => Some(&WINDOWS_1252),
607 Iso8859_2 => Some(&ISO_8859_2),
608 Iso8859_3 => Some(&ISO_8859_3),
609 Iso8859_4 => Some(&ISO_8859_4),
610 Iso8859_5 => Some(&ISO_8859_5),
611 Iso8859_6 => Some(&ISO_8859_6),
612 Iso8859_7 => Some(&ISO_8859_7),
613 Iso8859_8 | Iso8859_8I => Some(&ISO_8859_8),
614 Iso8859_10 => Some(&ISO_8859_10),
615 Iso8859_13 => Some(&ISO_8859_13),
616 Iso8859_14 => Some(&ISO_8859_14),
617 Iso8859_15 => Some(&ISO_8859_15),
618 Iso8859_16 => Some(&ISO_8859_16),
619 Koi8R => Some(&KOI8_R),
620 Koi8U => Some(&KOI8_U),
621 Macintosh => Some(&MACINTOSH),
622 Ibm866 => Some(&IBM866),
623 Windows874 => Some(&WINDOWS_874),
624 Windows1250 => Some(&WINDOWS_1250),
625 Windows1251 => Some(&WINDOWS_1251),
626 Windows1253 => Some(&WINDOWS_1253),
627 Windows1254 => Some(&WINDOWS_1254),
628 Windows1255 => Some(&WINDOWS_1255),
629 Windows1256 => Some(&WINDOWS_1256),
630 Windows1257 => Some(&WINDOWS_1257),
631 Windows1258 => Some(&WINDOWS_1258),
632 XMacCyrillic => Some(&X_MAC_CYRILLIC),
633 _ => None,
634 }
635}
636
637// ---------------------------------------------------------------------------
638// Tests
639// ---------------------------------------------------------------------------
640
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::EncodingError;

    /// Decode `bytes` with `table` in replacement mode.
    ///
    /// `decode_single_byte` only returns `Err` in fatal mode, so the unwrap
    /// cannot fail here.
    fn decode_replace(bytes: &[u8], table: &[u16; 128]) -> String {
        decode_single_byte(bytes, table, "test", ErrorMode::Replacement).unwrap()
    }

    // -- Basic ASCII passthrough --

    #[test]
    fn ascii_passthrough() {
        assert_eq!(
            decode_replace(b"Hello, world!", &WINDOWS_1252),
            "Hello, world!"
        );
    }

    #[test]
    fn empty_input() {
        assert_eq!(decode_replace(b"", &WINDOWS_1252), "");
    }

    #[test]
    fn null_byte() {
        assert_eq!(decode_replace(&[0x00], &WINDOWS_1252), "\0");
    }

    // -- Windows-1252 --

    #[test]
    fn windows_1252_euro() {
        // 0x80 → U+20AC (€)
        assert_eq!(decode_replace(&[0x80], &WINDOWS_1252), "\u{20AC}");
    }

    #[test]
    fn windows_1252_smart_quotes() {
        // 0x93 → U+201C (“) and 0x94 → U+201D (”)
        assert_eq!(
            decode_replace(&[0x93, 0x94], &WINDOWS_1252),
            "\u{201C}\u{201D}"
        );
    }

    #[test]
    fn windows_1252_trademark() {
        // 0x99 → U+2122 (™)
        assert_eq!(decode_replace(&[0x99], &WINDOWS_1252), "\u{2122}");
    }

    #[test]
    fn windows_1252_high_latin() {
        // 0xE9 → U+00E9 (é)
        assert_eq!(decode_replace(&[0xE9], &WINDOWS_1252), "\u{00E9}");
    }

    #[test]
    fn windows_1252_mixed() {
        // "Caf" + 0xE9 → "Café"
        assert_eq!(
            decode_replace(&[0x43, 0x61, 0x66, 0xE9], &WINDOWS_1252),
            "Caf\u{00E9}"
        );
    }

    // -- ISO-8859-2 (Central European) --

    #[test]
    fn iso_8859_2_polish() {
        // 0xA1 → U+0104 (Ą), 0xB1 → U+0105 (ą)
        assert_eq!(decode_replace(&[0xA1], &ISO_8859_2), "\u{0104}");
        assert_eq!(decode_replace(&[0xB1], &ISO_8859_2), "\u{0105}");
    }

    #[test]
    fn iso_8859_2_czech() {
        // 0xC8 → U+010C (Č), 0xE8 → U+010D (č)
        assert_eq!(decode_replace(&[0xC8], &ISO_8859_2), "\u{010C}");
        assert_eq!(decode_replace(&[0xE8], &ISO_8859_2), "\u{010D}");
    }

    // -- ISO-8859-5 (Cyrillic) --

    #[test]
    fn iso_8859_5_cyrillic() {
        // 0xB0 → U+0410 (А), 0xD0 → U+0430 (а)
        assert_eq!(decode_replace(&[0xB0], &ISO_8859_5), "\u{0410}");
        assert_eq!(decode_replace(&[0xD0], &ISO_8859_5), "\u{0430}");
    }

    // -- ISO-8859-7 (Greek) --

    #[test]
    fn iso_8859_7_greek() {
        // 0xC1 → U+0391 (Α), 0xE1 → U+03B1 (α)
        assert_eq!(decode_replace(&[0xC1], &ISO_8859_7), "\u{0391}");
        assert_eq!(decode_replace(&[0xE1], &ISO_8859_7), "\u{03B1}");
    }

    // -- ISO-8859-15 (Latin-9) --

    #[test]
    fn iso_8859_15_euro() {
        // 0xA4 → U+20AC (€) — differs from ISO-8859-1
        assert_eq!(decode_replace(&[0xA4], &ISO_8859_15), "\u{20AC}");
    }

    #[test]
    fn iso_8859_15_oe_ligature() {
        // 0xBC → U+0152 (Œ), 0xBD → U+0153 (œ)
        assert_eq!(decode_replace(&[0xBC], &ISO_8859_15), "\u{0152}");
        assert_eq!(decode_replace(&[0xBD], &ISO_8859_15), "\u{0153}");
    }

    // -- KOI8-R (Russian) --

    #[test]
    fn koi8_r_cyrillic() {
        // 0xC1 → U+0430 (а), 0xE1 → U+0410 (А)
        assert_eq!(decode_replace(&[0xC1], &KOI8_R), "\u{0430}");
        assert_eq!(decode_replace(&[0xE1], &KOI8_R), "\u{0410}");
    }

    #[test]
    fn koi8_r_copyright() {
        // 0xBF → U+00A9 (©)
        assert_eq!(decode_replace(&[0xBF], &KOI8_R), "\u{00A9}");
    }

    // -- Windows-1251 (Cyrillic) --

    #[test]
    fn windows_1251_cyrillic() {
        // 0xC0 → U+0410 (А), 0xE0 → U+0430 (а)
        assert_eq!(decode_replace(&[0xC0], &WINDOWS_1251), "\u{0410}");
        assert_eq!(decode_replace(&[0xE0], &WINDOWS_1251), "\u{0430}");
    }

    #[test]
    fn windows_1251_euro() {
        // 0x88 → U+20AC (€)
        assert_eq!(decode_replace(&[0x88], &WINDOWS_1251), "\u{20AC}");
    }

    // -- Windows-1255 (Hebrew) --

    #[test]
    fn windows_1255_hebrew() {
        // 0xE0 → U+05D0 (א), 0xFA → U+05EA (ת), 0xA4 → U+20AA (₪)
        assert_eq!(decode_replace(&[0xE0], &WINDOWS_1255), "\u{05D0}");
        assert_eq!(decode_replace(&[0xFA], &WINDOWS_1255), "\u{05EA}");
        assert_eq!(decode_replace(&[0xA4], &WINDOWS_1255), "\u{20AA}");
    }

    #[test]
    fn windows_1255_unmapped() {
        // 0xD9 has no mapping in windows-1255.
        assert_eq!(decode_replace(&[0xD9], &WINDOWS_1255), "\u{FFFD}");
        let err = decode_single_byte(
            &[0x41, 0x42, 0xD9],
            &WINDOWS_1255,
            "windows-1255",
            ErrorMode::Fatal,
        )
        .unwrap_err();
        assert!(matches!(
            err,
            EncodingError::InvalidSequence {
                encoding: "windows-1255",
                position: 2
            }
        ));
    }

    // -- Windows-1256 (Arabic) --

    #[test]
    fn windows_1256_arabic() {
        // 0xC7 → U+0627 (ا), 0x81 → U+067E (پ), 0xFF → U+06D2 (ے)
        assert_eq!(decode_replace(&[0xC7], &WINDOWS_1256), "\u{0627}");
        assert_eq!(decode_replace(&[0x81], &WINDOWS_1256), "\u{067E}");
        assert_eq!(decode_replace(&[0xFF], &WINDOWS_1256), "\u{06D2}");
    }

    // -- Windows-1257 (Baltic) --

    #[test]
    fn windows_1257_baltic() {
        // 0xC0 → U+0104 (Ą), 0xE0 → U+0105 (ą), 0xFF → U+02D9 (˙)
        assert_eq!(decode_replace(&[0xC0], &WINDOWS_1257), "\u{0104}");
        assert_eq!(decode_replace(&[0xE0], &WINDOWS_1257), "\u{0105}");
        assert_eq!(decode_replace(&[0xFF], &WINDOWS_1257), "\u{02D9}");
    }

    #[test]
    fn windows_1257_unmapped() {
        // 0xA1 and 0xA5 have no mapping in windows-1257.
        assert_eq!(
            decode_replace(&[0xA1, 0xA5], &WINDOWS_1257),
            "\u{FFFD}\u{FFFD}"
        );
    }

    // -- Windows-1258 (Vietnamese) --

    #[test]
    fn windows_1258_vietnamese() {
        // 0xD5 → U+01A0 (Ơ), 0xF5 → U+01A1 (ơ), 0xFE → U+20AB (₫)
        assert_eq!(decode_replace(&[0xD5], &WINDOWS_1258), "\u{01A0}");
        assert_eq!(decode_replace(&[0xF5], &WINDOWS_1258), "\u{01A1}");
        assert_eq!(decode_replace(&[0xFE], &WINDOWS_1258), "\u{20AB}");
    }

    // -- macintosh --

    #[test]
    fn macintosh_special() {
        // 0x80 → U+00C4 (Ä), 0xCA → U+00A0 (NBSP)
        assert_eq!(decode_replace(&[0x80], &MACINTOSH), "\u{00C4}");
        assert_eq!(decode_replace(&[0xCA], &MACINTOSH), "\u{00A0}");
    }

    // -- x-mac-cyrillic --

    #[test]
    fn x_mac_cyrillic_letters_and_euro() {
        // 0x80 → U+0410 (А), 0xE0 → U+0430 (а), 0xDF → U+044F (я), 0xFF → U+20AC (€)
        assert_eq!(decode_replace(&[0x80], &X_MAC_CYRILLIC), "\u{0410}");
        assert_eq!(decode_replace(&[0xE0], &X_MAC_CYRILLIC), "\u{0430}");
        assert_eq!(decode_replace(&[0xDF], &X_MAC_CYRILLIC), "\u{044F}");
        assert_eq!(decode_replace(&[0xFF], &X_MAC_CYRILLIC), "\u{20AC}");
    }

    // -- IBM866 --

    #[test]
    fn ibm866_cyrillic() {
        // 0x80 → U+0410 (А), 0xA0 → U+0430 (а)
        assert_eq!(decode_replace(&[0x80], &IBM866), "\u{0410}");
        assert_eq!(decode_replace(&[0xA0], &IBM866), "\u{0430}");
    }

    // -- Unmapped bytes --

    #[test]
    fn unmapped_replacement() {
        // ISO-8859-3 has unmapped bytes, e.g. 0xA5
        assert_eq!(decode_replace(&[0xA5], &ISO_8859_3), "\u{FFFD}");
    }

    #[test]
    fn unmapped_fatal() {
        let err = decode_single_byte(&[0x41, 0xA5], &ISO_8859_3, "ISO-8859-3", ErrorMode::Fatal)
            .unwrap_err();
        assert!(matches!(
            err,
            EncodingError::InvalidSequence {
                encoding: "ISO-8859-3",
                position: 1
            }
        ));
    }

    // -- Table dispatch --

    #[test]
    fn table_for_returns_matching_table() {
        assert_eq!(
            table_for(&crate::Encoding::Windows1252),
            Some(&WINDOWS_1252)
        );
        assert_eq!(
            table_for(&crate::Encoding::Windows1256),
            Some(&WINDOWS_1256)
        );
        assert_eq!(
            table_for(&crate::Encoding::XMacCyrillic),
            Some(&X_MAC_CYRILLIC)
        );
        // The logical and visual ISO-8859-8 variants share one table.
        assert_eq!(
            table_for(&crate::Encoding::Iso8859_8),
            table_for(&crate::Encoding::Iso8859_8I)
        );
    }

    // -- Table sizes --

    /// The `[u16; 128]` type already fixes each length at compile time; this
    /// test mainly keeps an explicit list of every table.
    #[test]
    fn all_tables_128_entries() {
        assert_eq!(WINDOWS_1252.len(), 128);
        assert_eq!(ISO_8859_2.len(), 128);
        assert_eq!(ISO_8859_3.len(), 128);
        assert_eq!(ISO_8859_4.len(), 128);
        assert_eq!(ISO_8859_5.len(), 128);
        assert_eq!(ISO_8859_6.len(), 128);
        assert_eq!(ISO_8859_7.len(), 128);
        assert_eq!(ISO_8859_8.len(), 128);
        assert_eq!(ISO_8859_10.len(), 128);
        assert_eq!(ISO_8859_13.len(), 128);
        assert_eq!(ISO_8859_14.len(), 128);
        assert_eq!(ISO_8859_15.len(), 128);
        assert_eq!(ISO_8859_16.len(), 128);
        assert_eq!(KOI8_R.len(), 128);
        assert_eq!(KOI8_U.len(), 128);
        assert_eq!(MACINTOSH.len(), 128);
        assert_eq!(IBM866.len(), 128);
        assert_eq!(WINDOWS_874.len(), 128);
        assert_eq!(WINDOWS_1250.len(), 128);
        assert_eq!(WINDOWS_1251.len(), 128);
        assert_eq!(WINDOWS_1253.len(), 128);
        assert_eq!(WINDOWS_1254.len(), 128);
        assert_eq!(WINDOWS_1255.len(), 128);
        assert_eq!(WINDOWS_1256.len(), 128);
        assert_eq!(WINDOWS_1257.len(), 128);
        assert_eq!(WINDOWS_1258.len(), 128);
        assert_eq!(X_MAC_CYRILLIC.len(), 128);
    }
}