we (web engine): Experimental web browser project to understand the limits of Claude
at data-urls 858 lines 45 kB view raw
1//! Single-byte encoding decoders per WHATWG Encoding Standard §14.1. 2//! 3//! Each single-byte encoding maps bytes 0x00–0x7F to ASCII and 4//! bytes 0x80–0xFF via a 128-entry lookup table to Unicode codepoints. 5 6use crate::error::{EncodingError, Result}; 7use crate::utf8::ErrorMode; 8 9/// Decode bytes using a single-byte encoding lookup table. 10/// 11/// Bytes 0x00–0x7F are ASCII. Bytes 0x80–0xFF are looked up in `table`. 12/// Table entries of `0xFFFD` indicate unmapped bytes. 13pub(crate) fn decode_single_byte( 14 bytes: &[u8], 15 table: &[u16; 128], 16 encoding_name: &'static str, 17 mode: ErrorMode, 18) -> Result<String> { 19 let mut output = String::with_capacity(bytes.len()); 20 for (i, &byte) in bytes.iter().enumerate() { 21 if byte < 0x80 { 22 output.push(byte as char); 23 } else { 24 let cp = table[(byte - 0x80) as usize]; 25 if cp == 0xFFFD { 26 if mode == ErrorMode::Fatal { 27 return Err(EncodingError::InvalidSequence { 28 encoding: encoding_name, 29 position: i, 30 }); 31 } 32 output.push('\u{FFFD}'); 33 } else { 34 output.push(char::from_u32(cp as u32).unwrap_or('\u{FFFD}')); 35 } 36 } 37 } 38 Ok(output) 39} 40 41// --------------------------------------------------------------------------- 42// WHATWG single-byte encoding index tables 43// Each table maps bytes 0x80–0xFF (index 0–127) to Unicode codepoints. 44// 0xFFFD = unmapped byte position. 45// --------------------------------------------------------------------------- 46 47/// windows-1252 (WHATWG index) 48/// Also serves as the encoding for labels: iso-8859-1, us-ascii, latin1, etc. 49pub(crate) const WINDOWS_1252: [u16; 128] = [ 50 // 0x80–0x8F 51 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 52 0x0152, 0x008D, 0x017D, 0x008F, // 0x90–0x9F 53 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 54 0x0153, 0x009D, 0x017E, 0x0178, // 0xA0–0xAF 55 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 56 0x00AC, 0x00AD, 0x00AE, 0x00AF, // 0xB0–0xBF 57 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 58 0x00BC, 0x00BD, 0x00BE, 0x00BF, // 0xC0–0xCF 59 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 60 0x00CC, 0x00CD, 0x00CE, 0x00CF, // 0xD0–0xDF 61 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 62 0x00DC, 0x00DD, 0x00DE, 0x00DF, // 0xE0–0xEF 63 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 64 0x00EC, 0x00ED, 0x00EE, 0x00EF, // 0xF0–0xFF 65 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 66 0x00FC, 0x00FD, 0x00FE, 0x00FF, 67]; 68 69/// ISO-8859-2 (Latin-2, Central European) 70pub(crate) const ISO_8859_2: [u16; 128] = [ 71 // 0x80–0x8F: C1 controls 72 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 73 0x008C, 0x008D, 0x008E, 0x008F, // 0x90–0x9F: C1 controls 74 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 75 0x009C, 0x009D, 0x009E, 0x009F, // 0xA0–0xAF 76 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7, 0x00A8, 0x0160, 0x015E, 0x0164, 77 0x0179, 0x00AD, 0x017D, 0x017B, // 0xB0–0xBF 78 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7, 0x00B8, 0x0161, 0x015F, 0x0165, 79 0x017A, 0x02DD, 0x017E, 0x017C, // 0xC0–0xCF 80 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 81 0x011A, 0x00CD, 0x00CE, 0x010E, // 0xD0–0xDF 82 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, 0x0158, 0x016E, 0x00DA, 0x0170, 83 0x00DC, 0x00DD, 0x0162, 0x00DF, // 0xE0–0xEF 84 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 85 0x011B, 0x00ED, 0x00EE, 0x010F, // 0xF0–0xFF 86 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159, 0x016F, 0x00FA, 0x0171, 87 0x00FC, 0x00FD, 0x0163, 0x02D9, 88]; 89 90/// ISO-8859-3 (Latin-3, South European) 91pub(crate) const ISO_8859_3: [u16; 128] = [ 92 // 0x80–0x9F: C1 controls 93 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 94 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 95 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, // 0xA0–0xAF 96 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7, 0x00A8, 0x0130, 0x015E, 0x011E, 97 0x0134, 0x00AD, 0xFFFD, 0x017B, // 0xB0–0xBF 98 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7, 0x00B8, 0x0131, 0x015F, 0x011F, 99 0x0135, 0x00BD, 0xFFFD, 0x017C, // 0xC0–0xCF 100 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 101 0x00CC, 0x00CD, 0x00CE, 0x00CF, // 0xD0–0xDF 102 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7, 0x011C, 0x00D9, 0x00DA, 0x00DB, 103 0x00DC, 0x016C, 0x015C, 0x00DF, // 0xE0–0xEF 104 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 105 0x00EC, 0x00ED, 0x00EE, 0x00EF, // 0xF0–0xFF 106 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7, 0x011D, 0x00F9, 0x00FA, 0x00FB, 107 0x00FC, 0x016D, 0x015D, 0x02D9, 108]; 109 110/// ISO-8859-4 (Latin-4, North European) 111pub(crate) const ISO_8859_4: [u16; 128] = [ 112 // 0x80–0x9F: C1 controls 113 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 114 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 115 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, // 0xA0–0xAF 116 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7, 0x00A8, 0x0160, 0x0112, 0x0122, 117 0x0166, 0x00AD, 0x017D, 0x00AF, // 0xB0–0xBF 118 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7, 0x00B8, 0x0161, 0x0113, 0x0123, 119 0x0167, 0x014A, 0x017E, 0x014B, // 0xC0–0xCF 120 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 121 0x0116, 0x00CD, 0x00CE, 0x012A, // 0xD0–0xDF 122 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x0172, 0x00DA, 0x00DB, 123 0x00DC, 0x0168, 0x016A, 0x00DF, // 0xE0–0xEF 124 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 125 0x0117, 0x00ED, 0x00EE, 0x012B, // 0xF0–0xFF 126 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x0173, 0x00FA, 0x00FB, 127 0x00FC, 0x0169, 0x016B, 0x02D9, 128]; 129 130/// ISO-8859-5 (Cyrillic) 131pub(crate) const ISO_8859_5: [u16; 128] = [ 132 // 0x80–0x9F: C1 controls 133 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 134 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 135 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, // 0xA0–0xAF 136 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, 0x040A, 0x040B, 137 0x040C, 0x00AD, 0x040E, 0x040F, // 0xB0–0xBF 138 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 139 0x041C, 0x041D, 0x041E, 0x041F, // 0xC0–0xCF 140 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 141 0x042C, 0x042D, 0x042E, 0x042F, // 0xD0–0xDF 142 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 143 0x043C, 0x043D, 0x043E, 0x043F, // 0xE0–0xEF 144 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 145 0x044C, 0x044D, 0x044E, 0x044F, // 0xF0–0xFF 146 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, 0x0459, 0x045A, 0x045B, 147 0x045C, 0x00A7, 0x045E, 0x045F, 148]; 149 150/// ISO-8859-6 (Arabic) 151pub(crate) const ISO_8859_6: [u16; 128] = [ 152 // 0x80–0x9F: C1 controls 153 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 154 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 155 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, // 0xA0–0xAF 156 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 157 0x060C, 0x00AD, 0xFFFD, 0xFFFD, // 0xB0–0xBF 158 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 159 0xFFFD, 0xFFFD, 0xFFFD, 0x061F, // 0xC0–0xCF 160 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 161 0x062C, 0x062D, 0x062E, 0x062F, // 0xD0–0xDF 162 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 0x0638, 0x0639, 0x063A, 0xFFFD, 163 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, // 0xE0–0xEF 164 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 0x0648, 0x0649, 0x064A, 0x064B, 165 0x064C, 0x064D, 0x064E, 0x064F, // 0xF0–0xFF 166 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 167 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 168]; 169 170/// ISO-8859-7 (Greek) 171pub(crate) const ISO_8859_7: [u16; 128] = [ 172 // 0x80–0x9F: C1 controls 173 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 174 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 175 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, // 0xA0–0xAF 176 0x00A0, 0x2018, 0x2019, 0x00A3, 0x20AC, 0x20AF, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x037A, 0x00AB, 177 0x00AC, 0x00AD, 0xFFFD, 0x2015, // 0xB0–0xBF 178 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7, 0x0388, 0x0389, 0x038A, 0x00BB, 179 0x038C, 0x00BD, 0x038E, 0x038F, // 0xC0–0xCF 180 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 181 0x039C, 0x039D, 0x039E, 0x039F, // 0xD0–0xDF 182 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x03AA, 0x03AB, 183 0x03AC, 0x03AD, 0x03AE, 0x03AF, // 0xE0–0xEF 184 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 185 0x03BC, 0x03BD, 0x03BE, 0x03BF, // 0xF0–0xFF 186 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB, 187 0x03CC, 0x03CD, 0x03CE, 0xFFFD, 188]; 189 190/// ISO-8859-8 (Hebrew) — also used for ISO-8859-8-I (logical order) 191pub(crate) const ISO_8859_8: [u16; 128] = [ 192 // 0x80–0x9F: C1 controls 193 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 194 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 195 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, // 0xA0–0xAF 196 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00D7, 0x00AB, 197 0x00AC, 0x00AD, 0x00AE, 0x00AF, // 0xB0–0xBF 198 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00F7, 0x00BB, 199 0x00BC, 0x00BD, 0x00BE, 0xFFFD, // 0xC0–0xCF 200 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 201 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, // 0xD0–0xDF 202 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 203 0xFFFD, 0xFFFD, 0xFFFD, 0x2017, // 0xE0–0xEF 204 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 205 0x05DC, 0x05DD, 0x05DE, 0x05DF, // 0xF0–0xFF 206 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 207 0xFFFD, 0x200E, 0x200F, 0xFFFD, 208]; 209 210/// ISO-8859-10 (Latin-6, Nordic) 211pub(crate) const ISO_8859_10: [u16; 128] = [ 212 // 0x80–0x9F: C1 controls 213 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 214 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 215 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, // 0xA0–0xAF 216 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7, 0x013B, 0x0110, 0x0160, 0x0166, 217 0x017D, 0x00AD, 0x016A, 0x014A, // 0xB0–0xBF 218 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7, 0x013C, 0x0111, 0x0161, 0x0167, 219 0x017E, 0x2015, 0x016B, 0x014B, // 0xC0–0xCF 220 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 221 0x0116, 0x00CD, 0x00CE, 0x00CF, // 0xD0–0xDF 222 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168, 0x00D8, 0x0172, 0x00DA, 0x00DB, 223 0x00DC, 0x00DD, 0x00DE, 0x00DF, // 0xE0–0xEF 224 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 225 0x0117, 0x00ED, 0x00EE, 0x00EF, // 0xF0–0xFF 226 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169, 0x00F8, 0x0173, 0x00FA, 0x00FB, 227 0x00FC, 0x00FD, 0x00FE, 0x0138, 228]; 229 230/// ISO-8859-13 (Latin-7, Baltic Rim) 231pub(crate) const ISO_8859_13: [u16; 128] = [ 232 // 0x80–0x9F: C1 controls 233 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 234 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 235 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, // 0xA0–0xAF 236 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB, 237 0x00AC, 0x00AD, 0x00AE, 0x00C6, // 0xB0–0xBF 238 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7, 0x00F8, 0x00B9, 0x0157, 0x00BB, 239 0x00BC, 0x00BD, 0x00BE, 0x00E6, // 0xC0–0xCF 240 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, 0x010C, 0x00C9, 0x0179, 0x0116, 241 0x0122, 0x0136, 0x012A, 0x013B, // 0xD0–0xDF 242 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, 0x0172, 0x0141, 0x015A, 0x016A, 243 0x00DC, 0x017B, 0x017D, 0x00DF, // 0xE0–0xEF 244 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, 0x010D, 0x00E9, 0x017A, 0x0117, 245 0x0123, 0x0137, 0x012B, 0x013C, // 0xF0–0xFF 246 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173, 0x0142, 0x015B, 0x016B, 247 0x00FC, 0x017C, 0x017E, 0x2019, 248]; 249 250/// ISO-8859-14 (Latin-8, Celtic) 251pub(crate) const ISO_8859_14: [u16; 128] = [ 252 // 0x80–0x9F: C1 controls 253 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 254 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 255 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, // 0xA0–0xAF 256 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 257 0x1EF2, 0x00AD, 0x00AE, 0x0178, // 0xB0–0xBF 258 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, 0x1E81, 0x1E57, 0x1E83, 0x1E60, 259 0x1EF3, 0x1E84, 0x1E85, 0x1E61, // 0xC0–0xCF 260 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 261 0x00CC, 0x00CD, 0x00CE, 0x00CF, // 0xD0–0xDF 262 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 263 0x00DC, 0x00DD, 0x0176, 0x00DF, // 0xE0–0xEF 264 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 265 0x00EC, 0x00ED, 0x00EE, 0x00EF, // 0xF0–0xFF 266 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 267 0x00FC, 0x00FD, 0x0177, 0x00FF, 268]; 269 270/// ISO-8859-15 (Latin-9, updated Western European) 271pub(crate) const ISO_8859_15: [u16; 128] = [ 272 // 0x80–0x9F: C1 controls 273 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 274 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 275 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 276 // 0xA0–0xAF: Differs from 8859-1 at A4, A6, A8, B4, B8, BC, BD, BE 277 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x00AA, 0x00AB, 278 0x00AC, 0x00AD, 0x00AE, 0x00AF, // 0xB0–0xBF 279 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7, 0x017E, 0x00B9, 0x00BA, 0x00BB, 280 0x0152, 0x0153, 0x0178, 0x00BF, // 0xC0–0xFF: same as ISO-8859-1 281 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 282 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 283 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x00E3, 284 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 285 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 286 0x00FC, 0x00FD, 0x00FE, 0x00FF, 287]; 288 289/// ISO-8859-16 (Latin-10, South-Eastern European) 290pub(crate) const ISO_8859_16: [u16; 128] = [ 291 // 0x80–0x9F: C1 controls 292 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 293 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 294 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, // 0xA0–0xAF 295 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x0218, 0x00AB, 296 0x0179, 0x00AD, 0x017A, 0x017B, // 0xB0–0xBF 297 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7, 0x017E, 0x010D, 0x0219, 0x00BB, 298 0x0152, 0x0153, 0x0178, 0x017C, // 0xC0–0xCF 299 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 300 0x00CC, 0x00CD, 0x00CE, 0x00CF, // 0xD0–0xDF 301 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A, 0x0170, 0x00D9, 0x00DA, 0x00DB, 302 0x00DC, 0x0118, 0x021A, 0x00DF, // 0xE0–0xEF 303 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 304 0x00EC, 0x00ED, 0x00EE, 0x00EF, // 0xF0–0xFF 305 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, 0x0171, 0x00F9, 0x00FA, 0x00FB, 306 0x00FC, 0x0119, 0x021B, 0x00FF, 307]; 308 309/// KOI8-R (Russian) 310pub(crate) const KOI8_R: [u16; 128] = [ 311 // 0x80–0x8F: box drawing 312 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, 0x252C, 0x2534, 0x253C, 0x2580, 313 0x2584, 0x2588, 0x258C, 0x2590, // 0x90–0x9F: more box drawing + block elements 314 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, 0x2264, 0x2265, 0x00A0, 0x2321, 315 0x00B0, 0x00B2, 0x00B7, 0x00F7, // 0xA0–0xAF 316 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255A, 317 0x255B, 0x255C, 0x255D, 0x255E, // 0xB0–0xBF 318 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 0x2568, 0x2569, 319 0x256A, 0x256B, 0x256C, 0x00A9, // 0xC0–0xCF: Cyrillic lowercase 320 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043A, 321 0x043B, 0x043C, 0x043D, 0x043E, // 0xD0–0xDF 322 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044C, 0x044B, 0x0437, 0x0448, 323 0x044D, 0x0449, 0x0447, 0x044A, // 0xE0–0xEF: Cyrillic uppercase 324 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0419, 0x041A, 325 0x041B, 0x041C, 0x041D, 0x041E, // 0xF0–0xFF 326 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042C, 0x042B, 0x0417, 0x0428, 327 0x042D, 0x0429, 0x0427, 0x042A, 328]; 329 330/// KOI8-U (Ukrainian) 331pub(crate) const KOI8_U: [u16; 128] = [ 332 // 0x80–0x8F: box drawing (same as KOI8-R) 333 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, 0x252C, 0x2534, 0x253C, 0x2580, 334 0x2584, 0x2588, 0x258C, 0x2590, // 0x90–0x9F 335 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, 0x2264, 0x2265, 0x00A0, 0x2321, 336 0x00B0, 0x00B2, 0x00B7, 0x00F7, 337 // 0xA0–0xAF: differs from KOI8-R at A3, A4, A6, A7 338 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457, 0x2557, 0x2558, 0x2559, 0x255A, 339 0x255B, 0x0491, 0x255D, 0x255E, // 0xB0–0xBF 340 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407, 0x2566, 0x2567, 0x2568, 0x2569, 341 0x256A, 0x0490, 0x256C, 0x00A9, // 0xC0–0xFF: Cyrillic (same as KOI8-R) 342 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043A, 343 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 344 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, 0x042E, 0x0410, 0x0411, 0x0426, 345 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 346 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042C, 0x042B, 0x0417, 0x0428, 347 0x042D, 0x0429, 0x0427, 0x042A, 348]; 349 350/// macintosh (Mac OS Roman) 351pub(crate) const MACINTOSH: [u16; 128] = [ 352 // 0x80–0x8F 353 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, 0x00E0, 0x00E2, 0x00E4, 0x00E3, 354 0x00E5, 0x00E7, 0x00E9, 0x00E8, // 0x90–0x9F 355 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, 0x00F2, 0x00F4, 0x00F6, 0x00F5, 356 0x00FA, 0x00F9, 0x00FB, 0x00FC, // 0xA0–0xAF 357 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, 0x00AE, 0x00A9, 0x2122, 0x00B4, 358 0x00A8, 0x2260, 0x00C6, 0x00D8, // 0xB0–0xBF 359 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, 0x220F, 0x03C0, 0x222B, 0x00AA, 360 0x00BA, 0x2126, 0x00E6, 0x00F8, // 0xC0–0xCF 361 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x00C0, 362 0x00C3, 0x00D5, 0x0152, 0x0153, // 0xD0–0xDF 363 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, 0x00FF, 0x0178, 0x2044, 0x20AC, 364 0x2039, 0x203A, 0xFB01, 0xFB02, // 0xE0–0xEF 365 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, 0x00CB, 0x00C8, 0x00CD, 0x00CE, 366 0x00CF, 0x00CC, 0x00D3, 0x00D4, // 0xF0–0xFF 367 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, 0x00AF, 0x02D8, 0x02D9, 0x02DA, 368 0x00B8, 0x02DD, 0x02DB, 0x02C7, 369]; 370 371/// IBM866 (DOS Cyrillic) 372pub(crate) const IBM866: [u16; 128] = [ 373 // 0x80–0x8F: Cyrillic uppercase А–П 374 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 375 0x041C, 0x041D, 0x041E, 0x041F, // 0x90–0x9F: Cyrillic uppercase Р–Я 376 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 377 0x042C, 0x042D, 0x042E, 0x042F, // 0xA0–0xAF: Cyrillic lowercase а–п 378 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 379 0x043C, 0x043D, 0x043E, 0x043F, // 0xB0–0xBF: box drawing light 380 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 381 0x255D, 0x255C, 0x255B, 0x2510, // 0xC0–0xCF: box drawing 382 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, 0x255A, 0x2554, 0x2569, 0x2566, 383 0x2560, 0x2550, 0x256C, 0x2567, // 0xD0–0xDF: more box drawing 384 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, 0x256A, 0x2518, 0x250C, 0x2588, 385 0x2584, 0x258C, 0x2590, 0x2580, // 0xE0–0xEF: Cyrillic lowercase р–я 386 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 387 0x044C, 0x044D, 0x044E, 0x044F, // 0xF0–0xFF 388 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E, 0x00B0, 0x2219, 0x00B7, 0x221A, 389 0x2116, 0x00A4, 0x25A0, 0x00A0, 390]; 391 392/// windows-874 (Thai) 393pub(crate) const WINDOWS_874: [u16; 128] = [ 394 // 0x80–0x8F 395 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 396 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, // 0x90–0x9F 397 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 398 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, // 0xA0–0xAF 399 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 400 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, // 0xB0–0xBF 401 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 402 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, // 0xC0–0xCF 403 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 404 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, // 0xD0–0xDF 405 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 406 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, // 0xE0–0xEF 407 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 408 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F, // 0xF0–0xFF 409 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 410 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 411]; 412 413/// windows-1250 (Central European) 414pub(crate) const WINDOWS_1250: [u16; 128] = [ 415 // 0x80–0x8F 416 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021, 0xFFFD, 0x2030, 0x0160, 0x2039, 417 0x015A, 0x0164, 0x017D, 0x0179, // 0x90–0x9F 418 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0x2122, 0x0161, 0x203A, 419 0x015B, 0x0165, 0x017E, 0x017A, // 0xA0–0xAF 420 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x015E, 0x00AB, 421 0x00AC, 0x00AD, 0x00AE, 0x017B, // 0xB0–0xBF 422 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x0105, 0x015F, 0x00BB, 423 0x013D, 0x02DD, 0x013E, 0x017C, // 0xC0–0xCF 424 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 425 0x011A, 0x00CD, 0x00CE, 0x010E, // 0xD0–0xDF 426 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, 0x0158, 0x016E, 0x00DA, 0x0170, 427 0x00DC, 0x00DD, 0x0162, 0x00DF, // 0xE0–0xEF 428 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 429 0x011B, 0x00ED, 0x00EE, 0x010F, // 0xF0–0xFF 430 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159, 0x016F, 0x00FA, 0x0171, 431 0x00FC, 0x00FD, 0x0163, 0x02D9, 432]; 433 434/// windows-1251 (Cyrillic) 435pub(crate) const WINDOWS_1251: [u16; 128] = [ 436 // 0x80–0x8F 437 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, 0x20AC, 0x2030, 0x0409, 0x2039, 438 0x040A, 0x040C, 0x040B, 0x040F, // 0x90–0x9F 439 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0x2122, 0x0459, 0x203A, 440 0x045A, 0x045C, 0x045B, 0x045F, // 0xA0–0xAF 441 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7, 0x0401, 0x00A9, 0x0404, 0x00AB, 442 0x00AC, 0x00AD, 0x00AE, 0x0407, // 0xB0–0xBF 443 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7, 0x0451, 0x2116, 0x0454, 0x00BB, 444 0x0458, 0x0405, 0x0455, 0x0457, // 0xC0–0xCF: А–П 445 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 446 0x041C, 0x041D, 0x041E, 0x041F, // 0xD0–0xDF: Р–Я 447 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 448 0x042C, 0x042D, 0x042E, 0x042F, // 0xE0–0xEF: а–п 449 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 450 0x043C, 0x043D, 0x043E, 0x043F, // 0xF0–0xFF: р–я 451 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 452 0x044C, 0x044D, 0x044E, 0x044F, 453]; 454 455/// windows-1253 (Greek) 456pub(crate) const WINDOWS_1253: [u16; 128] = [ 457 // 0x80–0x8F 458 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0xFFFD, 0x2030, 0xFFFD, 0x2039, 459 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, // 0x90–0x9F 460 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0x2122, 0xFFFD, 0x203A, 461 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, // 0xA0–0xAF 462 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 463 0x00AC, 0x00AD, 0x00AE, 0x2015, // 0xB0–0xBF 464 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7, 0x0388, 0x0389, 0x038A, 0x00BB, 465 0x038C, 0x00BD, 0x038E, 0x038F, // 0xC0–0xCF 466 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 467 0x039C, 0x039D, 0x039E, 0x039F, // 0xD0–0xDF 468 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x03AA, 0x03AB, 469 0x03AC, 0x03AD, 0x03AE, 0x03AF, // 0xE0–0xEF 470 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 471 0x03BC, 0x03BD, 0x03BE, 0x03BF, // 0xF0–0xFF 472 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB, 473 0x03CC, 0x03CD, 0x03CE, 0xFFFD, 474]; 475 476/// windows-1254 (Turkish) 477pub(crate) const WINDOWS_1254: [u16; 128] = [ 478 // 0x80–0x8F 479 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 480 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, // 0x90–0x9F 481 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 482 0x0153, 0xFFFD, 0xFFFD, 0x0178, // 0xA0–0xAF 483 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 484 0x00AC, 0x00AD, 0x00AE, 0x00AF, // 0xB0–0xBF 485 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 486 0x00BC, 0x00BD, 0x00BE, 0x00BF, // 0xC0–0xCF 487 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 488 0x00CC, 0x00CD, 0x00CE, 0x00CF, // 0xD0–0xDF: differs from 1252 at D0, DD, DE 489 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 490 0x00DC, 0x0130, 0x015E, 0x00DF, // 0xE0–0xEF 491 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 492 0x00EC, 0x00ED, 0x00EE, 0x00EF, // 0xF0–0xFF: differs from 1252 at F0, FD, FE 493 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 494 0x00FC, 0x0131, 0x015F, 0x00FF, 495]; 496 497/// windows-1255 (Hebrew) 498pub(crate) const WINDOWS_1255: [u16; 128] = [ 499 // 0x80–0x8F 500 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0xFFFD, 0x2039, 501 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, // 0x90–0x9F 502 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0xFFFD, 0x203A, 503 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, // 0xA0–0xAF 504 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00D7, 0x00AB, 505 0x00AC, 0x00AD, 0x00AE, 0x00AF, // 0xB0–0xBF 506 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00F7, 0x00BB, 507 0x00BC, 0x00BD, 0x00BE, 0x00BF, // 0xC0–0xCF 508 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BA, 0x05BB, 509 0x05BC, 0x05BD, 0x05BE, 0x05BF, // 0xD0–0xDF 510 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 511 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, // 0xE0–0xEF 512 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 513 0x05DC, 0x05DD, 0x05DE, 0x05DF, // 0xF0–0xFF 514 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 515 0xFFFD, 0x200E, 0x200F, 0xFFFD, 516]; 517 518/// windows-1256 (Arabic) 519pub(crate) const WINDOWS_1256: [u16; 128] = [ 520 // 0x80–0x8F 521 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0679, 0x2039, 522 0x0152, 0x0686, 0x0698, 0x0688, // 0x90–0x9F 523 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x06A9, 0x2122, 0x0691, 0x203A, 524 0x0153, 0x200C, 0x200D, 0x06BA, // 0xA0–0xAF 525 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x06BE, 0x00AB, 526 0x00AC, 0x00AD, 0x00AE, 0x00AF, // 0xB0–0xBF 527 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x061B, 0x00BB, 528 0x00BC, 0x00BD, 0x00BE, 0x061F, // 0xC0–0xCF 529 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 530 0x062C, 0x062D, 0x062E, 0x062F, // 0xD0–0xDF 531 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7, 0x0637, 0x0638, 0x0639, 0x063A, 532 0x0640, 0x0641, 0x0642, 0x0643, // 0xE0–0xEF 533 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 534 0x0649, 0x064A, 0x00EE, 0x00EF, // 0xF0–0xFF 535 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7, 0x0651, 0x00F9, 0x0652, 0x00FB, 536 0x00FC, 0x200E, 0x200F, 0x06D2, 537]; 538 539/// windows-1257 (Baltic) 540pub(crate) const WINDOWS_1257: [u16; 128] = [ 541 // 0x80–0x8F 542 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021, 0xFFFD, 0x2030, 0xFFFD, 0x2039, 543 0xFFFD, 0x00A8, 0x02C7, 0x00B8, // 0x90–0x9F 544 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0x2122, 0xFFFD, 0x203A, 545 0xFFFD, 0x00AF, 0x02DB, 0xFFFD, // 0xA0–0xAF 546 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB, 547 0x00AC, 0x00AD, 0x00AE, 0x00C6, // 0xB0–0xBF 548 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00F8, 0x00B9, 0x0157, 0x00BB, 549 0x00BC, 0x00BD, 0x00BE, 0x00E6, // 0xC0–0xCF 550 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, 0x010C, 0x00C9, 0x0179, 0x0116, 551 0x0122, 0x0136, 0x012A, 0x013B, // 0xD0–0xDF 552 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, 0x0172, 0x0141, 0x015A, 0x016A, 553 0x00DC, 0x017B, 0x017D, 0x00DF, // 0xE0–0xEF 554 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, 0x010D, 0x00E9, 0x017A, 0x0117, 555 0x0123, 0x0137, 0x012B, 0x013C, // 0xF0–0xFF 556 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173, 0x0142, 0x015B, 0x016B, 557 0x00FC, 0x017C, 0x017E, 0x02D9, 558]; 559 560/// windows-1258 (Vietnamese) 561pub(crate) const WINDOWS_1258: [u16; 128] = [ 562 // 0x80–0x8F 563 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0xFFFD, 0x2039, 564 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, // 0x90–0x9F 565 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0xFFFD, 0x203A, 566 0x0153, 0xFFFD, 0xFFFD, 0x0178, // 0xA0–0xAF 567 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 568 0x00AC, 0x00AD, 0x00AE, 0x00AF, // 0xB0–0xBF 569 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 570 0x00BC, 0x00BD, 0x00BE, 0x00BF, // 0xC0–0xCF 571 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 572 0x0300, 0x00CD, 0x00CE, 0x00CF, // 0xD0–0xDF 573 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 574 0x00DC, 0x01AF, 0x0303, 0x00DF, // 0xE0–0xEF 575 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 576 0x0301, 0x00ED, 0x00EE, 0x00EF, // 0xF0–0xFF 577 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 578 0x00FC, 0x01B0, 0x20AB, 0x00FF, 579]; 580 581/// x-mac-cyrillic 582pub(crate) const X_MAC_CYRILLIC: [u16; 128] = [ 583 // 0x80–0x8F 584 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 585 0x041C, 0x041D, 0x041E, 0x041F, // 0x90–0x9F 586 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 587 0x042C, 0x042D, 0x042E, 0x042F, // 0xA0–0xAF 588 0x2020, 0x00B0, 0x0490, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x0406, 0x00AE, 0x00A9, 0x2122, 0x0402, 589 0x0452, 0x2260, 0x0403, 0x0453, // 0xB0–0xBF 590 0x221E, 0x00B1, 0x2264, 0x2265, 0x0456, 0x00B5, 0x0491, 0x0408, 0x0404, 0x0454, 0x0407, 0x0457, 591 0x0409, 0x0459, 0x040A, 0x045A, // 0xC0–0xCF 592 0x0458, 0x0405, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x040B, 593 0x045B, 0x040C, 0x045C, 0x0455, // 0xD0–0xDF 594 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x201E, 0x040E, 0x045E, 0x040F, 0x045F, 595 0x2116, 0x0401, 0x0451, 0x044F, // 0xE0–0xEF 596 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 597 0x043C, 0x043D, 0x043E, 0x043F, // 0xF0–0xFF 598 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 599 0x044C, 0x044D, 0x044E, 0x20AC, 600]; 601 602/// Return the lookup table for a single-byte encoding variant. 603pub(crate) fn table_for(encoding: &crate::Encoding) -> Option<&'static [u16; 128]> { 604 use crate::Encoding::*; 605 match encoding { 606 Windows1252 => Some(&WINDOWS_1252), 607 Iso8859_2 => Some(&ISO_8859_2), 608 Iso8859_3 => Some(&ISO_8859_3), 609 Iso8859_4 => Some(&ISO_8859_4), 610 Iso8859_5 => Some(&ISO_8859_5), 611 Iso8859_6 => Some(&ISO_8859_6), 612 Iso8859_7 => Some(&ISO_8859_7), 613 Iso8859_8 | Iso8859_8I => Some(&ISO_8859_8), 614 Iso8859_10 => Some(&ISO_8859_10), 615 Iso8859_13 => Some(&ISO_8859_13), 616 Iso8859_14 => Some(&ISO_8859_14), 617 Iso8859_15 => Some(&ISO_8859_15), 618 Iso8859_16 => Some(&ISO_8859_16), 619 Koi8R => Some(&KOI8_R), 620 Koi8U => Some(&KOI8_U), 621 Macintosh => Some(&MACINTOSH), 622 Ibm866 => Some(&IBM866), 623 Windows874 => Some(&WINDOWS_874), 624 Windows1250 => Some(&WINDOWS_1250), 625 Windows1251 => Some(&WINDOWS_1251), 626 Windows1253 => Some(&WINDOWS_1253), 627 Windows1254 => Some(&WINDOWS_1254), 628 Windows1255 => Some(&WINDOWS_1255), 629 Windows1256 => Some(&WINDOWS_1256), 630 Windows1257 => Some(&WINDOWS_1257), 631 Windows1258 => Some(&WINDOWS_1258), 632 XMacCyrillic => Some(&X_MAC_CYRILLIC), 633 _ => None, 634 } 635} 636 637// --------------------------------------------------------------------------- 638// Tests 639// --------------------------------------------------------------------------- 640 641#[cfg(test)] 642mod tests { 643 use super::*; 644 use crate::error::EncodingError; 645 646 fn decode_replace(bytes: &[u8], table: &[u16; 128]) -> String { 647 decode_single_byte(bytes, table, "test", ErrorMode::Replacement).unwrap() 648 } 649 650 // -- Basic ASCII passthrough -- 651 652 #[test] 653 fn ascii_passthrough() { 654 assert_eq!( 655 decode_replace(b"Hello, world!", &WINDOWS_1252), 656 "Hello, world!" 657 ); 658 } 659 660 #[test] 661 fn empty_input() { 662 assert_eq!(decode_replace(b"", &WINDOWS_1252), ""); 663 } 664 665 #[test] 666 fn null_byte() { 667 assert_eq!(decode_replace(&[0x00], &WINDOWS_1252), "\0"); 668 } 669 670 // -- Windows-1252 -- 671 672 #[test] 673 fn windows_1252_euro() { 674 // 0x80 → U+20AC (€) 675 assert_eq!(decode_replace(&[0x80], &WINDOWS_1252), "\u{20AC}"); 676 } 677 678 #[test] 679 fn windows_1252_smart_quotes() { 680 // 0x93 → U+201C (") and 0x94 → U+201D (") 681 assert_eq!( 682 decode_replace(&[0x93, 0x94], &WINDOWS_1252), 683 "\u{201C}\u{201D}" 684 ); 685 } 686 687 #[test] 688 fn windows_1252_trademark() { 689 // 0x99 → U+2122 (™) 690 assert_eq!(decode_replace(&[0x99], &WINDOWS_1252), "\u{2122}"); 691 } 692 693 #[test] 694 fn windows_1252_high_latin() { 695 // 0xE9 → U+00E9 (é) 696 assert_eq!(decode_replace(&[0xE9], &WINDOWS_1252), "\u{00E9}"); 697 } 698 699 #[test] 700 fn windows_1252_mixed() { 701 // "Caf" + 0xE9 → "Café" 702 assert_eq!( 703 decode_replace(&[0x43, 0x61, 0x66, 0xE9], &WINDOWS_1252), 704 "Caf\u{00E9}" 705 ); 706 } 707 708 // -- ISO-8859-2 (Central European) -- 709 710 #[test] 711 fn iso_8859_2_polish() { 712 // 0xA1 → U+0104 (Ą), 0xB1 → U+0105 (ą) 713 assert_eq!(decode_replace(&[0xA1], &ISO_8859_2), "\u{0104}"); 714 assert_eq!(decode_replace(&[0xB1], &ISO_8859_2), "\u{0105}"); 715 } 716 717 #[test] 718 fn iso_8859_2_czech() { 719 // 0xC8 → U+010C (Č), 0xE8 → U+010D (č) 720 assert_eq!(decode_replace(&[0xC8], &ISO_8859_2), "\u{010C}"); 721 assert_eq!(decode_replace(&[0xE8], &ISO_8859_2), "\u{010D}"); 722 } 723 724 // -- ISO-8859-5 (Cyrillic) -- 725 726 #[test] 727 fn iso_8859_5_cyrillic() { 728 // 0xB0 → U+0410 (А), 0xD0 → U+0430 (а) 729 assert_eq!(decode_replace(&[0xB0], &ISO_8859_5), "\u{0410}"); 730 assert_eq!(decode_replace(&[0xD0], &ISO_8859_5), "\u{0430}"); 731 } 732 733 // -- ISO-8859-7 (Greek) -- 734 735 #[test] 736 fn iso_8859_7_greek() { 737 // 0xC1 → U+0391 (Α), 0xE1 → U+03B1 (α) 738 assert_eq!(decode_replace(&[0xC1], &ISO_8859_7), "\u{0391}"); 739 assert_eq!(decode_replace(&[0xE1], &ISO_8859_7), "\u{03B1}"); 740 } 741 742 // -- ISO-8859-15 (Latin-9) -- 743 744 #[test] 745 fn iso_8859_15_euro() { 746 // 0xA4 → U+20AC (€) — differs from ISO-8859-1 747 assert_eq!(decode_replace(&[0xA4], &ISO_8859_15), "\u{20AC}"); 748 } 749 750 #[test] 751 fn iso_8859_15_oe_ligature() { 752 // 0xBC → U+0152 (Œ), 0xBD → U+0153 (œ) 753 assert_eq!(decode_replace(&[0xBC], &ISO_8859_15), "\u{0152}"); 754 assert_eq!(decode_replace(&[0xBD], &ISO_8859_15), "\u{0153}"); 755 } 756 757 // -- KOI8-R (Russian) -- 758 759 #[test] 760 fn koi8_r_cyrillic() { 761 // 0xC1 → U+0430 (а), 0xE1 → U+0410 (А) 762 assert_eq!(decode_replace(&[0xC1], &KOI8_R), "\u{0430}"); 763 assert_eq!(decode_replace(&[0xE1], &KOI8_R), "\u{0410}"); 764 } 765 766 #[test] 767 fn koi8_r_copyright() { 768 // 0xBF → U+00A9 (©) 769 assert_eq!(decode_replace(&[0xBF], &KOI8_R), "\u{00A9}"); 770 } 771 772 // -- Windows-1251 (Cyrillic) -- 773 774 #[test] 775 fn windows_1251_cyrillic() { 776 // 0xC0 → U+0410 (А), 0xE0 → U+0430 (а) 777 assert_eq!(decode_replace(&[0xC0], &WINDOWS_1251), "\u{0410}"); 778 assert_eq!(decode_replace(&[0xE0], &WINDOWS_1251), "\u{0430}"); 779 } 780 781 #[test] 782 fn windows_1251_euro() { 783 // 0x88 → U+20AC (€) 784 assert_eq!(decode_replace(&[0x88], &WINDOWS_1251), "\u{20AC}"); 785 } 786 787 // -- macintosh -- 788 789 #[test] 790 fn macintosh_special() { 791 // 0x80 → U+00C4 (Ä), 0xCA → U+00A0 (NBSP) 792 assert_eq!(decode_replace(&[0x80], &MACINTOSH), "\u{00C4}"); 793 assert_eq!(decode_replace(&[0xCA], &MACINTOSH), "\u{00A0}"); 794 } 795 796 // -- IBM866 -- 797 798 #[test] 799 fn ibm866_cyrillic() { 800 // 0x80 → U+0410 (А), 0xA0 → U+0430 (а) 801 assert_eq!(decode_replace(&[0x80], &IBM866), "\u{0410}"); 802 assert_eq!(decode_replace(&[0xA0], &IBM866), "\u{0430}"); 803 } 804 805 // -- Unmapped bytes -- 806 807 #[test] 808 fn unmapped_replacement() { 809 // ISO-8859-3 has unmapped bytes, e.g. 0xA5 810 assert_eq!(decode_replace(&[0xA5], &ISO_8859_3), "\u{FFFD}"); 811 } 812 813 #[test] 814 fn unmapped_fatal() { 815 let err = decode_single_byte(&[0x41, 0xA5], &ISO_8859_3, "ISO-8859-3", ErrorMode::Fatal) 816 .unwrap_err(); 817 assert!(matches!( 818 err, 819 EncodingError::InvalidSequence { 820 encoding: "ISO-8859-3", 821 position: 1 822 } 823 )); 824 } 825 826 // -- Table sizes -- 827 828 #[test] 829 fn all_tables_128_entries() { 830 assert_eq!(WINDOWS_1252.len(), 128); 831 assert_eq!(ISO_8859_2.len(), 128); 832 assert_eq!(ISO_8859_3.len(), 128); 833 assert_eq!(ISO_8859_4.len(), 128); 834 assert_eq!(ISO_8859_5.len(), 128); 835 assert_eq!(ISO_8859_6.len(), 128); 836 assert_eq!(ISO_8859_7.len(), 128); 837 assert_eq!(ISO_8859_8.len(), 128); 838 assert_eq!(ISO_8859_10.len(), 128); 839 assert_eq!(ISO_8859_13.len(), 128); 840 assert_eq!(ISO_8859_14.len(), 128); 841 assert_eq!(ISO_8859_15.len(), 128); 842 assert_eq!(ISO_8859_16.len(), 128); 843 assert_eq!(KOI8_R.len(), 128); 844 assert_eq!(KOI8_U.len(), 128); 845 assert_eq!(MACINTOSH.len(), 128); 846 assert_eq!(IBM866.len(), 128); 847 assert_eq!(WINDOWS_874.len(), 128); 848 assert_eq!(WINDOWS_1250.len(), 128); 849 assert_eq!(WINDOWS_1251.len(), 128); 850 assert_eq!(WINDOWS_1253.len(), 128); 851 assert_eq!(WINDOWS_1254.len(), 128); 852 assert_eq!(WINDOWS_1255.len(), 128); 853 assert_eq!(WINDOWS_1256.len(), 128); 854 assert_eq!(WINDOWS_1257.len(), 128); 855 assert_eq!(WINDOWS_1258.len(), 128); 856 assert_eq!(X_MAC_CYRILLIC.len(), 128); 857 } 858}