//! generated by scripts/gen_tokenizer_data.py — do not edit.
//! tokenizer pattern data compiled from spaCy en_core_web_sm.

const std = @import("std");

// ── utf-8 helpers ──

// NOTE(review): the header says this file is generated, so the validation
// added to decodeUtf8/lastCodepoint presumably also has to land in
// scripts/gen_tokenizer_data.py or it will be lost on regeneration — confirm.

/// A decoded code point plus the number of input bytes it occupies.
pub const Codepoint = struct { value: u21, len: u3 };

/// Decodes the UTF-8 sequence at the start of `bytes`.
///
/// Returns `null` only for empty input. A well-formed sequence yields its
/// code point and byte length (1–4). Anything malformed — stray continuation
/// byte, truncated sequence, a lead byte followed by a non-continuation byte,
/// overlong encoding, surrogate, or value above U+10FFFF — yields U+FFFD with
/// `len == 1`, so a caller advancing by `len` resynchronises on the next byte
/// instead of swallowing bytes that belong to the following character.
pub fn decodeUtf8(bytes: []const u8) ?Codepoint {
    if (bytes.len == 0) return null;

    const replacement: Codepoint = .{ .value = 0xFFFD, .len = 1 };
    const b0 = bytes[0];
    if (b0 < 0x80) return .{ .value = b0, .len = 1 };

    // 0x80–0xBF are continuation bytes, 0xC0/0xC1 can only start overlong
    // two-byte forms, and 0xF5–0xFF would encode values above U+10FFFF.
    const seq_len: u3 = switch (b0) {
        0xC2...0xDF => 2,
        0xE0...0xEF => 3,
        0xF0...0xF4 => 4,
        else => return replacement,
    };
    if (bytes.len < seq_len) return replacement;

    // Payload bits of the lead byte: 0x1F, 0x0F, 0x07 for lengths 2, 3, 4.
    var value: u21 = b0 & (@as(u8, 0x7F) >> seq_len);
    for (bytes[1..seq_len]) |b| {
        if (b & 0xC0 != 0x80) return replacement;
        value = (value << 6) | (b & 0x3F);
    }

    // Smallest code point that legitimately needs `seq_len` bytes.
    const min_value: u21 = switch (seq_len) {
        2 => 0x80,
        3 => 0x800,
        else => 0x10000,
    };
    if (value < min_value) return replacement; // overlong encoding
    if (value >= 0xD800 and value <= 0xDFFF) return replacement; // surrogate
    if (value > 0x10FFFF) return replacement;

    return .{ .value = value, .len = seq_len };
}

/// Decodes the code point that ends `text`.
///
/// Returns `null` only for empty input. The returned `len` is always the
/// number of bytes to strip from the end of `text` to remove that code point:
/// if the bytes at the end do not form one well-formed sequence that finishes
/// exactly at `text.len`, the final byte alone is reported as U+FFFD, len 1.
pub fn lastCodepoint(text: []const u8) ?Codepoint {
    if (text.len == 0) return null;

    // A sequence has at most three continuation bytes, so never look further
    // back than four bytes from the end.
    const floor = if (text.len > 4) text.len - 4 else 0;
    var start = text.len - 1;
    while (start > floor and text[start] & 0xC0 == 0x80) start -= 1;

    const cp = decodeUtf8(text[start..]) orelse return null;
    if (start + cp.len == text.len) return cp;
    return .{ .value = 0xFFFD, .len = 1 };
}

// ── range search ──

/// Binary search for `c` in `ranges`, where each entry is an inclusive
/// `[first, last]` pair. The search is only correct when the entries are in
/// ascending order and do not overlap.
fn rangeContains(ranges: []const [2]u21, c: u21) bool {
    var lo: usize = 0;
    var hi: usize = ranges.len;
    while (lo < hi) {
        const mid = lo + (hi - lo) / 2;
        if (c > ranges[mid][1]) {
            lo = mid + 1;
        } else if (c < ranges[mid][0]) {
            hi = mid;
        } else return true;
    }
    return false;
}

// ── symbol class (So/Sc unicode categories) ──

/// Inclusive `[first, last]` code point ranges, in ascending order because
/// isSymbol looks them up through rangeContains.
pub const isSymbol_ranges = [_][2]u21{
    .{ 0x00A6, 0x00A6 }, .{ 0x00A9, 0x00A9 }, .{ 0x00AE, 0x00AE }, .{ 0x00B0, 0x00B0 },
    .{ 0x0482, 0x0482 }, .{ 0x058D, 0x058E }, .{ 0x060E, 0x060F }, .{ 0x06DE, 0x06DE },
    .{ 0x06E9, 0x06E9 }, .{ 0x06FD, 0x06FE }, .{ 0x07F6, 0x07F6 }, .{ 0x09FA, 0x09FA },
    .{ 0x0B70, 0x0B70 }, .{ 0x0BF3, 0x0BF8 }, .{ 0x0BFA, 0x0BFA }, .{ 0x0C7F, 0x0C7F },
    .{ 0x0D4F, 0x0D4F }, .{ 0x0D79, 0x0D79 }, .{ 0x0F01, 0x0F03 }, .{ 0x0F13, 0x0F13 },
    .{ 0x0F15, 0x0F17 }, .{ 0x0F1A, 0x0F1F }, .{ 0x0F34, 0x0F34 }, .{ 0x0F36, 0x0F36 },
    .{
0x0F38, 0x0F38 }, .{ 0x0FBE, 0x0FC5 }, .{ 0x0FC7, 0x0FCC }, .{ 0x0FCE, 0x0FCF }, .{ 0x0FD5, 0x0FD8 }, .{ 0x109E, 0x109F }, .{ 0x1390, 0x1399 }, .{ 0x1940, 0x1940 }, .{ 0x19DE, 0x19FF }, .{ 0x1B61, 0x1B6A }, .{ 0x1B74, 0x1B7C }, .{ 0x2100, 0x2101 }, .{ 0x2103, 0x2106 }, .{ 0x2108, 0x2109 }, .{ 0x2114, 0x2114 }, .{ 0x2116, 0x2117 }, .{ 0x211E, 0x2123 }, .{ 0x2125, 0x2125 }, .{ 0x2127, 0x2127 }, .{ 0x2129, 0x2129 }, .{ 0x212E, 0x212E }, .{ 0x213A, 0x213B }, .{ 0x214A, 0x214A }, .{ 0x214C, 0x214D }, .{ 0x214F, 0x214F }, .{ 0x218A, 0x218B }, .{ 0x2195, 0x2199 }, .{ 0x219C, 0x219F }, .{ 0x21A1, 0x21A2 }, .{ 0x21A4, 0x21A5 }, .{ 0x21A7, 0x21AD }, .{ 0x21AF, 0x21CD }, .{ 0x21D0, 0x21D1 }, .{ 0x21D3, 0x21D3 }, .{ 0x21D5, 0x21F3 }, .{ 0x2300, 0x2307 }, .{ 0x230C, 0x231F }, .{ 0x2322, 0x2328 }, .{ 0x232B, 0x237B }, .{ 0x237D, 0x239A }, .{ 0x23B4, 0x23DB }, .{ 0x23E2, 0x2426 }, .{ 0x2440, 0x244A }, .{ 0x249C, 0x24E9 }, .{ 0x2500, 0x25B6 }, .{ 0x25B8, 0x25C0 }, .{ 0x25C2, 0x25F7 }, .{ 0x2600, 0x266E }, .{ 0x2670, 0x2767 }, .{ 0x2794, 0x27BF }, .{ 0x2800, 0x28FF }, .{ 0x2B00, 0x2B2F }, .{ 0x2B45, 0x2B46 }, .{ 0x2B4D, 0x2B73 }, .{ 0x2B76, 0x2B95 }, .{ 0x2B98, 0x2BC8 }, .{ 0x2BCA, 0x2BFE }, .{ 0x2CE5, 0x2CEA }, .{ 0x2E80, 0x2E99 }, .{ 0x2E9B, 0x2EF3 }, .{ 0x2F00, 0x2FD5 }, .{ 0x2FF0, 0x2FFB }, .{ 0x3004, 0x3004 }, .{ 0x3012, 0x3013 }, .{ 0x3020, 0x3020 }, .{ 0x3036, 0x3037 }, .{ 0x303E, 0x303F }, .{ 0x3190, 0x3191 }, .{ 0x3196, 0x319F }, .{ 0x31C0, 0x31E3 }, .{ 0x3200, 0x321E }, .{ 0x322A, 0x3247 }, .{ 0x3250, 0x3250 }, .{ 0x3260, 0x327F }, .{ 0x328A, 0x32B0 }, .{ 0x32C0, 0x32FE }, .{ 0x3300, 0x33FF }, .{ 0x4DC0, 0x4DFF }, .{ 0xA490, 0xA4C6 }, .{ 0xA828, 0xA82B }, .{ 0xA836, 0xA837 }, .{ 0xA839, 0xA839 }, .{ 0xAA77, 0xAA79 }, .{ 0xFDFD, 0xFDFD }, .{ 0xFFE4, 0xFFE4 }, .{ 0xFFE8, 0xFFE8 }, .{ 0xFFED, 0xFFEE }, .{ 0xFFFC, 0xFFFD }, .{ 0x10137, 0x1013F }, .{ 0x10179, 0x10189 }, .{ 0x1018C, 0x1018E }, .{ 0x10190, 0x1019B }, .{ 0x101A0, 0x101A0 }, .{ 0x101D0, 0x101FC }, .{ 0x10877, 
0x10878 }, .{ 0x10AC8, 0x10AC8 }, .{ 0x1173F, 0x1173F }, .{ 0x16B3C, 0x16B3F }, .{ 0x16B45, 0x16B45 }, .{ 0x1BC9C, 0x1BC9C }, .{ 0x1D000, 0x1D0F5 }, .{ 0x1D100, 0x1D126 }, .{ 0x1D129, 0x1D164 }, .{ 0x1D16A, 0x1D16C }, .{ 0x1D183, 0x1D184 }, .{ 0x1D18C, 0x1D1A9 }, .{ 0x1D1AE, 0x1D1E8 }, .{ 0x1D200, 0x1D241 }, .{ 0x1D245, 0x1D245 }, .{ 0x1D300, 0x1D356 }, .{ 0x1D800, 0x1D9FF }, .{ 0x1DA37, 0x1DA3A }, .{ 0x1DA6D, 0x1DA74 }, .{ 0x1DA76, 0x1DA83 }, .{ 0x1DA85, 0x1DA86 }, .{ 0x1ECAC, 0x1ECAC }, .{ 0x1F000, 0x1F02B }, .{ 0x1F030, 0x1F093 }, .{ 0x1F0A0, 0x1F0AE }, .{ 0x1F0B1, 0x1F0BF }, .{ 0x1F0C1, 0x1F0CF }, .{ 0x1F0D1, 0x1F0F5 }, .{ 0x1F110, 0x1F16B }, .{ 0x1F170, 0x1F1AC }, .{ 0x1F1E6, 0x1F202 }, .{ 0x1F210, 0x1F23B }, .{ 0x1F240, 0x1F248 }, .{ 0x1F250, 0x1F251 }, .{ 0x1F260, 0x1F265 }, .{ 0x1F300, 0x1F3FA }, .{ 0x1F400, 0x1F6D4 }, .{ 0x1F6E0, 0x1F6EC }, .{ 0x1F6F0, 0x1F6F9 }, .{ 0x1F700, 0x1F773 }, .{ 0x1F780, 0x1F7D8 }, .{ 0x1F800, 0x1F80B }, .{ 0x1F810, 0x1F847 }, .{ 0x1F850, 0x1F859 }, .{ 0x1F860, 0x1F887 }, .{ 0x1F890, 0x1F8AD }, .{ 0x1F900, 0x1F90B }, .{ 0x1F910, 0x1F93E }, .{ 0x1F940, 0x1F970 }, .{ 0x1F973, 0x1F976 }, .{ 0x1F97A, 0x1F97A }, .{ 0x1F97C, 0x1F9A2 }, .{ 0x1F9B0, 0x1F9B9 }, .{ 0x1F9C0, 0x1F9C2 }, .{ 0x1F9D0, 0x1F9FF }, .{ 0x1FA60, 0x1FA6D }, }; pub fn isSymbol(c: u21) bool { return rangeContains(&isSymbol_ranges, c); } // ── prefix data ── pub fn isPrefixChar(c: u21) bool { return switch (c) { '!'...'*' => true, ',' => true, ':'...'?' 
=> true, '[' => true, ']' => true, '_'...'`' => true, '{' => true, '}' => true, 0x00A1 => true, 0x00A3 => true, 0x00A5 => true, 0x00A7 => true, 0x00AB => true, 0x00B4 => true, 0x00B7 => true, 0x00BB => true, 0x00BF => true, 0x060C => true, 0x061B => true, 0x061F => true, 0x066A => true, 0x06D4 => true, 0x0964 => true, 0x0E3F => true, 0x2013...0x2014 => true, 0x2018...0x201A => true, 0x201C...0x201E => true, 0x2026 => true, 0x20A0...0x20BF => true, 0x2329...0x232A => true, 0x27E6...0x27E7 => true, 0x3001...0x3002 => true, 0x3008...0x3011 => true, 0x3014...0x3015 => true, 0xFDFC => true, 0xFF01 => true, 0xFF08...0xFF09 => true, 0xFF0C => true, 0xFF1A...0xFF1B => true, 0xFF1F => true, 0xFF5E => true, else => false, }; } pub const prefix_multi_literals = [_][]const u8{ "US$", "\xe2\x80\xa6\xe2\x80\xa6", "C$", "A$", }; pub fn isPrefixUnlessDigit(c: u21) bool { return switch (c) { '+' => true, else => false, }; } // ── suffix data ── pub fn isSuffixChar(c: u21) bool { return switch (c) { '!'...'#' => true, '&'...'*' => true, ',' => true, ':'...'<' => true, '>'...'?' 
=> true, '[' => true, ']' => true, '_'...'`' => true, '{' => true, '}' => true, 0x00A1 => true, 0x00AB => true, 0x00B4 => true, 0x00B7 => true, 0x00BB => true, 0x00BF => true, 0x060C => true, 0x061B => true, 0x061F => true, 0x066A => true, 0x06D4 => true, 0x0964 => true, 0x2013...0x2014 => true, 0x2018...0x201A => true, 0x201C...0x201E => true, 0x2026 => true, 0x2329...0x232A => true, 0x27E6...0x27E7 => true, 0x3001...0x3002 => true, 0x3008...0x3011 => true, 0x3014...0x3015 => true, 0xFF01 => true, 0xFF08...0xFF09 => true, 0xFF0C => true, 0xFF1A...0xFF1B => true, 0xFF1F => true, 0xFF5E => true, else => false, }; } pub const suffix_multi_literals = [_][]const u8{ "\xe2\x80\xa6\xe2\x80\xa6", "'s", "'S", "\xe2\x80\x99s", "\xe2\x80\x99S", }; // ── suffix lookbehind helpers ── const lookbehind_class_0_ranges = [_][2]u21{ .{ 0x0030, 0x0039 }, }; pub fn matchLookbehind0(c: u21) bool { return rangeContains(&lookbehind_class_0_ranges, c); } const lookbehind_class_1_ranges = [_][2]u21{ .{ 0x0043, 0x0043 }, .{ 0x0046, 0x0046 }, .{ 0x004B, 0x004B }, .{ 0x0063, 0x0063 }, .{ 0x0066, 0x0066 }, .{ 0x006B, 0x006B }, }; pub fn matchLookbehind1(c: u21) bool { return rangeContains(&lookbehind_class_1_ranges, c); } const lookbehind_class_2_ranges = [_][2]u21{ .{ 0x0021, 0x0023 }, .{ 0x0025, 0x002D }, .{ 0x0030, 0x003C }, .{ 0x003E, 0x003F }, .{ 0x005B, 0x005B }, .{ 0x005D, 0x005D }, .{ 0x005F, 0x007D }, .{ 0x00A1, 0x00A1 }, .{ 0x00AB, 0x00AB }, .{ 0x00B2, 0x00B2 }, .{ 0x00B4, 0x00B4 }, .{ 0x00B7, 0x00B7 }, .{ 0x00BB, 0x00BB }, .{ 0x00BF, 0x00BF }, .{ 0x00DF, 0x00F6 }, .{ 0x00F8, 0x00FF }, .{ 0x0101, 0x0101 }, .{ 0x0103, 0x0103 }, .{ 0x0105, 0x0105 }, .{ 0x0107, 0x0107 }, .{ 0x0109, 0x0109 }, .{ 0x010B, 0x010B }, .{ 0x010D, 0x010D }, .{ 0x010F, 0x010F }, .{ 0x0111, 0x0111 }, .{ 0x0113, 0x0113 }, .{ 0x0115, 0x0115 }, .{ 0x0117, 0x0117 }, .{ 0x0119, 0x0119 }, .{ 0x011B, 0x011B }, .{ 0x011D, 0x011D }, .{ 0x011F, 0x011F }, .{ 0x0121, 0x0121 }, .{ 0x0123, 0x0123 }, .{ 0x0125, 0x0125 }, .{ 
0x0127, 0x0127 }, .{ 0x0129, 0x0129 }, .{ 0x012B, 0x012B }, .{ 0x012D, 0x012D }, .{ 0x012F, 0x012F }, .{ 0x0131, 0x0131 }, .{ 0x0133, 0x0133 }, .{ 0x0135, 0x0135 }, .{ 0x0137, 0x0138 }, .{ 0x013A, 0x013A }, .{ 0x013C, 0x013C }, .{ 0x013E, 0x013E }, .{ 0x0140, 0x0140 }, .{ 0x0142, 0x0142 }, .{ 0x0144, 0x0144 }, .{ 0x0146, 0x0146 }, .{ 0x0148, 0x0149 }, .{ 0x014B, 0x014B }, .{ 0x014D, 0x014D }, .{ 0x014F, 0x014F }, .{ 0x0151, 0x0151 }, .{ 0x0153, 0x0153 }, .{ 0x0155, 0x0155 }, .{ 0x0157, 0x0157 }, .{ 0x0159, 0x0159 }, .{ 0x015B, 0x015B }, .{ 0x015D, 0x015D }, .{ 0x015F, 0x015F }, .{ 0x0161, 0x0161 }, .{ 0x0163, 0x0163 }, .{ 0x0165, 0x0165 }, .{ 0x0167, 0x0167 }, .{ 0x0169, 0x0169 }, .{ 0x016B, 0x016B }, .{ 0x016D, 0x016D }, .{ 0x016F, 0x016F }, .{ 0x0171, 0x0171 }, .{ 0x0173, 0x0173 }, .{ 0x0175, 0x0175 }, .{ 0x0177, 0x0177 }, .{ 0x017A, 0x017A }, .{ 0x017C, 0x017C }, .{ 0x017E, 0x0180 }, .{ 0x0183, 0x0183 }, .{ 0x0185, 0x0185 }, .{ 0x0188, 0x0188 }, .{ 0x018C, 0x018D }, .{ 0x0192, 0x0192 }, .{ 0x0195, 0x0195 }, .{ 0x0199, 0x019B }, .{ 0x019E, 0x019E }, .{ 0x01A1, 0x01A1 }, .{ 0x01A3, 0x01A3 }, .{ 0x01A5, 0x01A5 }, .{ 0x01A8, 0x01A8 }, .{ 0x01AA, 0x01AB }, .{ 0x01AD, 0x01AD }, .{ 0x01B0, 0x01B0 }, .{ 0x01B4, 0x01B4 }, .{ 0x01B6, 0x01B6 }, .{ 0x01B9, 0x01BA }, .{ 0x01BD, 0x01BF }, .{ 0x01C6, 0x01C6 }, .{ 0x01C9, 0x01C9 }, .{ 0x01CC, 0x01CC }, .{ 0x01CE, 0x01CE }, .{ 0x01D0, 0x01D0 }, .{ 0x01D2, 0x01D2 }, .{ 0x01D4, 0x01D4 }, .{ 0x01D6, 0x01D6 }, .{ 0x01D8, 0x01D8 }, .{ 0x01DA, 0x01DA }, .{ 0x01DC, 0x01DD }, .{ 0x01DF, 0x01DF }, .{ 0x01E1, 0x01E1 }, .{ 0x01E3, 0x01E3 }, .{ 0x01E5, 0x01E5 }, .{ 0x01E7, 0x01E7 }, .{ 0x01E9, 0x01E9 }, .{ 0x01EB, 0x01EB }, .{ 0x01ED, 0x01ED }, .{ 0x01EF, 0x01F0 }, .{ 0x01F3, 0x01F3 }, .{ 0x01F5, 0x01F5 }, .{ 0x01F9, 0x01F9 }, .{ 0x01FB, 0x01FB }, .{ 0x01FD, 0x01FD }, .{ 0x01FF, 0x01FF }, .{ 0x0201, 0x0201 }, .{ 0x0203, 0x0203 }, .{ 0x0205, 0x0205 }, .{ 0x0207, 0x0207 }, .{ 0x0209, 0x0209 }, .{ 0x020B, 0x020B }, .{ 0x020D, 0x020D }, .{ 
0x020F, 0x020F }, .{ 0x0211, 0x0211 }, .{ 0x0213, 0x0213 }, .{ 0x0215, 0x0215 }, .{ 0x0217, 0x0217 }, .{ 0x0219, 0x0219 }, .{ 0x021B, 0x021B }, .{ 0x021D, 0x021D }, .{ 0x021F, 0x021F }, .{ 0x0221, 0x0221 }, .{ 0x0223, 0x0223 }, .{ 0x0225, 0x0225 }, .{ 0x0227, 0x0227 }, .{ 0x0229, 0x0229 }, .{ 0x022B, 0x022B }, .{ 0x022D, 0x022D }, .{ 0x022F, 0x022F }, .{ 0x0231, 0x0231 }, .{ 0x0233, 0x0239 }, .{ 0x023C, 0x023C }, .{ 0x023F, 0x0240 }, .{ 0x0242, 0x0242 }, .{ 0x0247, 0x0247 }, .{ 0x0249, 0x0249 }, .{ 0x024B, 0x024B }, .{ 0x024D, 0x024D }, .{ 0x024F, 0x02AF }, .{ 0x03AC, 0x03AF }, .{ 0x03B1, 0x03C9 }, .{ 0x03CC, 0x03CE }, .{ 0x0430, 0x0451 }, .{ 0x0453, 0x045A }, .{ 0x045C, 0x045D }, .{ 0x0491, 0x0491 }, .{ 0x0497, 0x0497 }, .{ 0x04A3, 0x04A3 }, .{ 0x04AF, 0x04AF }, .{ 0x04BB, 0x04BB }, .{ 0x04D9, 0x04D9 }, .{ 0x04E9, 0x04E9 }, .{ 0x0591, 0x05F4 }, .{ 0x060C, 0x060C }, .{ 0x061B, 0x061B }, .{ 0x061F, 0x064A }, .{ 0x066A, 0x066A }, .{ 0x066E, 0x06D5 }, .{ 0x06E5, 0x06FF }, .{ 0x0750, 0x077F }, .{ 0x08A0, 0x08BD }, .{ 0x0900, 0x09FF }, .{ 0x0B80, 0x0CFF }, .{ 0x0D80, 0x0DFF }, .{ 0x1100, 0x137F }, .{ 0x1D00, 0x1D25 }, .{ 0x1D6B, 0x1D77 }, .{ 0x1D79, 0x1D9A }, .{ 0x1E01, 0x1E01 }, .{ 0x1E03, 0x1E03 }, .{ 0x1E05, 0x1E05 }, .{ 0x1E07, 0x1E07 }, .{ 0x1E09, 0x1E09 }, .{ 0x1E0B, 0x1E0B }, .{ 0x1E0D, 0x1E0D }, .{ 0x1E0F, 0x1E0F }, .{ 0x1E11, 0x1E11 }, .{ 0x1E13, 0x1E13 }, .{ 0x1E15, 0x1E15 }, .{ 0x1E17, 0x1E17 }, .{ 0x1E19, 0x1E19 }, .{ 0x1E1B, 0x1E1B }, .{ 0x1E1D, 0x1E1D }, .{ 0x1E1F, 0x1E1F }, .{ 0x1E21, 0x1E21 }, .{ 0x1E23, 0x1E23 }, .{ 0x1E25, 0x1E25 }, .{ 0x1E27, 0x1E27 }, .{ 0x1E29, 0x1E29 }, .{ 0x1E2B, 0x1E2B }, .{ 0x1E2D, 0x1E2D }, .{ 0x1E2F, 0x1E2F }, .{ 0x1E31, 0x1E31 }, .{ 0x1E33, 0x1E33 }, .{ 0x1E35, 0x1E35 }, .{ 0x1E37, 0x1E37 }, .{ 0x1E39, 0x1E39 }, .{ 0x1E3B, 0x1E3B }, .{ 0x1E3D, 0x1E3D }, .{ 0x1E3F, 0x1E3F }, .{ 0x1E41, 0x1E41 }, .{ 0x1E43, 0x1E43 }, .{ 0x1E45, 0x1E45 }, .{ 0x1E47, 0x1E47 }, .{ 0x1E49, 0x1E49 }, .{ 0x1E4B, 0x1E4B }, .{ 0x1E4D, 0x1E4D }, .{ 
0x1E4F, 0x1E4F }, .{ 0x1E51, 0x1E51 }, .{ 0x1E53, 0x1E53 }, .{ 0x1E55, 0x1E55 }, .{ 0x1E57, 0x1E57 }, .{ 0x1E59, 0x1E59 }, .{ 0x1E5B, 0x1E5B }, .{ 0x1E5D, 0x1E5D }, .{ 0x1E5F, 0x1E5F }, .{ 0x1E61, 0x1E61 }, .{ 0x1E63, 0x1E63 }, .{ 0x1E65, 0x1E65 }, .{ 0x1E67, 0x1E67 }, .{ 0x1E69, 0x1E69 }, .{ 0x1E6B, 0x1E6B }, .{ 0x1E6D, 0x1E6D }, .{ 0x1E6F, 0x1E6F }, .{ 0x1E71, 0x1E71 }, .{ 0x1E73, 0x1E73 }, .{ 0x1E75, 0x1E75 }, .{ 0x1E77, 0x1E77 }, .{ 0x1E79, 0x1E79 }, .{ 0x1E7B, 0x1E7B }, .{ 0x1E7D, 0x1E7D }, .{ 0x1E7F, 0x1E7F }, .{ 0x1E81, 0x1E81 }, .{ 0x1E83, 0x1E83 }, .{ 0x1E85, 0x1E85 }, .{ 0x1E87, 0x1E87 }, .{ 0x1E89, 0x1E89 }, .{ 0x1E8B, 0x1E8B }, .{ 0x1E8D, 0x1E8D }, .{ 0x1E8F, 0x1E8F }, .{ 0x1E91, 0x1E91 }, .{ 0x1E93, 0x1E93 }, .{ 0x1E95, 0x1E9D }, .{ 0x1E9F, 0x1E9F }, .{ 0x1EA1, 0x1EA1 }, .{ 0x1EA3, 0x1EA3 }, .{ 0x1EA5, 0x1EA5 }, .{ 0x1EA7, 0x1EA7 }, .{ 0x1EA9, 0x1EA9 }, .{ 0x1EAB, 0x1EAB }, .{ 0x1EAD, 0x1EAD }, .{ 0x1EAF, 0x1EAF }, .{ 0x1EB1, 0x1EB1 }, .{ 0x1EB3, 0x1EB3 }, .{ 0x1EB5, 0x1EB5 }, .{ 0x1EB7, 0x1EB7 }, .{ 0x1EB9, 0x1EB9 }, .{ 0x1EBB, 0x1EBB }, .{ 0x1EBD, 0x1EBD }, .{ 0x1EBF, 0x1EBF }, .{ 0x1EC1, 0x1EC1 }, .{ 0x1EC3, 0x1EC3 }, .{ 0x1EC5, 0x1EC5 }, .{ 0x1EC7, 0x1EC7 }, .{ 0x1EC9, 0x1EC9 }, .{ 0x1ECB, 0x1ECB }, .{ 0x1ECD, 0x1ECD }, .{ 0x1ECF, 0x1ECF }, .{ 0x1ED1, 0x1ED1 }, .{ 0x1ED3, 0x1ED3 }, .{ 0x1ED5, 0x1ED5 }, .{ 0x1ED7, 0x1ED7 }, .{ 0x1ED9, 0x1ED9 }, .{ 0x1EDB, 0x1EDB }, .{ 0x1EDD, 0x1EDD }, .{ 0x1EDF, 0x1EDF }, .{ 0x1EE1, 0x1EE1 }, .{ 0x1EE3, 0x1EE3 }, .{ 0x1EE5, 0x1EE5 }, .{ 0x1EE7, 0x1EE7 }, .{ 0x1EE9, 0x1EE9 }, .{ 0x1EEB, 0x1EEB }, .{ 0x1EED, 0x1EED }, .{ 0x1EEF, 0x1EEF }, .{ 0x1EF1, 0x1EF1 }, .{ 0x1EF3, 0x1EF3 }, .{ 0x1EF5, 0x1EF5 }, .{ 0x1EF7, 0x1EF7 }, .{ 0x1EF9, 0x1EF9 }, .{ 0x1EFB, 0x1EFB }, .{ 0x1EFD, 0x1EFD }, .{ 0x1EFF, 0x1EFF }, .{ 0x2018, 0x201A }, .{ 0x201C, 0x201E }, .{ 0x2026, 0x2026 }, .{ 0x2329, 0x232A }, .{ 0x27E6, 0x27E7 }, .{ 0x2C61, 0x2C61 }, .{ 0x2C65, 0x2C66 }, .{ 0x2C68, 0x2C68 }, .{ 0x2C6A, 0x2C6A }, .{ 0x2C6C, 0x2C6C }, .{ 
0x2C71, 0x2C71 }, .{ 0x2C73, 0x2C74 }, .{ 0x2C76, 0x2C7B }, .{ 0x2E80, 0x2FDF }, .{ 0x2FF0, 0x30FF }, .{ 0x31C0, 0x31EF }, .{ 0x3200, 0x4DBF }, .{ 0x4E00, 0x9FFF }, .{ 0xA723, 0xA723 }, .{ 0xA725, 0xA725 }, .{ 0xA727, 0xA727 }, .{ 0xA729, 0xA729 }, .{ 0xA72B, 0xA72B }, .{ 0xA72D, 0xA72D }, .{ 0xA72F, 0xA731 }, .{ 0xA733, 0xA733 }, .{ 0xA735, 0xA735 }, .{ 0xA737, 0xA737 }, .{ 0xA739, 0xA739 }, .{ 0xA73B, 0xA73B }, .{ 0xA73D, 0xA73D }, .{ 0xA73F, 0xA73F }, .{ 0xA741, 0xA741 }, .{ 0xA743, 0xA743 }, .{ 0xA745, 0xA745 }, .{ 0xA747, 0xA747 }, .{ 0xA749, 0xA749 }, .{ 0xA74B, 0xA74B }, .{ 0xA74D, 0xA74D }, .{ 0xA74F, 0xA74F }, .{ 0xA751, 0xA751 }, .{ 0xA753, 0xA753 }, .{ 0xA755, 0xA755 }, .{ 0xA757, 0xA757 }, .{ 0xA759, 0xA759 }, .{ 0xA75B, 0xA75B }, .{ 0xA75D, 0xA75D }, .{ 0xA75F, 0xA75F }, .{ 0xA761, 0xA761 }, .{ 0xA763, 0xA763 }, .{ 0xA765, 0xA765 }, .{ 0xA767, 0xA767 }, .{ 0xA769, 0xA769 }, .{ 0xA76B, 0xA76B }, .{ 0xA76D, 0xA76D }, .{ 0xA76F, 0xA76F }, .{ 0xA771, 0xA778 }, .{ 0xA77A, 0xA77A }, .{ 0xA77C, 0xA77C }, .{ 0xA77F, 0xA77F }, .{ 0xA781, 0xA781 }, .{ 0xA783, 0xA783 }, .{ 0xA785, 0xA785 }, .{ 0xA787, 0xA787 }, .{ 0xA78C, 0xA78C }, .{ 0xA78E, 0xA78E }, .{ 0xA791, 0xA791 }, .{ 0xA793, 0xA795 }, .{ 0xA797, 0xA797 }, .{ 0xA799, 0xA799 }, .{ 0xA79B, 0xA79B }, .{ 0xA79D, 0xA79D }, .{ 0xA79F, 0xA79F }, .{ 0xA7A1, 0xA7A1 }, .{ 0xA7A3, 0xA7A3 }, .{ 0xA7A5, 0xA7A5 }, .{ 0xA7A7, 0xA7A7 }, .{ 0xA7A9, 0xA7A9 }, .{ 0xA7AF, 0xA7AF }, .{ 0xA7B5, 0xA7B5 }, .{ 0xA7B7, 0xA7B7 }, .{ 0xA7B9, 0xA7B9 }, .{ 0xA7FA, 0xA7FA }, .{ 0xAB30, 0xAB5A }, .{ 0xAB60, 0xAB64 }, .{ 0xAC00, 0xD7AF }, .{ 0xF900, 0xFAFF }, .{ 0xFB1D, 0xFBB1 }, .{ 0xFBD3, 0xFD3D }, .{ 0xFD50, 0xFDC7 }, .{ 0xFDF0, 0xFDFB }, .{ 0xFE30, 0xFE4F }, .{ 0xFE70, 0xFEFC }, .{ 0xFF01, 0xFF01 }, .{ 0xFF08, 0xFF09 }, .{ 0xFF0C, 0xFF0C }, .{ 0xFF1A, 0xFF1B }, .{ 0xFF1F, 0xFF1F }, .{ 0xFF41, 0xFF5A }, .{ 0xFF5E, 0xFF5E }, .{ 0x1EE00, 0x1EEBB }, .{ 0x1F200, 0x1F2FF }, .{ 0x20000, 0x2A6DF }, .{ 0x2A700, 0x2EBEF }, .{ 0x2F800, 0x2FA1F 
}, }; pub fn matchLookbehind2(c: u21) bool { return rangeContains(&lookbehind_class_2_ranges, c); } const lookbehind_class_3_ranges = [_][2]u21{ .{ 0x0041, 0x005A }, .{ 0x00C0, 0x00D6 }, .{ 0x00D8, 0x00DE }, .{ 0x0100, 0x0100 }, .{ 0x0102, 0x0102 }, .{ 0x0104, 0x0104 }, .{ 0x0106, 0x0106 }, .{ 0x0108, 0x0108 }, .{ 0x010A, 0x010A }, .{ 0x010C, 0x010C }, .{ 0x010E, 0x010E }, .{ 0x0110, 0x0110 }, .{ 0x0112, 0x0112 }, .{ 0x0114, 0x0114 }, .{ 0x0116, 0x0116 }, .{ 0x0118, 0x0118 }, .{ 0x011A, 0x011A }, .{ 0x011C, 0x011C }, .{ 0x011E, 0x011E }, .{ 0x0120, 0x0120 }, .{ 0x0122, 0x0122 }, .{ 0x0124, 0x0124 }, .{ 0x0126, 0x0126 }, .{ 0x0128, 0x0128 }, .{ 0x012A, 0x012A }, .{ 0x012C, 0x012C }, .{ 0x012E, 0x012E }, .{ 0x0130, 0x0130 }, .{ 0x0132, 0x0132 }, .{ 0x0134, 0x0134 }, .{ 0x0136, 0x0136 }, .{ 0x0139, 0x0139 }, .{ 0x013B, 0x013B }, .{ 0x013D, 0x013D }, .{ 0x013F, 0x013F }, .{ 0x0141, 0x0141 }, .{ 0x0143, 0x0143 }, .{ 0x0145, 0x0145 }, .{ 0x0147, 0x0147 }, .{ 0x014A, 0x014A }, .{ 0x014C, 0x014C }, .{ 0x014E, 0x014E }, .{ 0x0150, 0x0150 }, .{ 0x0152, 0x0152 }, .{ 0x0154, 0x0154 }, .{ 0x0156, 0x0156 }, .{ 0x0158, 0x0158 }, .{ 0x015A, 0x015A }, .{ 0x015C, 0x015C }, .{ 0x015E, 0x015E }, .{ 0x0160, 0x0160 }, .{ 0x0162, 0x0162 }, .{ 0x0164, 0x0164 }, .{ 0x0166, 0x0166 }, .{ 0x0168, 0x0168 }, .{ 0x016A, 0x016A }, .{ 0x016C, 0x016C }, .{ 0x016E, 0x016E }, .{ 0x0170, 0x0170 }, .{ 0x0172, 0x0172 }, .{ 0x0174, 0x0174 }, .{ 0x0176, 0x0176 }, .{ 0x0178, 0x0179 }, .{ 0x017B, 0x017B }, .{ 0x017D, 0x017D }, .{ 0x0181, 0x0182 }, .{ 0x0184, 0x0184 }, .{ 0x0186, 0x0187 }, .{ 0x0189, 0x018B }, .{ 0x018E, 0x0191 }, .{ 0x0193, 0x0194 }, .{ 0x0196, 0x0198 }, .{ 0x019C, 0x019D }, .{ 0x019F, 0x01A0 }, .{ 0x01A2, 0x01A2 }, .{ 0x01A4, 0x01A4 }, .{ 0x01A6, 0x01A7 }, .{ 0x01A9, 0x01A9 }, .{ 0x01AC, 0x01AC }, .{ 0x01AE, 0x01AF }, .{ 0x01B1, 0x01B3 }, .{ 0x01B5, 0x01B5 }, .{ 0x01B7, 0x01B8 }, .{ 0x01BC, 0x01BC }, .{ 0x01C4, 0x01C4 }, .{ 0x01C7, 0x01C7 }, .{ 0x01CA, 0x01CA }, .{ 0x01CD, 0x01CD }, .{ 
0x01CF, 0x01CF }, .{ 0x01D1, 0x01D1 }, .{ 0x01D3, 0x01D3 }, .{ 0x01D5, 0x01D5 }, .{ 0x01D7, 0x01D7 }, .{ 0x01D9, 0x01D9 }, .{ 0x01DB, 0x01DB }, .{ 0x01DE, 0x01DE }, .{ 0x01E0, 0x01E0 }, .{ 0x01E2, 0x01E2 }, .{ 0x01E4, 0x01E4 }, .{ 0x01E6, 0x01E6 }, .{ 0x01E8, 0x01E8 }, .{ 0x01EA, 0x01EA }, .{ 0x01EC, 0x01EC }, .{ 0x01EE, 0x01EE }, .{ 0x01F1, 0x01F1 }, .{ 0x01F4, 0x01F4 }, .{ 0x01F6, 0x01F8 }, .{ 0x01FA, 0x01FA }, .{ 0x01FC, 0x01FC }, .{ 0x01FE, 0x01FE }, .{ 0x0200, 0x0200 }, .{ 0x0202, 0x0202 }, .{ 0x0204, 0x0204 }, .{ 0x0206, 0x0206 }, .{ 0x0208, 0x0208 }, .{ 0x020A, 0x020A }, .{ 0x020C, 0x020C }, .{ 0x020E, 0x020E }, .{ 0x0210, 0x0210 }, .{ 0x0212, 0x0212 }, .{ 0x0214, 0x0214 }, .{ 0x0216, 0x0216 }, .{ 0x0218, 0x0218 }, .{ 0x021A, 0x021A }, .{ 0x021C, 0x021C }, .{ 0x021E, 0x021E }, .{ 0x0220, 0x0220 }, .{ 0x0222, 0x0222 }, .{ 0x0224, 0x0224 }, .{ 0x0226, 0x0226 }, .{ 0x0228, 0x0228 }, .{ 0x022A, 0x022A }, .{ 0x022C, 0x022C }, .{ 0x022E, 0x022E }, .{ 0x0230, 0x0230 }, .{ 0x0232, 0x0232 }, .{ 0x023A, 0x023B }, .{ 0x023D, 0x023E }, .{ 0x0241, 0x0241 }, .{ 0x0243, 0x0246 }, .{ 0x0248, 0x0248 }, .{ 0x024A, 0x024A }, .{ 0x024C, 0x024C }, .{ 0x024E, 0x024E }, .{ 0x0386, 0x0386 }, .{ 0x0388, 0x038A }, .{ 0x038C, 0x038C }, .{ 0x038E, 0x038F }, .{ 0x0391, 0x03A9 }, .{ 0x0400, 0x0401 }, .{ 0x0403, 0x040A }, .{ 0x040C, 0x040D }, .{ 0x0410, 0x042F }, .{ 0x0490, 0x0490 }, .{ 0x0496, 0x0496 }, .{ 0x04A2, 0x04A2 }, .{ 0x04AE, 0x04AE }, .{ 0x04BA, 0x04BA }, .{ 0x04D8, 0x04D8 }, .{ 0x04E8, 0x04E8 }, .{ 0x0591, 0x05F4 }, .{ 0x0620, 0x064A }, .{ 0x066E, 0x06D5 }, .{ 0x06E5, 0x06FF }, .{ 0x0750, 0x077F }, .{ 0x08A0, 0x08BD }, .{ 0x0900, 0x09FF }, .{ 0x0B80, 0x0CFF }, .{ 0x0D80, 0x0DFF }, .{ 0x1100, 0x137F }, .{ 0x1E00, 0x1E00 }, .{ 0x1E02, 0x1E02 }, .{ 0x1E04, 0x1E04 }, .{ 0x1E06, 0x1E06 }, .{ 0x1E08, 0x1E08 }, .{ 0x1E0A, 0x1E0A }, .{ 0x1E0C, 0x1E0C }, .{ 0x1E0E, 0x1E0E }, .{ 0x1E10, 0x1E10 }, .{ 0x1E12, 0x1E12 }, .{ 0x1E14, 0x1E14 }, .{ 0x1E16, 0x1E16 }, .{ 0x1E18, 0x1E18 }, .{ 
0x1E1A, 0x1E1A }, .{ 0x1E1C, 0x1E1C }, .{ 0x1E1E, 0x1E1E }, .{ 0x1E20, 0x1E20 }, .{ 0x1E22, 0x1E22 }, .{ 0x1E24, 0x1E24 }, .{ 0x1E26, 0x1E26 }, .{ 0x1E28, 0x1E28 }, .{ 0x1E2A, 0x1E2A }, .{ 0x1E2C, 0x1E2C }, .{ 0x1E2E, 0x1E2E }, .{ 0x1E30, 0x1E30 }, .{ 0x1E32, 0x1E32 }, .{ 0x1E34, 0x1E34 }, .{ 0x1E36, 0x1E36 }, .{ 0x1E38, 0x1E38 }, .{ 0x1E3A, 0x1E3A }, .{ 0x1E3C, 0x1E3C }, .{ 0x1E3E, 0x1E3E }, .{ 0x1E40, 0x1E40 }, .{ 0x1E42, 0x1E42 }, .{ 0x1E44, 0x1E44 }, .{ 0x1E46, 0x1E46 }, .{ 0x1E48, 0x1E48 }, .{ 0x1E4A, 0x1E4A }, .{ 0x1E4C, 0x1E4C }, .{ 0x1E4E, 0x1E4E }, .{ 0x1E50, 0x1E50 }, .{ 0x1E52, 0x1E52 }, .{ 0x1E54, 0x1E54 }, .{ 0x1E56, 0x1E56 }, .{ 0x1E58, 0x1E58 }, .{ 0x1E5A, 0x1E5A }, .{ 0x1E5C, 0x1E5C }, .{ 0x1E5E, 0x1E5E }, .{ 0x1E60, 0x1E60 }, .{ 0x1E62, 0x1E62 }, .{ 0x1E64, 0x1E64 }, .{ 0x1E66, 0x1E66 }, .{ 0x1E68, 0x1E68 }, .{ 0x1E6A, 0x1E6A }, .{ 0x1E6C, 0x1E6C }, .{ 0x1E6E, 0x1E6E }, .{ 0x1E70, 0x1E70 }, .{ 0x1E72, 0x1E72 }, .{ 0x1E74, 0x1E74 }, .{ 0x1E76, 0x1E76 }, .{ 0x1E78, 0x1E78 }, .{ 0x1E7A, 0x1E7A }, .{ 0x1E7C, 0x1E7C }, .{ 0x1E7E, 0x1E7E }, .{ 0x1E80, 0x1E80 }, .{ 0x1E82, 0x1E82 }, .{ 0x1E84, 0x1E84 }, .{ 0x1E86, 0x1E86 }, .{ 0x1E88, 0x1E88 }, .{ 0x1E8A, 0x1E8A }, .{ 0x1E8C, 0x1E8C }, .{ 0x1E8E, 0x1E8E }, .{ 0x1E90, 0x1E90 }, .{ 0x1E92, 0x1E92 }, .{ 0x1E94, 0x1E94 }, .{ 0x1E9E, 0x1E9E }, .{ 0x1EA0, 0x1EA0 }, .{ 0x1EA2, 0x1EA2 }, .{ 0x1EA4, 0x1EA4 }, .{ 0x1EA6, 0x1EA6 }, .{ 0x1EA8, 0x1EA8 }, .{ 0x1EAA, 0x1EAA }, .{ 0x1EAC, 0x1EAC }, .{ 0x1EAE, 0x1EAE }, .{ 0x1EB0, 0x1EB0 }, .{ 0x1EB2, 0x1EB2 }, .{ 0x1EB4, 0x1EB4 }, .{ 0x1EB6, 0x1EB6 }, .{ 0x1EB8, 0x1EB8 }, .{ 0x1EBA, 0x1EBA }, .{ 0x1EBC, 0x1EBC }, .{ 0x1EBE, 0x1EBE }, .{ 0x1EC0, 0x1EC0 }, .{ 0x1EC2, 0x1EC2 }, .{ 0x1EC4, 0x1EC4 }, .{ 0x1EC6, 0x1EC6 }, .{ 0x1EC8, 0x1EC8 }, .{ 0x1ECA, 0x1ECA }, .{ 0x1ECC, 0x1ECC }, .{ 0x1ECE, 0x1ECE }, .{ 0x1ED0, 0x1ED0 }, .{ 0x1ED2, 0x1ED2 }, .{ 0x1ED4, 0x1ED4 }, .{ 0x1ED6, 0x1ED6 }, .{ 0x1ED8, 0x1ED8 }, .{ 0x1EDA, 0x1EDA }, .{ 0x1EDC, 0x1EDC }, .{ 0x1EDE, 0x1EDE }, .{ 
0x1EE0, 0x1EE0 }, .{ 0x1EE2, 0x1EE2 }, .{ 0x1EE4, 0x1EE4 }, .{ 0x1EE6, 0x1EE6 }, .{ 0x1EE8, 0x1EE8 }, .{ 0x1EEA, 0x1EEA }, .{ 0x1EEC, 0x1EEC }, .{ 0x1EEE, 0x1EEE }, .{ 0x1EF0, 0x1EF0 }, .{ 0x1EF2, 0x1EF2 }, .{ 0x1EF4, 0x1EF4 }, .{ 0x1EF6, 0x1EF6 }, .{ 0x1EF8, 0x1EF8 }, .{ 0x1EFA, 0x1EFA }, .{ 0x1EFC, 0x1EFC }, .{ 0x1EFE, 0x1EFE }, .{ 0x2C60, 0x2C60 }, .{ 0x2C62, 0x2C64 }, .{ 0x2C67, 0x2C67 }, .{ 0x2C69, 0x2C69 }, .{ 0x2C6B, 0x2C6B }, .{ 0x2C6D, 0x2C70 }, .{ 0x2C72, 0x2C72 }, .{ 0x2C75, 0x2C75 }, .{ 0x2C7E, 0x2C7F }, .{ 0x2E80, 0x2FDF }, .{ 0x2FF0, 0x30FF }, .{ 0x31C0, 0x31EF }, .{ 0x3200, 0x4DBF }, .{ 0x4E00, 0x9FFF }, .{ 0xA722, 0xA722 }, .{ 0xA724, 0xA724 }, .{ 0xA726, 0xA726 }, .{ 0xA728, 0xA728 }, .{ 0xA72A, 0xA72A }, .{ 0xA72C, 0xA72C }, .{ 0xA72E, 0xA72E }, .{ 0xA732, 0xA732 }, .{ 0xA734, 0xA734 }, .{ 0xA736, 0xA736 }, .{ 0xA738, 0xA738 }, .{ 0xA73A, 0xA73A }, .{ 0xA73C, 0xA73C }, .{ 0xA73E, 0xA73E }, .{ 0xA740, 0xA740 }, .{ 0xA742, 0xA742 }, .{ 0xA744, 0xA744 }, .{ 0xA746, 0xA746 }, .{ 0xA748, 0xA748 }, .{ 0xA74A, 0xA74A }, .{ 0xA74C, 0xA74C }, .{ 0xA74E, 0xA74E }, .{ 0xA750, 0xA750 }, .{ 0xA752, 0xA752 }, .{ 0xA754, 0xA754 }, .{ 0xA756, 0xA756 }, .{ 0xA758, 0xA758 }, .{ 0xA75A, 0xA75A }, .{ 0xA75C, 0xA75C }, .{ 0xA75E, 0xA75E }, .{ 0xA760, 0xA760 }, .{ 0xA762, 0xA762 }, .{ 0xA764, 0xA764 }, .{ 0xA766, 0xA766 }, .{ 0xA768, 0xA768 }, .{ 0xA76A, 0xA76A }, .{ 0xA76C, 0xA76C }, .{ 0xA76E, 0xA76E }, .{ 0xA779, 0xA779 }, .{ 0xA77B, 0xA77B }, .{ 0xA77D, 0xA77E }, .{ 0xA780, 0xA780 }, .{ 0xA782, 0xA782 }, .{ 0xA784, 0xA784 }, .{ 0xA786, 0xA786 }, .{ 0xA78B, 0xA78B }, .{ 0xA78D, 0xA78D }, .{ 0xA790, 0xA790 }, .{ 0xA792, 0xA792 }, .{ 0xA796, 0xA796 }, .{ 0xA798, 0xA798 }, .{ 0xA79A, 0xA79A }, .{ 0xA79C, 0xA79C }, .{ 0xA79E, 0xA79E }, .{ 0xA7A0, 0xA7A0 }, .{ 0xA7A2, 0xA7A2 }, .{ 0xA7A4, 0xA7A4 }, .{ 0xA7A6, 0xA7A6 }, .{ 0xA7A8, 0xA7A8 }, .{ 0xA7AA, 0xA7AE }, .{ 0xA7B0, 0xA7B4 }, .{ 0xA7B6, 0xA7B6 }, .{ 0xA7B8, 0xA7B8 }, .{ 0xAC00, 0xD7AF }, .{ 0xF900, 0xFAFF }, .{ 
0xFB1D, 0xFBB1 }, .{ 0xFBD3, 0xFD3D }, .{ 0xFD50, 0xFDC7 }, .{ 0xFDF0, 0xFDFB }, .{ 0xFE30, 0xFE4F }, .{ 0xFE70, 0xFEFC }, .{ 0xFF21, 0xFF3A }, .{ 0x1EE00, 0x1EEBB }, .{ 0x1F200, 0x1F2FF }, .{ 0x20000, 0x2A6DF }, .{ 0x2A700, 0x2EBEF }, .{ 0x2F800, 0x2FA1F }, }; pub fn matchLookbehind3(c: u21) bool { return rangeContains(&lookbehind_class_3_ranges, c); } // ── suffix lookbehind rules ── // these are checked by tokenizer.zig matchSuffix() // format: for each rule, check behind condition then try suffix literal(s) pub fn matchSuffixLookbehind(text: []const u8) usize { if (text.len < 2) return 0; if (std.mem.endsWith(u8, text, "+") and text.len > 1) { const before = lastCodepoint(text[0 .. text.len - 1]); if (before != null and matchLookbehind0(before.?.value)) return 1; } if (std.mem.endsWith(u8, text, ".") and text.len > 1) { const b1 = lastCodepoint(text[0 .. text.len - 1]); if (b1) |bp1| { const b2 = lastCodepoint(text[0 .. text.len - 1 - bp1.len]); if (matchLookbehind1(bp1.value)) { if (b2) |b2p| { if (b2p.value == 0x00B0) return 1; } } } } if (std.mem.endsWith(u8, text, "\xe2\x82\xac") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe0\xb8\xbf") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "US$") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xbd") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xef\xb7\xbc") and text.len > 3) { const before = lastCodepoint(text[0 .. 
text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xb4") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xa0") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xa1") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xa2") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xa3") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xa4") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xa5") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xa6") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xa7") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xa8") and text.len > 3) { const before = lastCodepoint(text[0 .. 
text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xa9") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xaa") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xab") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xac") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xad") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xae") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xaf") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xb0") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xb1") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xb2") and text.len > 3) { const before = lastCodepoint(text[0 .. 
text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xb3") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xb4") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xb5") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xb6") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xb7") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xb8") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xb9") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xba") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xbb") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xbc") and text.len > 3) { const before = lastCodepoint(text[0 .. 
text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xbd") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xbe") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xe2\x82\xbf") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xc2\xa3") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "\xc2\xa5") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "C$") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "A$") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "$") and text.len > 1) { const before = lastCodepoint(text[0 .. text.len - 1]); if (before != null and matchLookbehind0(before.?.value)) return 1; } if (std.mem.endsWith(u8, text, "\xd8\xa7\xd9\x83\xd9\x88\xd8\xa7\xd8\xa8") and text.len > 10) { const before = lastCodepoint(text[0 .. text.len - 10]); if (before != null and matchLookbehind0(before.?.value)) return 10; } if (std.mem.endsWith(u8, text, "\xd0\xbc\xd0\xb1\xd0\xb0\xd1\x80") and text.len > 8) { const before = lastCodepoint(text[0 .. 
text.len - 8]); if (before != null and matchLookbehind0(before.?.value)) return 8; } if (std.mem.endsWith(u8, text, "\xd1\x82\xd0\xb1\xd9\x83\xd9\x85") and text.len > 8) { const before = lastCodepoint(text[0 .. text.len - 8]); if (before != null and matchLookbehind0(before.?.value)) return 8; } if (std.mem.endsWith(u8, text, "\xd8\xba\xd8\xb1\xd8\xa7\xd9\x85") and text.len > 8) { const before = lastCodepoint(text[0 .. text.len - 8]); if (before != null and matchLookbehind0(before.?.value)) return 8; } if (std.mem.endsWith(u8, text, "\xd8\xac\xd8\xb1\xd8\xa7\xd9\x85") and text.len > 8) { const before = lastCodepoint(text[0 .. text.len - 8]); if (before != null and matchLookbehind0(before.?.value)) return 8; } if (std.mem.endsWith(u8, text, "\xd0\xba\xd0\xbc/\xd1\x87") and text.len > 7) { const before = lastCodepoint(text[0 .. text.len - 7]); if (before != null and matchLookbehind0(before.?.value)) return 7; } if (std.mem.endsWith(u8, text, "\xd0\xba\xd0\xbc\xc2\xb2") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd0\xba\xd0\xbc\xc2\xb3") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd0\xb4\xd0\xbc\xc2\xb2") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd0\xb4\xd0\xbc\xc2\xb3") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd1\x81\xd0\xbc\xc2\xb2") and text.len > 6) { const before = lastCodepoint(text[0 .. 
text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd1\x81\xd0\xbc\xc2\xb3") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd0\xbc\xd0\xbc\xc2\xb2") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd0\xbc\xd0\xbc\xc2\xb3") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd0\xba\xd0\x9f\xd0\xb0") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd9\x83\xd9\x85\xc2\xb2") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd9\x83\xd9\x85\xc2\xb3") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd8\xb3\xd9\x85\xc2\xb2") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd8\xb3\xd9\x85\xc2\xb3") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd9\x85\xd9\x85\xc2\xb2") and text.len > 6) { const before = lastCodepoint(text[0 .. 
text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd9\x85\xd9\x85\xc2\xb3") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd9\x85\xd9\x84\xd8\xba") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd9\x83\xd9\x88\xd8\xa8") and text.len > 6) { const before = lastCodepoint(text[0 .. text.len - 6]); if (before != null and matchLookbehind0(before.?.value)) return 6; } if (std.mem.endsWith(u8, text, "\xd0\xbc/\xd1\x81") and text.len > 5) { const before = lastCodepoint(text[0 .. text.len - 5]); if (before != null and matchLookbehind0(before.?.value)) return 5; } if (std.mem.endsWith(u8, text, "km\xc2\xb2") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "km\xc2\xb3") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "dm\xc2\xb2") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "dm\xc2\xb3") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "cm\xc2\xb2") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "cm\xc2\xb3") and text.len > 4) { const before = lastCodepoint(text[0 .. 
text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "mm\xc2\xb2") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "mm\xc2\xb3") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "km/h") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "mbar") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xba\xd0\xbc") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xbc\xc2\xb2") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xbc\xc2\xb3") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xb4\xd0\xbc") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd1\x81\xd0\xbc") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xbc\xd0\xbc") and text.len > 4) { const before = lastCodepoint(text[0 .. 
text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xbd\xd0\xbc") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xba\xd0\xb3") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xbc\xd0\xb3") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\x9f\xd0\xb0") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\x9a\xd0\xb1") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\x9a\xd0\x91") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xba\xd0\xb1") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\x9c\xd0\xb1") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\x9c\xd0\x91") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xbc\xd0\xb1") and text.len > 4) { const before = lastCodepoint(text[0 .. 
text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\x93\xd0\xb1") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\x93\xd0\x91") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xb3\xd0\xb1") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xa2\xd0\xb1") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd0\xa2\xd0\x91") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd9\x85\xc2\xb2") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd9\x85\xc2\xb3") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd8\xb3\xd9\x85") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd9\x85\xd9\x85") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd9\x83\xd9\x85") and text.len > 4) { const before = lastCodepoint(text[0 .. 
text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd8\xac\xd9\x85") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "\xd9\x83\xd8\xba") and text.len > 4) { const before = lastCodepoint(text[0 .. text.len - 4]); if (before != null and matchLookbehind0(before.?.value)) return 4; } if (std.mem.endsWith(u8, text, "m\xc2\xb2") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "m\xc2\xb3") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xc2\xb5m") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "\xc2\xb5g") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "m/s") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "kmh") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "mph") and text.len > 3) { const before = lastCodepoint(text[0 .. text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "hPa") and text.len > 3) { const before = lastCodepoint(text[0 .. 
text.len - 3]); if (before != null and matchLookbehind0(before.?.value)) return 3; } if (std.mem.endsWith(u8, text, "km") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "dm") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "cm") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "mm") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "ha") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "nm") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "yd") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "in") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "ft") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "kg") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "mg") and text.len > 2) { const before = lastCodepoint(text[0 .. 
text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "lb") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "oz") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "Pa") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "mb") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "MB") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "kb") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "KB") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "gb") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "GB") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "tb") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "TB") and text.len > 2) { const before = lastCodepoint(text[0 .. 
text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "\xd0\xbc") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "\xd0\xb3") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "\xd9\x85") and text.len > 2) { const before = lastCodepoint(text[0 .. text.len - 2]); if (before != null and matchLookbehind0(before.?.value)) return 2; } if (std.mem.endsWith(u8, text, "m") and text.len > 1) { const before = lastCodepoint(text[0 .. text.len - 1]); if (before != null and matchLookbehind0(before.?.value)) return 1; } if (std.mem.endsWith(u8, text, "g") and text.len > 1) { const before = lastCodepoint(text[0 .. text.len - 1]); if (before != null and matchLookbehind0(before.?.value)) return 1; } if (std.mem.endsWith(u8, text, "t") and text.len > 1) { const before = lastCodepoint(text[0 .. text.len - 1]); if (before != null and matchLookbehind0(before.?.value)) return 1; } if (std.mem.endsWith(u8, text, "T") and text.len > 1) { const before = lastCodepoint(text[0 .. text.len - 1]); if (before != null and matchLookbehind0(before.?.value)) return 1; } if (std.mem.endsWith(u8, text, "G") and text.len > 1) { const before = lastCodepoint(text[0 .. text.len - 1]); if (before != null and matchLookbehind0(before.?.value)) return 1; } if (std.mem.endsWith(u8, text, "M") and text.len > 1) { const before = lastCodepoint(text[0 .. text.len - 1]); if (before != null and matchLookbehind0(before.?.value)) return 1; } if (std.mem.endsWith(u8, text, "K") and text.len > 1) { const before = lastCodepoint(text[0 .. 
text.len - 1]); if (before != null and matchLookbehind0(before.?.value)) return 1; } if (std.mem.endsWith(u8, text, "%") and text.len > 1) { const before = lastCodepoint(text[0 .. text.len - 1]); if (before != null and matchLookbehind0(before.?.value)) return 1; } if (std.mem.endsWith(u8, text, ".") and text.len > 1) { const before = lastCodepoint(text[0 .. text.len - 1]); if (before != null and matchLookbehind2(before.?.value)) return 1; } if (std.mem.endsWith(u8, text, ".") and text.len > 1) { const b1 = lastCodepoint(text[0 .. text.len - 1]); if (b1) |bp1| { const b2 = lastCodepoint(text[0 .. text.len - 1 - bp1.len]); if (matchLookbehind3(bp1.value)) { if (b2) |b2p| { if (matchLookbehind3(b2p.value)) return 1; } } } } return 0; }

// ── infix character classes ──

/// Inclusive `[first, last]` code point ranges tested by `is_infix_3_ahead`:
/// U+002D ('-') and U+0030..U+0039 ('0'..'9').
/// Entries are in ascending order, which the binary search in `rangeContains`
/// relies on.
pub const is_infix_3_ahead_ranges = [_][2]u21{ .{ 0x002D, 0x002D }, .{ 0x0030, 0x0039 }, };

/// Reports whether `c` falls inside any inclusive range of
/// `is_infix_3_ahead_ranges`; the lookup is delegated to `rangeContains`.
/// NOTE(review): presumably the lookahead character class of infix rule 3 —
/// confirm against the generator script.
pub fn is_infix_3_ahead(c: u21) bool { return rangeContains(&is_infix_3_ahead_ranges, c); }

/// Inclusive `[first, last]` code point ranges tested by `is_infix_3_behind`:
/// a single range, U+0030..U+0039 ('0'..'9').
pub const is_infix_3_behind_ranges = [_][2]u21{ .{ 0x0030, 0x0039 }, };

/// Reports whether `c` falls inside `is_infix_3_behind_ranges`, i.e. whether it
/// is one of the code points U+0030..U+0039; the lookup is delegated to
/// `rangeContains`.
/// NOTE(review): presumably the lookbehind character class of infix rule 3 —
/// confirm against the generator script.
pub fn is_infix_3_behind(c: u21) bool { return rangeContains(&is_infix_3_behind_ranges, c); }

/// Inclusive `[first, last]` code point ranges tested by `is_infix_4_ahead`.
/// The table opens with U+0022 ('"'), U+0027 ('\''), U+002C (','),
/// U+0041..U+005A ('A'..'Z') and U+0060 ('`') and continues well beyond ASCII.
/// Entries are in ascending order, which the binary search in `rangeContains`
/// relies on.
/// NOTE(review): the non-ASCII entries look like uppercase letters plus several
/// whole script blocks — confirm against the generator before editing by hand.
pub const is_infix_4_ahead_ranges = [_][2]u21{ .{ 0x0022, 0x0022 }, .{ 0x0027, 0x0027 }, .{ 0x002C, 0x002C }, .{ 0x0041, 0x005A }, .{ 0x0060, 0x0060 }, .{ 0x00AB, 0x00AB }, .{ 0x00B4, 0x00B4 }, .{ 0x00BB, 0x00BB }, .{ 0x00C0, 0x00D6 }, .{ 0x00D8, 0x00DE }, .{ 0x0100, 0x0100 }, .{ 0x0102, 0x0102 }, .{ 0x0104, 0x0104 }, .{ 0x0106, 0x0106 }, .{ 0x0108, 0x0108 }, .{ 0x010A, 0x010A }, .{ 0x010C, 0x010C }, .{ 0x010E, 0x010E }, .{ 0x0110, 0x0110 }, .{ 0x0112, 0x0112 }, .{ 0x0114, 0x0114 }, .{ 0x0116, 0x0116 }, .{ 0x0118, 0x0118 }, .{ 0x011A, 0x011A }, .{ 0x011C, 0x011C }, .{ 0x011E, 0x011E }, .{ 0x0120, 0x0120 }, .{ 0x0122, 0x0122 }, .{ 0x0124, 0x0124 }, .{ 0x0126, 0x0126 }, .{ 0x0128, 0x0128 }, .{ 0x012A, 0x012A }, .{ 0x012C, 0x012C }, .{ 0x012E, 0x012E }, .{ 0x0130, 0x0130 }, .{ 0x0132, 0x0132 }, .{ 0x0134, 0x0134 }, .{ 0x0136, 0x0136 }, .{ 0x0139, 0x0139 }, .{
0x013B, 0x013B }, .{ 0x013D, 0x013D }, .{ 0x013F, 0x013F }, .{ 0x0141, 0x0141 }, .{ 0x0143, 0x0143 }, .{ 0x0145, 0x0145 }, .{ 0x0147, 0x0147 }, .{ 0x014A, 0x014A }, .{ 0x014C, 0x014C }, .{ 0x014E, 0x014E }, .{ 0x0150, 0x0150 }, .{ 0x0152, 0x0152 }, .{ 0x0154, 0x0154 }, .{ 0x0156, 0x0156 }, .{ 0x0158, 0x0158 }, .{ 0x015A, 0x015A }, .{ 0x015C, 0x015C }, .{ 0x015E, 0x015E }, .{ 0x0160, 0x0160 }, .{ 0x0162, 0x0162 }, .{ 0x0164, 0x0164 }, .{ 0x0166, 0x0166 }, .{ 0x0168, 0x0168 }, .{ 0x016A, 0x016A }, .{ 0x016C, 0x016C }, .{ 0x016E, 0x016E }, .{ 0x0170, 0x0170 }, .{ 0x0172, 0x0172 }, .{ 0x0174, 0x0174 }, .{ 0x0176, 0x0176 }, .{ 0x0178, 0x0179 }, .{ 0x017B, 0x017B }, .{ 0x017D, 0x017D }, .{ 0x0181, 0x0182 }, .{ 0x0184, 0x0184 }, .{ 0x0186, 0x0187 }, .{ 0x0189, 0x018B }, .{ 0x018E, 0x0191 }, .{ 0x0193, 0x0194 }, .{ 0x0196, 0x0198 }, .{ 0x019C, 0x019D }, .{ 0x019F, 0x01A0 }, .{ 0x01A2, 0x01A2 }, .{ 0x01A4, 0x01A4 }, .{ 0x01A6, 0x01A7 }, .{ 0x01A9, 0x01A9 }, .{ 0x01AC, 0x01AC }, .{ 0x01AE, 0x01AF }, .{ 0x01B1, 0x01B3 }, .{ 0x01B5, 0x01B5 }, .{ 0x01B7, 0x01B8 }, .{ 0x01BC, 0x01BC }, .{ 0x01C4, 0x01C4 }, .{ 0x01C7, 0x01C7 }, .{ 0x01CA, 0x01CA }, .{ 0x01CD, 0x01CD }, .{ 0x01CF, 0x01CF }, .{ 0x01D1, 0x01D1 }, .{ 0x01D3, 0x01D3 }, .{ 0x01D5, 0x01D5 }, .{ 0x01D7, 0x01D7 }, .{ 0x01D9, 0x01D9 }, .{ 0x01DB, 0x01DB }, .{ 0x01DE, 0x01DE }, .{ 0x01E0, 0x01E0 }, .{ 0x01E2, 0x01E2 }, .{ 0x01E4, 0x01E4 }, .{ 0x01E6, 0x01E6 }, .{ 0x01E8, 0x01E8 }, .{ 0x01EA, 0x01EA }, .{ 0x01EC, 0x01EC }, .{ 0x01EE, 0x01EE }, .{ 0x01F1, 0x01F1 }, .{ 0x01F4, 0x01F4 }, .{ 0x01F6, 0x01F8 }, .{ 0x01FA, 0x01FA }, .{ 0x01FC, 0x01FC }, .{ 0x01FE, 0x01FE }, .{ 0x0200, 0x0200 }, .{ 0x0202, 0x0202 }, .{ 0x0204, 0x0204 }, .{ 0x0206, 0x0206 }, .{ 0x0208, 0x0208 }, .{ 0x020A, 0x020A }, .{ 0x020C, 0x020C }, .{ 0x020E, 0x020E }, .{ 0x0210, 0x0210 }, .{ 0x0212, 0x0212 }, .{ 0x0214, 0x0214 }, .{ 0x0216, 0x0216 }, .{ 0x0218, 0x0218 }, .{ 0x021A, 0x021A }, .{ 0x021C, 0x021C }, .{ 0x021E, 0x021E }, .{ 0x0220, 0x0220 }, .{ 
0x0222, 0x0222 }, .{ 0x0224, 0x0224 }, .{ 0x0226, 0x0226 }, .{ 0x0228, 0x0228 }, .{ 0x022A, 0x022A }, .{ 0x022C, 0x022C }, .{ 0x022E, 0x022E }, .{ 0x0230, 0x0230 }, .{ 0x0232, 0x0232 }, .{ 0x023A, 0x023B }, .{ 0x023D, 0x023E }, .{ 0x0241, 0x0241 }, .{ 0x0243, 0x0246 }, .{ 0x0248, 0x0248 }, .{ 0x024A, 0x024A }, .{ 0x024C, 0x024C }, .{ 0x024E, 0x024E }, .{ 0x0386, 0x0386 }, .{ 0x0388, 0x038A }, .{ 0x038C, 0x038C }, .{ 0x038E, 0x038F }, .{ 0x0391, 0x03A9 }, .{ 0x0400, 0x0401 }, .{ 0x0403, 0x040A }, .{ 0x040C, 0x040D }, .{ 0x0410, 0x042F }, .{ 0x0490, 0x0490 }, .{ 0x0496, 0x0496 }, .{ 0x04A2, 0x04A2 }, .{ 0x04AE, 0x04AE }, .{ 0x04BA, 0x04BA }, .{ 0x04D8, 0x04D8 }, .{ 0x04E8, 0x04E8 }, .{ 0x0591, 0x05F4 }, .{ 0x0620, 0x064A }, .{ 0x066E, 0x06D5 }, .{ 0x06E5, 0x06FF }, .{ 0x0750, 0x077F }, .{ 0x08A0, 0x08BD }, .{ 0x0900, 0x09FF }, .{ 0x0B80, 0x0CFF }, .{ 0x0D80, 0x0DFF }, .{ 0x1100, 0x137F }, .{ 0x1E00, 0x1E00 }, .{ 0x1E02, 0x1E02 }, .{ 0x1E04, 0x1E04 }, .{ 0x1E06, 0x1E06 }, .{ 0x1E08, 0x1E08 }, .{ 0x1E0A, 0x1E0A }, .{ 0x1E0C, 0x1E0C }, .{ 0x1E0E, 0x1E0E }, .{ 0x1E10, 0x1E10 }, .{ 0x1E12, 0x1E12 }, .{ 0x1E14, 0x1E14 }, .{ 0x1E16, 0x1E16 }, .{ 0x1E18, 0x1E18 }, .{ 0x1E1A, 0x1E1A }, .{ 0x1E1C, 0x1E1C }, .{ 0x1E1E, 0x1E1E }, .{ 0x1E20, 0x1E20 }, .{ 0x1E22, 0x1E22 }, .{ 0x1E24, 0x1E24 }, .{ 0x1E26, 0x1E26 }, .{ 0x1E28, 0x1E28 }, .{ 0x1E2A, 0x1E2A }, .{ 0x1E2C, 0x1E2C }, .{ 0x1E2E, 0x1E2E }, .{ 0x1E30, 0x1E30 }, .{ 0x1E32, 0x1E32 }, .{ 0x1E34, 0x1E34 }, .{ 0x1E36, 0x1E36 }, .{ 0x1E38, 0x1E38 }, .{ 0x1E3A, 0x1E3A }, .{ 0x1E3C, 0x1E3C }, .{ 0x1E3E, 0x1E3E }, .{ 0x1E40, 0x1E40 }, .{ 0x1E42, 0x1E42 }, .{ 0x1E44, 0x1E44 }, .{ 0x1E46, 0x1E46 }, .{ 0x1E48, 0x1E48 }, .{ 0x1E4A, 0x1E4A }, .{ 0x1E4C, 0x1E4C }, .{ 0x1E4E, 0x1E4E }, .{ 0x1E50, 0x1E50 }, .{ 0x1E52, 0x1E52 }, .{ 0x1E54, 0x1E54 }, .{ 0x1E56, 0x1E56 }, .{ 0x1E58, 0x1E58 }, .{ 0x1E5A, 0x1E5A }, .{ 0x1E5C, 0x1E5C }, .{ 0x1E5E, 0x1E5E }, .{ 0x1E60, 0x1E60 }, .{ 0x1E62, 0x1E62 }, .{ 0x1E64, 0x1E64 }, .{ 0x1E66, 0x1E66 }, .{ 
0x1E68, 0x1E68 }, .{ 0x1E6A, 0x1E6A }, .{ 0x1E6C, 0x1E6C }, .{ 0x1E6E, 0x1E6E }, .{ 0x1E70, 0x1E70 }, .{ 0x1E72, 0x1E72 }, .{ 0x1E74, 0x1E74 }, .{ 0x1E76, 0x1E76 }, .{ 0x1E78, 0x1E78 }, .{ 0x1E7A, 0x1E7A }, .{ 0x1E7C, 0x1E7C }, .{ 0x1E7E, 0x1E7E }, .{ 0x1E80, 0x1E80 }, .{ 0x1E82, 0x1E82 }, .{ 0x1E84, 0x1E84 }, .{ 0x1E86, 0x1E86 }, .{ 0x1E88, 0x1E88 }, .{ 0x1E8A, 0x1E8A }, .{ 0x1E8C, 0x1E8C }, .{ 0x1E8E, 0x1E8E }, .{ 0x1E90, 0x1E90 }, .{ 0x1E92, 0x1E92 }, .{ 0x1E94, 0x1E94 }, .{ 0x1E9E, 0x1E9E }, .{ 0x1EA0, 0x1EA0 }, .{ 0x1EA2, 0x1EA2 }, .{ 0x1EA4, 0x1EA4 }, .{ 0x1EA6, 0x1EA6 }, .{ 0x1EA8, 0x1EA8 }, .{ 0x1EAA, 0x1EAA }, .{ 0x1EAC, 0x1EAC }, .{ 0x1EAE, 0x1EAE }, .{ 0x1EB0, 0x1EB0 }, .{ 0x1EB2, 0x1EB2 }, .{ 0x1EB4, 0x1EB4 }, .{ 0x1EB6, 0x1EB6 }, .{ 0x1EB8, 0x1EB8 }, .{ 0x1EBA, 0x1EBA }, .{ 0x1EBC, 0x1EBC }, .{ 0x1EBE, 0x1EBE }, .{ 0x1EC0, 0x1EC0 }, .{ 0x1EC2, 0x1EC2 }, .{ 0x1EC4, 0x1EC4 }, .{ 0x1EC6, 0x1EC6 }, .{ 0x1EC8, 0x1EC8 }, .{ 0x1ECA, 0x1ECA }, .{ 0x1ECC, 0x1ECC }, .{ 0x1ECE, 0x1ECE }, .{ 0x1ED0, 0x1ED0 }, .{ 0x1ED2, 0x1ED2 }, .{ 0x1ED4, 0x1ED4 }, .{ 0x1ED6, 0x1ED6 }, .{ 0x1ED8, 0x1ED8 }, .{ 0x1EDA, 0x1EDA }, .{ 0x1EDC, 0x1EDC }, .{ 0x1EDE, 0x1EDE }, .{ 0x1EE0, 0x1EE0 }, .{ 0x1EE2, 0x1EE2 }, .{ 0x1EE4, 0x1EE4 }, .{ 0x1EE6, 0x1EE6 }, .{ 0x1EE8, 0x1EE8 }, .{ 0x1EEA, 0x1EEA }, .{ 0x1EEC, 0x1EEC }, .{ 0x1EEE, 0x1EEE }, .{ 0x1EF0, 0x1EF0 }, .{ 0x1EF2, 0x1EF2 }, .{ 0x1EF4, 0x1EF4 }, .{ 0x1EF6, 0x1EF6 }, .{ 0x1EF8, 0x1EF8 }, .{ 0x1EFA, 0x1EFA }, .{ 0x1EFC, 0x1EFC }, .{ 0x1EFE, 0x1EFE }, .{ 0x2018, 0x201A }, .{ 0x201C, 0x201E }, .{ 0x2329, 0x232A }, .{ 0x27E6, 0x27E7 }, .{ 0x2C60, 0x2C60 }, .{ 0x2C62, 0x2C64 }, .{ 0x2C67, 0x2C67 }, .{ 0x2C69, 0x2C69 }, .{ 0x2C6B, 0x2C6B }, .{ 0x2C6D, 0x2C70 }, .{ 0x2C72, 0x2C72 }, .{ 0x2C75, 0x2C75 }, .{ 0x2C7E, 0x2C7F }, .{ 0x2E80, 0x2FDF }, .{ 0x2FF0, 0x30FF }, .{ 0x31C0, 0x31EF }, .{ 0x3200, 0x4DBF }, .{ 0x4E00, 0x9FFF }, .{ 0xA722, 0xA722 }, .{ 0xA724, 0xA724 }, .{ 0xA726, 0xA726 }, .{ 0xA728, 0xA728 }, .{ 0xA72A, 0xA72A }, .{ 
0xA72C, 0xA72C }, .{ 0xA72E, 0xA72E }, .{ 0xA732, 0xA732 }, .{ 0xA734, 0xA734 }, .{ 0xA736, 0xA736 }, .{ 0xA738, 0xA738 }, .{ 0xA73A, 0xA73A }, .{ 0xA73C, 0xA73C }, .{ 0xA73E, 0xA73E }, .{ 0xA740, 0xA740 }, .{ 0xA742, 0xA742 }, .{ 0xA744, 0xA744 }, .{ 0xA746, 0xA746 }, .{ 0xA748, 0xA748 }, .{ 0xA74A, 0xA74A }, .{ 0xA74C, 0xA74C }, .{ 0xA74E, 0xA74E }, .{ 0xA750, 0xA750 }, .{ 0xA752, 0xA752 }, .{ 0xA754, 0xA754 }, .{ 0xA756, 0xA756 }, .{ 0xA758, 0xA758 }, .{ 0xA75A, 0xA75A }, .{ 0xA75C, 0xA75C }, .{ 0xA75E, 0xA75E }, .{ 0xA760, 0xA760 }, .{ 0xA762, 0xA762 }, .{ 0xA764, 0xA764 }, .{ 0xA766, 0xA766 }, .{ 0xA768, 0xA768 }, .{ 0xA76A, 0xA76A }, .{ 0xA76C, 0xA76C }, .{ 0xA76E, 0xA76E }, .{ 0xA779, 0xA779 }, .{ 0xA77B, 0xA77B }, .{ 0xA77D, 0xA77E }, .{ 0xA780, 0xA780 }, .{ 0xA782, 0xA782 }, .{ 0xA784, 0xA784 }, .{ 0xA786, 0xA786 }, .{ 0xA78B, 0xA78B }, .{ 0xA78D, 0xA78D }, .{ 0xA790, 0xA790 }, .{ 0xA792, 0xA792 }, .{ 0xA796, 0xA796 }, .{ 0xA798, 0xA798 }, .{ 0xA79A, 0xA79A }, .{ 0xA79C, 0xA79C }, .{ 0xA79E, 0xA79E }, .{ 0xA7A0, 0xA7A0 }, .{ 0xA7A2, 0xA7A2 }, .{ 0xA7A4, 0xA7A4 }, .{ 0xA7A6, 0xA7A6 }, .{ 0xA7A8, 0xA7A8 }, .{ 0xA7AA, 0xA7AE }, .{ 0xA7B0, 0xA7B4 }, .{ 0xA7B6, 0xA7B6 }, .{ 0xA7B8, 0xA7B8 }, .{ 0xAC00, 0xD7AF }, .{ 0xF900, 0xFAFF }, .{ 0xFB1D, 0xFBB1 }, .{ 0xFBD3, 0xFD3D }, .{ 0xFD50, 0xFDC7 }, .{ 0xFDF0, 0xFDFB }, .{ 0xFE30, 0xFE4F }, .{ 0xFE70, 0xFEFC }, .{ 0xFF08, 0xFF09 }, .{ 0xFF21, 0xFF3A }, .{ 0x1EE00, 0x1EEBB }, .{ 0x1F200, 0x1F2FF }, .{ 0x20000, 0x2A6DF }, .{ 0x2A700, 0x2EBEF }, .{ 0x2F800, 0x2FA1F }, };

/// Reports whether `c` falls inside any inclusive range of
/// `is_infix_4_ahead_ranges`; the lookup is delegated to `rangeContains`.
/// NOTE(review): presumably the lookahead character class of infix rule 4 —
/// confirm against the generator script.
pub fn is_infix_4_ahead(c: u21) bool { return rangeContains(&is_infix_4_ahead_ranges, c); }

pub const is_infix_4_behind_ranges = [_][2]u21{ .{ 0x0022, 0x0022 }, .{ 0x0027, 0x0027 }, .{ 0x002C, 0x002C }, .{ 0x0060, 0x007A }, .{ 0x00AB, 0x00AB }, .{ 0x00B4, 0x00B4 }, .{ 0x00BB, 0x00BB }, .{ 0x00DF, 0x00F6 }, .{ 0x00F8, 0x00FF }, .{ 0x0101, 0x0101 }, .{ 0x0103, 0x0103 }, .{ 0x0105, 0x0105 }, .{ 0x0107, 0x0107 }, .{ 0x0109, 0x0109 }, .{ 0x010B, 0x010B },
.{ 0x010D, 0x010D }, .{ 0x010F, 0x010F }, .{ 0x0111, 0x0111 }, .{ 0x0113, 0x0113 }, .{ 0x0115, 0x0115 }, .{ 0x0117, 0x0117 }, .{ 0x0119, 0x0119 }, .{ 0x011B, 0x011B }, .{ 0x011D, 0x011D }, .{ 0x011F, 0x011F }, .{ 0x0121, 0x0121 }, .{ 0x0123, 0x0123 }, .{ 0x0125, 0x0125 }, .{ 0x0127, 0x0127 }, .{ 0x0129, 0x0129 }, .{ 0x012B, 0x012B }, .{ 0x012D, 0x012D }, .{ 0x012F, 0x012F }, .{ 0x0131, 0x0131 }, .{ 0x0133, 0x0133 }, .{ 0x0135, 0x0135 }, .{ 0x0137, 0x0138 }, .{ 0x013A, 0x013A }, .{ 0x013C, 0x013C }, .{ 0x013E, 0x013E }, .{ 0x0140, 0x0140 }, .{ 0x0142, 0x0142 }, .{ 0x0144, 0x0144 }, .{ 0x0146, 0x0146 }, .{ 0x0148, 0x0149 }, .{ 0x014B, 0x014B }, .{ 0x014D, 0x014D }, .{ 0x014F, 0x014F }, .{ 0x0151, 0x0151 }, .{ 0x0153, 0x0153 }, .{ 0x0155, 0x0155 }, .{ 0x0157, 0x0157 }, .{ 0x0159, 0x0159 }, .{ 0x015B, 0x015B }, .{ 0x015D, 0x015D }, .{ 0x015F, 0x015F }, .{ 0x0161, 0x0161 }, .{ 0x0163, 0x0163 }, .{ 0x0165, 0x0165 }, .{ 0x0167, 0x0167 }, .{ 0x0169, 0x0169 }, .{ 0x016B, 0x016B }, .{ 0x016D, 0x016D }, .{ 0x016F, 0x016F }, .{ 0x0171, 0x0171 }, .{ 0x0173, 0x0173 }, .{ 0x0175, 0x0175 }, .{ 0x0177, 0x0177 }, .{ 0x017A, 0x017A }, .{ 0x017C, 0x017C }, .{ 0x017E, 0x0180 }, .{ 0x0183, 0x0183 }, .{ 0x0185, 0x0185 }, .{ 0x0188, 0x0188 }, .{ 0x018C, 0x018D }, .{ 0x0192, 0x0192 }, .{ 0x0195, 0x0195 }, .{ 0x0199, 0x019B }, .{ 0x019E, 0x019E }, .{ 0x01A1, 0x01A1 }, .{ 0x01A3, 0x01A3 }, .{ 0x01A5, 0x01A5 }, .{ 0x01A8, 0x01A8 }, .{ 0x01AA, 0x01AB }, .{ 0x01AD, 0x01AD }, .{ 0x01B0, 0x01B0 }, .{ 0x01B4, 0x01B4 }, .{ 0x01B6, 0x01B6 }, .{ 0x01B9, 0x01BA }, .{ 0x01BD, 0x01BF }, .{ 0x01C6, 0x01C6 }, .{ 0x01C9, 0x01C9 }, .{ 0x01CC, 0x01CC }, .{ 0x01CE, 0x01CE }, .{ 0x01D0, 0x01D0 }, .{ 0x01D2, 0x01D2 }, .{ 0x01D4, 0x01D4 }, .{ 0x01D6, 0x01D6 }, .{ 0x01D8, 0x01D8 }, .{ 0x01DA, 0x01DA }, .{ 0x01DC, 0x01DD }, .{ 0x01DF, 0x01DF }, .{ 0x01E1, 0x01E1 }, .{ 0x01E3, 0x01E3 }, .{ 0x01E5, 0x01E5 }, .{ 0x01E7, 0x01E7 }, .{ 0x01E9, 0x01E9 }, .{ 0x01EB, 0x01EB }, .{ 0x01ED, 0x01ED }, .{ 0x01EF, 0x01F0 }, .{ 
0x01F3, 0x01F3 }, .{ 0x01F5, 0x01F5 }, .{ 0x01F9, 0x01F9 }, .{ 0x01FB, 0x01FB }, .{ 0x01FD, 0x01FD }, .{ 0x01FF, 0x01FF }, .{ 0x0201, 0x0201 }, .{ 0x0203, 0x0203 }, .{ 0x0205, 0x0205 }, .{ 0x0207, 0x0207 }, .{ 0x0209, 0x0209 }, .{ 0x020B, 0x020B }, .{ 0x020D, 0x020D }, .{ 0x020F, 0x020F }, .{ 0x0211, 0x0211 }, .{ 0x0213, 0x0213 }, .{ 0x0215, 0x0215 }, .{ 0x0217, 0x0217 }, .{ 0x0219, 0x0219 }, .{ 0x021B, 0x021B }, .{ 0x021D, 0x021D }, .{ 0x021F, 0x021F }, .{ 0x0221, 0x0221 }, .{ 0x0223, 0x0223 }, .{ 0x0225, 0x0225 }, .{ 0x0227, 0x0227 }, .{ 0x0229, 0x0229 }, .{ 0x022B, 0x022B }, .{ 0x022D, 0x022D }, .{ 0x022F, 0x022F }, .{ 0x0231, 0x0231 }, .{ 0x0233, 0x0239 }, .{ 0x023C, 0x023C }, .{ 0x023F, 0x0240 }, .{ 0x0242, 0x0242 }, .{ 0x0247, 0x0247 }, .{ 0x0249, 0x0249 }, .{ 0x024B, 0x024B }, .{ 0x024D, 0x024D }, .{ 0x024F, 0x02AF }, .{ 0x03AC, 0x03AF }, .{ 0x03B1, 0x03C9 }, .{ 0x03CC, 0x03CE }, .{ 0x0430, 0x0451 }, .{ 0x0453, 0x045A }, .{ 0x045C, 0x045D }, .{ 0x0491, 0x0491 }, .{ 0x0497, 0x0497 }, .{ 0x04A3, 0x04A3 }, .{ 0x04AF, 0x04AF }, .{ 0x04BB, 0x04BB }, .{ 0x04D9, 0x04D9 }, .{ 0x04E9, 0x04E9 }, .{ 0x0591, 0x05F4 }, .{ 0x0620, 0x064A }, .{ 0x066E, 0x06D5 }, .{ 0x06E5, 0x06FF }, .{ 0x0750, 0x077F }, .{ 0x08A0, 0x08BD }, .{ 0x0900, 0x09FF }, .{ 0x0B80, 0x0CFF }, .{ 0x0D80, 0x0DFF }, .{ 0x1100, 0x137F }, .{ 0x1D00, 0x1D25 }, .{ 0x1D6B, 0x1D77 }, .{ 0x1D79, 0x1D9A }, .{ 0x1E01, 0x1E01 }, .{ 0x1E03, 0x1E03 }, .{ 0x1E05, 0x1E05 }, .{ 0x1E07, 0x1E07 }, .{ 0x1E09, 0x1E09 }, .{ 0x1E0B, 0x1E0B }, .{ 0x1E0D, 0x1E0D }, .{ 0x1E0F, 0x1E0F }, .{ 0x1E11, 0x1E11 }, .{ 0x1E13, 0x1E13 }, .{ 0x1E15, 0x1E15 }, .{ 0x1E17, 0x1E17 }, .{ 0x1E19, 0x1E19 }, .{ 0x1E1B, 0x1E1B }, .{ 0x1E1D, 0x1E1D }, .{ 0x1E1F, 0x1E1F }, .{ 0x1E21, 0x1E21 }, .{ 0x1E23, 0x1E23 }, .{ 0x1E25, 0x1E25 }, .{ 0x1E27, 0x1E27 }, .{ 0x1E29, 0x1E29 }, .{ 0x1E2B, 0x1E2B }, .{ 0x1E2D, 0x1E2D }, .{ 0x1E2F, 0x1E2F }, .{ 0x1E31, 0x1E31 }, .{ 0x1E33, 0x1E33 }, .{ 0x1E35, 0x1E35 }, .{ 0x1E37, 0x1E37 }, .{ 0x1E39, 0x1E39 }, .{ 
0x1E3B, 0x1E3B }, .{ 0x1E3D, 0x1E3D }, .{ 0x1E3F, 0x1E3F }, .{ 0x1E41, 0x1E41 }, .{ 0x1E43, 0x1E43 }, .{ 0x1E45, 0x1E45 }, .{ 0x1E47, 0x1E47 }, .{ 0x1E49, 0x1E49 }, .{ 0x1E4B, 0x1E4B }, .{ 0x1E4D, 0x1E4D }, .{ 0x1E4F, 0x1E4F }, .{ 0x1E51, 0x1E51 }, .{ 0x1E53, 0x1E53 }, .{ 0x1E55, 0x1E55 }, .{ 0x1E57, 0x1E57 }, .{ 0x1E59, 0x1E59 }, .{ 0x1E5B, 0x1E5B }, .{ 0x1E5D, 0x1E5D }, .{ 0x1E5F, 0x1E5F }, .{ 0x1E61, 0x1E61 }, .{ 0x1E63, 0x1E63 }, .{ 0x1E65, 0x1E65 }, .{ 0x1E67, 0x1E67 }, .{ 0x1E69, 0x1E69 }, .{ 0x1E6B, 0x1E6B }, .{ 0x1E6D, 0x1E6D }, .{ 0x1E6F, 0x1E6F }, .{ 0x1E71, 0x1E71 }, .{ 0x1E73, 0x1E73 }, .{ 0x1E75, 0x1E75 }, .{ 0x1E77, 0x1E77 }, .{ 0x1E79, 0x1E79 }, .{ 0x1E7B, 0x1E7B }, .{ 0x1E7D, 0x1E7D }, .{ 0x1E7F, 0x1E7F }, .{ 0x1E81, 0x1E81 }, .{ 0x1E83, 0x1E83 }, .{ 0x1E85, 0x1E85 }, .{ 0x1E87, 0x1E87 }, .{ 0x1E89, 0x1E89 }, .{ 0x1E8B, 0x1E8B }, .{ 0x1E8D, 0x1E8D }, .{ 0x1E8F, 0x1E8F }, .{ 0x1E91, 0x1E91 }, .{ 0x1E93, 0x1E93 }, .{ 0x1E95, 0x1E9D }, .{ 0x1E9F, 0x1E9F }, .{ 0x1EA1, 0x1EA1 }, .{ 0x1EA3, 0x1EA3 }, .{ 0x1EA5, 0x1EA5 }, .{ 0x1EA7, 0x1EA7 }, .{ 0x1EA9, 0x1EA9 }, .{ 0x1EAB, 0x1EAB }, .{ 0x1EAD, 0x1EAD }, .{ 0x1EAF, 0x1EAF }, .{ 0x1EB1, 0x1EB1 }, .{ 0x1EB3, 0x1EB3 }, .{ 0x1EB5, 0x1EB5 }, .{ 0x1EB7, 0x1EB7 }, .{ 0x1EB9, 0x1EB9 }, .{ 0x1EBB, 0x1EBB }, .{ 0x1EBD, 0x1EBD }, .{ 0x1EBF, 0x1EBF }, .{ 0x1EC1, 0x1EC1 }, .{ 0x1EC3, 0x1EC3 }, .{ 0x1EC5, 0x1EC5 }, .{ 0x1EC7, 0x1EC7 }, .{ 0x1EC9, 0x1EC9 }, .{ 0x1ECB, 0x1ECB }, .{ 0x1ECD, 0x1ECD }, .{ 0x1ECF, 0x1ECF }, .{ 0x1ED1, 0x1ED1 }, .{ 0x1ED3, 0x1ED3 }, .{ 0x1ED5, 0x1ED5 }, .{ 0x1ED7, 0x1ED7 }, .{ 0x1ED9, 0x1ED9 }, .{ 0x1EDB, 0x1EDB }, .{ 0x1EDD, 0x1EDD }, .{ 0x1EDF, 0x1EDF }, .{ 0x1EE1, 0x1EE1 }, .{ 0x1EE3, 0x1EE3 }, .{ 0x1EE5, 0x1EE5 }, .{ 0x1EE7, 0x1EE7 }, .{ 0x1EE9, 0x1EE9 }, .{ 0x1EEB, 0x1EEB }, .{ 0x1EED, 0x1EED }, .{ 0x1EEF, 0x1EEF }, .{ 0x1EF1, 0x1EF1 }, .{ 0x1EF3, 0x1EF3 }, .{ 0x1EF5, 0x1EF5 }, .{ 0x1EF7, 0x1EF7 }, .{ 0x1EF9, 0x1EF9 }, .{ 0x1EFB, 0x1EFB }, .{ 0x1EFD, 0x1EFD }, .{ 0x1EFF, 0x1EFF }, .{ 
0x2018, 0x201A }, .{ 0x201C, 0x201E }, .{ 0x2329, 0x232A }, .{ 0x27E6, 0x27E7 }, .{ 0x2C61, 0x2C61 }, .{ 0x2C65, 0x2C66 }, .{ 0x2C68, 0x2C68 }, .{ 0x2C6A, 0x2C6A }, .{ 0x2C6C, 0x2C6C }, .{ 0x2C71, 0x2C71 }, .{ 0x2C73, 0x2C74 }, .{ 0x2C76, 0x2C7B }, .{ 0x2E80, 0x2FDF }, .{ 0x2FF0, 0x30FF }, .{ 0x31C0, 0x31EF }, .{ 0x3200, 0x4DBF }, .{ 0x4E00, 0x9FFF }, .{ 0xA723, 0xA723 }, .{ 0xA725, 0xA725 }, .{ 0xA727, 0xA727 }, .{ 0xA729, 0xA729 }, .{ 0xA72B, 0xA72B }, .{ 0xA72D, 0xA72D }, .{ 0xA72F, 0xA731 }, .{ 0xA733, 0xA733 }, .{ 0xA735, 0xA735 }, .{ 0xA737, 0xA737 }, .{ 0xA739, 0xA739 }, .{ 0xA73B, 0xA73B }, .{ 0xA73D, 0xA73D }, .{ 0xA73F, 0xA73F }, .{ 0xA741, 0xA741 }, .{ 0xA743, 0xA743 }, .{ 0xA745, 0xA745 }, .{ 0xA747, 0xA747 }, .{ 0xA749, 0xA749 }, .{ 0xA74B, 0xA74B }, .{ 0xA74D, 0xA74D }, .{ 0xA74F, 0xA74F }, .{ 0xA751, 0xA751 }, .{ 0xA753, 0xA753 }, .{ 0xA755, 0xA755 }, .{ 0xA757, 0xA757 }, .{ 0xA759, 0xA759 }, .{ 0xA75B, 0xA75B }, .{ 0xA75D, 0xA75D }, .{ 0xA75F, 0xA75F }, .{ 0xA761, 0xA761 }, .{ 0xA763, 0xA763 }, .{ 0xA765, 0xA765 }, .{ 0xA767, 0xA767 }, .{ 0xA769, 0xA769 }, .{ 0xA76B, 0xA76B }, .{ 0xA76D, 0xA76D }, .{ 0xA76F, 0xA76F }, .{ 0xA771, 0xA778 }, .{ 0xA77A, 0xA77A }, .{ 0xA77C, 0xA77C }, .{ 0xA77F, 0xA77F }, .{ 0xA781, 0xA781 }, .{ 0xA783, 0xA783 }, .{ 0xA785, 0xA785 }, .{ 0xA787, 0xA787 }, .{ 0xA78C, 0xA78C }, .{ 0xA78E, 0xA78E }, .{ 0xA791, 0xA791 }, .{ 0xA793, 0xA795 }, .{ 0xA797, 0xA797 }, .{ 0xA799, 0xA799 }, .{ 0xA79B, 0xA79B }, .{ 0xA79D, 0xA79D }, .{ 0xA79F, 0xA79F }, .{ 0xA7A1, 0xA7A1 }, .{ 0xA7A3, 0xA7A3 }, .{ 0xA7A5, 0xA7A5 }, .{ 0xA7A7, 0xA7A7 }, .{ 0xA7A9, 0xA7A9 }, .{ 0xA7AF, 0xA7AF }, .{ 0xA7B5, 0xA7B5 }, .{ 0xA7B7, 0xA7B7 }, .{ 0xA7B9, 0xA7B9 }, .{ 0xA7FA, 0xA7FA }, .{ 0xAB30, 0xAB5A }, .{ 0xAB60, 0xAB64 }, .{ 0xAC00, 0xD7AF }, .{ 0xF900, 0xFAFF }, .{ 0xFB1D, 0xFBB1 }, .{ 0xFBD3, 0xFD3D }, .{ 0xFD50, 0xFDC7 }, .{ 0xFDF0, 0xFDFB }, .{ 0xFE30, 0xFE4F }, .{ 0xFE70, 0xFEFC }, .{ 0xFF08, 0xFF09 }, .{ 0xFF41, 0xFF5A }, .{ 0x1EE00, 0x1EEBB }, .{ 
0x1F200, 0x1F2FF }, .{ 0x20000, 0x2A6DF }, .{ 0x2A700, 0x2EBEF }, .{ 0x2F800, 0x2FA1F }, }; pub fn is_infix_4_behind(c: u21) bool { return rangeContains(&is_infix_4_behind_ranges, c); } pub const is_infix_5_ahead_ranges = [_][2]u21{ .{ 0x0041, 0x005A }, .{ 0x0061, 0x007A }, .{ 0x00C0, 0x00D6 }, .{ 0x00D8, 0x00F6 }, .{ 0x00F8, 0x01BF }, .{ 0x01C4, 0x02AF }, .{ 0x0386, 0x0386 }, .{ 0x0388, 0x038A }, .{ 0x038C, 0x038C }, .{ 0x038E, 0x038F }, .{ 0x0391, 0x03A9 }, .{ 0x03AC, 0x03AF }, .{ 0x03B1, 0x03C9 }, .{ 0x03CC, 0x03CE }, .{ 0x0400, 0x0401 }, .{ 0x0403, 0x040A }, .{ 0x040C, 0x040D }, .{ 0x0410, 0x0451 }, .{ 0x0453, 0x045A }, .{ 0x045C, 0x045D }, .{ 0x0490, 0x0491 }, .{ 0x0496, 0x0497 }, .{ 0x04A2, 0x04A3 }, .{ 0x04AE, 0x04AF }, .{ 0x04BA, 0x04BB }, .{ 0x04D8, 0x04D9 }, .{ 0x04E8, 0x04E9 }, .{ 0x0591, 0x05F4 }, .{ 0x0620, 0x064A }, .{ 0x066E, 0x06D5 }, .{ 0x06E5, 0x06FF }, .{ 0x0750, 0x077F }, .{ 0x08A0, 0x08BD }, .{ 0x0900, 0x09FF }, .{ 0x0B80, 0x0CFF }, .{ 0x0D80, 0x0DFF }, .{ 0x1100, 0x137F }, .{ 0x1D00, 0x1D25 }, .{ 0x1D6B, 0x1D77 }, .{ 0x1D79, 0x1D9A }, .{ 0x1E00, 0x1EFF }, .{ 0x2C60, 0x2C7B }, .{ 0x2C7E, 0x2C7F }, .{ 0x2E80, 0x2FDF }, .{ 0x2FF0, 0x30FF }, .{ 0x31C0, 0x31EF }, .{ 0x3200, 0x4DBF }, .{ 0x4E00, 0x9FFF }, .{ 0xA722, 0xA76F }, .{ 0xA771, 0xA787 }, .{ 0xA78B, 0xA78E }, .{ 0xA790, 0xA7B9 }, .{ 0xA7FA, 0xA7FA }, .{ 0xAB30, 0xAB5A }, .{ 0xAB60, 0xAB64 }, .{ 0xAC00, 0xD7AF }, .{ 0xF900, 0xFAFF }, .{ 0xFB1D, 0xFBB1 }, .{ 0xFBD3, 0xFD3D }, .{ 0xFD50, 0xFDC7 }, .{ 0xFDF0, 0xFDFB }, .{ 0xFE30, 0xFE4F }, .{ 0xFE70, 0xFEFC }, .{ 0xFF21, 0xFF3A }, .{ 0xFF41, 0xFF5A }, .{ 0x1EE00, 0x1EEBB }, .{ 0x1F200, 0x1F2FF }, .{ 0x20000, 0x2A6DF }, .{ 0x2A700, 0x2EBEF }, .{ 0x2F800, 0x2FA1F }, }; pub fn is_infix_5_ahead(c: u21) bool { return rangeContains(&is_infix_5_ahead_ranges, c); } pub const is_infix_5_behind_ranges = [_][2]u21{ .{ 0x0041, 0x005A }, .{ 0x0061, 0x007A }, .{ 0x00C0, 0x00D6 }, .{ 0x00D8, 0x00F6 }, .{ 0x00F8, 0x01BF }, .{ 0x01C4, 0x02AF }, .{ 0x0386, 0x0386 
}, .{ 0x0388, 0x038A }, .{ 0x038C, 0x038C }, .{ 0x038E, 0x038F }, .{ 0x0391, 0x03A9 }, .{ 0x03AC, 0x03AF }, .{ 0x03B1, 0x03C9 }, .{ 0x03CC, 0x03CE }, .{ 0x0400, 0x0401 }, .{ 0x0403, 0x040A }, .{ 0x040C, 0x040D }, .{ 0x0410, 0x0451 }, .{ 0x0453, 0x045A }, .{ 0x045C, 0x045D }, .{ 0x0490, 0x0491 }, .{ 0x0496, 0x0497 }, .{ 0x04A2, 0x04A3 }, .{ 0x04AE, 0x04AF }, .{ 0x04BA, 0x04BB }, .{ 0x04D8, 0x04D9 }, .{ 0x04E8, 0x04E9 }, .{ 0x0591, 0x05F4 }, .{ 0x0620, 0x064A }, .{ 0x066E, 0x06D5 }, .{ 0x06E5, 0x06FF }, .{ 0x0750, 0x077F }, .{ 0x08A0, 0x08BD }, .{ 0x0900, 0x09FF }, .{ 0x0B80, 0x0CFF }, .{ 0x0D80, 0x0DFF }, .{ 0x1100, 0x137F }, .{ 0x1D00, 0x1D25 }, .{ 0x1D6B, 0x1D77 }, .{ 0x1D79, 0x1D9A }, .{ 0x1E00, 0x1EFF }, .{ 0x2C60, 0x2C7B }, .{ 0x2C7E, 0x2C7F }, .{ 0x2E80, 0x2FDF }, .{ 0x2FF0, 0x30FF }, .{ 0x31C0, 0x31EF }, .{ 0x3200, 0x4DBF }, .{ 0x4E00, 0x9FFF }, .{ 0xA722, 0xA76F }, .{ 0xA771, 0xA787 }, .{ 0xA78B, 0xA78E }, .{ 0xA790, 0xA7B9 }, .{ 0xA7FA, 0xA7FA }, .{ 0xAB30, 0xAB5A }, .{ 0xAB60, 0xAB64 }, .{ 0xAC00, 0xD7AF }, .{ 0xF900, 0xFAFF }, .{ 0xFB1D, 0xFBB1 }, .{ 0xFBD3, 0xFD3D }, .{ 0xFD50, 0xFDC7 }, .{ 0xFDF0, 0xFDFB }, .{ 0xFE30, 0xFE4F }, .{ 0xFE70, 0xFEFC }, .{ 0xFF21, 0xFF3A }, .{ 0xFF41, 0xFF5A }, .{ 0x1EE00, 0x1EEBB }, .{ 0x1F200, 0x1F2FF }, .{ 0x20000, 0x2A6DF }, .{ 0x2A700, 0x2EBEF }, .{ 0x2F800, 0x2FA1F }, }; pub fn is_infix_5_behind(c: u21) bool { return rangeContains(&is_infix_5_behind_ranges, c); } pub const is_infix_6_ahead_ranges = [_][2]u21{ .{ 0x0041, 0x005A }, .{ 0x0061, 0x007A }, .{ 0x00C0, 0x00D6 }, .{ 0x00D8, 0x00F6 }, .{ 0x00F8, 0x01BF }, .{ 0x01C4, 0x02AF }, .{ 0x0386, 0x0386 }, .{ 0x0388, 0x038A }, .{ 0x038C, 0x038C }, .{ 0x038E, 0x038F }, .{ 0x0391, 0x03A9 }, .{ 0x03AC, 0x03AF }, .{ 0x03B1, 0x03C9 }, .{ 0x03CC, 0x03CE }, .{ 0x0400, 0x0401 }, .{ 0x0403, 0x040A }, .{ 0x040C, 0x040D }, .{ 0x0410, 0x0451 }, .{ 0x0453, 0x045A }, .{ 0x045C, 0x045D }, .{ 0x0490, 0x0491 }, .{ 0x0496, 0x0497 }, .{ 0x04A2, 0x04A3 }, .{ 0x04AE, 0x04AF }, .{ 0x04BA, 
0x04BB }, .{ 0x04D8, 0x04D9 }, .{ 0x04E8, 0x04E9 }, .{ 0x0591, 0x05F4 }, .{ 0x0620, 0x064A }, .{ 0x066E, 0x06D5 }, .{ 0x06E5, 0x06FF }, .{ 0x0750, 0x077F }, .{ 0x08A0, 0x08BD }, .{ 0x0900, 0x09FF }, .{ 0x0B80, 0x0CFF }, .{ 0x0D80, 0x0DFF }, .{ 0x1100, 0x137F }, .{ 0x1D00, 0x1D25 }, .{ 0x1D6B, 0x1D77 }, .{ 0x1D79, 0x1D9A }, .{ 0x1E00, 0x1EFF }, .{ 0x2C60, 0x2C7B }, .{ 0x2C7E, 0x2C7F }, .{ 0x2E80, 0x2FDF }, .{ 0x2FF0, 0x30FF }, .{ 0x31C0, 0x31EF }, .{ 0x3200, 0x4DBF }, .{ 0x4E00, 0x9FFF }, .{ 0xA722, 0xA76F }, .{ 0xA771, 0xA787 }, .{ 0xA78B, 0xA78E }, .{ 0xA790, 0xA7B9 }, .{ 0xA7FA, 0xA7FA }, .{ 0xAB30, 0xAB5A }, .{ 0xAB60, 0xAB64 }, .{ 0xAC00, 0xD7AF }, .{ 0xF900, 0xFAFF }, .{ 0xFB1D, 0xFBB1 }, .{ 0xFBD3, 0xFD3D }, .{ 0xFD50, 0xFDC7 }, .{ 0xFDF0, 0xFDFB }, .{ 0xFE30, 0xFE4F }, .{ 0xFE70, 0xFEFC }, .{ 0xFF21, 0xFF3A }, .{ 0xFF41, 0xFF5A }, .{ 0x1EE00, 0x1EEBB }, .{ 0x1F200, 0x1F2FF }, .{ 0x20000, 0x2A6DF }, .{ 0x2A700, 0x2EBEF }, .{ 0x2F800, 0x2FA1F }, }; pub fn is_infix_6_ahead(c: u21) bool { return rangeContains(&is_infix_6_ahead_ranges, c); } pub const is_infix_6_behind_ranges = [_][2]u21{ .{ 0x0030, 0x0039 }, .{ 0x0041, 0x005A }, .{ 0x0061, 0x007A }, .{ 0x00C0, 0x00D6 }, .{ 0x00D8, 0x00F6 }, .{ 0x00F8, 0x01BF }, .{ 0x01C4, 0x02AF }, .{ 0x0386, 0x0386 }, .{ 0x0388, 0x038A }, .{ 0x038C, 0x038C }, .{ 0x038E, 0x038F }, .{ 0x0391, 0x03A9 }, .{ 0x03AC, 0x03AF }, .{ 0x03B1, 0x03C9 }, .{ 0x03CC, 0x03CE }, .{ 0x0400, 0x0401 }, .{ 0x0403, 0x040A }, .{ 0x040C, 0x040D }, .{ 0x0410, 0x0451 }, .{ 0x0453, 0x045A }, .{ 0x045C, 0x045D }, .{ 0x0490, 0x0491 }, .{ 0x0496, 0x0497 }, .{ 0x04A2, 0x04A3 }, .{ 0x04AE, 0x04AF }, .{ 0x04BA, 0x04BB }, .{ 0x04D8, 0x04D9 }, .{ 0x04E8, 0x04E9 }, .{ 0x0591, 0x05F4 }, .{ 0x0620, 0x064A }, .{ 0x066E, 0x06D5 }, .{ 0x06E5, 0x06FF }, .{ 0x0750, 0x077F }, .{ 0x08A0, 0x08BD }, .{ 0x0900, 0x09FF }, .{ 0x0B80, 0x0CFF }, .{ 0x0D80, 0x0DFF }, .{ 0x1100, 0x137F }, .{ 0x1D00, 0x1D25 }, .{ 0x1D6B, 0x1D77 }, .{ 0x1D79, 0x1D9A }, .{ 0x1E00, 0x1EFF }, .{ 
0x2C60, 0x2C7B }, .{ 0x2C7E, 0x2C7F }, .{ 0x2E80, 0x2FDF }, .{ 0x2FF0, 0x30FF }, .{ 0x31C0, 0x31EF }, .{ 0x3200, 0x4DBF }, .{ 0x4E00, 0x9FFF }, .{ 0xA722, 0xA76F }, .{ 0xA771, 0xA787 }, .{ 0xA78B, 0xA78E }, .{ 0xA790, 0xA7B9 }, .{ 0xA7FA, 0xA7FA }, .{ 0xAB30, 0xAB5A }, .{ 0xAB60, 0xAB64 }, .{ 0xAC00, 0xD7AF }, .{ 0xF900, 0xFAFF }, .{ 0xFB1D, 0xFBB1 }, .{ 0xFBD3, 0xFD3D }, .{ 0xFD50, 0xFDC7 }, .{ 0xFDF0, 0xFDFB }, .{ 0xFE30, 0xFE4F }, .{ 0xFE70, 0xFEFC }, .{ 0xFF21, 0xFF3A }, .{ 0xFF41, 0xFF5A }, .{ 0x1EE00, 0x1EEBB }, .{ 0x1F200, 0x1F2FF }, .{ 0x20000, 0x2A6DF }, .{ 0x2A700, 0x2EBEF }, .{ 0x2F800, 0x2FA1F }, }; pub fn is_infix_6_behind(c: u21) bool { return rangeContains(&is_infix_6_behind_ranges, c); } pub const is_infix_7_ahead_ranges = [_][2]u21{ .{ 0x0041, 0x005A }, .{ 0x0061, 0x007A }, .{ 0x00C0, 0x00D6 }, .{ 0x00D8, 0x00F6 }, .{ 0x00F8, 0x01BF }, .{ 0x01C4, 0x02AF }, .{ 0x0386, 0x0386 }, .{ 0x0388, 0x038A }, .{ 0x038C, 0x038C }, .{ 0x038E, 0x038F }, .{ 0x0391, 0x03A9 }, .{ 0x03AC, 0x03AF }, .{ 0x03B1, 0x03C9 }, .{ 0x03CC, 0x03CE }, .{ 0x0400, 0x0401 }, .{ 0x0403, 0x040A }, .{ 0x040C, 0x040D }, .{ 0x0410, 0x0451 }, .{ 0x0453, 0x045A }, .{ 0x045C, 0x045D }, .{ 0x0490, 0x0491 }, .{ 0x0496, 0x0497 }, .{ 0x04A2, 0x04A3 }, .{ 0x04AE, 0x04AF }, .{ 0x04BA, 0x04BB }, .{ 0x04D8, 0x04D9 }, .{ 0x04E8, 0x04E9 }, .{ 0x0591, 0x05F4 }, .{ 0x0620, 0x064A }, .{ 0x066E, 0x06D5 }, .{ 0x06E5, 0x06FF }, .{ 0x0750, 0x077F }, .{ 0x08A0, 0x08BD }, .{ 0x0900, 0x09FF }, .{ 0x0B80, 0x0CFF }, .{ 0x0D80, 0x0DFF }, .{ 0x1100, 0x137F }, .{ 0x1D00, 0x1D25 }, .{ 0x1D6B, 0x1D77 }, .{ 0x1D79, 0x1D9A }, .{ 0x1E00, 0x1EFF }, .{ 0x2C60, 0x2C7B }, .{ 0x2C7E, 0x2C7F }, .{ 0x2E80, 0x2FDF }, .{ 0x2FF0, 0x30FF }, .{ 0x31C0, 0x31EF }, .{ 0x3200, 0x4DBF }, .{ 0x4E00, 0x9FFF }, .{ 0xA722, 0xA76F }, .{ 0xA771, 0xA787 }, .{ 0xA78B, 0xA78E }, .{ 0xA790, 0xA7B9 }, .{ 0xA7FA, 0xA7FA }, .{ 0xAB30, 0xAB5A }, .{ 0xAB60, 0xAB64 }, .{ 0xAC00, 0xD7AF }, .{ 0xF900, 0xFAFF }, .{ 0xFB1D, 0xFBB1 }, .{ 0xFBD3, 0xFD3D }, 
.{ 0xFD50, 0xFDC7 }, .{ 0xFDF0, 0xFDFB }, .{ 0xFE30, 0xFE4F }, .{ 0xFE70, 0xFEFC }, .{ 0xFF21, 0xFF3A }, .{ 0xFF41, 0xFF5A }, .{ 0x1EE00, 0x1EEBB }, .{ 0x1F200, 0x1F2FF }, .{ 0x20000, 0x2A6DF }, .{ 0x2A700, 0x2EBEF }, .{ 0x2F800, 0x2FA1F }, }; pub fn is_infix_7_ahead(c: u21) bool { return rangeContains(&is_infix_7_ahead_ranges, c); } pub const is_infix_7_behind_ranges = [_][2]u21{ .{ 0x0030, 0x0039 }, .{ 0x0041, 0x005A }, .{ 0x0061, 0x007A }, .{ 0x00C0, 0x00D6 }, .{ 0x00D8, 0x00F6 }, .{ 0x00F8, 0x01BF }, .{ 0x01C4, 0x02AF }, .{ 0x0386, 0x0386 }, .{ 0x0388, 0x038A }, .{ 0x038C, 0x038C }, .{ 0x038E, 0x038F }, .{ 0x0391, 0x03A9 }, .{ 0x03AC, 0x03AF }, .{ 0x03B1, 0x03C9 }, .{ 0x03CC, 0x03CE }, .{ 0x0400, 0x0401 }, .{ 0x0403, 0x040A }, .{ 0x040C, 0x040D }, .{ 0x0410, 0x0451 }, .{ 0x0453, 0x045A }, .{ 0x045C, 0x045D }, .{ 0x0490, 0x0491 }, .{ 0x0496, 0x0497 }, .{ 0x04A2, 0x04A3 }, .{ 0x04AE, 0x04AF }, .{ 0x04BA, 0x04BB }, .{ 0x04D8, 0x04D9 }, .{ 0x04E8, 0x04E9 }, .{ 0x0591, 0x05F4 }, .{ 0x0620, 0x064A }, .{ 0x066E, 0x06D5 }, .{ 0x06E5, 0x06FF }, .{ 0x0750, 0x077F }, .{ 0x08A0, 0x08BD }, .{ 0x0900, 0x09FF }, .{ 0x0B80, 0x0CFF }, .{ 0x0D80, 0x0DFF }, .{ 0x1100, 0x137F }, .{ 0x1D00, 0x1D25 }, .{ 0x1D6B, 0x1D77 }, .{ 0x1D79, 0x1D9A }, .{ 0x1E00, 0x1EFF }, .{ 0x2C60, 0x2C7B }, .{ 0x2C7E, 0x2C7F }, .{ 0x2E80, 0x2FDF }, .{ 0x2FF0, 0x30FF }, .{ 0x31C0, 0x31EF }, .{ 0x3200, 0x4DBF }, .{ 0x4E00, 0x9FFF }, .{ 0xA722, 0xA76F }, .{ 0xA771, 0xA787 }, .{ 0xA78B, 0xA78E }, .{ 0xA790, 0xA7B9 }, .{ 0xA7FA, 0xA7FA }, .{ 0xAB30, 0xAB5A }, .{ 0xAB60, 0xAB64 }, .{ 0xAC00, 0xD7AF }, .{ 0xF900, 0xFAFF }, .{ 0xFB1D, 0xFBB1 }, .{ 0xFBD3, 0xFD3D }, .{ 0xFD50, 0xFDC7 }, .{ 0xFDF0, 0xFDFB }, .{ 0xFE30, 0xFE4F }, .{ 0xFE70, 0xFEFC }, .{ 0xFF21, 0xFF3A }, .{ 0xFF41, 0xFF5A }, .{ 0x1EE00, 0x1EEBB }, .{ 0x1F200, 0x1F2FF }, .{ 0x20000, 0x2A6DF }, .{ 0x2A700, 0x2EBEF }, .{ 0x2F800, 0x2FA1F }, }; pub fn is_infix_7_behind(c: u21) bool { return rangeContains(&is_infix_7_behind_ranges, c); } // ── special 
cases ── pub const SpecialCase = struct { tokens: [3][]const u8, len: u8, }; pub const specials = std.StaticStringMap(SpecialCase).initComptime(.{ .{ "\t", SpecialCase{ .tokens = .{ "\t", "", "" }, .len = 1 } }, .{ "\n", SpecialCase{ .tokens = .{ "\n", "", "" }, .len = 1 } }, .{ " ", SpecialCase{ .tokens = .{ " ", "", "" }, .len = 1 } }, .{ "'", SpecialCase{ .tokens = .{ "'", "", "" }, .len = 1 } }, .{ "''", SpecialCase{ .tokens = .{ "''", "", "" }, .len = 1 } }, .{ "'Cause", SpecialCase{ .tokens = .{ "'Cause", "", "" }, .len = 1 } }, .{ "'Cos", SpecialCase{ .tokens = .{ "'Cos", "", "" }, .len = 1 } }, .{ "'Coz", SpecialCase{ .tokens = .{ "'Coz", "", "" }, .len = 1 } }, .{ "'Cuz", SpecialCase{ .tokens = .{ "'Cuz", "", "" }, .len = 1 } }, .{ "'S", SpecialCase{ .tokens = .{ "'S", "", "" }, .len = 1 } }, .{ "'bout", SpecialCase{ .tokens = .{ "'bout", "", "" }, .len = 1 } }, .{ "'cause", SpecialCase{ .tokens = .{ "'cause", "", "" }, .len = 1 } }, .{ "'cos", SpecialCase{ .tokens = .{ "'cos", "", "" }, .len = 1 } }, .{ "'coz", SpecialCase{ .tokens = .{ "'coz", "", "" }, .len = 1 } }, .{ "'cuz", SpecialCase{ .tokens = .{ "'cuz", "", "" }, .len = 1 } }, .{ "'d", SpecialCase{ .tokens = .{ "'d", "", "" }, .len = 1 } }, .{ "'em", SpecialCase{ .tokens = .{ "'em", "", "" }, .len = 1 } }, .{ "'ll", SpecialCase{ .tokens = .{ "'ll", "", "" }, .len = 1 } }, .{ "'nuff", SpecialCase{ .tokens = .{ "'nuff", "", "" }, .len = 1 } }, .{ "'re", SpecialCase{ .tokens = .{ "'re", "", "" }, .len = 1 } }, .{ "'s", SpecialCase{ .tokens = .{ "'s", "", "" }, .len = 1 } }, .{ "(*_*)", SpecialCase{ .tokens = .{ "(*_*)", "", "" }, .len = 1 } }, .{ "(-8", SpecialCase{ .tokens = .{ "(-8", "", "" }, .len = 1 } }, .{ "(-:", SpecialCase{ .tokens = .{ "(-:", "", "" }, .len = 1 } }, .{ "(-;", SpecialCase{ .tokens = .{ "(-;", "", "" }, .len = 1 } }, .{ "(-_-)", SpecialCase{ .tokens = .{ "(-_-)", "", "" }, .len = 1 } }, .{ "(._.)", SpecialCase{ .tokens = .{ "(._.)", "", "" }, .len = 1 } }, .{ "(:", 
SpecialCase{ .tokens = .{ "(:", "", "" }, .len = 1 } }, .{ "(;", SpecialCase{ .tokens = .{ "(;", "", "" }, .len = 1 } }, .{ "(=", SpecialCase{ .tokens = .{ "(=", "", "" }, .len = 1 } }, .{ "(>_<)", SpecialCase{ .tokens = .{ "(>_<)", "", "" }, .len = 1 } }, .{ "(^_^)", SpecialCase{ .tokens = .{ "(^_^)", "", "" }, .len = 1 } }, .{ "(o:", SpecialCase{ .tokens = .{ "(o:", "", "" }, .len = 1 } }, .{ "(\xc2\xac_\xc2\xac)", SpecialCase{ .tokens = .{ "(\xc2\xac_\xc2\xac)", "", "" }, .len = 1 } }, .{ "(\xe0\xb2\xa0_\xe0\xb2\xa0)", SpecialCase{ .tokens = .{ "(\xe0\xb2\xa0_\xe0\xb2\xa0)", "", "" }, .len = 1 } }, .{ "(\xe2\x95\xaf\xc2\xb0\xe2\x96\xa1\xc2\xb0\xef\xbc\x89\xe2\x95\xaf\xef\xb8\xb5\xe2\x94\xbb\xe2\x94\x81\xe2\x94\xbb", SpecialCase{ .tokens = .{ "(\xe2\x95\xaf\xc2\xb0\xe2\x96\xa1\xc2\xb0\xef\xbc\x89\xe2\x95\xaf\xef\xb8\xb5\xe2\x94\xbb\xe2\x94\x81\xe2\x94\xbb", "", "" }, .len = 1 } }, .{ ")-:", SpecialCase{ .tokens = .{ ")-:", "", "" }, .len = 1 } }, .{ "):", SpecialCase{ .tokens = .{ "):", "", "" }, .len = 1 } }, .{ "-_-", SpecialCase{ .tokens = .{ "-_-", "", "" }, .len = 1 } }, .{ "-__-", SpecialCase{ .tokens = .{ "-__-", "", "" }, .len = 1 } }, .{ "._.", SpecialCase{ .tokens = .{ "._.", "", "" }, .len = 1 } }, .{ "0.0", SpecialCase{ .tokens = .{ "0.0", "", "" }, .len = 1 } }, .{ "0.o", SpecialCase{ .tokens = .{ "0.o", "", "" }, .len = 1 } }, .{ "0_0", SpecialCase{ .tokens = .{ "0_0", "", "" }, .len = 1 } }, .{ "0_o", SpecialCase{ .tokens = .{ "0_o", "", "" }, .len = 1 } }, .{ "10a.m.", SpecialCase{ .tokens = .{ "10", "a.m.", "" }, .len = 2 } }, .{ "10am", SpecialCase{ .tokens = .{ "10", "am", "" }, .len = 2 } }, .{ "10p.m.", SpecialCase{ .tokens = .{ "10", "p.m.", "" }, .len = 2 } }, .{ "10pm", SpecialCase{ .tokens = .{ "10", "pm", "" }, .len = 2 } }, .{ "11a.m.", SpecialCase{ .tokens = .{ "11", "a.m.", "" }, .len = 2 } }, .{ "11am", SpecialCase{ .tokens = .{ "11", "am", "" }, .len = 2 } }, .{ "11p.m.", SpecialCase{ .tokens = .{ "11", "p.m.", "" }, .len = 2 } }, 
.{ "11pm", SpecialCase{ .tokens = .{ "11", "pm", "" }, .len = 2 } }, .{ "12a.m.", SpecialCase{ .tokens = .{ "12", "a.m.", "" }, .len = 2 } }, .{ "12am", SpecialCase{ .tokens = .{ "12", "am", "" }, .len = 2 } }, .{ "12p.m.", SpecialCase{ .tokens = .{ "12", "p.m.", "" }, .len = 2 } }, .{ "12pm", SpecialCase{ .tokens = .{ "12", "pm", "" }, .len = 2 } }, .{ "1a.m.", SpecialCase{ .tokens = .{ "1", "a.m.", "" }, .len = 2 } }, .{ "1am", SpecialCase{ .tokens = .{ "1", "am", "" }, .len = 2 } }, .{ "1p.m.", SpecialCase{ .tokens = .{ "1", "p.m.", "" }, .len = 2 } }, .{ "1pm", SpecialCase{ .tokens = .{ "1", "pm", "" }, .len = 2 } }, .{ "2a.m.", SpecialCase{ .tokens = .{ "2", "a.m.", "" }, .len = 2 } }, .{ "2am", SpecialCase{ .tokens = .{ "2", "am", "" }, .len = 2 } }, .{ "2p.m.", SpecialCase{ .tokens = .{ "2", "p.m.", "" }, .len = 2 } }, .{ "2pm", SpecialCase{ .tokens = .{ "2", "pm", "" }, .len = 2 } }, .{ "3a.m.", SpecialCase{ .tokens = .{ "3", "a.m.", "" }, .len = 2 } }, .{ "3am", SpecialCase{ .tokens = .{ "3", "am", "" }, .len = 2 } }, .{ "3p.m.", SpecialCase{ .tokens = .{ "3", "p.m.", "" }, .len = 2 } }, .{ "3pm", SpecialCase{ .tokens = .{ "3", "pm", "" }, .len = 2 } }, .{ "4a.m.", SpecialCase{ .tokens = .{ "4", "a.m.", "" }, .len = 2 } }, .{ "4am", SpecialCase{ .tokens = .{ "4", "am", "" }, .len = 2 } }, .{ "4p.m.", SpecialCase{ .tokens = .{ "4", "p.m.", "" }, .len = 2 } }, .{ "4pm", SpecialCase{ .tokens = .{ "4", "pm", "" }, .len = 2 } }, .{ "5a.m.", SpecialCase{ .tokens = .{ "5", "a.m.", "" }, .len = 2 } }, .{ "5am", SpecialCase{ .tokens = .{ "5", "am", "" }, .len = 2 } }, .{ "5p.m.", SpecialCase{ .tokens = .{ "5", "p.m.", "" }, .len = 2 } }, .{ "5pm", SpecialCase{ .tokens = .{ "5", "pm", "" }, .len = 2 } }, .{ "6a.m.", SpecialCase{ .tokens = .{ "6", "a.m.", "" }, .len = 2 } }, .{ "6am", SpecialCase{ .tokens = .{ "6", "am", "" }, .len = 2 } }, .{ "6p.m.", SpecialCase{ .tokens = .{ "6", "p.m.", "" }, .len = 2 } }, .{ "6pm", SpecialCase{ .tokens = .{ "6", "pm", "" }, .len 
= 2 } }, .{ "7a.m.", SpecialCase{ .tokens = .{ "7", "a.m.", "" }, .len = 2 } }, .{ "7am", SpecialCase{ .tokens = .{ "7", "am", "" }, .len = 2 } }, .{ "7p.m.", SpecialCase{ .tokens = .{ "7", "p.m.", "" }, .len = 2 } }, .{ "7pm", SpecialCase{ .tokens = .{ "7", "pm", "" }, .len = 2 } }, .{ "8)", SpecialCase{ .tokens = .{ "8)", "", "" }, .len = 1 } }, .{ "8-)", SpecialCase{ .tokens = .{ "8-)", "", "" }, .len = 1 } }, .{ "8-D", SpecialCase{ .tokens = .{ "8-D", "", "" }, .len = 1 } }, .{ "8D", SpecialCase{ .tokens = .{ "8D", "", "" }, .len = 1 } }, .{ "8a.m.", SpecialCase{ .tokens = .{ "8", "a.m.", "" }, .len = 2 } }, .{ "8am", SpecialCase{ .tokens = .{ "8", "am", "" }, .len = 2 } }, .{ "8p.m.", SpecialCase{ .tokens = .{ "8", "p.m.", "" }, .len = 2 } }, .{ "8pm", SpecialCase{ .tokens = .{ "8", "pm", "" }, .len = 2 } }, .{ "9a.m.", SpecialCase{ .tokens = .{ "9", "a.m.", "" }, .len = 2 } }, .{ "9am", SpecialCase{ .tokens = .{ "9", "am", "" }, .len = 2 } }, .{ "9p.m.", SpecialCase{ .tokens = .{ "9", "p.m.", "" }, .len = 2 } }, .{ "9pm", SpecialCase{ .tokens = .{ "9", "pm", "" }, .len = 2 } }, .{ ":'(", SpecialCase{ .tokens = .{ ":'(", "", "" }, .len = 1 } }, .{ ":')", SpecialCase{ .tokens = .{ ":')", "", "" }, .len = 1 } }, .{ ":'-(", SpecialCase{ .tokens = .{ ":'-(", "", "" }, .len = 1 } }, .{ ":'-)", SpecialCase{ .tokens = .{ ":'-)", "", "" }, .len = 1 } }, .{ ":(", SpecialCase{ .tokens = .{ ":(", "", "" }, .len = 1 } }, .{ ":((", SpecialCase{ .tokens = .{ ":((", "", "" }, .len = 1 } }, .{ ":(((", SpecialCase{ .tokens = .{ ":(((", "", "" }, .len = 1 } }, .{ ":()", SpecialCase{ .tokens = .{ ":()", "", "" }, .len = 1 } }, .{ ":)", SpecialCase{ .tokens = .{ ":)", "", "" }, .len = 1 } }, .{ ":))", SpecialCase{ .tokens = .{ ":))", "", "" }, .len = 1 } }, .{ ":)))", SpecialCase{ .tokens = .{ ":)))", "", "" }, .len = 1 } }, .{ ":*", SpecialCase{ .tokens = .{ ":*", "", "" }, .len = 1 } }, .{ ":-(", SpecialCase{ .tokens = .{ ":-(", "", "" }, .len = 1 } }, .{ ":-((", SpecialCase{ 
.tokens = .{ ":-((", "", "" }, .len = 1 } }, .{ ":-(((", SpecialCase{ .tokens = .{ ":-(((", "", "" }, .len = 1 } }, .{ ":-)", SpecialCase{ .tokens = .{ ":-)", "", "" }, .len = 1 } }, .{ ":-))", SpecialCase{ .tokens = .{ ":-))", "", "" }, .len = 1 } }, .{ ":-)))", SpecialCase{ .tokens = .{ ":-)))", "", "" }, .len = 1 } }, .{ ":-*", SpecialCase{ .tokens = .{ ":-*", "", "" }, .len = 1 } }, .{ ":-/", SpecialCase{ .tokens = .{ ":-/", "", "" }, .len = 1 } }, .{ ":-0", SpecialCase{ .tokens = .{ ":-0", "", "" }, .len = 1 } }, .{ ":-3", SpecialCase{ .tokens = .{ ":-3", "", "" }, .len = 1 } }, .{ ":->", SpecialCase{ .tokens = .{ ":->", "", "" }, .len = 1 } }, .{ ":-D", SpecialCase{ .tokens = .{ ":-D", "", "" }, .len = 1 } }, .{ ":-O", SpecialCase{ .tokens = .{ ":-O", "", "" }, .len = 1 } }, .{ ":-P", SpecialCase{ .tokens = .{ ":-P", "", "" }, .len = 1 } }, .{ ":-X", SpecialCase{ .tokens = .{ ":-X", "", "" }, .len = 1 } }, .{ ":-]", SpecialCase{ .tokens = .{ ":-]", "", "" }, .len = 1 } }, .{ ":-o", SpecialCase{ .tokens = .{ ":-o", "", "" }, .len = 1 } }, .{ ":-p", SpecialCase{ .tokens = .{ ":-p", "", "" }, .len = 1 } }, .{ ":-x", SpecialCase{ .tokens = .{ ":-x", "", "" }, .len = 1 } }, .{ ":-|", SpecialCase{ .tokens = .{ ":-|", "", "" }, .len = 1 } }, .{ ":-}", SpecialCase{ .tokens = .{ ":-}", "", "" }, .len = 1 } }, .{ ":/", SpecialCase{ .tokens = .{ ":/", "", "" }, .len = 1 } }, .{ ":0", SpecialCase{ .tokens = .{ ":0", "", "" }, .len = 1 } }, .{ ":1", SpecialCase{ .tokens = .{ ":1", "", "" }, .len = 1 } }, .{ ":3", SpecialCase{ .tokens = .{ ":3", "", "" }, .len = 1 } }, .{ ":>", SpecialCase{ .tokens = .{ ":>", "", "" }, .len = 1 } }, .{ ":D", SpecialCase{ .tokens = .{ ":D", "", "" }, .len = 1 } }, .{ ":O", SpecialCase{ .tokens = .{ ":O", "", "" }, .len = 1 } }, .{ ":P", SpecialCase{ .tokens = .{ ":P", "", "" }, .len = 1 } }, .{ ":X", SpecialCase{ .tokens = .{ ":X", "", "" }, .len = 1 } }, .{ ":]", SpecialCase{ .tokens = .{ ":]", "", "" }, .len = 1 } }, .{ ":o", SpecialCase{ 
.tokens = .{ ":o", "", "" }, .len = 1 } }, .{ ":o)", SpecialCase{ .tokens = .{ ":o)", "", "" }, .len = 1 } }, .{ ":p", SpecialCase{ .tokens = .{ ":p", "", "" }, .len = 1 } }, .{ ":x", SpecialCase{ .tokens = .{ ":x", "", "" }, .len = 1 } }, .{ ":|", SpecialCase{ .tokens = .{ ":|", "", "" }, .len = 1 } }, .{ ":}", SpecialCase{ .tokens = .{ ":}", "", "" }, .len = 1 } }, .{ ":\xe2\x80\x99(", SpecialCase{ .tokens = .{ ":\xe2\x80\x99(", "", "" }, .len = 1 } }, .{ ":\xe2\x80\x99)", SpecialCase{ .tokens = .{ ":\xe2\x80\x99)", "", "" }, .len = 1 } }, .{ ":\xe2\x80\x99-(", SpecialCase{ .tokens = .{ ":\xe2\x80\x99-(", "", "" }, .len = 1 } }, .{ ":\xe2\x80\x99-)", SpecialCase{ .tokens = .{ ":\xe2\x80\x99-)", "", "" }, .len = 1 } }, .{ ";)", SpecialCase{ .tokens = .{ ";)", "", "" }, .len = 1 } }, .{ ";-)", SpecialCase{ .tokens = .{ ";-)", "", "" }, .len = 1 } }, .{ ";-D", SpecialCase{ .tokens = .{ ";-D", "", "" }, .len = 1 } }, .{ ";D", SpecialCase{ .tokens = .{ ";D", "", "" }, .len = 1 } }, .{ ";_;", SpecialCase{ .tokens = .{ ";_;", "", "" }, .len = 1 } }, .{ "<.<", SpecialCase{ .tokens = .{ "<.<", "", "" }, .len = 1 } }, .{ "", SpecialCase{ .tokens = .{ "", "", "" }, .len = 1 } }, .{ "=(", SpecialCase{ .tokens = .{ "=(", "", "" }, .len = 1 } }, .{ "=)", SpecialCase{ .tokens = .{ "=)", "", "" }, .len = 1 } }, .{ "=/", SpecialCase{ .tokens = .{ "=/", "", "" }, .len = 1 } }, .{ "=3", SpecialCase{ .tokens = .{ "=3", "", "" }, .len = 1 } }, .{ "=D", SpecialCase{ .tokens = .{ "=D", "", "" }, .len = 1 } }, .{ "=[", SpecialCase{ .tokens = .{ "=[", "", "" }, .len = 1 } }, .{ "=]", SpecialCase{ .tokens = .{ "=]", "", "" }, .len = 1 } }, .{ "=|", SpecialCase{ .tokens = .{ "=|", "", "" }, .len = 1 } }, .{ ">.<", SpecialCase{ .tokens = .{ ">.<", "", "" }, .len = 1 } }, .{ ">.>", SpecialCase{ .tokens = .{ ">.>", "", "" }, .len = 1 } }, .{ ">:(", SpecialCase{ .tokens = .{ ">:(", "", "" }, .len = 1 } }, .{ ">:o", SpecialCase{ .tokens = .{ ">:o", "", "" }, .len = 1 } }, .{ "><(((*>", 
SpecialCase{ .tokens = .{ "><(((*>", "", "" }, .len = 1 } }, .{ "@_@", SpecialCase{ .tokens = .{ "@_@", "", "" }, .len = 1 } }, .{ "Adm.", SpecialCase{ .tokens = .{ "Adm.", "", "" }, .len = 1 } }, .{ "Ain't", SpecialCase{ .tokens = .{ "Ai", "n't", "" }, .len = 2 } }, .{ "Aint", SpecialCase{ .tokens = .{ "Ai", "nt", "" }, .len = 2 } }, .{ "Ain\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Ai", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Ak.", SpecialCase{ .tokens = .{ "Ak.", "", "" }, .len = 1 } }, .{ "Ala.", SpecialCase{ .tokens = .{ "Ala.", "", "" }, .len = 1 } }, .{ "Apr.", SpecialCase{ .tokens = .{ "Apr.", "", "" }, .len = 1 } }, .{ "Aren't", SpecialCase{ .tokens = .{ "Are", "n't", "" }, .len = 2 } }, .{ "Arent", SpecialCase{ .tokens = .{ "Are", "nt", "" }, .len = 2 } }, .{ "Aren\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Are", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Ariz.", SpecialCase{ .tokens = .{ "Ariz.", "", "" }, .len = 1 } }, .{ "Ark.", SpecialCase{ .tokens = .{ "Ark.", "", "" }, .len = 1 } }, .{ "Aug.", SpecialCase{ .tokens = .{ "Aug.", "", "" }, .len = 1 } }, .{ "Bros.", SpecialCase{ .tokens = .{ "Bros.", "", "" }, .len = 1 } }, .{ "C'mon", SpecialCase{ .tokens = .{ "C'm", "on", "" }, .len = 2 } }, .{ "C++", SpecialCase{ .tokens = .{ "C++", "", "" }, .len = 1 } }, .{ "Calif.", SpecialCase{ .tokens = .{ "Calif.", "", "" }, .len = 1 } }, .{ "Can't", SpecialCase{ .tokens = .{ "Ca", "n't", "" }, .len = 2 } }, .{ "Can't've", SpecialCase{ .tokens = .{ "Ca", "n't", "'ve" }, .len = 3 } }, .{ "Cannot", SpecialCase{ .tokens = .{ "Can", "not", "" }, .len = 2 } }, .{ "Cant", SpecialCase{ .tokens = .{ "Ca", "nt", "" }, .len = 2 } }, .{ "Cantve", SpecialCase{ .tokens = .{ "Ca", "nt", "ve" }, .len = 3 } }, .{ "Can\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Ca", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Can\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Ca", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Co.", SpecialCase{ .tokens = .{ "Co.", "", "" }, .len 
= 1 } }, .{ "Colo.", SpecialCase{ .tokens = .{ "Colo.", "", "" }, .len = 1 } }, .{ "Conn.", SpecialCase{ .tokens = .{ "Conn.", "", "" }, .len = 1 } }, .{ "Corp.", SpecialCase{ .tokens = .{ "Corp.", "", "" }, .len = 1 } }, .{ "Could've", SpecialCase{ .tokens = .{ "Could", "'ve", "" }, .len = 2 } }, .{ "Couldn't", SpecialCase{ .tokens = .{ "Could", "n't", "" }, .len = 2 } }, .{ "Couldn't've", SpecialCase{ .tokens = .{ "Could", "n't", "'ve" }, .len = 3 } }, .{ "Couldnt", SpecialCase{ .tokens = .{ "Could", "nt", "" }, .len = 2 } }, .{ "Couldntve", SpecialCase{ .tokens = .{ "Could", "nt", "ve" }, .len = 3 } }, .{ "Couldn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Could", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Couldn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Could", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Couldve", SpecialCase{ .tokens = .{ "Could", "ve", "" }, .len = 2 } }, .{ "Could\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Could", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "C\xe2\x80\x99mon", SpecialCase{ .tokens = .{ "C\xe2\x80\x99m", "on", "" }, .len = 2 } }, .{ "D.C.", SpecialCase{ .tokens = .{ "D.C.", "", "" }, .len = 1 } }, .{ "Daren't", SpecialCase{ .tokens = .{ "Dare", "n't", "" }, .len = 2 } }, .{ "Darent", SpecialCase{ .tokens = .{ "Dare", "nt", "" }, .len = 2 } }, .{ "Daren\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Dare", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Dec.", SpecialCase{ .tokens = .{ "Dec.", "", "" }, .len = 1 } }, .{ "Del.", SpecialCase{ .tokens = .{ "Del.", "", "" }, .len = 1 } }, .{ "Didn't", SpecialCase{ .tokens = .{ "Did", "n't", "" }, .len = 2 } }, .{ "Didn't've", SpecialCase{ .tokens = .{ "Did", "n't", "'ve" }, .len = 3 } }, .{ "Didnt", SpecialCase{ .tokens = .{ "Did", "nt", "" }, .len = 2 } }, .{ "Didntve", SpecialCase{ .tokens = .{ "Did", "nt", "ve" }, .len = 3 } }, .{ "Didn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Did", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Didn\xe2\x80\x99t\xe2\x80\x99ve", 
SpecialCase{ .tokens = .{ "Did", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Doesn't", SpecialCase{ .tokens = .{ "Does", "n't", "" }, .len = 2 } }, .{ "Doesn't've", SpecialCase{ .tokens = .{ "Does", "n't", "'ve" }, .len = 3 } }, .{ "Doesnt", SpecialCase{ .tokens = .{ "Does", "nt", "" }, .len = 2 } }, .{ "Doesntve", SpecialCase{ .tokens = .{ "Does", "nt", "ve" }, .len = 3 } }, .{ "Doesn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Does", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Doesn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Does", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Doin", SpecialCase{ .tokens = .{ "Doin", "", "" }, .len = 1 } }, .{ "Doin'", SpecialCase{ .tokens = .{ "Doin'", "", "" }, .len = 1 } }, .{ "Doin\xe2\x80\x99", SpecialCase{ .tokens = .{ "Doin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "Don't", SpecialCase{ .tokens = .{ "Do", "n't", "" }, .len = 2 } }, .{ "Don't've", SpecialCase{ .tokens = .{ "Do", "n't", "'ve" }, .len = 3 } }, .{ "Dont", SpecialCase{ .tokens = .{ "Do", "nt", "" }, .len = 2 } }, .{ "Dontve", SpecialCase{ .tokens = .{ "Do", "nt", "ve" }, .len = 3 } }, .{ "Don\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Do", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Don\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Do", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Dr.", SpecialCase{ .tokens = .{ "Dr.", "", "" }, .len = 1 } }, .{ "E.G.", SpecialCase{ .tokens = .{ "E.G.", "", "" }, .len = 1 } }, .{ "E.g.", SpecialCase{ .tokens = .{ "E.g.", "", "" }, .len = 1 } }, .{ "Feb.", SpecialCase{ .tokens = .{ "Feb.", "", "" }, .len = 1 } }, .{ "Fla.", SpecialCase{ .tokens = .{ "Fla.", "", "" }, .len = 1 } }, .{ "Ga.", SpecialCase{ .tokens = .{ "Ga.", "", "" }, .len = 1 } }, .{ "Gen.", SpecialCase{ .tokens = .{ "Gen.", "", "" }, .len = 1 } }, .{ "Goin", SpecialCase{ .tokens = .{ "Goin", "", "" }, .len = 1 } }, .{ "Goin'", SpecialCase{ .tokens = .{ "Goin'", "", "" }, .len = 1 } }, .{ "Goin\xe2\x80\x99", 
SpecialCase{ .tokens = .{ "Goin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "Gonna", SpecialCase{ .tokens = .{ "Gon", "na", "" }, .len = 2 } }, .{ "Gotta", SpecialCase{ .tokens = .{ "Got", "ta", "" }, .len = 2 } }, .{ "Gov.", SpecialCase{ .tokens = .{ "Gov.", "", "" }, .len = 1 } }, .{ "Hadn't", SpecialCase{ .tokens = .{ "Had", "n't", "" }, .len = 2 } }, .{ "Hadn't've", SpecialCase{ .tokens = .{ "Had", "n't", "'ve" }, .len = 3 } }, .{ "Hadnt", SpecialCase{ .tokens = .{ "Had", "nt", "" }, .len = 2 } }, .{ "Hadntve", SpecialCase{ .tokens = .{ "Had", "nt", "ve" }, .len = 3 } }, .{ "Hadn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Had", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Hadn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Had", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Hasn't", SpecialCase{ .tokens = .{ "Has", "n't", "" }, .len = 2 } }, .{ "Hasnt", SpecialCase{ .tokens = .{ "Has", "nt", "" }, .len = 2 } }, .{ "Hasn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Has", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Haven't", SpecialCase{ .tokens = .{ "Have", "n't", "" }, .len = 2 } }, .{ "Havent", SpecialCase{ .tokens = .{ "Have", "nt", "" }, .len = 2 } }, .{ "Haven\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Have", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Havin", SpecialCase{ .tokens = .{ "Havin", "", "" }, .len = 1 } }, .{ "Havin'", SpecialCase{ .tokens = .{ "Havin'", "", "" }, .len = 1 } }, .{ "Havin\xe2\x80\x99", SpecialCase{ .tokens = .{ "Havin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "He'd", SpecialCase{ .tokens = .{ "He", "'d", "" }, .len = 2 } }, .{ "He'd've", SpecialCase{ .tokens = .{ "He", "'d", "'ve" }, .len = 3 } }, .{ "He'll", SpecialCase{ .tokens = .{ "He", "'ll", "" }, .len = 2 } }, .{ "He'll've", SpecialCase{ .tokens = .{ "He", "'ll", "'ve" }, .len = 3 } }, .{ "He's", SpecialCase{ .tokens = .{ "He", "'s", "" }, .len = 2 } }, .{ "Hed", SpecialCase{ .tokens = .{ "He", "d", "" }, .len = 2 } }, .{ "Hedve", SpecialCase{ .tokens = .{ "He", "d", 
"ve" }, .len = 3 } }, .{ "Hellve", SpecialCase{ .tokens = .{ "He", "ll", "ve" }, .len = 3 } }, .{ "Hes", SpecialCase{ .tokens = .{ "He", "s", "" }, .len = 2 } }, .{ "He\xe2\x80\x99d", SpecialCase{ .tokens = .{ "He", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "He\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "He", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "He\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "He", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "He\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "He", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "He\xe2\x80\x99s", SpecialCase{ .tokens = .{ "He", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "How'd", SpecialCase{ .tokens = .{ "How", "'d", "" }, .len = 2 } }, .{ "How'd've", SpecialCase{ .tokens = .{ "How", "'d", "'ve" }, .len = 3 } }, .{ "How'd'y", SpecialCase{ .tokens = .{ "How", "'d", "'y" }, .len = 3 } }, .{ "How'll", SpecialCase{ .tokens = .{ "How", "'ll", "" }, .len = 2 } }, .{ "How'll've", SpecialCase{ .tokens = .{ "How", "'ll", "'ve" }, .len = 3 } }, .{ "How're", SpecialCase{ .tokens = .{ "How", "'re", "" }, .len = 2 } }, .{ "How's", SpecialCase{ .tokens = .{ "How", "'s", "" }, .len = 2 } }, .{ "How've", SpecialCase{ .tokens = .{ "How", "'ve", "" }, .len = 2 } }, .{ "Howd", SpecialCase{ .tokens = .{ "How", "d", "" }, .len = 2 } }, .{ "Howdve", SpecialCase{ .tokens = .{ "How", "d", "ve" }, .len = 3 } }, .{ "Howll", SpecialCase{ .tokens = .{ "How", "ll", "" }, .len = 2 } }, .{ "Howllve", SpecialCase{ .tokens = .{ "How", "ll", "ve" }, .len = 3 } }, .{ "Howre", SpecialCase{ .tokens = .{ "How", "re", "" }, .len = 2 } }, .{ "Hows", SpecialCase{ .tokens = .{ "How", "s", "" }, .len = 2 } }, .{ "Howve", SpecialCase{ .tokens = .{ "How", "ve", "" }, .len = 2 } }, .{ "How\xe2\x80\x99d", SpecialCase{ .tokens = .{ "How", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "How\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "How", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ 
"How\xe2\x80\x99d\xe2\x80\x99y", SpecialCase{ .tokens = .{ "How", "\xe2\x80\x99d", "\xe2\x80\x99y" }, .len = 3 } }, .{ "How\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "How", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "How\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "How", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "How\xe2\x80\x99re", SpecialCase{ .tokens = .{ "How", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "How\xe2\x80\x99s", SpecialCase{ .tokens = .{ "How", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "How\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "How", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "I'd", SpecialCase{ .tokens = .{ "I", "'d", "" }, .len = 2 } }, .{ "I'd've", SpecialCase{ .tokens = .{ "I", "'d", "'ve" }, .len = 3 } }, .{ "I'll", SpecialCase{ .tokens = .{ "I", "'ll", "" }, .len = 2 } }, .{ "I'll've", SpecialCase{ .tokens = .{ "I", "'ll", "'ve" }, .len = 3 } }, .{ "I'm", SpecialCase{ .tokens = .{ "I", "'m", "" }, .len = 2 } }, .{ "I'ma", SpecialCase{ .tokens = .{ "I", "'m", "a" }, .len = 3 } }, .{ "I've", SpecialCase{ .tokens = .{ "I", "'ve", "" }, .len = 2 } }, .{ "I.E.", SpecialCase{ .tokens = .{ "I.E.", "", "" }, .len = 1 } }, .{ "I.e.", SpecialCase{ .tokens = .{ "I.e.", "", "" }, .len = 1 } }, .{ "Ia.", SpecialCase{ .tokens = .{ "Ia.", "", "" }, .len = 1 } }, .{ "Id", SpecialCase{ .tokens = .{ "I", "d", "" }, .len = 2 } }, .{ "Id.", SpecialCase{ .tokens = .{ "Id.", "", "" }, .len = 1 } }, .{ "Idve", SpecialCase{ .tokens = .{ "I", "d", "ve" }, .len = 3 } }, .{ "Ill.", SpecialCase{ .tokens = .{ "Ill.", "", "" }, .len = 1 } }, .{ "Illve", SpecialCase{ .tokens = .{ "I", "ll", "ve" }, .len = 3 } }, .{ "Im", SpecialCase{ .tokens = .{ "I", "m", "" }, .len = 2 } }, .{ "Ima", SpecialCase{ .tokens = .{ "I", "m", "a" }, .len = 3 } }, .{ "Inc.", SpecialCase{ .tokens = .{ "Inc.", "", "" }, .len = 1 } }, .{ "Ind.", SpecialCase{ .tokens = .{ "Ind.", "", "" }, .len = 1 } }, .{ "Isn't", SpecialCase{ .tokens = .{ "Is", "n't", "" }, .len = 2 } }, .{ 
"Isnt", SpecialCase{ .tokens = .{ "Is", "nt", "" }, .len = 2 } }, .{ "Isn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Is", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "It'd", SpecialCase{ .tokens = .{ "It", "'d", "" }, .len = 2 } }, .{ "It'd've", SpecialCase{ .tokens = .{ "It", "'d", "'ve" }, .len = 3 } }, .{ "It'll", SpecialCase{ .tokens = .{ "It", "'ll", "" }, .len = 2 } }, .{ "It'll've", SpecialCase{ .tokens = .{ "It", "'ll", "'ve" }, .len = 3 } }, .{ "It's", SpecialCase{ .tokens = .{ "It", "'s", "" }, .len = 2 } }, .{ "Itd", SpecialCase{ .tokens = .{ "It", "d", "" }, .len = 2 } }, .{ "Itdve", SpecialCase{ .tokens = .{ "It", "d", "ve" }, .len = 3 } }, .{ "Itll", SpecialCase{ .tokens = .{ "It", "ll", "" }, .len = 2 } }, .{ "Itllve", SpecialCase{ .tokens = .{ "It", "ll", "ve" }, .len = 3 } }, .{ "It\xe2\x80\x99d", SpecialCase{ .tokens = .{ "It", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "It\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "It", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "It\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "It", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "It\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "It", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "It\xe2\x80\x99s", SpecialCase{ .tokens = .{ "It", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "Ive", SpecialCase{ .tokens = .{ "I", "ve", "" }, .len = 2 } }, .{ "I\xe2\x80\x99d", SpecialCase{ .tokens = .{ "I", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "I\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "I", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "I\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "I", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "I\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "I", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "I\xe2\x80\x99m", SpecialCase{ .tokens = .{ "I", "\xe2\x80\x99m", "" }, .len = 2 } }, .{ "I\xe2\x80\x99ma", SpecialCase{ .tokens = .{ "I", "\xe2\x80\x99m", "a" }, .len = 3 } }, .{ "I\xe2\x80\x99ve", 
SpecialCase{ .tokens = .{ "I", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "Jan.", SpecialCase{ .tokens = .{ "Jan.", "", "" }, .len = 1 } }, .{ "Jr.", SpecialCase{ .tokens = .{ "Jr.", "", "" }, .len = 1 } }, .{ "Jul.", SpecialCase{ .tokens = .{ "Jul.", "", "" }, .len = 1 } }, .{ "Jun.", SpecialCase{ .tokens = .{ "Jun.", "", "" }, .len = 1 } }, .{ "Kan.", SpecialCase{ .tokens = .{ "Kan.", "", "" }, .len = 1 } }, .{ "Kans.", SpecialCase{ .tokens = .{ "Kans.", "", "" }, .len = 1 } }, .{ "Ky.", SpecialCase{ .tokens = .{ "Ky.", "", "" }, .len = 1 } }, .{ "La.", SpecialCase{ .tokens = .{ "La.", "", "" }, .len = 1 } }, .{ "Let's", SpecialCase{ .tokens = .{ "Let", "'s", "" }, .len = 2 } }, .{ "Let\xe2\x80\x99s", SpecialCase{ .tokens = .{ "Let", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "Lovin", SpecialCase{ .tokens = .{ "Lovin", "", "" }, .len = 1 } }, .{ "Lovin'", SpecialCase{ .tokens = .{ "Lovin'", "", "" }, .len = 1 } }, .{ "Lovin\xe2\x80\x99", SpecialCase{ .tokens = .{ "Lovin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "Ltd.", SpecialCase{ .tokens = .{ "Ltd.", "", "" }, .len = 1 } }, .{ "Ma'am", SpecialCase{ .tokens = .{ "Ma'am", "", "" }, .len = 1 } }, .{ "Mar.", SpecialCase{ .tokens = .{ "Mar.", "", "" }, .len = 1 } }, .{ "Mass.", SpecialCase{ .tokens = .{ "Mass.", "", "" }, .len = 1 } }, .{ "Mayn't", SpecialCase{ .tokens = .{ "May", "n't", "" }, .len = 2 } }, .{ "Mayn't've", SpecialCase{ .tokens = .{ "May", "n't", "'ve" }, .len = 3 } }, .{ "Maynt", SpecialCase{ .tokens = .{ "May", "nt", "" }, .len = 2 } }, .{ "Mayntve", SpecialCase{ .tokens = .{ "May", "nt", "ve" }, .len = 3 } }, .{ "Mayn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "May", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Mayn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "May", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Ma\xe2\x80\x99am", SpecialCase{ .tokens = .{ "Ma\xe2\x80\x99am", "", "" }, .len = 1 } }, .{ "Md.", SpecialCase{ .tokens = .{ "Md.", "", "" }, .len = 1 } }, .{ "Messrs.", 
SpecialCase{ .tokens = .{ "Messrs.", "", "" }, .len = 1 } }, .{ "Mich.", SpecialCase{ .tokens = .{ "Mich.", "", "" }, .len = 1 } }, .{ "Might've", SpecialCase{ .tokens = .{ "Might", "'ve", "" }, .len = 2 } }, .{ "Mightn't", SpecialCase{ .tokens = .{ "Might", "n't", "" }, .len = 2 } }, .{ "Mightn't've", SpecialCase{ .tokens = .{ "Might", "n't", "'ve" }, .len = 3 } }, .{ "Mightnt", SpecialCase{ .tokens = .{ "Might", "nt", "" }, .len = 2 } }, .{ "Mightntve", SpecialCase{ .tokens = .{ "Might", "nt", "ve" }, .len = 3 } }, .{ "Mightn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Might", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Mightn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Might", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Mightve", SpecialCase{ .tokens = .{ "Might", "ve", "" }, .len = 2 } }, .{ "Might\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Might", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "Minn.", SpecialCase{ .tokens = .{ "Minn.", "", "" }, .len = 1 } }, .{ "Miss.", SpecialCase{ .tokens = .{ "Miss.", "", "" }, .len = 1 } }, .{ "Mo.", SpecialCase{ .tokens = .{ "Mo.", "", "" }, .len = 1 } }, .{ "Mont.", SpecialCase{ .tokens = .{ "Mont.", "", "" }, .len = 1 } }, .{ "Mr.", SpecialCase{ .tokens = .{ "Mr.", "", "" }, .len = 1 } }, .{ "Mrs.", SpecialCase{ .tokens = .{ "Mrs.", "", "" }, .len = 1 } }, .{ "Ms.", SpecialCase{ .tokens = .{ "Ms.", "", "" }, .len = 1 } }, .{ "Mt.", SpecialCase{ .tokens = .{ "Mt.", "", "" }, .len = 1 } }, .{ "Must've", SpecialCase{ .tokens = .{ "Must", "'ve", "" }, .len = 2 } }, .{ "Mustn't", SpecialCase{ .tokens = .{ "Must", "n't", "" }, .len = 2 } }, .{ "Mustn't've", SpecialCase{ .tokens = .{ "Must", "n't", "'ve" }, .len = 3 } }, .{ "Mustnt", SpecialCase{ .tokens = .{ "Must", "nt", "" }, .len = 2 } }, .{ "Mustntve", SpecialCase{ .tokens = .{ "Must", "nt", "ve" }, .len = 3 } }, .{ "Mustn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Must", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Mustn\xe2\x80\x99t\xe2\x80\x99ve", 
SpecialCase{ .tokens = .{ "Must", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Mustve", SpecialCase{ .tokens = .{ "Must", "ve", "" }, .len = 2 } }, .{ "Must\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Must", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "N.C.", SpecialCase{ .tokens = .{ "N.C.", "", "" }, .len = 1 } }, .{ "N.D.", SpecialCase{ .tokens = .{ "N.D.", "", "" }, .len = 1 } }, .{ "N.H.", SpecialCase{ .tokens = .{ "N.H.", "", "" }, .len = 1 } }, .{ "N.J.", SpecialCase{ .tokens = .{ "N.J.", "", "" }, .len = 1 } }, .{ "N.M.", SpecialCase{ .tokens = .{ "N.M.", "", "" }, .len = 1 } }, .{ "N.Y.", SpecialCase{ .tokens = .{ "N.Y.", "", "" }, .len = 1 } }, .{ "Neb.", SpecialCase{ .tokens = .{ "Neb.", "", "" }, .len = 1 } }, .{ "Nebr.", SpecialCase{ .tokens = .{ "Nebr.", "", "" }, .len = 1 } }, .{ "Needn't", SpecialCase{ .tokens = .{ "Need", "n't", "" }, .len = 2 } }, .{ "Needn't've", SpecialCase{ .tokens = .{ "Need", "n't", "'ve" }, .len = 3 } }, .{ "Neednt", SpecialCase{ .tokens = .{ "Need", "nt", "" }, .len = 2 } }, .{ "Needntve", SpecialCase{ .tokens = .{ "Need", "nt", "ve" }, .len = 3 } }, .{ "Needn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Need", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Needn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Need", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Nev.", SpecialCase{ .tokens = .{ "Nev.", "", "" }, .len = 1 } }, .{ "Not've", SpecialCase{ .tokens = .{ "Not", "'ve", "" }, .len = 2 } }, .{ "Nothin", SpecialCase{ .tokens = .{ "Nothin", "", "" }, .len = 1 } }, .{ "Nothin'", SpecialCase{ .tokens = .{ "Nothin'", "", "" }, .len = 1 } }, .{ "Nothin\xe2\x80\x99", SpecialCase{ .tokens = .{ "Nothin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "Notve", SpecialCase{ .tokens = .{ "Not", "ve", "" }, .len = 2 } }, .{ "Not\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Not", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "Nov.", SpecialCase{ .tokens = .{ "Nov.", "", "" }, .len = 1 } }, .{ "Nuthin", SpecialCase{ .tokens = .{ 
"Nuthin", "", "" }, .len = 1 } }, .{ "Nuthin'", SpecialCase{ .tokens = .{ "Nuthin'", "", "" }, .len = 1 } }, .{ "Nuthin\xe2\x80\x99", SpecialCase{ .tokens = .{ "Nuthin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "O'clock", SpecialCase{ .tokens = .{ "O'clock", "", "" }, .len = 1 } }, .{ "O.O", SpecialCase{ .tokens = .{ "O.O", "", "" }, .len = 1 } }, .{ "O.o", SpecialCase{ .tokens = .{ "O.o", "", "" }, .len = 1 } }, .{ "O_O", SpecialCase{ .tokens = .{ "O_O", "", "" }, .len = 1 } }, .{ "O_o", SpecialCase{ .tokens = .{ "O_o", "", "" }, .len = 1 } }, .{ "Oct.", SpecialCase{ .tokens = .{ "Oct.", "", "" }, .len = 1 } }, .{ "Okla.", SpecialCase{ .tokens = .{ "Okla.", "", "" }, .len = 1 } }, .{ "Ol", SpecialCase{ .tokens = .{ "Ol", "", "" }, .len = 1 } }, .{ "Ol'", SpecialCase{ .tokens = .{ "Ol'", "", "" }, .len = 1 } }, .{ "Ol\xe2\x80\x99", SpecialCase{ .tokens = .{ "Ol\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "Ore.", SpecialCase{ .tokens = .{ "Ore.", "", "" }, .len = 1 } }, .{ "Oughtn't", SpecialCase{ .tokens = .{ "Ought", "n't", "" }, .len = 2 } }, .{ "Oughtn't've", SpecialCase{ .tokens = .{ "Ought", "n't", "'ve" }, .len = 3 } }, .{ "Oughtnt", SpecialCase{ .tokens = .{ "Ought", "nt", "" }, .len = 2 } }, .{ "Oughtntve", SpecialCase{ .tokens = .{ "Ought", "nt", "ve" }, .len = 3 } }, .{ "Oughtn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Ought", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Oughtn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Ought", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "O\xe2\x80\x99clock", SpecialCase{ .tokens = .{ "O\xe2\x80\x99clock", "", "" }, .len = 1 } }, .{ "Pa.", SpecialCase{ .tokens = .{ "Pa.", "", "" }, .len = 1 } }, .{ "Ph.D.", SpecialCase{ .tokens = .{ "Ph.D.", "", "" }, .len = 1 } }, .{ "Prof.", SpecialCase{ .tokens = .{ "Prof.", "", "" }, .len = 1 } }, .{ "Rep.", SpecialCase{ .tokens = .{ "Rep.", "", "" }, .len = 1 } }, .{ "Rev.", SpecialCase{ .tokens = .{ "Rev.", "", "" }, .len = 1 } }, .{ "S.C.", SpecialCase{ .tokens = .{ 
"S.C.", "", "" }, .len = 1 } }, .{ "Sen.", SpecialCase{ .tokens = .{ "Sen.", "", "" }, .len = 1 } }, .{ "Sep.", SpecialCase{ .tokens = .{ "Sep.", "", "" }, .len = 1 } }, .{ "Sept.", SpecialCase{ .tokens = .{ "Sept.", "", "" }, .len = 1 } }, .{ "Shan't", SpecialCase{ .tokens = .{ "Sha", "n't", "" }, .len = 2 } }, .{ "Shan't've", SpecialCase{ .tokens = .{ "Sha", "n't", "'ve" }, .len = 3 } }, .{ "Shant", SpecialCase{ .tokens = .{ "Sha", "nt", "" }, .len = 2 } }, .{ "Shantve", SpecialCase{ .tokens = .{ "Sha", "nt", "ve" }, .len = 3 } }, .{ "Shan\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Sha", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Shan\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Sha", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "She'd", SpecialCase{ .tokens = .{ "She", "'d", "" }, .len = 2 } }, .{ "She'd've", SpecialCase{ .tokens = .{ "She", "'d", "'ve" }, .len = 3 } }, .{ "She'll", SpecialCase{ .tokens = .{ "She", "'ll", "" }, .len = 2 } }, .{ "She'll've", SpecialCase{ .tokens = .{ "She", "'ll", "'ve" }, .len = 3 } }, .{ "She's", SpecialCase{ .tokens = .{ "She", "'s", "" }, .len = 2 } }, .{ "Shedve", SpecialCase{ .tokens = .{ "She", "d", "ve" }, .len = 3 } }, .{ "Shellve", SpecialCase{ .tokens = .{ "She", "ll", "ve" }, .len = 3 } }, .{ "Shes", SpecialCase{ .tokens = .{ "She", "s", "" }, .len = 2 } }, .{ "She\xe2\x80\x99d", SpecialCase{ .tokens = .{ "She", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "She\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "She", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "She\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "She", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "She\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "She", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "She\xe2\x80\x99s", SpecialCase{ .tokens = .{ "She", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "Should've", SpecialCase{ .tokens = .{ "Should", "'ve", "" }, .len = 2 } }, .{ "Shouldn't", SpecialCase{ .tokens = .{ 
"Should", "n't", "" }, .len = 2 } }, .{ "Shouldn't've", SpecialCase{ .tokens = .{ "Should", "n't", "'ve" }, .len = 3 } }, .{ "Shouldnt", SpecialCase{ .tokens = .{ "Should", "nt", "" }, .len = 2 } }, .{ "Shouldntve", SpecialCase{ .tokens = .{ "Should", "nt", "ve" }, .len = 3 } }, .{ "Shouldn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Should", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Shouldn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Should", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Shouldve", SpecialCase{ .tokens = .{ "Should", "ve", "" }, .len = 2 } }, .{ "Should\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Should", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "Somethin", SpecialCase{ .tokens = .{ "Somethin", "", "" }, .len = 1 } }, .{ "Somethin'", SpecialCase{ .tokens = .{ "Somethin'", "", "" }, .len = 1 } }, .{ "Somethin\xe2\x80\x99", SpecialCase{ .tokens = .{ "Somethin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "St.", SpecialCase{ .tokens = .{ "St.", "", "" }, .len = 1 } }, .{ "Tenn.", SpecialCase{ .tokens = .{ "Tenn.", "", "" }, .len = 1 } }, .{ "That'd", SpecialCase{ .tokens = .{ "That", "'d", "" }, .len = 2 } }, .{ "That'd've", SpecialCase{ .tokens = .{ "That", "'d", "'ve" }, .len = 3 } }, .{ "That'll", SpecialCase{ .tokens = .{ "That", "'ll", "" }, .len = 2 } }, .{ "That'll've", SpecialCase{ .tokens = .{ "That", "'ll", "'ve" }, .len = 3 } }, .{ "That's", SpecialCase{ .tokens = .{ "That", "'s", "" }, .len = 2 } }, .{ "Thatd", SpecialCase{ .tokens = .{ "That", "d", "" }, .len = 2 } }, .{ "Thatdve", SpecialCase{ .tokens = .{ "That", "d", "ve" }, .len = 3 } }, .{ "Thatll", SpecialCase{ .tokens = .{ "That", "ll", "" }, .len = 2 } }, .{ "Thatllve", SpecialCase{ .tokens = .{ "That", "ll", "ve" }, .len = 3 } }, .{ "Thats", SpecialCase{ .tokens = .{ "That", "s", "" }, .len = 2 } }, .{ "That\xe2\x80\x99d", SpecialCase{ .tokens = .{ "That", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "That\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "That", 
"\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "That\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "That", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "That\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "That", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "That\xe2\x80\x99s", SpecialCase{ .tokens = .{ "That", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "There'd", SpecialCase{ .tokens = .{ "There", "'d", "" }, .len = 2 } }, .{ "There'd've", SpecialCase{ .tokens = .{ "There", "'d", "'ve" }, .len = 3 } }, .{ "There'll", SpecialCase{ .tokens = .{ "There", "'ll", "" }, .len = 2 } }, .{ "There'll've", SpecialCase{ .tokens = .{ "There", "'ll", "'ve" }, .len = 3 } }, .{ "There're", SpecialCase{ .tokens = .{ "There", "'re", "" }, .len = 2 } }, .{ "There's", SpecialCase{ .tokens = .{ "There", "'s", "" }, .len = 2 } }, .{ "There've", SpecialCase{ .tokens = .{ "There", "'ve", "" }, .len = 2 } }, .{ "Thered", SpecialCase{ .tokens = .{ "There", "d", "" }, .len = 2 } }, .{ "Theredve", SpecialCase{ .tokens = .{ "There", "d", "ve" }, .len = 3 } }, .{ "Therell", SpecialCase{ .tokens = .{ "There", "ll", "" }, .len = 2 } }, .{ "Therellve", SpecialCase{ .tokens = .{ "There", "ll", "ve" }, .len = 3 } }, .{ "Therere", SpecialCase{ .tokens = .{ "There", "re", "" }, .len = 2 } }, .{ "Theres", SpecialCase{ .tokens = .{ "There", "s", "" }, .len = 2 } }, .{ "Thereve", SpecialCase{ .tokens = .{ "There", "ve", "" }, .len = 2 } }, .{ "There\xe2\x80\x99d", SpecialCase{ .tokens = .{ "There", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "There\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "There", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "There\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "There", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "There\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "There", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "There\xe2\x80\x99re", SpecialCase{ .tokens = .{ "There", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ 
"There\xe2\x80\x99s", SpecialCase{ .tokens = .{ "There", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "There\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "There", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "These'd", SpecialCase{ .tokens = .{ "These", "'d", "" }, .len = 2 } }, .{ "These'd've", SpecialCase{ .tokens = .{ "These", "'d", "'ve" }, .len = 3 } }, .{ "These'll", SpecialCase{ .tokens = .{ "These", "'ll", "" }, .len = 2 } }, .{ "These'll've", SpecialCase{ .tokens = .{ "These", "'ll", "'ve" }, .len = 3 } }, .{ "These're", SpecialCase{ .tokens = .{ "These", "'re", "" }, .len = 2 } }, .{ "These've", SpecialCase{ .tokens = .{ "These", "'ve", "" }, .len = 2 } }, .{ "Thesed", SpecialCase{ .tokens = .{ "These", "d", "" }, .len = 2 } }, .{ "Thesedve", SpecialCase{ .tokens = .{ "These", "d", "ve" }, .len = 3 } }, .{ "Thesell", SpecialCase{ .tokens = .{ "These", "ll", "" }, .len = 2 } }, .{ "Thesellve", SpecialCase{ .tokens = .{ "These", "ll", "ve" }, .len = 3 } }, .{ "Thesere", SpecialCase{ .tokens = .{ "These", "re", "" }, .len = 2 } }, .{ "Theseve", SpecialCase{ .tokens = .{ "These", "ve", "" }, .len = 2 } }, .{ "These\xe2\x80\x99d", SpecialCase{ .tokens = .{ "These", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "These\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "These", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "These\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "These", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "These\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "These", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "These\xe2\x80\x99re", SpecialCase{ .tokens = .{ "These", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "These\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "These", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "They'd", SpecialCase{ .tokens = .{ "They", "'d", "" }, .len = 2 } }, .{ "They'd've", SpecialCase{ .tokens = .{ "They", "'d", "'ve" }, .len = 3 } }, .{ "They'll", SpecialCase{ .tokens = .{ "They", "'ll", "" }, .len = 2 } }, .{ 
"They'll've", SpecialCase{ .tokens = .{ "They", "'ll", "'ve" }, .len = 3 } }, .{ "They're", SpecialCase{ .tokens = .{ "They", "'re", "" }, .len = 2 } }, .{ "They've", SpecialCase{ .tokens = .{ "They", "'ve", "" }, .len = 2 } }, .{ "Theyd", SpecialCase{ .tokens = .{ "They", "d", "" }, .len = 2 } }, .{ "Theydve", SpecialCase{ .tokens = .{ "They", "d", "ve" }, .len = 3 } }, .{ "Theyll", SpecialCase{ .tokens = .{ "They", "ll", "" }, .len = 2 } }, .{ "Theyllve", SpecialCase{ .tokens = .{ "They", "ll", "ve" }, .len = 3 } }, .{ "Theyre", SpecialCase{ .tokens = .{ "They", "re", "" }, .len = 2 } }, .{ "Theyve", SpecialCase{ .tokens = .{ "They", "ve", "" }, .len = 2 } }, .{ "They\xe2\x80\x99d", SpecialCase{ .tokens = .{ "They", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "They\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "They", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "They\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "They", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "They\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "They", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "They\xe2\x80\x99re", SpecialCase{ .tokens = .{ "They", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "They\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "They", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "This'd", SpecialCase{ .tokens = .{ "This", "'d", "" }, .len = 2 } }, .{ "This'd've", SpecialCase{ .tokens = .{ "This", "'d", "'ve" }, .len = 3 } }, .{ "This'll", SpecialCase{ .tokens = .{ "This", "'ll", "" }, .len = 2 } }, .{ "This'll've", SpecialCase{ .tokens = .{ "This", "'ll", "'ve" }, .len = 3 } }, .{ "This's", SpecialCase{ .tokens = .{ "This", "'s", "" }, .len = 2 } }, .{ "Thisd", SpecialCase{ .tokens = .{ "This", "d", "" }, .len = 2 } }, .{ "Thisdve", SpecialCase{ .tokens = .{ "This", "d", "ve" }, .len = 3 } }, .{ "Thisll", SpecialCase{ .tokens = .{ "This", "ll", "" }, .len = 2 } }, .{ "Thisllve", SpecialCase{ .tokens = .{ "This", "ll", "ve" }, .len = 3 } }, .{ "Thiss", 
SpecialCase{ .tokens = .{ "This", "s", "" }, .len = 2 } }, .{ "This\xe2\x80\x99d", SpecialCase{ .tokens = .{ "This", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "This\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "This", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "This\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "This", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "This\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "This", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "This\xe2\x80\x99s", SpecialCase{ .tokens = .{ "This", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "Those'd", SpecialCase{ .tokens = .{ "Those", "'d", "" }, .len = 2 } }, .{ "Those'd've", SpecialCase{ .tokens = .{ "Those", "'d", "'ve" }, .len = 3 } }, .{ "Those'll", SpecialCase{ .tokens = .{ "Those", "'ll", "" }, .len = 2 } }, .{ "Those'll've", SpecialCase{ .tokens = .{ "Those", "'ll", "'ve" }, .len = 3 } }, .{ "Those're", SpecialCase{ .tokens = .{ "Those", "'re", "" }, .len = 2 } }, .{ "Those've", SpecialCase{ .tokens = .{ "Those", "'ve", "" }, .len = 2 } }, .{ "Thosed", SpecialCase{ .tokens = .{ "Those", "d", "" }, .len = 2 } }, .{ "Thosedve", SpecialCase{ .tokens = .{ "Those", "d", "ve" }, .len = 3 } }, .{ "Thosell", SpecialCase{ .tokens = .{ "Those", "ll", "" }, .len = 2 } }, .{ "Thosellve", SpecialCase{ .tokens = .{ "Those", "ll", "ve" }, .len = 3 } }, .{ "Thosere", SpecialCase{ .tokens = .{ "Those", "re", "" }, .len = 2 } }, .{ "Thoseve", SpecialCase{ .tokens = .{ "Those", "ve", "" }, .len = 2 } }, .{ "Those\xe2\x80\x99d", SpecialCase{ .tokens = .{ "Those", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "Those\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Those", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Those\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "Those", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "Those\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Those", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Those\xe2\x80\x99re", 
SpecialCase{ .tokens = .{ "Those", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "Those\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Those", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "V.V", SpecialCase{ .tokens = .{ "V.V", "", "" }, .len = 1 } }, .{ "V_V", SpecialCase{ .tokens = .{ "V_V", "", "" }, .len = 1 } }, .{ "Va.", SpecialCase{ .tokens = .{ "Va.", "", "" }, .len = 1 } }, .{ "Wash.", SpecialCase{ .tokens = .{ "Wash.", "", "" }, .len = 1 } }, .{ "Wasn't", SpecialCase{ .tokens = .{ "Was", "n't", "" }, .len = 2 } }, .{ "Wasnt", SpecialCase{ .tokens = .{ "Was", "nt", "" }, .len = 2 } }, .{ "Wasn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Was", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "We'd", SpecialCase{ .tokens = .{ "We", "'d", "" }, .len = 2 } }, .{ "We'd've", SpecialCase{ .tokens = .{ "We", "'d", "'ve" }, .len = 3 } }, .{ "We'll", SpecialCase{ .tokens = .{ "We", "'ll", "" }, .len = 2 } }, .{ "We'll've", SpecialCase{ .tokens = .{ "We", "'ll", "'ve" }, .len = 3 } }, .{ "We're", SpecialCase{ .tokens = .{ "We", "'re", "" }, .len = 2 } }, .{ "We've", SpecialCase{ .tokens = .{ "We", "'ve", "" }, .len = 2 } }, .{ "Wed", SpecialCase{ .tokens = .{ "We", "d", "" }, .len = 2 } }, .{ "Wedve", SpecialCase{ .tokens = .{ "We", "d", "ve" }, .len = 3 } }, .{ "Wellve", SpecialCase{ .tokens = .{ "We", "ll", "ve" }, .len = 3 } }, .{ "Weren't", SpecialCase{ .tokens = .{ "Were", "n't", "" }, .len = 2 } }, .{ "Werent", SpecialCase{ .tokens = .{ "Were", "nt", "" }, .len = 2 } }, .{ "Weren\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Were", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Weve", SpecialCase{ .tokens = .{ "We", "ve", "" }, .len = 2 } }, .{ "We\xe2\x80\x99d", SpecialCase{ .tokens = .{ "We", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "We\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "We", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "We\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "We", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "We\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ 
.tokens = .{ "We", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "We\xe2\x80\x99re", SpecialCase{ .tokens = .{ "We", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "We\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "We", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "What'd", SpecialCase{ .tokens = .{ "What", "'d", "" }, .len = 2 } }, .{ "What'd've", SpecialCase{ .tokens = .{ "What", "'d", "'ve" }, .len = 3 } }, .{ "What'll", SpecialCase{ .tokens = .{ "What", "'ll", "" }, .len = 2 } }, .{ "What'll've", SpecialCase{ .tokens = .{ "What", "'ll", "'ve" }, .len = 3 } }, .{ "What're", SpecialCase{ .tokens = .{ "What", "'re", "" }, .len = 2 } }, .{ "What's", SpecialCase{ .tokens = .{ "What", "'s", "" }, .len = 2 } }, .{ "What've", SpecialCase{ .tokens = .{ "What", "'ve", "" }, .len = 2 } }, .{ "Whatd", SpecialCase{ .tokens = .{ "What", "d", "" }, .len = 2 } }, .{ "Whatdve", SpecialCase{ .tokens = .{ "What", "d", "ve" }, .len = 3 } }, .{ "Whatll", SpecialCase{ .tokens = .{ "What", "ll", "" }, .len = 2 } }, .{ "Whatllve", SpecialCase{ .tokens = .{ "What", "ll", "ve" }, .len = 3 } }, .{ "Whatre", SpecialCase{ .tokens = .{ "What", "re", "" }, .len = 2 } }, .{ "Whats", SpecialCase{ .tokens = .{ "What", "s", "" }, .len = 2 } }, .{ "Whatve", SpecialCase{ .tokens = .{ "What", "ve", "" }, .len = 2 } }, .{ "What\xe2\x80\x99d", SpecialCase{ .tokens = .{ "What", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "What\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "What", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "What\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "What", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "What\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "What", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "What\xe2\x80\x99re", SpecialCase{ .tokens = .{ "What", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "What\xe2\x80\x99s", SpecialCase{ .tokens = .{ "What", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "What\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "What", 
"\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "When'd", SpecialCase{ .tokens = .{ "When", "'d", "" }, .len = 2 } }, .{ "When'd've", SpecialCase{ .tokens = .{ "When", "'d", "'ve" }, .len = 3 } }, .{ "When'll", SpecialCase{ .tokens = .{ "When", "'ll", "" }, .len = 2 } }, .{ "When'll've", SpecialCase{ .tokens = .{ "When", "'ll", "'ve" }, .len = 3 } }, .{ "When're", SpecialCase{ .tokens = .{ "When", "'re", "" }, .len = 2 } }, .{ "When's", SpecialCase{ .tokens = .{ "When", "'s", "" }, .len = 2 } }, .{ "When've", SpecialCase{ .tokens = .{ "When", "'ve", "" }, .len = 2 } }, .{ "Whend", SpecialCase{ .tokens = .{ "When", "d", "" }, .len = 2 } }, .{ "Whendve", SpecialCase{ .tokens = .{ "When", "d", "ve" }, .len = 3 } }, .{ "Whenll", SpecialCase{ .tokens = .{ "When", "ll", "" }, .len = 2 } }, .{ "Whenllve", SpecialCase{ .tokens = .{ "When", "ll", "ve" }, .len = 3 } }, .{ "Whenre", SpecialCase{ .tokens = .{ "When", "re", "" }, .len = 2 } }, .{ "Whens", SpecialCase{ .tokens = .{ "When", "s", "" }, .len = 2 } }, .{ "Whenve", SpecialCase{ .tokens = .{ "When", "ve", "" }, .len = 2 } }, .{ "When\xe2\x80\x99d", SpecialCase{ .tokens = .{ "When", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "When\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "When", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "When\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "When", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "When\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "When", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "When\xe2\x80\x99re", SpecialCase{ .tokens = .{ "When", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "When\xe2\x80\x99s", SpecialCase{ .tokens = .{ "When", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "When\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "When", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "Where'd", SpecialCase{ .tokens = .{ "Where", "'d", "" }, .len = 2 } }, .{ "Where'd've", SpecialCase{ .tokens = .{ "Where", "'d", "'ve" }, .len = 3 } }, .{ "Where'll", SpecialCase{ 
.tokens = .{ "Where", "'ll", "" }, .len = 2 } }, .{ "Where'll've", SpecialCase{ .tokens = .{ "Where", "'ll", "'ve" }, .len = 3 } }, .{ "Where're", SpecialCase{ .tokens = .{ "Where", "'re", "" }, .len = 2 } }, .{ "Where's", SpecialCase{ .tokens = .{ "Where", "'s", "" }, .len = 2 } }, .{ "Where've", SpecialCase{ .tokens = .{ "Where", "'ve", "" }, .len = 2 } }, .{ "Whered", SpecialCase{ .tokens = .{ "Where", "d", "" }, .len = 2 } }, .{ "Wheredve", SpecialCase{ .tokens = .{ "Where", "d", "ve" }, .len = 3 } }, .{ "Wherell", SpecialCase{ .tokens = .{ "Where", "ll", "" }, .len = 2 } }, .{ "Wherellve", SpecialCase{ .tokens = .{ "Where", "ll", "ve" }, .len = 3 } }, .{ "Wherere", SpecialCase{ .tokens = .{ "Where", "re", "" }, .len = 2 } }, .{ "Wheres", SpecialCase{ .tokens = .{ "Where", "s", "" }, .len = 2 } }, .{ "Whereve", SpecialCase{ .tokens = .{ "Where", "ve", "" }, .len = 2 } }, .{ "Where\xe2\x80\x99d", SpecialCase{ .tokens = .{ "Where", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "Where\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Where", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Where\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "Where", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "Where\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Where", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Where\xe2\x80\x99re", SpecialCase{ .tokens = .{ "Where", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "Where\xe2\x80\x99s", SpecialCase{ .tokens = .{ "Where", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "Where\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Where", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "Who'd", SpecialCase{ .tokens = .{ "Who", "'d", "" }, .len = 2 } }, .{ "Who'd've", SpecialCase{ .tokens = .{ "Who", "'d", "'ve" }, .len = 3 } }, .{ "Who'll", SpecialCase{ .tokens = .{ "Who", "'ll", "" }, .len = 2 } }, .{ "Who'll've", SpecialCase{ .tokens = .{ "Who", "'ll", "'ve" }, .len = 3 } }, .{ "Who're", SpecialCase{ .tokens = .{ "Who", "'re", "" }, .len = 2 
} }, .{ "Who's", SpecialCase{ .tokens = .{ "Who", "'s", "" }, .len = 2 } }, .{ "Who've", SpecialCase{ .tokens = .{ "Who", "'ve", "" }, .len = 2 } }, .{ "Whod", SpecialCase{ .tokens = .{ "Who", "d", "" }, .len = 2 } }, .{ "Whodve", SpecialCase{ .tokens = .{ "Who", "d", "ve" }, .len = 3 } }, .{ "Wholl", SpecialCase{ .tokens = .{ "Who", "ll", "" }, .len = 2 } }, .{ "Whollve", SpecialCase{ .tokens = .{ "Who", "ll", "ve" }, .len = 3 } }, .{ "Whos", SpecialCase{ .tokens = .{ "Who", "s", "" }, .len = 2 } }, .{ "Whove", SpecialCase{ .tokens = .{ "Who", "ve", "" }, .len = 2 } }, .{ "Who\xe2\x80\x99d", SpecialCase{ .tokens = .{ "Who", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "Who\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Who", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Who\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "Who", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "Who\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Who", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Who\xe2\x80\x99re", SpecialCase{ .tokens = .{ "Who", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "Who\xe2\x80\x99s", SpecialCase{ .tokens = .{ "Who", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "Who\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Who", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "Why'd", SpecialCase{ .tokens = .{ "Why", "'d", "" }, .len = 2 } }, .{ "Why'd've", SpecialCase{ .tokens = .{ "Why", "'d", "'ve" }, .len = 3 } }, .{ "Why'll", SpecialCase{ .tokens = .{ "Why", "'ll", "" }, .len = 2 } }, .{ "Why'll've", SpecialCase{ .tokens = .{ "Why", "'ll", "'ve" }, .len = 3 } }, .{ "Why're", SpecialCase{ .tokens = .{ "Why", "'re", "" }, .len = 2 } }, .{ "Why's", SpecialCase{ .tokens = .{ "Why", "'s", "" }, .len = 2 } }, .{ "Why've", SpecialCase{ .tokens = .{ "Why", "'ve", "" }, .len = 2 } }, .{ "Whyd", SpecialCase{ .tokens = .{ "Why", "d", "" }, .len = 2 } }, .{ "Whydve", SpecialCase{ .tokens = .{ "Why", "d", "ve" }, .len = 3 } }, .{ "Whyll", SpecialCase{ .tokens = .{ "Why", 
"ll", "" }, .len = 2 } }, .{ "Whyllve", SpecialCase{ .tokens = .{ "Why", "ll", "ve" }, .len = 3 } }, .{ "Whyre", SpecialCase{ .tokens = .{ "Why", "re", "" }, .len = 2 } }, .{ "Whys", SpecialCase{ .tokens = .{ "Why", "s", "" }, .len = 2 } }, .{ "Whyve", SpecialCase{ .tokens = .{ "Why", "ve", "" }, .len = 2 } }, .{ "Why\xe2\x80\x99d", SpecialCase{ .tokens = .{ "Why", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "Why\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Why", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Why\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "Why", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "Why\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Why", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Why\xe2\x80\x99re", SpecialCase{ .tokens = .{ "Why", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "Why\xe2\x80\x99s", SpecialCase{ .tokens = .{ "Why", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "Why\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Why", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "Wis.", SpecialCase{ .tokens = .{ "Wis.", "", "" }, .len = 1 } }, .{ "Won't", SpecialCase{ .tokens = .{ "Wo", "n't", "" }, .len = 2 } }, .{ "Won't've", SpecialCase{ .tokens = .{ "Wo", "n't", "'ve" }, .len = 3 } }, .{ "Wont", SpecialCase{ .tokens = .{ "Wo", "nt", "" }, .len = 2 } }, .{ "Wontve", SpecialCase{ .tokens = .{ "Wo", "nt", "ve" }, .len = 3 } }, .{ "Won\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Wo", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Won\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Wo", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Would've", SpecialCase{ .tokens = .{ "Would", "'ve", "" }, .len = 2 } }, .{ "Wouldn't", SpecialCase{ .tokens = .{ "Would", "n't", "" }, .len = 2 } }, .{ "Wouldn't've", SpecialCase{ .tokens = .{ "Would", "n't", "'ve" }, .len = 3 } }, .{ "Wouldnt", SpecialCase{ .tokens = .{ "Would", "nt", "" }, .len = 2 } }, .{ "Wouldntve", SpecialCase{ .tokens = .{ "Would", "nt", "ve" }, .len = 3 } }, .{ 
"Wouldn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "Would", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "Wouldn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Would", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "Wouldve", SpecialCase{ .tokens = .{ "Would", "ve", "" }, .len = 2 } }, .{ "Would\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "Would", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "XD", SpecialCase{ .tokens = .{ "XD", "", "" }, .len = 1 } }, .{ "XDD", SpecialCase{ .tokens = .{ "XDD", "", "" }, .len = 1 } }, .{ "You'd", SpecialCase{ .tokens = .{ "You", "'d", "" }, .len = 2 } }, .{ "You'd've", SpecialCase{ .tokens = .{ "You", "'d", "'ve" }, .len = 3 } }, .{ "You'll", SpecialCase{ .tokens = .{ "You", "'ll", "" }, .len = 2 } }, .{ "You'll've", SpecialCase{ .tokens = .{ "You", "'ll", "'ve" }, .len = 3 } }, .{ "You're", SpecialCase{ .tokens = .{ "You", "'re", "" }, .len = 2 } }, .{ "You've", SpecialCase{ .tokens = .{ "You", "'ve", "" }, .len = 2 } }, .{ "Youd", SpecialCase{ .tokens = .{ "You", "d", "" }, .len = 2 } }, .{ "Youdve", SpecialCase{ .tokens = .{ "You", "d", "ve" }, .len = 3 } }, .{ "Youll", SpecialCase{ .tokens = .{ "You", "ll", "" }, .len = 2 } }, .{ "Youllve", SpecialCase{ .tokens = .{ "You", "ll", "ve" }, .len = 3 } }, .{ "Youre", SpecialCase{ .tokens = .{ "You", "re", "" }, .len = 2 } }, .{ "Youve", SpecialCase{ .tokens = .{ "You", "ve", "" }, .len = 2 } }, .{ "You\xe2\x80\x99d", SpecialCase{ .tokens = .{ "You", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "You\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "You", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "You\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "You", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "You\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "You", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "You\xe2\x80\x99re", SpecialCase{ .tokens = .{ "You", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "You\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "You", 
"\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "[-:", SpecialCase{ .tokens = .{ "[-:", "", "" }, .len = 1 } }, .{ "[:", SpecialCase{ .tokens = .{ "[:", "", "" }, .len = 1 } }, .{ "[=", SpecialCase{ .tokens = .{ "[=", "", "" }, .len = 1 } }, .{ "\\\")", SpecialCase{ .tokens = .{ "\\\")", "", "" }, .len = 1 } }, .{ "\\n", SpecialCase{ .tokens = .{ "\\n", "", "" }, .len = 1 } }, .{ "\\t", SpecialCase{ .tokens = .{ "\\t", "", "" }, .len = 1 } }, .{ "]=", SpecialCase{ .tokens = .{ "]=", "", "" }, .len = 1 } }, .{ "^_^", SpecialCase{ .tokens = .{ "^_^", "", "" }, .len = 1 } }, .{ "^__^", SpecialCase{ .tokens = .{ "^__^", "", "" }, .len = 1 } }, .{ "^___^", SpecialCase{ .tokens = .{ "^___^", "", "" }, .len = 1 } }, .{ "a.", SpecialCase{ .tokens = .{ "a.", "", "" }, .len = 1 } }, .{ "a.m.", SpecialCase{ .tokens = .{ "a.m.", "", "" }, .len = 1 } }, .{ "ain't", SpecialCase{ .tokens = .{ "ai", "n't", "" }, .len = 2 } }, .{ "aint", SpecialCase{ .tokens = .{ "ai", "nt", "" }, .len = 2 } }, .{ "ain\xe2\x80\x99t", SpecialCase{ .tokens = .{ "ai", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "and/or", SpecialCase{ .tokens = .{ "and/or", "", "" }, .len = 1 } }, .{ "aren't", SpecialCase{ .tokens = .{ "are", "n't", "" }, .len = 2 } }, .{ "arent", SpecialCase{ .tokens = .{ "are", "nt", "" }, .len = 2 } }, .{ "aren\xe2\x80\x99t", SpecialCase{ .tokens = .{ "are", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "b.", SpecialCase{ .tokens = .{ "b.", "", "" }, .len = 1 } }, .{ "c'mon", SpecialCase{ .tokens = .{ "c'm", "on", "" }, .len = 2 } }, .{ "c.", SpecialCase{ .tokens = .{ "c.", "", "" }, .len = 1 } }, .{ "can't", SpecialCase{ .tokens = .{ "ca", "n't", "" }, .len = 2 } }, .{ "can't've", SpecialCase{ .tokens = .{ "ca", "n't", "'ve" }, .len = 3 } }, .{ "cannot", SpecialCase{ .tokens = .{ "can", "not", "" }, .len = 2 } }, .{ "cant", SpecialCase{ .tokens = .{ "ca", "nt", "" }, .len = 2 } }, .{ "cantve", SpecialCase{ .tokens = .{ "ca", "nt", "ve" }, .len = 3 } }, .{ "can\xe2\x80\x99t", SpecialCase{ .tokens 
= .{ "ca", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "can\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "ca", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "co.", SpecialCase{ .tokens = .{ "co.", "", "" }, .len = 1 } }, .{ "could've", SpecialCase{ .tokens = .{ "could", "'ve", "" }, .len = 2 } }, .{ "couldn't", SpecialCase{ .tokens = .{ "could", "n't", "" }, .len = 2 } }, .{ "couldn't've", SpecialCase{ .tokens = .{ "could", "n't", "'ve" }, .len = 3 } }, .{ "couldnt", SpecialCase{ .tokens = .{ "could", "nt", "" }, .len = 2 } }, .{ "couldntve", SpecialCase{ .tokens = .{ "could", "nt", "ve" }, .len = 3 } }, .{ "couldn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "could", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "couldn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "could", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "couldve", SpecialCase{ .tokens = .{ "could", "ve", "" }, .len = 2 } }, .{ "could\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "could", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "c\xe2\x80\x99mon", SpecialCase{ .tokens = .{ "c\xe2\x80\x99m", "on", "" }, .len = 2 } }, .{ "d.", SpecialCase{ .tokens = .{ "d.", "", "" }, .len = 1 } }, .{ "daren't", SpecialCase{ .tokens = .{ "dare", "n't", "" }, .len = 2 } }, .{ "darent", SpecialCase{ .tokens = .{ "dare", "nt", "" }, .len = 2 } }, .{ "daren\xe2\x80\x99t", SpecialCase{ .tokens = .{ "dare", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "didn't", SpecialCase{ .tokens = .{ "did", "n't", "" }, .len = 2 } }, .{ "didn't've", SpecialCase{ .tokens = .{ "did", "n't", "'ve" }, .len = 3 } }, .{ "didnt", SpecialCase{ .tokens = .{ "did", "nt", "" }, .len = 2 } }, .{ "didntve", SpecialCase{ .tokens = .{ "did", "nt", "ve" }, .len = 3 } }, .{ "didn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "did", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "didn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "did", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "doesn't", SpecialCase{ .tokens = .{ "does", 
"n't", "" }, .len = 2 } }, .{ "doesn't've", SpecialCase{ .tokens = .{ "does", "n't", "'ve" }, .len = 3 } }, .{ "doesnt", SpecialCase{ .tokens = .{ "does", "nt", "" }, .len = 2 } }, .{ "doesntve", SpecialCase{ .tokens = .{ "does", "nt", "ve" }, .len = 3 } }, .{ "doesn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "does", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "doesn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "does", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "doin", SpecialCase{ .tokens = .{ "doin", "", "" }, .len = 1 } }, .{ "doin'", SpecialCase{ .tokens = .{ "doin'", "", "" }, .len = 1 } }, .{ "doin\xe2\x80\x99", SpecialCase{ .tokens = .{ "doin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "don't", SpecialCase{ .tokens = .{ "do", "n't", "" }, .len = 2 } }, .{ "don't've", SpecialCase{ .tokens = .{ "do", "n't", "'ve" }, .len = 3 } }, .{ "dont", SpecialCase{ .tokens = .{ "do", "nt", "" }, .len = 2 } }, .{ "dontve", SpecialCase{ .tokens = .{ "do", "nt", "ve" }, .len = 3 } }, .{ "don\xe2\x80\x99t", SpecialCase{ .tokens = .{ "do", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "don\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "do", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "e.", SpecialCase{ .tokens = .{ "e.", "", "" }, .len = 1 } }, .{ "e.g.", SpecialCase{ .tokens = .{ "e.g.", "", "" }, .len = 1 } }, .{ "em", SpecialCase{ .tokens = .{ "em", "", "" }, .len = 1 } }, .{ "f.", SpecialCase{ .tokens = .{ "f.", "", "" }, .len = 1 } }, .{ "g.", SpecialCase{ .tokens = .{ "g.", "", "" }, .len = 1 } }, .{ "goin", SpecialCase{ .tokens = .{ "goin", "", "" }, .len = 1 } }, .{ "goin'", SpecialCase{ .tokens = .{ "goin'", "", "" }, .len = 1 } }, .{ "goin\xe2\x80\x99", SpecialCase{ .tokens = .{ "goin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "gonna", SpecialCase{ .tokens = .{ "gon", "na", "" }, .len = 2 } }, .{ "gotta", SpecialCase{ .tokens = .{ "got", "ta", "" }, .len = 2 } }, .{ "h.", SpecialCase{ .tokens = .{ "h.", "", "" }, .len = 1 } }, .{ "hadn't", 
SpecialCase{ .tokens = .{ "had", "n't", "" }, .len = 2 } }, .{ "hadn't've", SpecialCase{ .tokens = .{ "had", "n't", "'ve" }, .len = 3 } }, .{ "hadnt", SpecialCase{ .tokens = .{ "had", "nt", "" }, .len = 2 } }, .{ "hadntve", SpecialCase{ .tokens = .{ "had", "nt", "ve" }, .len = 3 } }, .{ "hadn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "had", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "hadn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "had", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "hasn't", SpecialCase{ .tokens = .{ "has", "n't", "" }, .len = 2 } }, .{ "hasnt", SpecialCase{ .tokens = .{ "has", "nt", "" }, .len = 2 } }, .{ "hasn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "has", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "haven't", SpecialCase{ .tokens = .{ "have", "n't", "" }, .len = 2 } }, .{ "havent", SpecialCase{ .tokens = .{ "have", "nt", "" }, .len = 2 } }, .{ "haven\xe2\x80\x99t", SpecialCase{ .tokens = .{ "have", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "havin", SpecialCase{ .tokens = .{ "havin", "", "" }, .len = 1 } }, .{ "havin'", SpecialCase{ .tokens = .{ "havin'", "", "" }, .len = 1 } }, .{ "havin\xe2\x80\x99", SpecialCase{ .tokens = .{ "havin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "he'd", SpecialCase{ .tokens = .{ "he", "'d", "" }, .len = 2 } }, .{ "he'd've", SpecialCase{ .tokens = .{ "he", "'d", "'ve" }, .len = 3 } }, .{ "he'll", SpecialCase{ .tokens = .{ "he", "'ll", "" }, .len = 2 } }, .{ "he'll've", SpecialCase{ .tokens = .{ "he", "'ll", "'ve" }, .len = 3 } }, .{ "he's", SpecialCase{ .tokens = .{ "he", "'s", "" }, .len = 2 } }, .{ "hed", SpecialCase{ .tokens = .{ "he", "d", "" }, .len = 2 } }, .{ "hedve", SpecialCase{ .tokens = .{ "he", "d", "ve" }, .len = 3 } }, .{ "hellve", SpecialCase{ .tokens = .{ "he", "ll", "ve" }, .len = 3 } }, .{ "hes", SpecialCase{ .tokens = .{ "he", "s", "" }, .len = 2 } }, .{ "he\xe2\x80\x99d", SpecialCase{ .tokens = .{ "he", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "he\xe2\x80\x99d\xe2\x80\x99ve", 
SpecialCase{ .tokens = .{ "he", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "he\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "he", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "he\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "he", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "he\xe2\x80\x99s", SpecialCase{ .tokens = .{ "he", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "how'd", SpecialCase{ .tokens = .{ "how", "'d", "" }, .len = 2 } }, .{ "how'd've", SpecialCase{ .tokens = .{ "how", "'d", "'ve" }, .len = 3 } }, .{ "how'd'y", SpecialCase{ .tokens = .{ "how", "'d", "'y" }, .len = 3 } }, .{ "how'll", SpecialCase{ .tokens = .{ "how", "'ll", "" }, .len = 2 } }, .{ "how'll've", SpecialCase{ .tokens = .{ "how", "'ll", "'ve" }, .len = 3 } }, .{ "how're", SpecialCase{ .tokens = .{ "how", "'re", "" }, .len = 2 } }, .{ "how's", SpecialCase{ .tokens = .{ "how", "'s", "" }, .len = 2 } }, .{ "how've", SpecialCase{ .tokens = .{ "how", "'ve", "" }, .len = 2 } }, .{ "howd", SpecialCase{ .tokens = .{ "how", "d", "" }, .len = 2 } }, .{ "howdve", SpecialCase{ .tokens = .{ "how", "d", "ve" }, .len = 3 } }, .{ "howll", SpecialCase{ .tokens = .{ "how", "ll", "" }, .len = 2 } }, .{ "howllve", SpecialCase{ .tokens = .{ "how", "ll", "ve" }, .len = 3 } }, .{ "howre", SpecialCase{ .tokens = .{ "how", "re", "" }, .len = 2 } }, .{ "hows", SpecialCase{ .tokens = .{ "how", "s", "" }, .len = 2 } }, .{ "howve", SpecialCase{ .tokens = .{ "how", "ve", "" }, .len = 2 } }, .{ "how\xe2\x80\x99d", SpecialCase{ .tokens = .{ "how", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "how\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "how", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "how\xe2\x80\x99d\xe2\x80\x99y", SpecialCase{ .tokens = .{ "how", "\xe2\x80\x99d", "\xe2\x80\x99y" }, .len = 3 } }, .{ "how\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "how", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "how\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "how", "\xe2\x80\x99ll", 
"\xe2\x80\x99ve" }, .len = 3 } }, .{ "how\xe2\x80\x99re", SpecialCase{ .tokens = .{ "how", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "how\xe2\x80\x99s", SpecialCase{ .tokens = .{ "how", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "how\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "how", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "i'd", SpecialCase{ .tokens = .{ "i", "'d", "" }, .len = 2 } }, .{ "i'd've", SpecialCase{ .tokens = .{ "i", "'d", "'ve" }, .len = 3 } }, .{ "i'll", SpecialCase{ .tokens = .{ "i", "'ll", "" }, .len = 2 } }, .{ "i'll've", SpecialCase{ .tokens = .{ "i", "'ll", "'ve" }, .len = 3 } }, .{ "i'm", SpecialCase{ .tokens = .{ "i", "'m", "" }, .len = 2 } }, .{ "i'ma", SpecialCase{ .tokens = .{ "i", "'m", "a" }, .len = 3 } }, .{ "i've", SpecialCase{ .tokens = .{ "i", "'ve", "" }, .len = 2 } }, .{ "i.", SpecialCase{ .tokens = .{ "i.", "", "" }, .len = 1 } }, .{ "i.e.", SpecialCase{ .tokens = .{ "i.e.", "", "" }, .len = 1 } }, .{ "id", SpecialCase{ .tokens = .{ "i", "d", "" }, .len = 2 } }, .{ "idve", SpecialCase{ .tokens = .{ "i", "d", "ve" }, .len = 3 } }, .{ "illve", SpecialCase{ .tokens = .{ "i", "ll", "ve" }, .len = 3 } }, .{ "im", SpecialCase{ .tokens = .{ "i", "m", "" }, .len = 2 } }, .{ "ima", SpecialCase{ .tokens = .{ "i", "m", "a" }, .len = 3 } }, .{ "isn't", SpecialCase{ .tokens = .{ "is", "n't", "" }, .len = 2 } }, .{ "isnt", SpecialCase{ .tokens = .{ "is", "nt", "" }, .len = 2 } }, .{ "isn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "is", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "it'd", SpecialCase{ .tokens = .{ "it", "'d", "" }, .len = 2 } }, .{ "it'd've", SpecialCase{ .tokens = .{ "it", "'d", "'ve" }, .len = 3 } }, .{ "it'll", SpecialCase{ .tokens = .{ "it", "'ll", "" }, .len = 2 } }, .{ "it'll've", SpecialCase{ .tokens = .{ "it", "'ll", "'ve" }, .len = 3 } }, .{ "it's", SpecialCase{ .tokens = .{ "it", "'s", "" }, .len = 2 } }, .{ "itd", SpecialCase{ .tokens = .{ "it", "d", "" }, .len = 2 } }, .{ "itdve", SpecialCase{ .tokens = .{ "it", "d", "ve" }, .len 
= 3 } }, .{ "itll", SpecialCase{ .tokens = .{ "it", "ll", "" }, .len = 2 } }, .{ "itllve", SpecialCase{ .tokens = .{ "it", "ll", "ve" }, .len = 3 } }, .{ "it\xe2\x80\x99d", SpecialCase{ .tokens = .{ "it", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "it\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "it", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "it\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "it", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "it\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "it", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "it\xe2\x80\x99s", SpecialCase{ .tokens = .{ "it", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "ive", SpecialCase{ .tokens = .{ "i", "ve", "" }, .len = 2 } }, .{ "i\xe2\x80\x99d", SpecialCase{ .tokens = .{ "i", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "i\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "i", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "i\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "i", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "i\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "i", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "i\xe2\x80\x99m", SpecialCase{ .tokens = .{ "i", "\xe2\x80\x99m", "" }, .len = 2 } }, .{ "i\xe2\x80\x99ma", SpecialCase{ .tokens = .{ "i", "\xe2\x80\x99m", "a" }, .len = 3 } }, .{ "i\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "i", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "j.", SpecialCase{ .tokens = .{ "j.", "", "" }, .len = 1 } }, .{ "k.", SpecialCase{ .tokens = .{ "k.", "", "" }, .len = 1 } }, .{ "l.", SpecialCase{ .tokens = .{ "l.", "", "" }, .len = 1 } }, .{ "let's", SpecialCase{ .tokens = .{ "let", "'s", "" }, .len = 2 } }, .{ "let\xe2\x80\x99s", SpecialCase{ .tokens = .{ "let", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "ll", SpecialCase{ .tokens = .{ "ll", "", "" }, .len = 1 } }, .{ "lovin", SpecialCase{ .tokens = .{ "lovin", "", "" }, .len = 1 } }, .{ "lovin'", SpecialCase{ .tokens = .{ "lovin'", "", "" }, .len = 1 } }, .{ 
"lovin\xe2\x80\x99", SpecialCase{ .tokens = .{ "lovin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "m.", SpecialCase{ .tokens = .{ "m.", "", "" }, .len = 1 } }, .{ "ma'am", SpecialCase{ .tokens = .{ "ma'am", "", "" }, .len = 1 } }, .{ "mayn't", SpecialCase{ .tokens = .{ "may", "n't", "" }, .len = 2 } }, .{ "mayn't've", SpecialCase{ .tokens = .{ "may", "n't", "'ve" }, .len = 3 } }, .{ "maynt", SpecialCase{ .tokens = .{ "may", "nt", "" }, .len = 2 } }, .{ "mayntve", SpecialCase{ .tokens = .{ "may", "nt", "ve" }, .len = 3 } }, .{ "mayn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "may", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "mayn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "may", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "ma\xe2\x80\x99am", SpecialCase{ .tokens = .{ "ma\xe2\x80\x99am", "", "" }, .len = 1 } }, .{ "might've", SpecialCase{ .tokens = .{ "might", "'ve", "" }, .len = 2 } }, .{ "mightn't", SpecialCase{ .tokens = .{ "might", "n't", "" }, .len = 2 } }, .{ "mightn't've", SpecialCase{ .tokens = .{ "might", "n't", "'ve" }, .len = 3 } }, .{ "mightnt", SpecialCase{ .tokens = .{ "might", "nt", "" }, .len = 2 } }, .{ "mightntve", SpecialCase{ .tokens = .{ "might", "nt", "ve" }, .len = 3 } }, .{ "mightn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "might", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "mightn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "might", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "mightve", SpecialCase{ .tokens = .{ "might", "ve", "" }, .len = 2 } }, .{ "might\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "might", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "must've", SpecialCase{ .tokens = .{ "must", "'ve", "" }, .len = 2 } }, .{ "mustn't", SpecialCase{ .tokens = .{ "must", "n't", "" }, .len = 2 } }, .{ "mustn't've", SpecialCase{ .tokens = .{ "must", "n't", "'ve" }, .len = 3 } }, .{ "mustnt", SpecialCase{ .tokens = .{ "must", "nt", "" }, .len = 2 } }, .{ "mustntve", SpecialCase{ .tokens = .{ "must", "nt", "ve" }, .len 
= 3 } }, .{ "mustn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "must", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "mustn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "must", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "mustve", SpecialCase{ .tokens = .{ "must", "ve", "" }, .len = 2 } }, .{ "must\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "must", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "n.", SpecialCase{ .tokens = .{ "n.", "", "" }, .len = 1 } }, .{ "needn't", SpecialCase{ .tokens = .{ "need", "n't", "" }, .len = 2 } }, .{ "needn't've", SpecialCase{ .tokens = .{ "need", "n't", "'ve" }, .len = 3 } }, .{ "neednt", SpecialCase{ .tokens = .{ "need", "nt", "" }, .len = 2 } }, .{ "needntve", SpecialCase{ .tokens = .{ "need", "nt", "ve" }, .len = 3 } }, .{ "needn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "need", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "needn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "need", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "not've", SpecialCase{ .tokens = .{ "not", "'ve", "" }, .len = 2 } }, .{ "nothin", SpecialCase{ .tokens = .{ "nothin", "", "" }, .len = 1 } }, .{ "nothin'", SpecialCase{ .tokens = .{ "nothin'", "", "" }, .len = 1 } }, .{ "nothin\xe2\x80\x99", SpecialCase{ .tokens = .{ "nothin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "notve", SpecialCase{ .tokens = .{ "not", "ve", "" }, .len = 2 } }, .{ "not\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "not", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "nuff", SpecialCase{ .tokens = .{ "nuff", "", "" }, .len = 1 } }, .{ "nuthin", SpecialCase{ .tokens = .{ "nuthin", "", "" }, .len = 1 } }, .{ "nuthin'", SpecialCase{ .tokens = .{ "nuthin'", "", "" }, .len = 1 } }, .{ "nuthin\xe2\x80\x99", SpecialCase{ .tokens = .{ "nuthin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "o'clock", SpecialCase{ .tokens = .{ "o'clock", "", "" }, .len = 1 } }, .{ "o.", SpecialCase{ .tokens = .{ "o.", "", "" }, .len = 1 } }, .{ "o.0", SpecialCase{ .tokens = .{ "o.0", "", "" }, .len = 1 } }, 
.{ "o.O", SpecialCase{ .tokens = .{ "o.O", "", "" }, .len = 1 } }, .{ "o.o", SpecialCase{ .tokens = .{ "o.o", "", "" }, .len = 1 } }, .{ "o_0", SpecialCase{ .tokens = .{ "o_0", "", "" }, .len = 1 } }, .{ "o_O", SpecialCase{ .tokens = .{ "o_O", "", "" }, .len = 1 } }, .{ "o_o", SpecialCase{ .tokens = .{ "o_o", "", "" }, .len = 1 } }, .{ "ol", SpecialCase{ .tokens = .{ "ol", "", "" }, .len = 1 } }, .{ "ol'", SpecialCase{ .tokens = .{ "ol'", "", "" }, .len = 1 } }, .{ "ol\xe2\x80\x99", SpecialCase{ .tokens = .{ "ol\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "oughtn't", SpecialCase{ .tokens = .{ "ought", "n't", "" }, .len = 2 } }, .{ "oughtn't've", SpecialCase{ .tokens = .{ "ought", "n't", "'ve" }, .len = 3 } }, .{ "oughtnt", SpecialCase{ .tokens = .{ "ought", "nt", "" }, .len = 2 } }, .{ "oughtntve", SpecialCase{ .tokens = .{ "ought", "nt", "ve" }, .len = 3 } }, .{ "oughtn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "ought", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "oughtn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "ought", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "o\xe2\x80\x99clock", SpecialCase{ .tokens = .{ "o\xe2\x80\x99clock", "", "" }, .len = 1 } }, .{ "p.", SpecialCase{ .tokens = .{ "p.", "", "" }, .len = 1 } }, .{ "p.m.", SpecialCase{ .tokens = .{ "p.m.", "", "" }, .len = 1 } }, .{ "q.", SpecialCase{ .tokens = .{ "q.", "", "" }, .len = 1 } }, .{ "r.", SpecialCase{ .tokens = .{ "r.", "", "" }, .len = 1 } }, .{ "s.", SpecialCase{ .tokens = .{ "s.", "", "" }, .len = 1 } }, .{ "shan't", SpecialCase{ .tokens = .{ "sha", "n't", "" }, .len = 2 } }, .{ "shan't've", SpecialCase{ .tokens = .{ "sha", "n't", "'ve" }, .len = 3 } }, .{ "shant", SpecialCase{ .tokens = .{ "sha", "nt", "" }, .len = 2 } }, .{ "shantve", SpecialCase{ .tokens = .{ "sha", "nt", "ve" }, .len = 3 } }, .{ "shan\xe2\x80\x99t", SpecialCase{ .tokens = .{ "sha", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "shan\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "sha", 
"n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "she'd", SpecialCase{ .tokens = .{ "she", "'d", "" }, .len = 2 } }, .{ "she'd've", SpecialCase{ .tokens = .{ "she", "'d", "'ve" }, .len = 3 } }, .{ "she'll", SpecialCase{ .tokens = .{ "she", "'ll", "" }, .len = 2 } }, .{ "she'll've", SpecialCase{ .tokens = .{ "she", "'ll", "'ve" }, .len = 3 } }, .{ "she's", SpecialCase{ .tokens = .{ "she", "'s", "" }, .len = 2 } }, .{ "shedve", SpecialCase{ .tokens = .{ "she", "d", "ve" }, .len = 3 } }, .{ "shellve", SpecialCase{ .tokens = .{ "she", "ll", "ve" }, .len = 3 } }, .{ "shes", SpecialCase{ .tokens = .{ "she", "s", "" }, .len = 2 } }, .{ "she\xe2\x80\x99d", SpecialCase{ .tokens = .{ "she", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "she\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "she", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "she\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "she", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "she\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "she", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "she\xe2\x80\x99s", SpecialCase{ .tokens = .{ "she", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "should've", SpecialCase{ .tokens = .{ "should", "'ve", "" }, .len = 2 } }, .{ "shouldn't", SpecialCase{ .tokens = .{ "should", "n't", "" }, .len = 2 } }, .{ "shouldn't've", SpecialCase{ .tokens = .{ "should", "n't", "'ve" }, .len = 3 } }, .{ "shouldnt", SpecialCase{ .tokens = .{ "should", "nt", "" }, .len = 2 } }, .{ "shouldntve", SpecialCase{ .tokens = .{ "should", "nt", "ve" }, .len = 3 } }, .{ "shouldn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "should", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "shouldn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "should", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "shouldve", SpecialCase{ .tokens = .{ "should", "ve", "" }, .len = 2 } }, .{ "should\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "should", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "somethin", 
SpecialCase{ .tokens = .{ "somethin", "", "" }, .len = 1 } }, .{ "somethin'", SpecialCase{ .tokens = .{ "somethin'", "", "" }, .len = 1 } }, .{ "somethin\xe2\x80\x99", SpecialCase{ .tokens = .{ "somethin\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "t.", SpecialCase{ .tokens = .{ "t.", "", "" }, .len = 1 } }, .{ "that'd", SpecialCase{ .tokens = .{ "that", "'d", "" }, .len = 2 } }, .{ "that'd've", SpecialCase{ .tokens = .{ "that", "'d", "'ve" }, .len = 3 } }, .{ "that'll", SpecialCase{ .tokens = .{ "that", "'ll", "" }, .len = 2 } }, .{ "that'll've", SpecialCase{ .tokens = .{ "that", "'ll", "'ve" }, .len = 3 } }, .{ "that's", SpecialCase{ .tokens = .{ "that", "'s", "" }, .len = 2 } }, .{ "thatd", SpecialCase{ .tokens = .{ "that", "d", "" }, .len = 2 } }, .{ "thatdve", SpecialCase{ .tokens = .{ "that", "d", "ve" }, .len = 3 } }, .{ "thatll", SpecialCase{ .tokens = .{ "that", "ll", "" }, .len = 2 } }, .{ "thatllve", SpecialCase{ .tokens = .{ "that", "ll", "ve" }, .len = 3 } }, .{ "thats", SpecialCase{ .tokens = .{ "that", "s", "" }, .len = 2 } }, .{ "that\xe2\x80\x99d", SpecialCase{ .tokens = .{ "that", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "that\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "that", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "that\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "that", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "that\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "that", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "that\xe2\x80\x99s", SpecialCase{ .tokens = .{ "that", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "there'd", SpecialCase{ .tokens = .{ "there", "'d", "" }, .len = 2 } }, .{ "there'd've", SpecialCase{ .tokens = .{ "there", "'d", "'ve" }, .len = 3 } }, .{ "there'll", SpecialCase{ .tokens = .{ "there", "'ll", "" }, .len = 2 } }, .{ "there'll've", SpecialCase{ .tokens = .{ "there", "'ll", "'ve" }, .len = 3 } }, .{ "there're", SpecialCase{ .tokens = .{ "there", "'re", "" }, .len = 2 } }, .{ "there's", 
SpecialCase{ .tokens = .{ "there", "'s", "" }, .len = 2 } }, .{ "there've", SpecialCase{ .tokens = .{ "there", "'ve", "" }, .len = 2 } }, .{ "thered", SpecialCase{ .tokens = .{ "there", "d", "" }, .len = 2 } }, .{ "theredve", SpecialCase{ .tokens = .{ "there", "d", "ve" }, .len = 3 } }, .{ "therell", SpecialCase{ .tokens = .{ "there", "ll", "" }, .len = 2 } }, .{ "therellve", SpecialCase{ .tokens = .{ "there", "ll", "ve" }, .len = 3 } }, .{ "therere", SpecialCase{ .tokens = .{ "there", "re", "" }, .len = 2 } }, .{ "theres", SpecialCase{ .tokens = .{ "there", "s", "" }, .len = 2 } }, .{ "thereve", SpecialCase{ .tokens = .{ "there", "ve", "" }, .len = 2 } }, .{ "there\xe2\x80\x99d", SpecialCase{ .tokens = .{ "there", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "there\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "there", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "there\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "there", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "there\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "there", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "there\xe2\x80\x99re", SpecialCase{ .tokens = .{ "there", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "there\xe2\x80\x99s", SpecialCase{ .tokens = .{ "there", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "there\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "there", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "these'd", SpecialCase{ .tokens = .{ "these", "'d", "" }, .len = 2 } }, .{ "these'd've", SpecialCase{ .tokens = .{ "these", "'d", "'ve" }, .len = 3 } }, .{ "these'll", SpecialCase{ .tokens = .{ "these", "'ll", "" }, .len = 2 } }, .{ "these'll've", SpecialCase{ .tokens = .{ "these", "'ll", "'ve" }, .len = 3 } }, .{ "these're", SpecialCase{ .tokens = .{ "these", "'re", "" }, .len = 2 } }, .{ "these've", SpecialCase{ .tokens = .{ "these", "'ve", "" }, .len = 2 } }, .{ "thesed", SpecialCase{ .tokens = .{ "these", "d", "" }, .len = 2 } }, .{ "thesedve", SpecialCase{ .tokens = .{ 
"these", "d", "ve" }, .len = 3 } }, .{ "thesell", SpecialCase{ .tokens = .{ "these", "ll", "" }, .len = 2 } }, .{ "thesellve", SpecialCase{ .tokens = .{ "these", "ll", "ve" }, .len = 3 } }, .{ "thesere", SpecialCase{ .tokens = .{ "these", "re", "" }, .len = 2 } }, .{ "theseve", SpecialCase{ .tokens = .{ "these", "ve", "" }, .len = 2 } }, .{ "these\xe2\x80\x99d", SpecialCase{ .tokens = .{ "these", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "these\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "these", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "these\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "these", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "these\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "these", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "these\xe2\x80\x99re", SpecialCase{ .tokens = .{ "these", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "these\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "these", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "they'd", SpecialCase{ .tokens = .{ "they", "'d", "" }, .len = 2 } }, .{ "they'd've", SpecialCase{ .tokens = .{ "they", "'d", "'ve" }, .len = 3 } }, .{ "they'll", SpecialCase{ .tokens = .{ "they", "'ll", "" }, .len = 2 } }, .{ "they'll've", SpecialCase{ .tokens = .{ "they", "'ll", "'ve" }, .len = 3 } }, .{ "they're", SpecialCase{ .tokens = .{ "they", "'re", "" }, .len = 2 } }, .{ "they've", SpecialCase{ .tokens = .{ "they", "'ve", "" }, .len = 2 } }, .{ "theyd", SpecialCase{ .tokens = .{ "they", "d", "" }, .len = 2 } }, .{ "theydve", SpecialCase{ .tokens = .{ "they", "d", "ve" }, .len = 3 } }, .{ "theyll", SpecialCase{ .tokens = .{ "they", "ll", "" }, .len = 2 } }, .{ "theyllve", SpecialCase{ .tokens = .{ "they", "ll", "ve" }, .len = 3 } }, .{ "theyre", SpecialCase{ .tokens = .{ "they", "re", "" }, .len = 2 } }, .{ "theyve", SpecialCase{ .tokens = .{ "they", "ve", "" }, .len = 2 } }, .{ "they\xe2\x80\x99d", SpecialCase{ .tokens = .{ "they", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ 
"they\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "they", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "they\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "they", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "they\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "they", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "they\xe2\x80\x99re", SpecialCase{ .tokens = .{ "they", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "they\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "they", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "this'd", SpecialCase{ .tokens = .{ "this", "'d", "" }, .len = 2 } }, .{ "this'd've", SpecialCase{ .tokens = .{ "this", "'d", "'ve" }, .len = 3 } }, .{ "this'll", SpecialCase{ .tokens = .{ "this", "'ll", "" }, .len = 2 } }, .{ "this'll've", SpecialCase{ .tokens = .{ "this", "'ll", "'ve" }, .len = 3 } }, .{ "this's", SpecialCase{ .tokens = .{ "this", "'s", "" }, .len = 2 } }, .{ "thisd", SpecialCase{ .tokens = .{ "this", "d", "" }, .len = 2 } }, .{ "thisdve", SpecialCase{ .tokens = .{ "this", "d", "ve" }, .len = 3 } }, .{ "thisll", SpecialCase{ .tokens = .{ "this", "ll", "" }, .len = 2 } }, .{ "thisllve", SpecialCase{ .tokens = .{ "this", "ll", "ve" }, .len = 3 } }, .{ "thiss", SpecialCase{ .tokens = .{ "this", "s", "" }, .len = 2 } }, .{ "this\xe2\x80\x99d", SpecialCase{ .tokens = .{ "this", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "this\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "this", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "this\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "this", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "this\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "this", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "this\xe2\x80\x99s", SpecialCase{ .tokens = .{ "this", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "those'd", SpecialCase{ .tokens = .{ "those", "'d", "" }, .len = 2 } }, .{ "those'd've", SpecialCase{ .tokens = .{ "those", "'d", "'ve" }, .len = 3 } }, .{ "those'll", 
SpecialCase{ .tokens = .{ "those", "'ll", "" }, .len = 2 } }, .{ "those'll've", SpecialCase{ .tokens = .{ "those", "'ll", "'ve" }, .len = 3 } }, .{ "those're", SpecialCase{ .tokens = .{ "those", "'re", "" }, .len = 2 } }, .{ "those've", SpecialCase{ .tokens = .{ "those", "'ve", "" }, .len = 2 } }, .{ "thosed", SpecialCase{ .tokens = .{ "those", "d", "" }, .len = 2 } }, .{ "thosedve", SpecialCase{ .tokens = .{ "those", "d", "ve" }, .len = 3 } }, .{ "thosell", SpecialCase{ .tokens = .{ "those", "ll", "" }, .len = 2 } }, .{ "thosellve", SpecialCase{ .tokens = .{ "those", "ll", "ve" }, .len = 3 } }, .{ "thosere", SpecialCase{ .tokens = .{ "those", "re", "" }, .len = 2 } }, .{ "thoseve", SpecialCase{ .tokens = .{ "those", "ve", "" }, .len = 2 } }, .{ "those\xe2\x80\x99d", SpecialCase{ .tokens = .{ "those", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "those\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "those", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "those\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "those", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "those\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "those", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "those\xe2\x80\x99re", SpecialCase{ .tokens = .{ "those", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "those\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "those", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "u.", SpecialCase{ .tokens = .{ "u.", "", "" }, .len = 1 } }, .{ "v.", SpecialCase{ .tokens = .{ "v.", "", "" }, .len = 1 } }, .{ "v.s.", SpecialCase{ .tokens = .{ "v.s.", "", "" }, .len = 1 } }, .{ "v.v", SpecialCase{ .tokens = .{ "v.v", "", "" }, .len = 1 } }, .{ "v_v", SpecialCase{ .tokens = .{ "v_v", "", "" }, .len = 1 } }, .{ "vs.", SpecialCase{ .tokens = .{ "vs.", "", "" }, .len = 1 } }, .{ "w.", SpecialCase{ .tokens = .{ "w.", "", "" }, .len = 1 } }, .{ "w/o", SpecialCase{ .tokens = .{ "w/o", "", "" }, .len = 1 } }, .{ "wasn't", SpecialCase{ .tokens = .{ "was", "n't", "" }, .len = 2 
} }, .{ "wasnt", SpecialCase{ .tokens = .{ "was", "nt", "" }, .len = 2 } }, .{ "wasn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "was", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "we'd", SpecialCase{ .tokens = .{ "we", "'d", "" }, .len = 2 } }, .{ "we'd've", SpecialCase{ .tokens = .{ "we", "'d", "'ve" }, .len = 3 } }, .{ "we'll", SpecialCase{ .tokens = .{ "we", "'ll", "" }, .len = 2 } }, .{ "we'll've", SpecialCase{ .tokens = .{ "we", "'ll", "'ve" }, .len = 3 } }, .{ "we're", SpecialCase{ .tokens = .{ "we", "'re", "" }, .len = 2 } }, .{ "we've", SpecialCase{ .tokens = .{ "we", "'ve", "" }, .len = 2 } }, .{ "wed", SpecialCase{ .tokens = .{ "we", "d", "" }, .len = 2 } }, .{ "wedve", SpecialCase{ .tokens = .{ "we", "d", "ve" }, .len = 3 } }, .{ "wellve", SpecialCase{ .tokens = .{ "we", "ll", "ve" }, .len = 3 } }, .{ "weren't", SpecialCase{ .tokens = .{ "were", "n't", "" }, .len = 2 } }, .{ "werent", SpecialCase{ .tokens = .{ "were", "nt", "" }, .len = 2 } }, .{ "weren\xe2\x80\x99t", SpecialCase{ .tokens = .{ "were", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "weve", SpecialCase{ .tokens = .{ "we", "ve", "" }, .len = 2 } }, .{ "we\xe2\x80\x99d", SpecialCase{ .tokens = .{ "we", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "we\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "we", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "we\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "we", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "we\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "we", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "we\xe2\x80\x99re", SpecialCase{ .tokens = .{ "we", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "we\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "we", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "what'd", SpecialCase{ .tokens = .{ "what", "'d", "" }, .len = 2 } }, .{ "what'd've", SpecialCase{ .tokens = .{ "what", "'d", "'ve" }, .len = 3 } }, .{ "what'll", SpecialCase{ .tokens = .{ "what", "'ll", "" }, .len = 2 } }, .{ "what'll've", SpecialCase{ 
.tokens = .{ "what", "'ll", "'ve" }, .len = 3 } }, .{ "what're", SpecialCase{ .tokens = .{ "what", "'re", "" }, .len = 2 } }, .{ "what's", SpecialCase{ .tokens = .{ "what", "'s", "" }, .len = 2 } }, .{ "what've", SpecialCase{ .tokens = .{ "what", "'ve", "" }, .len = 2 } }, .{ "whatd", SpecialCase{ .tokens = .{ "what", "d", "" }, .len = 2 } }, .{ "whatdve", SpecialCase{ .tokens = .{ "what", "d", "ve" }, .len = 3 } }, .{ "whatll", SpecialCase{ .tokens = .{ "what", "ll", "" }, .len = 2 } }, .{ "whatllve", SpecialCase{ .tokens = .{ "what", "ll", "ve" }, .len = 3 } }, .{ "whatre", SpecialCase{ .tokens = .{ "what", "re", "" }, .len = 2 } }, .{ "whats", SpecialCase{ .tokens = .{ "what", "s", "" }, .len = 2 } }, .{ "whatve", SpecialCase{ .tokens = .{ "what", "ve", "" }, .len = 2 } }, .{ "what\xe2\x80\x99d", SpecialCase{ .tokens = .{ "what", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "what\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "what", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "what\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "what", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "what\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "what", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "what\xe2\x80\x99re", SpecialCase{ .tokens = .{ "what", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "what\xe2\x80\x99s", SpecialCase{ .tokens = .{ "what", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "what\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "what", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "when'd", SpecialCase{ .tokens = .{ "when", "'d", "" }, .len = 2 } }, .{ "when'd've", SpecialCase{ .tokens = .{ "when", "'d", "'ve" }, .len = 3 } }, .{ "when'll", SpecialCase{ .tokens = .{ "when", "'ll", "" }, .len = 2 } }, .{ "when'll've", SpecialCase{ .tokens = .{ "when", "'ll", "'ve" }, .len = 3 } }, .{ "when're", SpecialCase{ .tokens = .{ "when", "'re", "" }, .len = 2 } }, .{ "when's", SpecialCase{ .tokens = .{ "when", "'s", "" }, .len = 2 } }, .{ "when've", SpecialCase{ 
.tokens = .{ "when", "'ve", "" }, .len = 2 } }, .{ "whend", SpecialCase{ .tokens = .{ "when", "d", "" }, .len = 2 } }, .{ "whendve", SpecialCase{ .tokens = .{ "when", "d", "ve" }, .len = 3 } }, .{ "whenll", SpecialCase{ .tokens = .{ "when", "ll", "" }, .len = 2 } }, .{ "whenllve", SpecialCase{ .tokens = .{ "when", "ll", "ve" }, .len = 3 } }, .{ "whenre", SpecialCase{ .tokens = .{ "when", "re", "" }, .len = 2 } }, .{ "whens", SpecialCase{ .tokens = .{ "when", "s", "" }, .len = 2 } }, .{ "whenve", SpecialCase{ .tokens = .{ "when", "ve", "" }, .len = 2 } }, .{ "when\xe2\x80\x99d", SpecialCase{ .tokens = .{ "when", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "when\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "when", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "when\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "when", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "when\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "when", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "when\xe2\x80\x99re", SpecialCase{ .tokens = .{ "when", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "when\xe2\x80\x99s", SpecialCase{ .tokens = .{ "when", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "when\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "when", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "where'd", SpecialCase{ .tokens = .{ "where", "'d", "" }, .len = 2 } }, .{ "where'd've", SpecialCase{ .tokens = .{ "where", "'d", "'ve" }, .len = 3 } }, .{ "where'll", SpecialCase{ .tokens = .{ "where", "'ll", "" }, .len = 2 } }, .{ "where'll've", SpecialCase{ .tokens = .{ "where", "'ll", "'ve" }, .len = 3 } }, .{ "where're", SpecialCase{ .tokens = .{ "where", "'re", "" }, .len = 2 } }, .{ "where's", SpecialCase{ .tokens = .{ "where", "'s", "" }, .len = 2 } }, .{ "where've", SpecialCase{ .tokens = .{ "where", "'ve", "" }, .len = 2 } }, .{ "whered", SpecialCase{ .tokens = .{ "where", "d", "" }, .len = 2 } }, .{ "wheredve", SpecialCase{ .tokens = .{ "where", "d", "ve" }, .len = 3 } }, .{ "wherell", 
SpecialCase{ .tokens = .{ "where", "ll", "" }, .len = 2 } }, .{ "wherellve", SpecialCase{ .tokens = .{ "where", "ll", "ve" }, .len = 3 } }, .{ "wherere", SpecialCase{ .tokens = .{ "where", "re", "" }, .len = 2 } }, .{ "wheres", SpecialCase{ .tokens = .{ "where", "s", "" }, .len = 2 } }, .{ "whereve", SpecialCase{ .tokens = .{ "where", "ve", "" }, .len = 2 } }, .{ "where\xe2\x80\x99d", SpecialCase{ .tokens = .{ "where", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "where\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "where", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "where\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "where", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "where\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "where", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "where\xe2\x80\x99re", SpecialCase{ .tokens = .{ "where", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "where\xe2\x80\x99s", SpecialCase{ .tokens = .{ "where", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "where\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "where", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "who'd", SpecialCase{ .tokens = .{ "who", "'d", "" }, .len = 2 } }, .{ "who'd've", SpecialCase{ .tokens = .{ "who", "'d", "'ve" }, .len = 3 } }, .{ "who'll", SpecialCase{ .tokens = .{ "who", "'ll", "" }, .len = 2 } }, .{ "who'll've", SpecialCase{ .tokens = .{ "who", "'ll", "'ve" }, .len = 3 } }, .{ "who're", SpecialCase{ .tokens = .{ "who", "'re", "" }, .len = 2 } }, .{ "who's", SpecialCase{ .tokens = .{ "who", "'s", "" }, .len = 2 } }, .{ "who've", SpecialCase{ .tokens = .{ "who", "'ve", "" }, .len = 2 } }, .{ "whod", SpecialCase{ .tokens = .{ "who", "d", "" }, .len = 2 } }, .{ "whodve", SpecialCase{ .tokens = .{ "who", "d", "ve" }, .len = 3 } }, .{ "wholl", SpecialCase{ .tokens = .{ "who", "ll", "" }, .len = 2 } }, .{ "whollve", SpecialCase{ .tokens = .{ "who", "ll", "ve" }, .len = 3 } }, .{ "whos", SpecialCase{ .tokens = .{ "who", "s", "" }, .len = 2 } }, .{ "whove", 
SpecialCase{ .tokens = .{ "who", "ve", "" }, .len = 2 } }, .{ "who\xe2\x80\x99d", SpecialCase{ .tokens = .{ "who", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "who\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "who", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "who\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "who", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "who\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "who", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "who\xe2\x80\x99re", SpecialCase{ .tokens = .{ "who", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "who\xe2\x80\x99s", SpecialCase{ .tokens = .{ "who", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "who\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "who", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "why'd", SpecialCase{ .tokens = .{ "why", "'d", "" }, .len = 2 } }, .{ "why'd've", SpecialCase{ .tokens = .{ "why", "'d", "'ve" }, .len = 3 } }, .{ "why'll", SpecialCase{ .tokens = .{ "why", "'ll", "" }, .len = 2 } }, .{ "why'll've", SpecialCase{ .tokens = .{ "why", "'ll", "'ve" }, .len = 3 } }, .{ "why're", SpecialCase{ .tokens = .{ "why", "'re", "" }, .len = 2 } }, .{ "why's", SpecialCase{ .tokens = .{ "why", "'s", "" }, .len = 2 } }, .{ "why've", SpecialCase{ .tokens = .{ "why", "'ve", "" }, .len = 2 } }, .{ "whyd", SpecialCase{ .tokens = .{ "why", "d", "" }, .len = 2 } }, .{ "whydve", SpecialCase{ .tokens = .{ "why", "d", "ve" }, .len = 3 } }, .{ "whyll", SpecialCase{ .tokens = .{ "why", "ll", "" }, .len = 2 } }, .{ "whyllve", SpecialCase{ .tokens = .{ "why", "ll", "ve" }, .len = 3 } }, .{ "whyre", SpecialCase{ .tokens = .{ "why", "re", "" }, .len = 2 } }, .{ "whys", SpecialCase{ .tokens = .{ "why", "s", "" }, .len = 2 } }, .{ "whyve", SpecialCase{ .tokens = .{ "why", "ve", "" }, .len = 2 } }, .{ "why\xe2\x80\x99d", SpecialCase{ .tokens = .{ "why", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "why\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "why", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 
} }, .{ "why\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "why", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "why\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "why", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "why\xe2\x80\x99re", SpecialCase{ .tokens = .{ "why", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "why\xe2\x80\x99s", SpecialCase{ .tokens = .{ "why", "\xe2\x80\x99s", "" }, .len = 2 } }, .{ "why\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "why", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "won't", SpecialCase{ .tokens = .{ "wo", "n't", "" }, .len = 2 } }, .{ "won't've", SpecialCase{ .tokens = .{ "wo", "n't", "'ve" }, .len = 3 } }, .{ "wont", SpecialCase{ .tokens = .{ "wo", "nt", "" }, .len = 2 } }, .{ "wontve", SpecialCase{ .tokens = .{ "wo", "nt", "ve" }, .len = 3 } }, .{ "won\xe2\x80\x99t", SpecialCase{ .tokens = .{ "wo", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "won\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "wo", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "would've", SpecialCase{ .tokens = .{ "would", "'ve", "" }, .len = 2 } }, .{ "wouldn't", SpecialCase{ .tokens = .{ "would", "n't", "" }, .len = 2 } }, .{ "wouldn't've", SpecialCase{ .tokens = .{ "would", "n't", "'ve" }, .len = 3 } }, .{ "wouldnt", SpecialCase{ .tokens = .{ "would", "nt", "" }, .len = 2 } }, .{ "wouldntve", SpecialCase{ .tokens = .{ "would", "nt", "ve" }, .len = 3 } }, .{ "wouldn\xe2\x80\x99t", SpecialCase{ .tokens = .{ "would", "n\xe2\x80\x99t", "" }, .len = 2 } }, .{ "wouldn\xe2\x80\x99t\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "would", "n\xe2\x80\x99t", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "wouldve", SpecialCase{ .tokens = .{ "would", "ve", "" }, .len = 2 } }, .{ "would\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "would", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "x.", SpecialCase{ .tokens = .{ "x.", "", "" }, .len = 1 } }, .{ "xD", SpecialCase{ .tokens = .{ "xD", "", "" }, .len = 1 } }, .{ "xDD", SpecialCase{ .tokens = .{ "xDD", "", "" }, .len = 1 } }, 
.{ "y'all", SpecialCase{ .tokens = .{ "y'", "all", "" }, .len = 2 } }, .{ "y.", SpecialCase{ .tokens = .{ "y.", "", "" }, .len = 1 } }, .{ "yall", SpecialCase{ .tokens = .{ "y", "all", "" }, .len = 2 } }, .{ "you'd", SpecialCase{ .tokens = .{ "you", "'d", "" }, .len = 2 } }, .{ "you'd've", SpecialCase{ .tokens = .{ "you", "'d", "'ve" }, .len = 3 } }, .{ "you'll", SpecialCase{ .tokens = .{ "you", "'ll", "" }, .len = 2 } }, .{ "you'll've", SpecialCase{ .tokens = .{ "you", "'ll", "'ve" }, .len = 3 } }, .{ "you're", SpecialCase{ .tokens = .{ "you", "'re", "" }, .len = 2 } }, .{ "you've", SpecialCase{ .tokens = .{ "you", "'ve", "" }, .len = 2 } }, .{ "youd", SpecialCase{ .tokens = .{ "you", "d", "" }, .len = 2 } }, .{ "youdve", SpecialCase{ .tokens = .{ "you", "d", "ve" }, .len = 3 } }, .{ "youll", SpecialCase{ .tokens = .{ "you", "ll", "" }, .len = 2 } }, .{ "youllve", SpecialCase{ .tokens = .{ "you", "ll", "ve" }, .len = 3 } }, .{ "youre", SpecialCase{ .tokens = .{ "you", "re", "" }, .len = 2 } }, .{ "youve", SpecialCase{ .tokens = .{ "you", "ve", "" }, .len = 2 } }, .{ "you\xe2\x80\x99d", SpecialCase{ .tokens = .{ "you", "\xe2\x80\x99d", "" }, .len = 2 } }, .{ "you\xe2\x80\x99d\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "you", "\xe2\x80\x99d", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "you\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "you", "\xe2\x80\x99ll", "" }, .len = 2 } }, .{ "you\xe2\x80\x99ll\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "you", "\xe2\x80\x99ll", "\xe2\x80\x99ve" }, .len = 3 } }, .{ "you\xe2\x80\x99re", SpecialCase{ .tokens = .{ "you", "\xe2\x80\x99re", "" }, .len = 2 } }, .{ "you\xe2\x80\x99ve", SpecialCase{ .tokens = .{ "you", "\xe2\x80\x99ve", "" }, .len = 2 } }, .{ "y\xe2\x80\x99all", SpecialCase{ .tokens = .{ "y\xe2\x80\x99", "all", "" }, .len = 2 } }, .{ "z.", SpecialCase{ .tokens = .{ "z.", "", "" }, .len = 1 } }, .{ "\xc2\xa0", SpecialCase{ .tokens = .{ "\xc2\xa0", "", "" }, .len = 1 } }, .{ "\xc2\xaf\\(\xe3\x83\x84)/\xc2\xaf", SpecialCase{ .tokens 
= .{ "\xc2\xaf\\(\xe3\x83\x84)/\xc2\xaf", "", "" }, .len = 1 } }, .{ "\xc2\xb0C.", SpecialCase{ .tokens = .{ "\xc2\xb0", "C", "." }, .len = 3 } }, .{ "\xc2\xb0F.", SpecialCase{ .tokens = .{ "\xc2\xb0", "F", "." }, .len = 3 } }, .{ "\xc2\xb0K.", SpecialCase{ .tokens = .{ "\xc2\xb0", "K", "." }, .len = 3 } }, .{ "\xc2\xb0c.", SpecialCase{ .tokens = .{ "\xc2\xb0", "c", "." }, .len = 3 } }, .{ "\xc2\xb0f.", SpecialCase{ .tokens = .{ "\xc2\xb0", "f", "." }, .len = 3 } }, .{ "\xc2\xb0k.", SpecialCase{ .tokens = .{ "\xc2\xb0", "k", "." }, .len = 3 } }, .{ "\xc3\xa4.", SpecialCase{ .tokens = .{ "\xc3\xa4.", "", "" }, .len = 1 } }, .{ "\xc3\xb6.", SpecialCase{ .tokens = .{ "\xc3\xb6.", "", "" }, .len = 1 } }, .{ "\xc3\xbc.", SpecialCase{ .tokens = .{ "\xc3\xbc.", "", "" }, .len = 1 } }, .{ "\xe0\xb2\xa0_\xe0\xb2\xa0", SpecialCase{ .tokens = .{ "\xe0\xb2\xa0_\xe0\xb2\xa0", "", "" }, .len = 1 } }, .{ "\xe0\xb2\xa0\xef\xb8\xb5\xe0\xb2\xa0", SpecialCase{ .tokens = .{ "\xe0\xb2\xa0\xef\xb8\xb5\xe0\xb2\xa0", "", "" }, .len = 1 } }, .{ "\xe2\x80\x94", SpecialCase{ .tokens = .{ "\xe2\x80\x94", "", "" }, .len = 1 } }, .{ "\xe2\x80\x98S", SpecialCase{ .tokens = .{ "\xe2\x80\x98S", "", "" }, .len = 1 } }, .{ "\xe2\x80\x98s", SpecialCase{ .tokens = .{ "\xe2\x80\x98s", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99", SpecialCase{ .tokens = .{ "\xe2\x80\x99", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99Cause", SpecialCase{ .tokens = .{ "\xe2\x80\x99Cause", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99Cos", SpecialCase{ .tokens = .{ "\xe2\x80\x99Cos", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99Coz", SpecialCase{ .tokens = .{ "\xe2\x80\x99Coz", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99Cuz", SpecialCase{ .tokens = .{ "\xe2\x80\x99Cuz", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99S", SpecialCase{ .tokens = .{ "\xe2\x80\x99S", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99bout", SpecialCase{ .tokens = .{ "\xe2\x80\x99bout", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99cause", SpecialCase{ .tokens = .{ "\xe2\x80\x99cause", "", 
"" }, .len = 1 } }, .{ "\xe2\x80\x99cos", SpecialCase{ .tokens = .{ "\xe2\x80\x99cos", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99coz", SpecialCase{ .tokens = .{ "\xe2\x80\x99coz", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99cuz", SpecialCase{ .tokens = .{ "\xe2\x80\x99cuz", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99d", SpecialCase{ .tokens = .{ "\xe2\x80\x99d", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99em", SpecialCase{ .tokens = .{ "\xe2\x80\x99em", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99ll", SpecialCase{ .tokens = .{ "\xe2\x80\x99ll", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99nuff", SpecialCase{ .tokens = .{ "\xe2\x80\x99nuff", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99re", SpecialCase{ .tokens = .{ "\xe2\x80\x99re", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99s", SpecialCase{ .tokens = .{ "\xe2\x80\x99s", "", "" }, .len = 1 } }, .{ "\xe2\x80\x99\xe2\x80\x99", SpecialCase{ .tokens = .{ "\xe2\x80\x99\xe2\x80\x99", "", "" }, .len = 1 } }, });