this repo has no description
at trunk 184 lines 5.5 kB view raw
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) 2#include "unicode.h" 3 4#include <cstdint> 5 6#include "unicode-db.h" 7 8namespace py { 9 10constexpr byte Byte::kTable[]; 11constexpr byte Byte::kToLower[]; 12constexpr byte Byte::kToUpper[]; 13constexpr byte UTF8::kBOM[]; 14constexpr byte UTF16::kBOMLittleEndian[]; 15constexpr byte UTF16::kBOMBigEndian[]; 16constexpr byte UTF32::kBOMLittleEndian[]; 17constexpr byte UTF32::kBOMBigEndian[]; 18 19bool Unicode::isAlphaDB(int32_t code_point) { 20 return (typeRecord(code_point)->flags & kAlphaMask) != 0; 21} 22 23bool Unicode::isCaseIgnorableDB(int32_t code_point) { 24 return (typeRecord(code_point)->flags & kCaseIgnorableMask) != 0; 25} 26 27bool Unicode::isCasedDB(int32_t code_point) { 28 return (typeRecord(code_point)->flags & kCasedMask) != 0; 29} 30 31bool Unicode::isDecimalDB(int32_t code_point) { 32 return (typeRecord(code_point)->flags & kDecimalMask) != 0; 33} 34 35bool Unicode::isDigitDB(int32_t code_point) { 36 return (typeRecord(code_point)->flags & kDigitMask) != 0; 37} 38 39bool Unicode::isLinebreakDB(int32_t code_point) { 40 return unicodeIsLinebreak(code_point); 41} 42 43bool Unicode::isLowerDB(int32_t code_point) { 44 return (typeRecord(code_point)->flags & kLowerMask) != 0; 45} 46 47bool Unicode::isNumericDB(int32_t code_point) { 48 return (typeRecord(code_point)->flags & kNumericMask) != 0; 49} 50 51bool Unicode::isPrintableDB(int32_t code_point) { 52 return (typeRecord(code_point)->flags & kPrintableMask) != 0; 53} 54 55bool Unicode::isSpaceDB(int32_t code_point) { 56 return unicodeIsWhitespace(code_point); 57} 58 59bool Unicode::isTitleDB(int32_t code_point) { 60 return (typeRecord(code_point)->flags & kTitleMask) != 0; 61} 62 63bool Unicode::isUnfoldedDB(int32_t code_point) { 64 const UnicodeTypeRecord* record = typeRecord(code_point); 65 return (record->flags & kExtendedCaseMask) != 0 && 66 ((record->lower >> 20) & 7) != 0; 67} 68 69bool Unicode::isUpperDB(int32_t code_point) { 70 return (typeRecord(code_point)->flags & kUpperMask) != 0; 71} 72 73bool Unicode::isXidContinueDB(int32_t code_point) { 74 return (typeRecord(code_point)->flags & kXidContinueMask) != 0; 75} 76 77bool Unicode::isXidStartDB(int32_t code_point) { 78 return (typeRecord(code_point)->flags & kXidStartMask) != 0; 79} 80 81int8_t Unicode::toDecimalDB(int32_t code_point) { 82 const UnicodeTypeRecord* record = typeRecord(code_point); 83 return (record->flags & kDecimalMask) != 0 ? record->decimal : -1; 84} 85 86int8_t Unicode::toDigitDB(int32_t code_point) { 87 const UnicodeTypeRecord* record = typeRecord(code_point); 88 return (record->flags & kDigitMask) != 0 ? record->digit : -1; 89} 90 91FullCasing Unicode::toFoldedDB(int32_t code_point) { 92 const UnicodeTypeRecord* record = typeRecord(code_point); 93 94 if (record->flags & kExtendedCaseMask && (record->lower >> 20) & 7) { 95 FullCasing result = {-1, -1, -1}; 96 int32_t index = (record->lower & 0xFFFF) + (record->lower >> 24); 97 switch ((record->lower >> 20) & 7) { 98 default: 99 UNREACHABLE("Case mappings are limited to [1..3] code points"); 100 case 3: 101 result.code_points[2] = extendedCaseMapping(index + 2); 102 FALLTHROUGH; 103 case 2: 104 result.code_points[1] = extendedCaseMapping(index + 1); 105 FALLTHROUGH; 106 case 1: 107 result.code_points[0] = extendedCaseMapping(index); 108 } 109 return result; 110 } 111 return toLowerDB(code_point); 112} 113 114FullCasing Unicode::toLowerDB(int32_t code_point) { 115 const UnicodeTypeRecord* record = typeRecord(code_point); 116 if ((record->flags & kExtendedCaseMask) == 0) { 117 return {code_point + record->lower, -1}; 118 } 119 FullCasing result = {-1, -1, -1}; 120 int32_t index = record->lower & 0xFFFF; 121 switch (record->lower >> 24) { 122 default: 123 UNREACHABLE("Case mappings are limited to [1..3] code points"); 124 case 3: 125 result.code_points[2] = extendedCaseMapping(index + 2); 126 FALLTHROUGH; 127 case 2: 128 result.code_points[1] = extendedCaseMapping(index + 1); 129 FALLTHROUGH; 130 case 1: 131 result.code_points[0] = extendedCaseMapping(index); 132 } 133 return result; 134} 135 136double Unicode::toNumericDB(int32_t code_point) { 137 return numericValue(code_point); 138} 139 140FullCasing Unicode::toTitleDB(int32_t code_point) { 141 const UnicodeTypeRecord* record = typeRecord(code_point); 142 if ((record->flags & kExtendedCaseMask) == 0) { 143 return {code_point + record->title, -1}; 144 } 145 FullCasing result = {-1, -1, -1}; 146 int32_t index = record->title & 0xFFFF; 147 switch (record->title >> 24) { 148 default: 149 UNREACHABLE("Case mappings are limited to [1..3] code points"); 150 case 3: 151 result.code_points[2] = extendedCaseMapping(index + 2); 152 FALLTHROUGH; 153 case 2: 154 result.code_points[1] = extendedCaseMapping(index + 1); 155 FALLTHROUGH; 156 case 1: 157 result.code_points[0] = extendedCaseMapping(index); 158 } 159 return result; 160} 161 162FullCasing Unicode::toUpperDB(int32_t code_point) { 163 const UnicodeTypeRecord* record = typeRecord(code_point); 164 if ((record->flags & kExtendedCaseMask) == 0) { 165 return {code_point + record->upper, -1}; 166 } 167 FullCasing result = {-1, -1, -1}; 168 int32_t index = record->upper & 0xFFFF; 169 switch (record->upper >> 24) { 170 default: 171 UNREACHABLE("Case mappings are limited to [1..3] code points"); 172 case 3: 173 result.code_points[2] = extendedCaseMapping(index + 2); 174 FALLTHROUGH; 175 case 2: 176 result.code_points[1] = extendedCaseMapping(index + 1); 177 FALLTHROUGH; 178 case 1: 179 result.code_points[0] = extendedCaseMapping(index); 180 } 181 return result; 182} 183 184} // namespace py