this repo has no description
// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
2#include "unicode.h"
3
4#include <cstdint>
5
6#include "unicode-db.h"
7
8namespace py {
9
10constexpr byte Byte::kTable[];
11constexpr byte Byte::kToLower[];
12constexpr byte Byte::kToUpper[];
13constexpr byte UTF8::kBOM[];
14constexpr byte UTF16::kBOMLittleEndian[];
15constexpr byte UTF16::kBOMBigEndian[];
16constexpr byte UTF32::kBOMLittleEndian[];
17constexpr byte UTF32::kBOMBigEndian[];
18
19bool Unicode::isAlphaDB(int32_t code_point) {
20 return (typeRecord(code_point)->flags & kAlphaMask) != 0;
21}
22
23bool Unicode::isCaseIgnorableDB(int32_t code_point) {
24 return (typeRecord(code_point)->flags & kCaseIgnorableMask) != 0;
25}
26
27bool Unicode::isCasedDB(int32_t code_point) {
28 return (typeRecord(code_point)->flags & kCasedMask) != 0;
29}
30
31bool Unicode::isDecimalDB(int32_t code_point) {
32 return (typeRecord(code_point)->flags & kDecimalMask) != 0;
33}
34
35bool Unicode::isDigitDB(int32_t code_point) {
36 return (typeRecord(code_point)->flags & kDigitMask) != 0;
37}
38
39bool Unicode::isLinebreakDB(int32_t code_point) {
40 return unicodeIsLinebreak(code_point);
41}
42
43bool Unicode::isLowerDB(int32_t code_point) {
44 return (typeRecord(code_point)->flags & kLowerMask) != 0;
45}
46
47bool Unicode::isNumericDB(int32_t code_point) {
48 return (typeRecord(code_point)->flags & kNumericMask) != 0;
49}
50
51bool Unicode::isPrintableDB(int32_t code_point) {
52 return (typeRecord(code_point)->flags & kPrintableMask) != 0;
53}
54
55bool Unicode::isSpaceDB(int32_t code_point) {
56 return unicodeIsWhitespace(code_point);
57}
58
59bool Unicode::isTitleDB(int32_t code_point) {
60 return (typeRecord(code_point)->flags & kTitleMask) != 0;
61}
62
63bool Unicode::isUnfoldedDB(int32_t code_point) {
64 const UnicodeTypeRecord* record = typeRecord(code_point);
65 return (record->flags & kExtendedCaseMask) != 0 &&
66 ((record->lower >> 20) & 7) != 0;
67}
68
69bool Unicode::isUpperDB(int32_t code_point) {
70 return (typeRecord(code_point)->flags & kUpperMask) != 0;
71}
72
73bool Unicode::isXidContinueDB(int32_t code_point) {
74 return (typeRecord(code_point)->flags & kXidContinueMask) != 0;
75}
76
77bool Unicode::isXidStartDB(int32_t code_point) {
78 return (typeRecord(code_point)->flags & kXidStartMask) != 0;
79}
80
81int8_t Unicode::toDecimalDB(int32_t code_point) {
82 const UnicodeTypeRecord* record = typeRecord(code_point);
83 return (record->flags & kDecimalMask) != 0 ? record->decimal : -1;
84}
85
86int8_t Unicode::toDigitDB(int32_t code_point) {
87 const UnicodeTypeRecord* record = typeRecord(code_point);
88 return (record->flags & kDigitMask) != 0 ? record->digit : -1;
89}
90
91FullCasing Unicode::toFoldedDB(int32_t code_point) {
92 const UnicodeTypeRecord* record = typeRecord(code_point);
93
94 if (record->flags & kExtendedCaseMask && (record->lower >> 20) & 7) {
95 FullCasing result = {-1, -1, -1};
96 int32_t index = (record->lower & 0xFFFF) + (record->lower >> 24);
97 switch ((record->lower >> 20) & 7) {
98 default:
99 UNREACHABLE("Case mappings are limited to [1..3] code points");
100 case 3:
101 result.code_points[2] = extendedCaseMapping(index + 2);
102 FALLTHROUGH;
103 case 2:
104 result.code_points[1] = extendedCaseMapping(index + 1);
105 FALLTHROUGH;
106 case 1:
107 result.code_points[0] = extendedCaseMapping(index);
108 }
109 return result;
110 }
111 return toLowerDB(code_point);
112}
113
114FullCasing Unicode::toLowerDB(int32_t code_point) {
115 const UnicodeTypeRecord* record = typeRecord(code_point);
116 if ((record->flags & kExtendedCaseMask) == 0) {
117 return {code_point + record->lower, -1};
118 }
119 FullCasing result = {-1, -1, -1};
120 int32_t index = record->lower & 0xFFFF;
121 switch (record->lower >> 24) {
122 default:
123 UNREACHABLE("Case mappings are limited to [1..3] code points");
124 case 3:
125 result.code_points[2] = extendedCaseMapping(index + 2);
126 FALLTHROUGH;
127 case 2:
128 result.code_points[1] = extendedCaseMapping(index + 1);
129 FALLTHROUGH;
130 case 1:
131 result.code_points[0] = extendedCaseMapping(index);
132 }
133 return result;
134}
135
136double Unicode::toNumericDB(int32_t code_point) {
137 return numericValue(code_point);
138}
139
140FullCasing Unicode::toTitleDB(int32_t code_point) {
141 const UnicodeTypeRecord* record = typeRecord(code_point);
142 if ((record->flags & kExtendedCaseMask) == 0) {
143 return {code_point + record->title, -1};
144 }
145 FullCasing result = {-1, -1, -1};
146 int32_t index = record->title & 0xFFFF;
147 switch (record->title >> 24) {
148 default:
149 UNREACHABLE("Case mappings are limited to [1..3] code points");
150 case 3:
151 result.code_points[2] = extendedCaseMapping(index + 2);
152 FALLTHROUGH;
153 case 2:
154 result.code_points[1] = extendedCaseMapping(index + 1);
155 FALLTHROUGH;
156 case 1:
157 result.code_points[0] = extendedCaseMapping(index);
158 }
159 return result;
160}
161
162FullCasing Unicode::toUpperDB(int32_t code_point) {
163 const UnicodeTypeRecord* record = typeRecord(code_point);
164 if ((record->flags & kExtendedCaseMask) == 0) {
165 return {code_point + record->upper, -1};
166 }
167 FullCasing result = {-1, -1, -1};
168 int32_t index = record->upper & 0xFFFF;
169 switch (record->upper >> 24) {
170 default:
171 UNREACHABLE("Case mappings are limited to [1..3] code points");
172 case 3:
173 result.code_points[2] = extendedCaseMapping(index + 2);
174 FALLTHROUGH;
175 case 2:
176 result.code_points[1] = extendedCaseMapping(index + 1);
177 FALLTHROUGH;
178 case 1:
179 result.code_points[0] = extendedCaseMapping(index);
180 }
181 return result;
182}
183
184} // namespace py