runtime/unicode.cpp at trunk · bernsteinbear.com/skybison

bernsteinbear.com / skybison
fork atom
this repo has no description
fork atom
skybison / runtime / unicode.cpp
at trunk 184 lines 5.5 kB view raw
wrap content
Max Bernstein Add license headers 4y ago
29d072a3
  1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
  2#include "unicode.h"
  3
  4#include <cstdint>
  5
  6#include "unicode-db.h"
  7
  8namespace py {
  9
 10constexpr byte Byte::kTable[];
 11constexpr byte Byte::kToLower[];
 12constexpr byte Byte::kToUpper[];
 13constexpr byte UTF8::kBOM[];
 14constexpr byte UTF16::kBOMLittleEndian[];
 15constexpr byte UTF16::kBOMBigEndian[];
 16constexpr byte UTF32::kBOMLittleEndian[];
 17constexpr byte UTF32::kBOMBigEndian[];
 18
 19bool Unicode::isAlphaDB(int32_t code_point) {
 20  return (typeRecord(code_point)->flags & kAlphaMask) != 0;
 21}
 22
 23bool Unicode::isCaseIgnorableDB(int32_t code_point) {
 24  return (typeRecord(code_point)->flags & kCaseIgnorableMask) != 0;
 25}
 26
 27bool Unicode::isCasedDB(int32_t code_point) {
 28  return (typeRecord(code_point)->flags & kCasedMask) != 0;
 29}
 30
 31bool Unicode::isDecimalDB(int32_t code_point) {
 32  return (typeRecord(code_point)->flags & kDecimalMask) != 0;
 33}
 34
 35bool Unicode::isDigitDB(int32_t code_point) {
 36  return (typeRecord(code_point)->flags & kDigitMask) != 0;
 37}
 38
 39bool Unicode::isLinebreakDB(int32_t code_point) {
 40  return unicodeIsLinebreak(code_point);
 41}
 42
 43bool Unicode::isLowerDB(int32_t code_point) {
 44  return (typeRecord(code_point)->flags & kLowerMask) != 0;
 45}
 46
 47bool Unicode::isNumericDB(int32_t code_point) {
 48  return (typeRecord(code_point)->flags & kNumericMask) != 0;
 49}
 50
 51bool Unicode::isPrintableDB(int32_t code_point) {
 52  return (typeRecord(code_point)->flags & kPrintableMask) != 0;
 53}
 54
 55bool Unicode::isSpaceDB(int32_t code_point) {
 56  return unicodeIsWhitespace(code_point);
 57}
 58
 59bool Unicode::isTitleDB(int32_t code_point) {
 60  return (typeRecord(code_point)->flags & kTitleMask) != 0;
 61}
 62
 63bool Unicode::isUnfoldedDB(int32_t code_point) {
 64  const UnicodeTypeRecord* record = typeRecord(code_point);
 65  return (record->flags & kExtendedCaseMask) != 0 &&
 66         ((record->lower >> 20) & 7) != 0;
 67}
 68
 69bool Unicode::isUpperDB(int32_t code_point) {
 70  return (typeRecord(code_point)->flags & kUpperMask) != 0;
 71}
 72
 73bool Unicode::isXidContinueDB(int32_t code_point) {
 74  return (typeRecord(code_point)->flags & kXidContinueMask) != 0;
 75}
 76
 77bool Unicode::isXidStartDB(int32_t code_point) {
 78  return (typeRecord(code_point)->flags & kXidStartMask) != 0;
 79}
 80
 81int8_t Unicode::toDecimalDB(int32_t code_point) {
 82  const UnicodeTypeRecord* record = typeRecord(code_point);
 83  return (record->flags & kDecimalMask) != 0 ? record->decimal : -1;
 84}
 85
 86int8_t Unicode::toDigitDB(int32_t code_point) {
 87  const UnicodeTypeRecord* record = typeRecord(code_point);
 88  return (record->flags & kDigitMask) != 0 ? record->digit : -1;
 89}
 90
 91FullCasing Unicode::toFoldedDB(int32_t code_point) {
 92  const UnicodeTypeRecord* record = typeRecord(code_point);
 93
 94  if (record->flags & kExtendedCaseMask && (record->lower >> 20) & 7) {
 95    FullCasing result = {-1, -1, -1};
 96    int32_t index = (record->lower & 0xFFFF) + (record->lower >> 24);
 97    switch ((record->lower >> 20) & 7) {
 98      default:
 99        UNREACHABLE("Case mappings are limited to [1..3] code points");
100      case 3:
101        result.code_points[2] = extendedCaseMapping(index + 2);
102        FALLTHROUGH;
103      case 2:
104        result.code_points[1] = extendedCaseMapping(index + 1);
105        FALLTHROUGH;
106      case 1:
107        result.code_points[0] = extendedCaseMapping(index);
108    }
109    return result;
110  }
111  return toLowerDB(code_point);
112}
113
114FullCasing Unicode::toLowerDB(int32_t code_point) {
115  const UnicodeTypeRecord* record = typeRecord(code_point);
116  if ((record->flags & kExtendedCaseMask) == 0) {
117    return {code_point + record->lower, -1};
118  }
119  FullCasing result = {-1, -1, -1};
120  int32_t index = record->lower & 0xFFFF;
121  switch (record->lower >> 24) {
122    default:
123      UNREACHABLE("Case mappings are limited to [1..3] code points");
124    case 3:
125      result.code_points[2] = extendedCaseMapping(index + 2);
126      FALLTHROUGH;
127    case 2:
128      result.code_points[1] = extendedCaseMapping(index + 1);
129      FALLTHROUGH;
130    case 1:
131      result.code_points[0] = extendedCaseMapping(index);
132  }
133  return result;
134}
135
136double Unicode::toNumericDB(int32_t code_point) {
137  return numericValue(code_point);
138}
139
140FullCasing Unicode::toTitleDB(int32_t code_point) {
141  const UnicodeTypeRecord* record = typeRecord(code_point);
142  if ((record->flags & kExtendedCaseMask) == 0) {
143    return {code_point + record->title, -1};
144  }
145  FullCasing result = {-1, -1, -1};
146  int32_t index = record->title & 0xFFFF;
147  switch (record->title >> 24) {
148    default:
149      UNREACHABLE("Case mappings are limited to [1..3] code points");
150    case 3:
151      result.code_points[2] = extendedCaseMapping(index + 2);
152      FALLTHROUGH;
153    case 2:
154      result.code_points[1] = extendedCaseMapping(index + 1);
155      FALLTHROUGH;
156    case 1:
157      result.code_points[0] = extendedCaseMapping(index);
158  }
159  return result;
160}
161
162FullCasing Unicode::toUpperDB(int32_t code_point) {
163  const UnicodeTypeRecord* record = typeRecord(code_point);
164  if ((record->flags & kExtendedCaseMask) == 0) {
165    return {code_point + record->upper, -1};
166  }
167  FullCasing result = {-1, -1, -1};
168  int32_t index = record->upper & 0xFFFF;
169  switch (record->upper >> 24) {
170    default:
171      UNREACHABLE("Case mappings are limited to [1..3] code points");
172    case 3:
173      result.code_points[2] = extendedCaseMapping(index + 2);
174      FALLTHROUGH;
175    case 2:
176      result.code_points[1] = extendedCaseMapping(index + 1);
177      FALLTHROUGH;
178    case 1:
179      result.code_points[0] = extendedCaseMapping(index);
180  }
181  return result;
182}
183
184}  // namespace py