/* Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) */
// @generated by generate_unicode_database.py
#pragma once

#include <cstdint>

#include "globals.h"
#include "objects.h"
#include "unicode.h"

namespace py {

// Size limit used for Unicode character names.
// NOTE(review): presumably the buffer capacity callers should hand to
// nameFromCodePoint() -- confirm against the implementation.
static constexpr int kMaxNameLength = 256;

// Upper bound on the length of a decomposition mapping. The longest
// decomposition in Unicode 11.0.0 is that of U+FDFA.
static constexpr int kMaxDecomposition = 18;

// Compile-time checks that the alias and named-sequence constants on Unicode
// still have the values this generated file was produced for.
// NOTE(review): a failure here presumably means generate_unicode_database.py
// needs to be re-run (or the constants were edited by hand) -- confirm.
static_assert(Unicode::kAliasStart == 0xf0000,
              "Unicode aliases start at unexpected code point");
static_assert(Unicode::kAliasCount == 468,
              "Unexpected number of Unicode aliases");
static_assert(Unicode::kNamedSequenceStart == 0xf0200,
              "Unicode named sequences start at unexpected code point");
static_assert(Unicode::kNamedSequenceCount == 442,
              "Unexpected number of Unicode named sequences");

// Unicode normalization forms. Each valid form is encoded as its own pair of
// adjacent bits, so the four forms never overlap and a form value can double
// as a bit mask.
// NOTE(review): presumably tested against UnicodeDatabaseRecord::quick_check
// -- confirm against the implementation.
enum NormalizationForm : byte {
  kInvalid = 0,
  kNFD = 0x3 << 0,
  kNFKD = 0x3 << 2,
  kNFC = 0x3 << 4,
  kNFKC = 0x3 << 6,
};

// Single-bit character property flags, one bit position per property.
// NOTE(review): presumably combined in UnicodeTypeRecord::flags -- confirm.
enum : int32_t {
  kAlphaMask = 1 << 0,
  kDecimalMask = 1 << 1,
  kDigitMask = 1 << 2,
  kLowerMask = 1 << 3,
  kLinebreakMask = 1 << 4,
  kSpaceMask = 1 << 5,
  kTitleMask = 1 << 6,
  kUpperMask = 1 << 7,
  kXidStartMask = 1 << 8,
  kXidContinueMask = 1 << 9,
  kPrintableMask = 1 << 10,
  kNumericMask = 1 << 11,
  kCaseIgnorableMask = 1 << 12,
  kCasedMask = 1 << 13,
  kExtendedCaseMask = 1 << 14,
};

// Record type returned by changeRecord() for a code point.
// NOTE(review): presumably describes how the code point's properties differed
// in an older Unicode version (normalizeOld() refers to Unicode 3.2.0), and the
// encoding of "unchanged" for each field is not visible here -- confirm against
// generate_unicode_database.py.
struct UnicodeChangeRecord {
  const byte bidirectional;
  const byte category;
  const byte decimal;
  const byte east_asian_width;
  // Note: a byte here, unlike the bool in UnicodeDatabaseRecord.
  const byte mirrored;
  const double numeric;
};

// Record type returned by databaseRecord() for a code point.
// NOTE(review): bidirectional, category and east_asian_width are presumably
// indices into kBidirectionalNames, kCategoryNames and kEastAsianWidthNames
// respectively -- confirm against the implementation.
struct UnicodeDatabaseRecord {
  const byte bidirectional;
  const byte category;
  const byte combining;  // canonical combining class
  const byte east_asian_width;
  const bool mirrored;
  // NOTE(review): presumably a bit field matched against NormalizationForm
  // values -- confirm.
  const byte quick_check;
};

// Result type of decomposeCodePoint().
// NOTE(review): ownership of the pointed-to data is not visible here;
// presumably both pointers refer to static generated tables -- confirm.
struct UnicodeDecomposition {
  // NOTE(review): presumably the decomposition tag (e.g. a compatibility
  // formatting tag), possibly empty for canonical mappings -- confirm.
  const char* prefix;
  // NOTE(review): presumably the number of entries in code_points, bounded by
  // kMaxDecomposition -- confirm.
  const int count;
  const int32_t* code_points;
};

// Record type returned by namedSequence(). Holds up to four code points in a
// fixed-size array.
struct UnicodeNamedSequence {
  // NOTE(review): presumably the number of valid entries in code_points
  // -- confirm.
  const byte length;
  const int32_t code_points[4];
};

// Record type returned by typeRecord() for a code point.
struct UnicodeTypeRecord {
  // Note: if more flag space is needed, decimal and digit could be unified
  const int8_t decimal;
  const int8_t digit;
  // NOTE(review): presumably a combination of the k*Mask bits declared in this
  // header; the largest mask, kExtendedCaseMask (0x4000), still fits in a
  // signed 16-bit value -- confirm usage.
  const int16_t flags;
  // Deltas to the character or offsets in kExtendedCase
  // NOTE(review): which interpretation applies is presumably selected by
  // kExtendedCaseMask in flags -- confirm.
  const int32_t lower;
  const int32_t title;
  const int32_t upper;
};

// Name tables defined outside this header (declared extern here).
// NOTE(review): presumably indexed by the bidirectional, category and
// east_asian_width fields of UnicodeDatabaseRecord; the table sizes are not
// visible from this header -- confirm.
extern const RawSmallStr kBidirectionalNames[];
extern const RawSmallStr kCategoryNames[];
extern const RawSmallStr kEastAsianWidthNames[];

// Get a code point from its Unicode name.
// Returns the code point if the lookup succeeds, -1 if it fails.
// The name is passed as a pointer plus an explicit length.
// NOTE(review): presumably `size` is a byte count and `name` need not be
// NUL-terminated -- confirm.
int32_t codePointFromName(const byte* name, word size);
// NOTE(review): judging by its name, this variant additionally accepts named
// sequences, presumably returning a value that namedSequence() can resolve
// -- confirm against the implementation.
int32_t codePointFromNameOrNamedSequence(const byte* name, word size);

// Returns the NFC composition given the NFC first and last indices.
// NOTE(review): the indices are presumably the values produced by
// findNFCFirst() and findNFCLast(); the result for a pair with no composition
// is not visible from this header -- confirm.
int32_t composeCodePoint(int32_t first, int32_t last);

// Returns the decomposition mapping of the code point.
UnicodeDecomposition decomposeCodePoint(int32_t code_point);

// Returns the case mapping for code points where offset is insufficient
// NOTE(review): `index` is presumably one of the kExtendedCase offsets stored
// in UnicodeTypeRecord's lower/title/upper fields -- confirm.
int32_t extendedCaseMapping(int32_t index);

// Finds the first/last character of an NFC sequence containing the code point.
// NOTE(review): the value returned when the code point takes part in no such
// sequence is not visible from this header -- confirm.
int32_t findNFCFirst(int32_t code_point);
int32_t findNFCLast(int32_t code_point);

// Write the Unicode name for the given code point into the buffer.
// Returns true if the name was written successfully, false otherwise.
// NOTE(review): `size` is presumably the capacity of `buffer` in bytes, and
// kMaxNameLength presumably a sufficient capacity; whether the output is
// NUL-terminated is not visible here -- confirm.
bool nameFromCodePoint(int32_t code_point, byte* buffer, word size);

// Returns the normalization of the code point in Unicode 3.2.0, if it differs
// from the current version. If the normalization is unchanged, returns -1.
int32_t normalizeOld(int32_t code_point);

// Returns the numeric value of the code point, or -1.0 if not numeric.
double numericValue(int32_t code_point);

// Returns true if the code point has one of the line break properties "BK",
// "CR", "LF", or "NL" or the bidirectional type "B". Returns false otherwise.
bool unicodeIsLinebreak(int32_t code_point);

// Returns true if the code point has the bidirectional type "WS", "B", or "S"
// or the category "Zs". Returns false otherwise.
bool unicodeIsWhitespace(int32_t code_point);

// Accessors returning a pointer to the record of each kind for a code point.
// NOTE(review): lifetime and ownership are not visible from this header;
// presumably the pointers refer to static generated tables, are never null for
// valid input, and must not be freed -- confirm against the implementation.
// NOTE(review): namedSequence() presumably expects a value in the range
// starting at Unicode::kNamedSequenceStart rather than a real code point
// -- confirm.
const UnicodeChangeRecord* changeRecord(int32_t code_point);
const UnicodeDatabaseRecord* databaseRecord(int32_t code_point);
const UnicodeNamedSequence* namedSequence(int32_t code_point);
const UnicodeTypeRecord* typeRecord(int32_t code_point);

}  // namespace py