runtime/formatter.cpp at trunk · bernsteinbear.com/skybison

bernsteinbear.com / skybison
fork atom
this repo has no description
fork atom
skybison / runtime / formatter.cpp
at trunk 1132 lines 38 kB view raw
wrap content
Max Bernstein Add license headers 4y ago
29d072a3
   1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
   2#include "formatter.h"
   3
   4#include <climits>
   5
   6#include "float-builtins.h"
   7#include "float-conversion.h"
   8#include "formatter-utils.h"
   9#include "globals.h"
  10#include "handles-decl.h"
  11#include "objects.h"
  12#include "runtime.h"
  13#include "str-builtins.h"
  14#include "thread.h"
  15#include "unicode.h"
  16
  17namespace py {
  18
  19const byte kLowerCaseHexDigitArray[] = {'0', '1', '2', '3', '4', '5', '6', '7',
  20                                        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
  21const byte kUpperCaseHexDigitArray[] = {'0', '1', '2', '3', '4', '5', '6', '7',
  22                                        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
  23
  24struct FloatWidths {
  25  word left_padding;
  26  byte sign;
  27  word sign_padding;
  28  word grouped_digits;
  29  bool has_decimal;
  30  word remainder;
  31  word right_padding;
  32};
  33
  34static word calculateFloatWidths(const FormatSpec* format, const char* buf,
  35                                 word length, FloatWidths* result) {
  36  result->left_padding = 0;
  37  result->sign = '\0';
  38  result->sign_padding = 0;
  39  result->grouped_digits = length;
  40  result->has_decimal = false;
  41  result->remainder = 0;
  42  result->right_padding = 0;
  43
  44  // Check for a sign character
  45  word index = 0;
  46  word total_length = 0;
  47  if (buf[index] == '-') {
  48    result->sign = '-';
  49    index++;
  50    total_length++;
  51  }
  52
  53  // Parse the digits for a remainder
  54  for (; index < length; index++) {
  55    char c = buf[index];
  56    if (!ASCII::isDigit(c)) {
  57      word remainder = length - index;
  58      if (c == '.') {
  59        result->has_decimal = true;
  60        result->remainder = remainder - 1;
  61        total_length += remainder;  // TODO(T52759101): use locale for decimal
  62      } else {
  63        result->remainder = remainder;
  64        total_length += remainder;
  65      }
  66      break;
  67    }
  68  }
  69
  70  if (format->positive_sign != '\0' && result->sign == '\0') {
  71    result->sign = format->positive_sign;
  72    total_length++;
  73  }
  74
  75  // TODO(T52759101): use locale for thousands separator and grouping
  76  word digits = result->sign == '-' ? index - 1 : index;
  77  if (format->thousands_separator != '\0') {
  78    digits += (digits - 1) / 3;
  79  }
  80  result->grouped_digits = digits;
  81  total_length += digits;
  82
  83  word padding = format->width - total_length;
  84  if (padding > 0) {
  85    total_length +=
  86        padding * SmallStr::fromCodePoint(format->fill_char).length();
  87    switch (format->alignment) {
  88      case '<':
  89        result->right_padding = padding;
  90        break;
  91      case '=':
  92        result->sign_padding = padding;
  93        break;
  94      case '>':
  95        result->left_padding = padding;
  96        break;
  97      case '^':
  98        result->left_padding = padding >> 1;
  99        result->right_padding = padding - result->left_padding;
 100        break;
 101      default:
 102        UNREACHABLE("unexpected alignment %c", format->alignment);
 103    }
 104  }
 105
 106  return total_length;
 107}
 108
 109static bool isAlignmentSpec(int32_t cp) {
 110  switch (cp) {
 111    case '<':
 112    case '>':
 113    case '=':
 114    case '^':
 115      return true;
 116    default:
 117      return false;
 118  }
 119}
 120
 121static inline int32_t nextCodePoint(const Str& spec, word length, word* index) {
 122  if (*index >= length) {
 123    return 0;
 124  }
 125  word cp_length;
 126  int32_t cp = spec.codePointAt(*index, &cp_length);
 127  *index += cp_length;
 128  return cp;
 129}
 130
 131RawObject parseFormatSpec(Thread* thread, const Str& spec, int32_t default_type,
 132                          char default_align, FormatSpec* result) {
 133  result->alignment = default_align;
 134  result->positive_sign = 0;
 135  result->thousands_separator = 0;
 136  result->type = default_type;
 137  result->alternate = false;
 138  result->fill_char = ' ';
 139  result->width = -1;
 140  result->precision = -1;
 141
 142  word index = 0;
 143  word length = spec.length();
 144  int32_t cp = nextCodePoint(spec, length, &index);
 145
 146  bool fill_char_specified = false;
 147  bool alignment_specified = false;
 148  word old_index = index;
 149  int32_t c_next = nextCodePoint(spec, length, &index);
 150  if (isAlignmentSpec(c_next)) {
 151    result->alignment = static_cast<char>(c_next);
 152    result->fill_char = cp;
 153    fill_char_specified = true;
 154    alignment_specified = true;
 155
 156    cp = nextCodePoint(spec, length, &index);
 157  } else if (!alignment_specified && isAlignmentSpec(cp)) {
 158    result->alignment = static_cast<char>(cp);
 159    alignment_specified = true;
 160    cp = c_next;
 161  } else {
 162    index = old_index;
 163  }
 164
 165  switch (cp) {
 166    case '+':
 167    case ' ':
 168      result->positive_sign = static_cast<char>(cp);
 169      cp = nextCodePoint(spec, length, &index);
 170      break;
 171    case '-':
 172      cp = nextCodePoint(spec, length, &index);
 173      break;
 174  }
 175
 176  if (cp == '#') {
 177    result->alternate = true;
 178    cp = nextCodePoint(spec, length, &index);
 179  }
 180
 181  if (!fill_char_specified && cp == '0') {
 182    result->fill_char = '0';
 183    if (!alignment_specified) {
 184      result->alignment = '=';
 185    }
 186  }
 187
 188  if ('0' <= cp && cp <= '9') {
 189    word width = 0;
 190    for (;;) {
 191      width += cp - '0';
 192      cp = nextCodePoint(spec, length, &index);
 193      if ('0' > cp || cp > '9') break;
 194      if (__builtin_mul_overflow(width, 10, &width)) {
 195        return thread->raiseWithFmt(LayoutId::kValueError,
 196                                    "Too many decimal digits in format string");
 197      }
 198    }
 199    result->width = width;
 200  }
 201
 202  if (cp == ',') {
 203    result->thousands_separator = ',';
 204    cp = nextCodePoint(spec, length, &index);
 205  }
 206  if (cp == '_') {
 207    if (result->thousands_separator != 0) {
 208      return thread->raiseWithFmt(LayoutId::kValueError,
 209                                  "Cannot specify both ',' and '_'.");
 210    }
 211    result->thousands_separator = '_';
 212    cp = nextCodePoint(spec, length, &index);
 213  }
 214  if (cp == ',') {
 215    return thread->raiseWithFmt(LayoutId::kValueError,
 216                                "Cannot specify both ',' and '_'.");
 217  }
 218
 219  if (cp == '.') {
 220    cp = nextCodePoint(spec, length, &index);
 221    if ('0' > cp || cp > '9') {
 222      return thread->raiseWithFmt(LayoutId::kValueError,
 223                                  "Format specifier missing precision");
 224    }
 225
 226    word precision = 0;
 227    for (;;) {
 228      precision += cp - '0';
 229      cp = nextCodePoint(spec, length, &index);
 230      if ('0' > cp || cp > '9') break;
 231      if (__builtin_mul_overflow(precision, 10, &precision)) {
 232        return thread->raiseWithFmt(LayoutId::kValueError,
 233                                    "Too many decimal digits in format string");
 234      }
 235    }
 236    result->precision = precision;
 237  }
 238
 239  if (cp != 0) {
 240    result->type = cp;
 241    // This was the last step: No need to call `nextCodePoint()` here.
 242  }
 243  if (index < length) {
 244    return thread->raiseWithFmt(LayoutId::kValueError,
 245                                "Invalid format specifier");
 246  }
 247
 248  if (result->thousands_separator) {
 249    switch (result->type) {
 250      case 'd':
 251      case 'e':
 252      case 'f':
 253      case 'g':
 254      case 'E':
 255      case 'G':
 256      case '%':
 257      case 'F':
 258      case '\0':
 259        // These are allowed. See PEP 378.
 260        break;
 261      case 'b':
 262      case 'o':
 263      case 'x':
 264      case 'X':
 265        // Underscores are allowed in bin/oct/hex. See PEP 515.
 266        if (result->thousands_separator == '_') {
 267          break;
 268        }
 269        FALLTHROUGH;
 270      default:
 271        if (32 < result->type && result->type <= kMaxASCII) {
 272          return thread->raiseWithFmt(
 273              LayoutId::kValueError, "Cannot specify '%c' with '%c'.",
 274              result->thousands_separator, static_cast<char>(result->type));
 275        }
 276        return thread->raiseWithFmt(
 277            LayoutId::kValueError, "Cannot specify '%c' with '\\x%x'.",
 278            result->thousands_separator, static_cast<unsigned>(result->type));
 279    }
 280  }
 281  return NoneType::object();
 282}
 283
 284static word putFloat(const MutableBytes& dest, const byte* buf,
 285                     const FormatSpec* format, const FloatWidths* widths) {
 286  word at = 0;
 287  RawSmallStr fill = SmallStr::fromCodePoint(format->fill_char);
 288  word fill_length = fill.length();
 289  for (word i = 0; i < widths->left_padding; i++, at += fill_length) {
 290    dest.replaceFromWithStr(at, Str::cast(fill), fill_length);
 291  }
 292  switch (widths->sign) {
 293    case '\0':
 294      break;
 295    case '-':
 296      buf++;
 297      FALLTHROUGH;
 298    case '+':
 299    case ' ':
 300      dest.byteAtPut(at++, widths->sign);
 301      break;
 302    default:
 303      UNREACHABLE("unexpected sign char %c", widths->sign);
 304  }
 305  for (word i = 0; i < widths->sign_padding; i++, at += fill_length) {
 306    dest.replaceFromWithStr(at, Str::cast(fill), fill_length);
 307  }
 308  // TODO(T52759101): use thousands separator from locale
 309  if (format->thousands_separator == '\0') {
 310    dest.replaceFromWithAll(at, {buf, widths->grouped_digits});
 311    at += widths->grouped_digits;
 312    buf += widths->grouped_digits;
 313  } else {
 314    // TODO(T52759101): use locale for grouping
 315    word prefix = widths->grouped_digits % 4;
 316    dest.replaceFromWithAll(at, {buf, prefix});
 317    buf += prefix;
 318    for (word i = prefix; i < widths->grouped_digits; i += 4, buf += 3) {
 319      dest.byteAtPut(at + i, format->thousands_separator);
 320      dest.replaceFromWithAll(at + i + 1, {buf, 3});
 321    }
 322    at += widths->grouped_digits;
 323  }
 324  if (widths->has_decimal) {
 325    // TODO(T52759101): use decimal from locale
 326    dest.byteAtPut(at++, '.');
 327    buf++;
 328  }
 329  dest.replaceFromWithAll(at, {buf, widths->remainder});
 330  at += widths->remainder;
 331  for (word i = 0; i < widths->right_padding; i++, at += fill_length) {
 332    dest.replaceFromWithStr(at, Str::cast(fill), fill_length);
 333  }
 334  return at;
 335}
 336
 337RawObject raiseUnknownFormatError(Thread* thread, int32_t format_code,
 338                                  const Object& object) {
 339  if (32 < format_code && format_code < kMaxASCII) {
 340    return thread->raiseWithFmt(
 341        LayoutId::kValueError,
 342        "Unknown format code '%c' for object of type '%T'",
 343        static_cast<char>(format_code), &object);
 344  }
 345  return thread->raiseWithFmt(
 346      LayoutId::kValueError,
 347      "Unknown format code '\\x%x' for object of type '%T'",
 348      static_cast<unsigned>(format_code), &object);
 349}
 350
 351RawObject formatFloat(Thread* thread, double value, FormatSpec* format) {
 352  static const word default_precision = 6;
 353  word precision = format->precision;
 354  if (precision > INT_MAX) {
 355    return thread->raiseWithFmt(LayoutId::kValueError, "precision too big");
 356  }
 357
 358  int32_t type = format->type;
 359  bool add_dot_0 = false;
 360  bool add_percent = false;
 361  switch (type) {
 362    case '\0':
 363      add_dot_0 = true;
 364      type = 'r';
 365      break;
 366    case 'n':
 367      type = 'g';
 368      break;
 369    case '%':
 370      type = 'f';
 371      value *= 100;
 372      add_percent = true;
 373      break;
 374  }
 375
 376  if (precision < 0) {
 377    precision = add_dot_0 ? 0 : default_precision;
 378  } else if (type == 'r') {
 379    type = 'g';
 380  }
 381
 382  FormatResultKind kind;
 383  unique_c_ptr<char> buf(doubleToString(value, type,
 384                                        static_cast<int>(precision), false,
 385                                        add_dot_0, format->alternate, &kind));
 386  word length = std::strlen(buf.get());
 387  if (add_percent) {
 388    // Overwrite the terminating null character
 389    buf.get()[length++] = '%';
 390  }
 391
 392  Runtime* runtime = thread->runtime();
 393  if (format->positive_sign == '\0' && format->width <= length &&
 394      format->type != 'n' && format->thousands_separator == '\0') {
 395    return runtime->newStrWithAll({reinterpret_cast<byte*>(buf.get()), length});
 396  }
 397
 398  // TODO(T52759101): use locale for grouping, separator, and decimal point
 399  FloatWidths widths;
 400  word result_length = calculateFloatWidths(format, buf.get(), length, &widths);
 401
 402  HandleScope scope(thread);
 403  MutableBytes result(&scope,
 404                      runtime->newMutableBytesUninitialized(result_length));
 405  word written =
 406      putFloat(result, reinterpret_cast<byte*>(buf.get()), format, &widths);
 407  DCHECK(written == result_length, "expected to write %ld bytes, wrote %ld",
 408         result_length, written);
 409  return result.becomeStr();
 410}
 411
 412RawObject formatStr(Thread* thread, const Str& str, FormatSpec* format) {
 413  DCHECK(format->positive_sign == '\0', "must no have sign specified");
 414  DCHECK(!format->alternate, "must not have alternative format");
 415  word width = format->width;
 416  word precision = format->precision;
 417  if (width == -1 && precision == -1) {
 418    return *str;
 419  }
 420
 421  word char_length = str.length();
 422  word codepoint_length;
 423  word str_end_index;
 424  if (precision >= 0) {
 425    str_end_index = str.offsetByCodePoints(0, precision);
 426    if (str_end_index < char_length) {
 427      codepoint_length = precision;
 428    } else {
 429      codepoint_length = str.codePointLength();
 430    }
 431  } else {
 432    str_end_index = char_length;
 433    codepoint_length = str.codePointLength();
 434  }
 435
 436  Runtime* runtime = thread->runtime();
 437  word padding = width - codepoint_length;
 438  if (padding <= 0) {
 439    return strSubstr(thread, str, 0, str_end_index);
 440  }
 441
 442  // Construct result.
 443  HandleScope scope(thread);
 444  Str fill_char(&scope, SmallStr::fromCodePoint(format->fill_char));
 445  word fill_char_length = fill_char.length();
 446  word padding_char_length = padding * fill_char_length;
 447  word result_char_length = str_end_index + padding_char_length;
 448  MutableBytes result(
 449      &scope, runtime->newMutableBytesUninitialized(result_char_length));
 450  word index = 0;
 451  word early_padding;
 452  if (format->alignment == '<') {
 453    early_padding = 0;
 454  } else if (format->alignment == '^') {
 455    word half = padding / 2;
 456    early_padding = half;
 457    padding = half + (padding % 2);
 458  } else {
 459    DCHECK(format->alignment == '>' || format->alignment == '=',
 460           "remaining cases must be '>' or '='");
 461    early_padding = padding;
 462    padding = 0;
 463  }
 464  for (word i = 0; i < early_padding; i++) {
 465    result.replaceFromWithStr(index, *fill_char, fill_char_length);
 466    index += fill_char_length;
 467  }
 468  result.replaceFromWithStr(index, *str, str_end_index);
 469  index += str_end_index;
 470  if (padding > 0) {
 471    DCHECK(format->alignment == '<' || format->alignment == '^',
 472           "unexpected alignment");
 473    for (word i = 0; i < padding; i++) {
 474      result.replaceFromWithStr(index, *fill_char, fill_char_length);
 475      index += fill_char_length;
 476    }
 477  }
 478  DCHECK(index == result_char_length, "overflow or underflow in result");
 479  return result.becomeStr();
 480}
 481
 482// Returns the quotient of a double word number and a single word.
 483// Assumes the result will fit in a single uword: `dividend_high < divisor`.
 484static uword dwordUDiv(uword dividend_low, uword dividend_high, uword divisor,
 485                       uword* remainder) {
 486  // TODO(matthiasb): Future optimization idea:
 487  // This whole function is a single `divq` instruction on x86_64, we could use
 488  // inline assembly for it (there doesn't seem to be a builtin).
 489
 490  // The code is based on Hacker's Delight chapter 9-4 Unsigned Long Division.
 491  DCHECK(divisor != 0, "division by zero");
 492  DCHECK(dividend_high < divisor, "overflow");
 493
 494  // Performs some arithmetic with no more than half the bits of a `uword`.
 495  int half_bits = kBitsPerWord / 2;
 496  uword half_mask = (uword{1} << half_bits) - 1;
 497
 498  // Normalize divisor by shifting the highest bit left as much as possible.
 499  static_assert(sizeof(divisor) == sizeof(long), "choose right builtin");
 500  int s = __builtin_clzl(divisor);
 501  uword divisor_n = divisor << s;
 502  uword divisor_n_high_half = divisor_n >> half_bits;
 503  uword divisor_n_low_half = divisor_n & half_mask;
 504
 505  // Normalize dividend by shifting it by the same amount as the divisor.
 506  uword dividend_high_n =
 507      (s == 0) ? dividend_high
 508               : (dividend_high << s) | (dividend_low >> (kBitsPerWord - s));
 509  uword dividend_low_n = dividend_low << s;
 510  uword dividend_low_n_high_half = dividend_low_n >> half_bits;
 511  uword dividend_low_n_low_half = dividend_low_n & half_mask;
 512
 513  uword quot_high_half = dividend_high_n / divisor_n_high_half;
 514  uword remainder_high_half = dividend_high_n % divisor_n_high_half;
 515  while (quot_high_half > half_mask ||
 516         quot_high_half * divisor_n_low_half >
 517             ((remainder_high_half << half_bits) | dividend_low_n_high_half)) {
 518    quot_high_half--;
 519    remainder_high_half += divisor_n_high_half;
 520    if (remainder_high_half > half_mask) break;
 521  }
 522
 523  uword dividend_middle =
 524      ((dividend_high_n << half_bits) | dividend_low_n_high_half) -
 525      quot_high_half * divisor_n;
 526
 527  uword quot_low_half = dividend_middle / divisor_n_high_half;
 528  uword remainder_low_half = dividend_middle % divisor_n_high_half;
 529  while (quot_low_half > half_mask ||
 530         quot_low_half * divisor_n_low_half >
 531             ((remainder_low_half << half_bits) | dividend_low_n_low_half)) {
 532    quot_low_half--;
 533    remainder_low_half += divisor_n_high_half;
 534    if (remainder_low_half > half_mask) break;
 535  }
 536
 537  uword result = (quot_high_half << half_bits) | quot_low_half;
 538  *remainder = dividend_low - result * divisor;
 539  return result;
 540}
 541
 542// Divide a large integer formed by an array of int digits by a single digit and
 543// return the remainder. `digits_in` and `digits_out` may be the same pointer
 544// for in-place operation.
 545static uword divIntSingleDigit(uword* digits_out, const uword* digits_in,
 546                               word num_digits, uword divisor) {
 547  // TODO(matthiasb): Future optimization idea:
 548  // Instead of dividing by a constant, multiply with a precomputed inverse
 549  // (see Hackers Delight, chapter 10). The compiler doesn't catch this case
 550  // for double word arithmetic as in dwordUDiv.
 551  uword remainder = 0;
 552  for (word i = num_digits - 1; i >= 0; i--) {
 553    // Compute `remainder:digit / divisor`.
 554    digits_out[i] = dwordUDiv(digits_in[i], remainder, divisor, &remainder);
 555  }
 556  return remainder;
 557}
 558
 559// Return upper bound on number of decimal digits to format large int `value`.
 560static word estimateNumDecimalDigits(RawLargeInt value) {
 561  // Compute an upper bound on the number of decimal digits required for a
 562  // number with n bits:
 563  //   ceil(log10(2**n - 1))
 564  // We over-approximate this with:
 565  //   ceil(log10(2**n - 1))
 566  //   == ceil(log2(2**n - 1)/log2(10))
 567  //   <= 1 + n * (1/log2(10))
 568  //   <= 1 + n * 0.30102999566398114
 569  //   <= 1 + n * 309 / 1024
 570  // This isn't off by more than 1 digit for all one binary numbers up to
 571  // 1425 bits.
 572  word bit_length = value.bitLength();
 573  return 1 + bit_length * 309 / 1024;
 574}
 575
 576static byte* writeLargeIntDecimalDigits(byte* buffer_end, RawLargeInt value) {
 577  // Allocate space for intermediate results. We also convert a negative number
 578  // to a positive number of the same magnitude here.
 579  word num_digits = value.numDigits();
 580  std::unique_ptr<uword[]> temp_digits(new uword[num_digits]);
 581  bool negative = value.isNegative();
 582  if (!negative) {
 583    for (word i = 0; i < num_digits; ++i) {
 584      temp_digits[i] = value.digitAt(i);
 585    }
 586  } else {
 587    uword carry = 1;
 588    for (word i = 0; i < num_digits; ++i) {
 589      uword digit = value.digitAt(i);
 590      carry = __builtin_uaddl_overflow(~digit, carry, &temp_digits[i]);
 591    }
 592    // The complement of the highest bit in a negative number must be 0 so we
 593    // cannot overflow.
 594    DCHECK(carry == 0, "overflow");
 595  }
 596  word num_temp_digits = num_digits;
 597
 598  // The strategy here is to divide the large integer by continually dividing it
 599  // by `kUwordDigits10Pow`. `uwordToDecimal` can convert those remainders to
 600  // decimal digits.
 601  //
 602  // TODO(matthiasb): Future optimization ideas:
 603  // It seems cpythons algorithm is faster (for big numbers) in practive.
 604  // Their source claims it is (Knuth TAOCP, vol 2, section 4.4, method 1b).
 605  byte* start = buffer_end;
 606  do {
 607    uword remainder = divIntSingleDigit(temp_digits.get(), temp_digits.get(),
 608                                        num_temp_digits, kUwordDigits10Pow);
 609    byte* new_start = uwordToDecimal(remainder, start);
 610
 611    while (num_temp_digits > 0 && temp_digits[num_temp_digits - 1] == 0) {
 612      num_temp_digits--;
 613    }
 614    // Produce leading zeros if this wasn't the last round.
 615    if (num_temp_digits > 0) {
 616      for (word i = 0, n = kUwordDigits10 - (start - new_start); i < n; i++) {
 617        *--new_start = '0';
 618      }
 619    }
 620    start = new_start;
 621  } while (num_temp_digits > 0);
 622  return start;
 623}
 624
 625RawObject formatIntDecimalSimple(Thread* thread, const Int& value) {
 626  if (!value.isLargeInt()) {
 627    word value_word = value.asWord();
 628    uword magnitude =
 629        value_word >= 0 ? value_word : -static_cast<uword>(value_word);
 630    byte buffer[kUwordDigits10 + 1];
 631    byte* end = buffer + sizeof(buffer);
 632    byte* start = uwordToDecimal(magnitude, end);
 633    if (value_word < 0) *--start = '-';
 634    DCHECK(start >= buffer, "buffer underflow");
 635    return thread->runtime()->newStrWithAll(View<byte>(start, end - start));
 636  }
 637
 638  RawLargeInt value_large = LargeInt::cast(*value);
 639  bool is_negative = value_large.isNegative();
 640  word max_chars =
 641      estimateNumDecimalDigits(value_large) + (is_negative ? 1 : 0);
 642  std::unique_ptr<byte[]> buffer(new byte[max_chars]);
 643  byte* end = buffer.get() + max_chars;
 644  byte* start = writeLargeIntDecimalDigits(end, value_large);
 645  if (is_negative) {
 646    *--start = '-';
 647  }
 648  DCHECK(start >= buffer.get(), "buffer underflow");
 649  return thread->runtime()->newStrWithAll(View<byte>(start, end - start));
 650}
 651
 652static word numBinaryDigits(const Int& value) {
 653  return value.isZero() ? 1 : value.bitLength();
 654}
 655
 656static word numHexadecimalDigits(const Int& value) {
 657  return value.isZero() ? 1 : (value.bitLength() + 3) >> 2;
 658}
 659
 660static word numOctalDigits(const Int& value) {
 661  return value.isZero() ? 1 : (value.bitLength() + 2) / 3;
 662}
 663
 664static void putBinaryDigits(Thread* thread, const MutableBytes& dest, word at,
 665                            const Int& value, word num_digits) {
 666  static const char* const quads[16] = {
 667      "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
 668      "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111",
 669  };
 670
 671  word idx = at + num_digits;
 672  uword last_digit;
 673  if (value.isLargeInt()) {
 674    HandleScope scope(thread);
 675    LargeInt value_large(&scope, *value);
 676    word d_last = (num_digits - 1) / kBitsPerWord;
 677    bool is_negative = value_large.isNegative();
 678    uword carry = 1;
 679    for (word d = 0; d < d_last; d++) {
 680      uword digit = value_large.digitAt(d);
 681      if (is_negative) {
 682        digit = ~digit + carry;
 683        carry = carry & (digit == 0);
 684      }
 685      static_assert(kBitsPerWord % 4 == 0, "bits per word divisible by 4");
 686      for (word i = 0; i < kBitsPerWord / 4; i++) {
 687        const char* quad = quads[digit & 0xf];
 688        dest.byteAtPut(--idx, quad[3]);
 689        dest.byteAtPut(--idx, quad[2]);
 690        dest.byteAtPut(--idx, quad[1]);
 691        dest.byteAtPut(--idx, quad[0]);
 692        digit >>= 4;
 693      }
 694    }
 695    last_digit = value_large.digitAt(d_last);
 696    if (is_negative) {
 697      last_digit = ~last_digit + carry;
 698    }
 699  } else {
 700    last_digit = static_cast<uword>(std::abs(value.asWord()));
 701  }
 702
 703  do {
 704    dest.byteAtPut(--idx, '0' + (last_digit & 1));
 705    last_digit >>= 1;
 706  } while (last_digit != 0);
 707  DCHECK(idx == at, "unexpected number of digits");
 708}
 709
 710static void putHexadecimalDigitsImpl(Thread* thread, const MutableBytes& dest,
 711                                     word at, const Int& value, word num_digits,
 712                                     const byte* digits) {
 713  word idx = at + num_digits;
 714  uword last_digit;
 715  if (value.isLargeInt()) {
 716    HandleScope scope(thread);
 717    LargeInt value_large(&scope, *value);
 718    const word hexdigits_per_word = kBitsPerWord / kBitsPerHexDigit;
 719    word d_last = (num_digits - 1) / hexdigits_per_word;
 720    bool is_negative = value_large.isNegative();
 721    uword carry = 1;
 722    for (word d = 0; d < d_last; d++) {
 723      uword digit = value_large.digitAt(d);
 724      if (is_negative) {
 725        digit = ~digit + carry;
 726        carry = carry & (digit == 0);
 727      }
 728      for (word i = 0; i < hexdigits_per_word; i++) {
 729        dest.byteAtPut(--idx, digits[digit & 0xf]);
 730        digit >>= kBitsPerHexDigit;
 731      }
 732    }
 733    last_digit = value_large.digitAt(d_last);
 734    if (is_negative) {
 735      last_digit = ~last_digit + carry;
 736    }
 737  } else {
 738    last_digit = static_cast<uword>(std::abs(value.asWord()));
 739  }
 740
 741  do {
 742    dest.byteAtPut(--idx, digits[last_digit & 0xf]);
 743    last_digit >>= kBitsPerHexDigit;
 744  } while (last_digit != 0);
 745  DCHECK(idx == at, "unexpected number of digits");
 746}
 747
 748static void putHexadecimalLowerCaseDigits(Thread* thread,
 749                                          const MutableBytes& dest, word at,
 750                                          const Int& value, word num_digits) {
 751  putHexadecimalDigitsImpl(thread, dest, at, value, num_digits,
 752                           kLowerCaseHexDigitArray);
 753}
 754
 755static void putHexadecimalUpperCaseDigits(Thread* thread,
 756                                          const MutableBytes& dest, word at,
 757                                          const Int& value, word num_digits) {
 758  putHexadecimalDigitsImpl(thread, dest, at, value, num_digits,
 759                           kUpperCaseHexDigitArray);
 760}
 761
 762static void putOctalDigits(Thread* thread, const MutableBytes& dest, word at,
 763                           const Int& value, word num_result_digits) {
 764  word idx = at + num_result_digits;
 765  if (value.isLargeInt()) {
 766    HandleScope scope(thread);
 767    LargeInt value_large(&scope, *value);
 768    bool is_negative = value_large.isNegative();
 769
 770    // We have to negate negative numbers which produces carry between
 771    // digits.
 772    uword negate_carry = 1;
 773    // We have carry over when the three bits for an octal digits are between
 774    // large-int digits (this has nothing to do with `negate_carry`).
 775    uword prev_digit_carry = 0;
 776    word prev_digit_carry_num_bits = 0;
 777    for (word d = 0, num_digits = value_large.numDigits(); d < num_digits;
 778         d++) {
 779      uword digit = value_large.digitAt(d);
 780      if (is_negative) {
 781        digit = ~digit + negate_carry;
 782        negate_carry &= (digit == 0);
 783      }
 784
 785      word num_oct_digits = kBitsPerWord / kBitsPerOctDigit;
 786      word next_carry_num_bits = kBitsPerWord % kBitsPerOctDigit;
 787      if (prev_digit_carry_num_bits != 0) {
 788        word combined = digit << prev_digit_carry_num_bits | prev_digit_carry;
 789        dest.byteAtPut(--idx, '0' + (combined & 7));
 790        digit >>= (kBitsPerOctDigit - prev_digit_carry_num_bits);
 791        if (idx == at) {
 792          DCHECK(d == num_digits - 1 && digit == 0, "rest must be zero");
 793          break;
 794        }
 795        num_oct_digits--;
 796
 797        next_carry_num_bits += prev_digit_carry_num_bits;
 798        if (next_carry_num_bits == kBitsPerOctDigit) {
 799          num_oct_digits++;
 800          next_carry_num_bits = 0;
 801        }
 802      }
 803      for (word i = 0; i < num_oct_digits; i++) {
 804        dest.byteAtPut(--idx, '0' + (digit & 7));
 805        digit >>= kBitsPerOctDigit;
 806        // Stop before we start outputting leading zeros.
 807        if (idx == at) {
 808          DCHECK(d == num_digits - 1 && digit == 0, "rest must be zero");
 809          break;
 810        }
 811      }
 812      DCHECK(digit < static_cast<uword>(1 << next_carry_num_bits),
 813             "too many bits left");
 814      prev_digit_carry_num_bits = next_carry_num_bits;
 815      prev_digit_carry = digit;
 816    }
 817    // Output leftover carry bits.
 818    if (idx > at) {
 819      DCHECK(prev_digit_carry_num_bits > 0, "should have carry bits");
 820      dest.byteAtPut(--idx, '0' + prev_digit_carry);
 821    }
 822  } else {
 823    uword value_uword = static_cast<uword>(std::abs(value.asWord()));
 824    do {
 825      dest.byteAtPut(--idx, '0' + (value_uword & 7));
 826      value_uword >>= kBitsPerOctDigit;
 827    } while (value_uword != 0);
 828  }
 829  DCHECK(idx == at, "unexpected number of digits");
 830}
 831
 832using numDigitsFunc = word (*)(const Int&);
 833using putDigitsFunc = void (*)(Thread*, const MutableBytes&, word, const Int&,
 834                               word);
 835
 836static inline RawObject formatIntSimpleImpl(Thread* thread, const Int& value,
 837                                            char format_prefix,
 838                                            numDigitsFunc num_digits,
 839                                            putDigitsFunc put_digits) {
 840  word result_n_digits = num_digits(value);
 841  word result_size = 2 + (value.isNegative() ? 1 : 0) + result_n_digits;
 842
 843  HandleScope scope(thread);
 844  Runtime* runtime = thread->runtime();
 845  MutableBytes result(&scope,
 846                      runtime->newMutableBytesUninitialized(result_size));
 847  word index = 0;
 848  if (value.isNegative()) {
 849    result.byteAtPut(index++, '-');
 850  }
 851  result.byteAtPut(index++, '0');
 852  result.byteAtPut(index++, format_prefix);
 853  put_digits(thread, result, index, value, result_n_digits);
 854  return result.becomeStr();
 855}
 856
 857RawObject formatIntBinarySimple(Thread* thread, const Int& value) {
 858  return formatIntSimpleImpl(thread, value, 'b', numBinaryDigits,
 859                             putBinaryDigits);
 860}
 861
 862RawObject formatIntHexadecimalSimple(Thread* thread, const Int& value) {
 863  return formatIntSimpleImpl(thread, value, 'x', numHexadecimalDigits,
 864                             putHexadecimalLowerCaseDigits);
 865}
 866
 867RawObject formatIntOctalSimple(Thread* thread, const Int& value) {
 868  return formatIntSimpleImpl(thread, value, 'o', numOctalDigits,
 869                             putOctalDigits);
 870}
 871
 872RawObject formatIntDecimal(Thread* thread, const Int& value,
 873                           FormatSpec* format) {
 874  if (format->precision >= 0) {
 875    return thread->raiseWithFmt(
 876        LayoutId::kValueError,
 877        "Precision not allowed in integer format specifier");
 878  }
 879  if (format->thousands_separator != 0) {
 880    UNIMPLEMENTED("thousands separator");
 881  }
 882
 883  // We cannot easily predict how many digits are necessary. So we overestimate
 884  // and printing into a temporary buffer.
 885  byte fixed_buffer[kUwordDigits10];
 886  byte* buffer;
 887  byte* digits;
 888  word result_n_digits;
 889  if (value.isLargeInt()) {
 890    word max_chars = estimateNumDecimalDigits(LargeInt::cast(*value));
 891    buffer = new byte[max_chars];
 892    byte* end = buffer + max_chars;
 893    digits = writeLargeIntDecimalDigits(end, LargeInt::cast(*value));
 894    result_n_digits = end - digits;
 895  } else {
 896    buffer = fixed_buffer;
 897    word value_word = value.asWord();
 898    uword magnitude =
 899        value_word >= 0 ? value_word : -static_cast<uword>(value_word);
 900    byte* end = buffer + ARRAYSIZE(fixed_buffer);
 901    digits = uwordToDecimal(magnitude, end);
 902    result_n_digits = end - digits;
 903  }
 904
 905  bool is_negative = value.isNegative();
 906  word number_chars =
 907      (is_negative || format->positive_sign != '\0') + result_n_digits;
 908
 909  HandleScope scope(thread);
 910  Str fill_cp(&scope, SmallStr::fromCodePoint(format->fill_char));
 911  word fill_cp_chars = fill_cp.length();
 912  word result_chars = number_chars;
 913
 914  word padding = format->width - number_chars;
 915  word left_padding = 0;
 916  word sign_padding = 0;
 917  word right_padding = 0;
 918  if (padding > 0) {
 919    result_chars += padding * fill_cp_chars;
 920    switch (format->alignment) {
 921      case '<':
 922        right_padding = padding;
 923        break;
 924      case '=':
 925        sign_padding = padding;
 926        break;
 927      case '>':
 928        left_padding = padding;
 929        break;
 930      case '^':
 931        left_padding = padding >> 1;
 932        right_padding = padding - left_padding;
 933        break;
 934      default:
 935        UNREACHABLE("unexpected alignment %c", format->alignment);
 936    }
 937  }
 938
 939  word index = 0;
 940  MutableBytes result(
 941      &scope, thread->runtime()->newMutableBytesUninitialized(result_chars));
 942  for (word i = 0; i < left_padding; i++) {
 943    result.replaceFromWithStr(index, *fill_cp, fill_cp_chars);
 944    index += fill_cp_chars;
 945  }
 946  if (is_negative) {
 947    result.byteAtPut(index++, '-');
 948  } else if (format->positive_sign != '\0') {
 949    result.byteAtPut(index++, format->positive_sign);
 950  }
 951  for (word i = 0; i < sign_padding; i++) {
 952    result.replaceFromWithStr(index, *fill_cp, fill_cp_chars);
 953    index += fill_cp_chars;
 954  }
 955  result.replaceFromWithAll(index, {digits, result_n_digits});
 956  index += result_n_digits;
 957  for (word i = 0; i < right_padding; i++) {
 958    result.replaceFromWithStr(index, *fill_cp, fill_cp_chars);
 959    index += fill_cp_chars;
 960  }
 961  DCHECK(index == result.length(), "wrong number of bytes written");
 962
 963  if (value.isLargeInt()) {
 964    delete[] buffer;
 965  }
 966  return result.becomeStr();
 967}
 968
 969static inline RawObject formatIntImpl(Thread* thread, const Int& value,
 970                                      FormatSpec* format, char format_prefix,
 971                                      numDigitsFunc num_digits,
 972                                      putDigitsFunc put_digits) {
 973  if (format->precision >= 0) {
 974    return thread->raiseWithFmt(
 975        LayoutId::kValueError,
 976        "Precision not allowed in integer format specifier");
 977  }
 978  if (format->thousands_separator != 0) {
 979    UNIMPLEMENTED("thousands separator");
 980  }
 981
 982  bool is_negative = value.isNegative();
 983  word result_n_digits = num_digits(value);
 984  word number_chars = (is_negative || format->positive_sign != '\0') +
 985                      (format->alternate ? 2 : 0) + result_n_digits;
 986
 987  HandleScope scope(thread);
 988  Str fill_cp(&scope, SmallStr::fromCodePoint(format->fill_char));
 989  word fill_cp_chars = fill_cp.length();
 990  word result_chars = number_chars;
 991
 992  word padding = format->width - number_chars;
 993  word left_padding = 0;
 994  word sign_padding = 0;
 995  word right_padding = 0;
 996  if (padding > 0) {
 997    result_chars += padding * fill_cp_chars;
 998    switch (format->alignment) {
 999      case '<':
1000        right_padding = padding;
1001        break;
1002      case '=':
1003        sign_padding = padding;
1004        break;
1005      case '>':
1006        left_padding = padding;
1007        break;
1008      case '^':
1009        left_padding = padding >> 1;
1010        right_padding = padding - left_padding;
1011        break;
1012      default:
1013        UNREACHABLE("unexpected alignment %c", format->alignment);
1014    }
1015  }
1016
1017  word index = 0;
1018  MutableBytes result(
1019      &scope, thread->runtime()->newMutableBytesUninitialized(result_chars));
1020  for (word i = 0; i < left_padding; i++) {
1021    result.replaceFromWithStr(index, *fill_cp, fill_cp_chars);
1022    index += fill_cp_chars;
1023  }
1024  if (is_negative) {
1025    result.byteAtPut(index++, '-');
1026  } else if (format->positive_sign != '\0') {
1027    result.byteAtPut(index++, format->positive_sign);
1028  }
1029  if (format->alternate) {
1030    result.byteAtPut(index++, '0');
1031    result.byteAtPut(index++, format_prefix);
1032  }
1033  for (word i = 0; i < sign_padding; i++) {
1034    result.replaceFromWithStr(index, *fill_cp, fill_cp_chars);
1035    index += fill_cp_chars;
1036  }
1037  put_digits(thread, result, index, value, result_n_digits);
1038  index += result_n_digits;
1039
1040  for (word i = 0; i < right_padding; i++) {
1041    result.replaceFromWithStr(index, *fill_cp, fill_cp_chars);
1042    index += fill_cp_chars;
1043  }
1044  DCHECK(index == result.length(), "wrong number of bytes written");
1045  return result.becomeStr();
1046}
1047
1048RawObject formatIntBinary(Thread* thread, const Int& value,
1049                          FormatSpec* format) {
1050  return formatIntImpl(thread, value, format, 'b', numBinaryDigits,
1051                       putBinaryDigits);
1052}
1053
1054RawObject formatIntHexadecimalLowerCase(Thread* thread, const Int& value,
1055                                        FormatSpec* format) {
1056  return formatIntImpl(thread, value, format, 'x', numHexadecimalDigits,
1057                       putHexadecimalLowerCaseDigits);
1058}
1059
1060RawObject formatIntHexadecimalUpperCase(Thread* thread, const Int& value,
1061                                        FormatSpec* format) {
1062  return formatIntImpl(thread, value, format, 'X', numHexadecimalDigits,
1063                       putHexadecimalUpperCaseDigits);
1064}
1065
1066RawObject formatIntOctal(Thread* thread, const Int& value, FormatSpec* format) {
1067  return formatIntImpl(thread, value, format, 'o', numOctalDigits,
1068                       putOctalDigits);
1069}
1070
1071RawObject formatDoubleHexadecimalSimple(Runtime* runtime, double value) {
1072  const int mantissa_hex_digits = (kDoubleMantissaBits / 4) + 1;
1073  const int exp_bits = kBitsPerDouble - kDoubleMantissaBits - 1;
1074  const int max_exp = 1 << (exp_bits - 1);
1075  const int min_exp = -(1 << (exp_bits - 1)) + 1;
1076
1077  bool is_negative;
1078  int exponent;
1079  int64_t mantissa;
1080
1081  decodeDouble(value, &is_negative, &exponent, &mantissa);
1082  if (exponent == max_exp) {
1083    if (mantissa == 0) {
1084      return is_negative ? runtime->newStrFromCStr("-inf")
1085                         : runtime->newStrFromCStr("inf");
1086    }
1087    return runtime->newStrFromCStr("nan");
1088  }
1089
1090  const bool is_subnormal = exponent == min_exp;
1091  if (is_subnormal) {
1092    if (mantissa != 0) {
1093      exponent += 1;
1094    } else {
1095      return is_negative ? runtime->newStrFromCStr("-0x0.0p+0")
1096                         : runtime->newStrFromCStr("0x0.0p+0");
1097    }
1098  }
1099
1100  int exponent_sign;
1101  if (exponent < 0) {
1102    exponent_sign = '-';
1103    exponent = -exponent;
1104  } else {
1105    exponent_sign = '+';
1106  }
1107
1108  // Output buffer_size mantissaHexDigits + 1 char for '-', 2 chars for '0x', 2
1109  // chars for '1.', 1 char for 'p', 1 char for exponent sign '+' or '-', and 4
1110  // chars for the exponent.
1111  const int buffer_size = mantissa_hex_digits + 11;
1112  char output[buffer_size];
1113  byte* end = reinterpret_cast<byte*>(output) + buffer_size;
1114  byte* poutput = uwordToDecimal(exponent, end);
1115
1116  *--poutput = exponent_sign;
1117  *--poutput = 'p';
1118  for (int k = 0; k < kDoubleMantissaBits; k += 4) {
1119    *--poutput = lowerCaseHexDigit((mantissa >> k) & 0xf);
1120  }
1121
1122  *--poutput = '.';
1123  *--poutput = is_subnormal ? '0' : '1';
1124  *--poutput = 'x';
1125  *--poutput = '0';
1126  if (is_negative) {
1127    *--poutput = '-';
1128  }
1129  return runtime->newStrWithAll(View<byte>(poutput, end - poutput));
1130}
1131
1132}  // namespace py