runtime/float-conversion.cpp at trunk · bernsteinbear.com/skybison

bernsteinbear.com / skybison
fork atom
this repo has no description
fork atom
skybison / runtime / float-conversion.cpp
at trunk 3121 lines 86 kB view raw
wrap content
Max Bernstein Add license headers 4y ago
29d072a3
   1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
   2#include "float-conversion.h"
   3
   4#include <cerrno>
   5#include <cfloat>
   6#include <climits>
   7#include <cmath>
   8#include <cstdint>
   9#include <cstdio>
  10#include <cstdlib>
  11#include <cstring>
  12#include <limits>
  13
  14#include "globals.h"
  15#include "utils.h"
  16
  17// The following code is a copy of `pystrtod.c` and `dtoa.c` from cpython.
  18// The public interface has been slightly changes and the code was heavily
  19// modified to conform to our style guides. It also doesn't contain code for
  20// non-ieee754 architectures any longer.
  21// However there were no changes to the algorithm and there are no plans to
  22// do so at the moment!
  23
  24namespace py {
  25
  26static char* dtoa(double dd, int mode, int ndigits, int* decpt, int* sign,
  27                  char** rve);
  28
  29static double strtod(const char* s00, char** se);
  30
  31static void freedtoa(char* s);
  32
  33// Case-insensitive string match used for nan and inf detection; t should be
  34// lower-case.  Returns 1 for a successful match, 0 otherwise.
  35static bool caseInsensitiveMatch(const char* s, const char* t) {
  36  for (; *t != '\0'; s++, t++) {
  37    DCHECK('a' <= *t && *t <= 'z', "t must be lowercase letters");
  38    if (*t != *s && *t != (*s - 'A' + 'a')) {
  39      return false;
  40    }
  41  }
  42  return true;
  43}
  44
  45// parseInfOrNan: Attempt to parse a string of the form "nan", "inf" or
  46// "infinity", with an optional leading sign of "+" or "-".  On success,
  47// return the NaN or Infinity as a double and set *endptr to point just beyond
  48// the successfully parsed portion of the string.  On failure, return -1.0 and
  49// set *endptr to point to the start of the string.
  50double parseInfOrNan(const char* p, char** endptr) {
  51  const char* s = p;
  52  bool negate = false;
  53  if (*s == '-') {
  54    negate = true;
  55    s++;
  56  } else if (*s == '+') {
  57    s++;
  58  }
  59
  60  double retval;
  61  if (caseInsensitiveMatch(s, "inf")) {
  62    s += 3;
  63    if (caseInsensitiveMatch(s, "inity")) {
  64      s += 5;
  65    }
  66    retval = negate ? -std::numeric_limits<double>::infinity()
  67                    : std::numeric_limits<double>::infinity();
  68  } else if (caseInsensitiveMatch(s, "nan")) {
  69    s += 3;
  70    retval = negate ? -kDoubleNaN : kDoubleNaN;
  71  } else {
  72    s = p;
  73    retval = -1.0;
  74  }
  75  *endptr = const_cast<char*>(s);
  76  return retval;
  77}
  78
  79// asciiStrtod:
  80// @nptr:    the string to convert to a numeric value.
  81// @endptr:  if non-%nullptr, it returns the character after
  82//           the last character used in the conversion.
  83//
  84// Converts a string to a #gdouble value.
  85// This function behaves like the standard strtod() function
  86// does in the C locale. It does this without actually
  87// changing the current locale, since that would not be
  88// thread-safe.
  89//
  90// This function is typically used when reading configuration
  91// files or other non-user input that should be locale independent.
  92// To handle input from the user you should normally use the
  93// locale-sensitive system strtod() function.
  94//
  95// If the correct value would cause overflow, plus or minus %HUGE_VAL
  96// is returned (according to the sign of the value), and %ERANGE is
  97// stored in %errno. If the correct value would cause underflow,
  98// zero is returned and %ERANGE is stored in %errno.
  99// If memory allocation fails, %ENOMEM is stored in %errno.
 100//
 101// This function resets %errno before calling strtod() so that
 102// you can reliably detect overflow and underflow.
 103//
 104// Return value: the #gdouble value.
 105static double asciiStrtod(const char* nptr, char** endptr) {
 106  double result = strtod(nptr, endptr);
 107  if (*endptr == nptr) {
 108    // string might represent an inf or nan
 109    result = parseInfOrNan(nptr, endptr);
 110  }
 111  return result;
 112}
 113
 114// parseFloat converts a null-terminated byte string s (interpreted
 115// as a string of ASCII characters) to a float.  The string should not have
 116// leading or trailing whitespace.  The conversion is independent of the
 117// current locale.
 118//
 119// If endptr is nullptr, try to convert the whole string.  Raise ValueError and
 120// return -1.0 if the string is not a valid representation of a floating-point
 121// number.
 122//
 123// If endptr is non-nullptr, try to convert as much of the string as possible.
 124// If no initial segment of the string is the valid representation of a
 125// floating-point number then *endptr is set to point to the beginning of the
 126// string, -1.0 is returned and again ValueError is raised.
 127//
 128// On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
 129// if overflow_exception is nullptr then +-HUGE_VAL is returned, and no
 130// Python exception is raised.  Otherwise, overflow_exception should point to a
 131// Python exception, this exception will be raised, -1.0 will be returned, and
 132// *endptr will point just past the end of the converted value.
 133//
 134// If any other failure occurs (for example lack of memory), -1.0 is returned
 135// and the appropriate Python exception will have been set.
 136double parseFloat(const char* s, char** endptr,
 137                  ConversionResult* conversion_result) {
 138  errno = 0;
 139  char* fail_pos;
 140  double x = asciiStrtod(s, &fail_pos);
 141
 142  double result = -1.0;
 143  if (errno == ENOMEM) {
 144    *conversion_result = ConversionResult::kOutOfMemory;
 145    fail_pos = const_cast<char*>(s);
 146  } else if (!endptr && (fail_pos == s || *fail_pos != '\0')) {
 147    *conversion_result = ConversionResult::kInvalid;
 148  } else if (fail_pos == s) {
 149    *conversion_result = ConversionResult::kInvalid;
 150  } else if (errno == ERANGE && std::fabs(x) >= 1.0) {
 151    *conversion_result = ConversionResult::kOverflow;
 152  } else {
 153    *conversion_result = ConversionResult::kSuccess;
 154    result = x;
 155  }
 156
 157  if (endptr != nullptr) {
 158    *endptr = fail_pos;
 159  }
 160  return result;
 161}
 162
 163// I'm using a lookup table here so that I don't have to invent a non-locale
 164// specific way to convert to uppercase.
 165static const int kOfsInf = 0;
 166static const int kOfsNan = 1;
 167static const int kOfsE = 2;
 168
 169// The lengths of these are known to the code below, so don't change them.
 170static const char* const kLcFloatStrings[] = {
 171    "inf",
 172    "nan",
 173    "e",
 174};
 175static const char* const kUcFloatStrings[] = {
 176    "INF",
 177    "NAN",
 178    "E",
 179};
 180
 181// Convert a double d to a string, and return a PyMem_Malloc'd block of
 182// memory contain the resulting string.
 183//
 184// Arguments:
 185//   d is the double to be converted
 186//   format_code is one of 'e', 'f', 'g', 'r'.  'e', 'f' and 'g'
 187//     correspond to '%e', '%f' and '%g';  'r' corresponds to repr.
 188//   mode is one of '0', '2' or '3', and is completely determined by
 189//     format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
 190//   precision is the desired precision
 191//   skip_sign do not prepend any sign even for negative numbers.
 192//   add_dot_0_if_integer is nonzero if integers in non-exponential form
 193//     should have ".0" added.  Only applies to format codes 'r' and 'g'.
 194//   use_alt_formatting is nonzero if alternative formatting should be
 195//     used.  Only applies to format codes 'e', 'f' and 'g'.  For code 'g',
 196//     at most one of use_alt_formatting and add_dot_0_if_integer should
 197//     be nonzero.
 198//   type, if non-nullptr, will be set to one of these constants to identify
 199//     the type of the 'd' argument:
 200//   FormatResultKind::kFinite
 201//   FormatResultKind::kInfinite
 202//   FormatResultKind::kNan
 203//
 204// Returns a PyMem_Malloc'd block of memory containing the resulting string,
 205//  or nullptr on error. If nullptr is returned, the Python error has been set.
 206static char* formatFloatShort(double d, char format_code, int mode,
 207                              int precision, bool skip_sign,
 208                              bool always_add_sign, bool add_dot_0_if_integer,
 209                              bool use_alt_formatting,
 210                              const char* const* float_strings,
 211                              FormatResultKind* type) {
 212  // py::dtoa returns a digit string (no decimal point or exponent).
 213  // Must be matched by a call to py::freedtoa.
 214  char* digits_end;
 215  int decpt_as_int;
 216  int sign;
 217  char* digits = dtoa(d, mode, precision, &decpt_as_int, &sign, &digits_end);
 218
 219  word decpt = decpt_as_int;
 220  if (digits == nullptr) {
 221    // The only failure mode is no memory.
 222    return nullptr;
 223  }
 224  DCHECK(digits_end != nullptr && digits_end >= digits, "unexpected result");
 225  word digits_len = digits_end - digits;
 226
 227  word vdigits_end = digits_len;
 228  bool use_exp = false;
 229
 230  word bufsize = 0;
 231  char* p = nullptr;
 232  char* buf = nullptr;
 233  if (digits_len && !('0' <= digits[0] && digits[0] <= '9')) {
 234    // Infinities and nans here; adapt Gay's output,
 235    // so convert Infinity to inf and NaN to nan, and
 236    // ignore sign of nan. Then return.
 237
 238    // ignore the actual sign of a nan.
 239    if (digits[0] == 'n' || digits[0] == 'N') {
 240      sign = 0;
 241    }
 242
 243    // We only need 5 bytes to hold the result "+inf\0".
 244    bufsize = 5;  // Used later in an assert.
 245    buf = static_cast<char*>(std::malloc(bufsize));
 246    if (buf == nullptr) {
 247      goto exit;
 248    }
 249    p = buf;
 250
 251    if (!skip_sign) {
 252      if (sign == 1) {
 253        *p++ = '-';
 254      } else if (always_add_sign) {
 255        *p++ = '+';
 256      }
 257    }
 258    if (digits[0] == 'i' || digits[0] == 'I') {
 259      std::strncpy(p, float_strings[kOfsInf], 3);
 260      p += 3;
 261
 262      if (type) {
 263        *type = FormatResultKind::kInfinite;
 264      }
 265    } else if (digits[0] == 'n' || digits[0] == 'N') {
 266      std::strncpy(p, float_strings[kOfsNan], 3);
 267      p += 3;
 268
 269      if (type) {
 270        *type = FormatResultKind::kNan;
 271      }
 272    } else {
 273      // shouldn't get here: Gay's code should always return
 274      // something starting with a digit, an 'I',  or 'N'.
 275      UNIMPLEMENTED("should always return digit, I or N");
 276    }
 277    goto exit;
 278  }
 279
 280  // The result must be finite (not inf or nan).
 281  if (type) {
 282    *type = FormatResultKind::kFinite;
 283  }
 284
 285  // We got digits back, format them.  We may need to pad 'digits'
 286  // either on the left or right (or both) with extra zeros, so in
 287  // general the resulting string has the form
 288
 289  //   [<sign>]<zeros><digits><zeros>[<exponent>]
 290
 291  // where either of the <zeros> pieces could be empty, and there's a
 292  // decimal point that could appear either in <digits> or in the
 293  // leading or trailing <zeros>.
 294
 295  // Imagine an infinite 'virtual' string vdigits, consisting of the
 296  // string 'digits' (starting at index 0) padded on both the left and
 297  // right with infinite strings of zeros.  We want to output a slice
 298
 299  //   vdigits[vdigits_start : vdigits_end]
 300
 301  // of this virtual string.  Thus if vdigits_start < 0 then we'll end
 302  // up producing some leading zeros; if vdigits_end > digits_len there
 303  // will be trailing zeros in the output.  The next section of code
 304  // determines whether to use an exponent or not, figures out the
 305  // position 'decpt' of the decimal point, and computes 'vdigits_start'
 306  // and 'vdigits_end'.
 307  switch (format_code) {
 308    case 'e':
 309      use_exp = true;
 310      vdigits_end = precision;
 311      break;
 312    case 'f':
 313      vdigits_end = decpt + precision;
 314      break;
 315    case 'g':
 316      if (decpt <= -4 ||
 317          decpt > (add_dot_0_if_integer ? precision - 1 : precision)) {
 318        use_exp = true;
 319      }
 320      if (use_alt_formatting) {
 321        vdigits_end = precision;
 322      }
 323      break;
 324    case 'r':
 325      // convert to exponential format at 1e16.  We used to convert
 326      // at 1e17, but that gives odd-looking results for some values
 327      // when a 16-digit 'shortest' repr is padded with bogus zeros.
 328      // For example, repr(2e16+8) would give 20000000000000010.0;
 329      // the true value is 20000000000000008.0.
 330      if (decpt <= -4 || decpt > 16) {
 331        use_exp = true;
 332      }
 333      break;
 334    default:
 335      goto exit;
 336  }
 337
 338  // if using an exponent, reset decimal point position to 1 and adjust
 339  // exponent accordingly.
 340  {
 341    int exp = 0;
 342    if (use_exp) {
 343      exp = static_cast<int>(decpt) - 1;
 344      decpt = 1;
 345    }
 346    // ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
 347    // decpt < vdigits_end if add_dot_0_if_integer and no exponent.
 348    word vdigits_start = decpt <= 0 ? decpt - 1 : 0;
 349    if (!use_exp && add_dot_0_if_integer) {
 350      vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
 351    } else {
 352      vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
 353    }
 354
 355    // double check inequalities.
 356    DCHECK(vdigits_start <= 0 && 0 <= digits_len && digits_len <= vdigits_end,
 357           "failed consistency check");
 358    // decimal point should be in (vdigits_start, vdigits_end]
 359    DCHECK(vdigits_start < decpt && decpt <= vdigits_end,
 360           "failed decimal point check");
 361
 362    // Compute an upper bound how much memory we need. This might be a few
 363    // chars too long, but no big deal.
 364    bufsize =
 365        // sign, decimal point and trailing 0 byte
 366        3 - skip_sign +
 367
 368        // total digit count (including zero padding on both sides)
 369        (vdigits_end - vdigits_start) +
 370
 371        // exponent "e+100", max 3 numerical digits
 372        (use_exp ? 5 : 0);
 373
 374    // Now allocate the memory and initialize p to point to the start of it.
 375    buf = static_cast<char*>(std::malloc(bufsize));
 376    if (buf == nullptr) {
 377      goto exit;
 378    }
 379    p = buf;
 380
 381    // Add a negative sign if negative, and a plus sign if non-negative
 382    // and always_add_sign is true.
 383    if (!skip_sign) {
 384      if (sign == 1) {
 385        *p++ = '-';
 386      } else if (always_add_sign) {
 387        *p++ = '+';
 388      }
 389    }
 390
 391    // note that exactly one of the three 'if' conditions is true,
 392    // so we include exactly one decimal point.
 393    // Zero padding on left of digit string
 394    if (decpt <= 0) {
 395      std::memset(p, '0', decpt - vdigits_start);
 396      p += decpt - vdigits_start;
 397      *p++ = '.';
 398      std::memset(p, '0', 0 - decpt);
 399      p += 0 - decpt;
 400    } else {
 401      std::memset(p, '0', 0 - vdigits_start);
 402      p += 0 - vdigits_start;
 403    }
 404
 405    // Digits, with included decimal point
 406    if (0 < decpt && decpt <= digits_len) {
 407      std::strncpy(p, digits, decpt - 0);
 408      p += decpt - 0;
 409      *p++ = '.';
 410      std::strncpy(p, digits + decpt, digits_len - decpt);
 411      p += digits_len - decpt;
 412    } else {
 413      std::strncpy(p, digits, digits_len);
 414      p += digits_len;
 415    }
 416
 417    // And zeros on the right
 418    if (digits_len < decpt) {
 419      std::memset(p, '0', decpt - digits_len);
 420      p += decpt - digits_len;
 421      *p++ = '.';
 422      std::memset(p, '0', vdigits_end - decpt);
 423      p += vdigits_end - decpt;
 424    } else {
 425      std::memset(p, '0', vdigits_end - digits_len);
 426      p += vdigits_end - digits_len;
 427    }
 428
 429    // Delete a trailing decimal pt unless using alternative formatting.
 430    if (p[-1] == '.' && !use_alt_formatting) {
 431      p--;
 432    }
 433
 434    // Now that we've done zero padding, add an exponent if needed.
 435    if (use_exp) {
 436      *p++ = float_strings[kOfsE][0];
 437      int exp_len = std::sprintf(p, "%+.02d", exp);
 438      p += exp_len;
 439    }
 440  }
 441
 442exit:
 443  if (buf) {
 444    *p = '\0';
 445    // It's too late if this fails, as we've already stepped on
 446    // memory that isn't ours. But it's an okay debugging test.
 447    DCHECK(p - buf < bufsize, "buffer overflow");
 448  }
 449  if (digits) {
 450    freedtoa(digits);
 451  }
 452
 453  return buf;
 454}
 455
 456char* doubleToString(double value, char format_code, int precision,
 457                     bool skip_sign, bool add_dot_0, bool use_alt_formatting,
 458                     FormatResultKind* type) {
 459  const char* const* float_strings = kLcFloatStrings;
 460
 461  // Validate format_code, and map upper and lower case. Compute the
 462  // mode and make any adjustments as needed.
 463  int mode;
 464  switch (format_code) {
 465    // exponent
 466    case 'E':
 467      float_strings = kUcFloatStrings;
 468      format_code = 'e';
 469      FALLTHROUGH;
 470    case 'e':
 471      mode = 2;
 472      precision++;
 473      break;
 474
 475    // fixed
 476    case 'F':
 477      float_strings = kUcFloatStrings;
 478      format_code = 'f';
 479      FALLTHROUGH;
 480    case 'f':
 481      mode = 3;
 482      break;
 483
 484    // general
 485    case 'G':
 486      float_strings = kUcFloatStrings;
 487      format_code = 'g';
 488      FALLTHROUGH;
 489    case 'g':
 490      mode = 2;
 491      // precision 0 makes no sense for 'g' format; interpret as 1
 492      if (precision == 0) {
 493        precision = 1;
 494      }
 495      break;
 496
 497    // repr format
 498    case 'r':
 499      mode = 0;
 500      // Supplied precision is unused, must be 0.
 501      if (precision != 0) {
 502        return nullptr;
 503      }
 504      break;
 505
 506    default:
 507      return nullptr;
 508  }
 509
 510  return formatFloatShort(value, format_code, mode, precision, skip_sign,
 511                          /*always_add_sign=*/false, add_dot_0,
 512                          use_alt_formatting, float_strings, type);
 513}
 514
 515double doubleRoundDecimals(double value, int ndigits) {
 516  // Print value to a string with `ndigits` decimal digits.
 517  char* buf_end;
 518  int decpt;
 519  int sign;
 520  char* buf = dtoa(value, 3, ndigits, &decpt, &sign, &buf_end);
 521  CHECK(buf != nullptr, "out of memory");
 522
 523  char shortbuf[100];
 524  unique_c_ptr<char> longbuf;
 525  size_t buflen = buf_end - buf;
 526  char* number_buf;
 527  size_t number_buf_len = buflen + 8;
 528  if (number_buf_len <= sizeof(shortbuf)) {
 529    number_buf = shortbuf;
 530  } else {
 531    longbuf.reset(static_cast<char*>(std::malloc(number_buf_len)));
 532    number_buf = longbuf.get();
 533  }
 534  std::snprintf(number_buf, number_buf_len, "%c0%se%d", (sign ? '-' : '+'), buf,
 535                decpt - static_cast<int>(buflen));
 536  freedtoa(buf);
 537
 538  // Convert resulting string back to a double.
 539  return strtod(number_buf, nullptr);
 540}
 541
 542/****************************************************************
 543 *
 544 * The author of this software is David M. Gay.
 545 *
 546 * Copyright (c) 1991, 2000, 2001 by Lucent Technologies.
 547 *
 548 * Permission to use, copy, modify, and distribute this software for any
 549 * purpose without fee is hereby granted, provided that this entire notice
 550 * is included in all copies of any software which is or includes a copy
 551 * or modification of this software and in all copies of the supporting
 552 * documentation for such software.
 553 *
 554 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
 555 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY
 556 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 557 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 558 *
 559 ***************************************************************/
 560
 561// This is dtoa.c by David M. Gay, downloaded from
 562// http://www.netlib.org/fp/dtoa.c on April 15, 2009 and modified for
 563// inclusion into the Python core by Mark E. T. Dickinson and Eric V. Smith.
 564//
 565// Please remember to check http://www.netlib.org/fp regularly (and especially
 566// before any Python release) for bugfixes and updates.
 567//
 568// The major modifications from Gay's original code are as follows:
 569//
 570//  0. The original code has been specialized to Python's needs by removing
 571//     many of the #ifdef'd sections.  In particular, code to support VAX and
 572//     IBM floating-point formats, hex NaNs, hex floats, locale-aware
 573//     treatment of the decimal point, and setting of the inexact flag have
 574//     been removed.
 575//
 576//  1. We use PyMem_Malloc and PyMem_Free in place of malloc and free.
 577//
 578//  2. The public functions strtod, dtoa and freedtoa all now have
 579//     a _Py_dg_ prefix.
 580//
 581//  3. Instead of assuming that PyMem_Malloc always succeeds, we thread
 582//     PyMem_Malloc failures through the code.  The functions
 583//
 584//       Balloc, multadd, s2b, i2b, mult, pow5mult, lshift, diff, d2b
 585//
 586//     of return type *Bigint all return nullptr to indicate a malloc failure.
 587//     Similarly, rv_alloc and nrv_alloc (return type char *) return nullptr on
 588//     failure.  bigcomp now has return type int (it used to be void) and
 589//     returns -1 on failure and 0 otherwise.  dtoa returns nullptr
 590//     on failure.  strtod indicates failure due to malloc failure
 591//     by returning -1.0, setting errno=ENOMEM and *se to s00.
 592//
 593//  4. The static variable dtoa_result has been removed.  Callers of
 594//     dtoa are expected to call freedtoa to free
 595//     the memory allocated by dtoa.
 596//
 597//  5. The code has been reformatted to better fit with Python's
 598//     C style guide (PEP 7).
 599//
 600//  6. A bug in the memory allocation has been fixed: to avoid FREEing memory
 601//     that hasn't been std::malloc'ed, private_mem should only be used when
 602//     k <= kKmax.
 603//
 604//  7. strtod has been modified so that it doesn't accept strings with
 605//     leading whitespace.
 606//
 607
 608// Please send bug reports for the original dtoa.c code to David M. Gay (dmg
 609// at acm dot org, with " at " changed at "@" and " dot " changed to ".").
 610// Please report bugs for this modified version using the Python issue tracker
 611// (http://bugs.python.org).
 612
 613// On a machine with IEEE extended-precision registers, it is
 614// necessary to specify double-precision (53-bit) rounding precision
 615// before invoking strtod or dtoa.  If the machine uses (the equivalent
 616// of) Intel 80x87 arithmetic, the call
 617//      _control87(PC_53, MCW_PC);
 618// does this with many compilers.  Whether this or another call is
 619// appropriate depends on the compiler; for this to work, it may be
 620// necessary to #include "float.h" or another system-dependent header
 621// file.
 622
 623// strtod for IEEE-, VAX-, and IBM-arithmetic machines.
 624//
 625// This strtod returns a nearest machine number to the input decimal
 626// string (or sets errno to ERANGE).  With IEEE arithmetic, ties are
 627// broken by the IEEE round-even rule.  Otherwise ties are broken by
 628// biased rounding (add half and chop).
 629//
 630// Inspired loosely by William D. Clinger's paper "How to Read Floating
 631// Point Numbers Accurately" [Proc. ACM SIGPLAN '90, pp. 92-101].
 632//
 633// Modifications:
 634//
 635//      1. We only require IEEE, IBM, or VAX double-precision
 636//              arithmetic (not IEEE double-extended).
 637//      2. We get by with floating-point arithmetic in a case that
 638//              Clinger missed -- when we're computing d * 10^n
 639//              for a small integer d and the integer n is not too
 640//              much larger than 22 (the maximum integer k for which
 641//              we can represent 10^k exactly), we may be able to
 642//              compute (d*10^k) * 10^(e-k) with just one roundoff.
 643//      3. Rather than a bit-at-a-time adjustment of the binary
 644//              result in the hard case, we use floating-point
 645//              arithmetic to determine the adjustment to within
 646//              one bit; only in really hard cases do we need to
 647//              compute a second residual.
 648//      4. Because of 3., we don't need a large table of powers of 10
 649//              for ten-to-e (just some small tables, e.g. of 10^k
 650//              for 0 <= k <= 22).
 651
 652static const int kPrivateMemBytes = 2304;
 653static const int kPrivateMemArraySize =
 654    ((kPrivateMemBytes + sizeof(double) - 1) / sizeof(double));
 655static double private_mem[kPrivateMemArraySize];
 656static double* pmem_next = private_mem;
 657
 658typedef union {
 659  double d;
 660  uint32_t L[2];
 661} U;
 662
 663#define word0(x) (x)->L[1]
 664#define word1(x) (x)->L[0]
 665#define dval(x) (x)->d
 666
 667static const int kStrtodDiglim = 40;
 668
 669// maximum permitted exponent value for strtod; exponents larger than
 670// kMaxAbsExp in absolute value get truncated to +-kMaxAbsExp.  kMaxAbsExp
 671// should fit into an int.
 672static const int kMaxAbsExp = 1100000000;
 673
 674// Bound on length of pieces of input strings in strtod; specifically,
 675// this is used to bound the total number of digits ignoring leading zeros and
 676// the number of digits that follow the decimal point.  Ideally, kMaxDigits
 677// should satisfy kMaxDigits + 400 < kMaxAbsExp; that ensures that the
 678// exponent clipping in strtod can't affect the value of the output.
 679static const unsigned kMaxDigits = 1000000000U;
 680
 681// kTenPmax = floor(P*log(2)/log(5))
 682// kBletch = (highest power of 2 < DBL_MAX_10_EXP) / 16
 683// kQuickMax = floor((P-1)*log(FLT_RADIX)/log(10) - 1)
 684// kIntMax = floor(P*log(FLT_RADIX)/log(10) - 1)
 685
 686static_assert(sizeof(double) == 8, "Expected IEEE 754 binary64");
 687static_assert(DBL_MANT_DIG == 53, "Expected IEEE 754 binary64");
 688static const int kExpShift = 20;
 689static const int kExpShift1 = 20;
 690static const int kExpMsk1 = 0x100000;
 691static const uint32_t kExpMask = 0x7ff00000;
 692static const int kP = 53;
 693static const int kBias = 1023;
 694static const int kEtiny = -1074;  // smallest denormal is 2**kEtiny
 695static const uint32_t kExp1 = 0x3ff00000;
 696static const uint32_t kExp11 = 0x3ff00000;
 697static const int kEbits = 11;
 698static const uint32_t kFracMask = 0xfffff;
 699static const uint32_t kFracMask1 = 0xfffff;
 700static const int kTenPmax = 22;
 701static const int kBletch = 0x10;
 702static const uint32_t kBndryMask = 0xfffff;
 703static const uint32_t kBndryMask1 = 0xfffff;
 704static const uint32_t kSignBit = 0x80000000;
 705static const int kLog2P = 1;
 706static const int kTiny1 = 1;
 707static const int kQuickMax = 14;
 708static const int kIntMax = 14;
 709
 710static const uint32_t kBig0 =
 711    (kFracMask1 | kExpMsk1 * (DBL_MAX_EXP + kBias - 1));
 712static const uint32_t kBig1 = 0xffffffff;
 713
 714// struct BCinfo is used to pass information from strtod to bigcomp
 715
 716struct BCinfo {
 717  int e0, nd, nd0, scale;
 718};
 719
 720static const uint32_t kFfffffff = 0xffffffffU;
 721
 722static const int kKmax = 7;
 723
 724// struct Bigint is used to represent arbitrary-precision integers.  These
 725// integers are stored in sign-magnitude format, with the magnitude stored as
 726// an array of base 2**32 digits.  Bigints are always normalized: if x is a
 727// Bigint then x->wds >= 1, and either x->wds == 1 or x[wds-1] is nonzero.
 728//
 729// The Bigint fields are as follows:
 730//
 731//   - next is a header used by Balloc and Bfree to keep track of lists
 732//       of freed Bigints;  it's also used for the linked list of
 733//       powers of 5 of the form 5**2**i used by pow5mult.
 734//   - k indicates which pool this Bigint was allocated from
 735//   - maxwds is the maximum number of words space was allocated for
 736//     (usually maxwds == 2**k)
 737//   - sign is 1 for negative Bigints, 0 for positive.  The sign is unused
 738//     (ignored on inputs, set to 0 on outputs) in almost all operations
 739//     involving Bigints: a notable exception is the diff function, which
 740//     ignores signs on inputs but sets the sign of the output correctly.
 741//   - wds is the actual number of significant words
 742//   - x contains the vector of words (digits) for this Bigint, from least
 743//     significant (x[0]) to most significant (x[wds-1]).
 744
 745struct Bigint {
 746  struct Bigint* next;
 747  int k, maxwds, sign, wds;
 748  uint32_t x[1];
 749};
 750
 751// Memory management: memory is allocated from, and returned to, kKmax+1 pools
 752// of memory, where pool k (0 <= k <= kKmax) is for Bigints b with b->maxwds ==
 753// 1 << k.  These pools are maintained as linked lists, with freelist[k]
 754// pointing to the head of the list for pool k.
 755//
 756// On allocation, if there's no free slot in the appropriate pool, std::malloc
 757// is called to get more memory.  This memory is not returned to the system
 758// until Python quits.  There's also a private memory pool that's allocated from
 759// in preference to using std::malloc.
 760//
 761// For Bigints with more than (1 << kKmax) digits (which implies at least 1233
 762// decimal digits), memory is directly allocated using std::malloc, and freed
 763// using std::free.
 764//
 765// XXX: it would be easy to bypass this memory-management system and
 766// translate each call to Balloc into a call to PyMem_Malloc, and each
 767// Bfree to PyMem_Free.  Investigate whether this has any significant
 768// performance on impact.
 769
 770static Bigint* freelist[kKmax + 1];
 771
 772// Allocate space for a Bigint with up to 1<<k digits.
 773static Bigint* Balloc(int k) {
 774  Bigint* rv;
 775  if (k <= kKmax && (rv = freelist[k])) {
 776    freelist[k] = rv->next;
 777  } else {
 778    int x = 1 << k;
 779    unsigned len =
 780        (sizeof(Bigint) + (x - 1) * sizeof(uint32_t) + sizeof(double) - 1) /
 781        sizeof(double);
 782    if (k <= kKmax && pmem_next - private_mem + len <=
 783                          static_cast<long>(kPrivateMemArraySize)) {
 784      rv = reinterpret_cast<Bigint*>(pmem_next);
 785      pmem_next += len;
 786    } else {
 787      rv = static_cast<Bigint*>(std::malloc(len * sizeof(double)));
 788      if (rv == nullptr) {
 789        return nullptr;
 790      }
 791    }
 792    rv->k = k;
 793    rv->maxwds = x;
 794  }
 795  rv->sign = rv->wds = 0;
 796  return rv;
 797}
 798
 799// Free a Bigint allocated with Balloc.
 800static void Bfree(Bigint* v) {
 801  if (v == nullptr) {
 802    return;
 803  }
 804
 805  if (v->k > kKmax) {
 806    std::free(v);
 807  } else {
 808    v->next = freelist[v->k];
 809    freelist[v->k] = v;
 810  }
 811}
 812
 813static inline void Bcopy(Bigint* dest, const Bigint* src) {
 814  std::memcpy(&dest->sign, &src->sign,
 815              src->wds * sizeof(int32_t) + 2 * sizeof(int));
 816}
 817
 818// Multiply a Bigint b by m and add a.  Either modifies b in place and returns
 819// a pointer to the modified b, or Bfrees b and returns a pointer to a copy.
 820// On failure, return nullptr.  In this case, b will have been already freed.
 821static Bigint* multadd(Bigint* b, int m, int a)  // multiply by m and add a
 822{
 823  int wds = b->wds;
 824  uint32_t* x = b->x;
 825  int i = 0;
 826  uint64_t carry = a;
 827  do {
 828    uint64_t y = *x * static_cast<uint64_t>(m) + carry;
 829    carry = y >> 32;
 830    *x++ = static_cast<uint32_t>(y & kFfffffff);
 831  } while (++i < wds);
 832
 833  if (carry) {
 834    if (wds >= b->maxwds) {
 835      Bigint* b1 = Balloc(b->k + 1);
 836      if (b1 == nullptr) {
 837        Bfree(b);
 838        return nullptr;
 839      }
 840      Bcopy(b1, b);
 841      Bfree(b);
 842      b = b1;
 843    }
 844    b->x[wds++] = static_cast<uint32_t>(carry);
 845    b->wds = wds;
 846  }
 847  return b;
 848}
 849
 850// convert a string s containing nd decimal digits (possibly containing a
 851// decimal separator at position nd0, which is ignored) to a Bigint.  This
 852// function carries on where the parsing code in strtod leaves off: on
 853// entry, y9 contains the result of converting the first 9 digits.  Returns
 854// nullptr on failure.
 855static Bigint* s2b(const char* s, int nd0, int nd, uint32_t y9) {
 856  int32_t x = (nd + 8) / 9;
 857  int k = 0;
 858  for (int32_t y = 1; x > y; y <<= 1, k++) {
 859  }
 860  Bigint* b = Balloc(k);
 861  if (b == nullptr) {
 862    return nullptr;
 863  }
 864  b->x[0] = y9;
 865  b->wds = 1;
 866
 867  if (nd <= 9) {
 868    return b;
 869  }
 870
 871  s += 9;
 872  int i;
 873  for (i = 9; i < nd0; i++) {
 874    b = multadd(b, 10, *s++ - '0');
 875    if (b == nullptr) {
 876      return nullptr;
 877    }
 878  }
 879  s++;
 880  for (; i < nd; i++) {
 881    b = multadd(b, 10, *s++ - '0');
 882    if (b == nullptr) {
 883      return nullptr;
 884    }
 885  }
 886  return b;
 887}
 888
 889// count leading 0 bits in the 32-bit integer x.
 890static int hi0bits(uint32_t x) {
 891  int k = 0;
 892
 893  if (!(x & 0xffff0000)) {
 894    k = 16;
 895    x <<= 16;
 896  }
 897  if (!(x & 0xff000000)) {
 898    k += 8;
 899    x <<= 8;
 900  }
 901  if (!(x & 0xf0000000)) {
 902    k += 4;
 903    x <<= 4;
 904  }
 905  if (!(x & 0xc0000000)) {
 906    k += 2;
 907    x <<= 2;
 908  }
 909  if (!(x & 0x80000000)) {
 910    k++;
 911    if (!(x & 0x40000000)) {
 912      return 32;
 913    }
 914  }
 915  return k;
 916}
 917
 918// count trailing 0 bits in the 32-bit integer y, and shift y right by that
 919// number of bits.
 920static int lo0bits(uint32_t* y) {
 921  uint32_t x = *y;
 922
 923  if (x & 7) {
 924    if (x & 1) {
 925      return 0;
 926    }
 927    if (x & 2) {
 928      *y = x >> 1;
 929      return 1;
 930    }
 931    *y = x >> 2;
 932    return 2;
 933  }
 934  int k = 0;
 935  if (!(x & 0xffff)) {
 936    k = 16;
 937    x >>= 16;
 938  }
 939  if (!(x & 0xff)) {
 940    k += 8;
 941    x >>= 8;
 942  }
 943  if (!(x & 0xf)) {
 944    k += 4;
 945    x >>= 4;
 946  }
 947  if (!(x & 0x3)) {
 948    k += 2;
 949    x >>= 2;
 950  }
 951  if (!(x & 1)) {
 952    k++;
 953    x >>= 1;
 954    if (!x) {
 955      return 32;
 956    }
 957  }
 958  *y = x;
 959  return k;
 960}
 961
 962// convert a small nonnegative integer to a Bigint
 963static Bigint* i2b(int i) {
 964  Bigint* b = Balloc(1);
 965  if (b == nullptr) {
 966    return nullptr;
 967  }
 968  b->x[0] = i;
 969  b->wds = 1;
 970  return b;
 971}
 972
 973// multiply two Bigints.  Returns a new Bigint, or nullptr on failure.  Ignores
 974// the signs of a and b.
 975static Bigint* mult(Bigint* a, Bigint* b) {
 976  Bigint* c;
 977  if ((!a->x[0] && a->wds == 1) || (!b->x[0] && b->wds == 1)) {
 978    c = Balloc(0);
 979    if (c == nullptr) {
 980      return nullptr;
 981    }
 982    c->wds = 1;
 983    c->x[0] = 0;
 984    return c;
 985  }
 986
 987  if (a->wds < b->wds) {
 988    c = a;
 989    a = b;
 990    b = c;
 991  }
 992  int k = a->k;
 993  int wa = a->wds;
 994  int wb = b->wds;
 995  int wc = wa + wb;
 996  if (wc > a->maxwds) {
 997    k++;
 998  }
 999  c = Balloc(k);
1000  if (c == nullptr) {
1001    return nullptr;
1002  }
1003  uint32_t* x;
1004  uint32_t* xa;
1005  for (x = c->x, xa = x + wc; x < xa; x++) {
1006    *x = 0;
1007  }
1008  xa = a->x;
1009  uint32_t* xae = xa + wa;
1010  uint32_t* xb = b->x;
1011  uint32_t* xbe = xb + wb;
1012  uint32_t* xc0 = c->x;
1013  uint32_t* xc;
1014  for (; xb < xbe; xc0++) {
1015    uint32_t y = *xb++;
1016    if (y) {
1017      x = xa;
1018      xc = xc0;
1019      uint64_t carry = 0;
1020      do {
1021        uint64_t z = *x++ * static_cast<uint64_t>(y) + *xc + carry;
1022        carry = z >> 32;
1023        *xc++ = static_cast<uint32_t>(z & kFfffffff);
1024      } while (x < xae);
1025      *xc = static_cast<uint32_t>(carry);
1026    }
1027  }
1028  for (xc0 = c->x, xc = xc0 + wc; wc > 0 && !*--xc; --wc) {
1029  }
1030  c->wds = wc;
1031  return c;
1032}
1033
1034// p5s is a linked list of powers of 5 of the form 5**(2**i), i >= 2
1035static Bigint* p5s;
1036
1037// multiply the Bigint b by 5**k.  Returns a pointer to the result, or nullptr
1038// on failure; if the returned pointer is distinct from b then the original
1039// Bigint b will have been Bfree'd.   Ignores the sign of b.
1040static Bigint* pow5mult(Bigint* b, int k) {
1041  static const int p05[3] = {5, 25, 125};
1042
1043  int i = k & 3;
1044  if (i) {
1045    b = multadd(b, p05[i - 1], 0);
1046    if (b == nullptr) {
1047      return nullptr;
1048    }
1049  }
1050
1051  if (!(k >>= 2)) {
1052    return b;
1053  }
1054  Bigint* p5 = p5s;
1055  if (!p5) {
1056    // first time
1057    p5 = i2b(625);
1058    if (p5 == nullptr) {
1059      Bfree(b);
1060      return nullptr;
1061    }
1062    p5s = p5;
1063    p5->next = nullptr;
1064  }
1065  for (;;) {
1066    if (k & 1) {
1067      Bigint* b1 = mult(b, p5);
1068      Bfree(b);
1069      b = b1;
1070      if (b == nullptr) {
1071        return nullptr;
1072      }
1073    }
1074    if (!(k >>= 1)) {
1075      break;
1076    }
1077    Bigint* p51 = p5->next;
1078    if (!p51) {
1079      p51 = mult(p5, p5);
1080      if (p51 == nullptr) {
1081        Bfree(b);
1082        return nullptr;
1083      }
1084      p51->next = nullptr;
1085      p5->next = p51;
1086    }
1087    p5 = p51;
1088  }
1089  return b;
1090}
1091
1092// shift a Bigint b left by k bits.  Return a pointer to the shifted result,
1093// or nullptr on failure.  If the returned pointer is distinct from b then the
1094// original b will have been Bfree'd.   Ignores the sign of b.
1095static Bigint* lshift(Bigint* b, int k) {
1096  if (!k || (!b->x[0] && b->wds == 1)) {
1097    return b;
1098  }
1099
1100  int n = k >> 5;
1101  int k1 = b->k;
1102  int n1 = n + b->wds + 1;
1103  for (int i = b->maxwds; n1 > i; i <<= 1) {
1104    k1++;
1105  }
1106  Bigint* b1 = Balloc(k1);
1107  if (b1 == nullptr) {
1108    Bfree(b);
1109    return nullptr;
1110  }
1111  uint32_t* x1 = b1->x;
1112  for (int i = 0; i < n; i++) {
1113    *x1++ = 0;
1114  }
1115  uint32_t* x = b->x;
1116  uint32_t* xe = x + b->wds;
1117  if (k &= 0x1f) {
1118    k1 = 32 - k;
1119    uint32_t z = 0;
1120    do {
1121      *x1++ = *x << k | z;
1122      z = *x++ >> k1;
1123    } while (x < xe);
1124    if ((*x1 = z)) {
1125      ++n1;
1126    }
1127  } else {
1128    do {
1129      *x1++ = *x++;
1130    } while (x < xe);
1131  }
1132  b1->wds = n1 - 1;
1133  Bfree(b);
1134  return b1;
1135}
1136
1137// Do a three-way compare of a and b, returning -1 if a < b, 0 if a == b and
1138// 1 if a > b.  Ignores signs of a and b.
1139static int cmp(Bigint* a, Bigint* b) {
1140  uint32_t *xa, *xa0, *xb, *xb0;
1141  int i, j;
1142
1143  i = a->wds;
1144  j = b->wds;
1145  DCHECK(i <= 1 || a->x[i - 1], "cmp called with a->x[a->wds-1] == 0");
1146  DCHECK(j <= 1 || b->x[j - 1], "cmp called with b->x[b->wds-1] == 0");
1147  if (i -= j) {
1148    return i;
1149  }
1150  xa0 = a->x;
1151  xa = xa0 + j;
1152  xb0 = b->x;
1153  xb = xb0 + j;
1154  for (;;) {
1155    if (*--xa != *--xb) {
1156      return *xa < *xb ? -1 : 1;
1157    }
1158    if (xa <= xa0) {
1159      break;
1160    }
1161  }
1162  return 0;
1163}
1164
1165// Take the difference of Bigints a and b, returning a new Bigint.  Returns
1166// nullptr on failure.  The signs of a and b are ignored, but the sign of the
1167// result is set appropriately.
1168static Bigint* diff(Bigint* a, Bigint* b) {
1169  Bigint* c;
1170  int i, wa, wb;
1171  uint32_t *xa, *xae, *xb, *xbe, *xc;
1172  uint64_t borrow, y;
1173
1174  i = cmp(a, b);
1175  if (!i) {
1176    c = Balloc(0);
1177    if (c == nullptr) {
1178      return nullptr;
1179    }
1180    c->wds = 1;
1181    c->x[0] = 0;
1182    return c;
1183  }
1184  if (i < 0) {
1185    c = a;
1186    a = b;
1187    b = c;
1188    i = 1;
1189  } else {
1190    i = 0;
1191  }
1192  c = Balloc(a->k);
1193  if (c == nullptr) {
1194    return nullptr;
1195  }
1196  c->sign = i;
1197  wa = a->wds;
1198  xa = a->x;
1199  xae = xa + wa;
1200  wb = b->wds;
1201  xb = b->x;
1202  xbe = xb + wb;
1203  xc = c->x;
1204  borrow = 0;
1205  do {
1206    y = static_cast<uint64_t>(*xa++) - *xb++ - borrow;
1207    borrow = y >> 32 & uint32_t{1};
1208    *xc++ = static_cast<uint32_t>(y & kFfffffff);
1209  } while (xb < xbe);
1210  while (xa < xae) {
1211    y = *xa++ - borrow;
1212    borrow = y >> 32 & uint32_t{1};
1213    *xc++ = static_cast<uint32_t>(y & kFfffffff);
1214  }
1215  while (!*--xc) {
1216    wa--;
1217  }
1218  c->wds = wa;
1219  return c;
1220}
1221
1222// Given a positive normal double x, return the difference between x and the
1223// next double up.  Doesn't give correct results for subnormals.
1224static double ulp(U* x) {
1225  int32_t big_l = (word0(x) & kExpMask) - (kP - 1) * kExpMsk1;
1226  U u;
1227  word0(&u) = big_l;
1228  word1(&u) = 0;
1229  return dval(&u);
1230}
1231
1232// Convert a Bigint to a double plus an exponent
1233static double b2d(Bigint* a, int* e) {
1234  uint32_t* xa0 = a->x;
1235  uint32_t* xa = xa0 + a->wds;
1236  uint32_t y = *--xa;
1237  DCHECK(y != 0, "zero y in b2d");
1238  int k = hi0bits(y);
1239  *e = 32 - k;
1240  U d;
1241  if (k < kEbits) {
1242    word0(&d) = kExp1 | y >> (kEbits - k);
1243    uint32_t w = xa > xa0 ? *--xa : 0;
1244    word1(&d) = y << ((32 - kEbits) + k) | w >> (kEbits - k);
1245    return dval(&d);
1246  }
1247  uint32_t z = xa > xa0 ? *--xa : 0;
1248  if (k -= kEbits) {
1249    word0(&d) = kExp1 | y << k | z >> (32 - k);
1250    y = xa > xa0 ? *--xa : 0;
1251    word1(&d) = z << k | y >> (32 - k);
1252  } else {
1253    word0(&d) = kExp1 | y;
1254    word1(&d) = z;
1255  }
1256  return dval(&d);
1257}
1258
1259// Convert a scaled double to a Bigint plus an exponent.  Similar to d2b,
1260// except that it accepts the scale parameter used in strtod (which
1261// should be either 0 or 2*kP), and the normalization for the return value is
1262// different (see below).  On input, d should be finite and nonnegative, and d
1263// / 2**scale should be exactly representable as an IEEE 754 double.
1264//
1265// Returns a Bigint b and an integer e such that
1266//
1267//   dval(d) / 2**scale = b * 2**e.
1268//
1269// Unlike d2b, b is not necessarily odd: b and e are normalized so
1270// that either 2**(kP-1) <= b < 2**kP and e >= kEtiny, or b < 2**kP
1271// and e == kEtiny.  This applies equally to an input of 0.0: in that
1272// case the return values are b = 0 and e = kEtiny.
1273//
1274// The above normalization ensures that for all possible inputs d,
1275// 2**e gives ulp(d/2**scale).
1276//
1277// Returns nullptr on failure.
1278static Bigint* sd2b(U* d, int scale, int* e) {
1279  Bigint* b = Balloc(1);
1280  if (b == nullptr) {
1281    return nullptr;
1282  }
1283
1284  // First construct b and e assuming that scale == 0.
1285  b->wds = 2;
1286  b->x[0] = word1(d);
1287  b->x[1] = word0(d) & kFracMask;
1288  *e = kEtiny - 1 + static_cast<int>((word0(d) & kExpMask) >> kExpShift);
1289  if (*e < kEtiny) {
1290    *e = kEtiny;
1291  } else {
1292    b->x[1] |= kExpMsk1;
1293  }
1294
1295  // Now adjust for scale, provided that b != 0.
1296  if (scale && (b->x[0] || b->x[1])) {
1297    *e -= scale;
1298    if (*e < kEtiny) {
1299      scale = kEtiny - *e;
1300      *e = kEtiny;
1301      // We can't shift more than kP-1 bits without shifting out a 1.
1302      DCHECK(0 < scale && scale <= kP - 1, "unexpected scale");
1303      if (scale >= 32) {
1304        // The bits shifted out should all be zero.
1305        DCHECK(b->x[0] == 0, "unexpected bits");
1306        b->x[0] = b->x[1];
1307        b->x[1] = 0;
1308        scale -= 32;
1309      }
1310      if (scale) {
1311        // The bits shifted out should all be zero.
1312        DCHECK(b->x[0] << (32 - scale) == 0, "unexpected bits");
1313        b->x[0] = (b->x[0] >> scale) | (b->x[1] << (32 - scale));
1314        b->x[1] >>= scale;
1315      }
1316    }
1317  }
1318  // Ensure b is normalized.
1319  if (!b->x[1]) {
1320    b->wds = 1;
1321  }
1322
1323  return b;
1324}
1325
1326// Convert a double to a Bigint plus an exponent.  Return nullptr on failure.
1327//
1328// Given a finite nonzero double d, return an odd Bigint b and exponent *e
1329// such that fabs(d) = b * 2**e.  On return, *bbits gives the number of
1330// significant bits of b; that is, 2**(*bbits-1) <= b < 2**(*bbits).
1331//
1332// If d is zero, then b == 0, *e == -1010, *bbits = 0.
1333static Bigint* d2b(U* d, int* e, int* bits) {
1334  Bigint* b = Balloc(1);
1335  if (b == nullptr) {
1336    return nullptr;
1337  }
1338  uint32_t* x = b->x;
1339
1340  uint32_t z = word0(d) & kFracMask;
1341  word0(d) &= 0x7fffffff;  // clear sign bit, which we ignore
1342  int de = static_cast<int>(word0(d) >> kExpShift);
1343  if (de) {
1344    z |= kExpMsk1;
1345  }
1346  uint32_t y = word1(d);
1347  int i;
1348  int k;
1349  if (y) {
1350    if ((k = lo0bits(&y))) {
1351      x[0] = y | z << (32 - k);
1352      z >>= k;
1353    } else {
1354      x[0] = y;
1355    }
1356    i = b->wds = (x[1] = z) ? 2 : 1;
1357  } else {
1358    k = lo0bits(&z);
1359    x[0] = z;
1360    i = b->wds = 1;
1361    k += 32;
1362  }
1363  if (de) {
1364    *e = de - kBias - (kP - 1) + k;
1365    *bits = kP - k;
1366  } else {
1367    *e = de - kBias - (kP - 1) + 1 + k;
1368    *bits = 32 * i - hi0bits(x[i - 1]);
1369  }
1370  return b;
1371}
1372
1373// Compute the ratio of two Bigints, as a double.  The result may have an
1374// error of up to 2.5 ulps.
1375static double ratio(Bigint* a, Bigint* b) {
1376  U da;
1377  int ka;
1378  dval(&da) = b2d(a, &ka);
1379  U db;
1380  int kb;
1381  dval(&db) = b2d(b, &kb);
1382  int k = ka - kb + 32 * (a->wds - b->wds);
1383  if (k > 0) {
1384    word0(&da) += k * kExpMsk1;
1385  } else {
1386    k = -k;
1387    word0(&db) += k * kExpMsk1;
1388  }
1389  return dval(&da) / dval(&db);
1390}
1391
1392static const double kTens[] = {1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,
1393                               1e8,  1e9,  1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
1394                               1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
1395
1396static const double kBigTens[] = {1e16, 1e32, 1e64, 1e128, 1e256};
1397static const double kTinyTens[] = {
1398    1e-16, 1e-32, 1e-64, 1e-128, 9007199254740992. * 9007199254740992.e-256
1399    // = 2^106 * 1e-256
1400};
1401// The factor of 2^53 in kTinyTens[4] helps us avoid setting the underflow
1402// flag unnecessarily.  It leads to a song and dance at the end of strtod.
1403static const int kScaleBit = 0x10;
1404
1405static const int kKmask = 31;
1406
1407static int dshift(Bigint* b, int p2) {
1408  int rv = hi0bits(b->x[b->wds - 1]) - 4;
1409  if (p2 > 0) {
1410    rv -= p2;
1411  }
1412  return rv & kKmask;
1413}
1414
1415// special case of Bigint division.  The quotient is always in the range 0 <=
1416// quotient < 10, and on entry the divisor big_s is normalized so that its top 4
1417// bits (28--31) are zero and bit 27 is set.
1418static int quorem(Bigint* b, Bigint* big_s) {
1419  int n = big_s->wds;
1420  DCHECK(b->wds <= n, "oversize b in quorem");
1421  if (b->wds < n) {
1422    return 0;
1423  }
1424  uint32_t* sx = big_s->x;
1425  uint32_t* sxe = sx + --n;
1426  uint32_t* bx = b->x;
1427  uint32_t* bxe = bx + n;
1428  uint32_t q = *bxe / (*sxe + 1);  // ensure q <= true quotient.
1429  DCHECK(q <= 9, "oversized quotient in quorem");
1430  if (q) {
1431    uint64_t borrow = 0;
1432    uint64_t carry = 0;
1433    do {
1434      uint64_t ys = *sx++ * static_cast<uint64_t>(q) + carry;
1435      carry = ys >> 32;
1436      uint64_t y = *bx - (ys & kFfffffff) - borrow;
1437      borrow = y >> 32 & uint32_t{1};
1438      *bx++ = static_cast<uint32_t>(y & kFfffffff);
1439    } while (sx <= sxe);
1440    if (!*bxe) {
1441      bx = b->x;
1442      while (--bxe > bx && !*bxe) {
1443        --n;
1444      }
1445      b->wds = n;
1446    }
1447  }
1448  if (cmp(b, big_s) >= 0) {
1449    q++;
1450    uint64_t borrow = 0;
1451    uint64_t carry = 0;
1452    bx = b->x;
1453    sx = big_s->x;
1454    do {
1455      uint64_t ys = *sx++ + carry;
1456      carry = ys >> 32;
1457      uint64_t y = *bx - (ys & kFfffffff) - borrow;
1458      borrow = y >> 32 & uint32_t{1};
1459      *bx++ = static_cast<uint32_t>(y & kFfffffff);
1460    } while (sx <= sxe);
1461    bx = b->x;
1462    bxe = bx + n;
1463    if (!*bxe) {
1464      while (--bxe > bx && !*bxe) {
1465        --n;
1466      }
1467      b->wds = n;
1468    }
1469  }
1470  return q;
1471}
1472
1473// sulp(x) is a version of ulp(x) that takes bc.scale into account.
1474//
1475// Assuming that x is finite and nonnegative (positive zero is fine
1476// here) and x / 2^bc.scale is exactly representable as a double,
1477// sulp(x) is equivalent to 2^bc.scale * ulp(x / 2^bc.scale).
1478static double sulp(U* x, BCinfo* bc) {
1479  if (bc->scale &&
1480      2 * kP + 1 > static_cast<int>((word0(x) & kExpMask) >> kExpShift)) {
1481    // rv/2^bc->scale is subnormal
1482    U u;
1483    word0(&u) = (kP + 2) * kExpMsk1;
1484    word1(&u) = 0;
1485    return u.d;
1486  }
1487  DCHECK(word0(x) || word1(x), "should not be zero");  // x != 0.0
1488  return ulp(x);
1489}
1490
1491// The bigcomp function handles some hard cases for strtod, for inputs
1492// with more than kStrtodDiglim digits.  It's called once an initial
1493// estimate for the double corresponding to the input string has
1494// already been obtained by the code in strtod.
1495//
1496// The bigcomp function is only called after strtod has found a
1497// double value rv such that either rv or rv + 1ulp represents the
1498// correctly rounded value corresponding to the original string.  It
1499// determines which of these two values is the correct one by
1500// computing the decimal digits of rv + 0.5ulp and comparing them with
1501// the corresponding digits of s0.
1502//
1503// In the following, write dv for the absolute value of the number represented
1504// by the input string.
1505//
1506// Inputs:
1507//
1508//   s0 points to the first significant digit of the input string.
1509//
1510//   rv is a (possibly scaled) estimate for the closest double value to the
1511//      value represented by the original input to strtod.  If
1512//      bc->scale is nonzero, then rv/2^(bc->scale) is the approximation to
1513//      the input value.
1514//
1515//   bc is a struct containing information gathered during the parsing and
1516//      estimation steps of strtod.  Description of fields follows:
1517//
1518//      bc->e0 gives the exponent of the input value, such that dv = (integer
1519//         given by the bd->nd digits of s0) * 10**e0
1520//
1521//      bc->nd gives the total number of significant digits of s0.  It will
1522//         be at least 1.
1523//
1524//      bc->nd0 gives the number of significant digits of s0 before the
1525//         decimal separator.  If there's no decimal separator, bc->nd0 ==
1526//         bc->nd.
1527//
1528//      bc->scale is the value used to scale rv to avoid doing arithmetic with
1529//         subnormal values.  It's either 0 or 2*kP (=106).
1530//
1531// Outputs:
1532//
1533//   On successful exit, rv/2^(bc->scale) is the closest double to dv.
1534//
1535//   Returns 0 on success, -1 on failure (e.g., due to a failed malloc call).
1536static int bigcomp(U* rv, const char* s0, BCinfo* bc) {
1537  int nd = bc->nd;
1538  int nd0 = bc->nd0;
1539  int p5 = nd + bc->e0;
1540  int p2;
1541  Bigint* b = sd2b(rv, bc->scale, &p2);
1542  if (b == nullptr) {
1543    return -1;
1544  }
1545
1546  // record whether the lsb of rv/2^(bc->scale) is odd:  in the exact halfway
1547  // case, this is used for round to even.
1548  int odd = b->x[0] & 1;
1549
1550  // left shift b by 1 bit and or a 1 into the least significant bit;
1551  // this gives us b * 2**p2 = rv/2^(bc->scale) + 0.5 ulp.
1552  b = lshift(b, 1);
1553  if (b == nullptr) {
1554    return -1;
1555  }
1556  b->x[0] |= 1;
1557  p2--;
1558
1559  p2 -= p5;
1560  Bigint* d = i2b(1);
1561  if (d == nullptr) {
1562    Bfree(b);
1563    return -1;
1564  }
1565  // Arrange for convenient computation of quotients:
1566  // shift left if necessary so divisor has 4 leading 0 bits.
1567  if (p5 > 0) {
1568    d = pow5mult(d, p5);
1569    if (d == nullptr) {
1570      Bfree(b);
1571      return -1;
1572    }
1573  } else if (p5 < 0) {
1574    b = pow5mult(b, -p5);
1575    if (b == nullptr) {
1576      Bfree(d);
1577      return -1;
1578    }
1579  }
1580  int b2;
1581  int d2;
1582  if (p2 > 0) {
1583    b2 = p2;
1584    d2 = 0;
1585  } else {
1586    b2 = 0;
1587    d2 = -p2;
1588  }
1589  int i = dshift(d, d2);
1590  if ((b2 += i) > 0) {
1591    b = lshift(b, b2);
1592    if (b == nullptr) {
1593      Bfree(d);
1594      return -1;
1595    }
1596  }
1597  if ((d2 += i) > 0) {
1598    d = lshift(d, d2);
1599    if (d == nullptr) {
1600      Bfree(b);
1601      return -1;
1602    }
1603  }
1604
1605  // Compare s0 with b/d: set dd to -1, 0, or 1 according as s0 < b/d, s0 ==
1606  // b/d, or s0 > b/d.  Here the digits of s0 are thought of as representing
1607  // a number in the range [0.1, 1).
1608  int dd;
1609  if (cmp(b, d) >= 0) {  // b/d >= 1
1610    dd = -1;
1611  } else {
1612    i = 0;
1613    for (;;) {
1614      b = multadd(b, 10, 0);
1615      if (b == nullptr) {
1616        Bfree(d);
1617        return -1;
1618      }
1619      dd = s0[i < nd0 ? i : i + 1] - '0' - quorem(b, d);
1620      i++;
1621
1622      if (dd) {
1623        break;
1624      }
1625      if (!b->x[0] && b->wds == 1) {
1626        // b/d == 0
1627        dd = i < nd;
1628        break;
1629      }
1630      if (!(i < nd)) {
1631        // b/d != 0, but digits of s0 exhausted
1632        dd = -1;
1633        break;
1634      }
1635    }
1636  }
1637  Bfree(b);
1638  Bfree(d);
1639  if (dd > 0 || (dd == 0 && odd)) {
1640    dval(rv) += sulp(rv, bc);
1641  }
1642  return 0;
1643}
1644
1645static double strtod(const char* s00, char** se) {
1646  int bb2, bb5, bbe, bd2, bd5, bs2, dsign, e1, error;
1647  int i, j, k, nd, nd0, odd;
1648  double aadj, aadj1;
1649  U aadj2, adj, rv0;
1650  uint32_t y, z;
1651  BCinfo bc;
1652  Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
1653
1654  U rv;
1655  dval(&rv) = 0.;
1656
1657  // Start parsing.
1658  const char* s = s00;
1659  int c = *s;
1660
1661  // Parse optional sign, if present.
1662  bool sign = false;
1663  switch (c) {
1664    case '-':
1665      sign = true;
1666      FALLTHROUGH;
1667    case '+':
1668      c = *++s;
1669  }
1670
1671  // Skip leading zeros: lz is true iff there were leading zeros.
1672  const char* s1 = s;
1673  while (c == '0') {
1674    c = *++s;
1675  }
1676  bool lz = s != s1;
1677
1678  // Point s0 at the first nonzero digit (if any).  fraclen will be the
1679  // number of digits between the decimal point and the end of the
1680  // digit string.  ndigits will be the total number of digits ignoring
1681  // leading zeros.
1682  const char* s0 = s1 = s;
1683  while ('0' <= c && c <= '9') {
1684    c = *++s;
1685  }
1686  size_t ndigits = s - s1;
1687  size_t fraclen = 0;
1688
1689  // Parse decimal point and following digits.
1690  if (c == '.') {
1691    c = *++s;
1692    if (!ndigits) {
1693      s1 = s;
1694      while (c == '0') {
1695        c = *++s;
1696      }
1697      lz = lz || s != s1;
1698      fraclen += (s - s1);
1699      s0 = s;
1700    }
1701    s1 = s;
1702    while ('0' <= c && c <= '9') {
1703      c = *++s;
1704    }
1705    ndigits += s - s1;
1706    fraclen += s - s1;
1707  }
1708
1709  // Now lz is true if and only if there were leading zero digits, and
1710  // ndigits gives the total number of digits ignoring leading zeros.  A
1711  // valid input must have at least one digit.
1712  if (!ndigits && !lz) {
1713    if (se) {
1714      *se = const_cast<char*>(s00);
1715    }
1716    return 0.0;
1717  }
1718
1719  // Range check ndigits and fraclen to make sure that they, and values
1720  // computed with them, can safely fit in an int.
1721  if (ndigits > kMaxDigits || fraclen > kMaxDigits) {
1722    if (se) {
1723      *se = const_cast<char*>(s00);
1724    }
1725    return 0.0;
1726  }
1727  nd = static_cast<int>(ndigits);
1728  nd0 = nd - static_cast<int>(fraclen);
1729
1730  // Parse exponent.
1731  int e = 0;
1732  if (c == 'e' || c == 'E') {
1733    s00 = s;
1734    c = *++s;
1735
1736    // Exponent sign.
1737    bool esign = false;
1738    switch (c) {
1739      case '-':
1740        esign = true;
1741        // fall through
1742      case '+':
1743        c = *++s;
1744    }
1745
1746    // Skip zeros.  lz is true iff there are leading zeros.
1747    s1 = s;
1748    while (c == '0') {
1749      c = *++s;
1750    }
1751    lz = s != s1;
1752
1753    // Get absolute value of the exponent.
1754    s1 = s;
1755    uint32_t abs_exp = 0;
1756    while ('0' <= c && c <= '9') {
1757      abs_exp = 10 * abs_exp + (c - '0');
1758      c = *++s;
1759    }
1760
1761    // abs_exp will be correct modulo 2**32.  But 10**9 < 2**32, so if
1762    // there are at most 9 significant exponent digits then overflow is
1763    // impossible.
1764    if (s - s1 > 9 || abs_exp > kMaxAbsExp) {
1765      e = kMaxAbsExp;
1766    } else {
1767      e = static_cast<int>(abs_exp);
1768    }
1769    if (esign) {
1770      e = -e;
1771    }
1772
1773    // A valid exponent must have at least one digit.
1774    if (s == s1 && !lz) {
1775      s = s00;
1776    }
1777  }
1778
1779  // Adjust exponent to take into account position of the point.
1780  e -= nd - nd0;
1781  if (nd0 <= 0) {
1782    nd0 = nd;
1783  }
1784
1785  // Finished parsing.  Set se to indicate how far we parsed
1786  if (se) {
1787    *se = const_cast<char*>(s);
1788  }
1789
1790  // If all digits were zero, exit with return value +-0.0.  Otherwise,
1791  // strip trailing zeros: scan back until we hit a nonzero digit.
1792  if (!nd) {
1793    goto ret;
1794  }
1795  for (i = nd; i > 0;) {
1796    --i;
1797    if (s0[i < nd0 ? i : i + 1] != '0') {
1798      ++i;
1799      break;
1800    }
1801  }
1802  e += nd - i;
1803  nd = i;
1804  if (nd0 > nd) {
1805    nd0 = nd;
1806  }
1807
1808  // Summary of parsing results.  After parsing, and dealing with zero
1809  // inputs, we have values s0, nd0, nd, e, sign, where:
1810  //
1811  //  - s0 points to the first significant digit of the input string
1812  //
1813  //  - nd is the total number of significant digits (here, and
1814  //    below, 'significant digits' means the set of digits of the
1815  //    significand of the input that remain after ignoring leading
1816  //    and trailing zeros).
1817  //
1818  //  - nd0 indicates the position of the decimal point, if present; it
1819  //    satisfies 1 <= nd0 <= nd.  The nd significant digits are in
1820  //    s0[0:nd0] and s0[nd0+1:nd+1] using the usual Python half-open slice
1821  //    notation.  (If nd0 < nd, then s0[nd0] contains a '.'  character; if
1822  //    nd0 == nd, then s0[nd0] could be any non-digit character.)
1823  //
1824  //  - e is the adjusted exponent: the absolute value of the number
1825  //    represented by the original input string is n * 10**e, where
1826  //    n is the integer represented by the concatenation of
1827  //    s0[0:nd0] and s0[nd0+1:nd+1]
1828  //
1829  //  - sign gives the sign of the input:  1 for negative, 0 for positive
1830  //
1831  //  - the first and last significant digits are nonzero
1832
1833  // put first DBL_DIG+1 digits into integer y and z.
1834  //
1835  //  - y contains the value represented by the first min(9, nd)
1836  //    significant digits
1837  //
1838  //  - if nd > 9, z contains the value represented by significant digits
1839  //    with indices in [9, min(16, nd)).  So y * 10**(min(16, nd) - 9) + z
1840  //    gives the value represented by the first min(16, nd) sig. digits.
1841
1842  bc.e0 = e1 = e;
1843  y = z = 0;
1844  for (i = 0; i < nd; i++) {
1845    if (i < 9) {
1846      y = 10 * y + s0[i < nd0 ? i : i + 1] - '0';
1847    } else if (i < DBL_DIG + 1) {
1848      z = 10 * z + s0[i < nd0 ? i : i + 1] - '0';
1849    } else {
1850      break;
1851    }
1852  }
1853
1854  k = nd < DBL_DIG + 1 ? nd : DBL_DIG + 1;
1855  dval(&rv) = y;
1856  if (k > 9) {
1857    dval(&rv) = kTens[k - 9] * dval(&rv) + z;
1858  }
1859  bd0 = nullptr;
1860  if (nd <= DBL_DIG && FLT_ROUNDS == 1) {
1861    if (!e) {
1862      goto ret;
1863    }
1864    if (e > 0) {
1865      if (e <= kTenPmax) {
1866        dval(&rv) *= kTens[e];
1867        goto ret;
1868      }
1869      i = DBL_DIG - nd;
1870      if (e <= kTenPmax + i) {
1871        // A fancier test would sometimes let us do
1872        // this for larger i values.
1873        e -= i;
1874        dval(&rv) *= kTens[i];
1875        dval(&rv) *= kTens[e];
1876        goto ret;
1877      }
1878    } else if (e >= -kTenPmax) {
1879      dval(&rv) /= kTens[-e];
1880      goto ret;
1881    }
1882  }
1883  e1 += nd - k;
1884
1885  bc.scale = 0;
1886
1887  // Get starting approximation = rv * 10**e1
1888
1889  if (e1 > 0) {
1890    if ((i = e1 & 15)) {
1891      dval(&rv) *= kTens[i];
1892    }
1893    if (e1 &= ~15) {
1894      if (e1 > DBL_MAX_10_EXP) {
1895        goto ovfl;
1896      }
1897      e1 >>= 4;
1898      for (j = 0; e1 > 1; j++, e1 >>= 1) {
1899        if (e1 & 1) {
1900          dval(&rv) *= kBigTens[j];
1901        }
1902      }
1903      // The last multiplication could overflow.
1904      word0(&rv) -= kP * kExpMsk1;
1905      dval(&rv) *= kBigTens[j];
1906      if ((z = word0(&rv) & kExpMask) > kExpMsk1 * (DBL_MAX_EXP + kBias - kP)) {
1907        goto ovfl;
1908      }
1909      if (z > kExpMsk1 * (DBL_MAX_EXP + kBias - 1 - kP)) {
1910        // set to largest number (Can't trust DBL_MAX)
1911        word0(&rv) = kBig0;
1912        word1(&rv) = kBig1;
1913      } else {
1914        word0(&rv) += kP * kExpMsk1;
1915      }
1916    }
1917  } else if (e1 < 0) {
1918    // The input decimal value lies in [10**e1, 10**(e1+16)).
1919
1920    // If e1 <= -512, underflow immediately.
1921    // If e1 <= -256, set bc.scale to 2*kP.
1922
1923    // So for input value < 1e-256, bc.scale is always set;
1924    // for input value >= 1e-240, bc.scale is never set.
1925    // For input values in [1e-256, 1e-240), bc.scale may or may
1926    // not be set.
1927
1928    e1 = -e1;
1929    if ((i = e1 & 15)) {
1930      dval(&rv) /= kTens[i];
1931    }
1932    if (e1 >>= 4) {
1933      if (e1 >= 1 << ARRAYSIZE(kBigTens)) {
1934        goto undfl;
1935      }
1936      if (e1 & kScaleBit) {
1937        bc.scale = 2 * kP;
1938      }
1939      for (j = 0; e1 > 0; j++, e1 >>= 1) {
1940        if (e1 & 1) {
1941          dval(&rv) *= kTinyTens[j];
1942        }
1943      }
1944      if (bc.scale &&
1945          (j = 2 * kP + 1 - ((word0(&rv) & kExpMask) >> kExpShift)) > 0) {
1946        // scaled rv is denormal; clear j low bits
1947        if (j >= 32) {
1948          word1(&rv) = 0;
1949          if (j >= 53) {
1950            word0(&rv) = (kP + 2) * kExpMsk1;
1951          } else {
1952            word0(&rv) &= 0xffffffff << (j - 32);
1953          }
1954        } else {
1955          word1(&rv) &= 0xffffffff << j;
1956        }
1957      }
1958      if (!dval(&rv)) {
1959        goto undfl;
1960      }
1961    }
1962  }
1963
1964  // Now the hard part -- adjusting rv to the correct value.
1965
1966  // Put digits into bd: true value = bd * 10^e
1967
1968  bc.nd = nd;
1969  bc.nd0 = nd0;  // Only needed if nd > kStrtodDiglim, but done here
1970                 // to silence an erroneous warning about bc.nd0
1971                 // possibly not being initialized.
1972  if (nd > kStrtodDiglim) {
1973    // ASSERT(kStrtodDiglim >= 18); 18 == one more than the
1974    // minimum number of decimal digits to distinguish double values
1975    // in IEEE arithmetic.
1976
1977    // Truncate input to 18 significant digits, then discard any trailing
1978    // zeros on the result by updating nd, nd0, e and y suitably. (There's
1979    // no need to update z; it's not reused beyond this point.)
1980    for (i = 18; i > 0;) {
1981      // scan back until we hit a nonzero digit.  significant digit 'i'
1982      // is s0[i] if i < nd0, s0[i+1] if i >= nd0.
1983      --i;
1984      if (s0[i < nd0 ? i : i + 1] != '0') {
1985        ++i;
1986        break;
1987      }
1988    }
1989    e += nd - i;
1990    nd = i;
1991    if (nd0 > nd) {
1992      nd0 = nd;
1993    }
1994    if (nd < 9) {  // must recompute y
1995      y = 0;
1996      for (i = 0; i < nd0; ++i) {
1997        y = 10 * y + s0[i] - '0';
1998      }
1999      for (; i < nd; ++i) {
2000        y = 10 * y + s0[i + 1] - '0';
2001      }
2002    }
2003  }
2004  bd0 = s2b(s0, nd0, nd, y);
2005  if (bd0 == nullptr) {
2006    goto failed_malloc;
2007  }
2008
2009  // Notation for the comments below.  Write:
2010  //
2011  //   - dv for the absolute value of the number represented by the original
2012  //     decimal input string.
2013  //
2014  //   - if we've truncated dv, write tdv for the truncated value.
2015  //     Otherwise, set tdv == dv.
2016  //
2017  //   - srv for the quantity rv/2^bc.scale; so srv is the current binary
2018  //     approximation to tdv (and dv).  It should be exactly representable
2019  //     in an IEEE 754 double.
2020  for (;;) {
2021    // This is the main correction loop for strtod.
2022    //
2023    // We've got a decimal value tdv, and a floating-point approximation
2024    // srv=rv/2^bc.scale to tdv.  The aim is to determine whether srv is
2025    // close enough (i.e., within 0.5 ulps) to tdv, and to compute a new
2026    // approximation if not.
2027    //
2028    // To determine whether srv is close enough to tdv, compute integers
2029    // bd, bb and bs proportional to tdv, srv and 0.5 ulp(srv)
2030    // respectively, and then use integer arithmetic to determine whether
2031    // |tdv - srv| is less than, equal to, or greater than 0.5 ulp(srv).
2032
2033    bd = Balloc(bd0->k);
2034    if (bd == nullptr) {
2035      Bfree(bd0);
2036      goto failed_malloc;
2037    }
2038    Bcopy(bd, bd0);
2039    bb = sd2b(&rv, bc.scale, &bbe);  // srv = bb * 2^bbe
2040    if (bb == nullptr) {
2041      Bfree(bd);
2042      Bfree(bd0);
2043      goto failed_malloc;
2044    }
2045    // Record whether lsb of bb is odd, in case we need this
2046    // for the round-to-even step later.
2047    odd = bb->x[0] & 1;
2048
2049    // tdv = bd * 10**e;  srv = bb * 2**bbe
2050    bs = i2b(1);
2051    if (bs == nullptr) {
2052      Bfree(bb);
2053      Bfree(bd);
2054      Bfree(bd0);
2055      goto failed_malloc;
2056    }
2057
2058    if (e >= 0) {
2059      bb2 = bb5 = 0;
2060      bd2 = bd5 = e;
2061    } else {
2062      bb2 = bb5 = -e;
2063      bd2 = bd5 = 0;
2064    }
2065    if (bbe >= 0) {
2066      bb2 += bbe;
2067    } else {
2068      bd2 -= bbe;
2069    }
2070    bs2 = bb2;
2071    bb2++;
2072    bd2++;
2073
2074    // At this stage bd5 - bb5 == e == bd2 - bb2 + bbe, bb2 - bs2 == 1,
2075    // and bs == 1, so:
2076    //
2077    //    tdv == bd * 10**e = bd * 2**(bbe - bb2 + bd2) * 5**(bd5 - bb5)
2078    //    srv == bb * 2**bbe = bb * 2**(bbe - bb2 + bb2)
2079    //    0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2)
2080    //
2081    // It follows that:
2082    //
2083    //    M * tdv = bd * 2**bd2 * 5**bd5
2084    //    M * srv = bb * 2**bb2 * 5**bb5
2085    //    M * 0.5 ulp(srv) = bs * 2**bs2 * 5**bb5
2086    //
2087    // for some constant M.  (Actually, M == 2**(bb2 - bbe) * 5**bb5, but
2088    // this fact is not needed below.)
2089
2090    // Remove factor of 2**i, where i = min(bb2, bd2, bs2).
2091    i = bb2 < bd2 ? bb2 : bd2;
2092    if (i > bs2) {
2093      i = bs2;
2094    }
2095    if (i > 0) {
2096      bb2 -= i;
2097      bd2 -= i;
2098      bs2 -= i;
2099    }
2100
2101    // Scale bb, bd, bs by the appropriate powers of 2 and 5.
2102    if (bb5 > 0) {
2103      bs = pow5mult(bs, bb5);
2104      if (bs == nullptr) {
2105        Bfree(bb);
2106        Bfree(bd);
2107        Bfree(bd0);
2108        goto failed_malloc;
2109      }
2110      bb1 = mult(bs, bb);
2111      Bfree(bb);
2112      bb = bb1;
2113      if (bb == nullptr) {
2114        Bfree(bs);
2115        Bfree(bd);
2116        Bfree(bd0);
2117        goto failed_malloc;
2118      }
2119    }
2120    if (bb2 > 0) {
2121      bb = lshift(bb, bb2);
2122      if (bb == nullptr) {
2123        Bfree(bs);
2124        Bfree(bd);
2125        Bfree(bd0);
2126        goto failed_malloc;
2127      }
2128    }
2129    if (bd5 > 0) {
2130      bd = pow5mult(bd, bd5);
2131      if (bd == nullptr) {
2132        Bfree(bb);
2133        Bfree(bs);
2134        Bfree(bd0);
2135        goto failed_malloc;
2136      }
2137    }
2138    if (bd2 > 0) {
2139      bd = lshift(bd, bd2);
2140      if (bd == nullptr) {
2141        Bfree(bb);
2142        Bfree(bs);
2143        Bfree(bd0);
2144        goto failed_malloc;
2145      }
2146    }
2147    if (bs2 > 0) {
2148      bs = lshift(bs, bs2);
2149      if (bs == nullptr) {
2150        Bfree(bb);
2151        Bfree(bd);
2152        Bfree(bd0);
2153        goto failed_malloc;
2154      }
2155    }
2156
2157    // Now bd, bb and bs are scaled versions of tdv, srv and 0.5 ulp(srv),
2158    // respectively.  Compute the difference |tdv - srv|, and compare
2159    // with 0.5 ulp(srv).
2160
2161    delta = diff(bb, bd);
2162    if (delta == nullptr) {
2163      Bfree(bb);
2164      Bfree(bs);
2165      Bfree(bd);
2166      Bfree(bd0);
2167      goto failed_malloc;
2168    }
2169    dsign = delta->sign;
2170    delta->sign = 0;
2171    i = cmp(delta, bs);
2172    if (bc.nd > nd && i <= 0) {
2173      if (dsign) {
2174        break;  // Must use bigcomp().
2175      }
2176
2177      // Here rv overestimates the truncated decimal value by at most
2178      // 0.5 ulp(rv).  Hence rv either overestimates the true decimal
2179      // value by <= 0.5 ulp(rv), or underestimates it by some small
2180      // amount (< 0.1 ulp(rv)); either way, rv is within 0.5 ulps of
2181      // the true decimal value, so it's possible to exit.
2182      //
2183      // Exception: if scaled rv is a normal exact power of 2, but not
2184      // DBL_MIN, then rv - 0.5 ulp(rv) takes us all the way down to the
2185      // next double, so the correctly rounded result is either rv - 0.5
2186      // ulp(rv) or rv; in this case, use bigcomp to distinguish.
2187
2188      if (!word1(&rv) && !(word0(&rv) & kBndryMask)) {
2189        // rv can't be 0, since it's an overestimate for some
2190        // nonzero value.  So rv is a normal power of 2.
2191        j = static_cast<int>((word0(&rv) & kExpMask) >> kExpShift);
2192        // rv / 2^bc.scale = 2^(j - 1023 - bc.scale); use bigcomp if
2193        // rv / 2^bc.scale >= 2^-1021.
2194        if (j - bc.scale >= 2) {
2195          dval(&rv) -= 0.5 * sulp(&rv, &bc);
2196          break;  // Use bigcomp.
2197        }
2198      }
2199
2200      {
2201        bc.nd = nd;
2202        i = -1;  // Discarded digits make delta smaller.
2203      }
2204    }
2205
2206    if (i < 0) {
2207      // Error is less than half an ulp -- check for
2208      // special case of mantissa a power of two.
2209      if (dsign || word1(&rv) || word0(&rv) & kBndryMask ||
2210          (word0(&rv) & kExpMask) <= (2 * kP + 1) * kExpMsk1) {
2211        break;
2212      }
2213      if (!delta->x[0] && delta->wds <= 1) {
2214        // exact result
2215        break;
2216      }
2217      delta = lshift(delta, kLog2P);
2218      if (delta == nullptr) {
2219        Bfree(bb);
2220        Bfree(bs);
2221        Bfree(bd);
2222        Bfree(bd0);
2223        goto failed_malloc;
2224      }
2225      if (cmp(delta, bs) > 0) {
2226        goto drop_down;
2227      }
2228      break;
2229    }
2230    if (i == 0) {
2231      // exactly half-way between
2232      if (dsign) {
2233        if ((word0(&rv) & kBndryMask1) == kBndryMask1 &&
2234            word1(&rv) ==
2235                ((bc.scale && (y = word0(&rv) & kExpMask) <= 2 * kP * kExpMsk1)
2236                     ? (0xffffffff &
2237                        (0xffffffff << (2 * kP + 1 - (y >> kExpShift))))
2238                     : 0xffffffff)) {
2239          // boundary case -- increment exponent
2240          word0(&rv) = (word0(&rv) & kExpMask) + kExpMsk1;
2241          word1(&rv) = 0;
2242          // dsign = 0;
2243          break;
2244        }
2245      } else if (!(word0(&rv) & kBndryMask) && !word1(&rv)) {
2246      drop_down:
2247        // boundary case -- decrement exponent
2248        if (bc.scale) {
2249          int32_t big_l = word0(&rv) & kExpMask;
2250          if (big_l <= (2 * kP + 1) * kExpMsk1) {
2251            if (big_l > (kP + 2) * kExpMsk1) {  // round even ==>
2252              // accept rv
2253              break;
2254            }
2255            // rv = smallest denormal
2256            if (bc.nd > nd) {
2257              break;
2258            }
2259            goto undfl;
2260          }
2261        }
2262        int32_t big_l = (word0(&rv) & kExpMask) - kExpMsk1;
2263        word0(&rv) = big_l | kBndryMask1;
2264        word1(&rv) = 0xffffffff;
2265        break;
2266      }
2267      if (!odd) {
2268        break;
2269      }
2270      if (dsign) {
2271        dval(&rv) += sulp(&rv, &bc);
2272      } else {
2273        dval(&rv) -= sulp(&rv, &bc);
2274        if (!dval(&rv)) {
2275          if (bc.nd > nd) {
2276            break;
2277          }
2278          goto undfl;
2279        }
2280      }
2281      // dsign = 1 - dsign;
2282      break;
2283    }
2284    if ((aadj = ratio(delta, bs)) <= 2.) {
2285      if (dsign) {
2286        aadj = aadj1 = 1.;
2287      } else if (word1(&rv) || word0(&rv) & kBndryMask) {
2288        if (word1(&rv) == kTiny1 && !word0(&rv)) {
2289          if (bc.nd > nd) {
2290            break;
2291          }
2292          goto undfl;
2293        }
2294        aadj = 1.;
2295        aadj1 = -1.;
2296      } else {
2297        // special case -- power of FLT_RADIX to be
2298        // rounded down...
2299
2300        if (aadj < 2. / FLT_RADIX) {
2301          aadj = 1. / FLT_RADIX;
2302        } else {
2303          aadj *= 0.5;
2304        }
2305        aadj1 = -aadj;
2306      }
2307    } else {
2308      aadj *= 0.5;
2309      aadj1 = dsign ? aadj : -aadj;
2310      if (FLT_ROUNDS == 0) {
2311        aadj1 += 0.5;
2312      }
2313    }
2314    y = word0(&rv) & kExpMask;
2315
2316    // Check for overflow
2317
2318    if (y == kExpMsk1 * (DBL_MAX_EXP + kBias - 1)) {
2319      dval(&rv0) = dval(&rv);
2320      word0(&rv) -= kP * kExpMsk1;
2321      adj.d = aadj1 * ulp(&rv);
2322      dval(&rv) += adj.d;
2323      if ((word0(&rv) & kExpMask) >= kExpMsk1 * (DBL_MAX_EXP + kBias - kP)) {
2324        if (word0(&rv0) == kBig0 && word1(&rv0) == kBig1) {
2325          Bfree(bb);
2326          Bfree(bd);
2327          Bfree(bs);
2328          Bfree(bd0);
2329          Bfree(delta);
2330          goto ovfl;
2331        }
2332        word0(&rv) = kBig0;
2333        word1(&rv) = kBig1;
2334        goto cont;
2335      } else {
2336        word0(&rv) += kP * kExpMsk1;
2337      }
2338    } else {
2339      if (bc.scale && y <= 2 * kP * kExpMsk1) {
2340        if (aadj <= 0x7fffffff) {
2341          if ((z = static_cast<uint32_t>(aadj)) <= 0) {
2342            z = 1;
2343          }
2344          aadj = z;
2345          aadj1 = dsign ? aadj : -aadj;
2346        }
2347        dval(&aadj2) = aadj1;
2348        word0(&aadj2) += (2 * kP + 1) * kExpMsk1 - y;
2349        aadj1 = dval(&aadj2);
2350      }
2351      adj.d = aadj1 * ulp(&rv);
2352      dval(&rv) += adj.d;
2353    }
2354    z = word0(&rv) & kExpMask;
2355    if (bc.nd == nd) {
2356      if (!bc.scale) {
2357        if (y == z) {
2358          // Can we stop now?
2359          int32_t big_l = static_cast<int32_t>(aadj);
2360          aadj -= big_l;
2361          // The tolerances below are conservative.
2362          if (dsign || word1(&rv) || word0(&rv) & kBndryMask) {
2363            if (aadj < .4999999 || aadj > .5000001) {
2364              break;
2365            }
2366          } else if (aadj < .4999999 / FLT_RADIX) {
2367            break;
2368          }
2369        }
2370      }
2371    }
2372  cont:
2373    Bfree(bb);
2374    Bfree(bd);
2375    Bfree(bs);
2376    Bfree(delta);
2377  }
2378  Bfree(bb);
2379  Bfree(bd);
2380  Bfree(bs);
2381  Bfree(bd0);
2382  Bfree(delta);
2383  if (bc.nd > nd) {
2384    error = bigcomp(&rv, s0, &bc);
2385    if (error) {
2386      goto failed_malloc;
2387    }
2388  }
2389
2390  if (bc.scale) {
2391    word0(&rv0) = kExp1 - 2 * kP * kExpMsk1;
2392    word1(&rv0) = 0;
2393    dval(&rv) *= dval(&rv0);
2394  }
2395
2396ret:
2397  return sign ? -dval(&rv) : dval(&rv);
2398
2399failed_malloc:
2400  errno = ENOMEM;
2401  return -1.0;
2402
2403undfl:
2404  return sign ? -0.0 : 0.0;
2405
2406ovfl:
2407  errno = ERANGE;
2408  return sign ? -HUGE_VAL : HUGE_VAL;
2409}
2410
2411static char* rv_alloc(int i) {
2412  int j = sizeof(uint32_t);
2413  int k;
2414  for (k = 0; sizeof(Bigint) - sizeof(uint32_t) - sizeof(int) + j <=
2415              static_cast<unsigned>(i);
2416       j <<= 1) {
2417    k++;
2418  }
2419  int* r = reinterpret_cast<int*>(Balloc(k));
2420  if (r == nullptr) {
2421    return nullptr;
2422  }
2423  *r = k;
2424  return reinterpret_cast<char*>(r + 1);
2425}
2426
2427static char* nrv_alloc(const char* s, char** rve, int n) {
2428  char* rv = rv_alloc(n);
2429  if (rv == nullptr) {
2430    return nullptr;
2431  }
2432  char* t = rv;
2433  while ((*t = *s++)) {
2434    t++;
2435  }
2436  if (rve) {
2437    *rve = t;
2438  }
2439  return rv;
2440}
2441
2442// freedtoa(s) must be used to free values s returned by dtoa
2443// when MULTIPLE_THREADS is #defined.  It should be used in all cases,
2444// but for consistency with earlier versions of dtoa, it is optional
2445// when MULTIPLE_THREADS is not defined.
2446void freedtoa(char* s) {
2447  Bigint* b = reinterpret_cast<Bigint*>(reinterpret_cast<int*>(s) - 1);
2448  b->maxwds = 1 << (b->k = *reinterpret_cast<int*>(b));
2449  Bfree(b);
2450}
2451
2452// dtoa for IEEE arithmetic (dmg): convert double to ASCII string.
2453//
2454// Inspired by "How to Print Floating-Point Numbers Accurately" by
2455// Guy L. Steele, Jr. and Jon L. White [Proc. ACM SIGPLAN '90, pp. 112-126].
2456//
2457// Modifications:
2458//      1. Rather than iterating, we use a simple numeric overestimate
2459//         to determine k = floor(log10(d)).  We scale relevant
2460//         quantities using O(log2(k)) rather than O(k) multiplications.
2461//      2. For some modes > 2 (corresponding to ecvt and fcvt), we don't
2462//         try to generate digits strictly left to right.  Instead, we
2463//         compute with fewer bits and propagate the carry if necessary
2464//         when rounding the final digit up.  This is often faster.
2465//      3. Under the assumption that input will be rounded nearest,
2466//         mode 0 renders 1e23 as 1e23 rather than 9.999999999999999e22.
2467//         That is, we allow equality in stopping tests when the
2468//         round-nearest rule will give the same floating-point value
2469//         as would satisfaction of the stopping test with strict
2470//         inequality.
2471//      4. We remove common factors of powers of 2 from relevant
2472//         quantities.
2473//      5. When converting floating-point integers less than 1e16,
2474//         we use floating-point arithmetic rather than resorting
2475//         to multiple-precision integers.
2476//      6. When asked to produce fewer than 15 digits, we first try
2477//         to get by with floating-point arithmetic; we resort to
2478//         multiple-precision integer arithmetic only if we cannot
2479//         guarantee that the floating-point calculation has given
2480//         the correctly rounded result.  For k requested digits and
2481//         "uniformly" distributed input, the probability is
2482//         something like 10^(k-15) that we must resort to the int32_t
2483//         calculation.
2484//
2485// Additional notes (METD): (1) returns nullptr on failure.  (2) to avoid
2486// memory leakage, a successful call to dtoa should always be matched
2487// by a call to freedtoa.
2488static char* dtoa(double dd, int mode, int ndigits, int* decpt, int* sign,
2489                  char** rve) {
2490  //  Arguments ndigits, decpt, sign are similar to those
2491  //  of ecvt and fcvt; trailing zeros are suppressed from
2492  //  the returned string.  If not null, *rve is set to point
2493  //  to the end of the return value.  If d is +-Infinity or NaN,
2494  //  then *decpt is set to 9999.
2495  //
2496  //  mode:
2497  //  0 ==> shortest string that yields d when read in
2498  //  and rounded to nearest.
2499  //  1 ==> like 0, but with Steele & White stopping rule;
2500  //  e.g. with IEEE P754 arithmetic , mode 0 gives
2501  //  1e23 whereas mode 1 gives 9.999999999999999e22.
2502  //  2 ==> max(1,ndigits) significant digits.  This gives a
2503  //  return value similar to that of ecvt, except
2504  //  that trailing zeros are suppressed.
2505  //  3 ==> through ndigits past the decimal point.  This
2506  //  gives a return value similar to that from fcvt,
2507  //  except that trailing zeros are suppressed, and
2508  //  ndigits can be negative.
2509  //  4,5 ==> similar to 2 and 3, respectively, but (in
2510  //  round-nearest mode) with the tests of mode 0 to
2511  //  possibly return a shorter string that rounds to d.
2512  //  With IEEE arithmetic and compilation with
2513  //  -DHonor_FLT_ROUNDS, modes 4 and 5 behave the same
2514  //  as modes 2 and 3 when FLT_ROUNDS != 1.
2515  //  6-9 ==> Debugging modes similar to mode - 4:  don't try
2516  //  fast floating-point estimate (if applicable).
2517  //
2518  //  Values of mode other than 0-9 are treated as mode 0.
2519  //
2520  //  Sufficient space is allocated to the return value
2521  //  to hold the suppressed trailing zeros.
2522
2523  int b2, b5, be, dig, i, ieps, ilim, ilim0, ilim1, j, j1, k, k0, k_check,
2524      leftright, m2, m5, s2, s5, spec_case, try_quick;
2525  U d2, eps;
2526  char* s;
2527  double ds;
2528
2529  // set pointers to nullptr, to silence gcc compiler warnings and make
2530  // cleanup easier on error
2531  Bigint* mlo = nullptr;
2532  Bigint* mhi = nullptr;
2533  Bigint* big_s = nullptr;
2534  char* s0 = nullptr;
2535
2536  U u;
2537  u.d = dd;
2538  if (word0(&u) & kSignBit) {
2539    // set sign for everything, including 0's and NaNs
2540    *sign = 1;
2541    word0(&u) &= ~kSignBit;  // clear sign bit
2542  } else {
2543    *sign = 0;
2544  }
2545
2546  // quick return for Infinities, NaNs and zeros
2547  if ((word0(&u) & kExpMask) == kExpMask) {
2548    // Infinity or NaN
2549    *decpt = 9999;
2550    if (!word1(&u) && !(word0(&u) & 0xfffff)) {
2551      return nrv_alloc("Infinity", rve, 8);
2552    }
2553    return nrv_alloc("NaN", rve, 3);
2554  }
2555  if (!dval(&u)) {
2556    *decpt = 1;
2557    return nrv_alloc("0", rve, 1);
2558  }
2559
2560  // compute k = floor(log10(d)).  The computation may leave k
2561  // one too large, but should never leave k too small.
2562  int bbits;
2563  Bigint* b = d2b(&u, &be, &bbits);
2564  if (b == nullptr) {
2565    goto failed_malloc;
2566  }
2567  bool denorm;
2568  i = static_cast<int>(word0(&u) >> kExpShift1 & (kExpMask >> kExpShift1));
2569  if (i) {
2570    dval(&d2) = dval(&u);
2571    word0(&d2) &= kFracMask1;
2572    word0(&d2) |= kExp11;
2573
2574    // log(x)       ~=~ log(1.5) + (x-1.5)/1.5
2575    // log10(x)      =  log(x) / log(10)
2576    //              ~=~ log(1.5)/log(10) + (x-1.5)/(1.5*log(10))
2577    // log10(d) = (i-kBias)*log(2)/log(10) + log10(d2)
2578    //
2579    // This suggests computing an approximation k to log10(d) by
2580    //
2581    // k = (i - kBias)*0.301029995663981
2582    //      + ( (d2-1.5)*0.289529654602168 + 0.176091259055681 );
2583    //
2584    // We want k to be too large rather than too small.
2585    // The error in the first-order Taylor series approximation
2586    // is in our favor, so we just round up the constant enough
2587    // to compensate for any error in the multiplication of
2588    // (i - kBias) by 0.301029995663981; since |i - kBias| <= 1077,
2589    // and 1077 * 0.30103 * 2^-52 ~=~ 7.2e-14,
2590    // adding 1e-13 to the constant term more than suffices.
2591    // Hence we adjust the constant term to 0.1760912590558.
2592    // (We could get a more accurate k by invoking log10,
2593    //  but this is probably not worthwhile.)
2594
2595    i -= kBias;
2596    denorm = false;
2597  } else {
2598    // d is denormalized
2599
2600    i = bbits + be + (kBias + (kP - 1) - 1);
2601    uint32_t x = i > 32 ? word0(&u) << (64 - i) | word1(&u) >> (i - 32)
2602                        : word1(&u) << (32 - i);
2603    dval(&d2) = x;
2604    word0(&d2) -= 31 * kExpMsk1;  // adjust exponent
2605    i -= (kBias + (kP - 1) - 1) + 1;
2606    denorm = true;
2607  }
2608  ds = (dval(&d2) - 1.5) * 0.289529654602168 + 0.1760912590558 +
2609       i * 0.301029995663981;
2610  k = static_cast<int>(ds);
2611  if (ds < 0. && ds != k) k--;  // want k = floor(ds)
2612  k_check = 1;
2613  if (k >= 0 && k <= kTenPmax) {
2614    if (dval(&u) < kTens[k]) {
2615      k--;
2616    }
2617    k_check = 0;
2618  }
2619  j = bbits - i - 1;
2620  if (j >= 0) {
2621    b2 = 0;
2622    s2 = j;
2623  } else {
2624    b2 = -j;
2625    s2 = 0;
2626  }
2627  if (k >= 0) {
2628    b5 = 0;
2629    s5 = k;
2630    s2 += k;
2631  } else {
2632    b2 -= k;
2633    b5 = -k;
2634    s5 = 0;
2635  }
2636  if (mode < 0 || mode > 9) {
2637    mode = 0;
2638  }
2639
2640  try_quick = 1;
2641
2642  if (mode > 5) {
2643    mode -= 4;
2644    try_quick = 0;
2645  }
2646  leftright = 1;
2647  ilim = ilim1 = -1;  // Values for cases 0 and 1; done here to
2648  // silence erroneous "gcc -Wall" warning.
2649  switch (mode) {
2650    case 0:
2651    case 1:
2652      i = 18;
2653      ndigits = 0;
2654      break;
2655    case 2:
2656      leftright = 0;
2657      FALLTHROUGH;
2658    case 4:
2659      if (ndigits <= 0) {
2660        ndigits = 1;
2661      }
2662      ilim = ilim1 = i = ndigits;
2663      break;
2664    case 3:
2665      leftright = 0;
2666      FALLTHROUGH;
2667    case 5:
2668      i = ndigits + k + 1;
2669      ilim = i;
2670      ilim1 = i - 1;
2671      if (i <= 0) {
2672        i = 1;
2673      }
2674  }
2675  s0 = rv_alloc(i);
2676  if (s0 == nullptr) {
2677    goto failed_malloc;
2678  }
2679  s = s0;
2680
2681  int32_t big_l;
2682  if (ilim >= 0 && ilim <= kQuickMax && try_quick) {
2683    // Try to get by with floating-point arithmetic.
2684
2685    i = 0;
2686    dval(&d2) = dval(&u);
2687    k0 = k;
2688    ilim0 = ilim;
2689    ieps = 2;  // conservative
2690    if (k > 0) {
2691      ds = kTens[k & 0xf];
2692      j = k >> 4;
2693      if (j & kBletch) {
2694        // prevent overflows
2695        j &= kBletch - 1;
2696        dval(&u) /= kBigTens[ARRAYSIZE(kBigTens) - 1];
2697        ieps++;
2698      }
2699      for (; j; j >>= 1, i++) {
2700        if (j & 1) {
2701          ieps++;
2702          ds *= kBigTens[i];
2703        }
2704      }
2705      dval(&u) /= ds;
2706    } else if ((j1 = -k)) {
2707      dval(&u) *= kTens[j1 & 0xf];
2708      for (j = j1 >> 4; j; j >>= 1, i++) {
2709        if (j & 1) {
2710          ieps++;
2711          dval(&u) *= kBigTens[i];
2712        }
2713      }
2714    }
2715    if (k_check && dval(&u) < 1. && ilim > 0) {
2716      if (ilim1 <= 0) {
2717        goto fast_failed;
2718      }
2719      ilim = ilim1;
2720      k--;
2721      dval(&u) *= 10.;
2722      ieps++;
2723    }
2724    dval(&eps) = ieps * dval(&u) + 7.;
2725    word0(&eps) -= (kP - 1) * kExpMsk1;
2726    if (ilim == 0) {
2727      big_s = nullptr;
2728      mhi = nullptr;
2729      dval(&u) -= 5.;
2730      if (dval(&u) > dval(&eps)) {
2731        goto one_digit;
2732      }
2733      if (dval(&u) < -dval(&eps)) {
2734        goto no_digits;
2735      }
2736      goto fast_failed;
2737    }
2738    if (leftright) {
2739      // Use Steele & White method of only
2740      // generating digits needed.
2741      dval(&eps) = 0.5 / kTens[ilim - 1] - dval(&eps);
2742      for (i = 0;;) {
2743        big_l = static_cast<int32_t>(dval(&u));
2744        dval(&u) -= big_l;
2745        *s++ = '0' + big_l;
2746        if (dval(&u) < dval(&eps)) {
2747          goto ret1;
2748        }
2749        if (1. - dval(&u) < dval(&eps)) {
2750          goto bump_up;
2751        }
2752        if (++i >= ilim) {
2753          break;
2754        }
2755        dval(&eps) *= 10.;
2756        dval(&u) *= 10.;
2757      }
2758    } else {
2759      // Generate ilim digits, then fix them up.
2760      dval(&eps) *= kTens[ilim - 1];
2761      for (i = 1;; i++, dval(&u) *= 10.) {
2762        big_l = static_cast<int32_t>(dval(&u));
2763        if (!(dval(&u) -= big_l)) {
2764          ilim = i;
2765        }
2766        *s++ = '0' + big_l;
2767        if (i == ilim) {
2768          if (dval(&u) > 0.5 + dval(&eps)) {
2769            goto bump_up;
2770          } else if (dval(&u) < 0.5 - dval(&eps)) {
2771            while (*--s == '0') {
2772            }
2773            s++;
2774            goto ret1;
2775          }
2776          break;
2777        }
2778      }
2779    }
2780  fast_failed:
2781    s = s0;
2782    dval(&u) = dval(&d2);
2783    k = k0;
2784    ilim = ilim0;
2785  }
2786
2787  // Do we have a "small" integer?
2788
2789  if (be >= 0 && k <= kIntMax) {
2790    // Yes.
2791    ds = kTens[k];
2792    if (ndigits < 0 && ilim <= 0) {
2793      big_s = nullptr;
2794      mhi = nullptr;
2795      if (ilim < 0 || dval(&u) <= 5 * ds) {
2796        goto no_digits;
2797      }
2798      goto one_digit;
2799    }
2800    for (i = 1;; i++, dval(&u) *= 10.) {
2801      big_l = static_cast<int32_t>(dval(&u) / ds);
2802      dval(&u) -= big_l * ds;
2803      *s++ = '0' + big_l;
2804      if (!dval(&u)) {
2805        break;
2806      }
2807      if (i == ilim) {
2808        dval(&u) += dval(&u);
2809        if (dval(&u) > ds || (dval(&u) == ds && big_l & 1)) {
2810        bump_up:
2811          while (*--s == '9') {
2812            if (s == s0) {
2813              k++;
2814              *s = '0';
2815              break;
2816            }
2817          }
2818          ++*s++;
2819        }
2820        break;
2821      }
2822    }
2823    goto ret1;
2824  }
2825
2826  m2 = b2;
2827  m5 = b5;
2828  if (leftright) {
2829    i = denorm ? be + (kBias + (kP - 1) - 1 + 1) : 1 + kP - bbits;
2830    b2 += i;
2831    s2 += i;
2832    mhi = i2b(1);
2833    if (mhi == nullptr) {
2834      goto failed_malloc;
2835    }
2836  }
2837  if (m2 > 0 && s2 > 0) {
2838    i = m2 < s2 ? m2 : s2;
2839    b2 -= i;
2840    m2 -= i;
2841    s2 -= i;
2842  }
2843  if (b5 > 0) {
2844    if (leftright) {
2845      if (m5 > 0) {
2846        mhi = pow5mult(mhi, m5);
2847        if (mhi == nullptr) {
2848          goto failed_malloc;
2849        }
2850        Bigint* b1 = mult(mhi, b);
2851        Bfree(b);
2852        b = b1;
2853        if (b == nullptr) {
2854          goto failed_malloc;
2855        }
2856      }
2857      if ((j = b5 - m5)) {
2858        b = pow5mult(b, j);
2859        if (b == nullptr) {
2860          goto failed_malloc;
2861        }
2862      }
2863    } else {
2864      b = pow5mult(b, b5);
2865      if (b == nullptr) {
2866        goto failed_malloc;
2867      }
2868    }
2869  }
2870  big_s = i2b(1);
2871  if (big_s == nullptr) {
2872    goto failed_malloc;
2873  }
2874  if (s5 > 0) {
2875    big_s = pow5mult(big_s, s5);
2876    if (big_s == nullptr) {
2877      goto failed_malloc;
2878    }
2879  }
2880
2881  // Check for special case that d is a normalized power of 2.
2882
2883  spec_case = 0;
2884  if (mode < 2 || leftright) {
2885    if (!word1(&u) && !(word0(&u) & kBndryMask) &&
2886        word0(&u) & (kExpMask & ~kExpMsk1)) {
2887      // The special case
2888      b2 += kLog2P;
2889      s2 += kLog2P;
2890      spec_case = 1;
2891    }
2892  }
2893
2894  // Arrange for convenient computation of quotients:
2895  // shift left if necessary so divisor has 4 leading 0 bits.
2896  //
2897  // Perhaps we should just compute leading 28 bits of big_s once
2898  // and for all and pass them and a shift to quorem, so it
2899  // can do shifts and ors to compute the numerator for q.
2900  i = dshift(big_s, s2);
2901  b2 += i;
2902  m2 += i;
2903  s2 += i;
2904  if (b2 > 0) {
2905    b = lshift(b, b2);
2906    if (b == nullptr) {
2907      goto failed_malloc;
2908    }
2909  }
2910  if (s2 > 0) {
2911    big_s = lshift(big_s, s2);
2912    if (big_s == nullptr) {
2913      goto failed_malloc;
2914    }
2915  }
2916  if (k_check) {
2917    if (cmp(b, big_s) < 0) {
2918      k--;
2919      b = multadd(b, 10, 0);  // we botched the k estimate
2920      if (b == nullptr) {
2921        goto failed_malloc;
2922      }
2923      if (leftright) {
2924        mhi = multadd(mhi, 10, 0);
2925        if (mhi == nullptr) {
2926          goto failed_malloc;
2927        }
2928      }
2929      ilim = ilim1;
2930    }
2931  }
2932  if (ilim <= 0 && (mode == 3 || mode == 5)) {
2933    if (ilim < 0) {
2934      // no digits, fcvt style
2935    no_digits:
2936      k = -1 - ndigits;
2937      goto ret;
2938    } else {
2939      big_s = multadd(big_s, 5, 0);
2940      if (big_s == nullptr) {
2941        goto failed_malloc;
2942      }
2943      if (cmp(b, big_s) <= 0) {
2944        goto no_digits;
2945      }
2946    }
2947  one_digit:
2948    *s++ = '1';
2949    k++;
2950    goto ret;
2951  }
2952  if (leftright) {
2953    if (m2 > 0) {
2954      mhi = lshift(mhi, m2);
2955      if (mhi == nullptr) {
2956        goto failed_malloc;
2957      }
2958    }
2959
2960    // Compute mlo -- check for special case
2961    // that d is a normalized power of 2.
2962
2963    mlo = mhi;
2964    if (spec_case) {
2965      mhi = Balloc(mhi->k);
2966      if (mhi == nullptr) {
2967        goto failed_malloc;
2968      }
2969      Bcopy(mhi, mlo);
2970      mhi = lshift(mhi, kLog2P);
2971      if (mhi == nullptr) {
2972        goto failed_malloc;
2973      }
2974    }
2975
2976    for (i = 1;; i++) {
2977      dig = quorem(b, big_s) + '0';
2978      // Do we yet have the shortest decimal string
2979      // that will round to d?
2980      j = cmp(b, mlo);
2981      Bigint* delta = diff(big_s, mhi);
2982      if (delta == nullptr) {
2983        goto failed_malloc;
2984      }
2985      j1 = delta->sign ? 1 : cmp(b, delta);
2986      Bfree(delta);
2987      if (j1 == 0 && mode != 1 && !(word1(&u) & 1)) {
2988        if (dig == '9') {
2989          goto round_9_up;
2990        }
2991        if (j > 0) {
2992          dig++;
2993        }
2994        *s++ = dig;
2995        goto ret;
2996      }
2997      if (j < 0 || (j == 0 && mode != 1 && !(word1(&u) & 1))) {
2998        if (!b->x[0] && b->wds <= 1) {
2999          goto accept_dig;
3000        }
3001        if (j1 > 0) {
3002          b = lshift(b, 1);
3003          if (b == nullptr) {
3004            goto failed_malloc;
3005          }
3006          j1 = cmp(b, big_s);
3007          if ((j1 > 0 || (j1 == 0 && dig & 1)) && dig++ == '9') {
3008            goto round_9_up;
3009          }
3010        }
3011      accept_dig:
3012        *s++ = dig;
3013        goto ret;
3014      }
3015      if (j1 > 0) {
3016        if (dig == '9') {  // possible if i == 1
3017        round_9_up:
3018          *s++ = '9';
3019          goto roundoff;
3020        }
3021        *s++ = dig + 1;
3022        goto ret;
3023      }
3024      *s++ = dig;
3025      if (i == ilim) {
3026        break;
3027      }
3028      b = multadd(b, 10, 0);
3029      if (b == nullptr) {
3030        goto failed_malloc;
3031      }
3032      if (mlo == mhi) {
3033        mlo = mhi = multadd(mhi, 10, 0);
3034        if (mlo == nullptr) {
3035          goto failed_malloc;
3036        }
3037      } else {
3038        mlo = multadd(mlo, 10, 0);
3039        if (mlo == nullptr) {
3040          goto failed_malloc;
3041        }
3042        mhi = multadd(mhi, 10, 0);
3043        if (mhi == nullptr) {
3044          goto failed_malloc;
3045        }
3046      }
3047    }
3048  } else {
3049    for (i = 1;; i++) {
3050      *s++ = dig = quorem(b, big_s) + '0';
3051      if (!b->x[0] && b->wds <= 1) {
3052        goto ret;
3053      }
3054      if (i >= ilim) {
3055        break;
3056      }
3057      b = multadd(b, 10, 0);
3058      if (b == nullptr) {
3059        goto failed_malloc;
3060      }
3061    }
3062  }
3063
3064  // Round off last digit
3065
3066  b = lshift(b, 1);
3067  if (b == nullptr) {
3068    goto failed_malloc;
3069  }
3070  j = cmp(b, big_s);
3071  if (j > 0 || (j == 0 && dig & 1)) {
3072  roundoff:
3073    while (*--s == '9') {
3074      if (s == s0) {
3075        k++;
3076        *s++ = '1';
3077        goto ret;
3078      }
3079    }
3080    ++*s++;
3081  } else {
3082    while (*--s == '0') {
3083    }
3084    s++;
3085  }
3086ret:
3087  Bfree(big_s);
3088  if (mhi) {
3089    if (mlo && mlo != mhi) {
3090      Bfree(mlo);
3091    }
3092    Bfree(mhi);
3093  }
3094ret1:
3095  Bfree(b);
3096  *s = 0;
3097  *decpt = k + 1;
3098  if (rve) {
3099    *rve = s;
3100  }
3101  return s0;
3102failed_malloc:
3103  if (big_s) {
3104    Bfree(big_s);
3105  }
3106  if (mlo && mlo != mhi) {
3107    Bfree(mlo);
3108  }
3109  if (mhi) {
3110    Bfree(mhi);
3111  }
3112  if (b) {
3113    Bfree(b);
3114  }
3115  if (s0) {
3116    freedtoa(s0);
3117  }
3118  return nullptr;
3119}
3120
3121}  // namespace py