this repo has no description
at trunk 1132 lines 38 kB view raw
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) 2#include "formatter.h" 3 4#include <climits> 5 6#include "float-builtins.h" 7#include "float-conversion.h" 8#include "formatter-utils.h" 9#include "globals.h" 10#include "handles-decl.h" 11#include "objects.h" 12#include "runtime.h" 13#include "str-builtins.h" 14#include "thread.h" 15#include "unicode.h" 16 17namespace py { 18 19const byte kLowerCaseHexDigitArray[] = {'0', '1', '2', '3', '4', '5', '6', '7', 20 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; 21const byte kUpperCaseHexDigitArray[] = {'0', '1', '2', '3', '4', '5', '6', '7', 22 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; 23 24struct FloatWidths { 25 word left_padding; 26 byte sign; 27 word sign_padding; 28 word grouped_digits; 29 bool has_decimal; 30 word remainder; 31 word right_padding; 32}; 33 34static word calculateFloatWidths(const FormatSpec* format, const char* buf, 35 word length, FloatWidths* result) { 36 result->left_padding = 0; 37 result->sign = '\0'; 38 result->sign_padding = 0; 39 result->grouped_digits = length; 40 result->has_decimal = false; 41 result->remainder = 0; 42 result->right_padding = 0; 43 44 // Check for a sign character 45 word index = 0; 46 word total_length = 0; 47 if (buf[index] == '-') { 48 result->sign = '-'; 49 index++; 50 total_length++; 51 } 52 53 // Parse the digits for a remainder 54 for (; index < length; index++) { 55 char c = buf[index]; 56 if (!ASCII::isDigit(c)) { 57 word remainder = length - index; 58 if (c == '.') { 59 result->has_decimal = true; 60 result->remainder = remainder - 1; 61 total_length += remainder; // TODO(T52759101): use locale for decimal 62 } else { 63 result->remainder = remainder; 64 total_length += remainder; 65 } 66 break; 67 } 68 } 69 70 if (format->positive_sign != '\0' && result->sign == '\0') { 71 result->sign = format->positive_sign; 72 total_length++; 73 } 74 75 // TODO(T52759101): use locale for thousands separator and grouping 76 word digits = result->sign == '-' ? index - 1 : index; 77 if (format->thousands_separator != '\0') { 78 digits += (digits - 1) / 3; 79 } 80 result->grouped_digits = digits; 81 total_length += digits; 82 83 word padding = format->width - total_length; 84 if (padding > 0) { 85 total_length += 86 padding * SmallStr::fromCodePoint(format->fill_char).length(); 87 switch (format->alignment) { 88 case '<': 89 result->right_padding = padding; 90 break; 91 case '=': 92 result->sign_padding = padding; 93 break; 94 case '>': 95 result->left_padding = padding; 96 break; 97 case '^': 98 result->left_padding = padding >> 1; 99 result->right_padding = padding - result->left_padding; 100 break; 101 default: 102 UNREACHABLE("unexpected alignment %c", format->alignment); 103 } 104 } 105 106 return total_length; 107} 108 109static bool isAlignmentSpec(int32_t cp) { 110 switch (cp) { 111 case '<': 112 case '>': 113 case '=': 114 case '^': 115 return true; 116 default: 117 return false; 118 } 119} 120 121static inline int32_t nextCodePoint(const Str& spec, word length, word* index) { 122 if (*index >= length) { 123 return 0; 124 } 125 word cp_length; 126 int32_t cp = spec.codePointAt(*index, &cp_length); 127 *index += cp_length; 128 return cp; 129} 130 131RawObject parseFormatSpec(Thread* thread, const Str& spec, int32_t default_type, 132 char default_align, FormatSpec* result) { 133 result->alignment = default_align; 134 result->positive_sign = 0; 135 result->thousands_separator = 0; 136 result->type = default_type; 137 result->alternate = false; 138 result->fill_char = ' '; 139 result->width = -1; 140 result->precision = -1; 141 142 word index = 0; 143 word length = spec.length(); 144 int32_t cp = nextCodePoint(spec, length, &index); 145 146 bool fill_char_specified = false; 147 bool alignment_specified = false; 148 word old_index = index; 149 int32_t c_next = nextCodePoint(spec, length, &index); 150 if (isAlignmentSpec(c_next)) { 151 result->alignment = static_cast<char>(c_next); 152 result->fill_char = cp; 153 fill_char_specified = true; 154 alignment_specified = true; 155 156 cp = nextCodePoint(spec, length, &index); 157 } else if (!alignment_specified && isAlignmentSpec(cp)) { 158 result->alignment = static_cast<char>(cp); 159 alignment_specified = true; 160 cp = c_next; 161 } else { 162 index = old_index; 163 } 164 165 switch (cp) { 166 case '+': 167 case ' ': 168 result->positive_sign = static_cast<char>(cp); 169 cp = nextCodePoint(spec, length, &index); 170 break; 171 case '-': 172 cp = nextCodePoint(spec, length, &index); 173 break; 174 } 175 176 if (cp == '#') { 177 result->alternate = true; 178 cp = nextCodePoint(spec, length, &index); 179 } 180 181 if (!fill_char_specified && cp == '0') { 182 result->fill_char = '0'; 183 if (!alignment_specified) { 184 result->alignment = '='; 185 } 186 } 187 188 if ('0' <= cp && cp <= '9') { 189 word width = 0; 190 for (;;) { 191 width += cp - '0'; 192 cp = nextCodePoint(spec, length, &index); 193 if ('0' > cp || cp > '9') break; 194 if (__builtin_mul_overflow(width, 10, &width)) { 195 return thread->raiseWithFmt(LayoutId::kValueError, 196 "Too many decimal digits in format string"); 197 } 198 } 199 result->width = width; 200 } 201 202 if (cp == ',') { 203 result->thousands_separator = ','; 204 cp = nextCodePoint(spec, length, &index); 205 } 206 if (cp == '_') { 207 if (result->thousands_separator != 0) { 208 return thread->raiseWithFmt(LayoutId::kValueError, 209 "Cannot specify both ',' and '_'."); 210 } 211 result->thousands_separator = '_'; 212 cp = nextCodePoint(spec, length, &index); 213 } 214 if (cp == ',') { 215 return thread->raiseWithFmt(LayoutId::kValueError, 216 "Cannot specify both ',' and '_'."); 217 } 218 219 if (cp == '.') { 220 cp = nextCodePoint(spec, length, &index); 221 if ('0' > cp || cp > '9') { 222 return thread->raiseWithFmt(LayoutId::kValueError, 223 "Format specifier missing precision"); 224 } 225 226 word precision = 0; 227 for (;;) { 228 precision += cp - '0'; 229 cp = nextCodePoint(spec, length, &index); 230 if ('0' > cp || cp > '9') break; 231 if (__builtin_mul_overflow(precision, 10, &precision)) { 232 return thread->raiseWithFmt(LayoutId::kValueError, 233 "Too many decimal digits in format string"); 234 } 235 } 236 result->precision = precision; 237 } 238 239 if (cp != 0) { 240 result->type = cp; 241 // This was the last step: No need to call `nextCodePoint()` here. 242 } 243 if (index < length) { 244 return thread->raiseWithFmt(LayoutId::kValueError, 245 "Invalid format specifier"); 246 } 247 248 if (result->thousands_separator) { 249 switch (result->type) { 250 case 'd': 251 case 'e': 252 case 'f': 253 case 'g': 254 case 'E': 255 case 'G': 256 case '%': 257 case 'F': 258 case '\0': 259 // These are allowed. See PEP 378. 260 break; 261 case 'b': 262 case 'o': 263 case 'x': 264 case 'X': 265 // Underscores are allowed in bin/oct/hex. See PEP 515. 266 if (result->thousands_separator == '_') { 267 break; 268 } 269 FALLTHROUGH; 270 default: 271 if (32 < result->type && result->type <= kMaxASCII) { 272 return thread->raiseWithFmt( 273 LayoutId::kValueError, "Cannot specify '%c' with '%c'.", 274 result->thousands_separator, static_cast<char>(result->type)); 275 } 276 return thread->raiseWithFmt( 277 LayoutId::kValueError, "Cannot specify '%c' with '\\x%x'.", 278 result->thousands_separator, static_cast<unsigned>(result->type)); 279 } 280 } 281 return NoneType::object(); 282} 283 284static word putFloat(const MutableBytes& dest, const byte* buf, 285 const FormatSpec* format, const FloatWidths* widths) { 286 word at = 0; 287 RawSmallStr fill = SmallStr::fromCodePoint(format->fill_char); 288 word fill_length = fill.length(); 289 for (word i = 0; i < widths->left_padding; i++, at += fill_length) { 290 dest.replaceFromWithStr(at, Str::cast(fill), fill_length); 291 } 292 switch (widths->sign) { 293 case '\0': 294 break; 295 case '-': 296 buf++; 297 FALLTHROUGH; 298 case '+': 299 case ' ': 300 dest.byteAtPut(at++, widths->sign); 301 break; 302 default: 303 UNREACHABLE("unexpected sign char %c", widths->sign); 304 } 305 for (word i = 0; i < widths->sign_padding; i++, at += fill_length) { 306 dest.replaceFromWithStr(at, Str::cast(fill), fill_length); 307 } 308 // TODO(T52759101): use thousands separator from locale 309 if (format->thousands_separator == '\0') { 310 dest.replaceFromWithAll(at, {buf, widths->grouped_digits}); 311 at += widths->grouped_digits; 312 buf += widths->grouped_digits; 313 } else { 314 // TODO(T52759101): use locale for grouping 315 word prefix = widths->grouped_digits % 4; 316 dest.replaceFromWithAll(at, {buf, prefix}); 317 buf += prefix; 318 for (word i = prefix; i < widths->grouped_digits; i += 4, buf += 3) { 319 dest.byteAtPut(at + i, format->thousands_separator); 320 dest.replaceFromWithAll(at + i + 1, {buf, 3}); 321 } 322 at += widths->grouped_digits; 323 } 324 if (widths->has_decimal) { 325 // TODO(T52759101): use decimal from locale 326 dest.byteAtPut(at++, '.'); 327 buf++; 328 } 329 dest.replaceFromWithAll(at, {buf, widths->remainder}); 330 at += widths->remainder; 331 for (word i = 0; i < widths->right_padding; i++, at += fill_length) { 332 dest.replaceFromWithStr(at, Str::cast(fill), fill_length); 333 } 334 return at; 335} 336 337RawObject raiseUnknownFormatError(Thread* thread, int32_t format_code, 338 const Object& object) { 339 if (32 < format_code && format_code < kMaxASCII) { 340 return thread->raiseWithFmt( 341 LayoutId::kValueError, 342 "Unknown format code '%c' for object of type '%T'", 343 static_cast<char>(format_code), &object); 344 } 345 return thread->raiseWithFmt( 346 LayoutId::kValueError, 347 "Unknown format code '\\x%x' for object of type '%T'", 348 static_cast<unsigned>(format_code), &object); 349} 350 351RawObject formatFloat(Thread* thread, double value, FormatSpec* format) { 352 static const word default_precision = 6; 353 word precision = format->precision; 354 if (precision > INT_MAX) { 355 return thread->raiseWithFmt(LayoutId::kValueError, "precision too big"); 356 } 357 358 int32_t type = format->type; 359 bool add_dot_0 = false; 360 bool add_percent = false; 361 switch (type) { 362 case '\0': 363 add_dot_0 = true; 364 type = 'r'; 365 break; 366 case 'n': 367 type = 'g'; 368 break; 369 case '%': 370 type = 'f'; 371 value *= 100; 372 add_percent = true; 373 break; 374 } 375 376 if (precision < 0) { 377 precision = add_dot_0 ? 0 : default_precision; 378 } else if (type == 'r') { 379 type = 'g'; 380 } 381 382 FormatResultKind kind; 383 unique_c_ptr<char> buf(doubleToString(value, type, 384 static_cast<int>(precision), false, 385 add_dot_0, format->alternate, &kind)); 386 word length = std::strlen(buf.get()); 387 if (add_percent) { 388 // Overwrite the terminating null character 389 buf.get()[length++] = '%'; 390 } 391 392 Runtime* runtime = thread->runtime(); 393 if (format->positive_sign == '\0' && format->width <= length && 394 format->type != 'n' && format->thousands_separator == '\0') { 395 return runtime->newStrWithAll({reinterpret_cast<byte*>(buf.get()), length}); 396 } 397 398 // TODO(T52759101): use locale for grouping, separator, and decimal point 399 FloatWidths widths; 400 word result_length = calculateFloatWidths(format, buf.get(), length, &widths); 401 402 HandleScope scope(thread); 403 MutableBytes result(&scope, 404 runtime->newMutableBytesUninitialized(result_length)); 405 word written = 406 putFloat(result, reinterpret_cast<byte*>(buf.get()), format, &widths); 407 DCHECK(written == result_length, "expected to write %ld bytes, wrote %ld", 408 result_length, written); 409 return result.becomeStr(); 410} 411 412RawObject formatStr(Thread* thread, const Str& str, FormatSpec* format) { 413 DCHECK(format->positive_sign == '\0', "must no have sign specified"); 414 DCHECK(!format->alternate, "must not have alternative format"); 415 word width = format->width; 416 word precision = format->precision; 417 if (width == -1 && precision == -1) { 418 return *str; 419 } 420 421 word char_length = str.length(); 422 word codepoint_length; 423 word str_end_index; 424 if (precision >= 0) { 425 str_end_index = str.offsetByCodePoints(0, precision); 426 if (str_end_index < char_length) { 427 codepoint_length = precision; 428 } else { 429 codepoint_length = str.codePointLength(); 430 } 431 } else { 432 str_end_index = char_length; 433 codepoint_length = str.codePointLength(); 434 } 435 436 Runtime* runtime = thread->runtime(); 437 word padding = width - codepoint_length; 438 if (padding <= 0) { 439 return strSubstr(thread, str, 0, str_end_index); 440 } 441 442 // Construct result. 443 HandleScope scope(thread); 444 Str fill_char(&scope, SmallStr::fromCodePoint(format->fill_char)); 445 word fill_char_length = fill_char.length(); 446 word padding_char_length = padding * fill_char_length; 447 word result_char_length = str_end_index + padding_char_length; 448 MutableBytes result( 449 &scope, runtime->newMutableBytesUninitialized(result_char_length)); 450 word index = 0; 451 word early_padding; 452 if (format->alignment == '<') { 453 early_padding = 0; 454 } else if (format->alignment == '^') { 455 word half = padding / 2; 456 early_padding = half; 457 padding = half + (padding % 2); 458 } else { 459 DCHECK(format->alignment == '>' || format->alignment == '=', 460 "remaining cases must be '>' or '='"); 461 early_padding = padding; 462 padding = 0; 463 } 464 for (word i = 0; i < early_padding; i++) { 465 result.replaceFromWithStr(index, *fill_char, fill_char_length); 466 index += fill_char_length; 467 } 468 result.replaceFromWithStr(index, *str, str_end_index); 469 index += str_end_index; 470 if (padding > 0) { 471 DCHECK(format->alignment == '<' || format->alignment == '^', 472 "unexpected alignment"); 473 for (word i = 0; i < padding; i++) { 474 result.replaceFromWithStr(index, *fill_char, fill_char_length); 475 index += fill_char_length; 476 } 477 } 478 DCHECK(index == result_char_length, "overflow or underflow in result"); 479 return result.becomeStr(); 480} 481 482// Returns the quotient of a double word number and a single word. 483// Assumes the result will fit in a single uword: `dividend_high < divisor`. 484static uword dwordUDiv(uword dividend_low, uword dividend_high, uword divisor, 485 uword* remainder) { 486 // TODO(matthiasb): Future optimization idea: 487 // This whole function is a single `divq` instruction on x86_64, we could use 488 // inline assembly for it (there doesn't seem to be a builtin). 489 490 // The code is based on Hacker's Delight chapter 9-4 Unsigned Long Division. 491 DCHECK(divisor != 0, "division by zero"); 492 DCHECK(dividend_high < divisor, "overflow"); 493 494 // Performs some arithmetic with no more than half the bits of a `uword`. 495 int half_bits = kBitsPerWord / 2; 496 uword half_mask = (uword{1} << half_bits) - 1; 497 498 // Normalize divisor by shifting the highest bit left as much as possible. 499 static_assert(sizeof(divisor) == sizeof(long), "choose right builtin"); 500 int s = __builtin_clzl(divisor); 501 uword divisor_n = divisor << s; 502 uword divisor_n_high_half = divisor_n >> half_bits; 503 uword divisor_n_low_half = divisor_n & half_mask; 504 505 // Normalize dividend by shifting it by the same amount as the divisor. 506 uword dividend_high_n = 507 (s == 0) ? dividend_high 508 : (dividend_high << s) | (dividend_low >> (kBitsPerWord - s)); 509 uword dividend_low_n = dividend_low << s; 510 uword dividend_low_n_high_half = dividend_low_n >> half_bits; 511 uword dividend_low_n_low_half = dividend_low_n & half_mask; 512 513 uword quot_high_half = dividend_high_n / divisor_n_high_half; 514 uword remainder_high_half = dividend_high_n % divisor_n_high_half; 515 while (quot_high_half > half_mask || 516 quot_high_half * divisor_n_low_half > 517 ((remainder_high_half << half_bits) | dividend_low_n_high_half)) { 518 quot_high_half--; 519 remainder_high_half += divisor_n_high_half; 520 if (remainder_high_half > half_mask) break; 521 } 522 523 uword dividend_middle = 524 ((dividend_high_n << half_bits) | dividend_low_n_high_half) - 525 quot_high_half * divisor_n; 526 527 uword quot_low_half = dividend_middle / divisor_n_high_half; 528 uword remainder_low_half = dividend_middle % divisor_n_high_half; 529 while (quot_low_half > half_mask || 530 quot_low_half * divisor_n_low_half > 531 ((remainder_low_half << half_bits) | dividend_low_n_low_half)) { 532 quot_low_half--; 533 remainder_low_half += divisor_n_high_half; 534 if (remainder_low_half > half_mask) break; 535 } 536 537 uword result = (quot_high_half << half_bits) | quot_low_half; 538 *remainder = dividend_low - result * divisor; 539 return result; 540} 541 542// Divide a large integer formed by an array of int digits by a single digit and 543// return the remainder. `digits_in` and `digits_out` may be the same pointer 544// for in-place operation. 545static uword divIntSingleDigit(uword* digits_out, const uword* digits_in, 546 word num_digits, uword divisor) { 547 // TODO(matthiasb): Future optimization idea: 548 // Instead of dividing by a constant, multiply with a precomputed inverse 549 // (see Hackers Delight, chapter 10). The compiler doesn't catch this case 550 // for double word arithmetic as in dwordUDiv. 551 uword remainder = 0; 552 for (word i = num_digits - 1; i >= 0; i--) { 553 // Compute `remainder:digit / divisor`. 554 digits_out[i] = dwordUDiv(digits_in[i], remainder, divisor, &remainder); 555 } 556 return remainder; 557} 558 559// Return upper bound on number of decimal digits to format large int `value`. 560static word estimateNumDecimalDigits(RawLargeInt value) { 561 // Compute an upper bound on the number of decimal digits required for a 562 // number with n bits: 563 // ceil(log10(2**n - 1)) 564 // We over-approximate this with: 565 // ceil(log10(2**n - 1)) 566 // == ceil(log2(2**n - 1)/log2(10)) 567 // <= 1 + n * (1/log2(10)) 568 // <= 1 + n * 0.30102999566398114 569 // <= 1 + n * 309 / 1024 570 // This isn't off by more than 1 digit for all one binary numbers up to 571 // 1425 bits. 572 word bit_length = value.bitLength(); 573 return 1 + bit_length * 309 / 1024; 574} 575 576static byte* writeLargeIntDecimalDigits(byte* buffer_end, RawLargeInt value) { 577 // Allocate space for intermediate results. We also convert a negative number 578 // to a positive number of the same magnitude here. 579 word num_digits = value.numDigits(); 580 std::unique_ptr<uword[]> temp_digits(new uword[num_digits]); 581 bool negative = value.isNegative(); 582 if (!negative) { 583 for (word i = 0; i < num_digits; ++i) { 584 temp_digits[i] = value.digitAt(i); 585 } 586 } else { 587 uword carry = 1; 588 for (word i = 0; i < num_digits; ++i) { 589 uword digit = value.digitAt(i); 590 carry = __builtin_uaddl_overflow(~digit, carry, &temp_digits[i]); 591 } 592 // The complement of the highest bit in a negative number must be 0 so we 593 // cannot overflow. 594 DCHECK(carry == 0, "overflow"); 595 } 596 word num_temp_digits = num_digits; 597 598 // The strategy here is to divide the large integer by continually dividing it 599 // by `kUwordDigits10Pow`. `uwordToDecimal` can convert those remainders to 600 // decimal digits. 601 // 602 // TODO(matthiasb): Future optimization ideas: 603 // It seems cpythons algorithm is faster (for big numbers) in practive. 604 // Their source claims it is (Knuth TAOCP, vol 2, section 4.4, method 1b). 605 byte* start = buffer_end; 606 do { 607 uword remainder = divIntSingleDigit(temp_digits.get(), temp_digits.get(), 608 num_temp_digits, kUwordDigits10Pow); 609 byte* new_start = uwordToDecimal(remainder, start); 610 611 while (num_temp_digits > 0 && temp_digits[num_temp_digits - 1] == 0) { 612 num_temp_digits--; 613 } 614 // Produce leading zeros if this wasn't the last round. 615 if (num_temp_digits > 0) { 616 for (word i = 0, n = kUwordDigits10 - (start - new_start); i < n; i++) { 617 *--new_start = '0'; 618 } 619 } 620 start = new_start; 621 } while (num_temp_digits > 0); 622 return start; 623} 624 625RawObject formatIntDecimalSimple(Thread* thread, const Int& value) { 626 if (!value.isLargeInt()) { 627 word value_word = value.asWord(); 628 uword magnitude = 629 value_word >= 0 ? value_word : -static_cast<uword>(value_word); 630 byte buffer[kUwordDigits10 + 1]; 631 byte* end = buffer + sizeof(buffer); 632 byte* start = uwordToDecimal(magnitude, end); 633 if (value_word < 0) *--start = '-'; 634 DCHECK(start >= buffer, "buffer underflow"); 635 return thread->runtime()->newStrWithAll(View<byte>(start, end - start)); 636 } 637 638 RawLargeInt value_large = LargeInt::cast(*value); 639 bool is_negative = value_large.isNegative(); 640 word max_chars = 641 estimateNumDecimalDigits(value_large) + (is_negative ? 1 : 0); 642 std::unique_ptr<byte[]> buffer(new byte[max_chars]); 643 byte* end = buffer.get() + max_chars; 644 byte* start = writeLargeIntDecimalDigits(end, value_large); 645 if (is_negative) { 646 *--start = '-'; 647 } 648 DCHECK(start >= buffer.get(), "buffer underflow"); 649 return thread->runtime()->newStrWithAll(View<byte>(start, end - start)); 650} 651 652static word numBinaryDigits(const Int& value) { 653 return value.isZero() ? 1 : value.bitLength(); 654} 655 656static word numHexadecimalDigits(const Int& value) { 657 return value.isZero() ? 1 : (value.bitLength() + 3) >> 2; 658} 659 660static word numOctalDigits(const Int& value) { 661 return value.isZero() ? 1 : (value.bitLength() + 2) / 3; 662} 663 664static void putBinaryDigits(Thread* thread, const MutableBytes& dest, word at, 665 const Int& value, word num_digits) { 666 static const char* const quads[16] = { 667 "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111", 668 "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111", 669 }; 670 671 word idx = at + num_digits; 672 uword last_digit; 673 if (value.isLargeInt()) { 674 HandleScope scope(thread); 675 LargeInt value_large(&scope, *value); 676 word d_last = (num_digits - 1) / kBitsPerWord; 677 bool is_negative = value_large.isNegative(); 678 uword carry = 1; 679 for (word d = 0; d < d_last; d++) { 680 uword digit = value_large.digitAt(d); 681 if (is_negative) { 682 digit = ~digit + carry; 683 carry = carry & (digit == 0); 684 } 685 static_assert(kBitsPerWord % 4 == 0, "bits per word divisible by 4"); 686 for (word i = 0; i < kBitsPerWord / 4; i++) { 687 const char* quad = quads[digit & 0xf]; 688 dest.byteAtPut(--idx, quad[3]); 689 dest.byteAtPut(--idx, quad[2]); 690 dest.byteAtPut(--idx, quad[1]); 691 dest.byteAtPut(--idx, quad[0]); 692 digit >>= 4; 693 } 694 } 695 last_digit = value_large.digitAt(d_last); 696 if (is_negative) { 697 last_digit = ~last_digit + carry; 698 } 699 } else { 700 last_digit = static_cast<uword>(std::abs(value.asWord())); 701 } 702 703 do { 704 dest.byteAtPut(--idx, '0' + (last_digit & 1)); 705 last_digit >>= 1; 706 } while (last_digit != 0); 707 DCHECK(idx == at, "unexpected number of digits"); 708} 709 710static void putHexadecimalDigitsImpl(Thread* thread, const MutableBytes& dest, 711 word at, const Int& value, word num_digits, 712 const byte* digits) { 713 word idx = at + num_digits; 714 uword last_digit; 715 if (value.isLargeInt()) { 716 HandleScope scope(thread); 717 LargeInt value_large(&scope, *value); 718 const word hexdigits_per_word = kBitsPerWord / kBitsPerHexDigit; 719 word d_last = (num_digits - 1) / hexdigits_per_word; 720 bool is_negative = value_large.isNegative(); 721 uword carry = 1; 722 for (word d = 0; d < d_last; d++) { 723 uword digit = value_large.digitAt(d); 724 if (is_negative) { 725 digit = ~digit + carry; 726 carry = carry & (digit == 0); 727 } 728 for (word i = 0; i < hexdigits_per_word; i++) { 729 dest.byteAtPut(--idx, digits[digit & 0xf]); 730 digit >>= kBitsPerHexDigit; 731 } 732 } 733 last_digit = value_large.digitAt(d_last); 734 if (is_negative) { 735 last_digit = ~last_digit + carry; 736 } 737 } else { 738 last_digit = static_cast<uword>(std::abs(value.asWord())); 739 } 740 741 do { 742 dest.byteAtPut(--idx, digits[last_digit & 0xf]); 743 last_digit >>= kBitsPerHexDigit; 744 } while (last_digit != 0); 745 DCHECK(idx == at, "unexpected number of digits"); 746} 747 748static void putHexadecimalLowerCaseDigits(Thread* thread, 749 const MutableBytes& dest, word at, 750 const Int& value, word num_digits) { 751 putHexadecimalDigitsImpl(thread, dest, at, value, num_digits, 752 kLowerCaseHexDigitArray); 753} 754 755static void putHexadecimalUpperCaseDigits(Thread* thread, 756 const MutableBytes& dest, word at, 757 const Int& value, word num_digits) { 758 putHexadecimalDigitsImpl(thread, dest, at, value, num_digits, 759 kUpperCaseHexDigitArray); 760} 761 762static void putOctalDigits(Thread* thread, const MutableBytes& dest, word at, 763 const Int& value, word num_result_digits) { 764 word idx = at + num_result_digits; 765 if (value.isLargeInt()) { 766 HandleScope scope(thread); 767 LargeInt value_large(&scope, *value); 768 bool is_negative = value_large.isNegative(); 769 770 // We have to negate negative numbers which produces carry between 771 // digits. 772 uword negate_carry = 1; 773 // We have carry over when the three bits for an octal digits are between 774 // large-int digits (this has nothing to do with `negate_carry`). 775 uword prev_digit_carry = 0; 776 word prev_digit_carry_num_bits = 0; 777 for (word d = 0, num_digits = value_large.numDigits(); d < num_digits; 778 d++) { 779 uword digit = value_large.digitAt(d); 780 if (is_negative) { 781 digit = ~digit + negate_carry; 782 negate_carry &= (digit == 0); 783 } 784 785 word num_oct_digits = kBitsPerWord / kBitsPerOctDigit; 786 word next_carry_num_bits = kBitsPerWord % kBitsPerOctDigit; 787 if (prev_digit_carry_num_bits != 0) { 788 word combined = digit << prev_digit_carry_num_bits | prev_digit_carry; 789 dest.byteAtPut(--idx, '0' + (combined & 7)); 790 digit >>= (kBitsPerOctDigit - prev_digit_carry_num_bits); 791 if (idx == at) { 792 DCHECK(d == num_digits - 1 && digit == 0, "rest must be zero"); 793 break; 794 } 795 num_oct_digits--; 796 797 next_carry_num_bits += prev_digit_carry_num_bits; 798 if (next_carry_num_bits == kBitsPerOctDigit) { 799 num_oct_digits++; 800 next_carry_num_bits = 0; 801 } 802 } 803 for (word i = 0; i < num_oct_digits; i++) { 804 dest.byteAtPut(--idx, '0' + (digit & 7)); 805 digit >>= kBitsPerOctDigit; 806 // Stop before we start outputting leading zeros. 807 if (idx == at) { 808 DCHECK(d == num_digits - 1 && digit == 0, "rest must be zero"); 809 break; 810 } 811 } 812 DCHECK(digit < static_cast<uword>(1 << next_carry_num_bits), 813 "too many bits left"); 814 prev_digit_carry_num_bits = next_carry_num_bits; 815 prev_digit_carry = digit; 816 } 817 // Output leftover carry bits. 818 if (idx > at) { 819 DCHECK(prev_digit_carry_num_bits > 0, "should have carry bits"); 820 dest.byteAtPut(--idx, '0' + prev_digit_carry); 821 } 822 } else { 823 uword value_uword = static_cast<uword>(std::abs(value.asWord())); 824 do { 825 dest.byteAtPut(--idx, '0' + (value_uword & 7)); 826 value_uword >>= kBitsPerOctDigit; 827 } while (value_uword != 0); 828 } 829 DCHECK(idx == at, "unexpected number of digits"); 830} 831 832using numDigitsFunc = word (*)(const Int&); 833using putDigitsFunc = void (*)(Thread*, const MutableBytes&, word, const Int&, 834 word); 835 836static inline RawObject formatIntSimpleImpl(Thread* thread, const Int& value, 837 char format_prefix, 838 numDigitsFunc num_digits, 839 putDigitsFunc put_digits) { 840 word result_n_digits = num_digits(value); 841 word result_size = 2 + (value.isNegative() ? 1 : 0) + result_n_digits; 842 843 HandleScope scope(thread); 844 Runtime* runtime = thread->runtime(); 845 MutableBytes result(&scope, 846 runtime->newMutableBytesUninitialized(result_size)); 847 word index = 0; 848 if (value.isNegative()) { 849 result.byteAtPut(index++, '-'); 850 } 851 result.byteAtPut(index++, '0'); 852 result.byteAtPut(index++, format_prefix); 853 put_digits(thread, result, index, value, result_n_digits); 854 return result.becomeStr(); 855} 856 857RawObject formatIntBinarySimple(Thread* thread, const Int& value) { 858 return formatIntSimpleImpl(thread, value, 'b', numBinaryDigits, 859 putBinaryDigits); 860} 861 862RawObject formatIntHexadecimalSimple(Thread* thread, const Int& value) { 863 return formatIntSimpleImpl(thread, value, 'x', numHexadecimalDigits, 864 putHexadecimalLowerCaseDigits); 865} 866 867RawObject formatIntOctalSimple(Thread* thread, const Int& value) { 868 return formatIntSimpleImpl(thread, value, 'o', numOctalDigits, 869 putOctalDigits); 870} 871 872RawObject formatIntDecimal(Thread* thread, const Int& value, 873 FormatSpec* format) { 874 if (format->precision >= 0) { 875 return thread->raiseWithFmt( 876 LayoutId::kValueError, 877 "Precision not allowed in integer format specifier"); 878 } 879 if (format->thousands_separator != 0) { 880 UNIMPLEMENTED("thousands separator"); 881 } 882 883 // We cannot easily predict how many digits are necessary. So we overestimate 884 // and printing into a temporary buffer. 885 byte fixed_buffer[kUwordDigits10]; 886 byte* buffer; 887 byte* digits; 888 word result_n_digits; 889 if (value.isLargeInt()) { 890 word max_chars = estimateNumDecimalDigits(LargeInt::cast(*value)); 891 buffer = new byte[max_chars]; 892 byte* end = buffer + max_chars; 893 digits = writeLargeIntDecimalDigits(end, LargeInt::cast(*value)); 894 result_n_digits = end - digits; 895 } else { 896 buffer = fixed_buffer; 897 word value_word = value.asWord(); 898 uword magnitude = 899 value_word >= 0 ? value_word : -static_cast<uword>(value_word); 900 byte* end = buffer + ARRAYSIZE(fixed_buffer); 901 digits = uwordToDecimal(magnitude, end); 902 result_n_digits = end - digits; 903 } 904 905 bool is_negative = value.isNegative(); 906 word number_chars = 907 (is_negative || format->positive_sign != '\0') + result_n_digits; 908 909 HandleScope scope(thread); 910 Str fill_cp(&scope, SmallStr::fromCodePoint(format->fill_char)); 911 word fill_cp_chars = fill_cp.length(); 912 word result_chars = number_chars; 913 914 word padding = format->width - number_chars; 915 word left_padding = 0; 916 word sign_padding = 0; 917 word right_padding = 0; 918 if (padding > 0) { 919 result_chars += padding * fill_cp_chars; 920 switch (format->alignment) { 921 case '<': 922 right_padding = padding; 923 break; 924 case '=': 925 sign_padding = padding; 926 break; 927 case '>': 928 left_padding = padding; 929 break; 930 case '^': 931 left_padding = padding >> 1; 932 right_padding = padding - left_padding; 933 break; 934 default: 935 UNREACHABLE("unexpected alignment %c", format->alignment); 936 } 937 } 938 939 word index = 0; 940 MutableBytes result( 941 &scope, thread->runtime()->newMutableBytesUninitialized(result_chars)); 942 for (word i = 0; i < left_padding; i++) { 943 result.replaceFromWithStr(index, *fill_cp, fill_cp_chars); 944 index += fill_cp_chars; 945 } 946 if (is_negative) { 947 result.byteAtPut(index++, '-'); 948 } else if (format->positive_sign != '\0') { 949 result.byteAtPut(index++, format->positive_sign); 950 } 951 for (word i = 0; i < sign_padding; i++) { 952 result.replaceFromWithStr(index, *fill_cp, fill_cp_chars); 953 index += fill_cp_chars; 954 } 955 result.replaceFromWithAll(index, {digits, result_n_digits}); 956 index += result_n_digits; 957 for (word i = 0; i < right_padding; i++) { 958 result.replaceFromWithStr(index, *fill_cp, fill_cp_chars); 959 index += fill_cp_chars; 960 } 961 DCHECK(index == result.length(), "wrong number of bytes written"); 962 963 if (value.isLargeInt()) { 964 delete[] buffer; 965 } 966 return result.becomeStr(); 967} 968 969static inline RawObject formatIntImpl(Thread* thread, const Int& value, 970 FormatSpec* format, char format_prefix, 971 numDigitsFunc num_digits, 972 putDigitsFunc put_digits) { 973 if (format->precision >= 0) { 974 return thread->raiseWithFmt( 975 LayoutId::kValueError, 976 "Precision not allowed in integer format specifier"); 977 } 978 if (format->thousands_separator != 0) { 979 UNIMPLEMENTED("thousands separator"); 980 } 981 982 bool is_negative = value.isNegative(); 983 word result_n_digits = num_digits(value); 984 word number_chars = (is_negative || format->positive_sign != '\0') + 985 (format->alternate ? 2 : 0) + result_n_digits; 986 987 HandleScope scope(thread); 988 Str fill_cp(&scope, SmallStr::fromCodePoint(format->fill_char)); 989 word fill_cp_chars = fill_cp.length(); 990 word result_chars = number_chars; 991 992 word padding = format->width - number_chars; 993 word left_padding = 0; 994 word sign_padding = 0; 995 word right_padding = 0; 996 if (padding > 0) { 997 result_chars += padding * fill_cp_chars; 998 switch (format->alignment) { 999 case '<': 1000 right_padding = padding; 1001 break; 1002 case '=': 1003 sign_padding = padding; 1004 break; 1005 case '>': 1006 left_padding = padding; 1007 break; 1008 case '^': 1009 left_padding = padding >> 1; 1010 right_padding = padding - left_padding; 1011 break; 1012 default: 1013 UNREACHABLE("unexpected alignment %c", format->alignment); 1014 } 1015 } 1016 1017 word index = 0; 1018 MutableBytes result( 1019 &scope, thread->runtime()->newMutableBytesUninitialized(result_chars)); 1020 for (word i = 0; i < left_padding; i++) { 1021 result.replaceFromWithStr(index, *fill_cp, fill_cp_chars); 1022 index += fill_cp_chars; 1023 } 1024 if (is_negative) { 1025 result.byteAtPut(index++, '-'); 1026 } else if (format->positive_sign != '\0') { 1027 result.byteAtPut(index++, format->positive_sign); 1028 } 1029 if (format->alternate) { 1030 result.byteAtPut(index++, '0'); 1031 result.byteAtPut(index++, format_prefix); 1032 } 1033 for (word i = 0; i < sign_padding; i++) { 1034 result.replaceFromWithStr(index, *fill_cp, fill_cp_chars); 1035 index += fill_cp_chars; 1036 } 1037 put_digits(thread, result, index, value, result_n_digits); 1038 index += result_n_digits; 1039 1040 for (word i = 0; i < right_padding; i++) { 1041 result.replaceFromWithStr(index, *fill_cp, fill_cp_chars); 1042 index += fill_cp_chars; 1043 } 1044 DCHECK(index == result.length(), "wrong number of bytes written"); 1045 return result.becomeStr(); 1046} 1047 1048RawObject formatIntBinary(Thread* thread, const Int& value, 1049 FormatSpec* format) { 1050 return formatIntImpl(thread, value, format, 'b', numBinaryDigits, 1051 putBinaryDigits); 1052} 1053 1054RawObject formatIntHexadecimalLowerCase(Thread* thread, const Int& value, 1055 FormatSpec* format) { 1056 return formatIntImpl(thread, value, format, 'x', numHexadecimalDigits, 1057 putHexadecimalLowerCaseDigits); 1058} 1059 1060RawObject formatIntHexadecimalUpperCase(Thread* thread, const Int& value, 1061 FormatSpec* format) { 1062 return formatIntImpl(thread, value, format, 'X', numHexadecimalDigits, 1063 putHexadecimalUpperCaseDigits); 1064} 1065 1066RawObject formatIntOctal(Thread* thread, const Int& value, FormatSpec* format) { 1067 return formatIntImpl(thread, value, format, 'o', numOctalDigits, 1068 putOctalDigits); 1069} 1070 1071RawObject formatDoubleHexadecimalSimple(Runtime* runtime, double value) { 1072 const int mantissa_hex_digits = (kDoubleMantissaBits / 4) + 1; 1073 const int exp_bits = kBitsPerDouble - kDoubleMantissaBits - 1; 1074 const int max_exp = 1 << (exp_bits - 1); 1075 const int min_exp = -(1 << (exp_bits - 1)) + 1; 1076 1077 bool is_negative; 1078 int exponent; 1079 int64_t mantissa; 1080 1081 decodeDouble(value, &is_negative, &exponent, &mantissa); 1082 if (exponent == max_exp) { 1083 if (mantissa == 0) { 1084 return is_negative ? runtime->newStrFromCStr("-inf") 1085 : runtime->newStrFromCStr("inf"); 1086 } 1087 return runtime->newStrFromCStr("nan"); 1088 } 1089 1090 const bool is_subnormal = exponent == min_exp; 1091 if (is_subnormal) { 1092 if (mantissa != 0) { 1093 exponent += 1; 1094 } else { 1095 return is_negative ? runtime->newStrFromCStr("-0x0.0p+0") 1096 : runtime->newStrFromCStr("0x0.0p+0"); 1097 } 1098 } 1099 1100 int exponent_sign; 1101 if (exponent < 0) { 1102 exponent_sign = '-'; 1103 exponent = -exponent; 1104 } else { 1105 exponent_sign = '+'; 1106 } 1107 1108 // Output buffer_size mantissaHexDigits + 1 char for '-', 2 chars for '0x', 2 1109 // chars for '1.', 1 char for 'p', 1 char for exponent sign '+' or '-', and 4 1110 // chars for the exponent. 1111 const int buffer_size = mantissa_hex_digits + 11; 1112 char output[buffer_size]; 1113 byte* end = reinterpret_cast<byte*>(output) + buffer_size; 1114 byte* poutput = uwordToDecimal(exponent, end); 1115 1116 *--poutput = exponent_sign; 1117 *--poutput = 'p'; 1118 for (int k = 0; k < kDoubleMantissaBits; k += 4) { 1119 *--poutput = lowerCaseHexDigit((mantissa >> k) & 0xf); 1120 } 1121 1122 *--poutput = '.'; 1123 *--poutput = is_subnormal ? '0' : '1'; 1124 *--poutput = 'x'; 1125 *--poutput = '0'; 1126 if (is_negative) { 1127 *--poutput = '-'; 1128 } 1129 return runtime->newStrWithAll(View<byte>(poutput, end - poutput)); 1130} 1131 1132} // namespace py