this repo has no description
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
2#include "formatter.h"
3
4#include <climits>
5
6#include "float-builtins.h"
7#include "float-conversion.h"
8#include "formatter-utils.h"
9#include "globals.h"
10#include "handles-decl.h"
11#include "objects.h"
12#include "runtime.h"
13#include "str-builtins.h"
14#include "thread.h"
15#include "unicode.h"
16
17namespace py {
18
19const byte kLowerCaseHexDigitArray[] = {'0', '1', '2', '3', '4', '5', '6', '7',
20 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
21const byte kUpperCaseHexDigitArray[] = {'0', '1', '2', '3', '4', '5', '6', '7',
22 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
23
24struct FloatWidths {
25 word left_padding;
26 byte sign;
27 word sign_padding;
28 word grouped_digits;
29 bool has_decimal;
30 word remainder;
31 word right_padding;
32};
33
34static word calculateFloatWidths(const FormatSpec* format, const char* buf,
35 word length, FloatWidths* result) {
36 result->left_padding = 0;
37 result->sign = '\0';
38 result->sign_padding = 0;
39 result->grouped_digits = length;
40 result->has_decimal = false;
41 result->remainder = 0;
42 result->right_padding = 0;
43
44 // Check for a sign character
45 word index = 0;
46 word total_length = 0;
47 if (buf[index] == '-') {
48 result->sign = '-';
49 index++;
50 total_length++;
51 }
52
53 // Parse the digits for a remainder
54 for (; index < length; index++) {
55 char c = buf[index];
56 if (!ASCII::isDigit(c)) {
57 word remainder = length - index;
58 if (c == '.') {
59 result->has_decimal = true;
60 result->remainder = remainder - 1;
61 total_length += remainder; // TODO(T52759101): use locale for decimal
62 } else {
63 result->remainder = remainder;
64 total_length += remainder;
65 }
66 break;
67 }
68 }
69
70 if (format->positive_sign != '\0' && result->sign == '\0') {
71 result->sign = format->positive_sign;
72 total_length++;
73 }
74
75 // TODO(T52759101): use locale for thousands separator and grouping
76 word digits = result->sign == '-' ? index - 1 : index;
77 if (format->thousands_separator != '\0') {
78 digits += (digits - 1) / 3;
79 }
80 result->grouped_digits = digits;
81 total_length += digits;
82
83 word padding = format->width - total_length;
84 if (padding > 0) {
85 total_length +=
86 padding * SmallStr::fromCodePoint(format->fill_char).length();
87 switch (format->alignment) {
88 case '<':
89 result->right_padding = padding;
90 break;
91 case '=':
92 result->sign_padding = padding;
93 break;
94 case '>':
95 result->left_padding = padding;
96 break;
97 case '^':
98 result->left_padding = padding >> 1;
99 result->right_padding = padding - result->left_padding;
100 break;
101 default:
102 UNREACHABLE("unexpected alignment %c", format->alignment);
103 }
104 }
105
106 return total_length;
107}
108
109static bool isAlignmentSpec(int32_t cp) {
110 switch (cp) {
111 case '<':
112 case '>':
113 case '=':
114 case '^':
115 return true;
116 default:
117 return false;
118 }
119}
120
121static inline int32_t nextCodePoint(const Str& spec, word length, word* index) {
122 if (*index >= length) {
123 return 0;
124 }
125 word cp_length;
126 int32_t cp = spec.codePointAt(*index, &cp_length);
127 *index += cp_length;
128 return cp;
129}
130
131RawObject parseFormatSpec(Thread* thread, const Str& spec, int32_t default_type,
132 char default_align, FormatSpec* result) {
133 result->alignment = default_align;
134 result->positive_sign = 0;
135 result->thousands_separator = 0;
136 result->type = default_type;
137 result->alternate = false;
138 result->fill_char = ' ';
139 result->width = -1;
140 result->precision = -1;
141
142 word index = 0;
143 word length = spec.length();
144 int32_t cp = nextCodePoint(spec, length, &index);
145
146 bool fill_char_specified = false;
147 bool alignment_specified = false;
148 word old_index = index;
149 int32_t c_next = nextCodePoint(spec, length, &index);
150 if (isAlignmentSpec(c_next)) {
151 result->alignment = static_cast<char>(c_next);
152 result->fill_char = cp;
153 fill_char_specified = true;
154 alignment_specified = true;
155
156 cp = nextCodePoint(spec, length, &index);
157 } else if (!alignment_specified && isAlignmentSpec(cp)) {
158 result->alignment = static_cast<char>(cp);
159 alignment_specified = true;
160 cp = c_next;
161 } else {
162 index = old_index;
163 }
164
165 switch (cp) {
166 case '+':
167 case ' ':
168 result->positive_sign = static_cast<char>(cp);
169 cp = nextCodePoint(spec, length, &index);
170 break;
171 case '-':
172 cp = nextCodePoint(spec, length, &index);
173 break;
174 }
175
176 if (cp == '#') {
177 result->alternate = true;
178 cp = nextCodePoint(spec, length, &index);
179 }
180
181 if (!fill_char_specified && cp == '0') {
182 result->fill_char = '0';
183 if (!alignment_specified) {
184 result->alignment = '=';
185 }
186 }
187
188 if ('0' <= cp && cp <= '9') {
189 word width = 0;
190 for (;;) {
191 width += cp - '0';
192 cp = nextCodePoint(spec, length, &index);
193 if ('0' > cp || cp > '9') break;
194 if (__builtin_mul_overflow(width, 10, &width)) {
195 return thread->raiseWithFmt(LayoutId::kValueError,
196 "Too many decimal digits in format string");
197 }
198 }
199 result->width = width;
200 }
201
202 if (cp == ',') {
203 result->thousands_separator = ',';
204 cp = nextCodePoint(spec, length, &index);
205 }
206 if (cp == '_') {
207 if (result->thousands_separator != 0) {
208 return thread->raiseWithFmt(LayoutId::kValueError,
209 "Cannot specify both ',' and '_'.");
210 }
211 result->thousands_separator = '_';
212 cp = nextCodePoint(spec, length, &index);
213 }
214 if (cp == ',') {
215 return thread->raiseWithFmt(LayoutId::kValueError,
216 "Cannot specify both ',' and '_'.");
217 }
218
219 if (cp == '.') {
220 cp = nextCodePoint(spec, length, &index);
221 if ('0' > cp || cp > '9') {
222 return thread->raiseWithFmt(LayoutId::kValueError,
223 "Format specifier missing precision");
224 }
225
226 word precision = 0;
227 for (;;) {
228 precision += cp - '0';
229 cp = nextCodePoint(spec, length, &index);
230 if ('0' > cp || cp > '9') break;
231 if (__builtin_mul_overflow(precision, 10, &precision)) {
232 return thread->raiseWithFmt(LayoutId::kValueError,
233 "Too many decimal digits in format string");
234 }
235 }
236 result->precision = precision;
237 }
238
239 if (cp != 0) {
240 result->type = cp;
241 // This was the last step: No need to call `nextCodePoint()` here.
242 }
243 if (index < length) {
244 return thread->raiseWithFmt(LayoutId::kValueError,
245 "Invalid format specifier");
246 }
247
248 if (result->thousands_separator) {
249 switch (result->type) {
250 case 'd':
251 case 'e':
252 case 'f':
253 case 'g':
254 case 'E':
255 case 'G':
256 case '%':
257 case 'F':
258 case '\0':
259 // These are allowed. See PEP 378.
260 break;
261 case 'b':
262 case 'o':
263 case 'x':
264 case 'X':
265 // Underscores are allowed in bin/oct/hex. See PEP 515.
266 if (result->thousands_separator == '_') {
267 break;
268 }
269 FALLTHROUGH;
270 default:
271 if (32 < result->type && result->type <= kMaxASCII) {
272 return thread->raiseWithFmt(
273 LayoutId::kValueError, "Cannot specify '%c' with '%c'.",
274 result->thousands_separator, static_cast<char>(result->type));
275 }
276 return thread->raiseWithFmt(
277 LayoutId::kValueError, "Cannot specify '%c' with '\\x%x'.",
278 result->thousands_separator, static_cast<unsigned>(result->type));
279 }
280 }
281 return NoneType::object();
282}
283
284static word putFloat(const MutableBytes& dest, const byte* buf,
285 const FormatSpec* format, const FloatWidths* widths) {
286 word at = 0;
287 RawSmallStr fill = SmallStr::fromCodePoint(format->fill_char);
288 word fill_length = fill.length();
289 for (word i = 0; i < widths->left_padding; i++, at += fill_length) {
290 dest.replaceFromWithStr(at, Str::cast(fill), fill_length);
291 }
292 switch (widths->sign) {
293 case '\0':
294 break;
295 case '-':
296 buf++;
297 FALLTHROUGH;
298 case '+':
299 case ' ':
300 dest.byteAtPut(at++, widths->sign);
301 break;
302 default:
303 UNREACHABLE("unexpected sign char %c", widths->sign);
304 }
305 for (word i = 0; i < widths->sign_padding; i++, at += fill_length) {
306 dest.replaceFromWithStr(at, Str::cast(fill), fill_length);
307 }
308 // TODO(T52759101): use thousands separator from locale
309 if (format->thousands_separator == '\0') {
310 dest.replaceFromWithAll(at, {buf, widths->grouped_digits});
311 at += widths->grouped_digits;
312 buf += widths->grouped_digits;
313 } else {
314 // TODO(T52759101): use locale for grouping
315 word prefix = widths->grouped_digits % 4;
316 dest.replaceFromWithAll(at, {buf, prefix});
317 buf += prefix;
318 for (word i = prefix; i < widths->grouped_digits; i += 4, buf += 3) {
319 dest.byteAtPut(at + i, format->thousands_separator);
320 dest.replaceFromWithAll(at + i + 1, {buf, 3});
321 }
322 at += widths->grouped_digits;
323 }
324 if (widths->has_decimal) {
325 // TODO(T52759101): use decimal from locale
326 dest.byteAtPut(at++, '.');
327 buf++;
328 }
329 dest.replaceFromWithAll(at, {buf, widths->remainder});
330 at += widths->remainder;
331 for (word i = 0; i < widths->right_padding; i++, at += fill_length) {
332 dest.replaceFromWithStr(at, Str::cast(fill), fill_length);
333 }
334 return at;
335}
336
337RawObject raiseUnknownFormatError(Thread* thread, int32_t format_code,
338 const Object& object) {
339 if (32 < format_code && format_code < kMaxASCII) {
340 return thread->raiseWithFmt(
341 LayoutId::kValueError,
342 "Unknown format code '%c' for object of type '%T'",
343 static_cast<char>(format_code), &object);
344 }
345 return thread->raiseWithFmt(
346 LayoutId::kValueError,
347 "Unknown format code '\\x%x' for object of type '%T'",
348 static_cast<unsigned>(format_code), &object);
349}
350
351RawObject formatFloat(Thread* thread, double value, FormatSpec* format) {
352 static const word default_precision = 6;
353 word precision = format->precision;
354 if (precision > INT_MAX) {
355 return thread->raiseWithFmt(LayoutId::kValueError, "precision too big");
356 }
357
358 int32_t type = format->type;
359 bool add_dot_0 = false;
360 bool add_percent = false;
361 switch (type) {
362 case '\0':
363 add_dot_0 = true;
364 type = 'r';
365 break;
366 case 'n':
367 type = 'g';
368 break;
369 case '%':
370 type = 'f';
371 value *= 100;
372 add_percent = true;
373 break;
374 }
375
376 if (precision < 0) {
377 precision = add_dot_0 ? 0 : default_precision;
378 } else if (type == 'r') {
379 type = 'g';
380 }
381
382 FormatResultKind kind;
383 unique_c_ptr<char> buf(doubleToString(value, type,
384 static_cast<int>(precision), false,
385 add_dot_0, format->alternate, &kind));
386 word length = std::strlen(buf.get());
387 if (add_percent) {
388 // Overwrite the terminating null character
389 buf.get()[length++] = '%';
390 }
391
392 Runtime* runtime = thread->runtime();
393 if (format->positive_sign == '\0' && format->width <= length &&
394 format->type != 'n' && format->thousands_separator == '\0') {
395 return runtime->newStrWithAll({reinterpret_cast<byte*>(buf.get()), length});
396 }
397
398 // TODO(T52759101): use locale for grouping, separator, and decimal point
399 FloatWidths widths;
400 word result_length = calculateFloatWidths(format, buf.get(), length, &widths);
401
402 HandleScope scope(thread);
403 MutableBytes result(&scope,
404 runtime->newMutableBytesUninitialized(result_length));
405 word written =
406 putFloat(result, reinterpret_cast<byte*>(buf.get()), format, &widths);
407 DCHECK(written == result_length, "expected to write %ld bytes, wrote %ld",
408 result_length, written);
409 return result.becomeStr();
410}
411
412RawObject formatStr(Thread* thread, const Str& str, FormatSpec* format) {
413 DCHECK(format->positive_sign == '\0', "must no have sign specified");
414 DCHECK(!format->alternate, "must not have alternative format");
415 word width = format->width;
416 word precision = format->precision;
417 if (width == -1 && precision == -1) {
418 return *str;
419 }
420
421 word char_length = str.length();
422 word codepoint_length;
423 word str_end_index;
424 if (precision >= 0) {
425 str_end_index = str.offsetByCodePoints(0, precision);
426 if (str_end_index < char_length) {
427 codepoint_length = precision;
428 } else {
429 codepoint_length = str.codePointLength();
430 }
431 } else {
432 str_end_index = char_length;
433 codepoint_length = str.codePointLength();
434 }
435
436 Runtime* runtime = thread->runtime();
437 word padding = width - codepoint_length;
438 if (padding <= 0) {
439 return strSubstr(thread, str, 0, str_end_index);
440 }
441
442 // Construct result.
443 HandleScope scope(thread);
444 Str fill_char(&scope, SmallStr::fromCodePoint(format->fill_char));
445 word fill_char_length = fill_char.length();
446 word padding_char_length = padding * fill_char_length;
447 word result_char_length = str_end_index + padding_char_length;
448 MutableBytes result(
449 &scope, runtime->newMutableBytesUninitialized(result_char_length));
450 word index = 0;
451 word early_padding;
452 if (format->alignment == '<') {
453 early_padding = 0;
454 } else if (format->alignment == '^') {
455 word half = padding / 2;
456 early_padding = half;
457 padding = half + (padding % 2);
458 } else {
459 DCHECK(format->alignment == '>' || format->alignment == '=',
460 "remaining cases must be '>' or '='");
461 early_padding = padding;
462 padding = 0;
463 }
464 for (word i = 0; i < early_padding; i++) {
465 result.replaceFromWithStr(index, *fill_char, fill_char_length);
466 index += fill_char_length;
467 }
468 result.replaceFromWithStr(index, *str, str_end_index);
469 index += str_end_index;
470 if (padding > 0) {
471 DCHECK(format->alignment == '<' || format->alignment == '^',
472 "unexpected alignment");
473 for (word i = 0; i < padding; i++) {
474 result.replaceFromWithStr(index, *fill_char, fill_char_length);
475 index += fill_char_length;
476 }
477 }
478 DCHECK(index == result_char_length, "overflow or underflow in result");
479 return result.becomeStr();
480}
481
482// Returns the quotient of a double word number and a single word.
483// Assumes the result will fit in a single uword: `dividend_high < divisor`.
484static uword dwordUDiv(uword dividend_low, uword dividend_high, uword divisor,
485 uword* remainder) {
486 // TODO(matthiasb): Future optimization idea:
487 // This whole function is a single `divq` instruction on x86_64, we could use
488 // inline assembly for it (there doesn't seem to be a builtin).
489
490 // The code is based on Hacker's Delight chapter 9-4 Unsigned Long Division.
491 DCHECK(divisor != 0, "division by zero");
492 DCHECK(dividend_high < divisor, "overflow");
493
494 // Performs some arithmetic with no more than half the bits of a `uword`.
495 int half_bits = kBitsPerWord / 2;
496 uword half_mask = (uword{1} << half_bits) - 1;
497
498 // Normalize divisor by shifting the highest bit left as much as possible.
499 static_assert(sizeof(divisor) == sizeof(long), "choose right builtin");
500 int s = __builtin_clzl(divisor);
501 uword divisor_n = divisor << s;
502 uword divisor_n_high_half = divisor_n >> half_bits;
503 uword divisor_n_low_half = divisor_n & half_mask;
504
505 // Normalize dividend by shifting it by the same amount as the divisor.
506 uword dividend_high_n =
507 (s == 0) ? dividend_high
508 : (dividend_high << s) | (dividend_low >> (kBitsPerWord - s));
509 uword dividend_low_n = dividend_low << s;
510 uword dividend_low_n_high_half = dividend_low_n >> half_bits;
511 uword dividend_low_n_low_half = dividend_low_n & half_mask;
512
513 uword quot_high_half = dividend_high_n / divisor_n_high_half;
514 uword remainder_high_half = dividend_high_n % divisor_n_high_half;
515 while (quot_high_half > half_mask ||
516 quot_high_half * divisor_n_low_half >
517 ((remainder_high_half << half_bits) | dividend_low_n_high_half)) {
518 quot_high_half--;
519 remainder_high_half += divisor_n_high_half;
520 if (remainder_high_half > half_mask) break;
521 }
522
523 uword dividend_middle =
524 ((dividend_high_n << half_bits) | dividend_low_n_high_half) -
525 quot_high_half * divisor_n;
526
527 uword quot_low_half = dividend_middle / divisor_n_high_half;
528 uword remainder_low_half = dividend_middle % divisor_n_high_half;
529 while (quot_low_half > half_mask ||
530 quot_low_half * divisor_n_low_half >
531 ((remainder_low_half << half_bits) | dividend_low_n_low_half)) {
532 quot_low_half--;
533 remainder_low_half += divisor_n_high_half;
534 if (remainder_low_half > half_mask) break;
535 }
536
537 uword result = (quot_high_half << half_bits) | quot_low_half;
538 *remainder = dividend_low - result * divisor;
539 return result;
540}
541
542// Divide a large integer formed by an array of int digits by a single digit and
543// return the remainder. `digits_in` and `digits_out` may be the same pointer
544// for in-place operation.
545static uword divIntSingleDigit(uword* digits_out, const uword* digits_in,
546 word num_digits, uword divisor) {
547 // TODO(matthiasb): Future optimization idea:
548 // Instead of dividing by a constant, multiply with a precomputed inverse
549 // (see Hackers Delight, chapter 10). The compiler doesn't catch this case
550 // for double word arithmetic as in dwordUDiv.
551 uword remainder = 0;
552 for (word i = num_digits - 1; i >= 0; i--) {
553 // Compute `remainder:digit / divisor`.
554 digits_out[i] = dwordUDiv(digits_in[i], remainder, divisor, &remainder);
555 }
556 return remainder;
557}
558
559// Return upper bound on number of decimal digits to format large int `value`.
560static word estimateNumDecimalDigits(RawLargeInt value) {
561 // Compute an upper bound on the number of decimal digits required for a
562 // number with n bits:
563 // ceil(log10(2**n - 1))
564 // We over-approximate this with:
565 // ceil(log10(2**n - 1))
566 // == ceil(log2(2**n - 1)/log2(10))
567 // <= 1 + n * (1/log2(10))
568 // <= 1 + n * 0.30102999566398114
569 // <= 1 + n * 309 / 1024
570 // This isn't off by more than 1 digit for all one binary numbers up to
571 // 1425 bits.
572 word bit_length = value.bitLength();
573 return 1 + bit_length * 309 / 1024;
574}
575
576static byte* writeLargeIntDecimalDigits(byte* buffer_end, RawLargeInt value) {
577 // Allocate space for intermediate results. We also convert a negative number
578 // to a positive number of the same magnitude here.
579 word num_digits = value.numDigits();
580 std::unique_ptr<uword[]> temp_digits(new uword[num_digits]);
581 bool negative = value.isNegative();
582 if (!negative) {
583 for (word i = 0; i < num_digits; ++i) {
584 temp_digits[i] = value.digitAt(i);
585 }
586 } else {
587 uword carry = 1;
588 for (word i = 0; i < num_digits; ++i) {
589 uword digit = value.digitAt(i);
590 carry = __builtin_uaddl_overflow(~digit, carry, &temp_digits[i]);
591 }
592 // The complement of the highest bit in a negative number must be 0 so we
593 // cannot overflow.
594 DCHECK(carry == 0, "overflow");
595 }
596 word num_temp_digits = num_digits;
597
598 // The strategy here is to divide the large integer by continually dividing it
599 // by `kUwordDigits10Pow`. `uwordToDecimal` can convert those remainders to
600 // decimal digits.
601 //
602 // TODO(matthiasb): Future optimization ideas:
603 // It seems cpythons algorithm is faster (for big numbers) in practive.
604 // Their source claims it is (Knuth TAOCP, vol 2, section 4.4, method 1b).
605 byte* start = buffer_end;
606 do {
607 uword remainder = divIntSingleDigit(temp_digits.get(), temp_digits.get(),
608 num_temp_digits, kUwordDigits10Pow);
609 byte* new_start = uwordToDecimal(remainder, start);
610
611 while (num_temp_digits > 0 && temp_digits[num_temp_digits - 1] == 0) {
612 num_temp_digits--;
613 }
614 // Produce leading zeros if this wasn't the last round.
615 if (num_temp_digits > 0) {
616 for (word i = 0, n = kUwordDigits10 - (start - new_start); i < n; i++) {
617 *--new_start = '0';
618 }
619 }
620 start = new_start;
621 } while (num_temp_digits > 0);
622 return start;
623}
624
625RawObject formatIntDecimalSimple(Thread* thread, const Int& value) {
626 if (!value.isLargeInt()) {
627 word value_word = value.asWord();
628 uword magnitude =
629 value_word >= 0 ? value_word : -static_cast<uword>(value_word);
630 byte buffer[kUwordDigits10 + 1];
631 byte* end = buffer + sizeof(buffer);
632 byte* start = uwordToDecimal(magnitude, end);
633 if (value_word < 0) *--start = '-';
634 DCHECK(start >= buffer, "buffer underflow");
635 return thread->runtime()->newStrWithAll(View<byte>(start, end - start));
636 }
637
638 RawLargeInt value_large = LargeInt::cast(*value);
639 bool is_negative = value_large.isNegative();
640 word max_chars =
641 estimateNumDecimalDigits(value_large) + (is_negative ? 1 : 0);
642 std::unique_ptr<byte[]> buffer(new byte[max_chars]);
643 byte* end = buffer.get() + max_chars;
644 byte* start = writeLargeIntDecimalDigits(end, value_large);
645 if (is_negative) {
646 *--start = '-';
647 }
648 DCHECK(start >= buffer.get(), "buffer underflow");
649 return thread->runtime()->newStrWithAll(View<byte>(start, end - start));
650}
651
652static word numBinaryDigits(const Int& value) {
653 return value.isZero() ? 1 : value.bitLength();
654}
655
656static word numHexadecimalDigits(const Int& value) {
657 return value.isZero() ? 1 : (value.bitLength() + 3) >> 2;
658}
659
660static word numOctalDigits(const Int& value) {
661 return value.isZero() ? 1 : (value.bitLength() + 2) / 3;
662}
663
664static void putBinaryDigits(Thread* thread, const MutableBytes& dest, word at,
665 const Int& value, word num_digits) {
666 static const char* const quads[16] = {
667 "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
668 "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111",
669 };
670
671 word idx = at + num_digits;
672 uword last_digit;
673 if (value.isLargeInt()) {
674 HandleScope scope(thread);
675 LargeInt value_large(&scope, *value);
676 word d_last = (num_digits - 1) / kBitsPerWord;
677 bool is_negative = value_large.isNegative();
678 uword carry = 1;
679 for (word d = 0; d < d_last; d++) {
680 uword digit = value_large.digitAt(d);
681 if (is_negative) {
682 digit = ~digit + carry;
683 carry = carry & (digit == 0);
684 }
685 static_assert(kBitsPerWord % 4 == 0, "bits per word divisible by 4");
686 for (word i = 0; i < kBitsPerWord / 4; i++) {
687 const char* quad = quads[digit & 0xf];
688 dest.byteAtPut(--idx, quad[3]);
689 dest.byteAtPut(--idx, quad[2]);
690 dest.byteAtPut(--idx, quad[1]);
691 dest.byteAtPut(--idx, quad[0]);
692 digit >>= 4;
693 }
694 }
695 last_digit = value_large.digitAt(d_last);
696 if (is_negative) {
697 last_digit = ~last_digit + carry;
698 }
699 } else {
700 last_digit = static_cast<uword>(std::abs(value.asWord()));
701 }
702
703 do {
704 dest.byteAtPut(--idx, '0' + (last_digit & 1));
705 last_digit >>= 1;
706 } while (last_digit != 0);
707 DCHECK(idx == at, "unexpected number of digits");
708}
709
710static void putHexadecimalDigitsImpl(Thread* thread, const MutableBytes& dest,
711 word at, const Int& value, word num_digits,
712 const byte* digits) {
713 word idx = at + num_digits;
714 uword last_digit;
715 if (value.isLargeInt()) {
716 HandleScope scope(thread);
717 LargeInt value_large(&scope, *value);
718 const word hexdigits_per_word = kBitsPerWord / kBitsPerHexDigit;
719 word d_last = (num_digits - 1) / hexdigits_per_word;
720 bool is_negative = value_large.isNegative();
721 uword carry = 1;
722 for (word d = 0; d < d_last; d++) {
723 uword digit = value_large.digitAt(d);
724 if (is_negative) {
725 digit = ~digit + carry;
726 carry = carry & (digit == 0);
727 }
728 for (word i = 0; i < hexdigits_per_word; i++) {
729 dest.byteAtPut(--idx, digits[digit & 0xf]);
730 digit >>= kBitsPerHexDigit;
731 }
732 }
733 last_digit = value_large.digitAt(d_last);
734 if (is_negative) {
735 last_digit = ~last_digit + carry;
736 }
737 } else {
738 last_digit = static_cast<uword>(std::abs(value.asWord()));
739 }
740
741 do {
742 dest.byteAtPut(--idx, digits[last_digit & 0xf]);
743 last_digit >>= kBitsPerHexDigit;
744 } while (last_digit != 0);
745 DCHECK(idx == at, "unexpected number of digits");
746}
747
748static void putHexadecimalLowerCaseDigits(Thread* thread,
749 const MutableBytes& dest, word at,
750 const Int& value, word num_digits) {
751 putHexadecimalDigitsImpl(thread, dest, at, value, num_digits,
752 kLowerCaseHexDigitArray);
753}
754
755static void putHexadecimalUpperCaseDigits(Thread* thread,
756 const MutableBytes& dest, word at,
757 const Int& value, word num_digits) {
758 putHexadecimalDigitsImpl(thread, dest, at, value, num_digits,
759 kUpperCaseHexDigitArray);
760}
761
762static void putOctalDigits(Thread* thread, const MutableBytes& dest, word at,
763 const Int& value, word num_result_digits) {
764 word idx = at + num_result_digits;
765 if (value.isLargeInt()) {
766 HandleScope scope(thread);
767 LargeInt value_large(&scope, *value);
768 bool is_negative = value_large.isNegative();
769
770 // We have to negate negative numbers which produces carry between
771 // digits.
772 uword negate_carry = 1;
773 // We have carry over when the three bits for an octal digits are between
774 // large-int digits (this has nothing to do with `negate_carry`).
775 uword prev_digit_carry = 0;
776 word prev_digit_carry_num_bits = 0;
777 for (word d = 0, num_digits = value_large.numDigits(); d < num_digits;
778 d++) {
779 uword digit = value_large.digitAt(d);
780 if (is_negative) {
781 digit = ~digit + negate_carry;
782 negate_carry &= (digit == 0);
783 }
784
785 word num_oct_digits = kBitsPerWord / kBitsPerOctDigit;
786 word next_carry_num_bits = kBitsPerWord % kBitsPerOctDigit;
787 if (prev_digit_carry_num_bits != 0) {
788 word combined = digit << prev_digit_carry_num_bits | prev_digit_carry;
789 dest.byteAtPut(--idx, '0' + (combined & 7));
790 digit >>= (kBitsPerOctDigit - prev_digit_carry_num_bits);
791 if (idx == at) {
792 DCHECK(d == num_digits - 1 && digit == 0, "rest must be zero");
793 break;
794 }
795 num_oct_digits--;
796
797 next_carry_num_bits += prev_digit_carry_num_bits;
798 if (next_carry_num_bits == kBitsPerOctDigit) {
799 num_oct_digits++;
800 next_carry_num_bits = 0;
801 }
802 }
803 for (word i = 0; i < num_oct_digits; i++) {
804 dest.byteAtPut(--idx, '0' + (digit & 7));
805 digit >>= kBitsPerOctDigit;
806 // Stop before we start outputting leading zeros.
807 if (idx == at) {
808 DCHECK(d == num_digits - 1 && digit == 0, "rest must be zero");
809 break;
810 }
811 }
812 DCHECK(digit < static_cast<uword>(1 << next_carry_num_bits),
813 "too many bits left");
814 prev_digit_carry_num_bits = next_carry_num_bits;
815 prev_digit_carry = digit;
816 }
817 // Output leftover carry bits.
818 if (idx > at) {
819 DCHECK(prev_digit_carry_num_bits > 0, "should have carry bits");
820 dest.byteAtPut(--idx, '0' + prev_digit_carry);
821 }
822 } else {
823 uword value_uword = static_cast<uword>(std::abs(value.asWord()));
824 do {
825 dest.byteAtPut(--idx, '0' + (value_uword & 7));
826 value_uword >>= kBitsPerOctDigit;
827 } while (value_uword != 0);
828 }
829 DCHECK(idx == at, "unexpected number of digits");
830}
831
832using numDigitsFunc = word (*)(const Int&);
833using putDigitsFunc = void (*)(Thread*, const MutableBytes&, word, const Int&,
834 word);
835
836static inline RawObject formatIntSimpleImpl(Thread* thread, const Int& value,
837 char format_prefix,
838 numDigitsFunc num_digits,
839 putDigitsFunc put_digits) {
840 word result_n_digits = num_digits(value);
841 word result_size = 2 + (value.isNegative() ? 1 : 0) + result_n_digits;
842
843 HandleScope scope(thread);
844 Runtime* runtime = thread->runtime();
845 MutableBytes result(&scope,
846 runtime->newMutableBytesUninitialized(result_size));
847 word index = 0;
848 if (value.isNegative()) {
849 result.byteAtPut(index++, '-');
850 }
851 result.byteAtPut(index++, '0');
852 result.byteAtPut(index++, format_prefix);
853 put_digits(thread, result, index, value, result_n_digits);
854 return result.becomeStr();
855}
856
857RawObject formatIntBinarySimple(Thread* thread, const Int& value) {
858 return formatIntSimpleImpl(thread, value, 'b', numBinaryDigits,
859 putBinaryDigits);
860}
861
862RawObject formatIntHexadecimalSimple(Thread* thread, const Int& value) {
863 return formatIntSimpleImpl(thread, value, 'x', numHexadecimalDigits,
864 putHexadecimalLowerCaseDigits);
865}
866
867RawObject formatIntOctalSimple(Thread* thread, const Int& value) {
868 return formatIntSimpleImpl(thread, value, 'o', numOctalDigits,
869 putOctalDigits);
870}
871
872RawObject formatIntDecimal(Thread* thread, const Int& value,
873 FormatSpec* format) {
874 if (format->precision >= 0) {
875 return thread->raiseWithFmt(
876 LayoutId::kValueError,
877 "Precision not allowed in integer format specifier");
878 }
879 if (format->thousands_separator != 0) {
880 UNIMPLEMENTED("thousands separator");
881 }
882
883 // We cannot easily predict how many digits are necessary. So we overestimate
884 // and printing into a temporary buffer.
885 byte fixed_buffer[kUwordDigits10];
886 byte* buffer;
887 byte* digits;
888 word result_n_digits;
889 if (value.isLargeInt()) {
890 word max_chars = estimateNumDecimalDigits(LargeInt::cast(*value));
891 buffer = new byte[max_chars];
892 byte* end = buffer + max_chars;
893 digits = writeLargeIntDecimalDigits(end, LargeInt::cast(*value));
894 result_n_digits = end - digits;
895 } else {
896 buffer = fixed_buffer;
897 word value_word = value.asWord();
898 uword magnitude =
899 value_word >= 0 ? value_word : -static_cast<uword>(value_word);
900 byte* end = buffer + ARRAYSIZE(fixed_buffer);
901 digits = uwordToDecimal(magnitude, end);
902 result_n_digits = end - digits;
903 }
904
905 bool is_negative = value.isNegative();
906 word number_chars =
907 (is_negative || format->positive_sign != '\0') + result_n_digits;
908
909 HandleScope scope(thread);
910 Str fill_cp(&scope, SmallStr::fromCodePoint(format->fill_char));
911 word fill_cp_chars = fill_cp.length();
912 word result_chars = number_chars;
913
914 word padding = format->width - number_chars;
915 word left_padding = 0;
916 word sign_padding = 0;
917 word right_padding = 0;
918 if (padding > 0) {
919 result_chars += padding * fill_cp_chars;
920 switch (format->alignment) {
921 case '<':
922 right_padding = padding;
923 break;
924 case '=':
925 sign_padding = padding;
926 break;
927 case '>':
928 left_padding = padding;
929 break;
930 case '^':
931 left_padding = padding >> 1;
932 right_padding = padding - left_padding;
933 break;
934 default:
935 UNREACHABLE("unexpected alignment %c", format->alignment);
936 }
937 }
938
939 word index = 0;
940 MutableBytes result(
941 &scope, thread->runtime()->newMutableBytesUninitialized(result_chars));
942 for (word i = 0; i < left_padding; i++) {
943 result.replaceFromWithStr(index, *fill_cp, fill_cp_chars);
944 index += fill_cp_chars;
945 }
946 if (is_negative) {
947 result.byteAtPut(index++, '-');
948 } else if (format->positive_sign != '\0') {
949 result.byteAtPut(index++, format->positive_sign);
950 }
951 for (word i = 0; i < sign_padding; i++) {
952 result.replaceFromWithStr(index, *fill_cp, fill_cp_chars);
953 index += fill_cp_chars;
954 }
955 result.replaceFromWithAll(index, {digits, result_n_digits});
956 index += result_n_digits;
957 for (word i = 0; i < right_padding; i++) {
958 result.replaceFromWithStr(index, *fill_cp, fill_cp_chars);
959 index += fill_cp_chars;
960 }
961 DCHECK(index == result.length(), "wrong number of bytes written");
962
963 if (value.isLargeInt()) {
964 delete[] buffer;
965 }
966 return result.becomeStr();
967}
968
969static inline RawObject formatIntImpl(Thread* thread, const Int& value,
970 FormatSpec* format, char format_prefix,
971 numDigitsFunc num_digits,
972 putDigitsFunc put_digits) {
973 if (format->precision >= 0) {
974 return thread->raiseWithFmt(
975 LayoutId::kValueError,
976 "Precision not allowed in integer format specifier");
977 }
978 if (format->thousands_separator != 0) {
979 UNIMPLEMENTED("thousands separator");
980 }
981
982 bool is_negative = value.isNegative();
983 word result_n_digits = num_digits(value);
984 word number_chars = (is_negative || format->positive_sign != '\0') +
985 (format->alternate ? 2 : 0) + result_n_digits;
986
987 HandleScope scope(thread);
988 Str fill_cp(&scope, SmallStr::fromCodePoint(format->fill_char));
989 word fill_cp_chars = fill_cp.length();
990 word result_chars = number_chars;
991
992 word padding = format->width - number_chars;
993 word left_padding = 0;
994 word sign_padding = 0;
995 word right_padding = 0;
996 if (padding > 0) {
997 result_chars += padding * fill_cp_chars;
998 switch (format->alignment) {
999 case '<':
1000 right_padding = padding;
1001 break;
1002 case '=':
1003 sign_padding = padding;
1004 break;
1005 case '>':
1006 left_padding = padding;
1007 break;
1008 case '^':
1009 left_padding = padding >> 1;
1010 right_padding = padding - left_padding;
1011 break;
1012 default:
1013 UNREACHABLE("unexpected alignment %c", format->alignment);
1014 }
1015 }
1016
1017 word index = 0;
1018 MutableBytes result(
1019 &scope, thread->runtime()->newMutableBytesUninitialized(result_chars));
1020 for (word i = 0; i < left_padding; i++) {
1021 result.replaceFromWithStr(index, *fill_cp, fill_cp_chars);
1022 index += fill_cp_chars;
1023 }
1024 if (is_negative) {
1025 result.byteAtPut(index++, '-');
1026 } else if (format->positive_sign != '\0') {
1027 result.byteAtPut(index++, format->positive_sign);
1028 }
1029 if (format->alternate) {
1030 result.byteAtPut(index++, '0');
1031 result.byteAtPut(index++, format_prefix);
1032 }
1033 for (word i = 0; i < sign_padding; i++) {
1034 result.replaceFromWithStr(index, *fill_cp, fill_cp_chars);
1035 index += fill_cp_chars;
1036 }
1037 put_digits(thread, result, index, value, result_n_digits);
1038 index += result_n_digits;
1039
1040 for (word i = 0; i < right_padding; i++) {
1041 result.replaceFromWithStr(index, *fill_cp, fill_cp_chars);
1042 index += fill_cp_chars;
1043 }
1044 DCHECK(index == result.length(), "wrong number of bytes written");
1045 return result.becomeStr();
1046}
1047
1048RawObject formatIntBinary(Thread* thread, const Int& value,
1049 FormatSpec* format) {
1050 return formatIntImpl(thread, value, format, 'b', numBinaryDigits,
1051 putBinaryDigits);
1052}
1053
1054RawObject formatIntHexadecimalLowerCase(Thread* thread, const Int& value,
1055 FormatSpec* format) {
1056 return formatIntImpl(thread, value, format, 'x', numHexadecimalDigits,
1057 putHexadecimalLowerCaseDigits);
1058}
1059
1060RawObject formatIntHexadecimalUpperCase(Thread* thread, const Int& value,
1061 FormatSpec* format) {
1062 return formatIntImpl(thread, value, format, 'X', numHexadecimalDigits,
1063 putHexadecimalUpperCaseDigits);
1064}
1065
1066RawObject formatIntOctal(Thread* thread, const Int& value, FormatSpec* format) {
1067 return formatIntImpl(thread, value, format, 'o', numOctalDigits,
1068 putOctalDigits);
1069}
1070
1071RawObject formatDoubleHexadecimalSimple(Runtime* runtime, double value) {
1072 const int mantissa_hex_digits = (kDoubleMantissaBits / 4) + 1;
1073 const int exp_bits = kBitsPerDouble - kDoubleMantissaBits - 1;
1074 const int max_exp = 1 << (exp_bits - 1);
1075 const int min_exp = -(1 << (exp_bits - 1)) + 1;
1076
1077 bool is_negative;
1078 int exponent;
1079 int64_t mantissa;
1080
1081 decodeDouble(value, &is_negative, &exponent, &mantissa);
1082 if (exponent == max_exp) {
1083 if (mantissa == 0) {
1084 return is_negative ? runtime->newStrFromCStr("-inf")
1085 : runtime->newStrFromCStr("inf");
1086 }
1087 return runtime->newStrFromCStr("nan");
1088 }
1089
1090 const bool is_subnormal = exponent == min_exp;
1091 if (is_subnormal) {
1092 if (mantissa != 0) {
1093 exponent += 1;
1094 } else {
1095 return is_negative ? runtime->newStrFromCStr("-0x0.0p+0")
1096 : runtime->newStrFromCStr("0x0.0p+0");
1097 }
1098 }
1099
1100 int exponent_sign;
1101 if (exponent < 0) {
1102 exponent_sign = '-';
1103 exponent = -exponent;
1104 } else {
1105 exponent_sign = '+';
1106 }
1107
1108 // Output buffer_size mantissaHexDigits + 1 char for '-', 2 chars for '0x', 2
1109 // chars for '1.', 1 char for 'p', 1 char for exponent sign '+' or '-', and 4
1110 // chars for the exponent.
1111 const int buffer_size = mantissa_hex_digits + 11;
1112 char output[buffer_size];
1113 byte* end = reinterpret_cast<byte*>(output) + buffer_size;
1114 byte* poutput = uwordToDecimal(exponent, end);
1115
1116 *--poutput = exponent_sign;
1117 *--poutput = 'p';
1118 for (int k = 0; k < kDoubleMantissaBits; k += 4) {
1119 *--poutput = lowerCaseHexDigit((mantissa >> k) & 0xf);
1120 }
1121
1122 *--poutput = '.';
1123 *--poutput = is_subnormal ? '0' : '1';
1124 *--poutput = 'x';
1125 *--poutput = '0';
1126 if (is_negative) {
1127 *--poutput = '-';
1128 }
1129 return runtime->newStrWithAll(View<byte>(poutput, end - poutput));
1130}
1131
1132} // namespace py