this repo has no description
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
2#include "objects.h"
3
4#include <cstring>
5
6#include "bytes-builtins.h"
7#include "byteslike.h"
8#include "frame.h"
9#include "runtime.h"
10#include "thread.h"
11#include "unicode.h"
12#include "view.h"
13
14namespace py {
15
16static inline word offset(const byte* data, word len, word index, word count) {
17 if (count >= 0) {
18 while (count-- && index < len) {
19 index += UTF8::numChars(data[index]);
20 }
21 return Utils::minimum(index, len);
22 }
23 while (count < 0) {
24 index--;
25 if (index < 0) return -1;
26 if (UTF8::isLeadByte(data[index])) count++;
27 }
28 return index;
29}
30
31static inline int32_t decodeCodePoint(const byte* data, word src_length,
32 word index, word* char_length) {
33 DCHECK_INDEX(index, src_length);
34 byte b0 = data[index];
35 if (b0 <= kMaxASCII) {
36 *char_length = 1;
37 return b0;
38 }
39 DCHECK_INDEX(index + 1, src_length);
40 byte b1 = data[index + 1] & byte{0x3F};
41 // 0b110xxxxx begins a sequence with one continuation byte.
42 if (b0 < 0xE0) {
43 DCHECK(b0 >= 0xC, "unexpected continuation byte");
44 *char_length = 2;
45 return ((b0 & 0x1F) << 6) | b1;
46 }
47 DCHECK_INDEX(index + 2, src_length);
48 byte b2 = data[index + 2] & byte{0x3F};
49 // 0b1110xxxx starts a sequence with two continuation bytes.
50 if (b0 < 0xF0) {
51 *char_length = 3;
52 return ((b0 & 0xF) << 12) | (b1 << 6) | b2;
53 }
54 // 0b11110xxx starts a sequence with three continuation bytes.
55 DCHECK((b0 & 0xF8) == 0xF0, "invalid code unit");
56 DCHECK_INDEX(index + 2, src_length);
57 byte b3 = data[index + 3] & byte{0x3F};
58 *char_length = 4;
59 return ((b0 & 0x7) << 18) | (b1 << 12) | (b2 << 6) | b3;
60}
61
62// RawSmallData
63
64int32_t RawSmallData::codePointAt(word index, word* char_length) const {
65 const byte* data = smallDataData(this);
66 return decodeCodePoint(data, length(), index, char_length);
67}
68
69word RawSmallData::codePointLength() const {
70 uword block = raw() >> kBitsPerByte;
71 uword mask_0 = ~uword{0} / 0xFF; // 0x010101...
72 uword mask_7 = mask_0 << 7; // 0x808080...
73 block = ((block & mask_7) >> 7) & ((~block) >> 6);
74 // TODO(cshapiro): evaluate using popcount instead of multiplication
75 word num_trailing = (block * mask_0) >> ((kWordSize - 1) * kBitsPerByte);
76 return length() - num_trailing;
77}
78
79word RawSmallData::findByte(byte value, word start, word length) const {
80 DCHECK_BOUND(start, this->length());
81 DCHECK_BOUND(start + length, this->length());
82 for (word i = 0; i < length; i++) {
83 if (byteAt(start + i) == value) return start + i;
84 }
85 return -1;
86}
87
88static uword hasZeroByte(uword value) {
89 uword mask_0 = ~uword{0} / 0xFF; // 0x010101...
90 uword mask_7 = mask_0 << 7; // 0x808080...
91 return (value - mask_0) & ~value & mask_7;
92}
93
94bool RawSmallData::includesByte(byte b) const {
95 DCHECK(b != 0, "Due to padding bytes, cannot check for 0.");
96 uword block = raw() >> kBitsPerByte;
97 uword surrogate_mask = (~uword{0} / 0xFF) * b;
98 return hasZeroByte(block ^ surrogate_mask);
99}
100
101bool RawSmallData::isASCII() const {
102 uword block = raw() >> kBitsPerByte;
103 uword non_ascii_mask = (~uword{0} / 0xFF) << (kBitsPerByte - 1);
104 return (block & non_ascii_mask) == 0;
105}
106
107word RawSmallData::offsetByCodePoints(word index, word count) const {
108 const byte* data = smallDataData(this);
109 return offset(data, length(), index, count);
110}
111
112char* RawSmallData::toCStr() const {
113 word length = this->length();
114 byte* result = static_cast<byte*>(std::malloc(length + 1));
115 CHECK(result != nullptr, "out of memory");
116 copyTo(result, length);
117 result[length] = '\0';
118 return reinterpret_cast<char*>(result);
119}
120
121// RawSmallBytes
122
123RawObject RawSmallBytes::becomeStr() const {
124 return RawObject{raw() ^ kSmallBytesTag ^ kSmallStrTag};
125}
126
127RawSmallBytes RawSmallBytes::fromBytes(View<byte> data) {
128 word length = data.length();
129 DCHECK_BOUND(length, kMaxLength);
130 uword result = 0;
131 for (word i = length - 1; i >= 0; i--) {
132 result = (result << kBitsPerByte) | data.get(i);
133 }
134 return RawSmallBytes(result << kBitsPerByte | length << kImmediateTagBits |
135 kSmallBytesTag);
136}
137
138// RawSmallStr
139
140RawObject RawSmallStr::becomeBytes() const {
141 return RawObject{raw() ^ kSmallStrTag ^ kSmallBytesTag};
142}
143
144word RawSmallStr::compare(RawObject that) const {
145 word length = this->length();
146 for (word i = 0; i < length; i++) {
147 word result = byteAt(i) - RawLargeStr::cast(that).byteAt(i);
148 if (result != 0) return result;
149 }
150 return -1;
151}
152
153word RawSmallStr::equalsCStr(const char* c_str) const {
154 const char* cp = c_str;
155 const word len = length();
156 for (word i = 0; i < len; i++, cp++) {
157 byte ch = static_cast<byte>(*cp);
158 if (ch == '\0' || ch != byteAt(i)) {
159 return false;
160 }
161 }
162 return *cp == '\0';
163}
164
165RawSmallStr RawSmallStr::fromBytes(View<byte> data) {
166 word length = data.length();
167 DCHECK_BOUND(length, kMaxLength);
168 uword result = 0;
169 for (word i = length - 1; i >= 0; i--) {
170 result = (result << kBitsPerByte) | data.get(i);
171 }
172 return RawSmallStr(result << kBitsPerByte | length << kImmediateTagBits |
173 kSmallStrTag);
174}
175
176RawSmallStr RawSmallStr::fromCodePoint(int32_t code_point) {
177 DCHECK_BOUND(code_point, kMaxUnicode);
178 uword cp = static_cast<uword>(code_point);
179 // 0xxxxxxx
180 if (cp <= kMaxASCII) { // 01111111
181 return RawSmallStr(cp << 8 | (1 << kImmediateTagBits) | kSmallStrTag);
182 }
183 uword result = cp & 0x3F; // 00111111
184 cp >>= 6;
185 result <<= kBitsPerByte;
186 // 110xxxxx 10xxxxxx
187 if (cp <= 0x1F) { // 00011111
188 result |= cp;
189 result |= 0x80C0; // 10xxxxxx 110xxxxx
190 result <<= kBitsPerByte;
191 return RawSmallStr(result | (2 << kImmediateTagBits) | kSmallStrTag);
192 }
193
194 result |= cp & 0x3F; // 00111111
195 cp >>= 6;
196 result <<= kBitsPerByte;
197 // 1110xxxx 10xxxxxx 10xxxxxx
198 if (cp <= 0xF) { // 00001111
199 result |= cp;
200 result |= 0x8080E0; // 10xxxxxx 10xxxxxx 1110xxxx
201 result <<= kBitsPerByte;
202 return RawSmallStr(result | (3 << kImmediateTagBits) | kSmallStrTag);
203 }
204 result |= cp & 0x3F; // 00111111
205 cp >>= 6;
206 result <<= kBitsPerByte;
207 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
208 result |= cp;
209 result |= 0x808080F0; // 10xxxxxx 10xxxxxx 10xxxxxx 11110xxx
210 result <<= kBitsPerByte;
211 return RawSmallStr(result | (4 << kImmediateTagBits) | kSmallStrTag);
212}
213
214RawSmallStr RawSmallStr::fromCStr(const char* value) {
215 word len = std::strlen(value);
216 return fromBytes(View<byte>(reinterpret_cast<const byte*>(value), len));
217}
218
219bool RawSmallStr::includesByte(byte b) const {
220 DCHECK(b != 0, "Due to padding bytes, cannot check for 0.");
221 uword block = raw() >> kBitsPerByte;
222 uword surrogate_mask = (~uword{0} / 0xFF) * b;
223 return hasZeroByte(block ^ surrogate_mask);
224}
225
226bool RawSmallStr::includes(RawObject that) const {
227 if (!that.isSmallStr()) {
228 return false;
229 }
230 word haystack_len = length();
231 word needle_len = SmallStr::cast(that).length();
232 word diff = haystack_len - needle_len;
233 if (diff < 0) {
234 return false;
235 }
236 uword haystack = raw() >> kBitsPerByte;
237 uword needle = that.raw() >> kBitsPerByte;
238 uword mask = ~0UL >> ((kWordSize - needle_len) * kBitsPerByte);
239 for (word i = 0; i <= diff; i++) {
240 if ((haystack & mask) == needle) {
241 return true;
242 }
243 haystack >>= kBitsPerByte;
244 }
245 return false;
246}
247
248word RawSmallStr::occurrencesOf(RawObject that) const {
249 DCHECK(that.isStr(), "must be searching for a Str object");
250 if (!that.isSmallStr()) {
251 return 0;
252 }
253 word haystack_len = length();
254 word needle_len = SmallStr::cast(that).length();
255 DCHECK(needle_len >= 0, "needle length must be non-negative");
256 if (needle_len == 0) {
257 return 0;
258 }
259 word diff = haystack_len - needle_len;
260 uword needle = that.raw() >> kBitsPerByte;
261 uword haystack = raw() >> kBitsPerByte;
262 uword mask = (1 << (needle_len * kBitsPerByte)) - 1;
263 word result = 0;
264 for (word i = 0; i <= diff; i++) {
265 if ((haystack & mask) == needle) {
266 result++;
267 i += needle_len - 1;
268 haystack >>= (kBitsPerByte * (needle_len - 1));
269 }
270 haystack >>= kBitsPerByte;
271 }
272 return result;
273}
274
275// RawSmallInt
276
277const word RawSmallInt::kMinValue;
278const word RawSmallInt::kMaxValue;
279
280// RawBytearray
281
282word RawBytearray::compare(RawBytes that, word that_len) {
283 DCHECK_BOUND(that_len, that.length());
284 word this_len = this->numItems();
285 word len = Utils::minimum(this_len, that_len);
286 for (word i = 0; i < len; i++) {
287 word diff = this->byteAt(i) - that.byteAt(i);
288 if (diff != 0) return diff;
289 }
290 return this_len - that_len;
291}
292
293void RawBytearray::downsize(word new_length) const {
294 word original_length = numItems();
295 DCHECK_BOUND(new_length, original_length);
296 if (original_length == 0) return;
297 byte* dst = reinterpret_cast<byte*>(RawMutableBytes::cast(items()).address());
298 std::memset(dst + new_length, 0, original_length - new_length);
299 setNumItems(new_length);
300}
301
302void RawBytearray::replaceFromWith(word dst_start, RawBytearray src,
303 word count) const {
304 DCHECK_BOUND(dst_start + count, numItems());
305 MutableBytes::cast(items()).replaceFromWithBytes(
306 dst_start, Bytes::cast(src.items()), count);
307}
308
309void RawBytearray::replaceFromWithStartAt(word dst_start, RawBytearray src,
310 word count, word src_start) const {
311 DCHECK_BOUND(dst_start + count, numItems());
312 DCHECK_BOUND(src_start + count, src.numItems());
313 MutableBytes::cast(items()).replaceFromWithBytesStartAt(
314 dst_start, Bytes::cast(src.items()), count, src_start);
315}
316
317// RawBytes
318
319word RawBytes::compare(RawBytes that) const {
320 word this_len = this->length();
321 word that_len = that.length();
322 word len = Utils::minimum(this_len, that_len);
323 for (word i = 0; i < len; i++) {
324 word diff = this->byteAt(i) - that.byteAt(i);
325 if (diff != 0) return diff;
326 }
327 return this_len - that_len;
328}
329
330// RawCode
331
332word RawCode::offsetToLineNum(word offset) const {
333 offset /= kCodeUnitScale;
334 // See https://github.com/python/cpython/blob/master/Objects/lnotab_notes.txt
335 // for details about the line number table format
336 RawBytes table = Bytes::cast(lnotab());
337 word line = firstlineno();
338 word cur_offset = 0;
339 for (word i = 0; i < table.length(); i += 2) {
340 cur_offset += table.byteAt(i);
341 if (cur_offset > offset) {
342 break;
343 }
344 line += static_cast<int8_t>(table.byteAt(i + 1));
345 }
346 return line;
347}
348
349// RawDataArray
350
351static inline const byte* dataArrayData(RawDataArray obj) {
352 return reinterpret_cast<const byte*>(obj.address());
353}
354
355int32_t RawDataArray::codePointAt(word index, word* char_length) const {
356 const byte* data = dataArrayData(*this);
357 return decodeCodePoint(data, length(), index, char_length);
358}
359
360word RawDataArray::codePointLength() const {
361 // This is a vectorized loop for processing code units in groups the size of a
362 // machine word. The garbage collector ensures the following invariants that
363 // simplify the algorithm, eliminating the need for a scalar pre-loop or a
364 // scalar-post loop:
365 //
366 // 1) The base address of instance data is always word aligned
367 // 2) The allocation sizes are always rounded-up to the next word
368 // 3) Unused bytes at the end of an allocation are always zero
369 //
370 // This algorithm works by counting the number of UTF-8 trailing bytes found
371 // in the string from the total number of byte in the string. Because the
372 // unused bytes at the end of a string are zero they are conveniently ignored
373 // by the counting.
374 word length = this->length();
375 word size_in_words = (length + kWordSize - 1) >> kWordSizeLog2;
376 word result = length;
377 const uword* data = reinterpret_cast<const uword*>(dataArrayData(*this));
378 uword mask_0 = ~uword{0} / 0xFF; // 0x010101...
379 uword mask_7 = mask_0 << 7; // 0x808080...
380 for (word i = 0; i < size_in_words; i++) {
381 // Read an entire word of code units.
382 uword block = data[i];
383 // The bit pattern 0b10xxxxxx identifies a UTF-8 trailing byte. For each
384 // byte in a word, we isolate bit 6 and 7 and logically and the complement
385 // of bit 6 with bit 7. That leaves one set bit for each trailing byte in a
386 // word.
387 block = ((block & mask_7) >> 7) & ((~block) >> 6);
388 // Count the number of bits leftover in the word. That is equal to the
389 // number of trailing bytes.
390 // TODO(cshapiro): evaluate using popcount instead of multiplication
391 word num_trailing = (block * mask_0) >> ((kWordSize - 1) * kBitsPerByte);
392 // Finally, subtract the number of trailing bytes from the number of bytes
393 // in the string leaving just the number of ASCII code points and UTF-8
394 // leading bytes in the count.
395 result -= num_trailing;
396 }
397 return result;
398}
399
400word RawDataArray::compare(RawDataArray that) const {
401 word this_length = length();
402 word that_length = that.length();
403 word length = Utils::minimum(this_length, that_length);
404 const void* s1 = reinterpret_cast<const void*>(address());
405 const void* s2 = reinterpret_cast<const void*>(that.address());
406 word result = std::memcmp(s1, s2, length);
407 return result != 0 ? result : this_length - that_length;
408}
409
410bool RawDataArray::equals(RawDataArray that) const {
411 word len = this->length();
412 if (len != that.length()) {
413 return false;
414 }
415 const void* s1 = reinterpret_cast<const void*>(address());
416 const void* s2 = reinterpret_cast<const void*>(that.address());
417 return std::memcmp(s1, s2, len) == 0;
418}
419
420bool RawDataArray::equalsBytes(View<byte> bytes) const {
421 word length = this->length();
422 if (bytes.length() != length) {
423 return false;
424 }
425 return std::memcmp(dataArrayData(*this), bytes.data(), length) == 0;
426}
427
428bool RawDataArray::equalsCStr(const char* c_str) const {
429 const char* cp = c_str;
430 const word len = length();
431 for (word i = 0; i < len; i++, cp++) {
432 byte ch = static_cast<byte>(*cp);
433 if (ch == '\0' || ch != byteAt(i)) {
434 return false;
435 }
436 }
437 return *cp == '\0';
438}
439
440word RawDataArray::findByte(byte value, word start, word length) const {
441 DCHECK_BOUND(start, this->length());
442 DCHECK_BOUND(start + length, this->length());
443 word result =
444 Utils::memoryFindChar(dataArrayData(*this) + start, length, value);
445 if (result != -1) result += start;
446 return result;
447}
448
449bool RawDataArray::includesByte(byte b) const {
450 DCHECK(b != 0, "Due to padding bytes, cannot check for 0.");
451 word length = this->length();
452 word size_in_words = (length + kWordSize - 1) >> kWordSizeLog2;
453 const uword* data = reinterpret_cast<const uword*>(dataArrayData(*this));
454 uword mask = (~uword{0} / 0xFF) * b;
455 for (word i = 0; i < size_in_words; i++) {
456 uword block = data[i];
457 if (hasZeroByte(block ^ mask)) {
458 return true;
459 }
460 }
461 return false;
462}
463
464bool RawDataArray::isASCII() const {
465 // Depends on invariants specified in RawLargeStr::codePointLength
466 word length = this->length();
467 word size_in_words = (length + kWordSize - 1) >> kWordSizeLog2;
468 const uword* data = reinterpret_cast<const uword*>(dataArrayData(*this));
469 uword non_ascii_mask = (~uword{0} / 0xFF) << (kBitsPerByte - 1);
470 for (word i = 0; i < size_in_words; i++) {
471 // Read an entire word of code units.
472 uword block = data[i];
473 if ((block & non_ascii_mask) != 0) return false;
474 }
475 return true;
476}
477
478word RawDataArray::offsetByCodePoints(word index, word count) const {
479 const byte* data = dataArrayData(*this);
480 return offset(data, length(), index, count);
481}
482
483char* RawDataArray::toCStr() const {
484 word length = this->length();
485 byte* result = static_cast<byte*>(std::malloc(length + 1));
486 CHECK(result != nullptr, "out of memory");
487 copyTo(result, length);
488 result[length] = '\0';
489 return reinterpret_cast<char*>(result);
490}
491
492// RawLargeBytes
493
494RawObject RawLargeBytes::becomeStr() const {
495 DCHECK(bytesIsValidStr(RawBytes::cast(*this)), "must contain valid utf-8");
496 setHeader(header().withLayoutId(LayoutId::kLargeStr));
497 return *this;
498}
499
500// RawLargeStr
501
502static bool includes1(const byte* haystack, word haystack_len, byte needle) {
503 const uword* p = reinterpret_cast<const uword*>(haystack);
504 uword mask = ((~0UL / 255) * needle);
505 while (haystack_len >= kWordSize) {
506 if (hasZeroByte(*p ^ mask)) {
507 return true;
508 }
509 haystack_len -= kWordSize;
510 p++;
511 }
512 if (haystack_len != 0) {
513 word shift = (kWordSize - haystack_len) * kBitsPerByte;
514 return (hasZeroByte(*p ^ mask) << shift) != 0;
515 }
516 return false;
517}
518
519bool RawLargeStr::includes(RawObject that) const {
520 enum { kMask = kBitsPerWord - 1 };
521
522 if (that == Str::empty()) {
523 return true;
524 }
525
526 word haystack_len = length();
527 word needle_len = Str::cast(that).length();
528
529 word diff = haystack_len - needle_len;
530 if (diff < 0) {
531 return false;
532 }
533
534 const byte* haystack = dataArrayData(*this);
535
536 if (needle_len == 1) {
537 byte ch = SmallStr::cast(that).byteAt(0);
538 return includes1(haystack, haystack_len, ch);
539 }
540
541 byte buffer[SmallStr::kMaxLength];
542 const byte* needle;
543 if (that.isSmallStr()) {
544 SmallStr::cast(that).copyTo(buffer, needle_len);
545 needle = buffer;
546 } else {
547 needle = dataArrayData(LargeStr::cast(that));
548 }
549
550 word needle_last = needle_len - 1;
551 word skip = needle_last - 1;
552 uword delta1 = 0;
553 for (word i = 0; i < needle_last; i++) {
554 delta1 |= (1 << (needle[i] & kMask));
555 if (needle[i] == needle[needle_last]) {
556 skip = needle_last - i - 1;
557 }
558 }
559 delta1 |= (1 << (needle[needle_last] & kMask));
560 for (word i = 0; i <= diff; i++) {
561 if (haystack[i + needle_last] == needle[needle_last]) {
562 word j = 0;
563 for (; j < needle_last; j++) {
564 if (haystack[i + j] != needle[j]) {
565 break;
566 }
567 }
568 if (j == needle_last) {
569 return true;
570 }
571 if ((delta1 & (1 << (haystack[i + needle_len] & kMask))) == 0) {
572 i += needle_len;
573 } else {
574 i += skip;
575 }
576 } else {
577 if ((delta1 & (1 << (haystack[i + needle_len] & kMask))) == 0) {
578 i += needle_len;
579 }
580 }
581 }
582 return false;
583}
584
585word RawLargeStr::occurrencesOf(RawObject that) const {
586 DCHECK(that.isStr(), "must be searching for a Str object");
587 const byte* needle;
588 word needle_len;
589 byte buffer[SmallStr::kMaxLength];
590 if (that.isSmallStr()) {
591 RawSmallStr str = RawSmallStr::cast(that);
592 needle_len = str.length();
593 str.copyTo(buffer, needle_len);
594 needle = buffer;
595 } else {
596 RawLargeStr str = RawLargeStr::cast(that);
597 needle = dataArrayData(str);
598 needle_len = str.length();
599 }
600 DCHECK(needle != nullptr, "needle cannot be null");
601 DCHECK(needle_len >= 0, "needle length must be non-negative");
602 word haystack_len = length();
603 const byte* haystack = dataArrayData(*this);
604 if (haystack_len < needle_len) {
605 return 0;
606 }
607 word result = 0;
608 for (word pos = Utils::memoryFind(haystack, haystack_len, needle, needle_len);
609 pos != -1 && pos + needle_len <= haystack_len; result++) {
610 pos += needle_len;
611 haystack += pos;
612 haystack_len -= pos;
613 pos = Utils::memoryFind(haystack, haystack_len, needle, needle_len);
614 }
615 return result;
616}
617
618// RawList
619
620void RawList::replaceFromWith(word start, RawList src, word count) const {
621 DCHECK_BOUND(start + count, numItems());
622 MutableTuple::cast(items()).replaceFromWith(start, Tuple::cast(src.items()),
623 count);
624}
625
626void RawList::replaceFromWithStartAt(word start, RawList src, word count,
627 word src_start) const {
628 DCHECK_BOUND(start + count, numItems());
629 DCHECK_BOUND(src_start + count, src.numItems());
630 MutableTuple::cast(items()).replaceFromWithStartAt(
631 start, Tuple::cast(src.items()), count, src_start);
632}
633
634// RawInt
635
636word RawInt::compare(RawInt that) const {
637 if (this->isSmallInt() && that.isSmallInt()) {
638 return this->asWord() - that.asWord();
639 }
640 // compare with large ints always returns -1, 0, or 1
641 bool is_negative = this->isNegative();
642 if (is_negative != that.isNegative()) {
643 return is_negative ? -1 : 1;
644 }
645
646 word left_digits = this->numDigits();
647 word right_digits = that.numDigits();
648
649 if (left_digits > right_digits) {
650 return is_negative ? -1 : 1;
651 }
652 if (left_digits < right_digits) {
653 return is_negative ? 1 : -1;
654 }
655 for (word i = left_digits - 1; i >= 0; i--) {
656 uword left_digit = this->digitAt(i);
657 uword right_digit = that.digitAt(i);
658 if (left_digit > right_digit) {
659 return 1;
660 }
661 if (left_digit < right_digit) {
662 return -1;
663 }
664 }
665 return 0;
666}
667
668word RawInt::copyTo(byte* dst, word max_length) const {
669 if (isLargeInt()) {
670 return RawLargeInt::cast(*this).copyTo(dst, max_length);
671 }
672
673 DCHECK(isSmallInt() || isBool(), "not an integer");
674 uword val = isSmallInt() ? RawSmallInt::cast(*this).value()
675 : RawBool::cast(*this).value();
676 word memcpy_length = std::min(word{kWordSize}, max_length);
677 std::memcpy(dst, &val, memcpy_length);
678 return memcpy_length;
679}
680
681// RawLargeInt
682
683bool RawLargeInt::isValid() const {
684 word digits = numDigits();
685 if (digits <= 0) {
686 return false;
687 }
688 if (digits == 1) {
689 // Enforce a canonical representation for all ints.
690 return !RawSmallInt::isValid(digitAt(0));
691 }
692
693 word high_digit = digitAt(digits - 1);
694 word next_digit = digitAt(digits - 2);
695
696 // Redundant sign-extension for negative values.
697 if (high_digit == -1 && next_digit < 0) {
698 return false;
699 }
700
701 // Redundant zero-extension for positive values.
702 if (high_digit == 0 && next_digit >= 0) {
703 return false;
704 }
705
706 return true;
707}
708
709word RawLargeInt::bitLength() const {
710 word num_digits = numDigits();
711 word high_digit = digitAt(num_digits - 1);
712
713 if (high_digit < 0) {
714 // We're negative. Calculate what high_digit would be after negation.
715 uword carry = [&] {
716 for (word i = 0; i < num_digits - 1; i++) {
717 if (digitAt(i) != 0) return 0;
718 }
719 return 1;
720 }();
721 high_digit = ~high_digit + carry;
722 }
723 return (num_digits - 1) * kBitsPerWord + Utils::highestBit(high_digit);
724}
725
726word RawLargeInt::copyTo(byte* dst, word copy_length) const {
727 word length = numDigits() * kWordSize;
728 auto digits = reinterpret_cast<uword*>(address() + kValueOffset);
729 word memcpy_size = std::min(length, copy_length);
730 std::memcpy(dst, digits, memcpy_size);
731 return memcpy_size;
732}
733
734void RawLargeInt::copyFrom(RawBytes bytes, byte sign_extension) const {
735 auto dst = reinterpret_cast<byte*>(address() + kValueOffset);
736 word bytes_len = bytes.length();
737 DCHECK(bytes_len <= numDigits() * kWordSize, "too many bytes");
738 bytes.copyTo(dst, bytes_len);
739 std::memset(dst + bytes_len, sign_extension,
740 (numDigits() * kWordSize) - bytes_len);
741}
742
743// RawMutableBytes
744
745word RawMutableBytes::indexOfAny(View<byte> needle, word start) const {
746 DCHECK(needle.length() >= 0, "needle length must be non-negative");
747 if (needle.length() == 0) {
748 return length();
749 }
750 uword bitmap[(kMaxByte + 1) / kBitsPerWord] = {0};
751 for (word i = 0; i < needle.length(); i++) {
752 byte ch = needle.get(i);
753 bitmap[ch / kBitsPerWord] |= uword{1} << (ch % kBitsPerWord);
754 }
755 word result;
756 for (result = start; result < length(); result++) {
757 byte ch = byteAt(result);
758 if (bitmap[ch / kBitsPerWord] & (uword{1} << (ch % kBitsPerWord))) {
759 break;
760 }
761 }
762 return result;
763}
764
765void RawMutableBytes::replaceFromWith(word dst_start, RawDataArray src,
766 word count) const {
767 DCHECK_BOUND(dst_start + count, length());
768 byte* dst = reinterpret_cast<byte*>(address());
769 src.copyTo(dst + dst_start, count);
770}
771
772void RawMutableBytes::replaceFromWithStartAt(word dst_start, RawDataArray src,
773 word count, word src_start) const {
774 DCHECK_BOUND(dst_start + count, length());
775 DCHECK_BOUND(src_start + count, src.length());
776 src.copyToStartAt(reinterpret_cast<byte*>(address() + dst_start), count,
777 src_start);
778}
779
780void RawMutableBytes::replaceFromWithBytes(word dst_start, RawBytes src,
781 word count) const {
782 DCHECK_BOUND(dst_start + count, length());
783 byte* dst = reinterpret_cast<byte*>(address());
784 src.copyTo(dst + dst_start, count);
785}
786
787void RawMutableBytes::replaceFromWithByte(word dst_start, byte value,
788 word count) const {
789 DCHECK_BOUND(dst_start + count, length());
790 byte* dst = reinterpret_cast<byte*>(address());
791 std::memset(dst + dst_start, value, count);
792}
793
794void RawMutableBytes::replaceFromWithBytesStartAt(word dst_start, RawBytes src,
795 word count,
796 word src_start) const {
797 DCHECK_BOUND(dst_start + count, length());
798 DCHECK_BOUND(src_start + count, src.length());
799 src.copyToStartAt(reinterpret_cast<byte*>(address() + dst_start), count,
800 src_start);
801}
802
803void RawMutableBytes::replaceFromWithAll(word dst_start, View<byte> src) const {
804 DCHECK_BOUND(dst_start + src.length(), length());
805 byte* dst = reinterpret_cast<byte*>(address());
806 std::memcpy(dst + dst_start, src.data(), src.length());
807}
808
809void RawMutableBytes::replaceFromWithByteslike(word dst_start,
810 const Byteslike& byteslike,
811 word count) const {
812 DCHECK_BOUND(count, byteslike.length());
813 DCHECK_BOUND(dst_start, length() - count);
814 byte* dst = reinterpret_cast<byte*>(address() + dst_start);
815 const byte* src = reinterpret_cast<const byte*>(byteslike.address());
816 std::memcpy(dst, src, count);
817}
818
819void RawMutableBytes::replaceFromWithByteslikeStartAt(
820 word dst_start, const Byteslike& byteslike, word count,
821 word src_start) const {
822 DCHECK(count >= 0, "count must not be negative");
823 DCHECK_BOUND(src_start, byteslike.length() - count);
824 DCHECK_BOUND(dst_start, length() - count);
825 byte* dst = reinterpret_cast<byte*>(address() + dst_start);
826 const byte* src =
827 reinterpret_cast<const byte*>(byteslike.address() + src_start);
828 std::memcpy(dst, src, count);
829}
830
831void RawMutableBytes::replaceFromWithStr(word index, RawStr src,
832 word char_length) const {
833 DCHECK_BOUND(index + char_length, length());
834 byte* dst = reinterpret_cast<byte*>(address());
835 src.copyTo(dst + index, char_length);
836}
837
838void RawMutableBytes::replaceFromWithStrStartAt(word dst_start, RawStr src,
839 word char_length,
840 word src_start_char) const {
841 DCHECK_BOUND(dst_start + char_length, length());
842 byte* dst = reinterpret_cast<byte*>(address());
843 src.copyToStartAt(dst + dst_start, char_length, src_start_char);
844}
845
846RawObject RawMutableBytes::becomeImmutable() const {
847 word len = length();
848 if (len <= SmallBytes::kMaxLength) {
849 return SmallBytes::fromBytes({dataArrayData(*this), len});
850 }
851 setHeader(header().withLayoutId(LayoutId::kLargeBytes));
852 return *this;
853}
854
855RawObject RawMutableBytes::becomeStr() const {
856 DCHECK(bytesIsValidStr(RawBytes::cast(*this)), "must contain valid utf-8");
857 word len = length();
858 if (len <= SmallStr::kMaxLength) {
859 return SmallStr::fromBytes({dataArrayData(*this), len});
860 }
861 setHeader(header().withLayoutId(LayoutId::kLargeStr));
862 return *this;
863}
864
865// RawMutableTuple
866
867void RawMutableTuple::fill(RawObject value) const {
868 word len = length();
869 if (value.isNoneType()) {
870 std::memset(reinterpret_cast<byte*>(address()), -1, len * kWordSize);
871 return;
872 }
873 for (word i = 0; i < len; i++) {
874 atPut(i, value);
875 }
876}
877
878void RawMutableTuple::replaceFromWith(word dst_start, RawTuple src,
879 word count) const {
880 replaceFromWithStartAt(dst_start, src, count, 0);
881}
882
883void RawMutableTuple::replaceFromWithStartAt(word dst_start, RawTuple src,
884 word count, word src_start) const {
885 // TODO(emacs): This may need to be reverted when we have read/write
886 // barriers.
887 std::memmove(reinterpret_cast<uword*>(address()) + dst_start,
888 reinterpret_cast<uword*>(src.address()) + src_start,
889 count * kPointerSize);
890}
891
892// RawTuple
893
894bool RawTuple::contains(RawObject object) const {
895 word len = length();
896 for (word i = 0; i < len; i++) {
897 if (at(i) == object) {
898 return true;
899 }
900 }
901 return false;
902}
903
904// RawSlice
905
906word RawSlice::length(word start, word stop, word step) {
907 if (step < 0) {
908 if (stop < start) {
909 return (start - stop - 1) / (-step) + 1;
910 }
911 } else if (start < stop) {
912 return (stop - start - 1) / step + 1;
913 }
914 return 0;
915}
916
917word RawSlice::adjustIndices(word length, word* start, word* stop, word step) {
918 DCHECK(step != 0, "Step should be non zero");
919
920 if (*start < 0) {
921 *start += length;
922 if (*start < 0) {
923 *start = (step < 0) ? -1 : 0;
924 }
925 } else if (*start >= length) {
926 *start = (step < 0) ? length - 1 : length;
927 }
928
929 if (*stop < 0) {
930 *stop += length;
931 if (*stop < 0) {
932 *stop = (step < 0) ? -1 : 0;
933 }
934 } else if (*stop >= length) {
935 *stop = (step < 0) ? length - 1 : length;
936 }
937
938 return RawSlice::length(*start, *stop, step);
939}
940
941void RawSlice::adjustSearchIndices(word* start, word* end, word length) {
942 if (*start < 0) {
943 *start = Utils::maximum(*start + length, word{0});
944 }
945 if (*end < 0) {
946 *end = Utils::maximum(*end + length, word{0});
947 } else if (*end > length) {
948 *end = length;
949 }
950}
951
952// RawStr
953
954word RawStr::compareCStr(const char* c_str) const {
955 word c_length = std::strlen(c_str);
956 word length = Utils::minimum(this->length(), c_length);
957 for (word i = 0; i < length; i++) {
958 word diff = byteAt(i) - static_cast<byte>(c_str[i]);
959 if (diff != 0) {
960 return (diff > 0) ? 1 : -1;
961 }
962 }
963 word diff = this->length() - c_length;
964 return (diff > 0) ? 1 : ((diff < 0) ? -1 : 0);
965}
966
967// RawStrArray
968
969int32_t RawStrArray::codePointAt(word index, word* char_length) const {
970 RawMutableBytes buffer = RawMutableBytes::cast(items());
971 return decodeCodePoint(dataArrayData(buffer), numItems(), index, char_length);
972}
973
974word RawStrArray::offsetByCodePoints(word index, word count) const {
975 RawMutableBytes buffer = RawMutableBytes::cast(items());
976 const byte* data = dataArrayData(buffer);
977 return offset(data, numItems(), index, count);
978}
979
980void RawStrArray::rotateCodePoint(word first, word last) const {
981 DCHECK_BOUND(first, last);
982 DCHECK_INDEX(last, numItems());
983 if (first == last) {
984 return;
985 }
986
987 byte code_point[UTF8::kMaxLength];
988 byte* buffer =
989 reinterpret_cast<byte*>(RawMutableBytes::cast(items()).address());
990 word char_length = UTF8::numChars(buffer[last]);
991 std::memcpy(code_point, &buffer[last], char_length);
992 std::memmove(&buffer[first + char_length], &buffer[first], last - first);
993 std::memcpy(&buffer[first], code_point, char_length);
994}
995
996// Linked list helpers
997
998static void enqueueReference(RawObject reference, RawObject* tail,
999 word reference_link_offset,
1000 word tail_link_offset) {
1001 DCHECK(Instance::cast(reference)
1002 .instanceVariableAt(reference_link_offset)
1003 .isNoneType(),
1004 "Attempting to enqueue object that's already in queue");
1005 if (*tail == RawNoneType::object()) {
1006 Instance::cast(reference).instanceVariableAtPut(reference_link_offset,
1007 reference);
1008 } else {
1009 RawObject head = Instance::cast(*tail).instanceVariableAt(tail_link_offset);
1010 Instance::cast(*tail).instanceVariableAtPut(tail_link_offset, reference);
1011 Instance::cast(reference).instanceVariableAtPut(reference_link_offset,
1012 head);
1013 }
1014 *tail = reference;
1015}
1016
1017static void dequeueReference(RawObject head, RawObject* tail,
1018 word head_link_offset, word tail_link_offset) {
1019 if (head == *tail) {
1020 *tail = RawNoneType::object();
1021 } else {
1022 RawObject next = Instance::cast(head).instanceVariableAt(head_link_offset);
1023 Instance::cast(*tail).instanceVariableAtPut(tail_link_offset, next);
1024 }
1025 Instance::cast(head).instanceVariableAtPut(head_link_offset,
1026 RawNoneType::object());
1027}
1028
1029// RawWeakRef
1030
1031void RawWeakRef::enqueue(RawObject reference, RawObject* tail) {
1032 enqueueReference(reference, tail, RawWeakRef::kLinkOffset,
1033 RawWeakRef::kLinkOffset);
1034}
1035
1036RawObject RawWeakRef::dequeue(RawObject* tail) {
1037 RawObject head =
1038 Instance::cast(*tail).instanceVariableAt(RawWeakRef::kLinkOffset);
1039 dequeueReference(head, tail, RawWeakRef::kLinkOffset,
1040 RawWeakRef::kLinkOffset);
1041 return head;
1042}
1043
1044// Append tail2 to tail1 and return the new tail.
1045RawObject RawWeakRef::spliceQueue(RawObject tail1, RawObject tail2) {
1046 if (tail1 == RawNoneType::object() && tail2 == RawNoneType::object()) {
1047 return RawNoneType::object();
1048 }
1049 if (tail1 == RawNoneType::object()) {
1050 return tail2;
1051 }
1052 if (tail2 == RawNoneType::object()) {
1053 return tail1;
1054 }
1055 // merge two list, tail1 -> head2 -> ... -> tail2 -> head1
1056 RawObject head1 = RawWeakRef::cast(tail1).link();
1057 RawObject head2 = RawWeakRef::cast(tail2).link();
1058 RawWeakRef::cast(tail1).setLink(head2);
1059 RawWeakRef::cast(tail2).setLink(head1);
1060 return tail2;
1061}
1062
1063// RawNativeProxy
1064
1065static word nativeProxyAttributeOffset(const RawObject* object,
1066 int offset_from_end) {
1067 DCHECK(offset_from_end < 0, "negative offset is expected");
1068 return RawHeapObject::cast(*object).headerCountOrOverflow() * kPointerSize +
1069 offset_from_end;
1070}
1071
1072RawObject RawNativeProxy::native() const {
1073 return instanceVariableAt(
1074 nativeProxyAttributeOffset(this, kNativeOffsetFromEnd));
1075}
1076
1077void RawNativeProxy::setNative(RawObject native_ptr) const {
1078 instanceVariableAtPut(nativeProxyAttributeOffset(this, kNativeOffsetFromEnd),
1079 native_ptr);
1080}
1081
1082RawObject RawNativeProxy::dict() const {
1083 return instanceVariableAt(
1084 nativeProxyAttributeOffset(this, kDictOffsetFromEnd));
1085}
1086
1087void RawNativeProxy::setDict(RawObject dict) const {
1088 instanceVariableAtPut(nativeProxyAttributeOffset(this, kDictOffsetFromEnd),
1089 dict);
1090}
1091
1092RawObject RawNativeProxy::link() const {
1093 return instanceVariableAt(
1094 nativeProxyAttributeOffset(this, kLinkOffsetFromEnd));
1095}
1096
1097void RawNativeProxy::setLink(RawObject reference) const {
1098 instanceVariableAtPut(nativeProxyAttributeOffset(this, kLinkOffsetFromEnd),
1099 reference);
1100}
1101
1102void RawNativeProxy::enqueue(RawObject reference, RawObject* tail) {
1103 DCHECK(Thread::current()->runtime()->isInstanceOfNativeProxy(reference),
1104 "reference is expected to be native proxy");
1105 DCHECK(*tail == RawNoneType::object() ||
1106 Thread::current()->runtime()->isInstanceOfNativeProxy(*tail),
1107 "tail is expected to be none of native proxy");
1108 int reference_link_offset =
1109 nativeProxyAttributeOffset(&reference, kLinkOffsetFromEnd);
1110 int tail_link_offset = -1;
1111 if (*tail != RawNoneType::object()) {
1112 tail_link_offset = nativeProxyAttributeOffset(tail, kLinkOffsetFromEnd);
1113 }
1114 enqueueReference(reference, tail, reference_link_offset, tail_link_offset);
1115}
1116
1117RawObject RawNativeProxy::dequeue(RawObject* tail) {
1118 DCHECK(Thread::current()->runtime()->isInstanceOfNativeProxy(*tail),
1119 "expected native proxy");
1120 DCHECK(*tail != RawNoneType::object(), "empty queue");
1121 int tail_link_offset = nativeProxyAttributeOffset(tail, kLinkOffsetFromEnd);
1122 RawObject head = Instance::cast(*tail).instanceVariableAt(tail_link_offset);
1123 int head_link_offset = nativeProxyAttributeOffset(&head, kLinkOffsetFromEnd);
1124 dequeueReference(head, tail, head_link_offset, tail_link_offset);
1125 return head;
1126}
1127
1128// RawGeneratorFrame
1129
1130word RawGeneratorFrame::virtualPC() const { return frame()->virtualPC(); }
1131
1132void RawGeneratorFrame::setVirtualPC(word value) const {
1133 return frame()->setVirtualPC(value);
1134}
1135
1136RawObject* RawGeneratorFrame::valueStackTop() const {
1137 return frame()->stashedValueStackTop();
1138}
1139
1140RawObject RawGeneratorFrame::popValue() const {
1141 return frame()->stashedPopValue();
1142}
1143
1144} // namespace py