this repo has no description
at trunk 1144 lines 36 kB view raw
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) 2#include "objects.h" 3 4#include <cstring> 5 6#include "bytes-builtins.h" 7#include "byteslike.h" 8#include "frame.h" 9#include "runtime.h" 10#include "thread.h" 11#include "unicode.h" 12#include "view.h" 13 14namespace py { 15 16static inline word offset(const byte* data, word len, word index, word count) { 17 if (count >= 0) { 18 while (count-- && index < len) { 19 index += UTF8::numChars(data[index]); 20 } 21 return Utils::minimum(index, len); 22 } 23 while (count < 0) { 24 index--; 25 if (index < 0) return -1; 26 if (UTF8::isLeadByte(data[index])) count++; 27 } 28 return index; 29} 30 31static inline int32_t decodeCodePoint(const byte* data, word src_length, 32 word index, word* char_length) { 33 DCHECK_INDEX(index, src_length); 34 byte b0 = data[index]; 35 if (b0 <= kMaxASCII) { 36 *char_length = 1; 37 return b0; 38 } 39 DCHECK_INDEX(index + 1, src_length); 40 byte b1 = data[index + 1] & byte{0x3F}; 41 // 0b110xxxxx begins a sequence with one continuation byte. 42 if (b0 < 0xE0) { 43 DCHECK(b0 >= 0xC, "unexpected continuation byte"); 44 *char_length = 2; 45 return ((b0 & 0x1F) << 6) | b1; 46 } 47 DCHECK_INDEX(index + 2, src_length); 48 byte b2 = data[index + 2] & byte{0x3F}; 49 // 0b1110xxxx starts a sequence with two continuation bytes. 50 if (b0 < 0xF0) { 51 *char_length = 3; 52 return ((b0 & 0xF) << 12) | (b1 << 6) | b2; 53 } 54 // 0b11110xxx starts a sequence with three continuation bytes. 55 DCHECK((b0 & 0xF8) == 0xF0, "invalid code unit"); 56 DCHECK_INDEX(index + 2, src_length); 57 byte b3 = data[index + 3] & byte{0x3F}; 58 *char_length = 4; 59 return ((b0 & 0x7) << 18) | (b1 << 12) | (b2 << 6) | b3; 60} 61 62// RawSmallData 63 64int32_t RawSmallData::codePointAt(word index, word* char_length) const { 65 const byte* data = smallDataData(this); 66 return decodeCodePoint(data, length(), index, char_length); 67} 68 69word RawSmallData::codePointLength() const { 70 uword block = raw() >> kBitsPerByte; 71 uword mask_0 = ~uword{0} / 0xFF; // 0x010101... 72 uword mask_7 = mask_0 << 7; // 0x808080... 73 block = ((block & mask_7) >> 7) & ((~block) >> 6); 74 // TODO(cshapiro): evaluate using popcount instead of multiplication 75 word num_trailing = (block * mask_0) >> ((kWordSize - 1) * kBitsPerByte); 76 return length() - num_trailing; 77} 78 79word RawSmallData::findByte(byte value, word start, word length) const { 80 DCHECK_BOUND(start, this->length()); 81 DCHECK_BOUND(start + length, this->length()); 82 for (word i = 0; i < length; i++) { 83 if (byteAt(start + i) == value) return start + i; 84 } 85 return -1; 86} 87 88static uword hasZeroByte(uword value) { 89 uword mask_0 = ~uword{0} / 0xFF; // 0x010101... 90 uword mask_7 = mask_0 << 7; // 0x808080... 91 return (value - mask_0) & ~value & mask_7; 92} 93 94bool RawSmallData::includesByte(byte b) const { 95 DCHECK(b != 0, "Due to padding bytes, cannot check for 0."); 96 uword block = raw() >> kBitsPerByte; 97 uword surrogate_mask = (~uword{0} / 0xFF) * b; 98 return hasZeroByte(block ^ surrogate_mask); 99} 100 101bool RawSmallData::isASCII() const { 102 uword block = raw() >> kBitsPerByte; 103 uword non_ascii_mask = (~uword{0} / 0xFF) << (kBitsPerByte - 1); 104 return (block & non_ascii_mask) == 0; 105} 106 107word RawSmallData::offsetByCodePoints(word index, word count) const { 108 const byte* data = smallDataData(this); 109 return offset(data, length(), index, count); 110} 111 112char* RawSmallData::toCStr() const { 113 word length = this->length(); 114 byte* result = static_cast<byte*>(std::malloc(length + 1)); 115 CHECK(result != nullptr, "out of memory"); 116 copyTo(result, length); 117 result[length] = '\0'; 118 return reinterpret_cast<char*>(result); 119} 120 121// RawSmallBytes 122 123RawObject RawSmallBytes::becomeStr() const { 124 return RawObject{raw() ^ kSmallBytesTag ^ kSmallStrTag}; 125} 126 127RawSmallBytes RawSmallBytes::fromBytes(View<byte> data) { 128 word length = data.length(); 129 DCHECK_BOUND(length, kMaxLength); 130 uword result = 0; 131 for (word i = length - 1; i >= 0; i--) { 132 result = (result << kBitsPerByte) | data.get(i); 133 } 134 return RawSmallBytes(result << kBitsPerByte | length << kImmediateTagBits | 135 kSmallBytesTag); 136} 137 138// RawSmallStr 139 140RawObject RawSmallStr::becomeBytes() const { 141 return RawObject{raw() ^ kSmallStrTag ^ kSmallBytesTag}; 142} 143 144word RawSmallStr::compare(RawObject that) const { 145 word length = this->length(); 146 for (word i = 0; i < length; i++) { 147 word result = byteAt(i) - RawLargeStr::cast(that).byteAt(i); 148 if (result != 0) return result; 149 } 150 return -1; 151} 152 153word RawSmallStr::equalsCStr(const char* c_str) const { 154 const char* cp = c_str; 155 const word len = length(); 156 for (word i = 0; i < len; i++, cp++) { 157 byte ch = static_cast<byte>(*cp); 158 if (ch == '\0' || ch != byteAt(i)) { 159 return false; 160 } 161 } 162 return *cp == '\0'; 163} 164 165RawSmallStr RawSmallStr::fromBytes(View<byte> data) { 166 word length = data.length(); 167 DCHECK_BOUND(length, kMaxLength); 168 uword result = 0; 169 for (word i = length - 1; i >= 0; i--) { 170 result = (result << kBitsPerByte) | data.get(i); 171 } 172 return RawSmallStr(result << kBitsPerByte | length << kImmediateTagBits | 173 kSmallStrTag); 174} 175 176RawSmallStr RawSmallStr::fromCodePoint(int32_t code_point) { 177 DCHECK_BOUND(code_point, kMaxUnicode); 178 uword cp = static_cast<uword>(code_point); 179 // 0xxxxxxx 180 if (cp <= kMaxASCII) { // 01111111 181 return RawSmallStr(cp << 8 | (1 << kImmediateTagBits) | kSmallStrTag); 182 } 183 uword result = cp & 0x3F; // 00111111 184 cp >>= 6; 185 result <<= kBitsPerByte; 186 // 110xxxxx 10xxxxxx 187 if (cp <= 0x1F) { // 00011111 188 result |= cp; 189 result |= 0x80C0; // 10xxxxxx 110xxxxx 190 result <<= kBitsPerByte; 191 return RawSmallStr(result | (2 << kImmediateTagBits) | kSmallStrTag); 192 } 193 194 result |= cp & 0x3F; // 00111111 195 cp >>= 6; 196 result <<= kBitsPerByte; 197 // 1110xxxx 10xxxxxx 10xxxxxx 198 if (cp <= 0xF) { // 00001111 199 result |= cp; 200 result |= 0x8080E0; // 10xxxxxx 10xxxxxx 1110xxxx 201 result <<= kBitsPerByte; 202 return RawSmallStr(result | (3 << kImmediateTagBits) | kSmallStrTag); 203 } 204 result |= cp & 0x3F; // 00111111 205 cp >>= 6; 206 result <<= kBitsPerByte; 207 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 208 result |= cp; 209 result |= 0x808080F0; // 10xxxxxx 10xxxxxx 10xxxxxx 11110xxx 210 result <<= kBitsPerByte; 211 return RawSmallStr(result | (4 << kImmediateTagBits) | kSmallStrTag); 212} 213 214RawSmallStr RawSmallStr::fromCStr(const char* value) { 215 word len = std::strlen(value); 216 return fromBytes(View<byte>(reinterpret_cast<const byte*>(value), len)); 217} 218 219bool RawSmallStr::includesByte(byte b) const { 220 DCHECK(b != 0, "Due to padding bytes, cannot check for 0."); 221 uword block = raw() >> kBitsPerByte; 222 uword surrogate_mask = (~uword{0} / 0xFF) * b; 223 return hasZeroByte(block ^ surrogate_mask); 224} 225 226bool RawSmallStr::includes(RawObject that) const { 227 if (!that.isSmallStr()) { 228 return false; 229 } 230 word haystack_len = length(); 231 word needle_len = SmallStr::cast(that).length(); 232 word diff = haystack_len - needle_len; 233 if (diff < 0) { 234 return false; 235 } 236 uword haystack = raw() >> kBitsPerByte; 237 uword needle = that.raw() >> kBitsPerByte; 238 uword mask = ~0UL >> ((kWordSize - needle_len) * kBitsPerByte); 239 for (word i = 0; i <= diff; i++) { 240 if ((haystack & mask) == needle) { 241 return true; 242 } 243 haystack >>= kBitsPerByte; 244 } 245 return false; 246} 247 248word RawSmallStr::occurrencesOf(RawObject that) const { 249 DCHECK(that.isStr(), "must be searching for a Str object"); 250 if (!that.isSmallStr()) { 251 return 0; 252 } 253 word haystack_len = length(); 254 word needle_len = SmallStr::cast(that).length(); 255 DCHECK(needle_len >= 0, "needle length must be non-negative"); 256 if (needle_len == 0) { 257 return 0; 258 } 259 word diff = haystack_len - needle_len; 260 uword needle = that.raw() >> kBitsPerByte; 261 uword haystack = raw() >> kBitsPerByte; 262 uword mask = (1 << (needle_len * kBitsPerByte)) - 1; 263 word result = 0; 264 for (word i = 0; i <= diff; i++) { 265 if ((haystack & mask) == needle) { 266 result++; 267 i += needle_len - 1; 268 haystack >>= (kBitsPerByte * (needle_len - 1)); 269 } 270 haystack >>= kBitsPerByte; 271 } 272 return result; 273} 274 275// RawSmallInt 276 277const word RawSmallInt::kMinValue; 278const word RawSmallInt::kMaxValue; 279 280// RawBytearray 281 282word RawBytearray::compare(RawBytes that, word that_len) { 283 DCHECK_BOUND(that_len, that.length()); 284 word this_len = this->numItems(); 285 word len = Utils::minimum(this_len, that_len); 286 for (word i = 0; i < len; i++) { 287 word diff = this->byteAt(i) - that.byteAt(i); 288 if (diff != 0) return diff; 289 } 290 return this_len - that_len; 291} 292 293void RawBytearray::downsize(word new_length) const { 294 word original_length = numItems(); 295 DCHECK_BOUND(new_length, original_length); 296 if (original_length == 0) return; 297 byte* dst = reinterpret_cast<byte*>(RawMutableBytes::cast(items()).address()); 298 std::memset(dst + new_length, 0, original_length - new_length); 299 setNumItems(new_length); 300} 301 302void RawBytearray::replaceFromWith(word dst_start, RawBytearray src, 303 word count) const { 304 DCHECK_BOUND(dst_start + count, numItems()); 305 MutableBytes::cast(items()).replaceFromWithBytes( 306 dst_start, Bytes::cast(src.items()), count); 307} 308 309void RawBytearray::replaceFromWithStartAt(word dst_start, RawBytearray src, 310 word count, word src_start) const { 311 DCHECK_BOUND(dst_start + count, numItems()); 312 DCHECK_BOUND(src_start + count, src.numItems()); 313 MutableBytes::cast(items()).replaceFromWithBytesStartAt( 314 dst_start, Bytes::cast(src.items()), count, src_start); 315} 316 317// RawBytes 318 319word RawBytes::compare(RawBytes that) const { 320 word this_len = this->length(); 321 word that_len = that.length(); 322 word len = Utils::minimum(this_len, that_len); 323 for (word i = 0; i < len; i++) { 324 word diff = this->byteAt(i) - that.byteAt(i); 325 if (diff != 0) return diff; 326 } 327 return this_len - that_len; 328} 329 330// RawCode 331 332word RawCode::offsetToLineNum(word offset) const { 333 offset /= kCodeUnitScale; 334 // See https://github.com/python/cpython/blob/master/Objects/lnotab_notes.txt 335 // for details about the line number table format 336 RawBytes table = Bytes::cast(lnotab()); 337 word line = firstlineno(); 338 word cur_offset = 0; 339 for (word i = 0; i < table.length(); i += 2) { 340 cur_offset += table.byteAt(i); 341 if (cur_offset > offset) { 342 break; 343 } 344 line += static_cast<int8_t>(table.byteAt(i + 1)); 345 } 346 return line; 347} 348 349// RawDataArray 350 351static inline const byte* dataArrayData(RawDataArray obj) { 352 return reinterpret_cast<const byte*>(obj.address()); 353} 354 355int32_t RawDataArray::codePointAt(word index, word* char_length) const { 356 const byte* data = dataArrayData(*this); 357 return decodeCodePoint(data, length(), index, char_length); 358} 359 360word RawDataArray::codePointLength() const { 361 // This is a vectorized loop for processing code units in groups the size of a 362 // machine word. The garbage collector ensures the following invariants that 363 // simplify the algorithm, eliminating the need for a scalar pre-loop or a 364 // scalar-post loop: 365 // 366 // 1) The base address of instance data is always word aligned 367 // 2) The allocation sizes are always rounded-up to the next word 368 // 3) Unused bytes at the end of an allocation are always zero 369 // 370 // This algorithm works by counting the number of UTF-8 trailing bytes found 371 // in the string from the total number of byte in the string. Because the 372 // unused bytes at the end of a string are zero they are conveniently ignored 373 // by the counting. 374 word length = this->length(); 375 word size_in_words = (length + kWordSize - 1) >> kWordSizeLog2; 376 word result = length; 377 const uword* data = reinterpret_cast<const uword*>(dataArrayData(*this)); 378 uword mask_0 = ~uword{0} / 0xFF; // 0x010101... 379 uword mask_7 = mask_0 << 7; // 0x808080... 380 for (word i = 0; i < size_in_words; i++) { 381 // Read an entire word of code units. 382 uword block = data[i]; 383 // The bit pattern 0b10xxxxxx identifies a UTF-8 trailing byte. For each 384 // byte in a word, we isolate bit 6 and 7 and logically and the complement 385 // of bit 6 with bit 7. That leaves one set bit for each trailing byte in a 386 // word. 387 block = ((block & mask_7) >> 7) & ((~block) >> 6); 388 // Count the number of bits leftover in the word. That is equal to the 389 // number of trailing bytes. 390 // TODO(cshapiro): evaluate using popcount instead of multiplication 391 word num_trailing = (block * mask_0) >> ((kWordSize - 1) * kBitsPerByte); 392 // Finally, subtract the number of trailing bytes from the number of bytes 393 // in the string leaving just the number of ASCII code points and UTF-8 394 // leading bytes in the count. 395 result -= num_trailing; 396 } 397 return result; 398} 399 400word RawDataArray::compare(RawDataArray that) const { 401 word this_length = length(); 402 word that_length = that.length(); 403 word length = Utils::minimum(this_length, that_length); 404 const void* s1 = reinterpret_cast<const void*>(address()); 405 const void* s2 = reinterpret_cast<const void*>(that.address()); 406 word result = std::memcmp(s1, s2, length); 407 return result != 0 ? result : this_length - that_length; 408} 409 410bool RawDataArray::equals(RawDataArray that) const { 411 word len = this->length(); 412 if (len != that.length()) { 413 return false; 414 } 415 const void* s1 = reinterpret_cast<const void*>(address()); 416 const void* s2 = reinterpret_cast<const void*>(that.address()); 417 return std::memcmp(s1, s2, len) == 0; 418} 419 420bool RawDataArray::equalsBytes(View<byte> bytes) const { 421 word length = this->length(); 422 if (bytes.length() != length) { 423 return false; 424 } 425 return std::memcmp(dataArrayData(*this), bytes.data(), length) == 0; 426} 427 428bool RawDataArray::equalsCStr(const char* c_str) const { 429 const char* cp = c_str; 430 const word len = length(); 431 for (word i = 0; i < len; i++, cp++) { 432 byte ch = static_cast<byte>(*cp); 433 if (ch == '\0' || ch != byteAt(i)) { 434 return false; 435 } 436 } 437 return *cp == '\0'; 438} 439 440word RawDataArray::findByte(byte value, word start, word length) const { 441 DCHECK_BOUND(start, this->length()); 442 DCHECK_BOUND(start + length, this->length()); 443 word result = 444 Utils::memoryFindChar(dataArrayData(*this) + start, length, value); 445 if (result != -1) result += start; 446 return result; 447} 448 449bool RawDataArray::includesByte(byte b) const { 450 DCHECK(b != 0, "Due to padding bytes, cannot check for 0."); 451 word length = this->length(); 452 word size_in_words = (length + kWordSize - 1) >> kWordSizeLog2; 453 const uword* data = reinterpret_cast<const uword*>(dataArrayData(*this)); 454 uword mask = (~uword{0} / 0xFF) * b; 455 for (word i = 0; i < size_in_words; i++) { 456 uword block = data[i]; 457 if (hasZeroByte(block ^ mask)) { 458 return true; 459 } 460 } 461 return false; 462} 463 464bool RawDataArray::isASCII() const { 465 // Depends on invariants specified in RawLargeStr::codePointLength 466 word length = this->length(); 467 word size_in_words = (length + kWordSize - 1) >> kWordSizeLog2; 468 const uword* data = reinterpret_cast<const uword*>(dataArrayData(*this)); 469 uword non_ascii_mask = (~uword{0} / 0xFF) << (kBitsPerByte - 1); 470 for (word i = 0; i < size_in_words; i++) { 471 // Read an entire word of code units. 472 uword block = data[i]; 473 if ((block & non_ascii_mask) != 0) return false; 474 } 475 return true; 476} 477 478word RawDataArray::offsetByCodePoints(word index, word count) const { 479 const byte* data = dataArrayData(*this); 480 return offset(data, length(), index, count); 481} 482 483char* RawDataArray::toCStr() const { 484 word length = this->length(); 485 byte* result = static_cast<byte*>(std::malloc(length + 1)); 486 CHECK(result != nullptr, "out of memory"); 487 copyTo(result, length); 488 result[length] = '\0'; 489 return reinterpret_cast<char*>(result); 490} 491 492// RawLargeBytes 493 494RawObject RawLargeBytes::becomeStr() const { 495 DCHECK(bytesIsValidStr(RawBytes::cast(*this)), "must contain valid utf-8"); 496 setHeader(header().withLayoutId(LayoutId::kLargeStr)); 497 return *this; 498} 499 500// RawLargeStr 501 502static bool includes1(const byte* haystack, word haystack_len, byte needle) { 503 const uword* p = reinterpret_cast<const uword*>(haystack); 504 uword mask = ((~0UL / 255) * needle); 505 while (haystack_len >= kWordSize) { 506 if (hasZeroByte(*p ^ mask)) { 507 return true; 508 } 509 haystack_len -= kWordSize; 510 p++; 511 } 512 if (haystack_len != 0) { 513 word shift = (kWordSize - haystack_len) * kBitsPerByte; 514 return (hasZeroByte(*p ^ mask) << shift) != 0; 515 } 516 return false; 517} 518 519bool RawLargeStr::includes(RawObject that) const { 520 enum { kMask = kBitsPerWord - 1 }; 521 522 if (that == Str::empty()) { 523 return true; 524 } 525 526 word haystack_len = length(); 527 word needle_len = Str::cast(that).length(); 528 529 word diff = haystack_len - needle_len; 530 if (diff < 0) { 531 return false; 532 } 533 534 const byte* haystack = dataArrayData(*this); 535 536 if (needle_len == 1) { 537 byte ch = SmallStr::cast(that).byteAt(0); 538 return includes1(haystack, haystack_len, ch); 539 } 540 541 byte buffer[SmallStr::kMaxLength]; 542 const byte* needle; 543 if (that.isSmallStr()) { 544 SmallStr::cast(that).copyTo(buffer, needle_len); 545 needle = buffer; 546 } else { 547 needle = dataArrayData(LargeStr::cast(that)); 548 } 549 550 word needle_last = needle_len - 1; 551 word skip = needle_last - 1; 552 uword delta1 = 0; 553 for (word i = 0; i < needle_last; i++) { 554 delta1 |= (1 << (needle[i] & kMask)); 555 if (needle[i] == needle[needle_last]) { 556 skip = needle_last - i - 1; 557 } 558 } 559 delta1 |= (1 << (needle[needle_last] & kMask)); 560 for (word i = 0; i <= diff; i++) { 561 if (haystack[i + needle_last] == needle[needle_last]) { 562 word j = 0; 563 for (; j < needle_last; j++) { 564 if (haystack[i + j] != needle[j]) { 565 break; 566 } 567 } 568 if (j == needle_last) { 569 return true; 570 } 571 if ((delta1 & (1 << (haystack[i + needle_len] & kMask))) == 0) { 572 i += needle_len; 573 } else { 574 i += skip; 575 } 576 } else { 577 if ((delta1 & (1 << (haystack[i + needle_len] & kMask))) == 0) { 578 i += needle_len; 579 } 580 } 581 } 582 return false; 583} 584 585word RawLargeStr::occurrencesOf(RawObject that) const { 586 DCHECK(that.isStr(), "must be searching for a Str object"); 587 const byte* needle; 588 word needle_len; 589 byte buffer[SmallStr::kMaxLength]; 590 if (that.isSmallStr()) { 591 RawSmallStr str = RawSmallStr::cast(that); 592 needle_len = str.length(); 593 str.copyTo(buffer, needle_len); 594 needle = buffer; 595 } else { 596 RawLargeStr str = RawLargeStr::cast(that); 597 needle = dataArrayData(str); 598 needle_len = str.length(); 599 } 600 DCHECK(needle != nullptr, "needle cannot be null"); 601 DCHECK(needle_len >= 0, "needle length must be non-negative"); 602 word haystack_len = length(); 603 const byte* haystack = dataArrayData(*this); 604 if (haystack_len < needle_len) { 605 return 0; 606 } 607 word result = 0; 608 for (word pos = Utils::memoryFind(haystack, haystack_len, needle, needle_len); 609 pos != -1 && pos + needle_len <= haystack_len; result++) { 610 pos += needle_len; 611 haystack += pos; 612 haystack_len -= pos; 613 pos = Utils::memoryFind(haystack, haystack_len, needle, needle_len); 614 } 615 return result; 616} 617 618// RawList 619 620void RawList::replaceFromWith(word start, RawList src, word count) const { 621 DCHECK_BOUND(start + count, numItems()); 622 MutableTuple::cast(items()).replaceFromWith(start, Tuple::cast(src.items()), 623 count); 624} 625 626void RawList::replaceFromWithStartAt(word start, RawList src, word count, 627 word src_start) const { 628 DCHECK_BOUND(start + count, numItems()); 629 DCHECK_BOUND(src_start + count, src.numItems()); 630 MutableTuple::cast(items()).replaceFromWithStartAt( 631 start, Tuple::cast(src.items()), count, src_start); 632} 633 634// RawInt 635 636word RawInt::compare(RawInt that) const { 637 if (this->isSmallInt() && that.isSmallInt()) { 638 return this->asWord() - that.asWord(); 639 } 640 // compare with large ints always returns -1, 0, or 1 641 bool is_negative = this->isNegative(); 642 if (is_negative != that.isNegative()) { 643 return is_negative ? -1 : 1; 644 } 645 646 word left_digits = this->numDigits(); 647 word right_digits = that.numDigits(); 648 649 if (left_digits > right_digits) { 650 return is_negative ? -1 : 1; 651 } 652 if (left_digits < right_digits) { 653 return is_negative ? 1 : -1; 654 } 655 for (word i = left_digits - 1; i >= 0; i--) { 656 uword left_digit = this->digitAt(i); 657 uword right_digit = that.digitAt(i); 658 if (left_digit > right_digit) { 659 return 1; 660 } 661 if (left_digit < right_digit) { 662 return -1; 663 } 664 } 665 return 0; 666} 667 668word RawInt::copyTo(byte* dst, word max_length) const { 669 if (isLargeInt()) { 670 return RawLargeInt::cast(*this).copyTo(dst, max_length); 671 } 672 673 DCHECK(isSmallInt() || isBool(), "not an integer"); 674 uword val = isSmallInt() ? RawSmallInt::cast(*this).value() 675 : RawBool::cast(*this).value(); 676 word memcpy_length = std::min(word{kWordSize}, max_length); 677 std::memcpy(dst, &val, memcpy_length); 678 return memcpy_length; 679} 680 681// RawLargeInt 682 683bool RawLargeInt::isValid() const { 684 word digits = numDigits(); 685 if (digits <= 0) { 686 return false; 687 } 688 if (digits == 1) { 689 // Enforce a canonical representation for all ints. 690 return !RawSmallInt::isValid(digitAt(0)); 691 } 692 693 word high_digit = digitAt(digits - 1); 694 word next_digit = digitAt(digits - 2); 695 696 // Redundant sign-extension for negative values. 697 if (high_digit == -1 && next_digit < 0) { 698 return false; 699 } 700 701 // Redundant zero-extension for positive values. 702 if (high_digit == 0 && next_digit >= 0) { 703 return false; 704 } 705 706 return true; 707} 708 709word RawLargeInt::bitLength() const { 710 word num_digits = numDigits(); 711 word high_digit = digitAt(num_digits - 1); 712 713 if (high_digit < 0) { 714 // We're negative. Calculate what high_digit would be after negation. 715 uword carry = [&] { 716 for (word i = 0; i < num_digits - 1; i++) { 717 if (digitAt(i) != 0) return 0; 718 } 719 return 1; 720 }(); 721 high_digit = ~high_digit + carry; 722 } 723 return (num_digits - 1) * kBitsPerWord + Utils::highestBit(high_digit); 724} 725 726word RawLargeInt::copyTo(byte* dst, word copy_length) const { 727 word length = numDigits() * kWordSize; 728 auto digits = reinterpret_cast<uword*>(address() + kValueOffset); 729 word memcpy_size = std::min(length, copy_length); 730 std::memcpy(dst, digits, memcpy_size); 731 return memcpy_size; 732} 733 734void RawLargeInt::copyFrom(RawBytes bytes, byte sign_extension) const { 735 auto dst = reinterpret_cast<byte*>(address() + kValueOffset); 736 word bytes_len = bytes.length(); 737 DCHECK(bytes_len <= numDigits() * kWordSize, "too many bytes"); 738 bytes.copyTo(dst, bytes_len); 739 std::memset(dst + bytes_len, sign_extension, 740 (numDigits() * kWordSize) - bytes_len); 741} 742 743// RawMutableBytes 744 745word RawMutableBytes::indexOfAny(View<byte> needle, word start) const { 746 DCHECK(needle.length() >= 0, "needle length must be non-negative"); 747 if (needle.length() == 0) { 748 return length(); 749 } 750 uword bitmap[(kMaxByte + 1) / kBitsPerWord] = {0}; 751 for (word i = 0; i < needle.length(); i++) { 752 byte ch = needle.get(i); 753 bitmap[ch / kBitsPerWord] |= uword{1} << (ch % kBitsPerWord); 754 } 755 word result; 756 for (result = start; result < length(); result++) { 757 byte ch = byteAt(result); 758 if (bitmap[ch / kBitsPerWord] & (uword{1} << (ch % kBitsPerWord))) { 759 break; 760 } 761 } 762 return result; 763} 764 765void RawMutableBytes::replaceFromWith(word dst_start, RawDataArray src, 766 word count) const { 767 DCHECK_BOUND(dst_start + count, length()); 768 byte* dst = reinterpret_cast<byte*>(address()); 769 src.copyTo(dst + dst_start, count); 770} 771 772void RawMutableBytes::replaceFromWithStartAt(word dst_start, RawDataArray src, 773 word count, word src_start) const { 774 DCHECK_BOUND(dst_start + count, length()); 775 DCHECK_BOUND(src_start + count, src.length()); 776 src.copyToStartAt(reinterpret_cast<byte*>(address() + dst_start), count, 777 src_start); 778} 779 780void RawMutableBytes::replaceFromWithBytes(word dst_start, RawBytes src, 781 word count) const { 782 DCHECK_BOUND(dst_start + count, length()); 783 byte* dst = reinterpret_cast<byte*>(address()); 784 src.copyTo(dst + dst_start, count); 785} 786 787void RawMutableBytes::replaceFromWithByte(word dst_start, byte value, 788 word count) const { 789 DCHECK_BOUND(dst_start + count, length()); 790 byte* dst = reinterpret_cast<byte*>(address()); 791 std::memset(dst + dst_start, value, count); 792} 793 794void RawMutableBytes::replaceFromWithBytesStartAt(word dst_start, RawBytes src, 795 word count, 796 word src_start) const { 797 DCHECK_BOUND(dst_start + count, length()); 798 DCHECK_BOUND(src_start + count, src.length()); 799 src.copyToStartAt(reinterpret_cast<byte*>(address() + dst_start), count, 800 src_start); 801} 802 803void RawMutableBytes::replaceFromWithAll(word dst_start, View<byte> src) const { 804 DCHECK_BOUND(dst_start + src.length(), length()); 805 byte* dst = reinterpret_cast<byte*>(address()); 806 std::memcpy(dst + dst_start, src.data(), src.length()); 807} 808 809void RawMutableBytes::replaceFromWithByteslike(word dst_start, 810 const Byteslike& byteslike, 811 word count) const { 812 DCHECK_BOUND(count, byteslike.length()); 813 DCHECK_BOUND(dst_start, length() - count); 814 byte* dst = reinterpret_cast<byte*>(address() + dst_start); 815 const byte* src = reinterpret_cast<const byte*>(byteslike.address()); 816 std::memcpy(dst, src, count); 817} 818 819void RawMutableBytes::replaceFromWithByteslikeStartAt( 820 word dst_start, const Byteslike& byteslike, word count, 821 word src_start) const { 822 DCHECK(count >= 0, "count must not be negative"); 823 DCHECK_BOUND(src_start, byteslike.length() - count); 824 DCHECK_BOUND(dst_start, length() - count); 825 byte* dst = reinterpret_cast<byte*>(address() + dst_start); 826 const byte* src = 827 reinterpret_cast<const byte*>(byteslike.address() + src_start); 828 std::memcpy(dst, src, count); 829} 830 831void RawMutableBytes::replaceFromWithStr(word index, RawStr src, 832 word char_length) const { 833 DCHECK_BOUND(index + char_length, length()); 834 byte* dst = reinterpret_cast<byte*>(address()); 835 src.copyTo(dst + index, char_length); 836} 837 838void RawMutableBytes::replaceFromWithStrStartAt(word dst_start, RawStr src, 839 word char_length, 840 word src_start_char) const { 841 DCHECK_BOUND(dst_start + char_length, length()); 842 byte* dst = reinterpret_cast<byte*>(address()); 843 src.copyToStartAt(dst + dst_start, char_length, src_start_char); 844} 845 846RawObject RawMutableBytes::becomeImmutable() const { 847 word len = length(); 848 if (len <= SmallBytes::kMaxLength) { 849 return SmallBytes::fromBytes({dataArrayData(*this), len}); 850 } 851 setHeader(header().withLayoutId(LayoutId::kLargeBytes)); 852 return *this; 853} 854 855RawObject RawMutableBytes::becomeStr() const { 856 DCHECK(bytesIsValidStr(RawBytes::cast(*this)), "must contain valid utf-8"); 857 word len = length(); 858 if (len <= SmallStr::kMaxLength) { 859 return SmallStr::fromBytes({dataArrayData(*this), len}); 860 } 861 setHeader(header().withLayoutId(LayoutId::kLargeStr)); 862 return *this; 863} 864 865// RawMutableTuple 866 867void RawMutableTuple::fill(RawObject value) const { 868 word len = length(); 869 if (value.isNoneType()) { 870 std::memset(reinterpret_cast<byte*>(address()), -1, len * kWordSize); 871 return; 872 } 873 for (word i = 0; i < len; i++) { 874 atPut(i, value); 875 } 876} 877 878void RawMutableTuple::replaceFromWith(word dst_start, RawTuple src, 879 word count) const { 880 replaceFromWithStartAt(dst_start, src, count, 0); 881} 882 883void RawMutableTuple::replaceFromWithStartAt(word dst_start, RawTuple src, 884 word count, word src_start) const { 885 // TODO(emacs): This may need to be reverted when we have read/write 886 // barriers. 887 std::memmove(reinterpret_cast<uword*>(address()) + dst_start, 888 reinterpret_cast<uword*>(src.address()) + src_start, 889 count * kPointerSize); 890} 891 892// RawTuple 893 894bool RawTuple::contains(RawObject object) const { 895 word len = length(); 896 for (word i = 0; i < len; i++) { 897 if (at(i) == object) { 898 return true; 899 } 900 } 901 return false; 902} 903 904// RawSlice 905 906word RawSlice::length(word start, word stop, word step) { 907 if (step < 0) { 908 if (stop < start) { 909 return (start - stop - 1) / (-step) + 1; 910 } 911 } else if (start < stop) { 912 return (stop - start - 1) / step + 1; 913 } 914 return 0; 915} 916 917word RawSlice::adjustIndices(word length, word* start, word* stop, word step) { 918 DCHECK(step != 0, "Step should be non zero"); 919 920 if (*start < 0) { 921 *start += length; 922 if (*start < 0) { 923 *start = (step < 0) ? -1 : 0; 924 } 925 } else if (*start >= length) { 926 *start = (step < 0) ? length - 1 : length; 927 } 928 929 if (*stop < 0) { 930 *stop += length; 931 if (*stop < 0) { 932 *stop = (step < 0) ? -1 : 0; 933 } 934 } else if (*stop >= length) { 935 *stop = (step < 0) ? length - 1 : length; 936 } 937 938 return RawSlice::length(*start, *stop, step); 939} 940 941void RawSlice::adjustSearchIndices(word* start, word* end, word length) { 942 if (*start < 0) { 943 *start = Utils::maximum(*start + length, word{0}); 944 } 945 if (*end < 0) { 946 *end = Utils::maximum(*end + length, word{0}); 947 } else if (*end > length) { 948 *end = length; 949 } 950} 951 952// RawStr 953 954word RawStr::compareCStr(const char* c_str) const { 955 word c_length = std::strlen(c_str); 956 word length = Utils::minimum(this->length(), c_length); 957 for (word i = 0; i < length; i++) { 958 word diff = byteAt(i) - static_cast<byte>(c_str[i]); 959 if (diff != 0) { 960 return (diff > 0) ? 1 : -1; 961 } 962 } 963 word diff = this->length() - c_length; 964 return (diff > 0) ? 1 : ((diff < 0) ? -1 : 0); 965} 966 967// RawStrArray 968 969int32_t RawStrArray::codePointAt(word index, word* char_length) const { 970 RawMutableBytes buffer = RawMutableBytes::cast(items()); 971 return decodeCodePoint(dataArrayData(buffer), numItems(), index, char_length); 972} 973 974word RawStrArray::offsetByCodePoints(word index, word count) const { 975 RawMutableBytes buffer = RawMutableBytes::cast(items()); 976 const byte* data = dataArrayData(buffer); 977 return offset(data, numItems(), index, count); 978} 979 980void RawStrArray::rotateCodePoint(word first, word last) const { 981 DCHECK_BOUND(first, last); 982 DCHECK_INDEX(last, numItems()); 983 if (first == last) { 984 return; 985 } 986 987 byte code_point[UTF8::kMaxLength]; 988 byte* buffer = 989 reinterpret_cast<byte*>(RawMutableBytes::cast(items()).address()); 990 word char_length = UTF8::numChars(buffer[last]); 991 std::memcpy(code_point, &buffer[last], char_length); 992 std::memmove(&buffer[first + char_length], &buffer[first], last - first); 993 std::memcpy(&buffer[first], code_point, char_length); 994} 995 996// Linked list helpers 997 998static void enqueueReference(RawObject reference, RawObject* tail, 999 word reference_link_offset, 1000 word tail_link_offset) { 1001 DCHECK(Instance::cast(reference) 1002 .instanceVariableAt(reference_link_offset) 1003 .isNoneType(), 1004 "Attempting to enqueue object that's already in queue"); 1005 if (*tail == RawNoneType::object()) { 1006 Instance::cast(reference).instanceVariableAtPut(reference_link_offset, 1007 reference); 1008 } else { 1009 RawObject head = Instance::cast(*tail).instanceVariableAt(tail_link_offset); 1010 Instance::cast(*tail).instanceVariableAtPut(tail_link_offset, reference); 1011 Instance::cast(reference).instanceVariableAtPut(reference_link_offset, 1012 head); 1013 } 1014 *tail = reference; 1015} 1016 1017static void dequeueReference(RawObject head, RawObject* tail, 1018 word head_link_offset, word tail_link_offset) { 1019 if (head == *tail) { 1020 *tail = RawNoneType::object(); 1021 } else { 1022 RawObject next = Instance::cast(head).instanceVariableAt(head_link_offset); 1023 Instance::cast(*tail).instanceVariableAtPut(tail_link_offset, next); 1024 } 1025 Instance::cast(head).instanceVariableAtPut(head_link_offset, 1026 RawNoneType::object()); 1027} 1028 1029// RawWeakRef 1030 1031void RawWeakRef::enqueue(RawObject reference, RawObject* tail) { 1032 enqueueReference(reference, tail, RawWeakRef::kLinkOffset, 1033 RawWeakRef::kLinkOffset); 1034} 1035 1036RawObject RawWeakRef::dequeue(RawObject* tail) { 1037 RawObject head = 1038 Instance::cast(*tail).instanceVariableAt(RawWeakRef::kLinkOffset); 1039 dequeueReference(head, tail, RawWeakRef::kLinkOffset, 1040 RawWeakRef::kLinkOffset); 1041 return head; 1042} 1043 1044// Append tail2 to tail1 and return the new tail. 1045RawObject RawWeakRef::spliceQueue(RawObject tail1, RawObject tail2) { 1046 if (tail1 == RawNoneType::object() && tail2 == RawNoneType::object()) { 1047 return RawNoneType::object(); 1048 } 1049 if (tail1 == RawNoneType::object()) { 1050 return tail2; 1051 } 1052 if (tail2 == RawNoneType::object()) { 1053 return tail1; 1054 } 1055 // merge two list, tail1 -> head2 -> ... -> tail2 -> head1 1056 RawObject head1 = RawWeakRef::cast(tail1).link(); 1057 RawObject head2 = RawWeakRef::cast(tail2).link(); 1058 RawWeakRef::cast(tail1).setLink(head2); 1059 RawWeakRef::cast(tail2).setLink(head1); 1060 return tail2; 1061} 1062 1063// RawNativeProxy 1064 1065static word nativeProxyAttributeOffset(const RawObject* object, 1066 int offset_from_end) { 1067 DCHECK(offset_from_end < 0, "negative offset is expected"); 1068 return RawHeapObject::cast(*object).headerCountOrOverflow() * kPointerSize + 1069 offset_from_end; 1070} 1071 1072RawObject RawNativeProxy::native() const { 1073 return instanceVariableAt( 1074 nativeProxyAttributeOffset(this, kNativeOffsetFromEnd)); 1075} 1076 1077void RawNativeProxy::setNative(RawObject native_ptr) const { 1078 instanceVariableAtPut(nativeProxyAttributeOffset(this, kNativeOffsetFromEnd), 1079 native_ptr); 1080} 1081 1082RawObject RawNativeProxy::dict() const { 1083 return instanceVariableAt( 1084 nativeProxyAttributeOffset(this, kDictOffsetFromEnd)); 1085} 1086 1087void RawNativeProxy::setDict(RawObject dict) const { 1088 instanceVariableAtPut(nativeProxyAttributeOffset(this, kDictOffsetFromEnd), 1089 dict); 1090} 1091 1092RawObject RawNativeProxy::link() const { 1093 return instanceVariableAt( 1094 nativeProxyAttributeOffset(this, kLinkOffsetFromEnd)); 1095} 1096 1097void RawNativeProxy::setLink(RawObject reference) const { 1098 instanceVariableAtPut(nativeProxyAttributeOffset(this, kLinkOffsetFromEnd), 1099 reference); 1100} 1101 1102void RawNativeProxy::enqueue(RawObject reference, RawObject* tail) { 1103 DCHECK(Thread::current()->runtime()->isInstanceOfNativeProxy(reference), 1104 "reference is expected to be native proxy"); 1105 DCHECK(*tail == RawNoneType::object() || 1106 Thread::current()->runtime()->isInstanceOfNativeProxy(*tail), 1107 "tail is expected to be none of native proxy"); 1108 int reference_link_offset = 1109 nativeProxyAttributeOffset(&reference, kLinkOffsetFromEnd); 1110 int tail_link_offset = -1; 1111 if (*tail != RawNoneType::object()) { 1112 tail_link_offset = nativeProxyAttributeOffset(tail, kLinkOffsetFromEnd); 1113 } 1114 enqueueReference(reference, tail, reference_link_offset, tail_link_offset); 1115} 1116 1117RawObject RawNativeProxy::dequeue(RawObject* tail) { 1118 DCHECK(Thread::current()->runtime()->isInstanceOfNativeProxy(*tail), 1119 "expected native proxy"); 1120 DCHECK(*tail != RawNoneType::object(), "empty queue"); 1121 int tail_link_offset = nativeProxyAttributeOffset(tail, kLinkOffsetFromEnd); 1122 RawObject head = Instance::cast(*tail).instanceVariableAt(tail_link_offset); 1123 int head_link_offset = nativeProxyAttributeOffset(&head, kLinkOffsetFromEnd); 1124 dequeueReference(head, tail, head_link_offset, tail_link_offset); 1125 return head; 1126} 1127 1128// RawGeneratorFrame 1129 1130word RawGeneratorFrame::virtualPC() const { return frame()->virtualPC(); } 1131 1132void RawGeneratorFrame::setVirtualPC(word value) const { 1133 return frame()->setVirtualPC(value); 1134} 1135 1136RawObject* RawGeneratorFrame::valueStackTop() const { 1137 return frame()->stashedValueStackTop(); 1138} 1139 1140RawObject RawGeneratorFrame::popValue() const { 1141 return frame()->stashedPopValue(); 1142} 1143 1144} // namespace py