runtime/objects.cpp at trunk · bernsteinbear.com/skybison

bernsteinbear.com / skybison
fork atom
this repo has no description
fork atom
skybison / runtime / objects.cpp
at trunk 1144 lines 36 kB view raw
wrap content
bernsteinbear.com Use std::memmove in MutableTuple::replaceFromWithStartAt 2y ago
42d68454
   1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
   2#include "objects.h"
   3
   4#include <cstring>
   5
   6#include "bytes-builtins.h"
   7#include "byteslike.h"
   8#include "frame.h"
   9#include "runtime.h"
  10#include "thread.h"
  11#include "unicode.h"
  12#include "view.h"
  13
  14namespace py {
  15
  16static inline word offset(const byte* data, word len, word index, word count) {
  17  if (count >= 0) {
  18    while (count-- && index < len) {
  19      index += UTF8::numChars(data[index]);
  20    }
  21    return Utils::minimum(index, len);
  22  }
  23  while (count < 0) {
  24    index--;
  25    if (index < 0) return -1;
  26    if (UTF8::isLeadByte(data[index])) count++;
  27  }
  28  return index;
  29}
  30
  31static inline int32_t decodeCodePoint(const byte* data, word src_length,
  32                                      word index, word* char_length) {
  33  DCHECK_INDEX(index, src_length);
  34  byte b0 = data[index];
  35  if (b0 <= kMaxASCII) {
  36    *char_length = 1;
  37    return b0;
  38  }
  39  DCHECK_INDEX(index + 1, src_length);
  40  byte b1 = data[index + 1] & byte{0x3F};
  41  // 0b110xxxxx begins a sequence with one continuation byte.
  42  if (b0 < 0xE0) {
  43    DCHECK(b0 >= 0xC, "unexpected continuation byte");
  44    *char_length = 2;
  45    return ((b0 & 0x1F) << 6) | b1;
  46  }
  47  DCHECK_INDEX(index + 2, src_length);
  48  byte b2 = data[index + 2] & byte{0x3F};
  49  // 0b1110xxxx starts a sequence with two continuation bytes.
  50  if (b0 < 0xF0) {
  51    *char_length = 3;
  52    return ((b0 & 0xF) << 12) | (b1 << 6) | b2;
  53  }
  54  // 0b11110xxx starts a sequence with three continuation bytes.
  55  DCHECK((b0 & 0xF8) == 0xF0, "invalid code unit");
  56  DCHECK_INDEX(index + 2, src_length);
  57  byte b3 = data[index + 3] & byte{0x3F};
  58  *char_length = 4;
  59  return ((b0 & 0x7) << 18) | (b1 << 12) | (b2 << 6) | b3;
  60}
  61
  62// RawSmallData
  63
  64int32_t RawSmallData::codePointAt(word index, word* char_length) const {
  65  const byte* data = smallDataData(this);
  66  return decodeCodePoint(data, length(), index, char_length);
  67}
  68
  69word RawSmallData::codePointLength() const {
  70  uword block = raw() >> kBitsPerByte;
  71  uword mask_0 = ~uword{0} / 0xFF;  // 0x010101...
  72  uword mask_7 = mask_0 << 7;       // 0x808080...
  73  block = ((block & mask_7) >> 7) & ((~block) >> 6);
  74  // TODO(cshapiro): evaluate using popcount instead of multiplication
  75  word num_trailing = (block * mask_0) >> ((kWordSize - 1) * kBitsPerByte);
  76  return length() - num_trailing;
  77}
  78
  79word RawSmallData::findByte(byte value, word start, word length) const {
  80  DCHECK_BOUND(start, this->length());
  81  DCHECK_BOUND(start + length, this->length());
  82  for (word i = 0; i < length; i++) {
  83    if (byteAt(start + i) == value) return start + i;
  84  }
  85  return -1;
  86}
  87
  88static uword hasZeroByte(uword value) {
  89  uword mask_0 = ~uword{0} / 0xFF;  // 0x010101...
  90  uword mask_7 = mask_0 << 7;       // 0x808080...
  91  return (value - mask_0) & ~value & mask_7;
  92}
  93
  94bool RawSmallData::includesByte(byte b) const {
  95  DCHECK(b != 0, "Due to padding bytes, cannot check for 0.");
  96  uword block = raw() >> kBitsPerByte;
  97  uword surrogate_mask = (~uword{0} / 0xFF) * b;
  98  return hasZeroByte(block ^ surrogate_mask);
  99}
 100
 101bool RawSmallData::isASCII() const {
 102  uword block = raw() >> kBitsPerByte;
 103  uword non_ascii_mask = (~uword{0} / 0xFF) << (kBitsPerByte - 1);
 104  return (block & non_ascii_mask) == 0;
 105}
 106
 107word RawSmallData::offsetByCodePoints(word index, word count) const {
 108  const byte* data = smallDataData(this);
 109  return offset(data, length(), index, count);
 110}
 111
 112char* RawSmallData::toCStr() const {
 113  word length = this->length();
 114  byte* result = static_cast<byte*>(std::malloc(length + 1));
 115  CHECK(result != nullptr, "out of memory");
 116  copyTo(result, length);
 117  result[length] = '\0';
 118  return reinterpret_cast<char*>(result);
 119}
 120
 121// RawSmallBytes
 122
 123RawObject RawSmallBytes::becomeStr() const {
 124  return RawObject{raw() ^ kSmallBytesTag ^ kSmallStrTag};
 125}
 126
 127RawSmallBytes RawSmallBytes::fromBytes(View<byte> data) {
 128  word length = data.length();
 129  DCHECK_BOUND(length, kMaxLength);
 130  uword result = 0;
 131  for (word i = length - 1; i >= 0; i--) {
 132    result = (result << kBitsPerByte) | data.get(i);
 133  }
 134  return RawSmallBytes(result << kBitsPerByte | length << kImmediateTagBits |
 135                       kSmallBytesTag);
 136}
 137
 138// RawSmallStr
 139
 140RawObject RawSmallStr::becomeBytes() const {
 141  return RawObject{raw() ^ kSmallStrTag ^ kSmallBytesTag};
 142}
 143
 144word RawSmallStr::compare(RawObject that) const {
 145  word length = this->length();
 146  for (word i = 0; i < length; i++) {
 147    word result = byteAt(i) - RawLargeStr::cast(that).byteAt(i);
 148    if (result != 0) return result;
 149  }
 150  return -1;
 151}
 152
 153word RawSmallStr::equalsCStr(const char* c_str) const {
 154  const char* cp = c_str;
 155  const word len = length();
 156  for (word i = 0; i < len; i++, cp++) {
 157    byte ch = static_cast<byte>(*cp);
 158    if (ch == '\0' || ch != byteAt(i)) {
 159      return false;
 160    }
 161  }
 162  return *cp == '\0';
 163}
 164
 165RawSmallStr RawSmallStr::fromBytes(View<byte> data) {
 166  word length = data.length();
 167  DCHECK_BOUND(length, kMaxLength);
 168  uword result = 0;
 169  for (word i = length - 1; i >= 0; i--) {
 170    result = (result << kBitsPerByte) | data.get(i);
 171  }
 172  return RawSmallStr(result << kBitsPerByte | length << kImmediateTagBits |
 173                     kSmallStrTag);
 174}
 175
 176RawSmallStr RawSmallStr::fromCodePoint(int32_t code_point) {
 177  DCHECK_BOUND(code_point, kMaxUnicode);
 178  uword cp = static_cast<uword>(code_point);
 179  // 0xxxxxxx
 180  if (cp <= kMaxASCII) {  // 01111111
 181    return RawSmallStr(cp << 8 | (1 << kImmediateTagBits) | kSmallStrTag);
 182  }
 183  uword result = cp & 0x3F;  // 00111111
 184  cp >>= 6;
 185  result <<= kBitsPerByte;
 186  // 110xxxxx 10xxxxxx
 187  if (cp <= 0x1F) {  // 00011111
 188    result |= cp;
 189    result |= 0x80C0;  // 10xxxxxx 110xxxxx
 190    result <<= kBitsPerByte;
 191    return RawSmallStr(result | (2 << kImmediateTagBits) | kSmallStrTag);
 192  }
 193
 194  result |= cp & 0x3F;  // 00111111
 195  cp >>= 6;
 196  result <<= kBitsPerByte;
 197  // 1110xxxx 10xxxxxx 10xxxxxx
 198  if (cp <= 0xF) {  // 00001111
 199    result |= cp;
 200    result |= 0x8080E0;  // 10xxxxxx 10xxxxxx 1110xxxx
 201    result <<= kBitsPerByte;
 202    return RawSmallStr(result | (3 << kImmediateTagBits) | kSmallStrTag);
 203  }
 204  result |= cp & 0x3F;  // 00111111
 205  cp >>= 6;
 206  result <<= kBitsPerByte;
 207  // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 208  result |= cp;
 209  result |= 0x808080F0;  // 10xxxxxx 10xxxxxx 10xxxxxx 11110xxx
 210  result <<= kBitsPerByte;
 211  return RawSmallStr(result | (4 << kImmediateTagBits) | kSmallStrTag);
 212}
 213
 214RawSmallStr RawSmallStr::fromCStr(const char* value) {
 215  word len = std::strlen(value);
 216  return fromBytes(View<byte>(reinterpret_cast<const byte*>(value), len));
 217}
 218
 219bool RawSmallStr::includesByte(byte b) const {
 220  DCHECK(b != 0, "Due to padding bytes, cannot check for 0.");
 221  uword block = raw() >> kBitsPerByte;
 222  uword surrogate_mask = (~uword{0} / 0xFF) * b;
 223  return hasZeroByte(block ^ surrogate_mask);
 224}
 225
 226bool RawSmallStr::includes(RawObject that) const {
 227  if (!that.isSmallStr()) {
 228    return false;
 229  }
 230  word haystack_len = length();
 231  word needle_len = SmallStr::cast(that).length();
 232  word diff = haystack_len - needle_len;
 233  if (diff < 0) {
 234    return false;
 235  }
 236  uword haystack = raw() >> kBitsPerByte;
 237  uword needle = that.raw() >> kBitsPerByte;
 238  uword mask = ~0UL >> ((kWordSize - needle_len) * kBitsPerByte);
 239  for (word i = 0; i <= diff; i++) {
 240    if ((haystack & mask) == needle) {
 241      return true;
 242    }
 243    haystack >>= kBitsPerByte;
 244  }
 245  return false;
 246}
 247
 248word RawSmallStr::occurrencesOf(RawObject that) const {
 249  DCHECK(that.isStr(), "must be searching for a Str object");
 250  if (!that.isSmallStr()) {
 251    return 0;
 252  }
 253  word haystack_len = length();
 254  word needle_len = SmallStr::cast(that).length();
 255  DCHECK(needle_len >= 0, "needle length must be non-negative");
 256  if (needle_len == 0) {
 257    return 0;
 258  }
 259  word diff = haystack_len - needle_len;
 260  uword needle = that.raw() >> kBitsPerByte;
 261  uword haystack = raw() >> kBitsPerByte;
 262  uword mask = (1 << (needle_len * kBitsPerByte)) - 1;
 263  word result = 0;
 264  for (word i = 0; i <= diff; i++) {
 265    if ((haystack & mask) == needle) {
 266      result++;
 267      i += needle_len - 1;
 268      haystack >>= (kBitsPerByte * (needle_len - 1));
 269    }
 270    haystack >>= kBitsPerByte;
 271  }
 272  return result;
 273}
 274
 275// RawSmallInt
 276
 277const word RawSmallInt::kMinValue;
 278const word RawSmallInt::kMaxValue;
 279
 280// RawBytearray
 281
 282word RawBytearray::compare(RawBytes that, word that_len) {
 283  DCHECK_BOUND(that_len, that.length());
 284  word this_len = this->numItems();
 285  word len = Utils::minimum(this_len, that_len);
 286  for (word i = 0; i < len; i++) {
 287    word diff = this->byteAt(i) - that.byteAt(i);
 288    if (diff != 0) return diff;
 289  }
 290  return this_len - that_len;
 291}
 292
 293void RawBytearray::downsize(word new_length) const {
 294  word original_length = numItems();
 295  DCHECK_BOUND(new_length, original_length);
 296  if (original_length == 0) return;
 297  byte* dst = reinterpret_cast<byte*>(RawMutableBytes::cast(items()).address());
 298  std::memset(dst + new_length, 0, original_length - new_length);
 299  setNumItems(new_length);
 300}
 301
 302void RawBytearray::replaceFromWith(word dst_start, RawBytearray src,
 303                                   word count) const {
 304  DCHECK_BOUND(dst_start + count, numItems());
 305  MutableBytes::cast(items()).replaceFromWithBytes(
 306      dst_start, Bytes::cast(src.items()), count);
 307}
 308
 309void RawBytearray::replaceFromWithStartAt(word dst_start, RawBytearray src,
 310                                          word count, word src_start) const {
 311  DCHECK_BOUND(dst_start + count, numItems());
 312  DCHECK_BOUND(src_start + count, src.numItems());
 313  MutableBytes::cast(items()).replaceFromWithBytesStartAt(
 314      dst_start, Bytes::cast(src.items()), count, src_start);
 315}
 316
 317// RawBytes
 318
 319word RawBytes::compare(RawBytes that) const {
 320  word this_len = this->length();
 321  word that_len = that.length();
 322  word len = Utils::minimum(this_len, that_len);
 323  for (word i = 0; i < len; i++) {
 324    word diff = this->byteAt(i) - that.byteAt(i);
 325    if (diff != 0) return diff;
 326  }
 327  return this_len - that_len;
 328}
 329
 330// RawCode
 331
 332word RawCode::offsetToLineNum(word offset) const {
 333  offset /= kCodeUnitScale;
 334  // See https://github.com/python/cpython/blob/master/Objects/lnotab_notes.txt
 335  // for details about the line number table format
 336  RawBytes table = Bytes::cast(lnotab());
 337  word line = firstlineno();
 338  word cur_offset = 0;
 339  for (word i = 0; i < table.length(); i += 2) {
 340    cur_offset += table.byteAt(i);
 341    if (cur_offset > offset) {
 342      break;
 343    }
 344    line += static_cast<int8_t>(table.byteAt(i + 1));
 345  }
 346  return line;
 347}
 348
 349// RawDataArray
 350
 351static inline const byte* dataArrayData(RawDataArray obj) {
 352  return reinterpret_cast<const byte*>(obj.address());
 353}
 354
 355int32_t RawDataArray::codePointAt(word index, word* char_length) const {
 356  const byte* data = dataArrayData(*this);
 357  return decodeCodePoint(data, length(), index, char_length);
 358}
 359
 360word RawDataArray::codePointLength() const {
 361  // This is a vectorized loop for processing code units in groups the size of a
 362  // machine word.  The garbage collector ensures the following invariants that
 363  // simplify the algorithm, eliminating the need for a scalar pre-loop or a
 364  // scalar-post loop:
 365  //
 366  //   1) The base address of instance data is always word aligned
 367  //   2) The allocation sizes are always rounded-up to the next word
 368  //   3) Unused bytes at the end of an allocation are always zero
 369  //
 370  // This algorithm works by counting the number of UTF-8 trailing bytes found
 371  // in the string from the total number of byte in the string.  Because the
 372  // unused bytes at the end of a string are zero they are conveniently ignored
 373  // by the counting.
 374  word length = this->length();
 375  word size_in_words = (length + kWordSize - 1) >> kWordSizeLog2;
 376  word result = length;
 377  const uword* data = reinterpret_cast<const uword*>(dataArrayData(*this));
 378  uword mask_0 = ~uword{0} / 0xFF;  // 0x010101...
 379  uword mask_7 = mask_0 << 7;       // 0x808080...
 380  for (word i = 0; i < size_in_words; i++) {
 381    // Read an entire word of code units.
 382    uword block = data[i];
 383    // The bit pattern 0b10xxxxxx identifies a UTF-8 trailing byte.  For each
 384    // byte in a word, we isolate bit 6 and 7 and logically and the complement
 385    // of bit 6 with bit 7.  That leaves one set bit for each trailing byte in a
 386    // word.
 387    block = ((block & mask_7) >> 7) & ((~block) >> 6);
 388    // Count the number of bits leftover in the word.  That is equal to the
 389    // number of trailing bytes.
 390    // TODO(cshapiro): evaluate using popcount instead of multiplication
 391    word num_trailing = (block * mask_0) >> ((kWordSize - 1) * kBitsPerByte);
 392    // Finally, subtract the number of trailing bytes from the number of bytes
 393    // in the string leaving just the number of ASCII code points and UTF-8
 394    // leading bytes in the count.
 395    result -= num_trailing;
 396  }
 397  return result;
 398}
 399
 400word RawDataArray::compare(RawDataArray that) const {
 401  word this_length = length();
 402  word that_length = that.length();
 403  word length = Utils::minimum(this_length, that_length);
 404  const void* s1 = reinterpret_cast<const void*>(address());
 405  const void* s2 = reinterpret_cast<const void*>(that.address());
 406  word result = std::memcmp(s1, s2, length);
 407  return result != 0 ? result : this_length - that_length;
 408}
 409
 410bool RawDataArray::equals(RawDataArray that) const {
 411  word len = this->length();
 412  if (len != that.length()) {
 413    return false;
 414  }
 415  const void* s1 = reinterpret_cast<const void*>(address());
 416  const void* s2 = reinterpret_cast<const void*>(that.address());
 417  return std::memcmp(s1, s2, len) == 0;
 418}
 419
 420bool RawDataArray::equalsBytes(View<byte> bytes) const {
 421  word length = this->length();
 422  if (bytes.length() != length) {
 423    return false;
 424  }
 425  return std::memcmp(dataArrayData(*this), bytes.data(), length) == 0;
 426}
 427
 428bool RawDataArray::equalsCStr(const char* c_str) const {
 429  const char* cp = c_str;
 430  const word len = length();
 431  for (word i = 0; i < len; i++, cp++) {
 432    byte ch = static_cast<byte>(*cp);
 433    if (ch == '\0' || ch != byteAt(i)) {
 434      return false;
 435    }
 436  }
 437  return *cp == '\0';
 438}
 439
 440word RawDataArray::findByte(byte value, word start, word length) const {
 441  DCHECK_BOUND(start, this->length());
 442  DCHECK_BOUND(start + length, this->length());
 443  word result =
 444      Utils::memoryFindChar(dataArrayData(*this) + start, length, value);
 445  if (result != -1) result += start;
 446  return result;
 447}
 448
 449bool RawDataArray::includesByte(byte b) const {
 450  DCHECK(b != 0, "Due to padding bytes, cannot check for 0.");
 451  word length = this->length();
 452  word size_in_words = (length + kWordSize - 1) >> kWordSizeLog2;
 453  const uword* data = reinterpret_cast<const uword*>(dataArrayData(*this));
 454  uword mask = (~uword{0} / 0xFF) * b;
 455  for (word i = 0; i < size_in_words; i++) {
 456    uword block = data[i];
 457    if (hasZeroByte(block ^ mask)) {
 458      return true;
 459    }
 460  }
 461  return false;
 462}
 463
 464bool RawDataArray::isASCII() const {
 465  // Depends on invariants specified in RawLargeStr::codePointLength
 466  word length = this->length();
 467  word size_in_words = (length + kWordSize - 1) >> kWordSizeLog2;
 468  const uword* data = reinterpret_cast<const uword*>(dataArrayData(*this));
 469  uword non_ascii_mask = (~uword{0} / 0xFF) << (kBitsPerByte - 1);
 470  for (word i = 0; i < size_in_words; i++) {
 471    // Read an entire word of code units.
 472    uword block = data[i];
 473    if ((block & non_ascii_mask) != 0) return false;
 474  }
 475  return true;
 476}
 477
 478word RawDataArray::offsetByCodePoints(word index, word count) const {
 479  const byte* data = dataArrayData(*this);
 480  return offset(data, length(), index, count);
 481}
 482
 483char* RawDataArray::toCStr() const {
 484  word length = this->length();
 485  byte* result = static_cast<byte*>(std::malloc(length + 1));
 486  CHECK(result != nullptr, "out of memory");
 487  copyTo(result, length);
 488  result[length] = '\0';
 489  return reinterpret_cast<char*>(result);
 490}
 491
 492// RawLargeBytes
 493
 494RawObject RawLargeBytes::becomeStr() const {
 495  DCHECK(bytesIsValidStr(RawBytes::cast(*this)), "must contain valid utf-8");
 496  setHeader(header().withLayoutId(LayoutId::kLargeStr));
 497  return *this;
 498}
 499
 500// RawLargeStr
 501
 502static bool includes1(const byte* haystack, word haystack_len, byte needle) {
 503  const uword* p = reinterpret_cast<const uword*>(haystack);
 504  uword mask = ((~0UL / 255) * needle);
 505  while (haystack_len >= kWordSize) {
 506    if (hasZeroByte(*p ^ mask)) {
 507      return true;
 508    }
 509    haystack_len -= kWordSize;
 510    p++;
 511  }
 512  if (haystack_len != 0) {
 513    word shift = (kWordSize - haystack_len) * kBitsPerByte;
 514    return (hasZeroByte(*p ^ mask) << shift) != 0;
 515  }
 516  return false;
 517}
 518
 519bool RawLargeStr::includes(RawObject that) const {
 520  enum { kMask = kBitsPerWord - 1 };
 521
 522  if (that == Str::empty()) {
 523    return true;
 524  }
 525
 526  word haystack_len = length();
 527  word needle_len = Str::cast(that).length();
 528
 529  word diff = haystack_len - needle_len;
 530  if (diff < 0) {
 531    return false;
 532  }
 533
 534  const byte* haystack = dataArrayData(*this);
 535
 536  if (needle_len == 1) {
 537    byte ch = SmallStr::cast(that).byteAt(0);
 538    return includes1(haystack, haystack_len, ch);
 539  }
 540
 541  byte buffer[SmallStr::kMaxLength];
 542  const byte* needle;
 543  if (that.isSmallStr()) {
 544    SmallStr::cast(that).copyTo(buffer, needle_len);
 545    needle = buffer;
 546  } else {
 547    needle = dataArrayData(LargeStr::cast(that));
 548  }
 549
 550  word needle_last = needle_len - 1;
 551  word skip = needle_last - 1;
 552  uword delta1 = 0;
 553  for (word i = 0; i < needle_last; i++) {
 554    delta1 |= (1 << (needle[i] & kMask));
 555    if (needle[i] == needle[needle_last]) {
 556      skip = needle_last - i - 1;
 557    }
 558  }
 559  delta1 |= (1 << (needle[needle_last] & kMask));
 560  for (word i = 0; i <= diff; i++) {
 561    if (haystack[i + needle_last] == needle[needle_last]) {
 562      word j = 0;
 563      for (; j < needle_last; j++) {
 564        if (haystack[i + j] != needle[j]) {
 565          break;
 566        }
 567      }
 568      if (j == needle_last) {
 569        return true;
 570      }
 571      if ((delta1 & (1 << (haystack[i + needle_len] & kMask))) == 0) {
 572        i += needle_len;
 573      } else {
 574        i += skip;
 575      }
 576    } else {
 577      if ((delta1 & (1 << (haystack[i + needle_len] & kMask))) == 0) {
 578        i += needle_len;
 579      }
 580    }
 581  }
 582  return false;
 583}
 584
 585word RawLargeStr::occurrencesOf(RawObject that) const {
 586  DCHECK(that.isStr(), "must be searching for a Str object");
 587  const byte* needle;
 588  word needle_len;
 589  byte buffer[SmallStr::kMaxLength];
 590  if (that.isSmallStr()) {
 591    RawSmallStr str = RawSmallStr::cast(that);
 592    needle_len = str.length();
 593    str.copyTo(buffer, needle_len);
 594    needle = buffer;
 595  } else {
 596    RawLargeStr str = RawLargeStr::cast(that);
 597    needle = dataArrayData(str);
 598    needle_len = str.length();
 599  }
 600  DCHECK(needle != nullptr, "needle cannot be null");
 601  DCHECK(needle_len >= 0, "needle length must be non-negative");
 602  word haystack_len = length();
 603  const byte* haystack = dataArrayData(*this);
 604  if (haystack_len < needle_len) {
 605    return 0;
 606  }
 607  word result = 0;
 608  for (word pos = Utils::memoryFind(haystack, haystack_len, needle, needle_len);
 609       pos != -1 && pos + needle_len <= haystack_len; result++) {
 610    pos += needle_len;
 611    haystack += pos;
 612    haystack_len -= pos;
 613    pos = Utils::memoryFind(haystack, haystack_len, needle, needle_len);
 614  }
 615  return result;
 616}
 617
 618// RawList
 619
 620void RawList::replaceFromWith(word start, RawList src, word count) const {
 621  DCHECK_BOUND(start + count, numItems());
 622  MutableTuple::cast(items()).replaceFromWith(start, Tuple::cast(src.items()),
 623                                              count);
 624}
 625
 626void RawList::replaceFromWithStartAt(word start, RawList src, word count,
 627                                     word src_start) const {
 628  DCHECK_BOUND(start + count, numItems());
 629  DCHECK_BOUND(src_start + count, src.numItems());
 630  MutableTuple::cast(items()).replaceFromWithStartAt(
 631      start, Tuple::cast(src.items()), count, src_start);
 632}
 633
 634// RawInt
 635
 636word RawInt::compare(RawInt that) const {
 637  if (this->isSmallInt() && that.isSmallInt()) {
 638    return this->asWord() - that.asWord();
 639  }
 640  // compare with large ints always returns -1, 0, or 1
 641  bool is_negative = this->isNegative();
 642  if (is_negative != that.isNegative()) {
 643    return is_negative ? -1 : 1;
 644  }
 645
 646  word left_digits = this->numDigits();
 647  word right_digits = that.numDigits();
 648
 649  if (left_digits > right_digits) {
 650    return is_negative ? -1 : 1;
 651  }
 652  if (left_digits < right_digits) {
 653    return is_negative ? 1 : -1;
 654  }
 655  for (word i = left_digits - 1; i >= 0; i--) {
 656    uword left_digit = this->digitAt(i);
 657    uword right_digit = that.digitAt(i);
 658    if (left_digit > right_digit) {
 659      return 1;
 660    }
 661    if (left_digit < right_digit) {
 662      return -1;
 663    }
 664  }
 665  return 0;
 666}
 667
 668word RawInt::copyTo(byte* dst, word max_length) const {
 669  if (isLargeInt()) {
 670    return RawLargeInt::cast(*this).copyTo(dst, max_length);
 671  }
 672
 673  DCHECK(isSmallInt() || isBool(), "not an integer");
 674  uword val = isSmallInt() ? RawSmallInt::cast(*this).value()
 675                           : RawBool::cast(*this).value();
 676  word memcpy_length = std::min(word{kWordSize}, max_length);
 677  std::memcpy(dst, &val, memcpy_length);
 678  return memcpy_length;
 679}
 680
 681// RawLargeInt
 682
 683bool RawLargeInt::isValid() const {
 684  word digits = numDigits();
 685  if (digits <= 0) {
 686    return false;
 687  }
 688  if (digits == 1) {
 689    // Enforce a canonical representation for all ints.
 690    return !RawSmallInt::isValid(digitAt(0));
 691  }
 692
 693  word high_digit = digitAt(digits - 1);
 694  word next_digit = digitAt(digits - 2);
 695
 696  // Redundant sign-extension for negative values.
 697  if (high_digit == -1 && next_digit < 0) {
 698    return false;
 699  }
 700
 701  // Redundant zero-extension for positive values.
 702  if (high_digit == 0 && next_digit >= 0) {
 703    return false;
 704  }
 705
 706  return true;
 707}
 708
 709word RawLargeInt::bitLength() const {
 710  word num_digits = numDigits();
 711  word high_digit = digitAt(num_digits - 1);
 712
 713  if (high_digit < 0) {
 714    // We're negative. Calculate what high_digit would be after negation.
 715    uword carry = [&] {
 716      for (word i = 0; i < num_digits - 1; i++) {
 717        if (digitAt(i) != 0) return 0;
 718      }
 719      return 1;
 720    }();
 721    high_digit = ~high_digit + carry;
 722  }
 723  return (num_digits - 1) * kBitsPerWord + Utils::highestBit(high_digit);
 724}
 725
 726word RawLargeInt::copyTo(byte* dst, word copy_length) const {
 727  word length = numDigits() * kWordSize;
 728  auto digits = reinterpret_cast<uword*>(address() + kValueOffset);
 729  word memcpy_size = std::min(length, copy_length);
 730  std::memcpy(dst, digits, memcpy_size);
 731  return memcpy_size;
 732}
 733
 734void RawLargeInt::copyFrom(RawBytes bytes, byte sign_extension) const {
 735  auto dst = reinterpret_cast<byte*>(address() + kValueOffset);
 736  word bytes_len = bytes.length();
 737  DCHECK(bytes_len <= numDigits() * kWordSize, "too many bytes");
 738  bytes.copyTo(dst, bytes_len);
 739  std::memset(dst + bytes_len, sign_extension,
 740              (numDigits() * kWordSize) - bytes_len);
 741}
 742
 743// RawMutableBytes
 744
 745word RawMutableBytes::indexOfAny(View<byte> needle, word start) const {
 746  DCHECK(needle.length() >= 0, "needle length must be non-negative");
 747  if (needle.length() == 0) {
 748    return length();
 749  }
 750  uword bitmap[(kMaxByte + 1) / kBitsPerWord] = {0};
 751  for (word i = 0; i < needle.length(); i++) {
 752    byte ch = needle.get(i);
 753    bitmap[ch / kBitsPerWord] |= uword{1} << (ch % kBitsPerWord);
 754  }
 755  word result;
 756  for (result = start; result < length(); result++) {
 757    byte ch = byteAt(result);
 758    if (bitmap[ch / kBitsPerWord] & (uword{1} << (ch % kBitsPerWord))) {
 759      break;
 760    }
 761  }
 762  return result;
 763}
 764
 765void RawMutableBytes::replaceFromWith(word dst_start, RawDataArray src,
 766                                      word count) const {
 767  DCHECK_BOUND(dst_start + count, length());
 768  byte* dst = reinterpret_cast<byte*>(address());
 769  src.copyTo(dst + dst_start, count);
 770}
 771
 772void RawMutableBytes::replaceFromWithStartAt(word dst_start, RawDataArray src,
 773                                             word count, word src_start) const {
 774  DCHECK_BOUND(dst_start + count, length());
 775  DCHECK_BOUND(src_start + count, src.length());
 776  src.copyToStartAt(reinterpret_cast<byte*>(address() + dst_start), count,
 777                    src_start);
 778}
 779
 780void RawMutableBytes::replaceFromWithBytes(word dst_start, RawBytes src,
 781                                           word count) const {
 782  DCHECK_BOUND(dst_start + count, length());
 783  byte* dst = reinterpret_cast<byte*>(address());
 784  src.copyTo(dst + dst_start, count);
 785}
 786
 787void RawMutableBytes::replaceFromWithByte(word dst_start, byte value,
 788                                          word count) const {
 789  DCHECK_BOUND(dst_start + count, length());
 790  byte* dst = reinterpret_cast<byte*>(address());
 791  std::memset(dst + dst_start, value, count);
 792}
 793
 794void RawMutableBytes::replaceFromWithBytesStartAt(word dst_start, RawBytes src,
 795                                                  word count,
 796                                                  word src_start) const {
 797  DCHECK_BOUND(dst_start + count, length());
 798  DCHECK_BOUND(src_start + count, src.length());
 799  src.copyToStartAt(reinterpret_cast<byte*>(address() + dst_start), count,
 800                    src_start);
 801}
 802
 803void RawMutableBytes::replaceFromWithAll(word dst_start, View<byte> src) const {
 804  DCHECK_BOUND(dst_start + src.length(), length());
 805  byte* dst = reinterpret_cast<byte*>(address());
 806  std::memcpy(dst + dst_start, src.data(), src.length());
 807}
 808
 809void RawMutableBytes::replaceFromWithByteslike(word dst_start,
 810                                               const Byteslike& byteslike,
 811                                               word count) const {
 812  DCHECK_BOUND(count, byteslike.length());
 813  DCHECK_BOUND(dst_start, length() - count);
 814  byte* dst = reinterpret_cast<byte*>(address() + dst_start);
 815  const byte* src = reinterpret_cast<const byte*>(byteslike.address());
 816  std::memcpy(dst, src, count);
 817}
 818
 819void RawMutableBytes::replaceFromWithByteslikeStartAt(
 820    word dst_start, const Byteslike& byteslike, word count,
 821    word src_start) const {
 822  DCHECK(count >= 0, "count must not be negative");
 823  DCHECK_BOUND(src_start, byteslike.length() - count);
 824  DCHECK_BOUND(dst_start, length() - count);
 825  byte* dst = reinterpret_cast<byte*>(address() + dst_start);
 826  const byte* src =
 827      reinterpret_cast<const byte*>(byteslike.address() + src_start);
 828  std::memcpy(dst, src, count);
 829}
 830
 831void RawMutableBytes::replaceFromWithStr(word index, RawStr src,
 832                                         word char_length) const {
 833  DCHECK_BOUND(index + char_length, length());
 834  byte* dst = reinterpret_cast<byte*>(address());
 835  src.copyTo(dst + index, char_length);
 836}
 837
 838void RawMutableBytes::replaceFromWithStrStartAt(word dst_start, RawStr src,
 839                                                word char_length,
 840                                                word src_start_char) const {
 841  DCHECK_BOUND(dst_start + char_length, length());
 842  byte* dst = reinterpret_cast<byte*>(address());
 843  src.copyToStartAt(dst + dst_start, char_length, src_start_char);
 844}
 845
 846RawObject RawMutableBytes::becomeImmutable() const {
 847  word len = length();
 848  if (len <= SmallBytes::kMaxLength) {
 849    return SmallBytes::fromBytes({dataArrayData(*this), len});
 850  }
 851  setHeader(header().withLayoutId(LayoutId::kLargeBytes));
 852  return *this;
 853}
 854
 855RawObject RawMutableBytes::becomeStr() const {
 856  DCHECK(bytesIsValidStr(RawBytes::cast(*this)), "must contain valid utf-8");
 857  word len = length();
 858  if (len <= SmallStr::kMaxLength) {
 859    return SmallStr::fromBytes({dataArrayData(*this), len});
 860  }
 861  setHeader(header().withLayoutId(LayoutId::kLargeStr));
 862  return *this;
 863}
 864
 865// RawMutableTuple
 866
 867void RawMutableTuple::fill(RawObject value) const {
 868  word len = length();
 869  if (value.isNoneType()) {
 870    std::memset(reinterpret_cast<byte*>(address()), -1, len * kWordSize);
 871    return;
 872  }
 873  for (word i = 0; i < len; i++) {
 874    atPut(i, value);
 875  }
 876}
 877
 878void RawMutableTuple::replaceFromWith(word dst_start, RawTuple src,
 879                                      word count) const {
 880  replaceFromWithStartAt(dst_start, src, count, 0);
 881}
 882
 883void RawMutableTuple::replaceFromWithStartAt(word dst_start, RawTuple src,
 884                                             word count, word src_start) const {
 885  // TODO(emacs): This may need to be reverted when we have read/write
 886  // barriers.
 887  std::memmove(reinterpret_cast<uword*>(address()) + dst_start,
 888               reinterpret_cast<uword*>(src.address()) + src_start,
 889               count * kPointerSize);
 890}
 891
 892// RawTuple
 893
 894bool RawTuple::contains(RawObject object) const {
 895  word len = length();
 896  for (word i = 0; i < len; i++) {
 897    if (at(i) == object) {
 898      return true;
 899    }
 900  }
 901  return false;
 902}
 903
 904// RawSlice
 905
 906word RawSlice::length(word start, word stop, word step) {
 907  if (step < 0) {
 908    if (stop < start) {
 909      return (start - stop - 1) / (-step) + 1;
 910    }
 911  } else if (start < stop) {
 912    return (stop - start - 1) / step + 1;
 913  }
 914  return 0;
 915}
 916
 917word RawSlice::adjustIndices(word length, word* start, word* stop, word step) {
 918  DCHECK(step != 0, "Step should be non zero");
 919
 920  if (*start < 0) {
 921    *start += length;
 922    if (*start < 0) {
 923      *start = (step < 0) ? -1 : 0;
 924    }
 925  } else if (*start >= length) {
 926    *start = (step < 0) ? length - 1 : length;
 927  }
 928
 929  if (*stop < 0) {
 930    *stop += length;
 931    if (*stop < 0) {
 932      *stop = (step < 0) ? -1 : 0;
 933    }
 934  } else if (*stop >= length) {
 935    *stop = (step < 0) ? length - 1 : length;
 936  }
 937
 938  return RawSlice::length(*start, *stop, step);
 939}
 940
 941void RawSlice::adjustSearchIndices(word* start, word* end, word length) {
 942  if (*start < 0) {
 943    *start = Utils::maximum(*start + length, word{0});
 944  }
 945  if (*end < 0) {
 946    *end = Utils::maximum(*end + length, word{0});
 947  } else if (*end > length) {
 948    *end = length;
 949  }
 950}
 951
 952// RawStr
 953
 954word RawStr::compareCStr(const char* c_str) const {
 955  word c_length = std::strlen(c_str);
 956  word length = Utils::minimum(this->length(), c_length);
 957  for (word i = 0; i < length; i++) {
 958    word diff = byteAt(i) - static_cast<byte>(c_str[i]);
 959    if (diff != 0) {
 960      return (diff > 0) ? 1 : -1;
 961    }
 962  }
 963  word diff = this->length() - c_length;
 964  return (diff > 0) ? 1 : ((diff < 0) ? -1 : 0);
 965}
 966
 967// RawStrArray
 968
 969int32_t RawStrArray::codePointAt(word index, word* char_length) const {
 970  RawMutableBytes buffer = RawMutableBytes::cast(items());
 971  return decodeCodePoint(dataArrayData(buffer), numItems(), index, char_length);
 972}
 973
 974word RawStrArray::offsetByCodePoints(word index, word count) const {
 975  RawMutableBytes buffer = RawMutableBytes::cast(items());
 976  const byte* data = dataArrayData(buffer);
 977  return offset(data, numItems(), index, count);
 978}
 979
 980void RawStrArray::rotateCodePoint(word first, word last) const {
 981  DCHECK_BOUND(first, last);
 982  DCHECK_INDEX(last, numItems());
 983  if (first == last) {
 984    return;
 985  }
 986
 987  byte code_point[UTF8::kMaxLength];
 988  byte* buffer =
 989      reinterpret_cast<byte*>(RawMutableBytes::cast(items()).address());
 990  word char_length = UTF8::numChars(buffer[last]);
 991  std::memcpy(code_point, &buffer[last], char_length);
 992  std::memmove(&buffer[first + char_length], &buffer[first], last - first);
 993  std::memcpy(&buffer[first], code_point, char_length);
 994}
 995
 996// Linked list helpers
 997
 998static void enqueueReference(RawObject reference, RawObject* tail,
 999                             word reference_link_offset,
1000                             word tail_link_offset) {
1001  DCHECK(Instance::cast(reference)
1002             .instanceVariableAt(reference_link_offset)
1003             .isNoneType(),
1004         "Attempting to enqueue object that's already in queue");
1005  if (*tail == RawNoneType::object()) {
1006    Instance::cast(reference).instanceVariableAtPut(reference_link_offset,
1007                                                    reference);
1008  } else {
1009    RawObject head = Instance::cast(*tail).instanceVariableAt(tail_link_offset);
1010    Instance::cast(*tail).instanceVariableAtPut(tail_link_offset, reference);
1011    Instance::cast(reference).instanceVariableAtPut(reference_link_offset,
1012                                                    head);
1013  }
1014  *tail = reference;
1015}
1016
1017static void dequeueReference(RawObject head, RawObject* tail,
1018                             word head_link_offset, word tail_link_offset) {
1019  if (head == *tail) {
1020    *tail = RawNoneType::object();
1021  } else {
1022    RawObject next = Instance::cast(head).instanceVariableAt(head_link_offset);
1023    Instance::cast(*tail).instanceVariableAtPut(tail_link_offset, next);
1024  }
1025  Instance::cast(head).instanceVariableAtPut(head_link_offset,
1026                                             RawNoneType::object());
1027}
1028
1029// RawWeakRef
1030
1031void RawWeakRef::enqueue(RawObject reference, RawObject* tail) {
1032  enqueueReference(reference, tail, RawWeakRef::kLinkOffset,
1033                   RawWeakRef::kLinkOffset);
1034}
1035
1036RawObject RawWeakRef::dequeue(RawObject* tail) {
1037  RawObject head =
1038      Instance::cast(*tail).instanceVariableAt(RawWeakRef::kLinkOffset);
1039  dequeueReference(head, tail, RawWeakRef::kLinkOffset,
1040                   RawWeakRef::kLinkOffset);
1041  return head;
1042}
1043
1044// Append tail2 to tail1 and return the new tail.
1045RawObject RawWeakRef::spliceQueue(RawObject tail1, RawObject tail2) {
1046  if (tail1 == RawNoneType::object() && tail2 == RawNoneType::object()) {
1047    return RawNoneType::object();
1048  }
1049  if (tail1 == RawNoneType::object()) {
1050    return tail2;
1051  }
1052  if (tail2 == RawNoneType::object()) {
1053    return tail1;
1054  }
1055  // merge two list, tail1 -> head2 -> ... -> tail2 -> head1
1056  RawObject head1 = RawWeakRef::cast(tail1).link();
1057  RawObject head2 = RawWeakRef::cast(tail2).link();
1058  RawWeakRef::cast(tail1).setLink(head2);
1059  RawWeakRef::cast(tail2).setLink(head1);
1060  return tail2;
1061}
1062
1063// RawNativeProxy
1064
1065static word nativeProxyAttributeOffset(const RawObject* object,
1066                                       int offset_from_end) {
1067  DCHECK(offset_from_end < 0, "negative offset is expected");
1068  return RawHeapObject::cast(*object).headerCountOrOverflow() * kPointerSize +
1069         offset_from_end;
1070}
1071
1072RawObject RawNativeProxy::native() const {
1073  return instanceVariableAt(
1074      nativeProxyAttributeOffset(this, kNativeOffsetFromEnd));
1075}
1076
1077void RawNativeProxy::setNative(RawObject native_ptr) const {
1078  instanceVariableAtPut(nativeProxyAttributeOffset(this, kNativeOffsetFromEnd),
1079                        native_ptr);
1080}
1081
1082RawObject RawNativeProxy::dict() const {
1083  return instanceVariableAt(
1084      nativeProxyAttributeOffset(this, kDictOffsetFromEnd));
1085}
1086
1087void RawNativeProxy::setDict(RawObject dict) const {
1088  instanceVariableAtPut(nativeProxyAttributeOffset(this, kDictOffsetFromEnd),
1089                        dict);
1090}
1091
1092RawObject RawNativeProxy::link() const {
1093  return instanceVariableAt(
1094      nativeProxyAttributeOffset(this, kLinkOffsetFromEnd));
1095}
1096
1097void RawNativeProxy::setLink(RawObject reference) const {
1098  instanceVariableAtPut(nativeProxyAttributeOffset(this, kLinkOffsetFromEnd),
1099                        reference);
1100}
1101
1102void RawNativeProxy::enqueue(RawObject reference, RawObject* tail) {
1103  DCHECK(Thread::current()->runtime()->isInstanceOfNativeProxy(reference),
1104         "reference is expected to be native proxy");
1105  DCHECK(*tail == RawNoneType::object() ||
1106             Thread::current()->runtime()->isInstanceOfNativeProxy(*tail),
1107         "tail is expected to be none of native proxy");
1108  int reference_link_offset =
1109      nativeProxyAttributeOffset(&reference, kLinkOffsetFromEnd);
1110  int tail_link_offset = -1;
1111  if (*tail != RawNoneType::object()) {
1112    tail_link_offset = nativeProxyAttributeOffset(tail, kLinkOffsetFromEnd);
1113  }
1114  enqueueReference(reference, tail, reference_link_offset, tail_link_offset);
1115}
1116
1117RawObject RawNativeProxy::dequeue(RawObject* tail) {
1118  DCHECK(Thread::current()->runtime()->isInstanceOfNativeProxy(*tail),
1119         "expected native proxy");
1120  DCHECK(*tail != RawNoneType::object(), "empty queue");
1121  int tail_link_offset = nativeProxyAttributeOffset(tail, kLinkOffsetFromEnd);
1122  RawObject head = Instance::cast(*tail).instanceVariableAt(tail_link_offset);
1123  int head_link_offset = nativeProxyAttributeOffset(&head, kLinkOffsetFromEnd);
1124  dequeueReference(head, tail, head_link_offset, tail_link_offset);
1125  return head;
1126}
1127
1128// RawGeneratorFrame
1129
1130word RawGeneratorFrame::virtualPC() const { return frame()->virtualPC(); }
1131
1132void RawGeneratorFrame::setVirtualPC(word value) const {
1133  return frame()->setVirtualPC(value);
1134}
1135
1136RawObject* RawGeneratorFrame::valueStackTop() const {
1137  return frame()->stashedValueStackTop();
1138}
1139
1140RawObject RawGeneratorFrame::popValue() const {
1141  return frame()->stashedPopValue();
1142}
1143
1144}  // namespace py