runtime/str-builtins.cpp at trunk · bernsteinbear.com/skybison

bernsteinbear.com / skybison
fork atom
this repo has no description
fork atom
skybison / runtime / str-builtins.cpp
at trunk 1876 lines 61 kB view raw
wrap content
bernsteinbear.com Fix uninitialized variable warnings 3y ago
28f597cc
   1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
   2#include "str-builtins.h"
   3
   4#include <cctype>
   5#include <cwchar>
   6
   7#include "builtins.h"
   8#include "formatter-utils.h"
   9#include "formatter.h"
  10#include "frame.h"
  11#include "globals.h"
  12#include "int-builtins.h"
  13#include "objects.h"
  14#include "runtime.h"
  15#include "set-builtins.h"
  16#include "slice-builtins.h"
  17#include "thread.h"
  18#include "tuple-builtins.h"
  19#include "type-builtins.h"
  20#include "unicode.h"
  21#include "utils.h"
  22
  23namespace py {
  24
  25word adjustedStrIndex(const Str& str, word index) {
  26  word len = str.length();
  27  if (index >= 0) {
  28    return str.offsetByCodePoints(0, index);
  29  }
  30  if (-len < index) {
  31    return Utils::maximum(0l, str.offsetByCodePoints(len, index));
  32  }
  33  return 0;
  34}
  35
  36RawObject dataArraySubstr(Thread* thread, const DataArray& data, word start,
  37                          word length) {
  38  word data_len = data.length();
  39  DCHECK_BOUND(start, data_len);
  40  DCHECK_BOUND(start, data_len - length);
  41  // SmallStr result
  42  if (length <= RawSmallStr::kMaxLength) {
  43    byte buffer[RawSmallStr::kMaxLength];
  44    data.copyToStartAt(buffer, length, start);
  45    return SmallStr::fromBytes(View<byte>(buffer, length));
  46  }
  47  // LargeStr result
  48  HandleScope scope(thread);
  49  MutableBytes result(&scope,
  50                      thread->runtime()->newMutableBytesUninitialized(length));
  51  result.replaceFromWithStartAt(0, *data, length, start);
  52  return result.becomeStr();
  53}
  54
  55RawObject newStrFromWideChar(Thread* thread, const wchar_t* wc_str) {
  56  return newStrFromWideCharWithLength(thread, wc_str, std::wcslen(wc_str));
  57}
  58
  59RawObject newStrFromWideCharWithLength(Thread* thread, const wchar_t* wc_str,
  60                                       word length) {
  61  static_assert(sizeof(*wc_str) * kBitsPerByte == 32,
  62                "only 32bit wchar_t supported.");
  63
  64  for (word i = 0; i < length; ++i) {
  65    if (wc_str[i] < 0 || wc_str[i] > kMaxUnicode) {
  66      return thread->raiseWithFmt(LayoutId::kValueError,
  67                                  "character is not in range");
  68    }
  69  }
  70
  71  return thread->runtime()->newStrFromUTF32(
  72      View<int32_t>(reinterpret_cast<const int32_t*>(wc_str), length));
  73}
  74
  75void strCopyToWCStr(wchar_t* buf, size_t buf_length, const Str& str) {
  76  uword wchar_index = 0;
  77  for (word byte_index = 0, num_bytes = 0, byte_count = str.length();
  78       byte_index < byte_count && wchar_index < buf_length;
  79       byte_index += num_bytes, wchar_index += 1) {
  80    int32_t cp = Str::cast(*str).codePointAt(byte_index, &num_bytes);
  81    static_assert(sizeof(*buf) == sizeof(cp), "Requires 32bit wchar_t");
  82    buf[wchar_index] = static_cast<wchar_t>(cp);
  83  }
  84  buf[wchar_index] = '\0';
  85}
  86
  87static word strCountCharFromTo(const Str& haystack, byte needle, word start,
  88                               word end) {
  89  word result = 0;
  90  if (haystack.isSmallStr()) {
  91    RawSmallStr small = SmallStr::cast(*haystack);
  92    for (word i = start; i < end; i++) {
  93      if (small.byteAt(i) == needle) result++;
  94    }
  95    return result;
  96  }
  97  byte* ptr = reinterpret_cast<byte*>(LargeStr::cast(*haystack).address());
  98  for (word i = start; i < end; i++) {
  99    if (ptr[i] == needle) result++;
 100  }
 101  return result;
 102}
 103
 104RawObject strCount(const Str& haystack, const Str& needle, word start,
 105                   word end) {
 106  if (end < 0 || start < 0) {
 107    // N.B.: If end is negative we may be able to cheaply walk backward. We
 108    // should avoid calling adjustSearchIndices here since the underlying
 109    // container is not O(1) and replace it with something that preserves some
 110    // of the signals that would be useful to lower the cost of the O(n)
 111    // traversal.
 112    // TODO(T41400083): Use a different search algorithm
 113    Slice::adjustSearchIndices(&start, &end, haystack.codePointLength());
 114  }
 115
 116  word start_index = start == 0 ? 0 : haystack.offsetByCodePoints(0, start);
 117  if (start_index == haystack.length() && needle.length() > 0) {
 118    // Haystack is too small; fast early return
 119    return SmallInt::fromWord(0);
 120  }
 121
 122  word end_index = end == kMaxWord
 123                       ? haystack.length()
 124                       : haystack.offsetByCodePoints(start_index, end - start);
 125  if ((end_index - start_index) < needle.length() || start_index > end_index) {
 126    // Haystack is too small; fast early return
 127    return SmallInt::fromWord(0);
 128  }
 129
 130  if (needle.length() == 1) {
 131    return SmallInt::fromWord(strCountCharFromTo(
 132        haystack, SmallStr::cast(*needle).byteAt(0), start_index, end_index));
 133  }
 134
 135  // TODO(T41400083): Use a different search algorithm
 136  return SmallInt::fromWord(strCountSubStrFromTo(haystack, needle, start_index,
 137                                                 end_index, haystack.length()));
 138}
 139
 140word strCountSubStrFromTo(const Str& haystack, const Str& needle, word start,
 141                          word end, word max_count) {
 142  DCHECK(max_count >= 0, "max_count must be non-negative");
 143  word needle_len = needle.length();
 144  word num_match = 0;
 145  // Loop is in byte space, not code point space
 146  for (word i = start; i <= end - needle_len && num_match < max_count;) {
 147    if (strHasPrefix(haystack, needle, i)) {
 148      i += needle_len;
 149      num_match++;
 150      continue;
 151    }
 152    i++;
 153  }
 154  return num_match;
 155}
 156
 157word strCountSubStr(const Str& haystack, const Str& needle, word max_count) {
 158  return strCountSubStrFromTo(haystack, needle, 0, haystack.length(),
 159                              max_count);
 160}
 161
 162RawObject strEncodeASCII(Thread* thread, const Str& str) {
 163  HandleScope scope(thread);
 164  if (!str.isASCII()) {
 165    return Unbound::object();
 166  }
 167  if (str.isSmallStr()) {
 168    return SmallStr::cast(*str).becomeBytes();
 169  }
 170  word str_len = str.length();
 171  MutableBytes bytes(&scope,
 172                     thread->runtime()->newMutableBytesUninitialized(str_len));
 173  bytes.replaceFromWithStr(0, *str, str_len);
 174  return bytes.becomeImmutable();
 175}
 176
 177RawObject strEscapeNonASCII(Thread* thread, const Str& str) {
 178  if (str.isASCII()) return *str;
 179  // Slow implementation using `codecs`.
 180  HandleScope scope(thread);
 181  Runtime* runtime = thread->runtime();
 182  Symbols* symbols = runtime->symbols();
 183  Object ascii(&scope, symbols->at(ID(ascii)));
 184  Object backslashreplace(&scope, symbols->at(ID(backslashreplace)));
 185  Bytes result_bytes(
 186      &scope, thread->invokeMethod3(str, ID(encode), ascii, backslashreplace));
 187  word length = result_bytes.length();
 188  MutableBytes result(&scope, runtime->newMutableBytesUninitialized(length));
 189  result.replaceFromWithBytes(0, *result_bytes, length);
 190  return result.becomeStr();
 191}
 192
 193RawObject strJoinWithTupleOrList(Thread* thread, const Str& sep,
 194                                 const Object& iterable) {
 195  Runtime* runtime = thread->runtime();
 196  HandleScope scope(thread);
 197  Tuple tuple(&scope, runtime->emptyTuple());
 198  word length = 0;
 199  if (iterable.isTuple()) {
 200    tuple = *iterable;
 201    length = tuple.length();
 202  } else if (iterable.isList()) {
 203    tuple = List::cast(*iterable).items();
 204    length = List::cast(*iterable).numItems();
 205  } else {
 206    // Slow path: collect items into list in Python and call again
 207    return Unbound::object();
 208  }
 209  Object elt(&scope, NoneType::object());
 210  for (word i = 0; i < length; i++) {
 211    elt = tuple.at(i);
 212    if (!runtime->isInstanceOfStr(*elt)) {
 213      return thread->raiseWithFmt(
 214          LayoutId::kTypeError,
 215          "sequence item %w: expected str instance, %T found", i, &elt);
 216    }
 217  }
 218  return runtime->strJoin(thread, sep, tuple, length);
 219}
 220
 221word strSpan(const Str& src, const Str& str) {
 222  word length = src.length();
 223  word str_length = str.length();
 224  word first = 0;
 225  for (; first < length; first++) {
 226    bool has_match = false;
 227    byte ch = src.byteAt(first);
 228    for (word j = 0; j < str_length; j++) {
 229      if (ch == str.byteAt(j)) {
 230        has_match = true;
 231        break;
 232      }
 233    }
 234    if (!has_match) {
 235      break;
 236    }
 237  }
 238  return first;
 239}
 240
 241RawObject strSubstr(Thread* thread, const Str& str, word start, word length) {
 242  word str_len = str.length();
 243  DCHECK_BOUND(start, str_len);
 244  DCHECK_BOUND(length, str_len - start);
 245  if (length == str_len) {
 246    return *str;
 247  }
 248  // SmallStr result
 249  if (length <= RawSmallStr::kMaxLength) {
 250    byte buffer[RawSmallStr::kMaxLength];
 251    str.copyToStartAt(buffer, length, start);
 252    return SmallStr::fromBytes(View<byte>(buffer, length));
 253  }
 254  // LargeStr result
 255  HandleScope scope(thread);
 256  MutableBytes result(&scope,
 257                      thread->runtime()->newMutableBytesUninitialized(length));
 258  result.replaceFromWithStartAt(0, LargeStr::cast(*str), length, start);
 259  return result.becomeStr();
 260}
 261
 262word strRSpan(const Str& src, const Str& str, word rend) {
 263  DCHECK(rend >= 0, "string index underflow");
 264  word length = src.length();
 265  word str_length = str.length();
 266  word result = 0;
 267  for (word i = length - 1; i >= rend; i--, result++) {
 268    byte ch = src.byteAt(i);
 269    bool has_match = false;
 270    for (word j = 0; j < str_length; j++) {
 271      if (ch == str.byteAt(j)) {
 272        has_match = true;
 273        break;
 274      }
 275    }
 276    if (!has_match) {
 277      break;
 278    }
 279  }
 280  return result;
 281}
 282
 283static bool isLineBreak(int32_t c) {
 284  switch (c) {
 285    // Common cases
 286    case '\n':
 287    case '\r':
 288    // Less common cases
 289    case '\f':
 290    case '\v':
 291    case 0x1c:
 292    case 0x1d:
 293    case 0x1e:
 294    case 0x85:
 295    case 0x2028:
 296    case 0x2029:
 297      return true;
 298    default:
 299      return false;
 300  }
 301}
 302
 303RawObject strSplit(Thread* thread, const Str& str, const Str& sep,
 304                   word maxsplit) {
 305  Runtime* runtime = thread->runtime();
 306  HandleScope scope(thread);
 307  word num_splits = strCountSubStr(str, sep, maxsplit);
 308  word result_len = num_splits + 1;
 309  MutableTuple result_items(&scope, runtime->newMutableTuple(result_len));
 310  word last_idx = 0;
 311  word sep_len = sep.length();
 312  for (word i = 0, result_idx = 0; result_idx < num_splits;) {
 313    if (strHasPrefix(str, sep, i)) {
 314      result_items.atPut(result_idx++,
 315                         strSubstr(thread, str, last_idx, i - last_idx));
 316      i += sep_len;
 317      last_idx = i;
 318    } else {
 319      i = str.offsetByCodePoints(i, 1);
 320    }
 321  }
 322  result_items.atPut(num_splits,
 323                     strSubstr(thread, str, last_idx, str.length() - last_idx));
 324  List result(&scope, runtime->newList());
 325  result.setItems(*result_items);
 326  result.setNumItems(result_len);
 327  return *result;
 328}
 329
 330RawObject strSplitlines(Thread* thread, const Str& str, bool keepends) {
 331  HandleScope scope(thread);
 332  Runtime* runtime = thread->runtime();
 333  List result(&scope, runtime->newList());
 334  // Looping over code points, not bytes, but i is a byte offset
 335  for (word i = 0, j = 0; i < str.length(); j = i) {
 336    // Skip newline chars
 337    word num_bytes;
 338    while (i < str.length() && !isLineBreak(str.codePointAt(i, &num_bytes))) {
 339      i += num_bytes;
 340    }
 341
 342    word eol_pos = i;
 343    if (i < str.length()) {
 344      int32_t cp = str.codePointAt(i, &num_bytes);
 345      word next = i + num_bytes;
 346      word next_num_bytes;
 347      // Check for \r\n specifically
 348      if (cp == '\r' && next < str.length() &&
 349          str.codePointAt(next, &next_num_bytes) == '\n') {
 350        i += next_num_bytes;
 351      }
 352      i += num_bytes;
 353      if (keepends) {
 354        eol_pos = i;
 355      }
 356    }
 357
 358    // If there are no newlines, the str returned should be identity-equal
 359    if (j == 0 && eol_pos == str.length() && str.isStr()) {
 360      runtime->listAdd(thread, result, str);
 361      return *result;
 362    }
 363
 364    Str substr(&scope, strSubstr(thread, str, j, eol_pos - j));
 365    runtime->listAdd(thread, result, substr);
 366  }
 367
 368  return *result;
 369}
 370
 371RawObject strStripSpace(Thread* thread, const Str& src) {
 372  word length = src.length();
 373  if (length == 0) {
 374    return *src;
 375  }
 376  if (length == 1 && ASCII::isSpace(src.byteAt(0))) {
 377    return Str::empty();
 378  }
 379  word first = 0;
 380  while (first < length) {
 381    word num_bytes;
 382    int32_t ch = src.codePointAt(first, &num_bytes);
 383    if (!Unicode::isSpace(ch)) {
 384      break;
 385    }
 386    first += num_bytes;
 387  }
 388  word last = length;
 389  while (last > first) {
 390    last = src.offsetByCodePoints(last, -1);
 391    word num_bytes;
 392    int32_t ch = src.codePointAt(last, &num_bytes);
 393    if (!Unicode::isSpace(ch)) {
 394      last += num_bytes;
 395      break;
 396    }
 397  }
 398  return strSubstr(thread, src, first, last - first);
 399}
 400
 401RawObject strStripSpaceLeft(Thread* thread, const Str& src) {
 402  word length = src.length();
 403  if (length == 0) {
 404    return *src;
 405  }
 406  if (length == 1 && ASCII::isSpace(src.byteAt(0))) {
 407    return Str::empty();
 408  }
 409  word first = 0;
 410  while (first < length) {
 411    word num_bytes;
 412    int32_t ch = src.codePointAt(first, &num_bytes);
 413    if (!Unicode::isSpace(ch)) {
 414      break;
 415    }
 416    first += num_bytes;
 417  }
 418  return strSubstr(thread, src, first, length - first);
 419}
 420
 421RawObject strStripSpaceRight(Thread* thread, const Str& src) {
 422  word length = src.length();
 423  if (length == 0) {
 424    return *src;
 425  }
 426  if (length == 1 && ASCII::isSpace(src.byteAt(0))) {
 427    return Str::empty();
 428  }
 429  word last = length;
 430  while (last > 0) {
 431    last = src.offsetByCodePoints(last, -1);
 432    word num_bytes;
 433    int32_t ch = src.codePointAt(last, &num_bytes);
 434    if (!Unicode::isSpace(ch)) {
 435      last += num_bytes;
 436      break;
 437    }
 438  }
 439  return strSubstr(thread, src, 0, last);
 440}
 441
 442RawObject strStrip(Thread* thread, const Str& src, const Str& str) {
 443  word length = src.length();
 444  if (length == 0 || str.length() == 0) {
 445    return *src;
 446  }
 447  word first = strSpan(src, str);
 448  word last = strRSpan(src, str, first);
 449  return strSubstr(thread, src, first, length - first - last);
 450}
 451
 452RawObject strStripLeft(Thread* thread, const Str& src, const Str& str) {
 453  word length = src.length();
 454  if (length == 0 || str.length() == 0) {
 455    return *src;
 456  }
 457  word first = strSpan(src, str);
 458  return strSubstr(thread, src, first, length - first);
 459}
 460
 461RawObject strStripRight(Thread* thread, const Str& src, const Str& str) {
 462  word length = src.length();
 463  if (length == 0 || str.length() == 0) {
 464    return *src;
 465  }
 466  word first = 0;
 467  word last = strRSpan(src, str, first);
 468  return strSubstr(thread, src, 0, length - last);
 469}
 470
 471RawObject strTranslateASCII(Thread* thread, const Str& src, const Str& table) {
 472  if (table.length() > kMaxASCII || !table.isASCII()) {
 473    return Unbound::object();
 474  }
 475  word src_len = src.length();
 476  word table_len = table.length();
 477  HandleScope scope(thread);
 478  MutableBytes result(&scope,
 479                      thread->runtime()->newMutableBytesUninitialized(src_len));
 480  // Since all non-ASCII bytes in UTF-8 have a 1 in front, we can iterate by
 481  // bytes instead of codepoints
 482  for (word i = 0; i < src.length(); i++) {
 483    byte to_translate = src.byteAt(i);
 484    if (to_translate > table_len) {
 485      result.byteAtPut(i, to_translate);
 486    } else {
 487      result.byteAtPut(i, table.byteAt(to_translate));
 488    }
 489  }
 490  return result.becomeStr();
 491}
 492
 493RawObject strIteratorNext(Thread* thread, const StrIterator& iter) {
 494  HandleScope scope(thread);
 495  word byte_offset = iter.index();
 496  Str underlying(&scope, iter.iterable());
 497  if (byte_offset >= underlying.length()) {
 498    return Error::noMoreItems();
 499  }
 500  word num_bytes = 0;
 501  word code_point = underlying.codePointAt(byte_offset, &num_bytes);
 502  iter.setIndex(byte_offset + num_bytes);
 503  return RawSmallStr::fromCodePoint(code_point);
 504}
 505
 506static const BuiltinAttribute kUserStrBaseAttributes[] = {
 507    {ID(_UserStr__value), RawUserStrBase::kValueOffset,
 508     AttributeFlags::kHidden},
 509};
 510
 511static const BuiltinAttribute kStrIteratorAttributes[] = {
 512    {ID(_str_iterator__iterable), RawStrIterator::kIterableOffset,
 513     AttributeFlags::kHidden},
 514    {ID(_str_iterator__index), RawStrIterator::kIndexOffset,
 515     AttributeFlags::kHidden},
 516};
 517
 518void initializeStrTypes(Thread* thread) {
 519  HandleScope scope(thread);
 520  Runtime* runtime = thread->runtime();
 521
 522  Type str(&scope, addBuiltinType(thread, ID(str), LayoutId::kStr,
 523                                  /*superclass_id=*/LayoutId::kObject,
 524                                  kUserStrBaseAttributes, UserStrBase::kSize,
 525                                  /*basetype=*/true));
 526
 527  {
 528    Type type(&scope,
 529              addImmediateBuiltinType(thread, ID(largestr), LayoutId::kLargeStr,
 530                                      /*builtin_base=*/LayoutId::kStr,
 531                                      /*superclass_id=*/LayoutId::kObject,
 532                                      /*basetype=*/false));
 533    Layout::cast(type.instanceLayout()).setDescribedType(*str);
 534    runtime->setLargeStrType(type);
 535  }
 536
 537  {
 538    Type type(&scope,
 539              addImmediateBuiltinType(thread, ID(smallstr), LayoutId::kSmallStr,
 540                                      /*builtin_base=*/LayoutId::kStr,
 541                                      /*superclass_id=*/LayoutId::kObject,
 542                                      /*basetype=*/false));
 543    Layout::cast(type.instanceLayout()).setDescribedType(*str);
 544    runtime->setSmallStrType(type);
 545  }
 546
 547  addBuiltinType(thread, ID(str_iterator), LayoutId::kStrIterator,
 548                 /*superclass_id=*/LayoutId::kObject, kStrIteratorAttributes,
 549                 StrIterator::kSize, /*basetype=*/false);
 550}
 551
 552RawObject METH(str, __add__)(Thread* thread, Arguments args) {
 553  Runtime* runtime = thread->runtime();
 554  HandleScope scope(thread);
 555  Object self_obj(&scope, args.get(0));
 556  Object other_obj(&scope, args.get(1));
 557  if (!runtime->isInstanceOfStr(*self_obj)) {
 558    return thread->raiseRequiresType(self_obj, ID(str));
 559  }
 560  if (!runtime->isInstanceOfStr(*other_obj)) {
 561    return NotImplementedType::object();
 562  }
 563  Str self(&scope, strUnderlying(*self_obj));
 564  Str other(&scope, strUnderlying(*other_obj));
 565  return runtime->strConcat(thread, self, other);
 566}
 567
 568RawObject METH(str, __bool__)(Thread* thread, Arguments args) {
 569  HandleScope scope(thread);
 570  Object self_obj(&scope, args.get(0));
 571  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
 572    return thread->raiseRequiresType(self_obj, ID(str));
 573  }
 574  Str self(&scope, strUnderlying(*self_obj));
 575  return Bool::fromBool(*self != Str::empty());
 576}
 577
 578RawObject METH(str, __contains__)(Thread* thread, Arguments args) {
 579  RawObject self = args.get(0);
 580  RawObject other = args.get(1);
 581  Runtime* runtime = thread->runtime();
 582  bool is_self_invalid = true;
 583  if (runtime->isInstanceOfStr(self)) {
 584    is_self_invalid = false;
 585    if (runtime->isInstanceOfStr(other)) {
 586      return Bool::fromBool(strUnderlying(self).includes(strUnderlying(other)));
 587    }
 588  }
 589  HandleScope scope(thread);
 590  Object invalid(&scope, is_self_invalid ? self : other);
 591  return thread->raiseRequiresType(invalid, ID(str));
 592}
 593
 594RawObject METH(str, __eq__)(Thread* thread, Arguments args) {
 595  HandleScope scope(thread);
 596  Object self_obj(&scope, args.get(0));
 597  Object other_obj(&scope, args.get(1));
 598  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
 599    return thread->raiseRequiresType(self_obj, ID(str));
 600  }
 601  if (!thread->runtime()->isInstanceOfStr(*other_obj)) {
 602    return NotImplementedType::object();
 603  }
 604  Str self(&scope, strUnderlying(*self_obj));
 605  Str other(&scope, strUnderlying(*other_obj));
 606  return Bool::fromBool(self.equals(*other));
 607}
 608
 609RawObject METH(str, __format__)(Thread* thread, Arguments args) {
 610  HandleScope scope(thread);
 611  Object self_obj(&scope, args.get(0));
 612  Runtime* runtime = thread->runtime();
 613  if (!runtime->isInstanceOfStr(*self_obj)) {
 614    return thread->raiseRequiresType(self_obj, ID(str));
 615  }
 616  Str self(&scope, strUnderlying(*self_obj));
 617
 618  Object spec_obj(&scope, args.get(1));
 619  if (!runtime->isInstanceOfStr(*spec_obj)) {
 620    return thread->raiseWithFmt(LayoutId::kTypeError,
 621                                "__format__() argument 1 must be str, not %T",
 622                                &spec_obj);
 623  }
 624  Str spec(&scope, strUnderlying(*spec_obj));
 625
 626  if (spec.length() == 0) {
 627    return *self;
 628  }
 629
 630  FormatSpec format;
 631  Object possible_error(&scope,
 632                        parseFormatSpec(thread, spec,
 633                                        /*default_type=*/'s',
 634                                        /*default_align=*/'<', &format));
 635  if (!possible_error.isNoneType()) {
 636    DCHECK(possible_error.isErrorException(), "expected exception");
 637    return *possible_error;
 638  }
 639  if (format.type != 's') {
 640    return raiseUnknownFormatError(thread, format.type, self_obj);
 641  }
 642  if (format.positive_sign != '\0') {
 643    return thread->raiseWithFmt(LayoutId::kValueError,
 644                                "Sign not allowed in string format specifier");
 645  }
 646  if (format.alternate) {
 647    return thread->raiseWithFmt(
 648        LayoutId::kValueError,
 649        "Alternate form (#) not allowed in string format specifier");
 650  }
 651  if (format.alignment == '=') {
 652    return thread->raiseWithFmt(
 653        LayoutId::kValueError,
 654        "'=' alignment not allowed in string format specifier");
 655  }
 656
 657  return formatStr(thread, self, &format);
 658}
 659
 660RawObject METH(str, __ge__)(Thread* thread, Arguments args) {
 661  HandleScope scope(thread);
 662  Object self_obj(&scope, args.get(0));
 663  Object other_obj(&scope, args.get(1));
 664  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
 665    return thread->raiseRequiresType(self_obj, ID(str));
 666  }
 667  if (!thread->runtime()->isInstanceOfStr(*other_obj)) {
 668    return NotImplementedType::object();
 669  }
 670  Str self(&scope, strUnderlying(*self_obj));
 671  Str other(&scope, strUnderlying(*other_obj));
 672  return Bool::fromBool(self.compare(*other) >= 0);
 673}
 674
 675bool METH(str, __getitem___intrinsic)(Thread* thread) {
 676  RawObject arg0 = thread->stackPeek(1);
 677  if (!arg0.isStr()) {
 678    return false;
 679  }
 680  RawObject arg1 = thread->stackPeek(0);
 681  word idx;
 682  if (arg1.isSmallInt()) {
 683    idx = RawSmallInt::cast(arg1).value();
 684  } else if (arg1.isBool()) {
 685    idx = RawBool::cast(arg1).value();
 686  } else {
 687    word start, stop;
 688    if (!tryUnpackSlice(arg1, &start, &stop)) {
 689      return false;
 690    }
 691    {
 692      // Manually adjust slice bounds to avoid an extra call to codePointLength
 693      HandleScope scope(thread);
 694      Str self(&scope, arg0);
 695      word start_index = adjustedStrIndex(self, start);
 696      word stop_index = adjustedStrIndex(self, stop);
 697      word length = stop_index - start_index;
 698
 699      thread->stackDrop(2);
 700      thread->stackSetTop(length <= 0
 701                              ? Str::empty()
 702                              : strSubstr(thread, self, start_index, length));
 703    }
 704    return true;
 705  }
 706  RawStr self = Str::cast(arg0);
 707  word len = self.length();
 708  if (0 <= idx && idx < len) {
 709    word offset = self.offsetByCodePoints(0, idx);
 710    if (offset < len) {
 711      word ignored;
 712      thread->stackDrop(2);
 713      thread->stackSetTop(
 714          RawSmallStr::fromCodePoint(self.codePointAt(offset, &ignored)));
 715      return true;
 716    }
 717  }
 718  if (0 > idx) {
 719    word offset = self.offsetByCodePoints(len, idx);
 720    if (offset < len && offset != -1) {
 721      word ignored;
 722      thread->stackDrop(2);
 723      thread->stackSetTop(
 724          RawSmallStr::fromCodePoint(self.codePointAt(offset, &ignored)));
 725      return true;
 726    }
 727  }
 728  return false;
 729}
 730
 731RawObject METH(str, __gt__)(Thread* thread, Arguments args) {
 732  HandleScope scope(thread);
 733  Object self_obj(&scope, args.get(0));
 734  Object other_obj(&scope, args.get(1));
 735  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
 736    return thread->raiseRequiresType(self_obj, ID(str));
 737  }
 738  if (!thread->runtime()->isInstanceOfStr(*other_obj)) {
 739    return NotImplementedType::object();
 740  }
 741  Str self(&scope, strUnderlying(*self_obj));
 742  Str other(&scope, strUnderlying(*other_obj));
 743  return Bool::fromBool(self.compare(*other) > 0);
 744}
 745
 746RawObject METH(str, __hash__)(Thread* thread, Arguments args) {
 747  HandleScope scope(thread);
 748  Object self(&scope, args.get(0));
 749  Runtime* runtime = thread->runtime();
 750  if (!runtime->isInstanceOfStr(*self)) {
 751    return thread->raiseRequiresType(self, ID(str));
 752  }
 753  Str self_str(&scope, strUnderlying(*self));
 754  return SmallInt::fromWord(strHash(thread, *self_str));
 755}
 756
 757void strInternInTuple(Thread* thread, const Object& items) {
 758  HandleScope scope(thread);
 759  Runtime* runtime = thread->runtime();
 760  DCHECK(runtime->isInstanceOfTuple(*items), "items must be a tuple instance");
 761  Tuple tuple(&scope, tupleUnderlying(*items));
 762  Str str(&scope, Str::empty());
 763  Object result(&scope, NoneType::object());
 764  for (word i = 0; i < tuple.length(); i++) {
 765    str = tuple.at(i);
 766    result = Runtime::internStr(thread, str);
 767    if (result.isError()) continue;
 768    if (result != str) {
 769      tuple.atPut(i, *result);
 770    }
 771  }
 772}
 773
 774static bool allNameChars(const Str& str) {
 775  for (word i = 0; i < str.length(); i++) {
 776    byte c = str.byteAt(i);
 777    if (!ASCII::isAlnum(c) && c != '_') {
 778      return false;
 779    }
 780  }
 781  return true;
 782}
 783
 784static bool strInternConstant(Thread* thread, Object* object) {
 785  if (object->isStr()) {
 786    HandleScope scope(thread);
 787    Str str(&scope, **object);
 788    if (allNameChars(str)) {
 789      str = Runtime::internStr(thread, str);
 790      if (str != *object) {
 791        *object = *str;
 792        return true;
 793      }
 794    }
 795    return false;
 796  }
 797  if (object->isTuple()) {
 798    HandleScope scope(thread);
 799    Tuple tuple(&scope, **object);
 800    Object item(&scope, NoneType::object());
 801    bool modified = false;
 802    for (word i = 0, length = tuple.length(); i < length; i++) {
 803      item = tuple.at(i);
 804      if (strInternConstant(thread, &item)) {
 805        modified = true;
 806        tuple.atPut(i, *item);
 807      }
 808    }
 809    if (!modified) return false;
 810    *object = *tuple;
 811    return true;
 812  }
 813  if (object->isFrozenSet()) {
 814    HandleScope scope(thread);
 815    FrozenSet set(&scope, **object);
 816    FrozenSet new_set(&scope, thread->runtime()->newFrozenSet());
 817    Object value(&scope, NoneType::object());
 818    Object hash(&scope, NoneType::object());
 819    bool modified = false;
 820    for (word idx = 0; setNextItem(set, &idx, &value);) {
 821      if (strInternConstant(thread, &value)) {
 822        modified = true;
 823      }
 824      hash = Interpreter::hash(thread, value);
 825      if (hash.isErrorException()) return false;
 826      setAdd(thread, new_set, value, SmallInt::cast(*hash).value());
 827    }
 828    if (!modified) return false;
 829    *object = *new_set;
 830    return true;
 831  }
 832  return false;
 833}
 834
 835bool strInternConstants(Thread* thread, const Object& items) {
 836  HandleScope scope(thread);
 837  DCHECK(thread->runtime()->isInstanceOfTuple(*items),
 838         "items must be a tuple instance");
 839  Object tuple(&scope, tupleUnderlying(*items));
 840  return strInternConstant(thread, &tuple);
 841}
 842
 843word strFind(const Str& haystack, const Str& needle) {
 844  word haystack_len = haystack.length();
 845  word needle_len = needle.length();
 846  if (needle_len > haystack_len) {
 847    return -1;
 848  }
 849  if (needle_len == 0) {
 850    return 0;
 851  }
 852  if (needle_len == 1 && haystack.isASCII()) {
 853    return strFindAsciiChar(haystack, needle.byteAt(0));
 854  }
 855  // Loop is in byte space, not code point space
 856  word result = 0;
 857  // TODO(T41400083): Use a different search algorithm
 858  for (word i = 0; i <= haystack_len - needle_len; result++) {
 859    if (strHasPrefix(haystack, needle, i)) {
 860      return result;
 861    }
 862    i = haystack.offsetByCodePoints(i, 1);
 863  }
 864  return -1;
 865}
 866
 867word strFindWithRange(const Str& haystack, const Str& needle, word start,
 868                      word end) {
 869  if (end < 0 || start < 0) {
 870    Slice::adjustSearchIndices(&start, &end, haystack.codePointLength());
 871  }
 872
 873  word start_index = haystack.offsetByCodePoints(0, start);
 874  if (start_index == haystack.length() && needle.length() > 0) {
 875    // Haystack is too small; fast early return
 876    return -1;
 877  }
 878  word end_index = haystack.offsetByCodePoints(start_index, end - start);
 879
 880  if ((end_index - start_index) < needle.length() || start_index > end_index) {
 881    // Haystack is too small; fast early return
 882    return -1;
 883  }
 884
 885  // Loop is in byte space, not code point space
 886  word result = start;
 887  // TODO(T41400083): Use a different search algorithm
 888  for (word i = start_index; i <= end_index - needle.length(); result++) {
 889    bool has_match = strHasPrefix(haystack, needle, i);
 890    word next = haystack.offsetByCodePoints(i, 1);
 891    if (i == next) {
 892      // We've reached a fixpoint; offsetByCodePoints will not advance past the
 893      // length of the string.
 894      if (start_index >= i) {
 895        // The start is greater than the length of the string.
 896        return -1;
 897      }
 898      // If the start is within bounds, just return the last found index.
 899      break;
 900    }
 901    if (has_match) {
 902      return result;
 903    }
 904    i = next;
 905  }
 906  return -1;
 907}
 908
 909word strFindAsciiChar(const Str& haystack, byte needle) {
 910  DCHECK(needle <= kMaxASCII, "must only be called for ASCII `needle`");
 911  for (word i = 0, length = haystack.length(); i < length; i++) {
 912    if (haystack.byteAt(i) == needle) {
 913      return i;
 914    }
 915  }
 916  return -1;
 917}
 918
 919word strFindFirstNonWhitespace(const Str& str) {
 920  word i = 0;
 921  for (word codepoint_len, length = str.length(); i < length;
 922       i += codepoint_len) {
 923    if (!Unicode::isSpace(str.codePointAt(i, &codepoint_len))) return i;
 924  }
 925  return i;
 926}
 927
 928bool strHasPrefix(const Str& str, const Str& prefix, word start) {
 929  word str_len = str.length();
 930  word prefix_len = prefix.length();
 931  if (str_len - start < prefix_len) {
 932    return false;
 933  }
 934  for (word i = 0; i < prefix_len; i++) {
 935    if (str.byteAt(start + i) != prefix.byteAt(i)) {
 936      return false;
 937    }
 938  }
 939  return true;
 940}
 941
 942bool strHasSurrogate(const Str& str) {
 943  return str.includesByte(UTF8::kSurrogateLeadByte);
 944}
 945
 946word strRFind(const Str& haystack, const Str& needle, word start, word end) {
 947  // Haystack slice is empty; fast early return
 948  if (start > end) return -1;
 949  // Needle is empty
 950  if (needle == Str::empty()) return end;
 951  word start_index = haystack.offsetByCodePoints(0, start);
 952  if (start_index == haystack.length()) {
 953    // Haystack is too small; fast early return
 954    return -1;
 955  }
 956  word end_index = haystack.offsetByCodePoints(start_index, end - start);
 957  if ((end_index - start_index) < needle.length() || start_index > end_index) {
 958    // Haystack is too small; fast early return
 959    return -1;
 960  }
 961  // Loop is in byte space, not code point space
 962  // Invariant: cp_offset and byte_offset describe the same offset into the
 963  // string, but one is in code point space and the other is in byte space
 964  // TODO(T41400083): Use a different search algorithm
 965  for (word cp_offset = end - 1,
 966            byte_offset = haystack.offsetByCodePoints(end_index, -1);
 967       byte_offset >= 0; cp_offset--,
 968            byte_offset = haystack.offsetByCodePoints(byte_offset, -1)) {
 969    if (strHasPrefix(haystack, needle, byte_offset)) {
 970      return cp_offset;
 971    }
 972  }
 973  return -1;
 974}
 975
 976word strRFindAsciiChar(const Str& haystack, byte needle) {
 977  DCHECK(needle <= kMaxASCII, "must only be called for ASCII `needle`");
 978  for (word i = haystack.length() - 1; i >= 0; i--) {
 979    if (haystack.byteAt(i) == needle) {
 980      return i;
 981    }
 982  }
 983  return -1;
 984}
 985
 986RawObject METH(str, __le__)(Thread* thread, Arguments args) {
 987  HandleScope scope(thread);
 988  Object self_obj(&scope, args.get(0));
 989  Object other_obj(&scope, args.get(1));
 990  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
 991    return thread->raiseRequiresType(self_obj, ID(str));
 992  }
 993  if (!thread->runtime()->isInstanceOfStr(*other_obj)) {
 994    return NotImplementedType::object();
 995  }
 996  Str self(&scope, strUnderlying(*self_obj));
 997  Str other(&scope, strUnderlying(*other_obj));
 998  return Bool::fromBool(self.compare(*other) <= 0);
 999}
1000
1001RawObject METH(str, __len__)(Thread* thread, Arguments args) {
1002  HandleScope scope(thread);
1003  Object self_obj(&scope, args.get(0));
1004  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1005    return thread->raiseRequiresType(self_obj, ID(str));
1006  }
1007  Str self(&scope, strUnderlying(*self_obj));
1008  return SmallInt::fromWord(self.codePointLength());
1009}
1010
1011static RawObject strLowerASCII(Thread* thread, Object& str_obj, Str& str,
1012                               word length) {
1013  if (str.isSmallStr()) {
1014    byte buf[SmallStr::kMaxLength];
1015    for (word i = 0; i < length; i++) {
1016      buf[i] = ASCII::toLower(str.byteAt(i));
1017    }
1018    return SmallStr::fromBytes({buf, length});
1019  }
1020  // Search for the first uppercase character.
1021  word first_uppercase = 0;
1022  for (; first_uppercase < length; first_uppercase++) {
1023    if (ASCII::isUpper(str.byteAt(first_uppercase))) {
1024      break;
1025    }
1026  }
1027  if (first_uppercase >= length && str_obj.isStr()) {
1028    return *str_obj;
1029  }
1030
1031  HandleScope scope(thread);
1032  Runtime* runtime = thread->runtime();
1033  MutableBytes result(&scope, runtime->newMutableBytesUninitialized(length));
1034  result.replaceFromWithStr(0, *str, first_uppercase);
1035  for (word i = first_uppercase; i < length; i++) {
1036    byte lower = ASCII::toLower(str.byteAt(i));
1037    result.byteAtPut(i, lower);
1038  }
1039  return result.becomeStr();
1040}
1041
1042RawObject METH(str, casefold)(Thread* thread, Arguments args) {
1043  HandleScope scope(thread);
1044  Object self_obj(&scope, args.get(0));
1045  Runtime* runtime = thread->runtime();
1046  if (!runtime->isInstanceOfStr(*self_obj)) {
1047    return thread->raiseRequiresType(self_obj, ID(str));
1048  }
1049  Str self(&scope, strUnderlying(*self_obj));
1050  word length = self.length();
1051  if (self.isASCII()) {
1052    return strLowerASCII(thread, self_obj, self, length);
1053  }
1054
1055  // Search for the first uppercase character.
1056  word first_uppercase = 0;
1057  for (word char_length; first_uppercase < length;
1058       first_uppercase += char_length) {
1059    int32_t code_point = self.codePointAt(first_uppercase, &char_length);
1060    if (Unicode::isUpper(code_point) || Unicode::isTitle(code_point) ||
1061        Unicode::isUnfolded(code_point)) {
1062      break;
1063    }
1064  }
1065  if (first_uppercase >= length && self_obj.isStr()) {
1066    return *self_obj;
1067  }
1068
1069  StrArray result(&scope, runtime->newStrArray());
1070  runtime->strArrayEnsureCapacity(thread, result, length);
1071  // Since the prefix is valid UTF-8 and guaranteed to fit, it's safe to write
1072  // directly to the underlying MutableBytes.
1073  MutableBytes result_bytes(&scope, result.items());
1074  result_bytes.replaceFromWithStr(0, *self, first_uppercase);
1075  result.setNumItems(first_uppercase);
1076  for (word char_length, i = first_uppercase; i < length; i += char_length) {
1077    struct FullCasing casefold =
1078        Unicode::toFolded(self.codePointAt(i, &char_length));
1079    for (word j = 0; j < 3; j++) {
1080      int32_t decoded = casefold.code_points[j];
1081      if (decoded == -1) break;
1082      runtime->strArrayAddCodePoint(thread, result, decoded);
1083    }
1084  }
1085  return runtime->strFromStrArray(result);
1086}
1087
1088RawObject METH(str, lower)(Thread* thread, Arguments args) {
1089  HandleScope scope(thread);
1090  Object self_obj(&scope, args.get(0));
1091  Runtime* runtime = thread->runtime();
1092  if (!runtime->isInstanceOfStr(*self_obj)) {
1093    return thread->raiseRequiresType(self_obj, ID(str));
1094  }
1095  Str self(&scope, strUnderlying(*self_obj));
1096  word length = self.length();
1097  if (self.isASCII()) {
1098    return strLowerASCII(thread, self_obj, self, length);
1099  }
1100
1101  // Search for the first uppercase character.
1102  word first_uppercase = 0;
1103  for (word char_length; first_uppercase < length;
1104       first_uppercase += char_length) {
1105    int32_t code_point = self.codePointAt(first_uppercase, &char_length);
1106    if (Unicode::isUpper(code_point) || Unicode::isTitle(code_point)) {
1107      break;
1108    }
1109  }
1110  if (first_uppercase >= length && self_obj.isStr()) {
1111    return *self_obj;
1112  }
1113
1114  StrArray result(&scope, runtime->newStrArray());
1115  runtime->strArrayEnsureCapacity(thread, result, length);
1116  // Since the prefix is valid UTF-8 and guaranteed to fit, it's safe to write
1117  // directly to the underlying MutableBytes.
1118  MutableBytes result_bytes(&scope, result.items());
1119  result_bytes.replaceFromWithStr(0, *self, first_uppercase);
1120  result.setNumItems(first_uppercase);
1121  for (word char_length, i = first_uppercase; i < length; i += char_length) {
1122    struct FullCasing lower =
1123        Unicode::toLower(self.codePointAt(i, &char_length));
1124    for (word j = 0; j < 3; j++) {
1125      int32_t decoded = lower.code_points[j];
1126      if (decoded == -1) break;
1127      runtime->strArrayAddCodePoint(thread, result, decoded);
1128    }
1129  }
1130  return runtime->strFromStrArray(result);
1131}
1132
1133RawObject METH(str, title)(Thread* thread, Arguments args) {
1134  Runtime* runtime = thread->runtime();
1135  HandleScope scope(thread);
1136  Object self_obj(&scope, args.get(0));
1137  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1138    return thread->raiseRequiresType(self_obj, ID(str));
1139  }
1140
1141  Str self(&scope, strUnderlying(*self_obj));
1142  word char_length = self.length();
1143
1144  bool previous_is_cased = false;
1145  StrArray result(&scope, runtime->newStrArray());
1146  for (word i = 0, len; i < char_length; i += len) {
1147    int32_t code_point = self.codePointAt(i, &len);
1148
1149    FullCasing mapped = previous_is_cased ? Unicode::toLower(code_point)
1150                                          : Unicode::toTitle(code_point);
1151    for (word j = 0; mapped.code_points[j] != -1; j++) {
1152      runtime->strArrayAddCodePoint(thread, result, mapped.code_points[j]);
1153    }
1154
1155    previous_is_cased = Unicode::isCased(code_point);
1156  }
1157
1158  return runtime->strFromStrArray(result);
1159}
1160
1161RawObject METH(str, upper)(Thread* thread, Arguments args) {
1162  HandleScope scope(thread);
1163  Object self_obj(&scope, args.get(0));
1164  Runtime* runtime = thread->runtime();
1165  if (!runtime->isInstanceOfStr(*self_obj)) {
1166    return thread->raiseRequiresType(self_obj, ID(str));
1167  }
1168  Str self(&scope, strUnderlying(*self_obj));
1169  word length = self.length();
1170  if (self.isASCII()) {
1171    if (self.isSmallStr()) {
1172      byte buf[SmallStr::kMaxLength];
1173      for (word i = 0; i < length; i++) {
1174        buf[i] = ASCII::toUpper(self.byteAt(i));
1175      }
1176      return SmallStr::fromBytes({buf, length});
1177    }
1178    // Search for the first lowercase character.
1179    word first_lowercase = 0;
1180    for (; first_lowercase < length; first_lowercase++) {
1181      if (ASCII::isLower(self.byteAt(first_lowercase))) {
1182        break;
1183      }
1184    }
1185    if (first_lowercase >= length && self_obj.isStr()) {
1186      return *self_obj;
1187    }
1188
1189    MutableBytes result(&scope, runtime->newMutableBytesUninitialized(length));
1190    result.replaceFromWithStr(0, *self, first_lowercase);
1191    for (word i = first_lowercase; i < length; i++) {
1192      byte lower = ASCII::toUpper(self.byteAt(i));
1193      result.byteAtPut(i, lower);
1194    }
1195    return result.becomeStr();
1196  }
1197
1198  // Search for the first lowercase character.
1199  word first_lowercase = 0;
1200  for (word char_length; first_lowercase < length;
1201       first_lowercase += char_length) {
1202    int32_t code_point = self.codePointAt(first_lowercase, &char_length);
1203    if (Unicode::isLower(code_point) || Unicode::isTitle(code_point)) {
1204      break;
1205    }
1206  }
1207  if (first_lowercase >= length && self_obj.isStr()) {
1208    return *self_obj;
1209  }
1210
1211  StrArray result(&scope, runtime->newStrArray());
1212  runtime->strArrayEnsureCapacity(thread, result, length);
1213  // Since the prefix is valid UTF-8 and guaranteed to fit, it's safe to write
1214  // directly to the underlying MutableBytes.
1215  MutableBytes result_bytes(&scope, result.items());
1216  result_bytes.replaceFromWithStr(0, *self, first_lowercase);
1217  result.setNumItems(first_lowercase);
1218  for (word char_length, i = first_lowercase; i < length; i += char_length) {
1219    struct FullCasing upper =
1220        Unicode::toUpper(self.codePointAt(i, &char_length));
1221    for (word j = 0; j < 3; j++) {
1222      int32_t decoded = upper.code_points[j];
1223      if (decoded == -1) break;
1224      runtime->strArrayAddCodePoint(thread, result, decoded);
1225    }
1226  }
1227  return runtime->strFromStrArray(result);
1228}
1229
1230RawObject METH(str, __lt__)(Thread* thread, Arguments args) {
1231  HandleScope scope(thread);
1232  Object self_obj(&scope, args.get(0));
1233  Object other_obj(&scope, args.get(1));
1234  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1235    return thread->raiseRequiresType(self_obj, ID(str));
1236  }
1237  if (!thread->runtime()->isInstanceOfStr(*other_obj)) {
1238    return NotImplementedType::object();
1239  }
1240  Str self(&scope, strUnderlying(*self_obj));
1241  Str other(&scope, strUnderlying(*other_obj));
1242  return Bool::fromBool(self.compare(*other) < 0);
1243}
1244
1245RawObject METH(str, __mul__)(Thread* thread, Arguments args) {
1246  Runtime* runtime = thread->runtime();
1247  HandleScope scope(thread);
1248  Object self_obj(&scope, args.get(0));
1249  if (!runtime->isInstanceOfStr(*self_obj)) {
1250    return thread->raiseRequiresType(self_obj, ID(str));
1251  }
1252  Object count_index(&scope, args.get(1));
1253  Object count_obj(&scope, intFromIndex(thread, count_index));
1254  if (count_obj.isError()) return *count_obj;
1255  word count = intUnderlying(*count_obj).asWordSaturated();
1256  if (!SmallInt::isValid(count)) {
1257    return thread->raiseWithFmt(LayoutId::kOverflowError,
1258                                "cannot fit '%T' into an index-sized integer",
1259                                &count_obj);
1260  }
1261  Str self(&scope, *self_obj);
1262  word length = self.length();
1263  if (count <= 0 || length == 0) {
1264    return Str::empty();
1265  }
1266  word new_length;
1267  if (__builtin_mul_overflow(length, count, &new_length) ||
1268      !SmallInt::isValid(new_length)) {
1269    return thread->raiseWithFmt(LayoutId::kOverflowError,
1270                                "repeated string is too long");
1271  }
1272  return runtime->strRepeat(thread, self, count);
1273}
1274
1275RawObject METH(str, __ne__)(Thread* thread, Arguments args) {
1276  HandleScope scope(thread);
1277  Object self_obj(&scope, args.get(0));
1278  Object other_obj(&scope, args.get(1));
1279  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1280    return thread->raiseRequiresType(self_obj, ID(str));
1281  }
1282  if (!thread->runtime()->isInstanceOfStr(*other_obj)) {
1283    return NotImplementedType::object();
1284  }
1285  Str self(&scope, strUnderlying(*self_obj));
1286  Str other(&scope, strUnderlying(*other_obj));
1287  return Bool::fromBool(!self.equals(*other));
1288}
1289
1290RawObject METH(str, __iter__)(Thread* thread, Arguments args) {
1291  HandleScope scope(thread);
1292  Object self_obj(&scope, args.get(0));
1293  Runtime* runtime = thread->runtime();
1294  if (!runtime->isInstanceOfStr(*self_obj)) {
1295    return thread->raiseRequiresType(self_obj, ID(str));
1296  }
1297  Str self(&scope, strUnderlying(*self_obj));
1298  return runtime->newStrIterator(self);
1299}
1300
1301RawObject METH(str, __repr__)(Thread* thread, Arguments args) {
1302  Runtime* runtime = thread->runtime();
1303  HandleScope scope(thread);
1304  Object self_obj(&scope, args.get(0));
1305  if (!runtime->isInstanceOfStr(*self_obj)) {
1306    return thread->raiseRequiresType(self_obj, ID(str));
1307  }
1308  Str self(&scope, strUnderlying(*self_obj));
1309  const word self_len = self.length();
1310  word result_len = 0;
1311  word squote = 0;
1312  word dquote = 0;
1313  // Precompute the size so that only one allocation is necessary.
1314  for (word i = 0, char_len; i < self_len; i += char_len) {
1315    int32_t code_point = self.codePointAt(i, &char_len);
1316    if (code_point == '\'') {
1317      squote++;
1318      result_len += 1;
1319    } else if (code_point == '"') {
1320      dquote++;
1321      result_len += 1;
1322    } else if (code_point == '\\' || code_point == '\t' || code_point == '\r' ||
1323               code_point == '\n') {
1324      result_len += 2;
1325    } else if (Unicode::isPrintable(code_point)) {
1326      result_len += char_len;
1327    } else if (code_point < 0x100) {
1328      result_len += 4;
1329    } else if (code_point < 0x10000) {
1330      result_len += 6;
1331    } else {
1332      result_len += 10;
1333    }
1334  }
1335
1336  byte quote = '\'';
1337  bool unchanged = (result_len == self_len);
1338  if (squote > 0) {
1339    unchanged = false;
1340    // If there are both single quotes and double quotes, the outer quote will
1341    // be singles, and all internal quotes will need to be escaped.
1342    if (dquote > 0) {
1343      // Add the size of the escape backslashes on the single quotes.
1344      result_len += squote;
1345    } else {
1346      quote = '"';
1347    }
1348  }
1349  result_len += 2;  // quotes
1350
1351  MutableBytes buf(&scope, runtime->newMutableBytesUninitialized(result_len));
1352  buf.byteAtPut(0, quote);
1353  buf.byteAtPut(result_len - 1, quote);
1354  if (unchanged) {
1355    // Remaining characters were unmodified, so copy them directly.
1356    buf.replaceFromWithStr(1, *self, self_len);
1357    return buf.becomeStr();
1358  }
1359  word out = 1;
1360  for (word in = 0, char_len; in < self_len; in += char_len) {
1361    int32_t code_point = self.codePointAt(in, &char_len);
1362    if (code_point == quote) {
1363      buf.byteAtPut(out++, '\\');
1364      buf.byteAtPut(out++, quote);
1365    } else if (code_point == '\\') {
1366      buf.byteAtPut(out++, '\\');
1367      buf.byteAtPut(out++, '\\');
1368    } else if (code_point == '\t') {
1369      buf.byteAtPut(out++, '\\');
1370      buf.byteAtPut(out++, 't');
1371    } else if (code_point == '\r') {
1372      buf.byteAtPut(out++, '\\');
1373      buf.byteAtPut(out++, 'r');
1374    } else if (code_point == '\n') {
1375      buf.byteAtPut(out++, '\\');
1376      buf.byteAtPut(out++, 'n');
1377    } else if (' ' <= code_point && code_point < kMaxASCII) {
1378      buf.byteAtPut(out++, code_point);
1379    } else if (code_point <= kMaxASCII) {
1380      buf.byteAtPut(out++, '\\');
1381      buf.byteAtPut(out++, 'x');
1382      uwordToHexadecimalWithMutableBytes(*buf, /*index=*/out, /*num_digits=*/2,
1383                                         code_point);
1384      out += 2;
1385    } else if (Unicode::isPrintable(code_point)) {
1386      for (word i = 0; i < char_len; i++) {
1387        buf.byteAtPut(out + i, self.byteAt(in + i));
1388      }
1389      out += char_len;
1390    } else if (code_point <= 0xff) {
1391      buf.byteAtPut(out++, '\\');
1392      buf.byteAtPut(out++, 'x');
1393      uwordToHexadecimalWithMutableBytes(*buf, /*index=*/out, /*num_digits=*/2,
1394                                         code_point);
1395      out += 2;
1396    } else if (code_point <= 0xffff) {
1397      buf.byteAtPut(out++, '\\');
1398      buf.byteAtPut(out++, 'u');
1399      uwordToHexadecimalWithMutableBytes(*buf, /*index=*/out, /*num_digits=*/4,
1400                                         code_point);
1401      out += 4;
1402    } else {
1403      buf.byteAtPut(out++, '\\');
1404      buf.byteAtPut(out++, 'U');
1405      uwordToHexadecimalWithMutableBytes(*buf, /*index=*/out,
1406                                         /*num_digits=*/8, code_point);
1407      out += 8;
1408    }
1409  }
1410  DCHECK(out == result_len - 1, "wrote %ld characters, expected %ld", out - 1,
1411         result_len - 2);
1412  return buf.becomeStr();
1413}
1414
1415bool METH(str, _mod_convert_number_int_intrinsic)(Thread* thread) {
1416  RawObject arg = thread->stackTop();
1417  if (arg.isBool()) {
1418    thread->stackDrop(2);
1419    thread->stackSetTop(convertBoolToInt(arg));
1420    return true;
1421  }
1422  if (arg.isInt()) {
1423    thread->stackDrop(2);
1424    thread->stackSetTop(arg);
1425    return true;
1426  }
1427  return false;
1428}
1429
1430bool METH(str, _mod_convert_number_index_intrinsic)(Thread* thread) {
1431  RawObject arg = thread->stackTop();
1432  if (arg.isBool()) {
1433    thread->stackDrop(2);
1434    thread->stackSetTop(convertBoolToInt(arg));
1435    return true;
1436  }
1437  if (arg.isInt()) {
1438    thread->stackDrop(2);
1439    thread->stackSetTop(arg);
1440    return true;
1441  }
1442  return false;
1443}
1444
1445bool METH(str, _mod_check_single_arg_intrinsic)(Thread* thread) {
1446  Runtime* runtime = thread->runtime();
1447  RawObject arg = thread->stackTop();
1448  if (runtime->isInstanceOfTuple(arg)) {
1449    RawTuple arg_tuple = tupleUnderlying(arg);
1450    if (arg_tuple.length() != 1) return false;
1451    thread->stackDrop(2);
1452    thread->stackSetTop(arg_tuple);
1453    return true;
1454  }
1455  RawMutableTuple result = MutableTuple::cast(runtime->newMutableTuple(1));
1456  // Note that we need to re-fetch stackTop() since newMutableTuple() may
1457  // have triggered GC.
1458  result.atPut(0, thread->stackTop());
1459  thread->stackDrop(2);
1460  thread->stackSetTop(result.becomeImmutable());
1461  return true;
1462}
1463
1464RawObject METH(str, isalnum)(Thread* thread, Arguments args) {
1465  HandleScope scope(thread);
1466  Object self_obj(&scope, args.get(0));
1467  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1468    return thread->raiseRequiresType(self_obj, ID(str));
1469  }
1470  Str self(&scope, strUnderlying(*self_obj));
1471  word char_length = self.length();
1472  if (char_length == 0) {
1473    return Bool::falseObj();
1474  }
1475  for (word i = 0, len; i < char_length; i += len) {
1476    int32_t code_point = self.codePointAt(i, &len);
1477    if (!Unicode::isAlnum(code_point)) {
1478      return Bool::falseObj();
1479    }
1480  }
1481  return Bool::trueObj();
1482}
1483
1484RawObject METH(str, isalpha)(Thread* thread, Arguments args) {
1485  HandleScope scope(thread);
1486  Object self_obj(&scope, args.get(0));
1487  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1488    return thread->raiseRequiresType(self_obj, ID(str));
1489  }
1490  Str self(&scope, strUnderlying(*self_obj));
1491  word char_length = self.length();
1492  if (char_length == 0) {
1493    return Bool::falseObj();
1494  }
1495  for (word i = 0, len; i < char_length; i += len) {
1496    int32_t code_point = self.codePointAt(i, &len);
1497    if (!Unicode::isAlpha(code_point)) {
1498      return Bool::falseObj();
1499    }
1500  }
1501  return Bool::trueObj();
1502}
1503
1504RawObject METH(str, isascii)(Thread* thread, Arguments args) {
1505  HandleScope scope(thread);
1506  Object self_obj(&scope, args.get(0));
1507  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1508    return thread->raiseRequiresType(self_obj, ID(str));
1509  }
1510  Str self(&scope, strUnderlying(*self_obj));
1511  return Bool::fromBool(self.isASCII());
1512}
1513
1514RawObject METH(str, isdecimal)(Thread* thread, Arguments args) {
1515  HandleScope scope(thread);
1516  Object self_obj(&scope, args.get(0));
1517  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1518    return thread->raiseRequiresType(self_obj, ID(str));
1519  }
1520  Str self(&scope, strUnderlying(*self_obj));
1521  word char_length = self.length();
1522  if (char_length == 0) {
1523    return Bool::falseObj();
1524  }
1525  for (word i = 0, len; i < char_length; i += len) {
1526    int32_t code_point = self.codePointAt(i, &len);
1527    if (!Unicode::isDecimal(code_point)) {
1528      return Bool::falseObj();
1529    }
1530  }
1531  return Bool::trueObj();
1532}
1533
1534RawObject METH(str, isdigit)(Thread* thread, Arguments args) {
1535  HandleScope scope(thread);
1536  Object self_obj(&scope, args.get(0));
1537  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1538    return thread->raiseRequiresType(self_obj, ID(str));
1539  }
1540  Str self(&scope, strUnderlying(*self_obj));
1541  word char_length = self.length();
1542  if (char_length == 0) {
1543    return Bool::falseObj();
1544  }
1545  for (word i = 0, len; i < char_length; i += len) {
1546    int32_t code_point = self.codePointAt(i, &len);
1547    if (!Unicode::isDigit(code_point)) {
1548      return Bool::falseObj();
1549    }
1550  }
1551  return Bool::trueObj();
1552}
1553
1554bool strIsIdentifier(const Str& str) {
1555  word char_length = str.length();
1556  if (char_length == 0) {
1557    return false;
1558  }
1559  word len;
1560  int32_t first = str.codePointAt(0, &len);
1561  if (!Unicode::isXidStart(first) && first != '_') {
1562    return false;
1563  }
1564  for (word i = len; i < char_length; i += len) {
1565    int32_t code_point = str.codePointAt(i, &len);
1566    if (!Unicode::isXidContinue(code_point)) {
1567      return false;
1568    }
1569  }
1570  return true;
1571}
1572
1573RawObject METH(str, isidentifier)(Thread* thread, Arguments args) {
1574  HandleScope scope(thread);
1575  Object self_obj(&scope, args.get(0));
1576  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1577    return thread->raiseRequiresType(self_obj, ID(str));
1578  }
1579  Str self(&scope, strUnderlying(*self_obj));
1580  return Bool::fromBool(strIsIdentifier(self));
1581}
1582
1583RawObject METH(str, islower)(Thread* thread, Arguments args) {
1584  HandleScope scope(thread);
1585  Object self_obj(&scope, args.get(0));
1586  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1587    return thread->raiseRequiresType(self_obj, ID(str));
1588  }
1589  Str self(&scope, strUnderlying(*self_obj));
1590  word char_length = self.length();
1591  bool cased = false;
1592  for (word i = 0, len; i < char_length; i += len) {
1593    int32_t code_point = self.codePointAt(i, &len);
1594    if (Unicode::isUpper(code_point) || Unicode::isTitle(code_point)) {
1595      return Bool::falseObj();
1596    }
1597    if (!cased && Unicode::isLower(code_point)) {
1598      cased = true;
1599    }
1600  }
1601  return Bool::fromBool(cased);
1602}
1603
1604RawObject METH(str, isnumeric)(Thread* thread, Arguments args) {
1605  HandleScope scope(thread);
1606  Object self_obj(&scope, args.get(0));
1607  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1608    return thread->raiseRequiresType(self_obj, ID(str));
1609  }
1610  Str self(&scope, strUnderlying(*self_obj));
1611  word char_length = self.length();
1612  if (char_length == 0) {
1613    return Bool::falseObj();
1614  }
1615  for (word i = 0, len; i < char_length; i += len) {
1616    int32_t code_point = self.codePointAt(i, &len);
1617    if (!Unicode::isNumeric(code_point)) {
1618      return Bool::falseObj();
1619    }
1620  }
1621  return Bool::trueObj();
1622}
1623
1624RawObject METH(str, isprintable)(Thread* thread, Arguments args) {
1625  HandleScope scope(thread);
1626  Object self_obj(&scope, args.get(0));
1627  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1628    return thread->raiseRequiresType(self_obj, ID(str));
1629  }
1630  Str self(&scope, strUnderlying(*self_obj));
1631  word char_length = self.length();
1632  for (word i = 0, len; i < char_length; i += len) {
1633    int32_t code_point = self.codePointAt(i, &len);
1634    if (!Unicode::isPrintable(code_point)) {
1635      return Bool::falseObj();
1636    }
1637  }
1638  return Bool::trueObj();
1639}
1640
1641RawObject METH(str, isspace)(Thread* thread, Arguments args) {
1642  HandleScope scope(thread);
1643  Object self_obj(&scope, args.get(0));
1644  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1645    return thread->raiseRequiresType(self_obj, ID(str));
1646  }
1647  Str self(&scope, strUnderlying(*self_obj));
1648  word char_length = self.length();
1649  if (char_length == 0) {
1650    return Bool::falseObj();
1651  }
1652  if (char_length == 1) {
1653    return ASCII::isSpace(self.byteAt(0)) ? Bool::trueObj() : Bool::falseObj();
1654  }
1655  word byte_index = 0;
1656  do {
1657    word num_bytes;
1658    int32_t codepoint = self.codePointAt(byte_index, &num_bytes);
1659    if (!Unicode::isSpace(codepoint)) {
1660      return Bool::falseObj();
1661    }
1662    byte_index += num_bytes;
1663  } while (byte_index < char_length);
1664  return Bool::trueObj();
1665}
1666
1667RawObject METH(str, istitle)(Thread* thread, Arguments args) {
1668  HandleScope scope(thread);
1669  Object self_obj(&scope, args.get(0));
1670  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1671    return thread->raiseRequiresType(self_obj, ID(str));
1672  }
1673  Str self(&scope, strUnderlying(*self_obj));
1674  bool cased = false;
1675  bool previous_is_cased = false;
1676  word char_length = self.length();
1677  for (word i = 0, len; i < char_length; i += len) {
1678    int32_t code_point = self.codePointAt(i, &len);
1679    if (Unicode::isUpper(code_point) || Unicode::isTitle(code_point)) {
1680      if (previous_is_cased) return Bool::falseObj();
1681      cased = true;
1682      previous_is_cased = true;
1683    } else if (Unicode::isLower(code_point)) {
1684      if (!previous_is_cased) return Bool::falseObj();
1685      previous_is_cased = true;
1686      cased = true;
1687    } else {
1688      previous_is_cased = false;
1689    }
1690  }
1691  return Bool::fromBool(cased);
1692}
1693
1694RawObject METH(str, isupper)(Thread* thread, Arguments args) {
1695  HandleScope scope(thread);
1696  Object self_obj(&scope, args.get(0));
1697  if (!thread->runtime()->isInstanceOfStr(*self_obj)) {
1698    return thread->raiseRequiresType(self_obj, ID(str));
1699  }
1700  Str self(&scope, strUnderlying(*self_obj));
1701  word char_length = self.length();
1702  bool cased = false;
1703  for (word i = 0, len; i < char_length; i += len) {
1704    int32_t code_point = self.codePointAt(i, &len);
1705    if (Unicode::isLower(code_point) || Unicode::isTitle(code_point)) {
1706      return Bool::falseObj();
1707    }
1708    if (!cased && Unicode::isUpper(code_point)) {
1709      cased = true;
1710    }
1711  }
1712  return Bool::fromBool(cased);
1713}
1714
1715RawObject METH(str, lstrip)(Thread* thread, Arguments args) {
1716  Runtime* runtime = thread->runtime();
1717  HandleScope scope(thread);
1718  Object self_obj(&scope, args.get(0));
1719  if (!runtime->isInstanceOfStr(*self_obj)) {
1720    return thread->raiseRequiresType(self_obj, ID(str));
1721  }
1722  Str str(&scope, strUnderlying(*self_obj));
1723  Object other_obj(&scope, args.get(1));
1724  if (other_obj.isNoneType()) {
1725    return strStripSpaceLeft(thread, str);
1726  }
1727  if (!runtime->isInstanceOfStr(*other_obj)) {
1728    return thread->raiseWithFmt(LayoutId::kTypeError,
1729                                "str.lstrip() arg must be None or str");
1730  }
1731  Str chars(&scope, strUnderlying(*other_obj));
1732  return strStripLeft(thread, str, chars);
1733}
1734
1735RawObject METH(str, rstrip)(Thread* thread, Arguments args) {
1736  Runtime* runtime = thread->runtime();
1737  HandleScope scope(thread);
1738  Object self_obj(&scope, args.get(0));
1739  if (!runtime->isInstanceOfStr(*self_obj)) {
1740    return thread->raiseRequiresType(self_obj, ID(str));
1741  }
1742  Str str(&scope, strUnderlying(*self_obj));
1743  Object other_obj(&scope, args.get(1));
1744  if (other_obj.isNoneType()) {
1745    return strStripSpaceRight(thread, str);
1746  }
1747  if (!runtime->isInstanceOfStr(*other_obj)) {
1748    return thread->raiseWithFmt(LayoutId::kTypeError,
1749                                "str.rstrip() arg must be None or str");
1750  }
1751  Str chars(&scope, strUnderlying(*other_obj));
1752  return strStripRight(thread, str, chars);
1753}
1754
1755RawObject METH(str, strip)(Thread* thread, Arguments args) {
1756  Runtime* runtime = thread->runtime();
1757  HandleScope scope(thread);
1758  Object self_obj(&scope, args.get(0));
1759  if (!runtime->isInstanceOfStr(*self_obj)) {
1760    return thread->raiseRequiresType(self_obj, ID(str));
1761  }
1762  Str str(&scope, strUnderlying(*self_obj));
1763  Object other_obj(&scope, args.get(1));
1764  if (other_obj.isNoneType()) {
1765    return strStripSpace(thread, str);
1766  }
1767  if (!runtime->isInstanceOfStr(*other_obj)) {
1768    return thread->raiseWithFmt(LayoutId::kTypeError,
1769                                "str.strip() arg must be None or str");
1770  }
1771  Str chars(&scope, strUnderlying(*other_obj));
1772  return strStrip(thread, str, chars);
1773}
1774
1775static int32_t handleCapitalSigma(const Str& str, word i) {
1776  bool final_sigma = false;
1777  for (word j = str.offsetByCodePoints(i, -1), len; j >= 0;
1778       j = str.offsetByCodePoints(j, -1)) {
1779    int32_t code_point = str.codePointAt(j, &len);
1780    if (!Unicode::isCaseIgnorable(code_point)) {
1781      final_sigma = Unicode::isCased(code_point);
1782      break;
1783    }
1784  }
1785  if (!final_sigma) {
1786    return 0x03C3;
1787  }
1788
1789  word char_length = str.length();
1790  for (word j = str.offsetByCodePoints(i, 1), len; j < char_length; j += len) {
1791    int32_t code_point = str.codePointAt(j, &len);
1792    if (!Unicode::isCaseIgnorable(code_point)) {
1793      return Unicode::isCased(code_point) ? 0x03C3 : 0x03C2;
1794    }
1795  }
1796  return 0x03C2;
1797}
1798
1799RawObject METH(str, swapcase)(Thread* thread, Arguments args) {
1800  HandleScope scope(thread);
1801  Runtime* runtime = thread->runtime();
1802  Object self_obj(&scope, args.get(0));
1803  if (!runtime->isInstanceOfStr(*self_obj)) {
1804    return thread->raiseRequiresType(self_obj, ID(str));
1805  }
1806  Str self(&scope, strUnderlying(*self_obj));
1807  word char_length = self.length();
1808
1809  // Most of the time, this will be sufficient. However, due to Unicode casing,
1810  // it's possible that we could need up to 3 times as much space as the input.
1811  StrArray result(&scope, runtime->newStrArray());
1812  runtime->strArrayEnsureCapacity(thread, result, char_length);
1813  for (word i = 0, len; i < char_length; i += len) {
1814    int32_t code_point = self.codePointAt(i, &len);
1815    if (Unicode::isUpper(code_point)) {
1816      if (code_point == 0x03a3) {
1817        runtime->strArrayAddCodePoint(thread, result,
1818                                      handleCapitalSigma(self, i));
1819      } else {
1820        FullCasing lower = Unicode::toLower(code_point);
1821        for (word j = 0; j < 3; j++) {
1822          int32_t decoded = lower.code_points[j];
1823          if (decoded == -1) break;
1824          runtime->strArrayAddCodePoint(thread, result, decoded);
1825        }
1826      }
1827    } else {
1828      FullCasing upper = Unicode::toUpper(code_point);
1829      for (word j = 0; j < 3; j++) {
1830        int32_t decoded = upper.code_points[j];
1831        if (decoded == -1) break;
1832        runtime->strArrayAddCodePoint(thread, result, decoded);
1833      }
1834    }
1835  }
1836  return runtime->strFromStrArray(result);
1837}
1838
1839RawObject METH(str_iterator, __iter__)(Thread* thread, Arguments args) {
1840  HandleScope scope(thread);
1841  Object self(&scope, args.get(0));
1842  if (!self.isStrIterator()) {
1843    return thread->raiseRequiresType(self, ID(str_iterator));
1844  }
1845  return *self;
1846}
1847
1848// TODO(T35578204) Implement this for UTF-8. This probably means keeping extra
1849// state and logic so that __next__() will advance to the next codepoint.
1850
1851RawObject METH(str_iterator, __next__)(Thread* thread, Arguments args) {
1852  HandleScope scope(thread);
1853  Object self(&scope, args.get(0));
1854  if (!self.isStrIterator()) {
1855    return thread->raiseRequiresType(self, ID(str_iterator));
1856  }
1857  StrIterator iter(&scope, *self);
1858  Object value(&scope, strIteratorNext(thread, iter));
1859  if (value.isError()) {
1860    return thread->raise(LayoutId::kStopIteration, NoneType::object());
1861  }
1862  return *value;
1863}
1864
1865RawObject METH(str_iterator, __length_hint__)(Thread* thread, Arguments args) {
1866  HandleScope scope(thread);
1867  Object self(&scope, args.get(0));
1868  if (!self.isStrIterator()) {
1869    return thread->raiseRequiresType(self, ID(str_iterator));
1870  }
1871  StrIterator str_iterator(&scope, *self);
1872  Str str(&scope, str_iterator.iterable());
1873  return SmallInt::fromWord(str.length() - str_iterator.index());
1874}
1875
1876}  // namespace py