this repo has no description
at trunk 1108 lines 38 kB view raw
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) 2#include "bytes-builtins.h" 3 4#include "builtins.h" 5#include "bytearray-builtins.h" 6#include "byteslike.h" 7#include "formatter-utils.h" 8#include "frame.h" 9#include "int-builtins.h" 10#include "runtime.h" 11#include "slice-builtins.h" 12#include "strarray-builtins.h" 13#include "type-builtins.h" 14#include "unicode.h" 15#include "utils.h" 16 17namespace py { 18 19RawObject bytesDecodeASCII(Thread* thread, const Bytes& bytes) { 20 HandleScope scope(thread); 21 if (!bytes.isASCII()) { 22 return Unbound::object(); 23 } 24 if (bytes.isSmallBytes()) { 25 return SmallBytes::cast(*bytes).becomeStr(); 26 } 27 word bytes_len = LargeBytes::cast(*bytes).length(); 28 MutableBytes buf(&scope, 29 thread->runtime()->newMutableBytesUninitialized(bytes_len)); 30 buf.replaceFromWith(0, LargeBytes::cast(*bytes), bytes_len); 31 return buf.becomeStr(); 32} 33 34word bytesCount(const Bytes& haystack, word haystack_len, const Bytes& needle, 35 word needle_len, word start, word end) { 36 DCHECK_BOUND(haystack_len, haystack.length()); 37 DCHECK_BOUND(needle_len, needle.length()); 38 if (start > haystack_len) { 39 return 0; 40 } 41 Slice::adjustSearchIndices(&start, &end, haystack_len); 42 if (needle_len == 0) { 43 return haystack_len - start + 1; 44 } 45 word count = 0; 46 word index = 47 bytesFind(haystack, haystack_len, needle, needle_len, start, end); 48 while (index != -1) { 49 count++; 50 index = bytesFind(haystack, haystack_len, needle, needle_len, 51 index + needle_len, end); 52 } 53 return count; 54} 55 56word bytesFind(const Bytes& haystack, word haystack_len, const Bytes& needle, 57 word needle_len, word start, word end) { 58 DCHECK_BOUND(haystack_len, haystack.length()); 59 DCHECK_BOUND(needle_len, needle.length()); 60 Slice::adjustSearchIndices(&start, &end, haystack_len); 61 for (word i = start; i <= end - needle_len; i++) { 62 bool has_match = true; 63 for (word j = 0; has_match && j < 
needle_len; j++) { 64 has_match = haystack.byteAt(i + j) == needle.byteAt(j); 65 } 66 if (has_match) { 67 return i; 68 } 69 } 70 return -1; 71} 72 73RawObject bytesHex(Thread* thread, const Bytes& bytes, word length) { 74 HandleScope scope(thread); 75 Runtime* runtime = thread->runtime(); 76 MutableBytes result(&scope, 77 runtime->newMutableBytesUninitialized(length * 2)); 78 for (word i = 0, j = 0; i < length; i++) { 79 byte b = bytes.byteAt(i); 80 uwordToHexadecimalWithMutableBytes(*result, /*index=*/j, 81 /*num_digits=*/2, b); 82 j += 2; 83 } 84 return result.becomeStr(); 85} 86 87static RawObject smallBytesJoin(Thread* thread, const Bytes& sep, 88 word sep_length, const Tuple& src, 89 word src_length, word result_length) { 90 HandleScope scope(thread); 91 byte buffer[SmallBytes::kMaxLength]; 92 byte* dst = buffer; 93 for (word src_index = 0; src_index < src_length; src_index++) { 94 if (src_index > 0) { 95 sep.copyTo(dst, sep_length); 96 dst += sep_length; 97 } 98 Byteslike object(&scope, thread, src.at(src_index)); 99 word length = object.length(); 100 object.copyTo(dst, length); 101 dst += length; 102 } 103 DCHECK(dst == buffer + result_length, "unexpected number of bytes written"); 104 return SmallBytes::fromBytes({buffer, result_length}); 105} 106 107RawObject bytesJoin(Thread* thread, const Bytes& sep, word sep_length, 108 const Tuple& src, word src_length) { 109 DCHECK_BOUND(src_length, src.length()); 110 bool is_mutable = sep.isMutableBytes(); 111 Runtime* runtime = thread->runtime(); 112 if (src_length == 0) { 113 if (is_mutable) { 114 return runtime->emptyMutableBytes(); 115 } 116 return Bytes::empty(); 117 } 118 HandleScope scope(thread); 119 120 // first pass to accumulate length and check types 121 word result_length = sep_length * (src_length - 1); 122 Object item(&scope, Unbound::object()); 123 for (word index = 0; index < src_length; index++) { 124 item = src.at(index); 125 Byteslike object(&scope, thread, *item); 126 if (!object.isValid()) { 127 
return thread->raiseWithFmt( 128 LayoutId::kTypeError, 129 "sequence item %w: expected a bytes-like object, '%T' found", index, 130 &item); 131 } 132 result_length += object.length(); 133 } 134 135 // second pass to accumulate concatenation 136 if (result_length <= SmallBytes::kMaxLength && !is_mutable) { 137 return smallBytesJoin(thread, sep, sep_length, src, src_length, 138 result_length); 139 } 140 MutableBytes result(&scope, 141 runtime->newMutableBytesUninitialized(result_length)); 142 word dst_offset = 0; 143 for (word src_index = 0;;) { 144 Byteslike object(&scope, thread, src.at(src_index)); 145 word length = object.length(); 146 result.replaceFromWithByteslike(dst_offset, object, length); 147 dst_offset += length; 148 149 src_index++; 150 if (src_index >= src_length) break; 151 152 result.replaceFromWithBytes(dst_offset, *sep, sep_length); 153 dst_offset += sep_length; 154 } 155 DCHECK(dst_offset == result_length, "offset must match expected length"); 156 return is_mutable ? *result : result.becomeImmutable(); 157} 158 159word bytesRFind(const Bytes& haystack, word haystack_len, const Bytes& needle, 160 word needle_len, word start, word end) { 161 DCHECK_BOUND(haystack_len, haystack.length()); 162 DCHECK_BOUND(needle_len, needle.length()); 163 Slice::adjustSearchIndices(&start, &end, haystack_len); 164 for (word i = end - needle_len; i >= start; i--) { 165 bool has_match = true; 166 for (word j = 0; has_match && j < needle_len; j++) { 167 has_match = haystack.byteAt(i + j) == needle.byteAt(j); 168 } 169 if (has_match) { 170 return i; 171 } 172 } 173 return -1; 174} 175 176RawObject bytesReprSingleQuotes(Thread* thread, const Bytes& bytes) { 177 HandleScope scope(thread); 178 Byteslike byteslike(&scope, thread, *bytes); 179 // Precalculate the length of the result to minimize allocation. 
180 word length = byteslike.length(); 181 word result_length = length + 3; // b'' 182 for (word i = 0; i < length; i++) { 183 byte current = byteslike.byteAt(i); 184 switch (current) { 185 case '\t': 186 case '\n': 187 case '\r': 188 case '\'': 189 case '\\': 190 result_length++; 191 break; 192 default: 193 if (!ASCII::isPrintable(current)) { 194 result_length += 3; 195 } 196 } 197 } 198 199 if (result_length > SmallInt::kMaxValue) { 200 return thread->raiseWithFmt(LayoutId::kOverflowError, 201 "bytes object is too large to make repr"); 202 } 203 return thread->runtime()->byteslikeRepr(thread, byteslike, result_length, 204 '\''); 205} 206 207// Returns the index of the first byte in bytes that is not in chars. 208static word bytesSpanLeft(const Bytes& bytes, word bytes_len, 209 const Bytes& chars, word chars_len) { 210 for (word left = 0; left < bytes_len; left++) { 211 byte ch = bytes.byteAt(left); 212 bool found_in_chars = false; 213 for (word i = 0; i < chars_len; i++) { 214 if (ch == chars.byteAt(i)) { 215 found_in_chars = true; 216 break; 217 } 218 } 219 if (!found_in_chars) { 220 return left; 221 } 222 } 223 return bytes_len; 224} 225 226// Returns the index of the last byte in bytes that is not in chars. Stops at 227// and returns the left bound if all characters to the right were found. 
static word bytesSpanRight(const Bytes& bytes, word bytes_len,
                           const Bytes& chars, word chars_len, word left) {
  // `right` is an exclusive bound: the byte examined is the one at right - 1,
  // and the value returned is one past the last byte that is not in chars.
  for (word right = bytes_len; left < right; right--) {
    byte ch = bytes.byteAt(right - 1);
    bool found_in_chars = false;
    for (word i = 0; i < chars_len; i++) {
      if (ch == chars.byteAt(i)) {
        found_in_chars = true;
        break;
      }
    }
    if (!found_in_chars) {
      return right;
    }
  }
  return left;
}

// Splits the first `length` bytes of `bytes` at "\n", "\r" and "\r\n" and
// returns the pieces in a new list. Line terminators are kept in the pieces
// only when `keepends` is true. Empty input produces an empty list.
RawObject bytesSplitLines(Thread* thread, const Bytes& bytes, word length,
                          bool keepends) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  List result(&scope, runtime->newList());
  Object subseq(&scope, Unbound::object());

  // `j` is the start of the current line, `i` the scan position.
  for (word i = 0, j = 0; i < length; j = i) {
    // Advance past the bytes of the current line until a line terminator (or
    // the end of the input) is reached.
    while (i < length) {
      byte b = bytes.byteAt(i);
      // PEP-278
      if (b == '\n' || b == '\r') {
        break;
      }
      i++;
    }

    // End of the emitted piece; excludes the terminator unless keepends.
    word eol_pos = i;
    if (i < length) {
      word cur = i;
      word next = i + 1;
      i++;
      // Check for \r\n specifically
      if (bytes.byteAt(cur) == '\r' && next < length &&
          bytes.byteAt(next) == '\n') {
        i++;
      }
      if (keepends) {
        eol_pos = i;
      }
    }

    // If there are no newlines, the bytes returned should be identity-equal
    if (j == 0 && eol_pos == length) {
      runtime->listAdd(thread, result, bytes);
      return *result;
    }

    subseq = bytesSubseq(thread, bytes, j, eol_pos - j);
    runtime->listAdd(thread, result, subseq);
  }

  return *result;
}

// Returns a copy of `bytes` with the leading and trailing bytes that occur in
// `chars` removed.
RawObject bytesStrip(Thread* thread, const Bytes& bytes, word bytes_len,
                     const Bytes& chars, word chars_len) {
  word left = bytesSpanLeft(bytes, bytes_len, chars, chars_len);
  word right = bytesSpanRight(bytes, bytes_len, chars, chars_len, left);
  return bytesSubseq(thread, bytes, left, right - left);
}

// Returns a copy of `bytes` with the leading bytes that occur in `chars`
// removed.
RawObject bytesStripLeft(Thread* thread, const Bytes& bytes, word bytes_len,
                         const Bytes& chars, word chars_len) {
  word left = bytesSpanLeft(bytes, bytes_len, chars, chars_len);
  return bytesSubseq(thread, bytes, left, bytes_len - left);
}

// Returns a copy of `bytes` with the trailing bytes that occur in `chars`
// removed.
RawObject bytesStripRight(Thread* thread, const Bytes& bytes, word bytes_len,
                          const Bytes& chars, word chars_len) {
  word right = bytesSpanRight(bytes, bytes_len, chars, chars_len, 0);
  return bytesSubseq(thread, bytes, 0, right);
}

// Returns a copy of the first `len` bytes of `bytes` with leading and trailing
// ASCII whitespace removed.
RawObject bytesStripSpace(Thread* thread, const Bytes& bytes, word len) {
  word left = 0;
  while (left < len && ASCII::isSpace(bytes.byteAt(left))) {
    left++;
  }
  word right = len;
  while (right > left && ASCII::isSpace(bytes.byteAt(right - 1))) {
    right--;
  }
  return bytesSubseq(thread, bytes, left, right - left);
}

// Returns a copy of the first `len` bytes of `bytes` with leading ASCII
// whitespace removed.
RawObject bytesStripSpaceLeft(Thread* thread, const Bytes& bytes, word len) {
  word left = 0;
  while (left < len && ASCII::isSpace(bytes.byteAt(left))) {
    left++;
  }
  return bytesSubseq(thread, bytes, left, len - left);
}

// Returns a copy of the first `len` bytes of `bytes` with trailing ASCII
// whitespace removed.
RawObject bytesStripSpaceRight(Thread* thread, const Bytes& bytes, word len) {
  word right = len;
  while (right > 0 && ASCII::isSpace(bytes.byteAt(right - 1))) {
    right--;
  }
  return bytesSubseq(thread, bytes, 0, right);
}

// Returns a new immutable bytes object holding `length` bytes of `bytes`
// beginning at index `start`.
RawObject bytesSubseq(Thread* thread, const Bytes& bytes, word start,
                      word length) {
  DCHECK_BOUND(start, bytes.length());
  DCHECK_BOUND(length, bytes.length() - start);
  // Short results are assembled on the stack and returned as SmallBytes.
  if (length <= SmallBytes::kMaxLength) {
    byte buffer[SmallBytes::kMaxLength];
    for (word i = length - 1; i >= 0; i--) {
      buffer[i] = bytes.byteAt(start + i);
    }
    return SmallBytes::fromBytes({buffer, length});
  }
  HandleScope scope(thread);
  MutableBytes result(&scope,
                      thread->runtime()->newMutableBytesUninitialized(length));
  result.replaceFromWithStartAt(/*dst_start=*/0, DataArray::cast(*bytes),
                                length, start);
  return result.becomeImmutable();
}

// Returns true when `bytes` is a sequence of well-formed UTF-8 encoded code
// points: no stray continuation bytes, no truncated sequences, no overlong
// encodings and nothing above kMaxUnicode. Encoded surrogates (0xD800-0xDFFF)
// are rejected unless `allow_surrogates` is true.
static bool bytesIsValidUTF8Impl(RawBytes bytes, bool allow_surrogates) {
  for (word i = 0, length = bytes.length(); i < length;) {
    byte b0 = bytes.byteAt(i++);
    // ASCII bytes have the topmost bit zero.
    static_assert(kMaxASCII == 0x7F, "unexpected kMaxASCII value");
    if (b0 <= 0x7F) continue;
    // Bytes past this point have the high bit set (0b1xxxxxxx).

    // 0b110xxxxx begins a sequence with one continuation byte.
    // `b0 < 0b11100000` overestimates and we filter in a 2nd comparison.
    if (b0 < 0xE0) {
      // b0 < 0xC0 catches 0b10xxxxxx bytes (invalid continuation bytes).
      // The lead bytes 0xC0 and 0xC1 (0b11000000 and 0b11000001) can only
      // encode code points up to 0x7F, which should have been encoded as
      // ASCII.
      if (b0 < 0xC2) {
        return false;
      }
      // The sequence is truncated if no byte is left for the continuation.
      if (i >= length) {
        return false;
      }
      byte b1 = bytes.byteAt(i++);
      if (!UTF8::isTrailByte(b1)) {
        return false;
      }
      if (DCHECK_IS_ON()) {
        uword decoded =
            static_cast<uword>(b0 & 0x1F) << 6 | static_cast<uword>(b1 & 0x3F);
        DCHECK(0x80 <= decoded && decoded <= 0x7FF, "unexpected value");
      }

      // 0b1110xxxx starts a sequence with two continuation bytes.
    } else if (b0 < 0xF0) {
      // Two more bytes are needed: indices i and i + 1 must both be in range.
      if (i + 1 >= length) {
        return false;
      }
      byte b1 = bytes.byteAt(i++);
      byte b2 = bytes.byteAt(i++);
      if (!UTF8::isTrailByte(b1) || !UTF8::isTrailByte(b2)) {
        return false;
      }

      // Catch sequences that should have been encoded in 1-2 bytes instead.
      if (b0 == 0xE0) {
        if (b1 < 0xA0) {
          return false;
        }
      } else if (!allow_surrogates && b0 == 0xED && b1 >= 0xA0) {
        // 0b11011xxxxxxxxxxx (0xD800 - 0xDFFF) is declared invalid by unicode
        // as they look like utf-16 surrogates making it easier to detect
        // mix-ups.
        return false;
      }

      if (DCHECK_IS_ON()) {
        uword decoded = static_cast<uword>(b0 & 0x0F) << 12 |
                        static_cast<uword>(b1 & 0x3F) << 6 |
                        static_cast<uword>(b2 & 0x3F);
        DCHECK(0x0800 <= decoded && decoded <= 0xFFFF, "unexpected value");
      }

      static_assert(kMaxUnicode == 0x10FFFF, "unexpected maxunicode value");
      // 0b11110xxx starts a sequence with three continuation bytes.
      // However values bigger than 0x10FFFF are not valid unicode, so we test
      // b0 < 0b11110101 to overestimate that.
    } else if (b0 < 0xF5) {
      // Three more bytes are needed: indices i through i + 2 must be in range.
      if (i + 2 >= length) {
        return false;
      }
      byte b1 = bytes.byteAt(i++);
      byte b2 = bytes.byteAt(i++);
      byte b3 = bytes.byteAt(i++);
      if (!UTF8::isTrailByte(b1) || !UTF8::isTrailByte(b2) ||
          !UTF8::isTrailByte(b3)) {
        return false;
      }
      // Catch sequences that should have been encoded with 1-3 bytes instead.
      if (b0 == 0xF0) {
        if (b1 < 0x90) {
          return false;
        }
      } else if (b0 == 0xF4 && b1 >= 0x90) {
        // Bigger than kMaxUnicode.
        return false;
      }

      if (DCHECK_IS_ON()) {
        uword decoded = static_cast<uword>(b0 & 0x07) << 16 |
                        static_cast<uword>(b1 & 0x3F) << 12 |
                        static_cast<uword>(b2 & 0x3F) << 6 |
                        static_cast<uword>(b3 & 0x3F);
        DCHECK(0x10000 <= decoded && decoded <= kMaxUnicode,
               "unexpected value");
      }
    } else {
      // Invalid prefix byte.
      return false;
    }
  }
  return true;
}

// Returns true when `bytes` is well-formed UTF-8 that contains no encoded
// surrogate code points.
bool bytesIsValidUTF8(RawBytes bytes) {
  return bytesIsValidUTF8Impl(bytes, /*allow_surrogates=*/false);
}

// Same check as bytesIsValidUTF8, except that encoded surrogate code points
// are accepted.
bool bytesIsValidStr(RawBytes bytes) {
  return bytesIsValidUTF8Impl(bytes, /*allow_surrogates=*/true);
}

// Used only for UserBytesBase as a heap-allocated object.
468static const BuiltinAttribute kUserBytesBaseAttributes[] = { 469 {ID(_UserBytes__value), RawUserBytesBase::kValueOffset, 470 AttributeFlags::kHidden}, 471}; 472 473static const BuiltinAttribute kBytesIteratorAttributes[] = { 474 {ID(_bytes_iterator__iterable), RawBytesIterator::kIterableOffset, 475 AttributeFlags::kHidden}, 476 {ID(_bytes_iterator__index), RawBytesIterator::kIndexOffset, 477 AttributeFlags::kHidden}, 478}; 479 480void initializeBytesTypes(Thread* thread) { 481 HandleScope scope(thread); 482 Runtime* runtime = thread->runtime(); 483 484 Type bytes(&scope, 485 addBuiltinType(thread, ID(bytes), LayoutId::kBytes, 486 /*superclass_id=*/LayoutId::kObject, 487 kUserBytesBaseAttributes, RawUserBytesBase::kSize, 488 /*basetype=*/true)); 489 490 { 491 Type type(&scope, addImmediateBuiltinType( 492 thread, ID(largebytes), LayoutId::kLargeBytes, 493 /*builtin_base=*/LayoutId::kBytes, 494 /*superclass_id=*/LayoutId::kObject, 495 /*basetype=*/false)); 496 Layout::cast(type.instanceLayout()).setDescribedType(*bytes); 497 runtime->setLargeBytesType(type); 498 } 499 500 { 501 Type type(&scope, addImmediateBuiltinType( 502 thread, ID(smallbytes), LayoutId::kSmallBytes, 503 /*builtin_base=*/LayoutId::kBytes, 504 /*superclass_id=*/LayoutId::kObject, 505 /*basetype=*/false)); 506 Layout::cast(type.instanceLayout()).setDescribedType(*bytes); 507 runtime->setSmallBytesType(type); 508 } 509 510 addBuiltinType(thread, ID(bytes_iterator), LayoutId::kBytesIterator, 511 /*superclass_id=*/LayoutId::kObject, kBytesIteratorAttributes, 512 BytesIterator::kSize, /*basetype=*/false); 513} 514 515RawObject METH(bytes, __add__)(Thread* thread, Arguments args) { 516 Runtime* runtime = thread->runtime(); 517 HandleScope scope(thread); 518 Object self_obj(&scope, args.get(0)); 519 if (!runtime->isInstanceOfBytes(*self_obj)) { 520 return thread->raiseRequiresType(self_obj, ID(bytes)); 521 } 522 Bytes self(&scope, bytesUnderlying(*self_obj)); 523 Object other_obj(&scope, args.get(1)); 
524 if (runtime->isInstanceOfBytes(*other_obj)) { 525 Bytes other(&scope, bytesUnderlying(*other_obj)); 526 return runtime->bytesConcat(thread, self, other); 527 } 528 if (runtime->isInstanceOfBytearray(*other_obj)) { 529 Bytearray other(&scope, *other_obj); 530 Bytes other_bytes(&scope, bytearrayAsBytes(thread, other)); 531 return runtime->bytesConcat(thread, self, other_bytes); 532 } 533 // TODO(T38246066): buffers besides bytes/bytearray 534 return thread->raiseWithFmt(LayoutId::kTypeError, "can't concat %T to bytes", 535 &other_obj); 536} 537 538RawObject METH(bytes, __eq__)(Thread* thread, Arguments args) { 539 Runtime* runtime = thread->runtime(); 540 HandleScope scope(thread); 541 Object self_obj(&scope, args.get(0)); 542 if (!runtime->isInstanceOfBytes(*self_obj)) { 543 return thread->raiseRequiresType(self_obj, ID(bytes)); 544 } 545 Object other_obj(&scope, args.get(1)); 546 if (!runtime->isInstanceOfBytes(*other_obj)) { 547 return NotImplementedType::object(); 548 } 549 Bytes self(&scope, bytesUnderlying(*self_obj)); 550 Bytes other(&scope, bytesUnderlying(*other_obj)); 551 return Bool::fromBool(self.compare(*other) == 0); 552} 553 554RawObject METH(bytes, __ge__)(Thread* thread, Arguments args) { 555 Runtime* runtime = thread->runtime(); 556 HandleScope scope(thread); 557 Object self_obj(&scope, args.get(0)); 558 if (!runtime->isInstanceOfBytes(*self_obj)) { 559 return thread->raiseRequiresType(self_obj, ID(bytes)); 560 } 561 Object other_obj(&scope, args.get(1)); 562 if (!runtime->isInstanceOfBytes(*other_obj)) { 563 return NotImplementedType::object(); 564 } 565 Bytes self(&scope, bytesUnderlying(*self_obj)); 566 Bytes other(&scope, bytesUnderlying(*other_obj)); 567 return Bool::fromBool(self.compare(*other) >= 0); 568} 569 570RawObject METH(bytes, __gt__)(Thread* thread, Arguments args) { 571 Runtime* runtime = thread->runtime(); 572 HandleScope scope(thread); 573 Object self_obj(&scope, args.get(0)); 574 if (!runtime->isInstanceOfBytes(*self_obj)) { 
575 return thread->raiseRequiresType(self_obj, ID(bytes)); 576 } 577 Object other_obj(&scope, args.get(1)); 578 if (!runtime->isInstanceOfBytes(*other_obj)) { 579 return NotImplementedType::object(); 580 } 581 Bytes self(&scope, bytesUnderlying(*self_obj)); 582 Bytes other(&scope, bytesUnderlying(*other_obj)); 583 return Bool::fromBool(self.compare(*other) > 0); 584} 585 586RawObject METH(bytes, __hash__)(Thread* thread, Arguments args) { 587 HandleScope scope(thread); 588 Object self_obj(&scope, args.get(0)); 589 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) { 590 return thread->raiseRequiresType(self_obj, ID(bytes)); 591 } 592 Bytes self(&scope, bytesUnderlying(*self_obj)); 593 return SmallInt::fromWord(bytesHash(thread, *self)); 594} 595 596RawObject METH(bytes, __iter__)(Thread* thread, Arguments args) { 597 HandleScope scope(thread); 598 Object self_obj(&scope, args.get(0)); 599 Runtime* runtime = thread->runtime(); 600 if (!runtime->isInstanceOfBytes(*self_obj)) { 601 return thread->raiseRequiresType(self_obj, ID(bytes)); 602 } 603 Bytes self(&scope, bytesUnderlying(*self_obj)); 604 return runtime->newBytesIterator(thread, self); 605} 606 607RawObject METH(bytes, __le__)(Thread* thread, Arguments args) { 608 Runtime* runtime = thread->runtime(); 609 HandleScope scope(thread); 610 Object self_obj(&scope, args.get(0)); 611 if (!runtime->isInstanceOfBytes(*self_obj)) { 612 return thread->raiseRequiresType(self_obj, ID(bytes)); 613 } 614 Object other_obj(&scope, args.get(1)); 615 if (!runtime->isInstanceOfBytes(*other_obj)) { 616 return NotImplementedType::object(); 617 } 618 Bytes self(&scope, bytesUnderlying(*self_obj)); 619 Bytes other(&scope, bytesUnderlying(*other_obj)); 620 return Bool::fromBool(self.compare(*other) <= 0); 621} 622 623RawObject METH(bytes, __len__)(Thread* thread, Arguments args) { 624 Runtime* runtime = thread->runtime(); 625 HandleScope scope(thread); 626 Object self_obj(&scope, args.get(0)); 627 if 
(!runtime->isInstanceOfBytes(*self_obj)) { 628 return thread->raiseRequiresType(self_obj, ID(bytes)); 629 } 630 631 Bytes self(&scope, bytesUnderlying(*self_obj)); 632 return SmallInt::fromWord(self.length()); 633} 634 635RawObject METH(bytes, __lt__)(Thread* thread, Arguments args) { 636 Runtime* runtime = thread->runtime(); 637 HandleScope scope(thread); 638 Object self_obj(&scope, args.get(0)); 639 if (!runtime->isInstanceOfBytes(*self_obj)) { 640 return thread->raiseRequiresType(self_obj, ID(bytes)); 641 } 642 Object other_obj(&scope, args.get(1)); 643 if (!runtime->isInstanceOfBytes(*other_obj)) { 644 return NotImplementedType::object(); 645 } 646 Bytes self(&scope, bytesUnderlying(*self_obj)); 647 Bytes other(&scope, bytesUnderlying(*other_obj)); 648 return Bool::fromBool(self.compare(*other) < 0); 649} 650 651RawObject METH(bytes, __mul__)(Thread* thread, Arguments args) { 652 Runtime* runtime = thread->runtime(); 653 HandleScope scope(thread); 654 Object self_obj(&scope, args.get(0)); 655 if (!runtime->isInstanceOfBytes(*self_obj)) { 656 return thread->raiseRequiresType(self_obj, ID(bytes)); 657 } 658 Object count_index(&scope, args.get(1)); 659 Object count_obj(&scope, intFromIndex(thread, count_index)); 660 if (count_obj.isError()) return *count_obj; 661 Bytes self(&scope, bytesUnderlying(*self_obj)); 662 word count = intUnderlying(*count_obj).asWordSaturated(); 663 if (!SmallInt::isValid(count)) { 664 return thread->raiseWithFmt(LayoutId::kOverflowError, 665 "cannot fit '%T' into an index-sized integer", 666 &count_obj); 667 } 668 word length = self.length(); 669 if (count <= 0 || length == 0) { 670 return Bytes::empty(); 671 } 672 if (count == 1) { 673 return *self; 674 } 675 word new_length; 676 if (__builtin_mul_overflow(length, count, &new_length) || 677 !SmallInt::isValid(new_length)) { 678 return thread->raiseWithFmt(LayoutId::kOverflowError, 679 "repeated bytes are too long"); 680 } 681 return runtime->bytesRepeat(thread, self, length, count); 
682} 683 684RawObject METH(bytes, __ne__)(Thread* thread, Arguments args) { 685 Runtime* runtime = thread->runtime(); 686 HandleScope scope(thread); 687 Object self_obj(&scope, args.get(0)); 688 if (!runtime->isInstanceOfBytes(*self_obj)) { 689 return thread->raiseRequiresType(self_obj, ID(bytes)); 690 } 691 Object other_obj(&scope, args.get(1)); 692 if (!runtime->isInstanceOfBytes(*other_obj)) { 693 return NotImplementedType::object(); 694 } 695 Bytes self(&scope, bytesUnderlying(*self_obj)); 696 Bytes other(&scope, bytesUnderlying(*other_obj)); 697 return Bool::fromBool(self.compare(*other) != 0); 698} 699 700RawObject METH(bytes, __repr__)(Thread* thread, Arguments args) { 701 Runtime* runtime = thread->runtime(); 702 HandleScope scope(thread); 703 Object self_obj(&scope, args.get(0)); 704 if (!runtime->isInstanceOfBytes(*self_obj)) { 705 return thread->raiseRequiresType(self_obj, ID(bytes)); 706 } 707 Byteslike self(&scope, thread, *self_obj); 708 return byteslikeReprSmartQuotes(thread, self); 709} 710 711RawObject METH(bytes, hex)(Thread* thread, Arguments args) { 712 HandleScope scope(thread); 713 Object obj(&scope, args.get(0)); 714 if (!thread->runtime()->isInstanceOfBytes(*obj)) { 715 return thread->raiseRequiresType(obj, ID(bytes)); 716 } 717 Bytes self(&scope, bytesUnderlying(*obj)); 718 return bytesHex(thread, self, self.length()); 719} 720 721RawObject METH(bytes, isalnum)(Thread* thread, Arguments args) { 722 HandleScope scope(thread); 723 Object self_obj(&scope, args.get(0)); 724 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) { 725 return thread->raiseRequiresType(self_obj, ID(bytes)); 726 } 727 Bytes self(&scope, bytesUnderlying(*self_obj)); 728 word length = self.length(); 729 if (length == 0) { 730 return Bool::falseObj(); 731 } 732 for (word i = 0; i < length; i++) { 733 if (!ASCII::isAlnum(self.byteAt(i))) { 734 return Bool::falseObj(); 735 } 736 } 737 return Bool::trueObj(); 738} 739 740RawObject METH(bytes, isalpha)(Thread* thread, 
Arguments args) { 741 HandleScope scope(thread); 742 Object self_obj(&scope, args.get(0)); 743 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) { 744 return thread->raiseRequiresType(self_obj, ID(bytes)); 745 } 746 Bytes self(&scope, bytesUnderlying(*self_obj)); 747 word length = self.length(); 748 if (length == 0) { 749 return Bool::falseObj(); 750 } 751 for (word i = 0; i < length; i++) { 752 if (!ASCII::isAlpha(self.byteAt(i))) { 753 return Bool::falseObj(); 754 } 755 } 756 return Bool::trueObj(); 757} 758 759RawObject METH(bytes, isdigit)(Thread* thread, Arguments args) { 760 HandleScope scope(thread); 761 Object self_obj(&scope, args.get(0)); 762 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) { 763 return thread->raiseRequiresType(self_obj, ID(bytes)); 764 } 765 Bytes self(&scope, bytesUnderlying(*self_obj)); 766 word length = self.length(); 767 if (length == 0) { 768 return Bool::falseObj(); 769 } 770 for (word i = 0; i < length; i++) { 771 if (!ASCII::isDigit(self.byteAt(i))) { 772 return Bool::falseObj(); 773 } 774 } 775 return Bool::trueObj(); 776} 777 778RawObject METH(bytes, islower)(Thread* thread, Arguments args) { 779 HandleScope scope(thread); 780 Object self_obj(&scope, args.get(0)); 781 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) { 782 return thread->raiseRequiresType(self_obj, ID(bytes)); 783 } 784 Bytes self(&scope, bytesUnderlying(*self_obj)); 785 word length = self.length(); 786 if (length == 0) { 787 return Bool::falseObj(); 788 } 789 for (word i = 0; i < length; i++) { 790 if (!ASCII::isLower(self.byteAt(i))) { 791 return Bool::falseObj(); 792 } 793 } 794 return Bool::trueObj(); 795} 796 797RawObject METH(bytes, isspace)(Thread* thread, Arguments args) { 798 HandleScope scope(thread); 799 Object self_obj(&scope, args.get(0)); 800 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) { 801 return thread->raiseRequiresType(self_obj, ID(bytes)); 802 } 803 Bytes self(&scope, bytesUnderlying(*self_obj)); 804 word length = 
self.length(); 805 if (length == 0) { 806 return Bool::falseObj(); 807 } 808 for (word i = 0; i < length; i++) { 809 if (!ASCII::isSpace(self.byteAt(i))) { 810 return Bool::falseObj(); 811 } 812 } 813 return Bool::trueObj(); 814} 815 816RawObject METH(bytes, istitle)(Thread* thread, Arguments args) { 817 HandleScope scope(thread); 818 Object self_obj(&scope, args.get(0)); 819 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) { 820 return thread->raiseRequiresType(self_obj, ID(bytes)); 821 } 822 Bytes self(&scope, bytesUnderlying(*self_obj)); 823 word length = self.length(); 824 825 bool cased = false; 826 bool previous_is_cased = false; 827 for (word i = 0; i < length; i++) { 828 byte b = self.byteAt(i); 829 if (ASCII::isUpper(b)) { 830 if (previous_is_cased) { 831 return Bool::falseObj(); 832 } 833 cased = true; 834 previous_is_cased = true; 835 } else if (ASCII::isLower(b)) { 836 if (!previous_is_cased) { 837 return Bool::falseObj(); 838 } 839 cased = true; 840 previous_is_cased = true; 841 } else { 842 previous_is_cased = false; 843 } 844 } 845 return Bool::fromBool(cased); 846} 847 848RawObject METH(bytes, isupper)(Thread* thread, Arguments args) { 849 HandleScope scope(thread); 850 Object self_obj(&scope, args.get(0)); 851 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) { 852 return thread->raiseRequiresType(self_obj, ID(bytes)); 853 } 854 Bytes self(&scope, bytesUnderlying(*self_obj)); 855 word length = self.length(); 856 if (length == 0) { 857 return Bool::falseObj(); 858 } 859 for (word i = 0; i < length; i++) { 860 if (!ASCII::isUpper(self.byteAt(i))) { 861 return Bool::falseObj(); 862 } 863 } 864 return Bool::trueObj(); 865} 866 867RawObject METH(bytes, lower)(Thread* thread, Arguments args) { 868 HandleScope scope(thread); 869 Object self(&scope, args.get(0)); 870 Runtime* runtime = thread->runtime(); 871 if (!runtime->isInstanceOfBytes(*self)) { 872 return thread->raiseRequiresType(self, ID(bytes)); 873 } 874 self = bytesUnderlying(*self); 875 
if (self.isSmallBytes()) { 876 SmallBytes small_bytes(&scope, *self); 877 word length = small_bytes.length(); 878 byte buffer[SmallBytes::kMaxLength]; 879 small_bytes.copyTo(buffer, length); 880 for (word i = 0; i < length; i++) { 881 buffer[i] = ASCII::toLower(buffer[i]); 882 } 883 return SmallBytes::fromBytes(View<byte>(buffer, length)); 884 } 885 LargeBytes large_bytes(&scope, *self); 886 word length = large_bytes.length(); 887 MutableBytes result(&scope, runtime->newMutableBytesUninitialized(length)); 888 for (word i = 0; i < length; i++) { 889 result.byteAtPut(i, ASCII::toLower(large_bytes.byteAt(i))); 890 } 891 return result.becomeImmutable(); 892} 893 894RawObject METH(bytes, lstrip)(Thread* thread, Arguments args) { 895 HandleScope scope(thread); 896 Object self_obj(&scope, args.get(0)); 897 Runtime* runtime = thread->runtime(); 898 if (!runtime->isInstanceOfBytes(*self_obj)) { 899 return thread->raiseRequiresType(self_obj, ID(bytes)); 900 } 901 Bytes self(&scope, bytesUnderlying(*self_obj)); 902 Object chars_obj(&scope, args.get(1)); 903 if (chars_obj.isNoneType()) { 904 return bytesStripSpaceLeft(thread, self, self.length()); 905 } 906 if (runtime->isInstanceOfBytes(*chars_obj)) { 907 Bytes chars(&scope, bytesUnderlying(*chars_obj)); 908 return bytesStripLeft(thread, self, self.length(), chars, chars.length()); 909 } 910 if (runtime->isInstanceOfBytearray(*chars_obj)) { 911 Bytearray chars(&scope, *chars_obj); 912 Bytes chars_bytes(&scope, chars.items()); 913 return bytesStripLeft(thread, self, self.length(), chars_bytes, 914 chars.numItems()); 915 } 916 // TODO(T38246066): support bytes-like objects other than bytes, bytearray 917 return thread->raiseWithFmt(LayoutId::kTypeError, 918 "a bytes-like object is required, not '%T'", 919 &chars_obj); 920} 921 922RawObject METH(bytes, rstrip)(Thread* thread, Arguments args) { 923 HandleScope scope(thread); 924 Object self_obj(&scope, args.get(0)); 925 Runtime* runtime = thread->runtime(); 926 if 
(!runtime->isInstanceOfBytes(*self_obj)) { 927 return thread->raiseRequiresType(self_obj, ID(bytes)); 928 } 929 Bytes self(&scope, bytesUnderlying(*self_obj)); 930 Object chars_obj(&scope, args.get(1)); 931 if (chars_obj.isNoneType()) { 932 return bytesStripSpaceRight(thread, self, self.length()); 933 } 934 if (runtime->isInstanceOfBytes(*chars_obj)) { 935 Bytes chars(&scope, bytesUnderlying(*chars_obj)); 936 return bytesStripRight(thread, self, self.length(), chars, chars.length()); 937 } 938 if (runtime->isInstanceOfBytearray(*chars_obj)) { 939 Bytearray chars(&scope, *chars_obj); 940 Bytes chars_bytes(&scope, chars.items()); 941 return bytesStripRight(thread, self, self.length(), chars_bytes, 942 chars.numItems()); 943 } 944 // TODO(T38246066): support bytes-like objects other than bytes, bytearray 945 return thread->raiseWithFmt(LayoutId::kTypeError, 946 "a bytes-like object is required, not '%T'", 947 &chars_obj); 948} 949 950RawObject METH(bytes, strip)(Thread* thread, Arguments args) { 951 HandleScope scope(thread); 952 Object self_obj(&scope, args.get(0)); 953 Runtime* runtime = thread->runtime(); 954 if (!runtime->isInstanceOfBytes(*self_obj)) { 955 return thread->raiseRequiresType(self_obj, ID(bytes)); 956 } 957 Bytes self(&scope, bytesUnderlying(*self_obj)); 958 Object chars_obj(&scope, args.get(1)); 959 if (chars_obj.isNoneType()) { 960 return bytesStripSpace(thread, self, self.length()); 961 } 962 if (runtime->isInstanceOfBytes(*chars_obj)) { 963 Bytes chars(&scope, bytesUnderlying(*chars_obj)); 964 return bytesStrip(thread, self, self.length(), chars, chars.length()); 965 } 966 if (runtime->isInstanceOfBytearray(*chars_obj)) { 967 Bytearray chars(&scope, *chars_obj); 968 Bytes chars_bytes(&scope, chars.items()); 969 return bytesStrip(thread, self, self.length(), chars_bytes, 970 chars.numItems()); 971 } 972 // TODO(T38246066): support bytes-like objects other than bytes, bytearray 973 return thread->raiseWithFmt(LayoutId::kTypeError, 974 "a bytes-like 
object is required, not '%T'", 975 &chars_obj); 976} 977 978RawObject METH(bytes, splitlines)(Thread* thread, Arguments args) { 979 HandleScope scope(thread); 980 Runtime* runtime = thread->runtime(); 981 Object self_obj(&scope, args.get(0)); 982 Object keepends_obj(&scope, args.get(1)); 983 if (!runtime->isInstanceOfBytes(*self_obj)) { 984 return thread->raiseRequiresType(self_obj, ID(bytes)); 985 } 986 if (!runtime->isInstanceOfInt(*keepends_obj)) { 987 return thread->raiseRequiresType(keepends_obj, ID(int)); 988 } 989 Bytes self(&scope, bytesUnderlying(*self_obj)); 990 bool keepends = !intUnderlying(*keepends_obj).isZero(); 991 return bytesSplitLines(thread, self, self.length(), keepends); 992} 993 994RawObject METH(bytes, translate)(Thread* thread, Arguments args) { 995 HandleScope scope(thread); 996 Object self_obj(&scope, args.get(0)); 997 Runtime* runtime = thread->runtime(); 998 if (!runtime->isInstanceOfBytes(*self_obj)) { 999 return thread->raiseRequiresType(self_obj, ID(bytes)); 1000 } 1001 Bytes self(&scope, bytesUnderlying(*self_obj)); 1002 Object table_obj(&scope, args.get(1)); 1003 word table_length; 1004 if (table_obj.isNoneType()) { 1005 table_length = kByteTranslationTableLength; 1006 table_obj = Bytes::empty(); 1007 } else if (runtime->isInstanceOfBytes(*table_obj)) { 1008 Bytes bytes(&scope, bytesUnderlying(*table_obj)); 1009 table_length = bytes.length(); 1010 table_obj = *bytes; 1011 } else if (runtime->isInstanceOfBytearray(*table_obj)) { 1012 Bytearray array(&scope, *table_obj); 1013 table_length = array.numItems(); 1014 table_obj = array.items(); 1015 } else { 1016 // TODO(T38246066): allow any bytes-like object 1017 return thread->raiseWithFmt(LayoutId::kTypeError, 1018 "a bytes-like object is required, not '%T'", 1019 &table_obj); 1020 } 1021 if (table_length != kByteTranslationTableLength) { 1022 return thread->raiseWithFmt(LayoutId::kValueError, 1023 "translation table must be %w characters long", 1024 kByteTranslationTableLength); 1025 
} 1026 Bytes table(&scope, *table_obj); 1027 Object del(&scope, args.get(2)); 1028 if (runtime->isInstanceOfBytes(*del)) { 1029 Bytes bytes(&scope, bytesUnderlying(*del)); 1030 return runtime->bytesTranslate(thread, self, self.length(), table, 1031 table_length, bytes, bytes.length()); 1032 } 1033 if (runtime->isInstanceOfBytearray(*del)) { 1034 Bytearray array(&scope, *del); 1035 Bytes bytes(&scope, array.items()); 1036 return runtime->bytesTranslate(thread, self, self.length(), table, 1037 table_length, bytes, array.numItems()); 1038 } 1039 // TODO(T38246066): allow any bytes-like object 1040 return thread->raiseWithFmt( 1041 LayoutId::kTypeError, "a bytes-like object is required, not '%T'", &del); 1042} 1043 1044RawObject METH(bytes, upper)(Thread* thread, Arguments args) { 1045 HandleScope scope(thread); 1046 Object self(&scope, args.get(0)); 1047 Runtime* runtime = thread->runtime(); 1048 if (!runtime->isInstanceOfBytes(*self)) { 1049 return thread->raiseRequiresType(self, ID(bytes)); 1050 } 1051 self = bytesUnderlying(*self); 1052 if (self.isSmallBytes()) { 1053 SmallBytes small_bytes(&scope, *self); 1054 word length = small_bytes.length(); 1055 byte buffer[SmallBytes::kMaxLength]; 1056 small_bytes.copyTo(buffer, length); 1057 for (word i = 0; i < length; i++) { 1058 buffer[i] = ASCII::toUpper(buffer[i]); 1059 } 1060 return SmallBytes::fromBytes(View<byte>(buffer, length)); 1061 } 1062 LargeBytes large_bytes(&scope, *self); 1063 word length = large_bytes.length(); 1064 MutableBytes result(&scope, runtime->newMutableBytesUninitialized(length)); 1065 for (word i = 0; i < length; i++) { 1066 result.byteAtPut(i, ASCII::toUpper(large_bytes.byteAt(i))); 1067 } 1068 return result.becomeImmutable(); 1069} 1070 1071RawObject METH(bytes_iterator, __iter__)(Thread* thread, Arguments args) { 1072 HandleScope scope(thread); 1073 Object self(&scope, args.get(0)); 1074 if (!self.isBytesIterator()) { 1075 return thread->raiseRequiresType(self, ID(bytes_iterator)); 1076 } 
1077 return *self; 1078} 1079 1080RawObject METH(bytes_iterator, __next__)(Thread* thread, Arguments args) { 1081 HandleScope scope(thread); 1082 Object self(&scope, args.get(0)); 1083 if (!self.isBytesIterator()) { 1084 return thread->raiseRequiresType(self, ID(bytes_iterator)); 1085 } 1086 BytesIterator iter(&scope, *self); 1087 Bytes underlying(&scope, iter.iterable()); 1088 word index = iter.index(); 1089 if (index >= underlying.length()) { 1090 return thread->raise(LayoutId::kStopIteration, NoneType::object()); 1091 } 1092 iter.setIndex(index + 1); 1093 return SmallInt::fromWord(underlying.byteAt(index)); 1094} 1095 1096RawObject METH(bytes_iterator, __length_hint__)(Thread* thread, 1097 Arguments args) { 1098 HandleScope scope(thread); 1099 Object self(&scope, args.get(0)); 1100 if (!self.isBytesIterator()) { 1101 return thread->raiseRequiresType(self, ID(bytes_iterator)); 1102 } 1103 BytesIterator iter(&scope, *self); 1104 Bytes underlying(&scope, iter.iterable()); 1105 return SmallInt::fromWord(underlying.length() - iter.index()); 1106} 1107 1108} // namespace py