this repo has no description
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
2#include "bytes-builtins.h"
3
4#include "builtins.h"
5#include "bytearray-builtins.h"
6#include "byteslike.h"
7#include "formatter-utils.h"
8#include "frame.h"
9#include "int-builtins.h"
10#include "runtime.h"
11#include "slice-builtins.h"
12#include "strarray-builtins.h"
13#include "type-builtins.h"
14#include "unicode.h"
15#include "utils.h"
16
17namespace py {
18
19RawObject bytesDecodeASCII(Thread* thread, const Bytes& bytes) {
20 HandleScope scope(thread);
21 if (!bytes.isASCII()) {
22 return Unbound::object();
23 }
24 if (bytes.isSmallBytes()) {
25 return SmallBytes::cast(*bytes).becomeStr();
26 }
27 word bytes_len = LargeBytes::cast(*bytes).length();
28 MutableBytes buf(&scope,
29 thread->runtime()->newMutableBytesUninitialized(bytes_len));
30 buf.replaceFromWith(0, LargeBytes::cast(*bytes), bytes_len);
31 return buf.becomeStr();
32}
33
34word bytesCount(const Bytes& haystack, word haystack_len, const Bytes& needle,
35 word needle_len, word start, word end) {
36 DCHECK_BOUND(haystack_len, haystack.length());
37 DCHECK_BOUND(needle_len, needle.length());
38 if (start > haystack_len) {
39 return 0;
40 }
41 Slice::adjustSearchIndices(&start, &end, haystack_len);
42 if (needle_len == 0) {
43 return haystack_len - start + 1;
44 }
45 word count = 0;
46 word index =
47 bytesFind(haystack, haystack_len, needle, needle_len, start, end);
48 while (index != -1) {
49 count++;
50 index = bytesFind(haystack, haystack_len, needle, needle_len,
51 index + needle_len, end);
52 }
53 return count;
54}
55
56word bytesFind(const Bytes& haystack, word haystack_len, const Bytes& needle,
57 word needle_len, word start, word end) {
58 DCHECK_BOUND(haystack_len, haystack.length());
59 DCHECK_BOUND(needle_len, needle.length());
60 Slice::adjustSearchIndices(&start, &end, haystack_len);
61 for (word i = start; i <= end - needle_len; i++) {
62 bool has_match = true;
63 for (word j = 0; has_match && j < needle_len; j++) {
64 has_match = haystack.byteAt(i + j) == needle.byteAt(j);
65 }
66 if (has_match) {
67 return i;
68 }
69 }
70 return -1;
71}
72
73RawObject bytesHex(Thread* thread, const Bytes& bytes, word length) {
74 HandleScope scope(thread);
75 Runtime* runtime = thread->runtime();
76 MutableBytes result(&scope,
77 runtime->newMutableBytesUninitialized(length * 2));
78 for (word i = 0, j = 0; i < length; i++) {
79 byte b = bytes.byteAt(i);
80 uwordToHexadecimalWithMutableBytes(*result, /*index=*/j,
81 /*num_digits=*/2, b);
82 j += 2;
83 }
84 return result.becomeStr();
85}
86
87static RawObject smallBytesJoin(Thread* thread, const Bytes& sep,
88 word sep_length, const Tuple& src,
89 word src_length, word result_length) {
90 HandleScope scope(thread);
91 byte buffer[SmallBytes::kMaxLength];
92 byte* dst = buffer;
93 for (word src_index = 0; src_index < src_length; src_index++) {
94 if (src_index > 0) {
95 sep.copyTo(dst, sep_length);
96 dst += sep_length;
97 }
98 Byteslike object(&scope, thread, src.at(src_index));
99 word length = object.length();
100 object.copyTo(dst, length);
101 dst += length;
102 }
103 DCHECK(dst == buffer + result_length, "unexpected number of bytes written");
104 return SmallBytes::fromBytes({buffer, result_length});
105}
106
107RawObject bytesJoin(Thread* thread, const Bytes& sep, word sep_length,
108 const Tuple& src, word src_length) {
109 DCHECK_BOUND(src_length, src.length());
110 bool is_mutable = sep.isMutableBytes();
111 Runtime* runtime = thread->runtime();
112 if (src_length == 0) {
113 if (is_mutable) {
114 return runtime->emptyMutableBytes();
115 }
116 return Bytes::empty();
117 }
118 HandleScope scope(thread);
119
120 // first pass to accumulate length and check types
121 word result_length = sep_length * (src_length - 1);
122 Object item(&scope, Unbound::object());
123 for (word index = 0; index < src_length; index++) {
124 item = src.at(index);
125 Byteslike object(&scope, thread, *item);
126 if (!object.isValid()) {
127 return thread->raiseWithFmt(
128 LayoutId::kTypeError,
129 "sequence item %w: expected a bytes-like object, '%T' found", index,
130 &item);
131 }
132 result_length += object.length();
133 }
134
135 // second pass to accumulate concatenation
136 if (result_length <= SmallBytes::kMaxLength && !is_mutable) {
137 return smallBytesJoin(thread, sep, sep_length, src, src_length,
138 result_length);
139 }
140 MutableBytes result(&scope,
141 runtime->newMutableBytesUninitialized(result_length));
142 word dst_offset = 0;
143 for (word src_index = 0;;) {
144 Byteslike object(&scope, thread, src.at(src_index));
145 word length = object.length();
146 result.replaceFromWithByteslike(dst_offset, object, length);
147 dst_offset += length;
148
149 src_index++;
150 if (src_index >= src_length) break;
151
152 result.replaceFromWithBytes(dst_offset, *sep, sep_length);
153 dst_offset += sep_length;
154 }
155 DCHECK(dst_offset == result_length, "offset must match expected length");
156 return is_mutable ? *result : result.becomeImmutable();
157}
158
159word bytesRFind(const Bytes& haystack, word haystack_len, const Bytes& needle,
160 word needle_len, word start, word end) {
161 DCHECK_BOUND(haystack_len, haystack.length());
162 DCHECK_BOUND(needle_len, needle.length());
163 Slice::adjustSearchIndices(&start, &end, haystack_len);
164 for (word i = end - needle_len; i >= start; i--) {
165 bool has_match = true;
166 for (word j = 0; has_match && j < needle_len; j++) {
167 has_match = haystack.byteAt(i + j) == needle.byteAt(j);
168 }
169 if (has_match) {
170 return i;
171 }
172 }
173 return -1;
174}
175
176RawObject bytesReprSingleQuotes(Thread* thread, const Bytes& bytes) {
177 HandleScope scope(thread);
178 Byteslike byteslike(&scope, thread, *bytes);
179 // Precalculate the length of the result to minimize allocation.
180 word length = byteslike.length();
181 word result_length = length + 3; // b''
182 for (word i = 0; i < length; i++) {
183 byte current = byteslike.byteAt(i);
184 switch (current) {
185 case '\t':
186 case '\n':
187 case '\r':
188 case '\'':
189 case '\\':
190 result_length++;
191 break;
192 default:
193 if (!ASCII::isPrintable(current)) {
194 result_length += 3;
195 }
196 }
197 }
198
199 if (result_length > SmallInt::kMaxValue) {
200 return thread->raiseWithFmt(LayoutId::kOverflowError,
201 "bytes object is too large to make repr");
202 }
203 return thread->runtime()->byteslikeRepr(thread, byteslike, result_length,
204 '\'');
205}
206
207// Returns the index of the first byte in bytes that is not in chars.
208static word bytesSpanLeft(const Bytes& bytes, word bytes_len,
209 const Bytes& chars, word chars_len) {
210 for (word left = 0; left < bytes_len; left++) {
211 byte ch = bytes.byteAt(left);
212 bool found_in_chars = false;
213 for (word i = 0; i < chars_len; i++) {
214 if (ch == chars.byteAt(i)) {
215 found_in_chars = true;
216 break;
217 }
218 }
219 if (!found_in_chars) {
220 return left;
221 }
222 }
223 return bytes_len;
224}
225
226// Returns the index of the last byte in bytes that is not in chars. Stops at
227// and returns the left bound if all characters to the right were found.
228static word bytesSpanRight(const Bytes& bytes, word bytes_len,
229 const Bytes& chars, word chars_len, word left) {
230 for (word right = bytes_len; left < right; right--) {
231 byte ch = bytes.byteAt(right - 1);
232 bool found_in_chars = false;
233 for (word i = 0; i < chars_len; i++) {
234 if (ch == chars.byteAt(i)) {
235 found_in_chars = true;
236 break;
237 }
238 }
239 if (!found_in_chars) {
240 return right;
241 }
242 }
243 return left;
244}
245
246RawObject bytesSplitLines(Thread* thread, const Bytes& bytes, word length,
247 bool keepends) {
248 HandleScope scope(thread);
249 Runtime* runtime = thread->runtime();
250 List result(&scope, runtime->newList());
251 Object subseq(&scope, Unbound::object());
252
253 for (word i = 0, j = 0; i < length; j = i) {
254 // Skip newline bytes
255 while (i < length) {
256 byte b = bytes.byteAt(i);
257 // PEP-278
258 if (b == '\n' || b == '\r') {
259 break;
260 }
261 i++;
262 }
263
264 word eol_pos = i;
265 if (i < length) {
266 word cur = i;
267 word next = i + 1;
268 i++;
269 // Check for \r\n specifically
270 if (bytes.byteAt(cur) == '\r' && next < length &&
271 bytes.byteAt(next) == '\n') {
272 i++;
273 }
274 if (keepends) {
275 eol_pos = i;
276 }
277 }
278
279 // If there are no newlines, the bytes returned should be identity-equal
280 if (j == 0 && eol_pos == length) {
281 runtime->listAdd(thread, result, bytes);
282 return *result;
283 }
284
285 subseq = bytesSubseq(thread, bytes, j, eol_pos - j);
286 runtime->listAdd(thread, result, subseq);
287 }
288
289 return *result;
290}
291
292RawObject bytesStrip(Thread* thread, const Bytes& bytes, word bytes_len,
293 const Bytes& chars, word chars_len) {
294 word left = bytesSpanLeft(bytes, bytes_len, chars, chars_len);
295 word right = bytesSpanRight(bytes, bytes_len, chars, chars_len, left);
296 return bytesSubseq(thread, bytes, left, right - left);
297}
298
299RawObject bytesStripLeft(Thread* thread, const Bytes& bytes, word bytes_len,
300 const Bytes& chars, word chars_len) {
301 word left = bytesSpanLeft(bytes, bytes_len, chars, chars_len);
302 return bytesSubseq(thread, bytes, left, bytes_len - left);
303}
304
305RawObject bytesStripRight(Thread* thread, const Bytes& bytes, word bytes_len,
306 const Bytes& chars, word chars_len) {
307 word right = bytesSpanRight(bytes, bytes_len, chars, chars_len, 0);
308 return bytesSubseq(thread, bytes, 0, right);
309}
310
311RawObject bytesStripSpace(Thread* thread, const Bytes& bytes, word len) {
312 word left = 0;
313 while (left < len && ASCII::isSpace(bytes.byteAt(left))) {
314 left++;
315 }
316 word right = len;
317 while (right > left && ASCII::isSpace(bytes.byteAt(right - 1))) {
318 right--;
319 }
320 return bytesSubseq(thread, bytes, left, right - left);
321}
322
323RawObject bytesStripSpaceLeft(Thread* thread, const Bytes& bytes, word len) {
324 word left = 0;
325 while (left < len && ASCII::isSpace(bytes.byteAt(left))) {
326 left++;
327 }
328 return bytesSubseq(thread, bytes, left, len - left);
329}
330
331RawObject bytesStripSpaceRight(Thread* thread, const Bytes& bytes, word len) {
332 word right = len;
333 while (right > 0 && ASCII::isSpace(bytes.byteAt(right - 1))) {
334 right--;
335 }
336 return bytesSubseq(thread, bytes, 0, right);
337}
338
339RawObject bytesSubseq(Thread* thread, const Bytes& bytes, word start,
340 word length) {
341 DCHECK_BOUND(start, bytes.length());
342 DCHECK_BOUND(length, bytes.length() - start);
343 if (length <= SmallBytes::kMaxLength) {
344 byte buffer[SmallBytes::kMaxLength];
345 for (word i = length - 1; i >= 0; i--) {
346 buffer[i] = bytes.byteAt(start + i);
347 }
348 return SmallBytes::fromBytes({buffer, length});
349 }
350 HandleScope scope(thread);
351 MutableBytes result(&scope,
352 thread->runtime()->newMutableBytesUninitialized(length));
353 result.replaceFromWithStartAt(/*dst_start=*/0, DataArray::cast(*bytes),
354 length, start);
355 return result.becomeImmutable();
356}
357
358static bool bytesIsValidUTF8Impl(RawBytes bytes, bool allow_surrogates) {
359 for (word i = 0, length = bytes.length(); i < length;) {
360 byte b0 = bytes.byteAt(i++);
361 // ASCII bytes have the topmost bit zero.
362 static_assert(kMaxASCII == 0x7F, "unexpected kMaxASCII value");
363 if (b0 <= 0x7F) continue;
364 // Bytes past this point have the high bit set (0b1xxxxxxx).
365
366 // 0b110xxxxx begins a sequence with one continuation byte.
367 // `b0 < 0b11100000` overestimates and we filter in a 2nd comparison.
368 if (b0 < 0xE0) {
369 // b0 < 0xC0 catches 0b10xxxxxx bytes (invalid continuation bytes).
370 // 0xC0 + 0xC1 (0b11000000 + 0b110000001) would result in range(0x7F)
371 // which should have been encoded as ASCII.
372 if (b0 < 0xC2) {
373 return false;
374 }
375 if (i >= length) {
376 return false;
377 }
378 byte b1 = bytes.byteAt(i++);
379 if (!UTF8::isTrailByte(b1)) {
380 return false;
381 }
382 if (DCHECK_IS_ON()) {
383 uword decoded =
384 static_cast<uword>(b0 & 0x1F) << 6 | static_cast<uword>(b1 & 0x3F);
385 DCHECK(0x80 <= decoded && decoded <= 0x7FF, "unexpected value");
386 }
387
388 // 0b1110xxxx starts a sequence with two continuation bytes.
389 } else if (b0 < 0xF0) {
390 if (i + 1 >= length) {
391 return false;
392 }
393 byte b1 = bytes.byteAt(i++);
394 byte b2 = bytes.byteAt(i++);
395 if (!UTF8::isTrailByte(b1) || !UTF8::isTrailByte(b2)) {
396 return false;
397 }
398
399 // Catch sequences that should have been encoded in 1-2 bytes instead.
400 if (b0 == 0xE0) {
401 if (b1 < 0xA0) {
402 return false;
403 }
404 } else if (!allow_surrogates && b0 == 0xED && b1 >= 0xA0) {
405 // 0b11011xxxxxxxxxxx (0xD800 - 0xDFFF) is declared invalid by unicode
406 // as they look like utf-16 surrogates making it easier to detect
407 // mix-ups.
408 return false;
409 }
410
411 if (DCHECK_IS_ON()) {
412 uword decoded = static_cast<uword>(b0 & 0x0F) << 12 |
413 static_cast<uword>(b1 & 0x3F) << 6 |
414 static_cast<uword>(b2 & 0x3F);
415 DCHECK(0x0800 <= decoded && decoded <= 0xFFFF, "unexpected value");
416 }
417
418 static_assert(kMaxUnicode == 0x10FFFF, "unexpected maxunicode value");
419 // 0b11110xxx starts a sequence with three continuation bytes.
420 // However values bigger than 0x10FFFF are not valid unicode, so we test
421 // b0 < 0b11110101 to overestimate that.
422 } else if (b0 < 0xF5) {
423 if (i + 2 >= length) {
424 return false;
425 }
426 byte b1 = bytes.byteAt(i++);
427 byte b2 = bytes.byteAt(i++);
428 byte b3 = bytes.byteAt(i++);
429 if (!UTF8::isTrailByte(b1) || !UTF8::isTrailByte(b2) ||
430 !UTF8::isTrailByte(b3)) {
431 return false;
432 }
433 // Catch sequences that should have been encoded with 1-3 bytes instead.
434 if (b0 == 0xF0) {
435 if (b1 < 0x90) {
436 return false;
437 }
438 } else if (b0 == 0xF4 && b1 >= 0x90) {
439 // Bigger than kMaxUnicode.
440 return false;
441 }
442
443 if (DCHECK_IS_ON()) {
444 uword decoded = static_cast<uword>(b0 & 0x07) << 16 |
445 static_cast<uword>(b1 & 0x3F) << 12 |
446 static_cast<uword>(b2 & 0x3F) << 6 |
447 static_cast<uword>(b3 & 0x3F);
448 DCHECK(0x10000 <= decoded && decoded <= kMaxUnicode,
449 "unexpected value");
450 }
451 } else {
452 // Invalid prefix byte.
453 return false;
454 }
455 }
456 return true;
457}
458
459bool bytesIsValidUTF8(RawBytes bytes) {
460 return bytesIsValidUTF8Impl(bytes, /*allow_surrogates=*/false);
461}
462
463bool bytesIsValidStr(RawBytes bytes) {
464 return bytesIsValidUTF8Impl(bytes, /*allow_surrogates=*/true);
465}
466
467// Used only for UserBytesBase as a heap-allocated object.
468static const BuiltinAttribute kUserBytesBaseAttributes[] = {
469 {ID(_UserBytes__value), RawUserBytesBase::kValueOffset,
470 AttributeFlags::kHidden},
471};
472
473static const BuiltinAttribute kBytesIteratorAttributes[] = {
474 {ID(_bytes_iterator__iterable), RawBytesIterator::kIterableOffset,
475 AttributeFlags::kHidden},
476 {ID(_bytes_iterator__index), RawBytesIterator::kIndexOffset,
477 AttributeFlags::kHidden},
478};
479
480void initializeBytesTypes(Thread* thread) {
481 HandleScope scope(thread);
482 Runtime* runtime = thread->runtime();
483
484 Type bytes(&scope,
485 addBuiltinType(thread, ID(bytes), LayoutId::kBytes,
486 /*superclass_id=*/LayoutId::kObject,
487 kUserBytesBaseAttributes, RawUserBytesBase::kSize,
488 /*basetype=*/true));
489
490 {
491 Type type(&scope, addImmediateBuiltinType(
492 thread, ID(largebytes), LayoutId::kLargeBytes,
493 /*builtin_base=*/LayoutId::kBytes,
494 /*superclass_id=*/LayoutId::kObject,
495 /*basetype=*/false));
496 Layout::cast(type.instanceLayout()).setDescribedType(*bytes);
497 runtime->setLargeBytesType(type);
498 }
499
500 {
501 Type type(&scope, addImmediateBuiltinType(
502 thread, ID(smallbytes), LayoutId::kSmallBytes,
503 /*builtin_base=*/LayoutId::kBytes,
504 /*superclass_id=*/LayoutId::kObject,
505 /*basetype=*/false));
506 Layout::cast(type.instanceLayout()).setDescribedType(*bytes);
507 runtime->setSmallBytesType(type);
508 }
509
510 addBuiltinType(thread, ID(bytes_iterator), LayoutId::kBytesIterator,
511 /*superclass_id=*/LayoutId::kObject, kBytesIteratorAttributes,
512 BytesIterator::kSize, /*basetype=*/false);
513}
514
515RawObject METH(bytes, __add__)(Thread* thread, Arguments args) {
516 Runtime* runtime = thread->runtime();
517 HandleScope scope(thread);
518 Object self_obj(&scope, args.get(0));
519 if (!runtime->isInstanceOfBytes(*self_obj)) {
520 return thread->raiseRequiresType(self_obj, ID(bytes));
521 }
522 Bytes self(&scope, bytesUnderlying(*self_obj));
523 Object other_obj(&scope, args.get(1));
524 if (runtime->isInstanceOfBytes(*other_obj)) {
525 Bytes other(&scope, bytesUnderlying(*other_obj));
526 return runtime->bytesConcat(thread, self, other);
527 }
528 if (runtime->isInstanceOfBytearray(*other_obj)) {
529 Bytearray other(&scope, *other_obj);
530 Bytes other_bytes(&scope, bytearrayAsBytes(thread, other));
531 return runtime->bytesConcat(thread, self, other_bytes);
532 }
533 // TODO(T38246066): buffers besides bytes/bytearray
534 return thread->raiseWithFmt(LayoutId::kTypeError, "can't concat %T to bytes",
535 &other_obj);
536}
537
538RawObject METH(bytes, __eq__)(Thread* thread, Arguments args) {
539 Runtime* runtime = thread->runtime();
540 HandleScope scope(thread);
541 Object self_obj(&scope, args.get(0));
542 if (!runtime->isInstanceOfBytes(*self_obj)) {
543 return thread->raiseRequiresType(self_obj, ID(bytes));
544 }
545 Object other_obj(&scope, args.get(1));
546 if (!runtime->isInstanceOfBytes(*other_obj)) {
547 return NotImplementedType::object();
548 }
549 Bytes self(&scope, bytesUnderlying(*self_obj));
550 Bytes other(&scope, bytesUnderlying(*other_obj));
551 return Bool::fromBool(self.compare(*other) == 0);
552}
553
554RawObject METH(bytes, __ge__)(Thread* thread, Arguments args) {
555 Runtime* runtime = thread->runtime();
556 HandleScope scope(thread);
557 Object self_obj(&scope, args.get(0));
558 if (!runtime->isInstanceOfBytes(*self_obj)) {
559 return thread->raiseRequiresType(self_obj, ID(bytes));
560 }
561 Object other_obj(&scope, args.get(1));
562 if (!runtime->isInstanceOfBytes(*other_obj)) {
563 return NotImplementedType::object();
564 }
565 Bytes self(&scope, bytesUnderlying(*self_obj));
566 Bytes other(&scope, bytesUnderlying(*other_obj));
567 return Bool::fromBool(self.compare(*other) >= 0);
568}
569
570RawObject METH(bytes, __gt__)(Thread* thread, Arguments args) {
571 Runtime* runtime = thread->runtime();
572 HandleScope scope(thread);
573 Object self_obj(&scope, args.get(0));
574 if (!runtime->isInstanceOfBytes(*self_obj)) {
575 return thread->raiseRequiresType(self_obj, ID(bytes));
576 }
577 Object other_obj(&scope, args.get(1));
578 if (!runtime->isInstanceOfBytes(*other_obj)) {
579 return NotImplementedType::object();
580 }
581 Bytes self(&scope, bytesUnderlying(*self_obj));
582 Bytes other(&scope, bytesUnderlying(*other_obj));
583 return Bool::fromBool(self.compare(*other) > 0);
584}
585
586RawObject METH(bytes, __hash__)(Thread* thread, Arguments args) {
587 HandleScope scope(thread);
588 Object self_obj(&scope, args.get(0));
589 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) {
590 return thread->raiseRequiresType(self_obj, ID(bytes));
591 }
592 Bytes self(&scope, bytesUnderlying(*self_obj));
593 return SmallInt::fromWord(bytesHash(thread, *self));
594}
595
596RawObject METH(bytes, __iter__)(Thread* thread, Arguments args) {
597 HandleScope scope(thread);
598 Object self_obj(&scope, args.get(0));
599 Runtime* runtime = thread->runtime();
600 if (!runtime->isInstanceOfBytes(*self_obj)) {
601 return thread->raiseRequiresType(self_obj, ID(bytes));
602 }
603 Bytes self(&scope, bytesUnderlying(*self_obj));
604 return runtime->newBytesIterator(thread, self);
605}
606
607RawObject METH(bytes, __le__)(Thread* thread, Arguments args) {
608 Runtime* runtime = thread->runtime();
609 HandleScope scope(thread);
610 Object self_obj(&scope, args.get(0));
611 if (!runtime->isInstanceOfBytes(*self_obj)) {
612 return thread->raiseRequiresType(self_obj, ID(bytes));
613 }
614 Object other_obj(&scope, args.get(1));
615 if (!runtime->isInstanceOfBytes(*other_obj)) {
616 return NotImplementedType::object();
617 }
618 Bytes self(&scope, bytesUnderlying(*self_obj));
619 Bytes other(&scope, bytesUnderlying(*other_obj));
620 return Bool::fromBool(self.compare(*other) <= 0);
621}
622
623RawObject METH(bytes, __len__)(Thread* thread, Arguments args) {
624 Runtime* runtime = thread->runtime();
625 HandleScope scope(thread);
626 Object self_obj(&scope, args.get(0));
627 if (!runtime->isInstanceOfBytes(*self_obj)) {
628 return thread->raiseRequiresType(self_obj, ID(bytes));
629 }
630
631 Bytes self(&scope, bytesUnderlying(*self_obj));
632 return SmallInt::fromWord(self.length());
633}
634
635RawObject METH(bytes, __lt__)(Thread* thread, Arguments args) {
636 Runtime* runtime = thread->runtime();
637 HandleScope scope(thread);
638 Object self_obj(&scope, args.get(0));
639 if (!runtime->isInstanceOfBytes(*self_obj)) {
640 return thread->raiseRequiresType(self_obj, ID(bytes));
641 }
642 Object other_obj(&scope, args.get(1));
643 if (!runtime->isInstanceOfBytes(*other_obj)) {
644 return NotImplementedType::object();
645 }
646 Bytes self(&scope, bytesUnderlying(*self_obj));
647 Bytes other(&scope, bytesUnderlying(*other_obj));
648 return Bool::fromBool(self.compare(*other) < 0);
649}
650
651RawObject METH(bytes, __mul__)(Thread* thread, Arguments args) {
652 Runtime* runtime = thread->runtime();
653 HandleScope scope(thread);
654 Object self_obj(&scope, args.get(0));
655 if (!runtime->isInstanceOfBytes(*self_obj)) {
656 return thread->raiseRequiresType(self_obj, ID(bytes));
657 }
658 Object count_index(&scope, args.get(1));
659 Object count_obj(&scope, intFromIndex(thread, count_index));
660 if (count_obj.isError()) return *count_obj;
661 Bytes self(&scope, bytesUnderlying(*self_obj));
662 word count = intUnderlying(*count_obj).asWordSaturated();
663 if (!SmallInt::isValid(count)) {
664 return thread->raiseWithFmt(LayoutId::kOverflowError,
665 "cannot fit '%T' into an index-sized integer",
666 &count_obj);
667 }
668 word length = self.length();
669 if (count <= 0 || length == 0) {
670 return Bytes::empty();
671 }
672 if (count == 1) {
673 return *self;
674 }
675 word new_length;
676 if (__builtin_mul_overflow(length, count, &new_length) ||
677 !SmallInt::isValid(new_length)) {
678 return thread->raiseWithFmt(LayoutId::kOverflowError,
679 "repeated bytes are too long");
680 }
681 return runtime->bytesRepeat(thread, self, length, count);
682}
683
684RawObject METH(bytes, __ne__)(Thread* thread, Arguments args) {
685 Runtime* runtime = thread->runtime();
686 HandleScope scope(thread);
687 Object self_obj(&scope, args.get(0));
688 if (!runtime->isInstanceOfBytes(*self_obj)) {
689 return thread->raiseRequiresType(self_obj, ID(bytes));
690 }
691 Object other_obj(&scope, args.get(1));
692 if (!runtime->isInstanceOfBytes(*other_obj)) {
693 return NotImplementedType::object();
694 }
695 Bytes self(&scope, bytesUnderlying(*self_obj));
696 Bytes other(&scope, bytesUnderlying(*other_obj));
697 return Bool::fromBool(self.compare(*other) != 0);
698}
699
700RawObject METH(bytes, __repr__)(Thread* thread, Arguments args) {
701 Runtime* runtime = thread->runtime();
702 HandleScope scope(thread);
703 Object self_obj(&scope, args.get(0));
704 if (!runtime->isInstanceOfBytes(*self_obj)) {
705 return thread->raiseRequiresType(self_obj, ID(bytes));
706 }
707 Byteslike self(&scope, thread, *self_obj);
708 return byteslikeReprSmartQuotes(thread, self);
709}
710
711RawObject METH(bytes, hex)(Thread* thread, Arguments args) {
712 HandleScope scope(thread);
713 Object obj(&scope, args.get(0));
714 if (!thread->runtime()->isInstanceOfBytes(*obj)) {
715 return thread->raiseRequiresType(obj, ID(bytes));
716 }
717 Bytes self(&scope, bytesUnderlying(*obj));
718 return bytesHex(thread, self, self.length());
719}
720
721RawObject METH(bytes, isalnum)(Thread* thread, Arguments args) {
722 HandleScope scope(thread);
723 Object self_obj(&scope, args.get(0));
724 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) {
725 return thread->raiseRequiresType(self_obj, ID(bytes));
726 }
727 Bytes self(&scope, bytesUnderlying(*self_obj));
728 word length = self.length();
729 if (length == 0) {
730 return Bool::falseObj();
731 }
732 for (word i = 0; i < length; i++) {
733 if (!ASCII::isAlnum(self.byteAt(i))) {
734 return Bool::falseObj();
735 }
736 }
737 return Bool::trueObj();
738}
739
740RawObject METH(bytes, isalpha)(Thread* thread, Arguments args) {
741 HandleScope scope(thread);
742 Object self_obj(&scope, args.get(0));
743 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) {
744 return thread->raiseRequiresType(self_obj, ID(bytes));
745 }
746 Bytes self(&scope, bytesUnderlying(*self_obj));
747 word length = self.length();
748 if (length == 0) {
749 return Bool::falseObj();
750 }
751 for (word i = 0; i < length; i++) {
752 if (!ASCII::isAlpha(self.byteAt(i))) {
753 return Bool::falseObj();
754 }
755 }
756 return Bool::trueObj();
757}
758
759RawObject METH(bytes, isdigit)(Thread* thread, Arguments args) {
760 HandleScope scope(thread);
761 Object self_obj(&scope, args.get(0));
762 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) {
763 return thread->raiseRequiresType(self_obj, ID(bytes));
764 }
765 Bytes self(&scope, bytesUnderlying(*self_obj));
766 word length = self.length();
767 if (length == 0) {
768 return Bool::falseObj();
769 }
770 for (word i = 0; i < length; i++) {
771 if (!ASCII::isDigit(self.byteAt(i))) {
772 return Bool::falseObj();
773 }
774 }
775 return Bool::trueObj();
776}
777
778RawObject METH(bytes, islower)(Thread* thread, Arguments args) {
779 HandleScope scope(thread);
780 Object self_obj(&scope, args.get(0));
781 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) {
782 return thread->raiseRequiresType(self_obj, ID(bytes));
783 }
784 Bytes self(&scope, bytesUnderlying(*self_obj));
785 word length = self.length();
786 if (length == 0) {
787 return Bool::falseObj();
788 }
789 for (word i = 0; i < length; i++) {
790 if (!ASCII::isLower(self.byteAt(i))) {
791 return Bool::falseObj();
792 }
793 }
794 return Bool::trueObj();
795}
796
797RawObject METH(bytes, isspace)(Thread* thread, Arguments args) {
798 HandleScope scope(thread);
799 Object self_obj(&scope, args.get(0));
800 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) {
801 return thread->raiseRequiresType(self_obj, ID(bytes));
802 }
803 Bytes self(&scope, bytesUnderlying(*self_obj));
804 word length = self.length();
805 if (length == 0) {
806 return Bool::falseObj();
807 }
808 for (word i = 0; i < length; i++) {
809 if (!ASCII::isSpace(self.byteAt(i))) {
810 return Bool::falseObj();
811 }
812 }
813 return Bool::trueObj();
814}
815
816RawObject METH(bytes, istitle)(Thread* thread, Arguments args) {
817 HandleScope scope(thread);
818 Object self_obj(&scope, args.get(0));
819 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) {
820 return thread->raiseRequiresType(self_obj, ID(bytes));
821 }
822 Bytes self(&scope, bytesUnderlying(*self_obj));
823 word length = self.length();
824
825 bool cased = false;
826 bool previous_is_cased = false;
827 for (word i = 0; i < length; i++) {
828 byte b = self.byteAt(i);
829 if (ASCII::isUpper(b)) {
830 if (previous_is_cased) {
831 return Bool::falseObj();
832 }
833 cased = true;
834 previous_is_cased = true;
835 } else if (ASCII::isLower(b)) {
836 if (!previous_is_cased) {
837 return Bool::falseObj();
838 }
839 cased = true;
840 previous_is_cased = true;
841 } else {
842 previous_is_cased = false;
843 }
844 }
845 return Bool::fromBool(cased);
846}
847
848RawObject METH(bytes, isupper)(Thread* thread, Arguments args) {
849 HandleScope scope(thread);
850 Object self_obj(&scope, args.get(0));
851 if (!thread->runtime()->isInstanceOfBytes(*self_obj)) {
852 return thread->raiseRequiresType(self_obj, ID(bytes));
853 }
854 Bytes self(&scope, bytesUnderlying(*self_obj));
855 word length = self.length();
856 if (length == 0) {
857 return Bool::falseObj();
858 }
859 for (word i = 0; i < length; i++) {
860 if (!ASCII::isUpper(self.byteAt(i))) {
861 return Bool::falseObj();
862 }
863 }
864 return Bool::trueObj();
865}
866
867RawObject METH(bytes, lower)(Thread* thread, Arguments args) {
868 HandleScope scope(thread);
869 Object self(&scope, args.get(0));
870 Runtime* runtime = thread->runtime();
871 if (!runtime->isInstanceOfBytes(*self)) {
872 return thread->raiseRequiresType(self, ID(bytes));
873 }
874 self = bytesUnderlying(*self);
875 if (self.isSmallBytes()) {
876 SmallBytes small_bytes(&scope, *self);
877 word length = small_bytes.length();
878 byte buffer[SmallBytes::kMaxLength];
879 small_bytes.copyTo(buffer, length);
880 for (word i = 0; i < length; i++) {
881 buffer[i] = ASCII::toLower(buffer[i]);
882 }
883 return SmallBytes::fromBytes(View<byte>(buffer, length));
884 }
885 LargeBytes large_bytes(&scope, *self);
886 word length = large_bytes.length();
887 MutableBytes result(&scope, runtime->newMutableBytesUninitialized(length));
888 for (word i = 0; i < length; i++) {
889 result.byteAtPut(i, ASCII::toLower(large_bytes.byteAt(i)));
890 }
891 return result.becomeImmutable();
892}
893
894RawObject METH(bytes, lstrip)(Thread* thread, Arguments args) {
895 HandleScope scope(thread);
896 Object self_obj(&scope, args.get(0));
897 Runtime* runtime = thread->runtime();
898 if (!runtime->isInstanceOfBytes(*self_obj)) {
899 return thread->raiseRequiresType(self_obj, ID(bytes));
900 }
901 Bytes self(&scope, bytesUnderlying(*self_obj));
902 Object chars_obj(&scope, args.get(1));
903 if (chars_obj.isNoneType()) {
904 return bytesStripSpaceLeft(thread, self, self.length());
905 }
906 if (runtime->isInstanceOfBytes(*chars_obj)) {
907 Bytes chars(&scope, bytesUnderlying(*chars_obj));
908 return bytesStripLeft(thread, self, self.length(), chars, chars.length());
909 }
910 if (runtime->isInstanceOfBytearray(*chars_obj)) {
911 Bytearray chars(&scope, *chars_obj);
912 Bytes chars_bytes(&scope, chars.items());
913 return bytesStripLeft(thread, self, self.length(), chars_bytes,
914 chars.numItems());
915 }
916 // TODO(T38246066): support bytes-like objects other than bytes, bytearray
917 return thread->raiseWithFmt(LayoutId::kTypeError,
918 "a bytes-like object is required, not '%T'",
919 &chars_obj);
920}
921
922RawObject METH(bytes, rstrip)(Thread* thread, Arguments args) {
923 HandleScope scope(thread);
924 Object self_obj(&scope, args.get(0));
925 Runtime* runtime = thread->runtime();
926 if (!runtime->isInstanceOfBytes(*self_obj)) {
927 return thread->raiseRequiresType(self_obj, ID(bytes));
928 }
929 Bytes self(&scope, bytesUnderlying(*self_obj));
930 Object chars_obj(&scope, args.get(1));
931 if (chars_obj.isNoneType()) {
932 return bytesStripSpaceRight(thread, self, self.length());
933 }
934 if (runtime->isInstanceOfBytes(*chars_obj)) {
935 Bytes chars(&scope, bytesUnderlying(*chars_obj));
936 return bytesStripRight(thread, self, self.length(), chars, chars.length());
937 }
938 if (runtime->isInstanceOfBytearray(*chars_obj)) {
939 Bytearray chars(&scope, *chars_obj);
940 Bytes chars_bytes(&scope, chars.items());
941 return bytesStripRight(thread, self, self.length(), chars_bytes,
942 chars.numItems());
943 }
944 // TODO(T38246066): support bytes-like objects other than bytes, bytearray
945 return thread->raiseWithFmt(LayoutId::kTypeError,
946 "a bytes-like object is required, not '%T'",
947 &chars_obj);
948}
949
950RawObject METH(bytes, strip)(Thread* thread, Arguments args) {
951 HandleScope scope(thread);
952 Object self_obj(&scope, args.get(0));
953 Runtime* runtime = thread->runtime();
954 if (!runtime->isInstanceOfBytes(*self_obj)) {
955 return thread->raiseRequiresType(self_obj, ID(bytes));
956 }
957 Bytes self(&scope, bytesUnderlying(*self_obj));
958 Object chars_obj(&scope, args.get(1));
959 if (chars_obj.isNoneType()) {
960 return bytesStripSpace(thread, self, self.length());
961 }
962 if (runtime->isInstanceOfBytes(*chars_obj)) {
963 Bytes chars(&scope, bytesUnderlying(*chars_obj));
964 return bytesStrip(thread, self, self.length(), chars, chars.length());
965 }
966 if (runtime->isInstanceOfBytearray(*chars_obj)) {
967 Bytearray chars(&scope, *chars_obj);
968 Bytes chars_bytes(&scope, chars.items());
969 return bytesStrip(thread, self, self.length(), chars_bytes,
970 chars.numItems());
971 }
972 // TODO(T38246066): support bytes-like objects other than bytes, bytearray
973 return thread->raiseWithFmt(LayoutId::kTypeError,
974 "a bytes-like object is required, not '%T'",
975 &chars_obj);
976}
977
978RawObject METH(bytes, splitlines)(Thread* thread, Arguments args) {
979 HandleScope scope(thread);
980 Runtime* runtime = thread->runtime();
981 Object self_obj(&scope, args.get(0));
982 Object keepends_obj(&scope, args.get(1));
983 if (!runtime->isInstanceOfBytes(*self_obj)) {
984 return thread->raiseRequiresType(self_obj, ID(bytes));
985 }
986 if (!runtime->isInstanceOfInt(*keepends_obj)) {
987 return thread->raiseRequiresType(keepends_obj, ID(int));
988 }
989 Bytes self(&scope, bytesUnderlying(*self_obj));
990 bool keepends = !intUnderlying(*keepends_obj).isZero();
991 return bytesSplitLines(thread, self, self.length(), keepends);
992}
993
994RawObject METH(bytes, translate)(Thread* thread, Arguments args) {
995 HandleScope scope(thread);
996 Object self_obj(&scope, args.get(0));
997 Runtime* runtime = thread->runtime();
998 if (!runtime->isInstanceOfBytes(*self_obj)) {
999 return thread->raiseRequiresType(self_obj, ID(bytes));
1000 }
1001 Bytes self(&scope, bytesUnderlying(*self_obj));
1002 Object table_obj(&scope, args.get(1));
1003 word table_length;
1004 if (table_obj.isNoneType()) {
1005 table_length = kByteTranslationTableLength;
1006 table_obj = Bytes::empty();
1007 } else if (runtime->isInstanceOfBytes(*table_obj)) {
1008 Bytes bytes(&scope, bytesUnderlying(*table_obj));
1009 table_length = bytes.length();
1010 table_obj = *bytes;
1011 } else if (runtime->isInstanceOfBytearray(*table_obj)) {
1012 Bytearray array(&scope, *table_obj);
1013 table_length = array.numItems();
1014 table_obj = array.items();
1015 } else {
1016 // TODO(T38246066): allow any bytes-like object
1017 return thread->raiseWithFmt(LayoutId::kTypeError,
1018 "a bytes-like object is required, not '%T'",
1019 &table_obj);
1020 }
1021 if (table_length != kByteTranslationTableLength) {
1022 return thread->raiseWithFmt(LayoutId::kValueError,
1023 "translation table must be %w characters long",
1024 kByteTranslationTableLength);
1025 }
1026 Bytes table(&scope, *table_obj);
1027 Object del(&scope, args.get(2));
1028 if (runtime->isInstanceOfBytes(*del)) {
1029 Bytes bytes(&scope, bytesUnderlying(*del));
1030 return runtime->bytesTranslate(thread, self, self.length(), table,
1031 table_length, bytes, bytes.length());
1032 }
1033 if (runtime->isInstanceOfBytearray(*del)) {
1034 Bytearray array(&scope, *del);
1035 Bytes bytes(&scope, array.items());
1036 return runtime->bytesTranslate(thread, self, self.length(), table,
1037 table_length, bytes, array.numItems());
1038 }
1039 // TODO(T38246066): allow any bytes-like object
1040 return thread->raiseWithFmt(
1041 LayoutId::kTypeError, "a bytes-like object is required, not '%T'", &del);
1042}
1043
1044RawObject METH(bytes, upper)(Thread* thread, Arguments args) {
1045 HandleScope scope(thread);
1046 Object self(&scope, args.get(0));
1047 Runtime* runtime = thread->runtime();
1048 if (!runtime->isInstanceOfBytes(*self)) {
1049 return thread->raiseRequiresType(self, ID(bytes));
1050 }
1051 self = bytesUnderlying(*self);
1052 if (self.isSmallBytes()) {
1053 SmallBytes small_bytes(&scope, *self);
1054 word length = small_bytes.length();
1055 byte buffer[SmallBytes::kMaxLength];
1056 small_bytes.copyTo(buffer, length);
1057 for (word i = 0; i < length; i++) {
1058 buffer[i] = ASCII::toUpper(buffer[i]);
1059 }
1060 return SmallBytes::fromBytes(View<byte>(buffer, length));
1061 }
1062 LargeBytes large_bytes(&scope, *self);
1063 word length = large_bytes.length();
1064 MutableBytes result(&scope, runtime->newMutableBytesUninitialized(length));
1065 for (word i = 0; i < length; i++) {
1066 result.byteAtPut(i, ASCII::toUpper(large_bytes.byteAt(i)));
1067 }
1068 return result.becomeImmutable();
1069}
1070
1071RawObject METH(bytes_iterator, __iter__)(Thread* thread, Arguments args) {
1072 HandleScope scope(thread);
1073 Object self(&scope, args.get(0));
1074 if (!self.isBytesIterator()) {
1075 return thread->raiseRequiresType(self, ID(bytes_iterator));
1076 }
1077 return *self;
1078}
1079
1080RawObject METH(bytes_iterator, __next__)(Thread* thread, Arguments args) {
1081 HandleScope scope(thread);
1082 Object self(&scope, args.get(0));
1083 if (!self.isBytesIterator()) {
1084 return thread->raiseRequiresType(self, ID(bytes_iterator));
1085 }
1086 BytesIterator iter(&scope, *self);
1087 Bytes underlying(&scope, iter.iterable());
1088 word index = iter.index();
1089 if (index >= underlying.length()) {
1090 return thread->raise(LayoutId::kStopIteration, NoneType::object());
1091 }
1092 iter.setIndex(index + 1);
1093 return SmallInt::fromWord(underlying.byteAt(index));
1094}
1095
1096RawObject METH(bytes_iterator, __length_hint__)(Thread* thread,
1097 Arguments args) {
1098 HandleScope scope(thread);
1099 Object self(&scope, args.get(0));
1100 if (!self.isBytesIterator()) {
1101 return thread->raiseRequiresType(self, ID(bytes_iterator));
1102 }
1103 BytesIterator iter(&scope, *self);
1104 Bytes underlying(&scope, iter.iterable());
1105 return SmallInt::fromWord(underlying.length() - iter.index());
1106}
1107
1108} // namespace py