this repo has no description
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
2#include "builtins.h"
3#include "dict-builtins.h"
4#include "float-builtins.h"
5#include "handles.h"
6#include "objects.h"
7#include "runtime.h"
8#include "str-builtins.h"
9#include "str-intern.h"
10#include "thread.h"
11#include "unicode.h"
12#include "utils.h"
13
14namespace py {
15
16static const word kDictKeySetInitLength = 8;
17static const int kNumUEscapeChars = 4;
18
// Positions of the arguments of `_json.loads`. The values are cast to `word`
// and handed to `Arguments::get`, so the enumerators must stay in this order.
enum class LoadsArg {
  kString = 0,       // 0
  kEncoding,         // 1
  kCls,              // 2
  kObjectHook,       // 3
  kParseFloat,       // 4
  kParseInt,         // 5
  kParseConstant,    // 6
  kObjectPairsHook,  // 7
  kKw,               // 8
};
30
31struct JSONParser {
32 // Index of next byte to read.
33 word next;
34 word length;
35 Arguments args;
36 bool has_object_hook;
37 bool has_object_pairs_hook;
38 bool has_parse_constant;
39 bool has_parse_float;
40 bool has_parse_int;
41 bool strict;
42};
43
44static NEVER_INLINE RawObject callObjectHook(Thread* thread, JSONParser* env,
45 const Object& dict) {
46 HandleScope scope(thread);
47 DCHECK(dict.isDict(), "expected dict");
48 if (env->has_object_pairs_hook) {
49 Object hook(&scope,
50 env->args.get(static_cast<word>(LoadsArg::kObjectPairsHook)));
51 Object items(&scope, thread->invokeMethod1(dict, ID(items)));
52 if (items.isErrorException()) return *items;
53 Object list_type(&scope, thread->runtime()->typeAt(LayoutId::kList));
54 Object list(&scope, Interpreter::call1(thread, list_type, items));
55 if (list.isErrorException()) return *list;
56 return Interpreter::call1(thread, hook, list);
57 }
58 Object hook(&scope, env->args.get(static_cast<word>(LoadsArg::kObjectHook)));
59 return Interpreter::call1(thread, hook, dict);
60}
61
62static NEVER_INLINE int callParseConstant(Thread* thread, JSONParser* env,
63 const DataArray& data, word length,
64 Object* value_out) {
65 HandleScope scope(thread);
66 Object hook(&scope,
67 env->args.get(static_cast<word>(LoadsArg::kParseConstant)));
68 Str string(&scope, dataArraySubstr(thread, data, env->next - length, length));
69 *value_out = Interpreter::call1(thread, hook, string);
70 if (value_out->isErrorException()) return -1;
71 return 0;
72}
73
74static NEVER_INLINE RawObject callParseFloat(Thread* thread, JSONParser* env,
75 const DataArray& data, word begin,
76 word length) {
77 HandleScope scope(thread);
78 Object hook(&scope, env->args.get(static_cast<word>(LoadsArg::kParseFloat)));
79 Object str(&scope, dataArraySubstr(thread, data, begin, length));
80 return Interpreter::call1(thread, hook, str);
81}
82
83static NEVER_INLINE RawObject callParseInt(Thread* thread, JSONParser* env,
84 const DataArray& data, word begin) {
85 HandleScope scope(thread);
86 Object hook(&scope, env->args.get(static_cast<word>(LoadsArg::kParseInt)));
87 Object str(&scope, dataArraySubstr(thread, data, begin, env->next - begin));
88 return Interpreter::call1(thread, hook, str);
89}
90
91static byte nextNonWhitespace(Thread*, JSONParser* env, const DataArray& data) {
92 word next = env->next;
93 word length = env->length;
94 byte b;
95 do {
96 if (next >= length) {
97 // Set `next` to `length + 1` to indicate EOF (end of file).
98 env->next = length + 1;
99 return 0;
100 }
101 b = data.byteAt(next++);
102 } while (b == ' ' || b == '\t' || b == '\n' || b == '\r');
103 env->next = next;
104 return b;
105}
106
107static NEVER_INLINE RawObject raiseJSONDecodeError(Thread* thread,
108 JSONParser* env,
109 const DataArray& data,
110 word index,
111 const char* msg) {
112 HandleScope scope(thread);
113 Runtime* runtime = thread->runtime();
114 Object json_decode_error(&scope, runtime->lookupNameInModule(
115 thread, ID(_json), ID(JSONDecodeError)));
116 CHECK(json_decode_error.isType(), "_json.JSONDecodeError not found");
117
118 // TODO(T81331502): Add helper function for byte offset to code point index
119 // translation.
120 word pos = 0;
121 for (word i = 0, cp_length; i < index; i += cp_length) {
122 data.codePointAt(i, &cp_length);
123 pos++;
124 }
125
126 // Convert byte position to codepoint.
127 Object msg_str(&scope, runtime->newStrFromCStr(msg));
128 Object doc(&scope, env->args.get(static_cast<word>(LoadsArg::kString)));
129 Object pos_obj(&scope, runtime->newInt(pos));
130 Object args(&scope, runtime->newTupleWith3(msg_str, doc, pos_obj));
131 return thread->raiseWithType(*json_decode_error, *args);
132}
133
134// Given a bytes object, search for UTF byte order marks (BOMs). If there are
135// none apply heuristics to detect UTF-32, UTF-16 and UTF-8 encodings in big or
136// little endian. Inputs that are UTF-32 or UTF-16 are decoded and a `str`
137// object is returned; UTF-8 inputs are returned unchanged with `next` possibly
138// incremented to skip a BOM.
139static RawObject maybeDecode(Thread* thread, const Object& s,
140 const Bytes& bytes, word length, word* next) {
141 // Cannot guess with just 0 or 1 bytes. Assume it's UTF-8.
142 if (length < 2) return *bytes;
143
144 // Search for BOM sequences. If there are none, search for `0` bytes which
145 // are a strong sign for the high bits of UTF-16/UTF-32 encodings, since
146 // legal JSON must start with an ASCII character with high byte(s) zero.
147 // The code looks at the first 2 bytes to detect UTF-16 and the first 4
148 // bytes to detect UTF-32.
149 const char* encoding;
150 byte b0 = bytes.byteAt(0);
151 byte b1 = bytes.byteAt(1);
152 if (b0 == UTF8::kBOM[0] && b1 == UTF8::kBOM[1] && length >= 3 &&
153 bytes.byteAt(2) == UTF8::kBOM[2]) {
154 *next += 3;
155 return *bytes;
156 }
157 if (b0 == UTF32::kBOMLittleEndian[0] && b1 == UTF32::kBOMLittleEndian[1] &&
158 length >= 4 && bytes.byteAt(2) == UTF32::kBOMLittleEndian[2] &&
159 bytes.byteAt(3) == UTF32::kBOMLittleEndian[3]) {
160 encoding = "utf-32";
161 } else if (b0 == UTF32::kBOMBigEndian[0] && b1 == UTF32::kBOMBigEndian[1] &&
162 length >= 4 && bytes.byteAt(2) == UTF32::kBOMBigEndian[2] &&
163 bytes.byteAt(3) == UTF32::kBOMBigEndian[3]) {
164 encoding = "utf-32";
165 } else if (b0 == UTF16::kBOMLittleEndian[0] &&
166 b1 == UTF16::kBOMLittleEndian[1]) {
167 encoding = "utf-16";
168 } else if (b0 == UTF16::kBOMBigEndian[0] && b1 == UTF16::kBOMBigEndian[1]) {
169 encoding = "utf-16";
170 } else if (b0 == 0) {
171 if (b1 == 0 && length >= 4) {
172 encoding = "utf-32-be";
173 } else {
174 encoding = "utf-16-be";
175 }
176 } else if (b1 == 0) {
177 DCHECK(b0 != 0, "Expected b0 != 0");
178 if (length >= 4 && bytes.byteAt(2) == 0 && bytes.byteAt(3) == 0) {
179 encoding = "utf-32-le";
180 } else {
181 encoding = "utf-16-le";
182 }
183 } else {
184 // Default to UTF-8 which the decoder handles naturally.
185 return *bytes;
186 }
187
188 HandleScope scope(thread);
189 Object encoding_str(&scope, Runtime::internStrFromCStr(thread, encoding));
190 Object errors(&scope, Runtime::internStrFromCStr(thread, "surrogatepass"));
191 return thread->invokeFunction3(ID(_codecs), ID(decode), s, encoding_str,
192 errors);
193}
194
195static RawObject scanEscapeSequence(Thread* thread, JSONParser* env,
196 const DataArray& data, word begin) {
197 word next = env->next;
198 word length = env->length;
199 if (next >= length) {
200 return raiseJSONDecodeError(thread, env, data, begin - 1,
201 "Unterminated string starting at");
202 }
203 byte ascii_result;
204 byte b = data.byteAt(next++);
205 switch (b) {
206 case '"':
207 case '\\':
208 case '/':
209 ascii_result = b;
210 break;
211 case 'b':
212 ascii_result = '\b';
213 break;
214 case 'f':
215 ascii_result = '\f';
216 break;
217 case 'n':
218 ascii_result = '\n';
219 break;
220 case 'r':
221 ascii_result = '\r';
222 break;
223 case 't':
224 ascii_result = '\t';
225 break;
226 case 'u': {
227 int32_t code_point;
228 if (next >= length - kNumUEscapeChars) {
229 return raiseJSONDecodeError(thread, env, data, next - 1,
230 "Invalid \\uXXXX escape");
231 }
232 code_point = 0;
233 word end = next + kNumUEscapeChars;
234 do {
235 b = data.byteAt(next++);
236 code_point <<= kBitsPerHexDigit;
237 if ('0' <= b && b <= '9') {
238 code_point |= b - '0';
239 } else if ('a' <= b && b <= 'f') {
240 code_point |= b - 'a' + 10;
241 } else if ('A' <= b && b <= 'F') {
242 code_point |= b - 'A' + 10;
243 } else {
244 return raiseJSONDecodeError(thread, env, data, end - kNumUEscapeChars,
245 "Invalid \\uXXXX escape");
246 }
247 } while (next < end);
248 if (Unicode::isHighSurrogate(code_point) &&
249 next < length - (kNumUEscapeChars + 2) && data.byteAt(next) == '\\' &&
250 data.byteAt(next + 1) == 'u') {
251 word next2 = next + 2;
252 int32_t code_point2 = 0;
253 word end2 = next2 + kNumUEscapeChars;
254 do {
255 byte b2 = data.byteAt(next2++);
256 code_point2 <<= kBitsPerHexDigit;
257 if ('0' <= b2 && b2 <= '9') {
258 code_point2 |= b2 - '0';
259 } else if ('a' <= b2 && b2 <= 'f') {
260 code_point2 |= b2 - 'a' + 10;
261 } else if ('A' <= b2 && b2 <= 'F') {
262 code_point2 |= b2 - 'A' + 10;
263 } else {
264 code_point2 = 0;
265 break;
266 }
267 } while (next2 < end2);
268 if (Unicode::isLowSurrogate(code_point2)) {
269 code_point = Unicode::combineSurrogates(code_point, code_point2);
270 next = end2;
271 }
272 }
273 env->next = next;
274 return SmallStr::fromCodePoint(code_point);
275 }
276 default:
277 return raiseJSONDecodeError(thread, env, data, next - 2,
278 "Invalid \\escape");
279 }
280 env->next = next;
281 return SmallStr::fromCodePoint(ascii_result);
282}
283
284static RawObject scanFloat(Thread* thread, JSONParser* env,
285 const DataArray& data, byte b, word begin) {
286 word next = env->next;
287 word length = env->length;
288 if (b == '.') {
289 // Need at least 1 digit.
290 if (next >= length) {
291 return raiseJSONDecodeError(thread, env, data, next - 1, "Extra data");
292 }
293 b = data.byteAt(next++);
294 if (b < '0' || b > '9') {
295 return raiseJSONDecodeError(thread, env, data, next - 2, "Extra data");
296 }
297 // Optionally followed by more digits.
298 do {
299 if (next >= length) {
300 b = 0;
301 next++;
302 break;
303 }
304 b = data.byteAt(next++);
305 } while ('0' <= b && b <= '9');
306 }
307 if (b == 'e' || b == 'E') {
308 word e_begin = next;
309 if (next >= length) {
310 return raiseJSONDecodeError(thread, env, data, e_begin - 1, "Extra data");
311 }
312 b = data.byteAt(next++);
313 if (b == '+' || b == '-') {
314 if (next >= length) {
315 return raiseJSONDecodeError(thread, env, data, e_begin - 1,
316 "Extra data");
317 }
318 b = data.byteAt(next++);
319 }
320 // Need at least 1 digit.
321 if (b < '0' || b > '9') {
322 return raiseJSONDecodeError(thread, env, data, e_begin - 1, "Extra data");
323 }
324 // Optionally followed by more digits.
325 do {
326 if (next >= length) {
327 b = 0;
328 next++;
329 break;
330 }
331 b = data.byteAt(next++);
332 } while ('0' <= b && b <= '9');
333 }
334 next--;
335 env->next = next;
336
337 word number_length = next - begin;
338 if (env->has_parse_float) {
339 return callParseFloat(thread, env, data, begin, number_length);
340 }
341 unique_c_ptr<byte> buf(static_cast<byte*>(std::malloc(number_length + 1)));
342 data.copyToStartAt(buf.get(), number_length, begin);
343 buf.get()[number_length] = '\0';
344 return floatFromDigits(thread, reinterpret_cast<char*>(buf.get()),
345 number_length);
346}
347
348static RawObject scanLargeInt(Thread* thread, JSONParser* env,
349 const DataArray& data, byte b, word begin,
350 bool negative, word value) {
351 HandleScope scope(thread);
352 Runtime* runtime = thread->runtime();
353 word next = env->next;
354 word length = env->length;
355 Int result(&scope, SmallInt::fromWord(value));
356 Int factor(&scope, SmallInt::fromWord(SmallInt::kMaxDigits10Pow));
357 Int value_int(&scope, SmallInt::fromWord(0));
358
359 value = 0;
360 word digits = 0;
361 for (;;) {
362 value += b - '0';
363 if (next >= length) break;
364 b = data.byteAt(next++);
365 if ('0' <= b && b <= '9') {
366 digits++;
367 if (digits >= SmallInt::kMaxDigits10) {
368 value_int = Int::cast(SmallInt::fromWord(value));
369 result = runtime->intMultiply(thread, result, factor);
370 result = runtime->intAdd(thread, result, value_int);
371 digits = 0;
372 value = 0;
373 } else {
374 value *= 10;
375 }
376 continue;
377 }
378
379 if (b == '.' || b == 'e' || b == 'E') {
380 env->next = next;
381 return scanFloat(thread, env, data, b, begin);
382 }
383
384 next--;
385 break;
386 }
387 env->next = next;
388 if (env->has_parse_int) {
389 return callParseInt(thread, env, data, begin);
390 }
391
392 word f = negative ? -10 : 10;
393 for (word i = 0; i < digits; i++) {
394 f *= 10;
395 }
396 factor = Int::cast(SmallInt::fromWord(f));
397 result = runtime->intMultiply(thread, result, factor);
398 value_int = Int::cast(SmallInt::fromWord(value));
399 if (negative) {
400 result = runtime->intSubtract(thread, result, value_int);
401 } else {
402 result = runtime->intAdd(thread, result, value_int);
403 }
404 return *result;
405}
406
407static RawObject scanString(Thread* thread, JSONParser* env,
408 const DataArray& data) {
409 struct Segment {
410 int32_t begin_or_negative_length;
411 int32_t length_or_utf8;
412 };
413
414 Runtime* runtime = thread->runtime();
415 word next = env->next;
416 word length = env->length;
417 word result_length = 0;
418 Vector<Segment> segments;
419 word begin = next;
420 word segment_begin;
421 word segment_length;
422 for (;;) {
423 segment_begin = next;
424 byte b;
425 for (;;) {
426 if (next >= length) {
427 return raiseJSONDecodeError(thread, env, data, begin - 1,
428 "Unterminated string starting at");
429 }
430 b = data.byteAt(next++);
431 if (b == '"' || b == '\\') {
432 break;
433 }
434 if (ASCII::isControlCharacter(b) && env->strict) {
435 return raiseJSONDecodeError(thread, env, data, next - 1,
436 "Invalid control character at");
437 }
438 }
439 // Segment ends before the current `"` or `\` character.
440 segment_length = next - segment_begin - 1;
441 if (b == '"') {
442 break;
443 }
444
445 if (segment_length > 0) {
446 segments.push_back(Segment{static_cast<int32_t>(segment_begin),
447 static_cast<int32_t>(segment_length)});
448 result_length += segment_length;
449 }
450
451 DCHECK(b == '\\', "Expected backslash");
452 env->next = next;
453 RawObject escape_result = scanEscapeSequence(thread, env, data, begin);
454 if (escape_result.isErrorException()) return escape_result;
455 next = env->next;
456 RawSmallStr str = SmallStr::cast(escape_result);
457 word str_length = str.length();
458 Segment segment;
459 segment.begin_or_negative_length = -str_length;
460 segment.length_or_utf8 = 0;
461 CHECK(str_length <= static_cast<word>(sizeof(segment.length_or_utf8)),
462 "encoded codepoint should fit in `length_or_utf8`");
463 str.copyTo(reinterpret_cast<byte*>(&segment.length_or_utf8), str_length);
464 result_length += str_length;
465 segments.push_back(segment);
466 }
467 env->next = next;
468 if (segments.size() == 0) {
469 return dataArraySubstr(thread, data, segment_begin, segment_length);
470 }
471 if (segment_length > 0) {
472 segments.push_back(Segment{static_cast<int32_t>(segment_begin),
473 static_cast<int32_t>(segment_length)});
474 result_length += segment_length;
475 }
476 HandleScope scope(thread);
477 MutableBytes result(&scope,
478 runtime->newMutableBytesUninitialized(result_length));
479 word result_index = 0;
480 for (Segment segment : segments) {
481 word begin_or_negative_length = segment.begin_or_negative_length;
482 word length_or_utf8 = segment.length_or_utf8;
483 if (begin_or_negative_length >= 0) {
484 result.replaceFromWithStartAt(result_index, *data, length_or_utf8,
485 begin_or_negative_length);
486 result_index += length_or_utf8;
487 } else {
488 word utf8_length = -begin_or_negative_length;
489 result.replaceFromWithAll(
490 result_index,
491 View<byte>(reinterpret_cast<byte*>(&length_or_utf8), utf8_length));
492 result_index += utf8_length;
493 }
494 }
495 DCHECK(result_index == result_length, "index/length mismatch");
496 return result.becomeStr();
497}
498
499static RawObject scanNumber(Thread* thread, JSONParser* env,
500 const DataArray& data, byte b) {
501 word begin = env->next - 1;
502 word next = env->next;
503 word length = env->length;
504 bool negative = (b == '-');
505 if (negative) {
506 if (next >= length) {
507 return raiseJSONDecodeError(thread, env, data, length - 1,
508 "Expecting value");
509 }
510 negative = true;
511 b = data.byteAt(next++);
512 if (b < '0' || b > '9') {
513 return raiseJSONDecodeError(thread, env, data, next - 2,
514 "Expecting value");
515 }
516 }
517 if (b == '0') {
518 if (next < length) {
519 b = data.byteAt(next++);
520 if (b == '.' || b == 'e' || b == 'E') {
521 env->next = next;
522 return scanFloat(thread, env, data, b, begin);
523 }
524 next--;
525 }
526 env->next = next;
527 if (env->has_parse_int) {
528 return callParseInt(thread, env, data, begin);
529 }
530 return SmallInt::fromWord(0);
531 }
532
533 word value = 0;
534 word digits_left = SmallInt::kMaxDigits10;
535 for (;;) {
536 value += b - '0';
537 if (next >= length) break;
538 b = data.byteAt(next++);
539 if ('0' <= b && b <= '9') {
540 digits_left--;
541 if (digits_left == 0) {
542 env->next = next;
543 return scanLargeInt(thread, env, data, b, begin, negative, value);
544 }
545 value *= 10;
546 continue;
547 }
548
549 if (b == '.' || b == 'e' || b == 'E') {
550 env->next = next;
551 return scanFloat(thread, env, data, b, begin);
552 }
553
554 next--;
555 break;
556 }
557 env->next = next;
558 if (env->has_parse_int) {
559 return callParseInt(thread, env, data, begin);
560 }
561 return SmallInt::fromWord(negative ? -value : value);
562}
563
564static int scan(Thread* thread, JSONParser* env, const DataArray& data, byte b,
565 Object* value_out) {
566 for (;;) {
567 word next = env->next;
568 word length = env->length;
569
570 switch (b) {
571 case '"': {
572 *value_out = scanString(thread, env, data);
573 if (value_out->isErrorException()) return -1;
574 return 0;
575 }
576 case '{':
577 return '{';
578 case '[':
579 return '[';
580
581 case '-': // `-Infinity` or number
582 if (next <= length - 8 && data.byteAt(next) == 'I' &&
583 data.byteAt(next + 1) == 'n' && data.byteAt(next + 2) == 'f' &&
584 data.byteAt(next + 3) == 'i' && data.byteAt(next + 4) == 'n' &&
585 data.byteAt(next + 5) == 'i' && data.byteAt(next + 6) == 't' &&
586 data.byteAt(next + 7) == 'y') {
587 env->next = next + 8;
588 if (env->has_parse_constant) {
589 return callParseConstant(thread, env, data, 9, value_out);
590 }
591 *value_out = thread->runtime()->newFloat(-kDoubleInfinity);
592 return 0;
593 }
594 FALLTHROUGH;
595 case '0':
596 case '1':
597 case '2':
598 case '3':
599 case '4':
600 case '5':
601 case '6':
602 case '7':
603 case '8':
604 case '9': {
605 RawObject value = scanNumber(thread, env, data, b);
606 *value_out = value;
607 if (value.isErrorException()) return -1;
608 return 0;
609 }
610
611 case 'n': // `null`
612 if (next <= length - 3 && data.byteAt(next) == 'u' &&
613 data.byteAt(next + 1) == 'l' && data.byteAt(next + 2) == 'l') {
614 env->next = next + 3;
615 *value_out = NoneType::object();
616 return 0;
617 }
618 break;
619 case 't': // `true`
620 if (next <= length - 3 && data.byteAt(next) == 'r' &&
621 data.byteAt(next + 1) == 'u' && data.byteAt(next + 2) == 'e') {
622 env->next = next + 3;
623 *value_out = Bool::trueObj();
624 return 0;
625 }
626 break;
627 case 'f': // `false`
628 if (next <= length - 4 && data.byteAt(next) == 'a' &&
629 data.byteAt(next + 1) == 'l' && data.byteAt(next + 2) == 's' &&
630 data.byteAt(next + 3) == 'e') {
631 env->next = next + 4;
632 *value_out = Bool::falseObj();
633 return 0;
634 }
635 break;
636 case 'N': // `NaN`
637 if (next <= length - 2 && data.byteAt(next) == 'a' &&
638 data.byteAt(next + 1) == 'N') {
639 env->next = next + 2;
640 if (env->has_parse_constant) {
641 return callParseConstant(thread, env, data, 3, value_out);
642 }
643 *value_out = thread->runtime()->newFloat(kDoubleNaN);
644 return 0;
645 }
646 break;
647 case 'I': // `Infinity`
648 if (next <= length - 7 && data.byteAt(next) == 'n' &&
649 data.byteAt(next + 1) == 'f' && data.byteAt(next + 2) == 'i' &&
650 data.byteAt(next + 3) == 'n' && data.byteAt(next + 4) == 'i' &&
651 data.byteAt(next + 5) == 't' && data.byteAt(next + 6) == 'y') {
652 env->next = next + 7;
653 if (env->has_parse_constant) {
654 return callParseConstant(thread, env, data, 8, value_out);
655 }
656 *value_out = thread->runtime()->newFloat(kDoubleInfinity);
657 return 0;
658 }
659 break;
660 default:
661 break;
662 }
663 DCHECK(b != ' ' && b != '\t' && b != '\r' && b != '\n',
664 "whitespace not skipped");
665 if (next == 1 && b == UTF8::kBOM[0] && length >= 3 &&
666 data.byteAt(1) == UTF8::kBOM[1] && data.byteAt(2) == UTF8::kBOM[2]) {
667 *value_out =
668 raiseJSONDecodeError(thread, env, data, next,
669 "Unexpected UTF-8 BOM (decode using utf-8-sig)");
670 return -1;
671 }
672 *value_out =
673 raiseJSONDecodeError(thread, env, data, next - 1, "Expecting value");
674 return -1;
675 }
676}
677
678static inline RawObject scanDictKey(Thread* thread, JSONParser* env,
679 const DataArray& data, byte b,
680 MutableTuple* dict_key_set,
681 word* dict_key_set_remaining) {
682 if (b != '"') {
683 return raiseJSONDecodeError(
684 thread, env, data, env->next - 1,
685 "Expecting property name enclosed in double quotes");
686 }
687
688 HandleScope scope(thread);
689 Object dict_key(&scope, scanString(thread, env, data));
690 if (dict_key.isErrorException()) return *dict_key;
691
692 if (dict_key.isLargeStr()) {
693 RawObject str_key_interned = NoneType::object();
694 bool added =
695 internSetAdd(thread, **dict_key_set, dict_key, &str_key_interned);
696 dict_key = str_key_interned;
697 if (added && --(*dict_key_set_remaining) == 0) {
698 *dict_key_set =
699 internSetGrow(thread, **dict_key_set, dict_key_set_remaining);
700 }
701 }
702
703 b = nextNonWhitespace(thread, env, data);
704 if (b != ':') {
705 return raiseJSONDecodeError(thread, env, data, env->next - 1,
706 "Expecting ':' delimiter");
707 }
708 return *dict_key;
709}
710
711static RawObject parse(Thread* thread, JSONParser* env, const DataArray& data) {
712 HandleScope scope(thread);
713 Runtime* runtime = thread->runtime();
714
715 Object container(&scope, NoneType::object());
716 Object dict_key(&scope, NoneType::object());
717 Object value(&scope, NoneType::object());
718 MutableTuple dict_key_set(&scope,
719 runtime->newMutableTuple(kDictKeySetInitLength));
720 word dict_key_set_remaining =
721 internSetComputeRemaining(kDictKeySetInitLength);
722 byte b = nextNonWhitespace(thread, env, data);
723 for (;;) {
724 int scan_result = scan(thread, env, data, b, &value);
725 switch (scan_result) {
726 case 0:
727 // Already have a finished object.
728 b = nextNonWhitespace(thread, env, data);
729 break;
730 case '[':
731 value = runtime->newList();
732 b = nextNonWhitespace(thread, env, data);
733 if (b != ']') {
734 if (thread->wouldStackOverflow(kPointerSize) &&
735 thread->handleInterrupt(kPointerSize)) {
736 return Error::exception();
737 }
738 thread->stackPush(*container);
739 container = *value;
740 continue;
741 }
742 b = nextNonWhitespace(thread, env, data);
743 break;
744 case '{':
745 value = runtime->newDict();
746 b = nextNonWhitespace(thread, env, data);
747 if (b != '}') {
748 if (thread->wouldStackOverflow(2 * kPointerSize) &&
749 thread->handleInterrupt(2 * kPointerSize)) {
750 return Error::exception();
751 }
752 thread->stackPush(*container);
753 container = *value;
754 dict_key = scanDictKey(thread, env, data, b, &dict_key_set,
755 &dict_key_set_remaining);
756 if (dict_key.isErrorException()) return *dict_key;
757 b = nextNonWhitespace(thread, env, data);
758 thread->stackPush(*dict_key);
759 continue;
760 }
761 if (env->has_object_hook) {
762 value = callObjectHook(thread, env, value);
763 if (value.isErrorException()) return *value;
764 }
765 b = nextNonWhitespace(thread, env, data);
766 break;
767 default:
768 DCHECK(value.isErrorException(), "expected error raised");
769 return *value;
770 }
771
772 for (;;) {
773 // We finished reading the object `value`. Add it to the outer container
774 // or return if there is no container left.
775
776 if (container.isList()) {
777 List list(&scope, *container);
778 runtime->listAdd(thread, list, value);
779 if (b == ',') {
780 b = nextNonWhitespace(thread, env, data);
781 break;
782 }
783 if (b == ']') {
784 value = *container;
785 container = thread->stackPop();
786 b = nextNonWhitespace(thread, env, data);
787 continue;
788 }
789 return raiseJSONDecodeError(thread, env, data, env->next - 1,
790 "Expecting ',' delimiter");
791 }
792
793 if (container.isDict()) {
794 Dict dict(&scope, *container);
795 dict_key = thread->stackPop();
796 dictAtPutByStr(thread, dict, dict_key, value);
797 if (b == ',') {
798 b = nextNonWhitespace(thread, env, data);
799 dict_key = scanDictKey(thread, env, data, b, &dict_key_set,
800 &dict_key_set_remaining);
801 if (dict_key.isErrorException()) return *dict_key;
802 b = nextNonWhitespace(thread, env, data);
803 thread->stackPush(*dict_key);
804 break;
805 }
806 if (b == '}') {
807 value = *container;
808 container = thread->stackPop();
809 b = nextNonWhitespace(thread, env, data);
810
811 if (env->has_object_hook) {
812 value = callObjectHook(thread, env, value);
813 if (value.isErrorException()) return *value;
814 }
815 continue;
816 }
817 return raiseJSONDecodeError(thread, env, data, env->next - 1,
818 "Expecting ',' delimiter");
819 }
820
821 DCHECK(container.isNoneType(), "expected no container");
822 if (env->next <= env->length) {
823 return raiseJSONDecodeError(thread, env, data, env->next - 1,
824 "Extra data");
825 }
826 return *value;
827 }
828 }
829}
830
831RawObject FUNC(_json, loads)(Thread* thread, Arguments args) {
832 HandleScope scope(thread);
833 Runtime* runtime = thread->runtime();
834 DataArray data(&scope, runtime->emptyMutableBytes());
835 Object s(&scope, args.get(static_cast<word>(LoadsArg::kString)));
836 word length;
837 word next = 0;
838 if (runtime->isInstanceOfStr(*s)) {
839 s = strUnderlying(*s);
840 length = Str::cast(*s).length();
841 } else if (runtime->isInstanceOfBytes(*s)) {
842 Bytes bytes(&scope, bytesUnderlying(*s));
843 length = bytes.length();
844 s = maybeDecode(thread, s, bytes, length, &next);
845 if (s.isErrorException()) return *s;
846 if (s == bytes) {
847 if (bytes.isSmallBytes()) {
848 MutableBytes copy(&scope,
849 runtime->newMutableBytesUninitialized(length));
850 copy.replaceFromWithBytes(0, *bytes, length);
851 data = *copy;
852 } else {
853 data = LargeBytes::cast(*bytes);
854 }
855 } else {
856 CHECK(s.isStr(), "expected str return from decoder");
857 length = Str::cast(*s).length();
858 }
859 } else if (runtime->isInstanceOfBytearray(*s)) {
860 Bytearray array(&scope, *s);
861 Bytes items(&scope, array.items());
862 length = array.numItems();
863 s = maybeDecode(thread, s, items, length, &next);
864 if (s.isErrorException()) return *s;
865 if (s == items) {
866 data = MutableBytes::cast(*items);
867 } else {
868 CHECK(s.isStr(), "expected str return from decoder");
869 length = Str::cast(*s).length();
870 }
871 } else {
872 return thread->raiseWithFmt(
873 LayoutId::kTypeError,
874 "the JSON object must be str, bytes or bytearray, not %T", &s);
875 }
876
877 if (s.isSmallStr()) {
878 DCHECK(length == SmallStr::cast(*s).length(), "length mismatch");
879 MutableBytes copy(&scope, runtime->newMutableBytesUninitialized(length));
880 copy.replaceFromWithStr(0, Str::cast(*s), length);
881 data = *copy;
882 } else if (s.isLargeStr()) {
883 DCHECK(length == LargeStr::cast(*s).length(), "length mismatch");
884 data = LargeStr::cast(*s);
885 }
886
887 Dict kw(&scope, args.get(static_cast<word>(LoadsArg::kKw)));
888 Object strict_obj(&scope, dictAtById(thread, kw, ID(strict)));
889 bool strict;
890 bool had_strict = false;
891 if (!strict_obj.isErrorNotFound()) {
892 if (!runtime->isInstanceOfInt(*strict_obj)) {
893 return thread->raiseRequiresType(strict_obj, ID(int));
894 }
895 had_strict = true;
896 strict = !intUnderlying(*strict_obj).isZero();
897 } else {
898 strict = true;
899 }
900
901 Object cls(&scope, args.get(static_cast<word>(LoadsArg::kCls)));
902 if (!cls.isNoneType() || kw.numItems() > static_cast<word>(had_strict)) {
903 Object function(&scope, runtime->lookupNameInModule(thread, ID(_json),
904 ID(_decode_with_cls)));
905 CHECK(!function.isErrorNotFound(), "missing function in internal module");
906 thread->stackPush(*function);
907 MutableTuple call_args(&scope, runtime->newMutableTuple(7));
908 call_args.atPut(0, *s);
909 call_args.atPut(1, *cls);
910 call_args.atPut(2, args.get(static_cast<word>(LoadsArg::kObjectHook)));
911 call_args.atPut(3, args.get(static_cast<word>(LoadsArg::kParseFloat)));
912 call_args.atPut(4, args.get(static_cast<word>(LoadsArg::kParseInt)));
913 call_args.atPut(5, args.get(static_cast<word>(LoadsArg::kParseConstant)));
914 call_args.atPut(6, args.get(static_cast<word>(LoadsArg::kObjectPairsHook)));
915 thread->stackPush(call_args.becomeImmutable());
916 thread->stackPush(*kw);
917 return Interpreter::callEx(thread, CallFunctionExFlag::VAR_KEYWORDS);
918 }
919
920 JSONParser env;
921 memset(&env, 0, sizeof(env));
922 env.next = next;
923 env.length = length;
924 env.args = args;
925 env.strict = strict;
926
927 if (!args.get(static_cast<word>(LoadsArg::kObjectHook)).isNoneType()) {
928 env.has_object_hook = true;
929 }
930 if (!args.get(static_cast<word>(LoadsArg::kParseFloat)).isNoneType()) {
931 env.has_parse_float = true;
932 }
933 if (!args.get(static_cast<word>(LoadsArg::kParseInt)).isNoneType()) {
934 env.has_parse_int = true;
935 }
936 if (!args.get(static_cast<word>(LoadsArg::kParseConstant)).isNoneType()) {
937 env.has_parse_constant = true;
938 }
939 if (!args.get(static_cast<word>(LoadsArg::kObjectPairsHook)).isNoneType()) {
940 env.has_object_hook = true;
941 env.has_object_pairs_hook = true;
942 }
943 return parse(thread, &env, data);
944}
945
946} // namespace py