this repo has no description
at trunk 946 lines 32 kB view raw
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) 2#include "builtins.h" 3#include "dict-builtins.h" 4#include "float-builtins.h" 5#include "handles.h" 6#include "objects.h" 7#include "runtime.h" 8#include "str-builtins.h" 9#include "str-intern.h" 10#include "thread.h" 11#include "unicode.h" 12#include "utils.h" 13 14namespace py { 15 16static const word kDictKeySetInitLength = 8; 17static const int kNumUEscapeChars = 4; 18 19enum class LoadsArg { 20 kString = 0, 21 kEncoding = 1, 22 kCls = 2, 23 kObjectHook = 3, 24 kParseFloat = 4, 25 kParseInt = 5, 26 kParseConstant = 6, 27 kObjectPairsHook = 7, 28 kKw = 8, 29}; 30 31struct JSONParser { 32 // Index of next byte to read. 33 word next; 34 word length; 35 Arguments args; 36 bool has_object_hook; 37 bool has_object_pairs_hook; 38 bool has_parse_constant; 39 bool has_parse_float; 40 bool has_parse_int; 41 bool strict; 42}; 43 44static NEVER_INLINE RawObject callObjectHook(Thread* thread, JSONParser* env, 45 const Object& dict) { 46 HandleScope scope(thread); 47 DCHECK(dict.isDict(), "expected dict"); 48 if (env->has_object_pairs_hook) { 49 Object hook(&scope, 50 env->args.get(static_cast<word>(LoadsArg::kObjectPairsHook))); 51 Object items(&scope, thread->invokeMethod1(dict, ID(items))); 52 if (items.isErrorException()) return *items; 53 Object list_type(&scope, thread->runtime()->typeAt(LayoutId::kList)); 54 Object list(&scope, Interpreter::call1(thread, list_type, items)); 55 if (list.isErrorException()) return *list; 56 return Interpreter::call1(thread, hook, list); 57 } 58 Object hook(&scope, env->args.get(static_cast<word>(LoadsArg::kObjectHook))); 59 return Interpreter::call1(thread, hook, dict); 60} 61 62static NEVER_INLINE int callParseConstant(Thread* thread, JSONParser* env, 63 const DataArray& data, word length, 64 Object* value_out) { 65 HandleScope scope(thread); 66 Object hook(&scope, 67 env->args.get(static_cast<word>(LoadsArg::kParseConstant))); 68 Str string(&scope, dataArraySubstr(thread, data, env->next - length, length)); 69 *value_out = Interpreter::call1(thread, hook, string); 70 if (value_out->isErrorException()) return -1; 71 return 0; 72} 73 74static NEVER_INLINE RawObject callParseFloat(Thread* thread, JSONParser* env, 75 const DataArray& data, word begin, 76 word length) { 77 HandleScope scope(thread); 78 Object hook(&scope, env->args.get(static_cast<word>(LoadsArg::kParseFloat))); 79 Object str(&scope, dataArraySubstr(thread, data, begin, length)); 80 return Interpreter::call1(thread, hook, str); 81} 82 83static NEVER_INLINE RawObject callParseInt(Thread* thread, JSONParser* env, 84 const DataArray& data, word begin) { 85 HandleScope scope(thread); 86 Object hook(&scope, env->args.get(static_cast<word>(LoadsArg::kParseInt))); 87 Object str(&scope, dataArraySubstr(thread, data, begin, env->next - begin)); 88 return Interpreter::call1(thread, hook, str); 89} 90 91static byte nextNonWhitespace(Thread*, JSONParser* env, const DataArray& data) { 92 word next = env->next; 93 word length = env->length; 94 byte b; 95 do { 96 if (next >= length) { 97 // Set `next` to `length + 1` to indicate EOF (end of file). 98 env->next = length + 1; 99 return 0; 100 } 101 b = data.byteAt(next++); 102 } while (b == ' ' || b == '\t' || b == '\n' || b == '\r'); 103 env->next = next; 104 return b; 105} 106 107static NEVER_INLINE RawObject raiseJSONDecodeError(Thread* thread, 108 JSONParser* env, 109 const DataArray& data, 110 word index, 111 const char* msg) { 112 HandleScope scope(thread); 113 Runtime* runtime = thread->runtime(); 114 Object json_decode_error(&scope, runtime->lookupNameInModule( 115 thread, ID(_json), ID(JSONDecodeError))); 116 CHECK(json_decode_error.isType(), "_json.JSONDecodeError not found"); 117 118 // TODO(T81331502): Add helper function for byte offset to code point index 119 // translation. 120 word pos = 0; 121 for (word i = 0, cp_length; i < index; i += cp_length) { 122 data.codePointAt(i, &cp_length); 123 pos++; 124 } 125 126 // Convert byte position to codepoint. 127 Object msg_str(&scope, runtime->newStrFromCStr(msg)); 128 Object doc(&scope, env->args.get(static_cast<word>(LoadsArg::kString))); 129 Object pos_obj(&scope, runtime->newInt(pos)); 130 Object args(&scope, runtime->newTupleWith3(msg_str, doc, pos_obj)); 131 return thread->raiseWithType(*json_decode_error, *args); 132} 133 134// Given a bytes object, search for UTF byte order marks (BOMs). If there are 135// none apply heuristics to detect UTF-32, UTF-16 and UTF-8 encodings in big or 136// little endian. Inputs that are UTF-32 or UTF-16 are decoded and a `str` 137// object is returned; UTF-8 inputs are returned unchanged with `next` possibly 138// incremented to skip a BOM. 139static RawObject maybeDecode(Thread* thread, const Object& s, 140 const Bytes& bytes, word length, word* next) { 141 // Cannot guess with just 0 or 1 bytes. Assume it's UTF-8. 142 if (length < 2) return *bytes; 143 144 // Search for BOM sequences. If there are none, search for `0` bytes which 145 // are a strong sign for the high bits of UTF-16/UTF-32 encodings, since 146 // legal JSON must start with an ASCII character with high byte(s) zero. 147 // The code looks at the first 2 bytes to detect UTF-16 and the first 4 148 // bytes to detect UTF-32. 149 const char* encoding; 150 byte b0 = bytes.byteAt(0); 151 byte b1 = bytes.byteAt(1); 152 if (b0 == UTF8::kBOM[0] && b1 == UTF8::kBOM[1] && length >= 3 && 153 bytes.byteAt(2) == UTF8::kBOM[2]) { 154 *next += 3; 155 return *bytes; 156 } 157 if (b0 == UTF32::kBOMLittleEndian[0] && b1 == UTF32::kBOMLittleEndian[1] && 158 length >= 4 && bytes.byteAt(2) == UTF32::kBOMLittleEndian[2] && 159 bytes.byteAt(3) == UTF32::kBOMLittleEndian[3]) { 160 encoding = "utf-32"; 161 } else if (b0 == UTF32::kBOMBigEndian[0] && b1 == UTF32::kBOMBigEndian[1] && 162 length >= 4 && bytes.byteAt(2) == UTF32::kBOMBigEndian[2] && 163 bytes.byteAt(3) == UTF32::kBOMBigEndian[3]) { 164 encoding = "utf-32"; 165 } else if (b0 == UTF16::kBOMLittleEndian[0] && 166 b1 == UTF16::kBOMLittleEndian[1]) { 167 encoding = "utf-16"; 168 } else if (b0 == UTF16::kBOMBigEndian[0] && b1 == UTF16::kBOMBigEndian[1]) { 169 encoding = "utf-16"; 170 } else if (b0 == 0) { 171 if (b1 == 0 && length >= 4) { 172 encoding = "utf-32-be"; 173 } else { 174 encoding = "utf-16-be"; 175 } 176 } else if (b1 == 0) { 177 DCHECK(b0 != 0, "Expected b0 != 0"); 178 if (length >= 4 && bytes.byteAt(2) == 0 && bytes.byteAt(3) == 0) { 179 encoding = "utf-32-le"; 180 } else { 181 encoding = "utf-16-le"; 182 } 183 } else { 184 // Default to UTF-8 which the decoder handles naturally. 185 return *bytes; 186 } 187 188 HandleScope scope(thread); 189 Object encoding_str(&scope, Runtime::internStrFromCStr(thread, encoding)); 190 Object errors(&scope, Runtime::internStrFromCStr(thread, "surrogatepass")); 191 return thread->invokeFunction3(ID(_codecs), ID(decode), s, encoding_str, 192 errors); 193} 194 195static RawObject scanEscapeSequence(Thread* thread, JSONParser* env, 196 const DataArray& data, word begin) { 197 word next = env->next; 198 word length = env->length; 199 if (next >= length) { 200 return raiseJSONDecodeError(thread, env, data, begin - 1, 201 "Unterminated string starting at"); 202 } 203 byte ascii_result; 204 byte b = data.byteAt(next++); 205 switch (b) { 206 case '"': 207 case '\\': 208 case '/': 209 ascii_result = b; 210 break; 211 case 'b': 212 ascii_result = '\b'; 213 break; 214 case 'f': 215 ascii_result = '\f'; 216 break; 217 case 'n': 218 ascii_result = '\n'; 219 break; 220 case 'r': 221 ascii_result = '\r'; 222 break; 223 case 't': 224 ascii_result = '\t'; 225 break; 226 case 'u': { 227 int32_t code_point; 228 if (next >= length - kNumUEscapeChars) { 229 return raiseJSONDecodeError(thread, env, data, next - 1, 230 "Invalid \\uXXXX escape"); 231 } 232 code_point = 0; 233 word end = next + kNumUEscapeChars; 234 do { 235 b = data.byteAt(next++); 236 code_point <<= kBitsPerHexDigit; 237 if ('0' <= b && b <= '9') { 238 code_point |= b - '0'; 239 } else if ('a' <= b && b <= 'f') { 240 code_point |= b - 'a' + 10; 241 } else if ('A' <= b && b <= 'F') { 242 code_point |= b - 'A' + 10; 243 } else { 244 return raiseJSONDecodeError(thread, env, data, end - kNumUEscapeChars, 245 "Invalid \\uXXXX escape"); 246 } 247 } while (next < end); 248 if (Unicode::isHighSurrogate(code_point) && 249 next < length - (kNumUEscapeChars + 2) && data.byteAt(next) == '\\' && 250 data.byteAt(next + 1) == 'u') { 251 word next2 = next + 2; 252 int32_t code_point2 = 0; 253 word end2 = next2 + kNumUEscapeChars; 254 do { 255 byte b2 = data.byteAt(next2++); 256 code_point2 <<= kBitsPerHexDigit; 257 if ('0' <= b2 && b2 <= '9') { 258 code_point2 |= b2 - '0'; 259 } else if ('a' <= b2 && b2 <= 'f') { 260 code_point2 |= b2 - 'a' + 10; 261 } else if ('A' <= b2 && b2 <= 'F') { 262 code_point2 |= b2 - 'A' + 10; 263 } else { 264 code_point2 = 0; 265 break; 266 } 267 } while (next2 < end2); 268 if (Unicode::isLowSurrogate(code_point2)) { 269 code_point = Unicode::combineSurrogates(code_point, code_point2); 270 next = end2; 271 } 272 } 273 env->next = next; 274 return SmallStr::fromCodePoint(code_point); 275 } 276 default: 277 return raiseJSONDecodeError(thread, env, data, next - 2, 278 "Invalid \\escape"); 279 } 280 env->next = next; 281 return SmallStr::fromCodePoint(ascii_result); 282} 283 284static RawObject scanFloat(Thread* thread, JSONParser* env, 285 const DataArray& data, byte b, word begin) { 286 word next = env->next; 287 word length = env->length; 288 if (b == '.') { 289 // Need at least 1 digit. 290 if (next >= length) { 291 return raiseJSONDecodeError(thread, env, data, next - 1, "Extra data"); 292 } 293 b = data.byteAt(next++); 294 if (b < '0' || b > '9') { 295 return raiseJSONDecodeError(thread, env, data, next - 2, "Extra data"); 296 } 297 // Optionally followed by more digits. 298 do { 299 if (next >= length) { 300 b = 0; 301 next++; 302 break; 303 } 304 b = data.byteAt(next++); 305 } while ('0' <= b && b <= '9'); 306 } 307 if (b == 'e' || b == 'E') { 308 word e_begin = next; 309 if (next >= length) { 310 return raiseJSONDecodeError(thread, env, data, e_begin - 1, "Extra data"); 311 } 312 b = data.byteAt(next++); 313 if (b == '+' || b == '-') { 314 if (next >= length) { 315 return raiseJSONDecodeError(thread, env, data, e_begin - 1, 316 "Extra data"); 317 } 318 b = data.byteAt(next++); 319 } 320 // Need at least 1 digit. 321 if (b < '0' || b > '9') { 322 return raiseJSONDecodeError(thread, env, data, e_begin - 1, "Extra data"); 323 } 324 // Optionally followed by more digits. 325 do { 326 if (next >= length) { 327 b = 0; 328 next++; 329 break; 330 } 331 b = data.byteAt(next++); 332 } while ('0' <= b && b <= '9'); 333 } 334 next--; 335 env->next = next; 336 337 word number_length = next - begin; 338 if (env->has_parse_float) { 339 return callParseFloat(thread, env, data, begin, number_length); 340 } 341 unique_c_ptr<byte> buf(static_cast<byte*>(std::malloc(number_length + 1))); 342 data.copyToStartAt(buf.get(), number_length, begin); 343 buf.get()[number_length] = '\0'; 344 return floatFromDigits(thread, reinterpret_cast<char*>(buf.get()), 345 number_length); 346} 347 348static RawObject scanLargeInt(Thread* thread, JSONParser* env, 349 const DataArray& data, byte b, word begin, 350 bool negative, word value) { 351 HandleScope scope(thread); 352 Runtime* runtime = thread->runtime(); 353 word next = env->next; 354 word length = env->length; 355 Int result(&scope, SmallInt::fromWord(value)); 356 Int factor(&scope, SmallInt::fromWord(SmallInt::kMaxDigits10Pow)); 357 Int value_int(&scope, SmallInt::fromWord(0)); 358 359 value = 0; 360 word digits = 0; 361 for (;;) { 362 value += b - '0'; 363 if (next >= length) break; 364 b = data.byteAt(next++); 365 if ('0' <= b && b <= '9') { 366 digits++; 367 if (digits >= SmallInt::kMaxDigits10) { 368 value_int = Int::cast(SmallInt::fromWord(value)); 369 result = runtime->intMultiply(thread, result, factor); 370 result = runtime->intAdd(thread, result, value_int); 371 digits = 0; 372 value = 0; 373 } else { 374 value *= 10; 375 } 376 continue; 377 } 378 379 if (b == '.' || b == 'e' || b == 'E') { 380 env->next = next; 381 return scanFloat(thread, env, data, b, begin); 382 } 383 384 next--; 385 break; 386 } 387 env->next = next; 388 if (env->has_parse_int) { 389 return callParseInt(thread, env, data, begin); 390 } 391 392 word f = negative ? -10 : 10; 393 for (word i = 0; i < digits; i++) { 394 f *= 10; 395 } 396 factor = Int::cast(SmallInt::fromWord(f)); 397 result = runtime->intMultiply(thread, result, factor); 398 value_int = Int::cast(SmallInt::fromWord(value)); 399 if (negative) { 400 result = runtime->intSubtract(thread, result, value_int); 401 } else { 402 result = runtime->intAdd(thread, result, value_int); 403 } 404 return *result; 405} 406 407static RawObject scanString(Thread* thread, JSONParser* env, 408 const DataArray& data) { 409 struct Segment { 410 int32_t begin_or_negative_length; 411 int32_t length_or_utf8; 412 }; 413 414 Runtime* runtime = thread->runtime(); 415 word next = env->next; 416 word length = env->length; 417 word result_length = 0; 418 Vector<Segment> segments; 419 word begin = next; 420 word segment_begin; 421 word segment_length; 422 for (;;) { 423 segment_begin = next; 424 byte b; 425 for (;;) { 426 if (next >= length) { 427 return raiseJSONDecodeError(thread, env, data, begin - 1, 428 "Unterminated string starting at"); 429 } 430 b = data.byteAt(next++); 431 if (b == '"' || b == '\\') { 432 break; 433 } 434 if (ASCII::isControlCharacter(b) && env->strict) { 435 return raiseJSONDecodeError(thread, env, data, next - 1, 436 "Invalid control character at"); 437 } 438 } 439 // Segment ends before the current `"` or `\` character. 440 segment_length = next - segment_begin - 1; 441 if (b == '"') { 442 break; 443 } 444 445 if (segment_length > 0) { 446 segments.push_back(Segment{static_cast<int32_t>(segment_begin), 447 static_cast<int32_t>(segment_length)}); 448 result_length += segment_length; 449 } 450 451 DCHECK(b == '\\', "Expected backslash"); 452 env->next = next; 453 RawObject escape_result = scanEscapeSequence(thread, env, data, begin); 454 if (escape_result.isErrorException()) return escape_result; 455 next = env->next; 456 RawSmallStr str = SmallStr::cast(escape_result); 457 word str_length = str.length(); 458 Segment segment; 459 segment.begin_or_negative_length = -str_length; 460 segment.length_or_utf8 = 0; 461 CHECK(str_length <= static_cast<word>(sizeof(segment.length_or_utf8)), 462 "encoded codepoint should fit in `length_or_utf8`"); 463 str.copyTo(reinterpret_cast<byte*>(&segment.length_or_utf8), str_length); 464 result_length += str_length; 465 segments.push_back(segment); 466 } 467 env->next = next; 468 if (segments.size() == 0) { 469 return dataArraySubstr(thread, data, segment_begin, segment_length); 470 } 471 if (segment_length > 0) { 472 segments.push_back(Segment{static_cast<int32_t>(segment_begin), 473 static_cast<int32_t>(segment_length)}); 474 result_length += segment_length; 475 } 476 HandleScope scope(thread); 477 MutableBytes result(&scope, 478 runtime->newMutableBytesUninitialized(result_length)); 479 word result_index = 0; 480 for (Segment segment : segments) { 481 word begin_or_negative_length = segment.begin_or_negative_length; 482 word length_or_utf8 = segment.length_or_utf8; 483 if (begin_or_negative_length >= 0) { 484 result.replaceFromWithStartAt(result_index, *data, length_or_utf8, 485 begin_or_negative_length); 486 result_index += length_or_utf8; 487 } else { 488 word utf8_length = -begin_or_negative_length; 489 result.replaceFromWithAll( 490 result_index, 491 View<byte>(reinterpret_cast<byte*>(&length_or_utf8), utf8_length)); 492 result_index += utf8_length; 493 } 494 } 495 DCHECK(result_index == result_length, "index/length mismatch"); 496 return result.becomeStr(); 497} 498 499static RawObject scanNumber(Thread* thread, JSONParser* env, 500 const DataArray& data, byte b) { 501 word begin = env->next - 1; 502 word next = env->next; 503 word length = env->length; 504 bool negative = (b == '-'); 505 if (negative) { 506 if (next >= length) { 507 return raiseJSONDecodeError(thread, env, data, length - 1, 508 "Expecting value"); 509 } 510 negative = true; 511 b = data.byteAt(next++); 512 if (b < '0' || b > '9') { 513 return raiseJSONDecodeError(thread, env, data, next - 2, 514 "Expecting value"); 515 } 516 } 517 if (b == '0') { 518 if (next < length) { 519 b = data.byteAt(next++); 520 if (b == '.' || b == 'e' || b == 'E') { 521 env->next = next; 522 return scanFloat(thread, env, data, b, begin); 523 } 524 next--; 525 } 526 env->next = next; 527 if (env->has_parse_int) { 528 return callParseInt(thread, env, data, begin); 529 } 530 return SmallInt::fromWord(0); 531 } 532 533 word value = 0; 534 word digits_left = SmallInt::kMaxDigits10; 535 for (;;) { 536 value += b - '0'; 537 if (next >= length) break; 538 b = data.byteAt(next++); 539 if ('0' <= b && b <= '9') { 540 digits_left--; 541 if (digits_left == 0) { 542 env->next = next; 543 return scanLargeInt(thread, env, data, b, begin, negative, value); 544 } 545 value *= 10; 546 continue; 547 } 548 549 if (b == '.' || b == 'e' || b == 'E') { 550 env->next = next; 551 return scanFloat(thread, env, data, b, begin); 552 } 553 554 next--; 555 break; 556 } 557 env->next = next; 558 if (env->has_parse_int) { 559 return callParseInt(thread, env, data, begin); 560 } 561 return SmallInt::fromWord(negative ? -value : value); 562} 563 564static int scan(Thread* thread, JSONParser* env, const DataArray& data, byte b, 565 Object* value_out) { 566 for (;;) { 567 word next = env->next; 568 word length = env->length; 569 570 switch (b) { 571 case '"': { 572 *value_out = scanString(thread, env, data); 573 if (value_out->isErrorException()) return -1; 574 return 0; 575 } 576 case '{': 577 return '{'; 578 case '[': 579 return '['; 580 581 case '-': // `-Infinity` or number 582 if (next <= length - 8 && data.byteAt(next) == 'I' && 583 data.byteAt(next + 1) == 'n' && data.byteAt(next + 2) == 'f' && 584 data.byteAt(next + 3) == 'i' && data.byteAt(next + 4) == 'n' && 585 data.byteAt(next + 5) == 'i' && data.byteAt(next + 6) == 't' && 586 data.byteAt(next + 7) == 'y') { 587 env->next = next + 8; 588 if (env->has_parse_constant) { 589 return callParseConstant(thread, env, data, 9, value_out); 590 } 591 *value_out = thread->runtime()->newFloat(-kDoubleInfinity); 592 return 0; 593 } 594 FALLTHROUGH; 595 case '0': 596 case '1': 597 case '2': 598 case '3': 599 case '4': 600 case '5': 601 case '6': 602 case '7': 603 case '8': 604 case '9': { 605 RawObject value = scanNumber(thread, env, data, b); 606 *value_out = value; 607 if (value.isErrorException()) return -1; 608 return 0; 609 } 610 611 case 'n': // `null` 612 if (next <= length - 3 && data.byteAt(next) == 'u' && 613 data.byteAt(next + 1) == 'l' && data.byteAt(next + 2) == 'l') { 614 env->next = next + 3; 615 *value_out = NoneType::object(); 616 return 0; 617 } 618 break; 619 case 't': // `true` 620 if (next <= length - 3 && data.byteAt(next) == 'r' && 621 data.byteAt(next + 1) == 'u' && data.byteAt(next + 2) == 'e') { 622 env->next = next + 3; 623 *value_out = Bool::trueObj(); 624 return 0; 625 } 626 break; 627 case 'f': // `false` 628 if (next <= length - 4 && data.byteAt(next) == 'a' && 629 data.byteAt(next + 1) == 'l' && data.byteAt(next + 2) == 's' && 630 data.byteAt(next + 3) == 'e') { 631 env->next = next + 4; 632 *value_out = Bool::falseObj(); 633 return 0; 634 } 635 break; 636 case 'N': // `NaN` 637 if (next <= length - 2 && data.byteAt(next) == 'a' && 638 data.byteAt(next + 1) == 'N') { 639 env->next = next + 2; 640 if (env->has_parse_constant) { 641 return callParseConstant(thread, env, data, 3, value_out); 642 } 643 *value_out = thread->runtime()->newFloat(kDoubleNaN); 644 return 0; 645 } 646 break; 647 case 'I': // `Infinity` 648 if (next <= length - 7 && data.byteAt(next) == 'n' && 649 data.byteAt(next + 1) == 'f' && data.byteAt(next + 2) == 'i' && 650 data.byteAt(next + 3) == 'n' && data.byteAt(next + 4) == 'i' && 651 data.byteAt(next + 5) == 't' && data.byteAt(next + 6) == 'y') { 652 env->next = next + 7; 653 if (env->has_parse_constant) { 654 return callParseConstant(thread, env, data, 8, value_out); 655 } 656 *value_out = thread->runtime()->newFloat(kDoubleInfinity); 657 return 0; 658 } 659 break; 660 default: 661 break; 662 } 663 DCHECK(b != ' ' && b != '\t' && b != '\r' && b != '\n', 664 "whitespace not skipped"); 665 if (next == 1 && b == UTF8::kBOM[0] && length >= 3 && 666 data.byteAt(1) == UTF8::kBOM[1] && data.byteAt(2) == UTF8::kBOM[2]) { 667 *value_out = 668 raiseJSONDecodeError(thread, env, data, next, 669 "Unexpected UTF-8 BOM (decode using utf-8-sig)"); 670 return -1; 671 } 672 *value_out = 673 raiseJSONDecodeError(thread, env, data, next - 1, "Expecting value"); 674 return -1; 675 } 676} 677 678static inline RawObject scanDictKey(Thread* thread, JSONParser* env, 679 const DataArray& data, byte b, 680 MutableTuple* dict_key_set, 681 word* dict_key_set_remaining) { 682 if (b != '"') { 683 return raiseJSONDecodeError( 684 thread, env, data, env->next - 1, 685 "Expecting property name enclosed in double quotes"); 686 } 687 688 HandleScope scope(thread); 689 Object dict_key(&scope, scanString(thread, env, data)); 690 if (dict_key.isErrorException()) return *dict_key; 691 692 if (dict_key.isLargeStr()) { 693 RawObject str_key_interned = NoneType::object(); 694 bool added = 695 internSetAdd(thread, **dict_key_set, dict_key, &str_key_interned); 696 dict_key = str_key_interned; 697 if (added && --(*dict_key_set_remaining) == 0) { 698 *dict_key_set = 699 internSetGrow(thread, **dict_key_set, dict_key_set_remaining); 700 } 701 } 702 703 b = nextNonWhitespace(thread, env, data); 704 if (b != ':') { 705 return raiseJSONDecodeError(thread, env, data, env->next - 1, 706 "Expecting ':' delimiter"); 707 } 708 return *dict_key; 709} 710 711static RawObject parse(Thread* thread, JSONParser* env, const DataArray& data) { 712 HandleScope scope(thread); 713 Runtime* runtime = thread->runtime(); 714 715 Object container(&scope, NoneType::object()); 716 Object dict_key(&scope, NoneType::object()); 717 Object value(&scope, NoneType::object()); 718 MutableTuple dict_key_set(&scope, 719 runtime->newMutableTuple(kDictKeySetInitLength)); 720 word dict_key_set_remaining = 721 internSetComputeRemaining(kDictKeySetInitLength); 722 byte b = nextNonWhitespace(thread, env, data); 723 for (;;) { 724 int scan_result = scan(thread, env, data, b, &value); 725 switch (scan_result) { 726 case 0: 727 // Already have a finished object. 728 b = nextNonWhitespace(thread, env, data); 729 break; 730 case '[': 731 value = runtime->newList(); 732 b = nextNonWhitespace(thread, env, data); 733 if (b != ']') { 734 if (thread->wouldStackOverflow(kPointerSize) && 735 thread->handleInterrupt(kPointerSize)) { 736 return Error::exception(); 737 } 738 thread->stackPush(*container); 739 container = *value; 740 continue; 741 } 742 b = nextNonWhitespace(thread, env, data); 743 break; 744 case '{': 745 value = runtime->newDict(); 746 b = nextNonWhitespace(thread, env, data); 747 if (b != '}') { 748 if (thread->wouldStackOverflow(2 * kPointerSize) && 749 thread->handleInterrupt(2 * kPointerSize)) { 750 return Error::exception(); 751 } 752 thread->stackPush(*container); 753 container = *value; 754 dict_key = scanDictKey(thread, env, data, b, &dict_key_set, 755 &dict_key_set_remaining); 756 if (dict_key.isErrorException()) return *dict_key; 757 b = nextNonWhitespace(thread, env, data); 758 thread->stackPush(*dict_key); 759 continue; 760 } 761 if (env->has_object_hook) { 762 value = callObjectHook(thread, env, value); 763 if (value.isErrorException()) return *value; 764 } 765 b = nextNonWhitespace(thread, env, data); 766 break; 767 default: 768 DCHECK(value.isErrorException(), "expected error raised"); 769 return *value; 770 } 771 772 for (;;) { 773 // We finished reading the object `value`. Add it to the outer container 774 // or return if there is no container left. 775 776 if (container.isList()) { 777 List list(&scope, *container); 778 runtime->listAdd(thread, list, value); 779 if (b == ',') { 780 b = nextNonWhitespace(thread, env, data); 781 break; 782 } 783 if (b == ']') { 784 value = *container; 785 container = thread->stackPop(); 786 b = nextNonWhitespace(thread, env, data); 787 continue; 788 } 789 return raiseJSONDecodeError(thread, env, data, env->next - 1, 790 "Expecting ',' delimiter"); 791 } 792 793 if (container.isDict()) { 794 Dict dict(&scope, *container); 795 dict_key = thread->stackPop(); 796 dictAtPutByStr(thread, dict, dict_key, value); 797 if (b == ',') { 798 b = nextNonWhitespace(thread, env, data); 799 dict_key = scanDictKey(thread, env, data, b, &dict_key_set, 800 &dict_key_set_remaining); 801 if (dict_key.isErrorException()) return *dict_key; 802 b = nextNonWhitespace(thread, env, data); 803 thread->stackPush(*dict_key); 804 break; 805 } 806 if (b == '}') { 807 value = *container; 808 container = thread->stackPop(); 809 b = nextNonWhitespace(thread, env, data); 810 811 if (env->has_object_hook) { 812 value = callObjectHook(thread, env, value); 813 if (value.isErrorException()) return *value; 814 } 815 continue; 816 } 817 return raiseJSONDecodeError(thread, env, data, env->next - 1, 818 "Expecting ',' delimiter"); 819 } 820 821 DCHECK(container.isNoneType(), "expected no container"); 822 if (env->next <= env->length) { 823 return raiseJSONDecodeError(thread, env, data, env->next - 1, 824 "Extra data"); 825 } 826 return *value; 827 } 828 } 829} 830 831RawObject FUNC(_json, loads)(Thread* thread, Arguments args) { 832 HandleScope scope(thread); 833 Runtime* runtime = thread->runtime(); 834 DataArray data(&scope, runtime->emptyMutableBytes()); 835 Object s(&scope, args.get(static_cast<word>(LoadsArg::kString))); 836 word length; 837 word next = 0; 838 if (runtime->isInstanceOfStr(*s)) { 839 s = strUnderlying(*s); 840 length = Str::cast(*s).length(); 841 } else if (runtime->isInstanceOfBytes(*s)) { 842 Bytes bytes(&scope, bytesUnderlying(*s)); 843 length = bytes.length(); 844 s = maybeDecode(thread, s, bytes, length, &next); 845 if (s.isErrorException()) return *s; 846 if (s == bytes) { 847 if (bytes.isSmallBytes()) { 848 MutableBytes copy(&scope, 849 runtime->newMutableBytesUninitialized(length)); 850 copy.replaceFromWithBytes(0, *bytes, length); 851 data = *copy; 852 } else { 853 data = LargeBytes::cast(*bytes); 854 } 855 } else { 856 CHECK(s.isStr(), "expected str return from decoder"); 857 length = Str::cast(*s).length(); 858 } 859 } else if (runtime->isInstanceOfBytearray(*s)) { 860 Bytearray array(&scope, *s); 861 Bytes items(&scope, array.items()); 862 length = array.numItems(); 863 s = maybeDecode(thread, s, items, length, &next); 864 if (s.isErrorException()) return *s; 865 if (s == items) { 866 data = MutableBytes::cast(*items); 867 } else { 868 CHECK(s.isStr(), "expected str return from decoder"); 869 length = Str::cast(*s).length(); 870 } 871 } else { 872 return thread->raiseWithFmt( 873 LayoutId::kTypeError, 874 "the JSON object must be str, bytes or bytearray, not %T", &s); 875 } 876 877 if (s.isSmallStr()) { 878 DCHECK(length == SmallStr::cast(*s).length(), "length mismatch"); 879 MutableBytes copy(&scope, runtime->newMutableBytesUninitialized(length)); 880 copy.replaceFromWithStr(0, Str::cast(*s), length); 881 data = *copy; 882 } else if (s.isLargeStr()) { 883 DCHECK(length == LargeStr::cast(*s).length(), "length mismatch"); 884 data = LargeStr::cast(*s); 885 } 886 887 Dict kw(&scope, args.get(static_cast<word>(LoadsArg::kKw))); 888 Object strict_obj(&scope, dictAtById(thread, kw, ID(strict))); 889 bool strict; 890 bool had_strict = false; 891 if (!strict_obj.isErrorNotFound()) { 892 if (!runtime->isInstanceOfInt(*strict_obj)) { 893 return thread->raiseRequiresType(strict_obj, ID(int)); 894 } 895 had_strict = true; 896 strict = !intUnderlying(*strict_obj).isZero(); 897 } else { 898 strict = true; 899 } 900 901 Object cls(&scope, args.get(static_cast<word>(LoadsArg::kCls))); 902 if (!cls.isNoneType() || kw.numItems() > static_cast<word>(had_strict)) { 903 Object function(&scope, runtime->lookupNameInModule(thread, ID(_json), 904 ID(_decode_with_cls))); 905 CHECK(!function.isErrorNotFound(), "missing function in internal module"); 906 thread->stackPush(*function); 907 MutableTuple call_args(&scope, runtime->newMutableTuple(7)); 908 call_args.atPut(0, *s); 909 call_args.atPut(1, *cls); 910 call_args.atPut(2, args.get(static_cast<word>(LoadsArg::kObjectHook))); 911 call_args.atPut(3, args.get(static_cast<word>(LoadsArg::kParseFloat))); 912 call_args.atPut(4, args.get(static_cast<word>(LoadsArg::kParseInt))); 913 call_args.atPut(5, args.get(static_cast<word>(LoadsArg::kParseConstant))); 914 call_args.atPut(6, args.get(static_cast<word>(LoadsArg::kObjectPairsHook))); 915 thread->stackPush(call_args.becomeImmutable()); 916 thread->stackPush(*kw); 917 return Interpreter::callEx(thread, CallFunctionExFlag::VAR_KEYWORDS); 918 } 919 920 JSONParser env; 921 memset(&env, 0, sizeof(env)); 922 env.next = next; 923 env.length = length; 924 env.args = args; 925 env.strict = strict; 926 927 if (!args.get(static_cast<word>(LoadsArg::kObjectHook)).isNoneType()) { 928 env.has_object_hook = true; 929 } 930 if (!args.get(static_cast<word>(LoadsArg::kParseFloat)).isNoneType()) { 931 env.has_parse_float = true; 932 } 933 if (!args.get(static_cast<word>(LoadsArg::kParseInt)).isNoneType()) { 934 env.has_parse_int = true; 935 } 936 if (!args.get(static_cast<word>(LoadsArg::kParseConstant)).isNoneType()) { 937 env.has_parse_constant = true; 938 } 939 if (!args.get(static_cast<word>(LoadsArg::kObjectPairsHook)).isNoneType()) { 940 env.has_object_hook = true; 941 env.has_object_pairs_hook = true; 942 } 943 return parse(thread, &env, data); 944} 945 946} // namespace py