this repo has no description
at trunk 560 lines 16 kB view raw
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) 2#include "marshal.h" 3 4#include <cstdlib> 5#include <cstring> 6#include <memory> 7 8#include "handles.h" 9#include "heap.h" 10#include "modules.h" 11#include "runtime.h" 12#include "set-builtins.h" 13#include "tuple-builtins.h" 14#include "utils.h" 15#include "view.h" 16 17namespace py { 18 19enum { 20 FLAG_REF = '\x80', // with a type, add obj to index 21 TYPE_ASCII = 'a', 22 TYPE_ASCII_INTERNED = 'A', 23 TYPE_BINARY_COMPLEX = 'y', 24 TYPE_BINARY_FLOAT = 'g', 25 TYPE_CODE = 'c', 26 TYPE_COMPLEX = 'x', 27 TYPE_DICT = '{', 28 TYPE_ELLIPSIS = '.', 29 TYPE_FALSE = 'F', 30 TYPE_FLOAT = 'f', 31 TYPE_FROZENSET = '>', 32 TYPE_INTERNED = 't', 33 TYPE_INT = 'i', 34 TYPE_LIST = '[', 35 TYPE_LONG = 'l', 36 TYPE_NONE = 'N', 37 TYPE_NULL = '0', 38 TYPE_REF = 'r', 39 TYPE_SET = '<', 40 TYPE_SHORT_ASCII_INTERNED = 'Z', 41 TYPE_SHORT_ASCII = 'z', 42 TYPE_SMALL_TUPLE = ')', 43 TYPE_STOPITER = 'S', 44 TYPE_STRING = 's', 45 TYPE_TRUE = 'T', 46 TYPE_TUPLE = '(', 47 TYPE_UNICODE = 'u', 48 TYPE_UNKNOWN = '?', 49}; 50 51Marshal::Reader::Reader(HandleScope* scope, Thread* thread, View<byte> buffer) 52 : thread_(thread), 53 runtime_(thread->runtime()), 54 refs_(scope, runtime_->newList()), 55 start_(buffer.data()), 56 length_(buffer.length()), 57 pos_(0) { 58 end_ = start_ + length_; 59} 60 61RawObject Marshal::Reader::readPycHeader(const Str& filename) { 62 if (length_ - pos_ < 4) { 63 return thread_->raiseWithFmt( 64 LayoutId::kEOFError, "reached end of file while reading header of '%S'", 65 &filename); 66 } 67 int32_t magic = readLong(); 68 if (magic == kPycMagic) { 69 if (length_ - pos_ < 12) { 70 return thread_->raiseWithFmt( 71 LayoutId::kEOFError, 72 "reached end of file while reading header of '%S'", &filename); 73 } 74 readLong(); // read flags. 75 readLong(); // read source timestamp. 76 readLong(); // read source length. 77 DCHECK(pos_ == 16, "size mismatch"); 78 } else { 79 return thread_->raiseWithFmt(LayoutId::kImportError, 80 "unsupported magic number in '%S'", &filename); 81 } 82 return NoneType::object(); 83} 84 85void Marshal::Reader::setBuiltinFunctions( 86 const BuiltinFunction* builtin_functions, word num_builtin_functions, 87 const IntrinsicFunction* intrinsic_functions, 88 word num_intrinsic_functions) { 89 builtin_functions_ = builtin_functions; 90 num_builtin_functions_ = num_builtin_functions; 91 intrinsic_functions_ = intrinsic_functions; 92 num_intrinsic_functions_ = num_intrinsic_functions; 93} 94 95const byte* Marshal::Reader::readBytes(int length) { 96 const byte* result = &start_[pos_]; 97 pos_ += length; 98 return result; 99} 100 101byte Marshal::Reader::readByte() { 102 byte result = 0xFF; 103 const byte* buffer = readBytes(1); 104 if (buffer != nullptr) { 105 result = buffer[0]; 106 } 107 return result; 108} 109 110int16_t Marshal::Reader::readShort() { 111 int16_t result = -1; 112 const byte* buffer = readBytes(sizeof(result)); 113 if (buffer != nullptr) { 114 result = buffer[0]; 115 result |= buffer[1] << 8; 116 } 117 return result; 118} 119 120int32_t Marshal::Reader::readLong() { 121 int32_t result = -1; 122 const byte* buffer = readBytes(4); 123 if (buffer != nullptr) { 124 result = buffer[0]; 125 result |= buffer[1] << 8; 126 result |= buffer[2] << 16; 127 result |= buffer[3] << 24; 128 } 129 return result; 130} 131 132double Marshal::Reader::readBinaryFloat() { 133 double result; 134 const byte* buffer = readBytes(sizeof(result)); 135 std::memcpy(&result, buffer, sizeof(result)); 136 return result; 137} 138 139RawObject Marshal::Reader::readObject() { 140 byte code = readByte(); 141 byte flag = code & FLAG_REF; 142 byte type = code & ~FLAG_REF; 143 isRef_ = flag; 144 switch (type) { 145 case TYPE_NULL: 146 return SmallInt::fromWord(0); 147 148 case TYPE_NONE: 149 return NoneType::object(); 150 151 case TYPE_STOPITER: 152 UNIMPLEMENTED("TYPE_STOPITER"); 153 154 case TYPE_ELLIPSIS: 155 return runtime_->ellipsis(); 156 157 case TYPE_FALSE: 158 return Bool::falseObj(); 159 160 case TYPE_TRUE: 161 return Bool::trueObj(); 162 163 case TYPE_INT: { 164 // NB: this will continue to work as long as SmallInt can contain the 165 // full range of 32 bit signed integer values. Notably, this will break if 166 // we need to support 32 bit machines. 167 word n = readLong(); 168 if (!SmallInt::isValid(n)) { 169 UNIMPLEMENTED("value '%ld' outside range supported by RawSmallInt", n); 170 } 171 HandleScope scope(thread_); 172 Object result(&scope, SmallInt::fromWord(n)); 173 if (isRef_) { 174 addRef(result); 175 } 176 return *result; 177 } 178 179 case TYPE_FLOAT: 180 UNIMPLEMENTED("TYPE_FLOAT"); 181 182 case TYPE_BINARY_FLOAT: { 183 double n = readBinaryFloat(); 184 HandleScope scope(thread_); 185 Object result(&scope, runtime_->newFloat(n)); 186 if (isRef_) { 187 addRef(result); 188 } 189 return *result; 190 } 191 192 case TYPE_COMPLEX: 193 UNIMPLEMENTED("TYPE_COMPLEX"); 194 195 case TYPE_BINARY_COMPLEX: { 196 double real = readBinaryFloat(); 197 double imag = readBinaryFloat(); 198 HandleScope scope(thread_); 199 Object result(&scope, runtime_->newComplex(real, imag)); 200 if (isRef_) { 201 addRef(result); 202 } 203 return *result; 204 } 205 206 case TYPE_STRING: // Misnomer, should be TYPE_BYTES 207 return readTypeString(); 208 209 case TYPE_INTERNED: 210 case TYPE_ASCII_INTERNED: 211 return readTypeAsciiInterned(); 212 213 case TYPE_UNICODE: 214 case TYPE_ASCII: { 215 return readTypeAscii(); 216 } 217 218 case TYPE_SHORT_ASCII_INTERNED: 219 return readTypeShortAsciiInterned(); 220 221 case TYPE_SHORT_ASCII: 222 return readTypeShortAscii(); 223 224 case TYPE_SMALL_TUPLE: 225 return readTypeSmallTuple(); 226 227 case TYPE_TUPLE: 228 return readTypeTuple(); 229 230 case TYPE_LIST: 231 UNIMPLEMENTED("TYPE_LIST"); 232 233 case TYPE_DICT: 234 UNIMPLEMENTED("TYPE_DICT"); 235 236 case TYPE_SET: 237 return readTypeSet(); 238 239 case TYPE_FROZENSET: 240 return readTypeFrozenSet(); 241 242 case TYPE_CODE: 243 return readTypeCode(); 244 245 case TYPE_REF: 246 return readTypeRef(); 247 248 case TYPE_LONG: 249 return readLongObject(); 250 251 default: 252 UNREACHABLE("unknown type '%c' (flags=%x)", type, flag); 253 } 254 UNREACHABLE("all cases should be covered"); 255} 256 257word Marshal::Reader::addRef(const Object& value) { 258 word result = refs_.numItems(); 259 runtime_->listAdd(thread_, refs_, value); 260 return result; 261} 262 263void Marshal::Reader::setRef(word index, RawObject value) { 264 refs_.atPut(index, value); 265} 266 267RawObject Marshal::Reader::getRef(word index) { return refs_.at(index); } 268 269word Marshal::Reader::numRefs() { return refs_.numItems(); } 270 271RawObject Marshal::Reader::readTypeString() { 272 int32_t length = readLong(); 273 const byte* data = readBytes(length); 274 HandleScope scope(thread_); 275 Object result(&scope, runtime_->newBytesWithAll(View<byte>(data, length))); 276 if (isRef_) { 277 addRef(result); 278 } 279 return *result; 280} 281 282RawObject Marshal::Reader::readTypeAscii() { 283 word length = readLong(); 284 if (length < 0) { 285 return thread_->raiseWithFmt(LayoutId::kValueError, 286 "bad marshal data (string size out of range)"); 287 } 288 return readStr(length); 289} 290 291RawObject Marshal::Reader::readTypeAsciiInterned() { 292 word length = readLong(); 293 if (length < 0) { 294 return thread_->raiseWithFmt(LayoutId::kValueError, 295 "bad marshal data (string size out of range)"); 296 } 297 return readAndInternStr(length); 298} 299 300RawObject Marshal::Reader::readTypeShortAscii() { 301 word length = readByte(); 302 return readStr(length); 303} 304 305RawObject Marshal::Reader::readTypeShortAsciiInterned() { 306 word length = readByte(); 307 return readAndInternStr(length); 308} 309 310RawObject Marshal::Reader::readStr(word length) { 311 const byte* data = readBytes(length); 312 HandleScope scope(thread_); 313 Object result(&scope, runtime_->newStrWithAll(View<byte>(data, length))); 314 if (isRef_) { 315 addRef(result); 316 } 317 return *result; 318} 319 320RawObject Marshal::Reader::readAndInternStr(word length) { 321 const byte* data = readBytes(length); 322 HandleScope scope(thread_); 323 Object result(&scope, 324 Runtime::internStrFromAll(thread_, View<byte>(data, length))); 325 if (isRef_) { 326 addRef(result); 327 } 328 return *result; 329} 330 331RawObject Marshal::Reader::readTypeSmallTuple() { 332 int32_t n = readByte(); 333 return doTupleElements(n); 334} 335 336RawObject Marshal::Reader::readTypeTuple() { 337 int32_t n = readLong(); 338 return doTupleElements(n); 339} 340 341RawObject Marshal::Reader::doTupleElements(int32_t length) { 342 HandleScope scope(thread_); 343 if (length == 0) { 344 Object result(&scope, runtime_->emptyTuple()); 345 if (isRef_) { 346 addRef(result); 347 } 348 return *result; 349 } 350 MutableTuple result(&scope, runtime_->newMutableTuple(length)); 351 if (isRef_) { 352 addRef(result); 353 } 354 for (int32_t i = 0; i < length; i++) { 355 RawObject value = readObject(); 356 result.atPut(i, value); 357 } 358 return result.becomeImmutable(); 359} 360 361RawObject Marshal::Reader::readTypeSet() { 362 int32_t n = readLong(); 363 HandleScope scope(thread_); 364 Set set(&scope, runtime_->newSet()); 365 return doSetElements(n, set); 366} 367 368RawObject Marshal::Reader::readTypeFrozenSet() { 369 int32_t n = readLong(); 370 if (n == 0) { 371 return runtime_->emptyFrozenSet(); 372 } 373 HandleScope scope(thread_); 374 FrozenSet set(&scope, runtime_->newFrozenSet()); 375 return doSetElements(n, set); 376} 377 378RawObject Marshal::Reader::doSetElements(int32_t length, const SetBase& set) { 379 if (isRef_) { 380 addRef(set); 381 } 382 HandleScope scope(thread_); 383 Object value(&scope, NoneType::object()); 384 Object hash_obj(&scope, NoneType::object()); 385 for (int32_t i = 0; i < length; i++) { 386 value = readObject(); 387 hash_obj = Interpreter::hash(thread_, value); 388 DCHECK(!hash_obj.isErrorException(), "must be hashable"); 389 word hash = SmallInt::cast(*hash_obj).value(); 390 RawObject result = setAdd(thread_, set, value, hash); 391 if (result.isError()) { 392 return result; 393 } 394 } 395 return *set; 396} 397 398RawObject Marshal::Reader::readTypeCode() { 399 word index = -1; 400 HandleScope scope(thread_); 401 if (isRef_) { 402 // Reserve a reflist index 403 Object none(&scope, NoneType::object()); 404 index = addRef(none); 405 } 406 int32_t argcount = readLong(); 407 int32_t posonlyargcount = readLong(); 408 int32_t kwonlyargcount = readLong(); 409 int32_t nlocals = readLong(); 410 uint32_t stacksize = readLong(); 411 int32_t flags = readLong(); 412 CHECK(flags <= (Code::Flags::kLast << 1) - 1, "unknown flags in code object"); 413 Object code(&scope, readObject()); 414 Tuple consts(&scope, readObject()); 415 Object names(&scope, readObject()); 416 Tuple varnames(&scope, readObject()); 417 Tuple freevars(&scope, readObject()); 418 Tuple cellvars(&scope, readObject()); 419 Object filename(&scope, readObject()); 420 Object name(&scope, readObject()); 421 int32_t firstlineno = readLong(); 422 Object lnotab(&scope, readObject()); 423 424 word intrinsic_index = 0; 425 if (flags & Code::Flags::kMetadata) { 426 Object metadata_obj(&scope, consts.at(0)); 427 CHECK(metadata_obj.isTuple() && Tuple::cast(*metadata_obj).length() == 1, 428 "malformed metadata"); 429 Tuple metadata(&scope, *metadata_obj); 430 Object intrinsic(&scope, metadata.at(0)); 431 CHECK(intrinsic.isSmallInt(), "malformed intrinsic ID"); 432 intrinsic_index = SmallInt::cast(*intrinsic).value(); 433 consts = runtime_->tupleSubseq(thread_, consts, 1, consts.length() - 1); 434 } 435 436 IntrinsicFunction intrinsic = nullptr; 437 if (intrinsic_functions_ != nullptr && intrinsic_index != 0) { 438 CHECK_INDEX(intrinsic_index - 1, num_intrinsic_functions_); 439 // The intrinsic IDs are biased by 1 so that 0 means no intrinsic 440 intrinsic = intrinsic_functions_[intrinsic_index - 1]; 441 } 442 Object result(&scope, NoneType::object()); 443 if (flags & Code::Flags::kBuiltin) { 444 word function_index = stacksize; 445 CHECK(code.isBytes() && Bytes::cast(*code).length() == 0, 446 "must not have bytecode in native code"); 447 CHECK(consts.length() == 0, "consts should contain only metadata"); 448 CHECK(names.isTuple() && Tuple::cast(*names).length() == 0, 449 "must not have variables in native code"); 450 CHECK(freevars.length() == 0, "must not have free vars in native code"); 451 CHECK(cellvars.length() == 0, "must not have cell vars in native code"); 452 CHECK_INDEX(function_index, num_builtin_functions_); 453 BuiltinFunction function = builtin_functions_[function_index]; 454 result = runtime_->newBuiltinCode(argcount, posonlyargcount, kwonlyargcount, 455 flags, function, varnames, name); 456 Code::cast(*result).setFilename(*filename); 457 Code::cast(*result).setFirstlineno(firstlineno); 458 } else { 459 result = runtime_->newCode(argcount, posonlyargcount, kwonlyargcount, 460 nlocals, stacksize, flags, code, consts, names, 461 varnames, freevars, cellvars, filename, name, 462 firstlineno, lnotab); 463 } 464 Code::cast(*result).setIntrinsic(reinterpret_cast<void*>(intrinsic)); 465 if (index >= 0) { 466 setRef(index, *result); 467 } 468 return *result; 469} 470 471RawObject Marshal::Reader::readTypeRef() { 472 int32_t n = readLong(); 473 return getRef(n); 474} 475 476RawObject Marshal::Reader::readLongObject() { 477 int32_t n = readLong(); 478 if (n == 0) { 479 HandleScope scope(thread_); 480 Object zero(&scope, SmallInt::fromWord(0)); 481 if (isRef_) { 482 addRef(zero); 483 } 484 return *zero; 485 } 486 if (n < kMinInt32 || n > kMaxInt32) { 487 return thread_->raiseWithFmt(LayoutId::kValueError, 488 "bad marshal data (string size out of range)"); 489 } 490 word bits_consumed = 0; 491 word n_bits = std::abs(n) * kBitsPerLongDigit; 492 word num_digits = ((n_bits + kBitsPerWord + 1) / kBitsPerWord) + 1; 493 std::unique_ptr<uword[]> digits{new uword[num_digits]}; 494 word digits_idx = 0; 495 uword buf = 0; 496 word word_offset = 0; 497 while (bits_consumed < n_bits) { 498 int16_t digit = readShort(); 499 if (digit < 0) { 500 return thread_->raiseWithFmt(LayoutId::kValueError, 501 "bad marshal data (negative long digit)"); 502 } 503 auto unsigned_digit = static_cast<uword>(digit); 504 if (word_offset + kBitsPerLongDigit <= kBitsPerWord) { 505 buf |= unsigned_digit << word_offset; 506 word_offset += kBitsPerLongDigit; 507 if (word_offset == kBitsPerWord) { 508 digits[digits_idx++] = buf; 509 buf = 0; 510 word_offset = 0; 511 } 512 } else { 513 word extra_bits = (word_offset + kBitsPerLongDigit) % kBitsPerWord; 514 word bits_to_include = kBitsPerLongDigit - extra_bits; 515 buf |= (unsigned_digit & ((1 << bits_to_include) - 1)) << word_offset; 516 digits[digits_idx++] = buf; 517 buf = (unsigned_digit >> bits_to_include) & ((1 << extra_bits) - 1); 518 word_offset = extra_bits; 519 } 520 bits_consumed += kBitsPerLongDigit; 521 } 522 if (word_offset > 0 && buf != 0) { 523 digits[digits_idx++] = buf; 524 } else if (n > 0 && (digits[digits_idx - 1] >> (kBitsPerWord - 1))) { 525 // Zero extend if the MSB is set in the top digit and either the result is 526 // positive or the top digit has at least one other bit set (in which case 527 // we need the extra digit for the negation). 528 digits[digits_idx++] = 0; 529 } 530 if (n < 0) { 531 uword carry = 1; 532 for (word i = 0; i < digits_idx; i++) { 533 uword digit = digits[i]; 534 carry = __builtin_uaddl_overflow(~digit, carry, &digit); 535 digits[i] = digit; 536 } 537 DCHECK(carry == 0, "Carry should be zero"); 538 if ((digits[digits_idx - 1] >> (kBitsPerWord - 1)) == 0) { 539 digits[digits_idx++] = kMaxUword; 540 } 541 } 542 543 HandleScope scope(thread_); 544 Object result(&scope, NoneType::object()); 545 if (digits_idx == 0) { 546 result = SmallInt::fromWord(0); 547 } else if (digits_idx == 1 && 548 SmallInt::isValid(static_cast<word>(digits[0]))) { 549 result = SmallInt::fromWord(static_cast<word>(digits[0])); 550 } else { 551 result = 552 runtime_->newLargeIntWithDigits(View<uword>(digits.get(), digits_idx)); 553 } 554 if (isRef_) { 555 addRef(result); 556 } 557 return *result; 558} 559 560} // namespace py