this repo has no description
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
2#include "marshal.h"
3
4#include <cstdlib>
5#include <cstring>
6#include <memory>
7
8#include "handles.h"
9#include "heap.h"
10#include "modules.h"
11#include "runtime.h"
12#include "set-builtins.h"
13#include "tuple-builtins.h"
14#include "utils.h"
15#include "view.h"
16
17namespace py {
18
// One-byte tags of the marshal wire format. readObject() reads a tag byte,
// masks off FLAG_REF and dispatches on the remaining value.
enum {
  // Flag bit combined with a type tag: the object is also added to the
  // reference index.
  FLAG_REF = '\x80',

  // Singletons and sentinels.
  TYPE_NULL = '0',
  TYPE_NONE = 'N',
  TYPE_FALSE = 'F',
  TYPE_TRUE = 'T',
  TYPE_STOPITER = 'S',
  TYPE_ELLIPSIS = '.',
  TYPE_UNKNOWN = '?',

  // Numbers.
  TYPE_INT = 'i',
  TYPE_LONG = 'l',
  TYPE_FLOAT = 'f',
  TYPE_BINARY_FLOAT = 'g',
  TYPE_COMPLEX = 'x',
  TYPE_BINARY_COMPLEX = 'y',

  // Bytes and strings.
  TYPE_STRING = 's',  // Misnomer: holds bytes.
  TYPE_INTERNED = 't',
  TYPE_UNICODE = 'u',
  TYPE_ASCII = 'a',
  TYPE_ASCII_INTERNED = 'A',
  TYPE_SHORT_ASCII = 'z',
  TYPE_SHORT_ASCII_INTERNED = 'Z',

  // Containers.
  TYPE_TUPLE = '(',
  TYPE_SMALL_TUPLE = ')',
  TYPE_LIST = '[',
  TYPE_DICT = '{',
  TYPE_SET = '<',
  TYPE_FROZENSET = '>',

  // Code objects and back-references into the reference index.
  TYPE_CODE = 'c',
  TYPE_REF = 'r',
};
50
51Marshal::Reader::Reader(HandleScope* scope, Thread* thread, View<byte> buffer)
52 : thread_(thread),
53 runtime_(thread->runtime()),
54 refs_(scope, runtime_->newList()),
55 start_(buffer.data()),
56 length_(buffer.length()),
57 pos_(0) {
58 end_ = start_ + length_;
59}
60
61RawObject Marshal::Reader::readPycHeader(const Str& filename) {
62 if (length_ - pos_ < 4) {
63 return thread_->raiseWithFmt(
64 LayoutId::kEOFError, "reached end of file while reading header of '%S'",
65 &filename);
66 }
67 int32_t magic = readLong();
68 if (magic == kPycMagic) {
69 if (length_ - pos_ < 12) {
70 return thread_->raiseWithFmt(
71 LayoutId::kEOFError,
72 "reached end of file while reading header of '%S'", &filename);
73 }
74 readLong(); // read flags.
75 readLong(); // read source timestamp.
76 readLong(); // read source length.
77 DCHECK(pos_ == 16, "size mismatch");
78 } else {
79 return thread_->raiseWithFmt(LayoutId::kImportError,
80 "unsupported magic number in '%S'", &filename);
81 }
82 return NoneType::object();
83}
84
85void Marshal::Reader::setBuiltinFunctions(
86 const BuiltinFunction* builtin_functions, word num_builtin_functions,
87 const IntrinsicFunction* intrinsic_functions,
88 word num_intrinsic_functions) {
89 builtin_functions_ = builtin_functions;
90 num_builtin_functions_ = num_builtin_functions;
91 intrinsic_functions_ = intrinsic_functions;
92 num_intrinsic_functions_ = num_intrinsic_functions;
93}
94
95const byte* Marshal::Reader::readBytes(int length) {
96 const byte* result = &start_[pos_];
97 pos_ += length;
98 return result;
99}
100
101byte Marshal::Reader::readByte() {
102 byte result = 0xFF;
103 const byte* buffer = readBytes(1);
104 if (buffer != nullptr) {
105 result = buffer[0];
106 }
107 return result;
108}
109
110int16_t Marshal::Reader::readShort() {
111 int16_t result = -1;
112 const byte* buffer = readBytes(sizeof(result));
113 if (buffer != nullptr) {
114 result = buffer[0];
115 result |= buffer[1] << 8;
116 }
117 return result;
118}
119
120int32_t Marshal::Reader::readLong() {
121 int32_t result = -1;
122 const byte* buffer = readBytes(4);
123 if (buffer != nullptr) {
124 result = buffer[0];
125 result |= buffer[1] << 8;
126 result |= buffer[2] << 16;
127 result |= buffer[3] << 24;
128 }
129 return result;
130}
131
132double Marshal::Reader::readBinaryFloat() {
133 double result;
134 const byte* buffer = readBytes(sizeof(result));
135 std::memcpy(&result, buffer, sizeof(result));
136 return result;
137}
138
139RawObject Marshal::Reader::readObject() {
140 byte code = readByte();
141 byte flag = code & FLAG_REF;
142 byte type = code & ~FLAG_REF;
143 isRef_ = flag;
144 switch (type) {
145 case TYPE_NULL:
146 return SmallInt::fromWord(0);
147
148 case TYPE_NONE:
149 return NoneType::object();
150
151 case TYPE_STOPITER:
152 UNIMPLEMENTED("TYPE_STOPITER");
153
154 case TYPE_ELLIPSIS:
155 return runtime_->ellipsis();
156
157 case TYPE_FALSE:
158 return Bool::falseObj();
159
160 case TYPE_TRUE:
161 return Bool::trueObj();
162
163 case TYPE_INT: {
164 // NB: this will continue to work as long as SmallInt can contain the
165 // full range of 32 bit signed integer values. Notably, this will break if
166 // we need to support 32 bit machines.
167 word n = readLong();
168 if (!SmallInt::isValid(n)) {
169 UNIMPLEMENTED("value '%ld' outside range supported by RawSmallInt", n);
170 }
171 HandleScope scope(thread_);
172 Object result(&scope, SmallInt::fromWord(n));
173 if (isRef_) {
174 addRef(result);
175 }
176 return *result;
177 }
178
179 case TYPE_FLOAT:
180 UNIMPLEMENTED("TYPE_FLOAT");
181
182 case TYPE_BINARY_FLOAT: {
183 double n = readBinaryFloat();
184 HandleScope scope(thread_);
185 Object result(&scope, runtime_->newFloat(n));
186 if (isRef_) {
187 addRef(result);
188 }
189 return *result;
190 }
191
192 case TYPE_COMPLEX:
193 UNIMPLEMENTED("TYPE_COMPLEX");
194
195 case TYPE_BINARY_COMPLEX: {
196 double real = readBinaryFloat();
197 double imag = readBinaryFloat();
198 HandleScope scope(thread_);
199 Object result(&scope, runtime_->newComplex(real, imag));
200 if (isRef_) {
201 addRef(result);
202 }
203 return *result;
204 }
205
206 case TYPE_STRING: // Misnomer, should be TYPE_BYTES
207 return readTypeString();
208
209 case TYPE_INTERNED:
210 case TYPE_ASCII_INTERNED:
211 return readTypeAsciiInterned();
212
213 case TYPE_UNICODE:
214 case TYPE_ASCII: {
215 return readTypeAscii();
216 }
217
218 case TYPE_SHORT_ASCII_INTERNED:
219 return readTypeShortAsciiInterned();
220
221 case TYPE_SHORT_ASCII:
222 return readTypeShortAscii();
223
224 case TYPE_SMALL_TUPLE:
225 return readTypeSmallTuple();
226
227 case TYPE_TUPLE:
228 return readTypeTuple();
229
230 case TYPE_LIST:
231 UNIMPLEMENTED("TYPE_LIST");
232
233 case TYPE_DICT:
234 UNIMPLEMENTED("TYPE_DICT");
235
236 case TYPE_SET:
237 return readTypeSet();
238
239 case TYPE_FROZENSET:
240 return readTypeFrozenSet();
241
242 case TYPE_CODE:
243 return readTypeCode();
244
245 case TYPE_REF:
246 return readTypeRef();
247
248 case TYPE_LONG:
249 return readLongObject();
250
251 default:
252 UNREACHABLE("unknown type '%c' (flags=%x)", type, flag);
253 }
254 UNREACHABLE("all cases should be covered");
255}
256
257word Marshal::Reader::addRef(const Object& value) {
258 word result = refs_.numItems();
259 runtime_->listAdd(thread_, refs_, value);
260 return result;
261}
262
263void Marshal::Reader::setRef(word index, RawObject value) {
264 refs_.atPut(index, value);
265}
266
267RawObject Marshal::Reader::getRef(word index) { return refs_.at(index); }
268
269word Marshal::Reader::numRefs() { return refs_.numItems(); }
270
271RawObject Marshal::Reader::readTypeString() {
272 int32_t length = readLong();
273 const byte* data = readBytes(length);
274 HandleScope scope(thread_);
275 Object result(&scope, runtime_->newBytesWithAll(View<byte>(data, length)));
276 if (isRef_) {
277 addRef(result);
278 }
279 return *result;
280}
281
282RawObject Marshal::Reader::readTypeAscii() {
283 word length = readLong();
284 if (length < 0) {
285 return thread_->raiseWithFmt(LayoutId::kValueError,
286 "bad marshal data (string size out of range)");
287 }
288 return readStr(length);
289}
290
291RawObject Marshal::Reader::readTypeAsciiInterned() {
292 word length = readLong();
293 if (length < 0) {
294 return thread_->raiseWithFmt(LayoutId::kValueError,
295 "bad marshal data (string size out of range)");
296 }
297 return readAndInternStr(length);
298}
299
300RawObject Marshal::Reader::readTypeShortAscii() {
301 word length = readByte();
302 return readStr(length);
303}
304
305RawObject Marshal::Reader::readTypeShortAsciiInterned() {
306 word length = readByte();
307 return readAndInternStr(length);
308}
309
310RawObject Marshal::Reader::readStr(word length) {
311 const byte* data = readBytes(length);
312 HandleScope scope(thread_);
313 Object result(&scope, runtime_->newStrWithAll(View<byte>(data, length)));
314 if (isRef_) {
315 addRef(result);
316 }
317 return *result;
318}
319
320RawObject Marshal::Reader::readAndInternStr(word length) {
321 const byte* data = readBytes(length);
322 HandleScope scope(thread_);
323 Object result(&scope,
324 Runtime::internStrFromAll(thread_, View<byte>(data, length)));
325 if (isRef_) {
326 addRef(result);
327 }
328 return *result;
329}
330
331RawObject Marshal::Reader::readTypeSmallTuple() {
332 int32_t n = readByte();
333 return doTupleElements(n);
334}
335
336RawObject Marshal::Reader::readTypeTuple() {
337 int32_t n = readLong();
338 return doTupleElements(n);
339}
340
341RawObject Marshal::Reader::doTupleElements(int32_t length) {
342 HandleScope scope(thread_);
343 if (length == 0) {
344 Object result(&scope, runtime_->emptyTuple());
345 if (isRef_) {
346 addRef(result);
347 }
348 return *result;
349 }
350 MutableTuple result(&scope, runtime_->newMutableTuple(length));
351 if (isRef_) {
352 addRef(result);
353 }
354 for (int32_t i = 0; i < length; i++) {
355 RawObject value = readObject();
356 result.atPut(i, value);
357 }
358 return result.becomeImmutable();
359}
360
361RawObject Marshal::Reader::readTypeSet() {
362 int32_t n = readLong();
363 HandleScope scope(thread_);
364 Set set(&scope, runtime_->newSet());
365 return doSetElements(n, set);
366}
367
368RawObject Marshal::Reader::readTypeFrozenSet() {
369 int32_t n = readLong();
370 if (n == 0) {
371 return runtime_->emptyFrozenSet();
372 }
373 HandleScope scope(thread_);
374 FrozenSet set(&scope, runtime_->newFrozenSet());
375 return doSetElements(n, set);
376}
377
378RawObject Marshal::Reader::doSetElements(int32_t length, const SetBase& set) {
379 if (isRef_) {
380 addRef(set);
381 }
382 HandleScope scope(thread_);
383 Object value(&scope, NoneType::object());
384 Object hash_obj(&scope, NoneType::object());
385 for (int32_t i = 0; i < length; i++) {
386 value = readObject();
387 hash_obj = Interpreter::hash(thread_, value);
388 DCHECK(!hash_obj.isErrorException(), "must be hashable");
389 word hash = SmallInt::cast(*hash_obj).value();
390 RawObject result = setAdd(thread_, set, value, hash);
391 if (result.isError()) {
392 return result;
393 }
394 }
395 return *set;
396}
397
398RawObject Marshal::Reader::readTypeCode() {
399 word index = -1;
400 HandleScope scope(thread_);
401 if (isRef_) {
402 // Reserve a reflist index
403 Object none(&scope, NoneType::object());
404 index = addRef(none);
405 }
406 int32_t argcount = readLong();
407 int32_t posonlyargcount = readLong();
408 int32_t kwonlyargcount = readLong();
409 int32_t nlocals = readLong();
410 uint32_t stacksize = readLong();
411 int32_t flags = readLong();
412 CHECK(flags <= (Code::Flags::kLast << 1) - 1, "unknown flags in code object");
413 Object code(&scope, readObject());
414 Tuple consts(&scope, readObject());
415 Object names(&scope, readObject());
416 Tuple varnames(&scope, readObject());
417 Tuple freevars(&scope, readObject());
418 Tuple cellvars(&scope, readObject());
419 Object filename(&scope, readObject());
420 Object name(&scope, readObject());
421 int32_t firstlineno = readLong();
422 Object lnotab(&scope, readObject());
423
424 word intrinsic_index = 0;
425 if (flags & Code::Flags::kMetadata) {
426 Object metadata_obj(&scope, consts.at(0));
427 CHECK(metadata_obj.isTuple() && Tuple::cast(*metadata_obj).length() == 1,
428 "malformed metadata");
429 Tuple metadata(&scope, *metadata_obj);
430 Object intrinsic(&scope, metadata.at(0));
431 CHECK(intrinsic.isSmallInt(), "malformed intrinsic ID");
432 intrinsic_index = SmallInt::cast(*intrinsic).value();
433 consts = runtime_->tupleSubseq(thread_, consts, 1, consts.length() - 1);
434 }
435
436 IntrinsicFunction intrinsic = nullptr;
437 if (intrinsic_functions_ != nullptr && intrinsic_index != 0) {
438 CHECK_INDEX(intrinsic_index - 1, num_intrinsic_functions_);
439 // The intrinsic IDs are biased by 1 so that 0 means no intrinsic
440 intrinsic = intrinsic_functions_[intrinsic_index - 1];
441 }
442 Object result(&scope, NoneType::object());
443 if (flags & Code::Flags::kBuiltin) {
444 word function_index = stacksize;
445 CHECK(code.isBytes() && Bytes::cast(*code).length() == 0,
446 "must not have bytecode in native code");
447 CHECK(consts.length() == 0, "consts should contain only metadata");
448 CHECK(names.isTuple() && Tuple::cast(*names).length() == 0,
449 "must not have variables in native code");
450 CHECK(freevars.length() == 0, "must not have free vars in native code");
451 CHECK(cellvars.length() == 0, "must not have cell vars in native code");
452 CHECK_INDEX(function_index, num_builtin_functions_);
453 BuiltinFunction function = builtin_functions_[function_index];
454 result = runtime_->newBuiltinCode(argcount, posonlyargcount, kwonlyargcount,
455 flags, function, varnames, name);
456 Code::cast(*result).setFilename(*filename);
457 Code::cast(*result).setFirstlineno(firstlineno);
458 } else {
459 result = runtime_->newCode(argcount, posonlyargcount, kwonlyargcount,
460 nlocals, stacksize, flags, code, consts, names,
461 varnames, freevars, cellvars, filename, name,
462 firstlineno, lnotab);
463 }
464 Code::cast(*result).setIntrinsic(reinterpret_cast<void*>(intrinsic));
465 if (index >= 0) {
466 setRef(index, *result);
467 }
468 return *result;
469}
470
471RawObject Marshal::Reader::readTypeRef() {
472 int32_t n = readLong();
473 return getRef(n);
474}
475
476RawObject Marshal::Reader::readLongObject() {
477 int32_t n = readLong();
478 if (n == 0) {
479 HandleScope scope(thread_);
480 Object zero(&scope, SmallInt::fromWord(0));
481 if (isRef_) {
482 addRef(zero);
483 }
484 return *zero;
485 }
486 if (n < kMinInt32 || n > kMaxInt32) {
487 return thread_->raiseWithFmt(LayoutId::kValueError,
488 "bad marshal data (string size out of range)");
489 }
490 word bits_consumed = 0;
491 word n_bits = std::abs(n) * kBitsPerLongDigit;
492 word num_digits = ((n_bits + kBitsPerWord + 1) / kBitsPerWord) + 1;
493 std::unique_ptr<uword[]> digits{new uword[num_digits]};
494 word digits_idx = 0;
495 uword buf = 0;
496 word word_offset = 0;
497 while (bits_consumed < n_bits) {
498 int16_t digit = readShort();
499 if (digit < 0) {
500 return thread_->raiseWithFmt(LayoutId::kValueError,
501 "bad marshal data (negative long digit)");
502 }
503 auto unsigned_digit = static_cast<uword>(digit);
504 if (word_offset + kBitsPerLongDigit <= kBitsPerWord) {
505 buf |= unsigned_digit << word_offset;
506 word_offset += kBitsPerLongDigit;
507 if (word_offset == kBitsPerWord) {
508 digits[digits_idx++] = buf;
509 buf = 0;
510 word_offset = 0;
511 }
512 } else {
513 word extra_bits = (word_offset + kBitsPerLongDigit) % kBitsPerWord;
514 word bits_to_include = kBitsPerLongDigit - extra_bits;
515 buf |= (unsigned_digit & ((1 << bits_to_include) - 1)) << word_offset;
516 digits[digits_idx++] = buf;
517 buf = (unsigned_digit >> bits_to_include) & ((1 << extra_bits) - 1);
518 word_offset = extra_bits;
519 }
520 bits_consumed += kBitsPerLongDigit;
521 }
522 if (word_offset > 0 && buf != 0) {
523 digits[digits_idx++] = buf;
524 } else if (n > 0 && (digits[digits_idx - 1] >> (kBitsPerWord - 1))) {
525 // Zero extend if the MSB is set in the top digit and either the result is
526 // positive or the top digit has at least one other bit set (in which case
527 // we need the extra digit for the negation).
528 digits[digits_idx++] = 0;
529 }
530 if (n < 0) {
531 uword carry = 1;
532 for (word i = 0; i < digits_idx; i++) {
533 uword digit = digits[i];
534 carry = __builtin_uaddl_overflow(~digit, carry, &digit);
535 digits[i] = digit;
536 }
537 DCHECK(carry == 0, "Carry should be zero");
538 if ((digits[digits_idx - 1] >> (kBitsPerWord - 1)) == 0) {
539 digits[digits_idx++] = kMaxUword;
540 }
541 }
542
543 HandleScope scope(thread_);
544 Object result(&scope, NoneType::object());
545 if (digits_idx == 0) {
546 result = SmallInt::fromWord(0);
547 } else if (digits_idx == 1 &&
548 SmallInt::isValid(static_cast<word>(digits[0]))) {
549 result = SmallInt::fromWord(static_cast<word>(digits[0]));
550 } else {
551 result =
552 runtime_->newLargeIntWithDigits(View<uword>(digits.get(), digits_idx));
553 }
554 if (isRef_) {
555 addRef(result);
556 }
557 return *result;
558}
559
560} // namespace py