#include "simdcbor.hh"

// NOTE(review): the standard header names below were reconstructed from the
// symbols this file uses (ldexp/INFINITY/NAN, size_t, fixed-width integers,
// memcpy) -- confirm against the build.
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>

#if defined(_MSC_VER)
#include <stdlib.h>  // _byteswap_ushort / _byteswap_ulong / _byteswap_uint64
#define bswap_16(x) _byteswap_ushort(x)
#define bswap_32(x) _byteswap_ulong(x)
#define bswap_64(x) _byteswap_uint64(x)
#else
#define bswap_16(x) __builtin_bswap16(x)
#define bswap_32(x) __builtin_bswap32(x)
#define bswap_64(x) __builtin_bswap64(x)
#endif

using namespace simdjson;

namespace {

/// Maximum nesting of arrays, maps and tags accepted before DEPTH_ERROR.
/// parse_item() is recursive, so this bounds stack use on hostile input.
constexpr size_t kMaxNestingDepth = 1024;

/// Largest element count that fits in the 24 bits between the 32-bit
/// "next index" payload and the type byte of a container start word.
constexpr uint64_t kMaxTapeCount = 0xFFFFFF;

/// Decodes one CBOR data item from a byte range straight into the tape and
/// string buffer of a simdjson dom::document.
///
/// The reader never grows the document: the caller must have allocated a tape
/// and string buffer large enough for the input before calling parse_root().
class CborReader {
public:
  const uint8_t* current;           ///< Next unread input byte.
  const uint8_t* end;               ///< One past the last input byte.
  dom::document& doc;               ///< Destination document.
  size_t tape_idx;                  ///< Next free tape slot.
  uint8_t* current_string_buf;      ///< Next free byte in the string buffer.
  const uint8_t* string_buf_start;  ///< Start of the string buffer (offset base).

  CborReader(const uint8_t* buf, size_t len, dom::document& d)
      : current(buf),
        end(buf + len),
        doc(d),
        tape_idx(0),
        current_string_buf(d.string_buf.get()),
        string_buf_start(d.string_buf.get()) {}

  /// Appends a tape word made of a payload and a type tag in the top byte.
  void append_tape(uint64_t val, internal::tape_type type) {
    doc.tape[tape_idx++] = val | (uint64_t(type) << 56);
  }

  /// Appends a raw 64-bit tape word (the value following a number tag).
  void append_tape_value(uint64_t val) { doc.tape[tape_idx++] = val; }

  /// Reserves one tape slot to be filled in later; returns its index.
  size_t reserve_tape() { return tape_idx++; }

  /// Writes a payload + type tag into a previously reserved tape slot.
  void set_tape(size_t idx, uint64_t val, internal::tape_type type) {
    doc.tape[idx] = val | (uint64_t(type) << 56);
  }

  /// Parses a single top-level item and brackets it with two ROOT words that
  /// reference each other's tape index.
  /// @return SUCCESS, or the first error reported while decoding the item.
  error_code parse_root() {
    const size_t root_start = reserve_tape();
    const error_code ec = parse_item();
    if (ec != SUCCESS) return ec;
    const size_t root_end = reserve_tape();
    set_tape(root_start, root_end, internal::tape_type::ROOT);
    set_tape(root_end, root_start, internal::tape_type::ROOT);
    return SUCCESS;
  }

  /// Decodes the next data item, whatever its major type.
  /// @return EMPTY when no input is left, DEPTH_ERROR when nesting exceeds
  ///         kMaxNestingDepth, otherwise the result of the type-specific
  ///         decoder.
  error_code parse_item() {
    if (current >= end) return EMPTY;
    if (depth >= kMaxNestingDepth) return DEPTH_ERROR;

    const uint8_t initial = *current++;
    const uint8_t major = static_cast<uint8_t>(initial >> 5);
    const uint8_t additional = static_cast<uint8_t>(initial & 0x1F);

    ++depth;
    const error_code ec = dispatch(major, additional);
    --depth;
    return ec;
  }

private:
  size_t depth = 0;  ///< Current recursion depth of parse_item().

  /// True when at least `n` unread input bytes remain. Compares against the
  /// remaining length instead of forming `current + n`, which could overflow
  /// for a hostile 64-bit length.
  bool has_bytes(uint64_t n) const {
    return n <= static_cast<uint64_t>(end - current);
  }

  /// Routes an already-consumed initial byte to the decoder for its major type.
  error_code dispatch(uint8_t major, uint8_t additional) {
    switch (major) {
      case 0: return parse_uint(additional);
      case 1: return parse_nint(additional);
      case 2: return parse_byte_string(additional);
      case 3: return parse_text_string(additional);
      case 4: return parse_array(additional);
      case 5: return parse_map(additional);
      case 6: {
        // A tag carries its tag number as the argument; consume it so the
        // tagged item starts at the right byte. The tag itself is discarded.
        error_code ec = SUCCESS;
        (void)read_uint(additional, ec);
        if (ec != SUCCESS) return ec;
        return parse_item();
      }
      case 7: return parse_float_simple(additional);
      default: return UNEXPECTED_ERROR;
    }
  }

  /// Reads the argument that follows an initial byte.
  ///
  /// NOTE(review): multi-byte arguments are byte-swapped unconditionally after
  /// a native memcpy, which is only correct on little-endian hosts.
  ///
  /// @param additional Low five bits of the initial byte.
  /// @param ec Set to INDEX_OUT_OF_BOUNDS on truncated input or
  ///           UNEXPECTED_ERROR for additional values 28..31; untouched
  ///           otherwise.
  /// @return The decoded argument, or 0 when `ec` was set.
  uint64_t read_uint(uint8_t additional, error_code& ec) {
    if (additional < 24) return additional;

    switch (additional) {
      case 24: {
        if (!has_bytes(1)) break;
        return *current++;
      }
      case 25: {
        if (!has_bytes(2)) break;
        uint16_t v;
        memcpy(&v, current, 2);
        current += 2;
        return bswap_16(v);
      }
      case 26: {
        if (!has_bytes(4)) break;
        uint32_t v;
        memcpy(&v, current, 4);
        current += 4;
        return bswap_32(v);
      }
      case 27: {
        if (!has_bytes(8)) break;
        uint64_t v;
        memcpy(&v, current, 8);
        current += 8;
        return bswap_64(v);
      }
      default:
        ec = UNEXPECTED_ERROR;
        return 0;
    }
    ec = INDEX_OUT_OF_BOUNDS;
    return 0;
  }

  /// Major type 0: emits a UINT64 tag word followed by the value.
  error_code parse_uint(uint8_t additional) {
    error_code ec = SUCCESS;
    const uint64_t val = read_uint(additional, ec);
    if (ec != SUCCESS) return ec;
    append_tape(0, internal::tape_type::UINT64);
    append_tape_value(val);
    return SUCCESS;
  }

  /// Major type 1: emits an INT64 tag word followed by -1 - argument.
  /// @return NUMBER_ERROR when the result does not fit in int64_t.
  error_code parse_nint(uint8_t additional) {
    error_code ec = SUCCESS;
    const uint64_t val = read_uint(additional, ec);
    if (ec != SUCCESS) return ec;
    // -1 - val is below INT64_MIN once val exceeds INT64_MAX.
    if (val > static_cast<uint64_t>(INT64_MAX)) return NUMBER_ERROR;
    const int64_t nval = -1 - static_cast<int64_t>(val);
    append_tape(0, internal::tape_type::INT64);
    append_tape_value(static_cast<uint64_t>(nval));
    return SUCCESS;
  }

  /// Major type 2: copies a definite-length byte string into the string buffer
  /// and records it with a STRING tape word. Indefinite-length byte strings
  /// are rejected with UNEXPECTED_ERROR.
  error_code parse_byte_string(uint8_t additional) {
    if (additional == 31) return UNEXPECTED_ERROR;
    error_code ec = SUCCESS;
    const uint64_t len = read_uint(additional, ec);
    if (ec != SUCCESS) return ec;
    if (!has_bytes(len)) return INDEX_OUT_OF_BOUNDS;
    return write_string(current, static_cast<size_t>(len));
  }

  /// Major type 3: validates UTF-8 and stores the text as a STRING entry.
  /// Indefinite-length strings (additional == 31) are concatenated chunk by
  /// chunk; every chunk must itself be a definite-length text string.
  error_code parse_text_string(uint8_t additional) {
    if (additional != 31) {
      error_code ec = SUCCESS;
      const uint64_t len = read_uint(additional, ec);
      if (ec != SUCCESS) return ec;
      if (!has_bytes(len)) return INDEX_OUT_OF_BOUNDS;
      if (!simdjson::validate_utf8(reinterpret_cast<const char*>(current),
                                   static_cast<size_t>(len))) {
        return UTF8_ERROR;
      }
      return write_string(current, static_cast<size_t>(len));
    }

    // Leave room for the 32-bit length prefix; it is only known once the
    // break byte has been reached.
    const size_t offset = current_string_buf - string_buf_start;
    uint8_t* const len_ptr = current_string_buf;
    current_string_buf += sizeof(uint32_t);
    uint64_t total_len = 0;

    while (true) {
      if (current >= end) return UNEXPECTED_ERROR;
      if (*current == 0xFF) {  // break: end of the indefinite-length string
        current++;
        break;
      }
      const uint8_t chunk_initial = *current++;
      if ((chunk_initial >> 5) != 3) return INCORRECT_TYPE;

      error_code ec = SUCCESS;
      const uint64_t chunk_len =
          read_uint(static_cast<uint8_t>(chunk_initial & 0x1F), ec);
      if (ec != SUCCESS) return ec;
      if (!has_bytes(chunk_len)) return INDEX_OUT_OF_BOUNDS;
      if (!simdjson::validate_utf8(reinterpret_cast<const char*>(current),
                                   static_cast<size_t>(chunk_len))) {
        return UTF8_ERROR;
      }
      memcpy(current_string_buf, current, static_cast<size_t>(chunk_len));
      current_string_buf += chunk_len;
      current += chunk_len;
      total_len += chunk_len;
    }

    // The length prefix is 32 bits wide; refuse rather than truncate.
    if (total_len > UINT32_MAX) return CAPACITY;
    *current_string_buf++ = 0;
    const uint32_t len32 = static_cast<uint32_t>(total_len);
    memcpy(len_ptr, &len32, sizeof(uint32_t));
    append_tape(offset, internal::tape_type::STRING);
    return SUCCESS;
  }

  /// Stores `len` bytes as [uint32 length][bytes][NUL] in the string buffer,
  /// appends a STRING tape word holding the entry's offset, and advances the
  /// input cursor by `len`.
  /// @param ptr Source bytes; callers pass `current`, already bounds-checked.
  /// @return CAPACITY when `len` does not fit the 32-bit length prefix.
  error_code write_string(const uint8_t* ptr, size_t len) {
    if (static_cast<uint64_t>(len) > UINT32_MAX) return CAPACITY;
    const uint32_t len32 = static_cast<uint32_t>(len);
    const size_t offset = current_string_buf - string_buf_start;
    append_tape(offset, internal::tape_type::STRING);
    memcpy(current_string_buf, &len32, sizeof(uint32_t));
    current_string_buf += sizeof(uint32_t);
    memcpy(current_string_buf, ptr, len);
    current_string_buf += len;
    *current_string_buf++ = 0;
    current += len;
    return SUCCESS;
  }

  /// Fills in the start/end words of a container once its items are on tape.
  /// The start word holds the index after the end word plus the element count
  /// in bits 32..55; the end word points back at the start word.
  error_code close_container(size_t start_idx, uint64_t count,
                             internal::tape_type open_type,
                             internal::tape_type close_type) {
    const size_t end_idx = reserve_tape();
    const uint64_t next_idx = static_cast<uint64_t>(end_idx) + 1;
    // The index shares the word with the count, so it must fit in 32 bits.
    if (next_idx > 0xFFFFFFFFull) return CAPACITY;
    // Saturate so a large count cannot spill into the type byte (bits 56..63).
    const uint64_t saturated = count > kMaxTapeCount ? kMaxTapeCount : count;
    set_tape(start_idx, next_idx | (saturated << 32), open_type);
    set_tape(end_idx, start_idx, close_type);
    return SUCCESS;
  }

  /// Major type 4: definite- or indefinite-length array.
  error_code parse_array(uint8_t additional) {
    const size_t start_idx = reserve_tape();
    uint64_t count = 0;

    if (additional == 31) {
      while (true) {
        if (current >= end) return UNEXPECTED_ERROR;
        if (*current == 0xFF) {  // break
          current++;
          break;
        }
        const error_code ec = parse_item();
        if (ec != SUCCESS) return ec;
        count++;
      }
    } else {
      error_code ec = SUCCESS;
      count = read_uint(additional, ec);
      if (ec != SUCCESS) return ec;
      // A count larger than the input simply ends with EMPTY from parse_item.
      for (uint64_t i = 0; i < count; ++i) {
        ec = parse_item();
        if (ec != SUCCESS) return ec;
      }
    }
    return close_container(start_idx, count, internal::tape_type::START_ARRAY,
                           internal::tape_type::END_ARRAY);
  }

  /// Major type 5: definite- or indefinite-length map. `count` is the number
  /// of key/value pairs.
  error_code parse_map(uint8_t additional) {
    const size_t start_idx = reserve_tape();
    uint64_t count = 0;

    if (additional == 31) {
      while (true) {
        if (current >= end) return UNEXPECTED_ERROR;
        if (*current == 0xFF) {  // break
          current++;
          break;
        }
        error_code ec = parse_key();
        if (ec != SUCCESS) return ec;
        ec = parse_item();
        if (ec != SUCCESS) return ec;
        count++;
      }
    } else {
      error_code ec = SUCCESS;
      count = read_uint(additional, ec);
      if (ec != SUCCESS) return ec;
      for (uint64_t i = 0; i < count; ++i) {
        ec = parse_key();
        if (ec != SUCCESS) return ec;
        ec = parse_item();
        if (ec != SUCCESS) return ec;
      }
    }
    return close_container(start_idx, count, internal::tape_type::START_OBJECT,
                           internal::tape_type::END_OBJECT);
  }

  /// Decodes a map key. Only text and byte strings are accepted; any other
  /// major type yields INCORRECT_TYPE without consuming input.
  error_code parse_key() {
    if (current >= end) return EMPTY;
    const uint8_t initial = *current;
    const uint8_t major = static_cast<uint8_t>(initial >> 5);
    const uint8_t additional = static_cast<uint8_t>(initial & 0x1F);
    if (major == 3) {
      current++;
      return parse_text_string(additional);
    }
    if (major == 2) {
      current++;
      return parse_byte_string(additional);
    }
    return INCORRECT_TYPE;
  }

  /// Major type 7: false/true/null/undefined and half/single/double floats.
  /// `undefined` (23) is stored as null. Other simple values, and a break byte
  /// outside an indefinite-length item, yield UNEXPECTED_ERROR.
  error_code parse_float_simple(uint8_t additional) {
    if (additional < 20) return UNEXPECTED_ERROR;

    switch (additional) {
      case 20:
        append_tape(0, internal::tape_type::FALSE_VALUE);
        return SUCCESS;
      case 21:
        append_tape(0, internal::tape_type::TRUE_VALUE);
        return SUCCESS;
      case 22:
      case 23:
        append_tape(0, internal::tape_type::NULL_VALUE);
        return SUCCESS;
      case 24:
        // One-byte simple value: consumed, but not representable here.
        if (!has_bytes(1)) return INDEX_OUT_OF_BOUNDS;
        current++;
        return UNEXPECTED_ERROR;
      case 25: {
        if (!has_bytes(2)) return INDEX_OUT_OF_BOUNDS;
        uint16_t raw;
        memcpy(&raw, current, 2);
        current += 2;
        raw = bswap_16(raw);

        // Half precision: 1 sign bit, 5 exponent bits, 10 mantissa bits.
        const bool negative = ((raw >> 15) & 1) != 0;
        const int exponent = (raw >> 10) & 0x1F;  // signed: exponent - 25 < 0
        const int mantissa = raw & 0x3FF;

        double d;
        if (exponent == 0) {
          d = std::ldexp(static_cast<double>(mantissa), -24);  // subnormal
        } else if (exponent == 31) {
          d = (mantissa == 0) ? INFINITY : NAN;
        } else {
          d = std::ldexp(static_cast<double>(mantissa + 1024), exponent - 25);
        }
        if (negative) d = -d;

        append_tape(0, internal::tape_type::DOUBLE);
        uint64_t bits;
        memcpy(&bits, &d, 8);
        append_tape_value(bits);
        return SUCCESS;
      }
      case 26: {
        if (!has_bytes(4)) return INDEX_OUT_OF_BOUNDS;
        uint32_t raw;
        memcpy(&raw, current, 4);
        current += 4;
        raw = bswap_32(raw);
        float f;
        memcpy(&f, &raw, 4);

        append_tape(0, internal::tape_type::DOUBLE);
        const double d = f;  // widen to the tape's 64-bit representation
        uint64_t bits;
        memcpy(&bits, &d, 8);
        append_tape_value(bits);
        return SUCCESS;
      }
      case 27: {
        if (!has_bytes(8)) return INDEX_OUT_OF_BOUNDS;
        uint64_t raw;
        memcpy(&raw, current, 8);
        current += 8;
        raw = bswap_64(raw);
        append_tape(0, internal::tape_type::DOUBLE);
        append_tape_value(raw);  // already the 64-bit pattern of the double
        return SUCCESS;
      }
      default:
        return UNEXPECTED_ERROR;
    }
  }
};

}  // namespace

/// Decodes one CBOR item from `buf` into `parser.doc`.
///
/// @param buf        Input bytes.
/// @param len        Number of input bytes.
/// @param parser     Parser whose document is (re)allocated and filled.
/// @param bytes_read Receives the number of input bytes consumed, including on
///                   a decode error; 0 when allocation fails.
/// @return SUCCESS, CAPACITY when `len` is too large to size the document, the
///         allocation error, or the first decode error.
simdjson::error_code simdcbor::parse(const uint8_t* buf, size_t len,
                                     simdjson::dom::parser& parser,
                                     size_t& bytes_read) {
  bytes_read = 0;
  // Guard the capacity computation below against size_t overflow.
  if (len > (SIZE_MAX - 4096) / 8) return CAPACITY;

  auto err = parser.doc.allocate(len * 8 + 4096);
  if (err != SUCCESS) return err;

  CborReader reader(buf, len, parser.doc);
  err = reader.parse_root();
  bytes_read = static_cast<size_t>(reader.current - buf);
  return err;
}

/// Returns the root element of the document last filled by simdcbor::parse.
simdjson::dom::element simdcbor::get_root(simdjson::dom::parser& parser) {
  return parser.doc.root();
}