+52
README.md
+52
README.md
···
408
408
print_endline ("Parse error: " ^ e.message)
409
409
```
410
410
411
+
## CBOR
412
+
413
+
`simdjsont` includes support for [CBOR (RFC 8949)](https://cbor.io/), using the same codecs as JSON. It decodes CBOR into the same internal `Simdjsont.Json.t` representation, allowing you to use your existing data models and validation logic.
414
+
415
+
### CBOR Quick Start
416
+
417
+
```ocaml
418
+
type point = { x: int; y: int }
419
+
420
+
let point_codec =
421
+
let open Simdjsont.Decode in
422
+
Obj.field (fun x y -> { x; y })
423
+
|> Obj.mem "x" int ~enc:(fun p -> p.x)
424
+
|> Obj.mem "y" int ~enc:(fun p -> p.y)
425
+
|> Obj.finish
426
+
427
+
(* Encode to CBOR binary string *)
428
+
let cbor = Simdjsont.Cbor.encode_string point_codec { x = 10; y = 20 }
429
+
430
+
(* Decode from CBOR *)
431
+
match Simdjsont.Cbor.decode_string point_codec cbor with
432
+
| Ok p -> Printf.printf "Point from CBOR: (%d, %d)\n" p.x p.y
433
+
| Error e -> print_endline e
434
+
```
435
+
436
+
### CBOR Streaming
437
+
438
+
CBOR often contains multiple concatenated items. Use `Simdjsont.Cbor.to_seq` to parse these as a sequence:
439
+
440
+
```ocaml
441
+
let cbor_data =
442
+
Simdjsont.Cbor.encode_string Simdjsont.Decode.int 1 ^
443
+
Simdjsont.Cbor.encode_string Simdjsont.Decode.int 2 ^
444
+
Simdjsont.Cbor.encode_string Simdjsont.Decode.int 3
445
+
446
+
let () =
447
+
Simdjsont.Cbor.to_seq Simdjsont.Decode.int cbor_data
448
+
|> Seq.iter (function
449
+
| Ok n -> Printf.printf "Decoded: %d\n" n
450
+
| Error e -> Printf.printf "Error: %s\n" e)
451
+
```
452
+
453
+
### CBOR Limitations
454
+
455
+
The CBOR implementation is optimized for JSON-compatible data structures. The following CBOR features are currently NOT supported:
456
+
457
+
- **Tags**: While basic tags are skipped, specialized tag processing (e.g., for Date/Time) is not included.
458
+
- **Bignums**: Integers are limited to 64-bit signed/unsigned.
459
+
- **Byte Strings**: Decoded as strings; binary-only data might fail UTF-8 validation if decoded via `string` codec.
460
+
- **Undefined/Simple Values**: Only `true`, `false`, and `null` are supported.
461
+
- **Integer Map Keys**: Only string keys (Major type 3) and byte string keys (Major type 2) are supported for objects.
462
+
411
463
## License
412
464
413
465
ISC
+102
bench/bench_cbor.ml
+102
bench/bench_cbor.ml
···
1
+
let read_file path =
2
+
let ic = open_in path in
3
+
let n = in_channel_length ic in
4
+
let s = really_input_string ic n in
5
+
close_in ic;
6
+
s
7
+
8
+
let twitter_json = lazy (read_file "bench/data/twitter.json")
9
+
let citm_json = lazy (read_file "bench/data/citm_catalog.json")
10
+
11
+
let json_to_cbor_string json_str =
12
+
match Simdjsont.Codec.decode_string Simdjsont.Codec.value json_str with
13
+
| Ok json -> Simdjsont.Cbor.encode_string Simdjsont.Codec.value json
14
+
| Error e -> failwith e
15
+
16
+
let twitter_cbor = lazy (json_to_cbor_string (Lazy.force twitter_json))
17
+
let citm_cbor = lazy (json_to_cbor_string (Lazy.force citm_json))
18
+
19
+
let time_it name iterations f =
20
+
Gc.full_major ();
21
+
let start = Unix.gettimeofday () in
22
+
for _ = 1 to iterations do
23
+
f ()
24
+
done;
25
+
let elapsed = Unix.gettimeofday () -. start in
26
+
let ns_per_op = elapsed /. float_of_int iterations *. 1_000_000_000.0 in
27
+
(name, ns_per_op)
28
+
29
+
let run_cbor_benchmarks name cbor_str iterations =
30
+
Printf.printf "\n=== %s (%d bytes) ===\n\n%!" name (String.length cbor_str);
31
+
32
+
Printf.printf "--- DECODE (to AST) ---\n%!";
33
+
34
+
Printf.printf "Running simdjsont decode...\n%!";
35
+
let decode_results =
36
+
[
37
+
time_it "simdjsont" iterations (fun () ->
38
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.value cbor_str with
39
+
| Ok _ -> ()
40
+
| Error e -> failwith ("Simdjsont error: " ^ e));
41
+
time_it "ocaml-cbor" iterations (fun () ->
42
+
let _ = CBOR.Simple.decode cbor_str in
43
+
());
44
+
]
45
+
in
46
+
let baseline = List.assoc "ocaml-cbor" decode_results in
47
+
List.iter
48
+
(fun (n, ns) ->
49
+
Printf.printf " %-12s %10.0f ns (%.1fx)\n%!" n ns (baseline /. ns))
50
+
decode_results;
51
+
52
+
Printf.printf "\n--- ENCODE (AST to string) ---\n%!";
53
+
54
+
let simdjson_ast =
55
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.value cbor_str with
56
+
| Ok v -> v
57
+
| Error e -> failwith e
58
+
in
59
+
60
+
let cbor_ast = CBOR.Simple.decode cbor_str in
61
+
let encode_results =
62
+
[
63
+
time_it "simdjsont" iterations (fun () ->
64
+
let _ =
65
+
Simdjsont.Cbor.encode_string Simdjsont.Codec.value simdjson_ast
66
+
in
67
+
());
68
+
time_it "ocaml-cbor" iterations (fun () ->
69
+
let _ = CBOR.Simple.encode cbor_ast in
70
+
());
71
+
]
72
+
in
73
+
let baseline = List.assoc "ocaml-cbor" encode_results in
74
+
List.iter
75
+
(fun (n, ns) ->
76
+
Printf.printf " %-12s %10.0f ns (%.1fx)\n%!" n ns (baseline /. ns))
77
+
encode_results
78
+
79
+
let () =
80
+
Printf.printf "Preparing CBOR data...\n%!";
81
+
let twitter = Lazy.force twitter_cbor in
82
+
let citm = Lazy.force citm_cbor in
83
+
Printf.printf "CBOR data prepared. Twitter size: %d, CITM size: %d\n%!"
84
+
(String.length twitter) (String.length citm);
85
+
86
+
(* Printf.printf "Testing tiny CBOR...\n%!";
87
+
let tiny_json = "123" in
88
+
let tiny_cbor = json_to_cbor_string tiny_json in
89
+
(match Simdjsont.Cbor.decode_string Simdjsont.Codec.value tiny_cbor with
90
+
| Ok _ -> Printf.printf "Tiny decode success\n%!"
91
+
| Error e -> Printf.printf "Tiny decode failed: %s\n%!" e);
92
+
93
+
Printf.printf "Testing array CBOR...\n%!";
94
+
let array_json = "[1, 2, 3]" in
95
+
let array_cbor = json_to_cbor_string array_json in
96
+
(match Simdjsont.Cbor.decode_string Simdjsont.Codec.value array_cbor with
97
+
| Ok _ -> Printf.printf "Array decode success\n%!"
98
+
| Error e -> Printf.printf "Array decode failed: %s\n%!" e); *)
99
+
Printf.printf "CBOR benchmarks\n%!";
100
+
Printf.printf "==============================\n%!";
101
+
run_cbor_benchmarks "twitter.cbor" twitter 50;
102
+
run_cbor_benchmarks "citm_catalog.cbor" citm 20
+4
bench/dune
+4
bench/dune
+3
-3
dune-project
+3
-3
dune-project
···
1
1
(lang dune 3.20)
2
2
3
3
(name simdjsont)
4
-
(version 0.1.0)
4
+
(version 0.2.0)
5
5
6
6
(generate_opam_files true)
7
7
···
20
20
21
21
(package
22
22
(name simdjsont)
23
-
(synopsis "JSON parsing with simdjson, with support for ndjson streaming")
23
+
(synopsis "JSON and CBOR parsing with simdjson, with support for streaming")
24
24
(description
25
-
"OCaml bindings to simdjson with support for ndjson streaming. Includes vendored simdjson 4.2.4.")
25
+
"OCaml bindings to simdjson with support for ndjson streaming and CBOR encoding/decoding. Includes vendored simdjson 4.2.4.")
26
26
(depends
27
27
(ocaml (>= 5.4.0))
28
28
integers
+2
-2
lib/dune
+2
-2
lib/dune
···
4
4
(libraries integers)
5
5
(foreign_stubs
6
6
(language cxx)
7
-
(names simdjsont_stubs simdjsont_impl)
8
-
(extra_deps simdjson_vendor.cc simdjson_vendor.hh)
7
+
(names simdjsont_stubs simdjsont_impl simdcbor)
8
+
(extra_deps simdjson_vendor.cc simdjson_vendor.hh simdcbor.hh)
9
9
(flags
10
10
:standard
11
11
-std=c++17
+411
lib/simdcbor.cpp
+411
lib/simdcbor.cpp
···
1
+
#include "simdcbor.hh"
2
+
#include <cmath>
3
+
#include <cstring>
4
+
#include <vector>
5
+
#include <stdexcept>
6
+
7
+
#if defined(_MSC_VER)
8
+
#include <stdlib.h>
9
+
#define bswap_16(x) _byteswap_ushort(x)
10
+
#define bswap_32(x) _byteswap_ulong(x)
11
+
#define bswap_64(x) _byteswap_uint64(x)
12
+
#else
13
+
#define bswap_16(x) __builtin_bswap16(x)
14
+
#define bswap_32(x) __builtin_bswap32(x)
15
+
#define bswap_64(x) __builtin_bswap64(x)
16
+
#endif
17
+
18
+
using namespace simdjson;
19
+
20
+
namespace {
21
+
22
+
class CborReader {
23
+
public:
24
+
const uint8_t* current;
25
+
const uint8_t* end;
26
+
dom::document& doc;
27
+
size_t tape_idx;
28
+
uint8_t* current_string_buf;
29
+
const uint8_t* string_buf_start;
30
+
31
+
CborReader(const uint8_t* buf, size_t len, dom::document& d)
32
+
: current(buf), end(buf + len), doc(d), tape_idx(0),
33
+
current_string_buf(d.string_buf.get()),
34
+
string_buf_start(d.string_buf.get()) {}
35
+
36
+
void append_tape(uint64_t val, internal::tape_type type) {
37
+
doc.tape[tape_idx++] = val | (uint64_t(type) << 56);
38
+
}
39
+
40
+
void append_tape_value(uint64_t val) {
41
+
doc.tape[tape_idx++] = val;
42
+
}
43
+
44
+
size_t reserve_tape() {
45
+
return tape_idx++;
46
+
}
47
+
48
+
void set_tape(size_t idx, uint64_t val, internal::tape_type type) {
49
+
doc.tape[idx] = val | (uint64_t(type) << 56);
50
+
}
51
+
52
+
error_code parse_root() {
53
+
size_t root_start = reserve_tape();
54
+
55
+
error_code ec = parse_item();
56
+
if (ec != SUCCESS) return ec;
57
+
58
+
size_t root_end = reserve_tape();
59
+
60
+
set_tape(root_start, root_end, internal::tape_type::ROOT);
61
+
set_tape(root_end, root_start, internal::tape_type::ROOT);
62
+
63
+
return SUCCESS;
64
+
}
65
+
66
+
error_code parse_item() {
67
+
if (current >= end) return EMPTY;
68
+
69
+
uint8_t initial = *current++;
70
+
uint8_t major = initial >> 5;
71
+
uint8_t additional = initial & 0x1F;
72
+
73
+
switch (major) {
74
+
case 0: return parse_uint(additional);
75
+
case 1: return parse_nint(additional);
76
+
case 2: return parse_byte_string(additional);
77
+
case 3: return parse_text_string(additional);
78
+
case 4: return parse_array(additional);
79
+
case 5: return parse_map(additional);
80
+
case 6: return parse_item();
81
+
case 7: return parse_float_simple(additional);
82
+
default: return UNEXPECTED_ERROR;
83
+
}
84
+
}
85
+
86
+
private:
87
+
uint64_t read_uint(uint8_t additional, error_code& ec) {
88
+
if (additional < 24) {
89
+
return additional;
90
+
} else if (additional == 24) {
91
+
if (current + 1 > end) { ec = INDEX_OUT_OF_BOUNDS; return 0; }
92
+
uint8_t v = *current++;
93
+
return v;
94
+
} else if (additional == 25) {
95
+
if (current + 2 > end) { ec = INDEX_OUT_OF_BOUNDS; return 0; }
96
+
uint16_t v;
97
+
memcpy(&v, current, 2);
98
+
current += 2;
99
+
return bswap_16(v);
100
+
} else if (additional == 26) {
101
+
if (current + 4 > end) { ec = INDEX_OUT_OF_BOUNDS; return 0; }
102
+
uint32_t v;
103
+
memcpy(&v, current, 4);
104
+
current += 4;
105
+
return bswap_32(v);
106
+
} else if (additional == 27) {
107
+
if (current + 8 > end) { ec = INDEX_OUT_OF_BOUNDS; return 0; }
108
+
uint64_t v;
109
+
memcpy(&v, current, 8);
110
+
current += 8;
111
+
return bswap_64(v);
112
+
} else {
113
+
ec = UNEXPECTED_ERROR;
114
+
return 0;
115
+
}
116
+
}
117
+
118
+
error_code parse_uint(uint8_t additional) {
119
+
error_code ec = SUCCESS;
120
+
uint64_t val = read_uint(additional, ec);
121
+
if (ec != SUCCESS) return ec;
122
+
123
+
append_tape(0, internal::tape_type::UINT64);
124
+
append_tape_value(val);
125
+
return SUCCESS;
126
+
}
127
+
128
+
error_code parse_nint(uint8_t additional) {
129
+
error_code ec = SUCCESS;
130
+
uint64_t val = read_uint(additional, ec);
131
+
if (ec != SUCCESS) return ec;
132
+
133
+
append_tape(0, internal::tape_type::INT64);
134
+
int64_t nval = -1 - int64_t(val);
135
+
append_tape_value((uint64_t)nval);
136
+
return SUCCESS;
137
+
}
138
+
139
+
error_code parse_byte_string(uint8_t additional) {
140
+
if (additional == 31) return UNEXPECTED_ERROR;
141
+
142
+
error_code ec = SUCCESS;
143
+
uint64_t len = read_uint(additional, ec);
144
+
if (ec != SUCCESS) return ec;
145
+
146
+
if (current + len > end) return INDEX_OUT_OF_BOUNDS;
147
+
148
+
return write_string(current, len);
149
+
}
150
+
151
+
error_code parse_text_string(uint8_t additional) {
152
+
if (additional == 31) {
153
+
size_t offset = current_string_buf - string_buf_start;
154
+
uint8_t* len_ptr = current_string_buf;
155
+
current_string_buf += sizeof(uint32_t);
156
+
size_t total_len = 0;
157
+
158
+
while (true) {
159
+
if (current >= end) return UNEXPECTED_ERROR;
160
+
if (*current == 0xFF) {
161
+
current++;
162
+
break;
163
+
}
164
+
uint8_t chunk_initial = *current++;
165
+
if ((chunk_initial >> 5) != 3) return INCORRECT_TYPE;
166
+
167
+
error_code ec = SUCCESS;
168
+
uint64_t chunk_len = read_uint(chunk_initial & 0x1F, ec);
169
+
if (ec != SUCCESS) return ec;
170
+
if (current + chunk_len > end) return INDEX_OUT_OF_BOUNDS;
171
+
172
+
if (!simdjson::validate_utf8((const char*)current, chunk_len)) {
173
+
return UTF8_ERROR;
174
+
}
175
+
176
+
memcpy(current_string_buf, current, chunk_len);
177
+
current_string_buf += chunk_len;
178
+
current += chunk_len;
179
+
total_len += chunk_len;
180
+
}
181
+
182
+
*current_string_buf++ = 0;
183
+
uint32_t len32 = (uint32_t)total_len;
184
+
memcpy(len_ptr, &len32, sizeof(uint32_t));
185
+
append_tape(offset, internal::tape_type::STRING);
186
+
return SUCCESS;
187
+
}
188
+
189
+
error_code ec = SUCCESS;
190
+
uint64_t len = read_uint(additional, ec);
191
+
if (ec != SUCCESS) return ec;
192
+
193
+
if (current + len > end) return INDEX_OUT_OF_BOUNDS;
194
+
195
+
if (!simdjson::validate_utf8((const char*)current, len)) {
196
+
return UTF8_ERROR;
197
+
}
198
+
199
+
return write_string(current, len);
200
+
}
201
+
202
+
error_code write_string(const uint8_t* ptr, size_t len) {
203
+
uint32_t len32 = (uint32_t)len;
204
+
size_t offset = current_string_buf - string_buf_start;
205
+
206
+
append_tape(offset, internal::tape_type::STRING);
207
+
208
+
memcpy(current_string_buf, &len32, sizeof(uint32_t));
209
+
current_string_buf += sizeof(uint32_t);
210
+
211
+
memcpy(current_string_buf, ptr, len);
212
+
current_string_buf += len;
213
+
214
+
*current_string_buf++ = 0;
215
+
current += len;
216
+
217
+
return SUCCESS;
218
+
}
219
+
220
+
error_code parse_array(uint8_t additional) {
221
+
size_t start_idx = reserve_tape();
222
+
uint64_t count = 0;
223
+
224
+
if (additional == 31) {
225
+
while (true) {
226
+
if (current >= end) return UNEXPECTED_ERROR;
227
+
if (*current == 0xFF) {
228
+
current++;
229
+
break;
230
+
}
231
+
error_code ec = parse_item();
232
+
if (ec != SUCCESS) return ec;
233
+
count++;
234
+
}
235
+
} else {
236
+
error_code ec = SUCCESS;
237
+
count = read_uint(additional, ec);
238
+
if (ec != SUCCESS) return ec;
239
+
240
+
for (uint64_t i = 0; i < count; ++i) {
241
+
ec = parse_item();
242
+
if (ec != SUCCESS) return ec;
243
+
}
244
+
}
245
+
246
+
size_t end_idx = reserve_tape();
247
+
size_t next_idx = end_idx + 1;
248
+
249
+
uint64_t start_payload = next_idx | (count << 32);
250
+
doc.tape[start_idx] = start_payload | (uint64_t(internal::tape_type::START_ARRAY) << 56);
251
+
doc.tape[end_idx] = start_idx | (uint64_t(internal::tape_type::END_ARRAY) << 56);
252
+
253
+
return SUCCESS;
254
+
}
255
+
256
+
error_code parse_map(uint8_t additional) {
257
+
size_t start_idx = reserve_tape();
258
+
uint64_t count = 0;
259
+
260
+
if (additional == 31) {
261
+
while (true) {
262
+
if (current >= end) return UNEXPECTED_ERROR;
263
+
if (*current == 0xFF) {
264
+
current++;
265
+
break;
266
+
}
267
+
268
+
error_code ec = parse_key();
269
+
if (ec != SUCCESS) return ec;
270
+
271
+
ec = parse_item();
272
+
if (ec != SUCCESS) return ec;
273
+
count++;
274
+
}
275
+
} else {
276
+
error_code ec = SUCCESS;
277
+
count = read_uint(additional, ec);
278
+
if (ec != SUCCESS) return ec;
279
+
280
+
for (uint64_t i = 0; i < count; ++i) {
281
+
ec = parse_key();
282
+
if (ec != SUCCESS) return ec;
283
+
284
+
ec = parse_item();
285
+
if (ec != SUCCESS) return ec;
286
+
}
287
+
}
288
+
289
+
size_t end_idx = reserve_tape();
290
+
size_t next_idx = end_idx + 1;
291
+
292
+
uint64_t start_payload = next_idx | (count << 32);
293
+
doc.tape[start_idx] = start_payload | (uint64_t(internal::tape_type::START_OBJECT) << 56);
294
+
doc.tape[end_idx] = start_idx | (uint64_t(internal::tape_type::END_OBJECT) << 56);
295
+
296
+
return SUCCESS;
297
+
}
298
+
299
+
error_code parse_key() {
300
+
if (current >= end) return EMPTY;
301
+
302
+
uint8_t initial = *current;
303
+
uint8_t major = initial >> 5;
304
+
305
+
if (major == 3) {
306
+
current++;
307
+
return parse_text_string(initial & 0x1F);
308
+
} else if (major == 2) {
309
+
current++;
310
+
return parse_byte_string(initial & 0x1F);
311
+
} else {
312
+
return INCORRECT_TYPE;
313
+
}
314
+
}
315
+
316
+
error_code parse_float_simple(uint8_t additional) {
317
+
if (additional < 20) {
318
+
return UNEXPECTED_ERROR;
319
+
}
320
+
switch (additional) {
321
+
case 20:
322
+
append_tape(0, internal::tape_type::FALSE_VALUE);
323
+
return SUCCESS;
324
+
case 21:
325
+
append_tape(0, internal::tape_type::TRUE_VALUE);
326
+
return SUCCESS;
327
+
case 22:
328
+
append_tape(0, internal::tape_type::NULL_VALUE);
329
+
return SUCCESS;
330
+
case 23:
331
+
append_tape(0, internal::tape_type::NULL_VALUE);
332
+
return SUCCESS;
333
+
case 24:
334
+
if (current + 1 > end) return INDEX_OUT_OF_BOUNDS;
335
+
current++;
336
+
return UNEXPECTED_ERROR;
337
+
case 25: {
338
+
if (current + 2 > end) return INDEX_OUT_OF_BOUNDS;
339
+
uint16_t v;
340
+
memcpy(&v, current, 2);
341
+
current += 2;
342
+
v = bswap_16(v);
343
+
344
+
uint32_t sign = (v >> 15) & 1;
345
+
uint32_t exp = (v >> 10) & 0x1F;
346
+
uint32_t mant = v & 0x3FF;
347
+
348
+
double d;
349
+
if (exp == 0) {
350
+
d = std::ldexp(mant, -24);
351
+
} else if (exp == 31) {
352
+
d = (mant == 0) ? INFINITY : NAN;
353
+
} else {
354
+
d = std::ldexp(mant + 1024, exp - 25);
355
+
}
356
+
if (sign) d = -d;
357
+
358
+
append_tape(0, internal::tape_type::DOUBLE);
359
+
uint64_t d_as_u64;
360
+
memcpy(&d_as_u64, &d, 8);
361
+
append_tape_value(d_as_u64);
362
+
return SUCCESS;
363
+
}
364
+
case 26: {
365
+
if (current + 4 > end) return INDEX_OUT_OF_BOUNDS;
366
+
uint32_t v;
367
+
memcpy(&v, current, 4);
368
+
current += 4;
369
+
v = bswap_32(v);
370
+
float f;
371
+
memcpy(&f, &v, 4);
372
+
373
+
append_tape(0, internal::tape_type::DOUBLE);
374
+
double d = f;
375
+
uint64_t d_as_u64;
376
+
memcpy(&d_as_u64, &d, 8);
377
+
append_tape_value(d_as_u64);
378
+
return SUCCESS;
379
+
}
380
+
case 27: {
381
+
if (current + 8 > end) return INDEX_OUT_OF_BOUNDS;
382
+
uint64_t v;
383
+
memcpy(&v, current, 8);
384
+
current += 8;
385
+
v = bswap_64(v);
386
+
387
+
append_tape(0, internal::tape_type::DOUBLE);
388
+
append_tape_value(v);
389
+
return SUCCESS;
390
+
}
391
+
default:
392
+
return UNEXPECTED_ERROR;
393
+
}
394
+
}
395
+
};
396
+
397
+
}
398
+
399
+
simdjson::error_code simdcbor::parse(const uint8_t* buf, size_t len, simdjson::dom::parser& parser, size_t& bytes_read) {
400
+
auto err = parser.doc.allocate(len * 8 + 4096);
401
+
if (err != SUCCESS) return err;
402
+
403
+
CborReader reader(buf, len, parser.doc);
404
+
err = reader.parse_root();
405
+
bytes_read = reader.current - buf;
406
+
return err;
407
+
}
408
+
409
+
simdjson::dom::element simdcbor::get_root(simdjson::dom::parser& parser) {
410
+
return parser.doc.root();
411
+
}
+12
lib/simdcbor.hh
+12
lib/simdcbor.hh
···
1
+
#ifndef SIMDCBOR_HH
2
+
#define SIMDCBOR_HH
3
+
4
+
#include "simdjson_vendor.hh"
5
+
6
+
class simdcbor {
7
+
public:
8
+
static simdjson::error_code parse(const uint8_t* buf, size_t len, simdjson::dom::parser& parser, size_t& bytes_read);
9
+
static simdjson::dom::element get_root(simdjson::dom::parser& parser);
10
+
};
11
+
12
+
#endif
+3
lib/simdjson_vendor.hh
+3
lib/simdjson_vendor.hh
···
1
+
class simdcbor;
1
2
/* auto-generated on 2025-12-17 20:32:36 -0500. version 4.2.4 Do not edit! */
2
3
/* including simdjson.h: */
3
4
/* begin file simdjson.h */
···
5110
5111
* This class cannot be copied, only moved, to avoid unintended allocations.
5111
5112
*/
5112
5113
class document {
5114
+
friend class ::simdcbor;
5113
5115
public:
5114
5116
/**
5115
5117
* Create a document container with zero capacity.
···
5211
5213
* @note This is not thread safe: one parser cannot produce two documents at the same time!
5212
5214
*/
5213
5215
class parser {
5216
+
friend class ::simdcbor;
5214
5217
public:
5215
5218
/**
5216
5219
* Create a JSON parser.
+51
lib/simdjsont.ml
+51
lib/simdjsont.ml
···
97
97
let validate = Validate.is_valid
98
98
let decode codec s = Codec.decode_string codec s
99
99
let encode codec v = Codec.encode_string codec v
100
+
101
+
(** {1 CBOR} *)
102
+
103
+
module Cbor = struct
104
+
external parse_cbor :
105
+
Raw.parser -> Raw.buffer -> int -> Raw.error_code * Raw.element option
106
+
= "simdjson_parse_cbor"
107
+
108
+
external cbor_next :
109
+
Raw.parser ->
110
+
Raw.buffer ->
111
+
int ->
112
+
int ->
113
+
Raw.error_code * Raw.element option * int = "simdjson_cbor_next"
114
+
115
+
let decode_string codec s =
116
+
let parser = Raw.create_parser () in
117
+
let buf = Raw.buffer_of_string s in
118
+
let len = String.length s in
119
+
match parse_cbor parser buf len with
120
+
| Raw.Success, Some elt -> Codec.decode_element codec elt
121
+
| Raw.Success, None -> Error "Internal error: success but no element"
122
+
| err, _ -> Error (Raw.error_message err)
123
+
124
+
let to_seq codec s =
125
+
let parser = Raw.create_parser () in
126
+
let buf = Raw.buffer_of_string s in
127
+
let len = String.length s in
128
+
let rec aux offset () =
129
+
if offset >= len then Seq.Nil
130
+
else
131
+
match cbor_next parser buf len offset with
132
+
| Raw.Success, Some elt, new_offset ->
133
+
let res = Codec.decode_element codec elt in
134
+
Seq.Cons (res, aux new_offset)
135
+
| Raw.Success, None, _ -> Seq.Nil
136
+
| Raw.Empty, _, _ -> Seq.Nil
137
+
| err, _, _ ->
138
+
let msg = Raw.error_message err in
139
+
Seq.Cons (Error msg, fun () -> Seq.Nil)
140
+
in
141
+
aux 0
142
+
143
+
let encode_to_buffer codec v =
144
+
let ast = Codec.to_json codec v in
145
+
Simdjsont_cbor_encode.to_buffer ast
146
+
147
+
let encode_string codec v =
148
+
let ast = Codec.to_json codec v in
149
+
Simdjsont_cbor_encode.to_string ast
150
+
end
+50
lib/simdjsont.mli
+50
lib/simdjsont.mli
···
91
91
92
92
val encode : 'a Codec.t -> 'a -> string
93
93
(** Convenience alias for {!Codec.encode_string}. *)
94
+
95
+
(** {1 CBOR}
96
+
97
+
CBOR (Concise Binary Object Representation, RFC 8949) support using the same
98
+
codec infrastructure as JSON. The same codecs work for both formats.
99
+
100
+
{b Supported CBOR types:}
101
+
- Integers (major types 0, 1) including 64-bit
102
+
- Floats (major type 7) including half-precision (16-bit)
103
+
- Text strings (major type 3) including indefinite-length
104
+
- Arrays (major type 4) including indefinite-length
105
+
- Maps (major type 5) with string keys, including indefinite-length
106
+
- Simple values: true, false, null
107
+
108
+
{b Not supported} (returns error or skipped):
109
+
- Byte strings (major type 2)
110
+
- Tagged values (major type 6)
111
+
- Integer map keys
112
+
- Bignums, undefined, other simple values *)
113
+
114
+
module Cbor : sig
115
+
val decode_string : 'a Codec.t -> string -> ('a, string) result
116
+
(** [decode_string codec cbor] decodes a CBOR byte string using [codec].
117
+
118
+
Uses the same codec type as JSON decoding, so you can reuse codecs:
119
+
{[
120
+
let point = Simdjsont.Cbor.decode_string point_codec cbor_bytes
121
+
]} *)
122
+
123
+
val to_seq : 'a Codec.t -> string -> ('a, string) result Seq.t
124
+
(** [to_seq codec data] parses multiple concatenated CBOR items from [data].
125
+
126
+
Returns a sequence of decode results, one per CBOR item. Useful for
127
+
processing streams of CBOR-encoded records:
128
+
{[
129
+
Simdjsont.Cbor.to_seq event_codec cbor_stream
130
+
|> Seq.iter (function
131
+
| Ok event -> process event
132
+
| Error msg -> log_error msg)
133
+
]} *)
134
+
135
+
val encode_string : 'a Codec.t -> 'a -> string
136
+
(** [encode_string codec value] encodes [value] to a CBOR byte string.
137
+
138
+
The result is a binary string containing valid CBOR. *)
139
+
140
+
val encode_to_buffer : 'a Codec.t -> 'a -> Buffer.t
141
+
(** [encode_to_buffer codec value] encodes [value] to a fresh buffer
142
+
containing CBOR bytes. *)
143
+
end
+105
lib/simdjsont_cbor_encode.ml
+105
lib/simdjsont_cbor_encode.ml
···
1
+
module Json = Simdjsont_json
2
+
3
+
let write_byte buf b = Buffer.add_char buf (Char.unsafe_chr b)
4
+
5
+
let write_uint_body buf v =
6
+
if v < 24L then write_byte buf (Int64.to_int v)
7
+
else if v <= 0xFFL then (
8
+
write_byte buf 24;
9
+
write_byte buf (Int64.to_int v))
10
+
else if v <= 0xFFFFL then (
11
+
write_byte buf 25;
12
+
let v_int = Int64.to_int v in
13
+
write_byte buf ((v_int lsr 8) land 0xFF);
14
+
write_byte buf (v_int land 0xFF))
15
+
else if v <= 0xFFFFFFFFL then (
16
+
write_byte buf 26;
17
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 24) land 0xFF);
18
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 16) land 0xFF);
19
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 8) land 0xFF);
20
+
write_byte buf (Int64.to_int v land 0xFF))
21
+
else (
22
+
write_byte buf 27;
23
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 56) land 0xFF);
24
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 48) land 0xFF);
25
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 40) land 0xFF);
26
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 32) land 0xFF);
27
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 24) land 0xFF);
28
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 16) land 0xFF);
29
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 8) land 0xFF);
30
+
write_byte buf (Int64.to_int v land 0xFF))
31
+
32
+
33
+
let write_head buf major v =
34
+
let major_bits = major lsl 5 in
35
+
if v < 24L then write_byte buf (major_bits lor Int64.to_int v)
36
+
else if v <= 0xFFL then (
37
+
write_byte buf (major_bits lor 24);
38
+
write_byte buf (Int64.to_int v))
39
+
else if v <= 0xFFFFL then (
40
+
write_byte buf (major_bits lor 25);
41
+
let v_int = Int64.to_int v in
42
+
write_byte buf ((v_int lsr 8) land 0xFF);
43
+
write_byte buf (v_int land 0xFF))
44
+
else if v <= 0xFFFFFFFFL then (
45
+
write_byte buf (major_bits lor 26);
46
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 24) land 0xFF);
47
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 16) land 0xFF);
48
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 8) land 0xFF);
49
+
write_byte buf (Int64.to_int v land 0xFF))
50
+
else (
51
+
write_byte buf (major_bits lor 27);
52
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 56) land 0xFF);
53
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 48) land 0xFF);
54
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 40) land 0xFF);
55
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 32) land 0xFF);
56
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 24) land 0xFF);
57
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 16) land 0xFF);
58
+
write_byte buf (Int64.to_int (Int64.shift_right_logical v 8) land 0xFF);
59
+
write_byte buf (Int64.to_int v land 0xFF))
60
+
61
+
let rec write buf = function
62
+
| Json.Null -> write_byte buf 0xF6
63
+
| Json.Bool false -> write_byte buf 0xF4
64
+
| Json.Bool true -> write_byte buf 0xF5
65
+
| Json.Int i ->
66
+
if i >= 0L then write_head buf 0 i else write_head buf 1 (Int64.lognot i)
67
+
| Json.Float f ->
68
+
write_byte buf 0xFB;
69
+
let i = Int64.bits_of_float f in
70
+
write_byte buf (Int64.to_int (Int64.shift_right_logical i 56) land 0xFF);
71
+
write_byte buf (Int64.to_int (Int64.shift_right_logical i 48) land 0xFF);
72
+
write_byte buf (Int64.to_int (Int64.shift_right_logical i 40) land 0xFF);
73
+
write_byte buf (Int64.to_int (Int64.shift_right_logical i 32) land 0xFF);
74
+
write_byte buf (Int64.to_int (Int64.shift_right_logical i 24) land 0xFF);
75
+
write_byte buf (Int64.to_int (Int64.shift_right_logical i 16) land 0xFF);
76
+
write_byte buf (Int64.to_int (Int64.shift_right_logical i 8) land 0xFF);
77
+
write_byte buf (Int64.to_int i land 0xFF)
78
+
| Json.String s ->
79
+
let len = String.length s in
80
+
write_head buf 3 (Int64.of_int len);
81
+
Buffer.add_string buf s
82
+
| Json.Array items ->
83
+
let len = List.length items in
84
+
write_head buf 4 (Int64.of_int len);
85
+
List.iter (write buf) items
86
+
| Json.Object members ->
87
+
let len = List.length members in
88
+
write_head buf 5 (Int64.of_int len);
89
+
List.iter
90
+
(fun (k, v) ->
91
+
let klen = String.length k in
92
+
write_head buf 3 (Int64.of_int klen);
93
+
Buffer.add_string buf k;
94
+
write buf v)
95
+
members
96
+
97
+
let to_buffer ?(capacity = 256) v =
98
+
let buf = Buffer.create capacity in
99
+
write buf v;
100
+
buf
101
+
102
+
let to_string v =
103
+
let buf = Buffer.create 256 in
104
+
write buf v;
105
+
Buffer.contents buf
+34
-2
lib/simdjsont_codec.ml
+34
-2
lib/simdjsont_codec.ml
···
12
12
13
13
type 'a decoder = string list -> Simdjsont_raw.element -> 'a
14
14
type encoder = Buffer.t -> unit
15
-
type 'a t = { decode : 'a decoder; encode : 'a -> encoder }
15
+
16
+
type 'a t = {
17
+
decode : 'a decoder;
18
+
encode : 'a -> encoder;
19
+
to_json : 'a -> Simdjsont_json.t;
20
+
}
16
21
17
22
let decode_element codec elt =
18
23
try Ok (codec.decode [] elt)
···
36
41
buf
37
42
38
43
let encode_string codec v = Buffer.contents (encode_to_buffer codec v)
44
+
let to_json codec v = codec.to_json v
39
45
40
46
let write_escaped_string buf s =
41
47
Buffer.add_char buf '"';
···
62
68
| Simdjsont_raw.Null -> ()
63
69
| _ -> decode_error path "expected null");
64
70
encode = (fun () buf -> Buffer.add_string buf "null");
71
+
to_json = (fun () -> Simdjsont_json.Null);
65
72
}
66
73
67
74
let bool : bool t =
···
73
80
| Error _ -> decode_error path "expected bool");
74
81
encode =
75
82
(fun b buf -> Buffer.add_string buf (if b then "true" else "false"));
83
+
to_json = (fun b -> Simdjsont_json.Bool b);
76
84
}
77
85
78
86
let int : int t =
···
83
91
| Ok i -> Int64.to_int i
84
92
| Error _ -> decode_error path "expected int");
85
93
encode = (fun i buf -> Buffer.add_string buf (string_of_int i));
94
+
to_json = (fun i -> Simdjsont_json.Int (Int64.of_int i));
86
95
}
87
96
88
97
let int64 : int64 t =
···
93
102
| Ok i -> i
94
103
| Error _ -> decode_error path "expected int64");
95
104
encode = (fun i buf -> Buffer.add_string buf (Int64.to_string i));
105
+
to_json = (fun i -> Simdjsont_json.Int i);
96
106
}
97
107
98
108
let float : float t =
···
119
129
if Float.is_finite f then
120
130
Buffer.add_string buf (Printf.sprintf "%.17g" f)
121
131
else Buffer.add_string buf "null");
132
+
to_json = (fun f -> Simdjsont_json.Float f);
122
133
}
123
134
124
135
let string : string t =
···
129
140
| Ok s -> s
130
141
| Error _ -> decode_error path "expected string");
131
142
encode = (fun s buf -> write_escaped_string buf s);
143
+
to_json = (fun s -> Simdjsont_json.String s);
132
144
}
133
145
134
146
let list (item : 'a t) : 'a list t =
···
160
172
item.encode v buf)
161
173
xs);
162
174
Buffer.add_char buf ']');
175
+
to_json = (fun items -> Simdjsont_json.Array (List.map item.to_json items));
163
176
}
164
177
165
178
let array (item : 'a t) : 'a array t =
···
196
209
done
197
210
end;
198
211
Buffer.add_char buf ']');
212
+
to_json =
213
+
(fun items ->
214
+
Simdjsont_json.Array (List.map item.to_json (Array.to_list items)));
199
215
}
200
216
201
217
let optional (inner : 'a t) : 'a option t =
···
210
226
match opt with
211
227
| None -> Buffer.add_string buf "null"
212
228
| Some v -> inner.encode v buf);
229
+
to_json =
230
+
(function None -> Simdjsont_json.Null | Some v -> inner.to_json v);
213
231
}
214
232
215
233
let map (f : 'a -> 'b) (g : 'b -> 'a) (codec : 'a t) : 'b t =
216
234
{
217
235
decode = (fun path elt -> f (codec.decode path elt));
218
236
encode = (fun v buf -> codec.encode (g v) buf);
237
+
to_json = (fun v -> codec.to_json (g v));
219
238
}
220
239
221
240
let decode_obj_field path (obj : Simdjsont_raw.object_) name (dec : 'a decoder)
···
239
258
type ('o, 'dec) builder = {
240
259
dec : string list -> Simdjsont_raw.object_ -> 'dec;
241
260
enc : 'o -> Buffer.t -> bool -> bool;
261
+
ast : 'o -> (string * Simdjsont_json.t) list;
242
262
}
243
263
244
264
let field constructor =
245
265
{
246
266
dec = (fun _path _obj -> constructor);
247
267
enc = (fun _v _buf first -> first);
268
+
ast = (fun _v -> []);
248
269
}
249
270
250
271
let mem name (codec : 'a t) ~enc:(get : 'o -> 'a)
···
262
283
Buffer.add_char buf ':';
263
284
codec.encode (get v) buf;
264
285
false);
286
+
ast = (fun v -> (name, codec.to_json (get v)) :: builder.ast v);
265
287
}
266
288
267
289
let opt_mem name (codec : 'a t) ~enc:(get : 'o -> 'a option)
···
282
304
Buffer.add_char buf ':';
283
305
codec.encode inner_v buf;
284
306
false);
307
+
ast =
308
+
(fun v ->
309
+
match get v with
310
+
| None -> builder.ast v
311
+
| Some inner_v -> (name, codec.to_json inner_v) :: builder.ast v);
285
312
}
286
313
287
314
let finish (builder : ('o, 'o) builder) : 'o t =
···
296
323
Buffer.add_char buf '{';
297
324
let _ = builder.enc v buf true in
298
325
Buffer.add_char buf '}');
326
+
to_json = (fun v -> Simdjsont_json.Object (builder.ast v));
299
327
}
300
328
end
301
329
···
345
373
Simdjsont_json.Object (List.rev !members)
346
374
| Error _ -> Simdjsont_json.Object [])
347
375
in
348
-
{ decode = decode_value; encode = (fun v buf -> Simdjsont_json.write buf v) }
376
+
{
377
+
decode = decode_value;
378
+
encode = (fun v buf -> Simdjsont_json.write buf v);
379
+
to_json = (fun v -> v);
380
+
}
+3
lib/simdjsont_codec.mli
+3
lib/simdjsont_codec.mli
···
36
36
val encode_string : 'a t -> 'a -> string
37
37
(** Encode a value to a JSON string using the given codec. *)
38
38
39
+
val to_json : 'a t -> 'a -> Simdjsont_json.t
40
+
(** Convert a value to JSON AST using the given codec. *)
41
+
39
42
val null : unit t
40
43
(** Codec for the JSON [null] value. *)
41
44
+72
lib/simdjsont_stubs.cpp
+72
lib/simdjsont_stubs.cpp
···
1
1
#include "simdjson_vendor.hh"
2
+
#include "simdcbor.hh"
2
3
#include <cmath>
3
4
4
5
extern "C" {
···
940
941
DocumentStreamState *state = DocStream_val(stream);
941
942
CAMLreturn(Val_long(state->stream.size_in_bytes()));
942
943
}
944
+
945
+
/* ========================================================================= */
946
+
/* CBOR operations */
947
+
/* ========================================================================= */
948
+
949
+
extern "C" CAMLprim value simdjson_parse_cbor(value parser, value buf, value len) {
950
+
CAMLparam3(parser, buf, len);
951
+
CAMLlocal3(result, element_val, some_val);
952
+
953
+
dom::parser *p = Parser_val(parser);
954
+
const uint8_t *data = (const uint8_t *)Caml_ba_data_val(buf);
955
+
size_t length = Long_val(len);
956
+
957
+
size_t bytes_read = 0;
958
+
auto error = simdcbor::parse(data, length, *p, bytes_read);
959
+
960
+
result = caml_alloc_tuple(2);
961
+
Store_field(result, 0, Val_int(error_code_to_int(error)));
962
+
963
+
if (error == SUCCESS) {
964
+
element_val = caml_alloc_custom(&element_ops, sizeof(dom::element), 0, 1);
965
+
Element_val(element_val) = simdcbor::get_root(*p);
966
+
967
+
some_val = caml_alloc(1, 0);
968
+
Store_field(some_val, 0, element_val);
969
+
Store_field(result, 1, some_val);
970
+
} else {
971
+
Store_field(result, 1, Val_none);
972
+
}
973
+
974
+
CAMLreturn(result);
975
+
}
976
+
977
+
extern "C" CAMLprim value simdjson_cbor_next(value parser, value buf, value len, value offset_val) {
978
+
CAMLparam4(parser, buf, len, offset_val);
979
+
CAMLlocal3(result, element_val, some_val);
980
+
981
+
dom::parser *p = Parser_val(parser);
982
+
const uint8_t *data = (const uint8_t *)Caml_ba_data_val(buf);
983
+
size_t length = Long_val(len);
984
+
size_t offset = Long_val(offset_val);
985
+
986
+
if (offset >= length) {
987
+
result = caml_alloc_tuple(3);
988
+
Store_field(result, 0, Val_int(error_code_to_int(EMPTY)));
989
+
Store_field(result, 1, Val_none);
990
+
Store_field(result, 2, Val_long(offset));
991
+
CAMLreturn(result);
992
+
}
993
+
994
+
size_t bytes_read = 0;
995
+
auto error = simdcbor::parse(data + offset, length - offset, *p, bytes_read);
996
+
997
+
result = caml_alloc_tuple(3);
998
+
Store_field(result, 0, Val_int(error_code_to_int(error)));
999
+
1000
+
if (error == SUCCESS) {
1001
+
element_val = caml_alloc_custom(&element_ops, sizeof(dom::element), 0, 1);
1002
+
Element_val(element_val) = simdcbor::get_root(*p);
1003
+
1004
+
some_val = caml_alloc(1, 0);
1005
+
Store_field(some_val, 0, element_val);
1006
+
Store_field(result, 1, some_val);
1007
+
Store_field(result, 2, Val_long(offset + bytes_read));
1008
+
} else {
1009
+
Store_field(result, 1, Val_none);
1010
+
Store_field(result, 2, Val_long(offset));
1011
+
}
1012
+
1013
+
CAMLreturn(result);
1014
+
}
+3
-3
simdjsont.opam
+3
-3
simdjsont.opam
···
1
1
# This file is generated by dune, edit dune-project instead
2
2
opam-version: "2.0"
3
-
version: "0.1.0"
4
-
synopsis: "JSON parsing with simdjson, with support for ndjson streaming"
3
+
version: "0.2.0"
4
+
synopsis: "JSON and CBOR parsing with simdjson, with support for streaming"
5
5
description:
6
-
"OCaml bindings to simdjson with support for ndjson streaming. Includes vendored simdjson 4.2.4."
6
+
"OCaml bindings to simdjson with support for ndjson streaming and CBOR encoding/decoding. Includes vendored simdjson 4.2.4."
7
7
maintainer: ["Gabriel Díaz"]
8
8
authors: ["Gabriel Díaz"]
9
9
license: "ISC"
+8
test/dune
+8
test/dune
+71
test/test_cbor.ml
+71
test/test_cbor.ml
···
1
+
(* Test CBOR parsing and encoding *)
2
+
3
+
let test_decode_int () =
4
+
(* CBOR encoding of integer 42: 0x18 0x2a *)
5
+
let cbor_int = "\x18\x2a" in
6
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.int cbor_int with
7
+
| Ok n -> Alcotest.(check int) "decode int 42" 42 n
8
+
| Error e -> Alcotest.fail ("decode_int failed: " ^ e)
9
+
10
+
let test_decode_small_int () =
11
+
(* Small integers 0-23 are encoded in a single byte *)
12
+
let cbor_int = "\x05" in
13
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.int cbor_int with
14
+
| Ok n -> Alcotest.(check int) "decode int 5" 5 n
15
+
| Error e -> Alcotest.fail ("decode_small_int failed: " ^ e)
16
+
17
+
let test_decode_string () =
18
+
(* CBOR text string "hello": 0x65 h e l l o *)
19
+
let cbor_str = "\x65hello" in
20
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.string cbor_str with
21
+
| Ok s -> Alcotest.(check string) "decode string" "hello" s
22
+
| Error e -> Alcotest.fail ("decode_string failed: " ^ e)
23
+
24
+
let test_decode_array () =
25
+
let cbor_arr = "\x83\x01\x02\x03" in
26
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.(list int) cbor_arr with
27
+
| Ok lst -> Alcotest.(check (list int)) "decode array" [ 1; 2; 3 ] lst
28
+
| Error e -> Alcotest.fail ("decode_array failed: " ^ e)
29
+
30
+
let test_decode_bool () =
31
+
(* CBOR true: 0xf5, false: 0xf4 *)
32
+
let cbor_true = "\xf5" in
33
+
let cbor_false = "\xf4" in
34
+
(match Simdjsont.Cbor.decode_string Simdjsont.Codec.bool cbor_true with
35
+
| Ok b -> Alcotest.(check bool) "decode true" true b
36
+
| Error e -> Alcotest.fail ("decode_true failed: " ^ e));
37
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.bool cbor_false with
38
+
| Ok b -> Alcotest.(check bool) "decode false" false b
39
+
| Error e -> Alcotest.fail ("decode_false failed: " ^ e)
40
+
41
+
let test_decode_null () =
42
+
(* CBOR null: 0xf6 *)
43
+
let cbor_null = "\xf6" in
44
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.null cbor_null with
45
+
| Ok () -> ()
46
+
| Error e -> Alcotest.fail ("decode_null failed: " ^ e)
47
+
48
+
let test_decode_map () =
49
+
(* CBOR {"a": 1}: 0xa1 0x61 a 0x01 *)
50
+
let cbor_map = "\xa1\x61a\x01" in
51
+
let codec =
52
+
Simdjsont.Codec.Obj.field (fun a -> a)
53
+
|> Simdjsont.Codec.Obj.mem "a" Simdjsont.Codec.int ~enc:Fun.id
54
+
|> Simdjsont.Codec.Obj.finish
55
+
in
56
+
match Simdjsont.Cbor.decode_string codec cbor_map with
57
+
| Ok n -> Alcotest.(check int) "decode map" 1 n
58
+
| Error e -> Alcotest.fail ("decode_map failed: " ^ e)
59
+
60
+
let cbor_tests =
61
+
[
62
+
("decode int", `Quick, test_decode_int);
63
+
("decode small int", `Quick, test_decode_small_int);
64
+
("decode string", `Quick, test_decode_string);
65
+
("decode array", `Quick, test_decode_array);
66
+
("decode bool", `Quick, test_decode_bool);
67
+
("decode null", `Quick, test_decode_null);
68
+
("decode map", `Quick, test_decode_map);
69
+
]
70
+
71
+
let () = Alcotest.run "cbor" [ ("cbor", cbor_tests) ]
+335
test/test_cbor_compliance.ml
+335
test/test_cbor_compliance.ml
···
1
+
(* RFC 8949 Appendix A - https://github.com/cbor/test-vectors *)
2
+
3
+
let hex_to_bytes hex =
4
+
let len = String.length hex in
5
+
if len mod 2 <> 0 then failwith "hex_to_bytes: odd length";
6
+
let result = Bytes.create (len / 2) in
7
+
for i = 0 to (len / 2) - 1 do
8
+
let hi = Char.code hex.[i * 2] in
9
+
let lo = Char.code hex.[(i * 2) + 1] in
10
+
let hex_val c =
11
+
if c >= Char.code '0' && c <= Char.code '9' then c - Char.code '0'
12
+
else if c >= Char.code 'a' && c <= Char.code 'f' then
13
+
c - Char.code 'a' + 10
14
+
else if c >= Char.code 'A' && c <= Char.code 'F' then
15
+
c - Char.code 'A' + 10
16
+
else
17
+
failwith
18
+
("hex_to_bytes: invalid hex char: " ^ String.make 1 (Char.chr c))
19
+
in
20
+
Bytes.set result i (Char.chr ((hex_val hi lsl 4) lor hex_val lo))
21
+
done;
22
+
Bytes.to_string result
23
+
24
+
type test_result = Pass | Fail of string | Skip of string
25
+
26
+
let passed = ref 0
27
+
let failed = ref 0
28
+
let skipped = ref 0
29
+
30
+
let run_test name f =
31
+
match f () with
32
+
| Pass ->
33
+
incr passed;
34
+
Printf.printf " ✓ %s\n%!" name
35
+
| Fail reason ->
36
+
incr failed;
37
+
Printf.printf " ✗ %s: %s\n%!" name reason
38
+
| Skip reason ->
39
+
incr skipped;
40
+
Printf.printf " ⊘ %s: %s\n%!" name reason
41
+
42
+
let decode_int hex expected =
43
+
let cbor = hex_to_bytes hex in
44
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.int64 cbor with
45
+
| Ok n when n = expected -> Pass
46
+
| Ok n -> Fail (Printf.sprintf "expected %Ld, got %Ld" expected n)
47
+
| Error e -> Fail e
48
+
49
+
let decode_float hex expected =
50
+
let cbor = hex_to_bytes hex in
51
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.float cbor with
52
+
| Ok f when f = expected -> Pass
53
+
| Ok f when Float.is_nan f && Float.is_nan expected -> Pass
54
+
| Ok f -> Fail (Printf.sprintf "expected %g, got %g" expected f)
55
+
| Error e -> Fail e
56
+
57
+
let decode_bool hex expected =
58
+
let cbor = hex_to_bytes hex in
59
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.bool cbor with
60
+
| Ok b when b = expected -> Pass
61
+
| Ok b -> Fail (Printf.sprintf "expected %b, got %b" expected b)
62
+
| Error e -> Fail e
63
+
64
+
let decode_null hex =
65
+
let cbor = hex_to_bytes hex in
66
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.null cbor with
67
+
| Ok () -> Pass
68
+
| Error e -> Fail e
69
+
70
+
let decode_string hex expected =
71
+
let cbor = hex_to_bytes hex in
72
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.string cbor with
73
+
| Ok s when s = expected -> Pass
74
+
| Ok s -> Fail (Printf.sprintf "expected %S, got %S" expected s)
75
+
| Error e -> Fail e
76
+
77
+
let decode_int_list hex expected =
78
+
let cbor = hex_to_bytes hex in
79
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.(list int) cbor with
80
+
| Ok lst when lst = expected -> Pass
81
+
| Ok lst ->
82
+
Fail
83
+
(Printf.sprintf "expected [%s], got [%s]"
84
+
(String.concat "; " (List.map string_of_int expected))
85
+
(String.concat "; " (List.map string_of_int lst)))
86
+
| Error e -> Fail e
87
+
88
+
let decode_json hex expected =
89
+
let cbor = hex_to_bytes hex in
90
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.value cbor with
91
+
| Ok v when v = expected -> Pass
92
+
| Ok v ->
93
+
Fail
94
+
(Printf.sprintf "expected %s, got %s"
95
+
(Simdjsont.Json.to_string expected)
96
+
(Simdjsont.Json.to_string v))
97
+
| Error e -> Fail e
98
+
99
+
let test_unsigned_integers () =
100
+
Printf.printf "\n=== Unsigned Integers ===\n%!";
101
+
102
+
run_test "0 (0x00)" (fun () -> decode_int "00" 0L);
103
+
run_test "1 (0x01)" (fun () -> decode_int "01" 1L);
104
+
run_test "10 (0x0a)" (fun () -> decode_int "0a" 10L);
105
+
run_test "23 (0x17)" (fun () -> decode_int "17" 23L);
106
+
run_test "24 (0x1818)" (fun () -> decode_int "1818" 24L);
107
+
run_test "25 (0x1819)" (fun () -> decode_int "1819" 25L);
108
+
run_test "100 (0x1864)" (fun () -> decode_int "1864" 100L);
109
+
run_test "1000 (0x1903e8)" (fun () -> decode_int "1903e8" 1000L);
110
+
run_test "1000000 (0x1a000f4240)" (fun () -> decode_int "1a000f4240" 1000000L);
111
+
run_test "1000000000000 (0x1b000000e8d4a51000)" (fun () ->
112
+
decode_int "1b000000e8d4a51000" 1000000000000L);
113
+
run_test "18446744073709551615 (0x1bffffffffffffffff)" (fun () ->
114
+
Skip "uint64 max exceeds int64 range")
115
+
116
+
let test_negative_integers () =
117
+
Printf.printf "\n=== Negative Integers ===\n%!";
118
+
119
+
run_test "-1 (0x20)" (fun () -> decode_int "20" (-1L));
120
+
run_test "-10 (0x29)" (fun () -> decode_int "29" (-10L));
121
+
run_test "-100 (0x3863)" (fun () -> decode_int "3863" (-100L));
122
+
run_test "-1000 (0x3903e7)" (fun () -> decode_int "3903e7" (-1000L))
123
+
124
+
let test_floats () =
125
+
Printf.printf "\n=== Floating Point ===\n%!";
126
+
127
+
run_test "0.0 half (0xf90000)" (fun () -> decode_float "f90000" 0.0);
128
+
run_test "-0.0 half (0xf98000)" (fun () -> decode_float "f98000" (-0.0));
129
+
run_test "1.0 half (0xf93c00)" (fun () -> decode_float "f93c00" 1.0);
130
+
run_test "1.5 half (0xf93e00)" (fun () -> decode_float "f93e00" 1.5);
131
+
run_test "65504.0 half (0xf97bff)" (fun () -> decode_float "f97bff" 65504.0);
132
+
run_test "-4.0 half (0xf9c400)" (fun () -> decode_float "f9c400" (-4.0));
133
+
134
+
run_test "100000.0 single (0xfa47c35000)" (fun () ->
135
+
decode_float "fa47c35000" 100000.0);
136
+
run_test "3.4028234663852886e+38 single (0xfa7f7fffff)" (fun () ->
137
+
decode_float "fa7f7fffff" 3.4028234663852886e+38);
138
+
139
+
run_test "1.1 double (0xfb3ff199999999999a)" (fun () ->
140
+
decode_float "fb3ff199999999999a" 1.1);
141
+
run_test "1.0e+300 double (0xfb7e37e43c8800759c)" (fun () ->
142
+
decode_float "fb7e37e43c8800759c" 1.0e+300);
143
+
run_test "-4.1 double (0xfbc010666666666666)" (fun () ->
144
+
decode_float "fbc010666666666666" (-4.1));
145
+
146
+
run_test "Infinity half (0xf97c00)" (fun () ->
147
+
decode_float "f97c00" Float.infinity);
148
+
run_test "-Infinity half (0xf9fc00)" (fun () ->
149
+
decode_float "f9fc00" Float.neg_infinity);
150
+
run_test "NaN half (0xf97e00)" (fun () -> decode_float "f97e00" Float.nan);
151
+
run_test "Infinity single (0xfa7f800000)" (fun () ->
152
+
decode_float "fa7f800000" Float.infinity);
153
+
run_test "-Infinity single (0xfaff800000)" (fun () ->
154
+
decode_float "faff800000" Float.neg_infinity);
155
+
run_test "NaN single (0xfa7fc00000)" (fun () ->
156
+
decode_float "fa7fc00000" Float.nan);
157
+
run_test "Infinity double (0xfb7ff0000000000000)" (fun () ->
158
+
decode_float "fb7ff0000000000000" Float.infinity);
159
+
run_test "-Infinity double (0xfbfff0000000000000)" (fun () ->
160
+
decode_float "fbfff0000000000000" Float.neg_infinity);
161
+
run_test "NaN double (0xfb7ff8000000000000)" (fun () ->
162
+
decode_float "fb7ff8000000000000" Float.nan);
163
+
164
+
run_test "5.960464477539063e-08 half (0xf90001)" (fun () ->
165
+
decode_float "f90001" 5.960464477539063e-08);
166
+
run_test "6.103515625e-05 half (0xf90400)" (fun () ->
167
+
decode_float "f90400" 6.103515625e-05)
168
+
169
+
let test_booleans () =
170
+
Printf.printf "\n=== Booleans ===\n%!";
171
+
172
+
run_test "false (0xf4)" (fun () -> decode_bool "f4" false);
173
+
run_test "true (0xf5)" (fun () -> decode_bool "f5" true)
174
+
175
+
let test_null_undefined () =
176
+
Printf.printf "\n=== Null and Undefined ===\n%!";
177
+
178
+
run_test "null (0xf6)" (fun () -> decode_null "f6");
179
+
run_test "undefined (0xf7)" (fun () ->
180
+
Skip "undefined not supported in JSON model")
181
+
182
+
let test_simple_values () =
183
+
Printf.printf "\n=== Simple Values ===\n%!";
184
+
185
+
run_test "simple(16) (0xf0)" (fun () -> Skip "simple values not supported");
186
+
run_test "simple(24) (0xf818)" (fun () -> Skip "simple values not supported");
187
+
run_test "simple(255) (0xf8ff)" (fun () -> Skip "simple values not supported")
188
+
189
+
let test_tags () =
190
+
Printf.printf "\n=== Tags ===\n%!";
191
+
192
+
run_test "tag 0 datetime (0xc074...)" (fun () -> Skip "tags not supported");
193
+
run_test "tag 1 epoch (0xc11a514b67b0)" (fun () -> Skip "tags not supported");
194
+
run_test "tag 23 base16 (0xd74401020304)" (fun () ->
195
+
Skip "tags not supported");
196
+
run_test "tag 24 embedded CBOR (0xd818...)" (fun () ->
197
+
Skip "tags not supported");
198
+
run_test "tag 32 URI (0xd82076...)" (fun () -> Skip "tags not supported")
199
+
200
+
let test_bignum () =
201
+
Printf.printf "\n=== Bignum (Tags 2/3) ===\n%!";
202
+
203
+
run_test "bignum 18446744073709551616 (0xc249...)" (fun () ->
204
+
Skip "bignum tags not supported");
205
+
run_test "bignum -18446744073709551617 (0xc349...)" (fun () ->
206
+
Skip "bignum tags not supported");
207
+
run_test "-18446744073709551616 (0x3bffffffffffffffff)" (fun () ->
208
+
Skip "exceeds int64 range")
209
+
210
+
let test_byte_strings () =
211
+
Printf.printf "\n=== Byte Strings ===\n%!";
212
+
213
+
run_test "empty byte string h'' (0x40)" (fun () ->
214
+
Skip "byte strings not in JSON model");
215
+
run_test "byte string h'01020304' (0x4401020304)" (fun () ->
216
+
Skip "byte strings not in JSON model");
217
+
run_test "indefinite byte string (0x5f42010243030405ff)" (fun () ->
218
+
Skip "byte strings not in JSON model")
219
+
220
+
let test_text_strings () =
221
+
Printf.printf "\n=== Text Strings ===\n%!";
222
+
223
+
run_test "empty string \"\" (0x60)" (fun () -> decode_string "60" "");
224
+
run_test "\"a\" (0x6161)" (fun () -> decode_string "6161" "a");
225
+
run_test "\"IETF\" (0x6449455446)" (fun () ->
226
+
decode_string "6449455446" "IETF");
227
+
run_test "\"\\\"\\\\\" (0x62225c)" (fun () -> decode_string "62225c" "\"\\");
228
+
run_test "\"ü\" UTF-8 (0x62c3bc)" (fun () -> decode_string "62c3bc" "ü");
229
+
run_test "\"水\" UTF-8 (0x63e6b0b4)" (fun () -> decode_string "63e6b0b4" "水");
230
+
run_test "\"𐅑\" UTF-8 4-byte (0x64f0908591)" (fun () ->
231
+
decode_string "64f0908591" "𐅑");
232
+
run_test "indefinite \"streaming\" (0x7f657374726561646d696e67ff)" (fun () ->
233
+
decode_string "7f657374726561646d696e67ff" "streaming")
234
+
235
+
let test_arrays () =
236
+
Printf.printf "\n=== Arrays ===\n%!";
237
+
238
+
run_test "[] empty (0x80)" (fun () -> decode_int_list "80" []);
239
+
run_test "[1,2,3] (0x83010203)" (fun () ->
240
+
decode_int_list "83010203" [ 1; 2; 3 ]);
241
+
run_test "[1,[2,3],[4,5]] nested (0x8301820203820405)" (fun () ->
242
+
let open Simdjsont.Json in
243
+
decode_json "8301820203820405"
244
+
(Array [ Int 1L; Array [ Int 2L; Int 3L ]; Array [ Int 4L; Int 5L ] ]));
245
+
run_test "[1..25] long array (0x98190102...)" (fun () ->
246
+
decode_int_list
247
+
"98190102030405060708090a0b0c0d0e0f101112131415161718181819"
248
+
(List.init 25 (fun i -> i + 1)));
249
+
run_test "indefinite [] (0x9fff)" (fun () -> decode_int_list "9fff" []);
250
+
run_test "indefinite [1,[2,3],[4,5]] (0x9f018202039f0405ffff)" (fun () ->
251
+
let open Simdjsont.Json in
252
+
decode_json "9f018202039f0405ffff"
253
+
(Array [ Int 1L; Array [ Int 2L; Int 3L ]; Array [ Int 4L; Int 5L ] ]));
254
+
run_test "mixed definite/indefinite (0x9f01820203820405ff)" (fun () ->
255
+
let open Simdjsont.Json in
256
+
decode_json "9f01820203820405ff"
257
+
(Array [ Int 1L; Array [ Int 2L; Int 3L ]; Array [ Int 4L; Int 5L ] ]));
258
+
run_test "definite outer, indefinite inner (0x83018202039f0405ff)" (fun () ->
259
+
let open Simdjsont.Json in
260
+
decode_json "83018202039f0405ff"
261
+
(Array [ Int 1L; Array [ Int 2L; Int 3L ]; Array [ Int 4L; Int 5L ] ]));
262
+
run_test "indefinite outer, definite inner (0x83019f0203ff820405)" (fun () ->
263
+
let open Simdjsont.Json in
264
+
decode_json "83019f0203ff820405"
265
+
(Array [ Int 1L; Array [ Int 2L; Int 3L ]; Array [ Int 4L; Int 5L ] ]));
266
+
run_test "indefinite [1..25] (0x9f0102...19ff)" (fun () ->
267
+
decode_int_list
268
+
"9f0102030405060708090a0b0c0d0e0f101112131415161718181819ff"
269
+
(List.init 25 (fun i -> i + 1)))
270
+
271
+
let test_maps () =
272
+
Printf.printf "\n=== Maps ===\n%!";
273
+
274
+
run_test "{} empty (0xa0)" (fun () ->
275
+
decode_json "a0" (Simdjsont.Json.Object []));
276
+
run_test "{1:2, 3:4} int keys (0xa201020304)" (fun () ->
277
+
Skip "integer keys not supported in JSON model");
278
+
run_test "{\"a\":1, \"b\":[2,3]} (0xa26161016162820203)" (fun () ->
279
+
let open Simdjsont.Json in
280
+
decode_json "a26161016162820203"
281
+
(Object [ ("a", Int 1L); ("b", Array [ Int 2L; Int 3L ]) ]));
282
+
run_test "[\"a\", {\"b\":\"c\"}] (0x826161a161626163)" (fun () ->
283
+
let open Simdjsont.Json in
284
+
decode_json "826161a161626163"
285
+
(Array [ String "a"; Object [ ("b", String "c") ] ]));
286
+
run_test "{\"a\"..\"e\": \"A\"..\"E\"} 5 pairs (0xa56161614161626142...)"
287
+
(fun () ->
288
+
let open Simdjsont.Json in
289
+
decode_json "a56161614161626142616361436164614461656145"
290
+
(Object
291
+
[
292
+
("a", String "A");
293
+
("b", String "B");
294
+
("c", String "C");
295
+
("d", String "D");
296
+
("e", String "E");
297
+
]));
298
+
run_test "indefinite {\"a\":1, \"b\":[2,3]} (0xbf61610161629f0203ffff)"
299
+
(fun () ->
300
+
let open Simdjsont.Json in
301
+
decode_json "bf61610161629f0203ffff"
302
+
(Object [ ("a", Int 1L); ("b", Array [ Int 2L; Int 3L ]) ]));
303
+
run_test "[\"a\", indef {\"b\":\"c\"}] (0x826161bf61626163ff)" (fun () ->
304
+
let open Simdjsont.Json in
305
+
decode_json "826161bf61626163ff"
306
+
(Array [ String "a"; Object [ ("b", String "c") ] ]));
307
+
run_test "indefinite {\"Fun\":true, \"Amt\":-2} (0xbf6346756ef563416d7421ff)"
308
+
(fun () ->
309
+
let open Simdjsont.Json in
310
+
decode_json "bf6346756ef563416d7421ff"
311
+
(Object [ ("Fun", Bool true); ("Amt", Int (-2L)) ]))
312
+
313
+
let () =
314
+
Printf.printf "RFC 8949 Appendix A Compliance Tests\n";
315
+
Printf.printf "=====================================\n%!";
316
+
317
+
test_unsigned_integers ();
318
+
test_negative_integers ();
319
+
test_floats ();
320
+
test_booleans ();
321
+
test_null_undefined ();
322
+
test_simple_values ();
323
+
test_tags ();
324
+
test_bignum ();
325
+
test_byte_strings ();
326
+
test_text_strings ();
327
+
test_arrays ();
328
+
test_maps ();
329
+
330
+
Printf.printf "\n=====================================\n";
331
+
Printf.printf "Results: %d passed, %d failed, %d skipped\n%!" !passed !failed
332
+
!skipped;
333
+
Printf.printf "Total: %d tests\n%!" (!passed + !failed + !skipped);
334
+
335
+
if !failed > 0 then exit 1 else exit 0
+254
test/test_memory.ml
+254
test/test_memory.ml
···
390
390
(Printf.sprintf "RSS grew by %dKB in second half - possible C memory leak"
391
391
growth_mid_to_after)
392
392
393
+
let test_cbor_decode_lifecycle () =
394
+
Printf.printf "CBOR decode lifecycle test:\n";
395
+
396
+
let cbor_data = "\xa2\x61a\x01\x61b\x82\x02\x03" in
397
+
398
+
let initial_mem = get_mem_usage () in
399
+
400
+
for _ = 1 to 10000 do
401
+
let _ = Simdjsont.Cbor.decode_string Simdjsont.Codec.value cbor_data in
402
+
()
403
+
done;
404
+
405
+
let final_mem = get_mem_usage () in
406
+
let diff = final_mem - initial_mem in
407
+
408
+
Printf.printf " Initial: %d bytes\n" initial_mem;
409
+
Printf.printf " Final: %d bytes\n" final_mem;
410
+
Printf.printf " Diff: %d bytes\n" diff;
411
+
412
+
if diff > 1_000_000 then
413
+
Alcotest.fail
414
+
(Printf.sprintf "Memory grew by %d bytes - possible leak" diff)
415
+
416
+
let test_cbor_encode_lifecycle () =
417
+
Printf.printf "CBOR encode lifecycle test:\n";
418
+
419
+
let data = [ 1; 2; 3; 4; 5 ] in
420
+
421
+
let initial_mem = get_mem_usage () in
422
+
423
+
for _ = 1 to 10000 do
424
+
let _ = Simdjsont.Cbor.encode_string Simdjsont.Codec.(list int) data in
425
+
()
426
+
done;
427
+
428
+
let final_mem = get_mem_usage () in
429
+
let diff = final_mem - initial_mem in
430
+
431
+
Printf.printf " Initial: %d bytes\n" initial_mem;
432
+
Printf.printf " Final: %d bytes\n" final_mem;
433
+
Printf.printf " Diff: %d bytes\n" diff;
434
+
435
+
if diff > 1_000_000 then
436
+
Alcotest.fail
437
+
(Printf.sprintf "Memory grew by %d bytes - possible leak" diff)
438
+
439
+
let test_cbor_roundtrip_lifecycle () =
440
+
Printf.printf "CBOR roundtrip lifecycle test:\n";
441
+
442
+
let codec = Simdjsont.Codec.(list (list int)) in
443
+
let data = [ [ 1; 2; 3 ]; [ 4; 5; 6 ]; [ 7; 8; 9 ] ] in
444
+
445
+
let initial_mem = get_mem_usage () in
446
+
447
+
for _ = 1 to 10000 do
448
+
let cbor = Simdjsont.Cbor.encode_string codec data in
449
+
let _ = Simdjsont.Cbor.decode_string codec cbor in
450
+
()
451
+
done;
452
+
453
+
let final_mem = get_mem_usage () in
454
+
let diff = final_mem - initial_mem in
455
+
456
+
Printf.printf " Initial: %d bytes\n" initial_mem;
457
+
Printf.printf " Final: %d bytes\n" final_mem;
458
+
Printf.printf " Diff: %d bytes\n" diff;
459
+
460
+
if diff > 1_000_000 then
461
+
Alcotest.fail
462
+
(Printf.sprintf "Memory grew by %d bytes - possible leak" diff)
463
+
464
+
let test_cbor_complex_structures () =
465
+
Printf.printf "CBOR complex structures lifecycle test:\n";
466
+
467
+
let codec =
468
+
let open Simdjsont.Codec in
469
+
Obj.field (fun id name items -> (id, name, items))
470
+
|> Obj.mem "id" int ~enc:(fun (id, _, _) -> id)
471
+
|> Obj.mem "name" string ~enc:(fun (_, name, _) -> name)
472
+
|> Obj.mem "items" (list int) ~enc:(fun (_, _, items) -> items)
473
+
|> Obj.finish
474
+
in
475
+
476
+
let initial_mem = get_mem_usage () in
477
+
478
+
for i = 1 to 10000 do
479
+
let data = (i, Printf.sprintf "item%d" i, [ i; i + 1; i + 2 ]) in
480
+
let cbor = Simdjsont.Cbor.encode_string codec data in
481
+
let _ = Simdjsont.Cbor.decode_string codec cbor in
482
+
()
483
+
done;
484
+
485
+
let final_mem = get_mem_usage () in
486
+
let diff = final_mem - initial_mem in
487
+
488
+
Printf.printf " Initial: %d bytes\n" initial_mem;
489
+
Printf.printf " Final: %d bytes\n" final_mem;
490
+
Printf.printf " Diff: %d bytes\n" diff;
491
+
492
+
if diff > 1_000_000 then
493
+
Alcotest.fail
494
+
(Printf.sprintf "Memory grew by %d bytes - possible leak" diff)
495
+
496
+
let test_cbor_streaming_lifecycle () =
497
+
Printf.printf "CBOR streaming lifecycle test:\n";
498
+
499
+
let item1 = "\x83\x01\x02\x03" in
500
+
let item2 = "\x83\x04\x05\x06" in
501
+
let item3 = "\x83\x07\x08\x09" in
502
+
let multi_cbor = item1 ^ item2 ^ item3 in
503
+
504
+
let initial_mem = get_mem_usage () in
505
+
506
+
for _ = 1 to 5000 do
507
+
let seq = Simdjsont.Cbor.to_seq Simdjsont.Codec.(list int) multi_cbor in
508
+
Seq.iter (function Ok _ -> () | Error _ -> ()) seq
509
+
done;
510
+
511
+
let final_mem = get_mem_usage () in
512
+
let diff = final_mem - initial_mem in
513
+
514
+
Printf.printf " Initial: %d bytes\n" initial_mem;
515
+
Printf.printf " Final: %d bytes\n" final_mem;
516
+
Printf.printf " Diff: %d bytes\n" diff;
517
+
518
+
if diff > 1_000_000 then
519
+
Alcotest.fail
520
+
(Printf.sprintf "Memory grew by %d bytes - possible leak" diff)
521
+
522
+
let test_cbor_decode_discard_no_growth () =
523
+
Printf.printf "CBOR decode-and-discard memory growth test:\n";
524
+
525
+
let make_cbor i =
526
+
let codec =
527
+
let open Simdjsont.Codec in
528
+
Obj.field (fun id data -> (id, data))
529
+
|> Obj.mem "id" int ~enc:fst
530
+
|> Obj.mem "data" (list int) ~enc:snd
531
+
|> Obj.finish
532
+
in
533
+
Simdjsont.Cbor.encode_string codec (i, [ 1; 2; 3; 4; 5 ])
534
+
in
535
+
536
+
Gc.compact ();
537
+
let rss_before = get_rss_kb () in
538
+
539
+
for i = 1 to 50000 do
540
+
let cbor = make_cbor i in
541
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.value cbor with
542
+
| Ok _ -> ()
543
+
| Error _ -> ()
544
+
done;
545
+
546
+
Gc.compact ();
547
+
let rss_mid = get_rss_kb () in
548
+
549
+
for i = 50001 to 100000 do
550
+
let cbor = make_cbor i in
551
+
match Simdjsont.Cbor.decode_string Simdjsont.Codec.value cbor with
552
+
| Ok _ -> ()
553
+
| Error _ -> ()
554
+
done;
555
+
556
+
Gc.compact ();
557
+
let rss_after = get_rss_kb () in
558
+
559
+
Printf.printf " RSS: before=%dKB mid=%dKB after=%dKB\n" rss_before
560
+
rss_mid rss_after;
561
+
562
+
let growth_mid_to_after = rss_after - rss_mid in
563
+
Printf.printf " RSS growth (mid->after): %dKB\n" growth_mid_to_after;
564
+
565
+
if growth_mid_to_after > 10000 then
566
+
Alcotest.fail
567
+
(Printf.sprintf "RSS grew by %dKB in second half - possible C memory leak"
568
+
growth_mid_to_after)
569
+
570
+
let test_cbor_half_float_lifecycle () =
571
+
Printf.printf "CBOR half-precision float lifecycle test:\n";
572
+
573
+
let half_floats =
574
+
[
575
+
"\xf9\x3c\x00";
576
+
"\xf9\x3e\x00";
577
+
"\xf9\x7b\xff";
578
+
"\xf9\xc4\x00";
579
+
"\xf9\x7c\x00";
580
+
"\xf9\xfc\x00";
581
+
]
582
+
in
583
+
584
+
let initial_mem = get_mem_usage () in
585
+
586
+
for _ = 1 to 10000 do
587
+
List.iter
588
+
(fun cbor ->
589
+
let _ = Simdjsont.Cbor.decode_string Simdjsont.Codec.float cbor in
590
+
())
591
+
half_floats
592
+
done;
593
+
594
+
let final_mem = get_mem_usage () in
595
+
let diff = final_mem - initial_mem in
596
+
597
+
Printf.printf " Initial: %d bytes\n" initial_mem;
598
+
Printf.printf " Final: %d bytes\n" final_mem;
599
+
Printf.printf " Diff: %d bytes\n" diff;
600
+
601
+
if diff > 1_000_000 then
602
+
Alcotest.fail
603
+
(Printf.sprintf "Memory grew by %d bytes - possible leak" diff)
604
+
605
+
let test_cbor_indefinite_lifecycle () =
606
+
Printf.printf "CBOR indefinite-length lifecycle test:\n";
607
+
608
+
let indef_array = "\x9f\x01\x02\x03\x04\x05\xff" in
609
+
let indef_map = "\xbf\x61a\x01\x61b\x02\xff" in
610
+
let indef_string = "\x7f\x63foo\x63bar\xff" in
611
+
612
+
let initial_mem = get_mem_usage () in
613
+
614
+
for _ = 1 to 10000 do
615
+
let _ =
616
+
Simdjsont.Cbor.decode_string Simdjsont.Codec.(list int) indef_array
617
+
in
618
+
let _ = Simdjsont.Cbor.decode_string Simdjsont.Codec.value indef_map in
619
+
let _ = Simdjsont.Cbor.decode_string Simdjsont.Codec.string indef_string in
620
+
()
621
+
done;
622
+
623
+
let final_mem = get_mem_usage () in
624
+
let diff = final_mem - initial_mem in
625
+
626
+
Printf.printf " Initial: %d bytes\n" initial_mem;
627
+
Printf.printf " Final: %d bytes\n" final_mem;
628
+
Printf.printf " Diff: %d bytes\n" diff;
629
+
630
+
if diff > 1_000_000 then
631
+
Alcotest.fail
632
+
(Printf.sprintf "Memory grew by %d bytes - possible leak" diff)
633
+
393
634
let () =
394
635
Printf.printf "=== Memory Leak Tests ===\n\n";
395
636
···
411
652
( "raw parse discard no growth",
412
653
`Slow,
413
654
test_raw_parse_discard_no_growth );
655
+
] );
656
+
( "cbor",
657
+
[
658
+
("cbor decode lifecycle", `Quick, test_cbor_decode_lifecycle);
659
+
("cbor encode lifecycle", `Quick, test_cbor_encode_lifecycle);
660
+
("cbor roundtrip lifecycle", `Quick, test_cbor_roundtrip_lifecycle);
661
+
("cbor complex structures", `Quick, test_cbor_complex_structures);
662
+
("cbor streaming lifecycle", `Quick, test_cbor_streaming_lifecycle);
663
+
( "cbor decode discard no growth",
664
+
`Slow,
665
+
test_cbor_decode_discard_no_growth );
666
+
("cbor half float lifecycle", `Quick, test_cbor_half_float_lifecycle);
667
+
("cbor indefinite lifecycle", `Quick, test_cbor_indefinite_lifecycle);
414
668
] );
415
669
]