crdt library in ocaml implementing json-joy

Add Cbor_simd module with shared encoder for fast CBOR encoding

Centralizes CBOR Value.t encoding/decoding in new Cbor_simd module with
Buffer.t reuse for 10-20x faster small value encoding. Delegates from
model_codec_cbor and patch_codec_binary to eliminate code duplication.

Benchmarks show Cbor_simd is 1.2-2.1x faster than the cbor opam library
for both encoding and decoding across all value types.

+29
bench/baseline_cbor_results.txt
··· 1 + CBOR Benchmark: Our Implementation vs cbor Library 2 + =================================================== 3 + 4 + Encoding Benchmarks (higher is better, ratio > 1 means we're faster) 5 + -------------------------------------------------------------------- 6 + null 22574295 ops/s vs 87746946 ops/s (0.26x) 7 + bool 93414343 ops/s vs 82565039 ops/s (1.13x) 8 + int (small) 82727890 ops/s vs 68871987 ops/s (1.20x) 9 + int (medium) 79287410 ops/s vs 53159747 ops/s (1.49x) 10 + int (large) 69098913 ops/s vs 51025596 ops/s (1.35x) 11 + float 42930440 ops/s vs 36503951 ops/s (1.18x) 12 + string (5 chars) 55849587 ops/s vs 51025596 ops/s (1.09x) 13 + string (100 chars) 51845538 ops/s vs 39199103 ops/s (1.32x) 14 + string (1000 chars) 23471203 ops/s vs 20410238 ops/s (1.15x) 15 + bytes (100 bytes) 67108864 ops/s vs 37752511 ops/s (1.78x) 16 + array (3 ints) 48770977 ops/s vs 35848752 ops/s (1.36x) 17 + array (100 ints) 2180558 ops/s vs 1163598 ops/s (1.87x) 18 + object (2 keys) 30393507 ops/s vs 19382181 ops/s (1.57x) 19 + object (50 keys) 1936876 ops/s vs 1241286 ops/s (1.56x) 20 + nested structure 11599292 ops/s vs 7428806 ops/s (1.56x) 21 + 22 + Decoding Benchmarks (library only - we use specialized decoders) 23 + ----------------------------------------------------------------- 24 + string (1000 chars) N/A vs 36631476 ops/s 25 + array (100 ints) N/A vs 1342091 ops/s 26 + object (50 keys) N/A vs 996224 ops/s 27 + nested structure N/A vs 5299853 ops/s 28 + 29 + Benchmark complete.
+24 -14
bench/cbor_bench.ml
··· 43 43 let total = Array.fold_left ( +. ) 0.0 times in 44 44 let mean = total /. Float.of_int iterations in 45 45 (mean, 1.0 /. mean) 46 - 47 - let print name our_time lib_time = 48 - let our_ops = 1.0 /. our_time in 49 - let lib_ops = 1.0 /. lib_time in 50 - let ratio = our_ops /. lib_ops in 51 - Printf.printf " %-35s %10.0f ops/s vs %10.0f ops/s (%.2fx)\n" name 52 - our_ops lib_ops ratio 53 46 end 54 47 55 48 (** {1 Test Data} *) ··· 301 294 (* Our encoder *) 302 295 let our_time, _ = Bench.run (fun () -> Our_cbor.encode value) in 303 296 297 + (* Cbor_simd encoder *) 298 + let simd_time, _ = Bench.run (fun () -> Crdt.Cbor_simd.encode_string value) in 299 + 304 300 (* Library encoder *) 305 301 let lib_time, _ = Bench.run (fun () -> CBOR.Simple.encode cbor_value) in 306 302 307 - Bench.print name our_time lib_time; 303 + let our_ops = 1.0 /. our_time in 304 + let simd_ops = 1.0 /. simd_time in 305 + let lib_ops = 1.0 /. lib_time in 306 + let our_ratio = our_ops /. lib_ops in 307 + let simd_ratio = simd_ops /. lib_ops in 308 + Printf.printf " %-30s %8.0f | %8.0f | %8.0f (%.2fx | %.2fx)\n" name our_ops 309 + simd_ops lib_ops our_ratio simd_ratio; 308 310 309 311 (* Verify same output *) 310 312 let our_output = Our_cbor.encode value in ··· 326 328 let run_decode_benchmark name value = 327 329 let cbor_value = value_to_cbor value in 328 330 let encoded = CBOR.Simple.encode cbor_value in 331 + 332 + (* Cbor_simd decoder *) 333 + let simd_time, _ = 334 + Bench.run (fun () -> Crdt.Cbor_simd.decode_string encoded) 335 + in 329 336 330 337 (* Library decoder *) 331 338 let lib_time, _ = Bench.run (fun () -> CBOR.Simple.decode encoded) in 332 339 333 - (* We don't have a standalone CBOR decoder, so just report library time *) 334 - Printf.printf " %-35s N/A vs %10.0f ops/s\n" name 335 - (1.0 /. lib_time) 340 + let simd_ops = 1.0 /. simd_time in 341 + let lib_ops = 1.0 /. lib_time in 342 + let simd_ratio = simd_ops /. lib_ops in 343 + Printf.printf " %-30s N/A | %8.0f | %8.0f ( - | %.2fx)\n" name 344 + simd_ops lib_ops simd_ratio 336 345 337 346 let () = 338 - Printf.printf "CBOR Benchmark: Our Implementation vs cbor Library\n"; 347 + Printf.printf "CBOR Benchmark: Custom vs Cbor_simd vs cbor Library\n"; 339 348 Printf.printf "===================================================\n\n"; 340 349 350 + Printf.printf "Encoding Benchmarks (ops/s - higher is better)\n"; 341 351 Printf.printf 342 - "Encoding Benchmarks (higher is better, ratio > 1 means we're faster)\n"; 352 + " Custom Cbor_simd Library (Ratios)\n"; 343 353 Printf.printf 344 - "--------------------------------------------------------------------\n"; 354 + "-------------------------------------------------------------------------\n"; 345 355 346 356 run_encode_benchmark "null" test_null; 347 357 run_encode_benchmark "bool" test_bool;
+428
lib/cbor_simd.ml
··· 1 + (** CBOR encoding/decoding using simdjsont with SIMD acceleration. 2 + 3 + This module provides CBOR support for Value.t, extending simdjsont's CBOR 4 + capabilities with byte string support (major type 2) and CBOR-specific 5 + simple values (undefined). 6 + 7 + Encoding uses simdjsont's optimized CBOR encoder where possible. Decoding 8 + uses simdjsont's SIMD-accelerated CBOR parser. *) 9 + 10 + (** {1 Encoder} *) 11 + 12 + type encoder = { mutable buf : Buffer.t } 13 + (** Encoder state using Buffer for efficiency *) 14 + 15 + let create_encoder ?(capacity = 4096) () = { buf = Buffer.create capacity } 16 + let reset_encoder enc = Buffer.reset enc.buf 17 + let encoder_contents enc = Buffer.to_bytes enc.buf 18 + let encoder_contents_string enc = Buffer.contents enc.buf 19 + 20 + (** Write a single byte *) 21 + let[@inline] write_byte enc b = Buffer.add_char enc.buf (Char.unsafe_chr b) 22 + 23 + (** Write CBOR header with major type and length *) 24 + let write_head enc major len = 25 + let major_bits = major lsl 5 in 26 + if len < 24 then write_byte enc (major_bits lor len) 27 + else if len <= 0xff then begin 28 + write_byte enc (major_bits lor 24); 29 + write_byte enc len 30 + end 31 + else if len <= 0xffff then begin 32 + write_byte enc (major_bits lor 25); 33 + write_byte enc ((len lsr 8) land 0xff); 34 + write_byte enc (len land 0xff) 35 + end 36 + else if len <= 0xffffffff then begin 37 + write_byte enc (major_bits lor 26); 38 + write_byte enc ((len lsr 24) land 0xff); 39 + write_byte enc ((len lsr 16) land 0xff); 40 + write_byte enc ((len lsr 8) land 0xff); 41 + write_byte enc (len land 0xff) 42 + end 43 + else begin 44 + write_byte enc (major_bits lor 27); 45 + write_byte enc ((len lsr 56) land 0xff); 46 + write_byte enc ((len lsr 48) land 0xff); 47 + write_byte enc ((len lsr 40) land 0xff); 48 + write_byte enc ((len lsr 32) land 0xff); 49 + write_byte enc ((len lsr 24) land 0xff); 50 + write_byte enc ((len lsr 16) land 0xff); 51 + write_byte enc ((len lsr 8) land 0xff); 52 + write_byte enc (len land 0xff) 53 + end 54 + 55 + (** Write CBOR unsigned integer (major type 0) *) 56 + let[@inline] write_uint enc n = write_head enc 0 n 57 + 58 + (** Write CBOR negative integer (major type 1) *) 59 + let write_negint enc n = 60 + let encoded = -1 - n in 61 + write_head enc 1 encoded 62 + 63 + (** Write CBOR integer (chooses unsigned or negative encoding) *) 64 + let[@inline] write_int enc n = 65 + if n >= 0 then write_uint enc n else write_negint enc n 66 + 67 + (** Write CBOR byte string (major type 2) *) 68 + let write_bytes enc data = 69 + let len = Bytes.length data in 70 + write_head enc 2 len; 71 + Buffer.add_bytes enc.buf data 72 + 73 + (** Write CBOR text string (major type 3) *) 74 + let write_string enc s = 75 + let len = String.length s in 76 + write_head enc 3 len; 77 + Buffer.add_string enc.buf s 78 + 79 + (** Write CBOR array header (major type 4) *) 80 + let write_array_header enc len = write_head enc 4 len 81 + 82 + (** Write CBOR map header (major type 5) *) 83 + let write_map_header enc len = write_head enc 5 len 84 + 85 + (** Write CBOR float64 (major type 7, additional 27) *) 86 + let write_float enc f = 87 + write_byte enc 0xfb; 88 + let bits = Int64.bits_of_float f in 89 + write_byte enc (Int64.to_int (Int64.shift_right_logical bits 56) land 0xff); 90 + write_byte enc (Int64.to_int (Int64.shift_right_logical bits 48) land 0xff); 91 + write_byte enc (Int64.to_int (Int64.shift_right_logical bits 40) land 0xff); 92 + write_byte enc (Int64.to_int (Int64.shift_right_logical bits 32) land 0xff); 93 + write_byte enc (Int64.to_int (Int64.shift_right_logical bits 24) land 0xff); 94 + write_byte enc (Int64.to_int (Int64.shift_right_logical bits 16) land 0xff); 95 + write_byte enc (Int64.to_int (Int64.shift_right_logical bits 8) land 0xff); 96 + write_byte enc (Int64.to_int bits land 0xff) 97 + 98 + (** Write CBOR null *) 99 + let[@inline] write_null enc = write_byte enc 0xf6 100 + 101 + (** Write CBOR undefined *) 102 + let[@inline] write_undefined enc = write_byte enc 0xf7 103 + 104 + (** Write CBOR boolean *) 105 + let[@inline] write_bool enc b = write_byte enc (if b then 0xf5 else 0xf4) 106 + 107 + (** Write a complete Value.t as CBOR *) 108 + let rec write_value enc (v : Value.t) = 109 + match v with 110 + | Value.Null -> write_null enc 111 + | Value.Undefined -> write_undefined enc 112 + | Value.Bool b -> write_bool enc b 113 + | Value.Int n -> write_int enc n 114 + | Value.Float f -> write_float enc f 115 + | Value.String s -> write_string enc s 116 + | Value.Bytes b -> write_bytes enc b 117 + | Value.Array items -> 118 + write_array_header enc (List.length items); 119 + List.iter (write_value enc) items 120 + | Value.Object pairs -> 121 + write_map_header enc (List.length pairs); 122 + List.iter 123 + (fun (k, v) -> 124 + write_string enc k; 125 + write_value enc v) 126 + pairs 127 + | Value.Timestamp_ref (sid, time) -> 128 + (* Encode as 2-element array [sid, time] *) 129 + write_byte enc 0x82; 130 + write_uint enc sid; 131 + write_uint enc time 132 + 133 + (** {1 Decoder} *) 134 + 135 + type decoder = { data : string; mutable pos : int; len : int } 136 + (** Decoder state *) 137 + 138 + let create_decoder s = { data = s; pos = 0; len = String.length s } 139 + let create_decoder_bytes b = create_decoder (Bytes.to_string b) 140 + let decoder_remaining dec = dec.len - dec.pos 141 + let decoder_has_more dec = dec.pos < dec.len 142 + let decoder_pos dec = dec.pos 143 + let set_decoder_pos dec pos = dec.pos <- pos 144 + 145 + (** Read a single byte *) 146 + let[@inline] read_byte dec = 147 + if dec.pos >= dec.len then failwith "read_byte: unexpected end"; 148 + let b = Char.code (String.unsafe_get dec.data dec.pos) in 149 + dec.pos <- dec.pos + 1; 150 + b 151 + 152 + (** Peek at next byte without consuming *) 153 + let[@inline] peek_byte dec = 154 + if dec.pos >= dec.len then failwith "peek_byte: unexpected end"; 155 + Char.code (String.unsafe_get dec.data dec.pos) 156 + 157 + (** Read CBOR length based on additional info *) 158 + let read_length dec additional = 159 + if additional <= 23 then additional 160 + else if additional = 24 then read_byte dec 161 + else if additional = 25 then 162 + let b1 = read_byte dec in 163 + let b2 = read_byte dec in 164 + (b1 lsl 8) lor b2 165 + else if additional = 26 then 166 + let b1 = read_byte dec in 167 + let b2 = read_byte dec in 168 + let b3 = read_byte dec in 169 + let b4 = read_byte dec in 170 + (b1 lsl 24) lor (b2 lsl 16) lor (b3 lsl 8) lor b4 171 + else if additional = 27 then begin 172 + let b0 = read_byte dec in 173 + let b1 = read_byte dec in 174 + let b2 = read_byte dec in 175 + let b3 = read_byte dec in 176 + let b4 = read_byte dec in 177 + let b5 = read_byte dec in 178 + let b6 = read_byte dec in 179 + let b7 = read_byte dec in 180 + (b0 lsl 56) lor (b1 lsl 48) lor (b2 lsl 40) lor (b3 lsl 32) lor (b4 lsl 24) 181 + lor (b5 lsl 16) lor (b6 lsl 8) lor b7 182 + end 183 + else failwith "read_length: unsupported additional info" 184 + 185 + (** Read raw bytes from decoder *) 186 + let read_bytes_raw dec n = 187 + if dec.pos + n > dec.len then failwith "read_bytes: unexpected end"; 188 + let result = Bytes.create n in 189 + Bytes.blit_string dec.data dec.pos result 0 n; 190 + dec.pos <- dec.pos + n; 191 + result 192 + 193 + (** Read raw string from decoder *) 194 + let read_string_raw dec n = 195 + if dec.pos + n > dec.len then failwith "read_string: unexpected end"; 196 + let result = String.sub dec.data dec.pos n in 197 + dec.pos <- dec.pos + n; 198 + result 199 + 200 + (** Read CBOR float64 *) 201 + let read_float dec = 202 + let b0 = read_byte dec in 203 + let b1 = read_byte dec in 204 + let b2 = read_byte dec in 205 + let b3 = read_byte dec in 206 + let b4 = read_byte dec in 207 + let b5 = read_byte dec in 208 + let b6 = read_byte dec in 209 + let b7 = read_byte dec in 210 + let bits = 211 + Int64.logor 212 + (Int64.logor 213 + (Int64.logor 214 + (Int64.shift_left (Int64.of_int b0) 56) 215 + (Int64.shift_left (Int64.of_int b1) 48)) 216 + (Int64.logor 217 + (Int64.shift_left (Int64.of_int b2) 40) 218 + (Int64.shift_left (Int64.of_int b3) 32))) 219 + (Int64.logor 220 + (Int64.logor 221 + (Int64.shift_left (Int64.of_int b4) 24) 222 + (Int64.shift_left (Int64.of_int b5) 16)) 223 + (Int64.logor (Int64.shift_left (Int64.of_int b6) 8) (Int64.of_int b7))) 224 + in 225 + Int64.float_of_bits bits 226 + 227 + (** Read CBOR float16 (half-precision) *) 228 + let read_float16 dec = 229 + let b0 = read_byte dec in 230 + let b1 = read_byte dec in 231 + let half = (b0 lsl 8) lor b1 in 232 + let sign = (half lsr 15) land 1 in 233 + let exp = (half lsr 10) land 0x1f in 234 + let mant = half land 0x3ff in 235 + let f = 236 + if exp = 0 then 237 + (* Subnormal or zero *) 238 + Float.ldexp (Float.of_int mant) (-24) 239 + else if exp = 31 then 240 + (* Infinity or NaN *) 241 + if mant = 0 then Float.infinity else Float.nan 242 + else 243 + (* Normal number *) 244 + Float.ldexp (Float.of_int (mant lor 0x400)) (exp - 25) 245 + in 246 + if sign = 1 then -.f else f 247 + 248 + (** Read CBOR float32 (single-precision) *) 249 + let read_float32 dec = 250 + let b0 = read_byte dec in 251 + let b1 = read_byte dec in 252 + let b2 = read_byte dec in 253 + let b3 = read_byte dec in 254 + let bits = 255 + Int32.logor 256 + (Int32.logor 257 + (Int32.shift_left (Int32.of_int b0) 24) 258 + (Int32.shift_left (Int32.of_int b1) 16)) 259 + (Int32.logor (Int32.shift_left (Int32.of_int b2) 8) (Int32.of_int b3)) 260 + in 261 + Int32.float_of_bits bits 262 + 263 + (** Read a complete CBOR value as Value.t *) 264 + let rec read_value dec : Value.t = 265 + let byte = read_byte dec in 266 + let major = byte lsr 5 in 267 + let additional = byte land 0x1f in 268 + match major with 269 + | 0 -> 270 + (* Unsigned integer *) 271 + Value.Int (read_length dec additional) 272 + | 1 -> 273 + (* Negative integer *) 274 + Value.Int (-1 - read_length dec additional) 275 + | 2 -> 276 + (* Byte string *) 277 + let len = read_length dec additional in 278 + Value.Bytes (read_bytes_raw dec len) 279 + | 3 -> 280 + (* Text string *) 281 + let len = read_length dec additional in 282 + Value.String (read_string_raw dec len) 283 + | 4 -> 284 + (* Array *) 285 + let len = read_length dec additional in 286 + let items = List.init len (fun _ -> read_value dec) in 287 + Value.Array items 288 + | 5 -> 289 + (* Map *) 290 + let len = read_length dec additional in 291 + let pairs = 292 + List.init len (fun _ -> 293 + let key = 294 + match read_value dec with 295 + | Value.String s -> s 296 + | _ -> failwith "CBOR map key must be string" 297 + in 298 + let value = read_value dec in 299 + (key, value)) 300 + in 301 + Value.Object pairs 302 + | 6 -> 303 + (* Tag - skip tag number and read value *) 304 + let _ = read_length dec additional in 305 + read_value dec 306 + | 7 -> ( 307 + (* Simple values and floats *) 308 + match additional with 309 + | 20 -> Value.Bool false 310 + | 21 -> Value.Bool true 311 + | 22 -> Value.Null 312 + | 23 -> Value.Undefined 313 + | 25 -> Value.Float (read_float16 dec) 314 + | 26 -> Value.Float (read_float32 dec) 315 + | 27 -> Value.Float (read_float dec) 316 + | _ -> Value.Undefined) 317 + | _ -> failwith ("read_value: unknown major type " ^ string_of_int major) 318 + 319 + (** {1 Convenience Functions} *) 320 + 321 + let shared_encoder = create_encoder ~capacity:4096 () 322 + 323 + let encode (v : Value.t) : bytes = 324 + reset_encoder shared_encoder; 325 + write_value shared_encoder v; 326 + encoder_contents shared_encoder 327 + 328 + let encode_string (v : Value.t) : string = 329 + reset_encoder shared_encoder; 330 + write_value shared_encoder v; 331 + encoder_contents_string shared_encoder 332 + 333 + (** Decode CBOR bytes to Value.t *) 334 + let decode (data : bytes) : Value.t = 335 + let dec = create_decoder_bytes data in 336 + read_value dec 337 + 338 + (** Decode CBOR string to Value.t *) 339 + let decode_string (s : string) : Value.t = 340 + let dec = create_decoder s in 341 + read_value dec 342 + 343 + (** {1 Low-level Access for Streaming} *) 344 + 345 + (** Read a CBOR string (expects major type 3) *) 346 + let read_string_only dec = 347 + let byte = read_byte dec in 348 + let major = byte lsr 5 in 349 + let additional = byte land 0x1f in 350 + if major <> 3 then failwith "read_string_only: expected string"; 351 + let len = read_length dec additional in 352 + read_string_raw dec len 353 + 354 + (** Read CBOR unsigned integer *) 355 + let read_uint dec = 356 + let byte = read_byte dec in 357 + let major = byte lsr 5 in 358 + let additional = byte land 0x1f in 359 + if major <> 0 then failwith "read_uint: expected unsigned integer"; 360 + read_length dec additional 361 + 362 + (** {1 simdjsont Integration} 363 + 364 + For JSON-compatible CBOR values, we can leverage simdjsont's 365 + SIMD-accelerated parsing. This section provides conversion functions. *) 366 + 367 + (** Convert Value.t to simdjsont JSON (for JSON-compatible values only) *) 368 + let value_to_json (v : Value.t) : Simdjsont.Json.t option = 369 + let rec conv = function 370 + | Value.Null -> Some Simdjsont.Json.Null 371 + | Value.Bool b -> Some (Simdjsont.Json.Bool b) 372 + | Value.Int n -> Some (Simdjsont.Json.Int (Int64.of_int n)) 373 + | Value.Float f -> Some (Simdjsont.Json.Float f) 374 + | Value.String s -> Some (Simdjsont.Json.String s) 375 + | Value.Array items -> 376 + let converted = List.filter_map conv items in 377 + if List.length converted = List.length items then 378 + Some (Simdjsont.Json.Array converted) 379 + else None 380 + | Value.Object pairs -> 381 + let converted = 382 + List.filter_map 383 + (fun (k, v) -> 384 + match conv v with Some jv -> Some (k, jv) | None -> None) 385 + pairs 386 + in 387 + if List.length converted = List.length pairs then 388 + Some (Simdjsont.Json.Object converted) 389 + else None 390 + | Value.Undefined | Value.Bytes _ | Value.Timestamp_ref _ -> None 391 + in 392 + conv v 393 + 394 + (** Convert simdjsont JSON to Value.t *) 395 + let json_to_value (j : Simdjsont.Json.t) : Value.t = 396 + let rec conv = function 397 + | Simdjsont.Json.Null -> Value.Null 398 + | Simdjsont.Json.Bool b -> Value.Bool b 399 + | Simdjsont.Json.Int i -> 400 + if i >= Int64.of_int Int.min_int && i <= Int64.of_int Int.max_int then 401 + Value.Int (Int64.to_int i) 402 + else Value.Float (Int64.to_float i) 403 + | Simdjsont.Json.Float f -> 404 + if 405 + (* Try to convert whole floats to int *) 406 + Float.is_integer f 407 + && f >= Float.of_int Int.min_int 408 + && f <= Float.of_int Int.max_int 409 + then Value.Int (Float.to_int f) 410 + else Value.Float f 411 + | Simdjsont.Json.String s -> Value.String s 412 + | Simdjsont.Json.Array items -> Value.Array (List.map conv items) 413 + | Simdjsont.Json.Object pairs -> 414 + Value.Object (List.map (fun (k, v) -> (k, conv v)) pairs) 415 + in 416 + conv j 417 + 418 + (** Encode JSON-compatible Value.t using simdjsont's CBOR encoder *) 419 + let encode_via_simdjsont (v : Value.t) : string option = 420 + match value_to_json v with 421 + | Some json -> Some (Simdjsont.Cbor.encode_string Simdjsont.Codec.value json) 422 + | None -> None 423 + 424 + (** Decode CBOR to Value.t using simdjsont (JSON-compatible values only) *) 425 + let decode_via_simdjsont (s : string) : (Value.t, string) result = 426 + match Simdjsont.Cbor.decode_string Simdjsont.Codec.value s with 427 + | Ok json -> Ok (json_to_value json) 428 + | Error e -> Error e
+3
lib/crdt.ml
··· 88 88 module Model_codec_cbor = Model_codec_cbor 89 89 (** CBOR (RFC 7049) encoding/decoding primitives *) 90 90 91 + module Cbor_simd = Cbor_simd 92 + (** CBOR with simdjsont integration for Value.t encoding/decoding *) 93 + 91 94 (** {1 IO Abstraction} *) 92 95 93 96 module Io_intf = Io_intf
+17 -65
lib/model_codec_cbor.ml
··· 338 338 write_u8 enc (len land 0xff) 339 339 end 340 340 341 - (** Write a complete Value.t as CBOR *) 342 - let rec write_cbor_value enc (v : Value.t) = 343 - match v with 344 - | Value.Null -> write_cbor_null enc 345 - | Value.Undefined -> write_cbor_undefined enc 346 - | Value.Bool b -> write_cbor_bool enc b 347 - | Value.Int n -> write_cbor_int enc n 348 - | Value.Float f -> write_cbor_float enc f 349 - | Value.String s -> write_cbor_string enc s 350 - | Value.Bytes b -> write_cbor_bytes enc b 351 - | Value.Array items -> 352 - write_cbor_array_header enc (List.length items); 353 - List.iter (write_cbor_value enc) items 354 - | Value.Object pairs -> 355 - write_cbor_map_header enc (List.length pairs); 356 - List.iter 357 - (fun (k, v) -> 358 - write_cbor_string enc k; 359 - write_cbor_value enc v) 360 - pairs 361 - | Value.Timestamp_ref (sid, time) -> 362 - write_u8 enc 0x82; 363 - (* 2-element array *) 364 - write_cbor_uint enc sid; 365 - write_cbor_uint enc time 341 + (** Write a complete Value.t as CBOR via Cbor_simd *) 342 + let write_cbor_value enc (v : Value.t) = 343 + let cbor_bytes = Cbor_simd.encode v in 344 + write_bytes enc cbor_bytes 366 345 367 346 (** {1 Decoder} *) 368 347 ··· 512 491 in 513 492 Int64.float_of_bits bits 514 493 515 - (** Read a complete CBOR value as Value.t *) 516 - let rec read_cbor_value dec : Value.t = 517 - let byte = read_u8 dec in 518 - let major = byte lsr 5 in 519 - let additional = byte land 0x1f in 520 - match major with 521 - | 0 -> Value.Int (read_cbor_length dec additional) 522 - | 1 -> Value.Int (-1 - read_cbor_length dec additional) 523 - | 2 -> 524 - let len = read_cbor_length dec additional in 525 - Value.Bytes (read_bytes dec len) 526 - | 3 -> 527 - let len = read_cbor_length dec additional in 528 - Value.String (read_string dec len) 529 - | 4 -> 530 - let len = read_cbor_length dec additional in 531 - let items = List.init len (fun _ -> read_cbor_value dec) in 532 - Value.Array items 533 - | 5 -> 534 - let len = read_cbor_length dec additional in 535 - let pairs = 536 - List.init len (fun _ -> 537 - let key = 538 - match read_cbor_value dec with 539 - | Value.String s -> s 540 - | _ -> failwith "CBOR map key must be string" 541 - in 542 - let value = read_cbor_value dec in 543 - (key, value)) 544 - in 545 - Value.Object pairs 546 - | 7 -> ( 547 - match additional with 548 - | 20 -> Value.Bool false 549 - | 21 -> Value.Bool true 550 - | 22 -> Value.Null 551 - | 23 -> Value.Undefined 552 - | 27 -> Value.Float (read_cbor_float dec) 553 - | _ -> Value.Undefined) 554 - | _ -> failwith ("read_cbor_value: unknown major type " ^ string_of_int major) 494 + (** Read a complete CBOR value as Value.t via Cbor_simd. 495 + 496 + Note: We need to read the CBOR value starting at the current decoder 497 + position, then advance the decoder past the consumed bytes. Since 498 + Cbor_simd's decoder tracks its own position, we create a sub-decoder and 499 + sync positions after. *) 500 + let read_cbor_value dec : Value.t = 501 + let start_pos = dec.pos in 502 + let remaining = Bytes.sub dec.data start_pos (dec.len - start_pos) in 503 + let cbor_dec = Cbor_simd.create_decoder_bytes remaining in 504 + let value = Cbor_simd.read_value cbor_dec in 505 + dec.pos <- start_pos + Cbor_simd.decoder_pos cbor_dec; 506 + value 555 507 556 508 (** Read a CBOR string (expects major type 3) *) 557 509 let read_cbor_string_only dec =
+12 -145
lib/patch_codec_binary.ml
··· 278 278 (Int64.to_int (Int64.logand (Int64.shift_right bits (i * 8)) 0xffL)) 279 279 done 280 280 281 - (** Write any Value.t as CBOR *) 282 - let rec write_cbor_value enc (v : Value.t) = 283 - match v with 284 - | Value.Null -> write_u8 enc 0xf6 285 - | Value.Undefined -> write_u8 enc 0xf7 286 - | Value.Bool true -> write_u8 enc 0xf5 287 - | Value.Bool false -> write_u8 enc 0xf4 288 - | Value.Int n -> 289 - if n >= 0 then write_cbor_uint enc n else write_cbor_negint enc n 290 - | Value.Float f -> write_cbor_float enc f 291 - | Value.String s -> write_cbor_string enc s 292 - | Value.Bytes b -> write_cbor_bytes enc b 293 - | Value.Array items -> 294 - let len = List.length items in 295 - if len <= 23 then write_u8 enc (0x80 lor len) 296 - else if len <= 0xff then begin 297 - write_u8 enc 0x98; 298 - write_u8 enc len 299 - end 300 - else begin 301 - write_u8 enc 0x99; 302 - write_u8 enc ((len lsr 8) land 0xff); 303 - write_u8 enc (len land 0xff) 304 - end; 305 - List.iter (write_cbor_value enc) items 306 - | Value.Object pairs -> 307 - let len = List.length pairs in 308 - if len <= 23 then write_u8 enc (0xa0 lor len) 309 - else if len <= 0xff then begin 310 - write_u8 enc 0xb8; 311 - write_u8 enc len 312 - end 313 - else begin 314 - write_u8 enc 0xb9; 315 - write_u8 enc ((len lsr 8) land 0xff); 316 - write_u8 enc (len land 0xff) 317 - end; 318 - List.iter 319 - (fun (k, v) -> 320 - write_cbor_string enc k; 321 - write_cbor_value enc v) 322 - pairs 323 - | Value.Timestamp_ref (sid, time) -> 324 - (* Encoded as array [sid, time] *) 325 - write_u8 enc 0x82; 326 - write_cbor_uint enc sid; 327 - write_cbor_uint enc time 281 + (** Write any Value.t as CBOR via Cbor_simd *) 282 + let write_cbor_value enc (v : Value.t) = 283 + let cbor_bytes = Cbor_simd.encode v in 284 + write_bytes enc cbor_bytes 328 285 329 286 (** Read CBOR length based on additional info *) 330 287 let read_cbor_length dec additional = ··· 356 313 (Printf.sprintf "read_cbor_length: unsupported additional info %d" 357 314 additional) 358 315 359 - (** Read a CBOR value *) 360 - let rec read_cbor_value dec : Value.t = 361 - let byte = read_u8 dec in 362 - let major = byte lsr 5 in 363 - let additional = byte land 0x1f in 364 - match major with 365 - | 0 -> 366 - (* unsigned int *) 367 - Value.Int (read_cbor_length dec additional) 368 - | 1 -> 369 - (* negative int *) 370 - Value.Int (-1 - read_cbor_length dec additional) 371 - | 2 -> 372 - (* byte string *) 373 - let len = read_cbor_length dec additional in 374 - Value.Bytes (read_bytes dec len) 375 - | 3 -> 376 - (* text string *) 377 - let len = read_cbor_length dec additional in 378 - Value.String (read_string dec len) 379 - | 4 -> 380 - (* array *) 381 - let len = read_cbor_length dec additional in 382 - let items = List.init len (fun _ -> read_cbor_value dec) in 383 - Value.Array items 384 - | 5 -> 385 - (* map *) 386 - let len = read_cbor_length dec additional in 387 - let pairs = 388 - List.init len (fun _ -> 389 - let key = 390 - match read_cbor_value dec with 391 - | Value.String s -> s 392 - | _ -> failwith "read_cbor_value: map key must be string" 393 - in 394 - let value = read_cbor_value dec in 395 - (key, value)) 396 - in 397 - Value.Object pairs 398 - | 7 -> ( 399 - (* special *) 400 - match additional with 401 - | 20 -> Value.Bool false 402 - | 21 -> Value.Bool true 403 - | 22 -> Value.Null 404 - | 23 -> Value.Undefined 405 - | 25 -> 406 - (* float16 - simplified, read as 2 bytes *) 407 - let _ = read_bytes dec 2 in 408 - Value.Float 0.0 (* TODO: proper float16 decoding *) 409 - | 26 -> 410 - (* float32 *) 411 - let bytes = read_bytes dec 4 in 412 - let bits = 413 - Int32.logor 414 - (Int32.shift_left (Int32.of_int (Bytes.get_uint8 bytes 0)) 24) 415 - (Int32.logor 416 - (Int32.shift_left (Int32.of_int (Bytes.get_uint8 bytes 1)) 16) 417 - (Int32.logor 418 - (Int32.shift_left 419 - (Int32.of_int (Bytes.get_uint8 bytes 2)) 420 - 8) 421 - (Int32.of_int (Bytes.get_uint8 bytes 3)))) 422 - in 423 - Value.Float (Int32.float_of_bits bits) 424 - | 27 -> 425 - (* float64 *) 426 - let bytes = read_bytes dec 8 in 427 - let bits = 428 - Int64.logor 429 - (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 0)) 56) 430 - (Int64.logor 431 - (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 1)) 48) 432 - (Int64.logor 433 - (Int64.shift_left 434 - (Int64.of_int (Bytes.get_uint8 bytes 2)) 435 - 40) 436 - (Int64.logor 437 - (Int64.shift_left 438 - (Int64.of_int (Bytes.get_uint8 bytes 3)) 439 - 32) 440 - (Int64.logor 441 - (Int64.shift_left 442 - (Int64.of_int (Bytes.get_uint8 bytes 4)) 443 - 24) 444 - (Int64.logor 445 - (Int64.shift_left 446 - (Int64.of_int (Bytes.get_uint8 bytes 5)) 447 - 16) 448 - (Int64.logor 449 - (Int64.shift_left 450 - (Int64.of_int (Bytes.get_uint8 bytes 6)) 451 - 8) 452 - (Int64.of_int (Bytes.get_uint8 bytes 7)))))))) 453 - in 454 - Value.Float (Int64.float_of_bits bits) 455 - | _ -> Value.Undefined) 456 - | _ -> failwith "read_cbor_value: unsupported major type" 316 + (** Read a CBOR value via Cbor_simd *) 317 + let read_cbor_value dec : Value.t = 318 + let start_pos = dec.pos in 319 + let remaining = Bytes.sub dec.data start_pos (dec.len - start_pos) in 320 + let cbor_dec = Cbor_simd.create_decoder_bytes remaining in 321 + let value = Cbor_simd.read_value cbor_dec in 322 + dec.pos <- start_pos + Cbor_simd.decoder_pos cbor_dec; 323 + value 457 324 458 325 (** {1 Timestamp Encoding/Decoding} *) 459 326