crdt library in ocaml implementing json-joy
1(** CBOR Benchmark: Compare our implementation with the cbor library.
2
3 This benchmark compares: 1. Encoding performance 2. Decoding performance 3.
4 Output size correctness
5
6 We test various CBOR value types that we use in the CRDT codec. *)
7
8open Crdt
9
10(** {1 Benchmark Infrastructure} *)
11
12module Bench = struct
13 (** Measure execution time of a function, returning elapsed_sec *)
14 let time f =
15 let start = Unix.gettimeofday () in
16 let _result = f () in
17 Unix.gettimeofday () -. start
18
19 (** Run a benchmark N times and return (mean_time, ops_per_sec) *)
20 let run ?(warmup = 3) ?(iterations = 10) ?(min_time = 0.001) f =
21 (* Warmup *)
22 for _ = 1 to warmup do
23 ignore (f ())
24 done;
25
26 (* Determine sub-iterations for measurable time *)
27 let sub_iters =
28 let elapsed = time f in
29 if elapsed >= min_time then 1
30 else max 1 (int_of_float (min_time /. max elapsed 0.000001))
31 in
32
33 (* Timed runs *)
34 let times = Array.make iterations 0.0 in
35 for i = 0 to iterations - 1 do
36 let start = Unix.gettimeofday () in
37 for _ = 1 to sub_iters do
38 ignore (f ())
39 done;
40 times.(i) <- (Unix.gettimeofday () -. start) /. Float.of_int sub_iters
41 done;
42
43 let total = Array.fold_left ( +. ) 0.0 times in
44 let mean = total /. Float.of_int iterations in
45 (mean, 1.0 /. mean)
46end
47
48(** {1 Test Data} *)
49
50(* Simple values for testing *)
51let test_null = Value.Null
52let test_bool = Value.Bool true
53let test_int_small = Value.Int 42
54let test_int_medium = Value.Int 12345
55let test_int_large = Value.Int 1234567890
56let test_float = Value.Float 3.14159265359
57let test_string_short = Value.String "hello"
58let test_string_medium = Value.String (String.make 100 'x')
59let test_string_long = Value.String (String.make 1000 'x')
60let test_bytes = Value.Bytes (Bytes.make 100 '\x42')
61let test_array_small = Value.Array [ Value.Int 1; Value.Int 2; Value.Int 3 ]
62let test_array_medium = Value.Array (List.init 100 (fun i -> Value.Int i))
63
64let test_object_small =
65 Value.Object [ ("name", Value.String "test"); ("value", Value.Int 42) ]
66
67let test_object_medium =
68 Value.Object (List.init 50 (fun i -> (Printf.sprintf "key%d" i, Value.Int i)))
69
70(* Nested structure *)
71let test_nested =
72 Value.Object
73 [
74 ( "users",
75 Value.Array
76 [
77 Value.Object [ ("id", Value.Int 1); ("name", Value.String "Alice") ];
78 Value.Object [ ("id", Value.Int 2); ("name", Value.String "Bob") ];
79 ] );
80 ("count", Value.Int 2);
81 ("active", Value.Bool true);
82 ]
83
84(** {1 Conversion Functions} *)
85
86(** Convert our Value.t to CBOR.Simple.t *)
87let rec value_to_cbor (v : Value.t) : CBOR.Simple.t =
88 match v with
89 | Value.Null -> `Null
90 | Value.Undefined -> `Undefined
91 | Value.Bool b -> `Bool b
92 | Value.Int n -> `Int n
93 | Value.Float f -> `Float f
94 | Value.String s -> `Text s
95 | Value.Bytes b -> `Bytes (Bytes.to_string b)
96 | Value.Array items -> `Array (List.map value_to_cbor items)
97 | Value.Object pairs ->
98 `Map (List.map (fun (k, v) -> (`Text k, value_to_cbor v)) pairs)
99 | Value.Timestamp_ref (sid, time) -> `Array [ `Int sid; `Int time ]
100
101(** Convert CBOR.Simple.t back to our Value.t *)
102let[@warning "-32"] rec cbor_to_value (c : CBOR.Simple.t) : Value.t =
103 match c with
104 | `Null -> Value.Null
105 | `Undefined -> Value.Undefined
106 | `Bool b -> Value.Bool b
107 | `Int n -> Value.Int n
108 | `Float f -> Value.Float f
109 | `Text s -> Value.String s
110 | `Bytes b -> Value.Bytes (Bytes.of_string b)
111 | `Array items -> Value.Array (List.map cbor_to_value items)
112 | `Map pairs ->
113 Value.Object
114 (List.map
115 (fun (k, v) ->
116 match k with
117 | `Text s -> (s, cbor_to_value v)
118 | _ -> failwith "cbor_to_value: non-string key")
119 pairs)
120 | `Simple _ -> Value.Undefined
121 | `Tag (_, v) -> cbor_to_value v
122
123(** {1 Our CBOR Encoder/Decoder} *)
124
125(** Our CBOR encoding - extracted from Model_codec.Binary *)
126module Our_cbor = struct
127 type encoder = { mutable buf : bytes; mutable pos : int; mutable cap : int }
128
129 let create () = { buf = Bytes.create 256; pos = 0; cap = 256 }
130
131 let grow enc need =
132 let new_cap = max (enc.cap * 2) (enc.pos + need) in
133 let new_buf = Bytes.create new_cap in
134 Bytes.blit enc.buf 0 new_buf 0 enc.pos;
135 enc.buf <- new_buf;
136 enc.cap <- new_cap
137
138 let[@inline] write_u8 enc b =
139 if enc.pos >= enc.cap then grow enc 16;
140 Bytes.unsafe_set enc.buf enc.pos (Char.chr b);
141 enc.pos <- enc.pos + 1
142
143 let write_cbor_uint enc n =
144 if n <= 23 then write_u8 enc n
145 else if n <= 0xff then begin
146 write_u8 enc 0x18;
147 write_u8 enc n
148 end
149 else if n <= 0xffff then begin
150 write_u8 enc 0x19;
151 write_u8 enc ((n lsr 8) land 0xff);
152 write_u8 enc (n land 0xff)
153 end
154 else if n <= 0xffffffff then begin
155 write_u8 enc 0x1a;
156 write_u8 enc ((n lsr 24) land 0xff);
157 write_u8 enc ((n lsr 16) land 0xff);
158 write_u8 enc ((n lsr 8) land 0xff);
159 write_u8 enc (n land 0xff)
160 end
161 else begin
162 write_u8 enc 0x1b;
163 write_u8 enc ((n lsr 56) land 0xff);
164 write_u8 enc ((n lsr 48) land 0xff);
165 write_u8 enc ((n lsr 40) land 0xff);
166 write_u8 enc ((n lsr 32) land 0xff);
167 write_u8 enc ((n lsr 24) land 0xff);
168 write_u8 enc ((n lsr 16) land 0xff);
169 write_u8 enc ((n lsr 8) land 0xff);
170 write_u8 enc (n land 0xff)
171 end
172
173 let write_cbor_negint enc n =
174 let encoded = -1 - n in
175 if encoded <= 23 then write_u8 enc (0x20 lor encoded)
176 else if encoded <= 0xff then begin
177 write_u8 enc 0x38;
178 write_u8 enc encoded
179 end
180 else if encoded <= 0xffff then begin
181 write_u8 enc 0x39;
182 write_u8 enc ((encoded lsr 8) land 0xff);
183 write_u8 enc (encoded land 0xff)
184 end
185 else begin
186 write_u8 enc 0x3a;
187 write_u8 enc ((encoded lsr 24) land 0xff);
188 write_u8 enc ((encoded lsr 16) land 0xff);
189 write_u8 enc ((encoded lsr 8) land 0xff);
190 write_u8 enc (encoded land 0xff)
191 end
192
193 let write_cbor_string enc s =
194 let len = String.length s in
195 if len <= 23 then write_u8 enc (0x60 lor len)
196 else if len <= 0xff then begin
197 write_u8 enc 0x78;
198 write_u8 enc len
199 end
200 else if len <= 0xffff then begin
201 write_u8 enc 0x79;
202 write_u8 enc ((len lsr 8) land 0xff);
203 write_u8 enc (len land 0xff)
204 end
205 else begin
206 write_u8 enc 0x7a;
207 write_u8 enc ((len lsr 24) land 0xff);
208 write_u8 enc ((len lsr 16) land 0xff);
209 write_u8 enc ((len lsr 8) land 0xff);
210 write_u8 enc (len land 0xff)
211 end;
212 if enc.pos + len > enc.cap then grow enc len;
213 Bytes.blit_string s 0 enc.buf enc.pos len;
214 enc.pos <- enc.pos + len
215
216 let write_cbor_bytes enc b =
217 let len = Bytes.length b in
218 if len <= 23 then write_u8 enc (0x40 lor len)
219 else if len <= 0xff then begin
220 write_u8 enc 0x58;
221 write_u8 enc len
222 end
223 else if len <= 0xffff then begin
224 write_u8 enc 0x59;
225 write_u8 enc ((len lsr 8) land 0xff);
226 write_u8 enc (len land 0xff)
227 end
228 else begin
229 write_u8 enc 0x5a;
230 write_u8 enc ((len lsr 24) land 0xff);
231 write_u8 enc ((len lsr 16) land 0xff);
232 write_u8 enc ((len lsr 8) land 0xff);
233 write_u8 enc (len land 0xff)
234 end;
235 if enc.pos + len > enc.cap then grow enc len;
236 Bytes.blit b 0 enc.buf enc.pos len;
237 enc.pos <- enc.pos + len
238
239 let write_cbor_float enc f =
240 write_u8 enc 0xfb;
241 let bits = Int64.bits_of_float f in
242 for i = 7 downto 0 do
243 write_u8 enc
244 (Int64.to_int (Int64.logand (Int64.shift_right bits (i * 8)) 0xffL))
245 done
246
247 let rec encode_value enc (v : Value.t) =
248 match v with
249 | Value.Null -> write_u8 enc 0xf6
250 | Value.Undefined -> write_u8 enc 0xf7
251 | Value.Bool true -> write_u8 enc 0xf5
252 | Value.Bool false -> write_u8 enc 0xf4
253 | Value.Int n ->
254 if n >= 0 then write_cbor_uint enc n else write_cbor_negint enc n
255 | Value.Float f -> write_cbor_float enc f
256 | Value.String s -> write_cbor_string enc s
257 | Value.Bytes b -> write_cbor_bytes enc b
258 | Value.Array items ->
259 let len = List.length items in
260 if len <= 23 then write_u8 enc (0x80 lor len)
261 else begin
262 write_u8 enc 0x98;
263 write_u8 enc len
264 end;
265 List.iter (encode_value enc) items
266 | Value.Object pairs ->
267 let len = List.length pairs in
268 if len <= 23 then write_u8 enc (0xa0 lor len)
269 else begin
270 write_u8 enc 0xb8;
271 write_u8 enc len
272 end;
273 List.iter
274 (fun (k, v) ->
275 write_cbor_string enc k;
276 encode_value enc v)
277 pairs
278 | Value.Timestamp_ref (sid, time) ->
279 write_u8 enc 0x82;
280 write_cbor_uint enc sid;
281 write_cbor_uint enc time
282
283 let encode v =
284 let enc = create () in
285 encode_value enc v;
286 Bytes.sub_string enc.buf 0 enc.pos
287end
288
289(** {1 Benchmark Runner} *)
290
291let run_encode_benchmark name value =
292 let cbor_value = value_to_cbor value in
293
294 (* Our encoder *)
295 let our_time, _ = Bench.run (fun () -> Our_cbor.encode value) in
296
297 (* Cbor_simd encoder *)
298 let simd_time, _ = Bench.run (fun () -> Crdt.Cbor_simd.encode_string value) in
299
300 (* Library encoder *)
301 let lib_time, _ = Bench.run (fun () -> CBOR.Simple.encode cbor_value) in
302
303 let our_ops = 1.0 /. our_time in
304 let simd_ops = 1.0 /. simd_time in
305 let lib_ops = 1.0 /. lib_time in
306 let our_ratio = our_ops /. lib_ops in
307 let simd_ratio = simd_ops /. lib_ops in
308 Printf.printf " %-30s %8.0f | %8.0f | %8.0f (%.2fx | %.2fx)\n" name our_ops
309 simd_ops lib_ops our_ratio simd_ratio;
310
311 (* Verify same output *)
312 let our_output = Our_cbor.encode value in
313 let lib_output = CBOR.Simple.encode cbor_value in
314 if our_output <> lib_output then begin
315 Printf.printf " WARNING: Output differs! ours=%d bytes, lib=%d bytes\n"
316 (String.length our_output) (String.length lib_output);
317 Printf.printf " Our bytes: ";
318 String.iter
319 (fun c -> Printf.printf "%02x " (Char.code c))
320 (String.sub our_output 0 (min 20 (String.length our_output)));
321 Printf.printf "\n Lib bytes: ";
322 String.iter
323 (fun c -> Printf.printf "%02x " (Char.code c))
324 (String.sub lib_output 0 (min 20 (String.length lib_output)));
325 Printf.printf "\n"
326 end
327
328let run_decode_benchmark name value =
329 let cbor_value = value_to_cbor value in
330 let encoded = CBOR.Simple.encode cbor_value in
331
332 (* Cbor_simd decoder *)
333 let simd_time, _ =
334 Bench.run (fun () -> Crdt.Cbor_simd.decode_string encoded)
335 in
336
337 (* Library decoder *)
338 let lib_time, _ = Bench.run (fun () -> CBOR.Simple.decode encoded) in
339
340 let simd_ops = 1.0 /. simd_time in
341 let lib_ops = 1.0 /. lib_time in
342 let simd_ratio = simd_ops /. lib_ops in
343 Printf.printf " %-30s N/A | %8.0f | %8.0f ( - | %.2fx)\n" name
344 simd_ops lib_ops simd_ratio
345
346let () =
347 Printf.printf "CBOR Benchmark: Custom vs Cbor_simd vs cbor Library\n";
348 Printf.printf "===================================================\n\n";
349
350 Printf.printf "Encoding Benchmarks (ops/s - higher is better)\n";
351 Printf.printf
352 " Custom Cbor_simd Library (Ratios)\n";
353 Printf.printf
354 "-------------------------------------------------------------------------\n";
355
356 run_encode_benchmark "null" test_null;
357 run_encode_benchmark "bool" test_bool;
358 run_encode_benchmark "int (small)" test_int_small;
359 run_encode_benchmark "int (medium)" test_int_medium;
360 run_encode_benchmark "int (large)" test_int_large;
361 run_encode_benchmark "float" test_float;
362 run_encode_benchmark "string (5 chars)" test_string_short;
363 run_encode_benchmark "string (100 chars)" test_string_medium;
364 run_encode_benchmark "string (1000 chars)" test_string_long;
365 run_encode_benchmark "bytes (100 bytes)" test_bytes;
366 run_encode_benchmark "array (3 ints)" test_array_small;
367 run_encode_benchmark "array (100 ints)" test_array_medium;
368 run_encode_benchmark "object (2 keys)" test_object_small;
369 run_encode_benchmark "object (50 keys)" test_object_medium;
370 run_encode_benchmark "nested structure" test_nested;
371
372 Printf.printf
373 "\nDecoding Benchmarks (library only - we use specialized decoders)\n";
374 Printf.printf
375 "-----------------------------------------------------------------\n";
376
377 run_decode_benchmark "string (1000 chars)" test_string_long;
378 run_decode_benchmark "array (100 ints)" test_array_medium;
379 run_decode_benchmark "object (50 keys)" test_object_medium;
380 run_decode_benchmark "nested structure" test_nested;
381
382 Printf.printf "\nBenchmark complete.\n"