+15
LICENSE
+15
LICENSE
···
1
+
ISC License
2
+
3
+
Copyright (c) 2026 Gabriel Díaz López de la Llave
4
+
5
+
Permission to use, copy, modify, and/or distribute this software for any
6
+
purpose with or without fee is hereby granted, provided that the above
7
+
copyright notice and this permission notice appear in all copies.
8
+
9
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10
+
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11
+
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
12
+
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13
+
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
14
+
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15
+
PERFORMANCE OF THIS SOFTWARE.
+112
-13
README.md
+112
-13
README.md
···
6
6
7
7
## Installation
8
8
9
+
### System requirements
10
+
11
+
This package builds vendored C++ code (simdjson) via Dune `foreign_stubs`, so you need a **C++17-capable compiler toolchain** available on your system.
12
+
13
+
### Opam
14
+
9
15
```
10
16
opam install simdjsont
11
17
```
···
51
57
### Extracting Values
52
58
53
59
Use JSON Pointer (RFC 6901) syntax to extract values without parsing the entire document:
60
+
61
+
### NDJSON / streaming
62
+
63
+
`simdjsont` can parse **newline-delimited JSON (NDJSON)** (and more generally, concatenated JSON documents) using the low-level streaming API `Simdjsont.Raw.Stream`.
64
+
65
+
This is useful when you want to process large streams of JSON documents (logs, events, data pipelines) without splitting the input yourself.
66
+
67
+
If you want to decode each streamed document into an OCaml value, use `Simdjsont.Codec.decode_element` with your codec (this keeps decoding in the high-level API while parsing stays streaming/zero-copy).
68
+
69
+
```ocaml
70
+
let ndjson = "{\"id\":1}\n{\"id\":2}\n{broken\n{\"id\":3}\n" in
71
+
72
+
let parser = Simdjsont.Raw.create_parser () in
73
+
let buf = Simdjsont.Raw.buffer_of_string ndjson in
74
+
let len = String.length ndjson in
75
+
76
+
match Simdjsont.Raw.Stream.create parser buf ~len with
77
+
| Error e -> Printf.eprintf "Failed to create stream: %s\n" e.message
78
+
| Ok stream ->
79
+
Simdjsont.Raw.Stream.to_seq stream
80
+
|> Seq.iter (function
81
+
| Ok (elt, byte_offset) ->
82
+
Printf.printf "Doc at %d: %s\n" byte_offset
83
+
(Simdjsont.Raw.element_to_string elt)
84
+
| Error (e, byte_offset) ->
85
+
Printf.printf "Parse error at %d: %s\n" byte_offset e.message);
86
+
87
+
let truncated = Simdjsont.Raw.Stream.truncated_bytes stream in
88
+
if truncated > 0 then
89
+
Printf.printf "Warning: %d truncated bytes at end of stream\n" truncated
90
+
```
91
+
92
+
Decode streamed documents to a typed value using a codec:
93
+
94
+
```ocaml
95
+
type event = { id : int; name : string }
96
+
97
+
let event_codec =
98
+
let open Simdjsont.Decode in
99
+
Obj.field (fun id name -> { id; name })
100
+
|> Obj.mem "id" int ~enc:(fun e -> e.id)
101
+
|> Obj.mem "name" string ~enc:(fun e -> e.name)
102
+
|> Obj.finish
103
+
104
+
match Simdjsont.Raw.Stream.create parser buf ~len with
105
+
| Error e -> Printf.eprintf "Failed to create stream: %s\n" e.message
106
+
| Ok stream ->
107
+
Simdjsont.Raw.Stream.to_seq stream
108
+
|> Seq.iter (function
109
+
| Ok (elt, byte_offset) -> (
110
+
match Simdjsont.Codec.decode_element event_codec elt with
111
+
| Ok event ->
112
+
Printf.printf "Event at %d: id=%d name=%s\n" byte_offset event.id
113
+
event.name
114
+
| Error msg ->
115
+
Printf.printf "Decode error at %d: %s\n" byte_offset msg)
116
+
| Error (e, byte_offset) ->
117
+
Printf.printf "Parse error at %d: %s\n" byte_offset e.message)
118
+
```
119
+
120
+
**Lifetime note:** values returned from `Simdjsont.Raw.Stream` reference memory owned by the parser/buffer. Keep the `parser` and `buf` alive for the whole iteration, and don't reuse the parser for other parses while you still hold `element`s.
54
121
55
122
```ocaml
56
123
let json = {|{
···
258
325
| Error e -> print_endline e
259
326
```
260
327
261
-
### Yojson Compatibility
328
+
#### Converting to/from Yojson (example)
262
329
263
-
Convert between `Simdjsont.Json.t` and Yojson-compatible polymorphic variants:
330
+
`simdjsont` does not depend on `yojson`, but you can convert values when you need interoperability.
264
331
265
332
```ocaml
333
+
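(* Requires adding `yojson` to your own project dependencies. The `Tuple and `Variant cases below only exist in yojson < 3.0; drop them when building against yojson >= 3.0. *)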
334
+
335
+
let rec simdjsont_to_yojson (v : Simdjsont.Json.t) : Yojson.Safe.t =
336
+
match v with
337
+
| Null -> `Null
338
+
| Bool b -> `Bool b
339
+
| Int i -> `Intlit (Int64.to_string i)
340
+
| Float f -> `Float f
341
+
| String s -> `String s
342
+
| Array xs -> `List (List.map simdjsont_to_yojson xs)
343
+
| Object fields ->
344
+
`Assoc
345
+
(List.map (fun (k, x) -> (k, simdjsont_to_yojson x)) fields)
346
+
347
+
let rec yojson_to_simdjsont (v : Yojson.Safe.t) : Simdjsont.Json.t =
348
+
match v with
349
+
| `Null -> Null
350
+
| `Bool b -> Bool b
351
+
| `Int n -> Int (Int64.of_int n)
352
+
| `Intlit s -> Int (Int64.of_string s)
353
+
| `Float f -> Float f
354
+
| `String s -> String s
355
+
| `List xs -> Array (List.map yojson_to_simdjsont xs)
356
+
| `Assoc fields ->
357
+
Object (List.map (fun (k, x) -> (k, yojson_to_simdjsont x)) fields)
358
+
| `Tuple xs -> Array (List.map yojson_to_simdjsont xs)
359
+
| `Variant (tag, arg_opt) ->
360
+
Object
361
+
[
362
+
( "variant",
363
+
Object
364
+
(
365
+
[ ("tag", String tag) ]
366
+
@
367
+
match arg_opt with
368
+
| None -> []
369
+
| Some a -> [ ("arg", yojson_to_simdjsont a) ] ) );
370
+
]
371
+
266
372
let () =
267
373
match Simdjsont.Decode.decode_string Simdjsont.Decode.value {|{"name": "test", "value": 123}|} with
268
374
| Ok json ->
269
-
let yojson = Simdjsont.Json.to_yojson json in
270
-
let name = match yojson with
271
-
| `Assoc fields -> List.assoc_opt "name" fields
272
-
| _ -> None
273
-
in
274
-
(match name with
275
-
| Some (`String s) -> print_endline s
276
-
| _ -> ());
277
-
let back = Simdjsont.Json.of_yojson yojson in
278
-
print_endline (Simdjsont.Json.to_string back)
279
-
| Error e -> print_endline e
375
+
let yojson = simdjsont_to_yojson json in
376
+
let back = yojson_to_simdjsont yojson in
377
+
assert (Simdjsont.Json.to_string back = Simdjsont.Json.to_string json)
378
+
| Error e -> failwith e
280
379
```
281
380
282
381
### Low-Level API
+16
-23
bench/bench_json.ml
+16
-23
bench/bench_json.ml
···
27
27
time_it "simdjsont" iterations (fun () ->
28
28
let _ = Simdjsont.Validate.is_valid json in
29
29
());
30
-
time_it "yojson" iterations (fun () ->
31
-
let _ = Yojson.Safe.from_string json in
30
+
time_it "baseline" iterations (fun () ->
31
+
let _ = Simdjsont.Validate.is_valid json in
32
32
());
33
33
]
34
34
in
35
-
let baseline = List.assoc "yojson" validate_results in
35
+
let baseline = List.assoc "baseline" validate_results in
36
36
List.iter
37
37
(fun (n, ns) ->
38
38
Printf.printf " %-12s %10.0f ns (%.1fx)\n" n ns (baseline /. ns))
···
44
44
time_it "simdjsont" iterations (fun () ->
45
45
let _ = Simdjsont.Extract.int64 json ~pointer:"/statuses/0/id" in
46
46
());
47
-
time_it "yojson" iterations (fun () ->
48
-
let v = Yojson.Safe.from_string json in
49
-
match v with
50
-
| `Assoc fields -> (
51
-
match List.assoc_opt "statuses" fields with
52
-
| Some (`List (first :: _)) -> (
53
-
match first with
54
-
| `Assoc f -> ignore (List.assoc_opt "id" f)
55
-
| _ -> ())
56
-
| _ -> ())
57
-
| _ -> ());
47
+
time_it "baseline" iterations (fun () ->
48
+
let _ = Simdjsont.Extract.int64 json ~pointer:"/statuses/0/id" in
49
+
());
58
50
]
59
51
in
60
-
let baseline = List.assoc "yojson" extract_results in
52
+
let baseline = List.assoc "baseline" extract_results in
61
53
List.iter
62
54
(fun (n, ns) ->
63
55
Printf.printf " %-12s %10.0f ns (%.1fx)\n" n ns (baseline /. ns))
···
69
61
time_it "simdjsont" iterations (fun () ->
70
62
let _ = Simdjsont.Codec.decode_string Simdjsont.Codec.value json in
71
63
());
72
-
time_it "yojson" iterations (fun () ->
73
-
let _ = Yojson.Safe.from_string json in
64
+
time_it "baseline" iterations (fun () ->
65
+
let _ = Simdjsont.Codec.decode_string Simdjsont.Codec.value json in
74
66
());
75
67
]
76
68
in
77
-
let baseline = List.assoc "yojson" decode_results in
69
+
let baseline = List.assoc "baseline" decode_results in
78
70
List.iter
79
71
(fun (n, ns) ->
80
72
Printf.printf " %-12s %10.0f ns (%.1fx)\n" n ns (baseline /. ns))
···
86
78
| Ok v -> v
87
79
| Error e -> failwith e
88
80
in
89
-
let yojson_data = Yojson.Safe.from_string json in
90
81
let encode_results =
91
82
[
92
83
time_it "simdjsont" iterations (fun () ->
···
94
85
Simdjsont.Codec.encode_string Simdjsont.Codec.value simdjson_data
95
86
in
96
87
());
97
-
time_it "yojson" iterations (fun () ->
98
-
let _ = Yojson.Safe.to_string yojson_data in
88
+
time_it "baseline" iterations (fun () ->
89
+
let _ =
90
+
Simdjsont.Codec.encode_string Simdjsont.Codec.value simdjson_data
91
+
in
99
92
());
100
93
]
101
94
in
102
-
let baseline = List.assoc "yojson" encode_results in
95
+
let baseline = List.assoc "baseline" encode_results in
103
96
List.iter
104
97
(fun (n, ns) ->
105
98
Printf.printf " %-12s %10.0f ns (%.1fx)\n" n ns (baseline /. ns))
106
99
encode_results
107
100
108
101
let () =
109
-
Printf.printf "simdjsont vs yojson benchmarks\n";
102
+
Printf.printf "simdjsont benchmarks\n";
110
103
Printf.printf "==============================\n";
111
104
run_use_case_benchmarks "twitter.json" (Lazy.force twitter_json) 50;
112
105
run_use_case_benchmarks "citm_catalog.json" (Lazy.force citm_json) 20
+1
-67
bench/bench_ndjson.ml
+1
-67
bench/bench_ndjson.ml
···
123
123
docs_per_s;
124
124
}
125
125
126
-
let bench_yojson_linebyline ndjson ~iterations =
127
-
let total_bytes = String.length ndjson in
128
-
let lines =
129
-
String.split_on_char '\n' ndjson
130
-
|> List.filter (fun s -> String.length s > 0)
131
-
in
132
-
let num_docs = List.length lines in
133
-
134
-
Gc.full_major ();
135
-
let start = Unix.gettimeofday () in
136
-
137
-
for _ = 1 to iterations do
138
-
List.iter
139
-
(fun line ->
140
-
let _ = Yojson.Safe.from_string line in
141
-
())
142
-
lines
143
-
done;
144
-
145
-
let elapsed = Unix.gettimeofday () -. start in
146
-
let bytes_processed = total_bytes * iterations in
147
-
let throughput_mb_s =
148
-
float_of_int bytes_processed /. elapsed /. 1_000_000.0
149
-
in
150
-
let docs_per_s = float_of_int (num_docs * iterations) /. elapsed in
151
-
152
-
{
153
-
name = "yojson_linebyline";
154
-
total_bytes;
155
-
num_docs;
156
-
elapsed_s = elapsed;
157
-
throughput_mb_s;
158
-
docs_per_s;
159
-
}
160
-
161
-
let bench_yojson_seq ndjson ~iterations =
162
-
let total_bytes = String.length ndjson in
163
-
164
-
Gc.full_major ();
165
-
let start = Unix.gettimeofday () in
166
-
let total_docs = ref 0 in
167
-
168
-
for _ = 1 to iterations do
169
-
let seq = Yojson.Safe.seq_from_string ndjson in
170
-
let count = Seq.fold_left (fun acc _ -> acc + 1) 0 seq in
171
-
total_docs := count
172
-
done;
173
-
174
-
let elapsed = Unix.gettimeofday () -. start in
175
-
let bytes_processed = total_bytes * iterations in
176
-
let throughput_mb_s =
177
-
float_of_int bytes_processed /. elapsed /. 1_000_000.0
178
-
in
179
-
let docs_per_s = float_of_int (!total_docs * iterations) /. elapsed in
180
-
181
-
{
182
-
name = "yojson_seq";
183
-
total_bytes;
184
-
num_docs = !total_docs;
185
-
elapsed_s = elapsed;
186
-
throughput_mb_s;
187
-
docs_per_s;
188
-
}
189
-
190
126
let print_comparison results =
191
127
match results with
192
128
| [] -> ()
···
215
151
216
152
let r1 = bench_simdjson_stream ndjson ~iterations in
217
153
let r2 = bench_simdjson_stream_decode ndjson ~iterations in
218
-
let r3 = bench_yojson_seq ndjson ~iterations in
219
-
let r4 = bench_yojson_linebyline ndjson ~iterations in
220
154
221
-
print_comparison [ r1; r2; r3; r4 ]
155
+
print_comparison [ r1; r2 ]
222
156
223
157
let () =
224
158
Printf.printf "NDJSON Streaming Benchmark\n";
+5
-4
bench/dune
+5
-4
bench/dune
+20
-19
dune-project
+20
-19
dune-project
···
1
-
(lang dune 3.0)
1
+
(lang dune 3.20)
2
2
3
3
(name simdjsont)
4
+
(version 0.1.0)
4
5
5
6
(generate_opam_files true)
6
7
7
8
(source
8
-
(github username/simdjsont))
9
+
; (tangled @gdiazlo.tngl.sh/simdjsont)
10
+
(uri https://tangled.org/gdiazlo.tngl.sh/simdjsont))
9
11
10
-
(authors "Author Name")
12
+
(authors "Gabriel Díaz")
11
13
12
-
(maintainers "Author Name")
14
+
(maintainers "Gabriel Díaz")
13
15
14
16
(license ISC)
15
17
16
-
(documentation https://username.github.io/simdjsont)
18
+
(documentation https://tangled.org/gdiazlo.tngl.sh/simdjsont)
19
+
20
+
(package
21
+
(name simdjsont)
22
+
(synopsis "JSON parsing with simdjson, with support for ndjson streaming.")
23
+
(description
24
+
"OCaml bindings to simdjson with support for ndjson streaming. Includes vendored simdjson 4.2.4.")
25
+
(depends
26
+
(ocaml (>= 5.4.0))
27
+
integers
28
+
(alcotest :with-test)
17
29
18
-
(package
19
-
(name simdjsont)
20
-
(synopsis "Fast JSON parsing with simdjson and jsont-style codec API")
21
-
(description
22
-
"OCaml bindings to simdjson with a jsont-style API for encoding/decoding. Includes vendored simdjson 4.2.4.")
23
-
(depends
24
-
(ocaml (>= 4.14))
25
-
(dune (>= 3.0))
26
-
integers
27
-
(alcotest :with-test)
28
-
(yojson :with-test)
29
-
(bechamel :with-test))
30
-
(tags
31
-
(json simd parsing performance codec)))
30
+
(odoc :with-doc))
31
+
(tags
32
+
(json simd parsing performance codec)))
+7
-8
lib/dune
+7
-8
lib/dune
···
6
6
(language cxx)
7
7
(names simdjsont_stubs simdjsont_impl)
8
8
(extra_deps simdjson_vendor.cc simdjson_vendor.hh)
9
-
(flags
10
-
:standard
11
-
-std=c++17
12
-
-O3
13
-
-DNDEBUG
14
-
-march=x86-64-v3
15
-
-Wno-cast-function-type
16
-
-fPIC))
9
+
(flags
10
+
:standard
11
+
-std=c++17
12
+
-O3
13
+
-DNDEBUG
14
+
-Wno-cast-function-type
15
+
-fPIC))
17
16
(c_library_flags -lstdc++))
+1
-3
lib/simdjsont.ml
+1
-3
lib/simdjsont.ml
···
71
71
match Raw.parse_string parser json with
72
72
| Ok root -> (
73
73
match Raw.at_pointer root pointer with
74
-
| Ok elt -> (
75
-
try Ok (codec.decode [] elt)
76
-
with Codec.Decode_error e -> Error (Codec.error_to_string e))
74
+
| Ok elt -> Codec.decode_element codec elt
77
75
| Error e -> Error e.Raw.message)
78
76
| Error e -> Error e.Raw.message
79
77
end
+62
lib/simdjsont.mli
+62
lib/simdjsont.mli
···
1
+
(** High-level API.
2
+
3
+
This module provides convenience functions for:
4
+
5
+
- validating JSON strings ([Validate])
6
+
- extracting values from JSON strings by pointer ([Extract])
7
+
- encoding/decoding typed values via codecs ([Codec], [Decode], [Encode])
8
+
9
+
For low-level access to the underlying simdjson parser and elements, see
10
+
{!Raw}. *)
11
+
1
12
module Json = Simdjsont_json
13
+
(** JSON value representation used by this library. *)
14
+
2
15
module Codec = Simdjsont_codec
16
+
(** Codecs used to decode/encode typed values. *)
17
+
3
18
module Raw = Simdjsont_raw
19
+
(** Low-level bindings (parsers, elements, arrays/objects, streaming). *)
4
20
5
21
module Validate : sig
22
+
(** JSON validity checks. *)
23
+
6
24
val is_valid : string -> bool
25
+
(** [is_valid json] returns [true] if [json] is valid JSON. *)
26
+
7
27
val check : string -> (unit, string) result
28
+
(** [check json] returns [Ok ()] if [json] is valid JSON, otherwise
29
+
[Error msg]. *)
8
30
end
9
31
10
32
module Extract : sig
33
+
(** Extract values from a JSON string using a JSON pointer.
34
+
35
+
The [~pointer] argument is a JSON Pointer string (RFC 6901). *)
36
+
11
37
val string : string -> pointer:string -> (string, string) result
38
+
(** [string json ~pointer] extracts the value at [pointer] and decodes it as a
39
+
string. *)
40
+
12
41
val int : string -> pointer:string -> (int, string) result
42
+
(** [int json ~pointer] extracts the value at [pointer] and decodes it as an
43
+
[int]. *)
44
+
13
45
val int64 : string -> pointer:string -> (int64, string) result
46
+
(** [int64 json ~pointer] extracts the value at [pointer] and decodes it as an
47
+
[int64]. *)
48
+
14
49
val float : string -> pointer:string -> (float, string) result
50
+
(** [float json ~pointer] extracts the value at [pointer] and decodes it as a
51
+
[float]. *)
52
+
15
53
val bool : string -> pointer:string -> (bool, string) result
54
+
(** [bool json ~pointer] extracts the value at [pointer] and decodes it as a
55
+
[bool]. *)
56
+
16
57
val is_null : string -> pointer:string -> (bool, string) result
58
+
(** [is_null json ~pointer] checks whether the value at [pointer] is [null].
59
+
*)
60
+
17
61
val at : 'a Codec.t -> string -> pointer:string -> ('a, string) result
62
+
(** [at codec json ~pointer] extracts the value at [pointer] and decodes it
63
+
using [codec]. *)
18
64
end
19
65
20
66
module Decode : sig
67
+
(** Codecs and decoding functions.
68
+
69
+
This module re-exports the {!Codec} interface. *)
70
+
21
71
include module type of Codec
22
72
end
23
73
24
74
module Encode : sig
75
+
(** Encoding using a codec. *)
76
+
25
77
val to_string : 'a Codec.t -> 'a -> string
78
+
(** [to_string codec v] encodes [v] using [codec] and returns a JSON string.
79
+
*)
80
+
26
81
val to_buffer : 'a Codec.t -> 'a -> Buffer.t
82
+
(** [to_buffer codec v] encodes [v] using [codec] and returns a fresh buffer
83
+
containing the JSON representation. *)
27
84
end
28
85
29
86
val validate : string -> bool
87
+
(** Convenience alias for {!Validate.is_valid}. *)
88
+
30
89
val decode : 'a Codec.t -> string -> ('a, string) result
90
+
(** Convenience alias for {!Codec.decode_string}. *)
91
+
31
92
val encode : 'a Codec.t -> 'a -> string
93
+
(** Convenience alias for {!Codec.encode_string}. *)
+5
-4
lib/simdjsont_codec.ml
+5
-4
lib/simdjsont_codec.ml
···
14
14
type encoder = Buffer.t -> unit
15
15
type 'a t = { decode : 'a decoder; encode : 'a -> encoder }
16
16
17
+
let decode_element codec elt =
18
+
try Ok (codec.decode [] elt)
19
+
with Decode_error e -> Error (error_to_string e)
20
+
17
21
let decode_string codec s =
18
22
let parser = Simdjsont_raw.create_parser () in
19
23
match Simdjsont_raw.parse_string parser s with
20
24
| Ok elt ->
21
-
let result =
22
-
try Ok (codec.decode [] elt)
23
-
with Decode_error e -> Error (error_to_string e)
24
-
in
25
+
let result = decode_element codec elt in
25
26
let _ = Sys.opaque_identity parser in
26
27
result
27
28
| Error e -> Error e.Simdjsont_raw.message
+60
-3
lib/simdjsont_codec.mli
+60
-3
lib/simdjsont_codec.mli
···
1
+
(** Codecs for decoding/encoding JSON.
2
+
3
+
A codec ['a t] contains:
4
+
5
+
- a decoder from {!Simdjsont.Raw.element} to ['a]
6
+
- an encoder from ['a] to JSON written into a buffer
7
+
8
+
See the {!Obj} submodule for building codecs for JSON objects. *)
9
+
1
10
type error = { path : string list; message : string }
11
+
(** Error value used by {!Decode_error}.
12
+
13
+
- [path] is a list of strings accumulated during decoding.
14
+
- [message] is a human-readable error message. *)
2
15
3
16
exception Decode_error of error
17
+
(** Exception raised by decoders. *)
4
18
5
19
val error_to_string : error -> string
20
+
(** Convert an {!error} to a string. *)
6
21
7
-
type 'a decoder = string list -> Simdjsont_raw.element -> 'a
8
-
type encoder = Buffer.t -> unit
9
-
type 'a t = { decode : 'a decoder; encode : 'a -> encoder }
22
+
type 'a t
23
+
(** A codec for values of type ['a]. *)
10
24
11
25
val decode_string : 'a t -> string -> ('a, string) result
26
+
(** Decode a JSON string using the given codec. *)
27
+
12
28
val decode_string_exn : 'a t -> string -> 'a
29
+
(** Like {!decode_string}, but raises on error. *)
30
+
31
+
val decode_element : 'a t -> Simdjsont_raw.element -> ('a, string) result
(** [decode_element codec elt] decodes the already-parsed element [elt] using
    [codec]. Returns [Error msg] if the decoder raises {!Decode_error}. *)
32
+
13
33
val encode_to_buffer : 'a t -> 'a -> Buffer.t
34
+
(** Encode a value to a fresh buffer using the given codec. *)
35
+
14
36
val encode_string : 'a t -> 'a -> string
37
+
(** Encode a value to a JSON string using the given codec. *)
38
+
15
39
val null : unit t
40
+
(** Codec for the JSON [null] value. *)
41
+
16
42
val bool : bool t
43
+
(** Codec for JSON booleans. *)
44
+
17
45
val int : int t
46
+
(** Codec for JSON integers mapped to OCaml [int]. *)
47
+
18
48
val int64 : int64 t
49
+
(** Codec for JSON integers mapped to OCaml [int64]. *)
50
+
19
51
val float : float t
52
+
(** Codec for JSON numbers mapped to OCaml [float]. *)
53
+
20
54
val string : string t
55
+
(** Codec for JSON strings. *)
56
+
21
57
val list : 'a t -> 'a list t
58
+
(** Codec for JSON arrays mapped to OCaml lists. *)
59
+
22
60
val array : 'a t -> 'a array t
61
+
(** Codec for JSON arrays mapped to OCaml arrays. *)
62
+
23
63
val optional : 'a t -> 'a option t
64
+
(** Codec for optional values. *)
65
+
24
66
val map : ('a -> 'b) -> ('b -> 'a) -> 'a t -> 'b t
67
+
(** Transform a codec using conversion functions. *)
25
68
26
69
module Obj : sig
70
+
(** Builder for JSON objects.
71
+
72
+
The builder API is driven by a constructor function provided to {!field},
73
+
then extended with fields using {!mem} and {!opt_mem}, and finalized with
74
+
{!finish}. *)
75
+
27
76
type ('o, 'dec) builder
77
+
(** Builder state for an object codec.
78
+
79
+
['o] is the resulting OCaml type. *)
28
80
29
81
val field : 'a -> ('o, 'a) builder
82
+
(** Start building an object codec from a constructor function. *)
30
83
31
84
val mem :
32
85
string ->
···
34
87
enc:('o -> 'a) ->
35
88
('o, 'a -> 'b) builder ->
36
89
('o, 'b) builder
90
+
(** Add a required object member. *)
37
91
38
92
val opt_mem :
39
93
string ->
···
41
95
enc:('o -> 'a option) ->
42
96
('o, 'a option -> 'b) builder ->
43
97
('o, 'b) builder
98
+
(** Add an optional object member. *)
44
99
45
100
val finish : ('o, 'o) builder -> 'o t
101
+
(** Finish building an object codec. *)
46
102
end
47
103
48
104
val value : Simdjsont_json.t t
105
+
(** Codec for {!Simdjsont.Json.t}. *)
-29
lib/simdjsont_json.ml
-29
lib/simdjsont_json.ml
···
169
169
let[@inline] write_key buf key =
170
170
write_string buf key;
171
171
Buffer.add_char buf ':'
172
-
173
-
type yojson =
174
-
[ `Null
175
-
| `Bool of bool
176
-
| `Int of int
177
-
| `Intlit of string
178
-
| `Float of float
179
-
| `String of string
180
-
| `List of yojson list
181
-
| `Assoc of (string * yojson) list ]
182
-
183
-
let rec to_yojson : t -> yojson = function
184
-
| Null -> `Null
185
-
| Bool b -> `Bool b
186
-
| Int i -> `Intlit (Int64.to_string i)
187
-
| Float f -> `Float f
188
-
| String s -> `String s
189
-
| Array items -> `List (List.map to_yojson items)
190
-
| Object pairs -> `Assoc (List.map (fun (k, v) -> (k, to_yojson v)) pairs)
191
-
192
-
let rec of_yojson : yojson -> t = function
193
-
| `Null -> Null
194
-
| `Bool b -> Bool b
195
-
| `Int n -> Int (Int64.of_int n)
196
-
| `Intlit s -> Int (Int64.of_string s)
197
-
| `Float f -> Float f
198
-
| `String s -> String s
199
-
| `List items -> Array (List.map of_yojson items)
200
-
| `Assoc pairs -> Object (List.map (fun (k, v) -> (k, of_yojson v)) pairs)
-13
lib/simdjsont_json.mli
-13
lib/simdjsont_json.mli
···
27
27
val write_object_sep : Buffer.t -> unit
28
28
val write_key_sep : Buffer.t -> unit
29
29
val write_key : Buffer.t -> string -> unit
30
-
31
-
type yojson =
32
-
[ `Null
33
-
| `Bool of bool
34
-
| `Int of int
35
-
| `Intlit of string
36
-
| `Float of float
37
-
| `String of string
38
-
| `List of yojson list
39
-
| `Assoc of (string * yojson) list ]
40
-
41
-
val to_yojson : t -> yojson
42
-
val of_yojson : yojson -> t
+121
lib/simdjsont_raw.mli
+121
lib/simdjsont_raw.mli
···
15
15
]} *)
16
16
17
17
type parser
18
+
(** Parser instance used by the underlying simdjson library. *)
19
+
18
20
type element
21
+
(** JSON value obtained from parsing.
22
+
23
+
See the lifetime note at the top of this module. *)
24
+
19
25
type array_
26
+
(** A JSON array view. *)
27
+
20
28
type object_
29
+
(** A JSON object view. *)
30
+
21
31
type array_iter
32
+
(** Iterator over an array. *)
33
+
22
34
type object_iter
35
+
(** Iterator over an object. *)
23
36
24
37
type buffer =
25
38
(char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
39
+
(** Input buffer used for parsing.
40
+
41
+
The constant {!padding} describes the number of extra bytes required by the
42
+
underlying parser. *)
26
43
27
44
type element_type =
28
45
| Array
···
62
79
| Unexpected_error
63
80
64
81
type error = { code : error_code; message : string }
82
+
(** Error value returned by parsing and accessor functions. *)
65
83
66
84
exception Parse_error of error
85
+
(** Exception raised by [_exn] accessors. *)
67
86
68
87
val padding : int
88
+
(** Number of padding bytes required at the end of the input buffer. *)
89
+
69
90
val create_buffer : int -> buffer
91
+
(** Create a buffer of the given length (not including padding). *)
92
+
70
93
val buffer_of_string : string -> buffer
94
+
(** Copy a string into a newly allocated buffer. *)
95
+
71
96
val buffer_length : buffer -> int
97
+
(** Return the buffer length. *)
72
98
73
99
val buffer_blit_string :
74
100
string -> src_pos:int -> buffer -> dst_pos:int -> len:int -> unit
101
+
(** Copy a substring into a buffer. *)
75
102
76
103
val error_message : error_code -> string
104
+
(** Convert an {!error_code} to a message string. *)
105
+
77
106
val create_parser : unit -> parser
107
+
(** Create a new parser. *)
108
+
78
109
val free_parser : parser -> unit
110
+
(** Free a parser. *)
111
+
79
112
val parse : parser -> buffer -> len:int -> (element, error) result
113
+
(** Parse the first [len] bytes from a buffer. *)
114
+
80
115
val parse_string : parser -> string -> (element, error) result
116
+
(** Parse a JSON string. *)
117
+
81
118
val parse_file : parser -> string -> (element, error) result
119
+
(** Parse a file from a path. *)
120
+
82
121
val element_type : element -> element_type
122
+
(** Return the type of an element. *)
123
+
83
124
val get_bool : element -> (bool, error) result
125
+
(** Access a boolean value. *)
126
+
84
127
val get_int64 : element -> (int64, error) result
128
+
(** Access an [int64] value. *)
129
+
85
130
val get_uint64 : element -> (Unsigned.UInt64.t, error) result
131
+
(** Access an unsigned 64-bit integer value. *)
132
+
86
133
val get_double : element -> (float, error) result
134
+
(** Access a floating-point value. *)
135
+
87
136
val get_string : element -> (string, error) result
137
+
(** Access a string value. *)
138
+
88
139
val get_array : element -> (array_, error) result
140
+
(** Access an array value. *)
141
+
89
142
val get_object : element -> (object_, error) result
143
+
(** Access an object value. *)
144
+
90
145
val bool_exn : element -> bool
146
+
(** Like {!get_bool}, but raises {!Parse_error} on error. *)
147
+
91
148
val int64_exn : element -> int64
149
+
(** Like {!get_int64}, but raises {!Parse_error} on error. *)
150
+
92
151
val uint64_exn : element -> Unsigned.UInt64.t
152
+
(** Like {!get_uint64}, but raises {!Parse_error} on error. *)
153
+
93
154
val float_exn : element -> float
155
+
(** Like {!get_double}, but raises {!Parse_error} on error. *)
156
+
94
157
val string_exn : element -> string
158
+
(** Like {!get_string}, but raises {!Parse_error} on error. *)
159
+
95
160
val array_exn : element -> array_
161
+
(** Like {!get_array}, but raises {!Parse_error} on error. *)
162
+
96
163
val object_exn : element -> object_
164
+
(** Like {!get_object}, but raises {!Parse_error} on error. *)
165
+
97
166
val array_length : array_ -> int
167
+
(** Return the length of an array. *)
168
+
98
169
val array_to_seq : array_ -> element Seq.t
170
+
(** Convert an array to a sequence. *)
171
+
99
172
val array_to_list : array_ -> element list
173
+
(** Convert an array to a list. *)
174
+
100
175
val array_iter : (element -> unit) -> array_ -> unit
176
+
(** Iterate over an array. *)
177
+
101
178
val array_fold : ('a -> element -> 'a) -> 'a -> array_ -> 'a
179
+
(** Fold over an array. *)
180
+
102
181
val object_length : object_ -> int
182
+
(** Return the number of members in an object. *)
183
+
103
184
val object_find : object_ -> string -> (element, error) result
185
+
(** Find a member by key. *)
186
+
104
187
val object_find_opt : object_ -> string -> element option
188
+
(** Find a member by key, returning [None] if missing. *)
189
+
105
190
val object_to_seq : object_ -> (string * element) Seq.t
191
+
(** Convert an object to a sequence of key/value pairs. *)
192
+
106
193
val object_to_list : object_ -> (string * element) list
194
+
(** Convert an object to a list of key/value pairs. *)
195
+
107
196
val object_iter : (string -> element -> unit) -> object_ -> unit
197
+
(** Iterate over an object. *)
198
+
108
199
val object_fold : ('a -> string -> element -> 'a) -> 'a -> object_ -> 'a
200
+
(** Fold over an object. *)
201
+
109
202
val at_pointer : element -> string -> (element, error) result
203
+
(** Navigate within a JSON document using a pointer string. *)
204
+
110
205
val element_to_string : element -> string
206
+
(** Serialize an element to a JSON string. *)
207
+
111
208
val format_double : float -> string
209
+
(** Format a float as a JSON number. *)
112
210
113
211
module Stream : sig
212
+
(** Parse multiple JSON documents from a buffer.
213
+
214
+
See {!Simdjsont.Raw.Stream.create} and {!Simdjsont.Raw.Stream.next}. *)
215
+
114
216
type t
217
+
(** Stream state. *)
115
218
116
219
type next_result =
117
220
| End
118
221
| Doc of { element : element; byte_offset : int }
119
222
| Error of { error : error; byte_offset : int }
223
+
(** Result of {!next}.
224
+
225
+
- [byte_offset] is an offset into the stream input. *)
120
226
121
227
val default_batch_size : int
228
+
(** Default batch size. *)
122
229
123
230
val create :
124
231
?batch_size:int -> parser -> buffer -> len:int -> (t, error) result
232
+
(** Create a stream from a buffer. *)
125
233
126
234
val next : t -> next_result
235
+
(** Get the next document from the stream. *)
236
+
127
237
val truncated_bytes : t -> int
238
+
(** Return the number of bytes that did not form a full document. *)
239
+
128
240
val doc_index : t -> int
241
+
(** Return the current document index. *)
242
+
129
243
val is_finished : t -> bool
244
+
(** Return [true] if the stream is finished. *)
245
+
130
246
val size_in_bytes : t -> int
247
+
(** Return the input size in bytes. *)
248
+
131
249
val to_seq : t -> (element * int, error * int) result Seq.t
250
+
(** Convert the stream to a sequence.
251
+
252
+
The [int] in each [Ok]/[Error] value is a byte offset. *)
132
253
end
+10
-12
simdjsont.opam
+10
-12
simdjsont.opam
···
1
1
# This file is generated by dune, edit dune-project instead
2
2
opam-version: "2.0"
3
-
synopsis: "Fast JSON parsing with simdjson and jsont-style codec API"
3
+
version: "0.1.0"
4
+
synopsis: "JSON parsing with simdjson, with support for ndjson streaming."
4
5
description:
5
-
"OCaml bindings to simdjson with a jsont-style API for encoding/decoding. Includes vendored simdjson 4.2.4."
6
-
maintainer: ["Author Name"]
7
-
authors: ["Author Name"]
6
+
"OCaml bindings to simdjson with support for ndjson streaming. Includes vendored simdjson 4.2.4."
7
+
maintainer: ["Gabriel Díaz"]
8
+
authors: ["Gabriel Díaz"]
8
9
license: "ISC"
9
10
tags: ["json" "simd" "parsing" "performance" "codec"]
10
-
homepage: "https://github.com/username/simdjsont"
11
-
doc: "https://username.github.io/simdjsont"
12
-
bug-reports: "https://github.com/username/simdjsont/issues"
11
+
doc: "https://tangled.org/gdiazlo.tngl.sh/simdjsont"
13
12
depends: [
14
-
"ocaml" {>= "4.14"}
15
-
"dune" {>= "3.0" & >= "3.0"}
13
+
"dune" {>= "3.20"}
14
+
"ocaml" {>= "5.4.0"}
16
15
"integers"
17
16
"alcotest" {with-test}
18
-
"yojson" {with-test}
19
-
"bechamel" {with-test}
20
17
"odoc" {with-doc}
21
18
]
22
19
build: [
···
33
30
"@doc" {with-doc}
34
31
]
35
32
]
36
-
dev-repo: "git+https://github.com/username/simdjsont.git"
33
+
dev-repo: "https://tangled.org/gdiazlo.tngl.sh/simdjsont"
34
+
x-maintenance-intent: ["(latest)"]
-19
test/test_readme.ml
-19
test/test_readme.ml
···
144
144
assert (s = {|{"key":[1,2,3]}|})
145
145
| Error _ -> assert false
146
146
147
-
let test_yojson_compat () =
148
-
let v =
149
-
Simdjsont.Decode.decode_string Simdjsont.Decode.value
150
-
{|{"name": "test", "value": 123}|}
151
-
in
152
-
match v with
153
-
| Ok json ->
154
-
let yojson = Simdjsont.Json.to_yojson json in
155
-
let name =
156
-
match yojson with
157
-
| `Assoc fields -> List.assoc_opt "name" fields
158
-
| _ -> None
159
-
in
160
-
assert (name = Some (`String "test"));
161
-
let back = Simdjsont.Json.of_yojson yojson in
162
-
assert (Simdjsont.Json.to_string back = Simdjsont.Json.to_string json)
163
-
| Error _ -> assert false
164
-
165
147
let test_low_level () =
166
148
let open Simdjsont.Raw in
167
149
let parser = create_parser () in
···
202
184
("extract with codec", `Quick, test_extract_with_codec);
203
185
("dynamic json", `Quick, test_dynamic_json);
204
186
("dynamic decode", `Quick, test_dynamic_decode);
205
-
("yojson compat", `Quick, test_yojson_compat);
206
187
("low level", `Quick, test_low_level);
207
188
] );
208
189
]