simdjson bindings with streaming support

Release 0.1.0

+15
LICENSE
··· 1 + ISC License 2 + 3 + Copyright (c) 2026 Gabriel Díaz López de la Llave 4 + 5 + Permission to use, copy, modify, and/or distribute this software for any 6 + purpose with or without fee is hereby granted, provided that the above 7 + copyright notice and this permission notice appear in all copies. 8 + 9 + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 10 + REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11 + AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 12 + INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13 + LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 14 + OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 + PERFORMANCE OF THIS SOFTWARE.
+112 -13
README.md
··· 6 6 7 7 ## Installation 8 8 9 + ### System requirements 10 + 11 + This package builds vendored C++ code (simdjson) via Dune `foreign_stubs`, so you need a **C++17-capable compiler toolchain** available on your system. 12 + 13 + ### Opam 14 + 9 15 ``` 10 16 opam install simdjsont 11 17 ``` ··· 51 57 ### Extracting Values 52 58 53 59 Use JSON Pointer (RFC 6901) syntax to extract values without parsing the entire document: 60 + 61 + ### NDJSON / streaming 62 + 63 + `simdjsont` can parse **newline-delimited JSON (NDJSON)** (and more generally, concatenated JSON documents) using the low-level streaming API `Simdjsont.Raw.Stream`. 64 + 65 + This is useful when you want to process large streams of JSON documents (logs, events, data pipelines) without splitting the input yourself. 66 + 67 + If you want to decode each streamed document into an OCaml value, use `Simdjsont.Codec.decode_element` with your codec (this keeps decoding in the high-level API while parsing stays streaming/zero-copy). 
68 + 69 + ```ocaml 70 + let ndjson = "{\"id\":1}\n{\"id\":2}\n{broken\n{\"id\":3}\n" in 71 + 72 + let parser = Simdjsont.Raw.create_parser () in 73 + let buf = Simdjsont.Raw.buffer_of_string ndjson in 74 + let len = String.length ndjson in 75 + 76 + match Simdjsont.Raw.Stream.create parser buf ~len with 77 + | Error e -> Printf.eprintf "Failed to create stream: %s\n" e.message 78 + | Ok stream -> 79 + Simdjsont.Raw.Stream.to_seq stream 80 + |> Seq.iter (function 81 + | Ok (elt, byte_offset) -> 82 + Printf.printf "Doc at %d: %s\n" byte_offset 83 + (Simdjsont.Raw.element_to_string elt) 84 + | Error (e, byte_offset) -> 85 + Printf.printf "Parse error at %d: %s\n" byte_offset e.message); 86 + 87 + let truncated = Simdjsont.Raw.Stream.truncated_bytes stream in 88 + if truncated > 0 then 89 + Printf.printf "Warning: %d truncated bytes at end of stream\n" truncated 90 + ``` 91 + 92 + Decode streamed documents to a typed value using a codec: 93 + 94 + ```ocaml 95 + type event = { id : int; name : string } 96 + 97 + let event_codec = 98 + let open Simdjsont.Decode in 99 + Obj.field (fun id name -> { id; name }) 100 + |> Obj.mem "id" int ~enc:(fun e -> e.id) 101 + |> Obj.mem "name" string ~enc:(fun e -> e.name) 102 + |> Obj.finish 103 + 104 + match Simdjsont.Raw.Stream.create parser buf ~len with 105 + | Error e -> Printf.eprintf "Failed to create stream: %s\n" e.message 106 + | Ok stream -> 107 + Simdjsont.Raw.Stream.to_seq stream 108 + |> Seq.iter (function 109 + | Ok (elt, byte_offset) -> ( 110 + match Simdjsont.Codec.decode_element event_codec elt with 111 + | Ok event -> 112 + Printf.printf "Event at %d: id=%d name=%s\n" byte_offset event.id 113 + event.name 114 + | Error msg -> 115 + Printf.printf "Decode error at %d: %s\n" byte_offset msg) 116 + | Error (e, byte_offset) -> 117 + Printf.printf "Parse error at %d: %s\n" byte_offset e.message) 118 + ``` 119 + 120 + **Lifetime note:** values returned from `Simdjsont.Raw.Stream` reference memory owned by the parser/buffer. 
Keep the `parser` and `buf` alive for the whole iteration, and don't reuse the parser for other parses while you still hold `element`s. 54 121 55 122 ```ocaml 56 123 let json = {|{ ··· 258 325 | Error e -> print_endline e 259 326 ``` 260 327 261 - ### Yojson Compatibility 328 + #### Converting to/from Yojson (example) 262 329 263 - Convert between `Simdjsont.Json.t` and Yojson-compatible polymorphic variants: 330 + `simdjsont` does not depend on `yojson`, but you can convert values when you need interoperability. 264 331 265 332 ```ocaml 333 + (* Requires adding `yojson` to your own project dependencies. *) 334 + 335 + let rec simdjsont_to_yojson (v : Simdjsont.Json.t) : Yojson.Safe.t = 336 + match v with 337 + | Null -> `Null 338 + | Bool b -> `Bool b 339 + | Int i -> `Intlit (Int64.to_string i) 340 + | Float f -> `Float f 341 + | String s -> `String s 342 + | Array xs -> `List (List.map simdjsont_to_yojson xs) 343 + | Object fields -> 344 + `Assoc 345 + (List.map (fun (k, x) -> (k, simdjsont_to_yojson x)) fields) 346 + 347 + let rec yojson_to_simdjsont (v : Yojson.Safe.t) : Simdjsont.Json.t = 348 + match v with 349 + | `Null -> Null 350 + | `Bool b -> Bool b 351 + | `Int n -> Int (Int64.of_int n) 352 + | `Intlit s -> Int (Int64.of_string s) 353 + | `Float f -> Float f 354 + | `String s -> String s 355 + | `List xs -> Array (List.map yojson_to_simdjsont xs) 356 + | `Assoc fields -> 357 + Object (List.map (fun (k, x) -> (k, yojson_to_simdjsont x)) fields) 358 + | `Tuple xs -> Array (List.map yojson_to_simdjsont xs) 359 + | `Variant (tag, arg_opt) -> 360 + Object 361 + [ 362 + ( "variant", 363 + Object 364 + ( 365 + [ ("tag", String tag) ] 366 + @ 367 + match arg_opt with 368 + | None -> [] 369 + | Some a -> [ ("arg", yojson_to_simdjsont a) ] ) ); 370 + ] 371 + 266 372 let () = 267 373 match Simdjsont.Decode.decode_string Simdjsont.Decode.value {|{"name": "test", "value": 123}|} with 268 374 | Ok json -> 269 - let yojson = Simdjsont.Json.to_yojson json in 270 - let 
name = match yojson with 271 - | `Assoc fields -> List.assoc_opt "name" fields 272 - | _ -> None 273 - in 274 - (match name with 275 - | Some (`String s) -> print_endline s 276 - | _ -> ()); 277 - let back = Simdjsont.Json.of_yojson yojson in 278 - print_endline (Simdjsont.Json.to_string back) 279 - | Error e -> print_endline e 375 + let yojson = simdjsont_to_yojson json in 376 + let back = yojson_to_simdjsont yojson in 377 + assert (Simdjsont.Json.to_string back = Simdjsont.Json.to_string json) 378 + | Error e -> failwith e 280 379 ``` 281 380 282 381 ### Low-Level API
+16 -23
bench/bench_json.ml
··· 27 27 time_it "simdjsont" iterations (fun () -> 28 28 let _ = Simdjsont.Validate.is_valid json in 29 29 ()); 30 - time_it "yojson" iterations (fun () -> 31 - let _ = Yojson.Safe.from_string json in 30 + time_it "baseline" iterations (fun () -> 31 + let _ = Simdjsont.Validate.is_valid json in 32 32 ()); 33 33 ] 34 34 in 35 - let baseline = List.assoc "yojson" validate_results in 35 + let baseline = List.assoc "baseline" validate_results in 36 36 List.iter 37 37 (fun (n, ns) -> 38 38 Printf.printf " %-12s %10.0f ns (%.1fx)\n" n ns (baseline /. ns)) ··· 44 44 time_it "simdjsont" iterations (fun () -> 45 45 let _ = Simdjsont.Extract.int64 json ~pointer:"/statuses/0/id" in 46 46 ()); 47 - time_it "yojson" iterations (fun () -> 48 - let v = Yojson.Safe.from_string json in 49 - match v with 50 - | `Assoc fields -> ( 51 - match List.assoc_opt "statuses" fields with 52 - | Some (`List (first :: _)) -> ( 53 - match first with 54 - | `Assoc f -> ignore (List.assoc_opt "id" f) 55 - | _ -> ()) 56 - | _ -> ()) 57 - | _ -> ()); 47 + time_it "baseline" iterations (fun () -> 48 + let _ = Simdjsont.Extract.int64 json ~pointer:"/statuses/0/id" in 49 + ()); 58 50 ] 59 51 in 60 - let baseline = List.assoc "yojson" extract_results in 52 + let baseline = List.assoc "baseline" extract_results in 61 53 List.iter 62 54 (fun (n, ns) -> 63 55 Printf.printf " %-12s %10.0f ns (%.1fx)\n" n ns (baseline /. 
ns)) ··· 69 61 time_it "simdjsont" iterations (fun () -> 70 62 let _ = Simdjsont.Codec.decode_string Simdjsont.Codec.value json in 71 63 ()); 72 - time_it "yojson" iterations (fun () -> 73 - let _ = Yojson.Safe.from_string json in 64 + time_it "baseline" iterations (fun () -> 65 + let _ = Simdjsont.Codec.decode_string Simdjsont.Codec.value json in 74 66 ()); 75 67 ] 76 68 in 77 - let baseline = List.assoc "yojson" decode_results in 69 + let baseline = List.assoc "baseline" decode_results in 78 70 List.iter 79 71 (fun (n, ns) -> 80 72 Printf.printf " %-12s %10.0f ns (%.1fx)\n" n ns (baseline /. ns)) ··· 86 78 | Ok v -> v 87 79 | Error e -> failwith e 88 80 in 89 - let yojson_data = Yojson.Safe.from_string json in 90 81 let encode_results = 91 82 [ 92 83 time_it "simdjsont" iterations (fun () -> ··· 94 85 Simdjsont.Codec.encode_string Simdjsont.Codec.value simdjson_data 95 86 in 96 87 ()); 97 - time_it "yojson" iterations (fun () -> 98 - let _ = Yojson.Safe.to_string yojson_data in 88 + time_it "baseline" iterations (fun () -> 89 + let _ = 90 + Simdjsont.Codec.encode_string Simdjsont.Codec.value simdjson_data 91 + in 99 92 ()); 100 93 ] 101 94 in 102 - let baseline = List.assoc "yojson" encode_results in 95 + let baseline = List.assoc "baseline" encode_results in 103 96 List.iter 104 97 (fun (n, ns) -> 105 98 Printf.printf " %-12s %10.0f ns (%.1fx)\n" n ns (baseline /. ns)) 106 99 encode_results 107 100 108 101 let () = 109 - Printf.printf "simdjsont vs yojson benchmarks\n"; 102 + Printf.printf "simdjsont benchmarks\n"; 110 103 Printf.printf "==============================\n"; 111 104 run_use_case_benchmarks "twitter.json" (Lazy.force twitter_json) 50; 112 105 run_use_case_benchmarks "citm_catalog.json" (Lazy.force citm_json) 20
+1 -67
bench/bench_ndjson.ml
··· 123 123 docs_per_s; 124 124 } 125 125 126 - let bench_yojson_linebyline ndjson ~iterations = 127 - let total_bytes = String.length ndjson in 128 - let lines = 129 - String.split_on_char '\n' ndjson 130 - |> List.filter (fun s -> String.length s > 0) 131 - in 132 - let num_docs = List.length lines in 133 - 134 - Gc.full_major (); 135 - let start = Unix.gettimeofday () in 136 - 137 - for _ = 1 to iterations do 138 - List.iter 139 - (fun line -> 140 - let _ = Yojson.Safe.from_string line in 141 - ()) 142 - lines 143 - done; 144 - 145 - let elapsed = Unix.gettimeofday () -. start in 146 - let bytes_processed = total_bytes * iterations in 147 - let throughput_mb_s = 148 - float_of_int bytes_processed /. elapsed /. 1_000_000.0 149 - in 150 - let docs_per_s = float_of_int (num_docs * iterations) /. elapsed in 151 - 152 - { 153 - name = "yojson_linebyline"; 154 - total_bytes; 155 - num_docs; 156 - elapsed_s = elapsed; 157 - throughput_mb_s; 158 - docs_per_s; 159 - } 160 - 161 - let bench_yojson_seq ndjson ~iterations = 162 - let total_bytes = String.length ndjson in 163 - 164 - Gc.full_major (); 165 - let start = Unix.gettimeofday () in 166 - let total_docs = ref 0 in 167 - 168 - for _ = 1 to iterations do 169 - let seq = Yojson.Safe.seq_from_string ndjson in 170 - let count = Seq.fold_left (fun acc _ -> acc + 1) 0 seq in 171 - total_docs := count 172 - done; 173 - 174 - let elapsed = Unix.gettimeofday () -. start in 175 - let bytes_processed = total_bytes * iterations in 176 - let throughput_mb_s = 177 - float_of_int bytes_processed /. elapsed /. 1_000_000.0 178 - in 179 - let docs_per_s = float_of_int (!total_docs * iterations) /. 
elapsed in 180 - 181 - { 182 - name = "yojson_seq"; 183 - total_bytes; 184 - num_docs = !total_docs; 185 - elapsed_s = elapsed; 186 - throughput_mb_s; 187 - docs_per_s; 188 - } 189 - 190 126 let print_comparison results = 191 127 match results with 192 128 | [] -> () ··· 215 151 216 152 let r1 = bench_simdjson_stream ndjson ~iterations in 217 153 let r2 = bench_simdjson_stream_decode ndjson ~iterations in 218 - let r3 = bench_yojson_seq ndjson ~iterations in 219 - let r4 = bench_yojson_linebyline ndjson ~iterations in 220 154 221 - print_comparison [ r1; r2; r3; r4 ] 155 + print_comparison [ r1; r2 ] 222 156 223 157 let () = 224 158 Printf.printf "NDJSON Streaming Benchmark\n";
+5 -4
bench/dune
··· 1 - (executable 2 - (name bench_json) 3 - (libraries simdjsont yojson bechamel unix)) 1 + (executable 2 + (name bench_json) 3 + (libraries simdjsont unix)) 4 + 4 5 5 6 (executable 6 7 (name bench_ndjson) 7 - (libraries simdjsont yojson unix)) 8 + (libraries simdjsont unix))
+20 -19
dune-project
··· 1 - (lang dune 3.0) 1 + (lang dune 3.20) 2 2 3 3 (name simdjsont) 4 + (version 0.1.0) 4 5 5 6 (generate_opam_files true) 6 7 7 8 (source 8 - (github username/simdjsont)) 9 + ; (tangled @gdiazlo.tngl.sh/simdjsont) 10 + (uri https://tangled.org/gdiazlo.tngl.sh/simdjsont)) 9 11 10 - (authors "Author Name") 12 + (authors "Gabriel Díaz") 11 13 12 - (maintainers "Author Name") 14 + (maintainers "Gabriel Díaz") 13 15 14 16 (license ISC) 15 17 16 - (documentation https://username.github.io/simdjsont) 18 + (documentation https://tangled.org/gdiazlo.tngl.sh/simdjsont) 19 + 20 + (package 21 + (name simdjsont) 22 + (synopsis "JSON parsing with simdjson, with support for ndjson streaming.") 23 + (description 24 + "OCaml bindings to simdjson with support for ndjson streaming. Includes vendored simdjson 4.2.4.") 25 + (depends 26 + (ocaml (>= 5.4.0)) 27 + integers 28 + (alcotest :with-test) 17 29 18 - (package 19 - (name simdjsont) 20 - (synopsis "Fast JSON parsing with simdjson and jsont-style codec API") 21 - (description 22 - "OCaml bindings to simdjson with a jsont-style API for encoding/decoding. Includes vendored simdjson 4.2.4.") 23 - (depends 24 - (ocaml (>= 4.14)) 25 - (dune (>= 3.0)) 26 - integers 27 - (alcotest :with-test) 28 - (yojson :with-test) 29 - (bechamel :with-test)) 30 - (tags 31 - (json simd parsing performance codec))) 30 + (odoc :with-doc)) 31 + (tags 32 + (json simd parsing performance codec)))
+7 -8
lib/dune
··· 6 6 (language cxx) 7 7 (names simdjsont_stubs simdjsont_impl) 8 8 (extra_deps simdjson_vendor.cc simdjson_vendor.hh) 9 - (flags 10 - :standard 11 - -std=c++17 12 - -O3 13 - -DNDEBUG 14 - -march=x86-64-v3 15 - -Wno-cast-function-type 16 - -fPIC)) 9 + (flags 10 + :standard 11 + -std=c++17 12 + -O3 13 + -DNDEBUG 14 + -Wno-cast-function-type 15 + -fPIC)) 17 16 (c_library_flags -lstdc++))
+1 -3
lib/simdjsont.ml
··· 71 71 match Raw.parse_string parser json with 72 72 | Ok root -> ( 73 73 match Raw.at_pointer root pointer with 74 - | Ok elt -> ( 75 - try Ok (codec.decode [] elt) 76 - with Codec.Decode_error e -> Error (Codec.error_to_string e)) 74 + | Ok elt -> Codec.decode_element codec elt 77 75 | Error e -> Error e.Raw.message) 78 76 | Error e -> Error e.Raw.message 79 77 end
+62
lib/simdjsont.mli
··· 1 + (** High-level API. 2 + 3 + This module provides convenience functions for: 4 + 5 + - validating JSON strings ([Validate]) 6 + - extracting values from JSON strings by pointer ([Extract]) 7 + - encoding/decoding typed values via codecs ([Codec], [Decode], [Encode]) 8 + 9 + For low-level access to the underlying simdjson parser and elements, see 10 + {!Raw}. *) 11 + 1 12 module Json = Simdjsont_json 13 + (** JSON value representation used by this library. *) 14 + 2 15 module Codec = Simdjsont_codec 16 + (** Codecs used to decode/encode typed values. *) 17 + 3 18 module Raw = Simdjsont_raw 19 + (** Low-level bindings (parsers, elements, arrays/objects, streaming). *) 4 20 5 21 module Validate : sig 22 + (** JSON validity checks. *) 23 + 6 24 val is_valid : string -> bool 25 + (** [is_valid json] returns [true] if [json] is valid JSON. *) 26 + 7 27 val check : string -> (unit, string) result 28 + (** [check json] returns [Ok ()] if [json] is valid JSON, otherwise 29 + [Error msg]. *) 8 30 end 9 31 10 32 module Extract : sig 33 + (** Extract values from a JSON string using a JSON pointer. 34 + 35 + The [~pointer] argument is a string. *) 36 + 11 37 val string : string -> pointer:string -> (string, string) result 38 + (** [string json ~pointer] extracts the value at [pointer] and decodes it as a 39 + string. *) 40 + 12 41 val int : string -> pointer:string -> (int, string) result 42 + (** [int json ~pointer] extracts the value at [pointer] and decodes it as an 43 + [int]. *) 44 + 13 45 val int64 : string -> pointer:string -> (int64, string) result 46 + (** [int64 json ~pointer] extracts the value at [pointer] and decodes it as an 47 + [int64]. *) 48 + 14 49 val float : string -> pointer:string -> (float, string) result 50 + (** [float json ~pointer] extracts the value at [pointer] and decodes it as a 51 + [float]. 
*) 52 + 15 53 val bool : string -> pointer:string -> (bool, string) result 54 + (** [bool json ~pointer] extracts the value at [pointer] and decodes it as a 55 + [bool]. *) 56 + 16 57 val is_null : string -> pointer:string -> (bool, string) result 58 + (** [is_null json ~pointer] checks whether the value at [pointer] is [null]. 59 + *) 60 + 17 61 val at : 'a Codec.t -> string -> pointer:string -> ('a, string) result 62 + (** [at codec json ~pointer] extracts the value at [pointer] and decodes it 63 + using [codec]. *) 18 64 end 19 65 20 66 module Decode : sig 67 + (** Codecs and decoding functions. 68 + 69 + This module re-exports the {!Codec} interface. *) 70 + 21 71 include module type of Codec 22 72 end 23 73 24 74 module Encode : sig 75 + (** Encoding using a codec. *) 76 + 25 77 val to_string : 'a Codec.t -> 'a -> string 78 + (** [to_string codec v] encodes [v] using [codec] and returns a JSON string. 79 + *) 80 + 26 81 val to_buffer : 'a Codec.t -> 'a -> Buffer.t 82 + (** [to_buffer codec v] encodes [v] using [codec] and returns a fresh buffer 83 + containing the JSON representation. *) 27 84 end 28 85 29 86 val validate : string -> bool 87 + (** Convenience alias for {!Validate.is_valid}. *) 88 + 30 89 val decode : 'a Codec.t -> string -> ('a, string) result 90 + (** Convenience alias for {!Codec.decode_string}. *) 91 + 31 92 val encode : 'a Codec.t -> 'a -> string 93 + (** Convenience alias for {!Codec.encode_string}. *)
+5 -4
lib/simdjsont_codec.ml
··· 14 14 type encoder = Buffer.t -> unit 15 15 type 'a t = { decode : 'a decoder; encode : 'a -> encoder } 16 16 17 + let decode_element codec elt = 18 + try Ok (codec.decode [] elt) 19 + with Decode_error e -> Error (error_to_string e) 20 + 17 21 let decode_string codec s = 18 22 let parser = Simdjsont_raw.create_parser () in 19 23 match Simdjsont_raw.parse_string parser s with 20 24 | Ok elt -> 21 - let result = 22 - try Ok (codec.decode [] elt) 23 - with Decode_error e -> Error (error_to_string e) 24 - in 25 + let result = decode_element codec elt in 25 26 let _ = Sys.opaque_identity parser in 26 27 result 27 28 | Error e -> Error e.Simdjsont_raw.message
+60 -3
lib/simdjsont_codec.mli
··· 1 + (** Codecs for decoding/encoding JSON. 2 + 3 + A codec ['a t] contains: 4 + 5 + - a decoder from {!Simdjsont.Raw.element} to ['a] 6 + - an encoder from ['a] to JSON written into a buffer 7 + 8 + See the {!Obj} submodule for building codecs for JSON objects. *) 9 + 1 10 type error = { path : string list; message : string } 11 + (** Error value used by {!Decode_error}. 12 + 13 + - [path] is a list of strings accumulated during decoding. 14 + - [message] is a human-readable error message. *) 2 15 3 16 exception Decode_error of error 17 + (** Exception raised by decoders. *) 4 18 5 19 val error_to_string : error -> string 20 + (** Convert an {!error} to a string. *) 6 21 7 - type 'a decoder = string list -> Simdjsont_raw.element -> 'a 8 - type encoder = Buffer.t -> unit 9 - type 'a t = { decode : 'a decoder; encode : 'a -> encoder } 22 + type 'a t 23 + (** A codec for values of type ['a]. *) 10 24 11 25 val decode_string : 'a t -> string -> ('a, string) result 26 + (** Decode a JSON string using the given codec. *) 27 + 12 28 val decode_string_exn : 'a t -> string -> 'a 29 + (** Like {!decode_string}, but raises on error. *) 30 + 31 + val decode_element : 'a t -> Simdjsont_raw.element -> ('a, string) result 32 + 13 33 val encode_to_buffer : 'a t -> 'a -> Buffer.t 34 + (** Encode a value to a fresh buffer using the given codec. *) 35 + 14 36 val encode_string : 'a t -> 'a -> string 37 + (** Encode a value to a JSON string using the given codec. *) 38 + 15 39 val null : unit t 40 + (** Codec for the JSON [null] value. *) 41 + 16 42 val bool : bool t 43 + (** Codec for JSON booleans. *) 44 + 17 45 val int : int t 46 + (** Codec for JSON integers mapped to OCaml [int]. *) 47 + 18 48 val int64 : int64 t 49 + (** Codec for JSON integers mapped to OCaml [int64]. *) 50 + 19 51 val float : float t 52 + (** Codec for JSON numbers mapped to OCaml [float]. *) 53 + 20 54 val string : string t 55 + (** Codec for JSON strings. 
*) 56 + 21 57 val list : 'a t -> 'a list t 58 + (** Codec for JSON arrays mapped to OCaml lists. *) 59 + 22 60 val array : 'a t -> 'a array t 61 + (** Codec for JSON arrays mapped to OCaml arrays. *) 62 + 23 63 val optional : 'a t -> 'a option t 64 + (** Codec for optional values. *) 65 + 24 66 val map : ('a -> 'b) -> ('b -> 'a) -> 'a t -> 'b t 67 + (** Transform a codec using conversion functions. *) 25 68 26 69 module Obj : sig 70 + (** Builder for JSON objects. 71 + 72 + The builder API is driven by a constructor function provided to {!field}, 73 + then extended with fields using {!mem} and {!opt_mem}, and finalized with 74 + {!finish}. *) 75 + 27 76 type ('o, 'dec) builder 77 + (** Builder state for an object codec. 78 + 79 + ['o] is the resulting OCaml type. *) 28 80 29 81 val field : 'a -> ('o, 'a) builder 82 + (** Start building an object codec from a constructor function. *) 30 83 31 84 val mem : 32 85 string -> ··· 34 87 enc:('o -> 'a) -> 35 88 ('o, 'a -> 'b) builder -> 36 89 ('o, 'b) builder 90 + (** Add a required object member. *) 37 91 38 92 val opt_mem : 39 93 string -> ··· 41 95 enc:('o -> 'a option) -> 42 96 ('o, 'a option -> 'b) builder -> 43 97 ('o, 'b) builder 98 + (** Add an optional object member. *) 44 99 45 100 val finish : ('o, 'o) builder -> 'o t 101 + (** Finish building an object codec. *) 46 102 end 47 103 48 104 val value : Simdjsont_json.t t 105 + (** Codec for {!Simdjsont.Json.t}. *)
-29
lib/simdjsont_json.ml
··· 169 169 let[@inline] write_key buf key = 170 170 write_string buf key; 171 171 Buffer.add_char buf ':' 172 - 173 - type yojson = 174 - [ `Null 175 - | `Bool of bool 176 - | `Int of int 177 - | `Intlit of string 178 - | `Float of float 179 - | `String of string 180 - | `List of yojson list 181 - | `Assoc of (string * yojson) list ] 182 - 183 - let rec to_yojson : t -> yojson = function 184 - | Null -> `Null 185 - | Bool b -> `Bool b 186 - | Int i -> `Intlit (Int64.to_string i) 187 - | Float f -> `Float f 188 - | String s -> `String s 189 - | Array items -> `List (List.map to_yojson items) 190 - | Object pairs -> `Assoc (List.map (fun (k, v) -> (k, to_yojson v)) pairs) 191 - 192 - let rec of_yojson : yojson -> t = function 193 - | `Null -> Null 194 - | `Bool b -> Bool b 195 - | `Int n -> Int (Int64.of_int n) 196 - | `Intlit s -> Int (Int64.of_string s) 197 - | `Float f -> Float f 198 - | `String s -> String s 199 - | `List items -> Array (List.map of_yojson items) 200 - | `Assoc pairs -> Object (List.map (fun (k, v) -> (k, of_yojson v)) pairs)
-13
lib/simdjsont_json.mli
··· 27 27 val write_object_sep : Buffer.t -> unit 28 28 val write_key_sep : Buffer.t -> unit 29 29 val write_key : Buffer.t -> string -> unit 30 - 31 - type yojson = 32 - [ `Null 33 - | `Bool of bool 34 - | `Int of int 35 - | `Intlit of string 36 - | `Float of float 37 - | `String of string 38 - | `List of yojson list 39 - | `Assoc of (string * yojson) list ] 40 - 41 - val to_yojson : t -> yojson 42 - val of_yojson : yojson -> t
+121
lib/simdjsont_raw.mli
··· 15 15 ]} *) 16 16 17 17 type parser 18 + (** Parser instance used by the underlying simdjson library. *) 19 + 18 20 type element 21 + (** JSON value obtained from parsing. 22 + 23 + See the lifetime note at the top of this module. *) 24 + 19 25 type array_ 26 + (** A JSON array view. *) 27 + 20 28 type object_ 29 + (** A JSON object view. *) 30 + 21 31 type array_iter 32 + (** Iterator over an array. *) 33 + 22 34 type object_iter 35 + (** Iterator over an object. *) 23 36 24 37 type buffer = 25 38 (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t 39 + (** Input buffer used for parsing. 40 + 41 + The constant {!padding} describes the number of extra bytes required by the 42 + underlying parser. *) 26 43 27 44 type element_type = 28 45 | Array ··· 62 79 | Unexpected_error 63 80 64 81 type error = { code : error_code; message : string } 82 + (** Error value returned by parsing and accessor functions. *) 65 83 66 84 exception Parse_error of error 85 + (** Exception raised by [_exn] accessors. *) 67 86 68 87 val padding : int 88 + (** Number of padding bytes required at the end of the input buffer. *) 89 + 69 90 val create_buffer : int -> buffer 91 + (** Create a buffer of the given length (not including padding). *) 92 + 70 93 val buffer_of_string : string -> buffer 94 + (** Copy a string into a newly allocated buffer. *) 95 + 71 96 val buffer_length : buffer -> int 97 + (** Return the buffer length. *) 72 98 73 99 val buffer_blit_string : 74 100 string -> src_pos:int -> buffer -> dst_pos:int -> len:int -> unit 101 + (** Copy a substring into a buffer. *) 75 102 76 103 val error_message : error_code -> string 104 + (** Convert an {!error_code} to a message string. *) 105 + 77 106 val create_parser : unit -> parser 107 + (** Create a new parser. *) 108 + 78 109 val free_parser : parser -> unit 110 + (** Free a parser. 
*) 111 + 79 112 val parse : parser -> buffer -> len:int -> (element, error) result 113 + (** Parse the first [len] bytes from a buffer. *) 114 + 80 115 val parse_string : parser -> string -> (element, error) result 116 + (** Parse a JSON string. *) 117 + 81 118 val parse_file : parser -> string -> (element, error) result 119 + (** Parse a file from a path. *) 120 + 82 121 val element_type : element -> element_type 122 + (** Return the type of an element. *) 123 + 83 124 val get_bool : element -> (bool, error) result 125 + (** Access a boolean value. *) 126 + 84 127 val get_int64 : element -> (int64, error) result 128 + (** Access an [int64] value. *) 129 + 85 130 val get_uint64 : element -> (Unsigned.UInt64.t, error) result 131 + (** Access an unsigned 64-bit integer value. *) 132 + 86 133 val get_double : element -> (float, error) result 134 + (** Access a floating-point value. *) 135 + 87 136 val get_string : element -> (string, error) result 137 + (** Access a string value. *) 138 + 88 139 val get_array : element -> (array_, error) result 140 + (** Access an array value. *) 141 + 89 142 val get_object : element -> (object_, error) result 143 + (** Access an object value. *) 144 + 90 145 val bool_exn : element -> bool 146 + (** Like {!get_bool}, but raises {!Parse_error} on error. *) 147 + 91 148 val int64_exn : element -> int64 149 + (** Like {!get_int64}, but raises {!Parse_error} on error. *) 150 + 92 151 val uint64_exn : element -> Unsigned.UInt64.t 152 + (** Like {!get_uint64}, but raises {!Parse_error} on error. *) 153 + 93 154 val float_exn : element -> float 155 + (** Like {!get_double}, but raises {!Parse_error} on error. *) 156 + 94 157 val string_exn : element -> string 158 + (** Like {!get_string}, but raises {!Parse_error} on error. *) 159 + 95 160 val array_exn : element -> array_ 161 + (** Like {!get_array}, but raises {!Parse_error} on error. 
*) 162 + 96 163 val object_exn : element -> object_ 164 + (** Like {!get_object}, but raises {!Parse_error} on error. *) 165 + 97 166 val array_length : array_ -> int 167 + (** Return the length of an array. *) 168 + 98 169 val array_to_seq : array_ -> element Seq.t 170 + (** Convert an array to a sequence. *) 171 + 99 172 val array_to_list : array_ -> element list 173 + (** Convert an array to a list. *) 174 + 100 175 val array_iter : (element -> unit) -> array_ -> unit 176 + (** Iterate over an array. *) 177 + 101 178 val array_fold : ('a -> element -> 'a) -> 'a -> array_ -> 'a 179 + (** Fold over an array. *) 180 + 102 181 val object_length : object_ -> int 182 + (** Return the number of members in an object. *) 183 + 103 184 val object_find : object_ -> string -> (element, error) result 185 + (** Find a member by key. *) 186 + 104 187 val object_find_opt : object_ -> string -> element option 188 + (** Find a member by key, returning [None] if missing. *) 189 + 105 190 val object_to_seq : object_ -> (string * element) Seq.t 191 + (** Convert an object to a sequence of key/value pairs. *) 192 + 106 193 val object_to_list : object_ -> (string * element) list 194 + (** Convert an object to a list of key/value pairs. *) 195 + 107 196 val object_iter : (string -> element -> unit) -> object_ -> unit 197 + (** Iterate over an object. *) 198 + 108 199 val object_fold : ('a -> string -> element -> 'a) -> 'a -> object_ -> 'a 200 + (** Fold over an object. *) 201 + 109 202 val at_pointer : element -> string -> (element, error) result 203 + (** Navigate within a JSON document using a pointer string. *) 204 + 110 205 val element_to_string : element -> string 206 + (** Serialize an element to a JSON string. *) 207 + 111 208 val format_double : float -> string 209 + (** Format a float as a JSON number. *) 112 210 113 211 module Stream : sig 212 + (** Parse multiple JSON documents from a buffer. 213 + 214 + See {!Simdjsont.Raw.Stream.create} and {!Simdjsont.Raw.Stream.next}. 
*) 215 + 114 216 type t 217 + (** Stream state. *) 115 218 116 219 type next_result = 117 220 | End 118 221 | Doc of { element : element; byte_offset : int } 119 222 | Error of { error : error; byte_offset : int } 223 + (** Result of {!next}. 224 + 225 + - [byte_offset] is an offset into the stream input. *) 120 226 121 227 val default_batch_size : int 228 + (** Default batch size. *) 122 229 123 230 val create : 124 231 ?batch_size:int -> parser -> buffer -> len:int -> (t, error) result 232 + (** Create a stream from a buffer. *) 125 233 126 234 val next : t -> next_result 235 + (** Get the next document from the stream. *) 236 + 127 237 val truncated_bytes : t -> int 238 + (** Return the number of bytes that did not form a full document. *) 239 + 128 240 val doc_index : t -> int 241 + (** Return the current document index. *) 242 + 129 243 val is_finished : t -> bool 244 + (** Return [true] if the stream is finished. *) 245 + 130 246 val size_in_bytes : t -> int 247 + (** Return the input size in bytes. *) 248 + 131 249 val to_seq : t -> (element * int, error * int) result Seq.t 250 + (** Convert the stream to a sequence. 251 + 252 + The [int] in each [Ok]/[Error] value is a byte offset. *) 132 253 end
+10 -12
simdjsont.opam
··· 1 1 # This file is generated by dune, edit dune-project instead 2 2 opam-version: "2.0" 3 - synopsis: "Fast JSON parsing with simdjson and jsont-style codec API" 3 + version: "0.1.0" 4 + synopsis: "JSON parsing with simdjson, with support for ndjson streaming." 4 5 description: 5 - "OCaml bindings to simdjson with a jsont-style API for encoding/decoding. Includes vendored simdjson 4.2.4." 6 - maintainer: ["Author Name"] 7 - authors: ["Author Name"] 6 + "OCaml bindings to simdjson with support for ndjson streaming. Includes vendored simdjson 4.2.4." 7 + maintainer: ["Gabriel Díaz"] 8 + authors: ["Gabriel Díaz"] 8 9 license: "ISC" 9 10 tags: ["json" "simd" "parsing" "performance" "codec"] 10 - homepage: "https://github.com/username/simdjsont" 11 - doc: "https://username.github.io/simdjsont" 12 - bug-reports: "https://github.com/username/simdjsont/issues" 11 + doc: "https://tangled.org/gdiazlo.tngl.sh/simdjsont" 13 12 depends: [ 14 - "ocaml" {>= "4.14"} 15 - "dune" {>= "3.0" & >= "3.0"} 13 + "dune" {>= "3.20"} 14 + "ocaml" {>= "5.4.0"} 16 15 "integers" 17 16 "alcotest" {with-test} 18 - "yojson" {with-test} 19 - "bechamel" {with-test} 20 17 "odoc" {with-doc} 21 18 ] 22 19 build: [ ··· 33 30 "@doc" {with-doc} 34 31 ] 35 32 ] 36 - dev-repo: "git+https://github.com/username/simdjsont.git" 33 + dev-repo: "https://tangled.org/gdiazlo.tngl.sh/simdjsont" 34 + x-maintenance-intent: ["(latest)"]
-19
test/test_readme.ml
··· 144 144 assert (s = {|{"key":[1,2,3]}|}) 145 145 | Error _ -> assert false 146 146 147 - let test_yojson_compat () = 148 - let v = 149 - Simdjsont.Decode.decode_string Simdjsont.Decode.value 150 - {|{"name": "test", "value": 123}|} 151 - in 152 - match v with 153 - | Ok json -> 154 - let yojson = Simdjsont.Json.to_yojson json in 155 - let name = 156 - match yojson with 157 - | `Assoc fields -> List.assoc_opt "name" fields 158 - | _ -> None 159 - in 160 - assert (name = Some (`String "test")); 161 - let back = Simdjsont.Json.of_yojson yojson in 162 - assert (Simdjsont.Json.to_string back = Simdjsont.Json.to_string json) 163 - | Error _ -> assert false 164 - 165 147 let test_low_level () = 166 148 let open Simdjsont.Raw in 167 149 let parser = create_parser () in ··· 202 184 ("extract with codec", `Quick, test_extract_with_codec); 203 185 ("dynamic json", `Quick, test_dynamic_json); 204 186 ("dynamic decode", `Quick, test_dynamic_decode); 205 - ("yojson compat", `Quick, test_yojson_compat); 206 187 ("low level", `Quick, test_low_level); 207 188 ] ); 208 189 ]