simdjsont#
OCaml bindings to simdjson, a JSON parser that uses SIMD instructions.
This library includes a vendored copy of simdjson 4.2.4, which is Copyright 2018-2025 The simdjson authors and licensed under the Apache License 2.0.
Installation#
System requirements#
This package builds vendored C++ code (simdjson) via Dune foreign_stubs, so you need a C++17-capable compiler toolchain available on your system.
Opam#
opam install simdjsont
Usage#
Quick Start#
(* Validation: assert that the literal below is accepted as JSON. *)
let () =
  let doc = {|{"name": "Alice", "age": 30}|} in
  assert (Simdjsont.Validate.is_valid doc)

(* Extraction: look up a single string by JSON Pointer. *)
let name =
  let doc = {|{"user": {"name": "Bob"}}|} in
  Simdjsont.Extract.string doc ~pointer:"/user/name"
(* name = Ok "Bob" *)

(* Decoding: turn a JSON array into a typed OCaml list. *)
let numbers =
  let codec = Simdjsont.Decode.(list int) in
  Simdjsont.Decode.decode_string codec "[1, 2, 3]"
(* numbers = Ok [1; 2; 3] *)

(* Encoding: serialise an OCaml list through a list codec. *)
let json =
  let codec = Simdjsont.Decode.(list string) in
  Simdjsont.Encode.to_string codec [ "a"; "b"; "c" ]
(* json = {|["a","b","c"]|} *)
Validation#
(* Boolean check: pick the message from the validity flag, then print it. *)
let () =
  let verdict =
    match Simdjsont.Validate.is_valid "[1, 2, 3]" with
    | true -> "Valid JSON"
    | false -> "Invalid JSON"
  in
  print_endline verdict

(* Detailed check: [check] carries an error message when validation fails. *)
let () =
  let report =
    match Simdjsont.Validate.check "{invalid" with
    | Ok () -> "Valid"
    | Error msg -> "Invalid: " ^ msg
  in
  print_endline report
Extracting Values#
Use JSON Pointer (RFC 6901) syntax to extract values without parsing the entire document (the extraction examples appear after the NDJSON / streaming section below):
NDJSON / streaming#
simdjsont can parse newline-delimited JSON (NDJSON) and, more generally, concatenated JSON documents using the low-level streaming API Simdjsont.Raw.Stream.
This is useful when you want to process large streams of JSON documents (logs, events, data pipelines) without splitting the input yourself.
If you want to decode each streamed document into an OCaml value, use Simdjsont.Codec.decode_element with your codec (this keeps decoding in the high-level API while parsing stays streaming/zero-copy).
(* Iterate over a newline-delimited input and print, for each item the stream
   yields, either the parsed document or the parse error together with its
   byte offset. Wrapped in [let () = ...] so the example is a valid top-level
   phrase, like the other examples in this document. *)
let () =
  let ndjson = "{\"id\":1}\n{\"id\":2}\n{broken\n{\"id\":3}\n" in
  let parser = Simdjsont.Raw.create_parser () in
  let buf = Simdjsont.Raw.buffer_of_string ndjson in
  let len = String.length ndjson in
  match Simdjsont.Raw.Stream.create parser buf ~len with
  | Error e -> Printf.eprintf "Failed to create stream: %s\n" e.message
  | Ok stream ->
      Simdjsont.Raw.Stream.to_seq stream
      |> Seq.iter (function
           | Ok (elt, byte_offset) ->
               Printf.printf "Doc at %d: %s\n" byte_offset
                 (Simdjsont.Raw.element_to_string elt)
           | Error (e, byte_offset) ->
               Printf.printf "Parse error at %d: %s\n" byte_offset e.message);
      (* Queried only after the sequence has been fully consumed. *)
      let truncated = Simdjsont.Raw.Stream.truncated_bytes stream in
      if truncated > 0 then
        Printf.printf "Warning: %d truncated bytes at end of stream\n" truncated
Decode streamed documents to a typed value using a codec:
type event = { id : int; name : string }

(* Object codec for [event]: members "id" and "name", each with an [~enc]
   function that projects the field out of the record. *)
let event_codec =
  let open Simdjsont.Decode in
  Obj.field (fun id name -> { id; name })
  |> Obj.mem "id" int ~enc:(fun e -> e.id)
  |> Obj.mem "name" string ~enc:(fun e -> e.name)
  |> Obj.finish

(* Self-contained streaming example: it builds its own input, parser and
   buffer, so it does not depend on bindings from any other snippet, and the
   input documents carry both members that [event_codec] reads. *)
let () =
  let ndjson =
    "{\"id\":1,\"name\":\"start\"}\n{\"id\":2,\"name\":\"stop\"}\n"
  in
  let parser = Simdjsont.Raw.create_parser () in
  let buf = Simdjsont.Raw.buffer_of_string ndjson in
  let len = String.length ndjson in
  match Simdjsont.Raw.Stream.create parser buf ~len with
  | Error e -> Printf.eprintf "Failed to create stream: %s\n" e.message
  | Ok stream ->
      Simdjsont.Raw.Stream.to_seq stream
      |> Seq.iter (function
           | Ok (elt, byte_offset) -> (
               (* Parsing succeeded; decoding into [event] can still fail. *)
               match Simdjsont.Codec.decode_element event_codec elt with
               | Ok event ->
                   Printf.printf "Event at %d: id=%d name=%s\n" byte_offset
                     event.id event.name
               | Error msg ->
                   Printf.printf "Decode error at %d: %s\n" byte_offset msg)
           | Error (e, byte_offset) ->
               Printf.printf "Parse error at %d: %s\n" byte_offset e.message)
Lifetime note: values returned from Simdjsont.Raw.Stream reference memory owned by the parser/buffer. Keep the parser and buf alive for the whole iteration, and don't reuse the parser for other parses while you still hold elements.
(* Sample document shared by the extraction examples that follow. *)
let json =
  {|{
"users": [
{"id": 1, "name": "Alice", "active": true},
{"id": 2, "name": "Bob", "active": false}
],
"count": 2
}|}

(* String lookup *)
let () =
  let line =
    match Simdjsont.Extract.string json ~pointer:"/users/0/name" with
    | Ok name -> name (* prints: Alice *)
    | Error e -> "Error: " ^ e
  in
  print_endline line

(* Integer lookup *)
let () =
  Simdjsont.Extract.int json ~pointer:"/count"
  |> function
  | Ok n -> Printf.printf "Count: %d\n" n (* prints: Count: 2 *)
  | Error e -> print_endline ("Error: " ^ e)

(* Lookup through an array index *)
let () =
  let second_id = Simdjsont.Extract.int json ~pointer:"/users/1/id" in
  match second_id with
  | Error e -> print_endline ("Error: " ^ e)
  | Ok id -> Printf.printf "Second user ID: %d\n" id
(* prints: Second user ID: 2 *)

(* Boolean lookup *)
let () =
  Simdjsont.Extract.bool json ~pointer:"/users/0/active"
  |> function
  | Ok b -> Printf.printf "Active: %b\n" b (* prints: Active: true *)
  | Error e -> print_endline ("Error: " ^ e)

(* Null test *)
let () =
  let message =
    match Simdjsont.Extract.is_null {|{"value": null}|} ~pointer:"/value" with
    | Ok is_null -> if is_null then "Value is null" else "Value is not null"
    | Error e -> "Error: " ^ e
  in
  print_endline message
Decoding to Types#
Define codecs to decode JSON directly to OCaml types:
(* Scalars: one example per primitive codec. *)
let () =
  Simdjsont.Decode.(decode_string bool "true")
  |> function
  | Ok b -> Printf.printf "Bool: %b\n" b
  | Error e -> print_endline e

let () =
  Simdjsont.Decode.(decode_string int "42")
  |> function
  | Ok n -> Printf.printf "Int: %d\n" n
  | Error e -> print_endline e

let () =
  Simdjsont.Decode.(decode_string float "3.14")
  |> function
  | Ok f -> Printf.printf "Float: %f\n" f
  | Error e -> print_endline e

let () =
  Simdjsont.Decode.(decode_string string {|"hello"|})
  |> function
  | Ok s -> Printf.printf "String: %s\n" s
  | Error e -> print_endline e

(* Homogeneous arrays decode through [list]. *)
let () =
  let decoded = Simdjsont.Decode.(decode_string (list int) "[1, 2, 3, 4, 5]") in
  match decoded with
  | Error e -> print_endline e
  | Ok nums -> List.iter (fun n -> Printf.printf "%d " n) nums

(* [optional] wraps a codec so that the result is an [option]. *)
let () =
  let text =
    match Simdjsont.Decode.(decode_string (optional string) "null") with
    | Ok (Some s) -> s
    | Ok None -> "No value"
    | Error e -> e
  in
  print_endline text
Decoding Records#
Use the builder API to decode JSON objects to records:
type point = { x : int; y : int }

(* Object codec for [point]: members "x" and "y", each with an [~enc] function
   that projects the field out of the record. *)
let point_codec =
  Simdjsont.Decode.(
    Obj.field (fun x y -> { x; y })
    |> Obj.mem "x" int ~enc:(fun { x; _ } -> x)
    |> Obj.mem "y" int ~enc:(fun { y; _ } -> y)
    |> Obj.finish)

(* Decode a literal object and print both coordinates. *)
let () =
  let source = {|{"x": 10, "y": 20}|} in
  match Simdjsont.Decode.decode_string point_codec source with
  | Error e -> print_endline e
  | Ok { x; y } -> Printf.printf "Point: (%d, %d)\n" x y
type user = { id : int; name : string; email : string option }

(* Object codec for [user]; "email" is declared with [Obj.opt_mem] and maps to
   a [string option] field. *)
let user_codec =
  Simdjsont.Decode.(
    Obj.field (fun id name email -> { id; name; email })
    |> Obj.mem "id" int ~enc:(fun { id; _ } -> id)
    |> Obj.mem "name" string ~enc:(fun { name; _ } -> name)
    |> Obj.opt_mem "email" string ~enc:(fun { email; _ } -> email)
    |> Obj.finish)

(* Decode a user and print the e-mail line only when an address is present. *)
let () =
  let json = {|{"id": 1, "name": "Alice", "email": null}|} in
  match Simdjsont.Decode.decode_string user_codec json with
  | Error e -> print_endline e
  | Ok { id; name; email } -> begin
      Printf.printf "User %d: %s\n" id name;
      match email with
      | None -> print_endline "No email"
      | Some address -> Printf.printf "Email: %s\n" address
    end
Encoding#
Encode OCaml values to JSON strings:
(* Primitives and lists: print the JSON text produced for each value. *)
let () =
  let show codec v = print_endline (Simdjsont.Encode.to_string codec v) in
  show Simdjsont.Decode.bool true; (* true *)
  show Simdjsont.Decode.int 42; (* 42 *)
  show Simdjsont.Decode.string "hello"; (* "hello" *)
  show Simdjsont.Decode.(list int) [ 1; 2; 3 ] (* [1,2,3] *)

(* Records: the codec built for decoding is passed to the encoder as well. *)
type point = { x : int; y : int }

let point_codec =
  Simdjsont.Decode.(
    Obj.field (fun x y -> { x; y })
    |> Obj.mem "x" int ~enc:(fun { x; _ } -> x)
    |> Obj.mem "y" int ~enc:(fun { y; _ } -> y)
    |> Obj.finish)

let () =
  let encoded = Simdjsont.Encode.to_string point_codec { x = 100; y = 200 } in
  print_endline encoded
(* {"x":100,"y":200} *)
Extracting with Codecs#
Combine extraction and decoding:
type item = { name : string; price : float }

(* Object codec for [item]: members "name" and "price". *)
let item_codec =
  Simdjsont.Decode.(
    Obj.field (fun name price -> { name; price })
    |> Obj.mem "name" string ~enc:(fun { name; _ } -> name)
    |> Obj.mem "price" float ~enc:(fun { price; _ } -> price)
    |> Obj.finish)

let json =
  {|{
"order": {
"items": [
{"name": "Book", "price": 12.99},
{"name": "Pen", "price": 1.50}
]
}
}|}

(* Point at the first order item and decode it with [item_codec]. *)
let () =
  let first_item = Simdjsont.Extract.at item_codec json ~pointer:"/order/items/0" in
  match first_item with
  | Error e -> print_endline e
  | Ok { name; price } -> Printf.printf "%s: $%.2f\n" name price
(* prints: Book: $12.99 *)
Working with Dynamic JSON#
Use Simdjsont.Json.t for dynamic JSON values:
(* Assemble a document directly from [Simdjsont.Json] constructors. *)
let json =
  let open Simdjsont.Json in
  let scores = Array [ Int 95L; Int 87L; Int 92L ] in
  Object
    [
      ("name", String "Alice");
      ("age", Int 30L);
      ("scores", scores);
      ("active", Bool true);
    ]

let () =
  let rendered = Simdjsont.Json.to_string json in
  print_endline rendered
(* {"name":"Alice","age":30,"scores":[95,87,92],"active":true} *)

(* Decode arbitrary JSON into the dynamic representation and print it back. *)
let () =
  let text =
    match Simdjsont.Decode.(decode_string value {|{"key": [1, 2, 3]}|}) with
    | Ok v -> Simdjsont.Json.to_string v
    | Error e -> e
  in
  print_endline text
Converting to/from Yojson (example)#
simdjsont does not depend on yojson, but you can convert values when you need interoperability.
(* Requires adding `yojson` to your own project dependencies. *)

(* Map a simdjsont value onto Yojson's representation. Integers are emitted as
   decimal text in an [`Intlit] node. *)
let rec simdjsont_to_yojson : Simdjsont.Json.t -> Yojson.Safe.t = function
  | Null -> `Null
  | Bool flag -> `Bool flag
  | Int n -> `Intlit (Int64.to_string n)
  | Float x -> `Float x
  | String text -> `String text
  | Array items -> `List (List.map simdjsont_to_yojson items)
  | Object members ->
      let convert (key, value) = (key, simdjsont_to_yojson value) in
      `Assoc (List.map convert members)

(* Inverse direction. Tuples become arrays, and a variant becomes an object
   {"variant": {"tag": ..., "arg": ...}} whose "arg" member is present only
   when the variant carries an argument. *)
let rec yojson_to_simdjsont : Yojson.Safe.t -> Simdjsont.Json.t = function
  | `Null -> Null
  | `Bool flag -> Bool flag
  | `Int n -> Int (Int64.of_int n)
  | `Intlit digits -> Int (Int64.of_string digits)
  | `Float x -> Float x
  | `String text -> String text
  | `List items | `Tuple items -> Array (List.map yojson_to_simdjsont items)
  | `Assoc members ->
      let convert (key, value) = (key, yojson_to_simdjsont value) in
      Object (List.map convert members)
  | `Variant (tag, payload) ->
      let arg_member =
        match payload with
        | None -> []
        | Some arg -> [ ("arg", yojson_to_simdjsont arg) ]
      in
      Object [ ("variant", Object (("tag", String tag) :: arg_member)) ]

(* Round-trip check: converting to Yojson and back must render identically. *)
let () =
  let source = {|{"name": "test", "value": 123}|} in
  match Simdjsont.Decode.decode_string Simdjsont.Decode.value source with
  | Error e -> failwith e
  | Ok json ->
      let round_tripped = json |> simdjsont_to_yojson |> yojson_to_simdjsont in
      assert (
        Simdjsont.Json.to_string round_tripped = Simdjsont.Json.to_string json)
Low-Level API#
For maximum control, use the low-level bindings:
(* Parse once with the raw bindings, then navigate the resulting element tree
   by JSON Pointer and by iterating over an array. *)
let () =
  let open Simdjsont.Raw in
  let parser = create_parser () in
  let source = {|{"users": [{"name": "Alice"}, {"name": "Bob"}]}|} in
  match parse_string parser source with
  | Error e -> print_endline ("Parse error: " ^ e.message)
  | Ok root ->
      (* Prints the "name" member of an array entry; entries without one are
         skipped silently. *)
      let print_name user =
        match object_find (object_exn user) "name" with
        | Ok name -> print_endline (string_exn name)
        | Error _ -> ()
      in
      (* Navigate to nested value *)
      begin
        match at_pointer root "/users/1/name" with
        | Ok elt -> print_endline (string_exn elt) (* prints: Bob *)
        | Error e -> print_endline e.message
      end;
      (* Iterate over arrays *)
      begin
        match at_pointer root "/users" with
        | Ok elt -> array_iter print_name (array_exn elt)
        | Error e -> print_endline e.message
      end
CBOR#
simdjsont includes support for CBOR (RFC 8949), using the same codecs as JSON. It decodes CBOR into the same internal Simdjsont.Json.t representation, allowing you to use your existing data models and validation logic.
CBOR Quick Start#
type point = { x: int; y: int }

(* Object codec for [point]; the same codec value is passed to both the CBOR
   encoder and the CBOR decoder below. *)
let point_codec =
  let open Simdjsont.Decode in
  Obj.field (fun x y -> { x; y })
  |> Obj.mem "x" int ~enc:(fun p -> p.x)
  |> Obj.mem "y" int ~enc:(fun p -> p.y)
  |> Obj.finish

(* Encode to CBOR binary string *)
let cbor = Simdjsont.Cbor.encode_string point_codec { x = 10; y = 20 }

(* Decode from CBOR. Wrapped in [let () = ...]: a bare [match] directly after
   a top-level [let] binding is not a valid structure item. *)
let () =
  match Simdjsont.Cbor.decode_string point_codec cbor with
  | Ok p -> Printf.printf "Point from CBOR: (%d, %d)\n" p.x p.y
  | Error e -> print_endline e
CBOR Streaming#
CBOR often contains multiple concatenated items. Use Simdjsont.Cbor.to_seq to parse these as a sequence:
(* Three CBOR-encoded integers laid end to end in one string. *)
let cbor_data =
  [ 1; 2; 3 ]
  |> List.map (fun n -> Simdjsont.Cbor.encode_string Simdjsont.Decode.int n)
  |> String.concat ""

(* Walk the concatenated items, printing each decoded value or error. *)
let () =
  let report = function
    | Ok n -> Printf.printf "Decoded: %d\n" n
    | Error e -> Printf.printf "Error: %s\n" e
  in
  Seq.iter report (Simdjsont.Cbor.to_seq Simdjsont.Decode.int cbor_data)
CBOR Limitations#
The CBOR implementation is optimized for JSON-compatible data structures. The following CBOR features are currently NOT supported:
- Tags: While basic tags are skipped, specialized tag processing (e.g., for Date/Time) is not included.
- Bignums: Integers are limited to 64-bit signed/unsigned.
- Byte Strings: Decoded as strings; binary-only data might fail UTF-8 validation if decoded via the string codec.
- Undefined/Simple Values: Only true, false, and null are supported.
- Integer Map Keys: Only string keys (Major type 3) and byte string keys (Major type 2) are supported for objects.
License#
ISC