Minimal SQLite key-value store for OCaml
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Thomas Gazagnaire. All rights reserved.
3 SPDX-License-Identifier: MIT
4 ---------------------------------------------------------------------------*)
5
6(* Pure OCaml B-tree backed key-value store with SQLite-compatible file format.
7
8 The file format is a valid SQLite database:
9 - Page 1: 100-byte database header + sqlite_master table B-tree
10 - Page 2+: user data tables using Table B-tree with Record encoding
11 - In-memory hashtable for O(1) key→rowid lookups *)
12
13let page_size = 4096
14let magic = "SQLite format 3\000"
15
16(* Re-export Btree.Record.value so users don't need to depend on btree *)
17
18type value = Btree.Record.value =
19 | Vnull
20 | Vint of int64
21 | Vfloat of float
22 | Vblob of string
23 | Vtext of string
24
25let pp_value = Btree.Record.pp_value
26
27(* Schema types *)
28
29type column = {
30 col_name : string;
31 col_affinity : string;
32 col_is_rowid_alias : bool;
33}
34
35type schema = { tbl_name : string; columns : column list; sql : string }
36
37(* Per-table state *)
38type kv_table = {
39 btree : Btree.Table.t;
40 keys : (string, int64) Hashtbl.t;
41 mutable next_rowid : int64;
42}
43
44type generic_table = { g_btree : Btree.Table.t; g_schema : schema }
45
46type t = {
47 pager : Btree.Pager.t;
48 mutable data : kv_table option;
49 mutable named_tables : (string * kv_table) list;
50 mutable all_tables : generic_table list;
51}
52
53let pp ppf t =
54 let names = List.map (fun gt -> gt.g_schema.tbl_name) t.all_tables in
55 Fmt.pf ppf "sqlite(%a)" Fmt.(list ~sep:(any ",") string) names
56
57(* CREATE TABLE parser *)
58
59let is_space = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false
60
61(* Split a string by commas, respecting nested parentheses *)
62let split_respecting_parens s =
63 let len = String.length s in
64 let buf = Buffer.create 64 in
65 let parts = ref [] in
66 let depth = ref 0 in
67 for i = 0 to len - 1 do
68 match s.[i] with
69 | '(' ->
70 incr depth;
71 Buffer.add_char buf '('
72 | ')' ->
73 decr depth;
74 Buffer.add_char buf ')'
75 | ',' when !depth = 0 ->
76 parts := String.trim (Buffer.contents buf) :: !parts;
77 Buffer.clear buf
78 | c -> Buffer.add_char buf c
79 done;
80 let last = String.trim (Buffer.contents buf) in
81 if last <> "" then parts := last :: !parts;
82 List.rev !parts
83
84(* Find the position of the matching closing paren *)
85let matching_paren s start =
86 let len = String.length s in
87 let rec loop i depth =
88 if i >= len then None
89 else
90 match s.[i] with
91 | '(' -> loop (i + 1) (depth + 1)
92 | ')' -> if depth = 0 then Some i else loop (i + 1) (depth - 1)
93 | '\'' ->
94 (* Skip single-quoted string literal *)
95 let rec skip j =
96 if j >= len then loop j 0
97 else if s.[j] = '\'' then
98 if j + 1 < len && s.[j + 1] = '\'' then skip (j + 2)
99 else loop (j + 1) depth
100 else skip (j + 1)
101 in
102 skip (i + 1)
103 | '"' ->
104 let rec skip j =
105 if j >= len then loop j 0
106 else if s.[j] = '"' then loop (j + 1) depth
107 else skip (j + 1)
108 in
109 skip (i + 1)
110 | _ -> loop (i + 1) depth
111 in
112 loop (start + 1) 0
113
114(* Consume parenthesized content into buf, advancing i past the closing paren *)
115let add_paren_content buf s i len =
116 Buffer.add_char buf '(';
117 let depth = ref 1 in
118 incr i;
119 while !i < len && !depth > 0 do
120 (match s.[!i] with '(' -> incr depth | ')' -> decr depth | _ -> ());
121 Buffer.add_char buf s.[!i];
122 incr i
123 done
124
125(* Tokenize a column definition into words, handling quoted identifiers
126 and parenthesized type parameters like DECIMAL(10,2) *)
127let tokenize s =
128 let len = String.length s in
129 let buf = Buffer.create 16 in
130 let tokens = ref [] in
131 let flush () =
132 if Buffer.length buf > 0 then begin
133 tokens := Buffer.contents buf :: !tokens;
134 Buffer.clear buf
135 end
136 in
137 let i = ref 0 in
138 while !i < len do
139 match s.[!i] with
140 | c when is_space c ->
141 flush ();
142 incr i
143 | '(' ->
144 (* Include parenthesized content as part of current token *)
145 add_paren_content buf s i len
146 | '"' ->
147 (* Double-quoted identifier: strip quotes *)
148 incr i;
149 while !i < len && s.[!i] <> '"' do
150 Buffer.add_char buf s.[!i];
151 incr i
152 done;
153 if !i < len then incr i
154 | '[' ->
155 (* Bracket-quoted identifier: strip brackets *)
156 incr i;
157 while !i < len && s.[!i] <> ']' do
158 Buffer.add_char buf s.[!i];
159 incr i
160 done;
161 if !i < len then incr i
162 | '`' ->
163 (* Backtick-quoted identifier: strip backticks *)
164 incr i;
165 while !i < len && s.[!i] <> '`' do
166 Buffer.add_char buf s.[!i];
167 incr i
168 done;
169 if !i < len then incr i
170 | c ->
171 Buffer.add_char buf c;
172 incr i
173 done;
174 flush ();
175 List.rev !tokens
176
177(* Keywords that start constraint clauses in column definitions *)
178let constraint_keywords =
179 [
180 "PRIMARY";
181 "NOT";
182 "UNIQUE";
183 "DEFAULT";
184 "CHECK";
185 "REFERENCES";
186 "COLLATE";
187 "GENERATED";
188 "AUTOINCREMENT";
189 "ASC";
190 "DESC";
191 "ON";
192 "CONSTRAINT";
193 ]
194
195(* Check if a column def is a table-level constraint *)
196let is_table_constraint s =
197 let upper = String.uppercase_ascii (String.trim s) in
198 let starts_with prefix =
199 String.length upper >= String.length prefix
200 && String.sub upper 0 (String.length prefix) = prefix
201 in
202 starts_with "PRIMARY KEY(" || starts_with "PRIMARY KEY "
203 || starts_with "UNIQUE(" || starts_with "UNIQUE " || starts_with "FOREIGN KEY"
204 || starts_with "CONSTRAINT " || starts_with "CHECK(" || starts_with "CHECK "
205
206let parse_column_def s =
207 if is_table_constraint s then None
208 else
209 let tokens = tokenize s in
210 match tokens with
211 | [] -> None
212 | name :: rest ->
213 (* Collect type tokens until we hit a constraint keyword *)
214 let rec collect_type acc = function
215 | [] -> (List.rev acc, [])
216 | tok :: _ as all
217 when List.mem (String.uppercase_ascii tok) constraint_keywords ->
218 (List.rev acc, all)
219 | tok :: tl -> collect_type (tok :: acc) tl
220 in
221 let type_tokens, constraint_tokens = collect_type [] rest in
222 let affinity = String.concat " " type_tokens in
223 (* INTEGER PRIMARY KEY is a rowid alias *)
224 let is_rowid_alias =
225 String.uppercase_ascii affinity = "INTEGER"
226 &&
227 let rec has_pk = function
228 | "PRIMARY" :: "KEY" :: _ -> true
229 | _ :: tl -> has_pk tl
230 | [] -> false
231 in
232 has_pk (List.map String.uppercase_ascii constraint_tokens)
233 in
234 Some
235 {
236 col_name = name;
237 col_affinity = affinity;
238 col_is_rowid_alias = is_rowid_alias;
239 }
240
241let parse_create_table sql =
242 match String.index_opt sql '(' with
243 | None -> []
244 | Some start -> (
245 match matching_paren sql start with
246 | None -> []
247 | Some body_end ->
248 let body = String.sub sql (start + 1) (body_end - start - 1) in
249 let parts = split_respecting_parens body in
250 List.filter_map parse_column_def parts)
251
252(* Standard kv table schema *)
253let kv_columns =
254 [
255 { col_name = "key"; col_affinity = "TEXT"; col_is_rowid_alias = false };
256 { col_name = "value"; col_affinity = "BLOB"; col_is_rowid_alias = false };
257 ]
258
259let table_sql name = Fmt.str "CREATE TABLE %s (key TEXT, value BLOB)" name
260
261let kv_schema name =
262 { tbl_name = name; columns = kv_columns; sql = table_sql name }
263
264(* Decode a Record payload into (key, value) *)
265let decode_kv payload =
266 match Btree.Record.decode payload with
267 | [ Btree.Record.Vtext k; Btree.Record.Vblob v ]
268 | [ Btree.Record.Vtext k; Btree.Record.Vtext v ] ->
269 Some (k, v)
270 | _ -> None
271
272(* Scan a table B-tree to build key→rowid map *)
273let scan_table btree =
274 let keys = Hashtbl.create 64 in
275 let next_rowid = ref 1L in
276 Btree.Table.iter btree (fun rowid payload ->
277 (match decode_kv payload with
278 | Some (k, _) -> Hashtbl.replace keys k rowid
279 | None -> ());
280 if rowid >= !next_rowid then next_rowid := Int64.add rowid 1L);
281 (keys, !next_rowid)
282
283(* Write the 100-byte SQLite database header *)
284let write_db_header buf ~page_count =
285 Bytes.blit_string magic 0 buf 0 16;
286 Btree.Page.set_u16_be buf 16 page_size;
287 Bytes.set_uint8 buf 18 1;
288 Bytes.set_uint8 buf 19 1;
289 Bytes.set_uint8 buf 20 0;
290 Bytes.set_uint8 buf 21 64;
291 Bytes.set_uint8 buf 22 32;
292 Bytes.set_uint8 buf 23 32;
293 Btree.Page.set_u32_be buf 24 1;
294 Btree.Page.set_u32_be buf 28 page_count;
295 Btree.Page.set_u32_be buf 32 0;
296 Btree.Page.set_u32_be buf 36 0;
297 Btree.Page.set_u32_be buf 40 1;
298 Btree.Page.set_u32_be buf 44 4;
299 Btree.Page.set_u32_be buf 48 0;
300 Btree.Page.set_u32_be buf 52 0;
301 Btree.Page.set_u32_be buf 56 1;
302 Btree.Page.set_u32_be buf 60 0;
303 Btree.Page.set_u32_be buf 64 0;
304 Btree.Page.set_u32_be buf 68 0;
305 (* Offsets 72-91: reserved for expansion, must be zero (spec section 1.2) *)
306 Bytes.fill buf 72 20 '\x00';
307 Btree.Page.set_u32_be buf 92 1;
308 Btree.Page.set_u32_be buf 96 3046000
309
310(* Build a sqlite_master table leaf cell *)
311let master_cell ~rowid ~name ~root_page ~sql =
312 let record =
313 Btree.Record.encode
314 [
315 Btree.Record.Vtext "table";
316 Btree.Record.Vtext name;
317 Btree.Record.Vtext name;
318 Btree.Record.Vint (Int64.of_int root_page);
319 Btree.Record.Vtext sql;
320 ]
321 in
322 let payload_varint =
323 Btree.Varint.encode (Int64.of_int (String.length record))
324 in
325 let rowid_varint = Btree.Varint.encode rowid in
326 payload_varint ^ rowid_varint ^ record
327
328(* Write page 1: db header + sqlite_master leaf table *)
329let rebuild_page1 t =
330 let buf = Bytes.create page_size in
331 write_db_header buf ~page_count:(Btree.Pager.page_count t.pager);
332 (* Leaf table header at offset 100 *)
333 Bytes.set_uint8 buf 100 0x0d;
334 Btree.Page.set_u16_be buf 101 0;
335 Bytes.set_uint8 buf 107 0;
336 (* Collect all tables from all_tables *)
337 let tables =
338 List.map
339 (fun gt ->
340 (gt.g_schema.tbl_name, Btree.Table.root_page gt.g_btree, gt.g_schema.sql))
341 t.all_tables
342 in
343 let n = List.length tables in
344 Btree.Page.set_u16_be buf 103 n;
345 (* Build cells from end of page *)
346 let cell_content_start = ref page_size in
347 let cell_ptrs = Array.make n 0 in
348 List.iteri
349 (fun i (name, root_page, sql) ->
350 let cell =
351 master_cell ~rowid:(Int64.of_int (i + 1)) ~name ~root_page ~sql
352 in
353 let cell_len = String.length cell in
354 cell_content_start := !cell_content_start - cell_len;
355 Bytes.blit_string cell 0 buf !cell_content_start cell_len;
356 cell_ptrs.(i) <- !cell_content_start)
357 tables;
358 Btree.Page.set_u16_be buf 105 !cell_content_start;
359 (* Cell pointer array at offset 108 (100 + 8 byte leaf header) *)
360 Array.iteri
361 (fun i ptr -> Btree.Page.set_u16_be buf (108 + (i * 2)) ptr)
362 cell_ptrs;
363 Btree.Pager.write t.pager 1 (Bytes.unsafe_to_string buf)
364
365(* Initialize a new kv_table on a fresh page *)
366let new_kv_table pager =
367 let btree = Btree.Table.v pager in
368 { btree; keys = Hashtbl.create 64; next_rowid = 1L }
369
370let mkdirs_for path =
371 match Eio.Path.split path with
372 | None -> ()
373 | Some (fs, p) -> (
374 let dir = Filename.dirname p in
375 if dir <> "." && dir <> "/" then
376 try Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 Eio.Path.(fs / dir)
377 with Eio.Io _ -> ())
378
379let v ~sw path =
380 mkdirs_for path;
381 let file =
382 Eio.Path.open_out ~sw ~create:(`Or_truncate 0o644) path |> fun f ->
383 (f :> Eio.File.rw_ty Eio.Resource.t)
384 in
385 let pager = Btree.Pager.v ~page_size file in
386 (* Allocate page 1 for db header + sqlite_master *)
387 let _page1 = Btree.Pager.allocate pager in
388 (* Create kv data table on page 2 *)
389 let kv = new_kv_table pager in
390 let gt = { g_btree = kv.btree; g_schema = kv_schema "kv" } in
391 let t = { pager; data = Some kv; named_tables = []; all_tables = [ gt ] } in
392 rebuild_page1 t;
393 t
394
395(* Extract named kv tables (non-kv tables with kv schema) from all_tables *)
396let extract_named_kv_tables all_tables =
397 List.filter_map
398 (fun gt ->
399 let name = gt.g_schema.tbl_name in
400 if name = "kv" then None
401 else
402 match gt.g_schema.columns with
403 | [
404 { col_name = "key"; col_affinity = "TEXT"; _ };
405 { col_name = "value"; col_affinity = "BLOB"; _ };
406 ] ->
407 let keys, next_rowid = scan_table gt.g_btree in
408 Some (name, { btree = gt.g_btree; keys; next_rowid })
409 | _ -> None)
410 all_tables
411
412let in_memory () =
413 let pager = Btree.Pager.mem ~page_size () in
414 let _page1 = Btree.Pager.allocate pager in
415 let kv = new_kv_table pager in
416 let gt = { g_btree = kv.btree; g_schema = kv_schema "kv" } in
417 let t = { pager; data = Some kv; named_tables = []; all_tables = [ gt ] } in
418 rebuild_page1 t;
419 t
420
421let open_ ~sw path =
422 let file =
423 Eio.Path.open_out ~sw ~create:`Never path |> fun f ->
424 (f :> Eio.File.rw_ty Eio.Resource.t)
425 in
426 let pager = Btree.Pager.v ~page_size file in
427 if Btree.Pager.page_count pager = 0 then failwith "Database file is empty";
428 (* Read page 1 and validate *)
429 let page1 = Btree.Pager.read pager 1 in
430 if String.sub page1 0 16 <> magic then failwith "Not a SQLite database";
431 let ps = Btree.Page.u16_be page1 16 in
432 if ps <> page_size then Fmt.failwith "Unsupported page size: %d" ps;
433 (* Parse sqlite_master at offset 100 *)
434 let header = Btree.Page.parse_header page1 100 in
435 let ptrs = Btree.Page.cell_pointers page1 100 header in
436 let raw_tables = ref [] in
437 for i = 0 to header.Btree.Page.cell_count - 1 do
438 let cell, _ =
439 Btree.Cell.parse_table_leaf page1 ptrs.(i) ~usable_size:page_size
440 in
441 match Btree.Record.decode cell.Btree.Cell.payload with
442 | [
443 Btree.Record.Vtext "table";
444 Btree.Record.Vtext name;
445 _;
446 Btree.Record.Vint root;
447 Btree.Record.Vtext sql;
448 ] ->
449 raw_tables := (name, Int64.to_int root, sql) :: !raw_tables
450 | _ -> ()
451 done;
452 let raw_tables = List.rev !raw_tables in
453 (* Build generic_table for every table *)
454 let all_tables =
455 List.map
456 (fun (name, root, sql) ->
457 let btree = Btree.Table.open_ pager ~root_page:root in
458 let columns = parse_create_table sql in
459 let schema = { tbl_name = name; columns; sql } in
460 { g_btree = btree; g_schema = schema })
461 raw_tables
462 in
463 (* Try to find "kv" table for backward compat *)
464 let data =
465 match List.find_opt (fun gt -> gt.g_schema.tbl_name = "kv") all_tables with
466 | None -> None
467 | Some gt ->
468 let keys, next_rowid = scan_table gt.g_btree in
469 Some { btree = gt.g_btree; keys; next_rowid }
470 in
471 let named = extract_named_kv_tables all_tables in
472 { pager; data; named_tables = named; all_tables }
473
474(* Get the kv_table, raising if no kv table exists *)
475let kv t =
476 match t.data with
477 | Some d -> d
478 | None -> failwith "No 'kv' table in this database"
479
480(* KV operations *)
481
482let find t key =
483 let d = kv t in
484 match Hashtbl.find_opt d.keys key with
485 | None -> None
486 | Some rowid -> (
487 match Btree.Table.find d.btree rowid with
488 | None -> None
489 | Some payload -> (
490 match decode_kv payload with Some (_, v) -> Some v | None -> None))
491
492let put t key value =
493 let kv = kv t in
494 let record =
495 Btree.Record.encode [ Btree.Record.Vtext key; Btree.Record.Vblob value ]
496 in
497 (match Hashtbl.find_opt kv.keys key with
498 | Some old_rowid -> Btree.Table.delete kv.btree old_rowid
499 | None -> ());
500 let rowid = kv.next_rowid in
501 kv.next_rowid <- Int64.add kv.next_rowid 1L;
502 Btree.Table.insert kv.btree ~rowid record;
503 Hashtbl.replace kv.keys key rowid
504
505let delete t key =
506 let d = kv t in
507 match Hashtbl.find_opt d.keys key with
508 | None -> ()
509 | Some rowid ->
510 Btree.Table.delete d.btree rowid;
511 Hashtbl.remove d.keys key
512
513let mem t key =
514 let d = kv t in
515 Hashtbl.mem d.keys key
516
517let iter t ~f =
518 let d = kv t in
519 Btree.Table.iter d.btree (fun _rowid payload ->
520 match decode_kv payload with Some (k, v) -> f k v | None -> ())
521
522let fold t ~init ~f =
523 let acc = ref init in
524 iter t ~f:(fun k v -> acc := f k v !acc);
525 !acc
526
527let sync t =
528 rebuild_page1 t;
529 Btree.Pager.sync t.pager
530
531let close t = sync t
532
533(* Generic read API *)
534
535let tables t = List.map (fun gt -> gt.g_schema) t.all_tables
536
537let table t name =
538 match List.find_opt (fun gt -> gt.g_schema.tbl_name = name) t.all_tables with
539 | Some gt -> gt
540 | None -> Fmt.failwith "No table %S found in database" name
541
542(* Find the index of the rowid alias column, if any *)
543let rowid_alias_index columns =
544 let rec find i = function
545 | [] -> None
546 | c :: _ when c.col_is_rowid_alias -> Some i
547 | _ :: rest -> find (i + 1) rest
548 in
549 find 0 columns
550
551(* Apply rowid substitution and trailing Vnull padding *)
552let fixup_values ~schema ~rowid values =
553 let n_cols = List.length schema.columns in
554 let len = List.length values in
555 let values =
556 if len < n_cols then
557 values @ List.init (n_cols - len) (fun _ -> Btree.Record.Vnull)
558 else values
559 in
560 match rowid_alias_index schema.columns with
561 | None -> values
562 | Some idx ->
563 List.mapi
564 (fun i v ->
565 if i = idx then
566 match v with
567 | Btree.Record.Vnull -> Btree.Record.Vint rowid
568 | v -> v
569 else v)
570 values
571
572let iter_table t name ~f =
573 let gt = table t name in
574 let schema = gt.g_schema in
575 Btree.Table.iter gt.g_btree (fun rowid payload ->
576 let values = Btree.Record.decode payload in
577 let values = fixup_values ~schema ~rowid values in
578 f rowid values)
579
580let fold_table t name ~init ~f =
581 let acc = ref init in
582 iter_table t name ~f:(fun rowid values -> acc := f rowid values !acc);
583 !acc
584
585let read_table t name =
586 fold_table t name ~init:[] ~f:(fun rowid values acc -> (rowid, values) :: acc)
587 |> List.rev
588
589(* Namespaced Tables *)
590
591module Table = struct
592 type db = t
593 type t = { parent : db; name : string; kv : kv_table }
594
595 let valid_name name =
596 String.length name > 0
597 && (let first = name.[0] in
598 (first >= 'a' && first <= 'z')
599 || (first >= 'A' && first <= 'Z')
600 || first = '_')
601 && String.for_all
602 (fun c ->
603 (c >= 'a' && c <= 'z')
604 || (c >= 'A' && c <= 'Z')
605 || (c >= '0' && c <= '9')
606 || c = '_')
607 name
608
609 let create parent ~name =
610 if not (valid_name name) then Fmt.invalid_arg "Invalid table name: %S" name;
611 match List.assoc_opt name parent.named_tables with
612 | Some kv -> { parent; name; kv }
613 | None ->
614 let kv = new_kv_table parent.pager in
615 let gt = { g_btree = kv.btree; g_schema = kv_schema name } in
616 parent.named_tables <- (name, kv) :: parent.named_tables;
617 parent.all_tables <- parent.all_tables @ [ gt ];
618 { parent; name; kv }
619
620 let find t key =
621 match Hashtbl.find_opt t.kv.keys key with
622 | None -> None
623 | Some rowid -> (
624 match Btree.Table.find t.kv.btree rowid with
625 | None -> None
626 | Some payload -> (
627 match decode_kv payload with Some (_, v) -> Some v | None -> None))
628
629 let put t key value =
630 let kv = t.kv in
631 let record =
632 Btree.Record.encode [ Btree.Record.Vtext key; Btree.Record.Vblob value ]
633 in
634 (match Hashtbl.find_opt kv.keys key with
635 | Some old_rowid -> Btree.Table.delete kv.btree old_rowid
636 | None -> ());
637 let rowid = kv.next_rowid in
638 kv.next_rowid <- Int64.add kv.next_rowid 1L;
639 Btree.Table.insert kv.btree ~rowid record;
640 Hashtbl.replace kv.keys key rowid
641
642 let delete t key =
643 match Hashtbl.find_opt t.kv.keys key with
644 | None -> ()
645 | Some rowid ->
646 Btree.Table.delete t.kv.btree rowid;
647 Hashtbl.remove t.kv.keys key
648
649 let mem t key = Hashtbl.mem t.kv.keys key
650
651 let iter t ~f =
652 Btree.Table.iter t.kv.btree (fun _rowid payload ->
653 match decode_kv payload with Some (k, v) -> f k v | None -> ())
654end