···11+MIT License
22+33+Copyright (c) 2025 Thomas Gazagnaire
44+55+Permission is hereby granted, free of charge, to any person obtaining a copy
66+of this software and associated documentation files (the "Software"), to deal
77+in the Software without restriction, including without limitation the rights
88+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
99+copies of the Software, and to permit persons to whom the Software is
1010+furnished to do so, subject to the following conditions:
1111+1212+The above copyright notice and this permission notice shall be included in all
1313+copies or substantial portions of the Software.
1414+1515+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1616+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1717+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1818+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1919+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121+SOFTWARE.
+105
README.md
···11+# btree
22+33+Pure OCaml B-tree implementation for persistent storage.
44+55+## Overview
66+77+A B-tree implementation supporting SQLite-compatible page-based storage:
88+99+- **Table B-trees**: 64-bit integer keys with data in leaves
1010+- **Index B-trees**: Arbitrary keys, no data (for secondary indexes)
1111+- **Page-based**: Interior and leaf pages with configurable size
1212+- **Overflow support**: Large records span multiple pages
1313+1414+## Installation
1515+1616+```
1717+opam install btree
1818+```
1919+2020+## Usage
2121+2222+```ocaml
2323+(* Create a pager backed by a file *)
2424+let pager = Btree.Pager.create ~page_size:4096 file in
2525+2626+(* Create a table B-tree *)
2727+let tree = Btree.Table.create pager in
2828+2929+(* Insert records *)
3030+Btree.Table.insert tree ~rowid:1L "Hello";
3131+Btree.Table.insert tree ~rowid:2L "World";
3232+3333+(* Lookup *)
3434+let data = Btree.Table.find tree 1L in (* Some "Hello" *)
3535+3636+(* Iterate *)
3737+Btree.Table.iter tree (fun rowid data ->
3838+ Printf.printf "%Ld: %s\n" rowid data)
3939+```
4040+4141+## API
4242+4343+### Pager
4444+4545+The pager manages page I/O and caching:
4646+4747+- `Pager.create ~page_size file` - Create pager with given page size
4848+- `Pager.read pager page_num` - Read a page
4949+- `Pager.write pager page_num data` - Write a page
5050+- `Pager.allocate pager` - Allocate a new page
5151+- `Pager.free pager page_num` - Free a page
5252+- `Pager.sync pager` - Sync to disk
5353+5454+### Table B-tree
5555+5656+For rowid-keyed tables (like SQLite tables):
5757+5858+- `Table.create pager` - Create a new table B-tree
5959+- `Table.open_ pager root_page` - Open existing table
6060+- `Table.insert tree ~rowid data` - Insert a record
6161+- `Table.find tree rowid` - Find by rowid
6262+- `Table.delete tree rowid` - Delete by rowid
6363+- `Table.iter tree f` - Iterate all records
6464+6565+### Index B-tree
6666+6767+For arbitrary keys (like SQLite indexes):
6868+6969+- `Index.create pager` - Create a new index B-tree
7070+- `Index.insert tree key` - Insert a key
7171+- `Index.mem tree key` - Check if key exists
7272+- `Index.delete tree key` - Delete a key
7373+- `Index.iter tree f` - Iterate all keys
7474+7575+## Page Format
7676+7777+Following SQLite's B-tree page format:
7878+7979+### Page Header
8080+8181+| Offset | Size | Description |
8282+|--------|------|-------------|
8383+| 0 | 1 | Page type (0x02, 0x05, 0x0a, 0x0d) |
8484+| 1 | 2 | First freeblock offset |
8585+| 3 | 2 | Number of cells |
8686+| 5 | 2 | Cell content area start |
8787+| 7 | 1 | Fragmented free bytes |
8888+| 8 | 4 | Right-most child (interior only) |
8989+9090+### Page Types
9191+9292+- `0x02` - Interior index page
9393+- `0x05` - Interior table page
9494+- `0x0a` - Leaf index page
9595+- `0x0d` - Leaf table page
9696+9797+## Related Work
9898+9999+- [SQLite](https://www.sqlite.org/fileformat.html) - Reference B-tree implementation
100100+- [Limbo](https://github.com/tursodatabase/limbo) - Rust SQLite implementation
101101+- [ocaml-btree (ctk21)](https://github.com/ctk21/ocaml-btree) - In-memory B-tree
102102+103103+## License
104104+105105+MIT License. See [LICENSE.md](LICENSE.md) for details.
+22
dune-project
···11+(lang dune 3.0)
22+33+(name btree)
44+55+(generate_opam_files true)
66+77+(license MIT)
88+(authors "Thomas Gazagnaire")
99+(maintainers "Thomas Gazagnaire")
1010+(source (uri https://tangled.org/gazagnaire.org/ocaml-btree))
1111+1212+(package
1313+ (name btree)
1414+ (synopsis "Pure OCaml B-tree implementation for persistent storage")
1515+ (description
1616+ "A B-tree implementation supporting both table B-trees (integer keys with data) and index B-trees (arbitrary keys). Designed for SQLite-compatible persistent storage with page-based I/O.")
1717+ (depends
1818+ (ocaml (>= 5.1))
1919+ (eio (>= 1.0))
2020+ cstruct
2121+ (alcotest :with-test)
2222+ (eio_main :with-test)))
+682
lib/btree.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Thomas Gazagnaire. All rights reserved.
33+ SPDX-License-Identifier: MIT
44+ ---------------------------------------------------------------------------*)
55+66+(* Varint encoding - SQLite style *)
77+module Varint = struct
88+ let decode buf off =
99+ let rec loop acc shift i =
1010+ if i >= String.length buf then (acc, i - off)
1111+ else
1212+ let byte = Char.code buf.[i] in
1313+ let value = Int64.of_int (byte land 0x7f) in
1414+ let acc = Int64.logor acc (Int64.shift_left value shift) in
1515+ if byte land 0x80 = 0 then (acc, i - off + 1)
1616+ else if shift >= 56 then
1717+ (* 9th byte - use all 8 bits *)
1818+ let byte9 = Char.code buf.[i + 1] in
1919+ let acc = Int64.logor acc (Int64.shift_left (Int64.of_int byte9) 56) in
2020+ (acc, i - off + 2)
2121+ else loop acc (shift + 7) (i + 1)
2222+ in
2323+ loop 0L 0 off
2424+2525+ let size n =
2626+ if n < 0L then 9
2727+ else if n < 128L then 1
2828+ else if n < 16384L then 2
2929+ else if n < 2097152L then 3
3030+ else if n < 268435456L then 4
3131+ else if n < 34359738368L then 5
3232+ else if n < 4398046511104L then 6
3333+ else if n < 562949953421312L then 7
3434+ else if n < 72057594037927936L then 8
3535+ else 9
3636+3737+ let encode n =
3838+ let sz = size n in
3939+ let buf = Bytes.create sz in
4040+ let rec loop n i =
4141+ if i = sz - 1 then Bytes.set_uint8 buf i (Int64.to_int n land 0x7f)
4242+ else begin
4343+ Bytes.set_uint8 buf i (Int64.to_int n land 0x7f lor 0x80);
4444+ loop (Int64.shift_right_logical n 7) (i + 1)
4545+ end
4646+ in
4747+ loop n 0;
4848+ Bytes.unsafe_to_string buf
4949+end
5050+5151+(* Page types *)
5252+type page_type =
5353+ | Interior_index
5454+ | Interior_table
5555+ | Leaf_index
5656+ | Leaf_table
5757+5858+let pp_page_type ppf = function
5959+ | Interior_index -> Format.pp_print_string ppf "interior_index"
6060+ | Interior_table -> Format.pp_print_string ppf "interior_table"
6161+ | Leaf_index -> Format.pp_print_string ppf "leaf_index"
6262+ | Leaf_table -> Format.pp_print_string ppf "leaf_table"
6363+6464+let page_type_of_byte = function
6565+ | 0x02 -> Interior_index
6666+ | 0x05 -> Interior_table
6767+ | 0x0a -> Leaf_index
6868+ | 0x0d -> Leaf_table
6969+ | b -> failwith (Printf.sprintf "Invalid page type: 0x%02x" b)
7070+7171+let byte_of_page_type = function
7272+ | Interior_index -> 0x02
7373+ | Interior_table -> 0x05
7474+ | Leaf_index -> 0x0a
7575+ | Leaf_table -> 0x0d
7676+7777+let page_header_size = function
7878+ | Interior_index | Interior_table -> 12
7979+ | Leaf_index | Leaf_table -> 8
8080+8181+let is_interior = function
8282+ | Interior_index | Interior_table -> true
8383+ | Leaf_index | Leaf_table -> false
8484+8585+(* Page header *)
8686+type page_header = {
8787+ page_type : page_type;
8888+ first_freeblock : int;
8989+ cell_count : int;
9090+ cell_content_start : int;
9191+ fragmented_bytes : int;
9292+ right_child : int option;
9393+}
9494+9595+let get_u16_be buf off =
9696+ (Char.code buf.[off] lsl 8) lor Char.code buf.[off + 1]
9797+9898+let get_u32_be buf off =
9999+ (Char.code buf.[off] lsl 24)
100100+ lor (Char.code buf.[off + 1] lsl 16)
101101+ lor (Char.code buf.[off + 2] lsl 8)
102102+ lor Char.code buf.[off + 3]
103103+104104+let set_u16_be buf off v =
105105+ Bytes.set_uint8 buf off (v lsr 8);
106106+ Bytes.set_uint8 buf (off + 1) (v land 0xff)
107107+108108+let set_u32_be buf off v =
109109+ Bytes.set_uint8 buf off (v lsr 24);
110110+ Bytes.set_uint8 buf (off + 1) ((v lsr 16) land 0xff);
111111+ Bytes.set_uint8 buf (off + 2) ((v lsr 8) land 0xff);
112112+ Bytes.set_uint8 buf (off + 3) (v land 0xff)
113113+114114+let parse_page_header buf off =
115115+ let page_type = page_type_of_byte (Char.code buf.[off]) in
116116+ let first_freeblock = get_u16_be buf (off + 1) in
117117+ let cell_count = get_u16_be buf (off + 3) in
118118+ let cell_content_start =
119119+ let v = get_u16_be buf (off + 5) in
120120+ if v = 0 then 65536 else v
121121+ in
122122+ let fragmented_bytes = Char.code buf.[off + 7] in
123123+ let right_child =
124124+ if is_interior page_type then Some (get_u32_be buf (off + 8)) else None
125125+ in
126126+ {
127127+ page_type;
128128+ first_freeblock;
129129+ cell_count;
130130+ cell_content_start;
131131+ fragmented_bytes;
132132+ right_child;
133133+ }
134134+135135+(* Cells *)
136136+module Cell = struct
137137+ type table_leaf = {
138138+ rowid : int64;
139139+ payload : string;
140140+ overflow_page : int option;
141141+ }
142142+143143+ type table_interior = { left_child : int; rowid : int64 }
144144+145145+ type index_leaf = { payload : string; overflow_page : int option }
146146+147147+ type index_interior = {
148148+ left_child : int;
149149+ payload : string;
150150+ overflow_page : int option;
151151+ }
152152+153153+ (* Calculate max payload on page - simplified *)
154154+ let max_local ~usable_size ~is_table =
155155+ if is_table then usable_size - 35 else ((usable_size - 12) * 64 / 255) - 23
156156+157157+ let min_local ~usable_size =
158158+ ((usable_size - 12) * 32 / 255) - 23
159159+160160+ let parse_table_leaf buf off ~usable_size =
161161+ let payload_size, consumed1 = Varint.decode buf off in
162162+ let rowid, consumed2 = Varint.decode buf (off + consumed1) in
163163+ let header_len = consumed1 + consumed2 in
164164+ let payload_size = Int64.to_int payload_size in
165165+ let max_local = max_local ~usable_size ~is_table:true in
166166+ let min_local = min_local ~usable_size in
167167+ let local_size, overflow_page =
168168+ if payload_size <= max_local then (payload_size, None)
169169+ else
170170+ let k = min_local + ((payload_size - min_local) mod (usable_size - 4)) in
171171+ let local = if k <= max_local then k else min_local in
172172+ let overflow = get_u32_be buf (off + header_len + local) in
173173+ (local, Some overflow)
174174+ in
175175+ let payload = String.sub buf (off + header_len) local_size in
176176+ let total_consumed =
177177+ header_len + local_size + (if overflow_page = None then 0 else 4)
178178+ in
179179+ ({ rowid; payload; overflow_page }, total_consumed)
180180+181181+ let parse_table_interior buf off =
182182+ let left_child = get_u32_be buf off in
183183+ let rowid, consumed = Varint.decode buf (off + 4) in
184184+ ({ left_child; rowid }, 4 + consumed)
185185+186186+ let parse_index_leaf buf off ~usable_size =
187187+ let payload_size, consumed = Varint.decode buf off in
188188+ let payload_size = Int64.to_int payload_size in
189189+ let max_local = max_local ~usable_size ~is_table:false in
190190+ let min_local = min_local ~usable_size in
191191+ let local_size, overflow_page =
192192+ if payload_size <= max_local then (payload_size, None)
193193+ else
194194+ let k = min_local + ((payload_size - min_local) mod (usable_size - 4)) in
195195+ let local = if k <= max_local then k else min_local in
196196+ let overflow = get_u32_be buf (off + consumed + local) in
197197+ (local, Some overflow)
198198+ in
199199+ let payload = String.sub buf (off + consumed) local_size in
200200+ let total = consumed + local_size + (if overflow_page = None then 0 else 4) in
201201+ ({ payload; overflow_page }, total)
202202+203203+ let parse_index_interior buf off ~usable_size =
204204+ let left_child = get_u32_be buf off in
205205+ let payload_size, consumed = Varint.decode buf (off + 4) in
206206+ let payload_size = Int64.to_int payload_size in
207207+ let max_local = max_local ~usable_size ~is_table:false in
208208+ let min_local = min_local ~usable_size in
209209+ let local_size, overflow_page =
210210+ if payload_size <= max_local then (payload_size, None)
211211+ else
212212+ let k = min_local + ((payload_size - min_local) mod (usable_size - 4)) in
213213+ let local = if k <= max_local then k else min_local in
214214+ let overflow = get_u32_be buf (off + 4 + consumed + local) in
215215+ (local, Some overflow)
216216+ in
217217+ let payload = String.sub buf (off + 4 + consumed) local_size in
218218+ let total =
219219+ 4 + consumed + local_size + (if overflow_page = None then 0 else 4)
220220+ in
221221+ ({ left_child; payload; overflow_page }, total)
222222+end
223223+224224+(* Record format *)
225225+module Record = struct
226226+ type serial_type =
227227+ | Null
228228+ | Int8
229229+ | Int16
230230+ | Int24
231231+ | Int32
232232+ | Int48
233233+ | Int64
234234+ | Float64
235235+ | Zero
236236+ | One
237237+ | Blob of int
238238+ | Text of int
239239+240240+ type value = Vnull | Vint of int64 | Vfloat of float | Vblob of string | Vtext of string
241241+242242+ let serial_type_of_int = function
243243+ | 0 -> Null
244244+ | 1 -> Int8
245245+ | 2 -> Int16
246246+ | 3 -> Int24
247247+ | 4 -> Int32
248248+ | 5 -> Int48
249249+ | 6 -> Int64
250250+ | 7 -> Float64
251251+ | 8 -> Zero
252252+ | 9 -> One
253253+ | n when n >= 12 && n mod 2 = 0 -> Blob ((n - 12) / 2)
254254+ | n when n >= 13 -> Text ((n - 13) / 2)
255255+ | n -> failwith (Printf.sprintf "Invalid serial type: %d" n)
256256+257257+ let serial_type_size = function
258258+ | Null | Zero | One -> 0
259259+ | Int8 -> 1
260260+ | Int16 -> 2
261261+ | Int24 -> 3
262262+ | Int32 -> 4
263263+ | Int48 -> 6
264264+ | Int64 | Float64 -> 8
265265+ | Blob n | Text n -> n
266266+267267+ let decode_int buf off len =
268268+ let rec loop acc i =
269269+ if i >= len then acc
270270+ else
271271+ let b = Char.code buf.[off + i] in
272272+ let acc = Int64.logor (Int64.shift_left acc 8) (Int64.of_int b) in
273273+ loop acc (i + 1)
274274+ in
275275+ (* Sign extend for negative values *)
276276+ let v = loop 0L 0 in
277277+ if len > 0 && Char.code buf.[off] land 0x80 <> 0 then
278278+ let mask = Int64.shift_left (-1L) (len * 8) in
279279+ Int64.logor v mask
280280+ else v
281281+282282+ let decode payload =
283283+ let header_size, consumed = Varint.decode payload 0 in
284284+ let header_size = Int64.to_int header_size in
285285+ (* Parse serial types *)
286286+ let rec parse_types off acc =
287287+ if off >= header_size then List.rev acc
288288+ else
289289+ let st, consumed = Varint.decode payload off in
290290+ let st = serial_type_of_int (Int64.to_int st) in
291291+ parse_types (off + consumed) (st :: acc)
292292+ in
293293+ let types = parse_types consumed [] in
294294+ (* Parse values *)
295295+ let rec parse_values types off acc =
296296+ match types with
297297+ | [] -> List.rev acc
298298+ | st :: rest ->
299299+ let value, sz =
300300+ match st with
301301+ | Null -> (Vnull, 0)
302302+ | Zero -> (Vint 0L, 0)
303303+ | One -> (Vint 1L, 0)
304304+ | Int8 -> (Vint (decode_int payload off 1), 1)
305305+ | Int16 -> (Vint (decode_int payload off 2), 2)
306306+ | Int24 -> (Vint (decode_int payload off 3), 3)
307307+ | Int32 -> (Vint (decode_int payload off 4), 4)
308308+ | Int48 -> (Vint (decode_int payload off 6), 6)
309309+ | Int64 -> (Vint (decode_int payload off 8), 8)
310310+ | Float64 ->
311311+ let bits = decode_int payload off 8 in
312312+ (Vfloat (Int64.float_of_bits bits), 8)
313313+ | Blob n -> (Vblob (String.sub payload off n), n)
314314+ | Text n -> (Vtext (String.sub payload off n), n)
315315+ in
316316+ parse_values rest (off + sz) (value :: acc)
317317+ in
318318+ parse_values types header_size []
319319+320320+ let serial_type_of_value = function
321321+ | Vnull -> (0, 0)
322322+ | Vint 0L -> (8, 0)
323323+ | Vint 1L -> (9, 0)
324324+ | Vint n ->
325325+ if n >= -128L && n <= 127L then (1, 1)
326326+ else if n >= -32768L && n <= 32767L then (2, 2)
327327+ else if n >= -8388608L && n <= 8388607L then (3, 3)
328328+ else if n >= -2147483648L && n <= 2147483647L then (4, 4)
329329+ else if n >= -140737488355328L && n <= 140737488355327L then (5, 6)
330330+ else (6, 8)
331331+ | Vfloat _ -> (7, 8)
332332+ | Vblob s -> (12 + String.length s * 2, String.length s)
333333+ | Vtext s -> (13 + String.length s * 2, String.length s)
334334+335335+ let encode_int buf off n len =
336336+ for i = 0 to len - 1 do
337337+ let shift = (len - 1 - i) * 8 in
338338+ Bytes.set_uint8 buf (off + i)
339339+ (Int64.to_int (Int64.shift_right n shift) land 0xff)
340340+ done
341341+342342+ let encode values =
343343+ (* Calculate header *)
344344+ let types_and_sizes = List.map serial_type_of_value values in
345345+ let header_types =
346346+ List.map (fun (st, _) -> Varint.encode (Int64.of_int st)) types_and_sizes
347347+ in
348348+ let header_body = String.concat "" header_types in
349349+ let header_size = 1 + String.length header_body in
350350+ (* header size varint + types *)
351351+ let body_size = List.fold_left (fun acc (_, sz) -> acc + sz) 0 types_and_sizes in
352352+ let total = header_size + body_size in
353353+ let buf = Bytes.create total in
354354+ (* Write header size *)
355355+ Bytes.set_uint8 buf 0 header_size;
356356+ (* Write serial types *)
357357+ let _ =
358358+ List.fold_left
359359+ (fun off s ->
360360+ Bytes.blit_string s 0 buf off (String.length s);
361361+ off + String.length s)
362362+ 1 header_types
363363+ in
364364+ (* Write values *)
365365+ let _ =
366366+ List.fold_left2
367367+ (fun off value (_, sz) ->
368368+ (match value with
369369+ | Vnull | Vint 0L | Vint 1L -> ()
370370+ | Vint n -> encode_int buf off n sz
371371+ | Vfloat f -> encode_int buf off (Int64.bits_of_float f) 8
372372+ | Vblob s | Vtext s -> Bytes.blit_string s 0 buf off sz);
373373+ off + sz)
374374+ header_size values types_and_sizes
375375+ in
376376+ Bytes.unsafe_to_string buf
377377+378378+ let pp_value ppf = function
379379+ | Vnull -> Format.pp_print_string ppf "NULL"
380380+ | Vint n -> Format.fprintf ppf "%Ld" n
381381+ | Vfloat f -> Format.fprintf ppf "%f" f
382382+ | Vblob s -> Format.fprintf ppf "BLOB(%d)" (String.length s)
383383+ | Vtext s -> Format.fprintf ppf "%S" s
384384+end
385385+386386+(* Pager *)
387387+module Pager = struct
388388+ type t = {
389389+ file : Eio.File.rw_ty Eio.Resource.t;
390390+ page_size : int;
391391+ mutable page_count : int;
392392+ cache : (int, string) Hashtbl.t;
393393+ dirty : (int, string) Hashtbl.t;
394394+ }
395395+396396+ let create ~page_size file =
397397+ let stat = Eio.File.stat file in
398398+ let file_size = Optint.Int63.to_int stat.size in
399399+ let page_count = if file_size = 0 then 0 else file_size / page_size in
400400+ {
401401+ file;
402402+ page_size;
403403+ page_count;
404404+ cache = Hashtbl.create 64;
405405+ dirty = Hashtbl.create 16;
406406+ }
407407+408408+ let page_size t = t.page_size
409409+ let page_count t = t.page_count
410410+411411+ let read t page_num =
412412+ if page_num < 1 || page_num > t.page_count then
413413+ failwith (Printf.sprintf "Invalid page number: %d" page_num);
414414+ match Hashtbl.find_opt t.dirty page_num with
415415+ | Some data -> data
416416+ | None -> (
417417+ match Hashtbl.find_opt t.cache page_num with
418418+ | Some data -> data
419419+ | None ->
420420+ let offset = Optint.Int63.of_int ((page_num - 1) * t.page_size) in
421421+ let buf = Cstruct.create t.page_size in
422422+ Eio.File.pread_exact t.file ~file_offset:offset [ buf ];
423423+ let data = Cstruct.to_string buf in
424424+ Hashtbl.replace t.cache page_num data;
425425+ data)
426426+427427+ let write t page_num data =
428428+ if String.length data <> t.page_size then
429429+ failwith "Invalid page size";
430430+ Hashtbl.replace t.dirty page_num data;
431431+ Hashtbl.replace t.cache page_num data
432432+433433+ let allocate t =
434434+ t.page_count <- t.page_count + 1;
435435+ let data = String.make t.page_size '\x00' in
436436+ Hashtbl.replace t.dirty t.page_count data;
437437+ Hashtbl.replace t.cache t.page_count data;
438438+ t.page_count
439439+440440+ let sync t =
441441+ Hashtbl.iter
442442+ (fun page_num data ->
443443+ let offset = Optint.Int63.of_int ((page_num - 1) * t.page_size) in
444444+ let buf = Cstruct.of_string data in
445445+ Eio.File.pwrite_all t.file ~file_offset:offset [ buf ])
446446+ t.dirty;
447447+ Hashtbl.clear t.dirty
448448+end
449449+450450+(* Table B-tree *)
451451+module Table = struct
452452+ type t = { pager : Pager.t; mutable root_page : int }
453453+454454+ let create pager =
455455+ let root = Pager.allocate pager in
456456+ (* Initialize as empty leaf page *)
457457+ let page_size = Pager.page_size pager in
458458+ let buf = Bytes.create page_size in
459459+ Bytes.set_uint8 buf 0 (byte_of_page_type Leaf_table);
460460+ set_u16_be buf 1 0;
461461+ (* first freeblock *)
462462+ set_u16_be buf 3 0;
463463+ (* cell count *)
464464+ set_u16_be buf 5 page_size;
465465+ (* cell content start *)
466466+ Bytes.set_uint8 buf 7 0;
467467+ (* fragmented bytes *)
468468+ Pager.write pager root (Bytes.unsafe_to_string buf);
469469+ { pager; root_page = root }
470470+471471+ let open_ pager ~root_page = { pager; root_page }
472472+ let root_page t = t.root_page
473473+474474+ let usable_size t = Pager.page_size t.pager
475475+476476+ (* Find cell pointers in a page *)
477477+ let cell_pointers page header_offset header =
478478+ let ptrs = Array.make header.cell_count 0 in
479479+ let ptr_start = header_offset + page_header_size header.page_type in
480480+ for i = 0 to header.cell_count - 1 do
481481+ ptrs.(i) <- get_u16_be page (ptr_start + i * 2)
482482+ done;
483483+ ptrs
484484+485485+ (* Binary search for rowid in leaf page *)
486486+ let search_leaf t page header rowid =
487487+ let ptrs = cell_pointers page 0 header in
488488+ let usable = usable_size t in
489489+ let rec loop lo hi =
490490+ if lo > hi then None
491491+ else
492492+ let mid = (lo + hi) / 2 in
493493+ let cell, _ = Cell.parse_table_leaf page ptrs.(mid) ~usable_size:usable in
494494+ if cell.rowid = rowid then Some cell.payload
495495+ else if cell.rowid < rowid then loop (mid + 1) hi
496496+ else loop lo (mid - 1)
497497+ in
498498+ loop 0 (header.cell_count - 1)
499499+500500+ (* Find child page for rowid in interior page *)
501501+ let find_child t page header rowid =
502502+ let ptrs = cell_pointers page 0 header in
503503+ let rec loop i =
504504+ if i >= header.cell_count then Option.get header.right_child
505505+ else
506506+ let cell, _ = Cell.parse_table_interior page ptrs.(i) in
507507+ if rowid <= cell.rowid then cell.left_child else loop (i + 1)
508508+ in
509509+ loop 0
510510+511511+ let rec find_in_page t page_num rowid =
512512+ let page = Pager.read t.pager page_num in
513513+ let header = parse_page_header page 0 in
514514+ match header.page_type with
515515+ | Leaf_table -> search_leaf t page header rowid
516516+ | Interior_table ->
517517+ let child = find_child t page header rowid in
518518+ find_in_page t child rowid
519519+ | _ -> failwith "Invalid page type in table B-tree"
520520+521521+ let find t rowid = find_in_page t t.root_page rowid
522522+523523+ (* Insert - simplified, doesn't handle page splits *)
524524+ let insert t ~rowid data =
525525+ let page = Pager.read t.pager t.root_page in
526526+ let header = parse_page_header page 0 in
527527+ match header.page_type with
528528+ | Leaf_table ->
529529+ let page_size = Pager.page_size t.pager in
530530+ let buf = Bytes.of_string page in
531531+ (* Create cell *)
532532+ let rowid_varint = Varint.encode rowid in
533533+ let payload_size_varint = Varint.encode (Int64.of_int (String.length data)) in
534534+ let cell_size =
535535+ String.length payload_size_varint + String.length rowid_varint + String.length data
536536+ in
537537+ (* Find insertion point *)
538538+ let ptrs = cell_pointers page 0 header in
539539+ let usable = usable_size t in
540540+ let insert_idx =
541541+ let rec find i =
542542+ if i >= header.cell_count then i
543543+ else
544544+ let cell, _ = Cell.parse_table_leaf page ptrs.(i) ~usable_size:usable in
545545+ if rowid < cell.rowid then i else find (i + 1)
546546+ in
547547+ find 0
548548+ in
549549+ (* Write cell at end of cell content area *)
550550+ let cell_start = header.cell_content_start - cell_size in
551551+ Bytes.blit_string payload_size_varint 0 buf cell_start
552552+ (String.length payload_size_varint);
553553+ let off = cell_start + String.length payload_size_varint in
554554+ Bytes.blit_string rowid_varint 0 buf off (String.length rowid_varint);
555555+ let off = off + String.length rowid_varint in
556556+ Bytes.blit_string data 0 buf off (String.length data);
557557+ (* Update cell content start *)
558558+ set_u16_be buf 5 cell_start;
559559+ (* Insert cell pointer *)
560560+ let ptr_area_start = page_header_size Leaf_table in
561561+ (* Shift existing pointers *)
562562+ for i = header.cell_count - 1 downto insert_idx do
563563+ set_u16_be buf (ptr_area_start + (i + 1) * 2) (get_u16_be page (ptr_area_start + i * 2))
564564+ done;
565565+ set_u16_be buf (ptr_area_start + insert_idx * 2) cell_start;
566566+ (* Update cell count *)
567567+ set_u16_be buf 3 (header.cell_count + 1);
568568+ Pager.write t.pager t.root_page (Bytes.unsafe_to_string buf)
569569+ | _ -> failwith "Insert into interior page not yet implemented"
570570+571571+ let delete _t _rowid = failwith "Delete not yet implemented"
572572+573573+ let iter t f =
574574+ let rec iter_page page_num =
575575+ let page = Pager.read t.pager page_num in
576576+ let header = parse_page_header page 0 in
577577+ let ptrs = cell_pointers page 0 header in
578578+ let usable = usable_size t in
579579+ match header.page_type with
580580+ | Leaf_table ->
581581+ for i = 0 to header.cell_count - 1 do
582582+ let cell, _ = Cell.parse_table_leaf page ptrs.(i) ~usable_size:usable in
583583+ f cell.rowid cell.payload
584584+ done
585585+ | Interior_table ->
586586+ for i = 0 to header.cell_count - 1 do
587587+ let cell, _ = Cell.parse_table_interior page ptrs.(i) in
588588+ iter_page cell.left_child
589589+ done;
590590+ Option.iter iter_page header.right_child
591591+ | _ -> failwith "Invalid page type"
592592+ in
593593+ iter_page t.root_page
594594+595595+ let fold t ~init ~f =
596596+ let acc = ref init in
597597+ iter t (fun rowid data -> acc := f rowid data !acc);
598598+ !acc
599599+end
600600+601601+(* Index B-tree *)
602602+module Index = struct
603603+ type t = { pager : Pager.t; mutable root_page : int }
604604+605605+ let create pager =
606606+ let root = Pager.allocate pager in
607607+ let page_size = Pager.page_size pager in
608608+ let buf = Bytes.create page_size in
609609+ Bytes.set_uint8 buf 0 (byte_of_page_type Leaf_index);
610610+ set_u16_be buf 1 0;
611611+ set_u16_be buf 3 0;
612612+ set_u16_be buf 5 page_size;
613613+ Bytes.set_uint8 buf 7 0;
614614+ Pager.write pager root (Bytes.unsafe_to_string buf);
615615+ { pager; root_page = root }
616616+617617+ let open_ pager ~root_page = { pager; root_page }
618618+ let root_page t = t.root_page
619619+620620+ let usable_size t = Pager.page_size t.pager
621621+622622+ let cell_pointers page header =
623623+ let ptrs = Array.make header.cell_count 0 in
624624+ let ptr_start = page_header_size header.page_type in
625625+ for i = 0 to header.cell_count - 1 do
626626+ ptrs.(i) <- get_u16_be page (ptr_start + i * 2)
627627+ done;
628628+ ptrs
629629+630630+ let rec mem_in_page t page_num key =
631631+ let page = Pager.read t.pager page_num in
632632+ let header = parse_page_header page 0 in
633633+ let ptrs = cell_pointers page header in
634634+ let usable = usable_size t in
635635+ match header.page_type with
636636+ | Leaf_index ->
637637+ let rec search i =
638638+ if i >= header.cell_count then false
639639+ else
640640+ let cell, _ = Cell.parse_index_leaf page ptrs.(i) ~usable_size:usable in
641641+ if cell.payload = key then true
642642+ else if cell.payload > key then false
643643+ else search (i + 1)
644644+ in
645645+ search 0
646646+ | Interior_index ->
647647+ let rec find_child i =
648648+ if i >= header.cell_count then Option.get header.right_child
649649+ else
650650+ let cell, _ = Cell.parse_index_interior page ptrs.(i) ~usable_size:usable in
651651+ if key <= cell.payload then cell.left_child else find_child (i + 1)
652652+ in
653653+ mem_in_page t (find_child 0) key
654654+ | _ -> failwith "Invalid page type in index B-tree"
655655+656656+ let mem t key = mem_in_page t t.root_page key
657657+658658+ let insert _t _key = failwith "Index insert not yet implemented"
659659+ let delete _t _key = failwith "Index delete not yet implemented"
660660+661661+ let iter t f =
662662+ let rec iter_page page_num =
663663+ let page = Pager.read t.pager page_num in
664664+ let header = parse_page_header page 0 in
665665+ let ptrs = cell_pointers page header in
666666+ let usable = usable_size t in
667667+ match header.page_type with
668668+ | Leaf_index ->
669669+ for i = 0 to header.cell_count - 1 do
670670+ let cell, _ = Cell.parse_index_leaf page ptrs.(i) ~usable_size:usable in
671671+ f cell.payload
672672+ done
673673+ | Interior_index ->
674674+ for i = 0 to header.cell_count - 1 do
675675+ let cell, _ = Cell.parse_index_interior page ptrs.(i) ~usable_size:usable in
676676+ iter_page cell.left_child
677677+ done;
678678+ Option.iter iter_page header.right_child
679679+ | _ -> failwith "Invalid page type"
680680+ in
681681+ iter_page t.root_page
682682+end
+224
lib/btree.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Thomas Gazagnaire. All rights reserved.
33+ SPDX-License-Identifier: MIT
44+ ---------------------------------------------------------------------------*)
55+66+(** Pure OCaml B-tree for persistent storage.
77+88+ Implements SQLite-compatible B-tree pages for table and index storage. *)
99+1010+(** {1 Varint Encoding}
1111+1212+ Variable-length integers as used in SQLite. *)
1313+1414+module Varint : sig
1515+ val decode : string -> int -> int64 * int
1616+ (** [decode buf off] decodes a varint starting at [off] in [buf].
1717+ Returns [(value, bytes_consumed)]. *)
1818+1919+ val encode : int64 -> string
2020+ (** [encode n] encodes [n] as a varint. *)
2121+2222+ val size : int64 -> int
2323+ (** [size n] returns the number of bytes needed to encode [n]. *)
2424+end
2525+2626+(** {1 Page Types} *)
2727+2828+type page_type =
2929+ | Interior_index (** 0x02 *)
3030+ | Interior_table (** 0x05 *)
3131+ | Leaf_index (** 0x0a *)
3232+ | Leaf_table (** 0x0d *)
3333+3434+val pp_page_type : Format.formatter -> page_type -> unit
3535+3636+(** {1 Page Header} *)
3737+3838+type page_header = {
3939+ page_type : page_type;
4040+ first_freeblock : int;
4141+ cell_count : int;
4242+ cell_content_start : int;
4343+ fragmented_bytes : int;
4444+ right_child : int option; (** Interior pages only *)
4545+}
4646+4747+val parse_page_header : string -> int -> page_header
4848+(** [parse_page_header buf off] parses a page header starting at [off].
4949+ For page 1, [off] should be 100 (after database header). *)
5050+5151+val page_header_size : page_type -> int
5252+(** [page_header_size typ] is 8 for leaf pages, 12 for interior pages. *)
5353+5454+(** {1 Cells}
5555+5656+ Cells contain the actual data in B-tree pages. *)
5757+5858+module Cell : sig
5959+ (** Table leaf cell: rowid + payload *)
6060+ type table_leaf = {
6161+ rowid : int64;
6262+ payload : string;
6363+ overflow_page : int option;
6464+ }
6565+6666+ (** Table interior cell: child page + rowid *)
6767+ type table_interior = {
6868+ left_child : int;
6969+ rowid : int64;
7070+ }
7171+7272+ (** Index leaf cell: payload only *)
7373+ type index_leaf = {
7474+ payload : string;
7575+ overflow_page : int option;
7676+ }
7777+7878+ (** Index interior cell: child page + payload *)
7979+ type index_interior = {
8080+ left_child : int;
8181+ payload : string;
8282+ overflow_page : int option;
8383+ }
8484+8585+ val parse_table_leaf : string -> int -> usable_size:int -> table_leaf * int
8686+ (** [parse_table_leaf buf off ~usable_size] parses a table leaf cell.
8787+ Returns [(cell, bytes_consumed)]. *)
8888+8989+ val parse_table_interior : string -> int -> table_interior * int
9090+ (** [parse_table_interior buf off] parses a table interior cell. *)
9191+9292+ val parse_index_leaf : string -> int -> usable_size:int -> index_leaf * int
9393+ (** [parse_index_leaf buf off ~usable_size] parses an index leaf cell. *)
9494+9595+ val parse_index_interior : string -> int -> usable_size:int -> index_interior * int
9696+ (** [parse_index_interior buf off ~usable_size] parses an index interior cell. *)
9797+end
9898+9999+(** {1 Record Format}
100100+101101+ SQLite record format for storing column values. *)
102102+103103+module Record : sig
104104+ (** Serial types determine how column values are stored. *)
105105+ type serial_type =
106106+ | Null
107107+ | Int8
108108+ | Int16
109109+ | Int24
110110+ | Int32
111111+ | Int48
112112+ | Int64
113113+ | Float64
114114+ | Zero
115115+ | One
116116+ | Blob of int
117117+ | Text of int
118118+119119+ (** Column values *)
120120+ type value =
121121+ | Vnull
122122+ | Vint of int64
123123+ | Vfloat of float
124124+ | Vblob of string
125125+ | Vtext of string
126126+127127+ val decode : string -> value list
128128+ (** [decode payload] decodes a record from its payload bytes. *)
129129+130130+ val encode : value list -> string
131131+ (** [encode values] encodes values as a record. *)
132132+133133+ val pp_value : Format.formatter -> value -> unit
134134+end
135135+136136+(** {1 Pager}
137137+138138+ Page I/O and caching layer. *)
139139+140140+module Pager : sig
141141+ type t
142142+143143+ val create : page_size:int -> Eio.File.rw_ty Eio.Resource.t -> t
144144+ (** [create ~page_size file] creates a pager with the given page size. *)
145145+146146+ val page_size : t -> int
147147+ (** [page_size t] returns the page size. *)
148148+149149+ val page_count : t -> int
150150+ (** [page_count t] returns the number of pages in the file. *)
151151+152152+ val read : t -> int -> string
153153+ (** [read t page_num] reads page [page_num] (1-indexed). *)
154154+155155+ val write : t -> int -> string -> unit
156156+ (** [write t page_num data] writes [data] to page [page_num]. *)
157157+158158+ val allocate : t -> int
159159+ (** [allocate t] allocates a new page and returns its number. *)
160160+161161+ val sync : t -> unit
162162+ (** [sync t] syncs all dirty pages to disk. *)
163163+end
164164+165165+(** {1 Table B-tree}
166166+167167+ B-tree for rowid-keyed tables (like SQLite tables). *)
168168+169169+module Table : sig
170170+ type t
171171+172172+ val create : Pager.t -> t
173173+ (** [create pager] creates a new empty table B-tree. *)
174174+175175+ val open_ : Pager.t -> root_page:int -> t
176176+ (** [open_ pager ~root_page] opens an existing table B-tree. *)
177177+178178+ val root_page : t -> int
179179+ (** [root_page t] returns the root page number. *)
180180+181181+ val find : t -> int64 -> string option
182182+ (** [find t rowid] finds the record with the given rowid. *)
183183+184184+ val insert : t -> rowid:int64 -> string -> unit
185185+ (** [insert t ~rowid data] inserts or updates a record. *)
186186+187187+ val delete : t -> int64 -> unit
188188+ (** [delete t rowid] deletes the record with the given rowid. *)
189189+190190+ val iter : t -> (int64 -> string -> unit) -> unit
191191+ (** [iter t f] calls [f rowid data] for each record in order. *)
192192+193193+ val fold : t -> init:'a -> f:(int64 -> string -> 'a -> 'a) -> 'a
194194+ (** [fold t ~init ~f] folds over all records in order. *)
195195+end
196196+197197+(** {1 Index B-tree}
198198+199199+ B-tree for arbitrary keys (like SQLite indexes). *)
200200+201201+module Index : sig
202202+ type t
203203+204204+ val create : Pager.t -> t
205205+ (** [create pager] creates a new empty index B-tree. *)
206206+207207+ val open_ : Pager.t -> root_page:int -> t
208208+ (** [open_ pager ~root_page] opens an existing index B-tree. *)
209209+210210+ val root_page : t -> int
211211+ (** [root_page t] returns the root page number. *)
212212+213213+ val mem : t -> string -> bool
214214+ (** [mem t key] returns true if [key] exists in the index. *)
215215+216216+ val insert : t -> string -> unit
217217+ (** [insert t key] inserts a key. *)
218218+219219+ val delete : t -> string -> unit
220220+ (** [delete t key] deletes a key. *)
221221+222222+ val iter : t -> (string -> unit) -> unit
223223+ (** [iter t f] calls [f key] for each key in order. *)
224224+end