Persistent store with Git semantics: lazy reads, delayed writes, content-addressing

Add ?inode parameter to disable inode splitting for git compatibility

Tree.hash and Store.commit now accept ?inode:bool (default true).
When ~inode:false is passed, large tree nodes are always written as flat
nodes instead of being split into inode tries, and existing inode nodes
are expanded back to flat format when they are written. This ensures
100% git-compatible output without limiting tree size.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+49 -17
+2 -2
lib/store.ml
··· 36 36 | None -> None 37 37 | Some commit -> Some (read_tree t (Commit.tree commit))) 38 38 39 - let commit ?inline_threshold t ~tree ~parents ~message ~author = 39 + let commit ?inline_threshold ?inode t ~tree ~parents ~message ~author = 40 40 (* This is where delayed writes happen *) 41 - let tree_hash = Tree.hash ?inline_threshold tree ~backend:t.backend in 41 + let tree_hash = Tree.hash ?inline_threshold ?inode tree ~backend:t.backend in 42 42 let c = Commit.v ~tree:tree_hash ~parents ~author ~message () in 43 43 let data = Commit.to_bytes c in 44 44 let h = Commit.hash c in
+6 -3
lib/store.mli
··· 33 33 34 34 val commit : 35 35 ?inline_threshold:int -> 36 + ?inode:bool -> 36 37 t -> 37 38 tree:Tree.t -> 38 39 parents:hash list -> 39 40 message:string -> 40 41 author:string -> 41 42 hash 42 - (** [commit ?inline_threshold t ~tree ~parents ~message ~author] creates a 43 - commit. This is when delayed tree writes actually happen. If 43 + (** [commit ?inline_threshold ?inode t ~tree ~parents ~message ~author] 44 + creates a commit. This is when delayed tree writes actually happen. If 44 45 [inline_threshold] is given, contents smaller than that are stored 45 - inline in tree nodes. *) 46 + inline in tree nodes. If [inode] is [false], inodes are disabled and 47 + large trees are written as flat nodes (required for git compatibility). 48 + Defaults to [true]. *) 46 49 47 50 (** {2 Branch Operations} *) 48 51
+36 -9
lib/tree.ml
··· 269 269 `Tree all 270 270 271 271 (* Write tree to backend and return hash *) 272 - let rec write_tree t ~inline_threshold ~(backend : hash Backend.t) : hash = 272 + let rec write_tree t ~inline_threshold ~inode ~(backend : hash Backend.t) : hash = 273 273 match t with 274 274 | Contents s -> 275 275 let h = F.hash_contents s in ··· 292 292 (name, (`Contents h : F.entry)) 293 293 | Node _ -> 294 294 let child_hash = 295 - write_tree child ~inline_threshold ~backend 295 + write_tree child ~inline_threshold ~inode ~backend 296 296 in 297 297 (name, (`Node child_hash : F.entry))) 298 298 node.children 299 299 in 300 300 (match node.state with 301 - | Inode { hash; _ } -> 301 + | Inode { hash; backend = ib } when inode -> 302 302 (* Incremental update: only modify affected inode buckets *) 303 303 Inode.update ~backend hash ~additions:child_entries 304 304 ~removals:node.removed 305 + | Inode { hash; backend = ib } -> 306 + (* Inodes disabled: expand to flat node *) 307 + let base_entries = Inode.list_all ~backend:ib hash in 308 + let base = 309 + List.fold_left 310 + (fun n (name, entry) -> F.add n name entry) 311 + F.empty_node base_entries 312 + in 313 + let base = 314 + List.fold_left (fun n name -> F.remove n name) base node.removed 315 + in 316 + let final = 317 + List.fold_left 318 + (fun n (name, entry) -> F.add n name entry) 319 + base child_entries 320 + in 321 + let data = F.bytes_of_node final in 322 + let h = F.hash_node final in 323 + backend.write h data; 324 + h 305 325 | _ -> 306 326 (* Flat node: apply modifications, promote to inode if too large *) 307 327 let base = ··· 317 337 (fun n (name, entry) -> F.add n name entry) 318 338 base child_entries 319 339 in 320 - let entries = F.list final in 321 - if List.length entries > Inode.max_entries then 322 - Inode.write entries ~backend 323 - else begin 340 + if inode then begin 341 + let entries = F.list final in 342 + if List.length entries > Inode.max_entries then 343 + Inode.write entries ~backend 
344 + else begin 345 + let data = F.bytes_of_node final in 346 + let h = F.hash_node final in 347 + backend.write h data; 348 + h 349 + end 350 + end else begin 324 351 let data = F.bytes_of_node final in 325 352 let h = F.hash_node final in 326 353 backend.write h data; 327 354 h 328 355 end) 329 356 330 - let hash ?(inline_threshold = F.inline_threshold) t ~backend = 331 - write_tree t ~inline_threshold ~backend 357 + let hash ?(inline_threshold = F.inline_threshold) ?(inode = true) t ~backend = 358 + write_tree t ~inline_threshold ~inode ~backend 332 359 333 360 type 'a force = [ `True | `False of hash -> 'a | `Shallow of hash -> 'a ] 334 361
+5 -3
lib/tree.mli
··· 78 78 (** [to_concrete t] fully materializes the tree. Forces all lazy nodes to be 79 79 loaded. *) 80 80 81 - val hash : ?inline_threshold:int -> t -> backend:hash Backend.t -> hash 82 - (** [hash ?inline_threshold t ~backend] computes the tree hash. Writes all 81 + val hash : ?inline_threshold:int -> ?inode:bool -> t -> backend:hash Backend.t -> hash 82 + (** [hash ?inline_threshold ?inode t ~backend] computes the tree hash. Writes all 83 83 accumulated changes to the backend. If [inline_threshold] is given, 84 84 contents at or below that size (in bytes) are stored directly in the 85 85 parent node rather than as separate blobs. Defaults to the codec's 86 - [inline_threshold]. *) 86 + [inline_threshold]. If [inode] is [false], large tree nodes are never 87 + split into inode tries — they are always written as flat nodes. This is 88 + required for git-compatible output. Defaults to [true]. *) 87 89 88 90 (** {2 Force Control} *) 89 91