1// Package mst contains a Merkle Search Tree (MST) implementation for atproto.
2//
3// This implementation is a port of the Typescript implementation in the
4// `atproto` git repo.
5//
6// ## Notes copied from TS repo
7//
8// This is an implementation of a Merkle Search Tree (MST)
9// The data structure is described here: https://hal.inria.fr/hal-02303490/document
10// The MST is an ordered, insert-order-independent, deterministic tree.
11// Keys are laid out in alphabetic order.
12// The key insight of an MST is that each key is hashed and starting 0s are counted
13// to determine which layer it falls on (5 zeros for ~32 fanout).
14// This is a merkle tree, so each subtree is referred to by it's hash (CID).
15// When a leaf is changed, ever tree on the path to that leaf is changed as well,
16// thereby updating the root hash.
17//
18// For atproto, we use SHA-256 as the key hashing algorithm, and ~16 fanout
19// (4-bits of zero per layer).
20//
21// NOTE: currently keys are strings, not bytes. Because UTF-8 strings can't be
22// safely split at arbitrary byte boundaries (the results are not necessarily
23// valid UTF-8 strings), this means that "wide" characters not really supported
24// in keys, particularly across programming language implementations. We
25// recommend sticking with simple alphanumeric (ASCII) strings.
26//
27// A couple notes on CBOR encoding:
28//
29// There are never two neighboring subtrees.
30// Therefore, we can represent a node as an array of
31// leaves & pointers to their right neighbor (possibly null),
32// along with a pointer to the left-most subtree (also possibly null).
33//
34// Most keys in a subtree will have overlap.
35// We do compression on prefixes by describing keys as:
36// - the length of the prefix that it shares in common with the preceding key
37// - the rest of the string
38//
39// For example:
40// If the first leaf in a tree is `bsky/posts/abcdefg` and the second is `bsky/posts/abcdehi`
41// Then the first will be described as `prefix: 0, key: 'bsky/posts/abcdefg'`,
42// and the second will be described as `prefix: 16, key: 'hi'.`
43package mst
44
45import (
46 "context"
47 "fmt"
48 "reflect"
49
50 "github.com/ipfs/go-cid"
51 cbor "github.com/ipfs/go-ipld-cbor"
52)
53
54// nodeKind is the type of node in the MST.
55type nodeKind uint8
56
57const (
58 entryUndefined nodeKind = 0
59 entryLeaf nodeKind = 1
60 entryTree nodeKind = 2
61)
62
63// nodeEntry is a node in the MST.
64//
65// Following the Typescript implementation, this is basically a flexible
66// "TreeEntry" (aka "leaf") which might also be the "Left" pointer on a
67// NodeData (aka "tree"). This type flexibility is not idiomatic in Go, but
68// we are keeping this a very direct port.
69type nodeEntry struct {
70 Kind nodeKind
71 Key string
72 Val cid.Cid
73 Tree *MerkleSearchTree
74}
75
76func mkTreeEntry(t *MerkleSearchTree) nodeEntry {
77 return nodeEntry{
78 Kind: entryTree,
79 Tree: t,
80 }
81}
82
83func (ne nodeEntry) isTree() bool {
84 return ne.Kind == entryTree
85}
86
87func (ne nodeEntry) isLeaf() bool {
88 return ne.Kind == entryLeaf
89}
90
91func (ne nodeEntry) isUndefined() bool {
92 return ne.Kind == entryUndefined
93}
94
95// golang-specific helper to sanity check nodeEntry semantics
96func checkTreeInvariant(ents []nodeEntry) {
97 for i := 0; i < len(ents)-1; i++ {
98 if ents[i].isTree() && ents[i+1].isTree() {
99 panic(fmt.Sprintf("two trees next to each other! %d %d", i, i+1))
100 }
101 }
102}
103
104// CBORTypes returns the types in this package that need to be registered with
105// the CBOR codec.
106func CBORTypes() []reflect.Type {
107 return []reflect.Type{
108 reflect.TypeOf(NodeData{}),
109 reflect.TypeOf(TreeEntry{}),
110 }
111}
112
113// MST tree node as gets serialized to CBOR. Note that the CBOR fields are all
114// single-character.
115type NodeData struct {
116 Left *cid.Cid `cborgen:"l"` // [nullable] pointer to lower-level subtree to the "left" of this path/key
117 Entries []TreeEntry `cborgen:"e"` // ordered list of entries at this node
118}
119
120// TreeEntry are elements of NodeData's Entries.
121type TreeEntry struct {
122 PrefixLen int64 `cborgen:"p"` // count of characters shared with previous path/key in tree
123 KeySuffix []byte `cborgen:"k"` // remaining part of path/key (appended to "previous key")
124 Val cid.Cid `cborgen:"v"` // CID pointer at this path/key
125 Tree *cid.Cid `cborgen:"t"` // [nullable] pointer to lower-level subtree to the "right" of this path/key entry
126}
127
128// MerkleSearchTree represents an MST tree node (NodeData type). It can be in
129// several levels of hydration: fully hydrated (entries and "pointer" (CID)
130// computed); dirty (entries correct, but pointer (CID) not valid); virtual
131// (pointer is defined, no entries have been pulled from blockstore)
132//
133// MerkleSearchTree values are immutable. Methods return copies with changes.
134type MerkleSearchTree struct {
135 cst cbor.IpldStore
136 entries []nodeEntry // non-nil when "hydrated"
137 layer int
138 pointer cid.Cid
139 validPtr bool
140}
141
142// NewEmptyMST reports a new empty MST using cst as its storage.
143func NewEmptyMST(cst cbor.IpldStore) *MerkleSearchTree {
144 return createMST(cst, cid.Undef, []nodeEntry{}, 0)
145}
146
147// Typescript: MST.create(storage, entries, layer, fanout) -> MST
148func createMST(cst cbor.IpldStore, ptr cid.Cid, entries []nodeEntry, layer int) *MerkleSearchTree {
149 mst := &MerkleSearchTree{
150 cst: cst,
151 pointer: ptr,
152 layer: layer,
153 entries: entries,
154 validPtr: ptr.Defined(),
155 }
156
157 return mst
158}
159
160// TODO: Typescript: MST.fromData(storage, data, layer=null, fanout)
161
162// This is poorly named in both implementations, because it is lazy
163// Typescript: MST.load(storage, cid, layer=null, fanout) -> MST
164func LoadMST(cst cbor.IpldStore, root cid.Cid) *MerkleSearchTree {
165 return createMST(cst, root, nil, -1)
166}
167
168// === "Immutability" ===
169
170// "We never mutate an MST, we just return a new MST with updated values"
171// Typescript: MST.newTree(entries) -> MST
172func (mst *MerkleSearchTree) newTree(entries []nodeEntry) *MerkleSearchTree {
173 if entries == nil {
174 panic("nil entries passed to newTree")
175 }
176 return createMST(mst.cst, cid.Undef, entries, mst.layer)
177}
178
179// === "Getters (lazy load)" ===
180
181// "We don't want to load entries of every subtree, just the ones we need"
182// Typescript: MST.getEntries() -> nodeEntry[]
183func (mst *MerkleSearchTree) getEntries(ctx context.Context) ([]nodeEntry, error) {
184 // if we are "hydrated", entries are available
185 if mst.entries != nil {
186 return mst.entries, nil
187 }
188
189 // otherwise this is a virtual/pointer struct and we need to hydrate from
190 // blockstore before returning entries
191 if mst.pointer != cid.Undef {
192 var nd NodeData
193 if err := mst.cst.Get(ctx, mst.pointer, &nd); err != nil {
194 return nil, err
195 }
196 // NOTE(bnewbold): Typescript version computes layer in-place here, but
197 // the entriesFromNodeData() helper does that for us in golang
198 entries, err := entriesFromNodeData(ctx, &nd, mst.cst)
199 if err != nil {
200 return nil, err
201 }
202 if entries == nil {
203 panic("got nil entries from node data decoding")
204 }
205 mst.entries = entries
206 return entries, nil
207 }
208
209 return nil, fmt.Errorf("no entries or self-pointer (CID) on MerkleSearchTree")
210}
211
212// golang-specific helper that calls in to deserializeNodeData
213func entriesFromNodeData(ctx context.Context, nd *NodeData, cst cbor.IpldStore) ([]nodeEntry, error) {
214 layer := -1
215 if len(nd.Entries) > 0 {
216 // NOTE(bnewbold): can compute the layer on the first KeySuffix, because for the first entry that field is a complete key
217 firstLeaf := nd.Entries[0]
218 layer = leadingZerosOnHashBytes(firstLeaf.KeySuffix)
219 }
220
221 entries, err := deserializeNodeData(ctx, cst, nd, layer)
222 if err != nil {
223 return nil, err
224 }
225
226 return entries, nil
227}
228
229// "We don't hash the node on every mutation for performance reasons. Instead we keep track of whether the pointer is outdated and only (recursively) calculate when needed"
230// Typescript: MST.getPointer() -> CID
231func (mst *MerkleSearchTree) GetPointer(ctx context.Context) (cid.Cid, error) {
232 if mst.validPtr {
233 return mst.pointer, nil
234 }
235
236 // NOTE(bnewbold): this is a bit different from how Typescript version works
237 // update in-place; first ensure that mst.entries is hydrated
238 _, err := mst.getEntries(ctx)
239 if err != nil {
240 return cid.Undef, err
241 }
242
243 for i, e := range mst.entries {
244 if e.isTree() {
245 if !e.Tree.validPtr {
246 _, err := e.Tree.GetPointer(ctx)
247 if err != nil {
248 return cid.Undef, err
249 }
250 mst.entries[i] = e
251 }
252 }
253 }
254
255 nptr, err := cidForEntries(ctx, mst.entries, mst.cst)
256 if err != nil {
257 return cid.Undef, err
258 }
259 mst.pointer = nptr
260 mst.validPtr = true
261
262 return mst.pointer, nil
263}
264
265// "In most cases, we get the layer of a node from a hint on creation"
266// "In the case of the topmost node in the tree, we look for a key in the node & determine the layer"
267// "In the case where we don't find one, we recurse down until we do."
268// "If we still can't find one, then we have an empty tree and the node is layer 0"
269// Typescript: MST.getLayer() -> number
270func (mst *MerkleSearchTree) getLayer(ctx context.Context) (int, error) {
271 layer, err := mst.attemptGetLayer(ctx)
272 if err != nil {
273 return -1, err
274 }
275 if layer < 0 {
276 mst.layer = 0
277 } else {
278 mst.layer = layer
279 }
280 return mst.layer, nil
281}
282
283// Typescript: MST.attemptGetLayer() -> number
284func (mst *MerkleSearchTree) attemptGetLayer(ctx context.Context) (int, error) {
285 if mst.layer >= 0 {
286 return mst.layer, nil
287 }
288
289 entries, err := mst.getEntries(ctx)
290 if err != nil {
291 return -1, err
292 }
293
294 layer := layerForEntries(entries)
295 if layer < 0 {
296 // NOTE(bnewbold): updated this from typescript
297 for _, e := range entries {
298 if e.isTree() {
299 childLayer, err := e.Tree.attemptGetLayer(ctx)
300 if err != nil {
301 return -1, err
302 }
303 if childLayer >= 0 {
304 layer = childLayer + 1
305 break
306 }
307 }
308 }
309 }
310
311 if layer >= 0 {
312 mst.layer = layer
313 }
314 return mst.layer, nil
315}
316
317// === "Core functionality" ===
318
319// NOTE: MST.getUnstoredBlocks() not needed; we are always working out of
320// blockstore in this implementation
321
322// "Adds a new leaf for the given key/value pair. Throws if a leaf with that key already exists"
323// Typescript: MST.add(key, value, knownZeros?) -> MST
324func (mst *MerkleSearchTree) Add(ctx context.Context, key string, val cid.Cid, knownZeros int) (*MerkleSearchTree, error) {
325
326 // NOTE(bnewbold): this is inefficient (recurses), but matches TS implementation
327 err := ensureValidMstKey(key)
328 if err != nil {
329 return nil, err
330 }
331
332 if val == cid.Undef {
333 return nil, fmt.Errorf("tried to insert an undef CID")
334 }
335
336 keyZeros := knownZeros
337 if keyZeros < 0 {
338 keyZeros = leadingZerosOnHash(key)
339 }
340
341 layer, err := mst.getLayer(ctx)
342 if err != nil {
343 return nil, fmt.Errorf("getting layer failed: %w", err)
344 }
345
346 newLeaf := nodeEntry{
347 Kind: entryLeaf,
348 Key: key,
349 Val: val,
350 }
351
352 if keyZeros == layer {
353 // it belongs in this layer
354 index, err := mst.findGtOrEqualLeafIndex(ctx, key)
355 if err != nil {
356 return nil, err
357 }
358
359 found, err := mst.atIndex(index)
360 if err != nil {
361 return nil, err
362 }
363
364 if found.isLeaf() && found.Key == key {
365 return nil, fmt.Errorf("value already set at key: %s", key)
366 }
367
368 prevNode, err := mst.atIndex(index - 1)
369 if err != nil {
370 return nil, err
371 }
372
373 if prevNode.isUndefined() || prevNode.isLeaf() {
374 // "if entry before is a leaf, (or we're on far left) we can just splice in"
375 return mst.spliceIn(ctx, newLeaf, index)
376 }
377
378 // "else we try to split the subtree around the key"
379 left, right, err := prevNode.Tree.splitAround(ctx, key)
380 if err != nil {
381 return nil, err
382 }
383 // NOTE(bnewbold): added this replaceWithSplit() call
384 return mst.replaceWithSplit(ctx, index-1, left, newLeaf, right)
385
386 } else if keyZeros < layer {
387 // "it belongs on a lower layer"
388 index, err := mst.findGtOrEqualLeafIndex(ctx, key)
389 if err != nil {
390 return nil, err
391 }
392
393 prevNode, err := mst.atIndex(index - 1)
394 if err != nil {
395 return nil, err
396 }
397
398 if !prevNode.isUndefined() && prevNode.isTree() {
399 // "if entry before is a tree, we add it to that tree"
400 newSubtree, err := prevNode.Tree.Add(ctx, key, val, keyZeros)
401 if err != nil {
402 return nil, err
403 }
404
405 return mst.updateEntry(ctx, index-1, mkTreeEntry(newSubtree))
406 } else {
407 subTree, err := mst.createChild(ctx)
408 if err != nil {
409 return nil, err
410 }
411
412 newSubTree, err := subTree.Add(ctx, key, val, keyZeros)
413 if err != nil {
414 return nil, fmt.Errorf("subtree add: %w", err)
415 }
416
417 return mst.spliceIn(ctx, mkTreeEntry(newSubTree), index)
418 }
419
420 } else {
421 // "it belongs on a higher layer & we must push the rest of the tree down"
422 left, right, err := mst.splitAround(ctx, key)
423 if err != nil {
424 return nil, err
425 }
426
427 // "if the newly added key has >=2 more leading zeros than the current highest layer then we need to add in structural nodes in between as well"
428 layer, err := mst.getLayer(ctx)
429 if err != nil {
430 return nil, fmt.Errorf("get layer in split case failed: %w", err)
431 }
432
433 extraLayersToAdd := keyZeros - layer
434
435 // "intentionally starting at 1, since first layer is taken care of by split"
436 for i := 1; i < extraLayersToAdd; i++ {
437 // seems bad if both left and right are non nil
438 if left != nil {
439 par, err := left.createParent(ctx)
440 if err != nil {
441 return nil, fmt.Errorf("create left parent: %w", err)
442 }
443 left = par
444 }
445
446 if right != nil {
447 par, err := right.createParent(ctx)
448 if err != nil {
449 return nil, fmt.Errorf("create right parent: %w", err)
450 }
451 right = par
452 }
453 }
454
455 var updated []nodeEntry
456 if left != nil {
457 updated = append(updated, mkTreeEntry(left))
458 }
459
460 updated = append(updated, nodeEntry{
461 Kind: entryLeaf,
462 Key: key,
463 Val: val,
464 })
465
466 if right != nil {
467 updated = append(updated, mkTreeEntry(right))
468 }
469
470 checkTreeInvariant(updated)
471 newRoot := createMST(mst.cst, cid.Undef, updated, keyZeros)
472
473 // NOTE(bnewbold): We do want to invalid the CID (because this node has
474 // changed, and we are "lazy" about recomputing). Setting this flag
475 // is redundant with passing cid.Undef to NewMST just above, but
476 // keeping because it is explicit and matches the explicit invalidation
477 // that happens in the Typescript code
478 newRoot.validPtr = false
479
480 return newRoot, nil
481 }
482}
483
484var ErrNotFound = fmt.Errorf("mst: not found")
485
486// "Gets the value at the given key"
487// Typescript: MST.get(key) -> (CID|null)
488func (mst *MerkleSearchTree) Get(ctx context.Context, k string) (cid.Cid, error) {
489 index, err := mst.findGtOrEqualLeafIndex(ctx, k)
490 if err != nil {
491 return cid.Undef, err
492 }
493
494 found, err := mst.atIndex(index)
495 if err != nil {
496 return cid.Undef, err
497 }
498
499 if !found.isUndefined() && found.isLeaf() && found.Key == k {
500 return found.Val, nil
501 }
502
503 prev, err := mst.atIndex(index - 1)
504 if err != nil {
505 return cid.Undef, err
506 }
507
508 if !prev.isUndefined() && prev.isTree() {
509 return prev.Tree.Get(ctx, k)
510 }
511
512 return cid.Undef, ErrNotFound
513}
514
515// "Edits the value at the given key. Throws if the given key does not exist"
516// Typescript: MST.update(key, value) -> MST
517func (mst *MerkleSearchTree) Update(ctx context.Context, k string, val cid.Cid) (*MerkleSearchTree, error) {
518
519 // NOTE(bnewbold): this is inefficient (recurses), but matches TS implementation
520 err := ensureValidMstKey(k)
521 if err != nil {
522 return nil, err
523 }
524
525 if val == cid.Undef {
526 return nil, fmt.Errorf("tried to insert an undef CID")
527 }
528
529 index, err := mst.findGtOrEqualLeafIndex(ctx, k)
530 if err != nil {
531 return nil, err
532 }
533
534 found, err := mst.atIndex(index)
535 if err != nil {
536 return nil, err
537 }
538
539 if !found.isUndefined() && found.isLeaf() && found.Key == k {
540 // NOTE(bnewbold): updated here
541 return mst.updateEntry(ctx, index, nodeEntry{
542 Kind: entryLeaf,
543 Key: string(k),
544 Val: val,
545 })
546 }
547
548 prev, err := mst.atIndex(index - 1)
549 if err != nil {
550 return nil, err
551 }
552
553 if !prev.isUndefined() && prev.isTree() {
554 updatedTree, err := prev.Tree.Update(ctx, k, val)
555 if err != nil {
556 return nil, err
557 }
558
559 return mst.updateEntry(ctx, index-1, mkTreeEntry(updatedTree))
560 }
561
562 return nil, ErrNotFound
563}
564
565// "Deletes the value at the given key"
566// Typescript: MST.delete(key) -> MST
567func (mst *MerkleSearchTree) Delete(ctx context.Context, k string) (*MerkleSearchTree, error) {
568 altered, err := mst.deleteRecurse(ctx, k)
569 if err != nil {
570 return nil, err
571 }
572 return altered.trimTop(ctx)
573}
574
575// Typescript: MST.deleteRecurse(key) -> MST
576func (mst *MerkleSearchTree) deleteRecurse(ctx context.Context, k string) (*MerkleSearchTree, error) {
577 ix, err := mst.findGtOrEqualLeafIndex(ctx, k)
578 if err != nil {
579 return nil, err
580 }
581
582 found, err := mst.atIndex(ix)
583 if err != nil {
584 return nil, err
585 }
586
587 // "if found, remove it on this level"
588 if found.isLeaf() && found.Key == k {
589 prev, err := mst.atIndex(ix - 1)
590 if err != nil {
591 return nil, err
592 }
593
594 next, err := mst.atIndex(ix + 1)
595 if err != nil {
596 return nil, err
597 }
598
599 if prev.isTree() && next.isTree() {
600 merged, err := prev.Tree.appendMerge(ctx, next.Tree)
601 if err != nil {
602 return nil, err
603 }
604 entries, err := mst.getEntries(ctx)
605 if err != nil {
606 return nil, err
607 }
608 return mst.newTree(append(append(entries[:ix-1], mkTreeEntry(merged)), entries[ix+2:]...)), nil
609 } else {
610 return mst.removeEntry(ctx, ix)
611 }
612 }
613
614 // "else recurse down to find it"
615 prev, err := mst.atIndex(ix - 1)
616 if err != nil {
617 return nil, err
618 }
619
620 if prev.isTree() {
621 subtree, err := prev.Tree.deleteRecurse(ctx, k)
622 if err != nil {
623 return nil, err
624 }
625
626 subtreeEntries, err := subtree.getEntries(ctx)
627 if err != nil {
628 return nil, err
629 }
630
631 if len(subtreeEntries) == 0 {
632 return mst.removeEntry(ctx, ix-1)
633 } else {
634 return mst.updateEntry(ctx, ix-1, mkTreeEntry(subtree))
635 }
636 } else {
637 return nil, fmt.Errorf("could not find record with key: %s", k)
638 }
639}
640
641// === "Simple Operations" ===
642
643// "update entry in place"
644// Typescript: MST.updateEntry(index, entry) -> MST
645func (mst *MerkleSearchTree) updateEntry(ctx context.Context, ix int, entry nodeEntry) (*MerkleSearchTree, error) {
646 entries, err := mst.getEntries(ctx)
647 if err != nil {
648 return nil, err
649 }
650
651 nents := make([]nodeEntry, len(entries))
652 copy(nents, entries[:ix])
653 nents[ix] = entry
654 copy(nents[ix+1:], entries[ix+1:])
655
656 checkTreeInvariant(nents)
657 return mst.newTree(nents), nil
658}
659
660// "remove entry at index"
661func (mst *MerkleSearchTree) removeEntry(ctx context.Context, ix int) (*MerkleSearchTree, error) {
662 entries, err := mst.getEntries(ctx)
663 if err != nil {
664 return nil, err
665 }
666
667 nents := make([]nodeEntry, len(entries)-1)
668 copy(nents, entries[:ix])
669 copy(nents[ix:], entries[ix+1:])
670
671 checkTreeInvariant(nents)
672 return mst.newTree(nents), nil
673}
674
675// "append entry to end of the node"
676func (mst *MerkleSearchTree) append(ctx context.Context, ent nodeEntry) (*MerkleSearchTree, error) {
677 entries, err := mst.getEntries(ctx)
678 if err != nil {
679 return nil, err
680 }
681
682 nents := make([]nodeEntry, len(entries)+1)
683 copy(nents, entries)
684 nents[len(nents)-1] = ent
685
686 checkTreeInvariant(nents)
687 return mst.newTree(nents), nil
688}
689
690// "prepend entry to start of the node"
691func (mst *MerkleSearchTree) prepend(ctx context.Context, ent nodeEntry) (*MerkleSearchTree, error) {
692 entries, err := mst.getEntries(ctx)
693 if err != nil {
694 return nil, err
695 }
696
697 nents := make([]nodeEntry, len(entries)+1)
698 copy(nents[1:], entries)
699 nents[0] = ent
700
701 checkTreeInvariant(nents)
702 return mst.newTree(nents), nil
703}
704
705// "returns entry at index"
706// Apparently returns null if nothing at index, which seems brittle
707func (mst *MerkleSearchTree) atIndex(ix int) (nodeEntry, error) {
708 entries, err := mst.getEntries(context.TODO())
709 if err != nil {
710 return nodeEntry{}, err
711 }
712
713 // TODO(bnewbold): same as Typescript, but shouldn't this error instead of returning null?
714 if ix < 0 || ix >= len(entries) {
715 return nodeEntry{}, nil
716 }
717
718 return entries[ix], nil
719}
720
721// NOTE(bnewbold): unlike Typescript, golang does not really need the slice(start?, end?) helper
722
723// "inserts entry at index"
724func (mst *MerkleSearchTree) spliceIn(ctx context.Context, entry nodeEntry, ix int) (*MerkleSearchTree, error) {
725 entries, err := mst.getEntries(ctx)
726 if err != nil {
727 return nil, err
728 }
729
730 nents := make([]nodeEntry, len(entries)+1)
731 copy(nents, entries[:ix])
732 nents[ix] = entry
733 copy(nents[ix+1:], entries[ix:])
734
735 checkTreeInvariant(nents)
736 return mst.newTree(nents), nil
737}
738
739// "replaces an entry with [ Maybe(tree), Leaf, Maybe(tree) ]"
740func (mst *MerkleSearchTree) replaceWithSplit(ctx context.Context, ix int, left *MerkleSearchTree, nl nodeEntry, right *MerkleSearchTree) (*MerkleSearchTree, error) {
741 entries, err := mst.getEntries(ctx)
742 if err != nil {
743 return nil, err
744 }
745 checkTreeInvariant(entries)
746 var update []nodeEntry
747 update = append(update, entries[:ix]...)
748
749 if left != nil {
750 update = append(update, nodeEntry{
751 Kind: entryTree,
752 Tree: left,
753 })
754 }
755
756 update = append(update, nl)
757
758 if right != nil {
759 update = append(update, nodeEntry{
760 Kind: entryTree,
761 Tree: right,
762 })
763 }
764
765 update = append(update, entries[ix+1:]...)
766
767 checkTreeInvariant(update)
768 return mst.newTree(update), nil
769}
770
771// "if the topmost node in the tree only points to another tree, trim the top and return the subtree"
772func (mst *MerkleSearchTree) trimTop(ctx context.Context) (*MerkleSearchTree, error) {
773 entries, err := mst.getEntries(ctx)
774 if err != nil {
775 return nil, err
776 }
777 if len(entries) == 1 && entries[0].isTree() {
778 return entries[0].Tree.trimTop(ctx)
779 } else {
780 return mst, nil
781 }
782}
783
784// === "Subtree & Splits" ===
785
786// "Recursively splits a sub tree around a given key"
787func (mst *MerkleSearchTree) splitAround(ctx context.Context, key string) (*MerkleSearchTree, *MerkleSearchTree, error) {
788 index, err := mst.findGtOrEqualLeafIndex(ctx, key)
789 if err != nil {
790 return nil, nil, err
791 }
792
793 entries, err := mst.getEntries(ctx)
794 if err != nil {
795 return nil, nil, err
796 }
797
798 leftData := entries[:index]
799 rightData := entries[index:]
800 left := mst.newTree(leftData)
801 right := mst.newTree(rightData)
802
803 // "if the far right of the left side is a subtree, we need to split it on the key as well"
804 if len(leftData) > 0 && leftData[len(leftData)-1].isTree() {
805 lastInLeft := leftData[len(leftData)-1]
806
807 nleft, err := left.removeEntry(ctx, len(leftData)-1)
808 if err != nil {
809 return nil, nil, err
810 }
811 left = nleft
812
813 subl, subr, err := lastInLeft.Tree.splitAround(ctx, key)
814 if err != nil {
815 return nil, nil, err
816 }
817
818 if subl != nil {
819 left, err = left.append(ctx, mkTreeEntry(subl))
820 if err != nil {
821 return nil, nil, err
822 }
823 }
824
825 if subr != nil {
826 right, err = right.prepend(ctx, mkTreeEntry(subr))
827 if err != nil {
828 return nil, nil, err
829 }
830 }
831 }
832
833 if left.entryCount() == 0 {
834 left = nil
835 }
836 if right.entryCount() == 0 {
837 right = nil
838 }
839
840 return left, right, nil
841}
842
843func (mst *MerkleSearchTree) entryCount() int {
844 entries, err := mst.getEntries(context.TODO())
845 if err != nil {
846 panic(err)
847 }
848
849 return len(entries)
850}
851
852// "The simple merge case where every key in the right tree is greater than every key in the left tree (used primarily for deletes)"
853func (mst *MerkleSearchTree) appendMerge(ctx context.Context, omst *MerkleSearchTree) (*MerkleSearchTree, error) {
854 mylayer, err := mst.getLayer(ctx)
855 if err != nil {
856 return nil, err
857 }
858
859 olayer, err := omst.getLayer(ctx)
860 if err != nil {
861 return nil, err
862 }
863
864 if mylayer != olayer {
865 return nil, fmt.Errorf("trying to merge two nodes from different layers")
866 }
867
868 entries, err := mst.getEntries(ctx)
869 if err != nil {
870 return nil, err
871 }
872
873 tomergeEnts, err := omst.getEntries(ctx)
874 if err != nil {
875 return nil, err
876 }
877
878 lastInLeft := entries[len(entries)-1]
879 firstInRight := tomergeEnts[0] // NOTE(bnewbold): bug fixed here, I think
880
881 if lastInLeft.isTree() && firstInRight.isTree() {
882 merged, err := lastInLeft.Tree.appendMerge(ctx, firstInRight.Tree)
883 if err != nil {
884 return nil, err
885 }
886
887 return mst.newTree(append(append(entries[:len(entries)-1], mkTreeEntry(merged)), tomergeEnts[1:]...)), nil
888 } else {
889 return mst.newTree(append(entries, tomergeEnts...)), nil
890 }
891}
892
893// === "Create relatives" ===
894
895func (mst *MerkleSearchTree) createChild(ctx context.Context) (*MerkleSearchTree, error) {
896 layer, err := mst.getLayer(ctx)
897 if err != nil {
898 return nil, err
899 }
900
901 return createMST(mst.cst, cid.Undef, []nodeEntry{}, layer-1), nil
902}
903
904func (mst *MerkleSearchTree) createParent(ctx context.Context) (*MerkleSearchTree, error) {
905 layer, err := mst.getLayer(ctx)
906 if err != nil {
907 return nil, err
908 }
909
910 return createMST(mst.cst, cid.Undef, []nodeEntry{mkTreeEntry(mst)}, layer+1), nil
911}
912
913// === "Finding insertion points" ===
914
915// NOTE(@why): this smells inefficient
916// "finds index of first leaf node that is greater than or equal to the value"
917func (mst *MerkleSearchTree) findGtOrEqualLeafIndex(ctx context.Context, key string) (int, error) {
918 entries, err := mst.getEntries(ctx)
919 if err != nil {
920 return -1, err
921 }
922
923 for i, e := range entries {
924 //if e.isLeaf() && bytes.Compare(e.Key, key) > 0 {
925 if e.isLeaf() && e.Key >= key {
926 return i, nil
927 }
928 }
929
930 // "if we can't find, we're on the end"
931 return len(entries), nil
932}
933
934// === "List operations (partial tree traversal)" ===
935
936// WalkLeavesFrom walks the leaves of the tree, calling the cb callback on each
937// key that's greater than or equal to the provided from key.
938// If cb returns an error, the walk is aborted and the error is returned.
939func (mst *MerkleSearchTree) WalkLeavesFrom(ctx context.Context, from string, cb func(key string, val cid.Cid) error) error {
940 index, err := mst.findGtOrEqualLeafIndex(ctx, from)
941 if err != nil {
942 return err
943 }
944
945 entries, err := mst.getEntries(ctx)
946 if err != nil {
947 return fmt.Errorf("get entries: %w", err)
948 }
949
950 if index > 0 {
951 prev := entries[index-1]
952 if !prev.isUndefined() && prev.isTree() {
953 if err := prev.Tree.WalkLeavesFrom(ctx, from, cb); err != nil {
954 return fmt.Errorf("walk leaves %d: %w", index, err)
955 }
956 }
957 }
958
959 for i, e := range entries[index:] {
960 if e.isLeaf() {
961 if err := cb(e.Key, e.Val); err != nil {
962 return err
963 }
964 } else {
965 if err := e.Tree.WalkLeavesFrom(ctx, from, cb); err != nil {
966 return fmt.Errorf("walk leaves from (%d): %w", i, err)
967 }
968 }
969 }
970 return nil
971}
972
973// TODO: Typescript: MST.list(count?, after?, before?) -> Leaf[]
974// TODO: Typescript: MST.listWithPrefix(prefix, count?) -> Leaf[]
975
976// "Walk full tree & emit nodes, consumer can bail at any point by returning false"
977// TODO: Typescript: MST.walk() -> nodeEntry (iterator)
978// TODO: Typescript: MST.paths() -> nodeEntry[][]
979// TODO: Typescript: MST.allNodes() -> nodeEntry[]
980// TODO: Typescript: MST.allCids() -> CidSet
981// TODO: Typescript: MST.leaves() -> Leaf[]
982// TODO: Typescript: MST.leafCount() -> number
983
984// TODO: Typescript: MST.walkReachable() -> nodeEntry (iterator)
985// TODO: Typescript: MST.reachableLeaves() -> Leaf[]
986
987// TODO: Typescript: MST.writeToCarStream(car) -> ()
988// TODO: Typescript: MST.cidsForPath(car) -> CID[]