fork of indigo with slightly nicer lexgen
at main 27 kB view raw
1// Package mst contains a Merkle Search Tree (MST) implementation for atproto. 2// 3// This implementation is a port of the Typescript implementation in the 4// `atproto` git repo. 5// 6// ## Notes copied from TS repo 7// 8// This is an implementation of a Merkle Search Tree (MST) 9// The data structure is described here: https://hal.inria.fr/hal-02303490/document 10// The MST is an ordered, insert-order-independent, deterministic tree. 11// Keys are laid out in alphabetic order. 12// The key insight of an MST is that each key is hashed and starting 0s are counted 13// to determine which layer it falls on (5 zeros for ~32 fanout). 14// This is a merkle tree, so each subtree is referred to by it's hash (CID). 15// When a leaf is changed, ever tree on the path to that leaf is changed as well, 16// thereby updating the root hash. 17// 18// For atproto, we use SHA-256 as the key hashing algorithm, and ~16 fanout 19// (4-bits of zero per layer). 20// 21// NOTE: currently keys are strings, not bytes. Because UTF-8 strings can't be 22// safely split at arbitrary byte boundaries (the results are not necessarily 23// valid UTF-8 strings), this means that "wide" characters not really supported 24// in keys, particularly across programming language implementations. We 25// recommend sticking with simple alphanumeric (ASCII) strings. 26// 27// A couple notes on CBOR encoding: 28// 29// There are never two neighboring subtrees. 30// Therefore, we can represent a node as an array of 31// leaves & pointers to their right neighbor (possibly null), 32// along with a pointer to the left-most subtree (also possibly null). 33// 34// Most keys in a subtree will have overlap. 35// We do compression on prefixes by describing keys as: 36// - the length of the prefix that it shares in common with the preceding key 37// - the rest of the string 38// 39// For example: 40// If the first leaf in a tree is `bsky/posts/abcdefg` and the second is `bsky/posts/abcdehi` 41// Then the first will be described as `prefix: 0, key: 'bsky/posts/abcdefg'`, 42// and the second will be described as `prefix: 16, key: 'hi'.` 43package mst 44 45import ( 46 "context" 47 "fmt" 48 "reflect" 49 50 "github.com/ipfs/go-cid" 51 cbor "github.com/ipfs/go-ipld-cbor" 52) 53 54// nodeKind is the type of node in the MST. 55type nodeKind uint8 56 57const ( 58 entryUndefined nodeKind = 0 59 entryLeaf nodeKind = 1 60 entryTree nodeKind = 2 61) 62 63// nodeEntry is a node in the MST. 64// 65// Following the Typescript implementation, this is basically a flexible 66// "TreeEntry" (aka "leaf") which might also be the "Left" pointer on a 67// NodeData (aka "tree"). This type flexibility is not idiomatic in Go, but 68// we are keeping this a very direct port. 69type nodeEntry struct { 70 Kind nodeKind 71 Key string 72 Val cid.Cid 73 Tree *MerkleSearchTree 74} 75 76func mkTreeEntry(t *MerkleSearchTree) nodeEntry { 77 return nodeEntry{ 78 Kind: entryTree, 79 Tree: t, 80 } 81} 82 83func (ne nodeEntry) isTree() bool { 84 return ne.Kind == entryTree 85} 86 87func (ne nodeEntry) isLeaf() bool { 88 return ne.Kind == entryLeaf 89} 90 91func (ne nodeEntry) isUndefined() bool { 92 return ne.Kind == entryUndefined 93} 94 95// golang-specific helper to sanity check nodeEntry semantics 96func checkTreeInvariant(ents []nodeEntry) { 97 for i := 0; i < len(ents)-1; i++ { 98 if ents[i].isTree() && ents[i+1].isTree() { 99 panic(fmt.Sprintf("two trees next to each other! %d %d", i, i+1)) 100 } 101 } 102} 103 104// CBORTypes returns the types in this package that need to be registered with 105// the CBOR codec. 106func CBORTypes() []reflect.Type { 107 return []reflect.Type{ 108 reflect.TypeOf(NodeData{}), 109 reflect.TypeOf(TreeEntry{}), 110 } 111} 112 113// MST tree node as gets serialized to CBOR. Note that the CBOR fields are all 114// single-character. 115type NodeData struct { 116 Left *cid.Cid `cborgen:"l"` // [nullable] pointer to lower-level subtree to the "left" of this path/key 117 Entries []TreeEntry `cborgen:"e"` // ordered list of entries at this node 118} 119 120// TreeEntry are elements of NodeData's Entries. 121type TreeEntry struct { 122 PrefixLen int64 `cborgen:"p"` // count of characters shared with previous path/key in tree 123 KeySuffix []byte `cborgen:"k"` // remaining part of path/key (appended to "previous key") 124 Val cid.Cid `cborgen:"v"` // CID pointer at this path/key 125 Tree *cid.Cid `cborgen:"t"` // [nullable] pointer to lower-level subtree to the "right" of this path/key entry 126} 127 128// MerkleSearchTree represents an MST tree node (NodeData type). It can be in 129// several levels of hydration: fully hydrated (entries and "pointer" (CID) 130// computed); dirty (entries correct, but pointer (CID) not valid); virtual 131// (pointer is defined, no entries have been pulled from blockstore) 132// 133// MerkleSearchTree values are immutable. Methods return copies with changes. 134type MerkleSearchTree struct { 135 cst cbor.IpldStore 136 entries []nodeEntry // non-nil when "hydrated" 137 layer int 138 pointer cid.Cid 139 validPtr bool 140} 141 142// NewEmptyMST reports a new empty MST using cst as its storage. 143func NewEmptyMST(cst cbor.IpldStore) *MerkleSearchTree { 144 return createMST(cst, cid.Undef, []nodeEntry{}, 0) 145} 146 147// Typescript: MST.create(storage, entries, layer, fanout) -> MST 148func createMST(cst cbor.IpldStore, ptr cid.Cid, entries []nodeEntry, layer int) *MerkleSearchTree { 149 mst := &MerkleSearchTree{ 150 cst: cst, 151 pointer: ptr, 152 layer: layer, 153 entries: entries, 154 validPtr: ptr.Defined(), 155 } 156 157 return mst 158} 159 160// TODO: Typescript: MST.fromData(storage, data, layer=null, fanout) 161 162// This is poorly named in both implementations, because it is lazy 163// Typescript: MST.load(storage, cid, layer=null, fanout) -> MST 164func LoadMST(cst cbor.IpldStore, root cid.Cid) *MerkleSearchTree { 165 return createMST(cst, root, nil, -1) 166} 167 168// === "Immutability" === 169 170// "We never mutate an MST, we just return a new MST with updated values" 171// Typescript: MST.newTree(entries) -> MST 172func (mst *MerkleSearchTree) newTree(entries []nodeEntry) *MerkleSearchTree { 173 if entries == nil { 174 panic("nil entries passed to newTree") 175 } 176 return createMST(mst.cst, cid.Undef, entries, mst.layer) 177} 178 179// === "Getters (lazy load)" === 180 181// "We don't want to load entries of every subtree, just the ones we need" 182// Typescript: MST.getEntries() -> nodeEntry[] 183func (mst *MerkleSearchTree) getEntries(ctx context.Context) ([]nodeEntry, error) { 184 // if we are "hydrated", entries are available 185 if mst.entries != nil { 186 return mst.entries, nil 187 } 188 189 // otherwise this is a virtual/pointer struct and we need to hydrate from 190 // blockstore before returning entries 191 if mst.pointer != cid.Undef { 192 var nd NodeData 193 if err := mst.cst.Get(ctx, mst.pointer, &nd); err != nil { 194 return nil, err 195 } 196 // NOTE(bnewbold): Typescript version computes layer in-place here, but 197 // the entriesFromNodeData() helper does that for us in golang 198 entries, err := entriesFromNodeData(ctx, &nd, mst.cst) 199 if err != nil { 200 return nil, err 201 } 202 if entries == nil { 203 panic("got nil entries from node data decoding") 204 } 205 mst.entries = entries 206 return entries, nil 207 } 208 209 return nil, fmt.Errorf("no entries or self-pointer (CID) on MerkleSearchTree") 210} 211 212// golang-specific helper that calls in to deserializeNodeData 213func entriesFromNodeData(ctx context.Context, nd *NodeData, cst cbor.IpldStore) ([]nodeEntry, error) { 214 layer := -1 215 if len(nd.Entries) > 0 { 216 // NOTE(bnewbold): can compute the layer on the first KeySuffix, because for the first entry that field is a complete key 217 firstLeaf := nd.Entries[0] 218 layer = leadingZerosOnHashBytes(firstLeaf.KeySuffix) 219 } 220 221 entries, err := deserializeNodeData(ctx, cst, nd, layer) 222 if err != nil { 223 return nil, err 224 } 225 226 return entries, nil 227} 228 229// "We don't hash the node on every mutation for performance reasons. Instead we keep track of whether the pointer is outdated and only (recursively) calculate when needed" 230// Typescript: MST.getPointer() -> CID 231func (mst *MerkleSearchTree) GetPointer(ctx context.Context) (cid.Cid, error) { 232 if mst.validPtr { 233 return mst.pointer, nil 234 } 235 236 // NOTE(bnewbold): this is a bit different from how Typescript version works 237 // update in-place; first ensure that mst.entries is hydrated 238 _, err := mst.getEntries(ctx) 239 if err != nil { 240 return cid.Undef, err 241 } 242 243 for i, e := range mst.entries { 244 if e.isTree() { 245 if !e.Tree.validPtr { 246 _, err := e.Tree.GetPointer(ctx) 247 if err != nil { 248 return cid.Undef, err 249 } 250 mst.entries[i] = e 251 } 252 } 253 } 254 255 nptr, err := cidForEntries(ctx, mst.entries, mst.cst) 256 if err != nil { 257 return cid.Undef, err 258 } 259 mst.pointer = nptr 260 mst.validPtr = true 261 262 return mst.pointer, nil 263} 264 265// "In most cases, we get the layer of a node from a hint on creation" 266// "In the case of the topmost node in the tree, we look for a key in the node & determine the layer" 267// "In the case where we don't find one, we recurse down until we do." 268// "If we still can't find one, then we have an empty tree and the node is layer 0" 269// Typescript: MST.getLayer() -> number 270func (mst *MerkleSearchTree) getLayer(ctx context.Context) (int, error) { 271 layer, err := mst.attemptGetLayer(ctx) 272 if err != nil { 273 return -1, err 274 } 275 if layer < 0 { 276 mst.layer = 0 277 } else { 278 mst.layer = layer 279 } 280 return mst.layer, nil 281} 282 283// Typescript: MST.attemptGetLayer() -> number 284func (mst *MerkleSearchTree) attemptGetLayer(ctx context.Context) (int, error) { 285 if mst.layer >= 0 { 286 return mst.layer, nil 287 } 288 289 entries, err := mst.getEntries(ctx) 290 if err != nil { 291 return -1, err 292 } 293 294 layer := layerForEntries(entries) 295 if layer < 0 { 296 // NOTE(bnewbold): updated this from typescript 297 for _, e := range entries { 298 if e.isTree() { 299 childLayer, err := e.Tree.attemptGetLayer(ctx) 300 if err != nil { 301 return -1, err 302 } 303 if childLayer >= 0 { 304 layer = childLayer + 1 305 break 306 } 307 } 308 } 309 } 310 311 if layer >= 0 { 312 mst.layer = layer 313 } 314 return mst.layer, nil 315} 316 317// === "Core functionality" === 318 319// NOTE: MST.getUnstoredBlocks() not needed; we are always working out of 320// blockstore in this implementation 321 322// "Adds a new leaf for the given key/value pair. Throws if a leaf with that key already exists" 323// Typescript: MST.add(key, value, knownZeros?) -> MST 324func (mst *MerkleSearchTree) Add(ctx context.Context, key string, val cid.Cid, knownZeros int) (*MerkleSearchTree, error) { 325 326 // NOTE(bnewbold): this is inefficient (recurses), but matches TS implementation 327 err := ensureValidMstKey(key) 328 if err != nil { 329 return nil, err 330 } 331 332 if val == cid.Undef { 333 return nil, fmt.Errorf("tried to insert an undef CID") 334 } 335 336 keyZeros := knownZeros 337 if keyZeros < 0 { 338 keyZeros = leadingZerosOnHash(key) 339 } 340 341 layer, err := mst.getLayer(ctx) 342 if err != nil { 343 return nil, fmt.Errorf("getting layer failed: %w", err) 344 } 345 346 newLeaf := nodeEntry{ 347 Kind: entryLeaf, 348 Key: key, 349 Val: val, 350 } 351 352 if keyZeros == layer { 353 // it belongs in this layer 354 index, err := mst.findGtOrEqualLeafIndex(ctx, key) 355 if err != nil { 356 return nil, err 357 } 358 359 found, err := mst.atIndex(index) 360 if err != nil { 361 return nil, err 362 } 363 364 if found.isLeaf() && found.Key == key { 365 return nil, fmt.Errorf("value already set at key: %s", key) 366 } 367 368 prevNode, err := mst.atIndex(index - 1) 369 if err != nil { 370 return nil, err 371 } 372 373 if prevNode.isUndefined() || prevNode.isLeaf() { 374 // "if entry before is a leaf, (or we're on far left) we can just splice in" 375 return mst.spliceIn(ctx, newLeaf, index) 376 } 377 378 // "else we try to split the subtree around the key" 379 left, right, err := prevNode.Tree.splitAround(ctx, key) 380 if err != nil { 381 return nil, err 382 } 383 // NOTE(bnewbold): added this replaceWithSplit() call 384 return mst.replaceWithSplit(ctx, index-1, left, newLeaf, right) 385 386 } else if keyZeros < layer { 387 // "it belongs on a lower layer" 388 index, err := mst.findGtOrEqualLeafIndex(ctx, key) 389 if err != nil { 390 return nil, err 391 } 392 393 prevNode, err := mst.atIndex(index - 1) 394 if err != nil { 395 return nil, err 396 } 397 398 if !prevNode.isUndefined() && prevNode.isTree() { 399 // "if entry before is a tree, we add it to that tree" 400 newSubtree, err := prevNode.Tree.Add(ctx, key, val, keyZeros) 401 if err != nil { 402 return nil, err 403 } 404 405 return mst.updateEntry(ctx, index-1, mkTreeEntry(newSubtree)) 406 } else { 407 subTree, err := mst.createChild(ctx) 408 if err != nil { 409 return nil, err 410 } 411 412 newSubTree, err := subTree.Add(ctx, key, val, keyZeros) 413 if err != nil { 414 return nil, fmt.Errorf("subtree add: %w", err) 415 } 416 417 return mst.spliceIn(ctx, mkTreeEntry(newSubTree), index) 418 } 419 420 } else { 421 // "it belongs on a higher layer & we must push the rest of the tree down" 422 left, right, err := mst.splitAround(ctx, key) 423 if err != nil { 424 return nil, err 425 } 426 427 // "if the newly added key has >=2 more leading zeros than the current highest layer then we need to add in structural nodes in between as well" 428 layer, err := mst.getLayer(ctx) 429 if err != nil { 430 return nil, fmt.Errorf("get layer in split case failed: %w", err) 431 } 432 433 extraLayersToAdd := keyZeros - layer 434 435 // "intentionally starting at 1, since first layer is taken care of by split" 436 for i := 1; i < extraLayersToAdd; i++ { 437 // seems bad if both left and right are non nil 438 if left != nil { 439 par, err := left.createParent(ctx) 440 if err != nil { 441 return nil, fmt.Errorf("create left parent: %w", err) 442 } 443 left = par 444 } 445 446 if right != nil { 447 par, err := right.createParent(ctx) 448 if err != nil { 449 return nil, fmt.Errorf("create right parent: %w", err) 450 } 451 right = par 452 } 453 } 454 455 var updated []nodeEntry 456 if left != nil { 457 updated = append(updated, mkTreeEntry(left)) 458 } 459 460 updated = append(updated, nodeEntry{ 461 Kind: entryLeaf, 462 Key: key, 463 Val: val, 464 }) 465 466 if right != nil { 467 updated = append(updated, mkTreeEntry(right)) 468 } 469 470 checkTreeInvariant(updated) 471 newRoot := createMST(mst.cst, cid.Undef, updated, keyZeros) 472 473 // NOTE(bnewbold): We do want to invalid the CID (because this node has 474 // changed, and we are "lazy" about recomputing). Setting this flag 475 // is redundant with passing cid.Undef to NewMST just above, but 476 // keeping because it is explicit and matches the explicit invalidation 477 // that happens in the Typescript code 478 newRoot.validPtr = false 479 480 return newRoot, nil 481 } 482} 483 484var ErrNotFound = fmt.Errorf("mst: not found") 485 486// "Gets the value at the given key" 487// Typescript: MST.get(key) -> (CID|null) 488func (mst *MerkleSearchTree) Get(ctx context.Context, k string) (cid.Cid, error) { 489 index, err := mst.findGtOrEqualLeafIndex(ctx, k) 490 if err != nil { 491 return cid.Undef, err 492 } 493 494 found, err := mst.atIndex(index) 495 if err != nil { 496 return cid.Undef, err 497 } 498 499 if !found.isUndefined() && found.isLeaf() && found.Key == k { 500 return found.Val, nil 501 } 502 503 prev, err := mst.atIndex(index - 1) 504 if err != nil { 505 return cid.Undef, err 506 } 507 508 if !prev.isUndefined() && prev.isTree() { 509 return prev.Tree.Get(ctx, k) 510 } 511 512 return cid.Undef, ErrNotFound 513} 514 515// "Edits the value at the given key. Throws if the given key does not exist" 516// Typescript: MST.update(key, value) -> MST 517func (mst *MerkleSearchTree) Update(ctx context.Context, k string, val cid.Cid) (*MerkleSearchTree, error) { 518 519 // NOTE(bnewbold): this is inefficient (recurses), but matches TS implementation 520 err := ensureValidMstKey(k) 521 if err != nil { 522 return nil, err 523 } 524 525 if val == cid.Undef { 526 return nil, fmt.Errorf("tried to insert an undef CID") 527 } 528 529 index, err := mst.findGtOrEqualLeafIndex(ctx, k) 530 if err != nil { 531 return nil, err 532 } 533 534 found, err := mst.atIndex(index) 535 if err != nil { 536 return nil, err 537 } 538 539 if !found.isUndefined() && found.isLeaf() && found.Key == k { 540 // NOTE(bnewbold): updated here 541 return mst.updateEntry(ctx, index, nodeEntry{ 542 Kind: entryLeaf, 543 Key: string(k), 544 Val: val, 545 }) 546 } 547 548 prev, err := mst.atIndex(index - 1) 549 if err != nil { 550 return nil, err 551 } 552 553 if !prev.isUndefined() && prev.isTree() { 554 updatedTree, err := prev.Tree.Update(ctx, k, val) 555 if err != nil { 556 return nil, err 557 } 558 559 return mst.updateEntry(ctx, index-1, mkTreeEntry(updatedTree)) 560 } 561 562 return nil, ErrNotFound 563} 564 565// "Deletes the value at the given key" 566// Typescript: MST.delete(key) -> MST 567func (mst *MerkleSearchTree) Delete(ctx context.Context, k string) (*MerkleSearchTree, error) { 568 altered, err := mst.deleteRecurse(ctx, k) 569 if err != nil { 570 return nil, err 571 } 572 return altered.trimTop(ctx) 573} 574 575// Typescript: MST.deleteRecurse(key) -> MST 576func (mst *MerkleSearchTree) deleteRecurse(ctx context.Context, k string) (*MerkleSearchTree, error) { 577 ix, err := mst.findGtOrEqualLeafIndex(ctx, k) 578 if err != nil { 579 return nil, err 580 } 581 582 found, err := mst.atIndex(ix) 583 if err != nil { 584 return nil, err 585 } 586 587 // "if found, remove it on this level" 588 if found.isLeaf() && found.Key == k { 589 prev, err := mst.atIndex(ix - 1) 590 if err != nil { 591 return nil, err 592 } 593 594 next, err := mst.atIndex(ix + 1) 595 if err != nil { 596 return nil, err 597 } 598 599 if prev.isTree() && next.isTree() { 600 merged, err := prev.Tree.appendMerge(ctx, next.Tree) 601 if err != nil { 602 return nil, err 603 } 604 entries, err := mst.getEntries(ctx) 605 if err != nil { 606 return nil, err 607 } 608 return mst.newTree(append(append(entries[:ix-1], mkTreeEntry(merged)), entries[ix+2:]...)), nil 609 } else { 610 return mst.removeEntry(ctx, ix) 611 } 612 } 613 614 // "else recurse down to find it" 615 prev, err := mst.atIndex(ix - 1) 616 if err != nil { 617 return nil, err 618 } 619 620 if prev.isTree() { 621 subtree, err := prev.Tree.deleteRecurse(ctx, k) 622 if err != nil { 623 return nil, err 624 } 625 626 subtreeEntries, err := subtree.getEntries(ctx) 627 if err != nil { 628 return nil, err 629 } 630 631 if len(subtreeEntries) == 0 { 632 return mst.removeEntry(ctx, ix-1) 633 } else { 634 return mst.updateEntry(ctx, ix-1, mkTreeEntry(subtree)) 635 } 636 } else { 637 return nil, fmt.Errorf("could not find record with key: %s", k) 638 } 639} 640 641// === "Simple Operations" === 642 643// "update entry in place" 644// Typescript: MST.updateEntry(index, entry) -> MST 645func (mst *MerkleSearchTree) updateEntry(ctx context.Context, ix int, entry nodeEntry) (*MerkleSearchTree, error) { 646 entries, err := mst.getEntries(ctx) 647 if err != nil { 648 return nil, err 649 } 650 651 nents := make([]nodeEntry, len(entries)) 652 copy(nents, entries[:ix]) 653 nents[ix] = entry 654 copy(nents[ix+1:], entries[ix+1:]) 655 656 checkTreeInvariant(nents) 657 return mst.newTree(nents), nil 658} 659 660// "remove entry at index" 661func (mst *MerkleSearchTree) removeEntry(ctx context.Context, ix int) (*MerkleSearchTree, error) { 662 entries, err := mst.getEntries(ctx) 663 if err != nil { 664 return nil, err 665 } 666 667 nents := make([]nodeEntry, len(entries)-1) 668 copy(nents, entries[:ix]) 669 copy(nents[ix:], entries[ix+1:]) 670 671 checkTreeInvariant(nents) 672 return mst.newTree(nents), nil 673} 674 675// "append entry to end of the node" 676func (mst *MerkleSearchTree) append(ctx context.Context, ent nodeEntry) (*MerkleSearchTree, error) { 677 entries, err := mst.getEntries(ctx) 678 if err != nil { 679 return nil, err 680 } 681 682 nents := make([]nodeEntry, len(entries)+1) 683 copy(nents, entries) 684 nents[len(nents)-1] = ent 685 686 checkTreeInvariant(nents) 687 return mst.newTree(nents), nil 688} 689 690// "prepend entry to start of the node" 691func (mst *MerkleSearchTree) prepend(ctx context.Context, ent nodeEntry) (*MerkleSearchTree, error) { 692 entries, err := mst.getEntries(ctx) 693 if err != nil { 694 return nil, err 695 } 696 697 nents := make([]nodeEntry, len(entries)+1) 698 copy(nents[1:], entries) 699 nents[0] = ent 700 701 checkTreeInvariant(nents) 702 return mst.newTree(nents), nil 703} 704 705// "returns entry at index" 706// Apparently returns null if nothing at index, which seems brittle 707func (mst *MerkleSearchTree) atIndex(ix int) (nodeEntry, error) { 708 entries, err := mst.getEntries(context.TODO()) 709 if err != nil { 710 return nodeEntry{}, err 711 } 712 713 // TODO(bnewbold): same as Typescript, but shouldn't this error instead of returning null? 714 if ix < 0 || ix >= len(entries) { 715 return nodeEntry{}, nil 716 } 717 718 return entries[ix], nil 719} 720 721// NOTE(bnewbold): unlike Typescript, golang does not really need the slice(start?, end?) helper 722 723// "inserts entry at index" 724func (mst *MerkleSearchTree) spliceIn(ctx context.Context, entry nodeEntry, ix int) (*MerkleSearchTree, error) { 725 entries, err := mst.getEntries(ctx) 726 if err != nil { 727 return nil, err 728 } 729 730 nents := make([]nodeEntry, len(entries)+1) 731 copy(nents, entries[:ix]) 732 nents[ix] = entry 733 copy(nents[ix+1:], entries[ix:]) 734 735 checkTreeInvariant(nents) 736 return mst.newTree(nents), nil 737} 738 739// "replaces an entry with [ Maybe(tree), Leaf, Maybe(tree) ]" 740func (mst *MerkleSearchTree) replaceWithSplit(ctx context.Context, ix int, left *MerkleSearchTree, nl nodeEntry, right *MerkleSearchTree) (*MerkleSearchTree, error) { 741 entries, err := mst.getEntries(ctx) 742 if err != nil { 743 return nil, err 744 } 745 checkTreeInvariant(entries) 746 var update []nodeEntry 747 update = append(update, entries[:ix]...) 748 749 if left != nil { 750 update = append(update, nodeEntry{ 751 Kind: entryTree, 752 Tree: left, 753 }) 754 } 755 756 update = append(update, nl) 757 758 if right != nil { 759 update = append(update, nodeEntry{ 760 Kind: entryTree, 761 Tree: right, 762 }) 763 } 764 765 update = append(update, entries[ix+1:]...) 766 767 checkTreeInvariant(update) 768 return mst.newTree(update), nil 769} 770 771// "if the topmost node in the tree only points to another tree, trim the top and return the subtree" 772func (mst *MerkleSearchTree) trimTop(ctx context.Context) (*MerkleSearchTree, error) { 773 entries, err := mst.getEntries(ctx) 774 if err != nil { 775 return nil, err 776 } 777 if len(entries) == 1 && entries[0].isTree() { 778 return entries[0].Tree.trimTop(ctx) 779 } else { 780 return mst, nil 781 } 782} 783 784// === "Subtree & Splits" === 785 786// "Recursively splits a sub tree around a given key" 787func (mst *MerkleSearchTree) splitAround(ctx context.Context, key string) (*MerkleSearchTree, *MerkleSearchTree, error) { 788 index, err := mst.findGtOrEqualLeafIndex(ctx, key) 789 if err != nil { 790 return nil, nil, err 791 } 792 793 entries, err := mst.getEntries(ctx) 794 if err != nil { 795 return nil, nil, err 796 } 797 798 leftData := entries[:index] 799 rightData := entries[index:] 800 left := mst.newTree(leftData) 801 right := mst.newTree(rightData) 802 803 // "if the far right of the left side is a subtree, we need to split it on the key as well" 804 if len(leftData) > 0 && leftData[len(leftData)-1].isTree() { 805 lastInLeft := leftData[len(leftData)-1] 806 807 nleft, err := left.removeEntry(ctx, len(leftData)-1) 808 if err != nil { 809 return nil, nil, err 810 } 811 left = nleft 812 813 subl, subr, err := lastInLeft.Tree.splitAround(ctx, key) 814 if err != nil { 815 return nil, nil, err 816 } 817 818 if subl != nil { 819 left, err = left.append(ctx, mkTreeEntry(subl)) 820 if err != nil { 821 return nil, nil, err 822 } 823 } 824 825 if subr != nil { 826 right, err = right.prepend(ctx, mkTreeEntry(subr)) 827 if err != nil { 828 return nil, nil, err 829 } 830 } 831 } 832 833 if left.entryCount() == 0 { 834 left = nil 835 } 836 if right.entryCount() == 0 { 837 right = nil 838 } 839 840 return left, right, nil 841} 842 843func (mst *MerkleSearchTree) entryCount() int { 844 entries, err := mst.getEntries(context.TODO()) 845 if err != nil { 846 panic(err) 847 } 848 849 return len(entries) 850} 851 852// "The simple merge case where every key in the right tree is greater than every key in the left tree (used primarily for deletes)" 853func (mst *MerkleSearchTree) appendMerge(ctx context.Context, omst *MerkleSearchTree) (*MerkleSearchTree, error) { 854 mylayer, err := mst.getLayer(ctx) 855 if err != nil { 856 return nil, err 857 } 858 859 olayer, err := omst.getLayer(ctx) 860 if err != nil { 861 return nil, err 862 } 863 864 if mylayer != olayer { 865 return nil, fmt.Errorf("trying to merge two nodes from different layers") 866 } 867 868 entries, err := mst.getEntries(ctx) 869 if err != nil { 870 return nil, err 871 } 872 873 tomergeEnts, err := omst.getEntries(ctx) 874 if err != nil { 875 return nil, err 876 } 877 878 lastInLeft := entries[len(entries)-1] 879 firstInRight := tomergeEnts[0] // NOTE(bnewbold): bug fixed here, I think 880 881 if lastInLeft.isTree() && firstInRight.isTree() { 882 merged, err := lastInLeft.Tree.appendMerge(ctx, firstInRight.Tree) 883 if err != nil { 884 return nil, err 885 } 886 887 return mst.newTree(append(append(entries[:len(entries)-1], mkTreeEntry(merged)), tomergeEnts[1:]...)), nil 888 } else { 889 return mst.newTree(append(entries, tomergeEnts...)), nil 890 } 891} 892 893// === "Create relatives" === 894 895func (mst *MerkleSearchTree) createChild(ctx context.Context) (*MerkleSearchTree, error) { 896 layer, err := mst.getLayer(ctx) 897 if err != nil { 898 return nil, err 899 } 900 901 return createMST(mst.cst, cid.Undef, []nodeEntry{}, layer-1), nil 902} 903 904func (mst *MerkleSearchTree) createParent(ctx context.Context) (*MerkleSearchTree, error) { 905 layer, err := mst.getLayer(ctx) 906 if err != nil { 907 return nil, err 908 } 909 910 return createMST(mst.cst, cid.Undef, []nodeEntry{mkTreeEntry(mst)}, layer+1), nil 911} 912 913// === "Finding insertion points" === 914 915// NOTE(@why): this smells inefficient 916// "finds index of first leaf node that is greater than or equal to the value" 917func (mst *MerkleSearchTree) findGtOrEqualLeafIndex(ctx context.Context, key string) (int, error) { 918 entries, err := mst.getEntries(ctx) 919 if err != nil { 920 return -1, err 921 } 922 923 for i, e := range entries { 924 //if e.isLeaf() && bytes.Compare(e.Key, key) > 0 { 925 if e.isLeaf() && e.Key >= key { 926 return i, nil 927 } 928 } 929 930 // "if we can't find, we're on the end" 931 return len(entries), nil 932} 933 934// === "List operations (partial tree traversal)" === 935 936// WalkLeavesFrom walks the leaves of the tree, calling the cb callback on each 937// key that's greater than or equal to the provided from key. 938// If cb returns an error, the walk is aborted and the error is returned. 939func (mst *MerkleSearchTree) WalkLeavesFrom(ctx context.Context, from string, cb func(key string, val cid.Cid) error) error { 940 index, err := mst.findGtOrEqualLeafIndex(ctx, from) 941 if err != nil { 942 return err 943 } 944 945 entries, err := mst.getEntries(ctx) 946 if err != nil { 947 return fmt.Errorf("get entries: %w", err) 948 } 949 950 if index > 0 { 951 prev := entries[index-1] 952 if !prev.isUndefined() && prev.isTree() { 953 if err := prev.Tree.WalkLeavesFrom(ctx, from, cb); err != nil { 954 return fmt.Errorf("walk leaves %d: %w", index, err) 955 } 956 } 957 } 958 959 for i, e := range entries[index:] { 960 if e.isLeaf() { 961 if err := cb(e.Key, e.Val); err != nil { 962 return err 963 } 964 } else { 965 if err := e.Tree.WalkLeavesFrom(ctx, from, cb); err != nil { 966 return fmt.Errorf("walk leaves from (%d): %w", i, err) 967 } 968 } 969 } 970 return nil 971} 972 973// TODO: Typescript: MST.list(count?, after?, before?) -> Leaf[] 974// TODO: Typescript: MST.listWithPrefix(prefix, count?) -> Leaf[] 975 976// "Walk full tree & emit nodes, consumer can bail at any point by returning false" 977// TODO: Typescript: MST.walk() -> nodeEntry (iterator) 978// TODO: Typescript: MST.paths() -> nodeEntry[][] 979// TODO: Typescript: MST.allNodes() -> nodeEntry[] 980// TODO: Typescript: MST.allCids() -> CidSet 981// TODO: Typescript: MST.leaves() -> Leaf[] 982// TODO: Typescript: MST.leafCount() -> number 983 984// TODO: Typescript: MST.walkReachable() -> nodeEntry (iterator) 985// TODO: Typescript: MST.reachableLeaves() -> Leaf[] 986 987// TODO: Typescript: MST.writeToCarStream(car) -> () 988// TODO: Typescript: MST.cidsForPath(car) -> CID[]