fork of indigo with slightly nicer lexgen
at main 5.3 kB view raw
1// Helpers for MST implementation. Following code split between mst.ts and util.ts in upstream Typescript implementation 2 3package mst 4 5import ( 6 "context" 7 "fmt" 8 "strings" 9 "unsafe" 10 11 "github.com/ipfs/go-cid" 12 cbor "github.com/ipfs/go-ipld-cbor" 13 sha256 "github.com/minio/sha256-simd" 14) 15 16// Used to determine the "depth" of keys in an MST. 17// For atproto, repo v2, the "fanout" is always 4, so we count "zeros" in 18// chunks of 2-bits. Eg, a leading 0x00 byte is 4 "zeros". 19// Typescript: leadingZerosOnHash(key, fanout) -> number 20func leadingZerosOnHash(key string) int { 21 var b []byte 22 if len(key) > 0 { 23 b = unsafe.Slice(unsafe.StringData(key), len(key)) 24 } 25 return leadingZerosOnHashBytes(b) 26} 27 28func leadingZerosOnHashBytes(key []byte) (total int) { 29 hv := sha256.Sum256(key) 30 for _, b := range hv { 31 if b&0xC0 != 0 { 32 // Common case. No leading pair of zero bits. 33 break 34 } 35 if b == 0x00 { 36 total += 4 37 continue 38 } 39 if b&0xFC == 0x00 { 40 total += 3 41 } else if b&0xF0 == 0x00 { 42 total += 2 43 } else { 44 total += 1 45 } 46 break 47 } 48 return total 49} 50 51// Typescript: layerForEntries(entries, fanout) -> (number?) 52func layerForEntries(entries []nodeEntry) int { 53 var firstLeaf nodeEntry 54 for _, e := range entries { 55 if e.isLeaf() { 56 firstLeaf = e 57 break 58 } 59 } 60 61 if firstLeaf.Kind == entryUndefined { 62 return -1 63 } 64 65 return leadingZerosOnHash(firstLeaf.Key) 66} 67 68// Typescript: deserializeNodeData(storage, data, layer) 69func deserializeNodeData(ctx context.Context, cst cbor.IpldStore, nd *NodeData, layer int) ([]nodeEntry, error) { 70 entries := []nodeEntry{} 71 if nd.Left != nil { 72 // Note: like Typescript, this is actually a lazy load 73 entries = append(entries, nodeEntry{ 74 Kind: entryTree, 75 Tree: createMST(cst, *nd.Left, nil, layer-1), 76 }) 77 } 78 79 var lastKey string 80 var keyb []byte // re-used between entries 81 for _, e := range nd.Entries { 82 if keyb == nil { 83 keyb = make([]byte, 0, int(e.PrefixLen)+len(e.KeySuffix)) 84 } 85 keyb = append(keyb[:0], lastKey[:e.PrefixLen]...) 86 keyb = append(keyb, e.KeySuffix...) 87 88 keyStr := string(keyb) 89 err := ensureValidMstKey(keyStr) 90 if err != nil { 91 return nil, err 92 } 93 94 entries = append(entries, nodeEntry{ 95 Kind: entryLeaf, 96 Key: keyStr, 97 Val: e.Val, 98 }) 99 100 if e.Tree != nil { 101 entries = append(entries, nodeEntry{ 102 Kind: entryTree, 103 Tree: createMST(cst, *e.Tree, nil, layer-1), 104 Key: keyStr, 105 }) 106 } 107 lastKey = keyStr 108 } 109 110 return entries, nil 111} 112 113// Typescript: serializeNodeData(entries) -> NodeData 114func serializeNodeData(entries []nodeEntry) (*NodeData, error) { 115 var data NodeData 116 117 i := 0 118 if len(entries) > 0 && entries[0].isTree() { 119 i++ 120 121 ptr, err := entries[0].Tree.GetPointer(context.TODO()) 122 if err != nil { 123 return nil, err 124 } 125 data.Left = &ptr 126 } 127 128 var lastKey string 129 for i < len(entries) { 130 leaf := entries[i] 131 132 if !leaf.isLeaf() { 133 return nil, fmt.Errorf("Not a valid node: two subtrees next to each other (%d, %d)", i, len(entries)) 134 } 135 i++ 136 137 var subtree *cid.Cid 138 139 if i < len(entries) { 140 next := entries[i] 141 142 if next.isTree() { 143 144 ptr, err := next.Tree.GetPointer(context.TODO()) 145 if err != nil { 146 return nil, fmt.Errorf("getting subtree pointer: %w", err) 147 } 148 149 subtree = &ptr 150 i++ 151 } 152 } 153 154 err := ensureValidMstKey(leaf.Key) 155 if err != nil { 156 return nil, err 157 } 158 159 prefixLen := countPrefixLen(lastKey, leaf.Key) 160 data.Entries = append(data.Entries, TreeEntry{ 161 PrefixLen: int64(prefixLen), 162 KeySuffix: []byte(leaf.Key)[prefixLen:], 163 Val: leaf.Val, 164 Tree: subtree, 165 }) 166 167 lastKey = leaf.Key 168 } 169 170 return &data, nil 171} 172 173// how many leading bytes are identical between the two strings? 174// Typescript: countPrefixLen(a: string, b: string) -> number 175func countPrefixLen(a, b string) int { 176 // This pattern avoids panicindex calls, as the Go compiler's prove pass can 177 // convince itself that neither a[i] nor b[i] are ever out of bounds. 178 var i int 179 for i = 0; i < len(a) && i < len(b); i++ { 180 if a[i] != b[i] { 181 return i 182 } 183 } 184 return i 185} 186 187// both computes *and* persists a tree entry; this is different from typescript 188// implementation 189// Typescript: cidForEntries(entries) -> CID 190func cidForEntries(ctx context.Context, entries []nodeEntry, cst cbor.IpldStore) (cid.Cid, error) { 191 nd, err := serializeNodeData(entries) 192 if err != nil { 193 return cid.Undef, fmt.Errorf("serializing new entries: %w", err) 194 } 195 196 return cst.Put(ctx, nd) 197} 198 199// keyHasAllValidChars reports whether s matches 200// the regexp /^[a-zA-Z0-9_:.-]+$/ without using regexp, 201// which is slower. 202func keyHasAllValidChars(s string) bool { 203 if len(s) == 0 { 204 return false 205 } 206 for i := 0; i < len(s); i++ { 207 b := s[i] 208 if 'a' <= b && b <= 'z' || 209 'A' <= b && b <= 'Z' || 210 '0' <= b && b <= '9' { 211 continue 212 } 213 switch b { 214 case '_', ':', '.', '-': 215 continue 216 default: 217 return false 218 } 219 } 220 return true 221} 222 223// Typescript: isValidMstKey(str) 224func isValidMstKey(s string) bool { 225 if len(s) > 256 || strings.Count(s, "/") != 1 { 226 return false 227 } 228 a, b, _ := strings.Cut(s, "/") 229 return len(b) > 0 && 230 keyHasAllValidChars(a) && 231 keyHasAllValidChars(b) 232} 233 234// Typescript: ensureValidMstKey(str) 235func ensureValidMstKey(s string) error { 236 if !isValidMstKey(s) { 237 return fmt.Errorf("Not a valid MST key: %s", s) 238 } 239 return nil 240}