1// Helpers for MST implementation. Following code split between mst.ts and util.ts in upstream Typescript implementation
2
3package mst
4
5import (
6 "context"
7 "fmt"
8 "strings"
9 "unsafe"
10
11 "github.com/ipfs/go-cid"
12 cbor "github.com/ipfs/go-ipld-cbor"
13 sha256 "github.com/minio/sha256-simd"
14)
15
16// Used to determine the "depth" of keys in an MST.
17// For atproto, repo v2, the "fanout" is always 4, so we count "zeros" in
18// chunks of 2-bits. Eg, a leading 0x00 byte is 4 "zeros".
19// Typescript: leadingZerosOnHash(key, fanout) -> number
20func leadingZerosOnHash(key string) int {
21 var b []byte
22 if len(key) > 0 {
23 b = unsafe.Slice(unsafe.StringData(key), len(key))
24 }
25 return leadingZerosOnHashBytes(b)
26}
27
28func leadingZerosOnHashBytes(key []byte) (total int) {
29 hv := sha256.Sum256(key)
30 for _, b := range hv {
31 if b&0xC0 != 0 {
32 // Common case. No leading pair of zero bits.
33 break
34 }
35 if b == 0x00 {
36 total += 4
37 continue
38 }
39 if b&0xFC == 0x00 {
40 total += 3
41 } else if b&0xF0 == 0x00 {
42 total += 2
43 } else {
44 total += 1
45 }
46 break
47 }
48 return total
49}
50
51// Typescript: layerForEntries(entries, fanout) -> (number?)
52func layerForEntries(entries []nodeEntry) int {
53 var firstLeaf nodeEntry
54 for _, e := range entries {
55 if e.isLeaf() {
56 firstLeaf = e
57 break
58 }
59 }
60
61 if firstLeaf.Kind == entryUndefined {
62 return -1
63 }
64
65 return leadingZerosOnHash(firstLeaf.Key)
66}
67
68// Typescript: deserializeNodeData(storage, data, layer)
69func deserializeNodeData(ctx context.Context, cst cbor.IpldStore, nd *NodeData, layer int) ([]nodeEntry, error) {
70 entries := []nodeEntry{}
71 if nd.Left != nil {
72 // Note: like Typescript, this is actually a lazy load
73 entries = append(entries, nodeEntry{
74 Kind: entryTree,
75 Tree: createMST(cst, *nd.Left, nil, layer-1),
76 })
77 }
78
79 var lastKey string
80 var keyb []byte // re-used between entries
81 for _, e := range nd.Entries {
82 if keyb == nil {
83 keyb = make([]byte, 0, int(e.PrefixLen)+len(e.KeySuffix))
84 }
85 keyb = append(keyb[:0], lastKey[:e.PrefixLen]...)
86 keyb = append(keyb, e.KeySuffix...)
87
88 keyStr := string(keyb)
89 err := ensureValidMstKey(keyStr)
90 if err != nil {
91 return nil, err
92 }
93
94 entries = append(entries, nodeEntry{
95 Kind: entryLeaf,
96 Key: keyStr,
97 Val: e.Val,
98 })
99
100 if e.Tree != nil {
101 entries = append(entries, nodeEntry{
102 Kind: entryTree,
103 Tree: createMST(cst, *e.Tree, nil, layer-1),
104 Key: keyStr,
105 })
106 }
107 lastKey = keyStr
108 }
109
110 return entries, nil
111}
112
113// Typescript: serializeNodeData(entries) -> NodeData
114func serializeNodeData(entries []nodeEntry) (*NodeData, error) {
115 var data NodeData
116
117 i := 0
118 if len(entries) > 0 && entries[0].isTree() {
119 i++
120
121 ptr, err := entries[0].Tree.GetPointer(context.TODO())
122 if err != nil {
123 return nil, err
124 }
125 data.Left = &ptr
126 }
127
128 var lastKey string
129 for i < len(entries) {
130 leaf := entries[i]
131
132 if !leaf.isLeaf() {
133 return nil, fmt.Errorf("Not a valid node: two subtrees next to each other (%d, %d)", i, len(entries))
134 }
135 i++
136
137 var subtree *cid.Cid
138
139 if i < len(entries) {
140 next := entries[i]
141
142 if next.isTree() {
143
144 ptr, err := next.Tree.GetPointer(context.TODO())
145 if err != nil {
146 return nil, fmt.Errorf("getting subtree pointer: %w", err)
147 }
148
149 subtree = &ptr
150 i++
151 }
152 }
153
154 err := ensureValidMstKey(leaf.Key)
155 if err != nil {
156 return nil, err
157 }
158
159 prefixLen := countPrefixLen(lastKey, leaf.Key)
160 data.Entries = append(data.Entries, TreeEntry{
161 PrefixLen: int64(prefixLen),
162 KeySuffix: []byte(leaf.Key)[prefixLen:],
163 Val: leaf.Val,
164 Tree: subtree,
165 })
166
167 lastKey = leaf.Key
168 }
169
170 return &data, nil
171}
172
173// how many leading bytes are identical between the two strings?
174// Typescript: countPrefixLen(a: string, b: string) -> number
175func countPrefixLen(a, b string) int {
176 // This pattern avoids panicindex calls, as the Go compiler's prove pass can
177 // convince itself that neither a[i] nor b[i] are ever out of bounds.
178 var i int
179 for i = 0; i < len(a) && i < len(b); i++ {
180 if a[i] != b[i] {
181 return i
182 }
183 }
184 return i
185}
186
187// both computes *and* persists a tree entry; this is different from typescript
188// implementation
189// Typescript: cidForEntries(entries) -> CID
190func cidForEntries(ctx context.Context, entries []nodeEntry, cst cbor.IpldStore) (cid.Cid, error) {
191 nd, err := serializeNodeData(entries)
192 if err != nil {
193 return cid.Undef, fmt.Errorf("serializing new entries: %w", err)
194 }
195
196 return cst.Put(ctx, nd)
197}
198
199// keyHasAllValidChars reports whether s matches
200// the regexp /^[a-zA-Z0-9_:.-]+$/ without using regexp,
201// which is slower.
202func keyHasAllValidChars(s string) bool {
203 if len(s) == 0 {
204 return false
205 }
206 for i := 0; i < len(s); i++ {
207 b := s[i]
208 if 'a' <= b && b <= 'z' ||
209 'A' <= b && b <= 'Z' ||
210 '0' <= b && b <= '9' {
211 continue
212 }
213 switch b {
214 case '_', ':', '.', '-':
215 continue
216 default:
217 return false
218 }
219 }
220 return true
221}
222
223// Typescript: isValidMstKey(str)
224func isValidMstKey(s string) bool {
225 if len(s) > 256 || strings.Count(s, "/") != 1 {
226 return false
227 }
228 a, b, _ := strings.Cut(s, "/")
229 return len(b) > 0 &&
230 keyHasAllValidChars(a) &&
231 keyHasAllValidChars(b)
232}
233
234// Typescript: ensureValidMstKey(str)
235func ensureValidMstKey(s string) error {
236 if !isValidMstKey(s) {
237 return fmt.Errorf("Not a valid MST key: %s", s)
238 }
239 return nil
240}