+133
-1124
Cargo.lock
+133
-1124
Cargo.lock
···
3
3
version = 4
4
4
5
5
[[package]]
6
-
name = "addr2line"
7
-
version = "0.25.1"
8
-
source = "registry+https://github.com/rust-lang/crates.io-index"
9
-
checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b"
10
-
dependencies = [
11
-
"gimli",
12
-
]
13
-
14
-
[[package]]
15
-
name = "adler2"
16
-
version = "2.0.1"
17
-
source = "registry+https://github.com/rust-lang/crates.io-index"
18
-
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
19
-
20
-
[[package]]
21
-
name = "aho-corasick"
22
-
version = "1.1.3"
23
-
source = "registry+https://github.com/rust-lang/crates.io-index"
24
-
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
25
-
dependencies = [
26
-
"memchr",
27
-
]
28
-
29
-
[[package]]
30
-
name = "anes"
31
-
version = "0.1.6"
32
-
source = "registry+https://github.com/rust-lang/crates.io-index"
33
-
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
34
-
35
-
[[package]]
36
6
name = "anstream"
37
7
version = "0.6.21"
38
8
source = "registry+https://github.com/rust-lang/crates.io-index"
···
68
38
source = "registry+https://github.com/rust-lang/crates.io-index"
69
39
checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2"
70
40
dependencies = [
71
-
"windows-sys 0.60.2",
41
+
"windows-sys",
72
42
]
73
43
74
44
[[package]]
···
79
49
dependencies = [
80
50
"anstyle",
81
51
"once_cell_polyfill",
82
-
"windows-sys 0.60.2",
83
-
]
84
-
85
-
[[package]]
86
-
name = "anyhow"
87
-
version = "1.0.100"
88
-
source = "registry+https://github.com/rust-lang/crates.io-index"
89
-
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
90
-
91
-
[[package]]
92
-
name = "autocfg"
93
-
version = "1.5.0"
94
-
source = "registry+https://github.com/rust-lang/crates.io-index"
95
-
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
96
-
97
-
[[package]]
98
-
name = "backtrace"
99
-
version = "0.3.76"
100
-
source = "registry+https://github.com/rust-lang/crates.io-index"
101
-
checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
102
-
dependencies = [
103
-
"addr2line",
104
-
"cfg-if",
105
-
"libc",
106
-
"miniz_oxide",
107
-
"object",
108
-
"rustc-demangle",
109
-
"windows-link",
110
-
]
111
-
112
-
[[package]]
113
-
name = "base-x"
114
-
version = "0.2.11"
115
-
source = "registry+https://github.com/rust-lang/crates.io-index"
116
-
checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270"
117
-
118
-
[[package]]
119
-
name = "base256emoji"
120
-
version = "1.0.2"
121
-
source = "registry+https://github.com/rust-lang/crates.io-index"
122
-
checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c"
123
-
dependencies = [
124
-
"const-str",
125
-
"match-lookup",
126
-
]
127
-
128
-
[[package]]
129
-
name = "bincode"
130
-
version = "2.0.1"
131
-
source = "registry+https://github.com/rust-lang/crates.io-index"
132
-
checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740"
133
-
dependencies = [
134
-
"bincode_derive",
135
-
"serde",
136
-
"unty",
137
-
]
138
-
139
-
[[package]]
140
-
name = "bincode_derive"
141
-
version = "2.0.1"
142
-
source = "registry+https://github.com/rust-lang/crates.io-index"
143
-
checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09"
144
-
dependencies = [
145
-
"virtue",
52
+
"windows-sys",
146
53
]
147
54
148
55
[[package]]
···
152
59
checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
153
60
154
61
[[package]]
155
-
name = "bumpalo"
156
-
version = "3.19.0"
157
-
source = "registry+https://github.com/rust-lang/crates.io-index"
158
-
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
159
-
160
-
[[package]]
161
-
name = "bytes"
162
-
version = "1.10.1"
62
+
name = "byteorder-lite"
63
+
version = "0.1.0"
163
64
source = "registry+https://github.com/rust-lang/crates.io-index"
164
-
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
65
+
checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
165
66
166
67
[[package]]
167
-
name = "cast"
168
-
version = "0.3.0"
68
+
name = "byteview"
69
+
version = "0.10.0"
169
70
source = "registry+https://github.com/rust-lang/crates.io-index"
170
-
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
171
-
172
-
[[package]]
173
-
name = "cbor4ii"
174
-
version = "0.2.14"
175
-
source = "registry+https://github.com/rust-lang/crates.io-index"
176
-
checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4"
177
-
dependencies = [
178
-
"serde",
179
-
]
71
+
checksum = "dda4398f387cc6395a3e93b3867cd9abda914c97a0b344d1eefb2e5c51785fca"
180
72
181
73
[[package]]
182
74
name = "cfg-if"
···
185
77
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
186
78
187
79
[[package]]
188
-
name = "ciborium"
189
-
version = "0.2.2"
190
-
source = "registry+https://github.com/rust-lang/crates.io-index"
191
-
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
192
-
dependencies = [
193
-
"ciborium-io",
194
-
"ciborium-ll",
195
-
"serde",
196
-
]
197
-
198
-
[[package]]
199
-
name = "ciborium-io"
200
-
version = "0.2.2"
201
-
source = "registry+https://github.com/rust-lang/crates.io-index"
202
-
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
203
-
204
-
[[package]]
205
-
name = "ciborium-ll"
206
-
version = "0.2.2"
207
-
source = "registry+https://github.com/rust-lang/crates.io-index"
208
-
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
209
-
dependencies = [
210
-
"ciborium-io",
211
-
"half",
212
-
]
213
-
214
-
[[package]]
215
-
name = "cid"
216
-
version = "0.11.1"
217
-
source = "registry+https://github.com/rust-lang/crates.io-index"
218
-
checksum = "3147d8272e8fa0ccd29ce51194dd98f79ddfb8191ba9e3409884e751798acf3a"
219
-
dependencies = [
220
-
"core2",
221
-
"multibase",
222
-
"multihash",
223
-
"serde",
224
-
"serde_bytes",
225
-
"unsigned-varint 0.8.0",
226
-
]
227
-
228
-
[[package]]
229
80
name = "clap"
230
81
version = "4.5.48"
231
82
source = "registry+https://github.com/rust-lang/crates.io-index"
···
256
107
"heck",
257
108
"proc-macro2",
258
109
"quote",
259
-
"syn 2.0.106",
110
+
"syn",
260
111
]
261
112
262
113
[[package]]
···
272
123
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
273
124
274
125
[[package]]
275
-
name = "const-str"
276
-
version = "0.4.3"
126
+
name = "compare"
127
+
version = "0.0.6"
277
128
source = "registry+https://github.com/rust-lang/crates.io-index"
278
-
checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3"
129
+
checksum = "ea0095f6103c2a8b44acd6fd15960c801dafebf02e21940360833e0673f48ba7"
279
130
280
131
[[package]]
281
-
name = "core2"
282
-
version = "0.4.0"
132
+
name = "crossbeam-epoch"
133
+
version = "0.9.18"
283
134
source = "registry+https://github.com/rust-lang/crates.io-index"
284
-
checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
135
+
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
285
136
dependencies = [
286
-
"memchr",
137
+
"crossbeam-utils",
287
138
]
288
139
289
140
[[package]]
290
-
name = "criterion"
291
-
version = "0.7.0"
141
+
name = "crossbeam-skiplist"
142
+
version = "0.1.3"
292
143
source = "registry+https://github.com/rust-lang/crates.io-index"
293
-
checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928"
294
-
dependencies = [
295
-
"anes",
296
-
"cast",
297
-
"ciborium",
298
-
"clap",
299
-
"criterion-plot",
300
-
"itertools",
301
-
"num-traits",
302
-
"oorandom",
303
-
"plotters",
304
-
"rayon",
305
-
"regex",
306
-
"serde",
307
-
"serde_json",
308
-
"tinytemplate",
309
-
"tokio",
310
-
"walkdir",
311
-
]
312
-
313
-
[[package]]
314
-
name = "criterion-plot"
315
-
version = "0.6.0"
316
-
source = "registry+https://github.com/rust-lang/crates.io-index"
317
-
checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338"
318
-
dependencies = [
319
-
"cast",
320
-
"itertools",
321
-
]
322
-
323
-
[[package]]
324
-
name = "crossbeam-deque"
325
-
version = "0.8.6"
326
-
source = "registry+https://github.com/rust-lang/crates.io-index"
327
-
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
144
+
checksum = "df29de440c58ca2cc6e587ec3d22347551a32435fbde9d2bff64e78a9ffa151b"
328
145
dependencies = [
329
146
"crossbeam-epoch",
330
147
"crossbeam-utils",
331
148
]
332
149
333
150
[[package]]
334
-
name = "crossbeam-epoch"
335
-
version = "0.9.18"
336
-
source = "registry+https://github.com/rust-lang/crates.io-index"
337
-
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
338
-
dependencies = [
339
-
"crossbeam-utils",
340
-
]
341
-
342
-
[[package]]
343
151
name = "crossbeam-utils"
344
152
version = "0.8.21"
345
153
source = "registry+https://github.com/rust-lang/crates.io-index"
346
154
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
347
155
348
156
[[package]]
349
-
name = "crunchy"
350
-
version = "0.2.4"
157
+
name = "dashmap"
158
+
version = "6.1.0"
351
159
source = "registry+https://github.com/rust-lang/crates.io-index"
352
-
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
353
-
354
-
[[package]]
355
-
name = "data-encoding"
356
-
version = "2.9.0"
357
-
source = "registry+https://github.com/rust-lang/crates.io-index"
358
-
checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476"
359
-
360
-
[[package]]
361
-
name = "data-encoding-macro"
362
-
version = "0.1.18"
363
-
source = "registry+https://github.com/rust-lang/crates.io-index"
364
-
checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d"
160
+
checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
365
161
dependencies = [
366
-
"data-encoding",
367
-
"data-encoding-macro-internal",
162
+
"cfg-if",
163
+
"crossbeam-utils",
164
+
"hashbrown 0.14.5",
165
+
"lock_api",
166
+
"once_cell",
167
+
"parking_lot_core",
368
168
]
369
169
370
170
[[package]]
371
-
name = "data-encoding-macro-internal"
372
-
version = "0.1.16"
373
-
source = "registry+https://github.com/rust-lang/crates.io-index"
374
-
checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976"
375
-
dependencies = [
376
-
"data-encoding",
377
-
"syn 2.0.106",
378
-
]
379
-
380
-
[[package]]
381
-
name = "either"
382
-
version = "1.15.0"
383
-
source = "registry+https://github.com/rust-lang/crates.io-index"
384
-
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
385
-
386
-
[[package]]
387
-
name = "env_filter"
388
-
version = "0.1.3"
171
+
name = "enum_dispatch"
172
+
version = "0.3.13"
389
173
source = "registry+https://github.com/rust-lang/crates.io-index"
390
-
checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0"
174
+
checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd"
391
175
dependencies = [
392
-
"log",
393
-
"regex",
176
+
"once_cell",
177
+
"proc-macro2",
178
+
"quote",
179
+
"syn",
394
180
]
395
181
396
182
[[package]]
397
-
name = "env_logger"
398
-
version = "0.11.8"
183
+
name = "equivalent"
184
+
version = "1.0.2"
399
185
source = "registry+https://github.com/rust-lang/crates.io-index"
400
-
checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f"
401
-
dependencies = [
402
-
"anstream",
403
-
"anstyle",
404
-
"env_filter",
405
-
"jiff",
406
-
"log",
407
-
]
186
+
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
408
187
409
188
[[package]]
410
189
name = "errno"
···
413
192
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
414
193
dependencies = [
415
194
"libc",
416
-
"windows-sys 0.60.2",
195
+
"windows-sys",
417
196
]
418
197
419
198
[[package]]
420
-
name = "fallible-iterator"
421
-
version = "0.3.0"
422
-
source = "registry+https://github.com/rust-lang/crates.io-index"
423
-
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
424
-
425
-
[[package]]
426
-
name = "fallible-streaming-iterator"
427
-
version = "0.1.9"
428
-
source = "registry+https://github.com/rust-lang/crates.io-index"
429
-
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
430
-
431
-
[[package]]
432
199
name = "fastrand"
433
200
version = "2.3.0"
434
201
source = "registry+https://github.com/rust-lang/crates.io-index"
435
202
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
436
203
437
204
[[package]]
438
-
name = "foldhash"
439
-
version = "0.1.5"
205
+
name = "fjall"
206
+
version = "3.0.1"
440
207
source = "registry+https://github.com/rust-lang/crates.io-index"
441
-
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
442
-
443
-
[[package]]
444
-
name = "futures"
445
-
version = "0.3.31"
446
-
source = "registry+https://github.com/rust-lang/crates.io-index"
447
-
checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
208
+
checksum = "4f69637c02d38ad1b0f003101d0195a60368130aa17d9ef78b1557d265a22093"
448
209
dependencies = [
449
-
"futures-channel",
450
-
"futures-core",
451
-
"futures-executor",
452
-
"futures-io",
453
-
"futures-sink",
454
-
"futures-task",
455
-
"futures-util",
210
+
"byteorder-lite",
211
+
"byteview",
212
+
"dashmap",
213
+
"flume",
214
+
"log",
215
+
"lsm-tree",
216
+
"lz4_flex",
217
+
"tempfile",
218
+
"xxhash-rust",
456
219
]
457
220
458
221
[[package]]
459
-
name = "futures-channel"
460
-
version = "0.3.31"
222
+
name = "flume"
223
+
version = "0.12.0"
461
224
source = "registry+https://github.com/rust-lang/crates.io-index"
462
-
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
225
+
checksum = "5e139bc46ca777eb5efaf62df0ab8cc5fd400866427e56c68b22e414e53bd3be"
463
226
dependencies = [
464
-
"futures-core",
465
-
"futures-sink",
466
-
]
467
-
468
-
[[package]]
469
-
name = "futures-core"
470
-
version = "0.3.31"
471
-
source = "registry+https://github.com/rust-lang/crates.io-index"
472
-
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
473
-
474
-
[[package]]
475
-
name = "futures-executor"
476
-
version = "0.3.31"
477
-
source = "registry+https://github.com/rust-lang/crates.io-index"
478
-
checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
479
-
dependencies = [
480
-
"futures-core",
481
-
"futures-task",
482
-
"futures-util",
483
-
]
484
-
485
-
[[package]]
486
-
name = "futures-io"
487
-
version = "0.3.31"
488
-
source = "registry+https://github.com/rust-lang/crates.io-index"
489
-
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
490
-
491
-
[[package]]
492
-
name = "futures-macro"
493
-
version = "0.3.31"
494
-
source = "registry+https://github.com/rust-lang/crates.io-index"
495
-
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
496
-
dependencies = [
497
-
"proc-macro2",
498
-
"quote",
499
-
"syn 2.0.106",
500
-
]
501
-
502
-
[[package]]
503
-
name = "futures-sink"
504
-
version = "0.3.31"
505
-
source = "registry+https://github.com/rust-lang/crates.io-index"
506
-
checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
507
-
508
-
[[package]]
509
-
name = "futures-task"
510
-
version = "0.3.31"
511
-
source = "registry+https://github.com/rust-lang/crates.io-index"
512
-
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
513
-
514
-
[[package]]
515
-
name = "futures-util"
516
-
version = "0.3.31"
517
-
source = "registry+https://github.com/rust-lang/crates.io-index"
518
-
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
519
-
dependencies = [
520
-
"futures-channel",
521
-
"futures-core",
522
-
"futures-io",
523
-
"futures-macro",
524
-
"futures-sink",
525
-
"futures-task",
526
-
"memchr",
527
-
"pin-project-lite",
528
-
"pin-utils",
529
-
"slab",
227
+
"spin",
530
228
]
531
229
532
230
[[package]]
···
538
236
"cfg-if",
539
237
"libc",
540
238
"r-efi",
541
-
"wasi 0.14.7+wasi-0.2.4",
542
-
]
543
-
544
-
[[package]]
545
-
name = "gimli"
546
-
version = "0.32.3"
547
-
source = "registry+https://github.com/rust-lang/crates.io-index"
548
-
checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
549
-
550
-
[[package]]
551
-
name = "half"
552
-
version = "2.7.0"
553
-
source = "registry+https://github.com/rust-lang/crates.io-index"
554
-
checksum = "e54c115d4f30f52c67202f079c5f9d8b49db4691f460fdb0b4c2e838261b2ba5"
555
-
dependencies = [
556
-
"cfg-if",
557
-
"crunchy",
558
-
"zerocopy",
239
+
"wasi",
559
240
]
560
241
561
242
[[package]]
562
243
name = "hashbrown"
563
-
version = "0.15.5"
244
+
version = "0.14.5"
564
245
source = "registry+https://github.com/rust-lang/crates.io-index"
565
-
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
566
-
dependencies = [
567
-
"foldhash",
568
-
]
246
+
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
569
247
570
248
[[package]]
571
-
name = "hashlink"
572
-
version = "0.10.0"
249
+
name = "hashbrown"
250
+
version = "0.16.1"
573
251
source = "registry+https://github.com/rust-lang/crates.io-index"
574
-
checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
575
-
dependencies = [
576
-
"hashbrown",
577
-
]
252
+
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
578
253
579
254
[[package]]
580
255
name = "heck"
···
583
258
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
584
259
585
260
[[package]]
586
-
name = "io-uring"
587
-
version = "0.7.10"
588
-
source = "registry+https://github.com/rust-lang/crates.io-index"
589
-
checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b"
590
-
dependencies = [
591
-
"bitflags",
592
-
"cfg-if",
593
-
"libc",
594
-
]
595
-
596
-
[[package]]
597
-
name = "ipld-core"
598
-
version = "0.4.2"
599
-
source = "registry+https://github.com/rust-lang/crates.io-index"
600
-
checksum = "104718b1cc124d92a6d01ca9c9258a7df311405debb3408c445a36452f9bf8db"
601
-
dependencies = [
602
-
"cid",
603
-
"serde",
604
-
"serde_bytes",
605
-
]
606
-
607
-
[[package]]
608
-
name = "iroh-car"
609
-
version = "0.5.1"
261
+
name = "interval-heap"
262
+
version = "0.0.5"
610
263
source = "registry+https://github.com/rust-lang/crates.io-index"
611
-
checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a"
264
+
checksum = "11274e5e8e89b8607cfedc2910b6626e998779b48a019151c7604d0adcb86ac6"
612
265
dependencies = [
613
-
"anyhow",
614
-
"cid",
615
-
"futures",
616
-
"serde",
617
-
"serde_ipld_dagcbor",
618
-
"thiserror 1.0.69",
619
-
"tokio",
620
-
"unsigned-varint 0.7.2",
266
+
"compare",
621
267
]
622
268
623
269
[[package]]
···
627
273
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
628
274
629
275
[[package]]
630
-
name = "itertools"
631
-
version = "0.13.0"
632
-
source = "registry+https://github.com/rust-lang/crates.io-index"
633
-
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
634
-
dependencies = [
635
-
"either",
636
-
]
637
-
638
-
[[package]]
639
-
name = "itoa"
640
-
version = "1.0.15"
641
-
source = "registry+https://github.com/rust-lang/crates.io-index"
642
-
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
643
-
644
-
[[package]]
645
-
name = "jiff"
646
-
version = "0.2.15"
647
-
source = "registry+https://github.com/rust-lang/crates.io-index"
648
-
checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49"
649
-
dependencies = [
650
-
"jiff-static",
651
-
"log",
652
-
"portable-atomic",
653
-
"portable-atomic-util",
654
-
"serde",
655
-
]
656
-
657
-
[[package]]
658
-
name = "jiff-static"
659
-
version = "0.2.15"
660
-
source = "registry+https://github.com/rust-lang/crates.io-index"
661
-
checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4"
662
-
dependencies = [
663
-
"proc-macro2",
664
-
"quote",
665
-
"syn 2.0.106",
666
-
]
667
-
668
-
[[package]]
669
-
name = "js-sys"
670
-
version = "0.3.81"
671
-
source = "registry+https://github.com/rust-lang/crates.io-index"
672
-
checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305"
673
-
dependencies = [
674
-
"once_cell",
675
-
"wasm-bindgen",
676
-
]
677
-
678
-
[[package]]
679
276
name = "libc"
680
277
version = "0.2.176"
681
278
source = "registry+https://github.com/rust-lang/crates.io-index"
682
279
checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174"
683
-
684
-
[[package]]
685
-
name = "libsqlite3-sys"
686
-
version = "0.35.0"
687
-
source = "registry+https://github.com/rust-lang/crates.io-index"
688
-
checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f"
689
-
dependencies = [
690
-
"pkg-config",
691
-
"vcpkg",
692
-
]
693
280
694
281
[[package]]
695
282
name = "linux-raw-sys"
···
713
300
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
714
301
715
302
[[package]]
716
-
name = "match-lookup"
717
-
version = "0.1.1"
303
+
name = "lsm-tree"
304
+
version = "3.0.1"
718
305
source = "registry+https://github.com/rust-lang/crates.io-index"
719
-
checksum = "1265724d8cb29dbbc2b0f06fffb8bf1a8c0cf73a78eede9ba73a4a66c52a981e"
306
+
checksum = "b875f1dfe14f557f805b167fb9b0fc54c5560c7a4bd6ae02535b2846f276a8cb"
720
307
dependencies = [
721
-
"proc-macro2",
722
-
"quote",
723
-
"syn 1.0.109",
724
-
]
725
-
726
-
[[package]]
727
-
name = "memchr"
728
-
version = "2.7.6"
729
-
source = "registry+https://github.com/rust-lang/crates.io-index"
730
-
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
731
-
732
-
[[package]]
733
-
name = "miniz_oxide"
734
-
version = "0.8.9"
735
-
source = "registry+https://github.com/rust-lang/crates.io-index"
736
-
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
737
-
dependencies = [
738
-
"adler2",
739
-
]
740
-
741
-
[[package]]
742
-
name = "mio"
743
-
version = "1.0.4"
744
-
source = "registry+https://github.com/rust-lang/crates.io-index"
745
-
checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
746
-
dependencies = [
747
-
"libc",
748
-
"wasi 0.11.1+wasi-snapshot-preview1",
749
-
"windows-sys 0.59.0",
750
-
]
751
-
752
-
[[package]]
753
-
name = "multibase"
754
-
version = "0.9.2"
755
-
source = "registry+https://github.com/rust-lang/crates.io-index"
756
-
checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77"
757
-
dependencies = [
758
-
"base-x",
759
-
"base256emoji",
760
-
"data-encoding",
761
-
"data-encoding-macro",
308
+
"byteorder-lite",
309
+
"byteview",
310
+
"crossbeam-skiplist",
311
+
"enum_dispatch",
312
+
"interval-heap",
313
+
"log",
314
+
"lz4_flex",
315
+
"quick_cache",
316
+
"rustc-hash",
317
+
"self_cell",
318
+
"sfa",
319
+
"tempfile",
320
+
"varint-rs",
321
+
"xxhash-rust",
762
322
]
763
323
764
324
[[package]]
765
-
name = "multihash"
766
-
version = "0.19.3"
325
+
name = "lz4_flex"
326
+
version = "0.11.5"
767
327
source = "registry+https://github.com/rust-lang/crates.io-index"
768
-
checksum = "6b430e7953c29dd6a09afc29ff0bb69c6e306329ee6794700aee27b76a1aea8d"
328
+
checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a"
769
329
dependencies = [
770
-
"core2",
771
-
"serde",
772
-
"unsigned-varint 0.8.0",
773
-
]
774
-
775
-
[[package]]
776
-
name = "num-traits"
777
-
version = "0.2.19"
778
-
source = "registry+https://github.com/rust-lang/crates.io-index"
779
-
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
780
-
dependencies = [
781
-
"autocfg",
782
-
]
783
-
784
-
[[package]]
785
-
name = "object"
786
-
version = "0.37.3"
787
-
source = "registry+https://github.com/rust-lang/crates.io-index"
788
-
checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe"
789
-
dependencies = [
790
-
"memchr",
330
+
"twox-hash",
791
331
]
792
332
793
333
[[package]]
···
803
343
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
804
344
805
345
[[package]]
806
-
name = "oorandom"
807
-
version = "11.1.5"
808
-
source = "registry+https://github.com/rust-lang/crates.io-index"
809
-
checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
810
-
811
-
[[package]]
812
-
name = "parking_lot"
813
-
version = "0.12.5"
814
-
source = "registry+https://github.com/rust-lang/crates.io-index"
815
-
checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
816
-
dependencies = [
817
-
"lock_api",
818
-
"parking_lot_core",
819
-
]
820
-
821
-
[[package]]
822
346
name = "parking_lot_core"
823
347
version = "0.9.12"
824
348
source = "registry+https://github.com/rust-lang/crates.io-index"
···
832
356
]
833
357
834
358
[[package]]
835
-
name = "pin-project-lite"
836
-
version = "0.2.16"
837
-
source = "registry+https://github.com/rust-lang/crates.io-index"
838
-
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
839
-
840
-
[[package]]
841
-
name = "pin-utils"
842
-
version = "0.1.0"
843
-
source = "registry+https://github.com/rust-lang/crates.io-index"
844
-
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
845
-
846
-
[[package]]
847
-
name = "pkg-config"
848
-
version = "0.3.32"
359
+
name = "proc-macro2"
360
+
version = "1.0.101"
849
361
source = "registry+https://github.com/rust-lang/crates.io-index"
850
-
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
851
-
852
-
[[package]]
853
-
name = "plotters"
854
-
version = "0.3.7"
855
-
source = "registry+https://github.com/rust-lang/crates.io-index"
856
-
checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
362
+
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
857
363
dependencies = [
858
-
"num-traits",
859
-
"plotters-backend",
860
-
"plotters-svg",
861
-
"wasm-bindgen",
862
-
"web-sys",
364
+
"unicode-ident",
863
365
]
864
366
865
367
[[package]]
866
-
name = "plotters-backend"
867
-
version = "0.3.7"
868
-
source = "registry+https://github.com/rust-lang/crates.io-index"
869
-
checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
870
-
871
-
[[package]]
872
-
name = "plotters-svg"
873
-
version = "0.3.7"
368
+
name = "quick_cache"
369
+
version = "0.6.18"
874
370
source = "registry+https://github.com/rust-lang/crates.io-index"
875
-
checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
371
+
checksum = "7ada44a88ef953a3294f6eb55d2007ba44646015e18613d2f213016379203ef3"
876
372
dependencies = [
877
-
"plotters-backend",
878
-
]
879
-
880
-
[[package]]
881
-
name = "portable-atomic"
882
-
version = "1.11.1"
883
-
source = "registry+https://github.com/rust-lang/crates.io-index"
884
-
checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
885
-
886
-
[[package]]
887
-
name = "portable-atomic-util"
888
-
version = "0.2.4"
889
-
source = "registry+https://github.com/rust-lang/crates.io-index"
890
-
checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
891
-
dependencies = [
892
-
"portable-atomic",
893
-
]
894
-
895
-
[[package]]
896
-
name = "proc-macro2"
897
-
version = "1.0.101"
898
-
source = "registry+https://github.com/rust-lang/crates.io-index"
899
-
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
900
-
dependencies = [
901
-
"unicode-ident",
373
+
"equivalent",
374
+
"hashbrown 0.16.1",
902
375
]
903
376
904
377
[[package]]
···
917
390
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
918
391
919
392
[[package]]
920
-
name = "rayon"
921
-
version = "1.11.0"
922
-
source = "registry+https://github.com/rust-lang/crates.io-index"
923
-
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
924
-
dependencies = [
925
-
"either",
926
-
"rayon-core",
927
-
]
928
-
929
-
[[package]]
930
-
name = "rayon-core"
931
-
version = "1.13.0"
932
-
source = "registry+https://github.com/rust-lang/crates.io-index"
933
-
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
934
-
dependencies = [
935
-
"crossbeam-deque",
936
-
"crossbeam-utils",
937
-
]
938
-
939
-
[[package]]
940
-
name = "redb"
941
-
version = "3.1.0"
942
-
source = "registry+https://github.com/rust-lang/crates.io-index"
943
-
checksum = "ae323eb086579a3769daa2c753bb96deb95993c534711e0dbe881b5192906a06"
944
-
dependencies = [
945
-
"libc",
946
-
]
947
-
948
-
[[package]]
949
393
name = "redox_syscall"
950
394
version = "0.5.18"
951
395
source = "registry+https://github.com/rust-lang/crates.io-index"
···
955
399
]
956
400
957
401
[[package]]
958
-
name = "regex"
959
-
version = "1.11.3"
960
-
source = "registry+https://github.com/rust-lang/crates.io-index"
961
-
checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c"
962
-
dependencies = [
963
-
"aho-corasick",
964
-
"memchr",
965
-
"regex-automata",
966
-
"regex-syntax",
967
-
]
968
-
969
-
[[package]]
970
-
name = "regex-automata"
971
-
version = "0.4.11"
972
-
source = "registry+https://github.com/rust-lang/crates.io-index"
973
-
checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad"
974
-
dependencies = [
975
-
"aho-corasick",
976
-
"memchr",
977
-
"regex-syntax",
978
-
]
979
-
980
-
[[package]]
981
-
name = "regex-syntax"
982
-
version = "0.8.6"
983
-
source = "registry+https://github.com/rust-lang/crates.io-index"
984
-
checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
985
-
986
-
[[package]]
987
402
name = "repo-stream"
988
-
version = "0.1.1"
403
+
version = "0.2.2"
989
404
dependencies = [
990
-
"bincode",
991
405
"clap",
992
-
"criterion",
993
-
"env_logger",
994
-
"futures",
995
-
"futures-core",
996
-
"ipld-core",
997
-
"iroh-car",
998
-
"log",
999
-
"multibase",
1000
-
"redb",
1001
-
"rusqlite",
1002
-
"serde",
1003
-
"serde_bytes",
1004
-
"serde_ipld_dagcbor",
1005
-
"tempfile",
1006
-
"thiserror 2.0.17",
1007
-
"tokio",
1008
-
]
1009
-
1010
-
[[package]]
1011
-
name = "rusqlite"
1012
-
version = "0.37.0"
1013
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1014
-
checksum = "165ca6e57b20e1351573e3729b958bc62f0e48025386970b6e4d29e7a7e71f3f"
1015
-
dependencies = [
1016
-
"bitflags",
1017
-
"fallible-iterator",
1018
-
"fallible-streaming-iterator",
1019
-
"hashlink",
1020
-
"libsqlite3-sys",
1021
-
"smallvec",
406
+
"fjall",
1022
407
]
1023
408
1024
409
[[package]]
1025
-
name = "rustc-demangle"
1026
-
version = "0.1.26"
410
+
name = "rustc-hash"
411
+
version = "2.1.1"
1027
412
source = "registry+https://github.com/rust-lang/crates.io-index"
1028
-
checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
413
+
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
1029
414
1030
415
[[package]]
1031
416
name = "rustix"
···
1037
422
"errno",
1038
423
"libc",
1039
424
"linux-raw-sys",
1040
-
"windows-sys 0.60.2",
1041
-
]
1042
-
1043
-
[[package]]
1044
-
name = "rustversion"
1045
-
version = "1.0.22"
1046
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1047
-
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
1048
-
1049
-
[[package]]
1050
-
name = "ryu"
1051
-
version = "1.0.20"
1052
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1053
-
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
1054
-
1055
-
[[package]]
1056
-
name = "same-file"
1057
-
version = "1.0.6"
1058
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1059
-
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
1060
-
dependencies = [
1061
-
"winapi-util",
425
+
"windows-sys",
1062
426
]
1063
427
1064
428
[[package]]
···
1068
432
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
1069
433
1070
434
[[package]]
1071
-
name = "serde"
1072
-
version = "1.0.228"
435
+
name = "self_cell"
436
+
version = "1.2.2"
1073
437
source = "registry+https://github.com/rust-lang/crates.io-index"
1074
-
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
1075
-
dependencies = [
1076
-
"serde_core",
1077
-
"serde_derive",
1078
-
]
438
+
checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89"
1079
439
1080
440
[[package]]
1081
-
name = "serde_bytes"
1082
-
version = "0.11.19"
441
+
name = "sfa"
442
+
version = "1.0.0"
1083
443
source = "registry+https://github.com/rust-lang/crates.io-index"
1084
-
checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8"
444
+
checksum = "a1296838937cab56cd6c4eeeb8718ec777383700c33f060e2869867bd01d1175"
1085
445
dependencies = [
1086
-
"serde",
1087
-
"serde_core",
446
+
"byteorder-lite",
447
+
"log",
448
+
"xxhash-rust",
1088
449
]
1089
450
1090
451
[[package]]
1091
-
name = "serde_core"
1092
-
version = "1.0.228"
1093
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1094
-
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
1095
-
dependencies = [
1096
-
"serde_derive",
1097
-
]
1098
-
1099
-
[[package]]
1100
-
name = "serde_derive"
1101
-
version = "1.0.228"
1102
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1103
-
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
1104
-
dependencies = [
1105
-
"proc-macro2",
1106
-
"quote",
1107
-
"syn 2.0.106",
1108
-
]
1109
-
1110
-
[[package]]
1111
-
name = "serde_ipld_dagcbor"
1112
-
version = "0.6.4"
1113
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1114
-
checksum = "46182f4f08349a02b45c998ba3215d3f9de826246ba02bb9dddfe9a2a2100778"
1115
-
dependencies = [
1116
-
"cbor4ii",
1117
-
"ipld-core",
1118
-
"scopeguard",
1119
-
"serde",
1120
-
]
1121
-
1122
-
[[package]]
1123
-
name = "serde_json"
1124
-
version = "1.0.145"
1125
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1126
-
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
1127
-
dependencies = [
1128
-
"itoa",
1129
-
"memchr",
1130
-
"ryu",
1131
-
"serde",
1132
-
"serde_core",
1133
-
]
1134
-
1135
-
[[package]]
1136
-
name = "signal-hook-registry"
1137
-
version = "1.4.6"
1138
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1139
-
checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b"
1140
-
dependencies = [
1141
-
"libc",
1142
-
]
1143
-
1144
-
[[package]]
1145
-
name = "slab"
1146
-
version = "0.4.11"
1147
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1148
-
checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
1149
-
1150
-
[[package]]
1151
452
name = "smallvec"
1152
453
version = "1.15.1"
1153
454
source = "registry+https://github.com/rust-lang/crates.io-index"
1154
455
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
1155
456
1156
457
[[package]]
1157
-
name = "socket2"
1158
-
version = "0.6.0"
458
+
name = "spin"
459
+
version = "0.9.8"
1159
460
source = "registry+https://github.com/rust-lang/crates.io-index"
1160
-
checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807"
461
+
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
1161
462
dependencies = [
1162
-
"libc",
1163
-
"windows-sys 0.59.0",
463
+
"lock_api",
1164
464
]
1165
465
1166
466
[[package]]
···
1171
471
1172
472
[[package]]
1173
473
name = "syn"
1174
-
version = "1.0.109"
1175
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1176
-
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
1177
-
dependencies = [
1178
-
"proc-macro2",
1179
-
"quote",
1180
-
"unicode-ident",
1181
-
]
1182
-
1183
-
[[package]]
1184
-
name = "syn"
1185
474
version = "2.0.106"
1186
475
source = "registry+https://github.com/rust-lang/crates.io-index"
1187
476
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
···
1201
490
"getrandom",
1202
491
"once_cell",
1203
492
"rustix",
1204
-
"windows-sys 0.60.2",
1205
-
]
1206
-
1207
-
[[package]]
1208
-
name = "thiserror"
1209
-
version = "1.0.69"
1210
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1211
-
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
1212
-
dependencies = [
1213
-
"thiserror-impl 1.0.69",
1214
-
]
1215
-
1216
-
[[package]]
1217
-
name = "thiserror"
1218
-
version = "2.0.17"
1219
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1220
-
checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
1221
-
dependencies = [
1222
-
"thiserror-impl 2.0.17",
1223
-
]
1224
-
1225
-
[[package]]
1226
-
name = "thiserror-impl"
1227
-
version = "1.0.69"
1228
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1229
-
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
1230
-
dependencies = [
1231
-
"proc-macro2",
1232
-
"quote",
1233
-
"syn 2.0.106",
493
+
"windows-sys",
1234
494
]
1235
495
1236
496
[[package]]
1237
-
name = "thiserror-impl"
1238
-
version = "2.0.17"
497
+
name = "twox-hash"
498
+
version = "2.1.2"
1239
499
source = "registry+https://github.com/rust-lang/crates.io-index"
1240
-
checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
1241
-
dependencies = [
1242
-
"proc-macro2",
1243
-
"quote",
1244
-
"syn 2.0.106",
1245
-
]
1246
-
1247
-
[[package]]
1248
-
name = "tinytemplate"
1249
-
version = "1.2.1"
1250
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1251
-
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
1252
-
dependencies = [
1253
-
"serde",
1254
-
"serde_json",
1255
-
]
1256
-
1257
-
[[package]]
1258
-
name = "tokio"
1259
-
version = "1.47.1"
1260
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1261
-
checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038"
1262
-
dependencies = [
1263
-
"backtrace",
1264
-
"bytes",
1265
-
"io-uring",
1266
-
"libc",
1267
-
"mio",
1268
-
"parking_lot",
1269
-
"pin-project-lite",
1270
-
"signal-hook-registry",
1271
-
"slab",
1272
-
"socket2",
1273
-
"tokio-macros",
1274
-
"windows-sys 0.59.0",
1275
-
]
1276
-
1277
-
[[package]]
1278
-
name = "tokio-macros"
1279
-
version = "2.5.0"
1280
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1281
-
checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
1282
-
dependencies = [
1283
-
"proc-macro2",
1284
-
"quote",
1285
-
"syn 2.0.106",
1286
-
]
500
+
checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
1287
501
1288
502
[[package]]
1289
503
name = "unicode-ident"
···
1292
506
checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
1293
507
1294
508
[[package]]
1295
-
name = "unsigned-varint"
1296
-
version = "0.7.2"
1297
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1298
-
checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105"
1299
-
1300
-
[[package]]
1301
-
name = "unsigned-varint"
1302
-
version = "0.8.0"
1303
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1304
-
checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06"
1305
-
1306
-
[[package]]
1307
-
name = "unty"
1308
-
version = "0.0.4"
1309
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1310
-
checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae"
1311
-
1312
-
[[package]]
1313
509
name = "utf8parse"
1314
510
version = "0.2.2"
1315
511
source = "registry+https://github.com/rust-lang/crates.io-index"
1316
512
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
1317
513
1318
514
[[package]]
1319
-
name = "vcpkg"
1320
-
version = "0.2.15"
1321
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1322
-
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
1323
-
1324
-
[[package]]
1325
-
name = "virtue"
1326
-
version = "0.0.18"
1327
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1328
-
checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1"
1329
-
1330
-
[[package]]
1331
-
name = "walkdir"
1332
-
version = "2.5.0"
1333
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1334
-
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
1335
-
dependencies = [
1336
-
"same-file",
1337
-
"winapi-util",
1338
-
]
1339
-
1340
-
[[package]]
1341
-
name = "wasi"
1342
-
version = "0.11.1+wasi-snapshot-preview1"
515
+
name = "varint-rs"
516
+
version = "2.2.0"
1343
517
source = "registry+https://github.com/rust-lang/crates.io-index"
1344
-
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
518
+
checksum = "8f54a172d0620933a27a4360d3db3e2ae0dd6cceae9730751a036bbf182c4b23"
1345
519
1346
520
[[package]]
1347
521
name = "wasi"
···
1362
536
]
1363
537
1364
538
[[package]]
1365
-
name = "wasm-bindgen"
1366
-
version = "0.2.104"
1367
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1368
-
checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d"
1369
-
dependencies = [
1370
-
"cfg-if",
1371
-
"once_cell",
1372
-
"rustversion",
1373
-
"wasm-bindgen-macro",
1374
-
"wasm-bindgen-shared",
1375
-
]
1376
-
1377
-
[[package]]
1378
-
name = "wasm-bindgen-backend"
1379
-
version = "0.2.104"
1380
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1381
-
checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19"
1382
-
dependencies = [
1383
-
"bumpalo",
1384
-
"log",
1385
-
"proc-macro2",
1386
-
"quote",
1387
-
"syn 2.0.106",
1388
-
"wasm-bindgen-shared",
1389
-
]
1390
-
1391
-
[[package]]
1392
-
name = "wasm-bindgen-macro"
1393
-
version = "0.2.104"
1394
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1395
-
checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119"
1396
-
dependencies = [
1397
-
"quote",
1398
-
"wasm-bindgen-macro-support",
1399
-
]
1400
-
1401
-
[[package]]
1402
-
name = "wasm-bindgen-macro-support"
1403
-
version = "0.2.104"
1404
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1405
-
checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7"
1406
-
dependencies = [
1407
-
"proc-macro2",
1408
-
"quote",
1409
-
"syn 2.0.106",
1410
-
"wasm-bindgen-backend",
1411
-
"wasm-bindgen-shared",
1412
-
]
1413
-
1414
-
[[package]]
1415
-
name = "wasm-bindgen-shared"
1416
-
version = "0.2.104"
1417
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1418
-
checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1"
1419
-
dependencies = [
1420
-
"unicode-ident",
1421
-
]
1422
-
1423
-
[[package]]
1424
-
name = "web-sys"
1425
-
version = "0.3.81"
1426
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1427
-
checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120"
1428
-
dependencies = [
1429
-
"js-sys",
1430
-
"wasm-bindgen",
1431
-
]
1432
-
1433
-
[[package]]
1434
-
name = "winapi-util"
1435
-
version = "0.1.11"
1436
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1437
-
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
1438
-
dependencies = [
1439
-
"windows-sys 0.60.2",
1440
-
]
1441
-
1442
-
[[package]]
1443
539
name = "windows-link"
1444
540
version = "0.2.1"
1445
541
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1447
543
1448
544
[[package]]
1449
545
name = "windows-sys"
1450
-
version = "0.59.0"
1451
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1452
-
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
1453
-
dependencies = [
1454
-
"windows-targets 0.52.6",
1455
-
]
1456
-
1457
-
[[package]]
1458
-
name = "windows-sys"
1459
546
version = "0.60.2"
1460
547
source = "registry+https://github.com/rust-lang/crates.io-index"
1461
548
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
1462
549
dependencies = [
1463
-
"windows-targets 0.53.5",
1464
-
]
1465
-
1466
-
[[package]]
1467
-
name = "windows-targets"
1468
-
version = "0.52.6"
1469
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1470
-
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
1471
-
dependencies = [
1472
-
"windows_aarch64_gnullvm 0.52.6",
1473
-
"windows_aarch64_msvc 0.52.6",
1474
-
"windows_i686_gnu 0.52.6",
1475
-
"windows_i686_gnullvm 0.52.6",
1476
-
"windows_i686_msvc 0.52.6",
1477
-
"windows_x86_64_gnu 0.52.6",
1478
-
"windows_x86_64_gnullvm 0.52.6",
1479
-
"windows_x86_64_msvc 0.52.6",
550
+
"windows-targets",
1480
551
]
1481
552
1482
553
[[package]]
···
1486
557
checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
1487
558
dependencies = [
1488
559
"windows-link",
1489
-
"windows_aarch64_gnullvm 0.53.1",
1490
-
"windows_aarch64_msvc 0.53.1",
1491
-
"windows_i686_gnu 0.53.1",
1492
-
"windows_i686_gnullvm 0.53.1",
1493
-
"windows_i686_msvc 0.53.1",
1494
-
"windows_x86_64_gnu 0.53.1",
1495
-
"windows_x86_64_gnullvm 0.53.1",
1496
-
"windows_x86_64_msvc 0.53.1",
560
+
"windows_aarch64_gnullvm",
561
+
"windows_aarch64_msvc",
562
+
"windows_i686_gnu",
563
+
"windows_i686_gnullvm",
564
+
"windows_i686_msvc",
565
+
"windows_x86_64_gnu",
566
+
"windows_x86_64_gnullvm",
567
+
"windows_x86_64_msvc",
1497
568
]
1498
569
1499
570
[[package]]
1500
571
name = "windows_aarch64_gnullvm"
1501
-
version = "0.52.6"
1502
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1503
-
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
1504
-
1505
-
[[package]]
1506
-
name = "windows_aarch64_gnullvm"
1507
572
version = "0.53.1"
1508
573
source = "registry+https://github.com/rust-lang/crates.io-index"
1509
574
checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
1510
575
1511
576
[[package]]
1512
577
name = "windows_aarch64_msvc"
1513
-
version = "0.52.6"
1514
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1515
-
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
1516
-
1517
-
[[package]]
1518
-
name = "windows_aarch64_msvc"
1519
578
version = "0.53.1"
1520
579
source = "registry+https://github.com/rust-lang/crates.io-index"
1521
580
checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
1522
581
1523
582
[[package]]
1524
583
name = "windows_i686_gnu"
1525
-
version = "0.52.6"
1526
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1527
-
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
1528
-
1529
-
[[package]]
1530
-
name = "windows_i686_gnu"
1531
584
version = "0.53.1"
1532
585
source = "registry+https://github.com/rust-lang/crates.io-index"
1533
586
checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
1534
587
1535
588
[[package]]
1536
589
name = "windows_i686_gnullvm"
1537
-
version = "0.52.6"
1538
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1539
-
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
1540
-
1541
-
[[package]]
1542
-
name = "windows_i686_gnullvm"
1543
590
version = "0.53.1"
1544
591
source = "registry+https://github.com/rust-lang/crates.io-index"
1545
592
checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
1546
593
1547
594
[[package]]
1548
595
name = "windows_i686_msvc"
1549
-
version = "0.52.6"
1550
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1551
-
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
1552
-
1553
-
[[package]]
1554
-
name = "windows_i686_msvc"
1555
596
version = "0.53.1"
1556
597
source = "registry+https://github.com/rust-lang/crates.io-index"
1557
598
checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
1558
599
1559
600
[[package]]
1560
601
name = "windows_x86_64_gnu"
1561
-
version = "0.52.6"
1562
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1563
-
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
1564
-
1565
-
[[package]]
1566
-
name = "windows_x86_64_gnu"
1567
602
version = "0.53.1"
1568
603
source = "registry+https://github.com/rust-lang/crates.io-index"
1569
604
checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
1570
605
1571
606
[[package]]
1572
607
name = "windows_x86_64_gnullvm"
1573
-
version = "0.52.6"
1574
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1575
-
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
1576
-
1577
-
[[package]]
1578
-
name = "windows_x86_64_gnullvm"
1579
608
version = "0.53.1"
1580
609
source = "registry+https://github.com/rust-lang/crates.io-index"
1581
610
checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
1582
-
1583
-
[[package]]
1584
-
name = "windows_x86_64_msvc"
1585
-
version = "0.52.6"
1586
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1587
-
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
1588
611
1589
612
[[package]]
1590
613
name = "windows_x86_64_msvc"
···
1599
622
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
1600
623
1601
624
[[package]]
1602
-
name = "zerocopy"
1603
-
version = "0.8.27"
625
+
name = "xxhash-rust"
626
+
version = "0.8.15"
1604
627
source = "registry+https://github.com/rust-lang/crates.io-index"
1605
-
checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c"
1606
-
dependencies = [
1607
-
"zerocopy-derive",
1608
-
]
1609
-
1610
-
[[package]]
1611
-
name = "zerocopy-derive"
1612
-
version = "0.8.27"
1613
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1614
-
checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
1615
-
dependencies = [
1616
-
"proc-macro2",
1617
-
"quote",
1618
-
"syn 2.0.106",
1619
-
]
628
+
checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
+3
-37
Cargo.toml
+3
-37
Cargo.toml
···
1
1
[package]
2
2
name = "repo-stream"
3
-
version = "0.1.1"
3
+
version = "0.2.2"
4
4
edition = "2024"
5
5
license = "MIT OR Apache-2.0"
6
-
description = "Fast and robust atproto CAR file processing in rust"
6
+
description = "A robust CAR file -> MST walker for atproto"
7
7
repository = "https://tangled.org/@microcosm.blue/repo-stream"
8
8
9
9
[dependencies]
10
-
bincode = { version = "2.0.1", features = ["serde"] }
11
-
futures = "0.3.31"
12
-
futures-core = "0.3.31"
13
-
ipld-core = { version = "0.4.2", features = ["serde"] }
14
-
iroh-car = "0.5.1"
15
-
log = "0.4.28"
16
-
multibase = "0.9.2"
17
-
redb = "3.1.0"
18
-
rusqlite = "0.37.0"
19
-
serde = { version = "1.0.228", features = ["derive"] }
20
-
serde_bytes = "0.11.19"
21
-
serde_ipld_dagcbor = "0.6.4"
22
-
thiserror = "2.0.17"
23
-
tokio = { version = "1.47.1", features = ["rt"] }
24
-
25
-
[dev-dependencies]
10
+
fjall = "3.0.1"
26
11
clap = { version = "4.5.48", features = ["derive"] }
27
-
criterion = { version = "0.7.0", features = ["async_tokio"] }
28
-
env_logger = "0.11.8"
29
-
multibase = "0.9.2"
30
-
tempfile = "3.23.0"
31
-
tokio = { version = "1.47.1", features = ["full"] }
32
-
33
-
[profile.profiling]
34
-
inherits = "release"
35
-
debug = true
36
12
37
-
[profile.release]
38
-
debug = true
39
-
40
-
[[bench]]
41
-
name = "non-huge-cars"
42
-
harness = false
43
-
44
-
[[bench]]
45
-
name = "huge-car"
46
-
harness = false
+12
-21
benches/huge-car.rs
+12
-21
benches/huge-car.rs
···
1
1
extern crate repo_stream;
2
-
use futures::TryStreamExt;
3
-
use iroh_car::CarReader;
4
-
use std::convert::Infallible;
2
+
use repo_stream::Driver;
5
3
use std::path::{Path, PathBuf};
6
4
7
5
use criterion::{Criterion, criterion_group, criterion_main};
···
20
18
});
21
19
}
22
20
23
-
async fn drive_car(filename: impl AsRef<Path>) {
21
+
async fn drive_car(filename: impl AsRef<Path>) -> usize {
24
22
let reader = tokio::fs::File::open(filename).await.unwrap();
25
23
let reader = tokio::io::BufReader::new(reader);
26
-
let reader = CarReader::new(reader).await.unwrap();
27
24
28
-
let root = reader
29
-
.header()
30
-
.roots()
31
-
.first()
32
-
.ok_or("missing root")
25
+
let mut driver = match Driver::load_car(reader, |block| block.len(), 1024)
26
+
.await
33
27
.unwrap()
34
-
.clone();
35
-
36
-
let stream = std::pin::pin!(reader.stream());
37
-
38
-
let (_commit, v) =
39
-
repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len()))
40
-
.await
41
-
.unwrap();
42
-
let mut record_stream = std::pin::pin!(v.stream());
28
+
{
29
+
Driver::Memory(_, mem_driver) => mem_driver,
30
+
Driver::Disk(_) => panic!("not doing disk for benchmark"),
31
+
};
43
32
44
-
while let Some(_) = record_stream.try_next().await.unwrap() {
45
-
// just here for the drive
33
+
let mut n = 0;
34
+
while let Some(pairs) = driver.next_chunk(256).await.unwrap() {
35
+
n += pairs.len();
46
36
}
37
+
n
47
38
}
48
39
49
40
criterion_group!(benches, criterion_benchmark);
+16
-22
benches/non-huge-cars.rs
+16
-22
benches/non-huge-cars.rs
···
1
1
extern crate repo_stream;
2
-
use futures::TryStreamExt;
3
-
use iroh_car::CarReader;
4
-
use std::convert::Infallible;
2
+
use repo_stream::Driver;
5
3
6
4
use criterion::{Criterion, criterion_group, criterion_main};
7
5
6
+
const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car");
8
7
const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car");
9
8
const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car");
10
9
const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car");
···
15
14
.build()
16
15
.expect("Creating runtime failed");
17
16
17
+
c.bench_function("empty-car", |b| {
18
+
b.to_async(&rt).iter(async || drive_car(EMPTY_CAR).await)
19
+
});
18
20
c.bench_function("tiny-car", |b| {
19
21
b.to_async(&rt).iter(async || drive_car(TINY_CAR).await)
20
22
});
···
26
28
});
27
29
}
28
30
29
-
async fn drive_car(bytes: &[u8]) {
30
-
let reader = CarReader::new(bytes).await.unwrap();
31
-
32
-
let root = reader
33
-
.header()
34
-
.roots()
35
-
.first()
36
-
.ok_or("missing root")
31
+
async fn drive_car(bytes: &[u8]) -> usize {
32
+
let mut driver = match Driver::load_car(bytes, |block| block.len(), 32)
33
+
.await
37
34
.unwrap()
38
-
.clone();
39
-
40
-
let stream = std::pin::pin!(reader.stream());
35
+
{
36
+
Driver::Memory(_, mem_driver) => mem_driver,
37
+
Driver::Disk(_) => panic!("not benching big cars here"),
38
+
};
41
39
42
-
let (_commit, v) =
43
-
repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len()))
44
-
.await
45
-
.unwrap();
46
-
let mut record_stream = std::pin::pin!(v.stream());
47
-
48
-
while let Some(_) = record_stream.try_next().await.unwrap() {
49
-
// just here for the drive
40
+
let mut n = 0;
41
+
while let Some(pairs) = driver.next_chunk(256).await.unwrap() {
42
+
n += pairs.len();
50
43
}
44
+
n
51
45
}
52
46
53
47
criterion_group!(benches, criterion_benchmark);
car-samples/empty.car
car-samples/empty.car
This is a binary file and will not be displayed.
+27
-47
examples/disk-read-file/main.rs
+27
-47
examples/disk-read-file/main.rs
···
1
-
extern crate repo_stream;
2
1
use clap::Parser;
3
-
use repo_stream::drive::Processable;
4
-
use serde::{Deserialize, Serialize};
5
-
use std::path::PathBuf;
6
-
7
-
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
2
+
use fjall::{Database, KeyspaceCreateOptions};
3
+
use std::{path::PathBuf, collections::BTreeMap};
8
4
9
5
#[derive(Debug, Parser)]
10
6
struct Args {
11
7
#[arg()]
12
-
car: PathBuf,
13
-
#[arg()]
14
-
tmpfile: PathBuf,
8
+
db_path: PathBuf,
15
9
}
16
10
17
-
#[derive(Clone, Serialize, Deserialize)]
18
-
struct S(usize);
11
+
fn main() -> Result<(), Box<dyn std::error::Error>> {
12
+
let Args { db_path } = Args::parse();
19
13
20
-
impl Processable for S {
21
-
fn get_size(&self) -> usize {
22
-
0 // no additional space taken, just its stack size (newtype is free)
23
-
}
24
-
}
25
-
26
-
#[tokio::main]
27
-
async fn main() -> Result<()> {
28
-
env_logger::init();
14
+
let db = Database::builder(db_path).open()?;
15
+
let ks = db.keyspace("z", KeyspaceCreateOptions::default)?;
16
+
let mut seen_keys: BTreeMap<Vec<u8>, usize> = BTreeMap::default();
29
17
30
-
let Args { car, tmpfile } = Args::parse();
31
-
let reader = tokio::fs::File::open(car).await?;
32
-
let reader = tokio::io::BufReader::new(reader);
33
-
34
-
// let kb = 2_usize.pow(10);
35
-
let mb = 2_usize.pow(20);
18
+
print!("writing...");
19
+
for i in 0..250_000_usize {
20
+
let k = i.to_be_bytes().to_vec();
21
+
ks.insert(k.clone(), vec![0xAA; 256])?;
22
+
seen_keys.insert(k, i);
23
+
}
36
24
37
-
let mut driver =
38
-
match repo_stream::drive::load_car(reader, |block| S(block.len()), 16 * mb).await? {
39
-
repo_stream::drive::Vehicle::Lil(_, _) => panic!("try this on a bigger car"),
40
-
repo_stream::drive::Vehicle::Big(big_stuff) => {
41
-
// let disk_store = repo_stream::disk::SqliteStore::new(tmpfile);
42
-
let disk_store = repo_stream::disk::RedbStore::new(tmpfile);
43
-
let (commit, driver) = big_stuff.finish_loading(disk_store).await?;
44
-
log::warn!("big: {:?}", commit);
45
-
driver
46
-
}
47
-
};
25
+
println!(" done. checking keys...");
48
26
49
-
println!("hello!");
27
+
// remove every seen key that fjall actually has, to see what's left
28
+
for guard in ks.iter() {
29
+
seen_keys.remove(guard.key()?.as_ref());
30
+
}
50
31
51
-
let mut n = 0;
52
-
loop {
53
-
let (d, Some(pairs)) = driver.next_chunk(256).await? else {
54
-
break;
55
-
};
56
-
driver = d;
57
-
n += pairs.len();
58
-
// log::info!("got {rkey:?}");
32
+
// report the result
33
+
if seen_keys.len() == 0 {
34
+
println!("[ OK ] all keys found");
35
+
} else {
36
+
println!("[FAIL] fjall did not have all seen_keys:");
37
+
for (k, i) in seen_keys {
38
+
println!(" insert #{i} missing, key bytes: {k:?}");
39
+
}
59
40
}
60
-
log::info!("bye! {n}");
61
41
62
42
Ok(())
63
43
}
+14
-6
examples/read-file/main.rs
+14
-6
examples/read-file/main.rs
···
1
+
/*!
2
+
Read a CAR file with in-memory processing
3
+
*/
4
+
1
5
extern crate repo_stream;
2
6
use clap::Parser;
7
+
use repo_stream::{Driver, DriverBuilder};
3
8
use std::path::PathBuf;
4
9
5
10
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
···
18
23
let reader = tokio::fs::File::open(file).await?;
19
24
let reader = tokio::io::BufReader::new(reader);
20
25
21
-
let (commit, mut driver) =
22
-
match repo_stream::drive::load_car(reader, |block| block.len(), 1024 * 1024).await? {
23
-
repo_stream::drive::Vehicle::Lil(commit, mem_driver) => (commit, mem_driver),
24
-
repo_stream::drive::Vehicle::Big(_) => panic!("can't handle big cars yet"),
25
-
};
26
+
let (commit, mut driver) = match DriverBuilder::new()
27
+
.with_block_processor(|block| block.len())
28
+
.load_car(reader)
29
+
.await?
30
+
{
31
+
Driver::Memory(commit, mem_driver) => (commit, mem_driver),
32
+
Driver::Disk(_) => panic!("this example doesn't handle big CARs"),
33
+
};
26
34
27
35
log::info!("got commit: {commit:?}");
28
36
···
31
39
n += pairs.len();
32
40
// log::info!("got {rkey:?}");
33
41
}
34
-
log::info!("bye! {n}");
42
+
log::info!("bye! total records={n}");
35
43
36
44
Ok(())
37
45
}
+70
-2
readme.md
+70
-2
readme.md
···
1
1
# repo-stream
2
2
3
-
Fast and (aspirationally) robust atproto CAR file processing in rust
3
+
A robust CAR file -> MST walker for atproto
4
+
5
+
[![Crates.io][crates-badge]](https://crates.io/crates/repo-stream)
6
+
[![Documentation][docs-badge]](https://docs.rs/repo-stream)
7
+
[![Sponsor][sponsor-badge]](https://github.com/sponsors/uniphil)
8
+
9
+
[crates-badge]: https://img.shields.io/crates/v/repo-stream.svg
10
+
[docs-badge]: https://docs.rs/repo-stream/badge.svg
11
+
[sponsor-badge]: https://img.shields.io/badge/at-microcosm-b820f9?labelColor=b820f9&logo=githubsponsors&logoColor=fff
12
+
13
+
```rust
14
+
use repo_stream::{Driver, DriverBuilder, DriveError, DiskBuilder};
15
+
16
+
#[tokio::main]
17
+
async fn main() -> Result<(), DriveError> {
18
+
// repo-stream takes any AsyncRead as input, like a tokio::fs::File
19
+
let reader = tokio::fs::File::open("repo.car".into()).await?;
20
+
let reader = tokio::io::BufReader::new(reader);
21
+
22
+
// example repo workload is simply counting the total record bytes
23
+
let mut total_size = 0;
24
+
25
+
match DriverBuilder::new()
26
+
.with_mem_limit_mb(10)
27
+
.with_block_processor(|rec| rec.len()) // block processing: just extract the raw record size
28
+
.load_car(reader)
29
+
.await?
30
+
{
31
+
32
+
// if all blocks fit within memory
33
+
Driver::Memory(_commit, mut driver) => {
34
+
while let Some(chunk) = driver.next_chunk(256).await? {
35
+
for (_rkey, size) in chunk {
36
+
total_size += size;
37
+
}
38
+
}
39
+
},
40
+
41
+
// if the CAR was too big for in-memory processing
42
+
Driver::Disk(paused) => {
43
+
// set up a disk store we can spill to
44
+
let store = DiskBuilder::new().open("some/path.db".into()).await?;
45
+
// do the spilling, get back a (similar) driver
46
+
let (_commit, mut driver) = paused.finish_loading(store).await?;
47
+
48
+
while let Some(chunk) = driver.next_chunk(256).await? {
49
+
for (_rkey, size) in chunk {
50
+
total_size += size;
51
+
}
52
+
}
53
+
54
+
// clean up the disk store (drop tables etc)
55
+
driver.reset_store().await?;
56
+
}
57
+
};
58
+
println!("sum of size of all records: {total_size}");
59
+
Ok(())
60
+
}
61
+
```
62
+
63
+
more recent todo
64
+
65
+
- [ ] get an *emtpy* car for the test suite
66
+
- [x] implement a max size on disk limit
67
+
68
+
69
+
-----
70
+
71
+
older stuff (to clean up):
4
72
5
73
6
74
current car processing times (records processed into their length usize, phil's dev machine):
···
27
95
-> yeah the commit is returned from init
28
96
- [ ] spec compliance todos
29
97
- [x] assert that keys are ordered and fail if not
30
-
- [ ] verify node mst depth from key (possibly pending [interop test fixes](https://github.com/bluesky-social/atproto-interop-tests/issues/5))
98
+
- [x] verify node mst depth from key (possibly pending [interop test fixes](https://github.com/bluesky-social/atproto-interop-tests/issues/5))
31
99
- [ ] performance todos
32
100
- [x] consume the serialized nodes into a mutable efficient format
33
101
- [ ] maybe customize the deserialize impl to do that directly?
-211
src/disk.rs
-211
src/disk.rs
···
1
-
use redb::ReadableDatabase;
2
-
use rusqlite::OptionalExtension;
3
-
use std::error::Error;
4
-
use std::path::PathBuf;
5
-
6
-
pub trait StorageErrorBase: Error + Send + 'static {}
7
-
8
-
/// high level potential storage resource
9
-
///
10
-
/// separating this allows (hopefully) implementing a storage pool that can
11
-
/// async-block when until a member is available to use
12
-
pub trait DiskStore {
13
-
type StorageError: StorageErrorBase + Send;
14
-
type Access: DiskAccess<StorageError = Self::StorageError>;
15
-
fn get_access(&mut self) -> impl Future<Output = Result<Self::Access, Self::StorageError>>;
16
-
}
17
-
18
-
/// actual concrete access to disk storage
19
-
pub trait DiskAccess: Send {
20
-
type StorageError: StorageErrorBase;
21
-
22
-
fn get_writer(&mut self) -> Result<impl DiskWriter<Self::StorageError>, Self::StorageError>;
23
-
24
-
fn get_reader(
25
-
&self,
26
-
) -> Result<impl DiskReader<StorageError = Self::StorageError>, Self::StorageError>;
27
-
28
-
// TODO: force a cleanup implementation?
29
-
}
30
-
31
-
pub trait DiskWriter<E: StorageErrorBase> {
32
-
fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> Result<(), E>;
33
-
}
34
-
35
-
pub trait DiskReader {
36
-
type StorageError: StorageErrorBase;
37
-
fn get(&mut self, key: Vec<u8>) -> Result<Option<Vec<u8>>, Self::StorageError>;
38
-
}
39
-
40
-
///////////////// sqlite
41
-
42
-
pub struct SqliteStore {
43
-
path: PathBuf,
44
-
}
45
-
46
-
impl SqliteStore {
47
-
pub fn new(path: PathBuf) -> Self {
48
-
Self { path }
49
-
}
50
-
}
51
-
52
-
impl StorageErrorBase for rusqlite::Error {}
53
-
54
-
impl DiskStore for SqliteStore {
55
-
type StorageError = rusqlite::Error;
56
-
type Access = SqliteAccess;
57
-
async fn get_access(&mut self) -> Result<SqliteAccess, rusqlite::Error> {
58
-
let path = self.path.clone();
59
-
let conn = tokio::task::spawn_blocking(move || {
60
-
let conn = rusqlite::Connection::open(path)?;
61
-
62
-
conn.pragma_update(None, "journal_mode", "WAL")?;
63
-
conn.pragma_update(None, "synchronous", "OFF")?;
64
-
conn.pragma_update(None, "cache_size", (-4 * 2_i64.pow(10)).to_string())?;
65
-
conn.execute(
66
-
"CREATE TABLE blocks (
67
-
key BLOB PRIMARY KEY NOT NULL,
68
-
val BLOB NOT NULL
69
-
) WITHOUT ROWID",
70
-
(),
71
-
)?;
72
-
73
-
Ok::<_, Self::StorageError>(conn)
74
-
})
75
-
.await
76
-
.expect("join error")?;
77
-
78
-
Ok(SqliteAccess { conn })
79
-
}
80
-
}
81
-
82
-
pub struct SqliteAccess {
83
-
conn: rusqlite::Connection,
84
-
}
85
-
86
-
impl DiskAccess for SqliteAccess {
87
-
type StorageError = rusqlite::Error;
88
-
fn get_writer(&mut self) -> Result<impl DiskWriter<rusqlite::Error>, rusqlite::Error> {
89
-
let insert_stmt = self
90
-
.conn
91
-
.prepare("INSERT INTO blocks (key, val) VALUES (?1, ?2)")?;
92
-
Ok(SqliteWriter { insert_stmt })
93
-
}
94
-
fn get_reader(
95
-
&self,
96
-
) -> Result<impl DiskReader<StorageError = rusqlite::Error>, rusqlite::Error> {
97
-
let select_stmt = self.conn.prepare("SELECT val FROM blocks WHERE key = ?1")?;
98
-
Ok(SqliteReader { select_stmt })
99
-
}
100
-
}
101
-
102
-
pub struct SqliteWriter<'conn> {
103
-
insert_stmt: rusqlite::Statement<'conn>,
104
-
}
105
-
106
-
impl DiskWriter<rusqlite::Error> for SqliteWriter<'_> {
107
-
fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> rusqlite::Result<()> {
108
-
self.insert_stmt.execute((key, val))?;
109
-
Ok(())
110
-
}
111
-
}
112
-
113
-
pub struct SqliteReader<'conn> {
114
-
select_stmt: rusqlite::Statement<'conn>,
115
-
}
116
-
117
-
impl DiskReader for SqliteReader<'_> {
118
-
type StorageError = rusqlite::Error;
119
-
fn get(&mut self, key: Vec<u8>) -> rusqlite::Result<Option<Vec<u8>>> {
120
-
self.select_stmt
121
-
.query_one((&key,), |row| row.get(0))
122
-
.optional()
123
-
}
124
-
}
125
-
126
-
//////////// redb why not
127
-
128
-
const REDB_TABLE: redb::TableDefinition<&[u8], &[u8]> = redb::TableDefinition::new("blocks");
129
-
130
-
pub struct RedbStore {
131
-
path: PathBuf,
132
-
}
133
-
134
-
impl RedbStore {
135
-
pub fn new(path: PathBuf) -> Self {
136
-
Self { path }
137
-
}
138
-
}
139
-
140
-
impl StorageErrorBase for redb::Error {}
141
-
142
-
impl DiskStore for RedbStore {
143
-
type StorageError = redb::Error;
144
-
type Access = RedbAccess;
145
-
async fn get_access(&mut self) -> Result<RedbAccess, redb::Error> {
146
-
let path = self.path.clone();
147
-
let kb = 2_usize.pow(10);
148
-
let db = tokio::task::spawn_blocking(move || {
149
-
let db = redb::Database::builder()
150
-
.set_cache_size(16 * kb)
151
-
.create(path)?;
152
-
Ok::<_, Self::StorageError>(db)
153
-
})
154
-
.await
155
-
.expect("join error")?;
156
-
157
-
Ok(RedbAccess { db })
158
-
}
159
-
}
160
-
161
-
pub struct RedbAccess {
162
-
db: redb::Database,
163
-
}
164
-
165
-
impl DiskAccess for RedbAccess {
166
-
type StorageError = redb::Error;
167
-
fn get_writer(&mut self) -> Result<impl DiskWriter<redb::Error>, redb::Error> {
168
-
let mut tx = self.db.begin_write()?;
169
-
tx.set_durability(redb::Durability::None)?;
170
-
Ok(RedbWriter { tx: Some(tx) })
171
-
}
172
-
fn get_reader(&self) -> Result<impl DiskReader<StorageError = redb::Error>, redb::Error> {
173
-
let tx = self.db.begin_read()?;
174
-
Ok(RedbReader { tx })
175
-
}
176
-
}
177
-
178
-
pub struct RedbWriter {
179
-
tx: Option<redb::WriteTransaction>,
180
-
}
181
-
182
-
impl DiskWriter<redb::Error> for RedbWriter {
183
-
fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> Result<(), redb::Error> {
184
-
let mut table = self.tx.as_ref().unwrap().open_table(REDB_TABLE)?;
185
-
table.insert(&*key, &*val)?;
186
-
Ok(())
187
-
}
188
-
}
189
-
190
-
/// oops careful in async
191
-
impl Drop for RedbWriter {
192
-
fn drop(&mut self) {
193
-
let tx = self.tx.take();
194
-
tx.unwrap().commit().unwrap();
195
-
}
196
-
}
197
-
198
-
pub struct RedbReader {
199
-
tx: redb::ReadTransaction,
200
-
}
201
-
202
-
impl DiskReader for RedbReader {
203
-
type StorageError = redb::Error;
204
-
fn get(&mut self, key: Vec<u8>) -> Result<Option<Vec<u8>>, redb::Error> {
205
-
let table = self.tx.open_table(REDB_TABLE)?;
206
-
let rv = table.get(&*key)?.map(|guard| guard.value().to_vec());
207
-
Ok(rv)
208
-
}
209
-
}
210
-
211
-
///// TODO: that other single file db thing to try
-377
src/drive.rs
-377
src/drive.rs
···
1
-
//! Consume an MST block stream, producing an ordered stream of records
2
-
3
-
use crate::disk::{DiskAccess, DiskStore, DiskWriter, StorageErrorBase};
4
-
use ipld_core::cid::Cid;
5
-
use iroh_car::CarReader;
6
-
use serde::de::DeserializeOwned;
7
-
use serde::{Deserialize, Serialize};
8
-
use std::collections::HashMap;
9
-
use std::convert::Infallible;
10
-
use tokio::io::AsyncRead;
11
-
12
-
use crate::mst::{Commit, Node};
13
-
use crate::walk::{DiskTrip, Step, Trip, Walker};
14
-
15
-
/// Errors that can happen while consuming and emitting blocks and records
16
-
#[derive(Debug, thiserror::Error)]
17
-
pub enum DriveError {
18
-
#[error("Error from iroh_car: {0}")]
19
-
CarReader(#[from] iroh_car::Error),
20
-
#[error("Failed to decode commit block: {0}")]
21
-
BadBlock(#[from] serde_ipld_dagcbor::DecodeError<Infallible>),
22
-
#[error("The Commit block reference by the root was not found")]
23
-
MissingCommit,
24
-
#[error("The MST block {0} could not be found")]
25
-
MissingBlock(Cid),
26
-
#[error("Failed to walk the mst tree: {0}")]
27
-
Tripped(#[from] Trip),
28
-
#[error("CAR file had no roots")]
29
-
MissingRoot,
30
-
}
31
-
32
-
#[derive(Debug, thiserror::Error)]
33
-
pub enum DiskDriveError<E: StorageErrorBase> {
34
-
#[error("Error from iroh_car: {0}")]
35
-
CarReader(#[from] iroh_car::Error),
36
-
#[error("Failed to decode commit block: {0}")]
37
-
BadBlock(#[from] serde_ipld_dagcbor::DecodeError<Infallible>),
38
-
#[error("Storage error")]
39
-
StorageError(#[from] E),
40
-
#[error("The Commit block reference by the root was not found")]
41
-
MissingCommit,
42
-
#[error("The MST block {0} could not be found")]
43
-
MissingBlock(Cid),
44
-
#[error("Encode error: {0}")]
45
-
BincodeEncodeError(#[from] bincode::error::EncodeError),
46
-
#[error("Decode error: {0}")]
47
-
BincodeDecodeError(#[from] bincode::error::DecodeError),
48
-
#[error("disk tripped: {0}")]
49
-
DiskTripped(#[from] DiskTrip<E>),
50
-
}
51
-
52
-
// #[derive(Debug, thiserror::Error)]
53
-
// pub enum Boooooo<E: StorageErrorBase> {
54
-
// #[error("disk tripped: {0}")]
55
-
// DiskTripped(#[from] DiskTrip<E>),
56
-
// #[error("dde whatever: {0}")]
57
-
// DiskDriveError(#[from] DiskDriveError<E>),
58
-
// }
59
-
60
-
pub trait Processable: Clone + Serialize + DeserializeOwned {
61
-
/// the additional size taken up (not including its mem::size_of)
62
-
fn get_size(&self) -> usize;
63
-
}
64
-
65
-
#[derive(Debug, Clone, Serialize, Deserialize)]
66
-
pub enum MaybeProcessedBlock<T> {
67
-
/// A block that's *probably* a Node (but we can't know yet)
68
-
///
69
-
/// It *can be* a record that suspiciously looks a lot like a node, so we
70
-
/// cannot eagerly turn it into a Node. We only know for sure what it is
71
-
/// when we actually walk down the MST
72
-
Raw(Vec<u8>),
73
-
/// A processed record from a block that was definitely not a Node
74
-
///
75
-
/// Processing has to be fallible because the CAR can have totally-unused
76
-
/// blocks, which can just be garbage. since we're eagerly trying to process
77
-
/// record blocks without knowing for sure that they *are* records, we
78
-
/// discard any definitely-not-nodes that fail processing and keep their
79
-
/// error in the buffer for them. if we later try to retreive them as a
80
-
/// record, then we can surface the error.
81
-
///
82
-
/// If we _never_ needed this block, then we may have wasted a bit of effort
83
-
/// trying to process it. Oh well.
84
-
///
85
-
/// There's an alternative here, which would be to kick unprocessable blocks
86
-
/// back to Raw, or maybe even a new RawUnprocessable variant. Then we could
87
-
/// surface the typed error later if needed by trying to reprocess.
88
-
Processed(T),
89
-
}
90
-
91
-
impl<T: Processable> Processable for MaybeProcessedBlock<T> {
92
-
/// TODO this is probably a little broken
93
-
fn get_size(&self) -> usize {
94
-
use std::{cmp::max, mem::size_of};
95
-
96
-
// enum is always as big as its biggest member?
97
-
let base_size = max(size_of::<Vec<u8>>(), size_of::<T>());
98
-
99
-
let extra = match self {
100
-
Self::Raw(bytes) => bytes.len(),
101
-
Self::Processed(t) => t.get_size(),
102
-
};
103
-
104
-
base_size + extra
105
-
}
106
-
}
107
-
108
-
pub enum Vehicle<R: AsyncRead + Unpin, T: Processable> {
109
-
Lil(Commit, MemDriver<T>),
110
-
Big(BigCar<R, T>),
111
-
}
112
-
113
-
pub async fn load_car<R: AsyncRead + Unpin, T: Processable>(
114
-
reader: R,
115
-
process: fn(Vec<u8>) -> T,
116
-
max_size: usize,
117
-
) -> Result<Vehicle<R, T>, DriveError> {
118
-
let mut mem_blocks = HashMap::new();
119
-
120
-
let mut car = CarReader::new(reader).await?;
121
-
122
-
let root = *car
123
-
.header()
124
-
.roots()
125
-
.first()
126
-
.ok_or(DriveError::MissingRoot)?;
127
-
log::debug!("root: {root:?}");
128
-
129
-
let mut commit = None;
130
-
131
-
// try to load all the blocks into memory
132
-
let mut mem_size = 0;
133
-
while let Some((cid, data)) = car.next_block().await? {
134
-
// the root commit is a Special Third Kind of block that we need to make
135
-
// sure not to optimistically send to the processing function
136
-
if cid == root {
137
-
let c: Commit = serde_ipld_dagcbor::from_slice(&data)?;
138
-
commit = Some(c);
139
-
continue;
140
-
}
141
-
142
-
// remaining possible types: node, record, other. optimistically process
143
-
// TODO: get the actual in-memory size to compute disk spill
144
-
let maybe_processed = if Node::could_be(&data) {
145
-
MaybeProcessedBlock::Raw(data)
146
-
} else {
147
-
MaybeProcessedBlock::Processed(process(data))
148
-
};
149
-
150
-
// stash (maybe processed) blocks in memory as long as we have room
151
-
mem_size += std::mem::size_of::<Cid>() + maybe_processed.get_size();
152
-
mem_blocks.insert(cid, maybe_processed);
153
-
if mem_size >= max_size {
154
-
return Ok(Vehicle::Big(BigCar {
155
-
car,
156
-
root,
157
-
process,
158
-
max_size,
159
-
mem_blocks,
160
-
commit,
161
-
}));
162
-
}
163
-
}
164
-
165
-
// all blocks loaded and we fit in memory! hopefully we found the commit...
166
-
let commit = commit.ok_or(DriveError::MissingCommit)?;
167
-
168
-
let walker = Walker::new(commit.data);
169
-
170
-
Ok(Vehicle::Lil(
171
-
commit,
172
-
MemDriver {
173
-
blocks: mem_blocks,
174
-
walker,
175
-
process,
176
-
},
177
-
))
178
-
}
179
-
180
-
/// a paritally memory-loaded car file that needs disk spillover to continue
181
-
pub struct BigCar<R: AsyncRead + Unpin, T: Processable> {
182
-
car: CarReader<R>,
183
-
root: Cid,
184
-
process: fn(Vec<u8>) -> T,
185
-
max_size: usize,
186
-
mem_blocks: HashMap<Cid, MaybeProcessedBlock<T>>,
187
-
pub commit: Option<Commit>,
188
-
}
189
-
190
-
fn encode(v: impl Serialize) -> Result<Vec<u8>, bincode::error::EncodeError> {
191
-
bincode::serde::encode_to_vec(v, bincode::config::standard())
192
-
}
193
-
194
-
pub fn decode<T: Processable>(bytes: &[u8]) -> Result<T, bincode::error::DecodeError> {
195
-
let (t, n) = bincode::serde::decode_from_slice(bytes, bincode::config::standard())?;
196
-
assert_eq!(n, bytes.len(), "expected to decode all bytes"); // TODO
197
-
Ok(t)
198
-
}
199
-
200
-
impl<R: AsyncRead + Unpin, T: Processable + Send + 'static> BigCar<R, T> {
201
-
pub async fn finish_loading<S: DiskStore>(
202
-
mut self,
203
-
mut store: S,
204
-
) -> Result<(Commit, BigCarReady<T, S::Access>), DiskDriveError<S::StorageError>>
205
-
where
206
-
S::Access: Send + 'static,
207
-
S::StorageError: 'static,
208
-
{
209
-
// set up access for real
210
-
let mut access = store.get_access().await?;
211
-
212
-
// move access in and back out so we can manage lifetimes
213
-
// dump mem blocks into the store
214
-
access = tokio::task::spawn(async move {
215
-
let mut writer = access.get_writer()?;
216
-
for (k, v) in self.mem_blocks {
217
-
let key_bytes = k.to_bytes();
218
-
let val_bytes = encode(v)?; // TODO
219
-
writer.put(key_bytes, val_bytes)?;
220
-
}
221
-
drop(writer); // cannot outlive access
222
-
Ok::<_, DiskDriveError<S::StorageError>>(access)
223
-
})
224
-
.await
225
-
.unwrap()?;
226
-
227
-
// dump the rest to disk (in chunks)
228
-
loop {
229
-
let mut chunk = vec![];
230
-
let mut mem_size = 0;
231
-
loop {
232
-
let Some((cid, data)) = self.car.next_block().await? else {
233
-
break;
234
-
};
235
-
// we still gotta keep checking for the root since we might not have it
236
-
if cid == self.root {
237
-
let c: Commit = serde_ipld_dagcbor::from_slice(&data)?;
238
-
self.commit = Some(c);
239
-
continue;
240
-
}
241
-
// remaining possible types: node, record, other. optimistically process
242
-
// TODO: get the actual in-memory size to compute disk spill
243
-
let maybe_processed = if Node::could_be(&data) {
244
-
MaybeProcessedBlock::Raw(data)
245
-
} else {
246
-
MaybeProcessedBlock::Processed((self.process)(data))
247
-
};
248
-
mem_size += std::mem::size_of::<Cid>() + maybe_processed.get_size();
249
-
chunk.push((cid, maybe_processed));
250
-
if mem_size >= self.max_size {
251
-
break;
252
-
}
253
-
}
254
-
if chunk.is_empty() {
255
-
break;
256
-
}
257
-
258
-
// move access in and back out so we can manage lifetimes
259
-
// dump mem blocks into the store
260
-
access = tokio::task::spawn_blocking(move || {
261
-
let mut writer = access.get_writer()?;
262
-
for (k, v) in chunk {
263
-
let key_bytes = k.to_bytes();
264
-
let val_bytes = encode(v)?; // TODO
265
-
writer.put(key_bytes, val_bytes)?;
266
-
}
267
-
drop(writer); // cannot outlive access
268
-
Ok::<_, DiskDriveError<S::StorageError>>(access)
269
-
})
270
-
.await
271
-
.unwrap()?; // TODO
272
-
}
273
-
274
-
let commit = self.commit.ok_or(DiskDriveError::MissingCommit)?;
275
-
276
-
let walker = Walker::new(commit.data);
277
-
278
-
Ok((
279
-
commit,
280
-
BigCarReady {
281
-
process: self.process,
282
-
access,
283
-
walker,
284
-
},
285
-
))
286
-
}
287
-
}
288
-
289
-
pub struct BigCarReady<T: Clone, A: DiskAccess> {
290
-
process: fn(Vec<u8>) -> T,
291
-
access: A,
292
-
walker: Walker,
293
-
}
294
-
295
-
impl<T: Processable + Send + 'static, A: DiskAccess + Send + 'static> BigCarReady<T, A> {
296
-
pub async fn next_chunk(
297
-
mut self,
298
-
n: usize,
299
-
) -> Result<(Self, Option<Vec<(String, T)>>), DiskDriveError<A::StorageError>>
300
-
where
301
-
A::StorageError: Send,
302
-
{
303
-
let mut out = Vec::with_capacity(n);
304
-
(self, out) = tokio::task::spawn_blocking(move || {
305
-
let access = self.access;
306
-
let mut reader = access.get_reader()?;
307
-
308
-
for _ in 0..n {
309
-
// walk as far as we can until we run out of blocks or find a record
310
-
match self.walker.disk_step(&mut reader, self.process)? {
311
-
Step::Missing(cid) => return Err(DiskDriveError::MissingBlock(cid)),
312
-
Step::Finish => break,
313
-
Step::Step { rkey, data } => {
314
-
out.push((rkey, data));
315
-
continue;
316
-
}
317
-
};
318
-
}
319
-
320
-
drop(reader); // cannot outlive access
321
-
self.access = access;
322
-
Ok::<_, DiskDriveError<A::StorageError>>((self, out))
323
-
})
324
-
.await
325
-
.unwrap()?; // TODO
326
-
327
-
if out.is_empty() {
328
-
Ok((self, None))
329
-
} else {
330
-
Ok((self, Some(out)))
331
-
}
332
-
}
333
-
}
334
-
335
-
/// The core driver between the block stream and MST walker
336
-
///
337
-
/// In the future, PDSs will export CARs in a stream-friendly order that will
338
-
/// enable processing them with tiny memory overhead. But that future is not
339
-
/// here yet.
340
-
///
341
-
/// CARs are almost always in a stream-unfriendly order, so I'm reverting the
342
-
/// optimistic stream features: we load all block first, then walk the MST.
343
-
///
344
-
/// This makes things much simpler: we only need to worry about spilling to disk
345
-
/// in one place, and we always have a reasonable expecatation about how much
346
-
/// work the init function will do. We can drop the CAR reader before walking,
347
-
/// so the sync/async boundaries become a little easier to work around.
348
-
#[derive(Debug)]
349
-
pub struct MemDriver<T: Processable> {
350
-
blocks: HashMap<Cid, MaybeProcessedBlock<T>>,
351
-
walker: Walker,
352
-
process: fn(Vec<u8>) -> T,
353
-
}
354
-
355
-
impl<T: Processable> MemDriver<T> {
356
-
/// Manually step through the record outputs
357
-
pub async fn next_chunk(&mut self, n: usize) -> Result<Option<Vec<(String, T)>>, DriveError> {
358
-
let mut out = Vec::with_capacity(n);
359
-
for _ in 0..n {
360
-
// walk as far as we can until we run out of blocks or find a record
361
-
match self.walker.step(&mut self.blocks, self.process)? {
362
-
Step::Missing(cid) => return Err(DriveError::MissingBlock(cid)),
363
-
Step::Finish => break,
364
-
Step::Step { rkey, data } => {
365
-
out.push((rkey, data));
366
-
continue;
367
-
}
368
-
};
369
-
}
370
-
371
-
if out.is_empty() {
372
-
Ok(None)
373
-
} else {
374
-
Ok(Some(out))
375
-
}
376
-
}
377
-
}
+74
-7
src/lib.rs
+74
-7
src/lib.rs
···
1
-
//! Fast and robust atproto CAR file processing in rust
2
-
//!
3
-
//! For now see the [examples](https://tangled.org/@microcosm.blue/repo-stream/tree/main/examples)
1
+
/*!
2
+
A robust CAR file -> MST walker for atproto
3
+
4
+
Small CARs have their blocks buffered in memory. If a configurable memory limit
5
+
is reached while reading blocks, CAR reading is suspended, and can be continued
6
+
by providing disk storage to buffer the CAR blocks instead.
7
+
8
+
A `process` function can be provided for tasks where records are transformed
9
+
into a smaller representation, to save memory (and disk) during block reading.
10
+
11
+
Once blocks are loaded, the MST is walked and emitted as chunks of pairs of
12
+
`(rkey, processed_block)` pairs, in order (depth first, left-to-right).
13
+
14
+
Some MST validations are applied
15
+
- Keys must appear in order
16
+
- Keys must be at the correct MST tree depth
17
+
18
+
`iroh_car` additionally applies a block size limit of `2MiB`.
19
+
20
+
```
21
+
use repo_stream::{Driver, DriverBuilder, DiskBuilder};
22
+
23
+
# #[tokio::main]
24
+
# async fn main() -> Result<(), Box<dyn std::error::Error>> {
25
+
# let reader = include_bytes!("../car-samples/tiny.car").as_slice();
26
+
let mut total_size = 0;
27
+
28
+
match DriverBuilder::new()
29
+
.with_mem_limit_mb(10)
30
+
.with_block_processor(|rec| rec.len()) // block processing: just extract the raw record size
31
+
.load_car(reader)
32
+
.await?
33
+
{
34
+
35
+
// if all blocks fit within memory
36
+
Driver::Memory(_commit, mut driver) => {
37
+
while let Some(chunk) = driver.next_chunk(256).await? {
38
+
for (_rkey, size) in chunk {
39
+
total_size += size;
40
+
}
41
+
}
42
+
},
4
43
5
-
pub mod disk;
6
-
pub mod drive;
7
-
pub mod mst;
8
-
pub mod walk;
44
+
// if the CAR was too big for in-memory processing
45
+
Driver::Disk(paused) => {
46
+
// set up a disk store we can spill to
47
+
let store = DiskBuilder::new().open("some/path.db".into()).await?;
48
+
// do the spilling, get back a (similar) driver
49
+
let (_commit, mut driver) = paused.finish_loading(store).await?;
50
+
51
+
while let Some(chunk) = driver.next_chunk(256).await? {
52
+
for (_rkey, size) in chunk {
53
+
total_size += size;
54
+
}
55
+
}
56
+
57
+
// clean up the disk store (drop tables etc)
58
+
driver.reset_store().await?;
59
+
}
60
+
};
61
+
println!("sum of size of all records: {total_size}");
62
+
# Ok(())
63
+
# }
64
+
```
65
+
66
+
Disk spilling suspends and returns a `Driver::Disk(paused)` instead of going
67
+
ahead and eagerly using disk I/O. This means you have to write a bit more code
68
+
to handle both cases, but it allows you to have finer control over resource
69
+
usage. For example, you can drive a number of parallel memory CAR workers, and
70
+
separately have a different number of disk workers picking up suspended disk
71
+
tasks from a queue.
72
+
73
+
Find more [examples in the repo](https://tangled.org/@microcosm.blue/repo-stream/tree/main/examples).
74
+
75
+
*/
-114
src/mst.rs
-114
src/mst.rs
···
1
-
//! Low-level types for parsing raw atproto MST CARs
2
-
//!
3
-
//! The primary aim is to work through the **tree** structure. Non-node blocks
4
-
//! are left as raw bytes, for upper levels to parse into DAG-CBOR or whatever.
5
-
6
-
use ipld_core::cid::Cid;
7
-
use serde::Deserialize;
8
-
9
-
/// The top-level data object in a repository's tree is a signed commit.
10
-
#[derive(Debug, Deserialize)]
11
-
// #[serde(deny_unknown_fields)]
12
-
pub struct Commit {
13
-
/// the account DID associated with the repo, in strictly normalized form
14
-
/// (eg, lowercase as appropriate)
15
-
pub did: String,
16
-
/// fixed value of 3 for this repo format version
17
-
pub version: u64,
18
-
/// pointer to the top of the repo contents tree structure (MST)
19
-
pub data: Cid,
20
-
/// revision of the repo, used as a logical clock.
21
-
///
22
-
/// TID format. Must increase monotonically. Recommend using current
23
-
/// timestamp as TID; rev values in the "future" (beyond a fudge factor)
24
-
/// should be ignored and not processed
25
-
pub rev: String,
26
-
/// pointer (by hash) to a previous commit object for this repository.
27
-
///
28
-
/// Could be used to create a chain of history, but largely unused (included
29
-
/// for v2 backwards compatibility). In version 3 repos, this field must
30
-
/// exist in the CBOR object, but is virtually always null. NOTE: previously
31
-
/// specified as nullable and optional, but this caused interoperability
32
-
/// issues.
33
-
pub prev: Option<Cid>,
34
-
/// cryptographic signature of this commit, as raw bytes
35
-
#[serde(with = "serde_bytes")]
36
-
pub sig: Vec<u8>,
37
-
}
38
-
39
-
/// MST node data schema
40
-
#[derive(Debug, Deserialize, PartialEq)]
41
-
#[serde(deny_unknown_fields)]
42
-
pub struct Node {
43
-
/// link to sub-tree Node on a lower level and with all keys sorting before
44
-
/// keys at this node
45
-
#[serde(rename = "l")]
46
-
pub left: Option<Cid>,
47
-
/// ordered list of TreeEntry objects
48
-
///
49
-
/// atproto MSTs have a fanout of 4, so there can be max 4 entries.
50
-
#[serde(rename = "e")]
51
-
pub entries: Vec<Entry>, // maybe we can do [Option<Entry>; 4]?
52
-
}
53
-
54
-
impl Node {
55
-
/// test if a block could possibly be a node
56
-
///
57
-
/// we can't eagerly decode records except where we're *sure* they cannot be
58
-
/// an mst node (and even then we can only attempt) because you can't know
59
-
/// with certainty what a block is supposed to be without actually walking
60
-
/// the tree.
61
-
///
62
-
/// so if a block *could be* a node, any record converter must postpone
63
-
/// processing. if it turns out it happens to be a very node-looking record,
64
-
/// well, sorry, it just has to only be processed later when that's known.
65
-
pub fn could_be(bytes: impl AsRef<[u8]>) -> bool {
66
-
const NODE_FINGERPRINT: [u8; 3] = [
67
-
0xA2, // map length 2 (for "l" and "e" keys)
68
-
0x61, // text length 1
69
-
b'e', // "e" before "l" because map keys have to be lex-sorted
70
-
// 0x8?: "e" has array (0x100 upper 3 bits) of some length
71
-
];
72
-
let bytes = bytes.as_ref();
73
-
bytes.starts_with(&NODE_FINGERPRINT)
74
-
&& bytes
75
-
.get(3)
76
-
.map(|b| b & 0b1110_0000 == 0x80)
77
-
.unwrap_or(false)
78
-
}
79
-
80
-
/// Check if a node has any entries
81
-
///
82
-
/// An empty repository with no records is represented as a single MST node
83
-
/// with an empty array of entries. This is the only situation in which a
84
-
/// tree may contain an empty leaf node which does not either contain keys
85
-
/// ("entries") or point to a sub-tree containing entries.
86
-
///
87
-
/// TODO: to me this is slightly unclear with respect to `l` (ask someone).
88
-
/// ...is that what "The top of the tree must not be a an empty node which
89
-
/// only points to a sub-tree." is referring to?
90
-
pub fn is_empty(&self) -> bool {
91
-
self.left.is_none() && self.entries.is_empty()
92
-
}
93
-
}
94
-
95
-
/// TreeEntry object
96
-
#[derive(Debug, Deserialize, PartialEq)]
97
-
#[serde(deny_unknown_fields)]
98
-
pub struct Entry {
99
-
/// count of bytes shared with previous TreeEntry in this Node (if any)
100
-
#[serde(rename = "p")]
101
-
pub prefix_len: usize,
102
-
/// remainder of key for this TreeEntry, after "prefixlen" have been removed
103
-
#[serde(rename = "k", with = "serde_bytes")]
104
-
pub keysuffix: Vec<u8>, // can we String this here?
105
-
/// link to the record data (CBOR) for this entry
106
-
#[serde(rename = "v")]
107
-
pub value: Cid,
108
-
/// link to a sub-tree Node at a lower level
109
-
///
110
-
/// the lower level must have keys sorting after this TreeEntry's key (to
111
-
/// the "right"), but before the next TreeEntry's key in this Node (if any)
112
-
#[serde(rename = "t")]
113
-
pub tree: Option<Cid>,
114
-
}
-471
src/walk.rs
-471
src/walk.rs
···
1
-
//! Depth-first MST traversal
2
-
3
-
use crate::disk::{DiskReader, StorageErrorBase};
4
-
use crate::drive::{MaybeProcessedBlock, Processable};
5
-
use crate::mst::Node;
6
-
use ipld_core::cid::Cid;
7
-
use std::collections::HashMap;
8
-
use std::convert::Infallible;
9
-
10
-
/// Errors that can happen while walking
11
-
#[derive(Debug, thiserror::Error)]
12
-
pub enum Trip {
13
-
#[error("empty mst nodes are not allowed")]
14
-
NodeEmpty,
15
-
#[error("Failed to fingerprint commit block")]
16
-
BadCommitFingerprint,
17
-
#[error("Failed to decode commit block: {0}")]
18
-
BadCommit(#[from] serde_ipld_dagcbor::DecodeError<Infallible>),
19
-
#[error("Action node error: {0}")]
20
-
RkeyError(#[from] RkeyError),
21
-
#[error("Encountered an rkey out of order while walking the MST")]
22
-
RkeyOutOfOrder,
23
-
}
24
-
25
-
/// Errors that can happen while walking
26
-
#[derive(Debug, thiserror::Error)]
27
-
pub enum DiskTrip<E: StorageErrorBase> {
28
-
#[error("tripped: {0}")]
29
-
Trip(#[from] Trip),
30
-
#[error("storage error: {0}")]
31
-
StorageError(#[from] E),
32
-
#[error("Decode error: {0}")]
33
-
BincodeDecodeError(#[from] bincode::error::DecodeError),
34
-
}
35
-
36
-
/// Errors from invalid Rkeys
37
-
#[derive(Debug, thiserror::Error)]
38
-
pub enum RkeyError {
39
-
#[error("Failed to compute an rkey due to invalid prefix_len")]
40
-
EntryPrefixOutOfbounds,
41
-
#[error("RKey was not utf-8")]
42
-
EntryRkeyNotUtf8(#[from] std::string::FromUtf8Error),
43
-
}
44
-
45
-
/// Walker outputs
46
-
#[derive(Debug)]
47
-
pub enum Step<T> {
48
-
/// We needed this CID but it's not in the block store
49
-
Missing(Cid),
50
-
/// Reached the end of the MST! yay!
51
-
Finish,
52
-
/// A record was found!
53
-
Step { rkey: String, data: T },
54
-
}
55
-
56
-
#[derive(Debug, Clone, PartialEq)]
57
-
enum Need {
58
-
Node(Cid),
59
-
Record { rkey: String, cid: Cid },
60
-
}
61
-
62
-
fn push_from_node(stack: &mut Vec<Need>, node: &Node) -> Result<(), RkeyError> {
63
-
let mut entries = Vec::with_capacity(node.entries.len());
64
-
65
-
let mut prefix = vec![];
66
-
for entry in &node.entries {
67
-
let mut rkey = vec![];
68
-
let pre_checked = prefix
69
-
.get(..entry.prefix_len)
70
-
.ok_or(RkeyError::EntryPrefixOutOfbounds)?;
71
-
rkey.extend_from_slice(pre_checked);
72
-
rkey.extend_from_slice(&entry.keysuffix);
73
-
prefix = rkey.clone();
74
-
75
-
entries.push(Need::Record {
76
-
rkey: String::from_utf8(rkey)?,
77
-
cid: entry.value,
78
-
});
79
-
if let Some(ref tree) = entry.tree {
80
-
entries.push(Need::Node(*tree));
81
-
}
82
-
}
83
-
84
-
entries.reverse();
85
-
stack.append(&mut entries);
86
-
87
-
if let Some(tree) = node.left {
88
-
stack.push(Need::Node(tree));
89
-
}
90
-
Ok(())
91
-
}
92
-
93
-
/// Traverser of an atproto MST
94
-
///
95
-
/// Walks the tree from left-to-right in depth-first order
96
-
#[derive(Debug)]
97
-
pub struct Walker {
98
-
stack: Vec<Need>,
99
-
prev: String,
100
-
}
101
-
102
-
impl Walker {
103
-
pub fn new(tree_root_cid: Cid) -> Self {
104
-
Self {
105
-
stack: vec![Need::Node(tree_root_cid)],
106
-
prev: "".to_string(),
107
-
}
108
-
}
109
-
110
-
/// Advance through nodes until we find a record or can't go further
111
-
pub fn step<T: Processable>(
112
-
&mut self,
113
-
blocks: &mut HashMap<Cid, MaybeProcessedBlock<T>>,
114
-
process: impl Fn(Vec<u8>) -> T,
115
-
) -> Result<Step<T>, Trip> {
116
-
loop {
117
-
let Some(mut need) = self.stack.last() else {
118
-
log::trace!("tried to walk but we're actually done.");
119
-
return Ok(Step::Finish);
120
-
};
121
-
122
-
match &mut need {
123
-
Need::Node(cid) => {
124
-
log::trace!("need node {cid:?}");
125
-
let Some(block) = blocks.remove(cid) else {
126
-
log::trace!("node not found, resting");
127
-
return Ok(Step::Missing(*cid));
128
-
};
129
-
130
-
let MaybeProcessedBlock::Raw(data) = block else {
131
-
return Err(Trip::BadCommitFingerprint);
132
-
};
133
-
let node =
134
-
serde_ipld_dagcbor::from_slice::<Node>(&data).map_err(Trip::BadCommit)?;
135
-
136
-
// found node, make sure we remember
137
-
self.stack.pop();
138
-
139
-
// queue up work on the found node next
140
-
push_from_node(&mut self.stack, &node)?;
141
-
}
142
-
Need::Record { rkey, cid } => {
143
-
log::trace!("need record {cid:?}");
144
-
let Some(data) = blocks.get_mut(cid) else {
145
-
log::trace!("record block not found, resting");
146
-
return Ok(Step::Missing(*cid));
147
-
};
148
-
let rkey = rkey.clone();
149
-
let data = match data {
150
-
MaybeProcessedBlock::Raw(data) => process(data.to_vec()),
151
-
MaybeProcessedBlock::Processed(t) => t.clone(),
152
-
};
153
-
154
-
// found node, make sure we remember
155
-
self.stack.pop();
156
-
157
-
log::trace!("emitting a block as a step. depth={}", self.stack.len());
158
-
159
-
// rkeys *must* be in order or else the tree is invalid (or
160
-
// we have a bug)
161
-
if rkey <= self.prev {
162
-
return Err(Trip::RkeyOutOfOrder);
163
-
}
164
-
self.prev = rkey.clone();
165
-
166
-
return Ok(Step::Step { rkey, data });
167
-
}
168
-
}
169
-
}
170
-
}
171
-
172
-
/// blocking!!!!!!
173
-
pub fn disk_step<T: Processable, R: DiskReader>(
174
-
&mut self,
175
-
reader: &mut R,
176
-
process: impl Fn(Vec<u8>) -> T,
177
-
) -> Result<Step<T>, DiskTrip<R::StorageError>> {
178
-
loop {
179
-
let Some(mut need) = self.stack.last() else {
180
-
log::trace!("tried to walk but we're actually done.");
181
-
return Ok(Step::Finish);
182
-
};
183
-
184
-
match &mut need {
185
-
Need::Node(cid) => {
186
-
let cid_bytes = cid.to_bytes();
187
-
log::trace!("need node {cid:?}");
188
-
let Some(block_bytes) = reader.get(cid_bytes)? else {
189
-
log::trace!("node not found, resting");
190
-
return Ok(Step::Missing(*cid));
191
-
};
192
-
193
-
let block: MaybeProcessedBlock<T> = crate::drive::decode(&block_bytes)?;
194
-
195
-
let MaybeProcessedBlock::Raw(data) = block else {
196
-
return Err(Trip::BadCommitFingerprint.into());
197
-
};
198
-
let node =
199
-
serde_ipld_dagcbor::from_slice::<Node>(&data).map_err(Trip::BadCommit)?;
200
-
201
-
// found node, make sure we remember
202
-
self.stack.pop();
203
-
204
-
// queue up work on the found node next
205
-
push_from_node(&mut self.stack, &node).map_err(Trip::RkeyError)?;
206
-
}
207
-
Need::Record { rkey, cid } => {
208
-
log::trace!("need record {cid:?}");
209
-
let cid_bytes = cid.to_bytes();
210
-
let Some(data_bytes) = reader.get(cid_bytes)? else {
211
-
log::trace!("record block not found, resting");
212
-
return Ok(Step::Missing(*cid));
213
-
};
214
-
let data: MaybeProcessedBlock<T> = crate::drive::decode(&data_bytes)?;
215
-
let rkey = rkey.clone();
216
-
let data = match data {
217
-
MaybeProcessedBlock::Raw(data) => process(data),
218
-
MaybeProcessedBlock::Processed(t) => t.clone(),
219
-
};
220
-
221
-
// found node, make sure we remember
222
-
self.stack.pop();
223
-
224
-
log::trace!("emitting a block as a step. depth={}", self.stack.len());
225
-
226
-
// rkeys *must* be in order or else the tree is invalid (or
227
-
// we have a bug)
228
-
if rkey <= self.prev {
229
-
return Err(DiskTrip::Trip(Trip::RkeyOutOfOrder));
230
-
}
231
-
self.prev = rkey.clone();
232
-
233
-
return Ok(Step::Step { rkey, data });
234
-
}
235
-
}
236
-
}
237
-
}
238
-
}
239
-
240
-
#[cfg(test)]
241
-
mod test {
242
-
use super::*;
243
-
// use crate::mst::Entry;
244
-
245
-
fn cid1() -> Cid {
246
-
"bafyreihixenvk3ahqbytas4hk4a26w43bh6eo3w6usjqtxkpzsvi655a3m"
247
-
.parse()
248
-
.unwrap()
249
-
}
250
-
// fn cid2() -> Cid {
251
-
// "QmY7Yh4UquoXHLPFo2XbhXkhBvFoPwmQUSa92pxnxjQuPU"
252
-
// .parse()
253
-
// .unwrap()
254
-
// }
255
-
// fn cid3() -> Cid {
256
-
// "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
257
-
// .parse()
258
-
// .unwrap()
259
-
// }
260
-
// fn cid4() -> Cid {
261
-
// "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"
262
-
// .parse()
263
-
// .unwrap()
264
-
// }
265
-
// fn cid5() -> Cid {
266
-
// "QmSnuWmxptJZdLJpKRarxBMS2Ju2oANVrgbr2xWbie9b2D"
267
-
// .parse()
268
-
// .unwrap()
269
-
// }
270
-
// fn cid6() -> Cid {
271
-
// "QmdmQXB2mzChmMeKY47C43LxUdg1NDJ5MWcKMKxDu7RgQm"
272
-
// .parse()
273
-
// .unwrap()
274
-
// }
275
-
// fn cid7() -> Cid {
276
-
// "bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze"
277
-
// .parse()
278
-
// .unwrap()
279
-
// }
280
-
// fn cid8() -> Cid {
281
-
// "bafyreif3tfdpr5n4jdrbielmcapwvbpcthepfkwq2vwonmlhirbjmotedi"
282
-
// .parse()
283
-
// .unwrap()
284
-
// }
285
-
// fn cid9() -> Cid {
286
-
// "bafyreicnokmhmrnlp2wjhyk2haep4tqxiptwfrp2rrs7rzq7uk766chqvq"
287
-
// .parse()
288
-
// .unwrap()
289
-
// }
290
-
291
-
#[test]
292
-
fn test_next_from_node_empty() {
293
-
let node = Node {
294
-
left: None,
295
-
entries: vec![],
296
-
};
297
-
let mut stack = vec![];
298
-
push_from_node(&mut stack, &node).unwrap();
299
-
assert_eq!(stack.last(), None);
300
-
}
301
-
302
-
#[test]
303
-
fn test_needs_from_node_just_left() {
304
-
let node = Node {
305
-
left: Some(cid1()),
306
-
entries: vec![],
307
-
};
308
-
let mut stack = vec![];
309
-
push_from_node(&mut stack, &node).unwrap();
310
-
assert_eq!(stack.last(), Some(Need::Node(cid1())).as_ref());
311
-
}
312
-
313
-
// #[test]
314
-
// fn test_needs_from_node_just_one_record() {
315
-
// let node = Node {
316
-
// left: None,
317
-
// entries: vec![Entry {
318
-
// keysuffix: "asdf".into(),
319
-
// prefix_len: 0,
320
-
// value: cid1(),
321
-
// tree: None,
322
-
// }],
323
-
// };
324
-
// assert_eq!(
325
-
// needs_from_node(node).unwrap(),
326
-
// vec![Need::Record {
327
-
// rkey: "asdf".into(),
328
-
// cid: cid1(),
329
-
// },]
330
-
// );
331
-
// }
332
-
333
-
// #[test]
334
-
// fn test_needs_from_node_two_records() {
335
-
// let node = Node {
336
-
// left: None,
337
-
// entries: vec![
338
-
// Entry {
339
-
// keysuffix: "asdf".into(),
340
-
// prefix_len: 0,
341
-
// value: cid1(),
342
-
// tree: None,
343
-
// },
344
-
// Entry {
345
-
// keysuffix: "gh".into(),
346
-
// prefix_len: 2,
347
-
// value: cid2(),
348
-
// tree: None,
349
-
// },
350
-
// ],
351
-
// };
352
-
// assert_eq!(
353
-
// needs_from_node(node).unwrap(),
354
-
// vec![
355
-
// Need::Record {
356
-
// rkey: "asdf".into(),
357
-
// cid: cid1(),
358
-
// },
359
-
// Need::Record {
360
-
// rkey: "asgh".into(),
361
-
// cid: cid2(),
362
-
// },
363
-
// ]
364
-
// );
365
-
// }
366
-
367
-
// #[test]
368
-
// fn test_needs_from_node_with_both() {
369
-
// let node = Node {
370
-
// left: None,
371
-
// entries: vec![Entry {
372
-
// keysuffix: "asdf".into(),
373
-
// prefix_len: 0,
374
-
// value: cid1(),
375
-
// tree: Some(cid2()),
376
-
// }],
377
-
// };
378
-
// assert_eq!(
379
-
// needs_from_node(node).unwrap(),
380
-
// vec![
381
-
// Need::Record {
382
-
// rkey: "asdf".into(),
383
-
// cid: cid1(),
384
-
// },
385
-
// Need::Node(cid2()),
386
-
// ]
387
-
// );
388
-
// }
389
-
390
-
// #[test]
391
-
// fn test_needs_from_node_left_and_record() {
392
-
// let node = Node {
393
-
// left: Some(cid1()),
394
-
// entries: vec![Entry {
395
-
// keysuffix: "asdf".into(),
396
-
// prefix_len: 0,
397
-
// value: cid2(),
398
-
// tree: None,
399
-
// }],
400
-
// };
401
-
// assert_eq!(
402
-
// needs_from_node(node).unwrap(),
403
-
// vec![
404
-
// Need::Node(cid1()),
405
-
// Need::Record {
406
-
// rkey: "asdf".into(),
407
-
// cid: cid2(),
408
-
// },
409
-
// ]
410
-
// );
411
-
// }
412
-
413
-
// #[test]
414
-
// fn test_needs_from_full_node() {
415
-
// let node = Node {
416
-
// left: Some(cid1()),
417
-
// entries: vec![
418
-
// Entry {
419
-
// keysuffix: "asdf".into(),
420
-
// prefix_len: 0,
421
-
// value: cid2(),
422
-
// tree: Some(cid3()),
423
-
// },
424
-
// Entry {
425
-
// keysuffix: "ghi".into(),
426
-
// prefix_len: 1,
427
-
// value: cid4(),
428
-
// tree: Some(cid5()),
429
-
// },
430
-
// Entry {
431
-
// keysuffix: "jkl".into(),
432
-
// prefix_len: 2,
433
-
// value: cid6(),
434
-
// tree: Some(cid7()),
435
-
// },
436
-
// Entry {
437
-
// keysuffix: "mno".into(),
438
-
// prefix_len: 4,
439
-
// value: cid8(),
440
-
// tree: Some(cid9()),
441
-
// },
442
-
// ],
443
-
// };
444
-
// assert_eq!(
445
-
// needs_from_node(node).unwrap(),
446
-
// vec![
447
-
// Need::Node(cid1()),
448
-
// Need::Record {
449
-
// rkey: "asdf".into(),
450
-
// cid: cid2(),
451
-
// },
452
-
// Need::Node(cid3()),
453
-
// Need::Record {
454
-
// rkey: "aghi".into(),
455
-
// cid: cid4(),
456
-
// },
457
-
// Need::Node(cid5()),
458
-
// Need::Record {
459
-
// rkey: "agjkl".into(),
460
-
// cid: cid6(),
461
-
// },
462
-
// Need::Node(cid7()),
463
-
// Need::Record {
464
-
// rkey: "agjkmno".into(),
465
-
// cid: cid8(),
466
-
// },
467
-
// Need::Node(cid9()),
468
-
// ]
469
-
// );
470
-
// }
471
-
}
+34
-31
tests/non-huge-cars.rs
+34
-31
tests/non-huge-cars.rs
···
1
1
extern crate repo_stream;
2
-
use futures::TryStreamExt;
3
-
use iroh_car::CarReader;
4
-
use std::convert::Infallible;
2
+
use repo_stream::Driver;
5
3
4
+
const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car");
6
5
const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car");
7
6
const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car");
8
7
const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car");
9
8
10
-
async fn test_car(bytes: &[u8], expected_records: usize, expected_sum: usize) {
11
-
let reader = CarReader::new(bytes).await.unwrap();
12
-
13
-
let root = reader
14
-
.header()
15
-
.roots()
16
-
.first()
17
-
.ok_or("missing root")
9
+
async fn test_car(
10
+
bytes: &[u8],
11
+
expected_records: usize,
12
+
expected_sum: usize,
13
+
expect_profile: bool,
14
+
) {
15
+
let mut driver = match Driver::load_car(bytes, |block| block.len(), 10 /* MiB */)
16
+
.await
18
17
.unwrap()
19
-
.clone();
20
-
21
-
let stream = std::pin::pin!(reader.stream());
22
-
23
-
let (_commit, v) =
24
-
repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len()))
25
-
.await
26
-
.unwrap();
27
-
let mut record_stream = std::pin::pin!(v.stream());
18
+
{
19
+
Driver::Memory(_commit, mem_driver) => mem_driver,
20
+
Driver::Disk(_) => panic!("too big"),
21
+
};
28
22
29
23
let mut records = 0;
30
24
let mut sum = 0;
31
25
let mut found_bsky_profile = false;
32
26
let mut prev_rkey = "".to_string();
33
-
while let Some((rkey, size)) = record_stream.try_next().await.unwrap() {
34
-
records += 1;
35
-
sum += size;
36
-
if rkey == "app.bsky.actor.profile/self" {
37
-
found_bsky_profile = true;
27
+
28
+
while let Some(pairs) = driver.next_chunk(256).await.unwrap() {
29
+
for (rkey, size) in pairs {
30
+
records += 1;
31
+
sum += size;
32
+
if rkey == "app.bsky.actor.profile/self" {
33
+
found_bsky_profile = true;
34
+
}
35
+
assert!(rkey > prev_rkey, "rkeys are streamed in order");
36
+
prev_rkey = rkey;
38
37
}
39
-
assert!(rkey > prev_rkey, "rkeys are streamed in order");
40
-
prev_rkey = rkey;
41
38
}
39
+
42
40
assert_eq!(records, expected_records);
43
41
assert_eq!(sum, expected_sum);
44
-
assert!(found_bsky_profile);
42
+
assert_eq!(found_bsky_profile, expect_profile);
43
+
}
44
+
45
+
#[tokio::test]
46
+
async fn test_empty_car() {
47
+
test_car(EMPTY_CAR, 0, 0, false).await
45
48
}
46
49
47
50
#[tokio::test]
48
51
async fn test_tiny_car() {
49
-
test_car(TINY_CAR, 8, 2071).await
52
+
test_car(TINY_CAR, 8, 2071, true).await
50
53
}
51
54
52
55
#[tokio::test]
53
56
async fn test_little_car() {
54
-
test_car(LITTLE_CAR, 278, 246960).await
57
+
test_car(LITTLE_CAR, 278, 246960, true).await
55
58
}
56
59
57
60
#[tokio::test]
58
61
async fn test_midsize_car() {
59
-
test_car(MIDSIZE_CAR, 11585, 3741393).await
62
+
test_car(MIDSIZE_CAR, 11585, 3741393, true).await
60
63
}