+133
-1114
Cargo.lock
+133
-1114
Cargo.lock
···
3
3
version = 4
4
4
5
5
[[package]]
6
-
name = "addr2line"
7
-
version = "0.25.1"
8
-
source = "registry+https://github.com/rust-lang/crates.io-index"
9
-
checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b"
10
-
dependencies = [
11
-
"gimli",
12
-
]
13
-
14
-
[[package]]
15
-
name = "adler2"
16
-
version = "2.0.1"
17
-
source = "registry+https://github.com/rust-lang/crates.io-index"
18
-
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
19
-
20
-
[[package]]
21
-
name = "aho-corasick"
22
-
version = "1.1.3"
23
-
source = "registry+https://github.com/rust-lang/crates.io-index"
24
-
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
25
-
dependencies = [
26
-
"memchr",
27
-
]
28
-
29
-
[[package]]
30
-
name = "anes"
31
-
version = "0.1.6"
32
-
source = "registry+https://github.com/rust-lang/crates.io-index"
33
-
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
34
-
35
-
[[package]]
36
6
name = "anstream"
37
7
version = "0.6.21"
38
8
source = "registry+https://github.com/rust-lang/crates.io-index"
···
68
38
source = "registry+https://github.com/rust-lang/crates.io-index"
69
39
checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2"
70
40
dependencies = [
71
-
"windows-sys 0.60.2",
41
+
"windows-sys",
72
42
]
73
43
74
44
[[package]]
···
79
49
dependencies = [
80
50
"anstyle",
81
51
"once_cell_polyfill",
82
-
"windows-sys 0.60.2",
83
-
]
84
-
85
-
[[package]]
86
-
name = "anyhow"
87
-
version = "1.0.100"
88
-
source = "registry+https://github.com/rust-lang/crates.io-index"
89
-
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
90
-
91
-
[[package]]
92
-
name = "autocfg"
93
-
version = "1.5.0"
94
-
source = "registry+https://github.com/rust-lang/crates.io-index"
95
-
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
96
-
97
-
[[package]]
98
-
name = "backtrace"
99
-
version = "0.3.76"
100
-
source = "registry+https://github.com/rust-lang/crates.io-index"
101
-
checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
102
-
dependencies = [
103
-
"addr2line",
104
-
"cfg-if",
105
-
"libc",
106
-
"miniz_oxide",
107
-
"object",
108
-
"rustc-demangle",
109
-
"windows-link",
110
-
]
111
-
112
-
[[package]]
113
-
name = "base-x"
114
-
version = "0.2.11"
115
-
source = "registry+https://github.com/rust-lang/crates.io-index"
116
-
checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270"
117
-
118
-
[[package]]
119
-
name = "base256emoji"
120
-
version = "1.0.2"
121
-
source = "registry+https://github.com/rust-lang/crates.io-index"
122
-
checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c"
123
-
dependencies = [
124
-
"const-str",
125
-
"match-lookup",
126
-
]
127
-
128
-
[[package]]
129
-
name = "bincode"
130
-
version = "2.0.1"
131
-
source = "registry+https://github.com/rust-lang/crates.io-index"
132
-
checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740"
133
-
dependencies = [
134
-
"bincode_derive",
135
-
"serde",
136
-
"unty",
137
-
]
138
-
139
-
[[package]]
140
-
name = "bincode_derive"
141
-
version = "2.0.1"
142
-
source = "registry+https://github.com/rust-lang/crates.io-index"
143
-
checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09"
144
-
dependencies = [
145
-
"virtue",
52
+
"windows-sys",
146
53
]
147
54
148
55
[[package]]
···
152
59
checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
153
60
154
61
[[package]]
155
-
name = "bumpalo"
156
-
version = "3.19.0"
62
+
name = "byteorder-lite"
63
+
version = "0.1.0"
157
64
source = "registry+https://github.com/rust-lang/crates.io-index"
158
-
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
65
+
checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
159
66
160
67
[[package]]
161
-
name = "bytes"
162
-
version = "1.10.1"
163
-
source = "registry+https://github.com/rust-lang/crates.io-index"
164
-
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
165
-
166
-
[[package]]
167
-
name = "cast"
168
-
version = "0.3.0"
169
-
source = "registry+https://github.com/rust-lang/crates.io-index"
170
-
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
171
-
172
-
[[package]]
173
-
name = "cbor4ii"
174
-
version = "0.2.14"
68
+
name = "byteview"
69
+
version = "0.10.0"
175
70
source = "registry+https://github.com/rust-lang/crates.io-index"
176
-
checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4"
177
-
dependencies = [
178
-
"serde",
179
-
]
71
+
checksum = "dda4398f387cc6395a3e93b3867cd9abda914c97a0b344d1eefb2e5c51785fca"
180
72
181
73
[[package]]
182
74
name = "cfg-if"
···
185
77
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
186
78
187
79
[[package]]
188
-
name = "ciborium"
189
-
version = "0.2.2"
190
-
source = "registry+https://github.com/rust-lang/crates.io-index"
191
-
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
192
-
dependencies = [
193
-
"ciborium-io",
194
-
"ciborium-ll",
195
-
"serde",
196
-
]
197
-
198
-
[[package]]
199
-
name = "ciborium-io"
200
-
version = "0.2.2"
201
-
source = "registry+https://github.com/rust-lang/crates.io-index"
202
-
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
203
-
204
-
[[package]]
205
-
name = "ciborium-ll"
206
-
version = "0.2.2"
207
-
source = "registry+https://github.com/rust-lang/crates.io-index"
208
-
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
209
-
dependencies = [
210
-
"ciborium-io",
211
-
"half",
212
-
]
213
-
214
-
[[package]]
215
-
name = "cid"
216
-
version = "0.11.1"
217
-
source = "registry+https://github.com/rust-lang/crates.io-index"
218
-
checksum = "3147d8272e8fa0ccd29ce51194dd98f79ddfb8191ba9e3409884e751798acf3a"
219
-
dependencies = [
220
-
"core2",
221
-
"multibase",
222
-
"multihash",
223
-
"serde",
224
-
"serde_bytes",
225
-
"unsigned-varint 0.8.0",
226
-
]
227
-
228
-
[[package]]
229
80
name = "clap"
230
81
version = "4.5.48"
231
82
source = "registry+https://github.com/rust-lang/crates.io-index"
···
256
107
"heck",
257
108
"proc-macro2",
258
109
"quote",
259
-
"syn 2.0.106",
110
+
"syn",
260
111
]
261
112
262
113
[[package]]
···
272
123
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
273
124
274
125
[[package]]
275
-
name = "const-str"
276
-
version = "0.4.3"
277
-
source = "registry+https://github.com/rust-lang/crates.io-index"
278
-
checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3"
279
-
280
-
[[package]]
281
-
name = "core2"
282
-
version = "0.4.0"
283
-
source = "registry+https://github.com/rust-lang/crates.io-index"
284
-
checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
285
-
dependencies = [
286
-
"memchr",
287
-
]
288
-
289
-
[[package]]
290
-
name = "criterion"
291
-
version = "0.7.0"
292
-
source = "registry+https://github.com/rust-lang/crates.io-index"
293
-
checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928"
294
-
dependencies = [
295
-
"anes",
296
-
"cast",
297
-
"ciborium",
298
-
"clap",
299
-
"criterion-plot",
300
-
"itertools",
301
-
"num-traits",
302
-
"oorandom",
303
-
"plotters",
304
-
"rayon",
305
-
"regex",
306
-
"serde",
307
-
"serde_json",
308
-
"tinytemplate",
309
-
"tokio",
310
-
"walkdir",
311
-
]
312
-
313
-
[[package]]
314
-
name = "criterion-plot"
315
-
version = "0.6.0"
126
+
name = "compare"
127
+
version = "0.0.6"
316
128
source = "registry+https://github.com/rust-lang/crates.io-index"
317
-
checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338"
318
-
dependencies = [
319
-
"cast",
320
-
"itertools",
321
-
]
129
+
checksum = "ea0095f6103c2a8b44acd6fd15960c801dafebf02e21940360833e0673f48ba7"
322
130
323
131
[[package]]
324
-
name = "crossbeam-deque"
325
-
version = "0.8.6"
132
+
name = "crossbeam-epoch"
133
+
version = "0.9.18"
326
134
source = "registry+https://github.com/rust-lang/crates.io-index"
327
-
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
135
+
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
328
136
dependencies = [
329
-
"crossbeam-epoch",
330
137
"crossbeam-utils",
331
138
]
332
139
333
140
[[package]]
334
-
name = "crossbeam-epoch"
335
-
version = "0.9.18"
141
+
name = "crossbeam-skiplist"
142
+
version = "0.1.3"
336
143
source = "registry+https://github.com/rust-lang/crates.io-index"
337
-
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
144
+
checksum = "df29de440c58ca2cc6e587ec3d22347551a32435fbde9d2bff64e78a9ffa151b"
338
145
dependencies = [
146
+
"crossbeam-epoch",
339
147
"crossbeam-utils",
340
148
]
341
149
···
346
154
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
347
155
348
156
[[package]]
349
-
name = "crunchy"
350
-
version = "0.2.4"
351
-
source = "registry+https://github.com/rust-lang/crates.io-index"
352
-
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
353
-
354
-
[[package]]
355
-
name = "data-encoding"
356
-
version = "2.9.0"
357
-
source = "registry+https://github.com/rust-lang/crates.io-index"
358
-
checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476"
359
-
360
-
[[package]]
361
-
name = "data-encoding-macro"
362
-
version = "0.1.18"
157
+
name = "dashmap"
158
+
version = "6.1.0"
363
159
source = "registry+https://github.com/rust-lang/crates.io-index"
364
-
checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d"
160
+
checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
365
161
dependencies = [
366
-
"data-encoding",
367
-
"data-encoding-macro-internal",
162
+
"cfg-if",
163
+
"crossbeam-utils",
164
+
"hashbrown 0.14.5",
165
+
"lock_api",
166
+
"once_cell",
167
+
"parking_lot_core",
368
168
]
369
169
370
170
[[package]]
371
-
name = "data-encoding-macro-internal"
372
-
version = "0.1.16"
373
-
source = "registry+https://github.com/rust-lang/crates.io-index"
374
-
checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976"
375
-
dependencies = [
376
-
"data-encoding",
377
-
"syn 2.0.106",
378
-
]
379
-
380
-
[[package]]
381
-
name = "either"
382
-
version = "1.15.0"
383
-
source = "registry+https://github.com/rust-lang/crates.io-index"
384
-
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
385
-
386
-
[[package]]
387
-
name = "env_filter"
388
-
version = "0.1.3"
171
+
name = "enum_dispatch"
172
+
version = "0.3.13"
389
173
source = "registry+https://github.com/rust-lang/crates.io-index"
390
-
checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0"
174
+
checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd"
391
175
dependencies = [
392
-
"log",
393
-
"regex",
176
+
"once_cell",
177
+
"proc-macro2",
178
+
"quote",
179
+
"syn",
394
180
]
395
181
396
182
[[package]]
397
-
name = "env_logger"
398
-
version = "0.11.8"
183
+
name = "equivalent"
184
+
version = "1.0.2"
399
185
source = "registry+https://github.com/rust-lang/crates.io-index"
400
-
checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f"
401
-
dependencies = [
402
-
"anstream",
403
-
"anstyle",
404
-
"env_filter",
405
-
"jiff",
406
-
"log",
407
-
]
186
+
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
408
187
409
188
[[package]]
410
189
name = "errno"
···
413
192
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
414
193
dependencies = [
415
194
"libc",
416
-
"windows-sys 0.60.2",
195
+
"windows-sys",
417
196
]
418
197
419
198
[[package]]
420
-
name = "fallible-iterator"
421
-
version = "0.3.0"
422
-
source = "registry+https://github.com/rust-lang/crates.io-index"
423
-
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
424
-
425
-
[[package]]
426
-
name = "fallible-streaming-iterator"
427
-
version = "0.1.9"
428
-
source = "registry+https://github.com/rust-lang/crates.io-index"
429
-
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
430
-
431
-
[[package]]
432
199
name = "fastrand"
433
200
version = "2.3.0"
434
201
source = "registry+https://github.com/rust-lang/crates.io-index"
435
202
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
436
203
437
204
[[package]]
438
-
name = "foldhash"
439
-
version = "0.1.5"
440
-
source = "registry+https://github.com/rust-lang/crates.io-index"
441
-
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
442
-
443
-
[[package]]
444
-
name = "futures"
445
-
version = "0.3.31"
446
-
source = "registry+https://github.com/rust-lang/crates.io-index"
447
-
checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
448
-
dependencies = [
449
-
"futures-channel",
450
-
"futures-core",
451
-
"futures-executor",
452
-
"futures-io",
453
-
"futures-sink",
454
-
"futures-task",
455
-
"futures-util",
456
-
]
457
-
458
-
[[package]]
459
-
name = "futures-channel"
460
-
version = "0.3.31"
461
-
source = "registry+https://github.com/rust-lang/crates.io-index"
462
-
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
463
-
dependencies = [
464
-
"futures-core",
465
-
"futures-sink",
466
-
]
467
-
468
-
[[package]]
469
-
name = "futures-core"
470
-
version = "0.3.31"
471
-
source = "registry+https://github.com/rust-lang/crates.io-index"
472
-
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
473
-
474
-
[[package]]
475
-
name = "futures-executor"
476
-
version = "0.3.31"
477
-
source = "registry+https://github.com/rust-lang/crates.io-index"
478
-
checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
479
-
dependencies = [
480
-
"futures-core",
481
-
"futures-task",
482
-
"futures-util",
483
-
]
484
-
485
-
[[package]]
486
-
name = "futures-io"
487
-
version = "0.3.31"
488
-
source = "registry+https://github.com/rust-lang/crates.io-index"
489
-
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
490
-
491
-
[[package]]
492
-
name = "futures-macro"
493
-
version = "0.3.31"
205
+
name = "fjall"
206
+
version = "3.0.1"
494
207
source = "registry+https://github.com/rust-lang/crates.io-index"
495
-
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
208
+
checksum = "4f69637c02d38ad1b0f003101d0195a60368130aa17d9ef78b1557d265a22093"
496
209
dependencies = [
497
-
"proc-macro2",
498
-
"quote",
499
-
"syn 2.0.106",
210
+
"byteorder-lite",
211
+
"byteview",
212
+
"dashmap",
213
+
"flume",
214
+
"log",
215
+
"lsm-tree",
216
+
"lz4_flex",
217
+
"tempfile",
218
+
"xxhash-rust",
500
219
]
501
220
502
221
[[package]]
503
-
name = "futures-sink"
504
-
version = "0.3.31"
505
-
source = "registry+https://github.com/rust-lang/crates.io-index"
506
-
checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
507
-
508
-
[[package]]
509
-
name = "futures-task"
510
-
version = "0.3.31"
511
-
source = "registry+https://github.com/rust-lang/crates.io-index"
512
-
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
513
-
514
-
[[package]]
515
-
name = "futures-util"
516
-
version = "0.3.31"
222
+
name = "flume"
223
+
version = "0.12.0"
517
224
source = "registry+https://github.com/rust-lang/crates.io-index"
518
-
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
225
+
checksum = "5e139bc46ca777eb5efaf62df0ab8cc5fd400866427e56c68b22e414e53bd3be"
519
226
dependencies = [
520
-
"futures-channel",
521
-
"futures-core",
522
-
"futures-io",
523
-
"futures-macro",
524
-
"futures-sink",
525
-
"futures-task",
526
-
"memchr",
527
-
"pin-project-lite",
528
-
"pin-utils",
529
-
"slab",
227
+
"spin",
530
228
]
531
229
532
230
[[package]]
···
538
236
"cfg-if",
539
237
"libc",
540
238
"r-efi",
541
-
"wasi 0.14.7+wasi-0.2.4",
542
-
]
543
-
544
-
[[package]]
545
-
name = "gimli"
546
-
version = "0.32.3"
547
-
source = "registry+https://github.com/rust-lang/crates.io-index"
548
-
checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
549
-
550
-
[[package]]
551
-
name = "half"
552
-
version = "2.7.0"
553
-
source = "registry+https://github.com/rust-lang/crates.io-index"
554
-
checksum = "e54c115d4f30f52c67202f079c5f9d8b49db4691f460fdb0b4c2e838261b2ba5"
555
-
dependencies = [
556
-
"cfg-if",
557
-
"crunchy",
558
-
"zerocopy",
239
+
"wasi",
559
240
]
560
241
561
242
[[package]]
562
243
name = "hashbrown"
563
-
version = "0.15.5"
244
+
version = "0.14.5"
564
245
source = "registry+https://github.com/rust-lang/crates.io-index"
565
-
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
566
-
dependencies = [
567
-
"foldhash",
568
-
]
246
+
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
569
247
570
248
[[package]]
571
-
name = "hashlink"
572
-
version = "0.10.0"
249
+
name = "hashbrown"
250
+
version = "0.16.1"
573
251
source = "registry+https://github.com/rust-lang/crates.io-index"
574
-
checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
575
-
dependencies = [
576
-
"hashbrown",
577
-
]
252
+
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
578
253
579
254
[[package]]
580
255
name = "heck"
···
583
258
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
584
259
585
260
[[package]]
586
-
name = "io-uring"
587
-
version = "0.7.10"
588
-
source = "registry+https://github.com/rust-lang/crates.io-index"
589
-
checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b"
590
-
dependencies = [
591
-
"bitflags",
592
-
"cfg-if",
593
-
"libc",
594
-
]
595
-
596
-
[[package]]
597
-
name = "ipld-core"
598
-
version = "0.4.2"
599
-
source = "registry+https://github.com/rust-lang/crates.io-index"
600
-
checksum = "104718b1cc124d92a6d01ca9c9258a7df311405debb3408c445a36452f9bf8db"
601
-
dependencies = [
602
-
"cid",
603
-
"serde",
604
-
"serde_bytes",
605
-
]
606
-
607
-
[[package]]
608
-
name = "iroh-car"
609
-
version = "0.5.1"
261
+
name = "interval-heap"
262
+
version = "0.0.5"
610
263
source = "registry+https://github.com/rust-lang/crates.io-index"
611
-
checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a"
264
+
checksum = "11274e5e8e89b8607cfedc2910b6626e998779b48a019151c7604d0adcb86ac6"
612
265
dependencies = [
613
-
"anyhow",
614
-
"cid",
615
-
"futures",
616
-
"serde",
617
-
"serde_ipld_dagcbor",
618
-
"thiserror 1.0.69",
619
-
"tokio",
620
-
"unsigned-varint 0.7.2",
266
+
"compare",
621
267
]
622
268
623
269
[[package]]
···
627
273
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
628
274
629
275
[[package]]
630
-
name = "itertools"
631
-
version = "0.13.0"
632
-
source = "registry+https://github.com/rust-lang/crates.io-index"
633
-
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
634
-
dependencies = [
635
-
"either",
636
-
]
637
-
638
-
[[package]]
639
-
name = "itoa"
640
-
version = "1.0.15"
641
-
source = "registry+https://github.com/rust-lang/crates.io-index"
642
-
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
643
-
644
-
[[package]]
645
-
name = "jiff"
646
-
version = "0.2.15"
647
-
source = "registry+https://github.com/rust-lang/crates.io-index"
648
-
checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49"
649
-
dependencies = [
650
-
"jiff-static",
651
-
"log",
652
-
"portable-atomic",
653
-
"portable-atomic-util",
654
-
"serde",
655
-
]
656
-
657
-
[[package]]
658
-
name = "jiff-static"
659
-
version = "0.2.15"
660
-
source = "registry+https://github.com/rust-lang/crates.io-index"
661
-
checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4"
662
-
dependencies = [
663
-
"proc-macro2",
664
-
"quote",
665
-
"syn 2.0.106",
666
-
]
667
-
668
-
[[package]]
669
-
name = "js-sys"
670
-
version = "0.3.81"
671
-
source = "registry+https://github.com/rust-lang/crates.io-index"
672
-
checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305"
673
-
dependencies = [
674
-
"once_cell",
675
-
"wasm-bindgen",
676
-
]
677
-
678
-
[[package]]
679
276
name = "libc"
680
277
version = "0.2.176"
681
278
source = "registry+https://github.com/rust-lang/crates.io-index"
682
279
checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174"
683
-
684
-
[[package]]
685
-
name = "libsqlite3-sys"
686
-
version = "0.35.0"
687
-
source = "registry+https://github.com/rust-lang/crates.io-index"
688
-
checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f"
689
-
dependencies = [
690
-
"pkg-config",
691
-
"vcpkg",
692
-
]
693
280
694
281
[[package]]
695
282
name = "linux-raw-sys"
···
713
300
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
714
301
715
302
[[package]]
716
-
name = "match-lookup"
717
-
version = "0.1.1"
718
-
source = "registry+https://github.com/rust-lang/crates.io-index"
719
-
checksum = "1265724d8cb29dbbc2b0f06fffb8bf1a8c0cf73a78eede9ba73a4a66c52a981e"
720
-
dependencies = [
721
-
"proc-macro2",
722
-
"quote",
723
-
"syn 1.0.109",
724
-
]
725
-
726
-
[[package]]
727
-
name = "memchr"
728
-
version = "2.7.6"
729
-
source = "registry+https://github.com/rust-lang/crates.io-index"
730
-
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
731
-
732
-
[[package]]
733
-
name = "miniz_oxide"
734
-
version = "0.8.9"
735
-
source = "registry+https://github.com/rust-lang/crates.io-index"
736
-
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
737
-
dependencies = [
738
-
"adler2",
739
-
]
740
-
741
-
[[package]]
742
-
name = "mio"
743
-
version = "1.0.4"
744
-
source = "registry+https://github.com/rust-lang/crates.io-index"
745
-
checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
746
-
dependencies = [
747
-
"libc",
748
-
"wasi 0.11.1+wasi-snapshot-preview1",
749
-
"windows-sys 0.59.0",
750
-
]
751
-
752
-
[[package]]
753
-
name = "multibase"
754
-
version = "0.9.2"
755
-
source = "registry+https://github.com/rust-lang/crates.io-index"
756
-
checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77"
757
-
dependencies = [
758
-
"base-x",
759
-
"base256emoji",
760
-
"data-encoding",
761
-
"data-encoding-macro",
762
-
]
763
-
764
-
[[package]]
765
-
name = "multihash"
766
-
version = "0.19.3"
303
+
name = "lsm-tree"
304
+
version = "3.0.1"
767
305
source = "registry+https://github.com/rust-lang/crates.io-index"
768
-
checksum = "6b430e7953c29dd6a09afc29ff0bb69c6e306329ee6794700aee27b76a1aea8d"
306
+
checksum = "b875f1dfe14f557f805b167fb9b0fc54c5560c7a4bd6ae02535b2846f276a8cb"
769
307
dependencies = [
770
-
"core2",
771
-
"serde",
772
-
"unsigned-varint 0.8.0",
308
+
"byteorder-lite",
309
+
"byteview",
310
+
"crossbeam-skiplist",
311
+
"enum_dispatch",
312
+
"interval-heap",
313
+
"log",
314
+
"lz4_flex",
315
+
"quick_cache",
316
+
"rustc-hash",
317
+
"self_cell",
318
+
"sfa",
319
+
"tempfile",
320
+
"varint-rs",
321
+
"xxhash-rust",
773
322
]
774
323
775
324
[[package]]
776
-
name = "num-traits"
777
-
version = "0.2.19"
325
+
name = "lz4_flex"
326
+
version = "0.11.5"
778
327
source = "registry+https://github.com/rust-lang/crates.io-index"
779
-
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
328
+
checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a"
780
329
dependencies = [
781
-
"autocfg",
782
-
]
783
-
784
-
[[package]]
785
-
name = "object"
786
-
version = "0.37.3"
787
-
source = "registry+https://github.com/rust-lang/crates.io-index"
788
-
checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe"
789
-
dependencies = [
790
-
"memchr",
330
+
"twox-hash",
791
331
]
792
332
793
333
[[package]]
···
803
343
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
804
344
805
345
[[package]]
806
-
name = "oorandom"
807
-
version = "11.1.5"
808
-
source = "registry+https://github.com/rust-lang/crates.io-index"
809
-
checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
810
-
811
-
[[package]]
812
-
name = "parking_lot"
813
-
version = "0.12.5"
814
-
source = "registry+https://github.com/rust-lang/crates.io-index"
815
-
checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
816
-
dependencies = [
817
-
"lock_api",
818
-
"parking_lot_core",
819
-
]
820
-
821
-
[[package]]
822
346
name = "parking_lot_core"
823
347
version = "0.9.12"
824
348
source = "registry+https://github.com/rust-lang/crates.io-index"
···
832
356
]
833
357
834
358
[[package]]
835
-
name = "pin-project-lite"
836
-
version = "0.2.16"
837
-
source = "registry+https://github.com/rust-lang/crates.io-index"
838
-
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
839
-
840
-
[[package]]
841
-
name = "pin-utils"
842
-
version = "0.1.0"
843
-
source = "registry+https://github.com/rust-lang/crates.io-index"
844
-
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
845
-
846
-
[[package]]
847
-
name = "pkg-config"
848
-
version = "0.3.32"
849
-
source = "registry+https://github.com/rust-lang/crates.io-index"
850
-
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
851
-
852
-
[[package]]
853
-
name = "plotters"
854
-
version = "0.3.7"
855
-
source = "registry+https://github.com/rust-lang/crates.io-index"
856
-
checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
857
-
dependencies = [
858
-
"num-traits",
859
-
"plotters-backend",
860
-
"plotters-svg",
861
-
"wasm-bindgen",
862
-
"web-sys",
863
-
]
864
-
865
-
[[package]]
866
-
name = "plotters-backend"
867
-
version = "0.3.7"
868
-
source = "registry+https://github.com/rust-lang/crates.io-index"
869
-
checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
870
-
871
-
[[package]]
872
-
name = "plotters-svg"
873
-
version = "0.3.7"
874
-
source = "registry+https://github.com/rust-lang/crates.io-index"
875
-
checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
876
-
dependencies = [
877
-
"plotters-backend",
878
-
]
879
-
880
-
[[package]]
881
-
name = "portable-atomic"
882
-
version = "1.11.1"
883
-
source = "registry+https://github.com/rust-lang/crates.io-index"
884
-
checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
885
-
886
-
[[package]]
887
-
name = "portable-atomic-util"
888
-
version = "0.2.4"
359
+
name = "proc-macro2"
360
+
version = "1.0.101"
889
361
source = "registry+https://github.com/rust-lang/crates.io-index"
890
-
checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
362
+
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
891
363
dependencies = [
892
-
"portable-atomic",
364
+
"unicode-ident",
893
365
]
894
366
895
367
[[package]]
896
-
name = "proc-macro2"
897
-
version = "1.0.101"
368
+
name = "quick_cache"
369
+
version = "0.6.18"
898
370
source = "registry+https://github.com/rust-lang/crates.io-index"
899
-
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
371
+
checksum = "7ada44a88ef953a3294f6eb55d2007ba44646015e18613d2f213016379203ef3"
900
372
dependencies = [
901
-
"unicode-ident",
373
+
"equivalent",
374
+
"hashbrown 0.16.1",
902
375
]
903
376
904
377
[[package]]
···
917
390
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
918
391
919
392
[[package]]
920
-
name = "rayon"
921
-
version = "1.11.0"
922
-
source = "registry+https://github.com/rust-lang/crates.io-index"
923
-
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
924
-
dependencies = [
925
-
"either",
926
-
"rayon-core",
927
-
]
928
-
929
-
[[package]]
930
-
name = "rayon-core"
931
-
version = "1.13.0"
932
-
source = "registry+https://github.com/rust-lang/crates.io-index"
933
-
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
934
-
dependencies = [
935
-
"crossbeam-deque",
936
-
"crossbeam-utils",
937
-
]
938
-
939
-
[[package]]
940
393
name = "redox_syscall"
941
394
version = "0.5.18"
942
395
source = "registry+https://github.com/rust-lang/crates.io-index"
···
946
399
]
947
400
948
401
[[package]]
949
-
name = "regex"
950
-
version = "1.11.3"
951
-
source = "registry+https://github.com/rust-lang/crates.io-index"
952
-
checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c"
953
-
dependencies = [
954
-
"aho-corasick",
955
-
"memchr",
956
-
"regex-automata",
957
-
"regex-syntax",
958
-
]
959
-
960
-
[[package]]
961
-
name = "regex-automata"
962
-
version = "0.4.11"
963
-
source = "registry+https://github.com/rust-lang/crates.io-index"
964
-
checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad"
965
-
dependencies = [
966
-
"aho-corasick",
967
-
"memchr",
968
-
"regex-syntax",
969
-
]
970
-
971
-
[[package]]
972
-
name = "regex-syntax"
973
-
version = "0.8.6"
974
-
source = "registry+https://github.com/rust-lang/crates.io-index"
975
-
checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
976
-
977
-
[[package]]
978
402
name = "repo-stream"
979
-
version = "0.1.1"
403
+
version = "0.2.2"
980
404
dependencies = [
981
-
"bincode",
982
405
"clap",
983
-
"criterion",
984
-
"env_logger",
985
-
"futures",
986
-
"futures-core",
987
-
"ipld-core",
988
-
"iroh-car",
989
-
"log",
990
-
"multibase",
991
-
"rusqlite",
992
-
"serde",
993
-
"serde_bytes",
994
-
"serde_ipld_dagcbor",
995
-
"tempfile",
996
-
"thiserror 2.0.17",
997
-
"tokio",
406
+
"fjall",
998
407
]
999
408
1000
409
[[package]]
1001
-
name = "rusqlite"
1002
-
version = "0.37.0"
410
+
name = "rustc-hash"
411
+
version = "2.1.1"
1003
412
source = "registry+https://github.com/rust-lang/crates.io-index"
1004
-
checksum = "165ca6e57b20e1351573e3729b958bc62f0e48025386970b6e4d29e7a7e71f3f"
1005
-
dependencies = [
1006
-
"bitflags",
1007
-
"fallible-iterator",
1008
-
"fallible-streaming-iterator",
1009
-
"hashlink",
1010
-
"libsqlite3-sys",
1011
-
"smallvec",
1012
-
]
1013
-
1014
-
[[package]]
1015
-
name = "rustc-demangle"
1016
-
version = "0.1.26"
1017
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1018
-
checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
413
+
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
1019
414
1020
415
[[package]]
1021
416
name = "rustix"
···
1027
422
"errno",
1028
423
"libc",
1029
424
"linux-raw-sys",
1030
-
"windows-sys 0.60.2",
1031
-
]
1032
-
1033
-
[[package]]
1034
-
name = "rustversion"
1035
-
version = "1.0.22"
1036
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1037
-
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
1038
-
1039
-
[[package]]
1040
-
name = "ryu"
1041
-
version = "1.0.20"
1042
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1043
-
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
1044
-
1045
-
[[package]]
1046
-
name = "same-file"
1047
-
version = "1.0.6"
1048
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1049
-
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
1050
-
dependencies = [
1051
-
"winapi-util",
425
+
"windows-sys",
1052
426
]
1053
427
1054
428
[[package]]
···
1058
432
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
1059
433
1060
434
[[package]]
1061
-
name = "serde"
1062
-
version = "1.0.228"
1063
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1064
-
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
1065
-
dependencies = [
1066
-
"serde_core",
1067
-
"serde_derive",
1068
-
]
1069
-
1070
-
[[package]]
1071
-
name = "serde_bytes"
1072
-
version = "0.11.19"
1073
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1074
-
checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8"
1075
-
dependencies = [
1076
-
"serde",
1077
-
"serde_core",
1078
-
]
1079
-
1080
-
[[package]]
1081
-
name = "serde_core"
1082
-
version = "1.0.228"
1083
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1084
-
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
1085
-
dependencies = [
1086
-
"serde_derive",
1087
-
]
1088
-
1089
-
[[package]]
1090
-
name = "serde_derive"
1091
-
version = "1.0.228"
435
+
name = "self_cell"
436
+
version = "1.2.2"
1092
437
source = "registry+https://github.com/rust-lang/crates.io-index"
1093
-
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
1094
-
dependencies = [
1095
-
"proc-macro2",
1096
-
"quote",
1097
-
"syn 2.0.106",
1098
-
]
438
+
checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89"
1099
439
1100
440
[[package]]
1101
-
name = "serde_ipld_dagcbor"
1102
-
version = "0.6.4"
1103
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1104
-
checksum = "46182f4f08349a02b45c998ba3215d3f9de826246ba02bb9dddfe9a2a2100778"
1105
-
dependencies = [
1106
-
"cbor4ii",
1107
-
"ipld-core",
1108
-
"scopeguard",
1109
-
"serde",
1110
-
]
1111
-
1112
-
[[package]]
1113
-
name = "serde_json"
1114
-
version = "1.0.145"
1115
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1116
-
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
1117
-
dependencies = [
1118
-
"itoa",
1119
-
"memchr",
1120
-
"ryu",
1121
-
"serde",
1122
-
"serde_core",
1123
-
]
1124
-
1125
-
[[package]]
1126
-
name = "signal-hook-registry"
1127
-
version = "1.4.6"
441
+
name = "sfa"
442
+
version = "1.0.0"
1128
443
source = "registry+https://github.com/rust-lang/crates.io-index"
1129
-
checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b"
444
+
checksum = "a1296838937cab56cd6c4eeeb8718ec777383700c33f060e2869867bd01d1175"
1130
445
dependencies = [
1131
-
"libc",
446
+
"byteorder-lite",
447
+
"log",
448
+
"xxhash-rust",
1132
449
]
1133
450
1134
451
[[package]]
1135
-
name = "slab"
1136
-
version = "0.4.11"
1137
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1138
-
checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
1139
-
1140
-
[[package]]
1141
452
name = "smallvec"
1142
453
version = "1.15.1"
1143
454
source = "registry+https://github.com/rust-lang/crates.io-index"
1144
455
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
1145
456
1146
457
[[package]]
1147
-
name = "socket2"
1148
-
version = "0.6.0"
458
+
name = "spin"
459
+
version = "0.9.8"
1149
460
source = "registry+https://github.com/rust-lang/crates.io-index"
1150
-
checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807"
461
+
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
1151
462
dependencies = [
1152
-
"libc",
1153
-
"windows-sys 0.59.0",
463
+
"lock_api",
1154
464
]
1155
465
1156
466
[[package]]
···
1161
471
1162
472
[[package]]
1163
473
name = "syn"
1164
-
version = "1.0.109"
1165
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1166
-
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
1167
-
dependencies = [
1168
-
"proc-macro2",
1169
-
"quote",
1170
-
"unicode-ident",
1171
-
]
1172
-
1173
-
[[package]]
1174
-
name = "syn"
1175
474
version = "2.0.106"
1176
475
source = "registry+https://github.com/rust-lang/crates.io-index"
1177
476
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
···
1191
490
"getrandom",
1192
491
"once_cell",
1193
492
"rustix",
1194
-
"windows-sys 0.60.2",
493
+
"windows-sys",
1195
494
]
1196
495
1197
496
[[package]]
1198
-
name = "thiserror"
1199
-
version = "1.0.69"
497
+
name = "twox-hash"
498
+
version = "2.1.2"
1200
499
source = "registry+https://github.com/rust-lang/crates.io-index"
1201
-
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
1202
-
dependencies = [
1203
-
"thiserror-impl 1.0.69",
1204
-
]
1205
-
1206
-
[[package]]
1207
-
name = "thiserror"
1208
-
version = "2.0.17"
1209
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1210
-
checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
1211
-
dependencies = [
1212
-
"thiserror-impl 2.0.17",
1213
-
]
1214
-
1215
-
[[package]]
1216
-
name = "thiserror-impl"
1217
-
version = "1.0.69"
1218
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1219
-
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
1220
-
dependencies = [
1221
-
"proc-macro2",
1222
-
"quote",
1223
-
"syn 2.0.106",
1224
-
]
1225
-
1226
-
[[package]]
1227
-
name = "thiserror-impl"
1228
-
version = "2.0.17"
1229
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1230
-
checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
1231
-
dependencies = [
1232
-
"proc-macro2",
1233
-
"quote",
1234
-
"syn 2.0.106",
1235
-
]
1236
-
1237
-
[[package]]
1238
-
name = "tinytemplate"
1239
-
version = "1.2.1"
1240
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1241
-
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
1242
-
dependencies = [
1243
-
"serde",
1244
-
"serde_json",
1245
-
]
1246
-
1247
-
[[package]]
1248
-
name = "tokio"
1249
-
version = "1.47.1"
1250
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1251
-
checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038"
1252
-
dependencies = [
1253
-
"backtrace",
1254
-
"bytes",
1255
-
"io-uring",
1256
-
"libc",
1257
-
"mio",
1258
-
"parking_lot",
1259
-
"pin-project-lite",
1260
-
"signal-hook-registry",
1261
-
"slab",
1262
-
"socket2",
1263
-
"tokio-macros",
1264
-
"windows-sys 0.59.0",
1265
-
]
1266
-
1267
-
[[package]]
1268
-
name = "tokio-macros"
1269
-
version = "2.5.0"
1270
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1271
-
checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
1272
-
dependencies = [
1273
-
"proc-macro2",
1274
-
"quote",
1275
-
"syn 2.0.106",
1276
-
]
500
+
checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
1277
501
1278
502
[[package]]
1279
503
name = "unicode-ident"
···
1282
506
checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
1283
507
1284
508
[[package]]
1285
-
name = "unsigned-varint"
1286
-
version = "0.7.2"
1287
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1288
-
checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105"
1289
-
1290
-
[[package]]
1291
-
name = "unsigned-varint"
1292
-
version = "0.8.0"
1293
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1294
-
checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06"
1295
-
1296
-
[[package]]
1297
-
name = "unty"
1298
-
version = "0.0.4"
1299
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1300
-
checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae"
1301
-
1302
-
[[package]]
1303
509
name = "utf8parse"
1304
510
version = "0.2.2"
1305
511
source = "registry+https://github.com/rust-lang/crates.io-index"
1306
512
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
1307
513
1308
514
[[package]]
1309
-
name = "vcpkg"
1310
-
version = "0.2.15"
1311
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1312
-
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
1313
-
1314
-
[[package]]
1315
-
name = "virtue"
1316
-
version = "0.0.18"
1317
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1318
-
checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1"
1319
-
1320
-
[[package]]
1321
-
name = "walkdir"
1322
-
version = "2.5.0"
1323
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1324
-
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
1325
-
dependencies = [
1326
-
"same-file",
1327
-
"winapi-util",
1328
-
]
1329
-
1330
-
[[package]]
1331
-
name = "wasi"
1332
-
version = "0.11.1+wasi-snapshot-preview1"
515
+
name = "varint-rs"
516
+
version = "2.2.0"
1333
517
source = "registry+https://github.com/rust-lang/crates.io-index"
1334
-
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
518
+
checksum = "8f54a172d0620933a27a4360d3db3e2ae0dd6cceae9730751a036bbf182c4b23"
1335
519
1336
520
[[package]]
1337
521
name = "wasi"
···
1352
536
]
1353
537
1354
538
[[package]]
1355
-
name = "wasm-bindgen"
1356
-
version = "0.2.104"
1357
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1358
-
checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d"
1359
-
dependencies = [
1360
-
"cfg-if",
1361
-
"once_cell",
1362
-
"rustversion",
1363
-
"wasm-bindgen-macro",
1364
-
"wasm-bindgen-shared",
1365
-
]
1366
-
1367
-
[[package]]
1368
-
name = "wasm-bindgen-backend"
1369
-
version = "0.2.104"
1370
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1371
-
checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19"
1372
-
dependencies = [
1373
-
"bumpalo",
1374
-
"log",
1375
-
"proc-macro2",
1376
-
"quote",
1377
-
"syn 2.0.106",
1378
-
"wasm-bindgen-shared",
1379
-
]
1380
-
1381
-
[[package]]
1382
-
name = "wasm-bindgen-macro"
1383
-
version = "0.2.104"
1384
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1385
-
checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119"
1386
-
dependencies = [
1387
-
"quote",
1388
-
"wasm-bindgen-macro-support",
1389
-
]
1390
-
1391
-
[[package]]
1392
-
name = "wasm-bindgen-macro-support"
1393
-
version = "0.2.104"
1394
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1395
-
checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7"
1396
-
dependencies = [
1397
-
"proc-macro2",
1398
-
"quote",
1399
-
"syn 2.0.106",
1400
-
"wasm-bindgen-backend",
1401
-
"wasm-bindgen-shared",
1402
-
]
1403
-
1404
-
[[package]]
1405
-
name = "wasm-bindgen-shared"
1406
-
version = "0.2.104"
1407
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1408
-
checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1"
1409
-
dependencies = [
1410
-
"unicode-ident",
1411
-
]
1412
-
1413
-
[[package]]
1414
-
name = "web-sys"
1415
-
version = "0.3.81"
1416
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1417
-
checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120"
1418
-
dependencies = [
1419
-
"js-sys",
1420
-
"wasm-bindgen",
1421
-
]
1422
-
1423
-
[[package]]
1424
-
name = "winapi-util"
1425
-
version = "0.1.11"
1426
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1427
-
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
1428
-
dependencies = [
1429
-
"windows-sys 0.60.2",
1430
-
]
1431
-
1432
-
[[package]]
1433
539
name = "windows-link"
1434
540
version = "0.2.1"
1435
541
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1437
543
1438
544
[[package]]
1439
545
name = "windows-sys"
1440
-
version = "0.59.0"
1441
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1442
-
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
1443
-
dependencies = [
1444
-
"windows-targets 0.52.6",
1445
-
]
1446
-
1447
-
[[package]]
1448
-
name = "windows-sys"
1449
546
version = "0.60.2"
1450
547
source = "registry+https://github.com/rust-lang/crates.io-index"
1451
548
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
1452
549
dependencies = [
1453
-
"windows-targets 0.53.5",
1454
-
]
1455
-
1456
-
[[package]]
1457
-
name = "windows-targets"
1458
-
version = "0.52.6"
1459
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1460
-
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
1461
-
dependencies = [
1462
-
"windows_aarch64_gnullvm 0.52.6",
1463
-
"windows_aarch64_msvc 0.52.6",
1464
-
"windows_i686_gnu 0.52.6",
1465
-
"windows_i686_gnullvm 0.52.6",
1466
-
"windows_i686_msvc 0.52.6",
1467
-
"windows_x86_64_gnu 0.52.6",
1468
-
"windows_x86_64_gnullvm 0.52.6",
1469
-
"windows_x86_64_msvc 0.52.6",
550
+
"windows-targets",
1470
551
]
1471
552
1472
553
[[package]]
···
1476
557
checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
1477
558
dependencies = [
1478
559
"windows-link",
1479
-
"windows_aarch64_gnullvm 0.53.1",
1480
-
"windows_aarch64_msvc 0.53.1",
1481
-
"windows_i686_gnu 0.53.1",
1482
-
"windows_i686_gnullvm 0.53.1",
1483
-
"windows_i686_msvc 0.53.1",
1484
-
"windows_x86_64_gnu 0.53.1",
1485
-
"windows_x86_64_gnullvm 0.53.1",
1486
-
"windows_x86_64_msvc 0.53.1",
560
+
"windows_aarch64_gnullvm",
561
+
"windows_aarch64_msvc",
562
+
"windows_i686_gnu",
563
+
"windows_i686_gnullvm",
564
+
"windows_i686_msvc",
565
+
"windows_x86_64_gnu",
566
+
"windows_x86_64_gnullvm",
567
+
"windows_x86_64_msvc",
1487
568
]
1488
569
1489
570
[[package]]
1490
571
name = "windows_aarch64_gnullvm"
1491
-
version = "0.52.6"
1492
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1493
-
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
1494
-
1495
-
[[package]]
1496
-
name = "windows_aarch64_gnullvm"
1497
572
version = "0.53.1"
1498
573
source = "registry+https://github.com/rust-lang/crates.io-index"
1499
574
checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
1500
575
1501
576
[[package]]
1502
577
name = "windows_aarch64_msvc"
1503
-
version = "0.52.6"
1504
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1505
-
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
1506
-
1507
-
[[package]]
1508
-
name = "windows_aarch64_msvc"
1509
578
version = "0.53.1"
1510
579
source = "registry+https://github.com/rust-lang/crates.io-index"
1511
580
checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
1512
581
1513
582
[[package]]
1514
583
name = "windows_i686_gnu"
1515
-
version = "0.52.6"
1516
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1517
-
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
1518
-
1519
-
[[package]]
1520
-
name = "windows_i686_gnu"
1521
584
version = "0.53.1"
1522
585
source = "registry+https://github.com/rust-lang/crates.io-index"
1523
586
checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
1524
587
1525
588
[[package]]
1526
589
name = "windows_i686_gnullvm"
1527
-
version = "0.52.6"
1528
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1529
-
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
1530
-
1531
-
[[package]]
1532
-
name = "windows_i686_gnullvm"
1533
590
version = "0.53.1"
1534
591
source = "registry+https://github.com/rust-lang/crates.io-index"
1535
592
checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
1536
-
1537
-
[[package]]
1538
-
name = "windows_i686_msvc"
1539
-
version = "0.52.6"
1540
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1541
-
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
1542
593
1543
594
[[package]]
1544
595
name = "windows_i686_msvc"
···
1548
599
1549
600
[[package]]
1550
601
name = "windows_x86_64_gnu"
1551
-
version = "0.52.6"
1552
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1553
-
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
1554
-
1555
-
[[package]]
1556
-
name = "windows_x86_64_gnu"
1557
602
version = "0.53.1"
1558
603
source = "registry+https://github.com/rust-lang/crates.io-index"
1559
604
checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
1560
605
1561
606
[[package]]
1562
607
name = "windows_x86_64_gnullvm"
1563
-
version = "0.52.6"
1564
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1565
-
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
1566
-
1567
-
[[package]]
1568
-
name = "windows_x86_64_gnullvm"
1569
608
version = "0.53.1"
1570
609
source = "registry+https://github.com/rust-lang/crates.io-index"
1571
610
checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
1572
-
1573
-
[[package]]
1574
-
name = "windows_x86_64_msvc"
1575
-
version = "0.52.6"
1576
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1577
-
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
1578
611
1579
612
[[package]]
1580
613
name = "windows_x86_64_msvc"
···
1589
622
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
1590
623
1591
624
[[package]]
1592
-
name = "zerocopy"
1593
-
version = "0.8.27"
625
+
name = "xxhash-rust"
626
+
version = "0.8.15"
1594
627
source = "registry+https://github.com/rust-lang/crates.io-index"
1595
-
checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c"
1596
-
dependencies = [
1597
-
"zerocopy-derive",
1598
-
]
1599
-
1600
-
[[package]]
1601
-
name = "zerocopy-derive"
1602
-
version = "0.8.27"
1603
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1604
-
checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
1605
-
dependencies = [
1606
-
"proc-macro2",
1607
-
"quote",
1608
-
"syn 2.0.106",
1609
-
]
628
+
checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
+3
-36
Cargo.toml
+3
-36
Cargo.toml
···
1
1
[package]
2
2
name = "repo-stream"
3
-
version = "0.1.1"
3
+
version = "0.2.2"
4
4
edition = "2024"
5
5
license = "MIT OR Apache-2.0"
6
-
description = "Fast and robust atproto CAR file processing in rust"
6
+
description = "A robust CAR file -> MST walker for atproto"
7
7
repository = "https://tangled.org/@microcosm.blue/repo-stream"
8
8
9
9
[dependencies]
10
-
bincode = { version = "2.0.1", features = ["serde"] }
11
-
futures = "0.3.31"
12
-
futures-core = "0.3.31"
13
-
ipld-core = { version = "0.4.2", features = ["serde"] }
14
-
iroh-car = "0.5.1"
15
-
log = "0.4.28"
16
-
multibase = "0.9.2"
17
-
rusqlite = "0.37.0"
18
-
serde = { version = "1.0.228", features = ["derive"] }
19
-
serde_bytes = "0.11.19"
20
-
serde_ipld_dagcbor = "0.6.4"
21
-
thiserror = "2.0.17"
22
-
tokio = { version = "1.47.1", features = ["rt", "sync"] }
23
-
24
-
[dev-dependencies]
10
+
fjall = "3.0.1"
25
11
clap = { version = "4.5.48", features = ["derive"] }
26
-
criterion = { version = "0.7.0", features = ["async_tokio"] }
27
-
env_logger = "0.11.8"
28
-
multibase = "0.9.2"
29
-
tempfile = "3.23.0"
30
-
tokio = { version = "1.47.1", features = ["full"] }
31
12
32
-
[profile.profiling]
33
-
inherits = "release"
34
-
debug = true
35
-
36
-
# [profile.release]
37
-
# debug = true
38
-
39
-
[[bench]]
40
-
name = "non-huge-cars"
41
-
harness = false
42
-
43
-
[[bench]]
44
-
name = "huge-car"
45
-
harness = false
+12
-21
benches/huge-car.rs
+12
-21
benches/huge-car.rs
···
1
1
extern crate repo_stream;
2
-
use futures::TryStreamExt;
3
-
use iroh_car::CarReader;
4
-
use std::convert::Infallible;
2
+
use repo_stream::Driver;
5
3
use std::path::{Path, PathBuf};
6
4
7
5
use criterion::{Criterion, criterion_group, criterion_main};
···
20
18
});
21
19
}
22
20
23
-
async fn drive_car(filename: impl AsRef<Path>) {
21
+
async fn drive_car(filename: impl AsRef<Path>) -> usize {
24
22
let reader = tokio::fs::File::open(filename).await.unwrap();
25
23
let reader = tokio::io::BufReader::new(reader);
26
-
let reader = CarReader::new(reader).await.unwrap();
27
24
28
-
let root = reader
29
-
.header()
30
-
.roots()
31
-
.first()
32
-
.ok_or("missing root")
25
+
let mut driver = match Driver::load_car(reader, |block| block.len(), 1024)
26
+
.await
33
27
.unwrap()
34
-
.clone();
35
-
36
-
let stream = std::pin::pin!(reader.stream());
37
-
38
-
let (_commit, v) =
39
-
repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len()))
40
-
.await
41
-
.unwrap();
42
-
let mut record_stream = std::pin::pin!(v.stream());
28
+
{
29
+
Driver::Memory(_, mem_driver) => mem_driver,
30
+
Driver::Disk(_) => panic!("not doing disk for benchmark"),
31
+
};
43
32
44
-
while let Some(_) = record_stream.try_next().await.unwrap() {
45
-
// just here for the drive
33
+
let mut n = 0;
34
+
while let Some(pairs) = driver.next_chunk(256).await.unwrap() {
35
+
n += pairs.len();
46
36
}
37
+
n
47
38
}
48
39
49
40
criterion_group!(benches, criterion_benchmark);
+16
-22
benches/non-huge-cars.rs
+16
-22
benches/non-huge-cars.rs
···
1
1
extern crate repo_stream;
2
-
use futures::TryStreamExt;
3
-
use iroh_car::CarReader;
4
-
use std::convert::Infallible;
2
+
use repo_stream::Driver;
5
3
6
4
use criterion::{Criterion, criterion_group, criterion_main};
7
5
6
+
const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car");
8
7
const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car");
9
8
const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car");
10
9
const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car");
···
15
14
.build()
16
15
.expect("Creating runtime failed");
17
16
17
+
c.bench_function("empty-car", |b| {
18
+
b.to_async(&rt).iter(async || drive_car(EMPTY_CAR).await)
19
+
});
18
20
c.bench_function("tiny-car", |b| {
19
21
b.to_async(&rt).iter(async || drive_car(TINY_CAR).await)
20
22
});
···
26
28
});
27
29
}
28
30
29
-
async fn drive_car(bytes: &[u8]) {
30
-
let reader = CarReader::new(bytes).await.unwrap();
31
-
32
-
let root = reader
33
-
.header()
34
-
.roots()
35
-
.first()
36
-
.ok_or("missing root")
31
+
async fn drive_car(bytes: &[u8]) -> usize {
32
+
let mut driver = match Driver::load_car(bytes, |block| block.len(), 32)
33
+
.await
37
34
.unwrap()
38
-
.clone();
39
-
40
-
let stream = std::pin::pin!(reader.stream());
35
+
{
36
+
Driver::Memory(_, mem_driver) => mem_driver,
37
+
Driver::Disk(_) => panic!("not benching big cars here"),
38
+
};
41
39
42
-
let (_commit, v) =
43
-
repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len()))
44
-
.await
45
-
.unwrap();
46
-
let mut record_stream = std::pin::pin!(v.stream());
47
-
48
-
while let Some(_) = record_stream.try_next().await.unwrap() {
49
-
// just here for the drive
40
+
let mut n = 0;
41
+
while let Some(pairs) = driver.next_chunk(256).await.unwrap() {
42
+
n += pairs.len();
50
43
}
44
+
n
51
45
}
52
46
53
47
criterion_group!(benches, criterion_benchmark);
car-samples/empty.car
car-samples/empty.car
This is a binary file and will not be displayed.
+26
-53
examples/disk-read-file/main.rs
+26
-53
examples/disk-read-file/main.rs
···
1
-
extern crate repo_stream;
2
1
use clap::Parser;
3
-
use repo_stream::drive::Processable;
4
-
use serde::{Deserialize, Serialize};
5
-
use std::path::PathBuf;
6
-
7
-
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
2
+
use fjall::{Database, KeyspaceCreateOptions};
3
+
use std::{path::PathBuf, collections::BTreeMap};
8
4
9
5
#[derive(Debug, Parser)]
10
6
struct Args {
11
7
#[arg()]
12
-
car: PathBuf,
13
-
#[arg()]
14
-
tmpfile: PathBuf,
8
+
db_path: PathBuf,
15
9
}
16
10
17
-
#[derive(Clone, Serialize, Deserialize)]
18
-
struct S(usize);
11
+
fn main() -> Result<(), Box<dyn std::error::Error>> {
12
+
let Args { db_path } = Args::parse();
19
13
20
-
impl Processable for S {
21
-
fn get_size(&self) -> usize {
22
-
0 // no additional space taken, just its stack size (newtype is free)
14
+
let db = Database::builder(db_path).open()?;
15
+
let ks = db.keyspace("z", KeyspaceCreateOptions::default)?;
16
+
let mut seen_keys: BTreeMap<Vec<u8>, usize> = BTreeMap::default();
17
+
18
+
print!("writing...");
19
+
for i in 0..250_000_usize {
20
+
let k = i.to_be_bytes().to_vec();
21
+
ks.insert(k.clone(), vec![0xAA; 256])?;
22
+
seen_keys.insert(k, i);
23
23
}
24
-
}
25
24
26
-
#[tokio::main]
27
-
async fn main() -> Result<()> {
28
-
env_logger::init();
25
+
println!(" done. checking keys...");
29
26
30
-
let Args { car, tmpfile } = Args::parse();
31
-
let reader = tokio::fs::File::open(car).await?;
32
-
let reader = tokio::io::BufReader::new(reader);
27
+
// remove every seen key that fjall actually has, to see what's left
28
+
for guard in ks.iter() {
29
+
seen_keys.remove(guard.key()?.as_ref());
30
+
}
33
31
34
-
// let kb = 2_usize.pow(10);
35
-
let mb = 2_usize.pow(20);
36
-
37
-
let limit_mb = 32;
38
-
39
-
let driver = match repo_stream::drive::load_car(reader, |block| S(block.len()), 10 * mb).await?
40
-
{
41
-
repo_stream::drive::Vehicle::Lil(_, _) => panic!("try this on a bigger car"),
42
-
repo_stream::drive::Vehicle::Big(big_stuff) => {
43
-
let disk_store = repo_stream::disk::SqliteStore::new(tmpfile.clone(), limit_mb);
44
-
let (commit, driver) = big_stuff.finish_loading(disk_store).await?;
45
-
log::warn!("big: {:?}", commit);
46
-
driver
32
+
// report the result
33
+
if seen_keys.len() == 0 {
34
+
println!("[ OK ] all keys found");
35
+
} else {
36
+
println!("[FAIL] fjall did not have all seen_keys:");
37
+
for (k, i) in seen_keys {
38
+
println!(" insert #{i} missing, key bytes: {k:?}");
47
39
}
48
-
};
49
-
50
-
let mut n = 0;
51
-
let (mut rx, worker) = driver.rx(512).await?;
52
-
53
-
log::debug!("walking...");
54
-
while let Some(pairs) = rx.recv().await {
55
-
n += pairs.len();
56
40
}
57
-
log::debug!("done walking! joining...");
58
-
59
-
worker.await.unwrap().unwrap();
60
-
61
-
log::debug!("joined.");
62
-
63
-
// log::info!("now is the time to check mem...");
64
-
// tokio::time::sleep(std::time::Duration::from_secs(22)).await;
65
-
log::info!("bye! {n}");
66
-
67
-
std::fs::remove_file(tmpfile).unwrap(); // need to also remove -shm -wal
68
41
69
42
Ok(())
70
43
}
+14
-6
examples/read-file/main.rs
+14
-6
examples/read-file/main.rs
···
1
+
/*!
2
+
Read a CAR file with in-memory processing
3
+
*/
4
+
1
5
extern crate repo_stream;
2
6
use clap::Parser;
7
+
use repo_stream::{Driver, DriverBuilder};
3
8
use std::path::PathBuf;
4
9
5
10
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
···
18
23
let reader = tokio::fs::File::open(file).await?;
19
24
let reader = tokio::io::BufReader::new(reader);
20
25
21
-
let (commit, mut driver) =
22
-
match repo_stream::drive::load_car(reader, |block| block.len(), 1024 * 1024).await? {
23
-
repo_stream::drive::Vehicle::Lil(commit, mem_driver) => (commit, mem_driver),
24
-
repo_stream::drive::Vehicle::Big(_) => panic!("can't handle big cars yet"),
25
-
};
26
+
let (commit, mut driver) = match DriverBuilder::new()
27
+
.with_block_processor(|block| block.len())
28
+
.load_car(reader)
29
+
.await?
30
+
{
31
+
Driver::Memory(commit, mem_driver) => (commit, mem_driver),
32
+
Driver::Disk(_) => panic!("this example doesn't handle big CARs"),
33
+
};
26
34
27
35
log::info!("got commit: {commit:?}");
28
36
···
31
39
n += pairs.len();
32
40
// log::info!("got {rkey:?}");
33
41
}
34
-
log::info!("bye! {n}");
42
+
log::info!("bye! total records={n}");
35
43
36
44
Ok(())
37
45
}
+70
-2
readme.md
+70
-2
readme.md
···
1
1
# repo-stream
2
2
3
-
Fast and (aspirationally) robust atproto CAR file processing in rust
3
+
A robust CAR file -> MST walker for atproto
4
+
5
+
[![Crates.io][crates-badge]](https://crates.io/crates/repo-stream)
6
+
[![Documentation][docs-badge]](https://docs.rs/repo-stream)
7
+
[![Sponsor][sponsor-badge]](https://github.com/sponsors/uniphil)
8
+
9
+
[crates-badge]: https://img.shields.io/crates/v/repo-stream.svg
10
+
[docs-badge]: https://docs.rs/repo-stream/badge.svg
11
+
[sponsor-badge]: https://img.shields.io/badge/at-microcosm-b820f9?labelColor=b820f9&logo=githubsponsors&logoColor=fff
12
+
13
+
```rust
14
+
use repo_stream::{Driver, DriverBuilder, DriveError, DiskBuilder};
15
+
16
+
#[tokio::main]
17
+
async fn main() -> Result<(), DriveError> {
18
+
// repo-stream takes any AsyncRead as input, like a tokio::fs::File
19
+
let reader = tokio::fs::File::open("repo.car".into()).await?;
20
+
let reader = tokio::io::BufReader::new(reader);
21
+
22
+
// example repo workload is simply counting the total record bytes
23
+
let mut total_size = 0;
24
+
25
+
match DriverBuilder::new()
26
+
.with_mem_limit_mb(10)
27
+
.with_block_processor(|rec| rec.len()) // block processing: just extract the raw record size
28
+
.load_car(reader)
29
+
.await?
30
+
{
31
+
32
+
// if all blocks fit within memory
33
+
Driver::Memory(_commit, mut driver) => {
34
+
while let Some(chunk) = driver.next_chunk(256).await? {
35
+
for (_rkey, size) in chunk {
36
+
total_size += size;
37
+
}
38
+
}
39
+
},
40
+
41
+
// if the CAR was too big for in-memory processing
42
+
Driver::Disk(paused) => {
43
+
// set up a disk store we can spill to
44
+
let store = DiskBuilder::new().open("some/path.db".into()).await?;
45
+
// do the spilling, get back a (similar) driver
46
+
let (_commit, mut driver) = paused.finish_loading(store).await?;
47
+
48
+
while let Some(chunk) = driver.next_chunk(256).await? {
49
+
for (_rkey, size) in chunk {
50
+
total_size += size;
51
+
}
52
+
}
53
+
54
+
// clean up the disk store (drop tables etc)
55
+
driver.reset_store().await?;
56
+
}
57
+
};
58
+
println!("sum of size of all records: {total_size}");
59
+
Ok(())
60
+
}
61
+
```
62
+
63
+
more recent todo
64
+
65
+
- [ ] get an *emtpy* car for the test suite
66
+
- [x] implement a max size on disk limit
67
+
68
+
69
+
-----
70
+
71
+
older stuff (to clean up):
4
72
5
73
6
74
current car processing times (records processed into their length usize, phil's dev machine):
···
27
95
-> yeah the commit is returned from init
28
96
- [ ] spec compliance todos
29
97
- [x] assert that keys are ordered and fail if not
30
-
- [ ] verify node mst depth from key (possibly pending [interop test fixes](https://github.com/bluesky-social/atproto-interop-tests/issues/5))
98
+
- [x] verify node mst depth from key (possibly pending [interop test fixes](https://github.com/bluesky-social/atproto-interop-tests/issues/5))
31
99
- [ ] performance todos
32
100
- [x] consume the serialized nodes into a mutable efficient format
33
101
- [ ] maybe customize the deserialize impl to do that directly?
-105
src/disk.rs
-105
src/disk.rs
···
1
-
use rusqlite::OptionalExtension;
2
-
use std::path::PathBuf;
3
-
4
-
pub struct SqliteStore {
5
-
path: PathBuf,
6
-
limit_mb: usize,
7
-
}
8
-
9
-
impl SqliteStore {
10
-
pub fn new(path: PathBuf, limit_mb: usize) -> Self {
11
-
Self { path, limit_mb }
12
-
}
13
-
}
14
-
15
-
impl SqliteStore {
16
-
pub async fn get_access(&mut self) -> Result<SqliteAccess, rusqlite::Error> {
17
-
let path = self.path.clone();
18
-
let limit_mb = self.limit_mb;
19
-
let conn = tokio::task::spawn_blocking(move || {
20
-
let conn = rusqlite::Connection::open(path)?;
21
-
22
-
let sq_mb = -(2_i64.pow(10)); // negative is kibibytes for sqlite cache_size
23
-
24
-
// conn.pragma_update(None, "journal_mode", "OFF")?;
25
-
// conn.pragma_update(None, "journal_mode", "MEMORY")?;
26
-
conn.pragma_update(None, "journal_mode", "WAL")?;
27
-
// conn.pragma_update(None, "wal_autocheckpoint", "0")?; // this lets things get a bit big on disk
28
-
conn.pragma_update(None, "synchronous", "OFF")?;
29
-
conn.pragma_update(None, "cache_size", (limit_mb as i64 * sq_mb).to_string())?;
30
-
conn.execute(
31
-
"CREATE TABLE blocks (
32
-
key BLOB PRIMARY KEY NOT NULL,
33
-
val BLOB NOT NULL
34
-
) WITHOUT ROWID",
35
-
(),
36
-
)?;
37
-
38
-
Ok::<_, rusqlite::Error>(conn)
39
-
})
40
-
.await
41
-
.expect("join error")?;
42
-
43
-
Ok(SqliteAccess { conn })
44
-
}
45
-
}
46
-
47
-
pub struct SqliteAccess {
48
-
conn: rusqlite::Connection,
49
-
}
50
-
51
-
impl SqliteAccess {
52
-
pub fn get_writer(&'_ mut self) -> Result<SqliteWriter<'_>, rusqlite::Error> {
53
-
let tx = self.conn.transaction()?;
54
-
// let insert_stmt = tx.prepare("INSERT INTO blocks (key, val) VALUES (?1, ?2)")?;
55
-
Ok(SqliteWriter { tx: Some(tx) })
56
-
}
57
-
pub fn get_reader(&'_ self) -> Result<SqliteReader<'_>, rusqlite::Error> {
58
-
let select_stmt = self.conn.prepare("SELECT val FROM blocks WHERE key = ?1")?;
59
-
Ok(SqliteReader { select_stmt })
60
-
}
61
-
}
62
-
63
-
pub struct SqliteWriter<'conn> {
64
-
tx: Option<rusqlite::Transaction<'conn>>,
65
-
}
66
-
67
-
/// oops careful in async
68
-
impl Drop for SqliteWriter<'_> {
69
-
fn drop(&mut self) {
70
-
let tx = self.tx.take();
71
-
tx.unwrap().commit().unwrap();
72
-
}
73
-
}
74
-
75
-
impl SqliteWriter<'_> {
76
-
pub fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> rusqlite::Result<()> {
77
-
let tx = self.tx.as_ref().unwrap();
78
-
let mut insert_stmt = tx.prepare_cached("INSERT INTO blocks (key, val) VALUES (?1, ?2)")?;
79
-
insert_stmt.execute((key, val))?;
80
-
Ok(())
81
-
}
82
-
pub fn put_many(
83
-
&mut self,
84
-
kv: impl Iterator<Item = (Vec<u8>, Vec<u8>)>,
85
-
) -> rusqlite::Result<()> {
86
-
let tx = self.tx.as_ref().unwrap();
87
-
let mut insert_stmt = tx.prepare_cached("INSERT INTO blocks (key, val) VALUES (?1, ?2)")?;
88
-
for (k, v) in kv {
89
-
insert_stmt.execute((k, v))?;
90
-
}
91
-
Ok(())
92
-
}
93
-
}
94
-
95
-
pub struct SqliteReader<'conn> {
96
-
select_stmt: rusqlite::Statement<'conn>,
97
-
}
98
-
99
-
impl SqliteReader<'_> {
100
-
pub fn get(&mut self, key: Vec<u8>) -> rusqlite::Result<Option<Vec<u8>>> {
101
-
self.select_stmt
102
-
.query_one((&key,), |row| row.get(0))
103
-
.optional()
104
-
}
105
-
}
-423
src/drive.rs
-423
src/drive.rs
···
1
-
//! Consume an MST block stream, producing an ordered stream of records
2
-
3
-
use crate::disk::{SqliteAccess, SqliteStore};
4
-
use ipld_core::cid::Cid;
5
-
use iroh_car::CarReader;
6
-
use serde::de::DeserializeOwned;
7
-
use serde::{Deserialize, Serialize};
8
-
use std::collections::HashMap;
9
-
use std::convert::Infallible;
10
-
use tokio::io::AsyncRead;
11
-
12
-
use crate::mst::{Commit, Node};
13
-
use crate::walk::{DiskTrip, Step, Trip, Walker};
14
-
15
-
/// Errors that can happen while consuming and emitting blocks and records
16
-
#[derive(Debug, thiserror::Error)]
17
-
pub enum DriveError {
18
-
#[error("Error from iroh_car: {0}")]
19
-
CarReader(#[from] iroh_car::Error),
20
-
#[error("Failed to decode commit block: {0}")]
21
-
BadBlock(#[from] serde_ipld_dagcbor::DecodeError<Infallible>),
22
-
#[error("The Commit block reference by the root was not found")]
23
-
MissingCommit,
24
-
#[error("The MST block {0} could not be found")]
25
-
MissingBlock(Cid),
26
-
#[error("Failed to walk the mst tree: {0}")]
27
-
Tripped(#[from] Trip),
28
-
#[error("CAR file had no roots")]
29
-
MissingRoot,
30
-
}
31
-
32
-
#[derive(Debug, thiserror::Error)]
33
-
pub enum DiskDriveError {
34
-
#[error("Error from iroh_car: {0}")]
35
-
CarReader(#[from] iroh_car::Error),
36
-
#[error("Failed to decode commit block: {0}")]
37
-
BadBlock(#[from] serde_ipld_dagcbor::DecodeError<Infallible>),
38
-
#[error("Storage error")]
39
-
StorageError(#[from] rusqlite::Error),
40
-
#[error("The Commit block reference by the root was not found")]
41
-
MissingCommit,
42
-
#[error("The MST block {0} could not be found")]
43
-
MissingBlock(Cid),
44
-
#[error("Encode error: {0}")]
45
-
BincodeEncodeError(#[from] bincode::error::EncodeError),
46
-
#[error("Decode error: {0}")]
47
-
BincodeDecodeError(#[from] bincode::error::DecodeError),
48
-
#[error("disk tripped: {0}")]
49
-
DiskTripped(#[from] DiskTrip),
50
-
}
51
-
52
-
pub trait Processable: Clone + Serialize + DeserializeOwned {
53
-
/// the additional size taken up (not including its mem::size_of)
54
-
fn get_size(&self) -> usize;
55
-
}
56
-
57
-
#[derive(Debug, Clone, Serialize, Deserialize)]
58
-
pub enum MaybeProcessedBlock<T> {
59
-
/// A block that's *probably* a Node (but we can't know yet)
60
-
///
61
-
/// It *can be* a record that suspiciously looks a lot like a node, so we
62
-
/// cannot eagerly turn it into a Node. We only know for sure what it is
63
-
/// when we actually walk down the MST
64
-
Raw(Vec<u8>),
65
-
/// A processed record from a block that was definitely not a Node
66
-
///
67
-
/// Processing has to be fallible because the CAR can have totally-unused
68
-
/// blocks, which can just be garbage. since we're eagerly trying to process
69
-
/// record blocks without knowing for sure that they *are* records, we
70
-
/// discard any definitely-not-nodes that fail processing and keep their
71
-
/// error in the buffer for them. if we later try to retreive them as a
72
-
/// record, then we can surface the error.
73
-
///
74
-
/// If we _never_ needed this block, then we may have wasted a bit of effort
75
-
/// trying to process it. Oh well.
76
-
///
77
-
/// There's an alternative here, which would be to kick unprocessable blocks
78
-
/// back to Raw, or maybe even a new RawUnprocessable variant. Then we could
79
-
/// surface the typed error later if needed by trying to reprocess.
80
-
Processed(T),
81
-
}
82
-
83
-
impl<T: Processable> Processable for MaybeProcessedBlock<T> {
84
-
/// TODO this is probably a little broken
85
-
fn get_size(&self) -> usize {
86
-
use std::{cmp::max, mem::size_of};
87
-
88
-
// enum is always as big as its biggest member?
89
-
let base_size = max(size_of::<Vec<u8>>(), size_of::<T>());
90
-
91
-
let extra = match self {
92
-
Self::Raw(bytes) => bytes.len(),
93
-
Self::Processed(t) => t.get_size(),
94
-
};
95
-
96
-
base_size + extra
97
-
}
98
-
}
99
-
100
-
pub enum Vehicle<R: AsyncRead + Unpin, T: Processable> {
101
-
Lil(Commit, MemDriver<T>),
102
-
Big(BigCar<R, T>),
103
-
}
104
-
105
-
pub async fn load_car<R: AsyncRead + Unpin, T: Processable>(
106
-
reader: R,
107
-
process: fn(Vec<u8>) -> T,
108
-
max_size: usize,
109
-
) -> Result<Vehicle<R, T>, DriveError> {
110
-
let mut mem_blocks = HashMap::new();
111
-
112
-
let mut car = CarReader::new(reader).await?;
113
-
114
-
let root = *car
115
-
.header()
116
-
.roots()
117
-
.first()
118
-
.ok_or(DriveError::MissingRoot)?;
119
-
log::debug!("root: {root:?}");
120
-
121
-
let mut commit = None;
122
-
123
-
// try to load all the blocks into memory
124
-
let mut mem_size = 0;
125
-
while let Some((cid, data)) = car.next_block().await? {
126
-
// the root commit is a Special Third Kind of block that we need to make
127
-
// sure not to optimistically send to the processing function
128
-
if cid == root {
129
-
let c: Commit = serde_ipld_dagcbor::from_slice(&data)?;
130
-
commit = Some(c);
131
-
continue;
132
-
}
133
-
134
-
// remaining possible types: node, record, other. optimistically process
135
-
// TODO: get the actual in-memory size to compute disk spill
136
-
let maybe_processed = if Node::could_be(&data) {
137
-
MaybeProcessedBlock::Raw(data)
138
-
} else {
139
-
MaybeProcessedBlock::Processed(process(data))
140
-
};
141
-
142
-
// stash (maybe processed) blocks in memory as long as we have room
143
-
mem_size += std::mem::size_of::<Cid>() + maybe_processed.get_size();
144
-
mem_blocks.insert(cid, maybe_processed);
145
-
if mem_size >= max_size {
146
-
return Ok(Vehicle::Big(BigCar {
147
-
car,
148
-
root,
149
-
process,
150
-
max_size,
151
-
mem_blocks,
152
-
commit,
153
-
}));
154
-
}
155
-
}
156
-
157
-
// all blocks loaded and we fit in memory! hopefully we found the commit...
158
-
let commit = commit.ok_or(DriveError::MissingCommit)?;
159
-
160
-
let walker = Walker::new(commit.data);
161
-
162
-
Ok(Vehicle::Lil(
163
-
commit,
164
-
MemDriver {
165
-
blocks: mem_blocks,
166
-
walker,
167
-
process,
168
-
},
169
-
))
170
-
}
171
-
172
-
/// a paritally memory-loaded car file that needs disk spillover to continue
173
-
pub struct BigCar<R: AsyncRead + Unpin, T: Processable> {
174
-
car: CarReader<R>,
175
-
root: Cid,
176
-
process: fn(Vec<u8>) -> T,
177
-
max_size: usize,
178
-
mem_blocks: HashMap<Cid, MaybeProcessedBlock<T>>,
179
-
pub commit: Option<Commit>,
180
-
}
181
-
182
-
fn encode(v: impl Serialize) -> Result<Vec<u8>, bincode::error::EncodeError> {
183
-
bincode::serde::encode_to_vec(v, bincode::config::standard())
184
-
}
185
-
186
-
pub fn decode<T: Processable>(bytes: &[u8]) -> Result<T, bincode::error::DecodeError> {
187
-
let (t, n) = bincode::serde::decode_from_slice(bytes, bincode::config::standard())?;
188
-
assert_eq!(n, bytes.len(), "expected to decode all bytes"); // TODO
189
-
Ok(t)
190
-
}
191
-
192
-
impl<R: AsyncRead + Unpin, T: Processable + Send + 'static> BigCar<R, T> {
193
-
pub async fn finish_loading(
194
-
mut self,
195
-
mut store: SqliteStore,
196
-
) -> Result<(Commit, BigCarReady<T>), DiskDriveError> {
197
-
// set up access for real
198
-
let mut access = store.get_access().await?;
199
-
200
-
// move access in and back out so we can manage lifetimes
201
-
// dump mem blocks into the store
202
-
access = tokio::task::spawn(async move {
203
-
let mut writer = access.get_writer()?;
204
-
205
-
let kvs = self
206
-
.mem_blocks
207
-
.into_iter()
208
-
.map(|(k, v)| (k.to_bytes(), encode(v).unwrap()));
209
-
210
-
writer.put_many(kvs)?;
211
-
212
-
drop(writer); // cannot outlive access
213
-
Ok::<_, DiskDriveError>(access)
214
-
})
215
-
.await
216
-
.unwrap()?;
217
-
218
-
let (tx, mut rx) = tokio::sync::mpsc::channel::<Vec<(Cid, MaybeProcessedBlock<T>)>>(2);
219
-
220
-
let access_worker = tokio::task::spawn_blocking(move || {
221
-
let mut writer = access.get_writer()?;
222
-
223
-
while let Some(chunk) = rx.blocking_recv() {
224
-
let kvs = chunk
225
-
.into_iter()
226
-
.map(|(k, v)| (k.to_bytes(), encode(v).unwrap()));
227
-
writer.put_many(kvs)?;
228
-
}
229
-
230
-
drop(writer); // cannot outlive access
231
-
Ok::<_, DiskDriveError>(access)
232
-
}); // await later
233
-
234
-
// dump the rest to disk (in chunks)
235
-
log::debug!("dumping the rest of the stream...");
236
-
loop {
237
-
let mut mem_size = 0;
238
-
let mut chunk = vec![];
239
-
loop {
240
-
let Some((cid, data)) = self.car.next_block().await? else {
241
-
break;
242
-
};
243
-
// we still gotta keep checking for the root since we might not have it
244
-
if cid == self.root {
245
-
let c: Commit = serde_ipld_dagcbor::from_slice(&data)?;
246
-
self.commit = Some(c);
247
-
continue;
248
-
}
249
-
// remaining possible types: node, record, other. optimistically process
250
-
// TODO: get the actual in-memory size to compute disk spill
251
-
let maybe_processed = if Node::could_be(&data) {
252
-
MaybeProcessedBlock::Raw(data)
253
-
} else {
254
-
MaybeProcessedBlock::Processed((self.process)(data))
255
-
};
256
-
mem_size += std::mem::size_of::<Cid>() + maybe_processed.get_size();
257
-
chunk.push((cid, maybe_processed));
258
-
if mem_size >= self.max_size {
259
-
// soooooo if we're setting the db cache to max_size and then letting
260
-
// multiple chunks in the queue that are >= max_size, then at any time
261
-
// we might be using some multiple of max_size?
262
-
break;
263
-
}
264
-
}
265
-
if chunk.is_empty() {
266
-
break;
267
-
}
268
-
tx.send(chunk).await.unwrap();
269
-
}
270
-
drop(tx);
271
-
log::debug!("done. waiting for worker to finish...");
272
-
273
-
access = access_worker.await.unwrap()?;
274
-
275
-
log::debug!("worker finished.");
276
-
277
-
let commit = self.commit.ok_or(DiskDriveError::MissingCommit)?;
278
-
279
-
let walker = Walker::new(commit.data);
280
-
281
-
Ok((
282
-
commit,
283
-
BigCarReady {
284
-
process: self.process,
285
-
access,
286
-
walker,
287
-
},
288
-
))
289
-
}
290
-
}
291
-
292
-
pub struct BigCarReady<T: Clone> {
293
-
process: fn(Vec<u8>) -> T,
294
-
access: SqliteAccess,
295
-
walker: Walker,
296
-
}
297
-
298
-
impl<T: Processable + Send + 'static> BigCarReady<T> {
299
-
pub async fn next_chunk(
300
-
mut self,
301
-
n: usize,
302
-
) -> Result<(Self, Option<Vec<(String, T)>>), DiskDriveError> {
303
-
let mut out = Vec::with_capacity(n);
304
-
(self, out) = tokio::task::spawn_blocking(move || {
305
-
let access = self.access;
306
-
let mut reader = access.get_reader()?;
307
-
308
-
for _ in 0..n {
309
-
// walk as far as we can until we run out of blocks or find a record
310
-
match self.walker.disk_step(&mut reader, self.process)? {
311
-
Step::Missing(cid) => return Err(DiskDriveError::MissingBlock(cid)),
312
-
Step::Finish => break,
313
-
Step::Step { rkey, data } => {
314
-
out.push((rkey, data));
315
-
continue;
316
-
}
317
-
};
318
-
}
319
-
320
-
drop(reader); // cannot outlive access
321
-
self.access = access;
322
-
Ok::<_, DiskDriveError>((self, out))
323
-
})
324
-
.await
325
-
.unwrap()?; // TODO
326
-
327
-
if out.is_empty() {
328
-
Ok((self, None))
329
-
} else {
330
-
Ok((self, Some(out)))
331
-
}
332
-
}
333
-
334
-
pub async fn rx(
335
-
mut self,
336
-
n: usize,
337
-
) -> Result<
338
-
(
339
-
tokio::sync::mpsc::Receiver<Vec<(String, T)>>,
340
-
tokio::task::JoinHandle<Result<(), DiskDriveError>>,
341
-
),
342
-
DiskDriveError,
343
-
> {
344
-
let (tx, rx) = tokio::sync::mpsc::channel::<Vec<(String, T)>>(1);
345
-
346
-
// sketch: this worker is going to be allowed to execute without a join handle
347
-
// ...should we return the join handle here so the caller at least knows about it?
348
-
// yes probably for error handling?? (orrr put errors in the channel)
349
-
let worker = tokio::task::spawn_blocking(move || {
350
-
let mut reader = self.access.get_reader()?;
351
-
352
-
loop {
353
-
let mut out = Vec::with_capacity(n);
354
-
355
-
for _ in 0..n {
356
-
// walk as far as we can until we run out of blocks or find a record
357
-
match self.walker.disk_step(&mut reader, self.process)? {
358
-
Step::Missing(cid) => return Err(DiskDriveError::MissingBlock(cid)),
359
-
Step::Finish => break,
360
-
Step::Step { rkey, data } => {
361
-
out.push((rkey, data));
362
-
continue;
363
-
}
364
-
};
365
-
}
366
-
367
-
if out.is_empty() {
368
-
break;
369
-
}
370
-
tx.blocking_send(out).unwrap();
371
-
}
372
-
373
-
drop(reader); // cannot outlive access
374
-
Ok(())
375
-
}); // await later
376
-
377
-
Ok((rx, worker))
378
-
}
379
-
}
380
-
381
-
/// The core driver between the block stream and MST walker
382
-
///
383
-
/// In the future, PDSs will export CARs in a stream-friendly order that will
384
-
/// enable processing them with tiny memory overhead. But that future is not
385
-
/// here yet.
386
-
///
387
-
/// CARs are almost always in a stream-unfriendly order, so I'm reverting the
388
-
/// optimistic stream features: we load all block first, then walk the MST.
389
-
///
390
-
/// This makes things much simpler: we only need to worry about spilling to disk
391
-
/// in one place, and we always have a reasonable expecatation about how much
392
-
/// work the init function will do. We can drop the CAR reader before walking,
393
-
/// so the sync/async boundaries become a little easier to work around.
394
-
#[derive(Debug)]
395
-
pub struct MemDriver<T: Processable> {
396
-
blocks: HashMap<Cid, MaybeProcessedBlock<T>>,
397
-
walker: Walker,
398
-
process: fn(Vec<u8>) -> T,
399
-
}
400
-
401
-
impl<T: Processable> MemDriver<T> {
402
-
/// Manually step through the record outputs
403
-
pub async fn next_chunk(&mut self, n: usize) -> Result<Option<Vec<(String, T)>>, DriveError> {
404
-
let mut out = Vec::with_capacity(n);
405
-
for _ in 0..n {
406
-
// walk as far as we can until we run out of blocks or find a record
407
-
match self.walker.step(&mut self.blocks, self.process)? {
408
-
Step::Missing(cid) => return Err(DriveError::MissingBlock(cid)),
409
-
Step::Finish => break,
410
-
Step::Step { rkey, data } => {
411
-
out.push((rkey, data));
412
-
continue;
413
-
}
414
-
};
415
-
}
416
-
417
-
if out.is_empty() {
418
-
Ok(None)
419
-
} else {
420
-
Ok(Some(out))
421
-
}
422
-
}
423
-
}
+74
-7
src/lib.rs
+74
-7
src/lib.rs
···
1
-
//! Fast and robust atproto CAR file processing in rust
2
-
//!
3
-
//! For now see the [examples](https://tangled.org/@microcosm.blue/repo-stream/tree/main/examples)
1
+
/*!
2
+
A robust CAR file -> MST walker for atproto
3
+
4
+
Small CARs have their blocks buffered in memory. If a configurable memory limit
5
+
is reached while reading blocks, CAR reading is suspended, and can be continued
6
+
by providing disk storage to buffer the CAR blocks instead.
7
+
8
+
A `process` function can be provided for tasks where records are transformed
9
+
into a smaller representation, to save memory (and disk) during block reading.
10
+
11
+
Once blocks are loaded, the MST is walked and emitted as chunks of pairs of
12
+
`(rkey, processed_block)` pairs, in order (depth first, left-to-right).
13
+
14
+
Some MST validations are applied
15
+
- Keys must appear in order
16
+
- Keys must be at the correct MST tree depth
17
+
18
+
`iroh_car` additionally applies a block size limit of `2MiB`.
19
+
20
+
```
21
+
use repo_stream::{Driver, DriverBuilder, DiskBuilder};
22
+
23
+
# #[tokio::main]
24
+
# async fn main() -> Result<(), Box<dyn std::error::Error>> {
25
+
# let reader = include_bytes!("../car-samples/tiny.car").as_slice();
26
+
let mut total_size = 0;
27
+
28
+
match DriverBuilder::new()
29
+
.with_mem_limit_mb(10)
30
+
.with_block_processor(|rec| rec.len()) // block processing: just extract the raw record size
31
+
.load_car(reader)
32
+
.await?
33
+
{
34
+
35
+
// if all blocks fit within memory
36
+
Driver::Memory(_commit, mut driver) => {
37
+
while let Some(chunk) = driver.next_chunk(256).await? {
38
+
for (_rkey, size) in chunk {
39
+
total_size += size;
40
+
}
41
+
}
42
+
},
4
43
5
-
pub mod disk;
6
-
pub mod drive;
7
-
pub mod mst;
8
-
pub mod walk;
44
+
// if the CAR was too big for in-memory processing
45
+
Driver::Disk(paused) => {
46
+
// set up a disk store we can spill to
47
+
let store = DiskBuilder::new().open("some/path.db".into()).await?;
48
+
// do the spilling, get back a (similar) driver
49
+
let (_commit, mut driver) = paused.finish_loading(store).await?;
50
+
51
+
while let Some(chunk) = driver.next_chunk(256).await? {
52
+
for (_rkey, size) in chunk {
53
+
total_size += size;
54
+
}
55
+
}
56
+
57
+
// clean up the disk store (drop tables etc)
58
+
driver.reset_store().await?;
59
+
}
60
+
};
61
+
println!("sum of size of all records: {total_size}");
62
+
# Ok(())
63
+
# }
64
+
```
65
+
66
+
Disk spilling suspends and returns a `Driver::Disk(paused)` instead of going
67
+
ahead and eagerly using disk I/O. This means you have to write a bit more code
68
+
to handle both cases, but it allows you to have finer control over resource
69
+
usage. For example, you can drive a number of parallel memory CAR workers, and
70
+
separately have a different number of disk workers picking up suspended disk
71
+
tasks from a queue.
72
+
73
+
Find more [examples in the repo](https://tangled.org/@microcosm.blue/repo-stream/tree/main/examples).
74
+
75
+
*/
-114
src/mst.rs
-114
src/mst.rs
···
1
-
//! Low-level types for parsing raw atproto MST CARs
2
-
//!
3
-
//! The primary aim is to work through the **tree** structure. Non-node blocks
4
-
//! are left as raw bytes, for upper levels to parse into DAG-CBOR or whatever.
5
-
6
-
use ipld_core::cid::Cid;
7
-
use serde::Deserialize;
8
-
9
-
/// The top-level data object in a repository's tree is a signed commit.
10
-
#[derive(Debug, Deserialize)]
11
-
// #[serde(deny_unknown_fields)]
12
-
pub struct Commit {
13
-
/// the account DID associated with the repo, in strictly normalized form
14
-
/// (eg, lowercase as appropriate)
15
-
pub did: String,
16
-
/// fixed value of 3 for this repo format version
17
-
pub version: u64,
18
-
/// pointer to the top of the repo contents tree structure (MST)
19
-
pub data: Cid,
20
-
/// revision of the repo, used as a logical clock.
21
-
///
22
-
/// TID format. Must increase monotonically. Recommend using current
23
-
/// timestamp as TID; rev values in the "future" (beyond a fudge factor)
24
-
/// should be ignored and not processed
25
-
pub rev: String,
26
-
/// pointer (by hash) to a previous commit object for this repository.
27
-
///
28
-
/// Could be used to create a chain of history, but largely unused (included
29
-
/// for v2 backwards compatibility). In version 3 repos, this field must
30
-
/// exist in the CBOR object, but is virtually always null. NOTE: previously
31
-
/// specified as nullable and optional, but this caused interoperability
32
-
/// issues.
33
-
pub prev: Option<Cid>,
34
-
/// cryptographic signature of this commit, as raw bytes
35
-
#[serde(with = "serde_bytes")]
36
-
pub sig: Vec<u8>,
37
-
}
38
-
39
-
/// MST node data schema
40
-
#[derive(Debug, Deserialize, PartialEq)]
41
-
#[serde(deny_unknown_fields)]
42
-
pub struct Node {
43
-
/// link to sub-tree Node on a lower level and with all keys sorting before
44
-
/// keys at this node
45
-
#[serde(rename = "l")]
46
-
pub left: Option<Cid>,
47
-
/// ordered list of TreeEntry objects
48
-
///
49
-
/// atproto MSTs have a fanout of 4, so there can be max 4 entries.
50
-
#[serde(rename = "e")]
51
-
pub entries: Vec<Entry>, // maybe we can do [Option<Entry>; 4]?
52
-
}
53
-
54
-
impl Node {
55
-
/// test if a block could possibly be a node
56
-
///
57
-
/// we can't eagerly decode records except where we're *sure* they cannot be
58
-
/// an mst node (and even then we can only attempt) because you can't know
59
-
/// with certainty what a block is supposed to be without actually walking
60
-
/// the tree.
61
-
///
62
-
/// so if a block *could be* a node, any record converter must postpone
63
-
/// processing. if it turns out it happens to be a very node-looking record,
64
-
/// well, sorry, it just has to only be processed later when that's known.
65
-
pub fn could_be(bytes: impl AsRef<[u8]>) -> bool {
66
-
const NODE_FINGERPRINT: [u8; 3] = [
67
-
0xA2, // map length 2 (for "l" and "e" keys)
68
-
0x61, // text length 1
69
-
b'e', // "e" before "l" because map keys have to be lex-sorted
70
-
// 0x8?: "e" has array (0x100 upper 3 bits) of some length
71
-
];
72
-
let bytes = bytes.as_ref();
73
-
bytes.starts_with(&NODE_FINGERPRINT)
74
-
&& bytes
75
-
.get(3)
76
-
.map(|b| b & 0b1110_0000 == 0x80)
77
-
.unwrap_or(false)
78
-
}
79
-
80
-
/// Check if a node has any entries
81
-
///
82
-
/// An empty repository with no records is represented as a single MST node
83
-
/// with an empty array of entries. This is the only situation in which a
84
-
/// tree may contain an empty leaf node which does not either contain keys
85
-
/// ("entries") or point to a sub-tree containing entries.
86
-
///
87
-
/// TODO: to me this is slightly unclear with respect to `l` (ask someone).
88
-
/// ...is that what "The top of the tree must not be a an empty node which
89
-
/// only points to a sub-tree." is referring to?
90
-
pub fn is_empty(&self) -> bool {
91
-
self.left.is_none() && self.entries.is_empty()
92
-
}
93
-
}
94
-
95
-
/// TreeEntry object
96
-
#[derive(Debug, Deserialize, PartialEq)]
97
-
#[serde(deny_unknown_fields)]
98
-
pub struct Entry {
99
-
/// count of bytes shared with previous TreeEntry in this Node (if any)
100
-
#[serde(rename = "p")]
101
-
pub prefix_len: usize,
102
-
/// remainder of key for this TreeEntry, after "prefixlen" have been removed
103
-
#[serde(rename = "k", with = "serde_bytes")]
104
-
pub keysuffix: Vec<u8>, // can we String this here?
105
-
/// link to the record data (CBOR) for this entry
106
-
#[serde(rename = "v")]
107
-
pub value: Cid,
108
-
/// link to a sub-tree Node at a lower level
109
-
///
110
-
/// the lower level must have keys sorting after this TreeEntry's key (to
111
-
/// the "right"), but before the next TreeEntry's key in this Node (if any)
112
-
#[serde(rename = "t")]
113
-
pub tree: Option<Cid>,
114
-
}
-471
src/walk.rs
-471
src/walk.rs
···
1
-
//! Depth-first MST traversal
2
-
3
-
use crate::disk::SqliteReader;
4
-
use crate::drive::{MaybeProcessedBlock, Processable};
5
-
use crate::mst::Node;
6
-
use ipld_core::cid::Cid;
7
-
use std::collections::HashMap;
8
-
use std::convert::Infallible;
9
-
10
-
/// Errors that can happen while walking
11
-
#[derive(Debug, thiserror::Error)]
12
-
pub enum Trip {
13
-
#[error("empty mst nodes are not allowed")]
14
-
NodeEmpty,
15
-
#[error("Failed to fingerprint commit block")]
16
-
BadCommitFingerprint,
17
-
#[error("Failed to decode commit block: {0}")]
18
-
BadCommit(#[from] serde_ipld_dagcbor::DecodeError<Infallible>),
19
-
#[error("Action node error: {0}")]
20
-
RkeyError(#[from] RkeyError),
21
-
#[error("Encountered an rkey out of order while walking the MST")]
22
-
RkeyOutOfOrder,
23
-
}
24
-
25
-
/// Errors that can happen while walking
26
-
#[derive(Debug, thiserror::Error)]
27
-
pub enum DiskTrip {
28
-
#[error("tripped: {0}")]
29
-
Trip(#[from] Trip),
30
-
#[error("storage error: {0}")]
31
-
StorageError(#[from] rusqlite::Error),
32
-
#[error("Decode error: {0}")]
33
-
BincodeDecodeError(#[from] bincode::error::DecodeError),
34
-
}
35
-
36
-
/// Errors from invalid Rkeys
37
-
#[derive(Debug, thiserror::Error)]
38
-
pub enum RkeyError {
39
-
#[error("Failed to compute an rkey due to invalid prefix_len")]
40
-
EntryPrefixOutOfbounds,
41
-
#[error("RKey was not utf-8")]
42
-
EntryRkeyNotUtf8(#[from] std::string::FromUtf8Error),
43
-
}
44
-
45
-
/// Walker outputs
46
-
#[derive(Debug)]
47
-
pub enum Step<T> {
48
-
/// We needed this CID but it's not in the block store
49
-
Missing(Cid),
50
-
/// Reached the end of the MST! yay!
51
-
Finish,
52
-
/// A record was found!
53
-
Step { rkey: String, data: T },
54
-
}
55
-
56
-
#[derive(Debug, Clone, PartialEq)]
57
-
enum Need {
58
-
Node(Cid),
59
-
Record { rkey: String, cid: Cid },
60
-
}
61
-
62
-
fn push_from_node(stack: &mut Vec<Need>, node: &Node) -> Result<(), RkeyError> {
63
-
let mut entries = Vec::with_capacity(node.entries.len());
64
-
65
-
let mut prefix = vec![];
66
-
for entry in &node.entries {
67
-
let mut rkey = vec![];
68
-
let pre_checked = prefix
69
-
.get(..entry.prefix_len)
70
-
.ok_or(RkeyError::EntryPrefixOutOfbounds)?;
71
-
rkey.extend_from_slice(pre_checked);
72
-
rkey.extend_from_slice(&entry.keysuffix);
73
-
prefix = rkey.clone();
74
-
75
-
entries.push(Need::Record {
76
-
rkey: String::from_utf8(rkey)?,
77
-
cid: entry.value,
78
-
});
79
-
if let Some(ref tree) = entry.tree {
80
-
entries.push(Need::Node(*tree));
81
-
}
82
-
}
83
-
84
-
entries.reverse();
85
-
stack.append(&mut entries);
86
-
87
-
if let Some(tree) = node.left {
88
-
stack.push(Need::Node(tree));
89
-
}
90
-
Ok(())
91
-
}
92
-
93
-
/// Traverser of an atproto MST
94
-
///
95
-
/// Walks the tree from left-to-right in depth-first order
96
-
#[derive(Debug)]
97
-
pub struct Walker {
98
-
stack: Vec<Need>,
99
-
prev: String,
100
-
}
101
-
102
-
impl Walker {
103
-
pub fn new(tree_root_cid: Cid) -> Self {
104
-
Self {
105
-
stack: vec![Need::Node(tree_root_cid)],
106
-
prev: "".to_string(),
107
-
}
108
-
}
109
-
110
-
/// Advance through nodes until we find a record or can't go further
111
-
pub fn step<T: Processable>(
112
-
&mut self,
113
-
blocks: &mut HashMap<Cid, MaybeProcessedBlock<T>>,
114
-
process: impl Fn(Vec<u8>) -> T,
115
-
) -> Result<Step<T>, Trip> {
116
-
loop {
117
-
let Some(mut need) = self.stack.last() else {
118
-
log::trace!("tried to walk but we're actually done.");
119
-
return Ok(Step::Finish);
120
-
};
121
-
122
-
match &mut need {
123
-
Need::Node(cid) => {
124
-
log::trace!("need node {cid:?}");
125
-
let Some(block) = blocks.remove(cid) else {
126
-
log::trace!("node not found, resting");
127
-
return Ok(Step::Missing(*cid));
128
-
};
129
-
130
-
let MaybeProcessedBlock::Raw(data) = block else {
131
-
return Err(Trip::BadCommitFingerprint);
132
-
};
133
-
let node =
134
-
serde_ipld_dagcbor::from_slice::<Node>(&data).map_err(Trip::BadCommit)?;
135
-
136
-
// found node, make sure we remember
137
-
self.stack.pop();
138
-
139
-
// queue up work on the found node next
140
-
push_from_node(&mut self.stack, &node)?;
141
-
}
142
-
Need::Record { rkey, cid } => {
143
-
log::trace!("need record {cid:?}");
144
-
let Some(data) = blocks.get_mut(cid) else {
145
-
log::trace!("record block not found, resting");
146
-
return Ok(Step::Missing(*cid));
147
-
};
148
-
let rkey = rkey.clone();
149
-
let data = match data {
150
-
MaybeProcessedBlock::Raw(data) => process(data.to_vec()),
151
-
MaybeProcessedBlock::Processed(t) => t.clone(),
152
-
};
153
-
154
-
// found node, make sure we remember
155
-
self.stack.pop();
156
-
157
-
log::trace!("emitting a block as a step. depth={}", self.stack.len());
158
-
159
-
// rkeys *must* be in order or else the tree is invalid (or
160
-
// we have a bug)
161
-
if rkey <= self.prev {
162
-
return Err(Trip::RkeyOutOfOrder);
163
-
}
164
-
self.prev = rkey.clone();
165
-
166
-
return Ok(Step::Step { rkey, data });
167
-
}
168
-
}
169
-
}
170
-
}
171
-
172
-
/// blocking!!!!!!
173
-
pub fn disk_step<T: Processable>(
174
-
&mut self,
175
-
reader: &mut SqliteReader,
176
-
process: impl Fn(Vec<u8>) -> T,
177
-
) -> Result<Step<T>, DiskTrip> {
178
-
loop {
179
-
let Some(mut need) = self.stack.last() else {
180
-
log::trace!("tried to walk but we're actually done.");
181
-
return Ok(Step::Finish);
182
-
};
183
-
184
-
match &mut need {
185
-
Need::Node(cid) => {
186
-
let cid_bytes = cid.to_bytes();
187
-
log::trace!("need node {cid:?}");
188
-
let Some(block_bytes) = reader.get(cid_bytes)? else {
189
-
log::trace!("node not found, resting");
190
-
return Ok(Step::Missing(*cid));
191
-
};
192
-
193
-
let block: MaybeProcessedBlock<T> = crate::drive::decode(&block_bytes)?;
194
-
195
-
let MaybeProcessedBlock::Raw(data) = block else {
196
-
return Err(Trip::BadCommitFingerprint.into());
197
-
};
198
-
let node =
199
-
serde_ipld_dagcbor::from_slice::<Node>(&data).map_err(Trip::BadCommit)?;
200
-
201
-
// found node, make sure we remember
202
-
self.stack.pop();
203
-
204
-
// queue up work on the found node next
205
-
push_from_node(&mut self.stack, &node).map_err(Trip::RkeyError)?;
206
-
}
207
-
Need::Record { rkey, cid } => {
208
-
log::trace!("need record {cid:?}");
209
-
let cid_bytes = cid.to_bytes();
210
-
let Some(data_bytes) = reader.get(cid_bytes)? else {
211
-
log::trace!("record block not found, resting");
212
-
return Ok(Step::Missing(*cid));
213
-
};
214
-
let data: MaybeProcessedBlock<T> = crate::drive::decode(&data_bytes)?;
215
-
let rkey = rkey.clone();
216
-
let data = match data {
217
-
MaybeProcessedBlock::Raw(data) => process(data),
218
-
MaybeProcessedBlock::Processed(t) => t.clone(),
219
-
};
220
-
221
-
// found node, make sure we remember
222
-
self.stack.pop();
223
-
224
-
log::trace!("emitting a block as a step. depth={}", self.stack.len());
225
-
226
-
// rkeys *must* be in order or else the tree is invalid (or
227
-
// we have a bug)
228
-
if rkey <= self.prev {
229
-
return Err(DiskTrip::Trip(Trip::RkeyOutOfOrder));
230
-
}
231
-
self.prev = rkey.clone();
232
-
233
-
return Ok(Step::Step { rkey, data });
234
-
}
235
-
}
236
-
}
237
-
}
238
-
}
239
-
240
-
#[cfg(test)]
241
-
mod test {
242
-
use super::*;
243
-
// use crate::mst::Entry;
244
-
245
-
fn cid1() -> Cid {
246
-
"bafyreihixenvk3ahqbytas4hk4a26w43bh6eo3w6usjqtxkpzsvi655a3m"
247
-
.parse()
248
-
.unwrap()
249
-
}
250
-
// fn cid2() -> Cid {
251
-
// "QmY7Yh4UquoXHLPFo2XbhXkhBvFoPwmQUSa92pxnxjQuPU"
252
-
// .parse()
253
-
// .unwrap()
254
-
// }
255
-
// fn cid3() -> Cid {
256
-
// "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
257
-
// .parse()
258
-
// .unwrap()
259
-
// }
260
-
// fn cid4() -> Cid {
261
-
// "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"
262
-
// .parse()
263
-
// .unwrap()
264
-
// }
265
-
// fn cid5() -> Cid {
266
-
// "QmSnuWmxptJZdLJpKRarxBMS2Ju2oANVrgbr2xWbie9b2D"
267
-
// .parse()
268
-
// .unwrap()
269
-
// }
270
-
// fn cid6() -> Cid {
271
-
// "QmdmQXB2mzChmMeKY47C43LxUdg1NDJ5MWcKMKxDu7RgQm"
272
-
// .parse()
273
-
// .unwrap()
274
-
// }
275
-
// fn cid7() -> Cid {
276
-
// "bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze"
277
-
// .parse()
278
-
// .unwrap()
279
-
// }
280
-
// fn cid8() -> Cid {
281
-
// "bafyreif3tfdpr5n4jdrbielmcapwvbpcthepfkwq2vwonmlhirbjmotedi"
282
-
// .parse()
283
-
// .unwrap()
284
-
// }
285
-
// fn cid9() -> Cid {
286
-
// "bafyreicnokmhmrnlp2wjhyk2haep4tqxiptwfrp2rrs7rzq7uk766chqvq"
287
-
// .parse()
288
-
// .unwrap()
289
-
// }
290
-
291
-
#[test]
292
-
fn test_next_from_node_empty() {
293
-
let node = Node {
294
-
left: None,
295
-
entries: vec![],
296
-
};
297
-
let mut stack = vec![];
298
-
push_from_node(&mut stack, &node).unwrap();
299
-
assert_eq!(stack.last(), None);
300
-
}
301
-
302
-
#[test]
303
-
fn test_needs_from_node_just_left() {
304
-
let node = Node {
305
-
left: Some(cid1()),
306
-
entries: vec![],
307
-
};
308
-
let mut stack = vec![];
309
-
push_from_node(&mut stack, &node).unwrap();
310
-
assert_eq!(stack.last(), Some(Need::Node(cid1())).as_ref());
311
-
}
312
-
313
-
// #[test]
314
-
// fn test_needs_from_node_just_one_record() {
315
-
// let node = Node {
316
-
// left: None,
317
-
// entries: vec![Entry {
318
-
// keysuffix: "asdf".into(),
319
-
// prefix_len: 0,
320
-
// value: cid1(),
321
-
// tree: None,
322
-
// }],
323
-
// };
324
-
// assert_eq!(
325
-
// needs_from_node(node).unwrap(),
326
-
// vec![Need::Record {
327
-
// rkey: "asdf".into(),
328
-
// cid: cid1(),
329
-
// },]
330
-
// );
331
-
// }
332
-
333
-
// #[test]
334
-
// fn test_needs_from_node_two_records() {
335
-
// let node = Node {
336
-
// left: None,
337
-
// entries: vec![
338
-
// Entry {
339
-
// keysuffix: "asdf".into(),
340
-
// prefix_len: 0,
341
-
// value: cid1(),
342
-
// tree: None,
343
-
// },
344
-
// Entry {
345
-
// keysuffix: "gh".into(),
346
-
// prefix_len: 2,
347
-
// value: cid2(),
348
-
// tree: None,
349
-
// },
350
-
// ],
351
-
// };
352
-
// assert_eq!(
353
-
// needs_from_node(node).unwrap(),
354
-
// vec![
355
-
// Need::Record {
356
-
// rkey: "asdf".into(),
357
-
// cid: cid1(),
358
-
// },
359
-
// Need::Record {
360
-
// rkey: "asgh".into(),
361
-
// cid: cid2(),
362
-
// },
363
-
// ]
364
-
// );
365
-
// }
366
-
367
-
// #[test]
368
-
// fn test_needs_from_node_with_both() {
369
-
// let node = Node {
370
-
// left: None,
371
-
// entries: vec![Entry {
372
-
// keysuffix: "asdf".into(),
373
-
// prefix_len: 0,
374
-
// value: cid1(),
375
-
// tree: Some(cid2()),
376
-
// }],
377
-
// };
378
-
// assert_eq!(
379
-
// needs_from_node(node).unwrap(),
380
-
// vec![
381
-
// Need::Record {
382
-
// rkey: "asdf".into(),
383
-
// cid: cid1(),
384
-
// },
385
-
// Need::Node(cid2()),
386
-
// ]
387
-
// );
388
-
// }
389
-
390
-
// #[test]
391
-
// fn test_needs_from_node_left_and_record() {
392
-
// let node = Node {
393
-
// left: Some(cid1()),
394
-
// entries: vec![Entry {
395
-
// keysuffix: "asdf".into(),
396
-
// prefix_len: 0,
397
-
// value: cid2(),
398
-
// tree: None,
399
-
// }],
400
-
// };
401
-
// assert_eq!(
402
-
// needs_from_node(node).unwrap(),
403
-
// vec![
404
-
// Need::Node(cid1()),
405
-
// Need::Record {
406
-
// rkey: "asdf".into(),
407
-
// cid: cid2(),
408
-
// },
409
-
// ]
410
-
// );
411
-
// }
412
-
413
-
// #[test]
414
-
// fn test_needs_from_full_node() {
415
-
// let node = Node {
416
-
// left: Some(cid1()),
417
-
// entries: vec![
418
-
// Entry {
419
-
// keysuffix: "asdf".into(),
420
-
// prefix_len: 0,
421
-
// value: cid2(),
422
-
// tree: Some(cid3()),
423
-
// },
424
-
// Entry {
425
-
// keysuffix: "ghi".into(),
426
-
// prefix_len: 1,
427
-
// value: cid4(),
428
-
// tree: Some(cid5()),
429
-
// },
430
-
// Entry {
431
-
// keysuffix: "jkl".into(),
432
-
// prefix_len: 2,
433
-
// value: cid6(),
434
-
// tree: Some(cid7()),
435
-
// },
436
-
// Entry {
437
-
// keysuffix: "mno".into(),
438
-
// prefix_len: 4,
439
-
// value: cid8(),
440
-
// tree: Some(cid9()),
441
-
// },
442
-
// ],
443
-
// };
444
-
// assert_eq!(
445
-
// needs_from_node(node).unwrap(),
446
-
// vec![
447
-
// Need::Node(cid1()),
448
-
// Need::Record {
449
-
// rkey: "asdf".into(),
450
-
// cid: cid2(),
451
-
// },
452
-
// Need::Node(cid3()),
453
-
// Need::Record {
454
-
// rkey: "aghi".into(),
455
-
// cid: cid4(),
456
-
// },
457
-
// Need::Node(cid5()),
458
-
// Need::Record {
459
-
// rkey: "agjkl".into(),
460
-
// cid: cid6(),
461
-
// },
462
-
// Need::Node(cid7()),
463
-
// Need::Record {
464
-
// rkey: "agjkmno".into(),
465
-
// cid: cid8(),
466
-
// },
467
-
// Need::Node(cid9()),
468
-
// ]
469
-
// );
470
-
// }
471
-
}
+34
-31
tests/non-huge-cars.rs
+34
-31
tests/non-huge-cars.rs
···
1
1
extern crate repo_stream;
2
-
use futures::TryStreamExt;
3
-
use iroh_car::CarReader;
4
-
use std::convert::Infallible;
2
+
use repo_stream::Driver;
5
3
4
+
const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car");
6
5
const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car");
7
6
const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car");
8
7
const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car");
9
8
10
-
async fn test_car(bytes: &[u8], expected_records: usize, expected_sum: usize) {
11
-
let reader = CarReader::new(bytes).await.unwrap();
12
-
13
-
let root = reader
14
-
.header()
15
-
.roots()
16
-
.first()
17
-
.ok_or("missing root")
9
+
async fn test_car(
10
+
bytes: &[u8],
11
+
expected_records: usize,
12
+
expected_sum: usize,
13
+
expect_profile: bool,
14
+
) {
15
+
let mut driver = match Driver::load_car(bytes, |block| block.len(), 10 /* MiB */)
16
+
.await
18
17
.unwrap()
19
-
.clone();
20
-
21
-
let stream = std::pin::pin!(reader.stream());
22
-
23
-
let (_commit, v) =
24
-
repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len()))
25
-
.await
26
-
.unwrap();
27
-
let mut record_stream = std::pin::pin!(v.stream());
18
+
{
19
+
Driver::Memory(_commit, mem_driver) => mem_driver,
20
+
Driver::Disk(_) => panic!("too big"),
21
+
};
28
22
29
23
let mut records = 0;
30
24
let mut sum = 0;
31
25
let mut found_bsky_profile = false;
32
26
let mut prev_rkey = "".to_string();
33
-
while let Some((rkey, size)) = record_stream.try_next().await.unwrap() {
34
-
records += 1;
35
-
sum += size;
36
-
if rkey == "app.bsky.actor.profile/self" {
37
-
found_bsky_profile = true;
27
+
28
+
while let Some(pairs) = driver.next_chunk(256).await.unwrap() {
29
+
for (rkey, size) in pairs {
30
+
records += 1;
31
+
sum += size;
32
+
if rkey == "app.bsky.actor.profile/self" {
33
+
found_bsky_profile = true;
34
+
}
35
+
assert!(rkey > prev_rkey, "rkeys are streamed in order");
36
+
prev_rkey = rkey;
38
37
}
39
-
assert!(rkey > prev_rkey, "rkeys are streamed in order");
40
-
prev_rkey = rkey;
41
38
}
39
+
42
40
assert_eq!(records, expected_records);
43
41
assert_eq!(sum, expected_sum);
44
-
assert!(found_bsky_profile);
42
+
assert_eq!(found_bsky_profile, expect_profile);
43
+
}
44
+
45
+
#[tokio::test]
46
+
async fn test_empty_car() {
47
+
test_car(EMPTY_CAR, 0, 0, false).await
45
48
}
46
49
47
50
#[tokio::test]
48
51
async fn test_tiny_car() {
49
-
test_car(TINY_CAR, 8, 2071).await
52
+
test_car(TINY_CAR, 8, 2071, true).await
50
53
}
51
54
52
55
#[tokio::test]
53
56
async fn test_little_car() {
54
-
test_car(LITTLE_CAR, 278, 246960).await
57
+
test_car(LITTLE_CAR, 278, 246960, true).await
55
58
}
56
59
57
60
#[tokio::test]
58
61
async fn test_midsize_car() {
59
-
test_car(MIDSIZE_CAR, 11585, 3741393).await
62
+
test_car(MIDSIZE_CAR, 11585, 3741393, true).await
60
63
}