Fast and robust atproto CAR file processing in rust

Compare changes

Choose any two refs to compare.

Changed files
+379 -3223
benches
car-samples
examples
disk-read-file
read-file
src
tests
+129 -1655
Cargo.lock
··· 3 3 version = 4 4 4 5 5 [[package]] 6 - name = "addr2line" 7 - version = "0.25.1" 8 - source = "registry+https://github.com/rust-lang/crates.io-index" 9 - checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" 10 - dependencies = [ 11 - "gimli", 12 - ] 13 - 14 - [[package]] 15 - name = "adler2" 16 - version = "2.0.1" 17 - source = "registry+https://github.com/rust-lang/crates.io-index" 18 - checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" 19 - 20 - [[package]] 21 - name = "aho-corasick" 22 - version = "1.1.3" 23 - source = "registry+https://github.com/rust-lang/crates.io-index" 24 - checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 - dependencies = [ 26 - "memchr", 27 - ] 28 - 29 - [[package]] 30 - name = "anes" 31 - version = "0.1.6" 32 - source = "registry+https://github.com/rust-lang/crates.io-index" 33 - checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 34 - 35 - [[package]] 36 6 name = "anstream" 37 7 version = "0.6.21" 38 8 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 68 38 source = "registry+https://github.com/rust-lang/crates.io-index" 69 39 checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" 70 40 dependencies = [ 71 - "windows-sys 0.60.2", 41 + "windows-sys", 72 42 ] 73 43 74 44 [[package]] ··· 79 49 dependencies = [ 80 50 "anstyle", 81 51 "once_cell_polyfill", 82 - "windows-sys 0.60.2", 83 - ] 84 - 85 - [[package]] 86 - name = "anyhow" 87 - version = "1.0.100" 88 - source = "registry+https://github.com/rust-lang/crates.io-index" 89 - checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" 90 - 91 - [[package]] 92 - name = "autocfg" 93 - version = "1.5.0" 94 - source = "registry+https://github.com/rust-lang/crates.io-index" 95 - checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" 96 - 97 - [[package]] 98 - name = "backtrace" 99 - version = 
"0.3.76" 100 - source = "registry+https://github.com/rust-lang/crates.io-index" 101 - checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" 102 - dependencies = [ 103 - "addr2line", 104 - "cfg-if", 105 - "libc", 106 - "miniz_oxide", 107 - "object", 108 - "rustc-demangle", 109 - "windows-link", 110 - ] 111 - 112 - [[package]] 113 - name = "base-x" 114 - version = "0.2.11" 115 - source = "registry+https://github.com/rust-lang/crates.io-index" 116 - checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270" 117 - 118 - [[package]] 119 - name = "base256emoji" 120 - version = "1.0.2" 121 - source = "registry+https://github.com/rust-lang/crates.io-index" 122 - checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c" 123 - dependencies = [ 124 - "const-str", 125 - "match-lookup", 126 - ] 127 - 128 - [[package]] 129 - name = "bincode" 130 - version = "1.3.3" 131 - source = "registry+https://github.com/rust-lang/crates.io-index" 132 - checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" 133 - dependencies = [ 134 - "serde", 135 - ] 136 - 137 - [[package]] 138 - name = "bincode" 139 - version = "2.0.1" 140 - source = "registry+https://github.com/rust-lang/crates.io-index" 141 - checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" 142 - dependencies = [ 143 - "bincode_derive", 144 - "serde", 145 - "unty", 146 - ] 147 - 148 - [[package]] 149 - name = "bincode_derive" 150 - version = "2.0.1" 151 - source = "registry+https://github.com/rust-lang/crates.io-index" 152 - checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" 153 - dependencies = [ 154 - "virtue", 52 + "windows-sys", 155 53 ] 156 54 157 55 [[package]] ··· 159 57 version = "2.9.4" 160 58 source = "registry+https://github.com/rust-lang/crates.io-index" 161 59 checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" 162 - dependencies = [ 163 - "serde", 164 - ] 
165 60 166 61 [[package]] 167 - name = "bumpalo" 168 - version = "3.19.0" 62 + name = "byteorder-lite" 63 + version = "0.1.0" 169 64 source = "registry+https://github.com/rust-lang/crates.io-index" 170 - checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" 171 - 172 - [[package]] 173 - name = "byteorder" 174 - version = "1.5.0" 175 - source = "registry+https://github.com/rust-lang/crates.io-index" 176 - checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 177 - 178 - [[package]] 179 - name = "bytes" 180 - version = "1.10.1" 181 - source = "registry+https://github.com/rust-lang/crates.io-index" 182 - checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 183 - 184 - [[package]] 185 - name = "cast" 186 - version = "0.3.0" 187 - source = "registry+https://github.com/rust-lang/crates.io-index" 188 - checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 189 - 190 - [[package]] 191 - name = "cbor4ii" 192 - version = "0.2.14" 193 - source = "registry+https://github.com/rust-lang/crates.io-index" 194 - checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4" 195 - dependencies = [ 196 - "serde", 197 - ] 65 + checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" 198 66 199 67 [[package]] 200 - name = "cc" 201 - version = "1.2.44" 68 + name = "byteview" 69 + version = "0.10.0" 202 70 source = "registry+https://github.com/rust-lang/crates.io-index" 203 - checksum = "37521ac7aabe3d13122dc382493e20c9416f299d2ccd5b3a5340a2570cdeb0f3" 204 - dependencies = [ 205 - "find-msvc-tools", 206 - "shlex", 207 - ] 71 + checksum = "dda4398f387cc6395a3e93b3867cd9abda914c97a0b344d1eefb2e5c51785fca" 208 72 209 73 [[package]] 210 74 name = "cfg-if" ··· 213 77 checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" 214 78 215 79 [[package]] 216 - name = "ciborium" 217 - version = "0.2.2" 218 - source = 
"registry+https://github.com/rust-lang/crates.io-index" 219 - checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 220 - dependencies = [ 221 - "ciborium-io", 222 - "ciborium-ll", 223 - "serde", 224 - ] 225 - 226 - [[package]] 227 - name = "ciborium-io" 228 - version = "0.2.2" 229 - source = "registry+https://github.com/rust-lang/crates.io-index" 230 - checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 231 - 232 - [[package]] 233 - name = "ciborium-ll" 234 - version = "0.2.2" 235 - source = "registry+https://github.com/rust-lang/crates.io-index" 236 - checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 237 - dependencies = [ 238 - "ciborium-io", 239 - "half", 240 - ] 241 - 242 - [[package]] 243 - name = "cid" 244 - version = "0.11.1" 245 - source = "registry+https://github.com/rust-lang/crates.io-index" 246 - checksum = "3147d8272e8fa0ccd29ce51194dd98f79ddfb8191ba9e3409884e751798acf3a" 247 - dependencies = [ 248 - "core2", 249 - "multibase", 250 - "multihash", 251 - "serde", 252 - "serde_bytes", 253 - "unsigned-varint 0.8.0", 254 - ] 255 - 256 - [[package]] 257 80 name = "clap" 258 81 version = "4.5.48" 259 82 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 284 107 "heck", 285 108 "proc-macro2", 286 109 "quote", 287 - "syn 2.0.106", 110 + "syn", 288 111 ] 289 112 290 113 [[package]] ··· 300 123 checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" 301 124 302 125 [[package]] 303 - name = "const-str" 304 - version = "0.4.3" 305 - source = "registry+https://github.com/rust-lang/crates.io-index" 306 - checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3" 307 - 308 - [[package]] 309 - name = "core2" 310 - version = "0.4.0" 311 - source = "registry+https://github.com/rust-lang/crates.io-index" 312 - checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" 313 - dependencies = [ 314 - "memchr", 315 - ] 316 
- 317 - [[package]] 318 - name = "criterion" 319 - version = "0.7.0" 320 - source = "registry+https://github.com/rust-lang/crates.io-index" 321 - checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" 322 - dependencies = [ 323 - "anes", 324 - "cast", 325 - "ciborium", 326 - "clap", 327 - "criterion-plot", 328 - "itertools", 329 - "num-traits", 330 - "oorandom", 331 - "plotters", 332 - "rayon", 333 - "regex", 334 - "serde", 335 - "serde_json", 336 - "tinytemplate", 337 - "tokio", 338 - "walkdir", 339 - ] 340 - 341 - [[package]] 342 - name = "criterion-plot" 343 - version = "0.6.0" 344 - source = "registry+https://github.com/rust-lang/crates.io-index" 345 - checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" 346 - dependencies = [ 347 - "cast", 348 - "itertools", 349 - ] 350 - 351 - [[package]] 352 - name = "crossbeam-deque" 353 - version = "0.8.6" 126 + name = "compare" 127 + version = "0.0.6" 354 128 source = "registry+https://github.com/rust-lang/crates.io-index" 355 - checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" 356 - dependencies = [ 357 - "crossbeam-epoch", 358 - "crossbeam-utils", 359 - ] 129 + checksum = "ea0095f6103c2a8b44acd6fd15960c801dafebf02e21940360833e0673f48ba7" 360 130 361 131 [[package]] 362 132 name = "crossbeam-epoch" ··· 368 138 ] 369 139 370 140 [[package]] 371 - name = "crossbeam-queue" 372 - version = "0.3.12" 141 + name = "crossbeam-skiplist" 142 + version = "0.1.3" 373 143 source = "registry+https://github.com/rust-lang/crates.io-index" 374 - checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" 144 + checksum = "df29de440c58ca2cc6e587ec3d22347551a32435fbde9d2bff64e78a9ffa151b" 375 145 dependencies = [ 146 + "crossbeam-epoch", 376 147 "crossbeam-utils", 377 148 ] 378 149 ··· 383 154 checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 384 155 385 156 [[package]] 386 - name = "crunchy" 387 - version = "0.2.4" 388 
- source = "registry+https://github.com/rust-lang/crates.io-index" 389 - checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" 390 - 391 - [[package]] 392 - name = "data-encoding" 393 - version = "2.9.0" 394 - source = "registry+https://github.com/rust-lang/crates.io-index" 395 - checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" 396 - 397 - [[package]] 398 - name = "data-encoding-macro" 399 - version = "0.1.18" 400 - source = "registry+https://github.com/rust-lang/crates.io-index" 401 - checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d" 402 - dependencies = [ 403 - "data-encoding", 404 - "data-encoding-macro-internal", 405 - ] 406 - 407 - [[package]] 408 - name = "data-encoding-macro-internal" 409 - version = "0.1.16" 157 + name = "dashmap" 158 + version = "6.1.0" 410 159 source = "registry+https://github.com/rust-lang/crates.io-index" 411 - checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976" 160 + checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" 412 161 dependencies = [ 413 - "data-encoding", 414 - "syn 2.0.106", 162 + "cfg-if", 163 + "crossbeam-utils", 164 + "hashbrown 0.14.5", 165 + "lock_api", 166 + "once_cell", 167 + "parking_lot_core", 415 168 ] 416 169 417 170 [[package]] 418 - name = "displaydoc" 419 - version = "0.2.5" 171 + name = "enum_dispatch" 172 + version = "0.3.13" 420 173 source = "registry+https://github.com/rust-lang/crates.io-index" 421 - checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" 174 + checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" 422 175 dependencies = [ 176 + "once_cell", 423 177 "proc-macro2", 424 178 "quote", 425 - "syn 2.0.106", 426 - ] 427 - 428 - [[package]] 429 - name = "doxygen-rs" 430 - version = "0.4.2" 431 - source = "registry+https://github.com/rust-lang/crates.io-index" 432 - checksum = 
"415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9" 433 - dependencies = [ 434 - "phf", 179 + "syn", 435 180 ] 436 181 437 182 [[package]] 438 - name = "either" 439 - version = "1.15.0" 440 - source = "registry+https://github.com/rust-lang/crates.io-index" 441 - checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 442 - 443 - [[package]] 444 - name = "env_filter" 445 - version = "0.1.3" 183 + name = "equivalent" 184 + version = "1.0.2" 446 185 source = "registry+https://github.com/rust-lang/crates.io-index" 447 - checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" 448 - dependencies = [ 449 - "log", 450 - "regex", 451 - ] 452 - 453 - [[package]] 454 - name = "env_logger" 455 - version = "0.11.8" 456 - source = "registry+https://github.com/rust-lang/crates.io-index" 457 - checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" 458 - dependencies = [ 459 - "anstream", 460 - "anstyle", 461 - "env_filter", 462 - "jiff", 463 - "log", 464 - ] 186 + checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 465 187 466 188 [[package]] 467 189 name = "errno" ··· 470 192 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" 471 193 dependencies = [ 472 194 "libc", 473 - "windows-sys 0.60.2", 195 + "windows-sys", 474 196 ] 475 197 476 198 [[package]] 477 - name = "fallible-iterator" 478 - version = "0.3.0" 479 - source = "registry+https://github.com/rust-lang/crates.io-index" 480 - checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" 481 - 482 - [[package]] 483 - name = "fallible-streaming-iterator" 484 - version = "0.1.9" 485 - source = "registry+https://github.com/rust-lang/crates.io-index" 486 - checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" 487 - 488 - [[package]] 489 199 name = "fastrand" 490 200 version = "2.3.0" 491 201 source = "registry+https://github.com/rust-lang/crates.io-index" 
492 202 checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" 493 203 494 204 [[package]] 495 - name = "find-msvc-tools" 496 - version = "0.1.4" 497 - source = "registry+https://github.com/rust-lang/crates.io-index" 498 - checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" 499 - 500 - [[package]] 501 - name = "foldhash" 502 - version = "0.1.5" 503 - source = "registry+https://github.com/rust-lang/crates.io-index" 504 - checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" 505 - 506 - [[package]] 507 - name = "form_urlencoded" 508 - version = "1.2.2" 509 - source = "registry+https://github.com/rust-lang/crates.io-index" 510 - checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" 511 - dependencies = [ 512 - "percent-encoding", 513 - ] 514 - 515 - [[package]] 516 - name = "futures" 517 - version = "0.3.31" 518 - source = "registry+https://github.com/rust-lang/crates.io-index" 519 - checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" 520 - dependencies = [ 521 - "futures-channel", 522 - "futures-core", 523 - "futures-executor", 524 - "futures-io", 525 - "futures-sink", 526 - "futures-task", 527 - "futures-util", 528 - ] 529 - 530 - [[package]] 531 - name = "futures-channel" 532 - version = "0.3.31" 533 - source = "registry+https://github.com/rust-lang/crates.io-index" 534 - checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" 535 - dependencies = [ 536 - "futures-core", 537 - "futures-sink", 538 - ] 539 - 540 - [[package]] 541 - name = "futures-core" 542 - version = "0.3.31" 543 - source = "registry+https://github.com/rust-lang/crates.io-index" 544 - checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" 545 - 546 - [[package]] 547 - name = "futures-executor" 548 - version = "0.3.31" 549 - source = "registry+https://github.com/rust-lang/crates.io-index" 550 - checksum = 
"1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" 551 - dependencies = [ 552 - "futures-core", 553 - "futures-task", 554 - "futures-util", 555 - ] 556 - 557 - [[package]] 558 - name = "futures-io" 559 - version = "0.3.31" 560 - source = "registry+https://github.com/rust-lang/crates.io-index" 561 - checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" 562 - 563 - [[package]] 564 - name = "futures-macro" 565 - version = "0.3.31" 566 - source = "registry+https://github.com/rust-lang/crates.io-index" 567 - checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" 568 - dependencies = [ 569 - "proc-macro2", 570 - "quote", 571 - "syn 2.0.106", 572 - ] 573 - 574 - [[package]] 575 - name = "futures-sink" 576 - version = "0.3.31" 577 - source = "registry+https://github.com/rust-lang/crates.io-index" 578 - checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" 579 - 580 - [[package]] 581 - name = "futures-task" 582 - version = "0.3.31" 583 - source = "registry+https://github.com/rust-lang/crates.io-index" 584 - checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" 585 - 586 - [[package]] 587 - name = "futures-util" 588 - version = "0.3.31" 205 + name = "fjall" 206 + version = "3.0.1" 589 207 source = "registry+https://github.com/rust-lang/crates.io-index" 590 - checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" 208 + checksum = "4f69637c02d38ad1b0f003101d0195a60368130aa17d9ef78b1557d265a22093" 591 209 dependencies = [ 592 - "futures-channel", 593 - "futures-core", 594 - "futures-io", 595 - "futures-macro", 596 - "futures-sink", 597 - "futures-task", 598 - "memchr", 599 - "pin-project-lite", 600 - "pin-utils", 601 - "slab", 210 + "byteorder-lite", 211 + "byteview", 212 + "dashmap", 213 + "flume", 214 + "log", 215 + "lsm-tree", 216 + "lz4_flex", 217 + "tempfile", 218 + "xxhash-rust", 602 219 ] 603 220 604 221 [[package]] 605 - name = 
"getrandom" 606 - version = "0.2.16" 222 + name = "flume" 223 + version = "0.12.0" 607 224 source = "registry+https://github.com/rust-lang/crates.io-index" 608 - checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" 225 + checksum = "5e139bc46ca777eb5efaf62df0ab8cc5fd400866427e56c68b22e414e53bd3be" 609 226 dependencies = [ 610 - "cfg-if", 611 - "libc", 612 - "wasi 0.11.1+wasi-snapshot-preview1", 227 + "spin", 613 228 ] 614 229 615 230 [[package]] ··· 621 236 "cfg-if", 622 237 "libc", 623 238 "r-efi", 624 - "wasi 0.14.7+wasi-0.2.4", 625 - ] 626 - 627 - [[package]] 628 - name = "gimli" 629 - version = "0.32.3" 630 - source = "registry+https://github.com/rust-lang/crates.io-index" 631 - checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" 632 - 633 - [[package]] 634 - name = "half" 635 - version = "2.7.0" 636 - source = "registry+https://github.com/rust-lang/crates.io-index" 637 - checksum = "e54c115d4f30f52c67202f079c5f9d8b49db4691f460fdb0b4c2e838261b2ba5" 638 - dependencies = [ 639 - "cfg-if", 640 - "crunchy", 641 - "zerocopy", 239 + "wasi", 642 240 ] 643 241 644 242 [[package]] 645 243 name = "hashbrown" 646 - version = "0.15.5" 244 + version = "0.14.5" 647 245 source = "registry+https://github.com/rust-lang/crates.io-index" 648 - checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" 649 - dependencies = [ 650 - "foldhash", 651 - ] 246 + checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 652 247 653 248 [[package]] 654 - name = "hashlink" 655 - version = "0.10.0" 249 + name = "hashbrown" 250 + version = "0.16.1" 656 251 source = "registry+https://github.com/rust-lang/crates.io-index" 657 - checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" 658 - dependencies = [ 659 - "hashbrown", 660 - ] 252 + checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" 661 253 662 254 [[package]] 663 255 name = "heck" ··· 666 258 checksum 
= "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 667 259 668 260 [[package]] 669 - name = "heed" 670 - version = "0.22.0" 671 - source = "registry+https://github.com/rust-lang/crates.io-index" 672 - checksum = "6a56c94661ddfb51aa9cdfbf102cfcc340aa69267f95ebccc4af08d7c530d393" 673 - dependencies = [ 674 - "bitflags", 675 - "byteorder", 676 - "heed-traits", 677 - "heed-types", 678 - "libc", 679 - "lmdb-master-sys", 680 - "once_cell", 681 - "page_size", 682 - "serde", 683 - "synchronoise", 684 - "url", 685 - ] 686 - 687 - [[package]] 688 - name = "heed-traits" 689 - version = "0.20.0" 690 - source = "registry+https://github.com/rust-lang/crates.io-index" 691 - checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff" 692 - 693 - [[package]] 694 - name = "heed-types" 695 - version = "0.21.0" 696 - source = "registry+https://github.com/rust-lang/crates.io-index" 697 - checksum = "13c255bdf46e07fb840d120a36dcc81f385140d7191c76a7391672675c01a55d" 698 - dependencies = [ 699 - "bincode 1.3.3", 700 - "byteorder", 701 - "heed-traits", 702 - "serde", 703 - "serde_json", 704 - ] 705 - 706 - [[package]] 707 - name = "icu_collections" 708 - version = "2.1.1" 709 - source = "registry+https://github.com/rust-lang/crates.io-index" 710 - checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" 711 - dependencies = [ 712 - "displaydoc", 713 - "potential_utf", 714 - "yoke", 715 - "zerofrom", 716 - "zerovec", 717 - ] 718 - 719 - [[package]] 720 - name = "icu_locale_core" 721 - version = "2.1.1" 722 - source = "registry+https://github.com/rust-lang/crates.io-index" 723 - checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" 724 - dependencies = [ 725 - "displaydoc", 726 - "litemap", 727 - "tinystr", 728 - "writeable", 729 - "zerovec", 730 - ] 731 - 732 - [[package]] 733 - name = "icu_normalizer" 734 - version = "2.1.1" 735 - source = "registry+https://github.com/rust-lang/crates.io-index" 736 - checksum 
= "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" 737 - dependencies = [ 738 - "icu_collections", 739 - "icu_normalizer_data", 740 - "icu_properties", 741 - "icu_provider", 742 - "smallvec", 743 - "zerovec", 744 - ] 745 - 746 - [[package]] 747 - name = "icu_normalizer_data" 748 - version = "2.1.1" 749 - source = "registry+https://github.com/rust-lang/crates.io-index" 750 - checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" 751 - 752 - [[package]] 753 - name = "icu_properties" 754 - version = "2.1.1" 755 - source = "registry+https://github.com/rust-lang/crates.io-index" 756 - checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99" 757 - dependencies = [ 758 - "icu_collections", 759 - "icu_locale_core", 760 - "icu_properties_data", 761 - "icu_provider", 762 - "zerotrie", 763 - "zerovec", 764 - ] 765 - 766 - [[package]] 767 - name = "icu_properties_data" 768 - version = "2.1.1" 769 - source = "registry+https://github.com/rust-lang/crates.io-index" 770 - checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899" 771 - 772 - [[package]] 773 - name = "icu_provider" 774 - version = "2.1.1" 775 - source = "registry+https://github.com/rust-lang/crates.io-index" 776 - checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" 777 - dependencies = [ 778 - "displaydoc", 779 - "icu_locale_core", 780 - "writeable", 781 - "yoke", 782 - "zerofrom", 783 - "zerotrie", 784 - "zerovec", 785 - ] 786 - 787 - [[package]] 788 - name = "idna" 789 - version = "1.1.0" 790 - source = "registry+https://github.com/rust-lang/crates.io-index" 791 - checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" 792 - dependencies = [ 793 - "idna_adapter", 794 - "smallvec", 795 - "utf8_iter", 796 - ] 797 - 798 - [[package]] 799 - name = "idna_adapter" 800 - version = "1.2.1" 801 - source = "registry+https://github.com/rust-lang/crates.io-index" 802 - checksum = 
"3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" 803 - dependencies = [ 804 - "icu_normalizer", 805 - "icu_properties", 806 - ] 807 - 808 - [[package]] 809 - name = "io-uring" 810 - version = "0.7.10" 811 - source = "registry+https://github.com/rust-lang/crates.io-index" 812 - checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" 813 - dependencies = [ 814 - "bitflags", 815 - "cfg-if", 816 - "libc", 817 - ] 818 - 819 - [[package]] 820 - name = "ipld-core" 821 - version = "0.4.2" 822 - source = "registry+https://github.com/rust-lang/crates.io-index" 823 - checksum = "104718b1cc124d92a6d01ca9c9258a7df311405debb3408c445a36452f9bf8db" 824 - dependencies = [ 825 - "cid", 826 - "serde", 827 - "serde_bytes", 828 - ] 829 - 830 - [[package]] 831 - name = "iroh-car" 832 - version = "0.5.1" 261 + name = "interval-heap" 262 + version = "0.0.5" 833 263 source = "registry+https://github.com/rust-lang/crates.io-index" 834 - checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a" 264 + checksum = "11274e5e8e89b8607cfedc2910b6626e998779b48a019151c7604d0adcb86ac6" 835 265 dependencies = [ 836 - "anyhow", 837 - "cid", 838 - "futures", 839 - "serde", 840 - "serde_ipld_dagcbor", 841 - "thiserror 1.0.69", 842 - "tokio", 843 - "unsigned-varint 0.7.2", 266 + "compare", 844 267 ] 845 268 846 269 [[package]] ··· 850 273 checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 851 274 852 275 [[package]] 853 - name = "itertools" 854 - version = "0.13.0" 855 - source = "registry+https://github.com/rust-lang/crates.io-index" 856 - checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" 857 - dependencies = [ 858 - "either", 859 - ] 860 - 861 - [[package]] 862 - name = "itoa" 863 - version = "1.0.15" 864 - source = "registry+https://github.com/rust-lang/crates.io-index" 865 - checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" 866 - 867 - [[package]] 868 - name = 
"jiff" 869 - version = "0.2.15" 870 - source = "registry+https://github.com/rust-lang/crates.io-index" 871 - checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" 872 - dependencies = [ 873 - "jiff-static", 874 - "log", 875 - "portable-atomic", 876 - "portable-atomic-util", 877 - "serde", 878 - ] 879 - 880 - [[package]] 881 - name = "jiff-static" 882 - version = "0.2.15" 883 - source = "registry+https://github.com/rust-lang/crates.io-index" 884 - checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" 885 - dependencies = [ 886 - "proc-macro2", 887 - "quote", 888 - "syn 2.0.106", 889 - ] 890 - 891 - [[package]] 892 - name = "js-sys" 893 - version = "0.3.81" 894 - source = "registry+https://github.com/rust-lang/crates.io-index" 895 - checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" 896 - dependencies = [ 897 - "once_cell", 898 - "wasm-bindgen", 899 - ] 900 - 901 - [[package]] 902 276 name = "libc" 903 277 version = "0.2.176" 904 278 source = "registry+https://github.com/rust-lang/crates.io-index" 905 279 checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" 906 280 907 281 [[package]] 908 - name = "libsqlite3-sys" 909 - version = "0.35.0" 910 - source = "registry+https://github.com/rust-lang/crates.io-index" 911 - checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f" 912 - dependencies = [ 913 - "pkg-config", 914 - "vcpkg", 915 - ] 916 - 917 - [[package]] 918 282 name = "linux-raw-sys" 919 283 version = "0.11.0" 920 284 source = "registry+https://github.com/rust-lang/crates.io-index" 921 285 checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" 922 286 923 287 [[package]] 924 - name = "litemap" 925 - version = "0.8.1" 926 - source = "registry+https://github.com/rust-lang/crates.io-index" 927 - checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" 928 - 929 - [[package]] 930 - name = 
"lmdb-master-sys" 931 - version = "0.2.5" 932 - source = "registry+https://github.com/rust-lang/crates.io-index" 933 - checksum = "864808e0b19fb6dd3b70ba94ee671b82fce17554cf80aeb0a155c65bb08027df" 934 - dependencies = [ 935 - "cc", 936 - "doxygen-rs", 937 - "libc", 938 - ] 939 - 940 - [[package]] 941 288 name = "lock_api" 942 289 version = "0.4.14" 943 290 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 953 300 checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" 954 301 955 302 [[package]] 956 - name = "match-lookup" 957 - version = "0.1.1" 303 + name = "lsm-tree" 304 + version = "3.0.1" 958 305 source = "registry+https://github.com/rust-lang/crates.io-index" 959 - checksum = "1265724d8cb29dbbc2b0f06fffb8bf1a8c0cf73a78eede9ba73a4a66c52a981e" 306 + checksum = "b875f1dfe14f557f805b167fb9b0fc54c5560c7a4bd6ae02535b2846f276a8cb" 960 307 dependencies = [ 961 - "proc-macro2", 962 - "quote", 963 - "syn 1.0.109", 964 - ] 965 - 966 - [[package]] 967 - name = "memchr" 968 - version = "2.7.6" 969 - source = "registry+https://github.com/rust-lang/crates.io-index" 970 - checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" 971 - 972 - [[package]] 973 - name = "miniz_oxide" 974 - version = "0.8.9" 975 - source = "registry+https://github.com/rust-lang/crates.io-index" 976 - checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" 977 - dependencies = [ 978 - "adler2", 979 - ] 980 - 981 - [[package]] 982 - name = "mio" 983 - version = "1.0.4" 984 - source = "registry+https://github.com/rust-lang/crates.io-index" 985 - checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" 986 - dependencies = [ 987 - "libc", 988 - "wasi 0.11.1+wasi-snapshot-preview1", 989 - "windows-sys 0.59.0", 308 + "byteorder-lite", 309 + "byteview", 310 + "crossbeam-skiplist", 311 + "enum_dispatch", 312 + "interval-heap", 313 + "log", 314 + "lz4_flex", 315 + "quick_cache", 316 + "rustc-hash", 317 + 
"self_cell", 318 + "sfa", 319 + "tempfile", 320 + "varint-rs", 321 + "xxhash-rust", 990 322 ] 991 323 992 324 [[package]] 993 - name = "multibase" 994 - version = "0.9.2" 325 + name = "lz4_flex" 326 + version = "0.11.5" 995 327 source = "registry+https://github.com/rust-lang/crates.io-index" 996 - checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77" 328 + checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" 997 329 dependencies = [ 998 - "base-x", 999 - "base256emoji", 1000 - "data-encoding", 1001 - "data-encoding-macro", 1002 - ] 1003 - 1004 - [[package]] 1005 - name = "multihash" 1006 - version = "0.19.3" 1007 - source = "registry+https://github.com/rust-lang/crates.io-index" 1008 - checksum = "6b430e7953c29dd6a09afc29ff0bb69c6e306329ee6794700aee27b76a1aea8d" 1009 - dependencies = [ 1010 - "core2", 1011 - "serde", 1012 - "unsigned-varint 0.8.0", 1013 - ] 1014 - 1015 - [[package]] 1016 - name = "num-traits" 1017 - version = "0.2.19" 1018 - source = "registry+https://github.com/rust-lang/crates.io-index" 1019 - checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 1020 - dependencies = [ 1021 - "autocfg", 1022 - ] 1023 - 1024 - [[package]] 1025 - name = "object" 1026 - version = "0.37.3" 1027 - source = "registry+https://github.com/rust-lang/crates.io-index" 1028 - checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" 1029 - dependencies = [ 1030 - "memchr", 330 + "twox-hash", 1031 331 ] 1032 332 1033 333 [[package]] ··· 1043 343 checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" 1044 344 1045 345 [[package]] 1046 - name = "oorandom" 1047 - version = "11.1.5" 1048 - source = "registry+https://github.com/rust-lang/crates.io-index" 1049 - checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" 1050 - 1051 - [[package]] 1052 - name = "page_size" 1053 - version = "0.6.0" 1054 - source = 
"registry+https://github.com/rust-lang/crates.io-index" 1055 - checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" 1056 - dependencies = [ 1057 - "libc", 1058 - "winapi", 1059 - ] 1060 - 1061 - [[package]] 1062 - name = "parking_lot" 1063 - version = "0.12.5" 1064 - source = "registry+https://github.com/rust-lang/crates.io-index" 1065 - checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" 1066 - dependencies = [ 1067 - "lock_api", 1068 - "parking_lot_core", 1069 - ] 1070 - 1071 - [[package]] 1072 346 name = "parking_lot_core" 1073 347 version = "0.9.12" 1074 348 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1082 356 ] 1083 357 1084 358 [[package]] 1085 - name = "percent-encoding" 1086 - version = "2.3.2" 1087 - source = "registry+https://github.com/rust-lang/crates.io-index" 1088 - checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" 1089 - 1090 - [[package]] 1091 - name = "phf" 1092 - version = "0.11.3" 1093 - source = "registry+https://github.com/rust-lang/crates.io-index" 1094 - checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" 1095 - dependencies = [ 1096 - "phf_macros", 1097 - "phf_shared", 1098 - ] 1099 - 1100 - [[package]] 1101 - name = "phf_generator" 1102 - version = "0.11.3" 1103 - source = "registry+https://github.com/rust-lang/crates.io-index" 1104 - checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" 1105 - dependencies = [ 1106 - "phf_shared", 1107 - "rand", 1108 - ] 1109 - 1110 - [[package]] 1111 - name = "phf_macros" 1112 - version = "0.11.3" 1113 - source = "registry+https://github.com/rust-lang/crates.io-index" 1114 - checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" 1115 - dependencies = [ 1116 - "phf_generator", 1117 - "phf_shared", 1118 - "proc-macro2", 1119 - "quote", 1120 - "syn 2.0.106", 1121 - ] 1122 - 1123 - [[package]] 1124 - name = "phf_shared" 1125 - version 
= "0.11.3" 1126 - source = "registry+https://github.com/rust-lang/crates.io-index" 1127 - checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" 1128 - dependencies = [ 1129 - "siphasher", 1130 - ] 1131 - 1132 - [[package]] 1133 - name = "pin-project-lite" 1134 - version = "0.2.16" 1135 - source = "registry+https://github.com/rust-lang/crates.io-index" 1136 - checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" 1137 - 1138 - [[package]] 1139 - name = "pin-utils" 1140 - version = "0.1.0" 1141 - source = "registry+https://github.com/rust-lang/crates.io-index" 1142 - checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 1143 - 1144 - [[package]] 1145 - name = "pkg-config" 1146 - version = "0.3.32" 1147 - source = "registry+https://github.com/rust-lang/crates.io-index" 1148 - checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 1149 - 1150 - [[package]] 1151 - name = "plotters" 1152 - version = "0.3.7" 1153 - source = "registry+https://github.com/rust-lang/crates.io-index" 1154 - checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" 1155 - dependencies = [ 1156 - "num-traits", 1157 - "plotters-backend", 1158 - "plotters-svg", 1159 - "wasm-bindgen", 1160 - "web-sys", 1161 - ] 1162 - 1163 - [[package]] 1164 - name = "plotters-backend" 1165 - version = "0.3.7" 1166 - source = "registry+https://github.com/rust-lang/crates.io-index" 1167 - checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" 1168 - 1169 - [[package]] 1170 - name = "plotters-svg" 1171 - version = "0.3.7" 359 + name = "proc-macro2" 360 + version = "1.0.101" 1172 361 source = "registry+https://github.com/rust-lang/crates.io-index" 1173 - checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" 362 + checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" 1174 363 dependencies = [ 1175 - "plotters-backend", 364 + 
"unicode-ident", 1176 365 ] 1177 366 1178 367 [[package]] 1179 - name = "portable-atomic" 1180 - version = "1.11.1" 1181 - source = "registry+https://github.com/rust-lang/crates.io-index" 1182 - checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" 1183 - 1184 - [[package]] 1185 - name = "portable-atomic-util" 1186 - version = "0.2.4" 368 + name = "quick_cache" 369 + version = "0.6.18" 1187 370 source = "registry+https://github.com/rust-lang/crates.io-index" 1188 - checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" 371 + checksum = "7ada44a88ef953a3294f6eb55d2007ba44646015e18613d2f213016379203ef3" 1189 372 dependencies = [ 1190 - "portable-atomic", 1191 - ] 1192 - 1193 - [[package]] 1194 - name = "potential_utf" 1195 - version = "0.1.4" 1196 - source = "registry+https://github.com/rust-lang/crates.io-index" 1197 - checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" 1198 - dependencies = [ 1199 - "zerovec", 1200 - ] 1201 - 1202 - [[package]] 1203 - name = "ppv-lite86" 1204 - version = "0.2.21" 1205 - source = "registry+https://github.com/rust-lang/crates.io-index" 1206 - checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" 1207 - dependencies = [ 1208 - "zerocopy", 1209 - ] 1210 - 1211 - [[package]] 1212 - name = "proc-macro2" 1213 - version = "1.0.101" 1214 - source = "registry+https://github.com/rust-lang/crates.io-index" 1215 - checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" 1216 - dependencies = [ 1217 - "unicode-ident", 373 + "equivalent", 374 + "hashbrown 0.16.1", 1218 375 ] 1219 376 1220 377 [[package]] ··· 1233 390 checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" 1234 391 1235 392 [[package]] 1236 - name = "rand" 1237 - version = "0.8.5" 1238 - source = "registry+https://github.com/rust-lang/crates.io-index" 1239 - checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 1240 - 
dependencies = [ 1241 - "libc", 1242 - "rand_chacha", 1243 - "rand_core", 1244 - ] 1245 - 1246 - [[package]] 1247 - name = "rand_chacha" 1248 - version = "0.3.1" 1249 - source = "registry+https://github.com/rust-lang/crates.io-index" 1250 - checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 1251 - dependencies = [ 1252 - "ppv-lite86", 1253 - "rand_core", 1254 - ] 1255 - 1256 - [[package]] 1257 - name = "rand_core" 1258 - version = "0.6.4" 1259 - source = "registry+https://github.com/rust-lang/crates.io-index" 1260 - checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 1261 - dependencies = [ 1262 - "getrandom 0.2.16", 1263 - ] 1264 - 1265 - [[package]] 1266 - name = "rayon" 1267 - version = "1.11.0" 1268 - source = "registry+https://github.com/rust-lang/crates.io-index" 1269 - checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" 1270 - dependencies = [ 1271 - "either", 1272 - "rayon-core", 1273 - ] 1274 - 1275 - [[package]] 1276 - name = "rayon-core" 1277 - version = "1.13.0" 1278 - source = "registry+https://github.com/rust-lang/crates.io-index" 1279 - checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" 1280 - dependencies = [ 1281 - "crossbeam-deque", 1282 - "crossbeam-utils", 1283 - ] 1284 - 1285 - [[package]] 1286 - name = "redb" 1287 - version = "3.1.0" 1288 - source = "registry+https://github.com/rust-lang/crates.io-index" 1289 - checksum = "ae323eb086579a3769daa2c753bb96deb95993c534711e0dbe881b5192906a06" 1290 - dependencies = [ 1291 - "libc", 1292 - ] 1293 - 1294 - [[package]] 1295 393 name = "redox_syscall" 1296 394 version = "0.5.18" 1297 395 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1301 399 ] 1302 400 1303 401 [[package]] 1304 - name = "regex" 1305 - version = "1.11.3" 1306 - source = "registry+https://github.com/rust-lang/crates.io-index" 1307 - checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" 
1308 - dependencies = [ 1309 - "aho-corasick", 1310 - "memchr", 1311 - "regex-automata", 1312 - "regex-syntax", 1313 - ] 1314 - 1315 - [[package]] 1316 - name = "regex-automata" 1317 - version = "0.4.11" 1318 - source = "registry+https://github.com/rust-lang/crates.io-index" 1319 - checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" 1320 - dependencies = [ 1321 - "aho-corasick", 1322 - "memchr", 1323 - "regex-syntax", 1324 - ] 1325 - 1326 - [[package]] 1327 - name = "regex-syntax" 1328 - version = "0.8.6" 1329 - source = "registry+https://github.com/rust-lang/crates.io-index" 1330 - checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" 1331 - 1332 - [[package]] 1333 402 name = "repo-stream" 1334 - version = "0.1.1" 403 + version = "0.2.2" 1335 404 dependencies = [ 1336 - "bincode 2.0.1", 1337 405 "clap", 1338 - "criterion", 1339 - "env_logger", 1340 - "futures", 1341 - "futures-core", 1342 - "heed", 1343 - "ipld-core", 1344 - "iroh-car", 1345 - "log", 1346 - "multibase", 1347 - "redb", 1348 - "rusqlite", 1349 - "rustcask", 1350 - "serde", 1351 - "serde_bytes", 1352 - "serde_ipld_dagcbor", 1353 - "tempfile", 1354 - "thiserror 2.0.17", 1355 - "tokio", 1356 - ] 1357 - 1358 - [[package]] 1359 - name = "rusqlite" 1360 - version = "0.37.0" 1361 - source = "registry+https://github.com/rust-lang/crates.io-index" 1362 - checksum = "165ca6e57b20e1351573e3729b958bc62f0e48025386970b6e4d29e7a7e71f3f" 1363 - dependencies = [ 1364 - "bitflags", 1365 - "fallible-iterator", 1366 - "fallible-streaming-iterator", 1367 - "hashlink", 1368 - "libsqlite3-sys", 1369 - "smallvec", 406 + "fjall", 1370 407 ] 1371 408 1372 409 [[package]] 1373 - name = "rustc-demangle" 1374 - version = "0.1.26" 1375 - source = "registry+https://github.com/rust-lang/crates.io-index" 1376 - checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" 1377 - 1378 - [[package]] 1379 - name = "rustcask" 1380 - version = "0.1.0" 410 + name = 
"rustc-hash" 411 + version = "2.1.1" 1381 412 source = "registry+https://github.com/rust-lang/crates.io-index" 1382 - checksum = "e17ed1a2733a60fea8495ddcb42c22cabd17afec7ffa7b024b161dd662da4003" 1383 - dependencies = [ 1384 - "bincode 1.3.3", 1385 - "bytes", 1386 - "clap", 1387 - "log", 1388 - "rand", 1389 - "regex", 1390 - "serde", 1391 - "tokio", 1392 - ] 413 + checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" 1393 414 1394 415 [[package]] 1395 416 name = "rustix" ··· 1401 422 "errno", 1402 423 "libc", 1403 424 "linux-raw-sys", 1404 - "windows-sys 0.60.2", 1405 - ] 1406 - 1407 - [[package]] 1408 - name = "rustversion" 1409 - version = "1.0.22" 1410 - source = "registry+https://github.com/rust-lang/crates.io-index" 1411 - checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" 1412 - 1413 - [[package]] 1414 - name = "ryu" 1415 - version = "1.0.20" 1416 - source = "registry+https://github.com/rust-lang/crates.io-index" 1417 - checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" 1418 - 1419 - [[package]] 1420 - name = "same-file" 1421 - version = "1.0.6" 1422 - source = "registry+https://github.com/rust-lang/crates.io-index" 1423 - checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 1424 - dependencies = [ 1425 - "winapi-util", 425 + "windows-sys", 1426 426 ] 1427 427 1428 428 [[package]] ··· 1432 432 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 1433 433 1434 434 [[package]] 1435 - name = "serde" 1436 - version = "1.0.228" 1437 - source = "registry+https://github.com/rust-lang/crates.io-index" 1438 - checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" 1439 - dependencies = [ 1440 - "serde_core", 1441 - "serde_derive", 1442 - ] 1443 - 1444 - [[package]] 1445 - name = "serde_bytes" 1446 - version = "0.11.19" 435 + name = "self_cell" 436 + version = "1.2.2" 1447 437 source = 
"registry+https://github.com/rust-lang/crates.io-index" 1448 - checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" 1449 - dependencies = [ 1450 - "serde", 1451 - "serde_core", 1452 - ] 438 + checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" 1453 439 1454 440 [[package]] 1455 - name = "serde_core" 1456 - version = "1.0.228" 441 + name = "sfa" 442 + version = "1.0.0" 1457 443 source = "registry+https://github.com/rust-lang/crates.io-index" 1458 - checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" 444 + checksum = "a1296838937cab56cd6c4eeeb8718ec777383700c33f060e2869867bd01d1175" 1459 445 dependencies = [ 1460 - "serde_derive", 446 + "byteorder-lite", 447 + "log", 448 + "xxhash-rust", 1461 449 ] 1462 450 1463 451 [[package]] 1464 - name = "serde_derive" 1465 - version = "1.0.228" 1466 - source = "registry+https://github.com/rust-lang/crates.io-index" 1467 - checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" 1468 - dependencies = [ 1469 - "proc-macro2", 1470 - "quote", 1471 - "syn 2.0.106", 1472 - ] 1473 - 1474 - [[package]] 1475 - name = "serde_ipld_dagcbor" 1476 - version = "0.6.4" 1477 - source = "registry+https://github.com/rust-lang/crates.io-index" 1478 - checksum = "46182f4f08349a02b45c998ba3215d3f9de826246ba02bb9dddfe9a2a2100778" 1479 - dependencies = [ 1480 - "cbor4ii", 1481 - "ipld-core", 1482 - "scopeguard", 1483 - "serde", 1484 - ] 1485 - 1486 - [[package]] 1487 - name = "serde_json" 1488 - version = "1.0.145" 1489 - source = "registry+https://github.com/rust-lang/crates.io-index" 1490 - checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" 1491 - dependencies = [ 1492 - "itoa", 1493 - "memchr", 1494 - "ryu", 1495 - "serde", 1496 - "serde_core", 1497 - ] 1498 - 1499 - [[package]] 1500 - name = "shlex" 1501 - version = "1.3.0" 1502 - source = "registry+https://github.com/rust-lang/crates.io-index" 1503 - checksum = 
"0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 1504 - 1505 - [[package]] 1506 - name = "signal-hook-registry" 1507 - version = "1.4.6" 1508 - source = "registry+https://github.com/rust-lang/crates.io-index" 1509 - checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" 1510 - dependencies = [ 1511 - "libc", 1512 - ] 1513 - 1514 - [[package]] 1515 - name = "siphasher" 1516 - version = "1.0.1" 1517 - source = "registry+https://github.com/rust-lang/crates.io-index" 1518 - checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" 1519 - 1520 - [[package]] 1521 - name = "slab" 1522 - version = "0.4.11" 1523 - source = "registry+https://github.com/rust-lang/crates.io-index" 1524 - checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" 1525 - 1526 - [[package]] 1527 452 name = "smallvec" 1528 453 version = "1.15.1" 1529 454 source = "registry+https://github.com/rust-lang/crates.io-index" 1530 455 checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" 1531 456 1532 457 [[package]] 1533 - name = "socket2" 1534 - version = "0.6.0" 458 + name = "spin" 459 + version = "0.9.8" 1535 460 source = "registry+https://github.com/rust-lang/crates.io-index" 1536 - checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" 461 + checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" 1537 462 dependencies = [ 1538 - "libc", 1539 - "windows-sys 0.59.0", 463 + "lock_api", 1540 464 ] 1541 465 1542 466 [[package]] 1543 - name = "stable_deref_trait" 1544 - version = "1.2.1" 1545 - source = "registry+https://github.com/rust-lang/crates.io-index" 1546 - checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" 1547 - 1548 - [[package]] 1549 467 name = "strsim" 1550 468 version = "0.11.1" 1551 469 source = "registry+https://github.com/rust-lang/crates.io-index" 1552 470 checksum = 
"7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 1553 - 1554 - [[package]] 1555 - name = "syn" 1556 - version = "1.0.109" 1557 - source = "registry+https://github.com/rust-lang/crates.io-index" 1558 - checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 1559 - dependencies = [ 1560 - "proc-macro2", 1561 - "quote", 1562 - "unicode-ident", 1563 - ] 1564 471 1565 472 [[package]] 1566 473 name = "syn" ··· 1574 481 ] 1575 482 1576 483 [[package]] 1577 - name = "synchronoise" 1578 - version = "1.0.1" 1579 - source = "registry+https://github.com/rust-lang/crates.io-index" 1580 - checksum = "3dbc01390fc626ce8d1cffe3376ded2b72a11bb70e1c75f404a210e4daa4def2" 1581 - dependencies = [ 1582 - "crossbeam-queue", 1583 - ] 1584 - 1585 - [[package]] 1586 - name = "synstructure" 1587 - version = "0.13.2" 1588 - source = "registry+https://github.com/rust-lang/crates.io-index" 1589 - checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" 1590 - dependencies = [ 1591 - "proc-macro2", 1592 - "quote", 1593 - "syn 2.0.106", 1594 - ] 1595 - 1596 - [[package]] 1597 484 name = "tempfile" 1598 485 version = "3.23.0" 1599 486 source = "registry+https://github.com/rust-lang/crates.io-index" 1600 487 checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" 1601 488 dependencies = [ 1602 489 "fastrand", 1603 - "getrandom 0.3.3", 490 + "getrandom", 1604 491 "once_cell", 1605 492 "rustix", 1606 - "windows-sys 0.60.2", 1607 - ] 1608 - 1609 - [[package]] 1610 - name = "thiserror" 1611 - version = "1.0.69" 1612 - source = "registry+https://github.com/rust-lang/crates.io-index" 1613 - checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" 1614 - dependencies = [ 1615 - "thiserror-impl 1.0.69", 1616 - ] 1617 - 1618 - [[package]] 1619 - name = "thiserror" 1620 - version = "2.0.17" 1621 - source = "registry+https://github.com/rust-lang/crates.io-index" 1622 - checksum = 
"f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" 1623 - dependencies = [ 1624 - "thiserror-impl 2.0.17", 1625 - ] 1626 - 1627 - [[package]] 1628 - name = "thiserror-impl" 1629 - version = "1.0.69" 1630 - source = "registry+https://github.com/rust-lang/crates.io-index" 1631 - checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" 1632 - dependencies = [ 1633 - "proc-macro2", 1634 - "quote", 1635 - "syn 2.0.106", 1636 - ] 1637 - 1638 - [[package]] 1639 - name = "thiserror-impl" 1640 - version = "2.0.17" 1641 - source = "registry+https://github.com/rust-lang/crates.io-index" 1642 - checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" 1643 - dependencies = [ 1644 - "proc-macro2", 1645 - "quote", 1646 - "syn 2.0.106", 1647 - ] 1648 - 1649 - [[package]] 1650 - name = "tinystr" 1651 - version = "0.8.2" 1652 - source = "registry+https://github.com/rust-lang/crates.io-index" 1653 - checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" 1654 - dependencies = [ 1655 - "displaydoc", 1656 - "zerovec", 1657 - ] 1658 - 1659 - [[package]] 1660 - name = "tinytemplate" 1661 - version = "1.2.1" 1662 - source = "registry+https://github.com/rust-lang/crates.io-index" 1663 - checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 1664 - dependencies = [ 1665 - "serde", 1666 - "serde_json", 1667 - ] 1668 - 1669 - [[package]] 1670 - name = "tokio" 1671 - version = "1.47.1" 1672 - source = "registry+https://github.com/rust-lang/crates.io-index" 1673 - checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" 1674 - dependencies = [ 1675 - "backtrace", 1676 - "bytes", 1677 - "io-uring", 1678 - "libc", 1679 - "mio", 1680 - "parking_lot", 1681 - "pin-project-lite", 1682 - "signal-hook-registry", 1683 - "slab", 1684 - "socket2", 1685 - "tokio-macros", 1686 - "windows-sys 0.59.0", 493 + "windows-sys", 1687 494 ] 1688 495 1689 496 [[package]] 1690 - name = 
"tokio-macros" 1691 - version = "2.5.0" 497 + name = "twox-hash" 498 + version = "2.1.2" 1692 499 source = "registry+https://github.com/rust-lang/crates.io-index" 1693 - checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" 1694 - dependencies = [ 1695 - "proc-macro2", 1696 - "quote", 1697 - "syn 2.0.106", 1698 - ] 500 + checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" 1699 501 1700 502 [[package]] 1701 503 name = "unicode-ident" ··· 1704 506 checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" 1705 507 1706 508 [[package]] 1707 - name = "unsigned-varint" 1708 - version = "0.7.2" 1709 - source = "registry+https://github.com/rust-lang/crates.io-index" 1710 - checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105" 1711 - 1712 - [[package]] 1713 - name = "unsigned-varint" 1714 - version = "0.8.0" 1715 - source = "registry+https://github.com/rust-lang/crates.io-index" 1716 - checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06" 1717 - 1718 - [[package]] 1719 - name = "unty" 1720 - version = "0.0.4" 1721 - source = "registry+https://github.com/rust-lang/crates.io-index" 1722 - checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" 1723 - 1724 - [[package]] 1725 - name = "url" 1726 - version = "2.5.7" 1727 - source = "registry+https://github.com/rust-lang/crates.io-index" 1728 - checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" 1729 - dependencies = [ 1730 - "form_urlencoded", 1731 - "idna", 1732 - "percent-encoding", 1733 - "serde", 1734 - ] 1735 - 1736 - [[package]] 1737 - name = "utf8_iter" 1738 - version = "1.0.4" 1739 - source = "registry+https://github.com/rust-lang/crates.io-index" 1740 - checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" 1741 - 1742 - [[package]] 1743 509 name = "utf8parse" 1744 510 version = "0.2.2" 1745 511 source = 
"registry+https://github.com/rust-lang/crates.io-index" 1746 512 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 1747 513 1748 514 [[package]] 1749 - name = "vcpkg" 1750 - version = "0.2.15" 1751 - source = "registry+https://github.com/rust-lang/crates.io-index" 1752 - checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" 1753 - 1754 - [[package]] 1755 - name = "virtue" 1756 - version = "0.0.18" 1757 - source = "registry+https://github.com/rust-lang/crates.io-index" 1758 - checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" 1759 - 1760 - [[package]] 1761 - name = "walkdir" 1762 - version = "2.5.0" 1763 - source = "registry+https://github.com/rust-lang/crates.io-index" 1764 - checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 1765 - dependencies = [ 1766 - "same-file", 1767 - "winapi-util", 1768 - ] 1769 - 1770 - [[package]] 1771 - name = "wasi" 1772 - version = "0.11.1+wasi-snapshot-preview1" 515 + name = "varint-rs" 516 + version = "2.2.0" 1773 517 source = "registry+https://github.com/rust-lang/crates.io-index" 1774 - checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" 518 + checksum = "8f54a172d0620933a27a4360d3db3e2ae0dd6cceae9730751a036bbf182c4b23" 1775 519 1776 520 [[package]] 1777 521 name = "wasi" ··· 1792 536 ] 1793 537 1794 538 [[package]] 1795 - name = "wasm-bindgen" 1796 - version = "0.2.104" 1797 - source = "registry+https://github.com/rust-lang/crates.io-index" 1798 - checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" 1799 - dependencies = [ 1800 - "cfg-if", 1801 - "once_cell", 1802 - "rustversion", 1803 - "wasm-bindgen-macro", 1804 - "wasm-bindgen-shared", 1805 - ] 1806 - 1807 - [[package]] 1808 - name = "wasm-bindgen-backend" 1809 - version = "0.2.104" 1810 - source = "registry+https://github.com/rust-lang/crates.io-index" 1811 - checksum = 
"671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" 1812 - dependencies = [ 1813 - "bumpalo", 1814 - "log", 1815 - "proc-macro2", 1816 - "quote", 1817 - "syn 2.0.106", 1818 - "wasm-bindgen-shared", 1819 - ] 1820 - 1821 - [[package]] 1822 - name = "wasm-bindgen-macro" 1823 - version = "0.2.104" 1824 - source = "registry+https://github.com/rust-lang/crates.io-index" 1825 - checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" 1826 - dependencies = [ 1827 - "quote", 1828 - "wasm-bindgen-macro-support", 1829 - ] 1830 - 1831 - [[package]] 1832 - name = "wasm-bindgen-macro-support" 1833 - version = "0.2.104" 1834 - source = "registry+https://github.com/rust-lang/crates.io-index" 1835 - checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" 1836 - dependencies = [ 1837 - "proc-macro2", 1838 - "quote", 1839 - "syn 2.0.106", 1840 - "wasm-bindgen-backend", 1841 - "wasm-bindgen-shared", 1842 - ] 1843 - 1844 - [[package]] 1845 - name = "wasm-bindgen-shared" 1846 - version = "0.2.104" 1847 - source = "registry+https://github.com/rust-lang/crates.io-index" 1848 - checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" 1849 - dependencies = [ 1850 - "unicode-ident", 1851 - ] 1852 - 1853 - [[package]] 1854 - name = "web-sys" 1855 - version = "0.3.81" 1856 - source = "registry+https://github.com/rust-lang/crates.io-index" 1857 - checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" 1858 - dependencies = [ 1859 - "js-sys", 1860 - "wasm-bindgen", 1861 - ] 1862 - 1863 - [[package]] 1864 - name = "winapi" 1865 - version = "0.3.9" 1866 - source = "registry+https://github.com/rust-lang/crates.io-index" 1867 - checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1868 - dependencies = [ 1869 - "winapi-i686-pc-windows-gnu", 1870 - "winapi-x86_64-pc-windows-gnu", 1871 - ] 1872 - 1873 - [[package]] 1874 - name = "winapi-i686-pc-windows-gnu" 1875 - version = 
"0.4.0" 1876 - source = "registry+https://github.com/rust-lang/crates.io-index" 1877 - checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1878 - 1879 - [[package]] 1880 - name = "winapi-util" 1881 - version = "0.1.11" 1882 - source = "registry+https://github.com/rust-lang/crates.io-index" 1883 - checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" 1884 - dependencies = [ 1885 - "windows-sys 0.60.2", 1886 - ] 1887 - 1888 - [[package]] 1889 - name = "winapi-x86_64-pc-windows-gnu" 1890 - version = "0.4.0" 1891 - source = "registry+https://github.com/rust-lang/crates.io-index" 1892 - checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1893 - 1894 - [[package]] 1895 539 name = "windows-link" 1896 540 version = "0.2.1" 1897 541 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1899 543 1900 544 [[package]] 1901 545 name = "windows-sys" 1902 - version = "0.59.0" 1903 - source = "registry+https://github.com/rust-lang/crates.io-index" 1904 - checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 1905 - dependencies = [ 1906 - "windows-targets 0.52.6", 1907 - ] 1908 - 1909 - [[package]] 1910 - name = "windows-sys" 1911 546 version = "0.60.2" 1912 547 source = "registry+https://github.com/rust-lang/crates.io-index" 1913 548 checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" 1914 549 dependencies = [ 1915 - "windows-targets 0.53.5", 1916 - ] 1917 - 1918 - [[package]] 1919 - name = "windows-targets" 1920 - version = "0.52.6" 1921 - source = "registry+https://github.com/rust-lang/crates.io-index" 1922 - checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 1923 - dependencies = [ 1924 - "windows_aarch64_gnullvm 0.52.6", 1925 - "windows_aarch64_msvc 0.52.6", 1926 - "windows_i686_gnu 0.52.6", 1927 - "windows_i686_gnullvm 0.52.6", 1928 - "windows_i686_msvc 0.52.6", 1929 - "windows_x86_64_gnu 0.52.6", 1930 - 
"windows_x86_64_gnullvm 0.52.6", 1931 - "windows_x86_64_msvc 0.52.6", 550 + "windows-targets", 1932 551 ] 1933 552 1934 553 [[package]] ··· 1938 557 checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" 1939 558 dependencies = [ 1940 559 "windows-link", 1941 - "windows_aarch64_gnullvm 0.53.1", 1942 - "windows_aarch64_msvc 0.53.1", 1943 - "windows_i686_gnu 0.53.1", 1944 - "windows_i686_gnullvm 0.53.1", 1945 - "windows_i686_msvc 0.53.1", 1946 - "windows_x86_64_gnu 0.53.1", 1947 - "windows_x86_64_gnullvm 0.53.1", 1948 - "windows_x86_64_msvc 0.53.1", 560 + "windows_aarch64_gnullvm", 561 + "windows_aarch64_msvc", 562 + "windows_i686_gnu", 563 + "windows_i686_gnullvm", 564 + "windows_i686_msvc", 565 + "windows_x86_64_gnu", 566 + "windows_x86_64_gnullvm", 567 + "windows_x86_64_msvc", 1949 568 ] 1950 569 1951 570 [[package]] 1952 571 name = "windows_aarch64_gnullvm" 1953 - version = "0.52.6" 1954 - source = "registry+https://github.com/rust-lang/crates.io-index" 1955 - checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 1956 - 1957 - [[package]] 1958 - name = "windows_aarch64_gnullvm" 1959 572 version = "0.53.1" 1960 573 source = "registry+https://github.com/rust-lang/crates.io-index" 1961 574 checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" 1962 - 1963 - [[package]] 1964 - name = "windows_aarch64_msvc" 1965 - version = "0.52.6" 1966 - source = "registry+https://github.com/rust-lang/crates.io-index" 1967 - checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 1968 575 1969 576 [[package]] 1970 577 name = "windows_aarch64_msvc" ··· 1974 581 1975 582 [[package]] 1976 583 name = "windows_i686_gnu" 1977 - version = "0.52.6" 1978 - source = "registry+https://github.com/rust-lang/crates.io-index" 1979 - checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 1980 - 1981 - [[package]] 1982 - name = "windows_i686_gnu" 1983 584 version = "0.53.1" 1984 585 
source = "registry+https://github.com/rust-lang/crates.io-index" 1985 586 checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" 1986 587 1987 588 [[package]] 1988 589 name = "windows_i686_gnullvm" 1989 - version = "0.52.6" 1990 - source = "registry+https://github.com/rust-lang/crates.io-index" 1991 - checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 1992 - 1993 - [[package]] 1994 - name = "windows_i686_gnullvm" 1995 590 version = "0.53.1" 1996 591 source = "registry+https://github.com/rust-lang/crates.io-index" 1997 592 checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" 1998 593 1999 594 [[package]] 2000 595 name = "windows_i686_msvc" 2001 - version = "0.52.6" 2002 - source = "registry+https://github.com/rust-lang/crates.io-index" 2003 - checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 2004 - 2005 - [[package]] 2006 - name = "windows_i686_msvc" 2007 596 version = "0.53.1" 2008 597 source = "registry+https://github.com/rust-lang/crates.io-index" 2009 598 checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" 2010 599 2011 600 [[package]] 2012 601 name = "windows_x86_64_gnu" 2013 - version = "0.52.6" 2014 - source = "registry+https://github.com/rust-lang/crates.io-index" 2015 - checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 2016 - 2017 - [[package]] 2018 - name = "windows_x86_64_gnu" 2019 602 version = "0.53.1" 2020 603 source = "registry+https://github.com/rust-lang/crates.io-index" 2021 604 checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" 2022 605 2023 606 [[package]] 2024 607 name = "windows_x86_64_gnullvm" 2025 - version = "0.52.6" 2026 - source = "registry+https://github.com/rust-lang/crates.io-index" 2027 - checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 2028 - 2029 - [[package]] 2030 - name = "windows_x86_64_gnullvm" 2031 608 version = 
"0.53.1" 2032 609 source = "registry+https://github.com/rust-lang/crates.io-index" 2033 610 checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" 2034 611 2035 612 [[package]] 2036 613 name = "windows_x86_64_msvc" 2037 - version = "0.52.6" 2038 - source = "registry+https://github.com/rust-lang/crates.io-index" 2039 - checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 2040 - 2041 - [[package]] 2042 - name = "windows_x86_64_msvc" 2043 614 version = "0.53.1" 2044 615 source = "registry+https://github.com/rust-lang/crates.io-index" 2045 616 checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" ··· 2051 622 checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" 2052 623 2053 624 [[package]] 2054 - name = "writeable" 2055 - version = "0.6.2" 2056 - source = "registry+https://github.com/rust-lang/crates.io-index" 2057 - checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" 2058 - 2059 - [[package]] 2060 - name = "yoke" 2061 - version = "0.8.1" 2062 - source = "registry+https://github.com/rust-lang/crates.io-index" 2063 - checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" 2064 - dependencies = [ 2065 - "stable_deref_trait", 2066 - "yoke-derive", 2067 - "zerofrom", 2068 - ] 2069 - 2070 - [[package]] 2071 - name = "yoke-derive" 2072 - version = "0.8.1" 2073 - source = "registry+https://github.com/rust-lang/crates.io-index" 2074 - checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" 2075 - dependencies = [ 2076 - "proc-macro2", 2077 - "quote", 2078 - "syn 2.0.106", 2079 - "synstructure", 2080 - ] 2081 - 2082 - [[package]] 2083 - name = "zerocopy" 2084 - version = "0.8.27" 2085 - source = "registry+https://github.com/rust-lang/crates.io-index" 2086 - checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" 2087 - dependencies = [ 2088 - "zerocopy-derive", 2089 - ] 2090 - 2091 - 
[[package]] 2092 - name = "zerocopy-derive" 2093 - version = "0.8.27" 625 + name = "xxhash-rust" 626 + version = "0.8.15" 2094 627 source = "registry+https://github.com/rust-lang/crates.io-index" 2095 - checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" 2096 - dependencies = [ 2097 - "proc-macro2", 2098 - "quote", 2099 - "syn 2.0.106", 2100 - ] 2101 - 2102 - [[package]] 2103 - name = "zerofrom" 2104 - version = "0.1.6" 2105 - source = "registry+https://github.com/rust-lang/crates.io-index" 2106 - checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" 2107 - dependencies = [ 2108 - "zerofrom-derive", 2109 - ] 2110 - 2111 - [[package]] 2112 - name = "zerofrom-derive" 2113 - version = "0.1.6" 2114 - source = "registry+https://github.com/rust-lang/crates.io-index" 2115 - checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" 2116 - dependencies = [ 2117 - "proc-macro2", 2118 - "quote", 2119 - "syn 2.0.106", 2120 - "synstructure", 2121 - ] 2122 - 2123 - [[package]] 2124 - name = "zerotrie" 2125 - version = "0.2.3" 2126 - source = "registry+https://github.com/rust-lang/crates.io-index" 2127 - checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" 2128 - dependencies = [ 2129 - "displaydoc", 2130 - "yoke", 2131 - "zerofrom", 2132 - ] 2133 - 2134 - [[package]] 2135 - name = "zerovec" 2136 - version = "0.11.5" 2137 - source = "registry+https://github.com/rust-lang/crates.io-index" 2138 - checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" 2139 - dependencies = [ 2140 - "yoke", 2141 - "zerofrom", 2142 - "zerovec-derive", 2143 - ] 2144 - 2145 - [[package]] 2146 - name = "zerovec-derive" 2147 - version = "0.11.2" 2148 - source = "registry+https://github.com/rust-lang/crates.io-index" 2149 - checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" 2150 - dependencies = [ 2151 - "proc-macro2", 2152 - "quote", 2153 - "syn 2.0.106", 2154 - 
] 628 + checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
+3 -39
Cargo.toml
··· 1 1 [package] 2 2 name = "repo-stream" 3 - version = "0.1.1" 3 + version = "0.2.2" 4 4 edition = "2024" 5 5 license = "MIT OR Apache-2.0" 6 - description = "Fast and robust atproto CAR file processing in rust" 6 + description = "A robust CAR file -> MST walker for atproto" 7 7 repository = "https://tangled.org/@microcosm.blue/repo-stream" 8 8 9 9 [dependencies] 10 - bincode = { version = "2.0.1", features = ["serde"] } 11 - futures = "0.3.31" 12 - futures-core = "0.3.31" 13 - heed = "0.22.0" 14 - ipld-core = { version = "0.4.2", features = ["serde"] } 15 - iroh-car = "0.5.1" 16 - log = "0.4.28" 17 - multibase = "0.9.2" 18 - redb = "3.1.0" 19 - rusqlite = "0.37.0" 20 - rustcask = "0.1.0" 21 - serde = { version = "1.0.228", features = ["derive"] } 22 - serde_bytes = "0.11.19" 23 - serde_ipld_dagcbor = "0.6.4" 24 - thiserror = "2.0.17" 25 - tokio = { version = "1.47.1", features = ["rt"] } 26 - 27 - [dev-dependencies] 10 + fjall = "3.0.1" 28 11 clap = { version = "4.5.48", features = ["derive"] } 29 - criterion = { version = "0.7.0", features = ["async_tokio"] } 30 - env_logger = "0.11.8" 31 - multibase = "0.9.2" 32 - tempfile = "3.23.0" 33 - tokio = { version = "1.47.1", features = ["full"] } 34 - 35 - [profile.profiling] 36 - inherits = "release" 37 - debug = true 38 12 39 - [profile.release] 40 - debug = true 41 - 42 - [[bench]] 43 - name = "non-huge-cars" 44 - harness = false 45 - 46 - [[bench]] 47 - name = "huge-car" 48 - harness = false
+12 -21
benches/huge-car.rs
··· 1 1 extern crate repo_stream; 2 - use futures::TryStreamExt; 3 - use iroh_car::CarReader; 4 - use std::convert::Infallible; 2 + use repo_stream::Driver; 5 3 use std::path::{Path, PathBuf}; 6 4 7 5 use criterion::{Criterion, criterion_group, criterion_main}; ··· 20 18 }); 21 19 } 22 20 23 - async fn drive_car(filename: impl AsRef<Path>) { 21 + async fn drive_car(filename: impl AsRef<Path>) -> usize { 24 22 let reader = tokio::fs::File::open(filename).await.unwrap(); 25 23 let reader = tokio::io::BufReader::new(reader); 26 - let reader = CarReader::new(reader).await.unwrap(); 27 24 28 - let root = reader 29 - .header() 30 - .roots() 31 - .first() 32 - .ok_or("missing root") 25 + let mut driver = match Driver::load_car(reader, |block| block.len(), 1024) 26 + .await 33 27 .unwrap() 34 - .clone(); 35 - 36 - let stream = std::pin::pin!(reader.stream()); 37 - 38 - let (_commit, v) = 39 - repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 40 - .await 41 - .unwrap(); 42 - let mut record_stream = std::pin::pin!(v.stream()); 28 + { 29 + Driver::Memory(_, mem_driver) => mem_driver, 30 + Driver::Disk(_) => panic!("not doing disk for benchmark"), 31 + }; 43 32 44 - while let Some(_) = record_stream.try_next().await.unwrap() { 45 - // just here for the drive 33 + let mut n = 0; 34 + while let Some(pairs) = driver.next_chunk(256).await.unwrap() { 35 + n += pairs.len(); 46 36 } 37 + n 47 38 } 48 39 49 40 criterion_group!(benches, criterion_benchmark);
+16 -22
benches/non-huge-cars.rs
··· 1 1 extern crate repo_stream; 2 - use futures::TryStreamExt; 3 - use iroh_car::CarReader; 4 - use std::convert::Infallible; 2 + use repo_stream::Driver; 5 3 6 4 use criterion::{Criterion, criterion_group, criterion_main}; 7 5 6 + const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car"); 8 7 const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car"); 9 8 const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car"); 10 9 const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car"); ··· 15 14 .build() 16 15 .expect("Creating runtime failed"); 17 16 17 + c.bench_function("empty-car", |b| { 18 + b.to_async(&rt).iter(async || drive_car(EMPTY_CAR).await) 19 + }); 18 20 c.bench_function("tiny-car", |b| { 19 21 b.to_async(&rt).iter(async || drive_car(TINY_CAR).await) 20 22 }); ··· 26 28 }); 27 29 } 28 30 29 - async fn drive_car(bytes: &[u8]) { 30 - let reader = CarReader::new(bytes).await.unwrap(); 31 - 32 - let root = reader 33 - .header() 34 - .roots() 35 - .first() 36 - .ok_or("missing root") 31 + async fn drive_car(bytes: &[u8]) -> usize { 32 + let mut driver = match Driver::load_car(bytes, |block| block.len(), 32) 33 + .await 37 34 .unwrap() 38 - .clone(); 39 - 40 - let stream = std::pin::pin!(reader.stream()); 35 + { 36 + Driver::Memory(_, mem_driver) => mem_driver, 37 + Driver::Disk(_) => panic!("not benching big cars here"), 38 + }; 41 39 42 - let (_commit, v) = 43 - repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 44 - .await 45 - .unwrap(); 46 - let mut record_stream = std::pin::pin!(v.stream()); 47 - 48 - while let Some(_) = record_stream.try_next().await.unwrap() { 49 - // just here for the drive 40 + let mut n = 0; 41 + while let Some(pairs) = driver.next_chunk(256).await.unwrap() { 42 + n += pairs.len(); 50 43 } 44 + n 51 45 } 52 46 53 47 criterion_group!(benches, criterion_benchmark);
car-samples/empty.car

This is a binary file and will not be displayed.

+27 -56
examples/disk-read-file/main.rs
··· 1 - extern crate repo_stream; 2 1 use clap::Parser; 3 - use repo_stream::drive::Processable; 4 - use serde::{Deserialize, Serialize}; 5 - use std::path::PathBuf; 6 - 7 - type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; 2 + use fjall::{Database, KeyspaceCreateOptions}; 3 + use std::{path::PathBuf, collections::BTreeMap}; 8 4 9 5 #[derive(Debug, Parser)] 10 6 struct Args { 11 7 #[arg()] 12 - car: PathBuf, 13 - #[arg()] 14 - tmpfile: PathBuf, 8 + db_path: PathBuf, 15 9 } 16 10 17 - #[derive(Clone, Serialize, Deserialize)] 18 - struct S(usize); 11 + fn main() -> Result<(), Box<dyn std::error::Error>> { 12 + let Args { db_path } = Args::parse(); 19 13 20 - impl Processable for S { 21 - fn get_size(&self) -> usize { 22 - 0 // no additional space taken, just its stack size (newtype is free) 14 + let db = Database::builder(db_path).open()?; 15 + let ks = db.keyspace("z", KeyspaceCreateOptions::default)?; 16 + let mut seen_keys: BTreeMap<Vec<u8>, usize> = BTreeMap::default(); 17 + 18 + print!("writing..."); 19 + for i in 0..250_000_usize { 20 + let k = i.to_be_bytes().to_vec(); 21 + ks.insert(k.clone(), vec![0xAA; 256])?; 22 + seen_keys.insert(k, i); 23 23 } 24 - } 25 24 26 - #[tokio::main] 27 - async fn main() -> Result<()> { 28 - env_logger::init(); 25 + println!(" done. checking keys..."); 29 26 30 - let Args { car, tmpfile } = Args::parse(); 31 - let reader = tokio::fs::File::open(car).await?; 32 - let reader = tokio::io::BufReader::new(reader); 27 + // remove every seen key that fjall actually has, to see what's left 28 + for guard in ks.iter() { 29 + seen_keys.remove(guard.key()?.as_ref()); 30 + } 33 31 34 - // let kb = 2_usize.pow(10); 35 - let mb = 2_usize.pow(20); 36 - 37 - let mut driver = 38 - match repo_stream::drive::load_car(reader, |block| S(block.len()), 5 * mb).await? 
{ 39 - repo_stream::drive::Vehicle::Lil(_, _) => panic!("try this on a bigger car"), 40 - repo_stream::drive::Vehicle::Big(big_stuff) => { 41 - let disk_store = repo_stream::disk::SqliteStore::new(tmpfile.clone()); 42 - // let disk_store = repo_stream::disk::RedbStore::new(tmpfile.clone()); 43 - // let disk_store = repo_stream::disk::RustcaskStore::new(tmpfile.clone()); 44 - // let disk_store = repo_stream::disk::HeedStore::new(tmpfile.clone()); 45 - let (commit, driver) = big_stuff.finish_loading(disk_store).await?; 46 - log::warn!("big: {:?}", commit); 47 - driver 48 - } 49 - }; 50 - 51 - println!("hello!"); 52 - 53 - let mut n = 0; 54 - loop { 55 - let (d, p) = driver.next_chunk(1024).await?; 56 - driver = d; 57 - let Some(pairs) = p else { 58 - break; 59 - }; 60 - n += pairs.len(); 61 - // log::info!("got {rkey:?}"); 32 + // report the result 33 + if seen_keys.len() == 0 { 34 + println!("[ OK ] all keys found"); 35 + } else { 36 + println!("[FAIL] fjall did not have all seen_keys:"); 37 + for (k, i) in seen_keys { 38 + println!(" insert #{i} missing, key bytes: {k:?}"); 39 + } 62 40 } 63 - // log::info!("now is the time to check mem..."); 64 - // tokio::time::sleep(std::time::Duration::from_secs(22)).await; 65 - drop(driver); 66 - log::info!("bye! {n}"); 67 - 68 - std::fs::remove_file(tmpfile).unwrap(); 69 - // std::fs::remove_dir_all(tmpfile).unwrap(); 70 41 71 42 Ok(()) 72 43 }
+14 -6
examples/read-file/main.rs
··· 1 + /*! 2 + Read a CAR file with in-memory processing 3 + */ 4 + 1 5 extern crate repo_stream; 2 6 use clap::Parser; 7 + use repo_stream::{Driver, DriverBuilder}; 3 8 use std::path::PathBuf; 4 9 5 10 type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; ··· 18 23 let reader = tokio::fs::File::open(file).await?; 19 24 let reader = tokio::io::BufReader::new(reader); 20 25 21 - let (commit, mut driver) = 22 - match repo_stream::drive::load_car(reader, |block| block.len(), 1024 * 1024).await? { 23 - repo_stream::drive::Vehicle::Lil(commit, mem_driver) => (commit, mem_driver), 24 - repo_stream::drive::Vehicle::Big(_) => panic!("can't handle big cars yet"), 25 - }; 26 + let (commit, mut driver) = match DriverBuilder::new() 27 + .with_block_processor(|block| block.len()) 28 + .load_car(reader) 29 + .await? 30 + { 31 + Driver::Memory(commit, mem_driver) => (commit, mem_driver), 32 + Driver::Disk(_) => panic!("this example doesn't handle big CARs"), 33 + }; 26 34 27 35 log::info!("got commit: {commit:?}"); 28 36 ··· 31 39 n += pairs.len(); 32 40 // log::info!("got {rkey:?}"); 33 41 } 34 - log::info!("bye! {n}"); 42 + log::info!("bye! total records={n}"); 35 43 36 44 Ok(()) 37 45 }
+70 -2
readme.md
··· 1 1 # repo-stream 2 2 3 - Fast and (aspirationally) robust atproto CAR file processing in rust 3 + A robust CAR file -> MST walker for atproto 4 + 5 + [![Crates.io][crates-badge]](https://crates.io/crates/repo-stream) 6 + [![Documentation][docs-badge]](https://docs.rs/repo-stream) 7 + [![Sponsor][sponsor-badge]](https://github.com/sponsors/uniphil) 8 + 9 + [crates-badge]: https://img.shields.io/crates/v/repo-stream.svg 10 + [docs-badge]: https://docs.rs/repo-stream/badge.svg 11 + [sponsor-badge]: https://img.shields.io/badge/at-microcosm-b820f9?labelColor=b820f9&logo=githubsponsors&logoColor=fff 12 + 13 + ```rust 14 + use repo_stream::{Driver, DriverBuilder, DriveError, DiskBuilder}; 15 + 16 + #[tokio::main] 17 + async fn main() -> Result<(), DriveError> { 18 + // repo-stream takes any AsyncRead as input, like a tokio::fs::File 19 + let reader = tokio::fs::File::open("repo.car".into()).await?; 20 + let reader = tokio::io::BufReader::new(reader); 21 + 22 + // example repo workload is simply counting the total record bytes 23 + let mut total_size = 0; 24 + 25 + match DriverBuilder::new() 26 + .with_mem_limit_mb(10) 27 + .with_block_processor(|rec| rec.len()) // block processing: just extract the raw record size 28 + .load_car(reader) 29 + .await? 30 + { 31 + 32 + // if all blocks fit within memory 33 + Driver::Memory(_commit, mut driver) => { 34 + while let Some(chunk) = driver.next_chunk(256).await? { 35 + for (_rkey, size) in chunk { 36 + total_size += size; 37 + } 38 + } 39 + }, 40 + 41 + // if the CAR was too big for in-memory processing 42 + Driver::Disk(paused) => { 43 + // set up a disk store we can spill to 44 + let store = DiskBuilder::new().open("some/path.db".into()).await?; 45 + // do the spilling, get back a (similar) driver 46 + let (_commit, mut driver) = paused.finish_loading(store).await?; 47 + 48 + while let Some(chunk) = driver.next_chunk(256).await? 
{ 49 + for (_rkey, size) in chunk { 50 + total_size += size; 51 + } 52 + } 53 + 54 + // clean up the disk store (drop tables etc) 55 + driver.reset_store().await?; 56 + } 57 + }; 58 + println!("sum of size of all records: {total_size}"); 59 + Ok(()) 60 + } 61 + ``` 62 + 63 + more recent todo 64 + 65 + - [ ] get an *empty* car for the test suite 66 + - [x] implement a max size on disk limit 67 + 68 + 69 + ----- 70 + 71 + older stuff (to clean up): 4 72 5 73 6 74 current car processing times (records processed into their length usize, phil's dev machine): ··· 27 95 -> yeah the commit is returned from init 28 96 - [ ] spec compliance todos 29 97 - [x] assert that keys are ordered and fail if not 30 - - [ ] verify node mst depth from key (possibly pending [interop test fixes](https://github.com/bluesky-social/atproto-interop-tests/issues/5)) 98 + - [x] verify node mst depth from key (possibly pending [interop test fixes](https://github.com/bluesky-social/atproto-interop-tests/issues/5)) 31 99 - [ ] performance todos 32 100 - [x] consume the serialized nodes into a mutable efficient format 33 101 - [ ] maybe customize the deserialize impl to do that directly?
-425
src/disk.rs
··· 1 - use redb::ReadableDatabase; 2 - use rusqlite::OptionalExtension; 3 - use std::error::Error; 4 - use std::path::PathBuf; 5 - 6 - pub trait StorageErrorBase: Error + Send + 'static {} 7 - 8 - /// high level potential storage resource 9 - /// 10 - /// separating this allows (hopefully) implementing a storage pool that can 11 - /// async-block when until a member is available to use 12 - pub trait DiskStore { 13 - type StorageError: StorageErrorBase + Send; 14 - type Access: DiskAccess<StorageError = Self::StorageError>; 15 - fn get_access(&mut self) -> impl Future<Output = Result<Self::Access, Self::StorageError>>; 16 - } 17 - 18 - /// actual concrete access to disk storage 19 - pub trait DiskAccess: Send { 20 - type StorageError: StorageErrorBase; 21 - 22 - fn get_writer(&mut self) -> Result<impl DiskWriter<Self::StorageError>, Self::StorageError>; 23 - 24 - fn get_reader( 25 - &self, 26 - ) -> Result<impl DiskReader<StorageError = Self::StorageError>, Self::StorageError>; 27 - 28 - // TODO: force a cleanup implementation? 
29 - } 30 - 31 - pub trait DiskWriter<E: StorageErrorBase> { 32 - fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> Result<(), E>; 33 - fn put_many(&mut self, _kv: impl Iterator<Item = (Vec<u8>, Vec<u8>)>) -> Result<(), E>; 34 - } 35 - 36 - pub trait DiskReader { 37 - type StorageError: StorageErrorBase; 38 - fn get(&mut self, key: Vec<u8>) -> Result<Option<Vec<u8>>, Self::StorageError>; 39 - } 40 - 41 - ///////////////// sqlite 42 - 43 - pub struct SqliteStore { 44 - path: PathBuf, 45 - } 46 - 47 - impl SqliteStore { 48 - pub fn new(path: PathBuf) -> Self { 49 - Self { path } 50 - } 51 - } 52 - 53 - impl StorageErrorBase for rusqlite::Error {} 54 - 55 - impl DiskStore for SqliteStore { 56 - type StorageError = rusqlite::Error; 57 - type Access = SqliteAccess; 58 - async fn get_access(&mut self) -> Result<SqliteAccess, rusqlite::Error> { 59 - let path = self.path.clone(); 60 - let conn = tokio::task::spawn_blocking(move || { 61 - let conn = rusqlite::Connection::open(path)?; 62 - 63 - let sq_mb = -(2_i64.pow(10)); // negative is kibibytes for sqlite cache_size 64 - 65 - // conn.pragma_update(None, "journal_mode", "OFF")?; 66 - // conn.pragma_update(None, "journal_mode", "MEMORY")?; 67 - conn.pragma_update(None, "journal_mode", "WAL")?; 68 - conn.pragma_update(None, "synchronous", "OFF")?; 69 - conn.pragma_update(None, "cache_size", (5 * sq_mb).to_string())?; 70 - conn.execute( 71 - "CREATE TABLE blocks ( 72 - key BLOB PRIMARY KEY NOT NULL, 73 - val BLOB NOT NULL 74 - ) WITHOUT ROWID", 75 - (), 76 - )?; 77 - 78 - Ok::<_, Self::StorageError>(conn) 79 - }) 80 - .await 81 - .expect("join error")?; 82 - 83 - Ok(SqliteAccess { conn }) 84 - } 85 - } 86 - 87 - pub struct SqliteAccess { 88 - conn: rusqlite::Connection, 89 - } 90 - 91 - impl DiskAccess for SqliteAccess { 92 - type StorageError = rusqlite::Error; 93 - fn get_writer(&mut self) -> Result<impl DiskWriter<rusqlite::Error>, rusqlite::Error> { 94 - let tx = self.conn.transaction()?; 95 - // let insert_stmt = 
tx.prepare("INSERT INTO blocks (key, val) VALUES (?1, ?2)")?; 96 - Ok(SqliteWriter { tx: Some(tx) }) 97 - } 98 - fn get_reader( 99 - &self, 100 - ) -> Result<impl DiskReader<StorageError = rusqlite::Error>, rusqlite::Error> { 101 - let select_stmt = self.conn.prepare("SELECT val FROM blocks WHERE key = ?1")?; 102 - Ok(SqliteReader { select_stmt }) 103 - } 104 - } 105 - 106 - pub struct SqliteWriter<'conn> { 107 - tx: Option<rusqlite::Transaction<'conn>>, 108 - } 109 - 110 - /// oops careful in async 111 - impl Drop for SqliteWriter<'_> { 112 - fn drop(&mut self) { 113 - let tx = self.tx.take(); 114 - tx.unwrap().commit().unwrap(); 115 - } 116 - } 117 - 118 - impl DiskWriter<rusqlite::Error> for SqliteWriter<'_> { 119 - fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> rusqlite::Result<()> { 120 - let tx = self.tx.as_ref().unwrap(); 121 - let mut insert_stmt = tx.prepare_cached("INSERT INTO blocks (key, val) VALUES (?1, ?2)")?; 122 - insert_stmt.execute((key, val))?; 123 - Ok(()) 124 - } 125 - fn put_many(&mut self, kv: impl Iterator<Item = (Vec<u8>, Vec<u8>)>) -> rusqlite::Result<()> { 126 - let tx = self.tx.as_ref().unwrap(); 127 - let mut insert_stmt = tx.prepare_cached("INSERT INTO blocks (key, val) VALUES (?1, ?2)")?; 128 - for (k, v) in kv { 129 - insert_stmt.execute((k, v))?; 130 - } 131 - Ok(()) 132 - } 133 - } 134 - 135 - pub struct SqliteReader<'conn> { 136 - select_stmt: rusqlite::Statement<'conn>, 137 - } 138 - 139 - impl DiskReader for SqliteReader<'_> { 140 - type StorageError = rusqlite::Error; 141 - fn get(&mut self, key: Vec<u8>) -> rusqlite::Result<Option<Vec<u8>>> { 142 - self.select_stmt 143 - .query_one((&key,), |row| row.get(0)) 144 - .optional() 145 - } 146 - } 147 - 148 - //////////// redb why not 149 - 150 - const REDB_TABLE: redb::TableDefinition<&[u8], &[u8]> = redb::TableDefinition::new("blocks"); 151 - 152 - pub struct RedbStore { 153 - path: PathBuf, 154 - } 155 - 156 - impl RedbStore { 157 - pub fn new(path: PathBuf) -> Self { 158 - 
Self { path } 159 - } 160 - } 161 - 162 - impl StorageErrorBase for redb::Error {} 163 - 164 - impl DiskStore for RedbStore { 165 - type StorageError = redb::Error; 166 - type Access = RedbAccess; 167 - async fn get_access(&mut self) -> Result<RedbAccess, redb::Error> { 168 - let path = self.path.clone(); 169 - let mb = 2_usize.pow(20); 170 - let db = tokio::task::spawn_blocking(move || { 171 - let db = redb::Database::builder() 172 - .set_cache_size(5 * mb) 173 - .create(path)?; 174 - Ok::<_, Self::StorageError>(db) 175 - }) 176 - .await 177 - .expect("join error")?; 178 - 179 - Ok(RedbAccess { db }) 180 - } 181 - } 182 - 183 - pub struct RedbAccess { 184 - db: redb::Database, 185 - } 186 - 187 - impl DiskAccess for RedbAccess { 188 - type StorageError = redb::Error; 189 - fn get_writer(&mut self) -> Result<impl DiskWriter<redb::Error>, redb::Error> { 190 - let mut tx = self.db.begin_write()?; 191 - tx.set_durability(redb::Durability::None)?; 192 - Ok(RedbWriter { tx: Some(tx) }) 193 - } 194 - fn get_reader(&self) -> Result<impl DiskReader<StorageError = redb::Error>, redb::Error> { 195 - let tx = self.db.begin_read()?; 196 - Ok(RedbReader { tx }) 197 - } 198 - } 199 - 200 - pub struct RedbWriter { 201 - tx: Option<redb::WriteTransaction>, 202 - } 203 - 204 - impl DiskWriter<redb::Error> for RedbWriter { 205 - fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> Result<(), redb::Error> { 206 - let mut table = self.tx.as_ref().unwrap().open_table(REDB_TABLE)?; 207 - table.insert(&*key, &*val)?; 208 - Ok(()) 209 - } 210 - fn put_many(&mut self, kv: impl Iterator<Item = (Vec<u8>, Vec<u8>)>) -> Result<(), redb::Error> { 211 - let mut table = self.tx.as_ref().unwrap().open_table(REDB_TABLE)?; 212 - for (k, v) in kv { 213 - table.insert(&*k, &*v)?; 214 - } 215 - Ok(()) 216 - } 217 - } 218 - 219 - /// oops careful in async 220 - impl Drop for RedbWriter { 221 - fn drop(&mut self) { 222 - let tx = self.tx.take(); 223 - tx.unwrap().commit().unwrap(); 224 - } 225 - } 226 - 227 
- pub struct RedbReader { 228 - tx: redb::ReadTransaction, 229 - } 230 - 231 - impl DiskReader for RedbReader { 232 - type StorageError = redb::Error; 233 - fn get(&mut self, key: Vec<u8>) -> Result<Option<Vec<u8>>, redb::Error> { 234 - let table = self.tx.open_table(REDB_TABLE)?; 235 - let rv = table.get(&*key)?.map(|guard| guard.value().to_vec()); 236 - Ok(rv) 237 - } 238 - } 239 - 240 - ///// rustcask?? 241 - 242 - pub struct RustcaskStore { 243 - path: PathBuf, 244 - } 245 - 246 - impl RustcaskStore { 247 - pub fn new(path: PathBuf) -> Self { 248 - Self { path } 249 - } 250 - } 251 - 252 - #[derive(Debug, thiserror::Error)] 253 - pub enum CaskError { 254 - #[error(transparent)] 255 - OpenError(#[from] rustcask::error::OpenError), 256 - #[error(transparent)] 257 - SetError(#[from] rustcask::error::SetError), 258 - #[error("failed to get key: {0}")] 259 - GetError(String), 260 - #[error("failed to ensure directory: {0}")] 261 - EnsureDirError(std::io::Error), 262 - } 263 - 264 - impl StorageErrorBase for CaskError {} 265 - 266 - impl DiskStore for RustcaskStore { 267 - type StorageError = CaskError; 268 - type Access = RustcaskAccess; 269 - async fn get_access(&mut self) -> Result<RustcaskAccess, CaskError> { 270 - let path = self.path.clone(); 271 - let db = tokio::task::spawn_blocking(move || { 272 - std::fs::create_dir_all(&path).map_err(CaskError::EnsureDirError)?; 273 - let db = rustcask::Rustcask::builder().open(&path)?; 274 - Ok::<_, Self::StorageError>(db) 275 - }) 276 - .await 277 - .expect("join error")?; 278 - 279 - Ok(RustcaskAccess { db }) 280 - } 281 - } 282 - 283 - pub struct RustcaskAccess { 284 - db: rustcask::Rustcask, 285 - } 286 - 287 - impl DiskAccess for RustcaskAccess { 288 - type StorageError = CaskError; 289 - fn get_writer(&mut self) -> Result<impl DiskWriter<CaskError>, CaskError> { 290 - Ok(RustcaskWriter { db: self.db.clone() }) 291 - } 292 - fn get_reader(&self) -> Result<impl DiskReader<StorageError = CaskError>, CaskError> { 293 - 
Ok(RustcaskReader { db: self.db.clone() }) 294 - } 295 - } 296 - 297 - pub struct RustcaskWriter { 298 - db: rustcask::Rustcask, 299 - } 300 - 301 - impl DiskWriter<CaskError> for RustcaskWriter { 302 - fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> Result<(), CaskError> { 303 - self.db.set(key, val)?; 304 - Ok(()) 305 - } 306 - fn put_many(&mut self, kv: impl Iterator<Item = (Vec<u8>, Vec<u8>)>) -> Result<(), CaskError> { 307 - for (k, v) in kv { 308 - self.db.set(k, v)?; 309 - } 310 - Ok(()) 311 - } 312 - } 313 - 314 - pub struct RustcaskReader { 315 - db: rustcask::Rustcask, 316 - } 317 - 318 - impl DiskReader for RustcaskReader { 319 - type StorageError = CaskError; 320 - fn get(&mut self, key: Vec<u8>) -> Result<Option<Vec<u8>>, CaskError> { 321 - self.db 322 - .get(&key) 323 - .map_err(|e| CaskError::GetError(e.to_string())) 324 - } 325 - } 326 - 327 - 328 - ///////// heeeeeeeeeeeeed 329 - 330 - type HeedBytes = heed::types::SerdeBincode<Vec<u8>>; 331 - type HeedDb = heed::Database<HeedBytes, HeedBytes>; 332 - // type HeedDb = heed::Database<Vec<u8>, Vec<u8>>; 333 - 334 - pub struct HeedStore { 335 - path: PathBuf, 336 - } 337 - 338 - impl HeedStore { 339 - pub fn new(path: PathBuf) -> Self { 340 - Self { path } 341 - } 342 - } 343 - 344 - impl StorageErrorBase for heed::Error {} 345 - 346 - impl DiskStore for HeedStore { 347 - type StorageError = heed::Error; 348 - type Access = HeedAccess; 349 - async fn get_access(&mut self) -> Result<HeedAccess, heed::Error> { 350 - let path = self.path.clone(); 351 - let env = tokio::task::spawn_blocking(move || { 352 - std::fs::create_dir_all(&path).unwrap(); 353 - let env = unsafe { 354 - heed::EnvOpenOptions::new() 355 - .map_size(1 * 2_usize.pow(30)) 356 - .open(path)? 
357 - }; 358 - Ok::<_, Self::StorageError>(env) 359 - }) 360 - .await 361 - .expect("join error")?; 362 - 363 - Ok(HeedAccess { env, db: None }) 364 - } 365 - } 366 - 367 - pub struct HeedAccess { 368 - env: heed::Env, 369 - db: Option<HeedDb>, 370 - } 371 - 372 - impl DiskAccess for HeedAccess { 373 - type StorageError = heed::Error; 374 - fn get_writer(&mut self) -> Result<impl DiskWriter<heed::Error>, heed::Error> { 375 - let mut tx = self.env.write_txn()?; 376 - let db = self.env.create_database(&mut tx, None)?; 377 - self.db = Some(db.clone()); 378 - Ok(HeedWriter { tx: Some(tx), db }) 379 - } 380 - fn get_reader(&self) -> Result<impl DiskReader<StorageError = heed::Error>, heed::Error> { 381 - let tx = self.env.read_txn()?; 382 - let db = self.db.expect("should have called get_writer first"); 383 - Ok(HeedReader { tx, db }) 384 - } 385 - } 386 - 387 - pub struct HeedWriter<'tx> { 388 - tx: Option<heed::RwTxn<'tx>>, 389 - db: HeedDb, 390 - } 391 - 392 - impl DiskWriter<heed::Error> for HeedWriter<'_> { 393 - fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> Result<(), heed::Error> { 394 - let mut tx = self.tx.as_mut().unwrap(); 395 - self.db.put(&mut tx, &key, &val)?; 396 - Ok(()) 397 - } 398 - fn put_many(&mut self, kv: impl Iterator<Item = (Vec<u8>, Vec<u8>)>) -> Result<(), heed::Error> { 399 - let mut tx = self.tx.as_mut().unwrap(); 400 - for (k, v) in kv { 401 - self.db.put(&mut tx, &k, &v)?; 402 - } 403 - Ok(()) 404 - } 405 - } 406 - 407 - /// oops careful in async 408 - impl Drop for HeedWriter<'_> { 409 - fn drop(&mut self) { 410 - let tx = self.tx.take(); 411 - tx.unwrap().commit().unwrap(); 412 - } 413 - } 414 - 415 - pub struct HeedReader<'tx> { 416 - tx: heed::RoTxn<'tx, heed::WithTls>, 417 - db: HeedDb, 418 - } 419 - 420 - impl DiskReader for HeedReader<'_> { 421 - type StorageError = heed::Error; 422 - fn get(&mut self, key: Vec<u8>) -> Result<Option<Vec<u8>>, heed::Error> { 423 - self.db.get(&self.tx, &key) 424 - } 425 - }
-374
src/drive.rs
··· 1 - //! Consume an MST block stream, producing an ordered stream of records 2 - 3 - use crate::disk::{DiskAccess, DiskStore, DiskWriter, StorageErrorBase}; 4 - use ipld_core::cid::Cid; 5 - use iroh_car::CarReader; 6 - use serde::de::DeserializeOwned; 7 - use serde::{Deserialize, Serialize}; 8 - use std::collections::HashMap; 9 - use std::convert::Infallible; 10 - use tokio::io::AsyncRead; 11 - 12 - use crate::mst::{Commit, Node}; 13 - use crate::walk::{DiskTrip, Step, Trip, Walker}; 14 - 15 - /// Errors that can happen while consuming and emitting blocks and records 16 - #[derive(Debug, thiserror::Error)] 17 - pub enum DriveError { 18 - #[error("Error from iroh_car: {0}")] 19 - CarReader(#[from] iroh_car::Error), 20 - #[error("Failed to decode commit block: {0}")] 21 - BadBlock(#[from] serde_ipld_dagcbor::DecodeError<Infallible>), 22 - #[error("The Commit block reference by the root was not found")] 23 - MissingCommit, 24 - #[error("The MST block {0} could not be found")] 25 - MissingBlock(Cid), 26 - #[error("Failed to walk the mst tree: {0}")] 27 - Tripped(#[from] Trip), 28 - #[error("CAR file had no roots")] 29 - MissingRoot, 30 - } 31 - 32 - #[derive(Debug, thiserror::Error)] 33 - pub enum DiskDriveError<E: StorageErrorBase> { 34 - #[error("Error from iroh_car: {0}")] 35 - CarReader(#[from] iroh_car::Error), 36 - #[error("Failed to decode commit block: {0}")] 37 - BadBlock(#[from] serde_ipld_dagcbor::DecodeError<Infallible>), 38 - #[error("Storage error")] 39 - StorageError(#[from] E), 40 - #[error("The Commit block reference by the root was not found")] 41 - MissingCommit, 42 - #[error("The MST block {0} could not be found")] 43 - MissingBlock(Cid), 44 - #[error("Encode error: {0}")] 45 - BincodeEncodeError(#[from] bincode::error::EncodeError), 46 - #[error("Decode error: {0}")] 47 - BincodeDecodeError(#[from] bincode::error::DecodeError), 48 - #[error("disk tripped: {0}")] 49 - DiskTripped(#[from] DiskTrip<E>), 50 - } 51 - 52 - pub trait Processable: Clone 
+ Serialize + DeserializeOwned { 53 - /// the additional size taken up (not including its mem::size_of) 54 - fn get_size(&self) -> usize; 55 - } 56 - 57 - #[derive(Debug, Clone, Serialize, Deserialize)] 58 - pub enum MaybeProcessedBlock<T> { 59 - /// A block that's *probably* a Node (but we can't know yet) 60 - /// 61 - /// It *can be* a record that suspiciously looks a lot like a node, so we 62 - /// cannot eagerly turn it into a Node. We only know for sure what it is 63 - /// when we actually walk down the MST 64 - Raw(Vec<u8>), 65 - /// A processed record from a block that was definitely not a Node 66 - /// 67 - /// Processing has to be fallible because the CAR can have totally-unused 68 - /// blocks, which can just be garbage. since we're eagerly trying to process 69 - /// record blocks without knowing for sure that they *are* records, we 70 - /// discard any definitely-not-nodes that fail processing and keep their 71 - /// error in the buffer for them. if we later try to retreive them as a 72 - /// record, then we can surface the error. 73 - /// 74 - /// If we _never_ needed this block, then we may have wasted a bit of effort 75 - /// trying to process it. Oh well. 76 - /// 77 - /// There's an alternative here, which would be to kick unprocessable blocks 78 - /// back to Raw, or maybe even a new RawUnprocessable variant. Then we could 79 - /// surface the typed error later if needed by trying to reprocess. 80 - Processed(T), 81 - } 82 - 83 - impl<T: Processable> Processable for MaybeProcessedBlock<T> { 84 - /// TODO this is probably a little broken 85 - fn get_size(&self) -> usize { 86 - use std::{cmp::max, mem::size_of}; 87 - 88 - // enum is always as big as its biggest member? 
89 - let base_size = max(size_of::<Vec<u8>>(), size_of::<T>()); 90 - 91 - let extra = match self { 92 - Self::Raw(bytes) => bytes.len(), 93 - Self::Processed(t) => t.get_size(), 94 - }; 95 - 96 - base_size + extra 97 - } 98 - } 99 - 100 - pub enum Vehicle<R: AsyncRead + Unpin, T: Processable> { 101 - Lil(Commit, MemDriver<T>), 102 - Big(BigCar<R, T>), 103 - } 104 - 105 - pub async fn load_car<R: AsyncRead + Unpin, T: Processable>( 106 - reader: R, 107 - process: fn(Vec<u8>) -> T, 108 - max_size: usize, 109 - ) -> Result<Vehicle<R, T>, DriveError> { 110 - let mut mem_blocks = HashMap::new(); 111 - 112 - let mut car = CarReader::new(reader).await?; 113 - 114 - let root = *car 115 - .header() 116 - .roots() 117 - .first() 118 - .ok_or(DriveError::MissingRoot)?; 119 - log::debug!("root: {root:?}"); 120 - 121 - let mut commit = None; 122 - 123 - // try to load all the blocks into memory 124 - let mut mem_size = 0; 125 - while let Some((cid, data)) = car.next_block().await? { 126 - // the root commit is a Special Third Kind of block that we need to make 127 - // sure not to optimistically send to the processing function 128 - if cid == root { 129 - let c: Commit = serde_ipld_dagcbor::from_slice(&data)?; 130 - commit = Some(c); 131 - continue; 132 - } 133 - 134 - // remaining possible types: node, record, other. 
optimistically process 135 - // TODO: get the actual in-memory size to compute disk spill 136 - let maybe_processed = if Node::could_be(&data) { 137 - MaybeProcessedBlock::Raw(data) 138 - } else { 139 - MaybeProcessedBlock::Processed(process(data)) 140 - }; 141 - 142 - // stash (maybe processed) blocks in memory as long as we have room 143 - mem_size += std::mem::size_of::<Cid>() + maybe_processed.get_size(); 144 - mem_blocks.insert(cid, maybe_processed); 145 - if mem_size >= max_size { 146 - return Ok(Vehicle::Big(BigCar { 147 - car, 148 - root, 149 - process, 150 - max_size, 151 - mem_blocks, 152 - commit, 153 - })); 154 - } 155 - } 156 - 157 - // all blocks loaded and we fit in memory! hopefully we found the commit... 158 - let commit = commit.ok_or(DriveError::MissingCommit)?; 159 - 160 - let walker = Walker::new(commit.data); 161 - 162 - Ok(Vehicle::Lil( 163 - commit, 164 - MemDriver { 165 - blocks: mem_blocks, 166 - walker, 167 - process, 168 - }, 169 - )) 170 - } 171 - 172 - /// a paritally memory-loaded car file that needs disk spillover to continue 173 - pub struct BigCar<R: AsyncRead + Unpin, T: Processable> { 174 - car: CarReader<R>, 175 - root: Cid, 176 - process: fn(Vec<u8>) -> T, 177 - max_size: usize, 178 - mem_blocks: HashMap<Cid, MaybeProcessedBlock<T>>, 179 - pub commit: Option<Commit>, 180 - } 181 - 182 - fn encode(v: impl Serialize) -> Result<Vec<u8>, bincode::error::EncodeError> { 183 - bincode::serde::encode_to_vec(v, bincode::config::standard()) 184 - } 185 - 186 - pub fn decode<T: Processable>(bytes: &[u8]) -> Result<T, bincode::error::DecodeError> { 187 - let (t, n) = bincode::serde::decode_from_slice(bytes, bincode::config::standard())?; 188 - assert_eq!(n, bytes.len(), "expected to decode all bytes"); // TODO 189 - Ok(t) 190 - } 191 - 192 - impl<R: AsyncRead + Unpin, T: Processable + Send + 'static> BigCar<R, T> { 193 - pub async fn finish_loading<S: DiskStore>( 194 - mut self, 195 - mut store: S, 196 - ) -> Result<(Commit, BigCarReady<T, 
S::Access>), DiskDriveError<S::StorageError>> 197 - where 198 - S::Access: Send + 'static, 199 - S::StorageError: 'static, 200 - { 201 - // set up access for real 202 - let mut access = store.get_access().await?; 203 - 204 - // move access in and back out so we can manage lifetimes 205 - // dump mem blocks into the store 206 - access = tokio::task::spawn(async move { 207 - let mut writer = access.get_writer()?; 208 - 209 - let kvs = self 210 - .mem_blocks 211 - .into_iter() 212 - .map(|(k, v)| (k.to_bytes(), encode(v).unwrap())); 213 - 214 - writer.put_many(kvs)?; 215 - 216 - drop(writer); // cannot outlive access 217 - Ok::<_, DiskDriveError<S::StorageError>>(access) 218 - }) 219 - .await 220 - .unwrap()?; 221 - 222 - // dump the rest to disk (in chunks) 223 - loop { 224 - let mut chunk = vec![]; 225 - let mut mem_size = 0; 226 - loop { 227 - let Some((cid, data)) = self.car.next_block().await? else { 228 - break; 229 - }; 230 - // we still gotta keep checking for the root since we might not have it 231 - if cid == self.root { 232 - let c: Commit = serde_ipld_dagcbor::from_slice(&data)?; 233 - self.commit = Some(c); 234 - continue; 235 - } 236 - // remaining possible types: node, record, other. 
optimistically process 237 - // TODO: get the actual in-memory size to compute disk spill 238 - let maybe_processed = if Node::could_be(&data) { 239 - MaybeProcessedBlock::Raw(data) 240 - } else { 241 - MaybeProcessedBlock::Processed((self.process)(data)) 242 - }; 243 - mem_size += std::mem::size_of::<Cid>() + maybe_processed.get_size(); 244 - chunk.push((cid, maybe_processed)); 245 - if mem_size >= self.max_size { 246 - break; 247 - } 248 - } 249 - if chunk.is_empty() { 250 - break; 251 - } 252 - 253 - // move access in and back out so we can manage lifetimes 254 - // dump mem blocks into the store 255 - access = tokio::task::spawn_blocking(move || { 256 - let mut writer = access.get_writer()?; 257 - 258 - let kvs = chunk 259 - .into_iter() 260 - .map(|(k, v)| (k.to_bytes(), encode(v).unwrap())); 261 - 262 - writer.put_many(kvs)?; 263 - 264 - drop(writer); // cannot outlive access 265 - Ok::<_, DiskDriveError<S::StorageError>>(access) 266 - }) 267 - .await 268 - .unwrap()?; // TODO 269 - } 270 - 271 - let commit = self.commit.ok_or(DiskDriveError::MissingCommit)?; 272 - 273 - let walker = Walker::new(commit.data); 274 - 275 - Ok(( 276 - commit, 277 - BigCarReady { 278 - process: self.process, 279 - access, 280 - walker, 281 - }, 282 - )) 283 - } 284 - } 285 - 286 - pub struct BigCarReady<T: Clone, A: DiskAccess> { 287 - process: fn(Vec<u8>) -> T, 288 - access: A, 289 - walker: Walker, 290 - } 291 - 292 - impl<T: Processable + Send + 'static, A: DiskAccess + Send + 'static> BigCarReady<T, A> { 293 - pub async fn next_chunk( 294 - mut self, 295 - n: usize, 296 - ) -> Result<(Self, Option<Vec<(String, T)>>), DiskDriveError<A::StorageError>> 297 - where 298 - A::StorageError: Send, 299 - { 300 - let mut out = Vec::with_capacity(n); 301 - (self, out) = tokio::task::spawn_blocking(move || { 302 - let access = self.access; 303 - let mut reader = access.get_reader()?; 304 - 305 - for _ in 0..n { 306 - // walk as far as we can until we run out of blocks or find a record 
307 - match self.walker.disk_step(&mut reader, self.process)? { 308 - Step::Missing(cid) => return Err(DiskDriveError::MissingBlock(cid)), 309 - Step::Finish => break, 310 - Step::Step { rkey, data } => { 311 - out.push((rkey, data)); 312 - continue; 313 - } 314 - }; 315 - } 316 - 317 - drop(reader); // cannot outlive access 318 - self.access = access; 319 - Ok::<_, DiskDriveError<A::StorageError>>((self, out)) 320 - }) 321 - .await 322 - .unwrap()?; // TODO 323 - 324 - if out.is_empty() { 325 - Ok((self, None)) 326 - } else { 327 - Ok((self, Some(out))) 328 - } 329 - } 330 - } 331 - 332 - /// The core driver between the block stream and MST walker 333 - /// 334 - /// In the future, PDSs will export CARs in a stream-friendly order that will 335 - /// enable processing them with tiny memory overhead. But that future is not 336 - /// here yet. 337 - /// 338 - /// CARs are almost always in a stream-unfriendly order, so I'm reverting the 339 - /// optimistic stream features: we load all blocks first, then walk the MST. 340 - /// 341 - /// This makes things much simpler: we only need to worry about spilling to disk 342 - /// in one place, and we always have a reasonable expectation about how much 343 - /// work the init function will do. We can drop the CAR reader before walking, 344 - /// so the sync/async boundaries become a little easier to work around. 345 - #[derive(Debug)] 346 - pub struct MemDriver<T: Processable> { 347 - blocks: HashMap<Cid, MaybeProcessedBlock<T>>, 348 - walker: Walker, 349 - process: fn(Vec<u8>) -> T, 350 - } 351 - 352 - impl<T: Processable> MemDriver<T> { 353 - /// Manually step through the record outputs 354 - pub async fn next_chunk(&mut self, n: usize) -> Result<Option<Vec<(String, T)>>, DriveError> { 355 - let mut out = Vec::with_capacity(n); 356 - for _ in 0..n { 357 - // walk as far as we can until we run out of blocks or find a record 358 - match self.walker.step(&mut self.blocks, self.process)? 
{ 359 - Step::Missing(cid) => return Err(DriveError::MissingBlock(cid)), 360 - Step::Finish => break, 361 - Step::Step { rkey, data } => { 362 - out.push((rkey, data)); 363 - continue; 364 - } 365 - }; 366 - } 367 - 368 - if out.is_empty() { 369 - Ok(None) 370 - } else { 371 - Ok(Some(out)) 372 - } 373 - } 374 - }
+74 -7
src/lib.rs
··· 1 - //! Fast and robust atproto CAR file processing in rust 2 - //! 3 - //! For now see the [examples](https://tangled.org/@microcosm.blue/repo-stream/tree/main/examples) 1 + /*! 2 + A robust CAR file -> MST walker for atproto 3 + 4 + Small CARs have their blocks buffered in memory. If a configurable memory limit 5 + is reached while reading blocks, CAR reading is suspended, and can be continued 6 + by providing disk storage to buffer the CAR blocks instead. 7 + 8 + A `process` function can be provided for tasks where records are transformed 9 + into a smaller representation, to save memory (and disk) during block reading. 10 + 11 + Once blocks are loaded, the MST is walked and emitted as chunks of 12 + `(rkey, processed_block)` pairs, in order (depth first, left-to-right). 13 + 14 + Some MST validations are applied: 15 + - Keys must appear in order 16 + - Keys must be at the correct MST tree depth 17 + 18 + `iroh_car` additionally applies a block size limit of `2MiB`. 19 + 20 + ``` 21 + use repo_stream::{Driver, DriverBuilder, DiskBuilder}; 22 + 23 + # #[tokio::main] 24 + # async fn main() -> Result<(), Box<dyn std::error::Error>> { 25 + # let reader = include_bytes!("../car-samples/tiny.car").as_slice(); 26 + let mut total_size = 0; 27 + 28 + match DriverBuilder::new() 29 + .with_mem_limit_mb(10) 30 + .with_block_processor(|rec| rec.len()) // block processing: just extract the raw record size 31 + .load_car(reader) 32 + .await? 33 + { 34 + 35 + // if all blocks fit within memory 36 + Driver::Memory(_commit, mut driver) => { 37 + while let Some(chunk) = driver.next_chunk(256).await? 
{ 38 + for (_rkey, size) in chunk { 39 + total_size += size; 40 + } 41 + } 42 + }, 4 43 5 - pub mod disk; 6 - pub mod drive; 7 - pub mod mst; 8 - pub mod walk; 44 + // if the CAR was too big for in-memory processing 45 + Driver::Disk(paused) => { 46 + // set up a disk store we can spill to 47 + let store = DiskBuilder::new().open("some/path.db".into()).await?; 48 + // do the spilling, get back a (similar) driver 49 + let (_commit, mut driver) = paused.finish_loading(store).await?; 50 + 51 + while let Some(chunk) = driver.next_chunk(256).await? { 52 + for (_rkey, size) in chunk { 53 + total_size += size; 54 + } 55 + } 56 + 57 + // clean up the disk store (drop tables etc) 58 + driver.reset_store().await?; 59 + } 60 + }; 61 + println!("sum of size of all records: {total_size}"); 62 + # Ok(()) 63 + # } 64 + ``` 65 + 66 + Disk spilling suspends and returns a `Driver::Disk(paused)` instead of going 67 + ahead and eagerly using disk I/O. This means you have to write a bit more code 68 + to handle both cases, but it allows you to have finer control over resource 69 + usage. For example, you can drive a number of parallel memory CAR workers, and 70 + separately have a different number of disk workers picking up suspended disk 71 + tasks from a queue. 72 + 73 + Find more [examples in the repo](https://tangled.org/@microcosm.blue/repo-stream/tree/main/examples). 74 + 75 + */
-114
src/mst.rs
··· 1 - //! Low-level types for parsing raw atproto MST CARs 2 - //! 3 - //! The primary aim is to work through the **tree** structure. Non-node blocks 4 - //! are left as raw bytes, for upper levels to parse into DAG-CBOR or whatever. 5 - 6 - use ipld_core::cid::Cid; 7 - use serde::Deserialize; 8 - 9 - /// The top-level data object in a repository's tree is a signed commit. 10 - #[derive(Debug, Deserialize)] 11 - // #[serde(deny_unknown_fields)] 12 - pub struct Commit { 13 - /// the account DID associated with the repo, in strictly normalized form 14 - /// (eg, lowercase as appropriate) 15 - pub did: String, 16 - /// fixed value of 3 for this repo format version 17 - pub version: u64, 18 - /// pointer to the top of the repo contents tree structure (MST) 19 - pub data: Cid, 20 - /// revision of the repo, used as a logical clock. 21 - /// 22 - /// TID format. Must increase monotonically. Recommend using current 23 - /// timestamp as TID; rev values in the "future" (beyond a fudge factor) 24 - /// should be ignored and not processed 25 - pub rev: String, 26 - /// pointer (by hash) to a previous commit object for this repository. 27 - /// 28 - /// Could be used to create a chain of history, but largely unused (included 29 - /// for v2 backwards compatibility). In version 3 repos, this field must 30 - /// exist in the CBOR object, but is virtually always null. NOTE: previously 31 - /// specified as nullable and optional, but this caused interoperability 32 - /// issues. 
33 - pub prev: Option<Cid>, 34 - /// cryptographic signature of this commit, as raw bytes 35 - #[serde(with = "serde_bytes")] 36 - pub sig: Vec<u8>, 37 - } 38 - 39 - /// MST node data schema 40 - #[derive(Debug, Deserialize, PartialEq)] 41 - #[serde(deny_unknown_fields)] 42 - pub struct Node { 43 - /// link to sub-tree Node on a lower level and with all keys sorting before 44 - /// keys at this node 45 - #[serde(rename = "l")] 46 - pub left: Option<Cid>, 47 - /// ordered list of TreeEntry objects 48 - /// 49 - /// atproto MSTs have a fanout of 4, so there can be max 4 entries. 50 - #[serde(rename = "e")] 51 - pub entries: Vec<Entry>, // maybe we can do [Option<Entry>; 4]? 52 - } 53 - 54 - impl Node { 55 - /// test if a block could possibly be a node 56 - /// 57 - /// we can't eagerly decode records except where we're *sure* they cannot be 58 - /// an mst node (and even then we can only attempt) because you can't know 59 - /// with certainty what a block is supposed to be without actually walking 60 - /// the tree. 61 - /// 62 - /// so if a block *could be* a node, any record converter must postpone 63 - /// processing. if it turns out it happens to be a very node-looking record, 64 - /// well, sorry, it just has to only be processed later when that's known. 65 - pub fn could_be(bytes: impl AsRef<[u8]>) -> bool { 66 - const NODE_FINGERPRINT: [u8; 3] = [ 67 - 0xA2, // map length 2 (for "l" and "e" keys) 68 - 0x61, // text length 1 69 - b'e', // "e" before "l" because map keys have to be lex-sorted 70 - // 0x8?: "e" has array (0x100 upper 3 bits) of some length 71 - ]; 72 - let bytes = bytes.as_ref(); 73 - bytes.starts_with(&NODE_FINGERPRINT) 74 - && bytes 75 - .get(3) 76 - .map(|b| b & 0b1110_0000 == 0x80) 77 - .unwrap_or(false) 78 - } 79 - 80 - /// Check if a node has any entries 81 - /// 82 - /// An empty repository with no records is represented as a single MST node 83 - /// with an empty array of entries. 
This is the only situation in which a 84 - /// tree may contain an empty leaf node which does not either contain keys 85 - /// ("entries") or point to a sub-tree containing entries. 86 - /// 87 - /// TODO: to me this is slightly unclear with respect to `l` (ask someone). 88 - /// ...is that what "The top of the tree must not be a an empty node which 89 - /// only points to a sub-tree." is referring to? 90 - pub fn is_empty(&self) -> bool { 91 - self.left.is_none() && self.entries.is_empty() 92 - } 93 - } 94 - 95 - /// TreeEntry object 96 - #[derive(Debug, Deserialize, PartialEq)] 97 - #[serde(deny_unknown_fields)] 98 - pub struct Entry { 99 - /// count of bytes shared with previous TreeEntry in this Node (if any) 100 - #[serde(rename = "p")] 101 - pub prefix_len: usize, 102 - /// remainder of key for this TreeEntry, after "prefixlen" have been removed 103 - #[serde(rename = "k", with = "serde_bytes")] 104 - pub keysuffix: Vec<u8>, // can we String this here? 105 - /// link to the record data (CBOR) for this entry 106 - #[serde(rename = "v")] 107 - pub value: Cid, 108 - /// link to a sub-tree Node at a lower level 109 - /// 110 - /// the lower level must have keys sorting after this TreeEntry's key (to 111 - /// the "right"), but before the next TreeEntry's key in this Node (if any) 112 - #[serde(rename = "t")] 113 - pub tree: Option<Cid>, 114 - }
-471
src/walk.rs
··· 1 - //! Depth-first MST traversal 2 - 3 - use crate::disk::{DiskReader, StorageErrorBase}; 4 - use crate::drive::{MaybeProcessedBlock, Processable}; 5 - use crate::mst::Node; 6 - use ipld_core::cid::Cid; 7 - use std::collections::HashMap; 8 - use std::convert::Infallible; 9 - 10 - /// Errors that can happen while walking 11 - #[derive(Debug, thiserror::Error)] 12 - pub enum Trip { 13 - #[error("empty mst nodes are not allowed")] 14 - NodeEmpty, 15 - #[error("Failed to fingerprint commit block")] 16 - BadCommitFingerprint, 17 - #[error("Failed to decode commit block: {0}")] 18 - BadCommit(#[from] serde_ipld_dagcbor::DecodeError<Infallible>), 19 - #[error("Action node error: {0}")] 20 - RkeyError(#[from] RkeyError), 21 - #[error("Encountered an rkey out of order while walking the MST")] 22 - RkeyOutOfOrder, 23 - } 24 - 25 - /// Errors that can happen while walking 26 - #[derive(Debug, thiserror::Error)] 27 - pub enum DiskTrip<E: StorageErrorBase> { 28 - #[error("tripped: {0}")] 29 - Trip(#[from] Trip), 30 - #[error("storage error: {0}")] 31 - StorageError(#[from] E), 32 - #[error("Decode error: {0}")] 33 - BincodeDecodeError(#[from] bincode::error::DecodeError), 34 - } 35 - 36 - /// Errors from invalid Rkeys 37 - #[derive(Debug, thiserror::Error)] 38 - pub enum RkeyError { 39 - #[error("Failed to compute an rkey due to invalid prefix_len")] 40 - EntryPrefixOutOfbounds, 41 - #[error("RKey was not utf-8")] 42 - EntryRkeyNotUtf8(#[from] std::string::FromUtf8Error), 43 - } 44 - 45 - /// Walker outputs 46 - #[derive(Debug)] 47 - pub enum Step<T> { 48 - /// We needed this CID but it's not in the block store 49 - Missing(Cid), 50 - /// Reached the end of the MST! yay! 51 - Finish, 52 - /// A record was found! 
53 - Step { rkey: String, data: T }, 54 - } 55 - 56 - #[derive(Debug, Clone, PartialEq)] 57 - enum Need { 58 - Node(Cid), 59 - Record { rkey: String, cid: Cid }, 60 - } 61 - 62 - fn push_from_node(stack: &mut Vec<Need>, node: &Node) -> Result<(), RkeyError> { 63 - let mut entries = Vec::with_capacity(node.entries.len()); 64 - 65 - let mut prefix = vec![]; 66 - for entry in &node.entries { 67 - let mut rkey = vec![]; 68 - let pre_checked = prefix 69 - .get(..entry.prefix_len) 70 - .ok_or(RkeyError::EntryPrefixOutOfbounds)?; 71 - rkey.extend_from_slice(pre_checked); 72 - rkey.extend_from_slice(&entry.keysuffix); 73 - prefix = rkey.clone(); 74 - 75 - entries.push(Need::Record { 76 - rkey: String::from_utf8(rkey)?, 77 - cid: entry.value, 78 - }); 79 - if let Some(ref tree) = entry.tree { 80 - entries.push(Need::Node(*tree)); 81 - } 82 - } 83 - 84 - entries.reverse(); 85 - stack.append(&mut entries); 86 - 87 - if let Some(tree) = node.left { 88 - stack.push(Need::Node(tree)); 89 - } 90 - Ok(()) 91 - } 92 - 93 - /// Traverser of an atproto MST 94 - /// 95 - /// Walks the tree from left-to-right in depth-first order 96 - #[derive(Debug)] 97 - pub struct Walker { 98 - stack: Vec<Need>, 99 - prev: String, 100 - } 101 - 102 - impl Walker { 103 - pub fn new(tree_root_cid: Cid) -> Self { 104 - Self { 105 - stack: vec![Need::Node(tree_root_cid)], 106 - prev: "".to_string(), 107 - } 108 - } 109 - 110 - /// Advance through nodes until we find a record or can't go further 111 - pub fn step<T: Processable>( 112 - &mut self, 113 - blocks: &mut HashMap<Cid, MaybeProcessedBlock<T>>, 114 - process: impl Fn(Vec<u8>) -> T, 115 - ) -> Result<Step<T>, Trip> { 116 - loop { 117 - let Some(mut need) = self.stack.last() else { 118 - log::trace!("tried to walk but we're actually done."); 119 - return Ok(Step::Finish); 120 - }; 121 - 122 - match &mut need { 123 - Need::Node(cid) => { 124 - log::trace!("need node {cid:?}"); 125 - let Some(block) = blocks.remove(cid) else { 126 - log::trace!("node 
not found, resting"); 127 - return Ok(Step::Missing(*cid)); 128 - }; 129 - 130 - let MaybeProcessedBlock::Raw(data) = block else { 131 - return Err(Trip::BadCommitFingerprint); 132 - }; 133 - let node = 134 - serde_ipld_dagcbor::from_slice::<Node>(&data).map_err(Trip::BadCommit)?; 135 - 136 - // found node, make sure we remember 137 - self.stack.pop(); 138 - 139 - // queue up work on the found node next 140 - push_from_node(&mut self.stack, &node)?; 141 - } 142 - Need::Record { rkey, cid } => { 143 - log::trace!("need record {cid:?}"); 144 - let Some(data) = blocks.get_mut(cid) else { 145 - log::trace!("record block not found, resting"); 146 - return Ok(Step::Missing(*cid)); 147 - }; 148 - let rkey = rkey.clone(); 149 - let data = match data { 150 - MaybeProcessedBlock::Raw(data) => process(data.to_vec()), 151 - MaybeProcessedBlock::Processed(t) => t.clone(), 152 - }; 153 - 154 - // found node, make sure we remember 155 - self.stack.pop(); 156 - 157 - log::trace!("emitting a block as a step. depth={}", self.stack.len()); 158 - 159 - // rkeys *must* be in order or else the tree is invalid (or 160 - // we have a bug) 161 - if rkey <= self.prev { 162 - return Err(Trip::RkeyOutOfOrder); 163 - } 164 - self.prev = rkey.clone(); 165 - 166 - return Ok(Step::Step { rkey, data }); 167 - } 168 - } 169 - } 170 - } 171 - 172 - /// blocking!!!!!! 173 - pub fn disk_step<T: Processable, R: DiskReader>( 174 - &mut self, 175 - reader: &mut R, 176 - process: impl Fn(Vec<u8>) -> T, 177 - ) -> Result<Step<T>, DiskTrip<R::StorageError>> { 178 - loop { 179 - let Some(mut need) = self.stack.last() else { 180 - log::trace!("tried to walk but we're actually done."); 181 - return Ok(Step::Finish); 182 - }; 183 - 184 - match &mut need { 185 - Need::Node(cid) => { 186 - let cid_bytes = cid.to_bytes(); 187 - log::trace!("need node {cid:?}"); 188 - let Some(block_bytes) = reader.get(cid_bytes)? 
else { 189 - log::trace!("node not found, resting"); 190 - return Ok(Step::Missing(*cid)); 191 - }; 192 - 193 - let block: MaybeProcessedBlock<T> = crate::drive::decode(&block_bytes)?; 194 - 195 - let MaybeProcessedBlock::Raw(data) = block else { 196 - return Err(Trip::BadCommitFingerprint.into()); 197 - }; 198 - let node = 199 - serde_ipld_dagcbor::from_slice::<Node>(&data).map_err(Trip::BadCommit)?; 200 - 201 - // found node, make sure we remember 202 - self.stack.pop(); 203 - 204 - // queue up work on the found node next 205 - push_from_node(&mut self.stack, &node).map_err(Trip::RkeyError)?; 206 - } 207 - Need::Record { rkey, cid } => { 208 - log::trace!("need record {cid:?}"); 209 - let cid_bytes = cid.to_bytes(); 210 - let Some(data_bytes) = reader.get(cid_bytes)? else { 211 - log::trace!("record block not found, resting"); 212 - return Ok(Step::Missing(*cid)); 213 - }; 214 - let data: MaybeProcessedBlock<T> = crate::drive::decode(&data_bytes)?; 215 - let rkey = rkey.clone(); 216 - let data = match data { 217 - MaybeProcessedBlock::Raw(data) => process(data), 218 - MaybeProcessedBlock::Processed(t) => t.clone(), 219 - }; 220 - 221 - // found node, make sure we remember 222 - self.stack.pop(); 223 - 224 - log::trace!("emitting a block as a step. 
depth={}", self.stack.len()); 225 - 226 - // rkeys *must* be in order or else the tree is invalid (or 227 - // we have a bug) 228 - if rkey <= self.prev { 229 - return Err(DiskTrip::Trip(Trip::RkeyOutOfOrder)); 230 - } 231 - self.prev = rkey.clone(); 232 - 233 - return Ok(Step::Step { rkey, data }); 234 - } 235 - } 236 - } 237 - } 238 - } 239 - 240 - #[cfg(test)] 241 - mod test { 242 - use super::*; 243 - // use crate::mst::Entry; 244 - 245 - fn cid1() -> Cid { 246 - "bafyreihixenvk3ahqbytas4hk4a26w43bh6eo3w6usjqtxkpzsvi655a3m" 247 - .parse() 248 - .unwrap() 249 - } 250 - // fn cid2() -> Cid { 251 - // "QmY7Yh4UquoXHLPFo2XbhXkhBvFoPwmQUSa92pxnxjQuPU" 252 - // .parse() 253 - // .unwrap() 254 - // } 255 - // fn cid3() -> Cid { 256 - // "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi" 257 - // .parse() 258 - // .unwrap() 259 - // } 260 - // fn cid4() -> Cid { 261 - // "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR" 262 - // .parse() 263 - // .unwrap() 264 - // } 265 - // fn cid5() -> Cid { 266 - // "QmSnuWmxptJZdLJpKRarxBMS2Ju2oANVrgbr2xWbie9b2D" 267 - // .parse() 268 - // .unwrap() 269 - // } 270 - // fn cid6() -> Cid { 271 - // "QmdmQXB2mzChmMeKY47C43LxUdg1NDJ5MWcKMKxDu7RgQm" 272 - // .parse() 273 - // .unwrap() 274 - // } 275 - // fn cid7() -> Cid { 276 - // "bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze" 277 - // .parse() 278 - // .unwrap() 279 - // } 280 - // fn cid8() -> Cid { 281 - // "bafyreif3tfdpr5n4jdrbielmcapwvbpcthepfkwq2vwonmlhirbjmotedi" 282 - // .parse() 283 - // .unwrap() 284 - // } 285 - // fn cid9() -> Cid { 286 - // "bafyreicnokmhmrnlp2wjhyk2haep4tqxiptwfrp2rrs7rzq7uk766chqvq" 287 - // .parse() 288 - // .unwrap() 289 - // } 290 - 291 - #[test] 292 - fn test_next_from_node_empty() { 293 - let node = Node { 294 - left: None, 295 - entries: vec![], 296 - }; 297 - let mut stack = vec![]; 298 - push_from_node(&mut stack, &node).unwrap(); 299 - assert_eq!(stack.last(), None); 300 - } 301 - 302 - #[test] 303 - fn 
test_needs_from_node_just_left() { 304 - let node = Node { 305 - left: Some(cid1()), 306 - entries: vec![], 307 - }; 308 - let mut stack = vec![]; 309 - push_from_node(&mut stack, &node).unwrap(); 310 - assert_eq!(stack.last(), Some(Need::Node(cid1())).as_ref()); 311 - } 312 - 313 - // #[test] 314 - // fn test_needs_from_node_just_one_record() { 315 - // let node = Node { 316 - // left: None, 317 - // entries: vec![Entry { 318 - // keysuffix: "asdf".into(), 319 - // prefix_len: 0, 320 - // value: cid1(), 321 - // tree: None, 322 - // }], 323 - // }; 324 - // assert_eq!( 325 - // needs_from_node(node).unwrap(), 326 - // vec![Need::Record { 327 - // rkey: "asdf".into(), 328 - // cid: cid1(), 329 - // },] 330 - // ); 331 - // } 332 - 333 - // #[test] 334 - // fn test_needs_from_node_two_records() { 335 - // let node = Node { 336 - // left: None, 337 - // entries: vec![ 338 - // Entry { 339 - // keysuffix: "asdf".into(), 340 - // prefix_len: 0, 341 - // value: cid1(), 342 - // tree: None, 343 - // }, 344 - // Entry { 345 - // keysuffix: "gh".into(), 346 - // prefix_len: 2, 347 - // value: cid2(), 348 - // tree: None, 349 - // }, 350 - // ], 351 - // }; 352 - // assert_eq!( 353 - // needs_from_node(node).unwrap(), 354 - // vec![ 355 - // Need::Record { 356 - // rkey: "asdf".into(), 357 - // cid: cid1(), 358 - // }, 359 - // Need::Record { 360 - // rkey: "asgh".into(), 361 - // cid: cid2(), 362 - // }, 363 - // ] 364 - // ); 365 - // } 366 - 367 - // #[test] 368 - // fn test_needs_from_node_with_both() { 369 - // let node = Node { 370 - // left: None, 371 - // entries: vec![Entry { 372 - // keysuffix: "asdf".into(), 373 - // prefix_len: 0, 374 - // value: cid1(), 375 - // tree: Some(cid2()), 376 - // }], 377 - // }; 378 - // assert_eq!( 379 - // needs_from_node(node).unwrap(), 380 - // vec![ 381 - // Need::Record { 382 - // rkey: "asdf".into(), 383 - // cid: cid1(), 384 - // }, 385 - // Need::Node(cid2()), 386 - // ] 387 - // ); 388 - // } 389 - 390 - // #[test] 391 - // 
fn test_needs_from_node_left_and_record() { 392 - // let node = Node { 393 - // left: Some(cid1()), 394 - // entries: vec![Entry { 395 - // keysuffix: "asdf".into(), 396 - // prefix_len: 0, 397 - // value: cid2(), 398 - // tree: None, 399 - // }], 400 - // }; 401 - // assert_eq!( 402 - // needs_from_node(node).unwrap(), 403 - // vec![ 404 - // Need::Node(cid1()), 405 - // Need::Record { 406 - // rkey: "asdf".into(), 407 - // cid: cid2(), 408 - // }, 409 - // ] 410 - // ); 411 - // } 412 - 413 - // #[test] 414 - // fn test_needs_from_full_node() { 415 - // let node = Node { 416 - // left: Some(cid1()), 417 - // entries: vec![ 418 - // Entry { 419 - // keysuffix: "asdf".into(), 420 - // prefix_len: 0, 421 - // value: cid2(), 422 - // tree: Some(cid3()), 423 - // }, 424 - // Entry { 425 - // keysuffix: "ghi".into(), 426 - // prefix_len: 1, 427 - // value: cid4(), 428 - // tree: Some(cid5()), 429 - // }, 430 - // Entry { 431 - // keysuffix: "jkl".into(), 432 - // prefix_len: 2, 433 - // value: cid6(), 434 - // tree: Some(cid7()), 435 - // }, 436 - // Entry { 437 - // keysuffix: "mno".into(), 438 - // prefix_len: 4, 439 - // value: cid8(), 440 - // tree: Some(cid9()), 441 - // }, 442 - // ], 443 - // }; 444 - // assert_eq!( 445 - // needs_from_node(node).unwrap(), 446 - // vec![ 447 - // Need::Node(cid1()), 448 - // Need::Record { 449 - // rkey: "asdf".into(), 450 - // cid: cid2(), 451 - // }, 452 - // Need::Node(cid3()), 453 - // Need::Record { 454 - // rkey: "aghi".into(), 455 - // cid: cid4(), 456 - // }, 457 - // Need::Node(cid5()), 458 - // Need::Record { 459 - // rkey: "agjkl".into(), 460 - // cid: cid6(), 461 - // }, 462 - // Need::Node(cid7()), 463 - // Need::Record { 464 - // rkey: "agjkmno".into(), 465 - // cid: cid8(), 466 - // }, 467 - // Need::Node(cid9()), 468 - // ] 469 - // ); 470 - // } 471 - }
+34 -31
tests/non-huge-cars.rs
··· 1 1 extern crate repo_stream; 2 - use futures::TryStreamExt; 3 - use iroh_car::CarReader; 4 - use std::convert::Infallible; 2 + use repo_stream::Driver; 5 3 4 + const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car"); 6 5 const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car"); 7 6 const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car"); 8 7 const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car"); 9 8 10 - async fn test_car(bytes: &[u8], expected_records: usize, expected_sum: usize) { 11 - let reader = CarReader::new(bytes).await.unwrap(); 12 - 13 - let root = reader 14 - .header() 15 - .roots() 16 - .first() 17 - .ok_or("missing root") 9 + async fn test_car( 10 + bytes: &[u8], 11 + expected_records: usize, 12 + expected_sum: usize, 13 + expect_profile: bool, 14 + ) { 15 + let mut driver = match Driver::load_car(bytes, |block| block.len(), 10 /* MiB */) 16 + .await 18 17 .unwrap() 19 - .clone(); 20 - 21 - let stream = std::pin::pin!(reader.stream()); 22 - 23 - let (_commit, v) = 24 - repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 25 - .await 26 - .unwrap(); 27 - let mut record_stream = std::pin::pin!(v.stream()); 18 + { 19 + Driver::Memory(_commit, mem_driver) => mem_driver, 20 + Driver::Disk(_) => panic!("too big"), 21 + }; 28 22 29 23 let mut records = 0; 30 24 let mut sum = 0; 31 25 let mut found_bsky_profile = false; 32 26 let mut prev_rkey = "".to_string(); 33 - while let Some((rkey, size)) = record_stream.try_next().await.unwrap() { 34 - records += 1; 35 - sum += size; 36 - if rkey == "app.bsky.actor.profile/self" { 37 - found_bsky_profile = true; 27 + 28 + while let Some(pairs) = driver.next_chunk(256).await.unwrap() { 29 + for (rkey, size) in pairs { 30 + records += 1; 31 + sum += size; 32 + if rkey == "app.bsky.actor.profile/self" { 33 + found_bsky_profile = true; 34 + } 35 + assert!(rkey > prev_rkey, "rkeys are streamed in 
order"); 36 + prev_rkey = rkey; 38 37 } 39 - assert!(rkey > prev_rkey, "rkeys are streamed in order"); 40 - prev_rkey = rkey; 41 38 } 39 + 42 40 assert_eq!(records, expected_records); 43 41 assert_eq!(sum, expected_sum); 44 - assert!(found_bsky_profile); 42 + assert_eq!(found_bsky_profile, expect_profile); 43 + } 44 + 45 + #[tokio::test] 46 + async fn test_empty_car() { 47 + test_car(EMPTY_CAR, 0, 0, false).await 45 48 } 46 49 47 50 #[tokio::test] 48 51 async fn test_tiny_car() { 49 - test_car(TINY_CAR, 8, 2071).await 52 + test_car(TINY_CAR, 8, 2071, true).await 50 53 } 51 54 52 55 #[tokio::test] 53 56 async fn test_little_car() { 54 - test_car(LITTLE_CAR, 278, 246960).await 57 + test_car(LITTLE_CAR, 278, 246960, true).await 55 58 } 56 59 57 60 #[tokio::test] 58 61 async fn test_midsize_car() { 59 - test_car(MIDSIZE_CAR, 11585, 3741393).await 62 + test_car(MIDSIZE_CAR, 11585, 3741393, true).await 60 63 }