Fast and robust atproto CAR file processing in rust

Compare changes

Choose any two refs to compare.

Changed files
+157 -2924
examples
disk-read-file
src
+130 -1411
Cargo.lock
··· 3 3 version = 4 4 4 5 5 [[package]] 6 - name = "addr2line" 7 - version = "0.25.1" 8 - source = "registry+https://github.com/rust-lang/crates.io-index" 9 - checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" 10 - dependencies = [ 11 - "gimli", 12 - ] 13 - 14 - [[package]] 15 - name = "adler2" 16 - version = "2.0.1" 17 - source = "registry+https://github.com/rust-lang/crates.io-index" 18 - checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" 19 - 20 - [[package]] 21 - name = "aho-corasick" 22 - version = "1.1.3" 23 - source = "registry+https://github.com/rust-lang/crates.io-index" 24 - checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 - dependencies = [ 26 - "memchr", 27 - ] 28 - 29 - [[package]] 30 - name = "anes" 31 - version = "0.1.6" 32 - source = "registry+https://github.com/rust-lang/crates.io-index" 33 - checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 34 - 35 - [[package]] 36 6 name = "anstream" 37 7 version = "0.6.21" 38 8 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 68 38 source = "registry+https://github.com/rust-lang/crates.io-index" 69 39 checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" 70 40 dependencies = [ 71 - "windows-sys 0.60.2", 41 + "windows-sys", 72 42 ] 73 43 74 44 [[package]] ··· 79 49 dependencies = [ 80 50 "anstyle", 81 51 "once_cell_polyfill", 82 - "windows-sys 0.60.2", 83 - ] 84 - 85 - [[package]] 86 - name = "anyhow" 87 - version = "1.0.100" 88 - source = "registry+https://github.com/rust-lang/crates.io-index" 89 - checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" 90 - 91 - [[package]] 92 - name = "autocfg" 93 - version = "1.5.0" 94 - source = "registry+https://github.com/rust-lang/crates.io-index" 95 - checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" 96 - 97 - [[package]] 98 - name = "backtrace" 99 - version = "0.3.76" 100 - source = "registry+https://github.com/rust-lang/crates.io-index" 101 - checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" 102 - dependencies = [ 103 - "addr2line", 104 - "cfg-if", 105 - "libc", 106 - "miniz_oxide", 107 - "object", 108 - "rustc-demangle", 109 - "windows-link", 110 - ] 111 - 112 - [[package]] 113 - name = "base-x" 114 - version = "0.2.11" 115 - source = "registry+https://github.com/rust-lang/crates.io-index" 116 - checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270" 117 - 118 - [[package]] 119 - name = "base256emoji" 120 - version = "1.0.2" 121 - source = "registry+https://github.com/rust-lang/crates.io-index" 122 - checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c" 123 - dependencies = [ 124 - "const-str", 125 - "match-lookup", 126 - ] 127 - 128 - [[package]] 129 - name = "bincode" 130 - version = "2.0.1" 131 - source = "registry+https://github.com/rust-lang/crates.io-index" 132 - checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" 133 - dependencies = [ 134 - "bincode_derive", 135 - "serde", 136 - "unty", 137 - ] 138 - 139 - [[package]] 140 - name = "bincode_derive" 141 - version = "2.0.1" 142 - source = "registry+https://github.com/rust-lang/crates.io-index" 143 - checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" 144 - dependencies = [ 145 - "virtue", 52 + "windows-sys", 146 53 ] 147 54 148 55 [[package]] ··· 152 59 checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" 153 60 154 61 [[package]] 155 - name = "block-buffer" 156 - version = "0.10.4" 157 - source = "registry+https://github.com/rust-lang/crates.io-index" 158 - checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" 159 - dependencies = [ 160 - "generic-array", 161 - ] 162 - 163 - [[package]] 164 - name = "bumpalo" 165 - version = "3.19.0" 166 - source = "registry+https://github.com/rust-lang/crates.io-index" 167 - checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" 168 - 169 - [[package]] 170 - name = "bytemuck" 171 - version = "1.24.0" 172 - source = "registry+https://github.com/rust-lang/crates.io-index" 173 - checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" 174 - dependencies = [ 175 - "bytemuck_derive", 176 - ] 177 - 178 - [[package]] 179 - name = "bytemuck_derive" 180 - version = "1.10.2" 181 - source = "registry+https://github.com/rust-lang/crates.io-index" 182 - checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" 183 - dependencies = [ 184 - "proc-macro2", 185 - "quote", 186 - "syn 2.0.106", 187 - ] 188 - 189 - [[package]] 190 - name = "bytes" 191 - version = "1.10.1" 192 - source = "registry+https://github.com/rust-lang/crates.io-index" 193 - checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 194 - 195 - [[package]] 196 - name = "candystore" 197 - version = "0.5.6" 198 - source = "registry+https://github.com/rust-lang/crates.io-index" 199 - checksum = "e015e4215c0e855880a745ed0d9be7f2d8f49d0426006ccbc66b7b0e10a1bd1a" 200 - dependencies = [ 201 - "anyhow", 202 - "bytemuck", 203 - "crossbeam-channel", 204 - "databuf", 205 - "fslock", 206 - "libc", 207 - "memmap", 208 - "parking_lot", 209 - "rand", 210 - "simd-itertools", 211 - "siphasher", 212 - "uuid", 213 - ] 214 - 215 - [[package]] 216 - name = "cast" 217 - version = "0.3.0" 62 + name = "byteorder-lite" 63 + version = "0.1.0" 218 64 source = "registry+https://github.com/rust-lang/crates.io-index" 219 - checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 65 + checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" 220 66 221 67 [[package]] 222 - name = "cbor4ii" 223 - version = "0.2.14" 68 + name = "byteview" 69 + version = "0.10.0" 224 70 source = "registry+https://github.com/rust-lang/crates.io-index" 225 - checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4" 226 - dependencies = [ 227 - "serde", 228 - ] 71 + checksum = "dda4398f387cc6395a3e93b3867cd9abda914c97a0b344d1eefb2e5c51785fca" 229 72 230 73 [[package]] 231 74 name = "cfg-if" ··· 234 77 checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" 235 78 236 79 [[package]] 237 - name = "ciborium" 238 - version = "0.2.2" 239 - source = "registry+https://github.com/rust-lang/crates.io-index" 240 - checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 241 - dependencies = [ 242 - "ciborium-io", 243 - "ciborium-ll", 244 - "serde", 245 - ] 246 - 247 - [[package]] 248 - name = "ciborium-io" 249 - version = "0.2.2" 250 - source = "registry+https://github.com/rust-lang/crates.io-index" 251 - checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 252 - 253 - [[package]] 254 - name = "ciborium-ll" 255 - version = "0.2.2" 256 - source = "registry+https://github.com/rust-lang/crates.io-index" 257 - checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 258 - dependencies = [ 259 - "ciborium-io", 260 - "half", 261 - ] 262 - 263 - [[package]] 264 - name = "cid" 265 - version = "0.11.1" 266 - source = "registry+https://github.com/rust-lang/crates.io-index" 267 - checksum = "3147d8272e8fa0ccd29ce51194dd98f79ddfb8191ba9e3409884e751798acf3a" 268 - dependencies = [ 269 - "core2", 270 - "multibase", 271 - "multihash", 272 - "serde", 273 - "serde_bytes", 274 - "unsigned-varint 0.8.0", 275 - ] 276 - 277 - [[package]] 278 80 name = "clap" 279 81 version = "4.5.48" 280 82 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 305 107 "heck", 306 108 "proc-macro2", 307 109 "quote", 308 - "syn 2.0.106", 110 + "syn", 309 111 ] 310 112 311 113 [[package]] ··· 321 123 checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" 322 124 323 125 [[package]] 324 - name = "const-str" 325 - version = "0.4.3" 126 + name = "compare" 127 + version = "0.0.6" 326 128 source = "registry+https://github.com/rust-lang/crates.io-index" 327 - checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3" 328 - 329 - [[package]] 330 - name = "core2" 331 - version = "0.4.0" 332 - source = "registry+https://github.com/rust-lang/crates.io-index" 333 - checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" 334 - dependencies = [ 335 - "memchr", 336 - ] 129 + checksum = "ea0095f6103c2a8b44acd6fd15960c801dafebf02e21940360833e0673f48ba7" 337 130 338 131 [[package]] 339 - name = "cpufeatures" 340 - version = "0.2.17" 132 + name = "crossbeam-epoch" 133 + version = "0.9.18" 341 134 source = "registry+https://github.com/rust-lang/crates.io-index" 342 - checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" 343 - dependencies = [ 344 - "libc", 345 - ] 346 - 347 - [[package]] 348 - name = "criterion" 349 - version = "0.7.0" 350 - source = "registry+https://github.com/rust-lang/crates.io-index" 351 - checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" 352 - dependencies = [ 353 - "anes", 354 - "cast", 355 - "ciborium", 356 - "clap", 357 - "criterion-plot", 358 - "itertools", 359 - "num-traits", 360 - "oorandom", 361 - "plotters", 362 - "rayon", 363 - "regex", 364 - "serde", 365 - "serde_json", 366 - "tinytemplate", 367 - "tokio", 368 - "walkdir", 369 - ] 370 - 371 - [[package]] 372 - name = "criterion-plot" 373 - version = "0.6.0" 374 - source = "registry+https://github.com/rust-lang/crates.io-index" 375 - checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" 376 - dependencies = [ 377 - "cast", 378 - "itertools", 379 - ] 380 - 381 - [[package]] 382 - name = "crossbeam-channel" 383 - version = "0.5.15" 384 - source = "registry+https://github.com/rust-lang/crates.io-index" 385 - checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" 135 + checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 386 136 dependencies = [ 387 137 "crossbeam-utils", 388 138 ] 389 139 390 140 [[package]] 391 - name = "crossbeam-deque" 392 - version = "0.8.6" 141 + name = "crossbeam-skiplist" 142 + version = "0.1.3" 393 143 source = "registry+https://github.com/rust-lang/crates.io-index" 394 - checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" 144 + checksum = "df29de440c58ca2cc6e587ec3d22347551a32435fbde9d2bff64e78a9ffa151b" 395 145 dependencies = [ 396 146 "crossbeam-epoch", 397 147 "crossbeam-utils", 398 148 ] 399 149 400 150 [[package]] 401 - name = "crossbeam-epoch" 402 - version = "0.9.18" 403 - source = "registry+https://github.com/rust-lang/crates.io-index" 404 - checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 405 - dependencies = [ 406 - "crossbeam-utils", 407 - ] 408 - 409 - [[package]] 410 151 name = "crossbeam-utils" 411 152 version = "0.8.21" 412 153 source = "registry+https://github.com/rust-lang/crates.io-index" 413 154 checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 414 155 415 156 [[package]] 416 - name = "crunchy" 417 - version = "0.2.4" 418 - source = "registry+https://github.com/rust-lang/crates.io-index" 419 - checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" 420 - 421 - [[package]] 422 - name = "crypto-common" 423 - version = "0.1.6" 424 - source = "registry+https://github.com/rust-lang/crates.io-index" 425 - checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" 426 - dependencies = [ 427 - "generic-array", 428 - "typenum", 429 - ] 430 - 431 - [[package]] 432 - name = "data-encoding" 433 - version = "2.9.0" 434 - source = "registry+https://github.com/rust-lang/crates.io-index" 435 - checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" 436 - 437 - [[package]] 438 - name = "data-encoding-macro" 439 - version = "0.1.18" 440 - source = "registry+https://github.com/rust-lang/crates.io-index" 441 - checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d" 442 - dependencies = [ 443 - "data-encoding", 444 - "data-encoding-macro-internal", 445 - ] 446 - 447 - [[package]] 448 - name = "data-encoding-macro-internal" 449 - version = "0.1.16" 450 - source = "registry+https://github.com/rust-lang/crates.io-index" 451 - checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976" 452 - dependencies = [ 453 - "data-encoding", 454 - "syn 2.0.106", 455 - ] 456 - 457 - [[package]] 458 - name = "databuf" 459 - version = "0.5.0" 460 - source = "registry+https://github.com/rust-lang/crates.io-index" 461 - checksum = "9e1ad1d99bee317a8dac0b7cd86896c5a5f24307009292985dabbf3e412c8b9d" 462 - dependencies = [ 463 - "databuf-derive", 464 - ] 465 - 466 - [[package]] 467 - name = "databuf-derive" 468 - version = "0.5.0" 157 + name = "dashmap" 158 + version = "6.1.0" 469 159 source = "registry+https://github.com/rust-lang/crates.io-index" 470 - checksum = "04040c9fc8fcb4084222a26c99faf5b3014772a6115e076b7a50fe49bf25d0ea" 160 + checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" 471 161 dependencies = [ 472 - "databuf_derive_impl", 162 + "cfg-if", 163 + "crossbeam-utils", 164 + "hashbrown 0.14.5", 165 + "lock_api", 166 + "once_cell", 167 + "parking_lot_core", 473 168 ] 474 169 475 170 [[package]] 476 - name = "databuf_derive_impl" 477 - version = "0.2.3" 171 + name = "enum_dispatch" 172 + version = "0.3.13" 478 173 source = "registry+https://github.com/rust-lang/crates.io-index" 479 - checksum = "daf656eb071fe87d23716f933788a35a8ad6baa6fdbf66a67a261dbd3f9dc81a" 174 + checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" 480 175 dependencies = [ 481 - "quote2", 482 - "syn 2.0.106", 176 + "once_cell", 177 + "proc-macro2", 178 + "quote", 179 + "syn", 483 180 ] 484 181 485 182 [[package]] 486 - name = "digest" 487 - version = "0.10.7" 183 + name = "equivalent" 184 + version = "1.0.2" 488 185 source = "registry+https://github.com/rust-lang/crates.io-index" 489 - checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" 490 - dependencies = [ 491 - "block-buffer", 492 - "crypto-common", 493 - ] 494 - 495 - [[package]] 496 - name = "either" 497 - version = "1.15.0" 498 - source = "registry+https://github.com/rust-lang/crates.io-index" 499 - checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 500 - 501 - [[package]] 502 - name = "env_filter" 503 - version = "0.1.3" 504 - source = "registry+https://github.com/rust-lang/crates.io-index" 505 - checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" 506 - dependencies = [ 507 - "log", 508 - "regex", 509 - ] 510 - 511 - [[package]] 512 - name = "env_logger" 513 - version = "0.11.8" 514 - source = "registry+https://github.com/rust-lang/crates.io-index" 515 - checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" 516 - dependencies = [ 517 - "anstream", 518 - "anstyle", 519 - "env_filter", 520 - "jiff", 521 - "log", 522 - ] 186 + checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 523 187 524 188 [[package]] 525 189 name = "errno" ··· 528 192 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" 529 193 dependencies = [ 530 194 "libc", 531 - "windows-sys 0.60.2", 195 + "windows-sys", 532 196 ] 533 197 534 198 [[package]] 535 - name = "fallible-iterator" 536 - version = "0.3.0" 537 - source = "registry+https://github.com/rust-lang/crates.io-index" 538 - checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" 539 - 540 - [[package]] 541 - name = "fallible-streaming-iterator" 542 - version = "0.1.9" 543 - source = "registry+https://github.com/rust-lang/crates.io-index" 544 - checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" 545 - 546 - [[package]] 547 199 name = "fastrand" 548 200 version = "2.3.0" 549 201 source = "registry+https://github.com/rust-lang/crates.io-index" 550 202 checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" 551 203 552 204 [[package]] 553 - name = "foldhash" 554 - version = "0.1.5" 555 - source = "registry+https://github.com/rust-lang/crates.io-index" 556 - checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" 557 - 558 - [[package]] 559 - name = "fslock" 560 - version = "0.2.1" 205 + name = "fjall" 206 + version = "3.0.1" 561 207 source = "registry+https://github.com/rust-lang/crates.io-index" 562 - checksum = "04412b8935272e3a9bae6f48c7bfff74c2911f60525404edfdd28e49884c3bfb" 208 + checksum = "4f69637c02d38ad1b0f003101d0195a60368130aa17d9ef78b1557d265a22093" 563 209 dependencies = [ 564 - "libc", 565 - "winapi", 566 - ] 567 - 568 - [[package]] 569 - name = "futures" 570 - version = "0.3.31" 571 - source = "registry+https://github.com/rust-lang/crates.io-index" 572 - checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" 573 - dependencies = [ 574 - "futures-channel", 575 - "futures-core", 576 - "futures-executor", 577 - "futures-io", 578 - "futures-sink", 579 - "futures-task", 580 - "futures-util", 210 + "byteorder-lite", 211 + "byteview", 212 + "dashmap", 213 + "flume", 214 + "log", 215 + "lsm-tree", 216 + "lz4_flex", 217 + "tempfile", 218 + "xxhash-rust", 581 219 ] 582 220 583 221 [[package]] 584 - name = "futures-channel" 585 - version = "0.3.31" 222 + name = "flume" 223 + version = "0.12.0" 586 224 source = "registry+https://github.com/rust-lang/crates.io-index" 587 - checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" 225 + checksum = "5e139bc46ca777eb5efaf62df0ab8cc5fd400866427e56c68b22e414e53bd3be" 588 226 dependencies = [ 589 - "futures-core", 590 - "futures-sink", 591 - ] 592 - 593 - [[package]] 594 - name = "futures-core" 595 - version = "0.3.31" 596 - source = "registry+https://github.com/rust-lang/crates.io-index" 597 - checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" 598 - 599 - [[package]] 600 - name = "futures-executor" 601 - version = "0.3.31" 602 - source = "registry+https://github.com/rust-lang/crates.io-index" 603 - checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" 604 - dependencies = [ 605 - "futures-core", 606 - "futures-task", 607 - "futures-util", 608 - ] 609 - 610 - [[package]] 611 - name = "futures-io" 612 - version = "0.3.31" 613 - source = "registry+https://github.com/rust-lang/crates.io-index" 614 - checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" 615 - 616 - [[package]] 617 - name = "futures-macro" 618 - version = "0.3.31" 619 - source = "registry+https://github.com/rust-lang/crates.io-index" 620 - checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" 621 - dependencies = [ 622 - "proc-macro2", 623 - "quote", 624 - "syn 2.0.106", 625 - ] 626 - 627 - [[package]] 628 - name = "futures-sink" 629 - version = "0.3.31" 630 - source = "registry+https://github.com/rust-lang/crates.io-index" 631 - checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" 632 - 633 - [[package]] 634 - name = "futures-task" 635 - version = "0.3.31" 636 - source = "registry+https://github.com/rust-lang/crates.io-index" 637 - checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" 638 - 639 - [[package]] 640 - name = "futures-util" 641 - version = "0.3.31" 642 - source = "registry+https://github.com/rust-lang/crates.io-index" 643 - checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" 644 - dependencies = [ 645 - "futures-channel", 646 - "futures-core", 647 - "futures-io", 648 - "futures-macro", 649 - "futures-sink", 650 - "futures-task", 651 - "memchr", 652 - "pin-project-lite", 653 - "pin-utils", 654 - "slab", 655 - ] 656 - 657 - [[package]] 658 - name = "generic-array" 659 - version = "0.14.9" 660 - source = "registry+https://github.com/rust-lang/crates.io-index" 661 - checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" 662 - dependencies = [ 663 - "typenum", 664 - "version_check", 227 + "spin", 665 228 ] 666 229 667 230 [[package]] ··· 673 236 "cfg-if", 674 237 "libc", 675 238 "r-efi", 676 - "wasi 0.14.7+wasi-0.2.4", 239 + "wasi", 677 240 ] 678 241 679 242 [[package]] 680 - name = "gimli" 681 - version = "0.32.3" 682 - source = "registry+https://github.com/rust-lang/crates.io-index" 683 - checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" 684 - 685 - [[package]] 686 - name = "half" 687 - version = "2.7.0" 243 + name = "hashbrown" 244 + version = "0.14.5" 688 245 source = "registry+https://github.com/rust-lang/crates.io-index" 689 - checksum = "e54c115d4f30f52c67202f079c5f9d8b49db4691f460fdb0b4c2e838261b2ba5" 690 - dependencies = [ 691 - "cfg-if", 692 - "crunchy", 693 - "zerocopy", 694 - ] 246 + checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 695 247 696 248 [[package]] 697 249 name = "hashbrown" 698 - version = "0.15.5" 250 + version = "0.16.1" 699 251 source = "registry+https://github.com/rust-lang/crates.io-index" 700 - checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" 701 - dependencies = [ 702 - "foldhash", 703 - ] 704 - 705 - [[package]] 706 - name = "hashlink" 707 - version = "0.10.0" 708 - source = "registry+https://github.com/rust-lang/crates.io-index" 709 - checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" 710 - dependencies = [ 711 - "hashbrown", 712 - ] 252 + checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" 713 253 714 254 [[package]] 715 255 name = "heck" ··· 718 258 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 719 259 720 260 [[package]] 721 - name = "io-uring" 722 - version = "0.7.10" 723 - source = "registry+https://github.com/rust-lang/crates.io-index" 724 - checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" 725 - dependencies = [ 726 - "bitflags", 727 - "cfg-if", 728 - "libc", 729 - ] 730 - 731 - [[package]] 732 - name = "ipld-core" 733 - version = "0.4.2" 734 - source = "registry+https://github.com/rust-lang/crates.io-index" 735 - checksum = "104718b1cc124d92a6d01ca9c9258a7df311405debb3408c445a36452f9bf8db" 736 - dependencies = [ 737 - "cid", 738 - "serde", 739 - "serde_bytes", 740 - ] 741 - 742 - [[package]] 743 - name = "iroh-car" 744 - version = "0.5.1" 261 + name = "interval-heap" 262 + version = "0.0.5" 745 263 source = "registry+https://github.com/rust-lang/crates.io-index" 746 - checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a" 264 + checksum = "11274e5e8e89b8607cfedc2910b6626e998779b48a019151c7604d0adcb86ac6" 747 265 dependencies = [ 748 - "anyhow", 749 - "cid", 750 - "futures", 751 - "serde", 752 - "serde_ipld_dagcbor", 753 - "thiserror 1.0.69", 754 - "tokio", 755 - "unsigned-varint 0.7.2", 266 + "compare", 756 267 ] 757 268 758 269 [[package]] ··· 762 273 checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 763 274 764 275 [[package]] 765 - name = "itertools" 766 - version = "0.13.0" 767 - source = "registry+https://github.com/rust-lang/crates.io-index" 768 - checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" 769 - dependencies = [ 770 - "either", 771 - ] 772 - 773 - [[package]] 774 - name = "itoa" 775 - version = "1.0.15" 776 - source = "registry+https://github.com/rust-lang/crates.io-index" 777 - checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" 778 - 779 - [[package]] 780 - name = "jiff" 781 - version = "0.2.15" 782 - source = "registry+https://github.com/rust-lang/crates.io-index" 783 - checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" 784 - dependencies = [ 785 - "jiff-static", 786 - "log", 787 - "portable-atomic", 788 - "portable-atomic-util", 789 - "serde", 790 - ] 791 - 792 - [[package]] 793 - name = "jiff-static" 794 - version = "0.2.15" 795 - source = "registry+https://github.com/rust-lang/crates.io-index" 796 - checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" 797 - dependencies = [ 798 - "proc-macro2", 799 - "quote", 800 - "syn 2.0.106", 801 - ] 802 - 803 - [[package]] 804 - name = "js-sys" 805 - version = "0.3.81" 806 - source = "registry+https://github.com/rust-lang/crates.io-index" 807 - checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" 808 - dependencies = [ 809 - "once_cell", 810 - "wasm-bindgen", 811 - ] 812 - 813 - [[package]] 814 276 name = "libc" 815 277 version = "0.2.176" 816 278 source = "registry+https://github.com/rust-lang/crates.io-index" 817 279 checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" 818 280 819 281 [[package]] 820 - name = "libsqlite3-sys" 821 - version = "0.35.0" 822 - source = "registry+https://github.com/rust-lang/crates.io-index" 823 - checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f" 824 - dependencies = [ 825 - "pkg-config", 826 - "vcpkg", 827 - ] 828 - 829 - [[package]] 830 282 name = "linux-raw-sys" 831 283 version = "0.11.0" 832 284 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 848 300 checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" 849 301 850 302 [[package]] 851 - name = "match-lookup" 852 - version = "0.1.1" 853 - source = "registry+https://github.com/rust-lang/crates.io-index" 854 - checksum = "1265724d8cb29dbbc2b0f06fffb8bf1a8c0cf73a78eede9ba73a4a66c52a981e" 855 - dependencies = [ 856 - "proc-macro2", 857 - "quote", 858 - "syn 1.0.109", 859 - ] 860 - 861 - [[package]] 862 - name = "memchr" 863 - version = "2.7.6" 864 - source = "registry+https://github.com/rust-lang/crates.io-index" 865 - checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" 866 - 867 - [[package]] 868 - name = "memmap" 869 - version = "0.7.0" 870 - source = "registry+https://github.com/rust-lang/crates.io-index" 871 - checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" 872 - dependencies = [ 873 - "libc", 874 - "winapi", 875 - ] 876 - 877 - [[package]] 878 - name = "miniz_oxide" 879 - version = "0.8.9" 880 - source = "registry+https://github.com/rust-lang/crates.io-index" 881 - checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" 882 - dependencies = [ 883 - "adler2", 884 - ] 885 - 886 - [[package]] 887 - name = "mio" 888 - version = "1.0.4" 889 - source = "registry+https://github.com/rust-lang/crates.io-index" 890 - checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" 891 - dependencies = [ 892 - "libc", 893 - "wasi 0.11.1+wasi-snapshot-preview1", 894 - "windows-sys 0.59.0", 895 - ] 896 - 897 - [[package]] 898 - name = "multibase" 899 - version = "0.9.2" 900 - source = "registry+https://github.com/rust-lang/crates.io-index" 901 - checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77" 902 - dependencies = [ 903 - "base-x", 904 - "base256emoji", 905 - "data-encoding", 906 - "data-encoding-macro", 907 - ] 908 - 909 - [[package]] 910 - name = "multihash" 911 - version = "0.19.3" 303 + name = "lsm-tree" 304 + version = "3.0.1" 912 305 source = "registry+https://github.com/rust-lang/crates.io-index" 913 - checksum = "6b430e7953c29dd6a09afc29ff0bb69c6e306329ee6794700aee27b76a1aea8d" 306 + checksum = "b875f1dfe14f557f805b167fb9b0fc54c5560c7a4bd6ae02535b2846f276a8cb" 914 307 dependencies = [ 915 - "core2", 916 - "serde", 917 - "unsigned-varint 0.8.0", 308 + "byteorder-lite", 309 + "byteview", 310 + "crossbeam-skiplist", 311 + "enum_dispatch", 312 + "interval-heap", 313 + "log", 314 + "lz4_flex", 315 + "quick_cache", 316 + "rustc-hash", 317 + "self_cell", 318 + "sfa", 319 + "tempfile", 320 + "varint-rs", 321 + "xxhash-rust", 918 322 ] 919 323 920 324 [[package]] 921 - name = "multiversion" 922 - version = "0.8.0" 325 + name = "lz4_flex" 326 + version = "0.11.5" 923 327 source = "registry+https://github.com/rust-lang/crates.io-index" 924 - checksum = "7edb7f0ff51249dfda9ab96b5823695e15a052dc15074c9dbf3d118afaf2c201" 328 + checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" 925 329 dependencies = [ 926 - "multiversion-macros", 927 - "target-features", 928 - ] 929 - 930 - [[package]] 931 - name = "multiversion-macros" 932 - version = "0.8.0" 933 - source = "registry+https://github.com/rust-lang/crates.io-index" 934 - checksum = "b093064383341eb3271f42e381cb8f10a01459478446953953c75d24bd339fc0" 935 - dependencies = [ 936 - "proc-macro2", 937 - "quote", 938 - "syn 2.0.106", 939 - "target-features", 940 - ] 941 - 942 - [[package]] 943 - name = "num-traits" 944 - version = "0.2.19" 945 - source = "registry+https://github.com/rust-lang/crates.io-index" 946 - checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 947 - dependencies = [ 948 - "autocfg", 949 - ] 950 - 951 - [[package]] 952 - name = "object" 953 - version = "0.37.3" 954 - source = "registry+https://github.com/rust-lang/crates.io-index" 955 - checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" 956 - dependencies = [ 957 - "memchr", 330 + "twox-hash", 958 331 ] 959 332 960 333 [[package]] ··· 970 343 checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" 971 344 972 345 [[package]] 973 - name = "oorandom" 974 - version = "11.1.5" 975 - source = "registry+https://github.com/rust-lang/crates.io-index" 976 - checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" 977 - 978 - [[package]] 979 - name = "parking_lot" 980 - version = "0.12.5" 981 - source = "registry+https://github.com/rust-lang/crates.io-index" 982 - checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" 983 - dependencies = [ 984 - "lock_api", 985 - "parking_lot_core", 986 - ] 987 - 988 - [[package]] 989 346 name = "parking_lot_core" 990 347 version = "0.9.12" 991 348 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 999 356 ] 1000 357 1001 358 [[package]] 1002 - name = "pin-project-lite" 1003 - version = "0.2.16" 1004 - source = "registry+https://github.com/rust-lang/crates.io-index" 1005 - checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" 1006 - 1007 - [[package]] 1008 - name = "pin-utils" 1009 - version = "0.1.0" 1010 - source = "registry+https://github.com/rust-lang/crates.io-index" 1011 - checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 1012 - 1013 - [[package]] 1014 - name = "pkg-config" 1015 - version = "0.3.32" 1016 - source = "registry+https://github.com/rust-lang/crates.io-index" 1017 - checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 1018 - 1019 - [[package]] 1020 - name = "plotters" 1021 - version = "0.3.7" 1022 - source = "registry+https://github.com/rust-lang/crates.io-index" 1023 - checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" 1024 - dependencies = [ 1025 - "num-traits", 1026 - "plotters-backend", 1027 - "plotters-svg", 1028 - "wasm-bindgen", 1029 - "web-sys", 1030 - ] 1031 - 1032 - [[package]] 1033 - name = "plotters-backend" 1034 - version = "0.3.7" 1035 - source = "registry+https://github.com/rust-lang/crates.io-index" 1036 - checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" 1037 - 1038 - [[package]] 1039 - name = "plotters-svg" 1040 - version = "0.3.7" 1041 - source = "registry+https://github.com/rust-lang/crates.io-index" 1042 - checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" 1043 - dependencies = [ 1044 - "plotters-backend", 1045 - ] 1046 - 1047 - [[package]] 1048 - name = "portable-atomic" 1049 - version = "1.11.1" 1050 - source = "registry+https://github.com/rust-lang/crates.io-index" 1051 - checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" 1052 - 1053 - [[package]] 1054 - name = "portable-atomic-util" 1055 - version = "0.2.4" 1056 - source = "registry+https://github.com/rust-lang/crates.io-index" 1057 - checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" 1058 - dependencies = [ 1059 - "portable-atomic", 1060 - ] 1061 - 1062 - [[package]] 1063 - name = "ppv-lite86" 1064 - version = "0.2.21" 1065 - source = "registry+https://github.com/rust-lang/crates.io-index" 1066 - checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" 1067 - dependencies = [ 1068 - "zerocopy", 1069 - ] 1070 - 1071 - [[package]] 1072 359 name = "proc-macro2" 1073 360 version = "1.0.101" 1074 361 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1078 365 ] 1079 366 1080 367 [[package]] 1081 - name = "quote" 1082 - version = "1.0.41" 368 + name = "quick_cache" 369 + version = "0.6.18" 1083 370 source = "registry+https://github.com/rust-lang/crates.io-index" 1084 - checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" 371 + checksum = "7ada44a88ef953a3294f6eb55d2007ba44646015e18613d2f213016379203ef3" 1085 372 dependencies = [ 1086 - "proc-macro2", 373 + "equivalent", 374 + "hashbrown 0.16.1", 1087 375 ] 1088 376 1089 377 [[package]] 1090 - name = "quote2" 1091 - version = "0.7.0" 378 + name = "quote" 379 + version = "1.0.41" 1092 380 source = "registry+https://github.com/rust-lang/crates.io-index" 1093 - checksum = "970573b86f7e5795c8c6c50c56ef602368593f0687188da27fd489a59e253630" 381 + checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" 1094 382 dependencies = [ 1095 383 "proc-macro2", 1096 - "quote", 1097 - "quote2-macros", 1098 384 ] 1099 385 1100 386 [[package]] 1101 - name = "quote2-macros" 1102 - version = "0.7.0" 1103 - source = "registry+https://github.com/rust-lang/crates.io-index" 1104 - checksum = "5f4b89c37b2d870a28629ad20da669bb0e7d7214878d0d5111b304aa466e1977" 1105 - 1106 - [[package]] 1107 387 name = "r-efi" 1108 388 version = "5.3.0" 1109 389 source = "registry+https://github.com/rust-lang/crates.io-index" 1110 390 checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" 1111 391 1112 392 [[package]] 1113 - name = "rand" 1114 - version = "0.9.2" 1115 - source = "registry+https://github.com/rust-lang/crates.io-index" 1116 - checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" 1117 - dependencies = [ 1118 - "rand_chacha", 1119 - "rand_core", 1120 - ] 1121 - 1122 - [[package]] 1123 - name = "rand_chacha" 1124 - version = "0.9.0" 1125 - source = "registry+https://github.com/rust-lang/crates.io-index" 1126 - checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" 1127 - dependencies = [ 1128 - "ppv-lite86", 1129 - "rand_core", 1130 - ] 1131 - 1132 - [[package]] 1133 - name = "rand_core" 1134 - version = "0.9.3" 1135 - source = "registry+https://github.com/rust-lang/crates.io-index" 1136 - checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" 1137 - dependencies = [ 1138 - "getrandom", 1139 - ] 1140 - 1141 - [[package]] 1142 - name = "rayon" 1143 - version = "1.11.0" 1144 - source = "registry+https://github.com/rust-lang/crates.io-index" 1145 - checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" 1146 - dependencies = [ 1147 - "either", 1148 - "rayon-core", 1149 - ] 1150 - 1151 - [[package]] 1152 - name = "rayon-core" 1153 - version = "1.13.0" 1154 - source = "registry+https://github.com/rust-lang/crates.io-index" 1155 - checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" 1156 - dependencies = [ 1157 - "crossbeam-deque", 1158 - "crossbeam-utils", 1159 - ] 1160 - 1161 - [[package]] 1162 393 name = "redox_syscall" 1163 394 version = "0.5.18" 1164 395 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1168 399 ] 1169 400 1170 401 [[package]] 1171 - name = "regex" 1172 - version = "1.11.3" 1173 - source = "registry+https://github.com/rust-lang/crates.io-index" 1174 - checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" 1175 - dependencies = [ 1176 - "aho-corasick", 1177 - "memchr", 1178 - "regex-automata", 1179 - "regex-syntax", 1180 - ] 1181 - 1182 - [[package]] 1183 - name = "regex-automata" 1184 - version = "0.4.11" 1185 - source = "registry+https://github.com/rust-lang/crates.io-index" 1186 - checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" 1187 - dependencies = [ 1188 - "aho-corasick", 1189 - "memchr", 1190 - "regex-syntax", 1191 - ] 1192 - 1193 - [[package]] 1194 - name = "regex-syntax" 1195 - version = "0.8.6" 1196 - source = "registry+https://github.com/rust-lang/crates.io-index" 1197 - checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" 1198 - 1199 - [[package]] 1200 402 name = "repo-stream" 1201 403 version = "0.2.2" 1202 404 dependencies = [ 1203 - "bincode", 1204 - "candystore", 1205 405 "clap", 1206 - "criterion", 1207 - "env_logger", 1208 - "futures", 1209 - "futures-core", 1210 - "ipld-core", 1211 - "iroh-car", 1212 - "log", 1213 - "multibase", 1214 - "rusqlite", 1215 - "serde", 1216 - "serde_bytes", 1217 - "serde_ipld_dagcbor", 1218 - "sha2", 1219 - "tempfile", 1220 - "thiserror 2.0.17", 1221 - "tokio", 1222 - ] 1223 - 1224 - [[package]] 1225 - name = "rusqlite" 1226 - version = "0.37.0" 1227 - source = "registry+https://github.com/rust-lang/crates.io-index" 1228 - checksum = "165ca6e57b20e1351573e3729b958bc62f0e48025386970b6e4d29e7a7e71f3f" 1229 - dependencies = [ 1230 - "bitflags", 1231 - "fallible-iterator", 1232 - "fallible-streaming-iterator", 1233 - "hashlink", 1234 - "libsqlite3-sys", 1235 - "smallvec", 406 + "fjall", 1236 407 ] 1237 408 1238 409 [[package]] 1239 - name = "rustc-demangle" 1240 - version = "0.1.26" 410 + name = "rustc-hash" 411 + version = "2.1.1" 1241 412 source = "registry+https://github.com/rust-lang/crates.io-index" 1242 - checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" 413 + checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" 1243 414 1244 415 [[package]] 1245 416 name = "rustix" ··· 1251 422 "errno", 1252 423 "libc", 1253 424 "linux-raw-sys", 1254 - "windows-sys 0.60.2", 1255 - ] 1256 - 1257 - [[package]] 1258 - name = "rustversion" 1259 - version = "1.0.22" 1260 - source = "registry+https://github.com/rust-lang/crates.io-index" 1261 - checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" 1262 - 1263 - [[package]] 1264 - name = "ryu" 1265 - version = "1.0.20" 1266 - source = "registry+https://github.com/rust-lang/crates.io-index" 1267 - checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" 1268 - 1269 - [[package]] 1270 - name = "same-file" 1271 - version = "1.0.6" 1272 - source = "registry+https://github.com/rust-lang/crates.io-index" 1273 - checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 1274 - dependencies = [ 1275 - "winapi-util", 425 + "windows-sys", 1276 426 ] 1277 427 1278 428 [[package]] ··· 1282 432 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 1283 433 1284 434 [[package]] 1285 - name = "serde" 1286 - version = "1.0.228" 1287 - source = "registry+https://github.com/rust-lang/crates.io-index" 1288 - checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" 1289 - dependencies = [ 1290 - "serde_core", 1291 - "serde_derive", 1292 - ] 1293 - 1294 - [[package]] 1295 - name = "serde_bytes" 1296 - version = "0.11.19" 1297 - source = "registry+https://github.com/rust-lang/crates.io-index" 1298 - checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" 1299 - dependencies = [ 1300 - "serde", 1301 - "serde_core", 1302 - ] 1303 - 1304 - [[package]] 1305 - name = "serde_core" 1306 - version = "1.0.228" 1307 - source = "registry+https://github.com/rust-lang/crates.io-index" 1308 - checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" 1309 - dependencies = [ 1310 - "serde_derive", 1311 - ] 1312 - 1313 - [[package]] 1314 - name = "serde_derive" 1315 - version = "1.0.228" 1316 - source = "registry+https://github.com/rust-lang/crates.io-index" 1317 - checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" 1318 - dependencies = [ 1319 - "proc-macro2", 1320 - "quote", 1321 - "syn 2.0.106", 1322 - ] 1323 - 1324 - [[package]] 1325 - name = "serde_ipld_dagcbor" 1326 - version = "0.6.4" 1327 - source = "registry+https://github.com/rust-lang/crates.io-index" 1328 - checksum = "46182f4f08349a02b45c998ba3215d3f9de826246ba02bb9dddfe9a2a2100778" 1329 - dependencies = [ 1330 - "cbor4ii", 1331 - "ipld-core", 1332 - "scopeguard", 1333 - "serde", 1334 - ] 1335 - 1336 - [[package]] 1337 - name = "serde_json" 1338 - version = "1.0.145" 1339 - source = "registry+https://github.com/rust-lang/crates.io-index" 1340 - checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" 1341 - dependencies = [ 1342 - "itoa", 1343 - "memchr", 1344 - "ryu", 1345 - "serde", 1346 - "serde_core", 1347 - ] 1348 - 1349 - [[package]] 1350 - name = "sha2" 1351 - version = "0.10.9" 1352 - source = "registry+https://github.com/rust-lang/crates.io-index" 1353 - checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" 1354 - dependencies = [ 1355 - "cfg-if", 1356 - "cpufeatures", 1357 - "digest", 1358 - ] 1359 - 1360 - [[package]] 1361 - name = "signal-hook-registry" 1362 - version = "1.4.6" 435 + name = "self_cell" 436 + version = "1.2.2" 1363 437 source = "registry+https://github.com/rust-lang/crates.io-index" 1364 - checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" 1365 - dependencies = [ 1366 - "libc", 1367 - ] 438 + checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" 1368 439 1369 440 [[package]] 1370 - name = "simd-itertools" 1371 - version = "0.3.0" 441 + name = "sfa" 442 + version = "1.0.0" 1372 443 source = "registry+https://github.com/rust-lang/crates.io-index" 1373 - checksum = "a037ed5ba0cb7102a5b720453b642c5b2cf39960edd2ceace91af8ec3743082a" 444 + checksum = "a1296838937cab56cd6c4eeeb8718ec777383700c33f060e2869867bd01d1175" 1374 445 dependencies = [ 1375 - "multiversion", 446 + "byteorder-lite", 447 + "log", 448 + "xxhash-rust", 1376 449 ] 1377 450 1378 451 [[package]] 1379 - name = "siphasher" 1380 - version = "1.0.1" 1381 - source = "registry+https://github.com/rust-lang/crates.io-index" 1382 - checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" 1383 - 1384 - [[package]] 1385 - name = "slab" 1386 - version = "0.4.11" 1387 - source = "registry+https://github.com/rust-lang/crates.io-index" 1388 - checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" 1389 - 1390 - [[package]] 1391 452 name = "smallvec" 1392 453 version = "1.15.1" 1393 454 source = "registry+https://github.com/rust-lang/crates.io-index" 1394 455 checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" 1395 456 1396 457 [[package]] 1397 - name = "socket2" 1398 - version = "0.6.0" 458 + name = "spin" 459 + version = "0.9.8" 1399 460 source = "registry+https://github.com/rust-lang/crates.io-index" 1400 - checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" 461 + checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" 1401 462 dependencies = [ 1402 - "libc", 1403 - "windows-sys 0.59.0", 463 + "lock_api", 1404 464 ] 1405 465 1406 466 [[package]] ··· 1411 471 1412 472 [[package]] 1413 473 name = "syn" 1414 - version = "1.0.109" 1415 - source = "registry+https://github.com/rust-lang/crates.io-index" 1416 - checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 1417 - dependencies = [ 1418 - "proc-macro2", 1419 - "quote", 1420 - "unicode-ident", 1421 - ] 1422 - 1423 - [[package]] 1424 - name = "syn" 1425 474 version = "2.0.106" 1426 475 source = "registry+https://github.com/rust-lang/crates.io-index" 1427 476 checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" ··· 1432 481 ] 1433 482 1434 483 [[package]] 1435 - name = "target-features" 1436 - version = "0.1.6" 1437 - source = "registry+https://github.com/rust-lang/crates.io-index" 1438 - checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" 1439 - 1440 - [[package]] 1441 484 name = "tempfile" 1442 485 version = "3.23.0" 1443 486 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1447 490 "getrandom", 1448 491 "once_cell", 1449 492 "rustix", 1450 - "windows-sys 0.60.2", 493 + "windows-sys", 1451 494 ] 1452 495 1453 496 [[package]] 1454 - name = "thiserror" 1455 - version = "1.0.69" 1456 - source = "registry+https://github.com/rust-lang/crates.io-index" 1457 - checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" 1458 - dependencies = [ 1459 - "thiserror-impl 1.0.69", 1460 - ] 1461 - 1462 - [[package]] 1463 - name = "thiserror" 1464 - version = "2.0.17" 497 + name = "twox-hash" 498 + version = "2.1.2" 1465 499 source = "registry+https://github.com/rust-lang/crates.io-index" 1466 - checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" 1467 - dependencies = [ 1468 - "thiserror-impl 2.0.17", 1469 - ] 1470 - 1471 - [[package]] 1472 - name = "thiserror-impl" 1473 - version = "1.0.69" 1474 - source = "registry+https://github.com/rust-lang/crates.io-index" 1475 - checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" 1476 - dependencies = [ 1477 - "proc-macro2", 1478 - "quote", 1479 - "syn 2.0.106", 1480 - ] 1481 - 1482 - [[package]] 1483 - name = "thiserror-impl" 1484 - version = "2.0.17" 1485 - source = "registry+https://github.com/rust-lang/crates.io-index" 1486 - checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" 1487 - dependencies = [ 1488 - "proc-macro2", 1489 - "quote", 1490 - "syn 2.0.106", 1491 - ] 1492 - 1493 - [[package]] 1494 - name = "tinytemplate" 1495 - version = "1.2.1" 1496 - source = "registry+https://github.com/rust-lang/crates.io-index" 1497 - checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 1498 - dependencies = [ 1499 - "serde", 1500 - "serde_json", 1501 - ] 1502 - 1503 - [[package]] 1504 - name = "tokio" 1505 - version = "1.47.1" 1506 - source = "registry+https://github.com/rust-lang/crates.io-index" 1507 - checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" 1508 - dependencies = [ 1509 - "backtrace", 1510 - "bytes", 1511 - "io-uring", 1512 - "libc", 1513 - "mio", 1514 - "parking_lot", 1515 - "pin-project-lite", 1516 - "signal-hook-registry", 1517 - "slab", 1518 - "socket2", 1519 - "tokio-macros", 1520 - "windows-sys 0.59.0", 1521 - ] 1522 - 1523 - [[package]] 1524 - name = "tokio-macros" 1525 - version = "2.5.0" 1526 - source = "registry+https://github.com/rust-lang/crates.io-index" 1527 - checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" 1528 - dependencies = [ 1529 - "proc-macro2", 1530 - "quote", 1531 - "syn 2.0.106", 1532 - ] 1533 - 1534 - [[package]] 1535 - name = "typenum" 1536 - version = "1.19.0" 1537 - source = "registry+https://github.com/rust-lang/crates.io-index" 1538 - checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" 500 + checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" 1539 501 1540 502 [[package]] 1541 503 name = "unicode-ident" ··· 1544 506 checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" 1545 507 1546 508 [[package]] 1547 - name = "unsigned-varint" 1548 - version = "0.7.2" 1549 - source = "registry+https://github.com/rust-lang/crates.io-index" 1550 - checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105" 1551 - 1552 - [[package]] 1553 - name = "unsigned-varint" 1554 - version = "0.8.0" 1555 - source = "registry+https://github.com/rust-lang/crates.io-index" 1556 - checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06" 1557 - 1558 - [[package]] 1559 - name = "unty" 1560 - version = "0.0.4" 1561 - source = "registry+https://github.com/rust-lang/crates.io-index" 1562 - checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" 1563 - 1564 - [[package]] 1565 509 name = "utf8parse" 1566 510 version = "0.2.2" 1567 511 source = "registry+https://github.com/rust-lang/crates.io-index" 1568 512 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 1569 513 1570 514 [[package]] 1571 - name = "uuid" 1572 - version = "1.19.0" 1573 - source = "registry+https://github.com/rust-lang/crates.io-index" 1574 - checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" 1575 - dependencies = [ 1576 - "js-sys", 1577 - "wasm-bindgen", 1578 - ] 1579 - 1580 - [[package]] 1581 - name = "vcpkg" 1582 - version = "0.2.15" 1583 - source = "registry+https://github.com/rust-lang/crates.io-index" 1584 - checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" 1585 - 1586 - [[package]] 1587 - name = "version_check" 1588 - version = "0.9.5" 1589 - source = "registry+https://github.com/rust-lang/crates.io-index" 1590 - checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 1591 - 1592 - [[package]] 1593 - name = "virtue" 1594 - version = "0.0.18" 1595 - source = "registry+https://github.com/rust-lang/crates.io-index" 1596 - checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" 1597 - 1598 - [[package]] 1599 - name = "walkdir" 1600 - version = "2.5.0" 1601 - source = "registry+https://github.com/rust-lang/crates.io-index" 1602 - checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 1603 - dependencies = [ 1604 - "same-file", 1605 - "winapi-util", 1606 - ] 1607 - 1608 - [[package]] 1609 - name = "wasi" 1610 - version = "0.11.1+wasi-snapshot-preview1" 515 + name = "varint-rs" 516 + version = "2.2.0" 1611 517 source = "registry+https://github.com/rust-lang/crates.io-index" 1612 - checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" 518 + checksum = "8f54a172d0620933a27a4360d3db3e2ae0dd6cceae9730751a036bbf182c4b23" 1613 519 1614 520 [[package]] 1615 521 name = "wasi" ··· 1630 536 ] 1631 537 1632 538 [[package]] 1633 - name = "wasm-bindgen" 1634 - version = "0.2.104" 1635 - source = "registry+https://github.com/rust-lang/crates.io-index" 1636 - checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" 1637 - dependencies = [ 1638 - "cfg-if", 1639 - "once_cell", 1640 - "rustversion", 1641 - "wasm-bindgen-macro", 1642 - "wasm-bindgen-shared", 1643 - ] 1644 - 1645 - [[package]] 1646 - name = "wasm-bindgen-backend" 1647 - version = "0.2.104" 1648 - source = "registry+https://github.com/rust-lang/crates.io-index" 1649 - checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" 1650 - dependencies = [ 1651 - "bumpalo", 1652 - "log", 1653 - "proc-macro2", 1654 - "quote", 1655 - "syn 2.0.106", 1656 - "wasm-bindgen-shared", 1657 - ] 1658 - 1659 - [[package]] 1660 - name = "wasm-bindgen-macro" 1661 - version = "0.2.104" 1662 - source = "registry+https://github.com/rust-lang/crates.io-index" 1663 - checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" 1664 - dependencies = [ 1665 - "quote", 1666 - "wasm-bindgen-macro-support", 1667 - ] 1668 - 1669 - [[package]] 1670 - name = "wasm-bindgen-macro-support" 1671 - version = "0.2.104" 1672 - source = "registry+https://github.com/rust-lang/crates.io-index" 1673 - checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" 1674 - dependencies = [ 1675 - "proc-macro2", 1676 - "quote", 1677 - "syn 2.0.106", 1678 - "wasm-bindgen-backend", 1679 - "wasm-bindgen-shared", 1680 - ] 1681 - 1682 - [[package]] 1683 - name = "wasm-bindgen-shared" 1684 - version = "0.2.104" 1685 - source = "registry+https://github.com/rust-lang/crates.io-index" 1686 - checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" 1687 - dependencies = [ 1688 - "unicode-ident", 1689 - ] 1690 - 1691 - [[package]] 1692 - name = "web-sys" 1693 - version = "0.3.81" 1694 - source = "registry+https://github.com/rust-lang/crates.io-index" 1695 - checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" 1696 - dependencies = [ 1697 - "js-sys", 1698 - "wasm-bindgen", 1699 - ] 1700 - 1701 - [[package]] 1702 - name = "winapi" 1703 - version = "0.3.9" 1704 - source = "registry+https://github.com/rust-lang/crates.io-index" 1705 - checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1706 - dependencies = [ 1707 - "winapi-i686-pc-windows-gnu", 1708 - "winapi-x86_64-pc-windows-gnu", 1709 - ] 1710 - 1711 - [[package]] 1712 - name = "winapi-i686-pc-windows-gnu" 1713 - version = "0.4.0" 1714 - source = "registry+https://github.com/rust-lang/crates.io-index" 1715 - checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1716 - 1717 - [[package]] 1718 - name = "winapi-util" 1719 - version = "0.1.11" 1720 - source = "registry+https://github.com/rust-lang/crates.io-index" 1721 - checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" 1722 - dependencies = [ 1723 - "windows-sys 0.60.2", 1724 - ] 1725 - 1726 - [[package]] 1727 - name = "winapi-x86_64-pc-windows-gnu" 1728 - version = "0.4.0" 1729 - source = "registry+https://github.com/rust-lang/crates.io-index" 1730 - checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1731 - 1732 - [[package]] 1733 539 name = "windows-link" 1734 540 version = "0.2.1" 1735 541 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1737 543 1738 544 [[package]] 1739 545 name = "windows-sys" 1740 - version = "0.59.0" 1741 - source = "registry+https://github.com/rust-lang/crates.io-index" 1742 - checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 1743 - dependencies = [ 1744 - "windows-targets 0.52.6", 1745 - ] 1746 - 1747 - [[package]] 1748 - name = "windows-sys" 1749 546 version = "0.60.2" 1750 547 source = "registry+https://github.com/rust-lang/crates.io-index" 1751 548 checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" 1752 549 dependencies = [ 1753 - "windows-targets 0.53.5", 1754 - ] 1755 - 1756 - [[package]] 1757 - name = "windows-targets" 1758 - version = "0.52.6" 1759 - source = "registry+https://github.com/rust-lang/crates.io-index" 1760 - checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 1761 - dependencies = [ 1762 - "windows_aarch64_gnullvm 0.52.6", 1763 - "windows_aarch64_msvc 0.52.6", 1764 - "windows_i686_gnu 0.52.6", 1765 - "windows_i686_gnullvm 0.52.6", 1766 - "windows_i686_msvc 0.52.6", 1767 - "windows_x86_64_gnu 0.52.6", 1768 - "windows_x86_64_gnullvm 0.52.6", 1769 - "windows_x86_64_msvc 0.52.6", 550 + "windows-targets", 1770 551 ] 1771 552 1772 553 [[package]] ··· 1776 557 checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" 1777 558 dependencies = [ 1778 559 "windows-link", 1779 - "windows_aarch64_gnullvm 0.53.1", 1780 - "windows_aarch64_msvc 0.53.1", 1781 - "windows_i686_gnu 0.53.1", 1782 - "windows_i686_gnullvm 0.53.1", 1783 - "windows_i686_msvc 0.53.1", 1784 - "windows_x86_64_gnu 0.53.1", 1785 - "windows_x86_64_gnullvm 0.53.1", 1786 - "windows_x86_64_msvc 0.53.1", 560 + "windows_aarch64_gnullvm", 561 + "windows_aarch64_msvc", 562 + "windows_i686_gnu", 563 + "windows_i686_gnullvm", 564 + "windows_i686_msvc", 565 + "windows_x86_64_gnu", 566 + "windows_x86_64_gnullvm", 567 + "windows_x86_64_msvc", 1787 568 ] 1788 - 1789 - [[package]] 1790 - name = "windows_aarch64_gnullvm" 1791 - version = "0.52.6" 1792 - source = "registry+https://github.com/rust-lang/crates.io-index" 1793 - checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 1794 569 1795 570 [[package]] 1796 571 name = "windows_aarch64_gnullvm" ··· 1800 575 1801 576 [[package]] 1802 577 name = "windows_aarch64_msvc" 1803 - version = "0.52.6" 1804 - source = "registry+https://github.com/rust-lang/crates.io-index" 1805 - checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 1806 - 1807 - [[package]] 1808 - name = "windows_aarch64_msvc" 1809 578 version = "0.53.1" 1810 579 source = "registry+https://github.com/rust-lang/crates.io-index" 1811 580 checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" 1812 581 1813 582 [[package]] 1814 583 name = "windows_i686_gnu" 1815 - version = "0.52.6" 1816 - source = "registry+https://github.com/rust-lang/crates.io-index" 1817 - checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 1818 - 1819 - [[package]] 1820 - name = "windows_i686_gnu" 1821 584 version = "0.53.1" 1822 585 source = "registry+https://github.com/rust-lang/crates.io-index" 1823 586 checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" 1824 587 1825 588 [[package]] 1826 589 name = "windows_i686_gnullvm" 1827 - version = "0.52.6" 1828 - source = "registry+https://github.com/rust-lang/crates.io-index" 1829 - checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 1830 - 1831 - [[package]] 1832 - name = "windows_i686_gnullvm" 1833 590 version = "0.53.1" 1834 591 source = "registry+https://github.com/rust-lang/crates.io-index" 1835 592 checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" 1836 593 1837 594 [[package]] 1838 595 name = "windows_i686_msvc" 1839 - version = "0.52.6" 1840 - source = "registry+https://github.com/rust-lang/crates.io-index" 1841 - checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 1842 - 1843 - [[package]] 1844 - name = "windows_i686_msvc" 1845 596 version = "0.53.1" 1846 597 source = "registry+https://github.com/rust-lang/crates.io-index" 1847 598 checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" 1848 599 1849 600 [[package]] 1850 601 name = "windows_x86_64_gnu" 1851 - version = "0.52.6" 1852 - source = "registry+https://github.com/rust-lang/crates.io-index" 1853 - checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 1854 - 1855 - [[package]] 1856 - name = "windows_x86_64_gnu" 1857 602 version = "0.53.1" 1858 603 source = "registry+https://github.com/rust-lang/crates.io-index" 1859 604 checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" 1860 605 1861 606 [[package]] 1862 607 name = "windows_x86_64_gnullvm" 1863 - version = "0.52.6" 1864 - source = "registry+https://github.com/rust-lang/crates.io-index" 1865 - checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1866 - 1867 - [[package]] 1868 - name = "windows_x86_64_gnullvm" 1869 608 version = "0.53.1" 1870 609 source = "registry+https://github.com/rust-lang/crates.io-index" 1871 610 checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" 1872 611 1873 612 [[package]] 1874 613 name = "windows_x86_64_msvc" 1875 - version = "0.52.6" 1876 - source = "registry+https://github.com/rust-lang/crates.io-index" 1877 - checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1878 - 1879 - [[package]] 1880 - name = "windows_x86_64_msvc" 1881 614 version = "0.53.1" 1882 615 source = "registry+https://github.com/rust-lang/crates.io-index" 1883 616 checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" ··· 1889 622 checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" 1890 623 1891 624 [[package]] 1892 - name = "zerocopy" 1893 - version = "0.8.27" 1894 - source = "registry+https://github.com/rust-lang/crates.io-index" 1895 - checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" 1896 - dependencies = [ 1897 - "zerocopy-derive", 1898 - ] 1899 - 1900 - [[package]] 1901 - name = "zerocopy-derive" 1902 - version = "0.8.27" 625 + name = "xxhash-rust" 626 + version = "0.8.15" 1903 627 source = "registry+https://github.com/rust-lang/crates.io-index" 1904 - checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" 1905 - dependencies = [ 1906 - "proc-macro2", 1907 - "quote", 1908 - "syn 2.0.106", 1909 - ] 628 + checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
+1 -36
Cargo.toml
··· 7 7 repository = "https://tangled.org/@microcosm.blue/repo-stream" 8 8 9 9 [dependencies] 10 - bincode = { version = "2.0.1", features = ["serde"] } 11 - candystore = "0.5.6" 12 - futures = "0.3.31" 13 - futures-core = "0.3.31" 14 - ipld-core = { version = "0.4.2", features = ["serde"] } 15 - iroh-car = "0.5.1" 16 - log = "0.4.28" 17 - multibase = "0.9.2" 18 - rusqlite = "0.37.0" 19 - serde = { version = "1.0.228", features = ["derive"] } 20 - serde_bytes = "0.11.19" 21 - serde_ipld_dagcbor = "0.6.4" 22 - sha2 = "0.10.9" 23 - thiserror = "2.0.17" 24 - tokio = { version = "1.47.1", features = ["rt", "sync"] } 25 - 26 - [dev-dependencies] 10 + fjall = "3.0.1" 27 11 clap = { version = "4.5.48", features = ["derive"] } 28 - criterion = { version = "0.7.0", features = ["async_tokio"] } 29 - env_logger = "0.11.8" 30 - multibase = "0.9.2" 31 - tempfile = "3.23.0" 32 - tokio = { version = "1.47.1", features = ["full"] } 33 12 34 - [profile.profiling] 35 - inherits = "release" 36 - debug = true 37 - 38 - # [profile.release] 39 - # debug = true 40 - 41 - [[bench]] 42 - name = "non-huge-cars" 43 - harness = false 44 - 45 - [[bench]] 46 - name = "huge-car" 47 - harness = false
+26 -76
examples/disk-read-file/main.rs
··· 1 - /*! 2 - Read a CAR file by spilling to disk 3 - */ 4 - 5 - extern crate repo_stream; 6 1 use clap::Parser; 7 - use repo_stream::{DiskBuilder, Driver, DriverBuilder}; 8 - use std::path::PathBuf; 9 - use std::time::Instant; 2 + use fjall::{Database, KeyspaceCreateOptions}; 3 + use std::{path::PathBuf, collections::BTreeMap}; 10 4 11 5 #[derive(Debug, Parser)] 12 6 struct Args { 13 7 #[arg()] 14 - car: PathBuf, 15 - #[arg()] 16 - tmpfile: PathBuf, 8 + db_path: PathBuf, 17 9 } 18 10 19 - #[tokio::main] 20 - async fn main() -> Result<(), Box<dyn std::error::Error>> { 21 - env_logger::init(); 22 - 23 - let Args { car, tmpfile } = Args::parse(); 24 - 25 - // repo-stream takes an AsyncRead as input. wrapping a filesystem read in 26 - // BufReader can provide a really significant performance win. 27 - let reader = tokio::fs::File::open(car).await?; 28 - let reader = tokio::io::BufReader::new(reader); 29 - 30 - log::info!("hello! reading the car..."); 31 - let t0 = Instant::now(); 32 - 33 - // in this example we only bother handling CARs that are too big for memory 34 - // `noop` helper means: do no block processing, store the raw blocks 35 - let driver = match DriverBuilder::new() 36 - .with_mem_limit_mb(10) // how much memory can be used before disk spill 37 - .load_car(reader) 38 - .await? 39 - { 40 - Driver::Memory(_, _) => panic!("try this on a bigger car"), 41 - Driver::Disk(big_stuff) => { 42 - // we reach here if the repo was too big and needs to be spilled to 43 - // disk to continue 44 - 45 - // set up a disk store we can spill to 46 - let disk_store = DiskBuilder::new().open(tmpfile, Some(big_stuff.keys_hint())).await?; 47 - 48 - // do the spilling, get back a (similar) driver 49 - let (commit, driver) = big_stuff.finish_loading(disk_store).await?; 50 - 51 - // at this point you might want to fetch the account's signing key 52 - // via the DID from the commit, and then verify the signature. 53 - log::warn!("big's comit ({:?}): {:?}", t0.elapsed(), commit); 54 - 55 - // pop the driver back out to get some code indentation relief 56 - driver 57 - } 58 - }; 11 + fn main() -> Result<(), Box<dyn std::error::Error>> { 12 + let Args { db_path } = Args::parse(); 59 13 60 - // collect some random stats about the blocks 61 - let mut n = 0; 62 - let mut zeros = 0; 14 + let db = Database::builder(db_path).open()?; 15 + let ks = db.keyspace("z", KeyspaceCreateOptions::default)?; 16 + let mut seen_keys: BTreeMap<Vec<u8>, usize> = BTreeMap::default(); 63 17 64 - log::info!("walking..."); 18 + print!("writing..."); 19 + for i in 0..250_000_usize { 20 + let k = i.to_be_bytes().to_vec(); 21 + ks.insert(k.clone(), vec![0xAA; 256])?; 22 + seen_keys.insert(k, i); 23 + } 65 24 66 - // this example uses the disk driver's channel mode: the tree walking is 67 - // spawned onto a blocking thread, and we get chunks of rkey+blocks back 68 - let (mut rx, join) = driver.to_channel(512); 69 - while let Some(r) = rx.recv().await { 70 - let pairs = r?; 25 + println!(" done. checking keys..."); 71 26 72 - // keep a count of the total number of blocks seen 73 - n += pairs.len(); 27 + // remove every seen key that fjall actually has, to see what's left 28 + for guard in ks.iter() { 29 + seen_keys.remove(guard.key()?.as_ref()); 30 + } 74 31 75 - for (_, block) in pairs { 76 - // for each block, count how many bytes are equal to '0' 77 - // (this is just an example, you probably want to do something more 78 - // interesting) 79 - zeros += block.into_iter().filter(|&b| b == b'0').count() 32 + // report the result 33 + if seen_keys.len() == 0 { 34 + println!("[ OK ] all keys found"); 35 + } else { 36 + println!("[FAIL] fjall did not have all seen_keys:"); 37 + for (k, i) in seen_keys { 38 + println!(" insert #{i} missing, key bytes: {k:?}"); 80 39 } 81 40 } 82 - 83 - log::info!("arrived! ({:?}) joining rx...", t0.elapsed()); 84 - 85 - // clean up the database. would be nice to do this in drop so it happens 86 - // automatically, but some blocking work happens, so that's not allowed in 87 - // async rust. ๐Ÿคทโ€โ™€๏ธ 88 - join.await?.reset_store().await?; 89 - 90 - log::info!("done. n={n} zeros={zeros}"); 91 41 92 42 Ok(()) 93 43 }
-159
src/disk.rs
··· 1 - /*! 2 - Disk storage for blocks on disk 3 - 4 - Currently this uses sqlite. In testing sqlite wasn't the fastest, but it seemed 5 - to be the best behaved in terms of both on-disk space usage and memory usage. 6 - 7 - ```no_run 8 - # use repo_stream::{DiskBuilder, DiskError}; 9 - # #[tokio::main] 10 - # async fn main() -> Result<(), DiskError> { 11 - let store = DiskBuilder::new() 12 - .with_max_stored_mb(1024) // errors when >1GiB of processed blocks are inserted 13 - .open("/some/path.db".into()).await?; 14 - # Ok(()) 15 - # } 16 - ``` 17 - */ 18 - 19 - use crate::drive::DriveError; 20 - use candystore::{CandyError, CandyStore, Config}; 21 - use std::path::PathBuf; 22 - 23 - #[derive(Debug, thiserror::Error)] 24 - pub enum DiskError { 25 - /// A wrapped database error 26 - /// 27 - /// (The wrapped err should probably be obscured to remove public-facing 28 - /// sqlite bits) 29 - #[error(transparent)] 30 - DbError(#[from] CandyError), 31 - /// Unfortunately candystore uses anyhow::Result for it's open call 32 - #[error("Failed on a db call, see logs")] 33 - DbGarbageError, 34 - /// A tokio blocking task failed to join 35 - #[error("Failed to join a tokio blocking task: {0}")] 36 - JoinError(#[from] tokio::task::JoinError), 37 - /// The total size of stored blocks exceeded the allowed size 38 - /// 39 - /// If you need to process *really* big CARs, you can configure a higher 40 - /// limit. 41 - #[error("Maximum disk size reached")] 42 - MaxSizeExceeded, 43 - } 44 - 45 - /// Builder-style disk store setup 46 - #[derive(Debug, Clone)] 47 - pub struct DiskBuilder { 48 - /// Database stored block size limit 49 - /// 50 - /// Default: 10 GiB 51 - /// 52 - /// Note: actual size on disk may be more, but should approximately scale 53 - /// with this limit 54 - pub max_stored_mb: usize, 55 - } 56 - 57 - impl Default for DiskBuilder { 58 - fn default() -> Self { 59 - Self { 60 - max_stored_mb: 10 * 1024, // 10 GiB 61 - } 62 - } 63 - } 64 - 65 - impl DiskBuilder { 66 - /// Begin configuring the storage with defaults 67 - pub fn new() -> Self { 68 - Default::default() 69 - } 70 - /// Set the approximate stored block size limit 71 - /// 72 - /// Default: 10 GiB 73 - pub fn with_max_stored_mb(mut self, max: usize) -> Self { 74 - self.max_stored_mb = max; 75 - self 76 - } 77 - /// Open and initialize the actual disk storage 78 - pub async fn open(&self, path: PathBuf, keys_hint: Option<usize>) -> Result<DiskStore, DiskError> { 79 - DiskStore::new(path, self.max_stored_mb, keys_hint).await 80 - } 81 - } 82 - 83 - /// On-disk block storage 84 - pub struct DiskStore { 85 - db: CandyStore, 86 - max_stored: usize, 87 - stored: usize, 88 - } 89 - 90 - impl DiskStore { 91 - /// Initialize a new disk store 92 - pub async fn new(path: PathBuf, max_stored_mb: usize, keys_hint: Option<usize>) -> Result<Self, DiskError> { 93 - let max_stored = max_stored_mb * 2_usize.pow(20); 94 - let db = tokio::task::spawn_blocking(move || { 95 - let mut conf = Config::default(); 96 - // conf.max_shard_size = 256 * 1024 * 1024; 97 - // conf.min_compaction_threashold = 32 * 1024 * 1024; 98 - // conf.expected_number_of_keys = 1_200_000; 99 - if let Some(hint) = keys_hint { 100 - conf.expected_number_of_keys = hint; 101 - } 102 - conf.num_compaction_threads = 1; 103 - let db = CandyStore::open(path, conf).map_err(|e| { 104 - log::error!("{e:?}"); 105 - DiskError::DbGarbageError 106 - })?; 107 - 108 - Ok::<_, DiskError>(db) 109 - }) 110 - .await??; 111 - 112 - Ok(Self { 113 - db, 114 - max_stored, 115 - stored: 0, 116 - }) 117 - } 118 - 119 - /// Drop and recreate the kv table 120 - pub async fn reset(self) -> Result<Self, DiskError> { 121 - tokio::task::spawn_blocking(move || { 122 - Self::reset_tables(&self.db)?; 123 - Ok(self) 124 - }) 125 - .await? 126 - } 127 - fn reset_tables(db: &CandyStore) -> Result<(), DiskError> { 128 - db.clear().map_err(|e| { 129 - log::error!("{e:?}"); 130 - DiskError::DbGarbageError 131 - })?; 132 - 133 - Ok(()) 134 - } 135 - 136 - pub(crate) fn put_many( 137 - &mut self, 138 - kv: impl Iterator<Item = Result<(Vec<u8>, Vec<u8>), DriveError>>, 139 - ) -> Result<(), DriveError> { 140 - for pair in kv { 141 - let (k, v) = pair?; 142 - self.stored += v.len(); 143 - if self.stored > self.max_stored { 144 - return Err(DiskError::MaxSizeExceeded.into()); 145 - } 146 - self.db.owned_set(k, &v).map_err(|e| { 147 - log::error!("{e:?}"); 148 - DiskError::DbGarbageError 149 - })?; 150 - } 151 - Ok(()) 152 - } 153 - pub(crate) fn get(&mut self, key: Vec<u8>) -> Result<Option<Vec<u8>>, DiskError> { 154 - self.db.owned_get(key).map_err(|e| { 155 - log::error!("{e:?}"); 156 - DiskError::DbGarbageError 157 - }) 158 - } 159 - }
-605
src/drive.rs
··· 1 - //! Consume a CAR from an AsyncRead, producing an ordered stream of records 2 - 3 - use crate::disk::{DiskError, DiskStore}; 4 - use crate::process::Processable; 5 - use ipld_core::cid::Cid; 6 - use iroh_car::CarReader; 7 - use serde::{Deserialize, Serialize}; 8 - use std::collections::HashMap; 9 - use std::convert::Infallible; 10 - use tokio::{io::AsyncRead, sync::mpsc}; 11 - 12 - use crate::mst::{Commit, Node}; 13 - use crate::walk::{Step, WalkError, Walker}; 14 - 15 - /// Errors that can happen while consuming and emitting blocks and records 16 - #[derive(Debug, thiserror::Error)] 17 - pub enum DriveError { 18 - #[error("Error from iroh_car: {0}")] 19 - CarReader(#[from] iroh_car::Error), 20 - #[error("Failed to decode commit block: {0}")] 21 - BadBlock(#[from] serde_ipld_dagcbor::DecodeError<Infallible>), 22 - #[error("The Commit block reference by the root was not found")] 23 - MissingCommit, 24 - #[error("The MST block {0} could not be found")] 25 - MissingBlock(Cid), 26 - #[error("Failed to walk the mst tree: {0}")] 27 - WalkError(#[from] WalkError), 28 - #[error("CAR file had no roots")] 29 - MissingRoot, 30 - #[error("Storage error")] 31 - StorageError(#[from] DiskError), 32 - #[error("Encode error: {0}")] 33 - BincodeEncodeError(#[from] bincode::error::EncodeError), 34 - #[error("Tried to send on a closed channel")] 35 - ChannelSendError, // SendError takes <T> which we don't need 36 - #[error("Failed to join a task: {0}")] 37 - JoinError(#[from] tokio::task::JoinError), 38 - } 39 - 40 - #[derive(Debug, thiserror::Error)] 41 - pub enum DecodeError { 42 - #[error(transparent)] 43 - BincodeDecodeError(#[from] bincode::error::DecodeError), 44 - #[error("extra bytes remained after decoding")] 45 - ExtraGarbage, 46 - } 47 - 48 - /// An in-order chunk of Rkey + (processed) Block pairs 49 - pub type BlockChunk<T> = Vec<(String, T)>; 50 - 51 - #[derive(Debug, Clone, Serialize, Deserialize)] 52 - pub(crate) enum MaybeProcessedBlock<T> { 53 - /// A block that's *probably* a Node (but we can't know yet) 54 - /// 55 - /// It *can be* a record that suspiciously looks a lot like a node, so we 56 - /// cannot eagerly turn it into a Node. We only know for sure what it is 57 - /// when we actually walk down the MST 58 - Raw(Vec<u8>), 59 - /// A processed record from a block that was definitely not a Node 60 - /// 61 - /// Processing has to be fallible because the CAR can have totally-unused 62 - /// blocks, which can just be garbage. since we're eagerly trying to process 63 - /// record blocks without knowing for sure that they *are* records, we 64 - /// discard any definitely-not-nodes that fail processing and keep their 65 - /// error in the buffer for them. if we later try to retreive them as a 66 - /// record, then we can surface the error. 67 - /// 68 - /// If we _never_ needed this block, then we may have wasted a bit of effort 69 - /// trying to process it. Oh well. 70 - /// 71 - /// There's an alternative here, which would be to kick unprocessable blocks 72 - /// back to Raw, or maybe even a new RawUnprocessable variant. Then we could 73 - /// surface the typed error later if needed by trying to reprocess. 74 - Processed(T), 75 - } 76 - 77 - impl<T: Processable> Processable for MaybeProcessedBlock<T> { 78 - /// TODO this is probably a little broken 79 - fn get_size(&self) -> usize { 80 - use std::{cmp::max, mem::size_of}; 81 - 82 - // enum is always as big as its biggest member? 83 - let base_size = max(size_of::<Vec<u8>>(), size_of::<T>()); 84 - 85 - let extra = match self { 86 - Self::Raw(bytes) => bytes.len(), 87 - Self::Processed(t) => t.get_size(), 88 - }; 89 - 90 - base_size + extra 91 - } 92 - } 93 - 94 - impl<T> MaybeProcessedBlock<T> { 95 - fn maybe(process: fn(Vec<u8>) -> T, data: Vec<u8>) -> Self { 96 - if Node::could_be(&data) { 97 - MaybeProcessedBlock::Raw(data) 98 - } else { 99 - MaybeProcessedBlock::Processed(process(data)) 100 - } 101 - } 102 - } 103 - 104 - /// Read a CAR file, buffering blocks in memory or to disk 105 - pub enum Driver<R: AsyncRead + Unpin, T: Processable> { 106 - /// All blocks fit within the memory limit 107 - /// 108 - /// You probably want to check the commit's signature. You can go ahead and 109 - /// walk the MST right away. 110 - Memory(Commit, MemDriver<T>), 111 - /// Blocks exceed the memory limit 112 - /// 113 - /// You'll need to provide a disk storage to continue. The commit will be 114 - /// returned and can be validated only once all blocks are loaded. 115 - Disk(NeedDisk<R, T>), 116 - } 117 - 118 - /// Builder-style driver setup 119 - #[derive(Debug, Clone)] 120 - pub struct DriverBuilder { 121 - pub mem_limit_mb: usize, 122 - } 123 - 124 - impl Default for DriverBuilder { 125 - fn default() -> Self { 126 - Self { mem_limit_mb: 16 } 127 - } 128 - } 129 - 130 - impl DriverBuilder { 131 - /// Begin configuring the driver with defaults 132 - pub fn new() -> Self { 133 - Default::default() 134 - } 135 - /// Set the in-memory size limit, in MiB 136 - /// 137 - /// Default: 16 MiB 138 - pub fn with_mem_limit_mb(self, new_limit: usize) -> Self { 139 - Self { 140 - mem_limit_mb: new_limit, 141 - } 142 - } 143 - /// Set the block processor 144 - /// 145 - /// Default: noop, raw blocks will be emitted 146 - pub fn with_block_processor<T: Processable>( 147 - self, 148 - p: fn(Vec<u8>) -> T, 149 - ) -> DriverBuilderWithProcessor<T> { 150 - DriverBuilderWithProcessor { 151 - mem_limit_mb: self.mem_limit_mb, 152 - block_processor: p, 153 - } 154 - } 155 - /// Begin processing an atproto MST from a CAR file 156 - pub async fn load_car<R: AsyncRead + Unpin>( 157 - &self, 158 - reader: R, 159 - ) -> Result<Driver<R, Vec<u8>>, DriveError> { 160 - Driver::load_car(reader, crate::process::noop, self.mem_limit_mb).await 161 - } 162 - } 163 - 164 - /// Builder-style driver intermediate step 165 - /// 166 - /// start from `DriverBuilder` 167 - #[derive(Debug, Clone)] 168 - pub struct DriverBuilderWithProcessor<T: Processable> { 169 - pub mem_limit_mb: usize, 170 - pub block_processor: fn(Vec<u8>) -> T, 171 - } 172 - 173 - impl<T: Processable> DriverBuilderWithProcessor<T> { 174 - /// Set the in-memory size limit, in MiB 175 - /// 176 - /// Default: 16 MiB 177 - pub fn with_mem_limit_mb(mut self, new_limit: usize) -> Self { 178 - self.mem_limit_mb = new_limit; 179 - self 180 - } 181 - /// Begin processing an atproto MST from a CAR file 182 - pub async fn load_car<R: AsyncRead + Unpin>( 183 - &self, 184 - reader: R, 185 - ) -> Result<Driver<R, T>, DriveError> { 186 - Driver::load_car(reader, self.block_processor, self.mem_limit_mb).await 187 - } 188 - } 189 - 190 - impl<R: AsyncRead + Unpin, T: Processable> Driver<R, T> { 191 - /// Begin processing an atproto MST from a CAR file 192 - /// 193 - /// Blocks will be loaded, processed, and buffered in memory. If the entire 194 - /// processed size is under the `mem_limit_mb` limit, a `Driver::Memory` 195 - /// will be returned along with a `Commit` ready for validation. 196 - /// 197 - /// If the `mem_limit_mb` limit is reached before loading all blocks, the 198 - /// partial state will be returned as `Driver::Disk(needed)`, which can be 199 - /// resumed by providing a `SqliteStorage` for on-disk block storage. 200 - pub async fn load_car( 201 - reader: R, 202 - process: fn(Vec<u8>) -> T, 203 - mem_limit_mb: usize, 204 - ) -> Result<Driver<R, T>, DriveError> { 205 - let max_size = mem_limit_mb * 2_usize.pow(20); 206 - let mut mem_blocks = HashMap::new(); 207 - 208 - let mut car = CarReader::new(reader).await?; 209 - 210 - let root = *car 211 - .header() 212 - .roots() 213 - .first() 214 - .ok_or(DriveError::MissingRoot)?; 215 - log::debug!("root: {root:?}"); 216 - 217 - let mut commit = None; 218 - 219 - // try to load all the blocks into memory 220 - let mut mem_size = 0; 221 - while let Some((cid, data)) = car.next_block().await? { 222 - // the root commit is a Special Third Kind of block that we need to make 223 - // sure not to optimistically send to the processing function 224 - if cid == root { 225 - let c: Commit = serde_ipld_dagcbor::from_slice(&data)?; 226 - commit = Some(c); 227 - continue; 228 - } 229 - 230 - // remaining possible types: node, record, other. optimistically process 231 - let maybe_processed = MaybeProcessedBlock::maybe(process, data); 232 - 233 - // stash (maybe processed) blocks in memory as long as we have room 234 - mem_size += std::mem::size_of::<Cid>() + maybe_processed.get_size(); 235 - mem_blocks.insert(cid, maybe_processed); 236 - if mem_size >= max_size { 237 - return Ok(Driver::Disk(NeedDisk { 238 - car, 239 - root, 240 - process, 241 - max_size, 242 - mem_blocks, 243 - commit, 244 - })); 245 - } 246 - } 247 - 248 - // all blocks loaded and we fit in memory! hopefully we found the commit... 249 - let commit = commit.ok_or(DriveError::MissingCommit)?; 250 - 251 - let walker = Walker::new(commit.data); 252 - 253 - Ok(Driver::Memory( 254 - commit, 255 - MemDriver { 256 - blocks: mem_blocks, 257 - walker, 258 - process, 259 - }, 260 - )) 261 - } 262 - } 263 - 264 - /// The core driver between the block stream and MST walker 265 - /// 266 - /// In the future, PDSs will export CARs in a stream-friendly order that will 267 - /// enable processing them with tiny memory overhead. But that future is not 268 - /// here yet. 269 - /// 270 - /// CARs are almost always in a stream-unfriendly order, so I'm reverting the 271 - /// optimistic stream features: we load all block first, then walk the MST. 272 - /// 273 - /// This makes things much simpler: we only need to worry about spilling to disk 274 - /// in one place, and we always have a reasonable expecatation about how much 275 - /// work the init function will do. We can drop the CAR reader before walking, 276 - /// so the sync/async boundaries become a little easier to work around. 277 - #[derive(Debug)] 278 - pub struct MemDriver<T: Processable> { 279 - blocks: HashMap<Cid, MaybeProcessedBlock<T>>, 280 - walker: Walker, 281 - process: fn(Vec<u8>) -> T, 282 - } 283 - 284 - impl<T: Processable> MemDriver<T> { 285 - /// Step through the record outputs, in rkey order 286 - pub async fn next_chunk(&mut self, n: usize) -> Result<Option<BlockChunk<T>>, DriveError> { 287 - let mut out = Vec::with_capacity(n); 288 - for _ in 0..n { 289 - // walk as far as we can until we run out of blocks or find a record 290 - match self.walker.step(&mut self.blocks, self.process)? { 291 - Step::Missing(cid) => return Err(DriveError::MissingBlock(cid)), 292 - Step::Finish => break, 293 - Step::Found { rkey, data } => { 294 - out.push((rkey, data)); 295 - continue; 296 - } 297 - }; 298 - } 299 - 300 - if out.is_empty() { 301 - Ok(None) 302 - } else { 303 - Ok(Some(out)) 304 - } 305 - } 306 - } 307 - 308 - /// A partially memory-loaded car file that needs disk spillover to continue 309 - pub struct NeedDisk<R: AsyncRead + Unpin, T: Processable> { 310 - car: CarReader<R>, 311 - root: Cid, 312 - process: fn(Vec<u8>) -> T, 313 - max_size: usize, 314 - mem_blocks: HashMap<Cid, MaybeProcessedBlock<T>>, 315 - pub commit: Option<Commit>, 316 - } 317 - 318 - impl<R: AsyncRead + Unpin, T: Processable> NeedDisk<R, T> { 319 - pub fn keys_hint(&self) -> usize { 320 - self.mem_blocks.len() * 10 321 - } 322 - } 323 - 324 - fn encode(v: impl Serialize) -> Result<Vec<u8>, bincode::error::EncodeError> { 325 - bincode::serde::encode_to_vec(v, bincode::config::standard()) 326 - } 327 - 328 - pub(crate) fn decode<T: Processable>(bytes: &[u8]) -> Result<T, DecodeError> { 329 - let (t, n) = bincode::serde::decode_from_slice(bytes, bincode::config::standard())?; 330 - if n != bytes.len() { 331 - return Err(DecodeError::ExtraGarbage); 332 - } 333 - Ok(t) 334 - } 335 - 336 - impl<R: AsyncRead + Unpin, T: Processable + Send + 'static> NeedDisk<R, T> { 337 - pub async fn finish_loading( 338 - mut self, 339 - mut store: DiskStore, 340 - ) -> Result<(Commit, DiskDriver<T>), DriveError> { 341 - // move store in and back out so we can manage lifetimes 342 - // dump mem blocks into the store 343 - store = tokio::task::spawn(async move { 344 - let kvs = self 345 - .mem_blocks 346 - .into_iter() 347 - .map(|(k, v)| Ok(encode(v).map(|v| (k.to_bytes(), v))?)); 348 - 349 - store.put_many(kvs)?; 350 - Ok::<_, DriveError>(store) 351 - }) 352 - .await??; 353 - 354 - let (tx, mut rx) = mpsc::channel::<Vec<(Cid, MaybeProcessedBlock<T>)>>(1); 355 - 356 - let store_worker = tokio::task::spawn_blocking(move || { 357 - while let Some(chunk) = rx.blocking_recv() { 358 - let kvs = chunk 359 - .into_iter() 360 - .map(|(k, v)| Ok(encode(v).map(|v| (k.to_bytes(), v))?)); 361 - store.put_many(kvs)?; 362 - } 363 - 364 - Ok::<_, DriveError>(store) 365 - }); // await later 366 - 367 - // dump the rest to disk (in chunks) 368 - log::debug!("dumping the rest of the stream..."); 369 - loop { 370 - let mut mem_size = 0; 371 - let mut chunk = vec![]; 372 - loop { 373 - let Some((cid, data)) = self.car.next_block().await? else { 374 - break; 375 - }; 376 - // we still gotta keep checking for the root since we might not have it 377 - if cid == self.root { 378 - let c: Commit = serde_ipld_dagcbor::from_slice(&data)?; 379 - self.commit = Some(c); 380 - continue; 381 - } 382 - // remaining possible types: node, record, other. optimistically process 383 - // TODO: get the actual in-memory size to compute disk spill 384 - let maybe_processed = MaybeProcessedBlock::maybe(self.process, data); 385 - mem_size += std::mem::size_of::<Cid>() + maybe_processed.get_size(); 386 - chunk.push((cid, maybe_processed)); 387 - if mem_size >= self.max_size { 388 - // soooooo if we're setting the db cache to max_size and then letting 389 - // multiple chunks in the queue that are >= max_size, then at any time 390 - // we might be using some multiple of max_size? 391 - break; 392 - } 393 - } 394 - if chunk.is_empty() { 395 - break; 396 - } 397 - tx.send(chunk) 398 - .await 399 - .map_err(|_| DriveError::ChannelSendError)?; 400 - } 401 - drop(tx); 402 - log::debug!("done. waiting for worker to finish..."); 403 - 404 - store = store_worker.await??; 405 - 406 - log::debug!("worker finished."); 407 - 408 - let commit = self.commit.ok_or(DriveError::MissingCommit)?; 409 - 410 - let walker = Walker::new(commit.data); 411 - 412 - Ok(( 413 - commit, 414 - DiskDriver { 415 - process: self.process, 416 - state: Some(BigState { store, walker }), 417 - }, 418 - )) 419 - } 420 - } 421 - 422 - struct BigState { 423 - store: DiskStore, 424 - walker: Walker, 425 - } 426 - 427 - /// MST walker that reads from disk instead of an in-memory hashmap 428 - pub struct DiskDriver<T: Clone> { 429 - process: fn(Vec<u8>) -> T, 430 - state: Option<BigState>, 431 - } 432 - 433 - // for doctests only 434 - #[doc(hidden)] 435 - pub fn _get_fake_disk_driver() -> DiskDriver<Vec<u8>> { 436 - use crate::process::noop; 437 - DiskDriver { 438 - process: noop, 439 - state: None, 440 - } 441 - } 442 - 443 - impl<T: Processable + Send + 'static> DiskDriver<T> { 444 - /// Walk the MST returning up to `n` rkey + record pairs 445 - /// 446 - /// ```no_run 447 - /// # use repo_stream::{drive::{DiskDriver, DriveError, _get_fake_disk_driver}, process::noop}; 448 - /// # #[tokio::main] 449 - /// # async fn main() -> Result<(), DriveError> { 450 - /// # let mut disk_driver = _get_fake_disk_driver(); 451 - /// while let Some(pairs) = disk_driver.next_chunk(256).await? { 452 - /// for (rkey, record) in pairs { 453 - /// println!("{rkey}: size={}", record.len()); 454 - /// } 455 - /// } 456 - /// let store = disk_driver.reset_store().await?; 457 - /// # Ok(()) 458 - /// # } 459 - /// ``` 460 - pub async fn next_chunk(&mut self, n: usize) -> Result<Option<BlockChunk<T>>, DriveError> { 461 - let process = self.process; 462 - 463 - // state should only *ever* be None transiently while inside here 464 - let mut state = self.state.take().expect("DiskDriver must have Some(state)"); 465 - 466 - // the big pain here is that we don't want to leave self.state in an 467 - // invalid state (None), so all the error paths have to make sure it 468 - // comes out again. 469 - let (state, res) = tokio::task::spawn_blocking( 470 - move || -> (BigState, Result<BlockChunk<T>, DriveError>) { 471 - let mut out = Vec::with_capacity(n); 472 - 473 - for _ in 0..n { 474 - // walk as far as we can until we run out of blocks or find a record 475 - let step = match state.walker.disk_step(&mut state.store, process) { 476 - Ok(s) => s, 477 - Err(e) => { 478 - return (state, Err(e.into())); 479 - } 480 - }; 481 - match step { 482 - Step::Missing(cid) => { 483 - return (state, Err(DriveError::MissingBlock(cid))); 484 - } 485 - Step::Finish => break, 486 - Step::Found { rkey, data } => out.push((rkey, data)), 487 - }; 488 - } 489 - 490 - (state, Ok::<_, DriveError>(out)) 491 - }, 492 - ) 493 - .await?; // on tokio JoinError, we'll be left with invalid state :( 494 - 495 - // *must* restore state before dealing with the actual result 496 - self.state = Some(state); 497 - 498 - let out = res?; 499 - 500 - if out.is_empty() { 501 - Ok(None) 502 - } else { 503 - Ok(Some(out)) 504 - } 505 - } 506 - 507 - fn read_tx_blocking( 508 - &mut self, 509 - n: usize, 510 - tx: mpsc::Sender<Result<BlockChunk<T>, DriveError>>, 511 - ) -> Result<(), mpsc::error::SendError<Result<BlockChunk<T>, DriveError>>> { 512 - let BigState { store, walker } = self.state.as_mut().expect("valid state"); 513 - 514 - loop { 515 - let mut out: BlockChunk<T> = Vec::with_capacity(n); 516 - 517 - for _ in 0..n { 518 - // walk as far as we can until we run out of blocks or find a record 519 - 520 - let step = match walker.disk_step(store, self.process) { 521 - Ok(s) => s, 522 - Err(e) => return tx.blocking_send(Err(e.into())), 523 - }; 524 - 525 - match step { 526 - Step::Missing(cid) => { 527 - return tx.blocking_send(Err(DriveError::MissingBlock(cid))); 528 - } 529 - Step::Finish => return Ok(()), 530 - Step::Found { rkey, data } => { 531 - out.push((rkey, data)); 532 - continue; 533 - } 534 - }; 535 - } 536 - 537 - if out.is_empty() { 538 - break; 539 - } 540 - tx.blocking_send(Ok(out))?; 541 - } 542 - 543 - Ok(()) 544 - } 545 - 546 - /// Spawn the disk reading task into a tokio blocking thread 547 - /// 548 - /// The idea is to avoid so much sending back and forth to the blocking 549 - /// thread, letting a blocking task do all the disk reading work and sending 550 - /// records and rkeys back through an `mpsc` channel instead. 551 - /// 552 - /// This might also allow the disk work to continue while processing the 553 - /// records. It's still not yet clear if this method actually has much 554 - /// benefit over just using `.next_chunk(n)`. 555 - /// 556 - /// ```no_run 557 - /// # use repo_stream::{drive::{DiskDriver, DriveError, _get_fake_disk_driver}, process::noop}; 558 - /// # #[tokio::main] 559 - /// # async fn main() -> Result<(), DriveError> { 560 - /// # let mut disk_driver = _get_fake_disk_driver(); 561 - /// let (mut rx, join) = disk_driver.to_channel(512); 562 - /// while let Some(recvd) = rx.recv().await { 563 - /// let pairs = recvd?; 564 - /// for (rkey, record) in pairs { 565 - /// println!("{rkey}: size={}", record.len()); 566 - /// } 567 - /// 568 - /// } 569 - /// let store = join.await?.reset_store().await?; 570 - /// # Ok(()) 571 - /// # } 572 - /// ``` 573 - pub fn to_channel( 574 - mut self, 575 - n: usize, 576 - ) -> ( 577 - mpsc::Receiver<Result<BlockChunk<T>, DriveError>>, 578 - tokio::task::JoinHandle<Self>, 579 - ) { 580 - let (tx, rx) = mpsc::channel::<Result<BlockChunk<T>, DriveError>>(1); 581 - 582 - // sketch: this worker is going to be allowed to execute without a join handle 583 - let chan_task = tokio::task::spawn_blocking(move || { 584 - if let Err(mpsc::error::SendError(_)) = self.read_tx_blocking(n, tx) { 585 - log::debug!("big car reader exited early due to dropped receiver channel"); 586 - } 587 - self 588 - }); 589 - 590 - (rx, chan_task) 591 - } 592 - 593 - /// Reset the disk storage so it can be reused. You must call this. 594 - /// 595 - /// Ideally we'd put this in an `impl Drop`, but since it makes blocking 596 - /// calls, that would be risky in an async context. For now you just have to 597 - /// carefully make sure you call it. 598 - /// 599 - /// The sqlite store is returned, so it can be reused for another 600 - /// `DiskDriver`. 601 - pub async fn reset_store(mut self) -> Result<DiskStore, DriveError> { 602 - let BigState { store, .. } = self.state.take().expect("valid state"); 603 - Ok(store.reset().await?) 604 - } 605 - }
-12
src/lib.rs
··· 73 73 Find more [examples in the repo](https://tangled.org/@microcosm.blue/repo-stream/tree/main/examples). 74 74 75 75 */ 76 - 77 - pub mod mst; 78 - mod walk; 79 - 80 - pub mod disk; 81 - pub mod drive; 82 - pub mod process; 83 - 84 - pub use disk::{DiskBuilder, DiskError, DiskStore}; 85 - pub use drive::{DriveError, Driver, DriverBuilder, NeedDisk}; 86 - pub use mst::Commit; 87 - pub use process::Processable;
-110
src/mst.rs
··· 1 - //! Low-level types for parsing raw atproto MST CARs 2 - //! 3 - //! The primary aim is to work through the **tree** structure. Non-node blocks 4 - //! are left as raw bytes, for upper levels to parse into DAG-CBOR or whatever. 5 - 6 - use ipld_core::cid::Cid; 7 - use serde::Deserialize; 8 - 9 - /// The top-level data object in a repository's tree is a signed commit. 10 - #[derive(Debug, Deserialize)] 11 - // #[serde(deny_unknown_fields)] 12 - pub struct Commit { 13 - /// the account DID associated with the repo, in strictly normalized form 14 - /// (eg, lowercase as appropriate) 15 - pub did: String, 16 - /// fixed value of 3 for this repo format version 17 - pub version: u64, 18 - /// pointer to the top of the repo contents tree structure (MST) 19 - pub data: Cid, 20 - /// revision of the repo, used as a logical clock. 21 - /// 22 - /// TID format. Must increase monotonically. Recommend using current 23 - /// timestamp as TID; rev values in the "future" (beyond a fudge factor) 24 - /// should be ignored and not processed 25 - pub rev: String, 26 - /// pointer (by hash) to a previous commit object for this repository. 27 - /// 28 - /// Could be used to create a chain of history, but largely unused (included 29 - /// for v2 backwards compatibility). In version 3 repos, this field must 30 - /// exist in the CBOR object, but is virtually always null. NOTE: previously 31 - /// specified as nullable and optional, but this caused interoperability 32 - /// issues. 33 - pub prev: Option<Cid>, 34 - /// cryptographic signature of this commit, as raw bytes 35 - #[serde(with = "serde_bytes")] 36 - pub sig: Vec<u8>, 37 - } 38 - 39 - /// MST node data schema 40 - #[derive(Debug, Deserialize, PartialEq)] 41 - #[serde(deny_unknown_fields)] 42 - pub(crate) struct Node { 43 - /// link to sub-tree Node on a lower level and with all keys sorting before 44 - /// keys at this node 45 - #[serde(rename = "l")] 46 - pub left: Option<Cid>, 47 - /// ordered list of TreeEntry objects 48 - /// 49 - /// atproto MSTs have a fanout of 4, so there can be max 4 entries. 50 - #[serde(rename = "e")] 51 - pub entries: Vec<Entry>, // maybe we can do [Option<Entry>; 4]? 52 - } 53 - 54 - impl Node { 55 - /// test if a block could possibly be a node 56 - /// 57 - /// we can't eagerly decode records except where we're *sure* they cannot be 58 - /// an mst node (and even then we can only attempt) because you can't know 59 - /// with certainty what a block is supposed to be without actually walking 60 - /// the tree. 61 - /// 62 - /// so if a block *could be* a node, any record converter must postpone 63 - /// processing. if it turns out it happens to be a very node-looking record, 64 - /// well, sorry, it just has to only be processed later when that's known. 65 - pub(crate) fn could_be(bytes: impl AsRef<[u8]>) -> bool { 66 - const NODE_FINGERPRINT: [u8; 3] = [ 67 - 0xA2, // map length 2 (for "l" and "e" keys) 68 - 0x61, // text length 1 69 - b'e', // "e" before "l" because map keys have to be lex-sorted 70 - // 0x8?: "e" has array (0x100 upper 3 bits) of some length 71 - ]; 72 - let bytes = bytes.as_ref(); 73 - bytes.starts_with(&NODE_FINGERPRINT) 74 - && bytes 75 - .get(3) 76 - .map(|b| b & 0b1110_0000 == 0x80) 77 - .unwrap_or(false) 78 - } 79 - 80 - /// Check if a node has any entries 81 - /// 82 - /// An empty repository with no records is represented as a single MST node 83 - /// with an empty array of entries. This is the only situation in which a 84 - /// tree may contain an empty leaf node which does not either contain keys 85 - /// ("entries") or point to a sub-tree containing entries. 86 - pub(crate) fn is_empty(&self) -> bool { 87 - self.left.is_none() && self.entries.is_empty() 88 - } 89 - } 90 - 91 - /// TreeEntry object 92 - #[derive(Debug, Deserialize, PartialEq)] 93 - #[serde(deny_unknown_fields)] 94 - pub(crate) struct Entry { 95 - /// count of bytes shared with previous TreeEntry in this Node (if any) 96 - #[serde(rename = "p")] 97 - pub prefix_len: usize, 98 - /// remainder of key for this TreeEntry, after "prefixlen" have been removed 99 - #[serde(rename = "k", with = "serde_bytes")] 100 - pub keysuffix: Vec<u8>, // can we String this here? 101 - /// link to the record data (CBOR) for this entry 102 - #[serde(rename = "v")] 103 - pub value: Cid, 104 - /// link to a sub-tree Node at a lower level 105 - /// 106 - /// the lower level must have keys sorting after this TreeEntry's key (to 107 - /// the "right"), but before the next TreeEntry's key in this Node (if any) 108 - #[serde(rename = "t")] 109 - pub tree: Option<Cid>, 110 - }
-108
src/process.rs
··· 1 - /*! 2 - Record processor function output trait 3 - 4 - The return type must satisfy the `Processable` trait, which requires: 5 - 6 - - `Clone` because two rkeys can refer to the same record by CID, which may 7 - only appear once in the CAR file. 8 - - `Serialize + DeserializeOwned` so it can be spilled to disk. 9 - 10 - One required function must be implemented, `get_size()`: this should return the 11 - approximate total off-stack size of the type. (the on-stack size will be added 12 - automatically via `std::mem::get_size`). 13 - 14 - Note that it is **not guaranteed** that the `process` function will run on a 15 - block before storing it in memory or on disk: it's not possible to know if a 16 - block is a record without actually walking the MST, so the best we can do is 17 - apply `process` to any block that we know *cannot* be an MST node, and otherwise 18 - store the raw block bytes. 19 - 20 - Here's a silly processing function that just collects 'eyy's found in the raw 21 - record bytes 22 - 23 - ``` 24 - # use repo_stream::Processable; 25 - # use serde::{Serialize, Deserialize}; 26 - #[derive(Debug, Clone, Serialize, Deserialize)] 27 - struct Eyy(usize, String); 28 - 29 - impl Processable for Eyy { 30 - fn get_size(&self) -> usize { 31 - // don't need to compute the usize, it's on the stack 32 - self.1.capacity() // in-mem size from the string's capacity, in bytes 33 - } 34 - } 35 - 36 - fn process(raw: Vec<u8>) -> Vec<Eyy> { 37 - let mut out = Vec::new(); 38 - let to_find = "eyy".as_bytes(); 39 - for i in 0..(raw.len() - 3) { 40 - if &raw[i..(i+3)] == to_find { 41 - out.push(Eyy(i, "eyy".to_string())); 42 - } 43 - } 44 - out 45 - } 46 - ``` 47 - 48 - The memory sizing stuff is a little sketch but probably at least approximately 49 - works. 50 - */ 51 - 52 - use serde::{Serialize, de::DeserializeOwned}; 53 - 54 - /// Output trait for record processing 55 - pub trait Processable: Clone + Serialize + DeserializeOwned { 56 - /// Any additional in-memory size taken by the processed type 57 - /// 58 - /// Do not include stack size (`std::mem::size_of`) 59 - fn get_size(&self) -> usize; 60 - } 61 - 62 - /// Processor that just returns the raw blocks 63 - #[inline] 64 - pub fn noop(block: Vec<u8>) -> Vec<u8> { 65 - block 66 - } 67 - 68 - impl Processable for u8 { 69 - fn get_size(&self) -> usize { 70 - 0 71 - } 72 - } 73 - 74 - impl Processable for usize { 75 - fn get_size(&self) -> usize { 76 - 0 // no additional space taken, just its stack size (newtype is free) 77 - } 78 - } 79 - 80 - impl Processable for String { 81 - fn get_size(&self) -> usize { 82 - self.capacity() 83 - } 84 - } 85 - 86 - impl<Item: Sized + Processable> Processable for Vec<Item> { 87 - fn get_size(&self) -> usize { 88 - let slot_size = std::mem::size_of::<Item>(); 89 - let direct_size = slot_size * self.capacity(); 90 - let items_referenced_size: usize = self.iter().map(|item| item.get_size()).sum(); 91 - direct_size + items_referenced_size 92 - } 93 - } 94 - 95 - impl<Item: Processable> Processable for Option<Item> { 96 - fn get_size(&self) -> usize { 97 - self.as_ref().map(|item| item.get_size()).unwrap_or(0) 98 - } 99 - } 100 - 101 - impl<Item: Processable, Error: Processable> Processable for Result<Item, Error> { 102 - fn get_size(&self) -> usize { 103 - match self { 104 - Ok(item) => item.get_size(), 105 - Err(err) => err.get_size(), 106 - } 107 - } 108 - }
-407
src/walk.rs
··· 1 - //! Depth-first MST traversal 2 - 3 - use crate::DiskError; 4 - use crate::DiskStore; 5 - use crate::drive::{DecodeError, MaybeProcessedBlock}; 6 - use crate::mst::Node; 7 - use crate::process::Processable; 8 - use ipld_core::cid::Cid; 9 - use sha2::{Digest, Sha256}; 10 - use std::collections::HashMap; 11 - use std::convert::Infallible; 12 - 13 - /// Errors that can happen while walking 14 - #[derive(Debug, thiserror::Error)] 15 - pub enum WalkError { 16 - #[error("Failed to fingerprint commit block")] 17 - BadCommitFingerprint, 18 - #[error("Failed to decode commit block: {0}")] 19 - BadCommit(#[from] serde_ipld_dagcbor::DecodeError<Infallible>), 20 - #[error("Action node error: {0}")] 21 - MstError(#[from] MstError), 22 - #[error("storage error: {0}")] 23 - StorageError(#[from] DiskError), 24 - #[error("Decode error: {0}")] 25 - DecodeError(#[from] DecodeError), 26 - } 27 - 28 - /// Errors from invalid Rkeys 29 - #[derive(Debug, PartialEq, thiserror::Error)] 30 - pub enum MstError { 31 - #[error("Failed to compute an rkey due to invalid prefix_len")] 32 - EntryPrefixOutOfbounds, 33 - #[error("RKey was not utf-8")] 34 - EntryRkeyNotUtf8(#[from] std::string::FromUtf8Error), 35 - #[error("Nodes cannot be empty (except for an entirely empty MST)")] 36 - EmptyNode, 37 - #[error("Found an entry with rkey at the wrong depth")] 38 - WrongDepth, 39 - #[error("Lost track of our depth (possible bug?)")] 40 - LostDepth, 41 - #[error("MST depth underflow: depth-0 node with child trees")] 42 - DepthUnderflow, 43 - #[error("Encountered an rkey out of order while walking the MST")] 44 - RkeyOutOfOrder, 45 - } 46 - 47 - /// Walker outputs 48 - #[derive(Debug)] 49 - pub enum Step<T> { 50 - /// We needed this CID but it's not in the block store 51 - Missing(Cid), 52 - /// Reached the end of the MST! yay! 53 - Finish, 54 - /// A record was found! 55 - Found { rkey: String, data: T }, 56 - } 57 - 58 - #[derive(Debug, Clone, PartialEq)] 59 - enum Need { 60 - Node { depth: Depth, cid: Cid }, 61 - Record { rkey: String, cid: Cid }, 62 - } 63 - 64 - #[derive(Debug, Clone, Copy, PartialEq)] 65 - enum Depth { 66 - Root, 67 - Depth(u32), 68 - } 69 - 70 - impl Depth { 71 - fn from_key(key: &[u8]) -> Self { 72 - let mut zeros = 0; 73 - for byte in Sha256::digest(key) { 74 - let leading = byte.leading_zeros(); 75 - zeros += leading; 76 - if leading < 8 { 77 - break; 78 - } 79 - } 80 - Self::Depth(zeros / 2) // truncating divide (rounds down) 81 - } 82 - fn next_expected(&self) -> Result<Option<u32>, MstError> { 83 - match self { 84 - Self::Root => Ok(None), 85 - Self::Depth(d) => d.checked_sub(1).ok_or(MstError::DepthUnderflow).map(Some), 86 - } 87 - } 88 - } 89 - 90 - fn push_from_node(stack: &mut Vec<Need>, node: &Node, parent_depth: Depth) -> Result<(), MstError> { 91 - // empty nodes are not allowed in the MST except in an empty MST 92 - if node.is_empty() { 93 - if parent_depth == Depth::Root { 94 - return Ok(()); // empty mst, nothing to push 95 - } else { 96 - return Err(MstError::EmptyNode); 97 - } 98 - } 99 - 100 - let mut entries = Vec::with_capacity(node.entries.len()); 101 - let mut prefix = vec![]; 102 - let mut this_depth = parent_depth.next_expected()?; 103 - 104 - for entry in &node.entries { 105 - let mut rkey = vec![]; 106 - let pre_checked = prefix 107 - .get(..entry.prefix_len) 108 - .ok_or(MstError::EntryPrefixOutOfbounds)?; 109 - rkey.extend_from_slice(pre_checked); 110 - rkey.extend_from_slice(&entry.keysuffix); 111 - 112 - let Depth::Depth(key_depth) = Depth::from_key(&rkey) else { 113 - return Err(MstError::WrongDepth); 114 - }; 115 - 116 - // this_depth is `none` if we are the deepest child (directly below root) 117 - // in that case we accept whatever highest depth is claimed 118 - let expected_depth = match this_depth { 119 - Some(d) => d, 120 - None => { 121 - this_depth = Some(key_depth); 122 - key_depth 123 - } 124 - }; 125 - 126 - // all keys we find should be this depth 127 - if key_depth != expected_depth { 128 - return Err(MstError::DepthUnderflow); 129 - } 130 - 131 - prefix = rkey.clone(); 132 - 133 - entries.push(Need::Record { 134 - rkey: String::from_utf8(rkey)?, 135 - cid: entry.value, 136 - }); 137 - if let Some(ref tree) = entry.tree { 138 - entries.push(Need::Node { 139 - depth: Depth::Depth(key_depth), 140 - cid: *tree, 141 - }); 142 - } 143 - } 144 - 145 - entries.reverse(); 146 - stack.append(&mut entries); 147 - 148 - let d = this_depth.ok_or(MstError::LostDepth)?; 149 - 150 - if let Some(tree) = node.left { 151 - stack.push(Need::Node { 152 - depth: Depth::Depth(d), 153 - cid: tree, 154 - }); 155 - } 156 - Ok(()) 157 - } 158 - 159 - /// Traverser of an atproto MST 160 - /// 161 - /// Walks the tree from left-to-right in depth-first order 162 - #[derive(Debug)] 163 - pub struct Walker { 164 - stack: Vec<Need>, 165 - prev: String, 166 - } 167 - 168 - impl Walker { 169 - pub fn new(tree_root_cid: Cid) -> Self { 170 - Self { 171 - stack: vec![Need::Node { 172 - depth: Depth::Root, 173 - cid: tree_root_cid, 174 - }], 175 - prev: "".to_string(), 176 - } 177 - } 178 - 179 - /// Advance through nodes until we find a record or can't go further 180 - pub fn step<T: Processable>( 181 - &mut self, 182 - blocks: &mut HashMap<Cid, MaybeProcessedBlock<T>>, 183 - process: impl Fn(Vec<u8>) -> T, 184 - ) -> Result<Step<T>, WalkError> { 185 - loop { 186 - let Some(need) = self.stack.last_mut() else { 187 - log::trace!("tried to walk but we're actually done."); 188 - return Ok(Step::Finish); 189 - }; 190 - 191 - match need { 192 - &mut Need::Node { depth, cid } => { 193 - log::trace!("need node {cid:?}"); 194 - let Some(block) = blocks.remove(&cid) else { 195 - log::trace!("node not found, resting"); 196 - return Ok(Step::Missing(cid)); 197 - }; 198 - 199 - let MaybeProcessedBlock::Raw(data) = block else { 200 - return Err(WalkError::BadCommitFingerprint); 201 - }; 202 - let node = serde_ipld_dagcbor::from_slice::<Node>(&data) 203 - .map_err(WalkError::BadCommit)?; 204 - 205 - // found node, make sure we remember 206 - self.stack.pop(); 207 - 208 - // queue up work on the found node next 209 - push_from_node(&mut self.stack, &node, depth)?; 210 - } 211 - Need::Record { rkey, cid } => { 212 - log::trace!("need record {cid:?}"); 213 - // note that we cannot *remove* a record block, sadly, since 214 - // there can be multiple rkeys pointing to the same cid. 215 - let Some(data) = blocks.get_mut(cid) else { 216 - return Ok(Step::Missing(*cid)); 217 - }; 218 - let rkey = rkey.clone(); 219 - let data = match data { 220 - MaybeProcessedBlock::Raw(data) => process(data.to_vec()), 221 - MaybeProcessedBlock::Processed(t) => t.clone(), 222 - }; 223 - 224 - // found node, make sure we remember 225 - self.stack.pop(); 226 - 227 - // rkeys *must* be in order or else the tree is invalid (or 228 - // we have a bug) 229 - if rkey <= self.prev { 230 - return Err(MstError::RkeyOutOfOrder)?; 231 - } 232 - self.prev = rkey.clone(); 233 - 234 - return Ok(Step::Found { rkey, data }); 235 - } 236 - } 237 - } 238 - } 239 - 240 - /// blocking!!!!!! 241 - pub fn disk_step<T: Processable>( 242 - &mut self, 243 - db: &mut DiskStore, 244 - process: impl Fn(Vec<u8>) -> T, 245 - ) -> Result<Step<T>, WalkError> { 246 - loop { 247 - let Some(need) = self.stack.last_mut() else { 248 - log::trace!("tried to walk but we're actually done."); 249 - return Ok(Step::Finish); 250 - }; 251 - 252 - match need { 253 - &mut Need::Node { depth, cid } => { 254 - let cid_bytes = cid.to_bytes(); 255 - log::trace!("need node {cid:?}"); 256 - let Some(block_bytes) = db.get(cid_bytes)? else { 257 - log::trace!("node not found, resting"); 258 - return Ok(Step::Missing(cid)); 259 - }; 260 - 261 - let block: MaybeProcessedBlock<T> = crate::drive::decode(&block_bytes)?; 262 - 263 - let MaybeProcessedBlock::Raw(data) = block else { 264 - return Err(WalkError::BadCommitFingerprint); 265 - }; 266 - let node = serde_ipld_dagcbor::from_slice::<Node>(&data) 267 - .map_err(WalkError::BadCommit)?; 268 - 269 - // found node, make sure we remember 270 - self.stack.pop(); 271 - 272 - // queue up work on the found node next 273 - push_from_node(&mut self.stack, &node, depth).map_err(WalkError::MstError)?; 274 - } 275 - Need::Record { rkey, cid } => { 276 - log::trace!("need record {cid:?}"); 277 - let cid_bytes = cid.to_bytes(); 278 - let Some(data_bytes) = db.get(cid_bytes)? else { 279 - log::trace!("record block not found, resting"); 280 - return Ok(Step::Missing(*cid)); 281 - }; 282 - let data: MaybeProcessedBlock<T> = crate::drive::decode(&data_bytes)?; 283 - let rkey = rkey.clone(); 284 - let data = match data { 285 - MaybeProcessedBlock::Raw(data) => process(data), 286 - MaybeProcessedBlock::Processed(t) => t.clone(), 287 - }; 288 - 289 - // found node, make sure we remember 290 - self.stack.pop(); 291 - 292 - log::trace!("emitting a block as a step. depth={}", self.stack.len()); 293 - 294 - // rkeys *must* be in order or else the tree is invalid (or 295 - // we have a bug) 296 - if rkey <= self.prev { 297 - return Err(MstError::RkeyOutOfOrder)?; 298 - } 299 - self.prev = rkey.clone(); 300 - 301 - return Ok(Step::Found { rkey, data }); 302 - } 303 - } 304 - } 305 - } 306 - } 307 - 308 - #[cfg(test)] 309 - mod test { 310 - use super::*; 311 - 312 - fn cid1() -> Cid { 313 - "bafyreihixenvk3ahqbytas4hk4a26w43bh6eo3w6usjqtxkpzsvi655a3m" 314 - .parse() 315 - .unwrap() 316 - } 317 - 318 - #[test] 319 - fn test_depth_spec_0() { 320 - let d = Depth::from_key(b"2653ae71"); 321 - assert_eq!(d, Depth::Depth(0)) 322 - } 323 - 324 - #[test] 325 - fn test_depth_spec_1() { 326 - let d = Depth::from_key(b"blue"); 327 - assert_eq!(d, Depth::Depth(1)) 328 - } 329 - 330 - #[test] 331 - fn test_depth_spec_4() { 332 - let d = Depth::from_key(b"app.bsky.feed.post/454397e440ec"); 333 - assert_eq!(d, Depth::Depth(4)) 334 - } 335 - 336 - #[test] 337 - fn test_depth_spec_8() { 338 - let d = Depth::from_key(b"app.bsky.feed.post/9adeb165882c"); 339 - assert_eq!(d, Depth::Depth(8)) 340 - } 341 - 342 - #[test] 343 - fn test_depth_ietf_draft_0() { 344 - let d = Depth::from_key(b"key1"); 345 - assert_eq!(d, Depth::Depth(0)) 346 - } 347 - 348 - #[test] 349 - fn test_depth_ietf_draft_1() { 350 - let d = Depth::from_key(b"key7"); 351 - assert_eq!(d, Depth::Depth(1)) 352 - } 353 - 354 - #[test] 355 - fn test_depth_ietf_draft_4() { 356 - let d = Depth::from_key(b"key515"); 357 - assert_eq!(d, Depth::Depth(4)) 358 - } 359 - 360 - #[test] 361 - fn test_depth_interop() { 362 - // examples from https://github.com/bluesky-social/atproto-interop-tests/blob/main/mst/key_heights.json 363 - for (k, expected) in [ 364 - ("", 0), 365 - ("asdf", 0), 366 - ("blue", 1), 367 - ("2653ae71", 0), 368 - ("88bfafc7", 2), 369 - ("2a92d355", 4), 370 - ("884976f5", 6), 371 - ("app.bsky.feed.post/454397e440ec", 4), 372 - ("app.bsky.feed.post/9adeb165882c", 8), 373 - ] { 374 - let d = Depth::from_key(k.as_bytes()); 375 - assert_eq!(d, Depth::Depth(expected), "key: {}", k); 376 - } 377 - } 378 - 379 - #[test] 380 - fn test_push_empty_fails() { 381 - let empty_node = Node { 382 - left: None, 383 - entries: vec![], 384 - }; 385 - let mut stack = vec![]; 386 - let err = push_from_node(&mut stack, &empty_node, Depth::Depth(4)); 387 - assert_eq!(err, Err(MstError::EmptyNode)); 388 - } 389 - 390 - #[test] 391 - fn test_push_one_node() { 392 - let node = Node { 393 - left: Some(cid1()), 394 - entries: vec![], 395 - }; 396 - let mut stack = vec![]; 397 - push_from_node(&mut stack, &node, Depth::Depth(4)).unwrap(); 398 - assert_eq!( 399 - stack.last(), 400 - Some(Need::Node { 401 - depth: Depth::Depth(3), 402 - cid: cid1() 403 - }) 404 - .as_ref() 405 - ); 406 - } 407 - }