Fast and robust atproto CAR file processing in rust

kinda works?

+1
.gitignore
···
··· 1 + /target
+1028
Cargo.lock
···
··· 1 + # This file is automatically @generated by Cargo. 2 + # It is not intended for manual editing. 3 + version = 4 4 + 5 + [[package]] 6 + name = "addr2line" 7 + version = "0.25.1" 8 + source = "registry+https://github.com/rust-lang/crates.io-index" 9 + checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" 10 + dependencies = [ 11 + "gimli", 12 + ] 13 + 14 + [[package]] 15 + name = "adler2" 16 + version = "2.0.1" 17 + source = "registry+https://github.com/rust-lang/crates.io-index" 18 + checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" 19 + 20 + [[package]] 21 + name = "aho-corasick" 22 + version = "1.1.3" 23 + source = "registry+https://github.com/rust-lang/crates.io-index" 24 + checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 + dependencies = [ 26 + "memchr", 27 + ] 28 + 29 + [[package]] 30 + name = "anstream" 31 + version = "0.6.21" 32 + source = "registry+https://github.com/rust-lang/crates.io-index" 33 + checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" 34 + dependencies = [ 35 + "anstyle", 36 + "anstyle-parse", 37 + "anstyle-query", 38 + "anstyle-wincon", 39 + "colorchoice", 40 + "is_terminal_polyfill", 41 + "utf8parse", 42 + ] 43 + 44 + [[package]] 45 + name = "anstyle" 46 + version = "1.0.13" 47 + source = "registry+https://github.com/rust-lang/crates.io-index" 48 + checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" 49 + 50 + [[package]] 51 + name = "anstyle-parse" 52 + version = "0.2.7" 53 + source = "registry+https://github.com/rust-lang/crates.io-index" 54 + checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" 55 + dependencies = [ 56 + "utf8parse", 57 + ] 58 + 59 + [[package]] 60 + name = "anstyle-query" 61 + version = "1.1.4" 62 + source = "registry+https://github.com/rust-lang/crates.io-index" 63 + checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" 64 + dependencies = [ 65 + "windows-sys 0.60.2", 66 + ] 67 + 68 + [[package]] 69 + name = "anstyle-wincon" 70 + version = "3.0.10" 71 + source = "registry+https://github.com/rust-lang/crates.io-index" 72 + checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" 73 + dependencies = [ 74 + "anstyle", 75 + "once_cell_polyfill", 76 + "windows-sys 0.60.2", 77 + ] 78 + 79 + [[package]] 80 + name = "anyhow" 81 + version = "1.0.100" 82 + source = "registry+https://github.com/rust-lang/crates.io-index" 83 + checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" 84 + 85 + [[package]] 86 + name = "backtrace" 87 + version = "0.3.76" 88 + source = "registry+https://github.com/rust-lang/crates.io-index" 89 + checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" 90 + dependencies = [ 91 + "addr2line", 92 + "cfg-if", 93 + "libc", 94 + "miniz_oxide", 95 + "object", 96 + "rustc-demangle", 97 + "windows-link", 98 + ] 99 + 100 + [[package]] 101 + name = "base-x" 102 + version = "0.2.11" 103 + source = "registry+https://github.com/rust-lang/crates.io-index" 104 + checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270" 105 + 106 + [[package]] 107 + name = "base256emoji" 108 + version = "1.0.2" 109 + source = "registry+https://github.com/rust-lang/crates.io-index" 110 + checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c" 111 + dependencies = [ 112 + "const-str", 113 + "match-lookup", 114 + ] 115 + 116 + [[package]] 117 + name = "bitflags" 118 + version = "2.9.4" 119 + source = "registry+https://github.com/rust-lang/crates.io-index" 120 + checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" 121 + 122 + [[package]] 123 + name = "bytes" 124 + version = "1.10.1" 125 + source = "registry+https://github.com/rust-lang/crates.io-index" 126 + checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 127 + 128 + [[package]] 129 + name = "cbor4ii" 130 + version = "0.2.14" 131 + source = "registry+https://github.com/rust-lang/crates.io-index" 132 + checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4" 133 + dependencies = [ 134 + "serde", 135 + ] 136 + 137 + [[package]] 138 + name = "cfg-if" 139 + version = "1.0.3" 140 + source = "registry+https://github.com/rust-lang/crates.io-index" 141 + checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" 142 + 143 + [[package]] 144 + name = "cid" 145 + version = "0.11.1" 146 + source = "registry+https://github.com/rust-lang/crates.io-index" 147 + checksum = "3147d8272e8fa0ccd29ce51194dd98f79ddfb8191ba9e3409884e751798acf3a" 148 + dependencies = [ 149 + "core2", 150 + "multibase", 151 + "multihash", 152 + "serde", 153 + "serde_bytes", 154 + "unsigned-varint 0.8.0", 155 + ] 156 + 157 + [[package]] 158 + name = "clap" 159 + version = "4.5.48" 160 + source = "registry+https://github.com/rust-lang/crates.io-index" 161 + checksum = "e2134bb3ea021b78629caa971416385309e0131b351b25e01dc16fb54e1b5fae" 162 + dependencies = [ 163 + "clap_builder", 164 + "clap_derive", 165 + ] 166 + 167 + [[package]] 168 + name = "clap_builder" 169 + version = "4.5.48" 170 + source = "registry+https://github.com/rust-lang/crates.io-index" 171 + checksum = "c2ba64afa3c0a6df7fa517765e31314e983f51dda798ffba27b988194fb65dc9" 172 + dependencies = [ 173 + "anstream", 174 + "anstyle", 175 + "clap_lex", 176 + "strsim", 177 + ] 178 + 179 + [[package]] 180 + name = "clap_derive" 181 + version = "4.5.47" 182 + source = "registry+https://github.com/rust-lang/crates.io-index" 183 + checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" 184 + dependencies = [ 185 + "heck", 186 + "proc-macro2", 187 + "quote", 188 + "syn 2.0.106", 189 + ] 190 + 191 + [[package]] 192 + name = "clap_lex" 193 + version = "0.7.5" 194 + source = "registry+https://github.com/rust-lang/crates.io-index" 195 + checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" 196 + 197 + [[package]] 198 + name = "colorchoice" 199 + version = "1.0.4" 200 + source = "registry+https://github.com/rust-lang/crates.io-index" 201 + checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" 202 + 203 + [[package]] 204 + name = "const-str" 205 + version = "0.4.3" 206 + source = "registry+https://github.com/rust-lang/crates.io-index" 207 + checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3" 208 + 209 + [[package]] 210 + name = "core2" 211 + version = "0.4.0" 212 + source = "registry+https://github.com/rust-lang/crates.io-index" 213 + checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" 214 + dependencies = [ 215 + "memchr", 216 + ] 217 + 218 + [[package]] 219 + name = "data-encoding" 220 + version = "2.9.0" 221 + source = "registry+https://github.com/rust-lang/crates.io-index" 222 + checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" 223 + 224 + [[package]] 225 + name = "data-encoding-macro" 226 + version = "0.1.18" 227 + source = "registry+https://github.com/rust-lang/crates.io-index" 228 + checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d" 229 + dependencies = [ 230 + "data-encoding", 231 + "data-encoding-macro-internal", 232 + ] 233 + 234 + [[package]] 235 + name = "data-encoding-macro-internal" 236 + version = "0.1.16" 237 + source = "registry+https://github.com/rust-lang/crates.io-index" 238 + checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976" 239 + dependencies = [ 240 + "data-encoding", 241 + "syn 2.0.106", 242 + ] 243 + 244 + [[package]] 245 + name = "env_filter" 246 + version = "0.1.3" 247 + source = "registry+https://github.com/rust-lang/crates.io-index" 248 + checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" 249 + dependencies = [ 250 + "log", 251 + "regex", 252 + ] 253 + 254 + [[package]] 255 + name = "env_logger" 256 + version = "0.11.8" 257 + source = "registry+https://github.com/rust-lang/crates.io-index" 258 + checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" 259 + dependencies = [ 260 + "anstream", 261 + "anstyle", 262 + "env_filter", 263 + "jiff", 264 + "log", 265 + ] 266 + 267 + [[package]] 268 + name = "futures" 269 + version = "0.3.31" 270 + source = "registry+https://github.com/rust-lang/crates.io-index" 271 + checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" 272 + dependencies = [ 273 + "futures-channel", 274 + "futures-core", 275 + "futures-executor", 276 + "futures-io", 277 + "futures-sink", 278 + "futures-task", 279 + "futures-util", 280 + ] 281 + 282 + [[package]] 283 + name = "futures-channel" 284 + version = "0.3.31" 285 + source = "registry+https://github.com/rust-lang/crates.io-index" 286 + checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" 287 + dependencies = [ 288 + "futures-core", 289 + "futures-sink", 290 + ] 291 + 292 + [[package]] 293 + name = "futures-core" 294 + version = "0.3.31" 295 + source = "registry+https://github.com/rust-lang/crates.io-index" 296 + checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" 297 + 298 + [[package]] 299 + name = "futures-executor" 300 + version = "0.3.31" 301 + source = "registry+https://github.com/rust-lang/crates.io-index" 302 + checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" 303 + dependencies = [ 304 + "futures-core", 305 + "futures-task", 306 + "futures-util", 307 + ] 308 + 309 + [[package]] 310 + name = "futures-io" 311 + version = "0.3.31" 312 + source = "registry+https://github.com/rust-lang/crates.io-index" 313 + checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" 314 + 315 + [[package]] 316 + name = "futures-macro" 317 + version = "0.3.31" 318 + source = "registry+https://github.com/rust-lang/crates.io-index" 319 + checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" 320 + dependencies = [ 321 + "proc-macro2", 322 + "quote", 323 + "syn 2.0.106", 324 + ] 325 + 326 + [[package]] 327 + name = "futures-sink" 328 + version = "0.3.31" 329 + source = "registry+https://github.com/rust-lang/crates.io-index" 330 + checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" 331 + 332 + [[package]] 333 + name = "futures-task" 334 + version = "0.3.31" 335 + source = "registry+https://github.com/rust-lang/crates.io-index" 336 + checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" 337 + 338 + [[package]] 339 + name = "futures-util" 340 + version = "0.3.31" 341 + source = "registry+https://github.com/rust-lang/crates.io-index" 342 + checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" 343 + dependencies = [ 344 + "futures-channel", 345 + "futures-core", 346 + "futures-io", 347 + "futures-macro", 348 + "futures-sink", 349 + "futures-task", 350 + "memchr", 351 + "pin-project-lite", 352 + "pin-utils", 353 + "slab", 354 + ] 355 + 356 + [[package]] 357 + name = "gimli" 358 + version = "0.32.3" 359 + source = "registry+https://github.com/rust-lang/crates.io-index" 360 + checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" 361 + 362 + [[package]] 363 + name = "heck" 364 + version = "0.5.0" 365 + source = "registry+https://github.com/rust-lang/crates.io-index" 366 + checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 367 + 368 + [[package]] 369 + name = "io-uring" 370 + version = "0.7.10" 371 + source = "registry+https://github.com/rust-lang/crates.io-index" 372 + checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" 373 + dependencies = [ 374 + "bitflags", 375 + "cfg-if", 376 + "libc", 377 + ] 378 + 379 + [[package]] 380 + name = "ipld-core" 381 + version = "0.4.2" 382 + source = "registry+https://github.com/rust-lang/crates.io-index" 383 + checksum = "104718b1cc124d92a6d01ca9c9258a7df311405debb3408c445a36452f9bf8db" 384 + dependencies = [ 385 + "cid", 386 + "serde", 387 + "serde_bytes", 388 + ] 389 + 390 + [[package]] 391 + name = "iroh-car" 392 + version = "0.5.1" 393 + source = "registry+https://github.com/rust-lang/crates.io-index" 394 + checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a" 395 + dependencies = [ 396 + "anyhow", 397 + "cid", 398 + "futures", 399 + "serde", 400 + "serde_ipld_dagcbor", 401 + "thiserror 1.0.69", 402 + "tokio", 403 + "unsigned-varint 0.7.2", 404 + ] 405 + 406 + [[package]] 407 + name = "is_terminal_polyfill" 408 + version = "1.70.1" 409 + source = "registry+https://github.com/rust-lang/crates.io-index" 410 + checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 411 + 412 + [[package]] 413 + name = "jiff" 414 + version = "0.2.15" 415 + source = "registry+https://github.com/rust-lang/crates.io-index" 416 + checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" 417 + dependencies = [ 418 + "jiff-static", 419 + "log", 420 + "portable-atomic", 421 + "portable-atomic-util", 422 + "serde", 423 + ] 424 + 425 + [[package]] 426 + name = "jiff-static" 427 + version = "0.2.15" 428 + source = "registry+https://github.com/rust-lang/crates.io-index" 429 + checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" 430 + dependencies = [ 431 + "proc-macro2", 432 + "quote", 433 + "syn 2.0.106", 434 + ] 435 + 436 + [[package]] 437 + name = "libc" 438 + version = "0.2.176" 439 + source = "registry+https://github.com/rust-lang/crates.io-index" 440 + checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" 441 + 442 + [[package]] 443 + name = "lock_api" 444 + version = "0.4.14" 445 + source = "registry+https://github.com/rust-lang/crates.io-index" 446 + checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" 447 + dependencies = [ 448 + "scopeguard", 449 + ] 450 + 451 + [[package]] 452 + name = "log" 453 + version = "0.4.28" 454 + source = "registry+https://github.com/rust-lang/crates.io-index" 455 + checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" 456 + 457 + [[package]] 458 + name = "match-lookup" 459 + version = "0.1.1" 460 + source = "registry+https://github.com/rust-lang/crates.io-index" 461 + checksum = "1265724d8cb29dbbc2b0f06fffb8bf1a8c0cf73a78eede9ba73a4a66c52a981e" 462 + dependencies = [ 463 + "proc-macro2", 464 + "quote", 465 + "syn 1.0.109", 466 + ] 467 + 468 + [[package]] 469 + name = "memchr" 470 + version = "2.7.6" 471 + source = "registry+https://github.com/rust-lang/crates.io-index" 472 + checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" 473 + 474 + [[package]] 475 + name = "miniz_oxide" 476 + version = "0.8.9" 477 + source = "registry+https://github.com/rust-lang/crates.io-index" 478 + checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" 479 + dependencies = [ 480 + "adler2", 481 + ] 482 + 483 + [[package]] 484 + name = "mio" 485 + version = "1.0.4" 486 + source = "registry+https://github.com/rust-lang/crates.io-index" 487 + checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" 488 + dependencies = [ 489 + "libc", 490 + "wasi", 491 + "windows-sys 0.59.0", 492 + ] 493 + 494 + [[package]] 495 + name = "multibase" 496 + version = "0.9.2" 497 + source = "registry+https://github.com/rust-lang/crates.io-index" 498 + checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77" 499 + dependencies = [ 500 + "base-x", 501 + "base256emoji", 502 + "data-encoding", 503 + "data-encoding-macro", 504 + ] 505 + 506 + [[package]] 507 + name = "multihash" 508 + version = "0.19.3" 509 + source = "registry+https://github.com/rust-lang/crates.io-index" 510 + checksum = "6b430e7953c29dd6a09afc29ff0bb69c6e306329ee6794700aee27b76a1aea8d" 511 + dependencies = [ 512 + "core2", 513 + "serde", 514 + "unsigned-varint 0.8.0", 515 + ] 516 + 517 + [[package]] 518 + name = "object" 519 + version = "0.37.3" 520 + source = "registry+https://github.com/rust-lang/crates.io-index" 521 + checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" 522 + dependencies = [ 523 + "memchr", 524 + ] 525 + 526 + [[package]] 527 + name = "once_cell_polyfill" 528 + version = "1.70.1" 529 + source = "registry+https://github.com/rust-lang/crates.io-index" 530 + checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" 531 + 532 + [[package]] 533 + name = "parking_lot" 534 + version = "0.12.5" 535 + source = "registry+https://github.com/rust-lang/crates.io-index" 536 + checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" 537 + dependencies = [ 538 + "lock_api", 539 + "parking_lot_core", 540 + ] 541 + 542 + [[package]] 543 + name = "parking_lot_core" 544 + version = "0.9.12" 545 + source = "registry+https://github.com/rust-lang/crates.io-index" 546 + checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" 547 + dependencies = [ 548 + "cfg-if", 549 + "libc", 550 + "redox_syscall", 551 + "smallvec", 552 + "windows-link", 553 + ] 554 + 555 + [[package]] 556 + name = "pin-project-lite" 557 + version = "0.2.16" 558 + source = "registry+https://github.com/rust-lang/crates.io-index" 559 + checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" 560 + 561 + [[package]] 562 + name = "pin-utils" 563 + version = "0.1.0" 564 + source = "registry+https://github.com/rust-lang/crates.io-index" 565 + checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 566 + 567 + [[package]] 568 + name = "portable-atomic" 569 + version = "1.11.1" 570 + source = "registry+https://github.com/rust-lang/crates.io-index" 571 + checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" 572 + 573 + [[package]] 574 + name = "portable-atomic-util" 575 + version = "0.2.4" 576 + source = "registry+https://github.com/rust-lang/crates.io-index" 577 + checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" 578 + dependencies = [ 579 + "portable-atomic", 580 + ] 581 + 582 + [[package]] 583 + name = "proc-macro2" 584 + version = "1.0.101" 585 + source = "registry+https://github.com/rust-lang/crates.io-index" 586 + checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" 587 + dependencies = [ 588 + "unicode-ident", 589 + ] 590 + 591 + [[package]] 592 + name = "quote" 593 + version = "1.0.41" 594 + source = "registry+https://github.com/rust-lang/crates.io-index" 595 + checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" 596 + dependencies = [ 597 + "proc-macro2", 598 + ] 599 + 600 + [[package]] 601 + name = "redox_syscall" 602 + version = "0.5.18" 603 + source = "registry+https://github.com/rust-lang/crates.io-index" 604 + checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" 605 + dependencies = [ 606 + "bitflags", 607 + ] 608 + 609 + [[package]] 610 + name = "regex" 611 + version = "1.11.3" 612 + source = "registry+https://github.com/rust-lang/crates.io-index" 613 + checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" 614 + dependencies = [ 615 + "aho-corasick", 616 + "memchr", 617 + "regex-automata", 618 + "regex-syntax", 619 + ] 620 + 621 + [[package]] 622 + name = "regex-automata" 623 + version = "0.4.11" 624 + source = "registry+https://github.com/rust-lang/crates.io-index" 625 + checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" 626 + dependencies = [ 627 + "aho-corasick", 628 + "memchr", 629 + "regex-syntax", 630 + ] 631 + 632 + [[package]] 633 + name = "regex-syntax" 634 + version = "0.8.6" 635 + source = "registry+https://github.com/rust-lang/crates.io-index" 636 + checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" 637 + 638 + [[package]] 639 + name = "repo-stream" 640 + version = "0.1.0" 641 + dependencies = [ 642 + "clap", 643 + "env_logger", 644 + "ipld-core", 645 + "iroh-car", 646 + "log", 647 + "serde", 648 + "serde_ipld_dagcbor", 649 + "thiserror 2.0.17", 650 + "tokio", 651 + ] 652 + 653 + [[package]] 654 + name = "rustc-demangle" 655 + version = "0.1.26" 656 + source = "registry+https://github.com/rust-lang/crates.io-index" 657 + checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" 658 + 659 + [[package]] 660 + name = "scopeguard" 661 + version = "1.2.0" 662 + source = "registry+https://github.com/rust-lang/crates.io-index" 663 + checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 664 + 665 + [[package]] 666 + name = "serde" 667 + version = "1.0.228" 668 + source = "registry+https://github.com/rust-lang/crates.io-index" 669 + checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" 670 + dependencies = [ 671 + "serde_core", 672 + "serde_derive", 673 + ] 674 + 675 + [[package]] 676 + name = "serde_bytes" 677 + version = "0.11.19" 678 + source = "registry+https://github.com/rust-lang/crates.io-index" 679 + checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" 680 + dependencies = [ 681 + "serde", 682 + "serde_core", 683 + ] 684 + 685 + [[package]] 686 + name = "serde_core" 687 + version = "1.0.228" 688 + source = "registry+https://github.com/rust-lang/crates.io-index" 689 + checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" 690 + dependencies = [ 691 + "serde_derive", 692 + ] 693 + 694 + [[package]] 695 + name = "serde_derive" 696 + version = "1.0.228" 697 + source = "registry+https://github.com/rust-lang/crates.io-index" 698 + checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" 699 + dependencies = [ 700 + "proc-macro2", 701 + "quote", 702 + "syn 2.0.106", 703 + ] 704 + 705 + [[package]] 706 + name = "serde_ipld_dagcbor" 707 + version = "0.6.4" 708 + source = "registry+https://github.com/rust-lang/crates.io-index" 709 + checksum = "46182f4f08349a02b45c998ba3215d3f9de826246ba02bb9dddfe9a2a2100778" 710 + dependencies = [ 711 + "cbor4ii", 712 + "ipld-core", 713 + "scopeguard", 714 + "serde", 715 + ] 716 + 717 + [[package]] 718 + name = "signal-hook-registry" 719 + version = "1.4.6" 720 + source = "registry+https://github.com/rust-lang/crates.io-index" 721 + checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" 722 + dependencies = [ 723 + "libc", 724 + ] 725 + 726 + [[package]] 727 + name = "slab" 728 + version = "0.4.11" 729 + source = "registry+https://github.com/rust-lang/crates.io-index" 730 + checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" 731 + 732 + [[package]] 733 + name = "smallvec" 734 + version = "1.15.1" 735 + source = "registry+https://github.com/rust-lang/crates.io-index" 736 + checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" 737 + 738 + [[package]] 739 + name = "socket2" 740 + version = "0.6.0" 741 + source = "registry+https://github.com/rust-lang/crates.io-index" 742 + checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" 743 + dependencies = [ 744 + "libc", 745 + "windows-sys 0.59.0", 746 + ] 747 + 748 + [[package]] 749 + name = "strsim" 750 + version = "0.11.1" 751 + source = "registry+https://github.com/rust-lang/crates.io-index" 752 + checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 753 + 754 + [[package]] 755 + name = "syn" 756 + version = "1.0.109" 757 + source = "registry+https://github.com/rust-lang/crates.io-index" 758 + checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 759 + dependencies = [ 760 + "proc-macro2", 761 + "quote", 762 + "unicode-ident", 763 + ] 764 + 765 + [[package]] 766 + name = "syn" 767 + version = "2.0.106" 768 + source = "registry+https://github.com/rust-lang/crates.io-index" 769 + checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" 770 + dependencies = [ 771 + "proc-macro2", 772 + "quote", 773 + "unicode-ident", 774 + ] 775 + 776 + [[package]] 777 + name = "thiserror" 778 + version = "1.0.69" 779 + source = "registry+https://github.com/rust-lang/crates.io-index" 780 + checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" 781 + dependencies = [ 782 + "thiserror-impl 1.0.69", 783 + ] 784 + 785 + [[package]] 786 + name = "thiserror" 787 + version = "2.0.17" 788 + source = "registry+https://github.com/rust-lang/crates.io-index" 789 + checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" 790 + dependencies = [ 791 + "thiserror-impl 2.0.17", 792 + ] 793 + 794 + [[package]] 795 + name = "thiserror-impl" 796 + version = "1.0.69" 797 + source = "registry+https://github.com/rust-lang/crates.io-index" 798 + checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" 799 + dependencies = [ 800 + "proc-macro2", 801 + "quote", 802 + "syn 2.0.106", 803 + ] 804 + 805 + [[package]] 806 + name = "thiserror-impl" 807 + version = "2.0.17" 808 + source = "registry+https://github.com/rust-lang/crates.io-index" 809 + checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" 810 + dependencies = [ 811 + "proc-macro2", 812 + "quote", 813 + "syn 2.0.106", 814 + ] 815 + 816 + [[package]] 817 + name = "tokio" 818 + version = "1.47.1" 819 + source = "registry+https://github.com/rust-lang/crates.io-index" 820 + checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" 821 + dependencies = [ 822 + "backtrace", 823 + "bytes", 824 + "io-uring", 825 + "libc", 826 + "mio", 827 + "parking_lot", 828 + "pin-project-lite", 829 + "signal-hook-registry", 830 + "slab", 831 + "socket2", 832 + "tokio-macros", 833 + "windows-sys 0.59.0", 834 + ] 835 + 836 + [[package]] 837 + name = "tokio-macros" 838 + version = "2.5.0" 839 + source = "registry+https://github.com/rust-lang/crates.io-index" 840 + checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" 841 + dependencies = [ 842 + "proc-macro2", 843 + "quote", 844 + "syn 2.0.106", 845 + ] 846 + 847 + [[package]] 848 + name = "unicode-ident" 849 + version = "1.0.19" 850 + source = "registry+https://github.com/rust-lang/crates.io-index" 851 + checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" 852 + 853 + [[package]] 854 + name = "unsigned-varint" 855 + version = "0.7.2" 856 + source = "registry+https://github.com/rust-lang/crates.io-index" 857 + checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105" 858 + 859 + [[package]] 860 + name = "unsigned-varint" 861 + version = "0.8.0" 862 + source = "registry+https://github.com/rust-lang/crates.io-index" 863 + checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06" 864 + 865 + [[package]] 866 + name = "utf8parse" 867 + version = "0.2.2" 868 + source = "registry+https://github.com/rust-lang/crates.io-index" 869 + checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 870 + 871 + [[package]] 872 + name = "wasi" 873 + version = "0.11.1+wasi-snapshot-preview1" 874 + source = "registry+https://github.com/rust-lang/crates.io-index" 875 + checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" 876 + 877 + [[package]] 878 + name = "windows-link" 879 + version = "0.2.1" 880 + source = "registry+https://github.com/rust-lang/crates.io-index" 881 + checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" 882 + 883 + [[package]] 884 + name = "windows-sys" 885 + version = "0.59.0" 886 + source = "registry+https://github.com/rust-lang/crates.io-index" 887 + checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 888 + dependencies = [ 889 + "windows-targets 0.52.6", 890 + ] 891 + 892 + [[package]] 893 + name = "windows-sys" 894 + version = "0.60.2" 895 + source = "registry+https://github.com/rust-lang/crates.io-index" 896 + checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" 897 + dependencies = [ 898 + "windows-targets 0.53.5", 899 + ] 900 + 901 + [[package]] 902 + name = "windows-targets" 903 + version = "0.52.6" 904 + source = "registry+https://github.com/rust-lang/crates.io-index" 905 + checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 906 + dependencies = [ 907 + "windows_aarch64_gnullvm 0.52.6", 908 + "windows_aarch64_msvc 0.52.6", 909 + "windows_i686_gnu 0.52.6", 910 + "windows_i686_gnullvm 0.52.6", 911 + "windows_i686_msvc 0.52.6", 912 + "windows_x86_64_gnu 0.52.6", 913 + "windows_x86_64_gnullvm 0.52.6", 914 + "windows_x86_64_msvc 0.52.6", 915 + ] 916 + 917 + [[package]] 918 + name = "windows-targets" 919 + version = "0.53.5" 920 + source = "registry+https://github.com/rust-lang/crates.io-index" 921 + checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" 922 + dependencies = [ 923 + "windows-link", 924 + "windows_aarch64_gnullvm 0.53.1", 925 + "windows_aarch64_msvc 0.53.1", 926 + "windows_i686_gnu 0.53.1", 927 + "windows_i686_gnullvm 0.53.1", 928 + "windows_i686_msvc 0.53.1", 929 + "windows_x86_64_gnu 0.53.1", 930 + "windows_x86_64_gnullvm 0.53.1", 931 + "windows_x86_64_msvc 0.53.1", 932 + ] 933 + 934 + [[package]] 935 + name = "windows_aarch64_gnullvm" 936 + version = "0.52.6" 937 + source = "registry+https://github.com/rust-lang/crates.io-index" 938 + checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 939 + 940 + [[package]] 941 + name = "windows_aarch64_gnullvm" 942 + version = "0.53.1" 943 + source = "registry+https://github.com/rust-lang/crates.io-index" 944 + checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" 945 + 946 + [[package]] 947 + name = "windows_aarch64_msvc" 948 + version = "0.52.6" 949 + source = "registry+https://github.com/rust-lang/crates.io-index" 950 + checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 951 + 952 + [[package]] 953 + name = "windows_aarch64_msvc" 954 + version = "0.53.1" 955 + source = "registry+https://github.com/rust-lang/crates.io-index" 956 + checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" 957 + 958 + [[package]] 959 + name = "windows_i686_gnu" 960 + version = "0.52.6" 961 + source = "registry+https://github.com/rust-lang/crates.io-index" 962 + checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 963 + 964 + [[package]] 965 + name = "windows_i686_gnu" 966 + version = "0.53.1" 967 + source = "registry+https://github.com/rust-lang/crates.io-index" 968 + checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" 969 + 970 + [[package]] 971 + name = "windows_i686_gnullvm" 972 + version = "0.52.6" 973 + source = "registry+https://github.com/rust-lang/crates.io-index" 974 + checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 975 + 976 + [[package]] 977 + name = "windows_i686_gnullvm" 978 + version = "0.53.1" 979 + source = "registry+https://github.com/rust-lang/crates.io-index" 980 + checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" 981 + 982 + [[package]] 983 + name = "windows_i686_msvc" 984 + version = "0.52.6" 985 + source = "registry+https://github.com/rust-lang/crates.io-index" 986 + checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 987 + 988 + [[package]] 989 + name = "windows_i686_msvc" 990 + version = "0.53.1" 991 + source = "registry+https://github.com/rust-lang/crates.io-index" 992 + checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" 993 + 994 + [[package]] 995 + name = "windows_x86_64_gnu" 996 + version = "0.52.6" 997 + source = "registry+https://github.com/rust-lang/crates.io-index" 998 + checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 999 + 1000 + [[package]] 1001 + name = "windows_x86_64_gnu" 1002 + version = "0.53.1" 1003 + source = "registry+https://github.com/rust-lang/crates.io-index" 1004 + checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" 1005 + 1006 + [[package]] 1007 + name = "windows_x86_64_gnullvm" 1008 + version = "0.52.6" 1009 + source = "registry+https://github.com/rust-lang/crates.io-index" 1010 + checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1011 + 1012 + [[package]] 1013 + name = "windows_x86_64_gnullvm" 1014 + version = "0.53.1" 1015 + source = "registry+https://github.com/rust-lang/crates.io-index" 1016 + checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" 1017 + 1018 + [[package]] 1019 + name = "windows_x86_64_msvc" 1020 + version = "0.52.6" 1021 + source = "registry+https://github.com/rust-lang/crates.io-index" 1022 + checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1023 + 1024 + [[package]] 1025 + name = "windows_x86_64_msvc" 1026 + version = "0.53.1" 1027 + source = "registry+https://github.com/rust-lang/crates.io-index" 1028 + checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
+18
Cargo.toml
···
··· 1 + [package] 2 + name = "repo-stream" 3 + version = "0.1.0" 4 + edition = "2024" 5 + 6 + [dependencies] 7 + ipld-core = { version = "0.4.2", features = ["serde"] } 8 + iroh-car = "0.5.1" 9 + log = "0.4.28" 10 + serde = { version = "1.0.228", features = ["derive"] } 11 + serde_ipld_dagcbor = "0.6.4" 12 + thiserror = "2.0.17" 13 + tokio = "1.47.1" 14 + 15 + [dev-dependencies] 16 + clap = { version = "4.5.48", features = ["derive"] } 17 + env_logger = "0.11.8" 18 + tokio = { version = "1.47.1", features = ["full"] }
+23
examples/read-file/main.rs
···
··· 1 + extern crate repo_stream; 2 + use clap::Parser; 3 + use std::path::PathBuf; 4 + 5 + type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; 6 + 7 + #[derive(Debug, Parser)] 8 + struct Args { 9 + #[arg()] file: PathBuf, 10 + } 11 + 12 + #[tokio::main] 13 + async fn main() -> Result<()> { 14 + env_logger::init(); 15 + 16 + let Args { file } = Args::parse(); 17 + let reader = tokio::fs::File::open(file).await?; 18 + 19 + println!("hello!"); 20 + repo_stream::drive::drive(reader).await?; 21 + 22 + Ok(()) 23 + }
+13
readme.md
···
··· 1 + # repo-stream 2 + 3 + an AsyncRead for atproto MSTs in CAR files 4 + 5 + - tries to walk and emit the MST *while streaming in the CAR* 6 + - drops intermediate mst blocks after reading to reduce total memory 7 + - user-provided transform function on record blocks from IPLD 8 + 9 + future work: 10 + - flush to disk if needed (sqlite? redb?) https://bsky.app/profile/divy.zone/post/3m2mf3jqx3k2w 11 + - either just generally to handle huge CARs, or as a fallback when streaming fails 12 + 13 + redb has an in-memory backend, so it would be possible to *always* use it for block caching. user can choose if they want to allow disk or just do memory, and then "spilling" from the cache to disk would be mostly free?
+109
src/drive.rs
···
··· 1 + use ipld_core::ipld::Ipld; 2 + use tokio::io::AsyncRead; 3 + use iroh_car::CarReader; 4 + use std::collections::HashMap; 5 + use ipld_core::cid::Cid; 6 + 7 + use crate::mst::Commit; 8 + use crate::walk::{Walker, Step, Trip}; 9 + 10 + #[derive(Debug, thiserror::Error)] 11 + pub enum DriveError { 12 + #[error("Failed to initialize CarReader: {0}")] 13 + CarReader(#[from] iroh_car::Error), 14 + #[error("CAR file requires a root to be present")] 15 + MissingRoot, 16 + #[error("Failed to decode commit block: {0}")] 17 + BadCommit(Box<dyn std::error::Error>), 18 + #[error("Failed to decode record block: {0}")] 19 + BadRecord(Box<dyn std::error::Error>), 20 + #[error("The Commit block reference by the root was not found")] 21 + MissingCommit, 22 + #[error("Failed to walk the mst tree: {0}")] 23 + Tripped(#[from] Trip), 24 + } 25 + 26 + 27 + pub async fn drive<R: AsyncRead + Unpin>(reader: R) -> Result<(), DriveError> { 28 + let mut reader = CarReader::new(reader).await?; 29 + 30 + let root = reader 31 + .header() 32 + .roots() 33 + .first() 34 + .ok_or(DriveError::MissingRoot)? 35 + .clone(); 36 + log::debug!("root: {root:?}"); 37 + 38 + // one day, 39 + // https://github.com/bluesky-social/proposals/tree/main/0006-sync-iteration#streaming-car-processing 40 + 41 + // block buffers 42 + let mut blocks: HashMap::<Cid, Vec<u8>> = HashMap::new(); 43 + 44 + // stage 1: try to parse out the commit block, buffering other blocks until 45 + // we find it 46 + let mut commit = None; 47 + while let Some((cid, data)) = reader.next_block().await? { 48 + if cid == root { 49 + let c: Commit = serde_ipld_dagcbor::from_slice(&data) 50 + .map_err(|e| DriveError::BadCommit(e.into()))?; 51 + commit = Some(c); 52 + break; 53 + } 54 + blocks.insert(cid, data); 55 + }; 56 + 57 + // we either broke out or read all the blocks without finding the commit... 58 + let commit = commit.ok_or(DriveError::MissingCommit)?; 59 + 60 + log::debug!("got the commit: {commit:?}"); 61 + 62 + // broke out! found it! yay! and with the commit we should know the tree 63 + // root, so we can start walking as we go now. 64 + let mut walker = Walker::new(commit.data); 65 + let mut n = 0; 66 + 'outer: loop { 67 + // walk as far as we can, then stream in more blocks 68 + let mut m = 0; 69 + loop { 70 + match walker.walk(&mut blocks)? { 71 + Step::Rest => { 72 + log::trace!("walker is resting, get another block"); 73 + break; 74 + } 75 + Step::Finish => { 76 + log::trace!("walker finished"); 77 + break 'outer; 78 + } 79 + Step::Step { rkey, data } => { 80 + let rkey = String::from_utf8(rkey); 81 + let record: Ipld = serde_ipld_dagcbor::from_slice(&data) 82 + .map_err(|e| DriveError::BadRecord(e.into()))?; 83 + log::info!("found {rkey:?} => {record:?}"); 84 + } 85 + } 86 + m += 1; 87 + if m > 1000 { 88 + log::error!("ran out of inner loop time, breaking"); 89 + break 'outer; 90 + }; 91 + } 92 + 93 + let Some((cid, data)) = reader.next_block().await? else { 94 + log::warn!("no more data to stream in, but ig walker didn't finish?"); 95 + break; 96 + }; 97 + blocks.insert(cid, data); 98 + 99 + n += 1; 100 + if n > 1000 { 101 + log::error!("ran out of outer loop time, breaking"); 102 + break 'outer; 103 + }; 104 + } 105 + 106 + log::info!("done! bye!"); 107 + 108 + Ok(()) 109 + }
+3
src/lib.rs
···
··· 1 + pub mod drive; 2 + pub mod mst; 3 + pub mod walk;
+90
src/mst.rs
···
··· 1 + //! Low-level types for parsing raw atproto MST CARs 2 + //! 3 + //! The primary aim is to work through the **tree** structure. Non-node blocks 4 + //! are left as raw bytes, for upper levels to parse into DAG-CBOR or whatever. 5 + 6 + use ipld_core::ipld::Ipld; 7 + use ipld_core::cid::Cid; 8 + use serde::Deserialize; 9 + 10 + 11 + /// The top-level data object in a repository's tree is a signed commit. 12 + #[derive(Debug, Deserialize)] 13 + // #[serde(deny_unknown_fields)] 14 + pub struct Commit { 15 + /// the account DID associated with the repo, in strictly normalized form 16 + /// (eg, lowercase as appropriate) 17 + pub did: String, 18 + /// fixed value of 3 for this repo format version 19 + pub version: u64, 20 + /// pointer to the top of the repo contents tree structure (MST) 21 + pub data: Cid, 22 + /// revision of the repo, used as a logical clock. 23 + /// 24 + /// TID format. Must increase monotonically. Recommend using current 25 + /// timestamp as TID; rev values in the "future" (beyond a fudge factor) 26 + /// should be ignored and not processed 27 + pub rev: String, 28 + /// pointer (by hash) to a previous commit object for this repository. 29 + /// 30 + /// Could be used to create a chain of history, but largely unused (included 31 + /// for v2 backwards compatibility). In version 3 repos, this field must 32 + /// exist in the CBOR object, but is virtually always null. NOTE: previously 33 + /// specified as nullable and optional, but this caused interoperability 34 + /// issues. 35 + pub prev: Option<Cid>, 36 + /// cryptographic signature of this commit, as raw bytes 37 + pub sig: ipld_core::ipld::Ipld, // TODO (vec<u8> fails with Mismatch { expect_major: 4, byte: 88 }) 38 + } 39 + 40 + /// MST node data schema 41 + #[derive(Debug, Deserialize)] 42 + #[serde(deny_unknown_fields)] 43 + pub struct Node { 44 + /// link to sub-tree Node on a lower level and with all keys sorting before 45 + /// keys at this node 46 + #[serde(rename = "l")] 47 + pub left: Option<Cid>, 48 + /// ordered list of TreeEntry objects 49 + /// 50 + /// atproto MSTs have a fanout of 4, so there can be max 4 entries. 51 + #[serde(rename = "e")] 52 + pub entries: Vec<Entry>, // maybe we can do [Option<Entry>; 4]? 53 + } 54 + 55 + impl Node { 56 + /// Check if a node has any entries 57 + /// 58 + /// An empty repository with no records is represented as a single MST node 59 + /// with an empty array of entries. This is the only situation in which a 60 + /// tree may contain an empty leaf node which does not either contain keys 61 + /// ("entries") or point to a sub-tree containing entries. 62 + /// 63 + /// TODO: to me this is slightly unclear with respect to `l` (ask someone). 64 + /// ...is that what "The top of the tree must not be a an empty node which 65 + /// only points to a sub-tree." is referring to? 66 + pub fn is_empty(&self) -> bool { 67 + self.left.is_none() && self.entries.is_empty() 68 + } 69 + } 70 + 71 + /// TreeEntry object 72 + #[derive(Debug, Deserialize)] 73 + #[serde(deny_unknown_fields)] 74 + pub struct Entry { 75 + /// count of bytes shared with previous TreeEntry in this Node (if any) 76 + #[serde(rename = "p")] 77 + pub prefix_len: usize, 78 + /// remainder of key for this TreeEntry, after "prefixlen" have been removed 79 + #[serde(rename = "k")] 80 + pub keysuffix: Ipld, // can we String this here? 81 + /// link to the record data (CBOR) for this entry 82 + #[serde(rename = "v")] 83 + pub value: Cid, 84 + /// link to a sub-tree Node at a lower level 85 + /// 86 + /// the lower level must have keys sorting after this TreeEntry's key (to 87 + /// the "right"), but before the next TreeEntry's key in this Node (if any) 88 + #[serde(rename = "t")] 89 + pub tree: Option<Cid>, 90 + }
+124
src/walk.rs
···
··· 1 + //! Depth-first MST traversal 2 + 3 + use ipld_core::ipld::Ipld; 4 + use crate::mst::Node; 5 + use std::collections::HashMap; 6 + use ipld_core::cid::Cid; 7 + 8 + #[derive(Debug, thiserror::Error)] 9 + pub enum Trip { 10 + #[error("empty mst nodes are not allowed")] 11 + NodeEmpty, 12 + #[error("Failed to decode commit block: {0}")] 13 + BadCommit(Box<dyn std::error::Error>), 14 + } 15 + 16 + #[derive(Debug)] 17 + pub enum Step { 18 + Rest, 19 + Finish, 20 + Step { 21 + rkey: Vec<u8>, 22 + data: Vec<u8> 23 + }, 24 + } 25 + 26 + #[derive(Debug)] 27 + enum Need { 28 + Node(Cid), 29 + Record { 30 + rkey: Vec<u8>, 31 + cid: Cid, 32 + }, 33 + AcutallyDone, 34 + } 35 + 36 + fn needs_from_node(node: Node) -> Vec<Need> { 37 + let mut out = vec![]; 38 + if let Some(left_cid) = node.left { 39 + out.push(Need::Node(left_cid)); 40 + } 41 + let mut prefix = vec![]; 42 + for (i, entry) in node.entries.into_iter().enumerate() { 43 + let suffix = match entry.keysuffix { 44 + Ipld::Bytes(data) => data, 45 + _ => panic!("booo"), 46 + }; 47 + let mut rkey = Vec::with_capacity(prefix.len() + suffix.len()); 48 + rkey.extend_from_slice(&prefix); 49 + rkey.extend_from_slice(&suffix); 50 + if i == 0 { 51 + prefix.extend_from_slice(&suffix); 52 + } 53 + out.push(Need::Record { rkey, cid: entry.value }); 54 + if let Some(child_cid) = entry.tree { 55 + out.push(Need::Node(child_cid.clone())); 56 + } 57 + } 58 + // stack is right-to-left, for our left-to-right traversal 59 + out.reverse(); 60 + out 61 + } 62 + 63 + 64 + #[derive(Debug)] 65 + pub struct Walker { 66 + current: Need, 67 + stack: Vec<Need>, 68 + } 69 + 70 + impl Walker { 71 + pub fn new(tree_root_cid: Cid) -> Self { 72 + Self { 73 + current: Need::Node(tree_root_cid), 74 + stack: Vec::new(), 75 + } 76 + } 77 + 78 + pub fn walk(&mut self, blocks: &mut HashMap<Cid, Vec<u8>>) -> Result<Step, Trip> { 79 + loop { 80 + match &mut self.current { 81 + Need::Node(cid) => { 82 + log::trace!("need node {cid:?}"); 83 + let Some(block) = blocks.remove(&cid) else { 84 + log::trace!("node not found, resting"); 85 + return Ok(Step::Rest); 86 + }; 87 + let node = serde_ipld_dagcbor::from_slice::<Node>(&block) 88 + .map_err(|e| Trip::BadCommit(e.into()))?; 89 + let mut needs = needs_from_node(node); 90 + self.stack.append(&mut needs); 91 + if let Some(need) = self.stack.pop() { 92 + log::trace!("found a need from the stack {need:?}"); 93 + self.current = need; 94 + } else { 95 + log::trace!("no more needs from stack, ig we are done?"); 96 + return Ok(Step::Finish); 97 + } 98 + } 99 + Need::Record { rkey, cid } => { 100 + log::trace!("need record {cid:?}"); 101 + let Some(data) = blocks.get(&cid) else { 102 + log::trace!("record block not found, resting"); 103 + return Ok(Step::Rest); 104 + }; 105 + let rkey = rkey.to_vec(); 106 + let data = data.to_vec(); 107 + if let Some(next) = self.stack.pop() { 108 + log::trace!("updated current from stack"); 109 + self.current = next; 110 + } else { 111 + log::trace!("nothing left on the stack, making us done"); 112 + self.current = Need::AcutallyDone; 113 + } 114 + log::trace!("providing a block as a step"); 115 + return Ok(Step::Step { rkey, data }); 116 + } 117 + Need::AcutallyDone => { 118 + log::trace!("tried to walk but we're actually done."); 119 + return Ok(Step::Finish); 120 + } 121 + } 122 + } 123 + } 124 + }