Fast and robust atproto CAR file processing in rust

Compare changes

Choose any two refs to compare.

Changed files
+452 -1772
benches
car-samples
examples
disk-read-file
read-file
src
tests
+180 -964
Cargo.lock
··· 3 3 version = 4 4 4 5 5 [[package]] 6 - name = "addr2line" 7 - version = "0.25.1" 8 - source = "registry+https://github.com/rust-lang/crates.io-index" 9 - checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" 10 - dependencies = [ 11 - "gimli", 12 - ] 13 - 14 - [[package]] 15 - name = "adler2" 16 - version = "2.0.1" 17 - source = "registry+https://github.com/rust-lang/crates.io-index" 18 - checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" 19 - 20 - [[package]] 21 - name = "aho-corasick" 22 - version = "1.1.3" 23 - source = "registry+https://github.com/rust-lang/crates.io-index" 24 - checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 - dependencies = [ 26 - "memchr", 27 - ] 28 - 29 - [[package]] 30 - name = "anes" 31 - version = "0.1.6" 32 - source = "registry+https://github.com/rust-lang/crates.io-index" 33 - checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 34 - 35 - [[package]] 36 6 name = "anstream" 37 7 version = "0.6.21" 38 8 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 68 38 source = "registry+https://github.com/rust-lang/crates.io-index" 69 39 checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" 70 40 dependencies = [ 71 - "windows-sys 0.60.2", 41 + "windows-sys", 72 42 ] 73 43 74 44 [[package]] ··· 79 49 dependencies = [ 80 50 "anstyle", 81 51 "once_cell_polyfill", 82 - "windows-sys 0.60.2", 83 - ] 84 - 85 - [[package]] 86 - name = "anyhow" 87 - version = "1.0.100" 88 - source = "registry+https://github.com/rust-lang/crates.io-index" 89 - checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" 90 - 91 - [[package]] 92 - name = "autocfg" 93 - version = "1.5.0" 94 - source = "registry+https://github.com/rust-lang/crates.io-index" 95 - checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" 96 - 97 - [[package]] 98 - name = "backtrace" 99 - version = 
"0.3.76" 100 - source = "registry+https://github.com/rust-lang/crates.io-index" 101 - checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" 102 - dependencies = [ 103 - "addr2line", 104 - "cfg-if", 105 - "libc", 106 - "miniz_oxide", 107 - "object", 108 - "rustc-demangle", 109 - "windows-link", 110 - ] 111 - 112 - [[package]] 113 - name = "base-x" 114 - version = "0.2.11" 115 - source = "registry+https://github.com/rust-lang/crates.io-index" 116 - checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270" 117 - 118 - [[package]] 119 - name = "base256emoji" 120 - version = "1.0.2" 121 - source = "registry+https://github.com/rust-lang/crates.io-index" 122 - checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c" 123 - dependencies = [ 124 - "const-str", 125 - "match-lookup", 52 + "windows-sys", 126 53 ] 127 54 128 55 [[package]] ··· 132 59 checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" 133 60 134 61 [[package]] 135 - name = "bumpalo" 136 - version = "3.19.0" 137 - source = "registry+https://github.com/rust-lang/crates.io-index" 138 - checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" 139 - 140 - [[package]] 141 - name = "bytes" 142 - version = "1.10.1" 143 - source = "registry+https://github.com/rust-lang/crates.io-index" 144 - checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 145 - 146 - [[package]] 147 - name = "cast" 148 - version = "0.3.0" 62 + name = "byteorder-lite" 63 + version = "0.1.0" 149 64 source = "registry+https://github.com/rust-lang/crates.io-index" 150 - checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 65 + checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" 151 66 152 67 [[package]] 153 - name = "cbor4ii" 154 - version = "0.2.14" 68 + name = "byteview" 69 + version = "0.10.0" 155 70 source = "registry+https://github.com/rust-lang/crates.io-index" 
156 - checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4" 157 - dependencies = [ 158 - "serde", 159 - ] 71 + checksum = "dda4398f387cc6395a3e93b3867cd9abda914c97a0b344d1eefb2e5c51785fca" 160 72 161 73 [[package]] 162 74 name = "cfg-if" ··· 165 77 checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" 166 78 167 79 [[package]] 168 - name = "ciborium" 169 - version = "0.2.2" 170 - source = "registry+https://github.com/rust-lang/crates.io-index" 171 - checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 172 - dependencies = [ 173 - "ciborium-io", 174 - "ciborium-ll", 175 - "serde", 176 - ] 177 - 178 - [[package]] 179 - name = "ciborium-io" 180 - version = "0.2.2" 181 - source = "registry+https://github.com/rust-lang/crates.io-index" 182 - checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 183 - 184 - [[package]] 185 - name = "ciborium-ll" 186 - version = "0.2.2" 187 - source = "registry+https://github.com/rust-lang/crates.io-index" 188 - checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 189 - dependencies = [ 190 - "ciborium-io", 191 - "half", 192 - ] 193 - 194 - [[package]] 195 - name = "cid" 196 - version = "0.11.1" 197 - source = "registry+https://github.com/rust-lang/crates.io-index" 198 - checksum = "3147d8272e8fa0ccd29ce51194dd98f79ddfb8191ba9e3409884e751798acf3a" 199 - dependencies = [ 200 - "core2", 201 - "multibase", 202 - "multihash", 203 - "serde", 204 - "serde_bytes", 205 - "unsigned-varint 0.8.0", 206 - ] 207 - 208 - [[package]] 209 80 name = "clap" 210 81 version = "4.5.48" 211 82 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 236 107 "heck", 237 108 "proc-macro2", 238 109 "quote", 239 - "syn 2.0.106", 110 + "syn", 240 111 ] 241 112 242 113 [[package]] ··· 252 123 checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" 253 124 254 125 [[package]] 255 - name = "const-str" 256 - version 
= "0.4.3" 257 - source = "registry+https://github.com/rust-lang/crates.io-index" 258 - checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3" 259 - 260 - [[package]] 261 - name = "core2" 262 - version = "0.4.0" 263 - source = "registry+https://github.com/rust-lang/crates.io-index" 264 - checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" 265 - dependencies = [ 266 - "memchr", 267 - ] 268 - 269 - [[package]] 270 - name = "criterion" 271 - version = "0.7.0" 272 - source = "registry+https://github.com/rust-lang/crates.io-index" 273 - checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" 274 - dependencies = [ 275 - "anes", 276 - "cast", 277 - "ciborium", 278 - "clap", 279 - "criterion-plot", 280 - "itertools", 281 - "num-traits", 282 - "oorandom", 283 - "plotters", 284 - "rayon", 285 - "regex", 286 - "serde", 287 - "serde_json", 288 - "tinytemplate", 289 - "tokio", 290 - "walkdir", 291 - ] 292 - 293 - [[package]] 294 - name = "criterion-plot" 295 - version = "0.6.0" 126 + name = "compare" 127 + version = "0.0.6" 296 128 source = "registry+https://github.com/rust-lang/crates.io-index" 297 - checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" 298 - dependencies = [ 299 - "cast", 300 - "itertools", 301 - ] 129 + checksum = "ea0095f6103c2a8b44acd6fd15960c801dafebf02e21940360833e0673f48ba7" 302 130 303 131 [[package]] 304 - name = "crossbeam-deque" 305 - version = "0.8.6" 132 + name = "crossbeam-epoch" 133 + version = "0.9.18" 306 134 source = "registry+https://github.com/rust-lang/crates.io-index" 307 - checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" 135 + checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 308 136 dependencies = [ 309 - "crossbeam-epoch", 310 137 "crossbeam-utils", 311 138 ] 312 139 313 140 [[package]] 314 - name = "crossbeam-epoch" 315 - version = "0.9.18" 141 + name = "crossbeam-skiplist" 142 + 
version = "0.1.3" 316 143 source = "registry+https://github.com/rust-lang/crates.io-index" 317 - checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 144 + checksum = "df29de440c58ca2cc6e587ec3d22347551a32435fbde9d2bff64e78a9ffa151b" 318 145 dependencies = [ 146 + "crossbeam-epoch", 319 147 "crossbeam-utils", 320 148 ] 321 149 ··· 326 154 checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 327 155 328 156 [[package]] 329 - name = "crunchy" 330 - version = "0.2.4" 157 + name = "dashmap" 158 + version = "6.1.0" 331 159 source = "registry+https://github.com/rust-lang/crates.io-index" 332 - checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" 333 - 334 - [[package]] 335 - name = "data-encoding" 336 - version = "2.9.0" 337 - source = "registry+https://github.com/rust-lang/crates.io-index" 338 - checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" 339 - 340 - [[package]] 341 - name = "data-encoding-macro" 342 - version = "0.1.18" 343 - source = "registry+https://github.com/rust-lang/crates.io-index" 344 - checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d" 160 + checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" 345 161 dependencies = [ 346 - "data-encoding", 347 - "data-encoding-macro-internal", 162 + "cfg-if", 163 + "crossbeam-utils", 164 + "hashbrown 0.14.5", 165 + "lock_api", 166 + "once_cell", 167 + "parking_lot_core", 348 168 ] 349 169 350 170 [[package]] 351 - name = "data-encoding-macro-internal" 352 - version = "0.1.16" 171 + name = "enum_dispatch" 172 + version = "0.3.13" 353 173 source = "registry+https://github.com/rust-lang/crates.io-index" 354 - checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976" 174 + checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" 355 175 dependencies = [ 356 - "data-encoding", 357 - "syn 2.0.106", 176 + "once_cell", 177 + 
"proc-macro2", 178 + "quote", 179 + "syn", 358 180 ] 359 181 360 182 [[package]] 361 - name = "either" 362 - version = "1.15.0" 183 + name = "equivalent" 184 + version = "1.0.2" 363 185 source = "registry+https://github.com/rust-lang/crates.io-index" 364 - checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 365 - 366 - [[package]] 367 - name = "env_filter" 368 - version = "0.1.3" 369 - source = "registry+https://github.com/rust-lang/crates.io-index" 370 - checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" 371 - dependencies = [ 372 - "log", 373 - "regex", 374 - ] 375 - 376 - [[package]] 377 - name = "env_logger" 378 - version = "0.11.8" 379 - source = "registry+https://github.com/rust-lang/crates.io-index" 380 - checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" 381 - dependencies = [ 382 - "anstream", 383 - "anstyle", 384 - "env_filter", 385 - "jiff", 386 - "log", 387 - ] 388 - 389 - [[package]] 390 - name = "futures" 391 - version = "0.3.31" 392 - source = "registry+https://github.com/rust-lang/crates.io-index" 393 - checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" 394 - dependencies = [ 395 - "futures-channel", 396 - "futures-core", 397 - "futures-executor", 398 - "futures-io", 399 - "futures-sink", 400 - "futures-task", 401 - "futures-util", 402 - ] 186 + checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 403 187 404 188 [[package]] 405 - name = "futures-channel" 406 - version = "0.3.31" 189 + name = "errno" 190 + version = "0.3.14" 407 191 source = "registry+https://github.com/rust-lang/crates.io-index" 408 - checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" 192 + checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" 409 193 dependencies = [ 410 - "futures-core", 411 - "futures-sink", 194 + "libc", 195 + "windows-sys", 412 196 ] 413 197 414 198 [[package]] 415 - name = 
"futures-core" 416 - version = "0.3.31" 199 + name = "fastrand" 200 + version = "2.3.0" 417 201 source = "registry+https://github.com/rust-lang/crates.io-index" 418 - checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" 202 + checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" 419 203 420 204 [[package]] 421 - name = "futures-executor" 422 - version = "0.3.31" 205 + name = "fjall" 206 + version = "3.0.1" 423 207 source = "registry+https://github.com/rust-lang/crates.io-index" 424 - checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" 208 + checksum = "4f69637c02d38ad1b0f003101d0195a60368130aa17d9ef78b1557d265a22093" 425 209 dependencies = [ 426 - "futures-core", 427 - "futures-task", 428 - "futures-util", 210 + "byteorder-lite", 211 + "byteview", 212 + "dashmap", 213 + "flume", 214 + "log", 215 + "lsm-tree", 216 + "lz4_flex", 217 + "tempfile", 218 + "xxhash-rust", 429 219 ] 430 220 431 221 [[package]] 432 - name = "futures-io" 433 - version = "0.3.31" 434 - source = "registry+https://github.com/rust-lang/crates.io-index" 435 - checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" 436 - 437 - [[package]] 438 - name = "futures-macro" 439 - version = "0.3.31" 222 + name = "flume" 223 + version = "0.12.0" 440 224 source = "registry+https://github.com/rust-lang/crates.io-index" 441 - checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" 225 + checksum = "5e139bc46ca777eb5efaf62df0ab8cc5fd400866427e56c68b22e414e53bd3be" 442 226 dependencies = [ 443 - "proc-macro2", 444 - "quote", 445 - "syn 2.0.106", 227 + "spin", 446 228 ] 447 229 448 230 [[package]] 449 - name = "futures-sink" 450 - version = "0.3.31" 451 - source = "registry+https://github.com/rust-lang/crates.io-index" 452 - checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" 453 - 454 - [[package]] 455 - name = "futures-task" 456 - version = "0.3.31" 231 + name = 
"getrandom" 232 + version = "0.3.3" 457 233 source = "registry+https://github.com/rust-lang/crates.io-index" 458 - checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" 459 - 460 - [[package]] 461 - name = "futures-util" 462 - version = "0.3.31" 463 - source = "registry+https://github.com/rust-lang/crates.io-index" 464 - checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" 234 + checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" 465 235 dependencies = [ 466 - "futures-channel", 467 - "futures-core", 468 - "futures-io", 469 - "futures-macro", 470 - "futures-sink", 471 - "futures-task", 472 - "memchr", 473 - "pin-project-lite", 474 - "pin-utils", 475 - "slab", 236 + "cfg-if", 237 + "libc", 238 + "r-efi", 239 + "wasi", 476 240 ] 477 241 478 242 [[package]] 479 - name = "gimli" 480 - version = "0.32.3" 243 + name = "hashbrown" 244 + version = "0.14.5" 481 245 source = "registry+https://github.com/rust-lang/crates.io-index" 482 - checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" 246 + checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 483 247 484 248 [[package]] 485 - name = "half" 486 - version = "2.7.0" 249 + name = "hashbrown" 250 + version = "0.16.1" 487 251 source = "registry+https://github.com/rust-lang/crates.io-index" 488 - checksum = "e54c115d4f30f52c67202f079c5f9d8b49db4691f460fdb0b4c2e838261b2ba5" 489 - dependencies = [ 490 - "cfg-if", 491 - "crunchy", 492 - "zerocopy", 493 - ] 252 + checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" 494 253 495 254 [[package]] 496 255 name = "heck" ··· 499 258 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 500 259 501 260 [[package]] 502 - name = "io-uring" 503 - version = "0.7.10" 261 + name = "interval-heap" 262 + version = "0.0.5" 504 263 source = "registry+https://github.com/rust-lang/crates.io-index" 505 - checksum = 
"046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" 506 - dependencies = [ 507 - "bitflags", 508 - "cfg-if", 509 - "libc", 510 - ] 511 - 512 - [[package]] 513 - name = "ipld-core" 514 - version = "0.4.2" 515 - source = "registry+https://github.com/rust-lang/crates.io-index" 516 - checksum = "104718b1cc124d92a6d01ca9c9258a7df311405debb3408c445a36452f9bf8db" 264 + checksum = "11274e5e8e89b8607cfedc2910b6626e998779b48a019151c7604d0adcb86ac6" 517 265 dependencies = [ 518 - "cid", 519 - "serde", 520 - "serde_bytes", 521 - ] 522 - 523 - [[package]] 524 - name = "iroh-car" 525 - version = "0.5.1" 526 - source = "registry+https://github.com/rust-lang/crates.io-index" 527 - checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a" 528 - dependencies = [ 529 - "anyhow", 530 - "cid", 531 - "futures", 532 - "serde", 533 - "serde_ipld_dagcbor", 534 - "thiserror 1.0.69", 535 - "tokio", 536 - "unsigned-varint 0.7.2", 266 + "compare", 537 267 ] 538 268 539 269 [[package]] ··· 543 273 checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 544 274 545 275 [[package]] 546 - name = "itertools" 547 - version = "0.13.0" 548 - source = "registry+https://github.com/rust-lang/crates.io-index" 549 - checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" 550 - dependencies = [ 551 - "either", 552 - ] 553 - 554 - [[package]] 555 - name = "itoa" 556 - version = "1.0.15" 557 - source = "registry+https://github.com/rust-lang/crates.io-index" 558 - checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" 559 - 560 - [[package]] 561 - name = "jiff" 562 - version = "0.2.15" 563 - source = "registry+https://github.com/rust-lang/crates.io-index" 564 - checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" 565 - dependencies = [ 566 - "jiff-static", 567 - "log", 568 - "portable-atomic", 569 - "portable-atomic-util", 570 - "serde", 571 - ] 572 - 573 - [[package]] 574 - name = 
"jiff-static" 575 - version = "0.2.15" 576 - source = "registry+https://github.com/rust-lang/crates.io-index" 577 - checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" 578 - dependencies = [ 579 - "proc-macro2", 580 - "quote", 581 - "syn 2.0.106", 582 - ] 583 - 584 - [[package]] 585 - name = "js-sys" 586 - version = "0.3.81" 587 - source = "registry+https://github.com/rust-lang/crates.io-index" 588 - checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" 589 - dependencies = [ 590 - "once_cell", 591 - "wasm-bindgen", 592 - ] 593 - 594 - [[package]] 595 276 name = "libc" 596 277 version = "0.2.176" 597 278 source = "registry+https://github.com/rust-lang/crates.io-index" 598 279 checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" 280 + 281 + [[package]] 282 + name = "linux-raw-sys" 283 + version = "0.11.0" 284 + source = "registry+https://github.com/rust-lang/crates.io-index" 285 + checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" 599 286 600 287 [[package]] 601 288 name = "lock_api" ··· 613 300 checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" 614 301 615 302 [[package]] 616 - name = "match-lookup" 617 - version = "0.1.1" 303 + name = "lsm-tree" 304 + version = "3.0.1" 618 305 source = "registry+https://github.com/rust-lang/crates.io-index" 619 - checksum = "1265724d8cb29dbbc2b0f06fffb8bf1a8c0cf73a78eede9ba73a4a66c52a981e" 306 + checksum = "b875f1dfe14f557f805b167fb9b0fc54c5560c7a4bd6ae02535b2846f276a8cb" 620 307 dependencies = [ 621 - "proc-macro2", 622 - "quote", 623 - "syn 1.0.109", 624 - ] 625 - 626 - [[package]] 627 - name = "memchr" 628 - version = "2.7.6" 629 - source = "registry+https://github.com/rust-lang/crates.io-index" 630 - checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" 631 - 632 - [[package]] 633 - name = "miniz_oxide" 634 - version = "0.8.9" 635 - source = 
"registry+https://github.com/rust-lang/crates.io-index" 636 - checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" 637 - dependencies = [ 638 - "adler2", 308 + "byteorder-lite", 309 + "byteview", 310 + "crossbeam-skiplist", 311 + "enum_dispatch", 312 + "interval-heap", 313 + "log", 314 + "lz4_flex", 315 + "quick_cache", 316 + "rustc-hash", 317 + "self_cell", 318 + "sfa", 319 + "tempfile", 320 + "varint-rs", 321 + "xxhash-rust", 639 322 ] 640 323 641 324 [[package]] 642 - name = "mio" 643 - version = "1.0.4" 325 + name = "lz4_flex" 326 + version = "0.11.5" 644 327 source = "registry+https://github.com/rust-lang/crates.io-index" 645 - checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" 328 + checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" 646 329 dependencies = [ 647 - "libc", 648 - "wasi", 649 - "windows-sys 0.59.0", 650 - ] 651 - 652 - [[package]] 653 - name = "multibase" 654 - version = "0.9.2" 655 - source = "registry+https://github.com/rust-lang/crates.io-index" 656 - checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77" 657 - dependencies = [ 658 - "base-x", 659 - "base256emoji", 660 - "data-encoding", 661 - "data-encoding-macro", 662 - ] 663 - 664 - [[package]] 665 - name = "multihash" 666 - version = "0.19.3" 667 - source = "registry+https://github.com/rust-lang/crates.io-index" 668 - checksum = "6b430e7953c29dd6a09afc29ff0bb69c6e306329ee6794700aee27b76a1aea8d" 669 - dependencies = [ 670 - "core2", 671 - "serde", 672 - "unsigned-varint 0.8.0", 673 - ] 674 - 675 - [[package]] 676 - name = "num-traits" 677 - version = "0.2.19" 678 - source = "registry+https://github.com/rust-lang/crates.io-index" 679 - checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 680 - dependencies = [ 681 - "autocfg", 682 - ] 683 - 684 - [[package]] 685 - name = "object" 686 - version = "0.37.3" 687 - source = 
"registry+https://github.com/rust-lang/crates.io-index" 688 - checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" 689 - dependencies = [ 690 - "memchr", 330 + "twox-hash", 691 331 ] 692 332 693 333 [[package]] ··· 703 343 checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" 704 344 705 345 [[package]] 706 - name = "oorandom" 707 - version = "11.1.5" 708 - source = "registry+https://github.com/rust-lang/crates.io-index" 709 - checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" 710 - 711 - [[package]] 712 - name = "parking_lot" 713 - version = "0.12.5" 714 - source = "registry+https://github.com/rust-lang/crates.io-index" 715 - checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" 716 - dependencies = [ 717 - "lock_api", 718 - "parking_lot_core", 719 - ] 720 - 721 - [[package]] 722 346 name = "parking_lot_core" 723 347 version = "0.9.12" 724 348 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 732 356 ] 733 357 734 358 [[package]] 735 - name = "pin-project-lite" 736 - version = "0.2.16" 737 - source = "registry+https://github.com/rust-lang/crates.io-index" 738 - checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" 739 - 740 - [[package]] 741 - name = "pin-utils" 742 - version = "0.1.0" 743 - source = "registry+https://github.com/rust-lang/crates.io-index" 744 - checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 745 - 746 - [[package]] 747 - name = "plotters" 748 - version = "0.3.7" 749 - source = "registry+https://github.com/rust-lang/crates.io-index" 750 - checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" 751 - dependencies = [ 752 - "num-traits", 753 - "plotters-backend", 754 - "plotters-svg", 755 - "wasm-bindgen", 756 - "web-sys", 757 - ] 758 - 759 - [[package]] 760 - name = "plotters-backend" 761 - version = "0.3.7" 762 - source = 
"registry+https://github.com/rust-lang/crates.io-index" 763 - checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" 764 - 765 - [[package]] 766 - name = "plotters-svg" 767 - version = "0.3.7" 359 + name = "proc-macro2" 360 + version = "1.0.101" 768 361 source = "registry+https://github.com/rust-lang/crates.io-index" 769 - checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" 362 + checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" 770 363 dependencies = [ 771 - "plotters-backend", 364 + "unicode-ident", 772 365 ] 773 366 774 367 [[package]] 775 - name = "portable-atomic" 776 - version = "1.11.1" 777 - source = "registry+https://github.com/rust-lang/crates.io-index" 778 - checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" 779 - 780 - [[package]] 781 - name = "portable-atomic-util" 782 - version = "0.2.4" 368 + name = "quick_cache" 369 + version = "0.6.18" 783 370 source = "registry+https://github.com/rust-lang/crates.io-index" 784 - checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" 371 + checksum = "7ada44a88ef953a3294f6eb55d2007ba44646015e18613d2f213016379203ef3" 785 372 dependencies = [ 786 - "portable-atomic", 787 - ] 788 - 789 - [[package]] 790 - name = "proc-macro2" 791 - version = "1.0.101" 792 - source = "registry+https://github.com/rust-lang/crates.io-index" 793 - checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" 794 - dependencies = [ 795 - "unicode-ident", 373 + "equivalent", 374 + "hashbrown 0.16.1", 796 375 ] 797 376 798 377 [[package]] ··· 805 384 ] 806 385 807 386 [[package]] 808 - name = "rayon" 809 - version = "1.11.0" 810 - source = "registry+https://github.com/rust-lang/crates.io-index" 811 - checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" 812 - dependencies = [ 813 - "either", 814 - "rayon-core", 815 - ] 816 - 817 - [[package]] 818 - name = "rayon-core" 819 - 
version = "1.13.0" 387 + name = "r-efi" 388 + version = "5.3.0" 820 389 source = "registry+https://github.com/rust-lang/crates.io-index" 821 - checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" 822 - dependencies = [ 823 - "crossbeam-deque", 824 - "crossbeam-utils", 825 - ] 390 + checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" 826 391 827 392 [[package]] 828 393 name = "redox_syscall" ··· 834 399 ] 835 400 836 401 [[package]] 837 - name = "regex" 838 - version = "1.11.3" 839 - source = "registry+https://github.com/rust-lang/crates.io-index" 840 - checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" 841 - dependencies = [ 842 - "aho-corasick", 843 - "memchr", 844 - "regex-automata", 845 - "regex-syntax", 846 - ] 847 - 848 - [[package]] 849 - name = "regex-automata" 850 - version = "0.4.11" 851 - source = "registry+https://github.com/rust-lang/crates.io-index" 852 - checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" 853 - dependencies = [ 854 - "aho-corasick", 855 - "memchr", 856 - "regex-syntax", 857 - ] 858 - 859 - [[package]] 860 - name = "regex-syntax" 861 - version = "0.8.6" 862 - source = "registry+https://github.com/rust-lang/crates.io-index" 863 - checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" 864 - 865 - [[package]] 866 402 name = "repo-stream" 867 - version = "0.1.0" 403 + version = "0.2.2" 868 404 dependencies = [ 869 405 "clap", 870 - "criterion", 871 - "env_logger", 872 - "futures", 873 - "futures-core", 874 - "ipld-core", 875 - "iroh-car", 876 - "log", 877 - "multibase", 878 - "serde", 879 - "serde_bytes", 880 - "serde_ipld_dagcbor", 881 - "thiserror 2.0.17", 882 - "tokio", 406 + "fjall", 883 407 ] 884 408 885 409 [[package]] 886 - name = "rustc-demangle" 887 - version = "0.1.26" 888 - source = "registry+https://github.com/rust-lang/crates.io-index" 889 - checksum = 
"56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" 890 - 891 - [[package]] 892 - name = "rustversion" 893 - version = "1.0.22" 410 + name = "rustc-hash" 411 + version = "2.1.1" 894 412 source = "registry+https://github.com/rust-lang/crates.io-index" 895 - checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" 896 - 897 - [[package]] 898 - name = "ryu" 899 - version = "1.0.20" 900 - source = "registry+https://github.com/rust-lang/crates.io-index" 901 - checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" 413 + checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" 902 414 903 415 [[package]] 904 - name = "same-file" 905 - version = "1.0.6" 416 + name = "rustix" 417 + version = "1.1.2" 906 418 source = "registry+https://github.com/rust-lang/crates.io-index" 907 - checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 419 + checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" 908 420 dependencies = [ 909 - "winapi-util", 421 + "bitflags", 422 + "errno", 423 + "libc", 424 + "linux-raw-sys", 425 + "windows-sys", 910 426 ] 911 427 912 428 [[package]] ··· 916 432 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 917 433 918 434 [[package]] 919 - name = "serde" 920 - version = "1.0.228" 921 - source = "registry+https://github.com/rust-lang/crates.io-index" 922 - checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" 923 - dependencies = [ 924 - "serde_core", 925 - "serde_derive", 926 - ] 927 - 928 - [[package]] 929 - name = "serde_bytes" 930 - version = "0.11.19" 931 - source = "registry+https://github.com/rust-lang/crates.io-index" 932 - checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" 933 - dependencies = [ 934 - "serde", 935 - "serde_core", 936 - ] 937 - 938 - [[package]] 939 - name = "serde_core" 940 - version = "1.0.228" 435 + name = "self_cell" 436 + 
version = "1.2.2" 941 437 source = "registry+https://github.com/rust-lang/crates.io-index" 942 - checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" 943 - dependencies = [ 944 - "serde_derive", 945 - ] 438 + checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" 946 439 947 440 [[package]] 948 - name = "serde_derive" 949 - version = "1.0.228" 441 + name = "sfa" 442 + version = "1.0.0" 950 443 source = "registry+https://github.com/rust-lang/crates.io-index" 951 - checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" 444 + checksum = "a1296838937cab56cd6c4eeeb8718ec777383700c33f060e2869867bd01d1175" 952 445 dependencies = [ 953 - "proc-macro2", 954 - "quote", 955 - "syn 2.0.106", 446 + "byteorder-lite", 447 + "log", 448 + "xxhash-rust", 956 449 ] 957 450 958 451 [[package]] 959 - name = "serde_ipld_dagcbor" 960 - version = "0.6.4" 961 - source = "registry+https://github.com/rust-lang/crates.io-index" 962 - checksum = "46182f4f08349a02b45c998ba3215d3f9de826246ba02bb9dddfe9a2a2100778" 963 - dependencies = [ 964 - "cbor4ii", 965 - "ipld-core", 966 - "scopeguard", 967 - "serde", 968 - ] 969 - 970 - [[package]] 971 - name = "serde_json" 972 - version = "1.0.145" 973 - source = "registry+https://github.com/rust-lang/crates.io-index" 974 - checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" 975 - dependencies = [ 976 - "itoa", 977 - "memchr", 978 - "ryu", 979 - "serde", 980 - "serde_core", 981 - ] 982 - 983 - [[package]] 984 - name = "signal-hook-registry" 985 - version = "1.4.6" 986 - source = "registry+https://github.com/rust-lang/crates.io-index" 987 - checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" 988 - dependencies = [ 989 - "libc", 990 - ] 991 - 992 - [[package]] 993 - name = "slab" 994 - version = "0.4.11" 995 - source = "registry+https://github.com/rust-lang/crates.io-index" 996 - checksum = 
"7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" 997 - 998 - [[package]] 999 452 name = "smallvec" 1000 453 version = "1.15.1" 1001 454 source = "registry+https://github.com/rust-lang/crates.io-index" 1002 455 checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" 1003 456 1004 457 [[package]] 1005 - name = "socket2" 1006 - version = "0.6.0" 458 + name = "spin" 459 + version = "0.9.8" 1007 460 source = "registry+https://github.com/rust-lang/crates.io-index" 1008 - checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" 461 + checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" 1009 462 dependencies = [ 1010 - "libc", 1011 - "windows-sys 0.59.0", 463 + "lock_api", 1012 464 ] 1013 465 1014 466 [[package]] ··· 1019 471 1020 472 [[package]] 1021 473 name = "syn" 1022 - version = "1.0.109" 1023 - source = "registry+https://github.com/rust-lang/crates.io-index" 1024 - checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 1025 - dependencies = [ 1026 - "proc-macro2", 1027 - "quote", 1028 - "unicode-ident", 1029 - ] 1030 - 1031 - [[package]] 1032 - name = "syn" 1033 474 version = "2.0.106" 1034 475 source = "registry+https://github.com/rust-lang/crates.io-index" 1035 476 checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" ··· 1040 481 ] 1041 482 1042 483 [[package]] 1043 - name = "thiserror" 1044 - version = "1.0.69" 1045 - source = "registry+https://github.com/rust-lang/crates.io-index" 1046 - checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" 1047 - dependencies = [ 1048 - "thiserror-impl 1.0.69", 1049 - ] 1050 - 1051 - [[package]] 1052 - name = "thiserror" 1053 - version = "2.0.17" 1054 - source = "registry+https://github.com/rust-lang/crates.io-index" 1055 - checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" 1056 - dependencies = [ 1057 - "thiserror-impl 2.0.17", 1058 - ] 1059 - 
1060 - [[package]] 1061 - name = "thiserror-impl" 1062 - version = "1.0.69" 1063 - source = "registry+https://github.com/rust-lang/crates.io-index" 1064 - checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" 1065 - dependencies = [ 1066 - "proc-macro2", 1067 - "quote", 1068 - "syn 2.0.106", 1069 - ] 1070 - 1071 - [[package]] 1072 - name = "thiserror-impl" 1073 - version = "2.0.17" 1074 - source = "registry+https://github.com/rust-lang/crates.io-index" 1075 - checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" 1076 - dependencies = [ 1077 - "proc-macro2", 1078 - "quote", 1079 - "syn 2.0.106", 1080 - ] 1081 - 1082 - [[package]] 1083 - name = "tinytemplate" 1084 - version = "1.2.1" 484 + name = "tempfile" 485 + version = "3.23.0" 1085 486 source = "registry+https://github.com/rust-lang/crates.io-index" 1086 - checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 487 + checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" 1087 488 dependencies = [ 1088 - "serde", 1089 - "serde_json", 489 + "fastrand", 490 + "getrandom", 491 + "once_cell", 492 + "rustix", 493 + "windows-sys", 1090 494 ] 1091 495 1092 496 [[package]] 1093 - name = "tokio" 1094 - version = "1.47.1" 497 + name = "twox-hash" 498 + version = "2.1.2" 1095 499 source = "registry+https://github.com/rust-lang/crates.io-index" 1096 - checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" 1097 - dependencies = [ 1098 - "backtrace", 1099 - "bytes", 1100 - "io-uring", 1101 - "libc", 1102 - "mio", 1103 - "parking_lot", 1104 - "pin-project-lite", 1105 - "signal-hook-registry", 1106 - "slab", 1107 - "socket2", 1108 - "tokio-macros", 1109 - "windows-sys 0.59.0", 1110 - ] 1111 - 1112 - [[package]] 1113 - name = "tokio-macros" 1114 - version = "2.5.0" 1115 - source = "registry+https://github.com/rust-lang/crates.io-index" 1116 - checksum = 
"6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" 1117 - dependencies = [ 1118 - "proc-macro2", 1119 - "quote", 1120 - "syn 2.0.106", 1121 - ] 500 + checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" 1122 501 1123 502 [[package]] 1124 503 name = "unicode-ident" ··· 1127 506 checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" 1128 507 1129 508 [[package]] 1130 - name = "unsigned-varint" 1131 - version = "0.7.2" 1132 - source = "registry+https://github.com/rust-lang/crates.io-index" 1133 - checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105" 1134 - 1135 - [[package]] 1136 - name = "unsigned-varint" 1137 - version = "0.8.0" 1138 - source = "registry+https://github.com/rust-lang/crates.io-index" 1139 - checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06" 1140 - 1141 - [[package]] 1142 509 name = "utf8parse" 1143 510 version = "0.2.2" 1144 511 source = "registry+https://github.com/rust-lang/crates.io-index" 1145 512 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 1146 513 1147 514 [[package]] 1148 - name = "walkdir" 1149 - version = "2.5.0" 515 + name = "varint-rs" 516 + version = "2.2.0" 1150 517 source = "registry+https://github.com/rust-lang/crates.io-index" 1151 - checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 1152 - dependencies = [ 1153 - "same-file", 1154 - "winapi-util", 1155 - ] 518 + checksum = "8f54a172d0620933a27a4360d3db3e2ae0dd6cceae9730751a036bbf182c4b23" 1156 519 1157 520 [[package]] 1158 521 name = "wasi" 1159 - version = "0.11.1+wasi-snapshot-preview1" 1160 - source = "registry+https://github.com/rust-lang/crates.io-index" 1161 - checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" 1162 - 1163 - [[package]] 1164 - name = "wasm-bindgen" 1165 - version = "0.2.104" 1166 - source = "registry+https://github.com/rust-lang/crates.io-index" 1167 - checksum = 
"c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" 1168 - dependencies = [ 1169 - "cfg-if", 1170 - "once_cell", 1171 - "rustversion", 1172 - "wasm-bindgen-macro", 1173 - "wasm-bindgen-shared", 1174 - ] 1175 - 1176 - [[package]] 1177 - name = "wasm-bindgen-backend" 1178 - version = "0.2.104" 1179 - source = "registry+https://github.com/rust-lang/crates.io-index" 1180 - checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" 1181 - dependencies = [ 1182 - "bumpalo", 1183 - "log", 1184 - "proc-macro2", 1185 - "quote", 1186 - "syn 2.0.106", 1187 - "wasm-bindgen-shared", 1188 - ] 1189 - 1190 - [[package]] 1191 - name = "wasm-bindgen-macro" 1192 - version = "0.2.104" 1193 - source = "registry+https://github.com/rust-lang/crates.io-index" 1194 - checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" 1195 - dependencies = [ 1196 - "quote", 1197 - "wasm-bindgen-macro-support", 1198 - ] 1199 - 1200 - [[package]] 1201 - name = "wasm-bindgen-macro-support" 1202 - version = "0.2.104" 1203 - source = "registry+https://github.com/rust-lang/crates.io-index" 1204 - checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" 1205 - dependencies = [ 1206 - "proc-macro2", 1207 - "quote", 1208 - "syn 2.0.106", 1209 - "wasm-bindgen-backend", 1210 - "wasm-bindgen-shared", 1211 - ] 1212 - 1213 - [[package]] 1214 - name = "wasm-bindgen-shared" 1215 - version = "0.2.104" 1216 - source = "registry+https://github.com/rust-lang/crates.io-index" 1217 - checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" 1218 - dependencies = [ 1219 - "unicode-ident", 1220 - ] 1221 - 1222 - [[package]] 1223 - name = "web-sys" 1224 - version = "0.3.81" 522 + version = "0.14.7+wasi-0.2.4" 1225 523 source = "registry+https://github.com/rust-lang/crates.io-index" 1226 - checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" 524 + checksum = 
"883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" 1227 525 dependencies = [ 1228 - "js-sys", 1229 - "wasm-bindgen", 526 + "wasip2", 1230 527 ] 1231 528 1232 529 [[package]] 1233 - name = "winapi-util" 1234 - version = "0.1.11" 530 + name = "wasip2" 531 + version = "1.0.1+wasi-0.2.4" 1235 532 source = "registry+https://github.com/rust-lang/crates.io-index" 1236 - checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" 533 + checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" 1237 534 dependencies = [ 1238 - "windows-sys 0.60.2", 535 + "wit-bindgen", 1239 536 ] 1240 537 1241 538 [[package]] ··· 1246 543 1247 544 [[package]] 1248 545 name = "windows-sys" 1249 - version = "0.59.0" 1250 - source = "registry+https://github.com/rust-lang/crates.io-index" 1251 - checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 1252 - dependencies = [ 1253 - "windows-targets 0.52.6", 1254 - ] 1255 - 1256 - [[package]] 1257 - name = "windows-sys" 1258 546 version = "0.60.2" 1259 547 source = "registry+https://github.com/rust-lang/crates.io-index" 1260 548 checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" 1261 549 dependencies = [ 1262 - "windows-targets 0.53.5", 1263 - ] 1264 - 1265 - [[package]] 1266 - name = "windows-targets" 1267 - version = "0.52.6" 1268 - source = "registry+https://github.com/rust-lang/crates.io-index" 1269 - checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 1270 - dependencies = [ 1271 - "windows_aarch64_gnullvm 0.52.6", 1272 - "windows_aarch64_msvc 0.52.6", 1273 - "windows_i686_gnu 0.52.6", 1274 - "windows_i686_gnullvm 0.52.6", 1275 - "windows_i686_msvc 0.52.6", 1276 - "windows_x86_64_gnu 0.52.6", 1277 - "windows_x86_64_gnullvm 0.52.6", 1278 - "windows_x86_64_msvc 0.52.6", 550 + "windows-targets", 1279 551 ] 1280 552 1281 553 [[package]] ··· 1285 557 checksum = 
"4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" 1286 558 dependencies = [ 1287 559 "windows-link", 1288 - "windows_aarch64_gnullvm 0.53.1", 1289 - "windows_aarch64_msvc 0.53.1", 1290 - "windows_i686_gnu 0.53.1", 1291 - "windows_i686_gnullvm 0.53.1", 1292 - "windows_i686_msvc 0.53.1", 1293 - "windows_x86_64_gnu 0.53.1", 1294 - "windows_x86_64_gnullvm 0.53.1", 1295 - "windows_x86_64_msvc 0.53.1", 560 + "windows_aarch64_gnullvm", 561 + "windows_aarch64_msvc", 562 + "windows_i686_gnu", 563 + "windows_i686_gnullvm", 564 + "windows_i686_msvc", 565 + "windows_x86_64_gnu", 566 + "windows_x86_64_gnullvm", 567 + "windows_x86_64_msvc", 1296 568 ] 1297 - 1298 - [[package]] 1299 - name = "windows_aarch64_gnullvm" 1300 - version = "0.52.6" 1301 - source = "registry+https://github.com/rust-lang/crates.io-index" 1302 - checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 1303 569 1304 570 [[package]] 1305 571 name = "windows_aarch64_gnullvm" ··· 1309 575 1310 576 [[package]] 1311 577 name = "windows_aarch64_msvc" 1312 - version = "0.52.6" 1313 - source = "registry+https://github.com/rust-lang/crates.io-index" 1314 - checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 1315 - 1316 - [[package]] 1317 - name = "windows_aarch64_msvc" 1318 578 version = "0.53.1" 1319 579 source = "registry+https://github.com/rust-lang/crates.io-index" 1320 580 checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" 1321 581 1322 582 [[package]] 1323 583 name = "windows_i686_gnu" 1324 - version = "0.52.6" 1325 - source = "registry+https://github.com/rust-lang/crates.io-index" 1326 - checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 1327 - 1328 - [[package]] 1329 - name = "windows_i686_gnu" 1330 584 version = "0.53.1" 1331 585 source = "registry+https://github.com/rust-lang/crates.io-index" 1332 586 checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" 1333 - 1334 
- [[package]] 1335 - name = "windows_i686_gnullvm" 1336 - version = "0.52.6" 1337 - source = "registry+https://github.com/rust-lang/crates.io-index" 1338 - checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 1339 587 1340 588 [[package]] 1341 589 name = "windows_i686_gnullvm" ··· 1345 593 1346 594 [[package]] 1347 595 name = "windows_i686_msvc" 1348 - version = "0.52.6" 1349 - source = "registry+https://github.com/rust-lang/crates.io-index" 1350 - checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 1351 - 1352 - [[package]] 1353 - name = "windows_i686_msvc" 1354 596 version = "0.53.1" 1355 597 source = "registry+https://github.com/rust-lang/crates.io-index" 1356 598 checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" 1357 599 1358 600 [[package]] 1359 601 name = "windows_x86_64_gnu" 1360 - version = "0.52.6" 1361 - source = "registry+https://github.com/rust-lang/crates.io-index" 1362 - checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 1363 - 1364 - [[package]] 1365 - name = "windows_x86_64_gnu" 1366 602 version = "0.53.1" 1367 603 source = "registry+https://github.com/rust-lang/crates.io-index" 1368 604 checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" 1369 - 1370 - [[package]] 1371 - name = "windows_x86_64_gnullvm" 1372 - version = "0.52.6" 1373 - source = "registry+https://github.com/rust-lang/crates.io-index" 1374 - checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1375 605 1376 606 [[package]] 1377 607 name = "windows_x86_64_gnullvm" ··· 1381 611 1382 612 [[package]] 1383 613 name = "windows_x86_64_msvc" 1384 - version = "0.52.6" 1385 - source = "registry+https://github.com/rust-lang/crates.io-index" 1386 - checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1387 - 1388 - [[package]] 1389 - name = "windows_x86_64_msvc" 1390 614 version = "0.53.1" 1391 615 source = 
"registry+https://github.com/rust-lang/crates.io-index" 1392 616 checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" 1393 617 1394 618 [[package]] 1395 - name = "zerocopy" 1396 - version = "0.8.27" 619 + name = "wit-bindgen" 620 + version = "0.46.0" 1397 621 source = "registry+https://github.com/rust-lang/crates.io-index" 1398 - checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" 1399 - dependencies = [ 1400 - "zerocopy-derive", 1401 - ] 622 + checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" 1402 623 1403 624 [[package]] 1404 - name = "zerocopy-derive" 1405 - version = "0.8.27" 625 + name = "xxhash-rust" 626 + version = "0.8.15" 1406 627 source = "registry+https://github.com/rust-lang/crates.io-index" 1407 - checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" 1408 - dependencies = [ 1409 - "proc-macro2", 1410 - "quote", 1411 - "syn 2.0.106", 1412 - ] 628 + checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
+4 -30
Cargo.toml
··· 1 1 [package] 2 2 name = "repo-stream" 3 - version = "0.1.0" 3 + version = "0.2.2" 4 4 edition = "2024" 5 5 license = "MIT OR Apache-2.0" 6 - description = "Fast and robust atproto CAR file processing in rust" 6 + description = "A robust CAR file -> MST walker for atproto" 7 + repository = "https://tangled.org/@microcosm.blue/repo-stream" 7 8 8 9 [dependencies] 9 - futures = "0.3.31" 10 - futures-core = "0.3.31" 11 - ipld-core = { version = "0.4.2", features = ["serde"] } 12 - iroh-car = "0.5.1" 13 - log = "0.4.28" 14 - multibase = "0.9.2" 15 - serde = { version = "1.0.228", features = ["derive"] } 16 - serde_bytes = "0.11.19" 17 - serde_ipld_dagcbor = "0.6.4" 18 - thiserror = "2.0.17" 19 - tokio = "1.47.1" 20 - 21 - [dev-dependencies] 10 + fjall = "3.0.1" 22 11 clap = { version = "4.5.48", features = ["derive"] } 23 - criterion = { version = "0.7.0", features = ["async_tokio"] } 24 - env_logger = "0.11.8" 25 - multibase = "0.9.2" 26 - tokio = { version = "1.47.1", features = ["full"] } 27 12 28 - [profile.profiling] 29 - inherits = "release" 30 - debug = true 31 - 32 - [[bench]] 33 - name = "non-huge-cars" 34 - harness = false 35 - 36 - [[bench]] 37 - name = "huge-car" 38 - harness = false
+12 -21
benches/huge-car.rs
··· 1 1 extern crate repo_stream; 2 - use futures::TryStreamExt; 3 - use iroh_car::CarReader; 4 - use std::convert::Infallible; 2 + use repo_stream::Driver; 5 3 use std::path::{Path, PathBuf}; 6 4 7 5 use criterion::{Criterion, criterion_group, criterion_main}; ··· 20 18 }); 21 19 } 22 20 23 - async fn drive_car(filename: impl AsRef<Path>) { 21 + async fn drive_car(filename: impl AsRef<Path>) -> usize { 24 22 let reader = tokio::fs::File::open(filename).await.unwrap(); 25 23 let reader = tokio::io::BufReader::new(reader); 26 - let reader = CarReader::new(reader).await.unwrap(); 27 24 28 - let root = reader 29 - .header() 30 - .roots() 31 - .first() 32 - .ok_or("missing root") 25 + let mut driver = match Driver::load_car(reader, |block| block.len(), 1024) 26 + .await 33 27 .unwrap() 34 - .clone(); 35 - 36 - let stream = std::pin::pin!(reader.stream()); 37 - 38 - let (_commit, v) = 39 - repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 40 - .await 41 - .unwrap(); 42 - let mut record_stream = std::pin::pin!(v.stream()); 28 + { 29 + Driver::Memory(_, mem_driver) => mem_driver, 30 + Driver::Disk(_) => panic!("not doing disk for benchmark"), 31 + }; 43 32 44 - while let Some(_) = record_stream.try_next().await.unwrap() { 45 - // just here for the drive 33 + let mut n = 0; 34 + while let Some(pairs) = driver.next_chunk(256).await.unwrap() { 35 + n += pairs.len(); 46 36 } 37 + n 47 38 } 48 39 49 40 criterion_group!(benches, criterion_benchmark);
+16 -22
benches/non-huge-cars.rs
··· 1 1 extern crate repo_stream; 2 - use futures::TryStreamExt; 3 - use iroh_car::CarReader; 4 - use std::convert::Infallible; 2 + use repo_stream::Driver; 5 3 6 4 use criterion::{Criterion, criterion_group, criterion_main}; 7 5 6 + const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car"); 8 7 const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car"); 9 8 const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car"); 10 9 const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car"); ··· 15 14 .build() 16 15 .expect("Creating runtime failed"); 17 16 17 + c.bench_function("empty-car", |b| { 18 + b.to_async(&rt).iter(async || drive_car(EMPTY_CAR).await) 19 + }); 18 20 c.bench_function("tiny-car", |b| { 19 21 b.to_async(&rt).iter(async || drive_car(TINY_CAR).await) 20 22 }); ··· 26 28 }); 27 29 } 28 30 29 - async fn drive_car(bytes: &[u8]) { 30 - let reader = CarReader::new(bytes).await.unwrap(); 31 - 32 - let root = reader 33 - .header() 34 - .roots() 35 - .first() 36 - .ok_or("missing root") 31 + async fn drive_car(bytes: &[u8]) -> usize { 32 + let mut driver = match Driver::load_car(bytes, |block| block.len(), 32) 33 + .await 37 34 .unwrap() 38 - .clone(); 39 - 40 - let stream = std::pin::pin!(reader.stream()); 35 + { 36 + Driver::Memory(_, mem_driver) => mem_driver, 37 + Driver::Disk(_) => panic!("not benching big cars here"), 38 + }; 41 39 42 - let (_commit, v) = 43 - repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 44 - .await 45 - .unwrap(); 46 - let mut record_stream = std::pin::pin!(v.stream()); 47 - 48 - while let Some(_) = record_stream.try_next().await.unwrap() { 49 - // just here for the drive 40 + let mut n = 0; 41 + while let Some(pairs) = driver.next_chunk(256).await.unwrap() { 42 + n += pairs.len(); 50 43 } 44 + n 51 45 } 52 46 53 47 criterion_group!(benches, criterion_benchmark);
car-samples/empty.car

This is a binary file and will not be displayed.

+43
examples/disk-read-file/main.rs
··· 1 + use clap::Parser; 2 + use fjall::{Database, KeyspaceCreateOptions}; 3 + use std::{path::PathBuf, collections::BTreeMap}; 4 + 5 + #[derive(Debug, Parser)] 6 + struct Args { 7 + #[arg()] 8 + db_path: PathBuf, 9 + } 10 + 11 + fn main() -> Result<(), Box<dyn std::error::Error>> { 12 + let Args { db_path } = Args::parse(); 13 + 14 + let db = Database::builder(db_path).open()?; 15 + let ks = db.keyspace("z", KeyspaceCreateOptions::default)?; 16 + let mut seen_keys: BTreeMap<Vec<u8>, usize> = BTreeMap::default(); 17 + 18 + print!("writing..."); 19 + for i in 0..250_000_usize { 20 + let k = i.to_be_bytes().to_vec(); 21 + ks.insert(k.clone(), vec![0xAA; 256])?; 22 + seen_keys.insert(k, i); 23 + } 24 + 25 + println!(" done. checking keys..."); 26 + 27 + // remove every seen key that fjall actually has, to see what's left 28 + for guard in ks.iter() { 29 + seen_keys.remove(guard.key()?.as_ref()); 30 + } 31 + 32 + // report the result 33 + if seen_keys.len() == 0 { 34 + println!("[ OK ] all keys found"); 35 + } else { 36 + println!("[FAIL] fjall did not have all seen_keys:"); 37 + for (k, i) in seen_keys { 38 + println!(" insert #{i} missing, key bytes: {k:?}"); 39 + } 40 + } 41 + 42 + Ok(()) 43 + }
+18 -25
examples/read-file/main.rs
··· 1 + /*! 2 + Read a CAR file with in-memory processing 3 + */ 4 + 1 5 extern crate repo_stream; 2 6 use clap::Parser; 3 - use futures::TryStreamExt; 4 - use iroh_car::CarReader; 5 - use std::convert::Infallible; 7 + use repo_stream::{Driver, DriverBuilder}; 6 8 use std::path::PathBuf; 7 9 8 10 type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; ··· 21 23 let reader = tokio::fs::File::open(file).await?; 22 24 let reader = tokio::io::BufReader::new(reader); 23 25 24 - println!("hello!"); 25 - 26 - let reader = CarReader::new(reader).await?; 27 - 28 - let root = reader 29 - .header() 30 - .roots() 31 - .first() 32 - .ok_or("missing root")? 33 - .clone(); 34 - log::debug!("root: {root:?}"); 35 - 36 - // let stream = Box::pin(reader.stream()); 37 - let stream = std::pin::pin!(reader.stream()); 38 - 39 - let (commit, v) = 40 - repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 41 - .await?; 42 - let mut record_stream = std::pin::pin!(v.stream()); 26 + let (commit, mut driver) = match DriverBuilder::new() 27 + .with_block_processor(|block| block.len()) 28 + .load_car(reader) 29 + .await? 30 + { 31 + Driver::Memory(commit, mem_driver) => (commit, mem_driver), 32 + Driver::Disk(_) => panic!("this example doesn't handle big CARs"), 33 + }; 43 34 44 35 log::info!("got commit: {commit:?}"); 45 36 46 - while let Some((rkey, _rec)) = record_stream.try_next().await? { 47 - log::info!("got {rkey:?}"); 37 + let mut n = 0; 38 + while let Some(pairs) = driver.next_chunk(256).await? { 39 + n += pairs.len(); 40 + // log::info!("got {rkey:?}"); 48 41 } 49 - log::info!("bye!"); 42 + log::info!("bye! total records={n}"); 50 43 51 44 Ok(()) 52 45 }
+70 -2
readme.md
··· 1 1 # repo-stream 2 2 3 - Fast and (aspirationally) robust atproto CAR file processing in rust 3 + A robust CAR file -> MST walker for atproto 4 + 5 + [![Crates.io][crates-badge]](https://crates.io/crates/repo-stream) 6 + [![Documentation][docs-badge]](https://docs.rs/repo-stream) 7 + [![Sponsor][sponsor-badge]](https://github.com/sponsors/uniphil) 8 + 9 + [crates-badge]: https://img.shields.io/crates/v/repo-stream.svg 10 + [docs-badge]: https://docs.rs/repo-stream/badge.svg 11 + [sponsor-badge]: https://img.shields.io/badge/at-microcosm-b820f9?labelColor=b820f9&logo=githubsponsors&logoColor=fff 12 + 13 + ```rust 14 + use repo_stream::{Driver, DriverBuilder, DriveError, DiskBuilder}; 15 + 16 + #[tokio::main] 17 + async fn main() -> Result<(), DriveError> { 18 + // repo-stream takes any AsyncRead as input, like a tokio::fs::File 19 + let reader = tokio::fs::File::open("repo.car".into()).await?; 20 + let reader = tokio::io::BufReader::new(reader); 21 + 22 + // example repo workload is simply counting the total record bytes 23 + let mut total_size = 0; 24 + 25 + match DriverBuilder::new() 26 + .with_mem_limit_mb(10) 27 + .with_block_processor(|rec| rec.len()) // block processing: just extract the raw record size 28 + .load_car(reader) 29 + .await? 30 + { 31 + 32 + // if all blocks fit within memory 33 + Driver::Memory(_commit, mut driver) => { 34 + while let Some(chunk) = driver.next_chunk(256).await? { 35 + for (_rkey, size) in chunk { 36 + total_size += size; 37 + } 38 + } 39 + }, 40 + 41 + // if the CAR was too big for in-memory processing 42 + Driver::Disk(paused) => { 43 + // set up a disk store we can spill to 44 + let store = DiskBuilder::new().open("some/path.db".into()).await?; 45 + // do the spilling, get back a (similar) driver 46 + let (_commit, mut driver) = paused.finish_loading(store).await?; 47 + 48 + while let Some(chunk) = driver.next_chunk(256).await? 
{ 49 + for (_rkey, size) in chunk { 50 + total_size += size; 51 + } 52 + } 53 + 54 + // clean up the disk store (drop tables etc) 55 + driver.reset_store().await?; 56 + } 57 + }; 58 + println!("sum of size of all records: {total_size}"); 59 + Ok(()) 60 + } 61 + ``` 62 + 63 + more recent todo 64 + 65 + - [ ] get an *empty* car for the test suite 66 + - [x] implement a max size on disk limit 67 + 68 + 69 + ----- 70 + 71 + older stuff (to clean up): 4 72 5 73 6 74 current car processing times (records processed into their length usize, phil's dev machine): ··· 27 95 -> yeah the commit is returned from init 28 96 - [ ] spec compliance todos 29 97 - [x] assert that keys are ordered and fail if not 30 - - [ ] verify node mst depth from key (possibly pending [interop test fixes](https://github.com/bluesky-social/atproto-interop-tests/issues/5)) 98 + - [x] verify node mst depth from key (possibly pending [interop test fixes](https://github.com/bluesky-social/atproto-interop-tests/issues/5)) 31 99 - [ ] performance todos 32 100 - [x] consume the serialized nodes into a mutable efficient format 33 101 - [ ] maybe customize the deserialize impl to do that directly?
-179
src/drive.rs
··· 1 - use futures::{Stream, TryStreamExt}; 2 - use ipld_core::cid::Cid; 3 - use std::collections::HashMap; 4 - use std::error::Error; 5 - 6 - use crate::mst::{Commit, Node}; 7 - use crate::walk::{Step, Trip, Walker}; 8 - 9 - #[derive(Debug, thiserror::Error)] 10 - pub enum DriveError<E: Error> { 11 - #[error("Failed to initialize CarReader: {0}")] 12 - CarReader(#[from] iroh_car::Error), 13 - #[error("Car block stream error: {0}")] 14 - CarBlockError(Box<dyn Error>), 15 - #[error("Failed to decode commit block: {0}")] 16 - BadCommit(Box<dyn Error>), 17 - #[error("The Commit block reference by the root was not found")] 18 - MissingCommit, 19 - #[error("The MST block {0} could not be found")] 20 - MissingBlock(Cid), 21 - #[error("Failed to walk the mst tree: {0}")] 22 - Tripped(#[from] Trip<E>), 23 - #[error("Encountered an rkey out of order while walking the MST")] 24 - RkeyOutOfOrder, 25 - } 26 - 27 - type CarBlock<E> = Result<(Cid, Vec<u8>), E>; 28 - 29 - #[derive(Debug)] 30 - pub struct Rkey(pub String); 31 - 32 - #[derive(Debug)] 33 - pub enum MaybeProcessedBlock<T, E> { 34 - /// A block that's *probably* a Node (but we can't know yet) 35 - /// 36 - /// It *can be* a record that suspiciously looks a lot like a node, so we 37 - /// cannot eagerly turn it into a Node. We only know for sure what it is 38 - /// when we actually walk down the MST 39 - Raw(Vec<u8>), 40 - /// A processed record from a block that was definitely not a Node 41 - /// 42 - /// Processing has to be fallible because the CAR can have totally-unused 43 - /// blocks, which can just be garbage. since we're eagerly trying to process 44 - /// record blocks without knowing for sure that they *are* records, we 45 - /// discard any definitely-not-nodes that fail processing and keep their 46 - /// error in the buffer for them. if we later try to retreive them as a 47 - /// record, then we can surface the error. 
48 - /// 49 - /// If we _never_ needed this block, then we may have wasted a bit of effort 50 - /// trying to process it. Oh well. 51 - /// 52 - /// It would be nice to store the real error type from the processing 53 - /// function, but I'm leaving that generics puzzle for later. 54 - /// 55 - /// There's an alternative here, which would be to kick unprocessable blocks 56 - /// back to Raw, or maybe even a new RawUnprocessable variant. Then we could 57 - /// surface the typed error later if needed by trying to reprocess. 58 - Processed(Result<T, E>), 59 - } 60 - 61 - // TODO: generic error not box dyn nonsense. 62 - pub type ProcRes<T, E> = Result<T, E>; 63 - 64 - pub struct Vehicle<SE, S, T, P, PE> 65 - where 66 - S: Stream<Item = CarBlock<SE>>, 67 - P: Fn(&[u8]) -> ProcRes<T, PE>, 68 - PE: Error, 69 - { 70 - block_stream: S, 71 - blocks: HashMap<Cid, MaybeProcessedBlock<T, PE>>, 72 - walker: Walker, 73 - process: P, 74 - prev_rkey: String, 75 - } 76 - 77 - impl<SE, S, T: Clone, P, PE> Vehicle<SE, S, T, P, PE> 78 - where 79 - SE: Error + 'static, 80 - S: Stream<Item = CarBlock<SE>> + Unpin, 81 - P: Fn(&[u8]) -> ProcRes<T, PE>, 82 - PE: Error, 83 - { 84 - pub async fn init( 85 - root: Cid, 86 - mut block_stream: S, 87 - process: P, 88 - ) -> Result<(Commit, Self), DriveError<PE>> { 89 - let mut blocks = HashMap::new(); 90 - 91 - let mut commit = None; 92 - 93 - while let Some((cid, data)) = block_stream 94 - .try_next() 95 - .await 96 - .map_err(|e| DriveError::CarBlockError(e.into()))? 97 - { 98 - if cid == root { 99 - let c: Commit = serde_ipld_dagcbor::from_slice(&data) 100 - .map_err(|e| DriveError::BadCommit(e.into()))?; 101 - commit = Some(c); 102 - break; 103 - } else { 104 - blocks.insert( 105 - cid, 106 - if Node::could_be(&data) { 107 - MaybeProcessedBlock::Raw(data) 108 - } else { 109 - MaybeProcessedBlock::Processed(process(&data)) 110 - }, 111 - ); 112 - } 113 - } 114 - 115 - // we either broke out or read all the blocks without finding the commit... 
116 - let commit = commit.ok_or(DriveError::MissingCommit)?; 117 - 118 - let walker = Walker::new(commit.data); 119 - 120 - let me = Self { 121 - block_stream, 122 - blocks, 123 - walker, 124 - process, 125 - prev_rkey: "".to_string(), 126 - }; 127 - Ok((commit, me)) 128 - } 129 - 130 - async fn drive_until(&mut self, cid_needed: Cid) -> Result<(), DriveError<PE>> { 131 - while let Some((cid, data)) = self 132 - .block_stream 133 - .try_next() 134 - .await 135 - .map_err(|e| DriveError::CarBlockError(e.into()))? 136 - { 137 - self.blocks.insert( 138 - cid, 139 - if Node::could_be(&data) { 140 - MaybeProcessedBlock::Raw(data) 141 - } else { 142 - MaybeProcessedBlock::Processed((self.process)(&data)) 143 - }, 144 - ); 145 - if cid == cid_needed { 146 - return Ok(()); 147 - } 148 - } 149 - 150 - // if we never found the block 151 - Err(DriveError::MissingBlock(cid_needed)) 152 - } 153 - 154 - pub async fn next_record(&mut self) -> Result<Option<(Rkey, T)>, DriveError<PE>> { 155 - loop { 156 - // walk as far as we can until we run out of blocks or find a record 157 - let cid_needed = match self.walker.walk(&mut self.blocks, &self.process)? { 158 - Step::Rest(cid) => cid, 159 - Step::Finish => return Ok(None), 160 - Step::Step { rkey, data } => { 161 - if rkey <= self.prev_rkey { 162 - return Err(DriveError::RkeyOutOfOrder); 163 - } 164 - return Ok(Some((Rkey(rkey), data))); 165 - } 166 - }; 167 - 168 - // load blocks until we reach that cid 169 - self.drive_until(cid_needed).await?; 170 - } 171 - } 172 - 173 - pub fn stream(self) -> impl Stream<Item = Result<(Rkey, T), DriveError<PE>>> { 174 - futures::stream::try_unfold(self, |mut this| async move { 175 - let maybe_record = this.next_record().await?; 176 - Ok(maybe_record.map(|b| (b, this))) 177 - }) 178 - } 179 - }
+75 -3
src/lib.rs
··· 1 - pub mod drive; 2 - pub mod mst; 3 - pub mod walk; 1 + /*! 2 + A robust CAR file -> MST walker for atproto 3 + 4 + Small CARs have their blocks buffered in memory. If a configurable memory limit 5 + is reached while reading blocks, CAR reading is suspended, and can be continued 6 + by providing disk storage to buffer the CAR blocks instead. 7 + 8 + A `process` function can be provided for tasks where records are transformed 9 + into a smaller representation, to save memory (and disk) during block reading. 10 + 11 + Once blocks are loaded, the MST is walked and emitted as chunks of 12 + `(rkey, processed_block)` pairs, in order (depth first, left-to-right). 13 + 14 + Some MST validations are applied: 15 + - Keys must appear in order 16 + - Keys must be at the correct MST tree depth 17 + 18 + `iroh_car` additionally applies a block size limit of `2MiB`. 19 + 20 + ``` 21 + use repo_stream::{Driver, DriverBuilder, DiskBuilder}; 22 + 23 + # #[tokio::main] 24 + # async fn main() -> Result<(), Box<dyn std::error::Error>> { 25 + # let reader = include_bytes!("../car-samples/tiny.car").as_slice(); 26 + let mut total_size = 0; 27 + 28 + match DriverBuilder::new() 29 + .with_mem_limit_mb(10) 30 + .with_block_processor(|rec| rec.len()) // block processing: just extract the raw record size 31 + .load_car(reader) 32 + .await? 33 + { 34 + 35 + // if all blocks fit within memory 36 + Driver::Memory(_commit, mut driver) => { 37 + while let Some(chunk) = driver.next_chunk(256).await? { 38 + for (_rkey, size) in chunk { 39 + total_size += size; 40 + } 41 + } 42 + }, 43 + 44 + // if the CAR was too big for in-memory processing 45 + Driver::Disk(paused) => { 46 + // set up a disk store we can spill to 47 + let store = DiskBuilder::new().open("some/path.db".into()).await?; 48 + // do the spilling, get back a (similar) driver 49 + let (_commit, mut driver) = paused.finish_loading(store).await?; 50 + 51 + while let Some(chunk) = driver.next_chunk(256).await? 
{ 52 + for (_rkey, size) in chunk { 53 + total_size += size; 54 + } 55 + } 56 + 57 + // clean up the disk store (drop tables etc) 58 + driver.reset_store().await?; 59 + } 60 + }; 61 + println!("sum of size of all records: {total_size}"); 62 + # Ok(()) 63 + # } 64 + ``` 65 + 66 + Disk spilling suspends and returns a `Driver::Disk(paused)` instead of going 67 + ahead and eagerly using disk I/O. This means you have to write a bit more code 68 + to handle both cases, but it allows you to have finer control over resource 69 + usage. For example, you can drive a number of parallel memory CAR workers, and 70 + separately have a different number of disk workers picking up suspended disk 71 + tasks from a queue. 72 + 73 + Find more [examples in the repo](https://tangled.org/@microcosm.blue/repo-stream/tree/main/examples). 74 + 75 + */
-114
src/mst.rs
··· 1 - //! Low-level types for parsing raw atproto MST CARs 2 - //! 3 - //! The primary aim is to work through the **tree** structure. Non-node blocks 4 - //! are left as raw bytes, for upper levels to parse into DAG-CBOR or whatever. 5 - 6 - use ipld_core::cid::Cid; 7 - use serde::Deserialize; 8 - 9 - /// The top-level data object in a repository's tree is a signed commit. 10 - #[derive(Debug, Deserialize)] 11 - // #[serde(deny_unknown_fields)] 12 - pub struct Commit { 13 - /// the account DID associated with the repo, in strictly normalized form 14 - /// (eg, lowercase as appropriate) 15 - pub did: String, 16 - /// fixed value of 3 for this repo format version 17 - pub version: u64, 18 - /// pointer to the top of the repo contents tree structure (MST) 19 - pub data: Cid, 20 - /// revision of the repo, used as a logical clock. 21 - /// 22 - /// TID format. Must increase monotonically. Recommend using current 23 - /// timestamp as TID; rev values in the "future" (beyond a fudge factor) 24 - /// should be ignored and not processed 25 - pub rev: String, 26 - /// pointer (by hash) to a previous commit object for this repository. 27 - /// 28 - /// Could be used to create a chain of history, but largely unused (included 29 - /// for v2 backwards compatibility). In version 3 repos, this field must 30 - /// exist in the CBOR object, but is virtually always null. NOTE: previously 31 - /// specified as nullable and optional, but this caused interoperability 32 - /// issues. 
33 - pub prev: Option<Cid>, 34 - /// cryptographic signature of this commit, as raw bytes 35 - #[serde(with = "serde_bytes")] 36 - pub sig: Vec<u8>, 37 - } 38 - 39 - /// MST node data schema 40 - #[derive(Debug, Deserialize, PartialEq)] 41 - #[serde(deny_unknown_fields)] 42 - pub struct Node { 43 - /// link to sub-tree Node on a lower level and with all keys sorting before 44 - /// keys at this node 45 - #[serde(rename = "l")] 46 - pub left: Option<Cid>, 47 - /// ordered list of TreeEntry objects 48 - /// 49 - /// atproto MSTs have a fanout of 4, so there can be max 4 entries. 50 - #[serde(rename = "e")] 51 - pub entries: Vec<Entry>, // maybe we can do [Option<Entry>; 4]? 52 - } 53 - 54 - impl Node { 55 - /// test if a block could possibly be a node 56 - /// 57 - /// we can't eagerly decode records except where we're *sure* they cannot be 58 - /// an mst node (and even then we can only attempt) because you can't know 59 - /// with certainty what a block is supposed to be without actually walking 60 - /// the tree. 61 - /// 62 - /// so if a block *could be* a node, any record converter must postpone 63 - /// processing. if it turns out it happens to be a very node-looking record, 64 - /// well, sorry, it just has to only be processed later when that's known. 65 - pub fn could_be(bytes: impl AsRef<[u8]>) -> bool { 66 - const NODE_FINGERPRINT: [u8; 3] = [ 67 - 0xA2, // map length 2 (for "l" and "e" keys) 68 - 0x61, // text length 1 69 - b'e', // "e" before "l" because map keys have to be lex-sorted 70 - // 0x8?: "e" has array (0b100 upper 3 bits) of some length 71 - ]; 72 - let bytes = bytes.as_ref(); 73 - bytes.starts_with(&NODE_FINGERPRINT) 74 - && bytes 75 - .get(3) 76 - .map(|b| b & 0b1110_0000 == 0x80) 77 - .unwrap_or(false) 78 - } 79 - 80 - /// Check if a node has any entries 81 - /// 82 - /// An empty repository with no records is represented as a single MST node 83 - /// with an empty array of entries.
This is the only situation in which a 84 - /// tree may contain an empty leaf node which does not either contain keys 85 - /// ("entries") or point to a sub-tree containing entries. 86 - /// 87 - /// TODO: to me this is slightly unclear with respect to `l` (ask someone). 88 - /// ...is that what "The top of the tree must not be a an empty node which 89 - /// only points to a sub-tree." is referring to? 90 - pub fn is_empty(&self) -> bool { 91 - self.left.is_none() && self.entries.is_empty() 92 - } 93 - } 94 - 95 - /// TreeEntry object 96 - #[derive(Debug, Deserialize, PartialEq)] 97 - #[serde(deny_unknown_fields)] 98 - pub struct Entry { 99 - /// count of bytes shared with previous TreeEntry in this Node (if any) 100 - #[serde(rename = "p")] 101 - pub prefix_len: usize, 102 - /// remainder of key for this TreeEntry, after "prefixlen" have been removed 103 - #[serde(rename = "k", with = "serde_bytes")] 104 - pub keysuffix: Vec<u8>, // can we String this here? 105 - /// link to the record data (CBOR) for this entry 106 - #[serde(rename = "v")] 107 - pub value: Cid, 108 - /// link to a sub-tree Node at a lower level 109 - /// 110 - /// the lower level must have keys sorting after this TreeEntry's key (to 111 - /// the "right"), but before the next TreeEntry's key in this Node (if any) 112 - #[serde(rename = "t")] 113 - pub tree: Option<Cid>, 114 - }
-381
src/walk.rs
··· 1 - //! Depth-first MST traversal 2 - 3 - use crate::drive::{MaybeProcessedBlock, ProcRes}; 4 - use crate::mst::Node; 5 - use ipld_core::cid::Cid; 6 - use std::collections::HashMap; 7 - use std::error::Error; 8 - 9 - #[derive(Debug, thiserror::Error)] 10 - pub enum Trip<E: Error> { 11 - #[error("empty mst nodes are not allowed")] 12 - NodeEmpty, 13 - #[error("Failed to decode commit block: {0}")] 14 - BadCommit(Box<dyn std::error::Error>), 15 - #[error("Action node error: {0}")] 16 - ActionNode(#[from] ActionNodeError), 17 - #[error("Process failed: {0}")] 18 - ProcessFailed(E), 19 - } 20 - 21 - #[derive(Debug, thiserror::Error)] 22 - pub enum ActionNodeError { 23 - #[error("Failed to compute an rkey due to invalid prefix_len")] 24 - EntryPrefixOutOfbounds, 25 - #[error("RKey was not utf-8")] 26 - EntryRkeyNotUtf8(#[from] std::string::FromUtf8Error), 27 - } 28 - 29 - #[derive(Debug)] 30 - pub enum Step<T> { 31 - Rest(Cid), 32 - Finish, 33 - Step { rkey: String, data: T }, 34 - } 35 - 36 - #[derive(Debug, Clone, PartialEq)] 37 - enum Need { 38 - Node(Cid), 39 - Record { rkey: String, cid: Cid }, 40 - } 41 - 42 - fn push_from_node(stack: &mut Vec<Need>, node: &Node) -> Result<(), ActionNodeError> { 43 - let mut entries = Vec::with_capacity(node.entries.len()); 44 - 45 - let mut prefix = vec![]; 46 - for entry in &node.entries { 47 - let mut rkey = vec![]; 48 - let pre_checked = prefix 49 - .get(..entry.prefix_len) 50 - .ok_or(ActionNodeError::EntryPrefixOutOfbounds)?; 51 - rkey.extend_from_slice(pre_checked); 52 - rkey.extend_from_slice(&entry.keysuffix); 53 - prefix = rkey.clone(); 54 - 55 - entries.push(Need::Record { 56 - rkey: String::from_utf8(rkey)?, 57 - cid: entry.value, 58 - }); 59 - if let Some(ref tree) = entry.tree { 60 - entries.push(Need::Node(*tree)); 61 - } 62 - } 63 - 64 - entries.reverse(); 65 - stack.append(&mut entries); 66 - 67 - if let Some(tree) = node.left { 68 - stack.push(Need::Node(tree)); 69 - } 70 - Ok(()) 71 - } 72 - 73 - 
#[derive(Debug)] 74 - pub struct Walker { 75 - stack: Vec<Need>, 76 - } 77 - 78 - impl Walker { 79 - pub fn new(tree_root_cid: Cid) -> Self { 80 - Self { 81 - stack: vec![Need::Node(tree_root_cid)], 82 - } 83 - } 84 - 85 - pub fn walk<T: Clone, E: Error>( 86 - &mut self, 87 - blocks: &mut HashMap<Cid, MaybeProcessedBlock<T, E>>, 88 - process: impl Fn(&[u8]) -> ProcRes<T, E>, 89 - ) -> Result<Step<T>, Trip<E>> { 90 - loop { 91 - let Some(mut need) = self.stack.last() else { 92 - log::trace!("tried to walk but we're actually done."); 93 - return Ok(Step::Finish); 94 - }; 95 - 96 - match &mut need { 97 - Need::Node(cid) => { 98 - log::trace!("need node {cid:?}"); 99 - let Some(block) = blocks.remove(cid) else { 100 - log::trace!("node not found, resting"); 101 - return Ok(Step::Rest(*cid)); 102 - }; 103 - 104 - let MaybeProcessedBlock::Raw(data) = block else { 105 - return Err(Trip::BadCommit("failed commit fingerprint".into())); 106 - }; 107 - let node = serde_ipld_dagcbor::from_slice::<Node>(&data) 108 - .map_err(|e| Trip::BadCommit(e.into()))?; 109 - 110 - // found node, make sure we remember 111 - self.stack.pop(); 112 - 113 - // queue up work on the found node next 114 - push_from_node(&mut self.stack, &node)?; 115 - } 116 - Need::Record { rkey, cid } => { 117 - log::trace!("need record {cid:?}"); 118 - let Some(data) = blocks.get_mut(cid) else { 119 - log::trace!("record block not found, resting"); 120 - return Ok(Step::Rest(*cid)); 121 - }; 122 - let rkey = rkey.clone(); 123 - let data = match data { 124 - MaybeProcessedBlock::Raw(data) => process(data), 125 - MaybeProcessedBlock::Processed(Ok(t)) => Ok(t.clone()), 126 - bad => { 127 - // big hack to pull the error out -- this corrupts 128 - // a block, so we should not continue trying to work 129 - let mut steal = MaybeProcessedBlock::Raw(vec![]); 130 - std::mem::swap(&mut steal, bad); 131 - let MaybeProcessedBlock::Processed(Err(e)) = steal else { 132 - unreachable!(); 133 - }; 134 - return 
Err(Trip::ProcessFailed(e)); 135 - } 136 - }; 137 - 138 - // found node, make sure we remember 139 - self.stack.pop(); 140 - 141 - log::trace!("emitting a block as a step. depth={}", self.stack.len()); 142 - let data = data.map_err(Trip::ProcessFailed)?; 143 - return Ok(Step::Step { rkey, data }); 144 - } 145 - } 146 - } 147 - } 148 - } 149 - 150 - #[cfg(test)] 151 - mod test { 152 - use super::*; 153 - // use crate::mst::Entry; 154 - 155 - fn cid1() -> Cid { 156 - "bafyreihixenvk3ahqbytas4hk4a26w43bh6eo3w6usjqtxkpzsvi655a3m" 157 - .parse() 158 - .unwrap() 159 - } 160 - // fn cid2() -> Cid { 161 - // "QmY7Yh4UquoXHLPFo2XbhXkhBvFoPwmQUSa92pxnxjQuPU" 162 - // .parse() 163 - // .unwrap() 164 - // } 165 - // fn cid3() -> Cid { 166 - // "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi" 167 - // .parse() 168 - // .unwrap() 169 - // } 170 - // fn cid4() -> Cid { 171 - // "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR" 172 - // .parse() 173 - // .unwrap() 174 - // } 175 - // fn cid5() -> Cid { 176 - // "QmSnuWmxptJZdLJpKRarxBMS2Ju2oANVrgbr2xWbie9b2D" 177 - // .parse() 178 - // .unwrap() 179 - // } 180 - // fn cid6() -> Cid { 181 - // "QmdmQXB2mzChmMeKY47C43LxUdg1NDJ5MWcKMKxDu7RgQm" 182 - // .parse() 183 - // .unwrap() 184 - // } 185 - // fn cid7() -> Cid { 186 - // "bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze" 187 - // .parse() 188 - // .unwrap() 189 - // } 190 - // fn cid8() -> Cid { 191 - // "bafyreif3tfdpr5n4jdrbielmcapwvbpcthepfkwq2vwonmlhirbjmotedi" 192 - // .parse() 193 - // .unwrap() 194 - // } 195 - // fn cid9() -> Cid { 196 - // "bafyreicnokmhmrnlp2wjhyk2haep4tqxiptwfrp2rrs7rzq7uk766chqvq" 197 - // .parse() 198 - // .unwrap() 199 - // } 200 - 201 - #[test] 202 - fn test_next_from_node_empty() { 203 - let node = Node { 204 - left: None, 205 - entries: vec![], 206 - }; 207 - let mut stack = vec![]; 208 - push_from_node(&mut stack, &node).unwrap(); 209 - assert_eq!(stack.last(), None); 210 - } 211 - 212 - #[test] 213 - fn 
test_needs_from_node_just_left() { 214 - let node = Node { 215 - left: Some(cid1()), 216 - entries: vec![], 217 - }; 218 - let mut stack = vec![]; 219 - push_from_node(&mut stack, &node).unwrap(); 220 - assert_eq!(stack.last(), Some(Need::Node(cid1())).as_ref()); 221 - } 222 - 223 - // #[test] 224 - // fn test_needs_from_node_just_one_record() { 225 - // let node = Node { 226 - // left: None, 227 - // entries: vec![Entry { 228 - // keysuffix: "asdf".into(), 229 - // prefix_len: 0, 230 - // value: cid1(), 231 - // tree: None, 232 - // }], 233 - // }; 234 - // assert_eq!( 235 - // needs_from_node(node).unwrap(), 236 - // vec![Need::Record { 237 - // rkey: "asdf".into(), 238 - // cid: cid1(), 239 - // },] 240 - // ); 241 - // } 242 - 243 - // #[test] 244 - // fn test_needs_from_node_two_records() { 245 - // let node = Node { 246 - // left: None, 247 - // entries: vec![ 248 - // Entry { 249 - // keysuffix: "asdf".into(), 250 - // prefix_len: 0, 251 - // value: cid1(), 252 - // tree: None, 253 - // }, 254 - // Entry { 255 - // keysuffix: "gh".into(), 256 - // prefix_len: 2, 257 - // value: cid2(), 258 - // tree: None, 259 - // }, 260 - // ], 261 - // }; 262 - // assert_eq!( 263 - // needs_from_node(node).unwrap(), 264 - // vec![ 265 - // Need::Record { 266 - // rkey: "asdf".into(), 267 - // cid: cid1(), 268 - // }, 269 - // Need::Record { 270 - // rkey: "asgh".into(), 271 - // cid: cid2(), 272 - // }, 273 - // ] 274 - // ); 275 - // } 276 - 277 - // #[test] 278 - // fn test_needs_from_node_with_both() { 279 - // let node = Node { 280 - // left: None, 281 - // entries: vec![Entry { 282 - // keysuffix: "asdf".into(), 283 - // prefix_len: 0, 284 - // value: cid1(), 285 - // tree: Some(cid2()), 286 - // }], 287 - // }; 288 - // assert_eq!( 289 - // needs_from_node(node).unwrap(), 290 - // vec![ 291 - // Need::Record { 292 - // rkey: "asdf".into(), 293 - // cid: cid1(), 294 - // }, 295 - // Need::Node(cid2()), 296 - // ] 297 - // ); 298 - // } 299 - 300 - // #[test] 301 - // 
fn test_needs_from_node_left_and_record() { 302 - // let node = Node { 303 - // left: Some(cid1()), 304 - // entries: vec![Entry { 305 - // keysuffix: "asdf".into(), 306 - // prefix_len: 0, 307 - // value: cid2(), 308 - // tree: None, 309 - // }], 310 - // }; 311 - // assert_eq!( 312 - // needs_from_node(node).unwrap(), 313 - // vec![ 314 - // Need::Node(cid1()), 315 - // Need::Record { 316 - // rkey: "asdf".into(), 317 - // cid: cid2(), 318 - // }, 319 - // ] 320 - // ); 321 - // } 322 - 323 - // #[test] 324 - // fn test_needs_from_full_node() { 325 - // let node = Node { 326 - // left: Some(cid1()), 327 - // entries: vec![ 328 - // Entry { 329 - // keysuffix: "asdf".into(), 330 - // prefix_len: 0, 331 - // value: cid2(), 332 - // tree: Some(cid3()), 333 - // }, 334 - // Entry { 335 - // keysuffix: "ghi".into(), 336 - // prefix_len: 1, 337 - // value: cid4(), 338 - // tree: Some(cid5()), 339 - // }, 340 - // Entry { 341 - // keysuffix: "jkl".into(), 342 - // prefix_len: 2, 343 - // value: cid6(), 344 - // tree: Some(cid7()), 345 - // }, 346 - // Entry { 347 - // keysuffix: "mno".into(), 348 - // prefix_len: 4, 349 - // value: cid8(), 350 - // tree: Some(cid9()), 351 - // }, 352 - // ], 353 - // }; 354 - // assert_eq!( 355 - // needs_from_node(node).unwrap(), 356 - // vec![ 357 - // Need::Node(cid1()), 358 - // Need::Record { 359 - // rkey: "asdf".into(), 360 - // cid: cid2(), 361 - // }, 362 - // Need::Node(cid3()), 363 - // Need::Record { 364 - // rkey: "aghi".into(), 365 - // cid: cid4(), 366 - // }, 367 - // Need::Node(cid5()), 368 - // Need::Record { 369 - // rkey: "agjkl".into(), 370 - // cid: cid6(), 371 - // }, 372 - // Need::Node(cid7()), 373 - // Need::Record { 374 - // rkey: "agjkmno".into(), 375 - // cid: cid8(), 376 - // }, 377 - // Need::Node(cid9()), 378 - // ] 379 - // ); 380 - // } 381 - }
+34 -31
tests/non-huge-cars.rs
··· 1 1 extern crate repo_stream; 2 - use futures::TryStreamExt; 3 - use iroh_car::CarReader; 4 - use std::convert::Infallible; 2 + use repo_stream::Driver; 5 3 4 + const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car"); 6 5 const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car"); 7 6 const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car"); 8 7 const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car"); 9 8 10 - async fn test_car(bytes: &[u8], expected_records: usize, expected_sum: usize) { 11 - let reader = CarReader::new(bytes).await.unwrap(); 12 - 13 - let root = reader 14 - .header() 15 - .roots() 16 - .first() 17 - .ok_or("missing root") 9 + async fn test_car( 10 + bytes: &[u8], 11 + expected_records: usize, 12 + expected_sum: usize, 13 + expect_profile: bool, 14 + ) { 15 + let mut driver = match Driver::load_car(bytes, |block| block.len(), 10 /* MiB */) 16 + .await 18 17 .unwrap() 19 - .clone(); 20 - 21 - let stream = std::pin::pin!(reader.stream()); 22 - 23 - let (_commit, v) = 24 - repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 25 - .await 26 - .unwrap(); 27 - let mut record_stream = std::pin::pin!(v.stream()); 18 + { 19 + Driver::Memory(_commit, mem_driver) => mem_driver, 20 + Driver::Disk(_) => panic!("too big"), 21 + }; 28 22 29 23 let mut records = 0; 30 24 let mut sum = 0; 31 25 let mut found_bsky_profile = false; 32 26 let mut prev_rkey = "".to_string(); 33 - while let Some((rkey, size)) = record_stream.try_next().await.unwrap() { 34 - records += 1; 35 - sum += size; 36 - if rkey.0 == "app.bsky.actor.profile/self" { 37 - found_bsky_profile = true; 27 + 28 + while let Some(pairs) = driver.next_chunk(256).await.unwrap() { 29 + for (rkey, size) in pairs { 30 + records += 1; 31 + sum += size; 32 + if rkey == "app.bsky.actor.profile/self" { 33 + found_bsky_profile = true; 34 + } 35 + assert!(rkey > prev_rkey, "rkeys are streamed in 
order"); 36 + prev_rkey = rkey; 38 37 } 39 - assert!(rkey.0 > prev_rkey, "rkeys are streamed in order"); 40 - prev_rkey = rkey.0; 41 38 } 39 + 42 40 assert_eq!(records, expected_records); 43 41 assert_eq!(sum, expected_sum); 44 - assert!(found_bsky_profile); 42 + assert_eq!(found_bsky_profile, expect_profile); 43 + } 44 + 45 + #[tokio::test] 46 + async fn test_empty_car() { 47 + test_car(EMPTY_CAR, 0, 0, false).await 45 48 } 46 49 47 50 #[tokio::test] 48 51 async fn test_tiny_car() { 49 - test_car(TINY_CAR, 8, 2071).await 52 + test_car(TINY_CAR, 8, 2071, true).await 50 53 } 51 54 52 55 #[tokio::test] 53 56 async fn test_little_car() { 54 - test_car(LITTLE_CAR, 278, 246960).await 57 + test_car(LITTLE_CAR, 278, 246960, true).await 55 58 } 56 59 57 60 #[tokio::test] 58 61 async fn test_midsize_car() { 59 - test_car(MIDSIZE_CAR, 11585, 3741393).await 62 + test_car(MIDSIZE_CAR, 11585, 3741393, true).await 60 63 }