Fast and robust atproto CAR file processing in rust

Compare changes

Choose any two refs to compare.

+180 -964
Cargo.lock
··· 3 3 version = 4 4 4 5 5 [[package]] 6 - name = "addr2line" 7 - version = "0.25.1" 8 - source = "registry+https://github.com/rust-lang/crates.io-index" 9 - checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" 10 - dependencies = [ 11 - "gimli", 12 - ] 13 - 14 - [[package]] 15 - name = "adler2" 16 - version = "2.0.1" 17 - source = "registry+https://github.com/rust-lang/crates.io-index" 18 - checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" 19 - 20 - [[package]] 21 - name = "aho-corasick" 22 - version = "1.1.3" 23 - source = "registry+https://github.com/rust-lang/crates.io-index" 24 - checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 - dependencies = [ 26 - "memchr", 27 - ] 28 - 29 - [[package]] 30 - name = "anes" 31 - version = "0.1.6" 32 - source = "registry+https://github.com/rust-lang/crates.io-index" 33 - checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 34 - 35 - [[package]] 36 6 name = "anstream" 37 7 version = "0.6.21" 38 8 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 68 38 source = "registry+https://github.com/rust-lang/crates.io-index" 69 39 checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" 70 40 dependencies = [ 71 - "windows-sys 0.60.2", 41 + "windows-sys", 72 42 ] 73 43 74 44 [[package]] ··· 79 49 dependencies = [ 80 50 "anstyle", 81 51 "once_cell_polyfill", 82 - "windows-sys 0.60.2", 83 - ] 84 - 85 - [[package]] 86 - name = "anyhow" 87 - version = "1.0.100" 88 - source = "registry+https://github.com/rust-lang/crates.io-index" 89 - checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" 90 - 91 - [[package]] 92 - name = "autocfg" 93 - version = "1.5.0" 94 - source = "registry+https://github.com/rust-lang/crates.io-index" 95 - checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" 96 - 97 - [[package]] 98 - name = "backtrace" 99 - version = 
"0.3.76" 100 - source = "registry+https://github.com/rust-lang/crates.io-index" 101 - checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" 102 - dependencies = [ 103 - "addr2line", 104 - "cfg-if", 105 - "libc", 106 - "miniz_oxide", 107 - "object", 108 - "rustc-demangle", 109 - "windows-link", 110 - ] 111 - 112 - [[package]] 113 - name = "base-x" 114 - version = "0.2.11" 115 - source = "registry+https://github.com/rust-lang/crates.io-index" 116 - checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270" 117 - 118 - [[package]] 119 - name = "base256emoji" 120 - version = "1.0.2" 121 - source = "registry+https://github.com/rust-lang/crates.io-index" 122 - checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c" 123 - dependencies = [ 124 - "const-str", 125 - "match-lookup", 52 + "windows-sys", 126 53 ] 127 54 128 55 [[package]] ··· 132 59 checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" 133 60 134 61 [[package]] 135 - name = "bumpalo" 136 - version = "3.19.0" 137 - source = "registry+https://github.com/rust-lang/crates.io-index" 138 - checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" 139 - 140 - [[package]] 141 - name = "bytes" 142 - version = "1.10.1" 143 - source = "registry+https://github.com/rust-lang/crates.io-index" 144 - checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 145 - 146 - [[package]] 147 - name = "cast" 148 - version = "0.3.0" 62 + name = "byteorder-lite" 63 + version = "0.1.0" 149 64 source = "registry+https://github.com/rust-lang/crates.io-index" 150 - checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 65 + checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" 151 66 152 67 [[package]] 153 - name = "cbor4ii" 154 - version = "0.2.14" 68 + name = "byteview" 69 + version = "0.10.0" 155 70 source = "registry+https://github.com/rust-lang/crates.io-index" 
156 - checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4" 157 - dependencies = [ 158 - "serde", 159 - ] 71 + checksum = "dda4398f387cc6395a3e93b3867cd9abda914c97a0b344d1eefb2e5c51785fca" 160 72 161 73 [[package]] 162 74 name = "cfg-if" ··· 165 77 checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" 166 78 167 79 [[package]] 168 - name = "ciborium" 169 - version = "0.2.2" 170 - source = "registry+https://github.com/rust-lang/crates.io-index" 171 - checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 172 - dependencies = [ 173 - "ciborium-io", 174 - "ciborium-ll", 175 - "serde", 176 - ] 177 - 178 - [[package]] 179 - name = "ciborium-io" 180 - version = "0.2.2" 181 - source = "registry+https://github.com/rust-lang/crates.io-index" 182 - checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 183 - 184 - [[package]] 185 - name = "ciborium-ll" 186 - version = "0.2.2" 187 - source = "registry+https://github.com/rust-lang/crates.io-index" 188 - checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 189 - dependencies = [ 190 - "ciborium-io", 191 - "half", 192 - ] 193 - 194 - [[package]] 195 - name = "cid" 196 - version = "0.11.1" 197 - source = "registry+https://github.com/rust-lang/crates.io-index" 198 - checksum = "3147d8272e8fa0ccd29ce51194dd98f79ddfb8191ba9e3409884e751798acf3a" 199 - dependencies = [ 200 - "core2", 201 - "multibase", 202 - "multihash", 203 - "serde", 204 - "serde_bytes", 205 - "unsigned-varint 0.8.0", 206 - ] 207 - 208 - [[package]] 209 80 name = "clap" 210 81 version = "4.5.48" 211 82 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 236 107 "heck", 237 108 "proc-macro2", 238 109 "quote", 239 - "syn 2.0.106", 110 + "syn", 240 111 ] 241 112 242 113 [[package]] ··· 252 123 checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" 253 124 254 125 [[package]] 255 - name = "const-str" 256 - version 
= "0.4.3" 257 - source = "registry+https://github.com/rust-lang/crates.io-index" 258 - checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3" 259 - 260 - [[package]] 261 - name = "core2" 262 - version = "0.4.0" 263 - source = "registry+https://github.com/rust-lang/crates.io-index" 264 - checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" 265 - dependencies = [ 266 - "memchr", 267 - ] 268 - 269 - [[package]] 270 - name = "criterion" 271 - version = "0.7.0" 272 - source = "registry+https://github.com/rust-lang/crates.io-index" 273 - checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" 274 - dependencies = [ 275 - "anes", 276 - "cast", 277 - "ciborium", 278 - "clap", 279 - "criterion-plot", 280 - "itertools", 281 - "num-traits", 282 - "oorandom", 283 - "plotters", 284 - "rayon", 285 - "regex", 286 - "serde", 287 - "serde_json", 288 - "tinytemplate", 289 - "tokio", 290 - "walkdir", 291 - ] 292 - 293 - [[package]] 294 - name = "criterion-plot" 295 - version = "0.6.0" 126 + name = "compare" 127 + version = "0.0.6" 296 128 source = "registry+https://github.com/rust-lang/crates.io-index" 297 - checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" 298 - dependencies = [ 299 - "cast", 300 - "itertools", 301 - ] 129 + checksum = "ea0095f6103c2a8b44acd6fd15960c801dafebf02e21940360833e0673f48ba7" 302 130 303 131 [[package]] 304 - name = "crossbeam-deque" 305 - version = "0.8.6" 132 + name = "crossbeam-epoch" 133 + version = "0.9.18" 306 134 source = "registry+https://github.com/rust-lang/crates.io-index" 307 - checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" 135 + checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 308 136 dependencies = [ 309 - "crossbeam-epoch", 310 137 "crossbeam-utils", 311 138 ] 312 139 313 140 [[package]] 314 - name = "crossbeam-epoch" 315 - version = "0.9.18" 141 + name = "crossbeam-skiplist" 142 + 
version = "0.1.3" 316 143 source = "registry+https://github.com/rust-lang/crates.io-index" 317 - checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 144 + checksum = "df29de440c58ca2cc6e587ec3d22347551a32435fbde9d2bff64e78a9ffa151b" 318 145 dependencies = [ 146 + "crossbeam-epoch", 319 147 "crossbeam-utils", 320 148 ] 321 149 ··· 326 154 checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 327 155 328 156 [[package]] 329 - name = "crunchy" 330 - version = "0.2.4" 157 + name = "dashmap" 158 + version = "6.1.0" 331 159 source = "registry+https://github.com/rust-lang/crates.io-index" 332 - checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" 333 - 334 - [[package]] 335 - name = "data-encoding" 336 - version = "2.9.0" 337 - source = "registry+https://github.com/rust-lang/crates.io-index" 338 - checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" 339 - 340 - [[package]] 341 - name = "data-encoding-macro" 342 - version = "0.1.18" 343 - source = "registry+https://github.com/rust-lang/crates.io-index" 344 - checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d" 160 + checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" 345 161 dependencies = [ 346 - "data-encoding", 347 - "data-encoding-macro-internal", 162 + "cfg-if", 163 + "crossbeam-utils", 164 + "hashbrown 0.14.5", 165 + "lock_api", 166 + "once_cell", 167 + "parking_lot_core", 348 168 ] 349 169 350 170 [[package]] 351 - name = "data-encoding-macro-internal" 352 - version = "0.1.16" 171 + name = "enum_dispatch" 172 + version = "0.3.13" 353 173 source = "registry+https://github.com/rust-lang/crates.io-index" 354 - checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976" 174 + checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" 355 175 dependencies = [ 356 - "data-encoding", 357 - "syn 2.0.106", 176 + "once_cell", 177 + 
"proc-macro2", 178 + "quote", 179 + "syn", 358 180 ] 359 181 360 182 [[package]] 361 - name = "either" 362 - version = "1.15.0" 183 + name = "equivalent" 184 + version = "1.0.2" 363 185 source = "registry+https://github.com/rust-lang/crates.io-index" 364 - checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 365 - 366 - [[package]] 367 - name = "env_filter" 368 - version = "0.1.3" 369 - source = "registry+https://github.com/rust-lang/crates.io-index" 370 - checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" 371 - dependencies = [ 372 - "log", 373 - "regex", 374 - ] 375 - 376 - [[package]] 377 - name = "env_logger" 378 - version = "0.11.8" 379 - source = "registry+https://github.com/rust-lang/crates.io-index" 380 - checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" 381 - dependencies = [ 382 - "anstream", 383 - "anstyle", 384 - "env_filter", 385 - "jiff", 386 - "log", 387 - ] 388 - 389 - [[package]] 390 - name = "futures" 391 - version = "0.3.31" 392 - source = "registry+https://github.com/rust-lang/crates.io-index" 393 - checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" 394 - dependencies = [ 395 - "futures-channel", 396 - "futures-core", 397 - "futures-executor", 398 - "futures-io", 399 - "futures-sink", 400 - "futures-task", 401 - "futures-util", 402 - ] 186 + checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 403 187 404 188 [[package]] 405 - name = "futures-channel" 406 - version = "0.3.31" 189 + name = "errno" 190 + version = "0.3.14" 407 191 source = "registry+https://github.com/rust-lang/crates.io-index" 408 - checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" 192 + checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" 409 193 dependencies = [ 410 - "futures-core", 411 - "futures-sink", 194 + "libc", 195 + "windows-sys", 412 196 ] 413 197 414 198 [[package]] 415 - name = 
"futures-core" 416 - version = "0.3.31" 199 + name = "fastrand" 200 + version = "2.3.0" 417 201 source = "registry+https://github.com/rust-lang/crates.io-index" 418 - checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" 202 + checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" 419 203 420 204 [[package]] 421 - name = "futures-executor" 422 - version = "0.3.31" 205 + name = "fjall" 206 + version = "3.0.1" 423 207 source = "registry+https://github.com/rust-lang/crates.io-index" 424 - checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" 208 + checksum = "4f69637c02d38ad1b0f003101d0195a60368130aa17d9ef78b1557d265a22093" 425 209 dependencies = [ 426 - "futures-core", 427 - "futures-task", 428 - "futures-util", 210 + "byteorder-lite", 211 + "byteview", 212 + "dashmap", 213 + "flume", 214 + "log", 215 + "lsm-tree", 216 + "lz4_flex", 217 + "tempfile", 218 + "xxhash-rust", 429 219 ] 430 220 431 221 [[package]] 432 - name = "futures-io" 433 - version = "0.3.31" 434 - source = "registry+https://github.com/rust-lang/crates.io-index" 435 - checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" 436 - 437 - [[package]] 438 - name = "futures-macro" 439 - version = "0.3.31" 222 + name = "flume" 223 + version = "0.12.0" 440 224 source = "registry+https://github.com/rust-lang/crates.io-index" 441 - checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" 225 + checksum = "5e139bc46ca777eb5efaf62df0ab8cc5fd400866427e56c68b22e414e53bd3be" 442 226 dependencies = [ 443 - "proc-macro2", 444 - "quote", 445 - "syn 2.0.106", 227 + "spin", 446 228 ] 447 229 448 230 [[package]] 449 - name = "futures-sink" 450 - version = "0.3.31" 451 - source = "registry+https://github.com/rust-lang/crates.io-index" 452 - checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" 453 - 454 - [[package]] 455 - name = "futures-task" 456 - version = "0.3.31" 231 + name = 
"getrandom" 232 + version = "0.3.3" 457 233 source = "registry+https://github.com/rust-lang/crates.io-index" 458 - checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" 459 - 460 - [[package]] 461 - name = "futures-util" 462 - version = "0.3.31" 463 - source = "registry+https://github.com/rust-lang/crates.io-index" 464 - checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" 234 + checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" 465 235 dependencies = [ 466 - "futures-channel", 467 - "futures-core", 468 - "futures-io", 469 - "futures-macro", 470 - "futures-sink", 471 - "futures-task", 472 - "memchr", 473 - "pin-project-lite", 474 - "pin-utils", 475 - "slab", 236 + "cfg-if", 237 + "libc", 238 + "r-efi", 239 + "wasi", 476 240 ] 477 241 478 242 [[package]] 479 - name = "gimli" 480 - version = "0.32.3" 243 + name = "hashbrown" 244 + version = "0.14.5" 481 245 source = "registry+https://github.com/rust-lang/crates.io-index" 482 - checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" 246 + checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 483 247 484 248 [[package]] 485 - name = "half" 486 - version = "2.7.0" 249 + name = "hashbrown" 250 + version = "0.16.1" 487 251 source = "registry+https://github.com/rust-lang/crates.io-index" 488 - checksum = "e54c115d4f30f52c67202f079c5f9d8b49db4691f460fdb0b4c2e838261b2ba5" 489 - dependencies = [ 490 - "cfg-if", 491 - "crunchy", 492 - "zerocopy", 493 - ] 252 + checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" 494 253 495 254 [[package]] 496 255 name = "heck" ··· 499 258 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 500 259 501 260 [[package]] 502 - name = "io-uring" 503 - version = "0.7.10" 261 + name = "interval-heap" 262 + version = "0.0.5" 504 263 source = "registry+https://github.com/rust-lang/crates.io-index" 505 - checksum = 
"046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" 506 - dependencies = [ 507 - "bitflags", 508 - "cfg-if", 509 - "libc", 510 - ] 511 - 512 - [[package]] 513 - name = "ipld-core" 514 - version = "0.4.2" 515 - source = "registry+https://github.com/rust-lang/crates.io-index" 516 - checksum = "104718b1cc124d92a6d01ca9c9258a7df311405debb3408c445a36452f9bf8db" 264 + checksum = "11274e5e8e89b8607cfedc2910b6626e998779b48a019151c7604d0adcb86ac6" 517 265 dependencies = [ 518 - "cid", 519 - "serde", 520 - "serde_bytes", 521 - ] 522 - 523 - [[package]] 524 - name = "iroh-car" 525 - version = "0.5.1" 526 - source = "registry+https://github.com/rust-lang/crates.io-index" 527 - checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a" 528 - dependencies = [ 529 - "anyhow", 530 - "cid", 531 - "futures", 532 - "serde", 533 - "serde_ipld_dagcbor", 534 - "thiserror 1.0.69", 535 - "tokio", 536 - "unsigned-varint 0.7.2", 266 + "compare", 537 267 ] 538 268 539 269 [[package]] ··· 543 273 checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 544 274 545 275 [[package]] 546 - name = "itertools" 547 - version = "0.13.0" 548 - source = "registry+https://github.com/rust-lang/crates.io-index" 549 - checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" 550 - dependencies = [ 551 - "either", 552 - ] 553 - 554 - [[package]] 555 - name = "itoa" 556 - version = "1.0.15" 557 - source = "registry+https://github.com/rust-lang/crates.io-index" 558 - checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" 559 - 560 - [[package]] 561 - name = "jiff" 562 - version = "0.2.15" 563 - source = "registry+https://github.com/rust-lang/crates.io-index" 564 - checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" 565 - dependencies = [ 566 - "jiff-static", 567 - "log", 568 - "portable-atomic", 569 - "portable-atomic-util", 570 - "serde", 571 - ] 572 - 573 - [[package]] 574 - name = 
"jiff-static" 575 - version = "0.2.15" 576 - source = "registry+https://github.com/rust-lang/crates.io-index" 577 - checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" 578 - dependencies = [ 579 - "proc-macro2", 580 - "quote", 581 - "syn 2.0.106", 582 - ] 583 - 584 - [[package]] 585 - name = "js-sys" 586 - version = "0.3.81" 587 - source = "registry+https://github.com/rust-lang/crates.io-index" 588 - checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" 589 - dependencies = [ 590 - "once_cell", 591 - "wasm-bindgen", 592 - ] 593 - 594 - [[package]] 595 276 name = "libc" 596 277 version = "0.2.176" 597 278 source = "registry+https://github.com/rust-lang/crates.io-index" 598 279 checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" 280 + 281 + [[package]] 282 + name = "linux-raw-sys" 283 + version = "0.11.0" 284 + source = "registry+https://github.com/rust-lang/crates.io-index" 285 + checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" 599 286 600 287 [[package]] 601 288 name = "lock_api" ··· 613 300 checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" 614 301 615 302 [[package]] 616 - name = "match-lookup" 617 - version = "0.1.1" 303 + name = "lsm-tree" 304 + version = "3.0.1" 618 305 source = "registry+https://github.com/rust-lang/crates.io-index" 619 - checksum = "1265724d8cb29dbbc2b0f06fffb8bf1a8c0cf73a78eede9ba73a4a66c52a981e" 306 + checksum = "b875f1dfe14f557f805b167fb9b0fc54c5560c7a4bd6ae02535b2846f276a8cb" 620 307 dependencies = [ 621 - "proc-macro2", 622 - "quote", 623 - "syn 1.0.109", 624 - ] 625 - 626 - [[package]] 627 - name = "memchr" 628 - version = "2.7.6" 629 - source = "registry+https://github.com/rust-lang/crates.io-index" 630 - checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" 631 - 632 - [[package]] 633 - name = "miniz_oxide" 634 - version = "0.8.9" 635 - source = 
"registry+https://github.com/rust-lang/crates.io-index" 636 - checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" 637 - dependencies = [ 638 - "adler2", 308 + "byteorder-lite", 309 + "byteview", 310 + "crossbeam-skiplist", 311 + "enum_dispatch", 312 + "interval-heap", 313 + "log", 314 + "lz4_flex", 315 + "quick_cache", 316 + "rustc-hash", 317 + "self_cell", 318 + "sfa", 319 + "tempfile", 320 + "varint-rs", 321 + "xxhash-rust", 639 322 ] 640 323 641 324 [[package]] 642 - name = "mio" 643 - version = "1.0.4" 325 + name = "lz4_flex" 326 + version = "0.11.5" 644 327 source = "registry+https://github.com/rust-lang/crates.io-index" 645 - checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" 328 + checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" 646 329 dependencies = [ 647 - "libc", 648 - "wasi", 649 - "windows-sys 0.59.0", 650 - ] 651 - 652 - [[package]] 653 - name = "multibase" 654 - version = "0.9.2" 655 - source = "registry+https://github.com/rust-lang/crates.io-index" 656 - checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77" 657 - dependencies = [ 658 - "base-x", 659 - "base256emoji", 660 - "data-encoding", 661 - "data-encoding-macro", 662 - ] 663 - 664 - [[package]] 665 - name = "multihash" 666 - version = "0.19.3" 667 - source = "registry+https://github.com/rust-lang/crates.io-index" 668 - checksum = "6b430e7953c29dd6a09afc29ff0bb69c6e306329ee6794700aee27b76a1aea8d" 669 - dependencies = [ 670 - "core2", 671 - "serde", 672 - "unsigned-varint 0.8.0", 673 - ] 674 - 675 - [[package]] 676 - name = "num-traits" 677 - version = "0.2.19" 678 - source = "registry+https://github.com/rust-lang/crates.io-index" 679 - checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 680 - dependencies = [ 681 - "autocfg", 682 - ] 683 - 684 - [[package]] 685 - name = "object" 686 - version = "0.37.3" 687 - source = 
"registry+https://github.com/rust-lang/crates.io-index" 688 - checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" 689 - dependencies = [ 690 - "memchr", 330 + "twox-hash", 691 331 ] 692 332 693 333 [[package]] ··· 703 343 checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" 704 344 705 345 [[package]] 706 - name = "oorandom" 707 - version = "11.1.5" 708 - source = "registry+https://github.com/rust-lang/crates.io-index" 709 - checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" 710 - 711 - [[package]] 712 - name = "parking_lot" 713 - version = "0.12.5" 714 - source = "registry+https://github.com/rust-lang/crates.io-index" 715 - checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" 716 - dependencies = [ 717 - "lock_api", 718 - "parking_lot_core", 719 - ] 720 - 721 - [[package]] 722 346 name = "parking_lot_core" 723 347 version = "0.9.12" 724 348 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 732 356 ] 733 357 734 358 [[package]] 735 - name = "pin-project-lite" 736 - version = "0.2.16" 737 - source = "registry+https://github.com/rust-lang/crates.io-index" 738 - checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" 739 - 740 - [[package]] 741 - name = "pin-utils" 742 - version = "0.1.0" 743 - source = "registry+https://github.com/rust-lang/crates.io-index" 744 - checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 745 - 746 - [[package]] 747 - name = "plotters" 748 - version = "0.3.7" 749 - source = "registry+https://github.com/rust-lang/crates.io-index" 750 - checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" 751 - dependencies = [ 752 - "num-traits", 753 - "plotters-backend", 754 - "plotters-svg", 755 - "wasm-bindgen", 756 - "web-sys", 757 - ] 758 - 759 - [[package]] 760 - name = "plotters-backend" 761 - version = "0.3.7" 762 - source = 
"registry+https://github.com/rust-lang/crates.io-index" 763 - checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" 764 - 765 - [[package]] 766 - name = "plotters-svg" 767 - version = "0.3.7" 359 + name = "proc-macro2" 360 + version = "1.0.101" 768 361 source = "registry+https://github.com/rust-lang/crates.io-index" 769 - checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" 362 + checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" 770 363 dependencies = [ 771 - "plotters-backend", 364 + "unicode-ident", 772 365 ] 773 366 774 367 [[package]] 775 - name = "portable-atomic" 776 - version = "1.11.1" 777 - source = "registry+https://github.com/rust-lang/crates.io-index" 778 - checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" 779 - 780 - [[package]] 781 - name = "portable-atomic-util" 782 - version = "0.2.4" 368 + name = "quick_cache" 369 + version = "0.6.18" 783 370 source = "registry+https://github.com/rust-lang/crates.io-index" 784 - checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" 371 + checksum = "7ada44a88ef953a3294f6eb55d2007ba44646015e18613d2f213016379203ef3" 785 372 dependencies = [ 786 - "portable-atomic", 787 - ] 788 - 789 - [[package]] 790 - name = "proc-macro2" 791 - version = "1.0.101" 792 - source = "registry+https://github.com/rust-lang/crates.io-index" 793 - checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" 794 - dependencies = [ 795 - "unicode-ident", 373 + "equivalent", 374 + "hashbrown 0.16.1", 796 375 ] 797 376 798 377 [[package]] ··· 805 384 ] 806 385 807 386 [[package]] 808 - name = "rayon" 809 - version = "1.11.0" 810 - source = "registry+https://github.com/rust-lang/crates.io-index" 811 - checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" 812 - dependencies = [ 813 - "either", 814 - "rayon-core", 815 - ] 816 - 817 - [[package]] 818 - name = "rayon-core" 819 - 
version = "1.13.0" 387 + name = "r-efi" 388 + version = "5.3.0" 820 389 source = "registry+https://github.com/rust-lang/crates.io-index" 821 - checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" 822 - dependencies = [ 823 - "crossbeam-deque", 824 - "crossbeam-utils", 825 - ] 390 + checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" 826 391 827 392 [[package]] 828 393 name = "redox_syscall" ··· 834 399 ] 835 400 836 401 [[package]] 837 - name = "regex" 838 - version = "1.11.3" 839 - source = "registry+https://github.com/rust-lang/crates.io-index" 840 - checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" 841 - dependencies = [ 842 - "aho-corasick", 843 - "memchr", 844 - "regex-automata", 845 - "regex-syntax", 846 - ] 847 - 848 - [[package]] 849 - name = "regex-automata" 850 - version = "0.4.11" 851 - source = "registry+https://github.com/rust-lang/crates.io-index" 852 - checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" 853 - dependencies = [ 854 - "aho-corasick", 855 - "memchr", 856 - "regex-syntax", 857 - ] 858 - 859 - [[package]] 860 - name = "regex-syntax" 861 - version = "0.8.6" 862 - source = "registry+https://github.com/rust-lang/crates.io-index" 863 - checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" 864 - 865 - [[package]] 866 402 name = "repo-stream" 867 - version = "0.1.0" 403 + version = "0.2.2" 868 404 dependencies = [ 869 405 "clap", 870 - "criterion", 871 - "env_logger", 872 - "futures", 873 - "futures-core", 874 - "ipld-core", 875 - "iroh-car", 876 - "log", 877 - "multibase", 878 - "serde", 879 - "serde_bytes", 880 - "serde_ipld_dagcbor", 881 - "thiserror 2.0.17", 882 - "tokio", 406 + "fjall", 883 407 ] 884 408 885 409 [[package]] 886 - name = "rustc-demangle" 887 - version = "0.1.26" 888 - source = "registry+https://github.com/rust-lang/crates.io-index" 889 - checksum = 
"56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" 890 - 891 - [[package]] 892 - name = "rustversion" 893 - version = "1.0.22" 410 + name = "rustc-hash" 411 + version = "2.1.1" 894 412 source = "registry+https://github.com/rust-lang/crates.io-index" 895 - checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" 896 - 897 - [[package]] 898 - name = "ryu" 899 - version = "1.0.20" 900 - source = "registry+https://github.com/rust-lang/crates.io-index" 901 - checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" 413 + checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" 902 414 903 415 [[package]] 904 - name = "same-file" 905 - version = "1.0.6" 416 + name = "rustix" 417 + version = "1.1.2" 906 418 source = "registry+https://github.com/rust-lang/crates.io-index" 907 - checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 419 + checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" 908 420 dependencies = [ 909 - "winapi-util", 421 + "bitflags", 422 + "errno", 423 + "libc", 424 + "linux-raw-sys", 425 + "windows-sys", 910 426 ] 911 427 912 428 [[package]] ··· 916 432 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 917 433 918 434 [[package]] 919 - name = "serde" 920 - version = "1.0.228" 921 - source = "registry+https://github.com/rust-lang/crates.io-index" 922 - checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" 923 - dependencies = [ 924 - "serde_core", 925 - "serde_derive", 926 - ] 927 - 928 - [[package]] 929 - name = "serde_bytes" 930 - version = "0.11.19" 931 - source = "registry+https://github.com/rust-lang/crates.io-index" 932 - checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" 933 - dependencies = [ 934 - "serde", 935 - "serde_core", 936 - ] 937 - 938 - [[package]] 939 - name = "serde_core" 940 - version = "1.0.228" 435 + name = "self_cell" 436 + 
version = "1.2.2" 941 437 source = "registry+https://github.com/rust-lang/crates.io-index" 942 - checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" 943 - dependencies = [ 944 - "serde_derive", 945 - ] 438 + checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" 946 439 947 440 [[package]] 948 - name = "serde_derive" 949 - version = "1.0.228" 441 + name = "sfa" 442 + version = "1.0.0" 950 443 source = "registry+https://github.com/rust-lang/crates.io-index" 951 - checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" 444 + checksum = "a1296838937cab56cd6c4eeeb8718ec777383700c33f060e2869867bd01d1175" 952 445 dependencies = [ 953 - "proc-macro2", 954 - "quote", 955 - "syn 2.0.106", 446 + "byteorder-lite", 447 + "log", 448 + "xxhash-rust", 956 449 ] 957 450 958 451 [[package]] 959 - name = "serde_ipld_dagcbor" 960 - version = "0.6.4" 961 - source = "registry+https://github.com/rust-lang/crates.io-index" 962 - checksum = "46182f4f08349a02b45c998ba3215d3f9de826246ba02bb9dddfe9a2a2100778" 963 - dependencies = [ 964 - "cbor4ii", 965 - "ipld-core", 966 - "scopeguard", 967 - "serde", 968 - ] 969 - 970 - [[package]] 971 - name = "serde_json" 972 - version = "1.0.145" 973 - source = "registry+https://github.com/rust-lang/crates.io-index" 974 - checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" 975 - dependencies = [ 976 - "itoa", 977 - "memchr", 978 - "ryu", 979 - "serde", 980 - "serde_core", 981 - ] 982 - 983 - [[package]] 984 - name = "signal-hook-registry" 985 - version = "1.4.6" 986 - source = "registry+https://github.com/rust-lang/crates.io-index" 987 - checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" 988 - dependencies = [ 989 - "libc", 990 - ] 991 - 992 - [[package]] 993 - name = "slab" 994 - version = "0.4.11" 995 - source = "registry+https://github.com/rust-lang/crates.io-index" 996 - checksum = 
"7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" 997 - 998 - [[package]] 999 452 name = "smallvec" 1000 453 version = "1.15.1" 1001 454 source = "registry+https://github.com/rust-lang/crates.io-index" 1002 455 checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" 1003 456 1004 457 [[package]] 1005 - name = "socket2" 1006 - version = "0.6.0" 458 + name = "spin" 459 + version = "0.9.8" 1007 460 source = "registry+https://github.com/rust-lang/crates.io-index" 1008 - checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" 461 + checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" 1009 462 dependencies = [ 1010 - "libc", 1011 - "windows-sys 0.59.0", 463 + "lock_api", 1012 464 ] 1013 465 1014 466 [[package]] ··· 1019 471 1020 472 [[package]] 1021 473 name = "syn" 1022 - version = "1.0.109" 1023 - source = "registry+https://github.com/rust-lang/crates.io-index" 1024 - checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 1025 - dependencies = [ 1026 - "proc-macro2", 1027 - "quote", 1028 - "unicode-ident", 1029 - ] 1030 - 1031 - [[package]] 1032 - name = "syn" 1033 474 version = "2.0.106" 1034 475 source = "registry+https://github.com/rust-lang/crates.io-index" 1035 476 checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" ··· 1040 481 ] 1041 482 1042 483 [[package]] 1043 - name = "thiserror" 1044 - version = "1.0.69" 1045 - source = "registry+https://github.com/rust-lang/crates.io-index" 1046 - checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" 1047 - dependencies = [ 1048 - "thiserror-impl 1.0.69", 1049 - ] 1050 - 1051 - [[package]] 1052 - name = "thiserror" 1053 - version = "2.0.17" 1054 - source = "registry+https://github.com/rust-lang/crates.io-index" 1055 - checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" 1056 - dependencies = [ 1057 - "thiserror-impl 2.0.17", 1058 - ] 1059 - 
1060 - [[package]] 1061 - name = "thiserror-impl" 1062 - version = "1.0.69" 1063 - source = "registry+https://github.com/rust-lang/crates.io-index" 1064 - checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" 1065 - dependencies = [ 1066 - "proc-macro2", 1067 - "quote", 1068 - "syn 2.0.106", 1069 - ] 1070 - 1071 - [[package]] 1072 - name = "thiserror-impl" 1073 - version = "2.0.17" 1074 - source = "registry+https://github.com/rust-lang/crates.io-index" 1075 - checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" 1076 - dependencies = [ 1077 - "proc-macro2", 1078 - "quote", 1079 - "syn 2.0.106", 1080 - ] 1081 - 1082 - [[package]] 1083 - name = "tinytemplate" 1084 - version = "1.2.1" 484 + name = "tempfile" 485 + version = "3.23.0" 1085 486 source = "registry+https://github.com/rust-lang/crates.io-index" 1086 - checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 487 + checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" 1087 488 dependencies = [ 1088 - "serde", 1089 - "serde_json", 489 + "fastrand", 490 + "getrandom", 491 + "once_cell", 492 + "rustix", 493 + "windows-sys", 1090 494 ] 1091 495 1092 496 [[package]] 1093 - name = "tokio" 1094 - version = "1.47.1" 497 + name = "twox-hash" 498 + version = "2.1.2" 1095 499 source = "registry+https://github.com/rust-lang/crates.io-index" 1096 - checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" 1097 - dependencies = [ 1098 - "backtrace", 1099 - "bytes", 1100 - "io-uring", 1101 - "libc", 1102 - "mio", 1103 - "parking_lot", 1104 - "pin-project-lite", 1105 - "signal-hook-registry", 1106 - "slab", 1107 - "socket2", 1108 - "tokio-macros", 1109 - "windows-sys 0.59.0", 1110 - ] 1111 - 1112 - [[package]] 1113 - name = "tokio-macros" 1114 - version = "2.5.0" 1115 - source = "registry+https://github.com/rust-lang/crates.io-index" 1116 - checksum = 
"6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" 1117 - dependencies = [ 1118 - "proc-macro2", 1119 - "quote", 1120 - "syn 2.0.106", 1121 - ] 500 + checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" 1122 501 1123 502 [[package]] 1124 503 name = "unicode-ident" ··· 1127 506 checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" 1128 507 1129 508 [[package]] 1130 - name = "unsigned-varint" 1131 - version = "0.7.2" 1132 - source = "registry+https://github.com/rust-lang/crates.io-index" 1133 - checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105" 1134 - 1135 - [[package]] 1136 - name = "unsigned-varint" 1137 - version = "0.8.0" 1138 - source = "registry+https://github.com/rust-lang/crates.io-index" 1139 - checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06" 1140 - 1141 - [[package]] 1142 509 name = "utf8parse" 1143 510 version = "0.2.2" 1144 511 source = "registry+https://github.com/rust-lang/crates.io-index" 1145 512 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 1146 513 1147 514 [[package]] 1148 - name = "walkdir" 1149 - version = "2.5.0" 515 + name = "varint-rs" 516 + version = "2.2.0" 1150 517 source = "registry+https://github.com/rust-lang/crates.io-index" 1151 - checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 1152 - dependencies = [ 1153 - "same-file", 1154 - "winapi-util", 1155 - ] 518 + checksum = "8f54a172d0620933a27a4360d3db3e2ae0dd6cceae9730751a036bbf182c4b23" 1156 519 1157 520 [[package]] 1158 521 name = "wasi" 1159 - version = "0.11.1+wasi-snapshot-preview1" 1160 - source = "registry+https://github.com/rust-lang/crates.io-index" 1161 - checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" 1162 - 1163 - [[package]] 1164 - name = "wasm-bindgen" 1165 - version = "0.2.104" 1166 - source = "registry+https://github.com/rust-lang/crates.io-index" 1167 - checksum = 
"c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" 1168 - dependencies = [ 1169 - "cfg-if", 1170 - "once_cell", 1171 - "rustversion", 1172 - "wasm-bindgen-macro", 1173 - "wasm-bindgen-shared", 1174 - ] 1175 - 1176 - [[package]] 1177 - name = "wasm-bindgen-backend" 1178 - version = "0.2.104" 1179 - source = "registry+https://github.com/rust-lang/crates.io-index" 1180 - checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" 1181 - dependencies = [ 1182 - "bumpalo", 1183 - "log", 1184 - "proc-macro2", 1185 - "quote", 1186 - "syn 2.0.106", 1187 - "wasm-bindgen-shared", 1188 - ] 1189 - 1190 - [[package]] 1191 - name = "wasm-bindgen-macro" 1192 - version = "0.2.104" 1193 - source = "registry+https://github.com/rust-lang/crates.io-index" 1194 - checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" 1195 - dependencies = [ 1196 - "quote", 1197 - "wasm-bindgen-macro-support", 1198 - ] 1199 - 1200 - [[package]] 1201 - name = "wasm-bindgen-macro-support" 1202 - version = "0.2.104" 1203 - source = "registry+https://github.com/rust-lang/crates.io-index" 1204 - checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" 1205 - dependencies = [ 1206 - "proc-macro2", 1207 - "quote", 1208 - "syn 2.0.106", 1209 - "wasm-bindgen-backend", 1210 - "wasm-bindgen-shared", 1211 - ] 1212 - 1213 - [[package]] 1214 - name = "wasm-bindgen-shared" 1215 - version = "0.2.104" 1216 - source = "registry+https://github.com/rust-lang/crates.io-index" 1217 - checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" 1218 - dependencies = [ 1219 - "unicode-ident", 1220 - ] 1221 - 1222 - [[package]] 1223 - name = "web-sys" 1224 - version = "0.3.81" 522 + version = "0.14.7+wasi-0.2.4" 1225 523 source = "registry+https://github.com/rust-lang/crates.io-index" 1226 - checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" 524 + checksum = 
"883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" 1227 525 dependencies = [ 1228 - "js-sys", 1229 - "wasm-bindgen", 526 + "wasip2", 1230 527 ] 1231 528 1232 529 [[package]] 1233 - name = "winapi-util" 1234 - version = "0.1.11" 530 + name = "wasip2" 531 + version = "1.0.1+wasi-0.2.4" 1235 532 source = "registry+https://github.com/rust-lang/crates.io-index" 1236 - checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" 533 + checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" 1237 534 dependencies = [ 1238 - "windows-sys 0.60.2", 535 + "wit-bindgen", 1239 536 ] 1240 537 1241 538 [[package]] ··· 1246 543 1247 544 [[package]] 1248 545 name = "windows-sys" 1249 - version = "0.59.0" 1250 - source = "registry+https://github.com/rust-lang/crates.io-index" 1251 - checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 1252 - dependencies = [ 1253 - "windows-targets 0.52.6", 1254 - ] 1255 - 1256 - [[package]] 1257 - name = "windows-sys" 1258 546 version = "0.60.2" 1259 547 source = "registry+https://github.com/rust-lang/crates.io-index" 1260 548 checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" 1261 549 dependencies = [ 1262 - "windows-targets 0.53.5", 1263 - ] 1264 - 1265 - [[package]] 1266 - name = "windows-targets" 1267 - version = "0.52.6" 1268 - source = "registry+https://github.com/rust-lang/crates.io-index" 1269 - checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 1270 - dependencies = [ 1271 - "windows_aarch64_gnullvm 0.52.6", 1272 - "windows_aarch64_msvc 0.52.6", 1273 - "windows_i686_gnu 0.52.6", 1274 - "windows_i686_gnullvm 0.52.6", 1275 - "windows_i686_msvc 0.52.6", 1276 - "windows_x86_64_gnu 0.52.6", 1277 - "windows_x86_64_gnullvm 0.52.6", 1278 - "windows_x86_64_msvc 0.52.6", 550 + "windows-targets", 1279 551 ] 1280 552 1281 553 [[package]] ··· 1285 557 checksum = 
"4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" 1286 558 dependencies = [ 1287 559 "windows-link", 1288 - "windows_aarch64_gnullvm 0.53.1", 1289 - "windows_aarch64_msvc 0.53.1", 1290 - "windows_i686_gnu 0.53.1", 1291 - "windows_i686_gnullvm 0.53.1", 1292 - "windows_i686_msvc 0.53.1", 1293 - "windows_x86_64_gnu 0.53.1", 1294 - "windows_x86_64_gnullvm 0.53.1", 1295 - "windows_x86_64_msvc 0.53.1", 560 + "windows_aarch64_gnullvm", 561 + "windows_aarch64_msvc", 562 + "windows_i686_gnu", 563 + "windows_i686_gnullvm", 564 + "windows_i686_msvc", 565 + "windows_x86_64_gnu", 566 + "windows_x86_64_gnullvm", 567 + "windows_x86_64_msvc", 1296 568 ] 1297 - 1298 - [[package]] 1299 - name = "windows_aarch64_gnullvm" 1300 - version = "0.52.6" 1301 - source = "registry+https://github.com/rust-lang/crates.io-index" 1302 - checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 1303 569 1304 570 [[package]] 1305 571 name = "windows_aarch64_gnullvm" ··· 1309 575 1310 576 [[package]] 1311 577 name = "windows_aarch64_msvc" 1312 - version = "0.52.6" 1313 - source = "registry+https://github.com/rust-lang/crates.io-index" 1314 - checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 1315 - 1316 - [[package]] 1317 - name = "windows_aarch64_msvc" 1318 578 version = "0.53.1" 1319 579 source = "registry+https://github.com/rust-lang/crates.io-index" 1320 580 checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" 1321 581 1322 582 [[package]] 1323 583 name = "windows_i686_gnu" 1324 - version = "0.52.6" 1325 - source = "registry+https://github.com/rust-lang/crates.io-index" 1326 - checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 1327 - 1328 - [[package]] 1329 - name = "windows_i686_gnu" 1330 584 version = "0.53.1" 1331 585 source = "registry+https://github.com/rust-lang/crates.io-index" 1332 586 checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" 1333 - 1334 
- [[package]] 1335 - name = "windows_i686_gnullvm" 1336 - version = "0.52.6" 1337 - source = "registry+https://github.com/rust-lang/crates.io-index" 1338 - checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 1339 587 1340 588 [[package]] 1341 589 name = "windows_i686_gnullvm" ··· 1345 593 1346 594 [[package]] 1347 595 name = "windows_i686_msvc" 1348 - version = "0.52.6" 1349 - source = "registry+https://github.com/rust-lang/crates.io-index" 1350 - checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 1351 - 1352 - [[package]] 1353 - name = "windows_i686_msvc" 1354 596 version = "0.53.1" 1355 597 source = "registry+https://github.com/rust-lang/crates.io-index" 1356 598 checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" 1357 599 1358 600 [[package]] 1359 601 name = "windows_x86_64_gnu" 1360 - version = "0.52.6" 1361 - source = "registry+https://github.com/rust-lang/crates.io-index" 1362 - checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 1363 - 1364 - [[package]] 1365 - name = "windows_x86_64_gnu" 1366 602 version = "0.53.1" 1367 603 source = "registry+https://github.com/rust-lang/crates.io-index" 1368 604 checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" 1369 - 1370 - [[package]] 1371 - name = "windows_x86_64_gnullvm" 1372 - version = "0.52.6" 1373 - source = "registry+https://github.com/rust-lang/crates.io-index" 1374 - checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1375 605 1376 606 [[package]] 1377 607 name = "windows_x86_64_gnullvm" ··· 1381 611 1382 612 [[package]] 1383 613 name = "windows_x86_64_msvc" 1384 - version = "0.52.6" 1385 - source = "registry+https://github.com/rust-lang/crates.io-index" 1386 - checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1387 - 1388 - [[package]] 1389 - name = "windows_x86_64_msvc" 1390 614 version = "0.53.1" 1391 615 source = 
"registry+https://github.com/rust-lang/crates.io-index" 1392 616 checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" 1393 617 1394 618 [[package]] 1395 - name = "zerocopy" 1396 - version = "0.8.27" 619 + name = "wit-bindgen" 620 + version = "0.46.0" 1397 621 source = "registry+https://github.com/rust-lang/crates.io-index" 1398 - checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" 1399 - dependencies = [ 1400 - "zerocopy-derive", 1401 - ] 622 + checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" 1402 623 1403 624 [[package]] 1404 - name = "zerocopy-derive" 1405 - version = "0.8.27" 625 + name = "xxhash-rust" 626 + version = "0.8.15" 1406 627 source = "registry+https://github.com/rust-lang/crates.io-index" 1407 - checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" 1408 - dependencies = [ 1409 - "proc-macro2", 1410 - "quote", 1411 - "syn 2.0.106", 1412 - ] 628 + checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
+5 -25
Cargo.toml
··· 1 1 [package] 2 2 name = "repo-stream" 3 - version = "0.1.0" 3 + version = "0.2.2" 4 4 edition = "2024" 5 + license = "MIT OR Apache-2.0" 6 + description = "A robust CAR file -> MST walker for atproto" 7 + repository = "https://tangled.org/@microcosm.blue/repo-stream" 5 8 6 9 [dependencies] 7 - futures = "0.3.31" 8 - futures-core = "0.3.31" 9 - ipld-core = { version = "0.4.2", features = ["serde"] } 10 - iroh-car = "0.5.1" 11 - log = "0.4.28" 12 - multibase = "0.9.2" 13 - serde = { version = "1.0.228", features = ["derive"] } 14 - serde_bytes = "0.11.19" 15 - serde_ipld_dagcbor = "0.6.4" 16 - thiserror = "2.0.17" 17 - tokio = "1.47.1" 18 - 19 - [dev-dependencies] 10 + fjall = "3.0.1" 20 11 clap = { version = "4.5.48", features = ["derive"] } 21 - criterion = { version = "0.7.0", features = ["async_tokio"] } 22 - env_logger = "0.11.8" 23 - multibase = "0.9.2" 24 - tokio = { version = "1.47.1", features = ["full"] } 25 12 26 - [profile.profiling] 27 - inherits = "release" 28 - debug = true 29 - 30 - [[bench]] 31 - name = "non-huge-cars" 32 - harness = false
+190
LICENSE.Apache-2.0
··· 1 + Apache License 2 + Version 2.0, January 2004 3 + http://www.apache.org/licenses/ 4 + 5 + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 + 7 + 1. Definitions. 8 + 9 + "License" shall mean the terms and conditions for use, reproduction, 10 + and distribution as defined by Sections 1 through 9 of this document. 11 + 12 + "Licensor" shall mean the copyright owner or entity authorized by 13 + the copyright owner that is granting the License. 14 + 15 + "Legal Entity" shall mean the union of the acting entity and all 16 + other entities that control, are controlled by, or are under common 17 + control with that entity. For the purposes of this definition, 18 + "control" means (i) the power, direct or indirect, to cause the 19 + direction or management of such entity, whether by contract or 20 + otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 + outstanding shares, or (iii) beneficial ownership of such entity. 22 + 23 + "You" (or "Your") shall mean an individual or Legal Entity 24 + exercising permissions granted by this License. 25 + 26 + "Source" form shall mean the preferred form for making modifications, 27 + including but not limited to software source code, documentation 28 + source, and configuration files. 29 + 30 + "Object" form shall mean any form resulting from mechanical 31 + transformation or translation of a Source form, including but 32 + not limited to compiled object code, generated documentation, 33 + and conversions to other media types. 34 + 35 + "Work" shall mean the work of authorship, whether in Source or 36 + Object form, made available under the License, as indicated by a 37 + copyright notice that is included in or attached to the work 38 + (an example is provided in the Appendix below). 
39 + 40 + "Derivative Works" shall mean any work, whether in Source or Object 41 + form, that is based on (or derived from) the Work and for which the 42 + editorial revisions, annotations, elaborations, or other modifications 43 + represent, as a whole, an original work of authorship. For the purposes 44 + of this License, Derivative Works shall not include works that remain 45 + separable from, or merely link (or bind by name) to the interfaces of, 46 + the Work and Derivative Works thereof. 47 + 48 + "Contribution" shall mean any work of authorship, including 49 + the original version of the Work and any modifications or additions 50 + to that Work or Derivative Works thereof, that is intentionally 51 + submitted to Licensor for inclusion in the Work by the copyright owner 52 + or by an individual or Legal Entity authorized to submit on behalf of 53 + the copyright owner. For the purposes of this definition, "submitted" 54 + means any form of electronic, verbal, or written communication sent 55 + to the Licensor or its representatives, including but not limited to 56 + communication on electronic mailing lists, source code control systems, 57 + and issue tracking systems that are managed by, or on behalf of, the 58 + Licensor for the purpose of discussing and improving the Work, but 59 + excluding communication that is conspicuously marked or otherwise 60 + designated in writing by the copyright owner as "Not a Contribution." 61 + 62 + "Contributor" shall mean Licensor and any individual or Legal Entity 63 + on behalf of whom a Contribution has been received by Licensor and 64 + subsequently incorporated within the Work. 65 + 66 + 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 + this License, each Contributor hereby grants to You a perpetual, 68 + worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 + copyright license to reproduce, prepare Derivative Works of, 70 + publicly display, publicly perform, sublicense, and distribute the 71 + Work and such Derivative Works in Source or Object form. 72 + 73 + 3. Grant of Patent License. Subject to the terms and conditions of 74 + this License, each Contributor hereby grants to You a perpetual, 75 + worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 + (except as stated in this section) patent license to make, have made, 77 + use, offer to sell, sell, import, and otherwise transfer the Work, 78 + where such license applies only to those patent claims licensable 79 + by such Contributor that are necessarily infringed by their 80 + Contribution(s) alone or by combination of their Contribution(s) 81 + with the Work to which such Contribution(s) was submitted. If You 82 + institute patent litigation against any entity (including a 83 + cross-claim or counterclaim in a lawsuit) alleging that the Work 84 + or a Contribution incorporated within the Work constitutes direct 85 + or contributory patent infringement, then any patent licenses 86 + granted to You under this License for that Work shall terminate 87 + as of the date such litigation is filed. 88 + 89 + 4. Redistribution. 
You may reproduce and distribute copies of the 90 + Work or Derivative Works thereof in any medium, with or without 91 + modifications, and in Source or Object form, provided that You 92 + meet the following conditions: 93 + 94 + (a) You must give any other recipients of the Work or 95 + Derivative Works a copy of this License; and 96 + 97 + (b) You must cause any modified files to carry prominent notices 98 + stating that You changed the files; and 99 + 100 + (c) You must retain, in the Source form of any Derivative Works 101 + that You distribute, all copyright, patent, trademark, and 102 + attribution notices from the Source form of the Work, 103 + excluding those notices that do not pertain to any part of 104 + the Derivative Works; and 105 + 106 + (d) If the Work includes a "NOTICE" text file as part of its 107 + distribution, then any Derivative Works that You distribute must 108 + include a readable copy of the attribution notices contained 109 + within such NOTICE file, excluding those notices that do not 110 + pertain to any part of the Derivative Works, in at least one 111 + of the following places: within a NOTICE text file distributed 112 + as part of the Derivative Works; within the Source form or 113 + documentation, if provided along with the Derivative Works; or, 114 + within a display generated by the Derivative Works, if and 115 + wherever such third-party notices normally appear. The contents 116 + of the NOTICE file are for informational purposes only and 117 + do not modify the License. You may add Your own attribution 118 + notices within Derivative Works that You distribute, alongside 119 + or as an addendum to the NOTICE text from the Work, provided 120 + that such additional attribution notices cannot be construed 121 + as modifying the License. 
122 + 123 + You may add Your own copyright statement to Your modifications and 124 + may provide additional or different license terms and conditions 125 + for use, reproduction, or distribution of Your modifications, or 126 + for any such Derivative Works as a whole, provided Your use, 127 + reproduction, and distribution of the Work otherwise complies with 128 + the conditions stated in this License. 129 + 130 + 5. Submission of Contributions. Unless You explicitly state otherwise, 131 + any Contribution intentionally submitted for inclusion in the Work 132 + by You to the Licensor shall be under the terms and conditions of 133 + this License, without any additional terms or conditions. 134 + Notwithstanding the above, nothing herein shall supersede or modify 135 + the terms of any separate license agreement you may have executed 136 + with Licensor regarding such Contributions. 137 + 138 + 6. Trademarks. This License does not grant permission to use the trade 139 + names, trademarks, service marks, or product names of the Licensor, 140 + except as required for reasonable and customary use in describing the 141 + origin of the Work and reproducing the content of the NOTICE file. 142 + 143 + 7. Disclaimer of Warranty. Unless required by applicable law or 144 + agreed to in writing, Licensor provides the Work (and each 145 + Contributor provides its Contributions) on an "AS IS" BASIS, 146 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 + implied, including, without limitation, any warranties or conditions 148 + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 + PARTICULAR PURPOSE. You are solely responsible for determining the 150 + appropriateness of using or redistributing the Work and assume any 151 + risks associated with Your exercise of permissions under this License. 152 + 153 + 8. Limitation of Liability. 
In no event and under no legal theory, 154 + whether in tort (including negligence), contract, or otherwise, 155 + unless required by applicable law (such as deliberate and grossly 156 + negligent acts) or agreed to in writing, shall any Contributor be 157 + liable to You for damages, including any direct, indirect, special, 158 + incidental, or consequential damages of any character arising as a 159 + result of this License or out of the use or inability to use the 160 + Work (including but not limited to damages for loss of goodwill, 161 + work stoppage, computer failure or malfunction, or any and all 162 + other commercial damages or losses), even if such Contributor 163 + has been advised of the possibility of such damages. 164 + 165 + 9. Accepting Warranty or Additional Liability. While redistributing 166 + the Work or Derivative Works thereof, You may choose to offer, 167 + and charge a fee for, acceptance of support, warranty, indemnity, 168 + or other liability obligations and/or rights consistent with this 169 + License. However, in accepting such obligations, You may act only 170 + on Your own behalf and on Your sole responsibility, not on behalf 171 + of any other Contributor, and only if You agree to indemnify, 172 + defend, and hold each Contributor harmless for any liability 173 + incurred by, or claims asserted against, such Contributor by reason 174 + of your accepting any such warranty or additional liability. 175 + 176 + END OF TERMS AND CONDITIONS 177 + 178 + Copyright 2025 microcosm 179 + 180 + Licensed under the Apache License, Version 2.0 (the "License"); 181 + you may not use this file except in compliance with the License. 
182 + You may obtain a copy of the License at 183 + 184 + http://www.apache.org/licenses/LICENSE-2.0 185 + 186 + Unless required by applicable law or agreed to in writing, software 187 + distributed under the License is distributed on an "AS IS" BASIS, 188 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 189 + See the License for the specific language governing permissions and 190 + limitations under the License.
+21
LICENSE.MIT
··· 1 + MIT License 2 + 3 + Copyright (c) 2025 microcosm 4 + 5 + Permission is hereby granted, free of charge, to any person obtaining a copy 6 + of this software and associated documentation files (the "Software"), to deal 7 + in the Software without restriction, including without limitation the rights 8 + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 + copies of the Software, and to permit persons to whom the Software is 10 + furnished to do so, subject to the following conditions: 11 + 12 + The above copyright notice and this permission notice shall be included in all 13 + copies or substantial portions of the Software. 14 + 15 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 + SOFTWARE.
+41
benches/huge-car.rs
··· 1 + extern crate repo_stream; 2 + use repo_stream::Driver; 3 + use std::path::{Path, PathBuf}; 4 + 5 + use criterion::{Criterion, criterion_group, criterion_main}; 6 + 7 + pub fn criterion_benchmark(c: &mut Criterion) { 8 + let rt = tokio::runtime::Builder::new_multi_thread() 9 + .enable_all() 10 + .build() 11 + .expect("Creating runtime failed"); 12 + 13 + let filename = std::env::var("HUGE_CAR").expect("HUGE_CAR env var"); 14 + let filename: PathBuf = filename.try_into().unwrap(); 15 + 16 + c.bench_function("huge-car", |b| { 17 + b.to_async(&rt).iter(async || drive_car(&filename).await) 18 + }); 19 + } 20 + 21 + async fn drive_car(filename: impl AsRef<Path>) -> usize { 22 + let reader = tokio::fs::File::open(filename).await.unwrap(); 23 + let reader = tokio::io::BufReader::new(reader); 24 + 25 + let mut driver = match Driver::load_car(reader, |block| block.len(), 1024) 26 + .await 27 + .unwrap() 28 + { 29 + Driver::Memory(_, mem_driver) => mem_driver, 30 + Driver::Disk(_) => panic!("not doing disk for benchmark"), 31 + }; 32 + 33 + let mut n = 0; 34 + while let Some(pairs) = driver.next_chunk(256).await.unwrap() { 35 + n += pairs.len(); 36 + } 37 + n 38 + } 39 + 40 + criterion_group!(benches, criterion_benchmark); 41 + criterion_main!(benches);
+16 -22
benches/non-huge-cars.rs
··· 1 1 extern crate repo_stream; 2 - use futures::TryStreamExt; 3 - use iroh_car::CarReader; 4 - use std::convert::Infallible; 2 + use repo_stream::Driver; 5 3 6 4 use criterion::{Criterion, criterion_group, criterion_main}; 7 5 6 + const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car"); 8 7 const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car"); 9 8 const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car"); 10 9 const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car"); ··· 15 14 .build() 16 15 .expect("Creating runtime failed"); 17 16 17 + c.bench_function("empty-car", |b| { 18 + b.to_async(&rt).iter(async || drive_car(EMPTY_CAR).await) 19 + }); 18 20 c.bench_function("tiny-car", |b| { 19 21 b.to_async(&rt).iter(async || drive_car(TINY_CAR).await) 20 22 }); ··· 26 28 }); 27 29 } 28 30 29 - async fn drive_car(bytes: &[u8]) { 30 - let reader = CarReader::new(bytes).await.unwrap(); 31 - 32 - let root = reader 33 - .header() 34 - .roots() 35 - .first() 36 - .ok_or("missing root") 31 + async fn drive_car(bytes: &[u8]) -> usize { 32 + let mut driver = match Driver::load_car(bytes, |block| block.len(), 32) 33 + .await 37 34 .unwrap() 38 - .clone(); 39 - 40 - let stream = std::pin::pin!(reader.stream()); 35 + { 36 + Driver::Memory(_, mem_driver) => mem_driver, 37 + Driver::Disk(_) => panic!("not benching big cars here"), 38 + }; 41 39 42 - let (_commit, v) = 43 - repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 44 - .await 45 - .unwrap(); 46 - let mut record_stream = std::pin::pin!(v.stream()); 47 - 48 - while let Some(_) = record_stream.try_next().await.unwrap() { 49 - // just here for the drive 40 + let mut n = 0; 41 + while let Some(pairs) = driver.next_chunk(256).await.unwrap() { 42 + n += pairs.len(); 50 43 } 44 + n 51 45 } 52 46 53 47 criterion_group!(benches, criterion_benchmark);
car-samples/empty.car

This is a binary file and will not be displayed.

+43
examples/disk-read-file/main.rs
··· 1 + use clap::Parser; 2 + use fjall::{Database, KeyspaceCreateOptions}; 3 + use std::{path::PathBuf, collections::BTreeMap}; 4 + 5 + #[derive(Debug, Parser)] 6 + struct Args { 7 + #[arg()] 8 + db_path: PathBuf, 9 + } 10 + 11 + fn main() -> Result<(), Box<dyn std::error::Error>> { 12 + let Args { db_path } = Args::parse(); 13 + 14 + let db = Database::builder(db_path).open()?; 15 + let ks = db.keyspace("z", KeyspaceCreateOptions::default)?; 16 + let mut seen_keys: BTreeMap<Vec<u8>, usize> = BTreeMap::default(); 17 + 18 + print!("writing..."); 19 + for i in 0..250_000_usize { 20 + let k = i.to_be_bytes().to_vec(); 21 + ks.insert(k.clone(), vec![0xAA; 256])?; 22 + seen_keys.insert(k, i); 23 + } 24 + 25 + println!(" done. checking keys..."); 26 + 27 + // remove every seen key that fjall actually has, to see what's left 28 + for guard in ks.iter() { 29 + seen_keys.remove(guard.key()?.as_ref()); 30 + } 31 + 32 + // report the result 33 + if seen_keys.len() == 0 { 34 + println!("[ OK ] all keys found"); 35 + } else { 36 + println!("[FAIL] fjall did not have all seen_keys:"); 37 + for (k, i) in seen_keys { 38 + println!(" insert #{i} missing, key bytes: {k:?}"); 39 + } 40 + } 41 + 42 + Ok(()) 43 + }
+18 -25
examples/read-file/main.rs
··· 1 + /*! 2 + Read a CAR file with in-memory processing 3 + */ 4 + 1 5 extern crate repo_stream; 2 6 use clap::Parser; 3 - use futures::TryStreamExt; 4 - use iroh_car::CarReader; 5 - use std::convert::Infallible; 7 + use repo_stream::{Driver, DriverBuilder}; 6 8 use std::path::PathBuf; 7 9 8 10 type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; ··· 21 23 let reader = tokio::fs::File::open(file).await?; 22 24 let reader = tokio::io::BufReader::new(reader); 23 25 24 - println!("hello!"); 25 - 26 - let reader = CarReader::new(reader).await?; 27 - 28 - let root = reader 29 - .header() 30 - .roots() 31 - .first() 32 - .ok_or("missing root")? 33 - .clone(); 34 - log::debug!("root: {root:?}"); 35 - 36 - // let stream = Box::pin(reader.stream()); 37 - let stream = std::pin::pin!(reader.stream()); 38 - 39 - let (commit, v) = 40 - repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 41 - .await?; 42 - let mut record_stream = std::pin::pin!(v.stream()); 26 + let (commit, mut driver) = match DriverBuilder::new() 27 + .with_block_processor(|block| block.len()) 28 + .load_car(reader) 29 + .await? 30 + { 31 + Driver::Memory(commit, mem_driver) => (commit, mem_driver), 32 + Driver::Disk(_) => panic!("this example doesn't handle big CARs"), 33 + }; 43 34 44 35 log::info!("got commit: {commit:?}"); 45 36 46 - while let Some((rkey, _rec)) = record_stream.try_next().await? { 47 - log::info!("got {rkey:?}"); 37 + let mut n = 0; 38 + while let Some(pairs) = driver.next_chunk(256).await? { 39 + n += pairs.len(); 40 + // log::info!("got {rkey:?}"); 48 41 } 49 - log::info!("bye!"); 42 + log::info!("bye! total records={n}"); 50 43 51 44 Ok(()) 52 45 }
+103 -9
readme.md
··· 1 1 # repo-stream 2 2 3 - Fast and (aspirationally) robust atproto CAR file processing in rust 3 + A robust CAR file -> MST walker for atproto 4 + 5 + [![Crates.io][crates-badge]](https://crates.io/crates/repo-stream) 6 + [![Documentation][docs-badge]](https://docs.rs/repo-stream) 7 + [![Sponsor][sponsor-badge]](https://github.com/sponsors/uniphil) 8 + 9 + [crates-badge]: https://img.shields.io/crates/v/repo-stream.svg 10 + [docs-badge]: https://docs.rs/repo-stream/badge.svg 11 + [sponsor-badge]: https://img.shields.io/badge/at-microcosm-b820f9?labelColor=b820f9&logo=githubsponsors&logoColor=fff 12 + 13 + ```rust 14 + use repo_stream::{Driver, DriverBuilder, DriveError, DiskBuilder}; 15 + 16 + #[tokio::main] 17 + async fn main() -> Result<(), DriveError> { 18 + // repo-stream takes any AsyncRead as input, like a tokio::fs::File 19 + let reader = tokio::fs::File::open("repo.car".into()).await?; 20 + let reader = tokio::io::BufReader::new(reader); 21 + 22 + // example repo workload is simply counting the total record bytes 23 + let mut total_size = 0; 24 + 25 + match DriverBuilder::new() 26 + .with_mem_limit_mb(10) 27 + .with_block_processor(|rec| rec.len()) // block processing: just extract the raw record size 28 + .load_car(reader) 29 + .await? 30 + { 31 + 32 + // if all blocks fit within memory 33 + Driver::Memory(_commit, mut driver) => { 34 + while let Some(chunk) = driver.next_chunk(256).await? { 35 + for (_rkey, size) in chunk { 36 + total_size += size; 37 + } 38 + } 39 + }, 40 + 41 + // if the CAR was too big for in-memory processing 42 + Driver::Disk(paused) => { 43 + // set up a disk store we can spill to 44 + let store = DiskBuilder::new().open("some/path.db".into()).await?; 45 + // do the spilling, get back a (similar) driver 46 + let (_commit, mut driver) = paused.finish_loading(store).await?; 47 + 48 + while let Some(chunk) = driver.next_chunk(256).await? 
{ 49 + for (_rkey, size) in chunk { 50 + total_size += size; 51 + } 52 + } 53 + 54 + // clean up the disk store (drop tables etc) 55 + driver.reset_store().await?; 56 + } 57 + }; 58 + println!("sum of size of all records: {total_size}"); 59 + Ok(()) 60 + } 61 + ``` 62 + 63 + more recent todo 64 + 65 + - [ ] get an *empty* car for the test suite 66 + - [x] implement a max size on disk limit 67 + 68 + 69 + ----- 70 + 71 + older stuff (to clean up): 72 + 73 + 74 + current car processing times (records processed into their length usize, phil's dev machine): 75 + 76 + - 128MiB CAR file: `347ms` 77 + - 5.0MiB: `6.1ms` 78 + - 279KiB: `139us` 79 + - 3.4KiB: `4.9us` 80 + 81 + 82 + running the huge-car benchmark 83 + 84 + - to avoid committing it to the repo, you have to pass it in through the env for now. 85 + 86 + ```bash 87 + HUGE_CAR=~/Downloads/did_plc_redacted.car cargo bench -- huge-car 88 + ``` 4 89 5 90 6 91 todo 7 92 8 - - [ ] car file test fixtures & validation tests 9 - - [ ] make sure we can get the did and signature out for verification 93 + - [x] car file test fixtures & validation tests 94 + - [x] make sure we can get the did and signature out for verification 95 + -> yeah the commit is returned from init 10 96 - [ ] spec compliance todos 11 - - [ ] assert that keys are ordered and fail if not 12 - - [ ] verify node mst depth from key (possibly pending [interop test fixes](https://github.com/bluesky-social/atproto-interop-tests/issues/5)) 97 + - [x] assert that keys are ordered and fail if not 98 + - [x] verify node mst depth from key (possibly pending [interop test fixes](https://github.com/bluesky-social/atproto-interop-tests/issues/5)) 13 99 - [ ] performance todos 14 - - [ ] consume the serialized nodes into a mutable efficient format 100 + - [x] consume the serialized nodes into a mutable efficient format 15 101 - [ ] maybe customize the deserialize impl to do that directly? 
16 - [ ] benchmark and profile 102 + - [x] benchmark and profile 17 103 - [ ] robustness todos 18 104 - [ ] swap the blocks hashmap for a BlockStore trait that can be dumped to redb 19 105 - [ ] maybe keep the redb function behind a feature flag? 20 106 - [ ] can we assert a max size for node blocks? 21 - - [ ] figure out why asserting the upper nibble of the fourth byte of a node fails fingerprinting 107 + - [x] figure out why asserting the upper nibble of the fourth byte of a node fails fingerprinting 108 + -> because it's the upper 3 bits, not the upper 4-bit nibble, oops. 22 109 - [ ] max mst depth (there is actually a hard limit but a malicious repo could do anything) 23 - - [ ] i don't think we need a max recursion depth for processing cbor contents since we leave records to the user to decode 110 + - [ ] i don't *think* we need a max recursion depth for processing cbor contents since we leave records to the user to decode 24 111 25 112 newer ideas 26 113 ··· 47 134 - either just generally to handle huge CARs, or as a fallback when streaming fails 48 135 49 136 redb has an in-memory backend, so it would be possible to *always* use it for block caching. user can choose if they want to allow disk or just do memory, and then "spilling" from the cache to disk would be mostly free? 137 + 138 + 139 + ## license 140 + 141 + This work is dual-licensed under MIT and Apache 2.0. You can choose between one of them if you use this work. 142 + 143 + `SPDX-License-Identifier: MIT OR Apache-2.0`
-176
src/drive.rs
··· 1 - use futures::{Stream, TryStreamExt}; 2 - use ipld_core::cid::Cid; 3 - use std::collections::HashMap; 4 - use std::error::Error; 5 - 6 - use crate::mst::{Commit, Node}; 7 - use crate::walk::{Step, Trip, Walker}; 8 - 9 - #[derive(Debug, thiserror::Error)] 10 - pub enum DriveError<E: Error> { 11 - #[error("Failed to initialize CarReader: {0}")] 12 - CarReader(#[from] iroh_car::Error), 13 - #[error("CAR file requires a root to be present")] 14 - MissingRoot, 15 - #[error("Car block stream error: {0}")] 16 - CarBlockError(Box<dyn Error>), 17 - #[error("Failed to decode commit block: {0}")] 18 - BadCommit(Box<dyn Error>), 19 - #[error("Failed to decode record block: {0}")] 20 - BadRecord(Box<dyn Error>), 21 - #[error("The Commit block reference by the root was not found")] 22 - MissingCommit, 23 - #[error("The MST block {0} could not be found")] 24 - MissingBlock(Cid), 25 - #[error("Failed to walk the mst tree: {0}")] 26 - Tripped(#[from] Trip<E>), 27 - #[error("Not finished walking, but no more blocks are available to continue")] 28 - Dnf, 29 - } 30 - 31 - type CarBlock<E> = Result<(Cid, Vec<u8>), E>; 32 - 33 - #[derive(Debug)] 34 - pub struct Rkey(pub String); 35 - 36 - #[derive(Debug)] 37 - pub enum MaybeProcessedBlock<T, E> { 38 - /// A block that's *probably* a Node (but we can't know yet) 39 - /// 40 - /// It *can be* a record that suspiciously looks a lot like a node, so we 41 - /// cannot eagerly turn it into a Node. We only know for sure what it is 42 - /// when we actually walk down the MST 43 - Raw(Vec<u8>), 44 - /// A processed record from a block that was definitely not a Node 45 - /// 46 - /// Processing has to be fallible because the CAR can have totally-unused 47 - /// blocks, which can just be garbage. since we're eagerly trying to process 48 - /// record blocks without knowing for sure that they *are* records, we 49 - /// discard any definitely-not-nodes that fail processing and keep their 50 - /// error in the buffer for them. 
if we later try to retreive them as a 51 - /// record, then we can surface the error. 52 - /// 53 - /// If we _never_ needed this block, then we may have wasted a bit of effort 54 - /// trying to process it. Oh well. 55 - /// 56 - /// It would be nice to store the real error type from the processing 57 - /// function, but I'm leaving that generics puzzle for later. 58 - /// 59 - /// There's an alternative here, which would be to kick unprocessable blocks 60 - /// back to Raw, or maybe even a new RawUnprocessable variant. Then we could 61 - /// surface the typed error later if needed by trying to reprocess. 62 - Processed(Result<T, E>), 63 - } 64 - 65 - // TODO: generic error not box dyn nonsense. 66 - pub type ProcRes<T, E> = Result<T, E>; 67 - 68 - pub struct Vehicle<SE, S, T, P, PE> 69 - where 70 - S: Stream<Item = CarBlock<SE>>, 71 - P: Fn(&[u8]) -> ProcRes<T, PE>, 72 - PE: Error, 73 - { 74 - block_stream: S, 75 - blocks: HashMap<Cid, MaybeProcessedBlock<T, PE>>, 76 - walker: Walker, 77 - process: P, 78 - } 79 - 80 - impl<SE, S, T: Clone, P, PE> Vehicle<SE, S, T, P, PE> 81 - where 82 - SE: Error + 'static, 83 - S: Stream<Item = CarBlock<SE>> + Unpin, 84 - P: Fn(&[u8]) -> ProcRes<T, PE>, 85 - PE: Error, 86 - { 87 - pub async fn init( 88 - root: Cid, 89 - mut block_stream: S, 90 - process: P, 91 - ) -> Result<(Commit, Self), DriveError<PE>> { 92 - let mut blocks = HashMap::new(); 93 - 94 - let mut commit = None; 95 - 96 - while let Some((cid, data)) = block_stream 97 - .try_next() 98 - .await 99 - .map_err(|e| DriveError::CarBlockError(e.into()))? 
100 - { 101 - if cid == root { 102 - let c: Commit = serde_ipld_dagcbor::from_slice(&data) 103 - .map_err(|e| DriveError::BadCommit(e.into()))?; 104 - commit = Some(c); 105 - break; // inner while 106 - } else { 107 - blocks.insert( 108 - cid, 109 - if Node::could_be(&data) { 110 - MaybeProcessedBlock::Raw(data) 111 - } else { 112 - MaybeProcessedBlock::Processed(process(&data)) 113 - }, 114 - ); 115 - } 116 - } 117 - 118 - // we either broke out or read all the blocks without finding the commit... 119 - let commit = commit.ok_or(DriveError::MissingCommit)?; 120 - 121 - let walker = Walker::new(commit.data); 122 - 123 - let me = Self { 124 - block_stream, 125 - blocks, 126 - walker, 127 - process, 128 - }; 129 - Ok((commit, me)) 130 - } 131 - 132 - async fn drive_until(&mut self, cid_needed: Cid) -> Result<(), DriveError<PE>> { 133 - while let Some((cid, data)) = self 134 - .block_stream 135 - .try_next() 136 - .await 137 - .map_err(|e| DriveError::CarBlockError(e.into()))? 138 - { 139 - self.blocks.insert( 140 - cid, 141 - if Node::could_be(&data) { 142 - MaybeProcessedBlock::Raw(data) 143 - } else { 144 - MaybeProcessedBlock::Processed((self.process)(&data)) 145 - }, 146 - ); 147 - if cid == cid_needed { 148 - return Ok(()); 149 - } 150 - } 151 - 152 - // if we never found the block 153 - Err(DriveError::MissingBlock(cid_needed)) 154 - } 155 - 156 - pub async fn next_record(&mut self) -> Result<Option<(Rkey, T)>, DriveError<PE>> { 157 - loop { 158 - // walk as far as we can until we run out of blocks or find a record 159 - let cid_needed = match self.walker.walk(&mut self.blocks, &self.process)? 
{ 160 - Step::Rest(cid) => cid, 161 - Step::Finish => return Ok(None), 162 - Step::Step { rkey, data } => return Ok(Some((Rkey(rkey), data))), 163 - }; 164 - 165 - // load blocks until we reach that cid 166 - self.drive_until(cid_needed).await?; 167 - } 168 - } 169 - 170 - pub fn stream(self) -> impl Stream<Item = Result<(Rkey, T), DriveError<PE>>> { 171 - futures::stream::try_unfold(self, |mut this| async move { 172 - let maybe_record = this.next_record().await?; 173 - Ok(maybe_record.map(|b| (b, this))) 174 - }) 175 - } 176 - }
+75 -3
src/lib.rs
··· 1 - pub mod drive; 2 - pub mod mst; 3 - pub mod walk; 1 + /*! 2 + A robust CAR file -> MST walker for atproto 3 + 4 + Small CARs have their blocks buffered in memory. If a configurable memory limit 5 + is reached while reading blocks, CAR reading is suspended, and can be continued 6 + by providing disk storage to buffer the CAR blocks instead. 7 + 8 + A `process` function can be provided for tasks where records are transformed 9 + into a smaller representation, to save memory (and disk) during block reading. 10 + 11 + Once blocks are loaded, the MST is walked and emitted as chunks of 12 + `(rkey, processed_block)` pairs, in order (depth first, left-to-right). 13 + 14 + Some MST validations are applied 15 + - Keys must appear in order 16 + - Keys must be at the correct MST tree depth 17 + 18 + `iroh_car` additionally applies a block size limit of `2MiB`. 19 + 20 + ``` 21 + use repo_stream::{Driver, DriverBuilder, DiskBuilder}; 22 + 23 + # #[tokio::main] 24 + # async fn main() -> Result<(), Box<dyn std::error::Error>> { 25 + # let reader = include_bytes!("../car-samples/tiny.car").as_slice(); 26 + let mut total_size = 0; 27 + 28 + match DriverBuilder::new() 29 + .with_mem_limit_mb(10) 30 + .with_block_processor(|rec| rec.len()) // block processing: just extract the raw record size 31 + .load_car(reader) 32 + .await? 33 + { 34 + 35 + // if all blocks fit within memory 36 + Driver::Memory(_commit, mut driver) => { 37 + while let Some(chunk) = driver.next_chunk(256).await? { 38 + for (_rkey, size) in chunk { 39 + total_size += size; 40 + } 41 + } 42 + }, 43 + 44 + // if the CAR was too big for in-memory processing 45 + Driver::Disk(paused) => { 46 + // set up a disk store we can spill to 47 + let store = DiskBuilder::new().open("some/path.db".into()).await?; 48 + // do the spilling, get back a (similar) driver 49 + let (_commit, mut driver) = paused.finish_loading(store).await?; 50 + 51 + while let Some(chunk) = driver.next_chunk(256).await? 
{ 52 + for (_rkey, size) in chunk { 53 + total_size += size; 54 + } 55 + } 56 + 57 + // clean up the disk store (drop tables etc) 58 + driver.reset_store().await?; 59 + } 60 + }; 61 + println!("sum of size of all records: {total_size}"); 62 + # Ok(()) 63 + # } 64 + ``` 65 + 66 + Disk spilling suspends and returns a `Driver::Disk(paused)` instead of going 67 + ahead and eagerly using disk I/O. This means you have to write a bit more code 68 + to handle both cases, but it allows you to have finer control over resource 69 + usage. For example, you can drive a number of parallel memory CAR workers, and 70 + separately have a different number of disk workers picking up suspended disk 71 + tasks from a queue. 72 + 73 + Find more [examples in the repo](https://tangled.org/@microcosm.blue/repo-stream/tree/main/examples). 74 + 75 + */
-111
src/mst.rs
··· 1 - //! Low-level types for parsing raw atproto MST CARs 2 - //! 3 - //! The primary aim is to work through the **tree** structure. Non-node blocks 4 - //! are left as raw bytes, for upper levels to parse into DAG-CBOR or whatever. 5 - 6 - use ipld_core::cid::Cid; 7 - use serde::Deserialize; 8 - 9 - /// The top-level data object in a repository's tree is a signed commit. 10 - #[derive(Debug, Deserialize)] 11 - // #[serde(deny_unknown_fields)] 12 - pub struct Commit { 13 - /// the account DID associated with the repo, in strictly normalized form 14 - /// (eg, lowercase as appropriate) 15 - pub did: String, 16 - /// fixed value of 3 for this repo format version 17 - pub version: u64, 18 - /// pointer to the top of the repo contents tree structure (MST) 19 - pub data: Cid, 20 - /// revision of the repo, used as a logical clock. 21 - /// 22 - /// TID format. Must increase monotonically. Recommend using current 23 - /// timestamp as TID; rev values in the "future" (beyond a fudge factor) 24 - /// should be ignored and not processed 25 - pub rev: String, 26 - /// pointer (by hash) to a previous commit object for this repository. 27 - /// 28 - /// Could be used to create a chain of history, but largely unused (included 29 - /// for v2 backwards compatibility). In version 3 repos, this field must 30 - /// exist in the CBOR object, but is virtually always null. NOTE: previously 31 - /// specified as nullable and optional, but this caused interoperability 32 - /// issues. 
33 - pub prev: Option<Cid>, 34 - /// cryptographic signature of this commit, as raw bytes 35 - #[serde(with = "serde_bytes")] 36 - pub sig: Vec<u8>, 37 - } 38 - 39 - /// MST node data schema 40 - #[derive(Debug, Deserialize, PartialEq)] 41 - #[serde(deny_unknown_fields)] 42 - pub struct Node { 43 - /// link to sub-tree Node on a lower level and with all keys sorting before 44 - /// keys at this node 45 - #[serde(rename = "l")] 46 - pub left: Option<Cid>, 47 - /// ordered list of TreeEntry objects 48 - /// 49 - /// atproto MSTs have a fanout of 4, so there can be max 4 entries. 50 - #[serde(rename = "e")] 51 - pub entries: Vec<Entry>, // maybe we can do [Option<Entry>; 4]? 52 - } 53 - 54 - impl Node { 55 - /// test if a block could possibly be a node 56 - /// 57 - /// we can't eagerly decode records except where we're *sure* they cannot be 58 - /// an mst node (and even then we can only attempt) because you can't know 59 - /// with certainty what a block is supposed to be without actually walking 60 - /// the tree. 61 - /// 62 - /// so if a block *could be* a node, any record converter must postpone 63 - /// processing. if it turns out it happens to be a very node-looking record, 64 - /// well, sorry, it just has to only be processed later when that's known. 65 - pub fn could_be(bytes: impl AsRef<[u8]>) -> bool { 66 - const NODE_FINGERPRINT: [u8; 3] = [ 67 - 0xA2, // map length 2 (for "l" and "e" keys) 68 - 0x61, // text length 1 69 - b'e', // "e" before "l" because map keys have to be lex-sorted 70 - // 0x8?: "e" contains an array (0x8 nibble) of some length (low nib) 71 - ]; 72 - let bytes = bytes.as_ref(); 73 - bytes.starts_with(&NODE_FINGERPRINT) 74 - // && bytes.get(3).map(|b| b & 0xF0 == 0x80).unwrap_or(false) 75 - } 76 - 77 - /// Check if a node has any entries 78 - /// 79 - /// An empty repository with no records is represented as a single MST node 80 - /// with an empty array of entries. 
This is the only situation in which a 81 - /// tree may contain an empty leaf node which does not either contain keys 82 - /// ("entries") or point to a sub-tree containing entries. 83 - /// 84 - /// TODO: to me this is slightly unclear with respect to `l` (ask someone). 85 - /// ...is that what "The top of the tree must not be a an empty node which 86 - /// only points to a sub-tree." is referring to? 87 - pub fn is_empty(&self) -> bool { 88 - self.left.is_none() && self.entries.is_empty() 89 - } 90 - } 91 - 92 - /// TreeEntry object 93 - #[derive(Debug, Deserialize, PartialEq)] 94 - #[serde(deny_unknown_fields)] 95 - pub struct Entry { 96 - /// count of bytes shared with previous TreeEntry in this Node (if any) 97 - #[serde(rename = "p")] 98 - pub prefix_len: usize, 99 - /// remainder of key for this TreeEntry, after "prefixlen" have been removed 100 - #[serde(rename = "k", with = "serde_bytes")] 101 - pub keysuffix: Vec<u8>, // can we String this here? 102 - /// link to the record data (CBOR) for this entry 103 - #[serde(rename = "v")] 104 - pub value: Cid, 105 - /// link to a sub-tree Node at a lower level 106 - /// 107 - /// the lower level must have keys sorting after this TreeEntry's key (to 108 - /// the "right"), but before the next TreeEntry's key in this Node (if any) 109 - #[serde(rename = "t")] 110 - pub tree: Option<Cid>, 111 - }
-383
src/walk.rs
··· 1 - //! Depth-first MST traversal 2 - 3 - use crate::drive::{MaybeProcessedBlock, ProcRes}; 4 - use crate::mst::Node; 5 - use ipld_core::cid::Cid; 6 - use std::collections::HashMap; 7 - use std::error::Error; 8 - 9 - #[derive(Debug, thiserror::Error)] 10 - pub enum Trip<E: Error> { 11 - #[error("empty mst nodes are not allowed")] 12 - NodeEmpty, 13 - #[error("Failed to decode commit block: {0}")] 14 - BadCommit(Box<dyn std::error::Error>), 15 - #[error("Failed to process record: {0}")] 16 - RecordFailedProcessing(Box<dyn Error>), 17 - #[error("Action node error: {0}")] 18 - ActionNode(#[from] ActionNodeError), 19 - #[error("Process failed: {0}")] 20 - ProcessFailed(E), 21 - } 22 - 23 - #[derive(Debug, thiserror::Error)] 24 - pub enum ActionNodeError { 25 - #[error("Failed to compute an rkey due to invalid prefix_len")] 26 - EntryPrefixOutOfbounds, 27 - #[error("RKey was not utf-8")] 28 - EntryRkeyNotUtf8(#[from] std::string::FromUtf8Error), 29 - } 30 - 31 - #[derive(Debug)] 32 - pub enum Step<T> { 33 - Rest(Cid), 34 - Finish, 35 - Step { rkey: String, data: T }, 36 - } 37 - 38 - #[derive(Debug, Clone, PartialEq)] 39 - enum Need { 40 - Node(Cid), 41 - Record { rkey: String, cid: Cid }, 42 - } 43 - 44 - fn push_from_node(stack: &mut Vec<Need>, node: &Node) -> Result<(), ActionNodeError> { 45 - let mut entries = Vec::with_capacity(node.entries.len()); 46 - 47 - let mut prefix = vec![]; 48 - for entry in &node.entries { 49 - let mut rkey = vec![]; 50 - let pre_checked = prefix 51 - .get(..entry.prefix_len) 52 - .ok_or(ActionNodeError::EntryPrefixOutOfbounds)?; 53 - rkey.extend_from_slice(pre_checked); 54 - rkey.extend_from_slice(&entry.keysuffix); 55 - prefix = rkey.clone(); 56 - 57 - entries.push(Need::Record { 58 - rkey: String::from_utf8(rkey)?, 59 - cid: entry.value, 60 - }); 61 - if let Some(ref tree) = entry.tree { 62 - entries.push(Need::Node(*tree)); 63 - } 64 - } 65 - 66 - entries.reverse(); 67 - stack.append(&mut entries); 68 - 69 - if let Some(tree) = 
node.left { 70 - stack.push(Need::Node(tree)); 71 - } 72 - Ok(()) 73 - } 74 - 75 - #[derive(Debug)] 76 - pub struct Walker { 77 - stack: Vec<Need>, 78 - } 79 - 80 - impl Walker { 81 - pub fn new(tree_root_cid: Cid) -> Self { 82 - Self { 83 - stack: vec![Need::Node(tree_root_cid)], 84 - } 85 - } 86 - 87 - pub fn walk<T: Clone, E: Error>( 88 - &mut self, 89 - blocks: &mut HashMap<Cid, MaybeProcessedBlock<T, E>>, 90 - process: impl Fn(&[u8]) -> ProcRes<T, E>, 91 - ) -> Result<Step<T>, Trip<E>> { 92 - loop { 93 - let Some(mut need) = self.stack.last() else { 94 - log::trace!("tried to walk but we're actually done."); 95 - return Ok(Step::Finish); 96 - }; 97 - 98 - match &mut need { 99 - Need::Node(cid) => { 100 - log::trace!("need node {cid:?}"); 101 - let Some(block) = blocks.remove(cid) else { 102 - log::trace!("node not found, resting"); 103 - return Ok(Step::Rest(*cid)); 104 - }; 105 - 106 - let MaybeProcessedBlock::Raw(data) = block else { 107 - return Err(Trip::BadCommit("failed commit fingerprint".into())); 108 - }; 109 - let node = serde_ipld_dagcbor::from_slice::<Node>(&data) 110 - .map_err(|e| Trip::BadCommit(e.into()))?; 111 - 112 - // found node, make sure we remember 113 - self.stack.pop(); 114 - 115 - // queue up work on the found node next 116 - push_from_node(&mut self.stack, &node)?; 117 - } 118 - Need::Record { rkey, cid } => { 119 - log::trace!("need record {cid:?}"); 120 - let Some(data) = blocks.get_mut(cid) else { 121 - log::trace!("record block not found, resting"); 122 - return Ok(Step::Rest(*cid)); 123 - }; 124 - let rkey = rkey.clone(); 125 - let data = match data { 126 - MaybeProcessedBlock::Raw(data) => process(data), 127 - MaybeProcessedBlock::Processed(Ok(t)) => Ok(t.clone()), 128 - bad => { 129 - // big hack to pull the error out -- this corrupts 130 - // a block, so we should not continue trying to work 131 - let mut steal = MaybeProcessedBlock::Raw(vec![]); 132 - std::mem::swap(&mut steal, bad); 133 - let 
MaybeProcessedBlock::Processed(Err(e)) = steal else { 134 - unreachable!(); 135 - }; 136 - return Err(Trip::ProcessFailed(e)); 137 - } 138 - }; 139 - 140 - // found node, make sure we remember 141 - self.stack.pop(); 142 - 143 - log::trace!("emitting a block as a step. depth={}", self.stack.len()); 144 - let data = data.map_err(Trip::ProcessFailed)?; 145 - return Ok(Step::Step { rkey, data }); 146 - } 147 - } 148 - } 149 - } 150 - } 151 - 152 - #[cfg(test)] 153 - mod test { 154 - use super::*; 155 - // use crate::mst::Entry; 156 - 157 - fn cid1() -> Cid { 158 - "bafyreihixenvk3ahqbytas4hk4a26w43bh6eo3w6usjqtxkpzsvi655a3m" 159 - .parse() 160 - .unwrap() 161 - } 162 - // fn cid2() -> Cid { 163 - // "QmY7Yh4UquoXHLPFo2XbhXkhBvFoPwmQUSa92pxnxjQuPU" 164 - // .parse() 165 - // .unwrap() 166 - // } 167 - // fn cid3() -> Cid { 168 - // "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi" 169 - // .parse() 170 - // .unwrap() 171 - // } 172 - // fn cid4() -> Cid { 173 - // "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR" 174 - // .parse() 175 - // .unwrap() 176 - // } 177 - // fn cid5() -> Cid { 178 - // "QmSnuWmxptJZdLJpKRarxBMS2Ju2oANVrgbr2xWbie9b2D" 179 - // .parse() 180 - // .unwrap() 181 - // } 182 - // fn cid6() -> Cid { 183 - // "QmdmQXB2mzChmMeKY47C43LxUdg1NDJ5MWcKMKxDu7RgQm" 184 - // .parse() 185 - // .unwrap() 186 - // } 187 - // fn cid7() -> Cid { 188 - // "bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze" 189 - // .parse() 190 - // .unwrap() 191 - // } 192 - // fn cid8() -> Cid { 193 - // "bafyreif3tfdpr5n4jdrbielmcapwvbpcthepfkwq2vwonmlhirbjmotedi" 194 - // .parse() 195 - // .unwrap() 196 - // } 197 - // fn cid9() -> Cid { 198 - // "bafyreicnokmhmrnlp2wjhyk2haep4tqxiptwfrp2rrs7rzq7uk766chqvq" 199 - // .parse() 200 - // .unwrap() 201 - // } 202 - 203 - #[test] 204 - fn test_next_from_node_empty() { 205 - let node = Node { 206 - left: None, 207 - entries: vec![], 208 - }; 209 - let mut stack = vec![]; 210 - push_from_node(&mut stack, 
&node).unwrap(); 211 - assert_eq!(stack.last(), None); 212 - } 213 - 214 - #[test] 215 - fn test_needs_from_node_just_left() { 216 - let node = Node { 217 - left: Some(cid1()), 218 - entries: vec![], 219 - }; 220 - let mut stack = vec![]; 221 - push_from_node(&mut stack, &node).unwrap(); 222 - assert_eq!(stack.last(), Some(Need::Node(cid1())).as_ref()); 223 - } 224 - 225 - // #[test] 226 - // fn test_needs_from_node_just_one_record() { 227 - // let node = Node { 228 - // left: None, 229 - // entries: vec![Entry { 230 - // keysuffix: "asdf".into(), 231 - // prefix_len: 0, 232 - // value: cid1(), 233 - // tree: None, 234 - // }], 235 - // }; 236 - // assert_eq!( 237 - // needs_from_node(node).unwrap(), 238 - // vec![Need::Record { 239 - // rkey: "asdf".into(), 240 - // cid: cid1(), 241 - // },] 242 - // ); 243 - // } 244 - 245 - // #[test] 246 - // fn test_needs_from_node_two_records() { 247 - // let node = Node { 248 - // left: None, 249 - // entries: vec![ 250 - // Entry { 251 - // keysuffix: "asdf".into(), 252 - // prefix_len: 0, 253 - // value: cid1(), 254 - // tree: None, 255 - // }, 256 - // Entry { 257 - // keysuffix: "gh".into(), 258 - // prefix_len: 2, 259 - // value: cid2(), 260 - // tree: None, 261 - // }, 262 - // ], 263 - // }; 264 - // assert_eq!( 265 - // needs_from_node(node).unwrap(), 266 - // vec![ 267 - // Need::Record { 268 - // rkey: "asdf".into(), 269 - // cid: cid1(), 270 - // }, 271 - // Need::Record { 272 - // rkey: "asgh".into(), 273 - // cid: cid2(), 274 - // }, 275 - // ] 276 - // ); 277 - // } 278 - 279 - // #[test] 280 - // fn test_needs_from_node_with_both() { 281 - // let node = Node { 282 - // left: None, 283 - // entries: vec![Entry { 284 - // keysuffix: "asdf".into(), 285 - // prefix_len: 0, 286 - // value: cid1(), 287 - // tree: Some(cid2()), 288 - // }], 289 - // }; 290 - // assert_eq!( 291 - // needs_from_node(node).unwrap(), 292 - // vec![ 293 - // Need::Record { 294 - // rkey: "asdf".into(), 295 - // cid: cid1(), 296 - // }, 
297 - // Need::Node(cid2()), 298 - // ] 299 - // ); 300 - // } 301 - 302 - // #[test] 303 - // fn test_needs_from_node_left_and_record() { 304 - // let node = Node { 305 - // left: Some(cid1()), 306 - // entries: vec![Entry { 307 - // keysuffix: "asdf".into(), 308 - // prefix_len: 0, 309 - // value: cid2(), 310 - // tree: None, 311 - // }], 312 - // }; 313 - // assert_eq!( 314 - // needs_from_node(node).unwrap(), 315 - // vec![ 316 - // Need::Node(cid1()), 317 - // Need::Record { 318 - // rkey: "asdf".into(), 319 - // cid: cid2(), 320 - // }, 321 - // ] 322 - // ); 323 - // } 324 - 325 - // #[test] 326 - // fn test_needs_from_full_node() { 327 - // let node = Node { 328 - // left: Some(cid1()), 329 - // entries: vec![ 330 - // Entry { 331 - // keysuffix: "asdf".into(), 332 - // prefix_len: 0, 333 - // value: cid2(), 334 - // tree: Some(cid3()), 335 - // }, 336 - // Entry { 337 - // keysuffix: "ghi".into(), 338 - // prefix_len: 1, 339 - // value: cid4(), 340 - // tree: Some(cid5()), 341 - // }, 342 - // Entry { 343 - // keysuffix: "jkl".into(), 344 - // prefix_len: 2, 345 - // value: cid6(), 346 - // tree: Some(cid7()), 347 - // }, 348 - // Entry { 349 - // keysuffix: "mno".into(), 350 - // prefix_len: 4, 351 - // value: cid8(), 352 - // tree: Some(cid9()), 353 - // }, 354 - // ], 355 - // }; 356 - // assert_eq!( 357 - // needs_from_node(node).unwrap(), 358 - // vec![ 359 - // Need::Node(cid1()), 360 - // Need::Record { 361 - // rkey: "asdf".into(), 362 - // cid: cid2(), 363 - // }, 364 - // Need::Node(cid3()), 365 - // Need::Record { 366 - // rkey: "aghi".into(), 367 - // cid: cid4(), 368 - // }, 369 - // Need::Node(cid5()), 370 - // Need::Record { 371 - // rkey: "agjkl".into(), 372 - // cid: cid6(), 373 - // }, 374 - // Need::Node(cid7()), 375 - // Need::Record { 376 - // rkey: "agjkmno".into(), 377 - // cid: cid8(), 378 - // }, 379 - // Need::Node(cid9()), 380 - // ] 381 - // ); 382 - // } 383 - }
+34 -31
tests/non-huge-cars.rs
··· 1 1 extern crate repo_stream; 2 - use futures::TryStreamExt; 3 - use iroh_car::CarReader; 4 - use std::convert::Infallible; 2 + use repo_stream::Driver; 5 3 4 + const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car"); 6 5 const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car"); 7 6 const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car"); 8 7 const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car"); 9 8 10 - async fn test_car(bytes: &[u8], expected_records: usize, expected_sum: usize) { 11 - let reader = CarReader::new(bytes).await.unwrap(); 12 - 13 - let root = reader 14 - .header() 15 - .roots() 16 - .first() 17 - .ok_or("missing root") 9 + async fn test_car( 10 + bytes: &[u8], 11 + expected_records: usize, 12 + expected_sum: usize, 13 + expect_profile: bool, 14 + ) { 15 + let mut driver = match Driver::load_car(bytes, |block| block.len(), 10 /* MiB */) 16 + .await 18 17 .unwrap() 19 - .clone(); 20 - 21 - let stream = std::pin::pin!(reader.stream()); 22 - 23 - let (_commit, v) = 24 - repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 25 - .await 26 - .unwrap(); 27 - let mut record_stream = std::pin::pin!(v.stream()); 18 + { 19 + Driver::Memory(_commit, mem_driver) => mem_driver, 20 + Driver::Disk(_) => panic!("too big"), 21 + }; 28 22 29 23 let mut records = 0; 30 24 let mut sum = 0; 31 25 let mut found_bsky_profile = false; 32 26 let mut prev_rkey = "".to_string(); 33 - while let Some((rkey, size)) = record_stream.try_next().await.unwrap() { 34 - records += 1; 35 - sum += size; 36 - if rkey.0 == "app.bsky.actor.profile/self" { 37 - found_bsky_profile = true; 27 + 28 + while let Some(pairs) = driver.next_chunk(256).await.unwrap() { 29 + for (rkey, size) in pairs { 30 + records += 1; 31 + sum += size; 32 + if rkey == "app.bsky.actor.profile/self" { 33 + found_bsky_profile = true; 34 + } 35 + assert!(rkey > prev_rkey, "rkeys are streamed in 
order"); 36 + prev_rkey = rkey; 38 37 } 39 - assert!(rkey.0 > prev_rkey, "rkeys are streamed in order"); 40 - prev_rkey = rkey.0; 41 38 } 39 + 42 40 assert_eq!(records, expected_records); 43 41 assert_eq!(sum, expected_sum); 44 - assert!(found_bsky_profile); 42 + assert_eq!(found_bsky_profile, expect_profile); 43 + } 44 + 45 + #[tokio::test] 46 + async fn test_empty_car() { 47 + test_car(EMPTY_CAR, 0, 0, false).await 45 48 } 46 49 47 50 #[tokio::test] 48 51 async fn test_tiny_car() { 49 - test_car(TINY_CAR, 8, 2071).await 52 + test_car(TINY_CAR, 8, 2071, true).await 50 53 } 51 54 52 55 #[tokio::test] 53 56 async fn test_little_car() { 54 - test_car(LITTLE_CAR, 278, 246960).await 57 + test_car(LITTLE_CAR, 278, 246960, true).await 55 58 } 56 59 57 60 #[tokio::test] 58 61 async fn test_midsize_car() { 59 - test_car(MIDSIZE_CAR, 11585, 3741393).await 62 + test_car(MIDSIZE_CAR, 11585, 3741393, true).await 60 63 }