Fast and robust atproto CAR file processing in rust

note some sketchy timing data in the readme and add a `huge-car` benchmark

Changed files
+73 -2
Cargo.toml
+4
```diff
···
 [[bench]]
 name = "non-huge-cars"
 harness = false
+
+[[bench]]
+name = "huge-car"
+harness = false
```
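(`harness = false` turns off the default libtest harness so that Criterion's `criterion_main!`-generated `main` can drive the benchmark, and with `name = "huge-car"` Cargo picks up the source from `benches/huge-car.rs`, added below. The `b.to_async(&rt)` calls in that file also rely on Criterion's `async_tokio` feature, which is presumably already enabled for the existing `non-huge-cars` bench.)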
benches/huge-car.rs
+50
```diff
+extern crate repo_stream;
+use futures::TryStreamExt;
+use iroh_car::CarReader;
+use std::convert::Infallible;
+use std::path::{Path, PathBuf};
+
+use criterion::{Criterion, criterion_group, criterion_main};
+
+pub fn criterion_benchmark(c: &mut Criterion) {
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .expect("Creating runtime failed");
+
+    let filename = std::env::var("HUGE_CAR").expect("HUGE_CAR env var");
+    let filename: PathBuf = filename.try_into().unwrap();
+
+    c.bench_function("huge-car", |b| {
+        b.to_async(&rt).iter(async || drive_car(&filename).await)
+    });
+}
+
+async fn drive_car(filename: impl AsRef<Path>) {
+    let reader = tokio::fs::File::open(filename).await.unwrap();
+    let reader = tokio::io::BufReader::new(reader);
+    let reader = CarReader::new(reader).await.unwrap();
+
+    let root = reader
+        .header()
+        .roots()
+        .first()
+        .ok_or("missing root")
+        .unwrap()
+        .clone();
+
+    let stream = std::pin::pin!(reader.stream());
+
+    let (_commit, v) =
+        repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len()))
+            .await
+            .unwrap();
+    let mut record_stream = std::pin::pin!(v.stream());
+
+    while let Some(_) = record_stream.try_next().await.unwrap() {
+        // just here for the drive
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
```
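As a usage note, the same driving pattern works outside of Criterion. The sketch below is hypothetical and not part of this change; it only reuses the calls the bench itself makes (`CarReader`, `Vehicle::init`, the pinned `stream()`s) and counts records instead of timing them.

```rust
use std::convert::Infallible;

use futures::TryStreamExt;
use iroh_car::CarReader;

// Hypothetical standalone driver: mirrors the bench above, but reports how
// many records were driven out of the CAR instead of measuring elapsed time.
#[tokio::main]
async fn main() {
    let path = std::env::args().nth(1).expect("usage: drive <file.car>");

    let file = tokio::fs::File::open(path).await.unwrap();
    let reader = CarReader::new(tokio::io::BufReader::new(file)).await.unwrap();

    // The repo commit is reachable from the CAR's (single) root.
    let root = reader
        .header()
        .roots()
        .first()
        .expect("missing root")
        .clone();

    let stream = std::pin::pin!(reader.stream());

    // Process each record block into its length, exactly like the bench does.
    let (_commit, vehicle) =
        repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len()))
            .await
            .unwrap();

    let mut records = std::pin::pin!(vehicle.stream());
    let mut count = 0usize;
    while records.try_next().await.unwrap().is_some() {
        count += 1;
    }
    println!("drove {count} records");
}
```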
readme.md
+19 -2
```diff
···
 Fast and (aspirationally) robust atproto CAR file processing in rust
 
 
+current car processing times (records processed into their length usize, phil's dev machine):
+
+- 128MiB CAR file: `347ms`
+- 5.0MiB: `6.1ms`
+- 279KiB: `139us`
+- 3.4KiB: `4.9us`
+
+
+running the huge-car benchmark
+
+- to avoid committing it to the repo, you have to pass it in through the env for now.
+
+```bash
+HUGE_CAR=~/Downloads/did_plc_redacted.car cargo bench -- huge-car
+```
+
+
 todo
 
-- [ ] car file test fixtures & validation tests
+- [x] car file test fixtures & validation tests
 - [ ] make sure we can get the did and signature out for verification
 - [ ] spec compliance todos
   - [ ] assert that keys are ordered and fail if not
···
 - [ ] performance todos
   - [ ] consume the serialized nodes into a mutable efficient format
   - [ ] maybe customize the deserialize impl to do that directly?
-  - [ ] benchmark and profile
+  - [x] benchmark and profile
 - [ ] robustness todos
   - [ ] swap the blocks hashmap for a BlockStore trait that can be dumped to redb
   - [ ] maybe keep the redb function behind a feature flag?
```
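One of the robustness todos above (swapping the blocks hashmap for a `BlockStore` trait that can be dumped to redb) is concrete enough to sketch the shape of. Everything below is hypothetical: the trait name comes from the todo text, but the methods and types are invented for illustration and are not part of repo_stream.

```rust
use std::collections::HashMap;

/// Hypothetical sketch of the `BlockStore` trait named in the readme todo;
/// method names and signatures are illustrative, not repo_stream's API.
trait BlockStore {
    /// Stash a raw block, keyed by its CID bytes.
    fn put(&mut self, cid: Vec<u8>, block: Vec<u8>);
    /// Take a block back out; MST traversal typically needs each block once.
    fn take(&mut self, cid: &[u8]) -> Option<Vec<u8>>;
}

/// The current in-memory behaviour: just a HashMap of blocks.
struct MemoryStore(HashMap<Vec<u8>, Vec<u8>>);

impl BlockStore for MemoryStore {
    fn put(&mut self, cid: Vec<u8>, block: Vec<u8>) {
        self.0.insert(cid, block);
    }
    fn take(&mut self, cid: &[u8]) -> Option<Vec<u8>> {
        self.0.remove(cid)
    }
}

fn main() {
    let mut store = MemoryStore(HashMap::new());
    store.put(b"some-cid".to_vec(), b"block bytes".to_vec());
    assert_eq!(store.take(b"some-cid"), Some(b"block bytes".to_vec()));
    assert_eq!(store.take(b"some-cid"), None);
}
```

A redb-backed store, kept behind a feature flag as the todo suggests, would implement the same trait but spill blocks to disk so that very large CARs don't have to sit entirely in memory.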