Fast and robust atproto CAR file processing in Rust

fix benches

Changed files
+47 -43
benches
+24 -21
benches/huge-car.rs
··· 1 extern crate repo_stream; 2 - use futures::TryStreamExt; 3 - use iroh_car::CarReader; 4 - use std::convert::Infallible; 5 use std::path::{Path, PathBuf}; 6 7 use criterion::{Criterion, criterion_group, criterion_main}; 8 9 pub fn criterion_benchmark(c: &mut Criterion) { 10 let rt = tokio::runtime::Builder::new_multi_thread() 11 .enable_all() ··· 20 }); 21 } 22 23 - async fn drive_car(filename: impl AsRef<Path>) { 24 let reader = tokio::fs::File::open(filename).await.unwrap(); 25 let reader = tokio::io::BufReader::new(reader); 26 - let reader = CarReader::new(reader).await.unwrap(); 27 28 - let root = reader 29 - .header() 30 - .roots() 31 - .first() 32 - .ok_or("missing root") 33 .unwrap() 34 - .clone(); 35 - 36 - let stream = std::pin::pin!(reader.stream()); 37 - 38 - let (_commit, v) = 39 - repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 40 - .await 41 - .unwrap(); 42 - let mut record_stream = std::pin::pin!(v.stream()); 43 44 - while let Some(_) = record_stream.try_next().await.unwrap() { 45 - // just here for the drive 46 } 47 } 48 49 criterion_group!(benches, criterion_benchmark);
··· 1 extern crate repo_stream; 2 + use repo_stream::drive::Processable; 3 + use serde::{Deserialize, Serialize}; 4 use std::path::{Path, PathBuf}; 5 6 use criterion::{Criterion, criterion_group, criterion_main}; 7 8 + #[derive(Clone, Serialize, Deserialize)] 9 + struct S(usize); 10 + 11 + impl Processable for S { 12 + fn get_size(&self) -> usize { 13 + 0 // no additional space taken, just its stack size (newtype is free) 14 + } 15 + } 16 + 17 pub fn criterion_benchmark(c: &mut Criterion) { 18 let rt = tokio::runtime::Builder::new_multi_thread() 19 .enable_all() ··· 28 }); 29 } 30 31 + async fn drive_car(filename: impl AsRef<Path>) -> usize { 32 let reader = tokio::fs::File::open(filename).await.unwrap(); 33 let reader = tokio::io::BufReader::new(reader); 34 + 35 + let mb = 2_usize.pow(20); 36 37 + let mut driver = match repo_stream::drive::load_car(reader, |block| S(block.len()), 1024 * mb) 38 + .await 39 .unwrap() 40 + { 41 + repo_stream::drive::Vehicle::Lil(_, mem_driver) => mem_driver, 42 + repo_stream::drive::Vehicle::Big(_) => panic!("not doing disk for benchmark"), 43 + }; 44 45 + let mut n = 0; 46 + while let Some(pairs) = driver.next_chunk(256).await.unwrap() { 47 + n += pairs.len(); 48 } 49 + n 50 } 51 52 criterion_group!(benches, criterion_benchmark);
+23 -22
benches/non-huge-cars.rs
··· 1 extern crate repo_stream; 2 - use futures::TryStreamExt; 3 - use iroh_car::CarReader; 4 - use std::convert::Infallible; 5 6 use criterion::{Criterion, criterion_group, criterion_main}; 7 8 const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car"); 9 const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car"); 10 const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car"); 11 12 pub fn criterion_benchmark(c: &mut Criterion) { 13 let rt = tokio::runtime::Builder::new_multi_thread() 14 .enable_all() ··· 26 }); 27 } 28 29 - async fn drive_car(bytes: &[u8]) { 30 - let reader = CarReader::new(bytes).await.unwrap(); 31 - 32 - let root = reader 33 - .header() 34 - .roots() 35 - .first() 36 - .ok_or("missing root") 37 - .unwrap() 38 - .clone(); 39 - 40 - let stream = std::pin::pin!(reader.stream()); 41 - 42 - let (_commit, v) = 43 - repo_stream::drive::Vehicle::init(root, stream, |block| Ok::<_, Infallible>(block.len())) 44 .await 45 - .unwrap(); 46 - let mut record_stream = std::pin::pin!(v.stream()); 47 48 - while let Some(_) = record_stream.try_next().await.unwrap() { 49 - // just here for the drive 50 } 51 } 52 53 criterion_group!(benches, criterion_benchmark);
··· 1 extern crate repo_stream; 2 3 use criterion::{Criterion, criterion_group, criterion_main}; 4 + use repo_stream::drive::Processable; 5 + use serde::{Deserialize, Serialize}; 6 7 const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car"); 8 const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car"); 9 const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car"); 10 11 + #[derive(Clone, Serialize, Deserialize)] 12 + struct S(usize); 13 + 14 + impl Processable for S { 15 + fn get_size(&self) -> usize { 16 + 0 // no additional space taken, just its stack size (newtype is free) 17 + } 18 + } 19 + 20 pub fn criterion_benchmark(c: &mut Criterion) { 21 let rt = tokio::runtime::Builder::new_multi_thread() 22 .enable_all() ··· 34 }); 35 } 36 37 + async fn drive_car(bytes: &[u8]) -> usize { 38 + let mut driver = 39 + match repo_stream::drive::load_car(bytes, |block| S(block.len()), 32 * 2_usize.pow(20)) 40 .await 41 + .unwrap() 42 + { 43 + repo_stream::drive::Vehicle::Lil(_, mem_driver) => mem_driver, 44 + repo_stream::drive::Vehicle::Big(_) => panic!("not benching big cars here"), 45 + }; 46 47 + let mut n = 0; 48 + while let Some(pairs) = driver.next_chunk(256).await.unwrap() { 49 + n += pairs.len(); 50 } 51 + n 52 } 53 54 criterion_group!(benches, criterion_benchmark);