Fast and robust atproto CAR file processing in rust

slightly simplify disk channel interface

Changed files
+93 -86
examples
disk-read-file
src
+4 -7
examples/disk-read-file/main.rs
··· 38 38 39 39 let mut n = 0; 40 40 let mut zeros = 0; 41 - let (mut rx, worker) = driver.rx(512).await?; 41 + let mut rx = driver.to_channel(512); 42 42 43 43 log::debug!("walking..."); 44 - while let Some(pairs) = rx.recv().await { 44 + while let Some(r) = rx.recv().await { 45 + let pairs = r?; 45 46 n += pairs.len(); 46 47 for (_, block) in pairs { 47 48 zeros += block.into_iter().filter(|&b| b == b'0').count() 48 49 } 49 50 } 50 - log::debug!("done walking! joining..."); 51 - 52 - worker.await.unwrap().unwrap(); 53 - 54 - log::debug!("joined."); 51 + log::debug!("done walking!"); 55 52 56 53 // log::info!("now is the time to check mem..."); 57 54 // tokio::time::sleep(std::time::Duration::from_secs(22)).await;
+89 -79
src/drive.rs
··· 7 7 use serde::{Deserialize, Serialize}; 8 8 use std::collections::HashMap; 9 9 use std::convert::Infallible; 10 - use tokio::io::AsyncRead; 10 + use tokio::{io::AsyncRead, sync::mpsc}; 11 11 12 12 use crate::mst::{Commit, Node}; 13 13 use crate::walk::{Step, WalkError, Walker}; ··· 44 44 #[error("extra bytes remained after decoding")] 45 45 ExtraGarbage, 46 46 } 47 + 48 + pub type BlockChunk<T> = Vec<(String, T)>; 47 49 48 50 #[derive(Debug, Clone, Serialize, Deserialize)] 49 51 pub enum MaybeProcessedBlock<T> { ··· 161 163 } 162 164 } 163 165 166 + /// The core driver between the block stream and MST walker 167 + /// 168 + /// In the future, PDSs will export CARs in a stream-friendly order that will 169 + /// enable processing them with tiny memory overhead. But that future is not 170 + /// here yet. 171 + /// 172 + /// CARs are almost always in a stream-unfriendly order, so I'm reverting the 173 + /// optimistic stream features: we load all block first, then walk the MST. 174 + /// 175 + /// This makes things much simpler: we only need to worry about spilling to disk 176 + /// in one place, and we always have a reasonable expecatation about how much 177 + /// work the init function will do. We can drop the CAR reader before walking, 178 + /// so the sync/async boundaries become a little easier to work around. 179 + #[derive(Debug)] 180 + pub struct MemDriver<T: Processable> { 181 + blocks: HashMap<Cid, MaybeProcessedBlock<T>>, 182 + walker: Walker, 183 + process: fn(Vec<u8>) -> T, 184 + } 185 + 186 + impl<T: Processable> MemDriver<T> { 187 + /// Manually step through the record outputs 188 + pub async fn next_chunk(&mut self, n: usize) -> Result<Option<BlockChunk<T>>, DriveError> { 189 + let mut out = Vec::with_capacity(n); 190 + for _ in 0..n { 191 + // walk as far as we can until we run out of blocks or find a record 192 + match self.walker.step(&mut self.blocks, self.process)? { 193 + Step::Missing(cid) => return Err(DriveError::MissingBlock(cid)), 194 + Step::Finish => break, 195 + Step::Step { rkey, data } => { 196 + out.push((rkey, data)); 197 + continue; 198 + } 199 + }; 200 + } 201 + 202 + if out.is_empty() { 203 + Ok(None) 204 + } else { 205 + Ok(Some(out)) 206 + } 207 + } 208 + } 209 + 164 210 /// a paritally memory-loaded car file that needs disk spillover to continue 165 211 pub struct BigCar<R: AsyncRead + Unpin, T: Processable> { 166 212 car: CarReader<R>, ··· 204 250 }) 205 251 .await??; 206 252 207 - let (tx, mut rx) = tokio::sync::mpsc::channel::<Vec<(Cid, MaybeProcessedBlock<T>)>>(2); 253 + let (tx, mut rx) = mpsc::channel::<Vec<(Cid, MaybeProcessedBlock<T>)>>(2); 208 254 209 255 let store_worker = tokio::task::spawn_blocking(move || { 210 256 let mut writer = store.get_writer()?; ··· 290 336 pub async fn next_chunk( 291 337 mut self, 292 338 n: usize, 293 - ) -> Result<(Self, Option<Vec<(String, T)>>), DriveError> { 339 + ) -> Result<(Self, Option<BlockChunk<T>>), DriveError> { 294 340 let mut out = Vec::with_capacity(n); 295 341 (self, out) = tokio::task::spawn_blocking(move || { 296 342 let store = self.store; ··· 321 367 } 322 368 } 323 369 324 - pub async fn rx( 370 + fn read_tx_blocking( 325 371 mut self, 326 372 n: usize, 327 - ) -> Result< 328 - ( 329 - tokio::sync::mpsc::Receiver<Vec<(String, T)>>, 330 - tokio::task::JoinHandle<Result<(), DriveError>>, 331 - ), 332 - DriveError, 333 - > { 334 - let (tx, rx) = tokio::sync::mpsc::channel::<Vec<(String, T)>>(1); 373 + tx: mpsc::Sender<Result<BlockChunk<T>, DriveError>>, 374 + ) -> Result<(), mpsc::error::SendError<Result<BlockChunk<T>, DriveError>>> { 375 + let mut reader = match self.store.get_reader() { 376 + Ok(r) => r, 377 + Err(e) => return tx.blocking_send(Err(e.into())), 378 + }; 335 379 336 - // sketch: this worker is going to be allowed to execute without a join handle 337 - // ...should we return the join handle here so the caller at least knows about it? 338 - // yes probably for error handling?? (orrr put errors in the channel) 339 - let worker = tokio::task::spawn_blocking(move || { 340 - let mut reader = self.store.get_reader()?; 380 + loop { 381 + let mut out: BlockChunk<T> = Vec::with_capacity(n); 341 382 342 - loop { 343 - let mut out = Vec::with_capacity(n); 383 + for _ in 0..n { 384 + // walk as far as we can until we run out of blocks or find a record 344 385 345 - for _ in 0..n { 346 - // walk as far as we can until we run out of blocks or find a record 347 - match self.walker.disk_step(&mut reader, self.process)? { 348 - Step::Missing(cid) => return Err(DriveError::MissingBlock(cid)), 349 - Step::Finish => break, 350 - Step::Step { rkey, data } => { 351 - out.push((rkey, data)); 352 - continue; 353 - } 354 - }; 355 - } 386 + let step = match self.walker.disk_step(&mut reader, self.process) { 387 + Ok(s) => s, 388 + Err(e) => return tx.blocking_send(Err(e.into())), 389 + }; 356 390 357 - if out.is_empty() { 358 - break; 359 - } 360 - tx.blocking_send(out) 361 - .map_err(|_| DriveError::ChannelSendError)?; 391 + match step { 392 + Step::Missing(cid) => { 393 + return tx.blocking_send(Err(DriveError::MissingBlock(cid))); 394 + } 395 + Step::Finish => return Ok(()), 396 + Step::Step { rkey, data } => { 397 + out.push((rkey, data)); 398 + continue; 399 + } 400 + }; 362 401 } 363 402 364 - drop(reader); // cannot outlive store 365 - Ok(()) 366 - }); // await later 403 + if out.is_empty() { 404 + break; 405 + } 406 + tx.blocking_send(Ok(out))?; 407 + } 367 408 368 - Ok((rx, worker)) 409 + Ok(()) 369 410 } 370 - } 371 411 372 - /// The core driver between the block stream and MST walker 373 - /// 374 - /// In the future, PDSs will export CARs in a stream-friendly order that will 375 - /// enable processing them with tiny memory overhead. But that future is not 376 - /// here yet. 377 - /// 378 - /// CARs are almost always in a stream-unfriendly order, so I'm reverting the 379 - /// optimistic stream features: we load all block first, then walk the MST. 380 - /// 381 - /// This makes things much simpler: we only need to worry about spilling to disk 382 - /// in one place, and we always have a reasonable expecatation about how much 383 - /// work the init function will do. We can drop the CAR reader before walking, 384 - /// so the sync/async boundaries become a little easier to work around. 385 - #[derive(Debug)] 386 - pub struct MemDriver<T: Processable> { 387 - blocks: HashMap<Cid, MaybeProcessedBlock<T>>, 388 - walker: Walker, 389 - process: fn(Vec<u8>) -> T, 390 - } 412 + pub fn to_channel(self, n: usize) -> mpsc::Receiver<Result<BlockChunk<T>, DriveError>> { 413 + let (tx, rx) = mpsc::channel::<Result<BlockChunk<T>, DriveError>>(1); 391 414 392 - impl<T: Processable> MemDriver<T> { 393 - /// Manually step through the record outputs 394 - pub async fn next_chunk(&mut self, n: usize) -> Result<Option<Vec<(String, T)>>, DriveError> { 395 - let mut out = Vec::with_capacity(n); 396 - for _ in 0..n { 397 - // walk as far as we can until we run out of blocks or find a record 398 - match self.walker.step(&mut self.blocks, self.process)? { 399 - Step::Missing(cid) => return Err(DriveError::MissingBlock(cid)), 400 - Step::Finish => break, 401 - Step::Step { rkey, data } => { 402 - out.push((rkey, data)); 403 - continue; 404 - } 405 - }; 406 - } 415 + // sketch: this worker is going to be allowed to execute without a join handle 416 + tokio::task::spawn_blocking(move || { 417 + if let Err(mpsc::error::SendError(_)) = self.read_tx_blocking(n, tx) { 418 + log::debug!("big car reader exited early due to dropped receiver channel"); 419 + } 420 + }); 407 421 408 - if out.is_empty() { 409 - Ok(None) 410 - } else { 411 - Ok(Some(out)) 412 - } 422 + rx 413 423 } 414 424 }