Fast and robust atproto CAR file processing in Rust
1extern crate repo_stream;
2use repo_stream::Driver;
3
4const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car");
5const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car");
6const LITTLE_CAR: &'static [u8] = include_bytes!("../car-samples/little.car");
7const MIDSIZE_CAR: &'static [u8] = include_bytes!("../car-samples/midsize.car");
8
9async fn test_car(
10 bytes: &[u8],
11 expected_records: usize,
12 expected_sum: usize,
13 expect_profile: bool,
14) {
15 let mut driver = match Driver::load_car(bytes, |block| block.len(), 10 /* MiB */)
16 .await
17 .unwrap()
18 {
19 Driver::Memory(_commit, mem_driver) => mem_driver,
20 Driver::Disk(_) => panic!("too big"),
21 };
22
23 let mut records = 0;
24 let mut sum = 0;
25 let mut found_bsky_profile = false;
26 let mut prev_rkey = "".to_string();
27
28 while let Some(pairs) = driver.next_chunk(256).await.unwrap() {
29 for (rkey, size) in pairs {
30 records += 1;
31 sum += size;
32 if rkey == "app.bsky.actor.profile/self" {
33 found_bsky_profile = true;
34 }
35 assert!(rkey > prev_rkey, "rkeys are streamed in order");
36 prev_rkey = rkey;
37 }
38 }
39
40 assert_eq!(records, expected_records);
41 assert_eq!(sum, expected_sum);
42 assert_eq!(found_bsky_profile, expect_profile);
43}
44
45#[tokio::test]
46async fn test_empty_car() {
47 test_car(EMPTY_CAR, 0, 0, false).await
48}
49
50#[tokio::test]
51async fn test_tiny_car() {
52 test_car(TINY_CAR, 8, 2071, true).await
53}
54
55#[tokio::test]
56async fn test_little_car() {
57 test_car(LITTLE_CAR, 278, 246960, true).await
58}
59
60#[tokio::test]
61async fn test_midsize_car() {
62 test_car(MIDSIZE_CAR, 11585, 3741393, true).await
63}