Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
76
fork

Configure Feed

Select the types of activity you want to include in your feed.

backfill: faster and allow skipping old events

+48 -15
+7 -2
ufos/src/consumer.rs
··· 79 79 batch_sender: Sender<LimitedBatch>, 80 80 sketch_secret: SketchSecretPrefix, 81 81 ) -> Self { 82 - let mut rate_limit = tokio::time::interval(std::time::Duration::from_millis(5)); 82 + let mut rate_limit = tokio::time::interval(std::time::Duration::from_millis(3)); 83 83 rate_limit.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); 84 84 Self { 85 85 jetstream_receiver, ··· 93 93 pub async fn run(&mut self) -> anyhow::Result<()> { 94 94 // TODO: report errors *from here* probably, since this gets shipped off into a spawned task that might just vanish 95 95 loop { 96 - match timeout(Duration::from_millis(9_000), self.jetstream_receiver.recv()).await { 96 + match timeout( 97 + Duration::from_millis(30_000), 98 + self.jetstream_receiver.recv(), 99 + ) 100 + .await 101 + { 97 102 Err(_elapsed) => self.no_events_step().await?, 98 103 Ok(Some(event)) => self.handle_event(event).await?, 99 104 Ok(None) => anyhow::bail!("channel closed"),
+30 -4
ufos/src/file_consumer.rs
··· 1 1 use crate::consumer::{Batcher, LimitedBatch, BATCH_QUEUE_SIZE}; 2 2 use crate::store_types::SketchSecretPrefix; 3 + use crate::Cursor; 3 4 use anyhow::Result; 4 5 use jetstream::{error::JetstreamEventError, events::JetstreamEvent}; 5 6 use std::path::PathBuf; ··· 9 10 sync::mpsc::{channel, Receiver, Sender}, 10 11 }; 11 12 12 - async fn read_jsonl(f: File, sender: Sender<JetstreamEvent>) -> Result<()> { 13 + async fn read_jsonl(f: File, sender: Sender<JetstreamEvent>, cursor: Option<Cursor>) -> Result<()> { 13 14 let mut lines = BufReader::new(f).lines(); 15 + if let Some(db_cursor) = cursor { 16 + log::info!("jsonl fixture: skipping events before cursor {db_cursor:?}"); 17 + let mut bad_lines = 0; 18 + let mut skipped = 0; 19 + while let Some(line) = lines.next_line().await? { 20 + let Ok(event) = serde_json::from_str::<JetstreamEvent>(&line) else { 21 + bad_lines += 1; 22 + continue; 23 + }; 24 + if event.cursor < db_cursor { 25 + skipped += 1; 26 + continue; 27 + } 28 + if event.cursor == db_cursor { 29 + log::info!("jsonl fixture: found existing db cursor! skipped {skipped} old events and failed parsing {bad_lines} lines"); 30 + break; 31 + } 32 + anyhow::bail!("jsonl fixture: did not find existing db cursor, found event cursor {:?} which is newer. bailing.", event.cursor); 33 + } 34 + } else { 35 + log::info!("jsonl fixture: no cursor provided, sending every event"); 36 + } 37 + 38 + log::info!("jsonl fixture: now sending events"); 14 39 while let Some(line) = lines.next_line().await? { 15 40 match serde_json::from_str::<JetstreamEvent>(&line) { 16 41 Ok(event) => match sender.send(event).await { ··· 35 60 pub async fn consume( 36 61 p: PathBuf, 37 62 sketch_secret: SketchSecretPrefix, 63 + cursor: Option<Cursor>, 38 64 ) -> Result<Receiver<LimitedBatch>> { 39 65 let f = File::open(p).await?; 40 66 let (jsonl_sender, jsonl_receiver) = channel::<JetstreamEvent>(16); 41 67 let (batch_sender, batch_reciever) = channel::<LimitedBatch>(BATCH_QUEUE_SIZE); 42 68 let mut batcher = Batcher::new(jsonl_receiver, batch_sender, sketch_secret); 43 69 tokio::task::spawn(async move { 44 - let r = read_jsonl(f, jsonl_sender).await; 45 - log::info!("read_jsonl finished: {r:?}"); 70 + let r = read_jsonl(f, jsonl_sender, cursor).await; 71 + log::warn!("read_jsonl finished: {r:?}"); 46 72 }); 47 73 tokio::task::spawn(async move { 48 74 let r = batcher.run().await; 49 - log::info!("batcher finished: {r:?}"); 75 + log::warn!("batcher finished: {r:?}"); 50 76 }); 51 77 Ok(batch_reciever) 52 78 }
+4 -2
ufos/src/main.rs
··· 130 130 131 131 let batches = if jetstream_fixture { 132 132 log::info!("starting with jestream file fixture: {jetstream:?}"); 133 - file_consumer::consume(jetstream.into(), sketch_secret).await? 133 + file_consumer::consume(jetstream.into(), sketch_secret, cursor).await? 134 134 } else { 135 135 log::info!( 136 136 "starting consumer with cursor: {cursor:?} from {:?} ago", ··· 163 163 let mut last_at = std::time::SystemTime::now(); 164 164 let mut last_cursor = None; 165 165 let mut last_rollup = None; 166 + let mut interval = tokio::time::interval(std::time::Duration::from_secs(4)); 167 + interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); 166 168 loop { 167 - tokio::time::sleep(std::time::Duration::from_secs_f64(4.)).await; 169 + interval.tick().await; 168 170 match read_store.get_consumer_info().await { 169 171 Err(e) => log::warn!("failed to get jetstream consumer info: {e:?}"), 170 172 Ok(ConsumerInfo::Jetstream {
+7 -7
ufos/src/storage_fjall.rs
··· 1241 1241 let mut ended_early = false; 1242 1242 let mut batch = self.keyspace.batch(); 1243 1243 for (i, kv) in self.feeds.range(live_range).rev().enumerate() { 1244 - if !full_scan && i > 1_000_000 { 1245 - log::info!("stopping collection trim early: already scanned 1M elements"); 1244 + if !full_scan && i > 10_000_000 { 1245 + log::info!("stopping collection trim early: already scanned 10M elements"); 1246 1246 ended_early = true; 1247 1247 break; 1248 1248 } ··· 1336 1336 let mut dirty_nsids = HashSet::new(); 1337 1337 1338 1338 let mut rollup = 1339 - tokio::time::interval(Duration::from_millis(if backfill { 1 } else { 81 })); 1340 - rollup.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); 1339 + tokio::time::interval(Duration::from_micros(if backfill { 100 } else { 81_000 })); 1340 + rollup.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); 1341 1341 1342 1342 let mut trim = 1343 - tokio::time::interval(Duration::from_millis(if backfill { 3_000 } else { 6_000 })); 1343 + tokio::time::interval(Duration::from_millis(if backfill { 500 } else { 6_000 })); 1344 1344 trim.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); 1345 1345 1346 1346 loop { ··· 1362 1362 let (danglers, deleted) = self.0.trim_collection(collection, 512, false).inspect_err(|e| log::error!("trim error: {e:?}"))?; 1363 1363 total_danglers += danglers; 1364 1364 total_deleted += deleted; 1365 - if total_deleted > 1_000_000 { 1366 - log::info!("trim stopped early, more than 1M records already deleted."); 1365 + if total_deleted > 100_000_000 { 1366 + log::info!("trim stopped early, more than 100M records already deleted."); 1367 1367 break; 1368 1368 } 1369 1369 }