+2
-3
jetstream/README.md
+2
-3
jetstream/README.md
···
1
-
# jetstream-oxide
1
+
# fork of the awesome jetstream-oxide
2
2
3
-
[](https://crates.io/crates/jetstream-oxide)
4
-
[](https://docs.rs/jetstream-oxide/latest/jetstream_oxide)
3
+
fork note: this readme is likely a bit out of date! i've been messing around with some apis.
5
4
6
5
A typed Rust library for easily interacting with and consuming the
7
6
Bluesky [Jetstream](https://github.com/bluesky-social/jetstream)
+76
-1
jetstream/src/events/mod.rs
+76
-1
jetstream/src/events/mod.rs
···
2
2
pub mod commit;
3
3
pub mod identity;
4
4
5
+
use std::time::{
6
+
Duration,
7
+
SystemTime,
8
+
UNIX_EPOCH,
9
+
};
10
+
5
11
use serde::Deserialize;
6
12
7
13
use crate::exports;
8
14
15
+
/// Opaque wrapper for the time_us cursor used by jetstream
16
+
///
17
+
/// Generally, you should use a cursor
18
+
#[derive(Deserialize, Debug, Clone)]
19
+
pub struct Cursor(u64);
20
+
9
21
/// Basic data that is included with every event.
10
22
#[derive(Deserialize, Debug)]
11
23
pub struct EventInfo {
12
24
pub did: exports::Did,
13
-
pub time_us: u64,
25
+
pub time_us: Cursor,
14
26
pub kind: EventKind,
15
27
}
16
28
···
29
41
Identity,
30
42
Account,
31
43
}
44
+
45
+
impl<R> JetstreamEvent<R> {
46
+
pub fn cursor(&self) -> Cursor {
47
+
match self {
48
+
JetstreamEvent::Commit(commit::CommitEvent::Create { info, .. }) => {
49
+
info.time_us.clone()
50
+
}
51
+
JetstreamEvent::Commit(commit::CommitEvent::Update { info, .. }) => {
52
+
info.time_us.clone()
53
+
}
54
+
JetstreamEvent::Commit(commit::CommitEvent::Delete { info, .. }) => {
55
+
info.time_us.clone()
56
+
}
57
+
JetstreamEvent::Identity(e) => e.info.time_us.clone(),
58
+
JetstreamEvent::Account(e) => e.info.time_us.clone(),
59
+
}
60
+
}
61
+
}
62
+
63
+
impl Cursor {
64
+
/// Get a cursor that will consume all available jetstream replay
65
+
///
66
+
/// This sets the cursor to zero.
67
+
///
68
+
/// Jetstream instances typically only have a few days of replay.
69
+
pub fn from_start() -> Self {
70
+
Self(0)
71
+
}
72
+
/// Get a cursor for a specific time
73
+
///
74
+
/// Panics: if t is older than the unix epoch: Jan 1, 1970.
75
+
///
76
+
/// If you want to receive all available jetstream replay (typically a few days), use
77
+
/// .from_start()
78
+
pub fn at(t: SystemTime) -> Self {
79
+
let unix_dt = t
80
+
.duration_since(UNIX_EPOCH)
81
+
.expect("cannot set jetstream cursor earlier than unix epoch");
82
+
Self(unix_dt.as_micros() as u64)
83
+
}
84
+
/// Get a cursor rewound from now by this amount
85
+
///
86
+
/// Panics: if d is greater than the time since the unix epoch: Jan 1, 1970.
87
+
///
88
+
/// Jetstream instances typically only have a few days of replay.
89
+
pub fn back_by(d: Duration) -> Self {
90
+
Self::at(SystemTime::now() - d)
91
+
}
92
+
/// Get a Cursor from a raw u64
93
+
///
94
+
/// For example, from a jetstream event's `time_us` field.
95
+
pub fn from_raw_u64(time_us: u64) -> Self {
96
+
Self(time_us)
97
+
}
98
+
/// Get the raw u64 value from this cursor.
99
+
pub fn to_raw_u64(&self) -> u64 {
100
+
self.0
101
+
}
102
+
/// Format the cursor value for use in a jetstream connection url querystring
103
+
pub fn to_jetstream(&self) -> String {
104
+
self.0.to_string()
105
+
}
106
+
}
+78
-31
jetstream/src/lib.rs
+78
-31
jetstream/src/lib.rs
···
4
4
5
5
use std::{
6
6
io::{
7
-
Cursor,
7
+
Cursor as IoCursor,
8
8
Read,
9
9
},
10
10
marker::PhantomData,
···
16
16
};
17
17
18
18
use atrium_api::record::KnownRecord;
19
-
use chrono::Utc;
20
19
use futures_util::{
21
20
stream::StreamExt,
22
21
SinkExt,
···
49
48
ConnectionError,
50
49
JetstreamEventError,
51
50
},
52
-
events::JetstreamEvent,
51
+
events::{
52
+
Cursor,
53
+
JetstreamEvent,
54
+
},
53
55
};
54
56
55
57
/// The Jetstream endpoints officially provided by Bluesky themselves.
···
167
169
pub wanted_dids: Vec<exports::Did>,
168
170
/// The compression algorithm to request and use for the WebSocket connection (if any).
169
171
pub compression: JetstreamCompression,
170
-
/// An optional timestamp to begin playback from.
172
+
/// Enable automatic cursor for auto-reconnect
171
173
///
172
-
/// An absent cursor or a cursor from the future will result in live-tail operation.
174
+
/// By default, reconnects will never set a cursor for the connection, so a small number of
175
+
/// events will always be dropped.
173
176
///
174
-
/// When reconnecting, use the time_us from your most recently processed event and maybe
175
-
/// provide a negative buffer (i.e. subtract a few seconds) to ensure gapless playback.
176
-
pub cursor: Option<chrono::DateTime<Utc>>,
177
+
/// If you want gapless playback across reconnects, set this to `true`. If you always want
178
+
/// the latest available events and can tolerate missing some: `false`.
179
+
pub replay_on_reconnect: bool,
177
180
/// Maximum size of send channel for jetstream events.
178
181
///
179
182
/// If your consuming task can't keep up with every new jetstream event in real-time,
···
197
200
wanted_collections: Vec::new(),
198
201
wanted_dids: Vec::new(),
199
202
compression: JetstreamCompression::None,
200
-
cursor: None,
203
+
replay_on_reconnect: false,
201
204
channel_size: 4096, // a few seconds of firehose buffer
202
205
record_type: PhantomData,
203
206
}
···
225
228
},
226
229
);
227
230
228
-
let cursor = self
229
-
.cursor
230
-
.map(|c| ("cursor", c.timestamp_micros().to_string()));
231
-
232
231
let params = did_search_query
233
232
.chain(collection_search_query)
234
233
.chain(std::iter::once(compression))
235
-
.chain(cursor)
236
234
.collect::<Vec<(&str, String)>>();
237
235
238
236
Url::parse_with_params(endpoint, params)
···
276
274
/// A [JetstreamReceiver] is returned which can be used to respond to events. When all instances
277
275
/// of this receiver are dropped, the connection and task are automatically closed.
278
276
pub async fn connect(&self) -> Result<JetstreamReceiver<R>, ConnectionError> {
277
+
self.base_connect(None).await
278
+
}
279
+
280
+
/// Connects to a Jetstream instance as defined in the [JetstreamConfig] with playback from a
281
+
/// cursor
282
+
///
283
+
/// A cursor from the future will result in live-tail operation.
284
+
///
285
+
/// The cursor is only used for the first successful connection -- on auto-reconnect it will
286
+
/// live-tail by default. Set `replay_on_reconnect: true` in the config if you need to
287
+
/// receive every event, which will keep track of the last-seen cursor and reconnect from
288
+
/// there.
289
+
pub async fn connect_cursor(
290
+
&self,
291
+
cursor: Cursor,
292
+
) -> Result<JetstreamReceiver<R>, ConnectionError> {
293
+
self.base_connect(Some(cursor)).await
294
+
}
295
+
296
+
async fn base_connect(
297
+
&self,
298
+
cursor: Option<Cursor>,
299
+
) -> Result<JetstreamReceiver<R>, ConnectionError> {
279
300
// We validate the config again for good measure. Probably not necessary but it can't hurt.
280
301
self.config
281
302
.validate()
···
288
309
.construct_endpoint(&self.config.endpoint)
289
310
.map_err(ConnectionError::InvalidEndpoint)?;
290
311
312
+
let replay_on_reconnect = self.config.replay_on_reconnect;
313
+
291
314
tokio::task::spawn(async move {
292
315
let max_retries = 30;
293
316
let base_delay_ms = 1_000; // 1 second
···
295
318
let success_threshold_s = 15; // 15 seconds, retry count is reset if we were connected at least this long
296
319
297
320
let mut retry_attempt = 0;
321
+
let mut connect_cursor = cursor;
298
322
loop {
299
323
let dict = DecoderDictionary::copy(JETSTREAM_ZSTD_DICTIONARY);
300
324
325
+
let mut configured_endpoint = configured_endpoint.clone();
326
+
if let Some(ref cursor) = connect_cursor {
327
+
configured_endpoint
328
+
.query_pairs_mut()
329
+
.append_pair("cursor", &cursor.to_jetstream());
330
+
}
331
+
332
+
let mut last_cursor = connect_cursor.clone();
333
+
301
334
retry_attempt += 1;
302
335
if let Ok((ws_stream, _)) = connect_async(&configured_endpoint).await {
303
336
let t_connected = Instant::now();
304
-
if let Err(e) = websocket_task(dict, ws_stream, send_channel.clone()).await {
337
+
if let Err(e) =
338
+
websocket_task(dict, ws_stream, send_channel.clone(), &mut last_cursor)
339
+
.await
340
+
{
305
341
log::error!("Jetstream closed after encountering error: {e:?}");
306
342
} else {
307
343
log::error!("Jetstream connection closed cleanly");
308
344
}
309
345
if t_connected.elapsed() > Duration::from_secs(success_threshold_s) {
310
346
retry_attempt = 0;
311
-
continue;
312
347
}
313
348
}
314
349
315
350
if retry_attempt >= max_retries {
316
-
eprintln!("max retries, bye");
351
+
log::error!("hit max retries, bye");
317
352
break;
318
353
}
319
354
320
-
eprintln!("will try to reconnect");
355
+
connect_cursor = if replay_on_reconnect {
356
+
last_cursor
357
+
} else {
358
+
None
359
+
};
321
360
322
-
// Exponential backoff
323
-
let delay_ms = base_delay_ms * (2_u64.pow(retry_attempt));
324
-
325
-
log::error!("Connection failed, retrying in {delay_ms}ms...");
326
-
tokio::time::sleep(Duration::from_millis(delay_ms.min(max_delay_ms))).await;
327
-
log::info!("Attempting to reconnect...")
361
+
if retry_attempt > 0 {
362
+
// Exponential backoff
363
+
let delay_ms = base_delay_ms * (2_u64.pow(retry_attempt));
364
+
log::error!("Connection failed, retrying in {delay_ms}ms...");
365
+
tokio::time::sleep(Duration::from_millis(delay_ms.min(max_delay_ms))).await;
366
+
log::info!("Attempting to reconnect...");
367
+
}
328
368
}
329
369
log::error!("Connection retries exhausted. Jetstream is disconnected.");
330
370
});
···
339
379
dictionary: DecoderDictionary<'_>,
340
380
ws: WebSocketStream<MaybeTlsStream<TcpStream>>,
341
381
send_channel: JetstreamSender<R>,
382
+
last_cursor: &mut Option<Cursor>,
342
383
) -> Result<(), JetstreamEventError> {
343
384
// TODO: Use the write half to allow the user to change configuration settings on the fly.
344
385
let (socket_write, mut socket_read) = ws.split();
···
373
414
Some(Ok(message)) => {
374
415
match message {
375
416
Message::Text(json) => {
376
-
let event = serde_json::from_str(&json)
417
+
let event: JetstreamEvent<R> = serde_json::from_str(&json)
377
418
.map_err(JetstreamEventError::ReceivedMalformedJSON)?;
419
+
let event_cursor = event.cursor();
378
420
379
421
if send_channel.send(event).await.is_err() {
380
422
// We can assume that all receivers have been dropped, so we can close
381
423
// the connection and exit the task.
382
424
log::info!(
383
-
"All receivers for the Jetstream connection have been dropped, closing connection."
384
-
);
425
+
"All receivers for the Jetstream connection have been dropped, closing connection."
426
+
);
385
427
closing_connection = true;
428
+
} else if let Some(v) = last_cursor.as_mut() {
429
+
*v = event_cursor;
386
430
}
387
431
}
388
432
Message::Binary(zstd_json) => {
389
-
let mut cursor = Cursor::new(zstd_json);
433
+
let mut cursor = IoCursor::new(zstd_json);
390
434
let mut decoder = zstd::stream::Decoder::with_prepared_dictionary(
391
435
&mut cursor,
392
436
&dictionary,
···
398
442
.read_to_string(&mut json)
399
443
.map_err(JetstreamEventError::CompressionDecoderError)?;
400
444
401
-
let event = serde_json::from_str(&json)
445
+
let event: JetstreamEvent<R> = serde_json::from_str(&json)
402
446
.map_err(JetstreamEventError::ReceivedMalformedJSON)?;
447
+
let event_cursor = event.cursor();
403
448
404
449
if send_channel.send(event).await.is_err() {
405
450
// We can assume that all receivers have been dropped, so we can close
406
451
// the connection and exit the task.
407
452
log::info!(
408
-
"All receivers for the Jetstream connection have been dropped, closing connection..."
409
-
);
453
+
"All receivers for the Jetstream connection have been dropped, closing connection..."
454
+
);
410
455
closing_connection = true;
456
+
} else if let Some(v) = last_cursor.as_mut() {
457
+
*v = event_cursor;
411
458
}
412
459
}
413
460
Message::Ping(vec) => {