Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

event shape and link filtering and stuff

+4
Cargo.lock
··· 3381 3381 name = "spacedust" 3382 3382 version = "0.1.0" 3383 3383 dependencies = [ 3384 + "async-trait", 3384 3385 "clap", 3385 3386 "dropshot", 3386 3387 "futures", 3388 + "http", 3387 3389 "jetstream", 3388 3390 "links", 3389 3391 "metrics", 3390 3392 "schemars", 3393 + "semver", 3391 3394 "serde", 3392 3395 "serde_json", 3396 + "serde_qs", 3393 3397 "tinyjson", 3394 3398 "tokio", 3395 3399 "tokio-tungstenite 0.27.0",
+4
spacedust/Cargo.toml
··· 4 4 edition = "2024" 5 5 6 6 [dependencies] 7 + async-trait = "0.1.88" 7 8 clap = { version = "4.5.40", features = ["derive"] } 8 9 dropshot = "0.16.2" 9 10 futures = "0.3.31" 11 + http = "1.3.1" 10 12 jetstream = { path = "../jetstream", features = ["metrics"] } 11 13 links = { path = "../links" } 12 14 metrics = "0.24.2" 13 15 schemars = "0.8.22" 16 + semver = "1.0.26" 14 17 serde = { version = "1.0.219", features = ["derive"] } 15 18 serde_json = "1.0.140" 19 + serde_qs = "1.0.0-rc.3" 16 20 tinyjson = "2.5.1" 17 21 tokio = { version = "1.45.1", features = ["full"] } 18 22 tokio-tungstenite = "0.27.0"
+1
spacedust/src/consumer.rs
··· 72 72 &*commit.collection, 73 73 &*commit.rkey, 74 74 ), 75 + rev: commit.rev.to_string(), 75 76 target: link.target.into_string(), 76 77 }; 77 78 let _ = b.send(link_ev); // only errors if no subscribers are connected, which is just fine.
+11
spacedust/src/lib.rs
··· 1 1 pub mod consumer; 2 2 pub mod server; 3 + pub mod subscriber; 3 4 4 5 use serde::Serialize; 5 6 ··· 9 10 path: String, 10 11 origin: String, 11 12 target: String, 13 + rev: String, 14 + } 15 + 16 + #[derive(Debug, Serialize)] 17 + #[serde(rename_all="snake_case")] 18 + pub struct ClientEvent { 19 + kind: String, 20 + link: ClientLinkEvent, 12 21 } 13 22 14 23 #[derive(Debug, Serialize)] ··· 16 25 operation: String, 17 26 source: String, 18 27 source_record: String, 28 + source_rev: String, 19 29 subject: String, 20 30 // TODO: include the record too? would save clients a level of hydration 21 31 } ··· 30 40 operation: "create".to_string(), 31 41 source: format!("{}:{undotted}", link.collection), 32 42 source_record: link.origin, 43 + source_rev: link.rev, 33 44 subject: link.target, 34 45 } 35 46 }
+244 -41
spacedust/src/server.rs
··· 1 - use crate::{ClientLinkEvent, LinkEvent}; 1 + use crate::subscriber; 2 + use metrics::{histogram, counter}; 3 + use std::sync::Arc; 4 + use crate::LinkEvent; 5 + use http::{ 6 + header::{ORIGIN, USER_AGENT}, 7 + Response, StatusCode, 8 + }; 2 9 use dropshot::{ 10 + Body, 3 11 ApiDescription, ConfigDropshot, ConfigLogging, ConfigLoggingLevel, Query, RequestContext, 4 - ServerBuilder, WebsocketConnection, channel, 12 + ServerBuilder, WebsocketConnection, channel, endpoint, HttpResponse, 13 + ApiEndpointBodyContentType, ExtractorMetadata, HttpError, ServerContext, 14 + SharedExtractor, 5 15 }; 6 - use futures::SinkExt; 16 + 7 17 use schemars::JsonSchema; 8 18 use serde::{Deserialize, Serialize}; 9 19 use tokio::sync::broadcast; 10 - use tokio_tungstenite::tungstenite::Message; 20 + use tokio::time::Instant; 11 21 use tokio_tungstenite::tungstenite::protocol::Role; 22 + use async_trait::async_trait; 23 + use std::collections::HashSet; 24 + 25 + const INDEX_HTML: &str = include_str!("../static/index.html"); 26 + const FAVICON: &[u8] = include_bytes!("../static/favicon.ico"); 12 27 13 28 pub async fn serve(b: broadcast::Sender<LinkEvent>) -> Result<(), String> { 14 29 let config_logging = ConfigLogging::StderrTerminal { ··· 20 35 .map_err(|error| format!("failed to create logger: {}", error))?; 21 36 22 37 let mut api = ApiDescription::new(); 38 + api.register(index).unwrap(); 39 + api.register(favicon).unwrap(); 40 + api.register(openapi).unwrap(); 23 41 api.register(subscribe).unwrap(); 24 42 25 - let server = ServerBuilder::new(api, b, log) 43 + // TODO: put spec in a once cell / lazy lock thing? 
44 + let spec = Arc::new( 45 + api.openapi( 46 + "Spacedust", 47 + env!("CARGO_PKG_VERSION") 48 + .parse() 49 + .inspect_err(|e| { 50 + eprintln!("failed to parse cargo package version for openapi: {e:?}") 51 + }) 52 + .unwrap_or(semver::Version::new(0, 0, 1)), 53 + ) 54 + .description("A configurable ATProto notifications firehose.") 55 + .contact_name("part of @microcosm.blue") 56 + .contact_url("https://microcosm.blue") 57 + .json() 58 + .map_err(|e| e.to_string())?, 59 + ); 60 + 61 + let ctx = Context { spec, b }; 62 + 63 + let server = ServerBuilder::new(api, ctx, log) 26 64 .config(ConfigDropshot { 27 65 bind_address: "0.0.0.0:9998".parse().unwrap(), 28 66 ..Default::default() ··· 33 71 server.await 34 72 } 35 73 36 - #[derive(Debug, Serialize)] 37 - #[serde(rename_all="snake_case")] 38 - struct ClientEvent { 39 - r#type: String, 40 - link: ClientLinkEvent, 74 + #[derive(Debug, Clone)] 75 + struct Context { 76 + pub spec: Arc<serde_json::Value>, 77 + pub b: broadcast::Sender<LinkEvent>, 78 + } 79 + 80 + async fn instrument_handler<T, H, R>(ctx: &RequestContext<T>, handler: H) -> Result<R, HttpError> 81 + where 82 + R: HttpResponse, 83 + H: Future<Output = Result<R, HttpError>>, 84 + T: ServerContext, 85 + { 86 + let start = Instant::now(); 87 + let result = handler.await; 88 + let latency = start.elapsed(); 89 + let status_code = match &result { 90 + Ok(response) => response.status_code(), 91 + Err(e) => e.status_code.as_status(), 92 + } 93 + .as_str() // just the number (.to_string()'s Display does eg `200 OK`) 94 + .to_string(); 95 + let endpoint = ctx.endpoint.operation_id.clone(); 96 + let headers = ctx.request.headers(); 97 + let origin = headers 98 + .get(ORIGIN) 99 + .and_then(|v| v.to_str().ok()) 100 + .unwrap_or("") 101 + .to_string(); 102 + let ua = headers 103 + .get(USER_AGENT) 104 + .and_then(|v| v.to_str().ok()) 105 + .map(|ua| { 106 + if ua.starts_with("Mozilla/5.0 ") { 107 + "browser" 108 + } else { 109 + ua 110 + } 111 + }) 
112 + .unwrap_or("") 113 + .to_string(); 114 + counter!("server_requests_total", 115 + "endpoint" => endpoint.clone(), 116 + "origin" => origin, 117 + "ua" => ua, 118 + "status_code" => status_code, 119 + ) 120 + .increment(1); 121 + histogram!("server_handler_latency", "endpoint" => endpoint).record(latency.as_micros() as f64); 122 + result 123 + } 124 + 125 + use dropshot::{HttpResponseHeaders, HttpResponseOk}; 126 + 127 + pub type OkCorsResponse<T> = Result<HttpResponseHeaders<HttpResponseOk<T>>, HttpError>; 128 + 129 + /// Helper for constructing Ok responses: return OkCors(T).into() 130 + /// (not happy with this yet) 131 + pub struct OkCors<T: Serialize + JsonSchema + Send + Sync>(pub T); 132 + 133 + impl<T> From<OkCors<T>> for OkCorsResponse<T> 134 + where 135 + T: Serialize + JsonSchema + Send + Sync, 136 + { 137 + fn from(ok: OkCors<T>) -> OkCorsResponse<T> { 138 + let mut res = HttpResponseHeaders::new_unnamed(HttpResponseOk(ok.0)); 139 + res.headers_mut() 140 + .insert("access-control-allow-origin", "*".parse().unwrap()); 141 + Ok(res) 142 + } 143 + } 144 + 145 + // TODO: cors for HttpError 146 + 147 + 148 + /// Serve index page as html 149 + #[endpoint { 150 + method = GET, 151 + path = "/", 152 + /* 153 + * not useful to have this in openapi 154 + */ 155 + unpublished = true, 156 + }] 157 + async fn index(ctx: RequestContext<Context>) -> Result<Response<Body>, HttpError> { 158 + instrument_handler(&ctx, async { 159 + Ok(Response::builder() 160 + .status(StatusCode::OK) 161 + .header(http::header::CONTENT_TYPE, "text/html") 162 + .body(INDEX_HTML.into())?) 
163 + }) 164 + .await 165 + } 166 + 167 + /// Serve the favicon 168 + #[endpoint { 169 + method = GET, 170 + path = "/favicon.ico", 171 + /* 172 + * not useful to have this in openapi 173 + */ 174 + unpublished = true, 175 + }] 176 + async fn favicon(ctx: RequestContext<Context>) -> Result<Response<Body>, HttpError> { 177 + instrument_handler(&ctx, async { 178 + Ok(Response::builder() 179 + .status(StatusCode::OK) 180 + .header(http::header::CONTENT_TYPE, "image/x-icon") 181 + .body(FAVICON.to_vec().into())?) 182 + }) 183 + .await 184 + } 185 + 186 + /// Meta: get the openapi spec for this api 187 + #[endpoint { 188 + method = GET, 189 + path = "/openapi", 190 + /* 191 + * not useful to have this in openapi 192 + */ 193 + unpublished = true, 194 + }] 195 + async fn openapi(ctx: RequestContext<Context>) -> OkCorsResponse<serde_json::Value> { 196 + instrument_handler(&ctx, async { 197 + let spec = (*ctx.context().spec).clone(); 198 + OkCors(spec).into() 199 + }) 200 + .await 201 + } 202 + 203 + /// The real type that gets deserialized 204 + #[derive(Debug, Deserialize, JsonSchema)] 205 + #[serde(rename_all = "camelCase")] 206 + pub struct MultiSubscribeQuery { 207 + #[serde(default)] 208 + pub wanted_subjects: HashSet<String>, 209 + #[serde(default)] 210 + pub wanted_subject_dids: HashSet<String>, 211 + #[serde(default)] 212 + pub wanted_sources: HashSet<String>, 213 + } 214 + /// The fake corresponding type for docs that dropshot won't freak out about a 215 + /// set for 216 + #[derive(Deserialize, JsonSchema)] 217 + #[allow(dead_code)] 218 + #[serde(rename_all = "camelCase")] 219 + struct MultiSubscribeQueryForDocs { 220 + /// One or more at-uris to receive links about 221 + /// 222 + /// The at-uri must be url-encoded 223 + /// 224 + /// Pass this parameter multiple times to specify multiple subjects, like 225 + /// `wantedSubjects=[...]&wantedSubjects=[...]` 226 + pub wanted_subjects: String, 227 + /// One or more DIDs to receive links about 228 + /// 
229 + /// Pass this parameter multiple times to specify multiple DIDs 230 + pub wanted_subject_dids: String, 231 + /// One or more link sources to receive links about 232 + /// 233 + /// TODO: docs about link sources 234 + /// 235 + /// eg, a bluesky like's link source: `app.bsky.feed.like:subject.uri` 236 + /// 237 + /// Pass this parameter multiple times to specify multiple sources 238 + pub wanted_sources: String, 239 + } 240 + 241 + // This `SharedExtractor` implementation describes how to construct a 242 + // `MultiSubscribeQuery` from an HTTP request: namely, by parsing the raw 243 + // query string with `serde_qs` (a missing query string is parsed as ""). 244 + #[async_trait] 245 + impl SharedExtractor for MultiSubscribeQuery { 246 + async fn from_request<Context: ServerContext>( 247 + ctx: &RequestContext<Context>, 248 + ) -> Result<MultiSubscribeQuery, HttpError> { 249 + let raw_query = ctx.request.uri().query().unwrap_or(""); 250 + let q = serde_qs::from_str(raw_query).map_err(|e| { 251 + HttpError::for_bad_request(None, format!("unable to parse query string: {}", e)) 252 + })?; 253 + Ok(q) 254 + } 255 + 256 + fn metadata(body_content_type: ApiEndpointBodyContentType) -> ExtractorMetadata { 257 + // HACK: query type switcheroo: passing MultiSubscribeQuery to 258 + // `metadata` would "helpfully" panic because dropshot believes we can 259 + // only have scalar types in a query. 260 + // 261 + // so instead we have a fake second type whose only job is to look the 262 + // same as MultiSubscribeQuery except that it has `String` instead of 263 + // `HashSet<String>`, which dropshot will accept, and generate ~close-enough 264 + // docs for. 
265 + <Query<MultiSubscribeQueryForDocs> as SharedExtractor>::metadata(body_content_type) 266 + } 41 267 } 42 268 43 269 #[derive(Deserialize, JsonSchema)] ··· 50 276 path = "/subscribe", 51 277 }] 52 278 async fn subscribe( 53 - ctx: RequestContext<broadcast::Sender<LinkEvent>>, 54 - _qp: Query<QueryParams>, 279 + ctx: RequestContext<Context>, 280 + query: MultiSubscribeQuery, 55 281 upgraded: WebsocketConnection, 56 282 ) -> dropshot::WebsocketChannelResult { 57 - let mut ws = tokio_tungstenite::WebSocketStream::from_raw_socket( 283 + let ws = tokio_tungstenite::WebSocketStream::from_raw_socket( 58 284 upgraded.into_inner(), 59 285 Role::Server, 60 286 None, 61 287 ) 62 288 .await; 63 - let mut sub = ctx.context().subscribe(); 64 289 65 - // TODO: pingpong 66 - // TODO: filtering subscription 290 + let b = ctx.context().b.subscribe(); 67 291 68 - loop { 69 - match sub.recv().await { 70 - Ok(link) => { 71 - let ev = ClientEvent { 72 - r#type: "link".to_string(), 73 - link: link.into(), 74 - }; 75 - let json = serde_json::to_string(&ev)?; 76 - if let Err(e) = ws.send(Message::Text(json.into())).await { 77 - eprintln!("client: failed to send event: {e:?}"); 78 - ws.close(None).await?; // TODO: do we need this one?? 79 - break; 80 - } 81 - } 82 - Err(broadcast::error::RecvError::Closed) => { 83 - ws.close(None).await?; // TODO: send reason 84 - break; 85 - } 86 - Err(broadcast::error::RecvError::Lagged(_n_missed)) => { 87 - eprintln!("client lagged, closing"); 88 - ws.close(None).await?; // TODO: send reason 89 - break; 90 - } 91 - } 92 - } 292 + subscriber::subscribe(b, ws, query) 293 + .await 294 + .map_err(|e| format!("boo: {e:?}"))?; 295 + 93 296 Ok(()) 94 297 }
+75
spacedust/src/subscriber.rs
··· 1 + use crate::ClientEvent; 2 + use crate::LinkEvent; 3 + use crate::server::MultiSubscribeQuery; 4 + use futures::SinkExt; 5 + use std::error::Error; 6 + use tokio::sync::broadcast; 7 + use tokio_tungstenite::{WebSocketStream, tungstenite::Message}; 8 + use dropshot::WebsocketConnectionRaw; 9 + 10 + pub async fn subscribe( 11 + mut sub: broadcast::Receiver<LinkEvent>, 12 + mut ws: WebSocketStream<WebsocketConnectionRaw>, 13 + query: MultiSubscribeQuery, 14 + ) -> Result<(), Box<dyn Error>> { 15 + // TODO: pingpong 16 + 17 + loop { 18 + match sub.recv().await { 19 + Ok(link) => { 20 + 21 + // subject + subject DIDs are logical OR 22 + let target_did = if link.target.starts_with("did:") { 23 + link.target.clone() 24 + } else { 25 + let Some(rest) = link.target.strip_prefix("at://") else { 26 + continue; 27 + }; 28 + if let Some((did, _)) = rest.split_once("/") { 29 + did 30 + } else { 31 + rest 32 + }.to_string() 33 + }; 34 + if !(query.wanted_subjects.contains(&link.target) || query.wanted_subject_dids.contains(&target_did) || query.wanted_subjects.is_empty() && query.wanted_subject_dids.is_empty()) { 35 + // wowwww ^^ fix that 36 + continue; 37 + } 38 + 39 + // subjects together with sources are logical AND 40 + 41 + if !query.wanted_sources.is_empty() { 42 + let undotted = link.path.strip_prefix('.').unwrap_or_else(|| { 43 + eprintln!("link path did not have expected '.' prefix: {}", link.path); 44 + "" 45 + }); 46 + let source = format!("{}:{undotted}", link.collection); 47 + if !query.wanted_sources.contains(&source) { 48 + continue; 49 + } 50 + } 51 + 52 + let ev = ClientEvent { 53 + kind: "link".to_string(), 54 + link: link.into(), 55 + }; 56 + let json = serde_json::to_string(&ev)?; 57 + if let Err(e) = ws.send(Message::Text(json.into())).await { 58 + eprintln!("client: failed to send event: {e:?}"); 59 + ws.close(None).await?; // TODO: do we need this one?? 
60 + break; 61 + } 62 + } 63 + Err(broadcast::error::RecvError::Closed) => { 64 + ws.close(None).await?; // TODO: send reason 65 + break; 66 + } 67 + Err(broadcast::error::RecvError::Lagged(_n_missed)) => { 68 + eprintln!("client lagged, closing"); 69 + ws.close(None).await?; // TODO: send reason 70 + break; 71 + } 72 + } 73 + } 74 + Ok(()) 75 + }
spacedust/static/favicon.ico

This is a binary file and will not be displayed.

+54
spacedust/static/index.html
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <title>Spacedust documentation</title>
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <meta name="description" content="API Documentation for Spacedust, a configurable ATProto notifications firehose" />
    <style>
      .custom-header {
        height: 42px;
        background-color: #221828;
        box-shadow: inset 0 -1px 0 var(--scalar-border-color);
        color: var(--scalar-color-1);
        font-size: var(--scalar-font-size-3);
        font-family: 'Iowan Old Style', 'Palatino Linotype', 'URW Palladio L', P052, serif;
        padding: 0 18px;
        justify-content: space-between;
      }
      .custom-header,
      .custom-header nav {
        display: flex;
        align-items: center;
        gap: 18px;
      }
      .custom-header a:hover {
        color: var(--scalar-color-2);
      }
    </style>
  </head>
  <body>
    <header class="custom-header scalar-app">
      <p>
        TODO: pdsls jetstream link
        <a href="https://ufos.microcosm.blue">Launch 🛸 UFOs app</a>: Explore lexicons
      </p>
      <nav>
        <b>a <a href="https://microcosm.blue">microcosm</a> project</b>
        <a href="https://bsky.app/profile/microcosm.blue">@microcosm.blue</a>
        <a href="https://github.com/at-microcosm">github</a>
      </nav>
    </header>

    <!-- Mount point for the API reference: data-url points at this server's /openapi spec -->
    <script id="api-reference" type="application/json" data-url="/openapi"></script>

    <!-- Attach the theme configuration to the mount point before the reference script loads -->
    <script>
      var configuration = {
        theme: 'purple',
      }
      document.getElementById('api-reference').dataset.configuration = JSON.stringify(configuration)
    </script>

    <script src="https://cdn.jsdelivr.net/npm/@scalar/api-reference"></script>
  </body>
</html>
+1 -1
ufos/src/index_html.rs
··· 2 2 <html lang="en"> 3 3 <head> 4 4 <meta charset="utf-8" /> 5 - <title>UFOs API Documentation</title> 5 + <title>UFOs API documentation</title> 6 6 <meta name="viewport" content="width=device-width, initial-scale=1" /> 7 7 <meta name="description" content="API Documentation for UFOs: Samples and stats for all atproto lexicons." /> 8 8 <style>