APIs for links and references in the ATmosphere

handle shutdown in all tasks

Changed files
+47 -12
slingshot
+2
slingshot/src/error.rs
··· 54 54 ServerTaskError(#[from] ServerError), 55 55 #[error(transparent)] 56 56 IdentityTaskError(#[from] IdentityError), 57 + #[error("firehose cache failed to close: {0}")] 58 + FirehoseCacheCloseError(foyer::Error), 57 59 } 58 60 59 61 #[derive(Debug, Error)]
+5 -1
slingshot/src/firehose_cache.rs
··· 10 10 .memory(64 * 2_usize.pow(20)) 11 11 .with_weighter(|k: &String, v| k.len() + std::mem::size_of_val(v)) 12 12 .storage(Engine::large()) 13 - .with_device_options(DirectFsDeviceOptions::new(cache_dir)) 13 + .with_device_options( 14 + DirectFsDeviceOptions::new(cache_dir) 15 + .with_capacity(2_usize.pow(30)) // TODO: configurable (1GB to have something) 16 + .with_file_size(16 * 2_usize.pow(20)), // note: this does limit the max cached item size, warning jumbo records 17 + ) 14 18 .build() 15 19 .await 16 20 .map_err(|e| format!("foyer setup error: {e:?}"))?;
+17 -3
slingshot/src/identity.rs
··· 13 13 /// 3. DID -> handle resolution: for bidirectional handle validation and in case we want to offer this 14 14 use std::time::Duration; 15 15 use tokio::sync::Mutex; 16 + use tokio_util::sync::CancellationToken; 16 17 17 18 use crate::error::IdentityError; 18 19 use atrium_api::{ ··· 175 176 .with_name("identity") 176 177 .memory(16 * 2_usize.pow(20)) 177 178 .with_weighter(|k, v| std::mem::size_of_val(k) + std::mem::size_of_val(v)) 178 - .storage(Engine::large()) 179 - .with_device_options(DirectFsDeviceOptions::new(cache_dir)) 179 + .storage(Engine::small()) 180 + .with_device_options( 181 + DirectFsDeviceOptions::new(cache_dir) 182 + .with_capacity(2_usize.pow(30)) // TODO: configurable (1GB to have something) 183 + .with_file_size(2_usize.pow(20)), // note: this does limit the max cached item size, warning jumbo records 184 + ) 180 185 .build() 181 186 .await?; 182 187 ··· 403 408 } 404 409 405 410 /// run the refresh queue consumer 406 - pub async fn run_refresher(&self) -> Result<(), IdentityError> { 411 + pub async fn run_refresher(&self, shutdown: CancellationToken) -> Result<(), IdentityError> { 407 412 let _guard = self 408 413 .refresher 409 414 .try_lock() 410 415 .expect("there to only be one refresher running"); 411 416 loop { 417 + if shutdown.is_cancelled() { 418 + log::info!("identity refresher: exiting for shutdown: closing cache..."); 419 + if let Err(e) = self.cache.close().await { 420 + log::error!("cache close errored: {e}"); 421 + } else { 422 + log::info!("identity cache closed.") 423 + } 424 + return Ok(()); 425 + } 412 426 let Some(task_key) = self.peek_refresh().await else { 413 427 tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; 414 428 continue;
+12 -3
slingshot/src/main.rs
··· 89 89 .map_err(|e| format!("identity setup failed: {e:?}"))?; 90 90 log::info!("identity service ready."); 91 91 let identity_refresher = identity.clone(); 92 + let identity_shutdown = shutdown.clone(); 92 93 tasks.spawn(async move { 93 - identity_refresher.run_refresher().await?; 94 + identity_refresher.run_refresher(identity_shutdown).await?; 94 95 Ok(()) 95 96 }); 96 97 ··· 113 114 }); 114 115 115 116 let consumer_shutdown = shutdown.clone(); 117 + let consumer_cache = cache.clone(); 116 118 tasks.spawn(async move { 117 119 consume( 118 120 args.jetstream, 119 121 None, 120 122 args.jetstream_no_zstd, 121 123 consumer_shutdown, 122 - cache, 124 + consumer_cache, 123 125 ) 124 126 .await?; 125 127 Ok(()) ··· 133 135 } 134 136 } 135 137 138 + tasks.spawn(async move { 139 + cache 140 + .close() 141 + .await 142 + .map_err(MainTaskError::FirehoseCacheCloseError) 143 + }); 144 + 136 145 tokio::select! { 137 146 _ = async { 138 147 while let Some(completed) = tasks.join_next().await { 139 148 log::info!("shutdown: task completed: {completed:?}"); 140 149 } 141 150 } => {}, 142 - _ = tokio::time::sleep(std::time::Duration::from_secs(3)) => { 151 + _ = tokio::time::sleep(std::time::Duration::from_secs(30)) => { 143 152 log::info!("shutdown: not all tasks completed on time. aborting..."); 144 153 tasks.shutdown().await; 145 154 },
+11 -5
slingshot/src/server.rs
··· 410 410 host: Option<String>, 411 411 acme_contact: Option<String>, 412 412 certs: Option<PathBuf>, 413 - _shutdown: CancellationToken, 413 + shutdown: CancellationToken, 414 414 ) -> Result<(), ServerError> { 415 415 let repo = Arc::new(repo); 416 416 let api_service = OpenApiService::new( ··· 452 452 } 453 453 let auto_cert = auto_cert.build().map_err(ServerError::AcmeBuildError)?; 454 454 455 - run(TcpListener::bind("0.0.0.0:443").acme(auto_cert), app).await 455 + run( 456 + TcpListener::bind("0.0.0.0:443").acme(auto_cert), 457 + app, 458 + shutdown, 459 + ) 460 + .await 456 461 } else { 457 - run(TcpListener::bind("127.0.0.1:3000"), app).await 462 + run(TcpListener::bind("127.0.0.1:3000"), app, shutdown).await 458 463 } 459 464 } 460 465 461 - async fn run<L>(listener: L, app: Route) -> Result<(), ServerError> 466 + async fn run<L>(listener: L, app: Route, shutdown: CancellationToken) -> Result<(), ServerError> 462 467 where 463 468 L: Listener + 'static, 464 469 { ··· 472 477 .with(Tracing); 473 478 Server::new(listener) 474 479 .name("slingshot") 475 - .run(app) 480 + .run_with_graceful_shutdown(app, shutdown.cancelled(), None) 476 481 .await 477 482 .map_err(ServerError::ServerExited) 483 + .inspect(|()| log::info!("server ended. goodbye.")) 478 484 }