more endpoints, service auth options

Orual fc92b489 0186121d

+1567 -51
+14 -12
Cargo.lock
··· 1234 1235 [[package]] 1236 name = "cmake" 1237 - version = "0.1.54" 1238 source = "registry+https://github.com/rust-lang/crates.io-index" 1239 - checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" 1240 dependencies = [ 1241 "cc", 1242 ] ··· 5567 [[package]] 5568 name = "jacquard" 5569 version = "0.9.4" 5570 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5571 dependencies = [ 5572 "bytes", 5573 "getrandom 0.2.16", ··· 5599 [[package]] 5600 name = "jacquard-api" 5601 version = "0.9.2" 5602 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5603 dependencies = [ 5604 "bon", 5605 "bytes", ··· 5618 [[package]] 5619 name = "jacquard-axum" 5620 version = "0.9.2" 5621 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5622 dependencies = [ 5623 "axum", 5624 "bytes", ··· 5640 [[package]] 5641 name = "jacquard-common" 5642 version = "0.9.2" 5643 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5644 dependencies = [ 5645 "base64 0.22.1", 5646 "bon", ··· 5688 [[package]] 5689 name = "jacquard-derive" 5690 version = "0.9.4" 5691 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5692 dependencies = [ 5693 "heck 0.5.0", 5694 "jacquard-lexicon", ··· 5700 [[package]] 5701 name = "jacquard-identity" 5702 version = "0.9.2" 5703 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5704 dependencies = [ 5705 "bon", 5706 "bytes", ··· 5729 [[package]] 5730 name = "jacquard-lexicon" 5731 version = "0.9.2" 5732 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5733 dependencies = [ 5734 "cid", 5735 "dashmap 6.1.0", ··· 5755 [[package]] 5756 name = "jacquard-oauth" 5757 version = "0.9.2" 5758 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5759 dependencies = [ 5760 "base64 0.22.1", 5761 "bytes", ··· 5788 [[package]] 5789 name = "jacquard-repo" 5790 version = "0.9.4" 5791 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5792 dependencies = [ 5793 "bytes", 5794 "cid", ··· 6746 [[package]] 6747 name = "mini-moka" 6748 version = "0.10.99" 6749 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 6750 dependencies = [ 6751 "crossbeam-channel", 6752 "crossbeam-utils", ··· 11787 "jacquard-axum", 11788 "jacquard-common", 11789 "jacquard-repo", 11790 "miette 7.6.0", 11791 "n0-future 0.1.3", 11792 "regex", 11793 "rusqlite", 11794 "rusqlite_migration",
··· 1234 1235 [[package]] 1236 name = "cmake" 1237 + version = "0.1.56" 1238 source = "registry+https://github.com/rust-lang/crates.io-index" 1239 + checksum = "b042e5d8a74ae91bb0961acd039822472ec99f8ab0948cbf6d1369588f8be586" 1240 dependencies = [ 1241 "cc", 1242 ] ··· 5567 [[package]] 5568 name = "jacquard" 5569 version = "0.9.4" 5570 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#d5d29a337d8f08ae52fe7bb4e31f91f8b029ff48" 5571 dependencies = [ 5572 "bytes", 5573 "getrandom 0.2.16", ··· 5599 [[package]] 5600 name = "jacquard-api" 5601 version = "0.9.2" 5602 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#d5d29a337d8f08ae52fe7bb4e31f91f8b029ff48" 5603 dependencies = [ 5604 "bon", 5605 "bytes", ··· 5618 [[package]] 5619 name = "jacquard-axum" 5620 version = "0.9.2" 5621 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#d5d29a337d8f08ae52fe7bb4e31f91f8b029ff48" 5622 dependencies = [ 5623 "axum", 5624 "bytes", ··· 5640 [[package]] 5641 name = "jacquard-common" 5642 version = "0.9.2" 5643 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#d5d29a337d8f08ae52fe7bb4e31f91f8b029ff48" 5644 dependencies = [ 5645 "base64 0.22.1", 5646 "bon", ··· 5688 [[package]] 5689 name = "jacquard-derive" 5690 version = "0.9.4" 5691 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#d5d29a337d8f08ae52fe7bb4e31f91f8b029ff48" 5692 dependencies = [ 5693 "heck 0.5.0", 5694 "jacquard-lexicon", ··· 5700 [[package]] 5701 name = "jacquard-identity" 5702 version = "0.9.2" 5703 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#d5d29a337d8f08ae52fe7bb4e31f91f8b029ff48" 5704 dependencies = [ 5705 "bon", 5706 "bytes", ··· 5729 [[package]] 5730 name = "jacquard-lexicon" 5731 version = "0.9.2" 5732 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#d5d29a337d8f08ae52fe7bb4e31f91f8b029ff48" 5733 dependencies = [ 5734 "cid", 5735 "dashmap 6.1.0", ··· 5755 [[package]] 5756 name = "jacquard-oauth" 5757 version = "0.9.2" 5758 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#d5d29a337d8f08ae52fe7bb4e31f91f8b029ff48" 5759 dependencies = [ 5760 "base64 0.22.1", 5761 "bytes", ··· 5788 [[package]] 5789 name = "jacquard-repo" 5790 version = "0.9.4" 5791 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#d5d29a337d8f08ae52fe7bb4e31f91f8b029ff48" 5792 dependencies = [ 5793 "bytes", 5794 "cid", ··· 6746 [[package]] 6747 name = "mini-moka" 6748 version = "0.10.99" 6749 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#d5d29a337d8f08ae52fe7bb4e31f91f8b029ff48" 6750 dependencies = [ 6751 "crossbeam-channel", 6752 "crossbeam-utils", ··· 11787 "jacquard-axum", 11788 "jacquard-common", 11789 "jacquard-repo", 11790 + "k256", 11791 "miette 7.6.0", 11792 "n0-future 0.1.3", 11793 + "rand 0.8.5", 11794 "regex", 11795 "rusqlite", 11796 "rusqlite_migration",
+5 -1
crates/weaver-index/Cargo.toml
··· 25 26 # AT Protocol / Jacquard 27 jacquard = { workspace = true, features = ["websocket", "zstd", "dns", "cache"] } 28 - jacquard-common = { workspace = true } 29 jacquard-repo = { workspace = true } 30 jacquard-axum = { workspace = true } 31 32 # ClickHouse 33 clickhouse = { version = "0.14", features = ["inserter", "chrono", "rustls-tls-ring", "rustls-tls-webpki-roots"] }
··· 25 26 # AT Protocol / Jacquard 27 jacquard = { workspace = true, features = ["websocket", "zstd", "dns", "cache"] } 28 + jacquard-common = { workspace = true, features = ["crypto-k256"] } 29 jacquard-repo = { workspace = true } 30 jacquard-axum = { workspace = true } 31 + 32 + # Crypto (for service identity keypair) 33 + k256 = { version = "0.13", features = ["ecdsa"] } 34 + rand = "0.8" 35 36 # ClickHouse 37 clickhouse = { version = "0.14", features = ["inserter", "chrono", "rustls-tls-ring", "rustls-tls-webpki-roots"] }
+58 -6
crates/weaver-index/src/bin/weaver_indexer.rs
··· 1 use clap::{Parser, Subcommand}; 2 use tracing::{error, info, warn}; 3 use weaver_index::clickhouse::{Client, Migrator}; 4 use weaver_index::config::{ 5 ClickHouseConfig, FirehoseConfig, IndexerConfig, ShardConfig, SourceMode, TapConfig, 6 }; 7 use weaver_index::firehose::FirehoseConsumer; 8 use weaver_index::server::{AppState, ServerConfig, TelemetryConfig, telemetry}; 9 - use weaver_index::clickhouse::InserterConfig; 10 - use weaver_index::{FirehoseIndexer, TapIndexer, load_cursor}; 11 12 #[derive(Parser)] 13 #[command(name = "indexer")] ··· 140 ); 141 info!("SQLite shards at {}", shard_config.base_path.display()); 142 143 // Create separate clients for indexer and server 144 let indexer_client = Client::new(&ch_config)?; 145 let server_client = Client::new(&ch_config)?; 146 147 // Build AppState for server 148 - let state = AppState::new(server_client, shard_config); 149 150 // Spawn the indexer task 151 let indexer_handle = match source_mode { ··· 182 183 // Run server, monitoring indexer health 184 tokio::select! { 185 - result = weaver_index::server::run(state, server_config) => { 186 result?; 187 } 188 result = indexer_handle => { ··· 273 ); 274 info!("SQLite shards at {}", shard_config.base_path.display()); 275 276 let client = Client::new(&ch_config)?; 277 278 - let state = AppState::new(client, shard_config); 279 - weaver_index::server::run(state, server_config).await?; 280 281 Ok(()) 282 }
··· 1 + use std::path::PathBuf; 2 + 3 use clap::{Parser, Subcommand}; 4 use tracing::{error, info, warn}; 5 + use weaver_index::clickhouse::InserterConfig; 6 use weaver_index::clickhouse::{Client, Migrator}; 7 use weaver_index::config::{ 8 ClickHouseConfig, FirehoseConfig, IndexerConfig, ShardConfig, SourceMode, TapConfig, 9 }; 10 use weaver_index::firehose::FirehoseConsumer; 11 use weaver_index::server::{AppState, ServerConfig, TelemetryConfig, telemetry}; 12 + use weaver_index::{FirehoseIndexer, ServiceIdentity, TapIndexer, load_cursor}; 13 14 #[derive(Parser)] 15 #[command(name = "indexer")] ··· 142 ); 143 info!("SQLite shards at {}", shard_config.base_path.display()); 144 145 + // Load or generate service identity keypair 146 + let key_path = std::env::var("SERVICE_KEY_PATH") 147 + .map(PathBuf::from) 148 + .unwrap_or_else(|_| PathBuf::from("./data/service.key")); 149 + let identity = ServiceIdentity::load_or_generate(&key_path)?; 150 + info!( 151 + public_key = %identity.public_key_multibase(), 152 + "Service identity loaded" 153 + ); 154 + 155 + // Generate DID document with service endpoint 156 + let service_endpoint = std::env::var("SERVICE_ENDPOINT").unwrap_or_else(|_| { 157 + format!( 158 + "https://{}", 159 + server_config 160 + .service_did 161 + .as_str() 162 + .strip_prefix("did:web:") 163 + .unwrap_or("index.weaver.sh") 164 + ) 165 + }); 166 + let did_doc = identity.did_document_with_service(&server_config.service_did, &service_endpoint); 167 + 168 // Create separate clients for indexer and server 169 let indexer_client = Client::new(&ch_config)?; 170 let server_client = Client::new(&ch_config)?; 171 172 // Build AppState for server 173 + let state = AppState::new( 174 + server_client, 175 + shard_config, 176 + server_config.service_did.clone(), 177 + ); 178 179 // Spawn the indexer task 180 let indexer_handle = match source_mode { ··· 211 212 // Run server, monitoring indexer health 213 tokio::select! { 214 + result = weaver_index::server::run(state, server_config, did_doc) => { 215 result?; 216 } 217 result = indexer_handle => { ··· 302 ); 303 info!("SQLite shards at {}", shard_config.base_path.display()); 304 305 + // Load or generate service identity keypair 306 + let key_path = std::env::var("SERVICE_KEY_PATH") 307 + .map(PathBuf::from) 308 + .unwrap_or_else(|_| PathBuf::from("./data/service.key")); 309 + let identity = ServiceIdentity::load_or_generate(&key_path)?; 310 + info!( 311 + public_key = %identity.public_key_multibase(), 312 + "Service identity loaded" 313 + ); 314 + 315 + // Generate DID document with service endpoint 316 + let service_endpoint = std::env::var("SERVICE_ENDPOINT").unwrap_or_else(|_| { 317 + format!( 318 + "https://{}", 319 + server_config 320 + .service_did 321 + .as_str() 322 + .strip_prefix("did:web:") 323 + .unwrap_or("localhost") 324 + ) 325 + }); 326 + let did_doc = identity.did_document_with_service(&server_config.service_did, &service_endpoint); 327 + 328 let client = Client::new(&ch_config)?; 329 330 + let state = AppState::new(client, shard_config, server_config.service_did.clone()); 331 + weaver_index::server::run(state, server_config, did_doc).await?; 332 333 Ok(()) 334 }
+467 -7
crates/weaver-index/src/clickhouse/queries/notebooks.rs
··· 19 pub tags: Vec<SmolStr>, 20 pub author_dids: Vec<SmolStr>, 21 #[serde(with = "clickhouse::serde::chrono::datetime64::millis")] 22 pub indexed_at: chrono::DateTime<chrono::Utc>, 23 pub record: SmolStr, 24 } ··· 34 pub path: SmolStr, 35 pub tags: Vec<SmolStr>, 36 pub author_dids: Vec<SmolStr>, 37 #[serde(with = "clickhouse::serde::chrono::datetime64::millis")] 38 pub indexed_at: chrono::DateTime<chrono::Utc>, 39 pub record: SmolStr, ··· 58 path, 59 tags, 60 author_dids, 61 indexed_at, 62 record 63 FROM notebooks 64 WHERE did = ? 65 AND (path = ? OR title = ?) 66 AND deleted_at = toDateTime64(0, 3) 67 - ORDER BY event_time DESC 68 LIMIT 1 69 "#; 70 ··· 102 path, 103 tags, 104 author_dids, 105 indexed_at, 106 record 107 FROM notebooks 108 WHERE did = ? 109 AND rkey = ? 110 AND deleted_at = toDateTime64(0, 3) 111 - ORDER BY event_time DESC 112 LIMIT 1 113 "#; 114 ··· 131 /// 132 /// Note: This is a simplified version. The full implementation would 133 /// need to join with notebook's entryList to get proper ordering. 134 - /// For now, we just list entries by the same author. 135 pub async fn list_notebook_entries( 136 &self, 137 did: &str, 138 limit: u32, 139 cursor: Option<&str>, 140 ) -> Result<Vec<EntryRow>, IndexError> { 141 let query = if cursor.is_some() { 142 r#" 143 SELECT ··· 149 path, 150 tags, 151 author_dids, 152 indexed_at, 153 record 154 FROM entries ··· 169 path, 170 tags, 171 author_dids, 172 indexed_at, 173 record 174 FROM entries ··· 200 /// Get an entry by rkey, picking the most recent version across collaborators. 201 /// 202 /// For collaborative entries, the same rkey may exist in multiple repos. 203 - /// This returns the most recently updated version, with indexed_at as tiebreaker. 204 /// 205 /// `candidate_dids` should include the notebook owner + all collaborator DIDs. 206 pub async fn get_entry( ··· 225 path, 226 tags, 227 author_dids, 228 indexed_at, 229 record 230 FROM entries 231 WHERE rkey = ? 232 AND did IN ({}) 233 AND deleted_at = toDateTime64(0, 3) 234 - ORDER BY updated_at DESC, indexed_at DESC 235 LIMIT 1 236 "#, 237 placeholders.join(", ") ··· 271 path, 272 tags, 273 author_dids, 274 indexed_at, 275 record 276 FROM entries 277 WHERE did = ? 278 AND rkey = ? 279 AND deleted_at = toDateTime64(0, 3) 280 - ORDER BY updated_at DESC, indexed_at DESC 281 LIMIT 1 282 "#; 283 ··· 312 path, 313 tags, 314 author_dids, 315 indexed_at, 316 record 317 FROM entries 318 WHERE did = ? 319 AND (path = ? OR title = ?) 320 AND deleted_at = toDateTime64(0, 3) 321 - ORDER BY event_time DESC 322 LIMIT 1 323 "#; 324 ··· 336 })?; 337 338 Ok(row) 339 } 340 }
··· 19 pub tags: Vec<SmolStr>, 20 pub author_dids: Vec<SmolStr>, 21 #[serde(with = "clickhouse::serde::chrono::datetime64::millis")] 22 + pub created_at: chrono::DateTime<chrono::Utc>, 23 + #[serde(with = "clickhouse::serde::chrono::datetime64::millis")] 24 pub indexed_at: chrono::DateTime<chrono::Utc>, 25 pub record: SmolStr, 26 } ··· 36 pub path: SmolStr, 37 pub tags: Vec<SmolStr>, 38 pub author_dids: Vec<SmolStr>, 39 + #[serde(with = "clickhouse::serde::chrono::datetime64::millis")] 40 + pub created_at: chrono::DateTime<chrono::Utc>, 41 #[serde(with = "clickhouse::serde::chrono::datetime64::millis")] 42 pub indexed_at: chrono::DateTime<chrono::Utc>, 43 pub record: SmolStr, ··· 62 path, 63 tags, 64 author_dids, 65 + created_at, 66 indexed_at, 67 record 68 FROM notebooks 69 WHERE did = ? 70 AND (path = ? OR title = ?) 71 AND deleted_at = toDateTime64(0, 3) 72 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 73 LIMIT 1 74 "#; 75 ··· 107 path, 108 tags, 109 author_dids, 110 + created_at, 111 indexed_at, 112 record 113 FROM notebooks 114 WHERE did = ? 115 AND rkey = ? 116 AND deleted_at = toDateTime64(0, 3) 117 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 118 LIMIT 1 119 "#; 120 ··· 137 /// 138 /// Note: This is a simplified version. The full implementation would 139 /// need to join with notebook's entryList to get proper ordering. 140 + /// For now, we just list entries by the same author, ordered by rkey (notebook order). 141 pub async fn list_notebook_entries( 142 &self, 143 did: &str, 144 limit: u32, 145 cursor: Option<&str>, 146 ) -> Result<Vec<EntryRow>, IndexError> { 147 + // Note: rkey ordering is intentional here - it's the notebook's entry order 148 let query = if cursor.is_some() { 149 r#" 150 SELECT ··· 156 path, 157 tags, 158 author_dids, 159 + created_at, 160 indexed_at, 161 record 162 FROM entries ··· 177 path, 178 tags, 179 author_dids, 180 + created_at, 181 indexed_at, 182 record 183 FROM entries ··· 209 /// Get an entry by rkey, picking the most recent version across collaborators. 210 /// 211 /// For collaborative entries, the same rkey may exist in multiple repos. 212 + /// This returns the most recently updated version. 213 /// 214 /// `candidate_dids` should include the notebook owner + all collaborator DIDs. 215 pub async fn get_entry( ··· 234 path, 235 tags, 236 author_dids, 237 + created_at, 238 indexed_at, 239 record 240 FROM entries 241 WHERE rkey = ? 242 AND did IN ({}) 243 AND deleted_at = toDateTime64(0, 3) 244 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 245 LIMIT 1 246 "#, 247 placeholders.join(", ") ··· 281 path, 282 tags, 283 author_dids, 284 + created_at, 285 indexed_at, 286 record 287 FROM entries 288 WHERE did = ? 289 AND rkey = ? 290 AND deleted_at = toDateTime64(0, 3) 291 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 292 LIMIT 1 293 "#; 294 ··· 323 path, 324 tags, 325 author_dids, 326 + created_at, 327 indexed_at, 328 record 329 FROM entries 330 WHERE did = ? 331 AND (path = ? OR title = ?) 332 AND deleted_at = toDateTime64(0, 3) 333 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 334 LIMIT 1 335 "#; 336 ··· 348 })?; 349 350 Ok(row) 351 + } 352 + 353 + /// List notebooks for an actor. 354 + /// 355 + /// Returns notebooks owned by the given DID, ordered by created_at DESC. 356 + /// Cursor is created_at timestamp in milliseconds. 357 + pub async fn list_actor_notebooks( 358 + &self, 359 + did: &str, 360 + limit: u32, 361 + cursor: Option<i64>, 362 + ) -> Result<Vec<NotebookRow>, IndexError> { 363 + let query = if cursor.is_some() { 364 + r#" 365 + SELECT 366 + did, 367 + rkey, 368 + cid, 369 + uri, 370 + title, 371 + path, 372 + tags, 373 + author_dids, 374 + created_at, 375 + indexed_at, 376 + record 377 + FROM notebooks 378 + WHERE did = ? 379 + AND deleted_at = toDateTime64(0, 3) 380 + AND created_at < fromUnixTimestamp64Milli(?) 381 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 382 + LIMIT ? 383 + "# 384 + } else { 385 + r#" 386 + SELECT 387 + did, 388 + rkey, 389 + cid, 390 + uri, 391 + title, 392 + path, 393 + tags, 394 + author_dids, 395 + created_at, 396 + indexed_at, 397 + record 398 + FROM notebooks 399 + WHERE did = ? 400 + AND deleted_at = toDateTime64(0, 3) 401 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 402 + LIMIT ? 403 + "# 404 + }; 405 + 406 + let mut q = self.inner().query(query).bind(did); 407 + 408 + if let Some(c) = cursor { 409 + q = q.bind(c); 410 + } 411 + 412 + let rows = q 413 + .bind(limit) 414 + .fetch_all::<NotebookRow>() 415 + .await 416 + .map_err(|e| ClickHouseError::Query { 417 + message: "failed to list actor notebooks".into(), 418 + source: e, 419 + })?; 420 + 421 + Ok(rows) 422 + } 423 + 424 + /// List entries for an actor. 425 + /// 426 + /// Returns entries owned by the given DID, ordered by created_at DESC. 427 + /// Cursor is created_at timestamp in milliseconds. 428 + pub async fn list_actor_entries( 429 + &self, 430 + did: &str, 431 + limit: u32, 432 + cursor: Option<i64>, 433 + ) -> Result<Vec<EntryRow>, IndexError> { 434 + let query = if cursor.is_some() { 435 + r#" 436 + SELECT 437 + did, 438 + rkey, 439 + cid, 440 + uri, 441 + title, 442 + path, 443 + tags, 444 + author_dids, 445 + created_at, 446 + indexed_at, 447 + record 448 + FROM entries 449 + WHERE did = ? 450 + AND deleted_at = toDateTime64(0, 3) 451 + AND created_at < fromUnixTimestamp64Milli(?) 452 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 453 + LIMIT ? 454 + "# 455 + } else { 456 + r#" 457 + SELECT 458 + did, 459 + rkey, 460 + cid, 461 + uri, 462 + title, 463 + path, 464 + tags, 465 + author_dids, 466 + created_at, 467 + indexed_at, 468 + record 469 + FROM entries 470 + WHERE did = ? 471 + AND deleted_at = toDateTime64(0, 3) 472 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 473 + LIMIT ? 474 + "# 475 + }; 476 + 477 + let mut q = self.inner().query(query).bind(did); 478 + 479 + if let Some(c) = cursor { 480 + q = q.bind(c); 481 + } 482 + 483 + let rows = 484 + q.bind(limit) 485 + .fetch_all::<EntryRow>() 486 + .await 487 + .map_err(|e| ClickHouseError::Query { 488 + message: "failed to list actor entries".into(), 489 + source: e, 490 + })?; 491 + 492 + Ok(rows) 493 + } 494 + 495 + /// Get a global feed of notebooks. 496 + /// 497 + /// Returns notebooks ordered by created_at DESC (chronological) or by 498 + /// popularity metrics if algorithm is "popular". 499 + /// Cursor is created_at timestamp in milliseconds. 500 + pub async fn get_notebook_feed( 501 + &self, 502 + algorithm: &str, 503 + tags: Option<&[&str]>, 504 + limit: u32, 505 + cursor: Option<i64>, 506 + ) -> Result<Vec<NotebookRow>, IndexError> { 507 + // For now, just chronological. Popular would need join with counts. 508 + let base_query = if tags.is_some() && cursor.is_some() { 509 + r#" 510 + SELECT 511 + did, 512 + rkey, 513 + cid, 514 + uri, 515 + title, 516 + path, 517 + tags, 518 + author_dids, 519 + created_at, 520 + indexed_at, 521 + record 522 + FROM notebooks 523 + WHERE deleted_at = toDateTime64(0, 3) 524 + AND hasAny(tags, ?) 525 + AND created_at < fromUnixTimestamp64Milli(?) 526 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 527 + LIMIT ? 528 + "# 529 + } else if tags.is_some() { 530 + r#" 531 + SELECT 532 + did, 533 + rkey, 534 + cid, 535 + uri, 536 + title, 537 + path, 538 + tags, 539 + author_dids, 540 + created_at, 541 + indexed_at, 542 + record 543 + FROM notebooks 544 + WHERE deleted_at = toDateTime64(0, 3) 545 + AND hasAny(tags, ?) 546 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 547 + LIMIT ? 548 + "# 549 + } else if cursor.is_some() { 550 + r#" 551 + SELECT 552 + did, 553 + rkey, 554 + cid, 555 + uri, 556 + title, 557 + path, 558 + tags, 559 + author_dids, 560 + created_at, 561 + indexed_at, 562 + record 563 + FROM notebooks 564 + WHERE deleted_at = toDateTime64(0, 3) 565 + AND created_at < fromUnixTimestamp64Milli(?) 566 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 567 + LIMIT ? 568 + "# 569 + } else { 570 + r#" 571 + SELECT 572 + did, 573 + rkey, 574 + cid, 575 + uri, 576 + title, 577 + path, 578 + tags, 579 + author_dids, 580 + created_at, 581 + indexed_at, 582 + record 583 + FROM notebooks 584 + WHERE deleted_at = toDateTime64(0, 3) 585 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 586 + LIMIT ? 587 + "# 588 + }; 589 + 590 + let _ = algorithm; // TODO: implement popular sorting 591 + 592 + let mut q = self.inner().query(base_query); 593 + 594 + if let Some(t) = tags { 595 + q = q.bind(t); 596 + } 597 + if let Some(c) = cursor { 598 + q = q.bind(c); 599 + } 600 + 601 + let rows = q 602 + .bind(limit) 603 + .fetch_all::<NotebookRow>() 604 + .await 605 + .map_err(|e| ClickHouseError::Query { 606 + message: "failed to get notebook feed".into(), 607 + source: e, 608 + })?; 609 + 610 + Ok(rows) 611 + } 612 + 613 + /// Get a global feed of entries. 614 + /// 615 + /// Returns entries ordered by created_at DESC (chronological). 616 + /// Cursor is created_at timestamp in milliseconds. 617 + pub async fn get_entry_feed( 618 + &self, 619 + algorithm: &str, 620 + tags: Option<&[&str]>, 621 + limit: u32, 622 + cursor: Option<i64>, 623 + ) -> Result<Vec<EntryRow>, IndexError> { 624 + let base_query = if tags.is_some() && cursor.is_some() { 625 + r#" 626 + SELECT 627 + did, 628 + rkey, 629 + cid, 630 + uri, 631 + title, 632 + path, 633 + tags, 634 + author_dids, 635 + created_at, 636 + indexed_at, 637 + record 638 + FROM entries 639 + WHERE deleted_at = toDateTime64(0, 3) 640 + AND hasAny(tags, ?) 641 + AND created_at < fromUnixTimestamp64Milli(?) 642 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 643 + LIMIT ? 644 + "# 645 + } else if tags.is_some() { 646 + r#" 647 + SELECT 648 + did, 649 + rkey, 650 + cid, 651 + uri, 652 + title, 653 + path, 654 + tags, 655 + author_dids, 656 + created_at, 657 + indexed_at, 658 + record 659 + FROM entries 660 + WHERE deleted_at = toDateTime64(0, 3) 661 + AND hasAny(tags, ?) 662 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 663 + LIMIT ? 664 + "# 665 + } else if cursor.is_some() { 666 + r#" 667 + SELECT 668 + did, 669 + rkey, 670 + cid, 671 + uri, 672 + title, 673 + path, 674 + tags, 675 + author_dids, 676 + created_at, 677 + indexed_at, 678 + record 679 + FROM entries 680 + WHERE deleted_at = toDateTime64(0, 3) 681 + AND created_at < fromUnixTimestamp64Milli(?) 682 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 683 + LIMIT ? 684 + "# 685 + } else { 686 + r#" 687 + SELECT 688 + did, 689 + rkey, 690 + cid, 691 + uri, 692 + title, 693 + path, 694 + tags, 695 + author_dids, 696 + created_at, 697 + indexed_at, 698 + record 699 + FROM entries 700 + WHERE deleted_at = toDateTime64(0, 3) 701 + ORDER BY toStartOfFiveMinutes(event_time) DESC, created_at DESC 702 + LIMIT ? 703 + "# 704 + }; 705 + 706 + let _ = algorithm; // TODO: implement popular sorting 707 + 708 + let mut q = self.inner().query(base_query); 709 + 710 + if let Some(t) = tags { 711 + q = q.bind(t); 712 + } 713 + if let Some(c) = cursor { 714 + q = q.bind(c); 715 + } 716 + 717 + let rows = 718 + q.bind(limit) 719 + .fetch_all::<EntryRow>() 720 + .await 721 + .map_err(|e| ClickHouseError::Query { 722 + message: "failed to get entry feed".into(), 723 + source: e, 724 + })?; 725 + 726 + Ok(rows) 727 + } 728 + 729 + /// Get an entry at a specific index within a notebook. 730 + /// 731 + /// Returns the entry at the given 0-based index, plus adjacent entries for prev/next. 732 + pub async fn get_book_entry_at_index( 733 + &self, 734 + notebook_did: &str, 735 + notebook_rkey: &str, 736 + index: u32, 737 + ) -> Result<Option<(EntryRow, Option<EntryRow>, Option<EntryRow>)>, IndexError> { 738 + // Fetch entries for this notebook with index context 739 + // We need 3 entries: prev (index-1), current (index), next (index+1) 740 + let offset = if index > 0 { index - 1 } else { 0 }; 741 + let fetch_count = if index > 0 { 3u32 } else { 2u32 }; 742 + 743 + let query = r#" 744 + SELECT 745 + e.did, 746 + e.rkey, 747 + e.cid, 748 + e.uri, 749 + e.title, 750 + e.path, 751 + e.tags, 752 + e.author_dids, 753 + e.created_at, 754 + e.indexed_at, 755 + e.record 756 + FROM notebook_entries ne FINAL 757 + INNER JOIN entries e ON 758 + e.did = ne.entry_did 759 + AND e.rkey = ne.entry_rkey 760 + AND e.deleted_at = toDateTime64(0, 3) 761 + WHERE ne.notebook_did = ? 762 + AND ne.notebook_rkey = ? 763 + ORDER BY ne.position ASC 764 + LIMIT ? OFFSET ? 765 + "#; 766 + 767 + let rows: Vec<EntryRow> = self 768 + .inner() 769 + .query(query) 770 + .bind(notebook_did) 771 + .bind(notebook_rkey) 772 + .bind(fetch_count) 773 + .bind(offset) 774 + .fetch_all() 775 + .await 776 + .map_err(|e| ClickHouseError::Query { 777 + message: "failed to get book entry at index".into(), 778 + source: e, 779 + })?; 780 + 781 + if rows.is_empty() { 782 + return Ok(None); 783 + } 784 + 785 + // Determine which row is which based on the offset 786 + let mut iter = rows.into_iter(); 787 + if index == 0 { 788 + // No prev, rows[0] is current, rows[1] is next (if exists) 789 + let current = iter.next(); 790 + let next = iter.next(); 791 + Ok(current.map(|c| (c, None, next))) 792 + } else { 793 + // rows[0] is prev, rows[1] is current, rows[2] is next 794 + let prev = iter.next(); 795 + let current = iter.next(); 796 + let next = iter.next(); 797 + Ok(current.map(|c| (c, prev, next))) 798 + } 799 } 800 }
+340 -1
crates/weaver-index/src/endpoints/actor.rs
··· 1 //! sh.weaver.actor.* endpoint handlers 2 3 use axum::{Json, extract::State}; 4 use jacquard::IntoStatic; 5 use jacquard::cowstr::ToCowStr; 6 use jacquard::identity::resolver::IdentityResolver; 7 use jacquard::types::ident::AtIdentifier; 8 - use jacquard::types::string::{Did, Handle}; 9 use jacquard_axum::ExtractXrpc; 10 use weaver_api::sh_weaver::actor::{ 11 ProfileDataView, ProfileDataViewInner, ProfileView, 12 get_profile::{GetProfileOutput, GetProfileRequest}, 13 }; 14 15 use crate::endpoints::repo::XrpcErrorResponse; 16 use crate::server::AppState; 17 18 /// Handle sh.weaver.actor.getProfile 19 /// 20 /// Returns a profile view with counts for the requested actor. 21 pub async fn get_profile( 22 State(state): State<AppState>, 23 ExtractXrpc(args): ExtractXrpc<GetProfileRequest>, 24 ) -> Result<Json<GetProfileOutput<'static>>, XrpcErrorResponse> { 25 // Resolve identifier to DID 26 let did = resolve_actor(&state, &args.actor).await?; 27 let did_str = did.as_str(); ··· 156 Some(s.to_cowstr().into_static()) 157 } 158 }
··· 1 //! sh.weaver.actor.* endpoint handlers 2 3 + use std::collections::{HashMap, HashSet}; 4 + 5 use axum::{Json, extract::State}; 6 use jacquard::IntoStatic; 7 use jacquard::cowstr::ToCowStr; 8 use jacquard::identity::resolver::IdentityResolver; 9 use jacquard::types::ident::AtIdentifier; 10 + use jacquard::types::string::{AtUri, Cid, Did, Handle}; 11 use jacquard_axum::ExtractXrpc; 12 + use jacquard_axum::service_auth::{ExtractOptionalServiceAuth, VerifiedServiceAuth}; 13 + use smol_str::SmolStr; 14 use weaver_api::sh_weaver::actor::{ 15 ProfileDataView, ProfileDataViewInner, ProfileView, 16 + get_actor_entries::{GetActorEntriesOutput, GetActorEntriesRequest}, 17 + get_actor_notebooks::{GetActorNotebooksOutput, GetActorNotebooksRequest}, 18 get_profile::{GetProfileOutput, GetProfileRequest}, 19 }; 20 + use weaver_api::sh_weaver::notebook::{AuthorListView, EntryView, NotebookView}; 21 22 + use crate::clickhouse::ProfileRow; 23 use crate::endpoints::repo::XrpcErrorResponse; 24 use crate::server::AppState; 25 + 26 + /// Authenticated viewer context (if present) 27 + pub type Viewer = Option<VerifiedServiceAuth<'static>>; 28 29 /// Handle sh.weaver.actor.getProfile 30 /// 31 /// Returns a profile view with counts for the requested actor. 32 pub async fn get_profile( 33 State(state): State<AppState>, 34 + ExtractOptionalServiceAuth(viewer): ExtractOptionalServiceAuth, 35 ExtractXrpc(args): ExtractXrpc<GetProfileRequest>, 36 ) -> Result<Json<GetProfileOutput<'static>>, XrpcErrorResponse> { 37 + // viewer contains Some(auth) if the request has valid service auth 38 + // can be used later for viewer-specific state (e.g., "you follow this person") 39 + let _viewer = viewer; 40 // Resolve identifier to DID 41 let did = resolve_actor(&state, &args.actor).await?; 42 let did_str = did.as_str(); ··· 171 Some(s.to_cowstr().into_static()) 172 } 173 } 174 + 175 + /// Parse cursor string to i64 timestamp millis 176 + fn parse_cursor(cursor: Option<&str>) -> Result<Option<i64>, XrpcErrorResponse> { 177 + cursor 178 + .map(|c| { 179 + c.parse::<i64>() 180 + .map_err(|_| XrpcErrorResponse::invalid_request("Invalid cursor format")) 181 + }) 182 + .transpose() 183 + } 184 + 185 + /// Handle sh.weaver.actor.getActorNotebooks 186 + /// 187 + /// Returns notebooks owned by the given actor. 188 + pub async fn get_actor_notebooks( 189 + State(state): State<AppState>, 190 + ExtractOptionalServiceAuth(viewer): ExtractOptionalServiceAuth, 191 + ExtractXrpc(args): ExtractXrpc<GetActorNotebooksRequest>, 192 + ) -> Result<Json<GetActorNotebooksOutput<'static>>, XrpcErrorResponse> { 193 + let _viewer: Viewer = viewer; 194 + 195 + // Resolve actor to DID 196 + let did = resolve_actor(&state, &args.actor).await?; 197 + let did_str = did.as_str(); 198 + 199 + // Fetch notebooks for this actor 200 + let limit = args.limit.unwrap_or(50).clamp(1, 100) as u32; 201 + let cursor = parse_cursor(args.cursor.as_deref())?; 202 + 203 + let notebook_rows = state 204 + .clickhouse 205 + .list_actor_notebooks(did_str, limit + 1, cursor) 206 + .await 207 + .map_err(|e| { 208 + tracing::error!("Failed to list actor notebooks: {}", e); 209 + XrpcErrorResponse::internal_error("Database query failed") 210 + })?; 211 + 212 + // Check if there are more 213 + let has_more = notebook_rows.len() > limit as usize; 214 + let notebook_rows: Vec<_> = notebook_rows.into_iter().take(limit as usize).collect(); 215 + 216 + // Collect author DIDs for hydration 217 + let mut all_author_dids: HashSet<&str> = HashSet::new(); 218 + for nb in &notebook_rows { 219 + for did in &nb.author_dids { 220 + all_author_dids.insert(did.as_str()); 221 + } 222 + } 223 + 224 + // Batch fetch profiles 225 + let author_dids_vec: Vec<&str> = all_author_dids.into_iter().collect(); 226 + let profiles = state 227 + .clickhouse 228 + .get_profiles_batch(&author_dids_vec) 229 + .await 230 + .map_err(|e| { 231 + tracing::error!("Failed to batch fetch profiles: {}", e); 232 + XrpcErrorResponse::internal_error("Database query failed") 233 + })?; 234 + 235 + let profile_map: HashMap<&str, &ProfileRow> = 236 + profiles.iter().map(|p| (p.did.as_str(), p)).collect(); 237 + 238 + // Build NotebookViews 239 + let mut notebooks: Vec<NotebookView<'static>> = Vec::with_capacity(notebook_rows.len()); 240 + for nb_row in &notebook_rows { 241 + let notebook_uri = AtUri::new(&nb_row.uri).map_err(|e| { 242 + tracing::error!("Invalid notebook URI in db: {}", e); 243 + XrpcErrorResponse::internal_error("Invalid URI stored") 244 + })?; 245 + 246 + let notebook_cid = Cid::new(nb_row.cid.as_bytes()).map_err(|e| { 247 + tracing::error!("Invalid notebook CID in db: {}", e); 248 + XrpcErrorResponse::internal_error("Invalid CID stored") 249 + })?; 250 + 251 + let authors = hydrate_authors(&nb_row.author_dids, &profile_map)?; 252 + let record = parse_record_json(&nb_row.record)?; 253 + 254 + let notebook = NotebookView::new() 255 + .uri(notebook_uri.into_static()) 256 + .cid(notebook_cid.into_static()) 257 + .authors(authors) 258 + .record(record) 259 + .indexed_at(nb_row.indexed_at.fixed_offset()) 260 + .maybe_title(non_empty_str(&nb_row.title)) 261 + .maybe_path(non_empty_str(&nb_row.path)) 262 + .build(); 263 + 264 + notebooks.push(notebook); 265 + } 266 + 267 + // Build cursor for pagination (created_at millis) 268 + let next_cursor = if has_more { 269 + notebook_rows 270 + .last() 271 + .map(|nb| nb.created_at.timestamp_millis().to_cowstr().into_static()) 272 + } else { 273 + None 274 + }; 275 + 276 + Ok(Json( 277 + GetActorNotebooksOutput { 278 + notebooks, 279 + cursor: next_cursor, 280 + extra_data: None 281 + } 282 + .into_static(), 283 + )) 284 + } 285 + 286 + /// Handle sh.weaver.actor.getActorEntries 287 + /// 288 + /// Returns entries owned by the given actor. 289 + pub async fn get_actor_entries( 290 + State(state): State<AppState>, 291 + ExtractOptionalServiceAuth(viewer): ExtractOptionalServiceAuth, 292 + ExtractXrpc(args): ExtractXrpc<GetActorEntriesRequest>, 293 + ) -> Result<Json<GetActorEntriesOutput<'static>>, XrpcErrorResponse> { 294 + let _viewer: Viewer = viewer; 295 + 296 + // Resolve actor to DID 297 + let did = resolve_actor(&state, &args.actor).await?; 298 + let did_str = did.as_str(); 299 + 300 + // Fetch entries for this actor 301 + let limit = args.limit.unwrap_or(50).clamp(1, 100) as u32; 302 + let cursor = parse_cursor(args.cursor.as_deref())?; 303 + 304 + let entry_rows = state 305 + .clickhouse 306 + .list_actor_entries(did_str, limit + 1, cursor) 307 + .await 308 + .map_err(|e| { 309 + tracing::error!("Failed to list actor entries: {}", e); 310 + XrpcErrorResponse::internal_error("Database query failed") 311 + })?; 312 + 313 + // Check if there are more 314 + let has_more = entry_rows.len() > limit as usize; 315 + let entry_rows: Vec<_> = entry_rows.into_iter().take(limit as usize).collect(); 316 + 317 + // Collect author DIDs for hydration 318 + let mut all_author_dids: HashSet<&str> = HashSet::new(); 319 + for entry in &entry_rows { 320 + for did in &entry.author_dids { 321 + all_author_dids.insert(did.as_str()); 322 + } 323 + } 324 + 325 + // Batch fetch profiles 326 + let author_dids_vec: Vec<&str> = all_author_dids.into_iter().collect(); 327 + let profiles = state 328 + .clickhouse 329 + .get_profiles_batch(&author_dids_vec) 330 + .await 331 + .map_err(|e| { 332 + tracing::error!("Failed to batch fetch profiles: {}", e); 333 + XrpcErrorResponse::internal_error("Database query failed") 334 + })?; 335 + 336 + let profile_map: HashMap<&str, &ProfileRow> = 337 + profiles.iter().map(|p| (p.did.as_str(), p)).collect(); 338 + 339 + // Build EntryViews 340 + let mut entries: Vec<EntryView<'static>> = Vec::with_capacity(entry_rows.len()); 341 + for entry_row in &entry_rows { 342 + let entry_uri = AtUri::new(&entry_row.uri).map_err(|e| { 343 + tracing::error!("Invalid entry URI in db: {}", e); 344 + XrpcErrorResponse::internal_error("Invalid URI stored") 345 + })?; 346 + 347 + let entry_cid = Cid::new(entry_row.cid.as_bytes()).map_err(|e| { 348 + tracing::error!("Invalid entry CID in db: {}", e); 349 + XrpcErrorResponse::internal_error("Invalid CID stored") 350 + })?; 351 + 352 + let authors = hydrate_authors(&entry_row.author_dids, &profile_map)?; 353 + let record = parse_record_json(&entry_row.record)?; 354 + 355 + let entry = EntryView::new() 356 + .uri(entry_uri.into_static()) 357 + .cid(entry_cid.into_static()) 358 + .authors(authors) 359 + .record(record) 360 + .indexed_at(entry_row.indexed_at.fixed_offset()) 361 + .maybe_title(non_empty_str(&entry_row.title)) 362 + .maybe_path(non_empty_str(&entry_row.path)) 363 + .build(); 364 + 365 + entries.push(entry); 366 + } 367 + 368 + // Build cursor for pagination (created_at millis) 369 + let next_cursor = if has_more { 370 + entry_rows 371 + .last() 372 + .map(|e| e.created_at.timestamp_millis().to_cowstr().into_static()) 373 + } else { 374 + None 375 + }; 376 + 377 + Ok(Json( 378 + GetActorEntriesOutput { 379 + entries, 380 + cursor: next_cursor, 381 + extra_data: None, 382 + } 383 + .into_static(), 384 + )) 385 + } 386 + 387 + /// Hydrate author list from DIDs using profile map 388 + fn hydrate_authors( 389 + author_dids: &[SmolStr], 390 + profile_map: &HashMap<&str, &ProfileRow>, 391 + ) -> Result<Vec<AuthorListView<'static>>, XrpcErrorResponse> { 392 + let mut authors = Vec::with_capacity(author_dids.len()); 393 + 394 + for (idx, did_str) in author_dids.iter().enumerate() { 395 + let profile_data = if let Some(profile) = profile_map.get(did_str.as_str()) { 396 + profile_to_data_view(profile)? 397 + } else { 398 + // No profile found - create minimal view with just the DID 399 + let did = Did::new(did_str).map_err(|e| { 400 + tracing::error!("Invalid DID in author_dids: {}", e); 401 + XrpcErrorResponse::internal_error("Invalid DID stored") 402 + })?; 403 + 404 + let inner_profile = ProfileView::new() 405 + .did(did.into_static()) 406 + .handle( 407 + Handle::new(did_str) 408 + .unwrap_or_else(|_| Handle::new("unknown.invalid").unwrap()), 409 + ) 410 + .build(); 411 + 412 + ProfileDataView::new() 413 + .inner(ProfileDataViewInner::ProfileView(Box::new(inner_profile))) 414 + .build() 415 + }; 416 + 417 + let author_view = AuthorListView::new() 418 + .index(idx as i64) 419 + .record(profile_data.into_static()) 420 + .build(); 421 + 422 + authors.push(author_view); 423 + } 424 + 425 + Ok(authors) 426 + } 427 + 428 + /// Convert ProfileRow to ProfileDataView 429 + fn profile_to_data_view( 430 + profile: &ProfileRow, 431 + ) -> Result<ProfileDataView<'static>, XrpcErrorResponse> { 432 + use jacquard::types::string::Uri; 433 + 434 + let did = Did::new(&profile.did).map_err(|e| { 435 + tracing::error!("Invalid DID in profile: {}", e); 436 + XrpcErrorResponse::internal_error("Invalid DID stored") 437 + })?; 438 + 439 + let handle = if profile.handle.is_empty() { 440 + Handle::new(&profile.did).unwrap_or_else(|_| Handle::new("unknown.invalid").unwrap()) 441 + } else { 442 + Handle::new(&profile.handle).map_err(|e| { 443 + tracing::error!("Invalid handle in profile: {}", e); 444 + XrpcErrorResponse::internal_error("Invalid handle stored") 445 + })? 446 + }; 447 + 448 + // Build avatar URL from CID if present 449 + let avatar = if !profile.avatar_cid.is_empty() { 450 + let url = format!( 451 + "https://cdn.bsky.app/img/avatar/plain/{}/{}@jpeg", 452 + profile.did, profile.avatar_cid 453 + ); 454 + Uri::new_owned(url).ok() 455 + } else { 456 + None 457 + }; 458 + 459 + // Build banner URL from CID if present 460 + let banner = if !profile.banner_cid.is_empty() { 461 + let url = format!( 462 + "https://cdn.bsky.app/img/banner/plain/{}/{}@jpeg", 463 + profile.did, profile.banner_cid 464 + ); 465 + Uri::new_owned(url).ok() 466 + } else { 467 + None 468 + }; 469 + 470 + let inner_profile = ProfileView::new() 471 + .did(did.into_static()) 472 + .handle(handle.into_static()) 473 + .maybe_display_name(non_empty_str(&profile.display_name)) 474 + .maybe_description(non_empty_str(&profile.description)) 475 + .maybe_avatar(avatar) 476 + .maybe_banner(banner) 477 + .build(); 478 + 479 + let profile_data = ProfileDataView::new() 480 + .inner(ProfileDataViewInner::ProfileView(Box::new(inner_profile))) 481 + .build(); 482 + 483 + Ok(profile_data) 484 + } 485 + 486 + /// Parse record JSON string into owned Data 487 + fn parse_record_json( 488 + json: &str, 489 + ) -> Result<jacquard::types::value::Data<'static>, XrpcErrorResponse> { 490 + use jacquard::types::value::Data; 491 + 492 + let data: Data<'_> = serde_json::from_str(json).map_err(|e| { 493 + tracing::error!("Failed to parse record JSON: {}", e); 494 + XrpcErrorResponse::internal_error("Invalid record JSON stored") 495 + })?; 496 + Ok(data.into_static()) 497 + }
+359 -3
crates/weaver-index/src/endpoints/notebook.rs
··· 8 use jacquard::types::string::{AtUri, Cid, Did, Handle, Uri}; 9 use jacquard::types::value::Data; 10 use jacquard_axum::ExtractXrpc; 11 use smol_str::SmolStr; 12 use weaver_api::sh_weaver::actor::{ProfileDataView, ProfileDataViewInner, ProfileView}; 13 use weaver_api::sh_weaver::notebook::{ 14 - AuthorListView, BookEntryView, EntryView, NotebookView, 15 get_entry::{GetEntryOutput, GetEntryRequest}, 16 resolve_entry::{ResolveEntryOutput, ResolveEntryRequest}, 17 resolve_notebook::{ResolveNotebookOutput, ResolveNotebookRequest}, 18 }; 19 20 - use crate::clickhouse::ProfileRow; 21 - use crate::endpoints::actor::resolve_actor; 22 use crate::endpoints::repo::XrpcErrorResponse; 23 use crate::server::AppState; 24 ··· 27 /// Resolves a notebook by actor + path/title, returns notebook with entries. 28 pub async fn resolve_notebook( 29 State(state): State<AppState>, 30 ExtractXrpc(args): ExtractXrpc<ResolveNotebookRequest>, 31 ) -> Result<Json<ResolveNotebookOutput<'static>>, XrpcErrorResponse> { 32 // Resolve actor to DID 33 let did = resolve_actor(&state, &args.actor).await?; 34 let did_str = did.as_str(); ··· 195 /// Gets an entry by AT URI. 196 pub async fn get_entry( 197 State(state): State<AppState>, 198 ExtractXrpc(args): ExtractXrpc<GetEntryRequest>, 199 ) -> Result<Json<GetEntryOutput<'static>>, XrpcErrorResponse> { 200 // Parse the AT URI to extract authority and rkey 201 let uri = &args.uri; 202 let authority = uri.authority(); ··· 273 /// Resolves an entry by actor + notebook name + entry name. 274 pub async fn resolve_entry( 275 State(state): State<AppState>, 276 ExtractXrpc(args): ExtractXrpc<ResolveEntryRequest>, 277 ) -> Result<Json<ResolveEntryOutput<'static>>, XrpcErrorResponse> { 278 // Resolve actor to DID 279 let did = resolve_actor(&state, &args.actor).await?; 280 let did_str = did.as_str(); ··· 294 XrpcErrorResponse::internal_error("Database query failed") 295 }) 296 }, 297 async { 298 state 299 .clickhouse ··· 522 523 Ok(profile_data) 524 }
··· 8 use jacquard::types::string::{AtUri, Cid, Did, Handle, Uri}; 9 use jacquard::types::value::Data; 10 use jacquard_axum::ExtractXrpc; 11 + use jacquard_axum::service_auth::ExtractOptionalServiceAuth; 12 use smol_str::SmolStr; 13 use weaver_api::sh_weaver::actor::{ProfileDataView, ProfileDataViewInner, ProfileView}; 14 use weaver_api::sh_weaver::notebook::{ 15 + AuthorListView, BookEntryRef, BookEntryView, EntryView, FeedEntryView, NotebookView, 16 + get_book_entry::{GetBookEntryOutput, GetBookEntryRequest}, 17 get_entry::{GetEntryOutput, GetEntryRequest}, 18 + get_entry_feed::{GetEntryFeedOutput, GetEntryFeedRequest}, 19 + get_notebook_feed::{GetNotebookFeedOutput, GetNotebookFeedRequest}, 20 resolve_entry::{ResolveEntryOutput, ResolveEntryRequest}, 21 resolve_notebook::{ResolveNotebookOutput, ResolveNotebookRequest}, 22 }; 23 24 + use crate::clickhouse::{EntryRow, ProfileRow}; 25 + use crate::endpoints::actor::{Viewer, resolve_actor}; 26 use crate::endpoints::repo::XrpcErrorResponse; 27 use crate::server::AppState; 28 ··· 31 /// Resolves a notebook by actor + path/title, returns notebook with entries. 32 pub async fn resolve_notebook( 33 State(state): State<AppState>, 34 + ExtractOptionalServiceAuth(viewer): ExtractOptionalServiceAuth, 35 ExtractXrpc(args): ExtractXrpc<ResolveNotebookRequest>, 36 ) -> Result<Json<ResolveNotebookOutput<'static>>, XrpcErrorResponse> { 37 + // viewer can be used later for viewer state (bookmarks, read status, etc.) 38 + let _viewer: Viewer = viewer; 39 + 40 // Resolve actor to DID 41 let did = resolve_actor(&state, &args.actor).await?; 42 let did_str = did.as_str(); ··· 203 /// Gets an entry by AT URI. 204 pub async fn get_entry( 205 State(state): State<AppState>, 206 + ExtractOptionalServiceAuth(viewer): ExtractOptionalServiceAuth, 207 ExtractXrpc(args): ExtractXrpc<GetEntryRequest>, 208 ) -> Result<Json<GetEntryOutput<'static>>, XrpcErrorResponse> { 209 + let _viewer: Viewer = viewer; 210 + 211 // Parse the AT URI to extract authority and rkey 212 let uri = &args.uri; 213 let authority = uri.authority(); ··· 284 /// Resolves an entry by actor + notebook name + entry name. 285 pub async fn resolve_entry( 286 State(state): State<AppState>, 287 + ExtractOptionalServiceAuth(viewer): ExtractOptionalServiceAuth, 288 ExtractXrpc(args): ExtractXrpc<ResolveEntryRequest>, 289 ) -> Result<Json<ResolveEntryOutput<'static>>, XrpcErrorResponse> { 290 + let _viewer: Viewer = viewer; 291 + 292 // Resolve actor to DID 293 let did = resolve_actor(&state, &args.actor).await?; 294 let did_str = did.as_str(); ··· 308 XrpcErrorResponse::internal_error("Database query failed") 309 }) 310 }, 311 + // TODO: fix this, as we do need the entries to know for sure which, in case of collisions 312 async { 313 state 314 .clickhouse ··· 537 538 Ok(profile_data) 539 } 540 + 541 + /// Parse cursor string to i64 timestamp millis 542 + fn parse_cursor(cursor: Option<&str>) -> Result<Option<i64>, XrpcErrorResponse> { 543 + cursor 544 + .map(|c| { 545 + c.parse::<i64>() 546 + .map_err(|_| XrpcErrorResponse::invalid_request("Invalid cursor format")) 547 + }) 548 + .transpose() 549 + } 550 + 551 + /// Handle sh.weaver.notebook.getNotebookFeed 552 + /// 553 + /// Returns a global feed of notebooks. 554 + pub async fn get_notebook_feed( 555 + State(state): State<AppState>, 556 + ExtractOptionalServiceAuth(viewer): ExtractOptionalServiceAuth, 557 + ExtractXrpc(args): ExtractXrpc<GetNotebookFeedRequest>, 558 + ) -> Result<Json<GetNotebookFeedOutput<'static>>, XrpcErrorResponse> { 559 + let _viewer: Viewer = viewer; 560 + 561 + let limit = args.limit.unwrap_or(50).clamp(1, 100) as u32; 562 + let cursor = parse_cursor(args.cursor.as_deref())?; 563 + let algorithm = args.algorithm.as_deref().unwrap_or("chronological"); 564 + 565 + // Convert tags to &[&str] if present 566 + let tags_vec: Vec<&str> = args 567 + .tags 568 + .as_ref() 569 + .map(|t| t.iter().map(|s| s.as_ref()).collect()) 570 + .unwrap_or_default(); 571 + let tags = if tags_vec.is_empty() { 572 + None 573 + } else { 574 + Some(tags_vec.as_slice()) 575 + }; 576 + 577 + let notebook_rows = state 578 + .clickhouse 579 + .get_notebook_feed(algorithm, tags, limit + 1, cursor) 580 + .await 581 + .map_err(|e| { 582 + tracing::error!("Failed to get notebook feed: {}", e); 583 + XrpcErrorResponse::internal_error("Database query failed") 584 + })?; 585 + 586 + // Check if there are more 587 + let has_more = notebook_rows.len() > limit as usize; 588 + let notebook_rows: Vec<_> = notebook_rows.into_iter().take(limit as usize).collect(); 589 + 590 + // Collect author DIDs for hydration 591 + let mut all_author_dids: HashSet<&str> = HashSet::new(); 592 + for nb in &notebook_rows { 593 + for did in &nb.author_dids { 594 + all_author_dids.insert(did.as_str()); 595 + } 596 + } 597 + 598 + // Batch fetch profiles 599 + let author_dids_vec: Vec<&str> = all_author_dids.into_iter().collect(); 600 + let profiles = state 601 + .clickhouse 602 + .get_profiles_batch(&author_dids_vec) 603 + .await 604 + .map_err(|e| { 605 + tracing::error!("Failed to batch fetch profiles: {}", e); 606 + XrpcErrorResponse::internal_error("Database query failed") 607 + })?; 608 + 609 + let profile_map: HashMap<&str, &ProfileRow> = 610 + profiles.iter().map(|p| (p.did.as_str(), p)).collect(); 611 + 612 + // Build NotebookViews 613 + let mut notebooks: Vec<NotebookView<'static>> = Vec::with_capacity(notebook_rows.len()); 614 + for nb_row in &notebook_rows { 615 + let notebook_uri = AtUri::new(&nb_row.uri).map_err(|e| { 616 + tracing::error!("Invalid notebook URI in db: {}", e); 617 + XrpcErrorResponse::internal_error("Invalid URI stored") 618 + })?; 619 + 620 + let notebook_cid = Cid::new(nb_row.cid.as_bytes()).map_err(|e| { 621 + tracing::error!("Invalid notebook CID in db: {}", e); 622 + XrpcErrorResponse::internal_error("Invalid CID stored") 623 + })?; 624 + 625 + let authors = hydrate_authors(&nb_row.author_dids, &profile_map)?; 626 + let record = parse_record_json(&nb_row.record)?; 627 + 628 + let notebook = NotebookView::new() 629 + .uri(notebook_uri.into_static()) 630 + .cid(notebook_cid.into_static()) 631 + .authors(authors) 632 + .record(record) 633 + .indexed_at(nb_row.indexed_at.fixed_offset()) 634 + .maybe_title(non_empty_cowstr(&nb_row.title)) 635 + .maybe_path(non_empty_cowstr(&nb_row.path)) 636 + .build(); 637 + 638 + notebooks.push(notebook); 639 + } 640 + 641 + // Build cursor for pagination (created_at millis) 642 + let next_cursor = if has_more { 643 + notebook_rows 644 + .last() 645 + .map(|nb| nb.created_at.timestamp_millis().to_cowstr().into_static()) 646 + } else { 647 + None 648 + }; 649 + 650 + Ok(Json( 651 + GetNotebookFeedOutput { 652 + notebooks, 653 + cursor: next_cursor, 654 + extra_data: None, 655 + } 656 + .into_static(), 657 + )) 658 + } 659 + 660 + /// Handle sh.weaver.notebook.getEntryFeed 661 + /// 662 + /// Returns a global feed of entries. 663 + pub async fn get_entry_feed( 664 + State(state): State<AppState>, 665 + ExtractOptionalServiceAuth(viewer): ExtractOptionalServiceAuth, 666 + ExtractXrpc(args): ExtractXrpc<GetEntryFeedRequest>, 667 + ) -> Result<Json<GetEntryFeedOutput<'static>>, XrpcErrorResponse> { 668 + let _viewer: Viewer = viewer; 669 + 670 + let limit = args.limit.unwrap_or(50).clamp(1, 100) as u32; 671 + let cursor = parse_cursor(args.cursor.as_deref())?; 672 + let algorithm = args.algorithm.as_deref().unwrap_or("chronological"); 673 + 674 + // Convert tags to &[&str] if present 675 + let tags_vec: Vec<&str> = args 676 + .tags 677 + .as_ref() 678 + .map(|t| t.iter().map(|s| s.as_ref()).collect()) 679 + .unwrap_or_default(); 680 + let tags = if tags_vec.is_empty() { 681 + None 682 + } else { 683 + Some(tags_vec.as_slice()) 684 + }; 685 + 686 + let entry_rows = state 687 + .clickhouse 688 + .get_entry_feed(algorithm, tags, limit + 1, cursor) 689 + .await 690 + .map_err(|e| { 691 + tracing::error!("Failed to get entry feed: {}", e); 692 + XrpcErrorResponse::internal_error("Database query failed") 693 + })?; 694 + 695 + // Check if there are more 696 + let has_more = entry_rows.len() > limit as usize; 697 + let entry_rows: Vec<_> = entry_rows.into_iter().take(limit as usize).collect(); 698 + 699 + // Collect author DIDs for hydration 700 + let mut all_author_dids: HashSet<&str> = HashSet::new(); 701 + for entry in &entry_rows { 702 + for did in &entry.author_dids { 703 + all_author_dids.insert(did.as_str()); 704 + } 705 + } 706 + 707 + // Batch fetch profiles 708 + let author_dids_vec: Vec<&str> = all_author_dids.into_iter().collect(); 709 + let profiles = state 710 + .clickhouse 711 + .get_profiles_batch(&author_dids_vec) 712 + .await 713 + .map_err(|e| { 714 + tracing::error!("Failed to batch fetch profiles: {}", e); 715 + XrpcErrorResponse::internal_error("Database query failed") 716 + })?; 717 + 718 + let profile_map: HashMap<&str, &ProfileRow> = 719 + profiles.iter().map(|p| (p.did.as_str(), p)).collect(); 720 + 721 + // Build FeedEntryViews 722 + let mut feed: Vec<FeedEntryView<'static>> = Vec::with_capacity(entry_rows.len()); 723 + for entry_row in &entry_rows { 724 + let entry_view = build_entry_view(entry_row, &profile_map)?; 725 + 726 + let feed_entry = FeedEntryView::new().entry(entry_view).build(); 727 + 728 + feed.push(feed_entry); 729 + } 730 + 731 + // Build cursor for pagination (created_at millis) 732 + let next_cursor = if has_more { 733 + entry_rows 734 + .last() 735 + .map(|e| e.created_at.timestamp_millis().to_cowstr().into_static()) 736 + } else { 737 + None 738 + }; 739 + 740 + Ok(Json( 741 + GetEntryFeedOutput { 742 + feed, 743 + cursor: next_cursor, 744 + extra_data: None, 745 + } 746 + .into_static(), 747 + )) 748 + } 749 + 750 + /// Handle sh.weaver.notebook.getBookEntry 751 + /// 752 + /// Returns an entry at a specific index within a notebook, with prev/next navigation. 753 + pub async fn get_book_entry( 754 + State(state): State<AppState>, 755 + ExtractOptionalServiceAuth(viewer): ExtractOptionalServiceAuth, 756 + ExtractXrpc(args): ExtractXrpc<GetBookEntryRequest>, 757 + ) -> Result<Json<GetBookEntryOutput<'static>>, XrpcErrorResponse> { 758 + let _viewer: Viewer = viewer; 759 + 760 + // Parse the notebook URI 761 + let notebook_uri = &args.notebook; 762 + let authority = notebook_uri.authority(); 763 + let notebook_rkey = notebook_uri 764 + .rkey() 765 + .ok_or_else(|| XrpcErrorResponse::invalid_request("Notebook URI must include rkey"))?; 766 + 767 + // Resolve authority to DID 768 + let notebook_did = resolve_actor(&state, authority).await?; 769 + let notebook_did_str = notebook_did.as_str(); 770 + let notebook_rkey_str = notebook_rkey.as_ref(); 771 + 772 + let index = args.index.unwrap_or(0).max(0) as u32; 773 + 774 + // Fetch entry at index with prev/next 775 + let result = state 776 + .clickhouse 777 + .get_book_entry_at_index(notebook_did_str, notebook_rkey_str, index) 778 + .await 779 + .map_err(|e| { 780 + tracing::error!("Failed to get book entry: {}", e); 781 + XrpcErrorResponse::internal_error("Database query failed") 782 + })?; 783 + 784 + let (current_row, prev_row, next_row) = 785 + result.ok_or_else(|| XrpcErrorResponse::not_found("Entry not found at index"))?; 786 + 787 + // Collect all author DIDs for hydration 788 + let mut all_author_dids: HashSet<&str> = HashSet::new(); 789 + for did in &current_row.author_dids { 790 + all_author_dids.insert(did.as_str()); 791 + } 792 + if let Some(ref prev) = prev_row { 793 + for did in &prev.author_dids { 794 + all_author_dids.insert(did.as_str()); 795 + } 796 + } 797 + if let Some(ref next) = next_row { 798 + for did in &next.author_dids { 799 + all_author_dids.insert(did.as_str()); 800 + } 801 + } 802 + 803 + // Batch fetch profiles 804 + let author_dids_vec: Vec<&str> = all_author_dids.into_iter().collect(); 805 + let profiles = state 806 + .clickhouse 807 + .get_profiles_batch(&author_dids_vec) 808 + .await 809 + .map_err(|e| { 810 + tracing::error!("Failed to fetch profiles: {}", e); 811 + XrpcErrorResponse::internal_error("Database query failed") 812 + })?; 813 + 814 + let profile_map: HashMap<&str, &ProfileRow> = 815 + profiles.iter().map(|p| (p.did.as_str(), p)).collect(); 816 + 817 + // Build the current entry view 818 + let entry_view = build_entry_view(&current_row, &profile_map)?; 819 + 820 + // Build prev/next refs if present 821 + let prev_ref = if let Some(ref prev) = prev_row { 822 + let prev_view = build_entry_view(prev, &profile_map)?; 823 + Some(BookEntryRef::new().entry(prev_view).build()) 824 + } else { 825 + None 826 + }; 827 + 828 + let next_ref = if let Some(ref next) = next_row { 829 + let next_view = build_entry_view(next, &profile_map)?; 830 + Some(BookEntryRef::new().entry(next_view).build()) 831 + } else { 832 + None 833 + }; 834 + 835 + let book_entry = BookEntryView::new() 836 + .entry(entry_view) 837 + .index(index as i64) 838 + .maybe_prev(prev_ref) 839 + .maybe_next(next_ref) 840 + .build(); 841 + 842 + Ok(Json( 843 + GetBookEntryOutput { 844 + value: book_entry, 845 + extra_data: None, 846 + } 847 + .into_static(), 848 + )) 849 + } 850 + 851 + /// Build an EntryView from an EntryRow 852 + fn build_entry_view( 853 + entry_row: &EntryRow, 854 + profile_map: &HashMap<&str, &ProfileRow>, 855 + ) -> Result<EntryView<'static>, XrpcErrorResponse> { 856 + let entry_uri = AtUri::new(&entry_row.uri).map_err(|e| { 857 + tracing::error!("Invalid entry URI in db: {}", e); 858 + XrpcErrorResponse::internal_error("Invalid URI stored") 859 + })?; 860 + 861 + let entry_cid = Cid::new(entry_row.cid.as_bytes()).map_err(|e| { 862 + tracing::error!("Invalid entry CID in db: {}", e); 863 + XrpcErrorResponse::internal_error("Invalid CID stored") 864 + })?; 865 + 866 + let authors = hydrate_authors(&entry_row.author_dids, profile_map)?; 867 + let record = parse_record_json(&entry_row.record)?; 868 + 869 + let entry_view = EntryView::new() 870 + .uri(entry_uri.into_static()) 871 + .cid(entry_cid.into_static()) 872 + .authors(authors) 873 + .record(record) 874 + .indexed_at(entry_row.indexed_at.fixed_offset()) 875 + .maybe_title(non_empty_cowstr(&entry_row.title)) 876 + .maybe_path(non_empty_cowstr(&entry_row.path)) 877 + .build(); 878 + 879 + Ok(entry_view) 880 + }
+2
crates/weaver-index/src/lib.rs
··· 6 pub mod indexer; 7 pub mod parallel_tap; 8 pub mod server; 9 pub mod sqlite; 10 pub mod tap; 11 ··· 14 pub use indexer::{FirehoseIndexer, load_cursor}; 15 pub use parallel_tap::TapIndexer; 16 pub use server::{AppState, ServerConfig}; 17 pub use sqlite::{ShardKey, ShardRouter, SqliteShard};
··· 6 pub mod indexer; 7 pub mod parallel_tap; 8 pub mod server; 9 + pub mod service_identity; 10 pub mod sqlite; 11 pub mod tap; 12 ··· 15 pub use indexer::{FirehoseIndexer, load_cursor}; 16 pub use parallel_tap::TapIndexer; 17 pub use server::{AppState, ServerConfig}; 18 + pub use service_identity::ServiceIdentity; 19 pub use sqlite::{ShardKey, ShardRouter, SqliteShard};
+5 -5
crates/weaver-index/src/parallel_tap.rs
··· 14 use crate::error::{ClickHouseError, Result}; 15 use crate::tap::{TapConfig as TapConsumerConfig, TapConsumer, TapEvent}; 16 17 - /// TAP indexer with multiple parallel websocket connections 18 /// 19 - /// Each worker maintains its own websocket connection to TAP and its own 20 - /// ClickHouse inserter. TAP distributes events across connected clients, 21 /// and its ack-gating mechanism ensures per-DID ordering is preserved 22 /// regardless of which worker handles which events. 23 pub struct TapIndexer { ··· 297 /// then runs INSERT queries to populate target tables for incremental MVs. 298 async fn run_backfill(client: Arc<Client>) { 299 // Wait for in-flight inserts to settle 300 - info!("backfill: waiting 10s for in-flight inserts to settle"); 301 - tokio::time::sleep(Duration::from_secs(10)).await; 302 303 let mvs = Migrator::incremental_mvs(); 304 if mvs.is_empty() {
··· 14 use crate::error::{ClickHouseError, Result}; 15 use crate::tap::{TapConfig as TapConsumerConfig, TapConsumer, TapEvent}; 16 17 + /// Tap indexer with multiple parallel websocket connections 18 /// 19 + /// Each worker maintains its own websocket connection to Tap and its own 20 + /// ClickHouse inserter. Tap distributes events across connected clients, 21 /// and its ack-gating mechanism ensures per-DID ordering is preserved 22 /// regardless of which worker handles which events. 23 pub struct TapIndexer { ··· 297 /// then runs INSERT queries to populate target tables for incremental MVs. 298 async fn run_backfill(client: Arc<Client>) { 299 // Wait for in-flight inserts to settle 300 + info!("backfill: waiting 100s for in-flight inserts to settle"); 301 + tokio::time::sleep(Duration::from_secs(100)).await; 302 303 let mvs = Migrator::incremental_mvs(); 304 if mvs.is_empty() {
+69 -12
crates/weaver-index/src/server.rs
··· 5 use jacquard::api::com_atproto::repo::{ 6 get_record::GetRecordRequest, list_records::ListRecordsRequest, 7 }; 8 - use weaver_api::sh_weaver::actor::get_profile::GetProfileRequest; 9 - use weaver_api::sh_weaver::notebook::{ 10 - get_entry::GetEntryRequest, 11 - resolve_entry::ResolveEntryRequest, 12 - resolve_notebook::ResolveNotebookRequest, 13 - }; 14 use jacquard::client::UnauthenticatedSession; 15 use jacquard::identity::JacquardResolver; 16 use jacquard_axum::IntoRouter; 17 use serde::Serialize; 18 use tower_http::trace::TraceLayer; 19 use tracing::info; 20 21 use crate::clickhouse::Client; 22 use crate::config::ShardConfig; ··· 35 pub clickhouse: Arc<Client>, 36 pub shards: Arc<ShardRouter>, 37 pub resolver: Resolver, 38 } 39 40 impl AppState { 41 - pub fn new(clickhouse: Client, shard_config: ShardConfig) -> Self { 42 Self { 43 clickhouse: Arc::new(clickhouse), 44 shards: Arc::new(ShardRouter::new(shard_config.base_path)), 45 resolver: UnauthenticatedSession::new_slingshot(), 46 } 47 } 48 } 49 50 /// Build the axum router with all XRPC endpoints 51 - pub fn router(state: AppState) -> Router { 52 Router::new() 53 .route("/xrpc/_health", get(health)) 54 .route("/metrics", get(metrics)) 55 // com.atproto.repo.* endpoints (record cache) ··· 57 .merge(ListRecordsRequest::into_router(repo::list_records)) 58 // sh.weaver.actor.* endpoints 59 .merge(GetProfileRequest::into_router(actor::get_profile)) 60 // sh.weaver.notebook.* endpoints 61 - .merge(ResolveNotebookRequest::into_router(notebook::resolve_notebook)) 62 .merge(GetEntryRequest::into_router(notebook::get_entry)) 63 .merge(ResolveEntryRequest::into_router(notebook::resolve_entry)) 64 .layer(TraceLayer::new_for_http()) 65 .with_state(state) 66 } 67 68 /// Prometheus metrics endpoint ··· 105 pub struct ServerConfig { 106 pub host: String, 107 pub port: u16, 108 } 109 110 impl Default for ServerConfig { ··· 112 Self { 113 host: "0.0.0.0".to_string(), 114 port: 3000, 115 } 116 } 117 } ··· 123 .ok() 124 .and_then(|s| s.parse().ok()) 125 .unwrap_or(3000); 126 127 - Self { host, port } 128 } 129 130 pub fn addr(&self) -> SocketAddr { ··· 135 } 136 137 /// Run the HTTP server 138 - pub async fn run(state: AppState, config: ServerConfig) -> Result<(), IndexError> { 139 let addr = config.addr(); 140 - let app = router(state); 141 142 info!("Starting HTTP server on {}", addr); 143
··· 5 use jacquard::api::com_atproto::repo::{ 6 get_record::GetRecordRequest, list_records::ListRecordsRequest, 7 }; 8 use jacquard::client::UnauthenticatedSession; 9 use jacquard::identity::JacquardResolver; 10 + use jacquard::types::did_doc::DidDocument; 11 + use jacquard::types::string::Did; 12 use jacquard_axum::IntoRouter; 13 + use jacquard_axum::did_web::did_web_router; 14 + use jacquard_axum::service_auth::ServiceAuth; 15 use serde::Serialize; 16 use tower_http::trace::TraceLayer; 17 use tracing::info; 18 + use weaver_api::sh_weaver::actor::{ 19 + get_actor_entries::GetActorEntriesRequest, get_actor_notebooks::GetActorNotebooksRequest, 20 + get_profile::GetProfileRequest, 21 + }; 22 + use weaver_api::sh_weaver::notebook::{ 23 + get_book_entry::GetBookEntryRequest, get_entry::GetEntryRequest, 24 + get_entry_feed::GetEntryFeedRequest, get_notebook_feed::GetNotebookFeedRequest, 25 + resolve_entry::ResolveEntryRequest, resolve_notebook::ResolveNotebookRequest, 26 + }; 27 28 use crate::clickhouse::Client; 29 use crate::config::ShardConfig; ··· 42 pub clickhouse: Arc<Client>, 43 pub shards: Arc<ShardRouter>, 44 pub resolver: Resolver, 45 + /// Our service DID (expected audience for service auth JWTs) 46 + pub service_did: Did<'static>, 47 } 48 49 impl AppState { 50 + pub fn new(clickhouse: Client, shard_config: ShardConfig, service_did: Did<'static>) -> Self { 51 Self { 52 clickhouse: Arc::new(clickhouse), 53 shards: Arc::new(ShardRouter::new(shard_config.base_path)), 54 resolver: UnauthenticatedSession::new_slingshot(), 55 + service_did, 56 } 57 } 58 } 59 60 + impl ServiceAuth for AppState { 61 + type Resolver = UnauthenticatedSession<JacquardResolver>; 62 + 63 + fn service_did(&self) -> &Did<'_> { 64 + &self.service_did 65 + } 66 + 67 + fn resolver(&self) -> &Self::Resolver { 68 + &self.resolver 69 + } 70 + 71 + fn require_lxm(&self) -> bool { 72 + true 73 + } 74 + } 75 + 76 /// Build the axum router with all XRPC endpoints 77 + pub fn router(state: AppState, did_doc: DidDocument<'static>) -> Router { 78 Router::new() 79 + // did:web document 80 .route("/xrpc/_health", get(health)) 81 .route("/metrics", get(metrics)) 82 // com.atproto.repo.* endpoints (record cache) ··· 84 .merge(ListRecordsRequest::into_router(repo::list_records)) 85 // sh.weaver.actor.* endpoints 86 .merge(GetProfileRequest::into_router(actor::get_profile)) 87 + .merge(GetActorNotebooksRequest::into_router( 88 + actor::get_actor_notebooks, 89 + )) 90 + .merge(GetActorEntriesRequest::into_router( 91 + actor::get_actor_entries, 92 + )) 93 // sh.weaver.notebook.* endpoints 94 + .merge(ResolveNotebookRequest::into_router( 95 + notebook::resolve_notebook, 96 + )) 97 .merge(GetEntryRequest::into_router(notebook::get_entry)) 98 .merge(ResolveEntryRequest::into_router(notebook::resolve_entry)) 99 + .merge(GetNotebookFeedRequest::into_router( 100 + notebook::get_notebook_feed, 101 + )) 102 + .merge(GetEntryFeedRequest::into_router(notebook::get_entry_feed)) 103 + .merge(GetBookEntryRequest::into_router(notebook::get_book_entry)) 104 .layer(TraceLayer::new_for_http()) 105 .with_state(state) 106 + .merge(did_web_router(did_doc)) 107 } 108 109 /// Prometheus metrics endpoint ··· 146 pub struct ServerConfig { 147 pub host: String, 148 pub port: u16, 149 + /// Service DID for this indexer (used as expected audience for service auth) 150 + pub service_did: Did<'static>, 151 } 152 153 impl Default for ServerConfig { ··· 155 Self { 156 host: "0.0.0.0".to_string(), 157 port: 3000, 158 + // Default to a placeholder - should be overridden in production 159 + service_did: Did::new_static("did:web:index.weaver.sh").unwrap(), 160 } 161 } 162 } ··· 168 .ok() 169 .and_then(|s| s.parse().ok()) 170 .unwrap_or(3000); 171 + let service_did = std::env::var("SERVICE_DID") 172 + .ok() 173 + .and_then(|s| Did::new_owned(s).ok()) 174 + .unwrap_or_else(|| Did::new_static("did:web:index.weaver.sh").unwrap()); 175 176 + Self { 177 + host, 178 + port, 179 + service_did, 180 + } 181 } 182 183 pub fn addr(&self) -> SocketAddr { ··· 188 } 189 190 /// Run the HTTP server 191 + pub async fn run( 192 + state: AppState, 193 + config: ServerConfig, 194 + did_doc: DidDocument<'static>, 195 + ) -> Result<(), IndexError> { 196 let addr = config.addr(); 197 + let app = router(state, did_doc); 198 199 info!("Starting HTTP server on {}", addr); 200
+173
crates/weaver-index/src/service_identity.rs
···
··· 1 + //! Service identity management 2 + //! 3 + //! Handles keypair generation, persistence, and DID document creation 4 + //! for service authentication. 5 + 6 + use std::path::Path; 7 + 8 + use jacquard::from_json_value; 9 + use jacquard::types::crypto::multikey; 10 + use jacquard::types::did_doc::DidDocument; 11 + use jacquard::types::string::Did; 12 + use k256::ecdsa::SigningKey; 13 + use miette::{IntoDiagnostic, Result, WrapErr}; 14 + use serde_json::json; 15 + use tracing::info; 16 + 17 + /// Service identity containing the signing keypair 18 + pub struct ServiceIdentity { 19 + signing_key: SigningKey, 20 + public_key_multibase: String, 21 + } 22 + 23 + impl ServiceIdentity { 24 + /// Load or generate a service identity keypair 25 + /// 26 + /// If the key file exists, loads it. Otherwise generates a new keypair 27 + /// and saves it to the specified path. 28 + pub fn load_or_generate(key_path: &Path) -> Result<Self> { 29 + if key_path.exists() { 30 + Self::load(key_path) 31 + } else { 32 + let identity = Self::generate()?; 33 + identity.save(key_path)?; 34 + Ok(identity) 35 + } 36 + } 37 + 38 + /// Generate a new random keypair 39 + pub fn generate() -> Result<Self> { 40 + info!("generating new service identity keypair"); 41 + let signing_key = SigningKey::random(&mut rand::thread_rng()); 42 + let public_key_multibase = Self::encode_public_key(&signing_key); 43 + Ok(Self { 44 + signing_key, 45 + public_key_multibase, 46 + }) 47 + } 48 + 49 + /// Load keypair from file 50 + fn load(key_path: &Path) -> Result<Self> { 51 + info!(?key_path, "loading service identity keypair"); 52 + let key_bytes = std::fs::read(key_path) 53 + .into_diagnostic() 54 + .wrap_err("failed to read service key file")?; 55 + 56 + if key_bytes.len() != 32 { 57 + miette::bail!( 58 + "invalid key file: expected 32 bytes, got {}", 59 + key_bytes.len() 60 + ); 61 + } 62 + 63 + let signing_key = SigningKey::from_slice(&key_bytes) 64 + .map_err(|e| miette::miette!("invalid key data: {}", e))?; 65 + let public_key_multibase = Self::encode_public_key(&signing_key); 66 + 67 + Ok(Self { 68 + signing_key, 69 + public_key_multibase, 70 + }) 71 + } 72 + 73 + /// Save keypair to file 74 + fn save(&self, key_path: &Path) -> Result<()> { 75 + info!(?key_path, "saving service identity keypair"); 76 + 77 + // Ensure parent directory exists 78 + if let Some(parent) = key_path.parent() { 79 + std::fs::create_dir_all(parent) 80 + .into_diagnostic() 81 + .wrap_err("failed to create key directory")?; 82 + } 83 + 84 + let key_bytes = self.signing_key.to_bytes(); 85 + std::fs::write(key_path, key_bytes.to_vec()) 86 + .into_diagnostic() 87 + .wrap_err("failed to write service key file")?; 88 + 89 + // Set restrictive permissions on Unix 90 + #[cfg(unix)] 91 + { 92 + use std::os::unix::fs::PermissionsExt; 93 + let perms = std::fs::Permissions::from_mode(0o600); 94 + std::fs::set_permissions(key_path, perms) 95 + .into_diagnostic() 96 + .wrap_err("failed to set key file permissions")?; 97 + } 98 + 99 + Ok(()) 100 + } 101 + 102 + /// Encode the public key as a multikey string 103 + fn encode_public_key(signing_key: &SigningKey) -> String { 104 + let verifying_key = signing_key.verifying_key(); 105 + let point = verifying_key.to_encoded_point(true); // compressed 106 + let bytes = point.as_bytes(); 107 + // 0xE7 is the multicodec for secp256k1-pub 108 + multikey(0xE7, bytes) 109 + } 110 + 111 + /// Get the public key multibase string 112 + pub fn public_key_multibase(&self) -> &str { 113 + &self.public_key_multibase 114 + } 115 + 116 + /// Get the signing key (for signing JWTs) 117 + pub fn signing_key(&self) -> &SigningKey { 118 + &self.signing_key 119 + } 120 + 121 + /// Build a DID document for this service identity 122 + pub fn did_document(&self, service_did: &Did<'_>) -> DidDocument<'static> { 123 + let did_str = service_did.as_str(); 124 + 125 + let doc = json!({ 126 + "@context": [ 127 + "https://www.w3.org/ns/did/v1", 128 + "https://w3id.org/security/multikey/v1", 129 + "https://w3id.org/security/suites/secp256k1-2019/v1" 130 + ], 131 + "id": did_str, 132 + "verificationMethod": [{ 133 + "id": format!("{}#atproto", did_str), 134 + "type": "Multikey", 135 + "controller": did_str, 136 + "publicKeyMultibase": self.public_key_multibase 137 + }] 138 + }); 139 + 140 + from_json_value::<DidDocument>(doc).expect("valid DID document") 141 + } 142 + 143 + /// Build a DID document with a service endpoint 144 + pub fn did_document_with_service( 145 + &self, 146 + service_did: &Did<'_>, 147 + service_endpoint: &str, 148 + ) -> DidDocument<'static> { 149 + let did_str = service_did.as_str(); 150 + 151 + let doc = json!({ 152 + "@context": [ 153 + "https://www.w3.org/ns/did/v1", 154 + "https://w3id.org/security/multikey/v1", 155 + "https://w3id.org/security/suites/secp256k1-2019/v1" 156 + ], 157 + "id": did_str, 158 + "verificationMethod": [{ 159 + "id": format!("{}#atproto", did_str), 160 + "type": "Multikey", 161 + "controller": did_str, 162 + "publicKeyMultibase": self.public_key_multibase 163 + }], 164 + "service": [{ 165 + "id": "#atproto_index", 166 + "type": "AtprotoRecordIndex", 167 + "serviceEndpoint": service_endpoint 168 + }] 169 + }); 170 + 171 + from_json_value::<DidDocument>(doc).expect("valid DID document") 172 + } 173 + }
+1 -3
lexicons/notebook/resolveNotebook.json
··· 30 } 31 } 32 }, 33 - "errors": [ 34 - { "name": "NotebookNotFound" } 35 - ] 36 } 37 } 38 }
··· 30 } 31 } 32 }, 33 + "errors": [{ "name": "NotebookNotFound" }] 34 } 35 } 36 }
+74 -1
scripts/test-indexer.sh
··· 56 python3 -c "import urllib.parse; print(urllib.parse.quote('$1', safe=''))" 57 } 58 59 # Test all entry rkeys 60 test_all_entries() { 61 local rkeys=( ··· 91 test_resolve_entry "weaver" "drafts_privacy" 92 echo 93 test_get_entry "3m7tg3ni77tqx" 94 } 95 96 # Main ··· 113 entries) 114 test_all_entries 115 ;; 116 all) 117 test_all 118 ;; 119 *) 120 - echo "Usage: $0 {health|profile|notebook [name]|entry [rkey]|resolve [notebook] [entry]|entries|all}" 121 echo 122 echo "Environment:" 123 echo " INDEXER_URL Base URL (default: http://localhost:3000)"
··· 56 python3 -c "import urllib.parse; print(urllib.parse.quote('$1', safe=''))" 57 } 58 59 + # Get actor notebooks 60 + test_actor_notebooks() { 61 + local actor="${1:-$DID}" 62 + local limit="${2:-10}" 63 + info "Testing sh.weaver.actor.getActorNotebooks (actor=${actor}, limit=${limit})..." 64 + curl -s "${BASE_URL}/xrpc/sh.weaver.actor.getActorNotebooks?actor=${actor}&limit=${limit}" | jq . 65 + } 66 + 67 + # Get actor notebooks with cursor 68 + test_actor_notebooks_cursor() { 69 + local cursor="$1" 70 + local actor="${2:-$DID}" 71 + local limit="${3:-10}" 72 + info "Testing sh.weaver.actor.getActorNotebooks with cursor..." 73 + curl -s "${BASE_URL}/xrpc/sh.weaver.actor.getActorNotebooks?actor=${actor}&limit=${limit}&cursor=${cursor}" | jq . 74 + } 75 + 76 + # Get actor entries 77 + test_actor_entries() { 78 + local actor="${1:-$DID}" 79 + local limit="${2:-10}" 80 + info "Testing sh.weaver.actor.getActorEntries (actor=${actor}, limit=${limit})..." 81 + curl -s "${BASE_URL}/xrpc/sh.weaver.actor.getActorEntries?actor=${actor}&limit=${limit}" | jq . 82 + } 83 + 84 + # Get notebook feed 85 + test_notebook_feed() { 86 + local limit="${1:-10}" 87 + info "Testing sh.weaver.notebook.getNotebookFeed (limit=${limit})..." 88 + curl -s "${BASE_URL}/xrpc/sh.weaver.notebook.getNotebookFeed?limit=${limit}" | jq . 89 + } 90 + 91 + # Get entry feed 92 + test_entry_feed() { 93 + local limit="${1:-10}" 94 + info "Testing sh.weaver.notebook.getEntryFeed (limit=${limit})..." 95 + curl -s "${BASE_URL}/xrpc/sh.weaver.notebook.getEntryFeed?limit=${limit}" | jq . 96 + } 97 + 98 + # Get book entry by index 99 + test_book_entry() { 100 + local notebook_rkey="${1:-weaver}" 101 + local index="${2:-0}" 102 + local notebook_uri="at://${DID}/sh.weaver.notebook.book/${notebook_rkey}" 103 + info "Testing sh.weaver.notebook.getBookEntry (notebook=${notebook_uri}, index=${index})..." 104 + curl -s "${BASE_URL}/xrpc/sh.weaver.notebook.getBookEntry?notebook=$(urlencode "${notebook_uri}")&index=${index}" | jq . 105 + } 106 + 107 # Test all entry rkeys 108 test_all_entries() { 109 local rkeys=( ··· 139 test_resolve_entry "weaver" "drafts_privacy" 140 echo 141 test_get_entry "3m7tg3ni77tqx" 142 + echo 143 + test_actor_notebooks 144 + echo 145 + test_actor_entries 146 + echo 147 + test_notebook_feed 148 + echo 149 + test_entry_feed 150 + echo 151 + test_book_entry "3m4rbphheug2b" 0 152 } 153 154 # Main ··· 171 entries) 172 test_all_entries 173 ;; 174 + actor-notebooks) 175 + test_actor_notebooks "${2:-$DID}" "${3:-10}" 176 + ;; 177 + actor-entries) 178 + test_actor_entries "${2:-$DID}" "${3:-10}" 179 + ;; 180 + notebook-feed) 181 + test_notebook_feed "${2:-10}" 182 + ;; 183 + entry-feed) 184 + test_entry_feed "${2:-10}" 185 + ;; 186 + book-entry) 187 + test_book_entry "${2:-3m4rbphheug2b}" "${3:-0}" 188 + ;; 189 all) 190 test_all 191 ;; 192 *) 193 + echo "Usage: $0 {health|profile|notebook [name]|entry [rkey]|resolve [notebook] [entry]|entries|actor-notebooks [actor] [limit]|actor-entries [actor] [limit]|notebook-feed [limit]|entry-feed [limit]|book-entry [notebook] [index]|all}" 194 echo 195 echo "Environment:" 196 echo " INDEXER_URL Base URL (default: http://localhost:3000)"