personal activity index (bluesky, leaflet, substack) pai.desertthunder.dev
rss bluesky
at main 661 lines 20 kB view raw
1#[cfg(not(target_arch = "wasm32"))] 2mod fetchers; 3 4use serde::{Deserialize, Serialize}; 5use std::path::Path; 6use std::{fmt, str::FromStr}; 7use thiserror::Error; 8 9#[cfg(not(target_arch = "wasm32"))] 10pub use fetchers::{BearBlogFetcher, BlueskyFetcher, LeafletFetcher, SubstackFetcher}; 11 12/// Errors that can occur in the Personal Activity Index 13#[derive(Error, Debug)] 14pub enum PaiError { 15 #[error("Unknown source kind: {0}")] 16 UnknownSourceKind(String), 17 18 #[error("Invalid argument: {0}")] 19 InvalidArgument(String), 20 21 #[error("Storage error: {0}")] 22 Storage(String), 23 24 #[error("Fetch error: {0}")] 25 Fetch(String), 26 27 #[error("Parse error: {0}")] 28 Parse(String), 29 30 #[error("Configuration error: {0}")] 31 Config(String), 32 33 #[error("IO error: {0}")] 34 Io(#[from] std::io::Error), 35} 36 37pub type Result<T> = std::result::Result<T, PaiError>; 38 39/// Represents the different source types supported by the indexer 40#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] 41#[serde(rename_all = "lowercase")] 42pub enum SourceKind { 43 Substack, 44 Bluesky, 45 Leaflet, 46 BearBlog, 47} 48 49impl fmt::Display for SourceKind { 50 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 51 match self { 52 SourceKind::Substack => write!(f, "substack"), 53 SourceKind::Bluesky => write!(f, "bluesky"), 54 SourceKind::Leaflet => write!(f, "leaflet"), 55 SourceKind::BearBlog => write!(f, "bearblog"), 56 } 57 } 58} 59 60impl std::str::FromStr for SourceKind { 61 type Err = PaiError; 62 63 fn from_str(s: &str) -> Result<Self> { 64 match s.to_lowercase().as_str() { 65 "substack" => Ok(SourceKind::Substack), 66 "bluesky" => Ok(SourceKind::Bluesky), 67 "leaflet" => Ok(SourceKind::Leaflet), 68 "bearblog" => Ok(SourceKind::BearBlog), 69 _ => Err(PaiError::UnknownSourceKind(s.to_string())), 70 } 71 } 72} 73 74/// Represents a single content item from any source 75#[derive(Debug, Clone, Serialize, Deserialize)] 76pub struct Item { 77 /// Unique identifier for the item 78 pub id: String, 79 /// The source type this item came from 80 pub source_kind: SourceKind, 81 /// The specific source instance identifier (e.g., domain or handle) 82 pub source_id: String, 83 /// Author of the content 84 pub author: Option<String>, 85 /// Title of the content 86 pub title: Option<String>, 87 /// Summary or excerpt of the content 88 pub summary: Option<String>, 89 /// Canonical URL for the content 90 pub url: String, 91 /// Full HTML content 92 pub content_html: Option<String>, 93 /// When the content was published (ISO 8601) 94 pub published_at: String, 95 /// When this item was created in our database (ISO 8601) 96 pub created_at: String, 97} 98 99/// Filter criteria for listing items 100#[derive(Debug, Default, Clone)] 101pub struct ListFilter { 102 /// Filter by source kind 103 pub source_kind: Option<SourceKind>, 104 /// Filter by specific source ID 105 pub source_id: Option<String>, 106 /// Maximum number of items to return 107 pub limit: Option<usize>, 108 /// Only items published at or after this time (ISO 8601) 109 pub since: Option<String>, 110 /// Substring search on title/summary 111 pub query: Option<String>, 112} 113 114/// Storage trait for persisting and retrieving items 115pub trait Storage { 116 /// Insert or replace an item in storage 117 fn insert_or_replace_item(&self, item: &Item) -> Result<()>; 118 119 /// List items matching the given filter 120 fn list_items(&self, filter: &ListFilter) -> Result<Vec<Item>>; 121} 122 123/// Trait for fetching content from a specific source 124pub trait SourceFetcher { 125 /// Synchronize content from this source into storage 126 fn sync(&self, storage: &dyn Storage) -> Result<()>; 127} 128 129/// Configuration for Substack source 130#[derive(Debug, Clone, Deserialize, Serialize)] 131pub struct SubstackConfig { 132 #[serde(default)] 133 pub enabled: bool, 134 pub base_url: String, 135} 136 137/// Configuration for Bluesky source 138#[derive(Debug, Clone, Deserialize, Serialize)] 139pub struct BlueskyConfig { 140 #[serde(default)] 141 pub enabled: bool, 142 pub handle: String, 143} 144 145/// Configuration for a single Leaflet publication 146#[derive(Debug, Clone, Deserialize, Serialize)] 147pub struct LeafletConfig { 148 #[serde(default)] 149 pub enabled: bool, 150 pub id: String, 151 pub base_url: String, 152} 153 154/// Configuration for a single BearBlog publication 155#[derive(Debug, Clone, Deserialize, Serialize)] 156pub struct BearBlogConfig { 157 #[serde(default)] 158 pub enabled: bool, 159 pub id: String, 160 pub base_url: String, 161} 162 163/// Database configuration 164#[derive(Debug, Clone, Deserialize, Serialize, Default)] 165pub struct DatabaseConfig { 166 pub path: Option<String>, 167} 168 169/// Deployment mode configuration 170#[derive(Debug, Clone, Deserialize, Serialize, Default)] 171pub struct DeploymentConfig { 172 #[serde(default)] 173 pub mode: String, 174 pub cloudflare: Option<CloudflareConfig>, 175} 176 177/// Cloudflare deployment configuration 178#[derive(Debug, Clone, Deserialize, Serialize)] 179pub struct CloudflareConfig { 180 pub worker_name: String, 181 pub d1_binding: String, 182 pub database_name: String, 183} 184 185/// Sources configuration section 186#[derive(Debug, Clone, Deserialize, Serialize, Default)] 187pub struct SourcesConfig { 188 pub substack: Option<SubstackConfig>, 189 pub bluesky: Option<BlueskyConfig>, 190 #[serde(default)] 191 pub leaflet: Vec<LeafletConfig>, 192 #[serde(default)] 193 pub bearblog: Vec<BearBlogConfig>, 194} 195 196/// CORS configuration for the HTTP server and Worker 197/// 198/// Supports same-root-domain CORS (e.g., pai.desertthunder.dev from desertthunder.dev) 199/// and local development with a dev key header. 200#[derive(Debug, Clone, Deserialize, Serialize, Default)] 201pub struct CorsConfig { 202 /// List of allowed origins (exact match or same-root-domain) 203 /// Example: ["https://desertthunder.dev", "http://localhost:4321"] 204 #[serde(default)] 205 pub allowed_origins: Vec<String>, 206 207 /// Optional development key for local development 208 /// When set, requests with X-LOCAL-DEV-KEY header matching this value are allowed 209 pub dev_key: Option<String>, 210} 211 212impl CorsConfig { 213 /// Check if an origin is allowed based on exact match or same-root-domain logic. 214 /// 215 /// Same-root-domain means extracting the root domain (last two parts) from both 216 /// the origin and allowed origins, and checking for a match. 217 /// 218 /// Examples: 219 /// - https://pai.desertthunder.dev is allowed if https://desertthunder.dev is in allowed_origins 220 /// - http://localhost:4321 requires exact match 221 pub fn is_origin_allowed(&self, origin: &str) -> bool { 222 if self.allowed_origins.is_empty() { 223 return false; 224 } 225 226 let origin_domain = extract_domain(origin); 227 228 for allowed in &self.allowed_origins { 229 if origin == allowed { 230 return true; 231 } 232 233 let allowed_domain = extract_domain(allowed); 234 if let (Some(origin_root), Some(allowed_root)) = ( 235 extract_root_domain(&origin_domain), 236 extract_root_domain(&allowed_domain), 237 ) { 238 if origin_root == allowed_root { 239 return true; 240 } 241 } 242 } 243 244 false 245 } 246 247 /// Validate if a dev key matches the configured dev key 248 pub fn is_dev_key_valid(&self, key: Option<&str>) -> bool { 249 match (&self.dev_key, key) { 250 (Some(config_key), Some(request_key)) => config_key == request_key, 251 _ => false, 252 } 253 } 254} 255 256/// Extract domain from URL (removes protocol and path) 257fn extract_domain(url: &str) -> String { 258 url.trim_start_matches("https://") 259 .trim_start_matches("http://") 260 .split('/') 261 .next() 262 .unwrap_or("") 263 .split(':') 264 .next() 265 .unwrap_or("") 266 .to_string() 267} 268 269/// Extract root domain (last two parts of domain) 270/// Example: "pai.desertthunder.dev" -> Some("desertthunder.dev") 271/// Example: "localhost" -> None (single part) 272fn extract_root_domain(domain: &str) -> Option<String> { 273 let parts: Vec<&str> = domain.split('.').collect(); 274 if parts.len() >= 2 { 275 Some(format!("{}.{}", parts[parts.len() - 2], parts[parts.len() - 1])) 276 } else { 277 None 278 } 279} 280 281/// Configuration for all sources 282#[derive(Debug, Clone, Deserialize, Serialize, Default)] 283pub struct Config { 284 #[serde(default)] 285 pub database: DatabaseConfig, 286 #[serde(default)] 287 pub deployment: DeploymentConfig, 288 #[serde(default)] 289 pub sources: SourcesConfig, 290 #[serde(default)] 291 pub cors: CorsConfig, 292} 293 294impl Config { 295 /// Load configuration from a TOML file 296 /// 297 /// Reads and parses the config file, validating the structure and required fields. 298 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> { 299 let content = 300 std::fs::read_to_string(path).map_err(|e| PaiError::Config(format!("Failed to read config file: {e}")))?; 301 Self::from_str(&content) 302 } 303} 304 305impl FromStr for Config { 306 type Err = PaiError; 307 308 fn from_str(s: &str) -> Result<Self> { 309 toml::from_str(s).map_err(|e| PaiError::Config(format!("Failed to parse config: {e}"))) 310 } 311} 312 313/// Synchronize all enabled sources 314/// 315/// Calls each configured source fetcher to retrieve and store content. 316/// Returns the number of sources successfully synced. 317/// 318/// Filters sources based on optional kind and source_id parameters. 319#[cfg(not(target_arch = "wasm32"))] 320pub fn sync_all_sources( 321 config: &Config, storage: &dyn Storage, kind: Option<SourceKind>, source_id: Option<&str>, 322) -> Result<usize> { 323 let mut synced_count = 0; 324 325 if let Some(ref substack_config) = config.sources.substack { 326 let should_sync = substack_config.enabled 327 && match (kind, source_id) { 328 (Some(k), _) if k != SourceKind::Substack => false, 329 (_, Some(sid)) => { 330 let substack_id = substack_config 331 .base_url 332 .trim_start_matches("https://") 333 .trim_start_matches("http://") 334 .trim_end_matches('/'); 335 substack_id == sid 336 } 337 _ => true, 338 }; 339 340 if should_sync { 341 let fetcher = SubstackFetcher::new(substack_config.clone()); 342 fetcher.sync(storage)?; 343 synced_count += 1; 344 } 345 } 346 347 if let Some(ref bluesky_config) = config.sources.bluesky { 348 let should_sync = bluesky_config.enabled 349 && match (kind, source_id) { 350 (Some(k), _) if k != SourceKind::Bluesky => false, 351 (_, Some(sid)) => bluesky_config.handle == sid, 352 _ => true, 353 }; 354 355 if should_sync { 356 let fetcher = BlueskyFetcher::new(bluesky_config.clone()); 357 fetcher.sync(storage)?; 358 synced_count += 1; 359 } 360 } 361 362 for leaflet_config in &config.sources.leaflet { 363 if !leaflet_config.enabled { 364 continue; 365 } 366 367 let should_sync = match (kind, source_id) { 368 (Some(k), _) if k != SourceKind::Leaflet => false, 369 (_, Some(sid)) => leaflet_config.id == sid, 370 _ => true, 371 }; 372 373 if should_sync { 374 let fetcher = LeafletFetcher::new(leaflet_config.clone()); 375 fetcher.sync(storage)?; 376 synced_count += 1; 377 } 378 } 379 380 for bearblog_config in &config.sources.bearblog { 381 if !bearblog_config.enabled { 382 continue; 383 } 384 385 let should_sync = match (kind, source_id) { 386 (Some(k), _) if k != SourceKind::BearBlog => false, 387 (_, Some(sid)) => bearblog_config.id == sid, 388 _ => true, 389 }; 390 391 if should_sync { 392 let fetcher = BearBlogFetcher::new(bearblog_config.clone()); 393 fetcher.sync(storage)?; 394 synced_count += 1; 395 } 396 } 397 398 Ok(synced_count) 399} 400 401#[cfg(test)] 402mod tests { 403 use super::*; 404 405 #[test] 406 fn source_kind_display() { 407 assert_eq!(SourceKind::Substack.to_string(), "substack"); 408 assert_eq!(SourceKind::Bluesky.to_string(), "bluesky"); 409 assert_eq!(SourceKind::Leaflet.to_string(), "leaflet"); 410 assert_eq!(SourceKind::BearBlog.to_string(), "bearblog"); 411 } 412 413 #[test] 414 fn source_kind_parse() { 415 assert_eq!("substack".parse::<SourceKind>().unwrap(), SourceKind::Substack); 416 assert_eq!("BLUESKY".parse::<SourceKind>().unwrap(), SourceKind::Bluesky); 417 assert_eq!("Leaflet".parse::<SourceKind>().unwrap(), SourceKind::Leaflet); 418 assert_eq!("bearblog".parse::<SourceKind>().unwrap(), SourceKind::BearBlog); 419 assert_eq!("BEARBLOG".parse::<SourceKind>().unwrap(), SourceKind::BearBlog); 420 assert!("invalid".parse::<SourceKind>().is_err()); 421 } 422 423 #[test] 424 fn error_unknown_source_kind() { 425 let err = "unknown".parse::<SourceKind>().unwrap_err(); 426 assert!(matches!(err, PaiError::UnknownSourceKind(_))); 427 assert_eq!(err.to_string(), "Unknown source kind: unknown"); 428 } 429 430 #[test] 431 fn list_filter_default() { 432 let filter = ListFilter::default(); 433 assert!(filter.source_kind.is_none()); 434 assert!(filter.source_id.is_none()); 435 assert!(filter.limit.is_none()); 436 assert!(filter.since.is_none()); 437 assert!(filter.query.is_none()); 438 } 439 440 #[test] 441 fn config_parse_empty() { 442 let config = Config::from_str("").unwrap(); 443 assert!(config.sources.substack.is_none()); 444 assert!(config.sources.bluesky.is_none()); 445 assert!(config.sources.leaflet.is_empty()); 446 } 447 448 #[test] 449 fn config_parse_substack() { 450 let toml = r#" 451[sources.substack] 452enabled = true 453base_url = "https://patternmatched.substack.com" 454"#; 455 let config = Config::from_str(toml).unwrap(); 456 let substack = config.sources.substack.as_ref().unwrap(); 457 assert!(substack.enabled); 458 assert_eq!(substack.base_url, "https://patternmatched.substack.com"); 459 } 460 461 #[test] 462 fn config_parse_bluesky() { 463 let toml = r#" 464[sources.bluesky] 465enabled = true 466handle = "desertthunder.dev" 467"#; 468 let config = Config::from_str(toml).unwrap(); 469 let bluesky = config.sources.bluesky.as_ref().unwrap(); 470 assert!(bluesky.enabled); 471 assert_eq!(bluesky.handle, "desertthunder.dev"); 472 } 473 474 #[test] 475 fn config_parse_leaflet_multiple() { 476 let toml = r#" 477[[sources.leaflet]] 478enabled = true 479id = "desertthunder" 480base_url = "https://desertthunder.leaflet.pub" 481 482[[sources.leaflet]] 483enabled = true 484id = "stormlightlabs" 485base_url = "https://stormlightlabs.leaflet.pub" 486"#; 487 let config = Config::from_str(toml).unwrap(); 488 assert_eq!(config.sources.leaflet.len(), 2); 489 assert_eq!(config.sources.leaflet[0].id, "desertthunder"); 490 assert_eq!(config.sources.leaflet[1].id, "stormlightlabs"); 491 } 492 493 #[test] 494 fn config_parse_all_sources() { 495 let toml = r#" 496[database] 497path = "/tmp/test.db" 498 499[deployment] 500mode = "sqlite" 501 502[sources.substack] 503enabled = true 504base_url = "https://test.substack.com" 505 506[sources.bluesky] 507enabled = false 508handle = "test.bsky.social" 509 510[[sources.leaflet]] 511enabled = true 512id = "test" 513base_url = "https://test.leaflet.pub" 514"#; 515 let config = Config::from_str(toml).unwrap(); 516 assert_eq!(config.database.path, Some("/tmp/test.db".to_string())); 517 assert_eq!(config.deployment.mode, "sqlite"); 518 assert!(config.sources.substack.is_some()); 519 assert!(config.sources.bluesky.is_some()); 520 assert_eq!(config.sources.leaflet.len(), 1); 521 } 522 523 #[test] 524 fn config_parse_invalid_toml() { 525 let toml = "this is not valid toml {{{"; 526 assert!(Config::from_str(toml).is_err()); 527 } 528 529 #[test] 530 fn config_parse_missing_required_field() { 531 let toml = r#" 532[sources.substack] 533enabled = true 534"#; 535 let result = Config::from_str(toml); 536 assert!(result.is_err()); 537 } 538 539 #[test] 540 fn config_default_enabled_false() { 541 let toml = r#" 542[sources.substack] 543base_url = "https://test.substack.com" 544"#; 545 let config = Config::from_str(toml).unwrap(); 546 let substack = config.sources.substack.as_ref().unwrap(); 547 assert!(!substack.enabled); 548 } 549 550 #[test] 551 fn cors_config_exact_match() { 552 let cors = CorsConfig { 553 allowed_origins: vec![ 554 "https://desertthunder.dev".to_string(), 555 "http://localhost:4321".to_string(), 556 ], 557 dev_key: None, 558 }; 559 assert!(cors.is_origin_allowed("https://desertthunder.dev")); 560 assert!(cors.is_origin_allowed("http://localhost:4321")); 561 assert!(!cors.is_origin_allowed("https://evil.com")); 562 } 563 564 #[test] 565 fn cors_config_same_root_domain() { 566 let cors = CorsConfig { allowed_origins: vec!["https://desertthunder.dev".to_string()], dev_key: None }; 567 assert!(cors.is_origin_allowed("https://pai.desertthunder.dev")); 568 assert!(cors.is_origin_allowed("https://api.desertthunder.dev")); 569 assert!(cors.is_origin_allowed("https://desertthunder.dev")); 570 assert!(!cors.is_origin_allowed("https://evil.dev")); 571 } 572 573 #[test] 574 fn cors_config_localhost_requires_exact_match() { 575 let cors = CorsConfig { allowed_origins: vec!["http://localhost:4321".to_string()], dev_key: None }; 576 assert!(cors.is_origin_allowed("http://localhost:4321")); 577 assert!(!cors.is_origin_allowed("http://localhost:3000")); 578 } 579 580 #[test] 581 fn cors_config_empty_origins_denies_all() { 582 let cors = CorsConfig { allowed_origins: vec![], dev_key: None }; 583 assert!(!cors.is_origin_allowed("https://desertthunder.dev")); 584 assert!(!cors.is_origin_allowed("http://localhost:4321")); 585 } 586 587 #[test] 588 fn cors_config_dev_key_valid() { 589 let cors = CorsConfig { allowed_origins: vec![], dev_key: Some("secret-dev-key".to_string()) }; 590 assert!(cors.is_dev_key_valid(Some("secret-dev-key"))); 591 assert!(!cors.is_dev_key_valid(Some("wrong-key"))); 592 assert!(!cors.is_dev_key_valid(None)); 593 } 594 595 #[test] 596 fn cors_config_dev_key_none() { 597 let cors = CorsConfig { allowed_origins: vec![], dev_key: None }; 598 assert!(!cors.is_dev_key_valid(Some("any-key"))); 599 assert!(!cors.is_dev_key_valid(None)); 600 } 601 602 #[test] 603 fn extract_domain_https() { 604 assert_eq!( 605 super::extract_domain("https://desertthunder.dev/path"), 606 "desertthunder.dev" 607 ); 608 assert_eq!( 609 super::extract_domain("https://pai.desertthunder.dev"), 610 "pai.desertthunder.dev" 611 ); 612 } 613 614 #[test] 615 fn extract_domain_http() { 616 assert_eq!(super::extract_domain("http://localhost:4321/api"), "localhost"); 617 assert_eq!(super::extract_domain("http://example.com"), "example.com"); 618 } 619 620 #[test] 621 fn extract_root_domain_multi_level() { 622 assert_eq!( 623 super::extract_root_domain("pai.desertthunder.dev"), 624 Some("desertthunder.dev".to_string()) 625 ); 626 assert_eq!( 627 super::extract_root_domain("api.example.com"), 628 Some("example.com".to_string()) 629 ); 630 assert_eq!( 631 super::extract_root_domain("a.b.c.example.org"), 632 Some("example.org".to_string()) 633 ); 634 } 635 636 #[test] 637 fn extract_root_domain_single_part() { 638 assert_eq!(super::extract_root_domain("localhost"), None); 639 } 640 641 #[test] 642 fn extract_root_domain_two_parts() { 643 assert_eq!( 644 super::extract_root_domain("example.com"), 645 Some("example.com".to_string()) 646 ); 647 } 648 649 #[test] 650 fn config_parse_cors() { 651 let toml = r#" 652[cors] 653allowed_origins = ["https://desertthunder.dev", "http://localhost:4321"] 654dev_key = "my-dev-key" 655"#; 656 let config = Config::from_str(toml).unwrap(); 657 assert_eq!(config.cors.allowed_origins.len(), 2); 658 assert_eq!(config.cors.allowed_origins[0], "https://desertthunder.dev"); 659 assert_eq!(config.cors.dev_key, Some("my-dev-key".to_string())); 660 } 661}