// Personal Activity Index (Bluesky, Leaflet, Substack)
// pai.desertthunder.dev
// rss
// bluesky
1#[cfg(not(target_arch = "wasm32"))]
2mod fetchers;
3
4use serde::{Deserialize, Serialize};
5use std::path::Path;
6use std::{fmt, str::FromStr};
7use thiserror::Error;
8
9#[cfg(not(target_arch = "wasm32"))]
10pub use fetchers::{BearBlogFetcher, BlueskyFetcher, LeafletFetcher, SubstackFetcher};
11
12/// Errors that can occur in the Personal Activity Index
13#[derive(Error, Debug)]
14pub enum PaiError {
15 #[error("Unknown source kind: {0}")]
16 UnknownSourceKind(String),
17
18 #[error("Invalid argument: {0}")]
19 InvalidArgument(String),
20
21 #[error("Storage error: {0}")]
22 Storage(String),
23
24 #[error("Fetch error: {0}")]
25 Fetch(String),
26
27 #[error("Parse error: {0}")]
28 Parse(String),
29
30 #[error("Configuration error: {0}")]
31 Config(String),
32
33 #[error("IO error: {0}")]
34 Io(#[from] std::io::Error),
35}
36
37pub type Result<T> = std::result::Result<T, PaiError>;
38
39/// Represents the different source types supported by the indexer
40#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
41#[serde(rename_all = "lowercase")]
42pub enum SourceKind {
43 Substack,
44 Bluesky,
45 Leaflet,
46 BearBlog,
47}
48
49impl fmt::Display for SourceKind {
50 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51 match self {
52 SourceKind::Substack => write!(f, "substack"),
53 SourceKind::Bluesky => write!(f, "bluesky"),
54 SourceKind::Leaflet => write!(f, "leaflet"),
55 SourceKind::BearBlog => write!(f, "bearblog"),
56 }
57 }
58}
59
60impl std::str::FromStr for SourceKind {
61 type Err = PaiError;
62
63 fn from_str(s: &str) -> Result<Self> {
64 match s.to_lowercase().as_str() {
65 "substack" => Ok(SourceKind::Substack),
66 "bluesky" => Ok(SourceKind::Bluesky),
67 "leaflet" => Ok(SourceKind::Leaflet),
68 "bearblog" => Ok(SourceKind::BearBlog),
69 _ => Err(PaiError::UnknownSourceKind(s.to_string())),
70 }
71 }
72}
73
74/// Represents a single content item from any source
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct Item {
77 /// Unique identifier for the item
78 pub id: String,
79 /// The source type this item came from
80 pub source_kind: SourceKind,
81 /// The specific source instance identifier (e.g., domain or handle)
82 pub source_id: String,
83 /// Author of the content
84 pub author: Option<String>,
85 /// Title of the content
86 pub title: Option<String>,
87 /// Summary or excerpt of the content
88 pub summary: Option<String>,
89 /// Canonical URL for the content
90 pub url: String,
91 /// Full HTML content
92 pub content_html: Option<String>,
93 /// When the content was published (ISO 8601)
94 pub published_at: String,
95 /// When this item was created in our database (ISO 8601)
96 pub created_at: String,
97}
98
99/// Filter criteria for listing items
100#[derive(Debug, Default, Clone)]
101pub struct ListFilter {
102 /// Filter by source kind
103 pub source_kind: Option<SourceKind>,
104 /// Filter by specific source ID
105 pub source_id: Option<String>,
106 /// Maximum number of items to return
107 pub limit: Option<usize>,
108 /// Only items published at or after this time (ISO 8601)
109 pub since: Option<String>,
110 /// Substring search on title/summary
111 pub query: Option<String>,
112}
113
114/// Storage trait for persisting and retrieving items
115pub trait Storage {
116 /// Insert or replace an item in storage
117 fn insert_or_replace_item(&self, item: &Item) -> Result<()>;
118
119 /// List items matching the given filter
120 fn list_items(&self, filter: &ListFilter) -> Result<Vec<Item>>;
121}
122
123/// Trait for fetching content from a specific source
124pub trait SourceFetcher {
125 /// Synchronize content from this source into storage
126 fn sync(&self, storage: &dyn Storage) -> Result<()>;
127}
128
129/// Configuration for Substack source
130#[derive(Debug, Clone, Deserialize, Serialize)]
131pub struct SubstackConfig {
132 #[serde(default)]
133 pub enabled: bool,
134 pub base_url: String,
135}
136
137/// Configuration for Bluesky source
138#[derive(Debug, Clone, Deserialize, Serialize)]
139pub struct BlueskyConfig {
140 #[serde(default)]
141 pub enabled: bool,
142 pub handle: String,
143}
144
145/// Configuration for a single Leaflet publication
146#[derive(Debug, Clone, Deserialize, Serialize)]
147pub struct LeafletConfig {
148 #[serde(default)]
149 pub enabled: bool,
150 pub id: String,
151 pub base_url: String,
152}
153
154/// Configuration for a single BearBlog publication
155#[derive(Debug, Clone, Deserialize, Serialize)]
156pub struct BearBlogConfig {
157 #[serde(default)]
158 pub enabled: bool,
159 pub id: String,
160 pub base_url: String,
161}
162
163/// Database configuration
164#[derive(Debug, Clone, Deserialize, Serialize, Default)]
165pub struct DatabaseConfig {
166 pub path: Option<String>,
167}
168
169/// Deployment mode configuration
170#[derive(Debug, Clone, Deserialize, Serialize, Default)]
171pub struct DeploymentConfig {
172 #[serde(default)]
173 pub mode: String,
174 pub cloudflare: Option<CloudflareConfig>,
175}
176
177/// Cloudflare deployment configuration
178#[derive(Debug, Clone, Deserialize, Serialize)]
179pub struct CloudflareConfig {
180 pub worker_name: String,
181 pub d1_binding: String,
182 pub database_name: String,
183}
184
185/// Sources configuration section
186#[derive(Debug, Clone, Deserialize, Serialize, Default)]
187pub struct SourcesConfig {
188 pub substack: Option<SubstackConfig>,
189 pub bluesky: Option<BlueskyConfig>,
190 #[serde(default)]
191 pub leaflet: Vec<LeafletConfig>,
192 #[serde(default)]
193 pub bearblog: Vec<BearBlogConfig>,
194}
195
196/// CORS configuration for the HTTP server and Worker
197///
198/// Supports same-root-domain CORS (e.g., pai.desertthunder.dev from desertthunder.dev)
199/// and local development with a dev key header.
200#[derive(Debug, Clone, Deserialize, Serialize, Default)]
201pub struct CorsConfig {
202 /// List of allowed origins (exact match or same-root-domain)
203 /// Example: ["https://desertthunder.dev", "http://localhost:4321"]
204 #[serde(default)]
205 pub allowed_origins: Vec<String>,
206
207 /// Optional development key for local development
208 /// When set, requests with X-LOCAL-DEV-KEY header matching this value are allowed
209 pub dev_key: Option<String>,
210}
211
212impl CorsConfig {
213 /// Check if an origin is allowed based on exact match or same-root-domain logic.
214 ///
215 /// Same-root-domain means extracting the root domain (last two parts) from both
216 /// the origin and allowed origins, and checking for a match.
217 ///
218 /// Examples:
219 /// - https://pai.desertthunder.dev is allowed if https://desertthunder.dev is in allowed_origins
220 /// - http://localhost:4321 requires exact match
221 pub fn is_origin_allowed(&self, origin: &str) -> bool {
222 if self.allowed_origins.is_empty() {
223 return false;
224 }
225
226 let origin_domain = extract_domain(origin);
227
228 for allowed in &self.allowed_origins {
229 if origin == allowed {
230 return true;
231 }
232
233 let allowed_domain = extract_domain(allowed);
234 if let (Some(origin_root), Some(allowed_root)) = (
235 extract_root_domain(&origin_domain),
236 extract_root_domain(&allowed_domain),
237 ) {
238 if origin_root == allowed_root {
239 return true;
240 }
241 }
242 }
243
244 false
245 }
246
247 /// Validate if a dev key matches the configured dev key
248 pub fn is_dev_key_valid(&self, key: Option<&str>) -> bool {
249 match (&self.dev_key, key) {
250 (Some(config_key), Some(request_key)) => config_key == request_key,
251 _ => false,
252 }
253 }
254}
255
/// Extract the host from a URL (removes protocol, userinfo, port, path, query and fragment)
///
/// The input is treated as `[scheme://][userinfo@]host[:port][/path][?query][#fragment]`.
/// Only the `http://` and `https://` prefixes are recognized. Input without a
/// scheme is handled the same way, and an empty string yields an empty host.
///
/// Example: "http://localhost:4321/api" -> "localhost"
/// Example: "https://example.com?x=1" -> "example.com"
///
/// NOTE(review): bracketed IPv6 literals (`[::1]:8080`) are not supported;
/// the port split cuts them at the first `:`.
fn extract_domain(url: &str) -> String {
    let without_scheme = url.trim_start_matches("https://").trim_start_matches("http://");

    // The authority ends at the first path, query, or fragment delimiter.
    let authority = without_scheme
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or("");

    // Drop any `userinfo@` prefix so credentials are never mistaken for the host.
    let host_and_port = authority.rsplit('@').next().unwrap_or(authority);

    host_and_port.split(':').next().unwrap_or("").to_string()
}
268
/// Returns the last two dot-separated labels of `domain`, joined by a dot.
///
/// Yields `None` when `domain` contains no dot.
/// Example: "pai.desertthunder.dev" -> Some("desertthunder.dev")
/// Example: "localhost" -> None (single part)
fn extract_root_domain(domain: &str) -> Option<String> {
    // Walk the labels from the right: the first is the last label, the
    // second is the one before it.
    let mut labels = domain.rsplit('.');
    let last = labels.next()?;
    let second_last = labels.next()?;
    Some(format!("{}.{}", second_last, last))
}
280
281/// Configuration for all sources
282#[derive(Debug, Clone, Deserialize, Serialize, Default)]
283pub struct Config {
284 #[serde(default)]
285 pub database: DatabaseConfig,
286 #[serde(default)]
287 pub deployment: DeploymentConfig,
288 #[serde(default)]
289 pub sources: SourcesConfig,
290 #[serde(default)]
291 pub cors: CorsConfig,
292}
293
294impl Config {
295 /// Load configuration from a TOML file
296 ///
297 /// Reads and parses the config file, validating the structure and required fields.
298 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
299 let content =
300 std::fs::read_to_string(path).map_err(|e| PaiError::Config(format!("Failed to read config file: {e}")))?;
301 Self::from_str(&content)
302 }
303}
304
305impl FromStr for Config {
306 type Err = PaiError;
307
308 fn from_str(s: &str) -> Result<Self> {
309 toml::from_str(s).map_err(|e| PaiError::Config(format!("Failed to parse config: {e}")))
310 }
311}
312
313/// Synchronize all enabled sources
314///
315/// Calls each configured source fetcher to retrieve and store content.
316/// Returns the number of sources successfully synced.
317///
318/// Filters sources based on optional kind and source_id parameters.
319#[cfg(not(target_arch = "wasm32"))]
320pub fn sync_all_sources(
321 config: &Config, storage: &dyn Storage, kind: Option<SourceKind>, source_id: Option<&str>,
322) -> Result<usize> {
323 let mut synced_count = 0;
324
325 if let Some(ref substack_config) = config.sources.substack {
326 let should_sync = substack_config.enabled
327 && match (kind, source_id) {
328 (Some(k), _) if k != SourceKind::Substack => false,
329 (_, Some(sid)) => {
330 let substack_id = substack_config
331 .base_url
332 .trim_start_matches("https://")
333 .trim_start_matches("http://")
334 .trim_end_matches('/');
335 substack_id == sid
336 }
337 _ => true,
338 };
339
340 if should_sync {
341 let fetcher = SubstackFetcher::new(substack_config.clone());
342 fetcher.sync(storage)?;
343 synced_count += 1;
344 }
345 }
346
347 if let Some(ref bluesky_config) = config.sources.bluesky {
348 let should_sync = bluesky_config.enabled
349 && match (kind, source_id) {
350 (Some(k), _) if k != SourceKind::Bluesky => false,
351 (_, Some(sid)) => bluesky_config.handle == sid,
352 _ => true,
353 };
354
355 if should_sync {
356 let fetcher = BlueskyFetcher::new(bluesky_config.clone());
357 fetcher.sync(storage)?;
358 synced_count += 1;
359 }
360 }
361
362 for leaflet_config in &config.sources.leaflet {
363 if !leaflet_config.enabled {
364 continue;
365 }
366
367 let should_sync = match (kind, source_id) {
368 (Some(k), _) if k != SourceKind::Leaflet => false,
369 (_, Some(sid)) => leaflet_config.id == sid,
370 _ => true,
371 };
372
373 if should_sync {
374 let fetcher = LeafletFetcher::new(leaflet_config.clone());
375 fetcher.sync(storage)?;
376 synced_count += 1;
377 }
378 }
379
380 for bearblog_config in &config.sources.bearblog {
381 if !bearblog_config.enabled {
382 continue;
383 }
384
385 let should_sync = match (kind, source_id) {
386 (Some(k), _) if k != SourceKind::BearBlog => false,
387 (_, Some(sid)) => bearblog_config.id == sid,
388 _ => true,
389 };
390
391 if should_sync {
392 let fetcher = BearBlogFetcher::new(bearblog_config.clone());
393 fetcher.sync(storage)?;
394 synced_count += 1;
395 }
396 }
397
398 Ok(synced_count)
399}
400
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `toml` into a [`Config`], panicking when it is rejected.
    fn parse_config(toml: &str) -> Config {
        toml.parse().unwrap()
    }

    /// Builds a [`CorsConfig`] from borrowed origins and an optional dev key.
    fn cors(origins: &[&str], dev_key: Option<&str>) -> CorsConfig {
        CorsConfig {
            allowed_origins: origins.iter().map(|origin| origin.to_string()).collect(),
            dev_key: dev_key.map(str::to_string),
        }
    }

    #[test]
    fn source_kind_display() {
        let expected = [
            (SourceKind::Substack, "substack"),
            (SourceKind::Bluesky, "bluesky"),
            (SourceKind::Leaflet, "leaflet"),
            (SourceKind::BearBlog, "bearblog"),
        ];
        for (kind, text) in expected.iter() {
            assert_eq!(kind.to_string(), *text);
        }
    }

    #[test]
    fn source_kind_parse() {
        let accepted = [
            ("substack", SourceKind::Substack),
            ("BLUESKY", SourceKind::Bluesky),
            ("Leaflet", SourceKind::Leaflet),
            ("bearblog", SourceKind::BearBlog),
            ("BEARBLOG", SourceKind::BearBlog),
        ];
        for (text, kind) in accepted.iter() {
            assert_eq!(text.parse::<SourceKind>().unwrap(), *kind);
        }
        assert!("invalid".parse::<SourceKind>().is_err());
    }

    #[test]
    fn error_unknown_source_kind() {
        let err = SourceKind::from_str("unknown").unwrap_err();
        assert!(matches!(err, PaiError::UnknownSourceKind(_)));
        assert_eq!(err.to_string(), "Unknown source kind: unknown");
    }

    #[test]
    fn list_filter_default() {
        let ListFilter { source_kind, source_id, limit, since, query } = ListFilter::default();
        assert!(source_kind.is_none());
        assert!(source_id.is_none());
        assert!(limit.is_none());
        assert!(since.is_none());
        assert!(query.is_none());
    }

    #[test]
    fn config_parse_empty() {
        let sources = parse_config("").sources;
        assert!(sources.substack.is_none());
        assert!(sources.bluesky.is_none());
        assert!(sources.leaflet.is_empty());
    }

    #[test]
    fn config_parse_substack() {
        let config = parse_config(
            r#"
[sources.substack]
enabled = true
base_url = "https://patternmatched.substack.com"
"#,
        );
        let substack = config.sources.substack.as_ref().unwrap();
        assert!(substack.enabled);
        assert_eq!(substack.base_url, "https://patternmatched.substack.com");
    }

    #[test]
    fn config_parse_bluesky() {
        let config = parse_config(
            r#"
[sources.bluesky]
enabled = true
handle = "desertthunder.dev"
"#,
        );
        let bluesky = config.sources.bluesky.as_ref().unwrap();
        assert!(bluesky.enabled);
        assert_eq!(bluesky.handle, "desertthunder.dev");
    }

    #[test]
    fn config_parse_leaflet_multiple() {
        let config = parse_config(
            r#"
[[sources.leaflet]]
enabled = true
id = "desertthunder"
base_url = "https://desertthunder.leaflet.pub"

[[sources.leaflet]]
enabled = true
id = "stormlightlabs"
base_url = "https://stormlightlabs.leaflet.pub"
"#,
        );
        let leaflets = &config.sources.leaflet;
        assert_eq!(leaflets.len(), 2);
        assert_eq!(leaflets[0].id, "desertthunder");
        assert_eq!(leaflets[1].id, "stormlightlabs");
    }

    #[test]
    fn config_parse_all_sources() {
        let config = parse_config(
            r#"
[database]
path = "/tmp/test.db"

[deployment]
mode = "sqlite"

[sources.substack]
enabled = true
base_url = "https://test.substack.com"

[sources.bluesky]
enabled = false
handle = "test.bsky.social"

[[sources.leaflet]]
enabled = true
id = "test"
base_url = "https://test.leaflet.pub"
"#,
        );
        assert_eq!(config.database.path.as_deref(), Some("/tmp/test.db"));
        assert_eq!(config.deployment.mode, "sqlite");
        assert!(config.sources.substack.is_some());
        assert!(config.sources.bluesky.is_some());
        assert_eq!(config.sources.leaflet.len(), 1);
    }

    #[test]
    fn config_parse_invalid_toml() {
        assert!("this is not valid toml {{{".parse::<Config>().is_err());
    }

    #[test]
    fn config_parse_missing_required_field() {
        // `base_url` is required, so leaving it out must be rejected.
        let outcome = r#"
[sources.substack]
enabled = true
"#
        .parse::<Config>();
        assert!(outcome.is_err());
    }

    #[test]
    fn config_default_enabled_false() {
        let config = parse_config(
            r#"
[sources.substack]
base_url = "https://test.substack.com"
"#,
        );
        let substack = config.sources.substack.as_ref().unwrap();
        assert!(!substack.enabled);
    }

    #[test]
    fn cors_config_exact_match() {
        let config = cors(&["https://desertthunder.dev", "http://localhost:4321"], None);
        assert!(config.is_origin_allowed("https://desertthunder.dev"));
        assert!(config.is_origin_allowed("http://localhost:4321"));
        assert!(!config.is_origin_allowed("https://evil.com"));
    }

    #[test]
    fn cors_config_same_root_domain() {
        let config = cors(&["https://desertthunder.dev"], None);
        let allowed = [
            "https://pai.desertthunder.dev",
            "https://api.desertthunder.dev",
            "https://desertthunder.dev",
        ];
        for origin in allowed.iter() {
            assert!(config.is_origin_allowed(origin));
        }
        assert!(!config.is_origin_allowed("https://evil.dev"));
    }

    #[test]
    fn cors_config_localhost_requires_exact_match() {
        let config = cors(&["http://localhost:4321"], None);
        assert!(config.is_origin_allowed("http://localhost:4321"));
        assert!(!config.is_origin_allowed("http://localhost:3000"));
    }

    #[test]
    fn cors_config_empty_origins_denies_all() {
        let config = cors(&[], None);
        assert!(!config.is_origin_allowed("https://desertthunder.dev"));
        assert!(!config.is_origin_allowed("http://localhost:4321"));
    }

    #[test]
    fn cors_config_dev_key_valid() {
        let config = cors(&[], Some("secret-dev-key"));
        assert!(config.is_dev_key_valid(Some("secret-dev-key")));
        assert!(!config.is_dev_key_valid(Some("wrong-key")));
        assert!(!config.is_dev_key_valid(None));
    }

    #[test]
    fn cors_config_dev_key_none() {
        let config = cors(&[], None);
        assert!(!config.is_dev_key_valid(Some("any-key")));
        assert!(!config.is_dev_key_valid(None));
    }

    #[test]
    fn extract_domain_https() {
        assert_eq!(extract_domain("https://desertthunder.dev/path"), "desertthunder.dev");
        assert_eq!(extract_domain("https://pai.desertthunder.dev"), "pai.desertthunder.dev");
    }

    #[test]
    fn extract_domain_http() {
        assert_eq!(extract_domain("http://localhost:4321/api"), "localhost");
        assert_eq!(extract_domain("http://example.com"), "example.com");
    }

    #[test]
    fn extract_root_domain_multi_level() {
        let cases = [
            ("pai.desertthunder.dev", "desertthunder.dev"),
            ("api.example.com", "example.com"),
            ("a.b.c.example.org", "example.org"),
        ];
        for (domain, root) in cases.iter() {
            assert_eq!(extract_root_domain(domain), Some(root.to_string()));
        }
    }

    #[test]
    fn extract_root_domain_single_part() {
        assert_eq!(extract_root_domain("localhost"), None);
    }

    #[test]
    fn extract_root_domain_two_parts() {
        assert_eq!(extract_root_domain("example.com"), Some("example.com".to_string()));
    }

    #[test]
    fn config_parse_cors() {
        let config = parse_config(
            r#"
[cors]
allowed_origins = ["https://desertthunder.dev", "http://localhost:4321"]
dev_key = "my-dev-key"
"#,
        );
        let cors = &config.cors;
        assert_eq!(cors.allowed_origins.len(), 2);
        assert_eq!(cors.allowed_origins[0], "https://desertthunder.dev");
        assert_eq!(cors.dev_key.as_deref(), Some("my-dev-key"));
    }
}