personal activity index (bluesky, leaflet, substack)
pai.desertthunder.dev
rss
bluesky
1use crate::{Item, LeafletConfig, PaiError, Result, SourceFetcher, SourceKind, Storage};
2use chrono::Utc;
3use feed_rs::parser;
4
5/// Fetcher for Leaflet publications via RSS
6///
7/// Retrieves posts from Leaflet publications by parsing their RSS feeds.
8/// Each Leaflet publication provides an RSS feed at {base_url}/rss.
9pub struct LeafletFetcher {
10 config: LeafletConfig,
11 client: reqwest::Client,
12}
13
14impl LeafletFetcher {
15 /// Creates a new Leaflet fetcher with the given configuration
16 pub fn new(config: LeafletConfig) -> Self {
17 Self { config, client: reqwest::Client::new() }
18 }
19
20 /// Fetches and parses the RSS feed
21 async fn fetch_feed(&self) -> Result<feed_rs::model::Feed> {
22 let feed_url = format!("{}/rss", self.config.base_url.trim_end_matches('/'));
23 let response = self
24 .client
25 .get(&feed_url)
26 .send()
27 .await
28 .map_err(|e| PaiError::Fetch(format!("Failed to fetch Leaflet RSS feed: {e}")))?;
29
30 let body = response
31 .text()
32 .await
33 .map_err(|e| PaiError::Fetch(format!("Failed to read response body: {e}")))?;
34
35 parser::parse(body.as_bytes()).map_err(|e| PaiError::Parse(format!("Failed to parse RSS feed: {e}")))
36 }
37}
38
39impl SourceFetcher for LeafletFetcher {
40 fn sync(&self, storage: &dyn Storage) -> Result<()> {
41 let runtime =
42 tokio::runtime::Runtime::new().map_err(|e| PaiError::Fetch(format!("Failed to create runtime: {e}")))?;
43
44 runtime.block_on(async {
45 let feed = self.fetch_feed().await?;
46
47 for entry in feed.entries {
48 let id = entry.id.clone();
49 let url = entry
50 .links
51 .first()
52 .map(|link| link.href.clone())
53 .unwrap_or_else(|| id.clone());
54
55 let title = entry.title.as_ref().map(|t| t.content.clone());
56 let summary = entry.summary.as_ref().map(|s| s.content.clone());
57 let author = entry.authors.first().map(|a| a.name.clone());
58 let content_html = entry.content.and_then(|c| c.body);
59
60 let published_at = entry
61 .published
62 .or(entry.updated)
63 .map(|dt| dt.to_rfc3339())
64 .unwrap_or_else(|| Utc::now().to_rfc3339());
65
66 let item = Item {
67 id,
68 source_kind: SourceKind::Leaflet,
69 source_id: self.config.id.clone(),
70 author,
71 title,
72 summary,
73 url,
74 content_html,
75 published_at,
76 created_at: Utc::now().to_rfc3339(),
77 };
78
79 storage.insert_or_replace_item(&item)?;
80 }
81
82 Ok(())
83 })
84 }
85}
86
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal RSS 2.0 document containing exactly one item.
    const SINGLE_ITEM_FEED: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
    <title>Test Leaflet</title>
    <link>https://test.leaflet.pub</link>
    <description>Test publication</description>
    <item>
        <title>Test Post</title>
        <link>https://test.leaflet.pub/test-post</link>
        <guid>test-guid</guid>
        <pubDate>Mon, 01 Jan 2024 12:00:00 +0000</pubDate>
        <description>Test summary</description>
    </item>
</channel>
</rss>"#;

    /// RSS 2.0 document whose channel has no items.
    const EMPTY_FEED: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
    <title>Empty Feed</title>
</channel>
</rss>"#;

    /// A well-formed feed yields one entry carrying the item's title.
    #[test]
    fn parse_valid_rss() {
        let feed = parser::parse(SINGLE_ITEM_FEED.as_bytes()).unwrap();

        assert_eq!(feed.entries.len(), 1);
        let title = feed.entries[0].title.as_ref().unwrap();
        assert_eq!(title.content, "Test Post");
    }

    /// Input that is not XML at all is rejected by the parser.
    #[test]
    fn parse_invalid_rss() {
        let outcome = parser::parse("this is not valid XML".as_bytes());

        assert!(outcome.is_err());
    }

    /// A channel without items parses successfully and produces no entries.
    #[test]
    fn parse_empty_rss() {
        let feed = parser::parse(EMPTY_FEED.as_bytes()).unwrap();

        assert_eq!(feed.entries.len(), 0);
    }
}