personal activity index (bluesky, leaflet, substack) pai.desertthunder.dev
rss bluesky
at main 133 lines 4.2 kB view raw
1use crate::{Item, LeafletConfig, PaiError, Result, SourceFetcher, SourceKind, Storage}; 2use chrono::Utc; 3use feed_rs::parser; 4 5/// Fetcher for Leaflet publications via RSS 6/// 7/// Retrieves posts from Leaflet publications by parsing their RSS feeds. 8/// Each Leaflet publication provides an RSS feed at {base_url}/rss. 9pub struct LeafletFetcher { 10 config: LeafletConfig, 11 client: reqwest::Client, 12} 13 14impl LeafletFetcher { 15 /// Creates a new Leaflet fetcher with the given configuration 16 pub fn new(config: LeafletConfig) -> Self { 17 Self { config, client: reqwest::Client::new() } 18 } 19 20 /// Fetches and parses the RSS feed 21 async fn fetch_feed(&self) -> Result<feed_rs::model::Feed> { 22 let feed_url = format!("{}/rss", self.config.base_url.trim_end_matches('/')); 23 let response = self 24 .client 25 .get(&feed_url) 26 .send() 27 .await 28 .map_err(|e| PaiError::Fetch(format!("Failed to fetch Leaflet RSS feed: {e}")))?; 29 30 let body = response 31 .text() 32 .await 33 .map_err(|e| PaiError::Fetch(format!("Failed to read response body: {e}")))?; 34 35 parser::parse(body.as_bytes()).map_err(|e| PaiError::Parse(format!("Failed to parse RSS feed: {e}"))) 36 } 37} 38 39impl SourceFetcher for LeafletFetcher { 40 fn sync(&self, storage: &dyn Storage) -> Result<()> { 41 let runtime = 42 tokio::runtime::Runtime::new().map_err(|e| PaiError::Fetch(format!("Failed to create runtime: {e}")))?; 43 44 runtime.block_on(async { 45 let feed = self.fetch_feed().await?; 46 47 for entry in feed.entries { 48 let id = entry.id.clone(); 49 let url = entry 50 .links 51 .first() 52 .map(|link| link.href.clone()) 53 .unwrap_or_else(|| id.clone()); 54 55 let title = entry.title.as_ref().map(|t| t.content.clone()); 56 let summary = entry.summary.as_ref().map(|s| s.content.clone()); 57 let author = entry.authors.first().map(|a| a.name.clone()); 58 let content_html = entry.content.and_then(|c| c.body); 59 60 let published_at = entry 61 .published 62 .or(entry.updated) 63 .map(|dt| dt.to_rfc3339()) 64 .unwrap_or_else(|| Utc::now().to_rfc3339()); 65 66 let item = Item { 67 id, 68 source_kind: SourceKind::Leaflet, 69 source_id: self.config.id.clone(), 70 author, 71 title, 72 summary, 73 url, 74 content_html, 75 published_at, 76 created_at: Utc::now().to_rfc3339(), 77 }; 78 79 storage.insert_or_replace_item(&item)?; 80 } 81 82 Ok(()) 83 }) 84 } 85} 86 87#[cfg(test)] 88mod tests { 89 use super::*; 90 91 #[test] 92 fn parse_valid_rss() { 93 let rss = r#"<?xml version="1.0" encoding="UTF-8"?> 94<rss version="2.0"> 95<channel> 96 <title>Test Leaflet</title> 97 <link>https://test.leaflet.pub</link> 98 <description>Test publication</description> 99 <item> 100 <title>Test Post</title> 101 <link>https://test.leaflet.pub/test-post</link> 102 <guid>test-guid</guid> 103 <pubDate>Mon, 01 Jan 2024 12:00:00 +0000</pubDate> 104 <description>Test summary</description> 105 </item> 106</channel> 107</rss>"#; 108 109 let feed = parser::parse(rss.as_bytes()).unwrap(); 110 assert_eq!(feed.entries.len(), 1); 111 assert_eq!(feed.entries[0].title.as_ref().unwrap().content, "Test Post"); 112 } 113 114 #[test] 115 fn parse_invalid_rss() { 116 let invalid_rss = "this is not valid XML"; 117 let result = parser::parse(invalid_rss.as_bytes()); 118 assert!(result.is_err()); 119 } 120 121 #[test] 122 fn parse_empty_rss() { 123 let rss = r#"<?xml version="1.0" encoding="UTF-8"?> 124<rss version="2.0"> 125<channel> 126 <title>Empty Feed</title> 127</channel> 128</rss>"#; 129 130 let feed = parser::parse(rss.as_bytes()).unwrap(); 131 assert_eq!(feed.entries.len(), 0); 132 } 133}