Rust app that exports an RSS file from currently trending Bluesky topics

The app can use the headless_chrome crate to render the actual embed.bsky.app/embed SPA, which gives higher-quality output than the blockquote returned by the oEmbed server, but this is rather slow. If the browser cannot be started, or rendering a post in the browser fails, the app falls back to oEmbed.

Changed files
+264 -30
src
+190 -3
Cargo.lock
··· 73 73 ] 74 74 75 75 [[package]] 76 + name = "anyhow" 77 + version = "1.0.100" 78 + source = "registry+https://github.com/rust-lang/crates.io-index" 79 + checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" 80 + 81 + [[package]] 76 82 name = "ascii" 77 83 version = "1.1.0" 78 84 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 120 126 version = "1.1.2" 121 127 source = "registry+https://github.com/rust-lang/crates.io-index" 122 128 checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" 129 + 130 + [[package]] 131 + name = "auto_generate_cdp" 132 + version = "0.4.5" 133 + source = "registry+https://github.com/rust-lang/crates.io-index" 134 + checksum = "d6e1961a0d5d77969057eba90d448e610d3c439024d135d9dbd98e33ec973520" 135 + dependencies = [ 136 + "convert_case", 137 + "proc-macro2", 138 + "quote", 139 + "serde", 140 + "serde_json", 141 + "ureq", 142 + ] 123 143 124 144 [[package]] 125 145 name = "autocfg" ··· 284 304 checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" 285 305 286 306 [[package]] 307 + name = "byteorder" 308 + version = "1.5.0" 309 + source = "registry+https://github.com/rust-lang/crates.io-index" 310 + checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 311 + 312 + [[package]] 287 313 name = "bytes" 288 314 version = "1.10.1" 289 315 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 468 494 version = "0.4.3" 469 495 source = "registry+https://github.com/rust-lang/crates.io-index" 470 496 checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3" 497 + 498 + [[package]] 499 + name = "convert_case" 500 + version = "0.4.0" 501 + source = "registry+https://github.com/rust-lang/crates.io-index" 502 + checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" 471 503 472 504 [[package]] 473 505 name = "core-foundation" ··· 844 876 ] 845 877 846 878 [[package]] 879 + name = 
"env_home" 880 + version = "0.1.0" 881 + source = "registry+https://github.com/rust-lang/crates.io-index" 882 + checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" 883 + 884 + [[package]] 847 885 name = "equivalent" 848 886 version = "1.0.2" 849 887 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1154 1192 checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" 1155 1193 1156 1194 [[package]] 1195 + name = "headless_chrome" 1196 + version = "1.0.18" 1197 + source = "registry+https://github.com/rust-lang/crates.io-index" 1198 + checksum = "f77a421a200d6314c8830919715d8452320c16e06b37686b13a9942f799dbf9b" 1199 + dependencies = [ 1200 + "anyhow", 1201 + "auto_generate_cdp", 1202 + "base64 0.22.1", 1203 + "derive_builder", 1204 + "log", 1205 + "rand 0.9.2", 1206 + "regex", 1207 + "serde", 1208 + "serde_json", 1209 + "tempfile", 1210 + "thiserror 2.0.17", 1211 + "tungstenite", 1212 + "url", 1213 + "which", 1214 + "winreg 0.55.0", 1215 + ] 1216 + 1217 + [[package]] 1157 1218 name = "heck" 1158 1219 version = "0.4.1" 1159 1220 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1333 1394 "tokio", 1334 1395 "tokio-rustls", 1335 1396 "tower-service", 1336 - "webpki-roots", 1397 + "webpki-roots 1.0.4", 1337 1398 ] 1338 1399 1339 1400 [[package]] ··· 1560 1621 "socket2 0.5.10", 1561 1622 "widestring", 1562 1623 "windows-sys 0.48.0", 1563 - "winreg", 1624 + "winreg 0.50.0", 1564 1625 ] 1565 1626 1566 1627 [[package]] ··· 2835 2896 "wasm-bindgen-futures", 2836 2897 "wasm-streams", 2837 2898 "web-sys", 2838 - "webpki-roots", 2899 + "webpki-roots 1.0.4", 2839 2900 ] 2840 2901 2841 2902 [[package]] ··· 2949 3010 source = "registry+https://github.com/rust-lang/crates.io-index" 2950 3011 checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7" 2951 3012 dependencies = [ 3013 + "log", 2952 3014 "once_cell", 2953 3015 "ring", 2954 3016 "rustls-pki-types", ··· 3248 3310 ] 3249 
3311 3250 3312 [[package]] 3313 + name = "sha1" 3314 + version = "0.10.6" 3315 + source = "registry+https://github.com/rust-lang/crates.io-index" 3316 + checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" 3317 + dependencies = [ 3318 + "cfg-if", 3319 + "cpufeatures", 3320 + "digest", 3321 + ] 3322 + 3323 + [[package]] 3251 3324 name = "sha1_smol" 3252 3325 version = "1.0.1" 3253 3326 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3347 3420 dependencies = [ 3348 3421 "libc", 3349 3422 "windows-sys 0.60.2", 3423 + ] 3424 + 3425 + [[package]] 3426 + name = "socks" 3427 + version = "0.3.4" 3428 + source = "registry+https://github.com/rust-lang/crates.io-index" 3429 + checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" 3430 + dependencies = [ 3431 + "byteorder", 3432 + "libc", 3433 + "winapi", 3350 3434 ] 3351 3435 3352 3436 [[package]] ··· 3796 3880 name = "trending2rss" 3797 3881 version = "0.1.0" 3798 3882 dependencies = [ 3883 + "anyhow", 3799 3884 "chrono", 3800 3885 "futures", 3886 + "headless_chrome", 3801 3887 "jacquard", 3802 3888 "lazy_static", 3803 3889 "miette", ··· 3824 3910 checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" 3825 3911 3826 3912 [[package]] 3913 + name = "tungstenite" 3914 + version = "0.27.0" 3915 + source = "registry+https://github.com/rust-lang/crates.io-index" 3916 + checksum = "eadc29d668c91fcc564941132e17b28a7ceb2f3ebf0b9dae3e03fd7a6748eb0d" 3917 + dependencies = [ 3918 + "bytes", 3919 + "data-encoding", 3920 + "http", 3921 + "httparse", 3922 + "log", 3923 + "rand 0.9.2", 3924 + "sha1", 3925 + "thiserror 2.0.17", 3926 + "utf-8", 3927 + ] 3928 + 3929 + [[package]] 3827 3930 name = "twoway" 3828 3931 version = "0.1.8" 3829 3932 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3873 3976 version = "0.9.0" 3874 3977 source = "registry+https://github.com/rust-lang/crates.io-index" 3875 3978 checksum = 
"8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" 3979 + 3980 + [[package]] 3981 + name = "ureq" 3982 + version = "2.12.1" 3983 + source = "registry+https://github.com/rust-lang/crates.io-index" 3984 + checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" 3985 + dependencies = [ 3986 + "base64 0.22.1", 3987 + "flate2", 3988 + "log", 3989 + "once_cell", 3990 + "rustls", 3991 + "rustls-pki-types", 3992 + "socks", 3993 + "url", 3994 + "webpki-roots 0.26.11", 3995 + ] 3876 3996 3877 3997 [[package]] 3878 3998 name = "url" ··· 4071 4191 4072 4192 [[package]] 4073 4193 name = "webpki-roots" 4194 + version = "0.26.11" 4195 + source = "registry+https://github.com/rust-lang/crates.io-index" 4196 + checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" 4197 + dependencies = [ 4198 + "webpki-roots 1.0.4", 4199 + ] 4200 + 4201 + [[package]] 4202 + name = "webpki-roots" 4074 4203 version = "1.0.4" 4075 4204 source = "registry+https://github.com/rust-lang/crates.io-index" 4076 4205 checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" ··· 4079 4208 ] 4080 4209 4081 4210 [[package]] 4211 + name = "which" 4212 + version = "8.0.0" 4213 + source = "registry+https://github.com/rust-lang/crates.io-index" 4214 + checksum = "d3fabb953106c3c8eea8306e4393700d7657561cb43122571b172bbfb7c7ba1d" 4215 + dependencies = [ 4216 + "env_home", 4217 + "rustix", 4218 + "winsafe", 4219 + ] 4220 + 4221 + [[package]] 4082 4222 name = "widestring" 4083 4223 version = "1.2.1" 4084 4224 source = "registry+https://github.com/rust-lang/crates.io-index" 4085 4225 checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" 4086 4226 4087 4227 [[package]] 4228 + name = "winapi" 4229 + version = "0.3.9" 4230 + source = "registry+https://github.com/rust-lang/crates.io-index" 4231 + checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 4232 + dependencies = [ 4233 + 
"winapi-i686-pc-windows-gnu", 4234 + "winapi-x86_64-pc-windows-gnu", 4235 + ] 4236 + 4237 + [[package]] 4238 + name = "winapi-i686-pc-windows-gnu" 4239 + version = "0.4.0" 4240 + source = "registry+https://github.com/rust-lang/crates.io-index" 4241 + checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 4242 + 4243 + [[package]] 4088 4244 name = "winapi-util" 4089 4245 version = "0.1.11" 4090 4246 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4092 4248 dependencies = [ 4093 4249 "windows-sys 0.61.2", 4094 4250 ] 4251 + 4252 + [[package]] 4253 + name = "winapi-x86_64-pc-windows-gnu" 4254 + version = "0.4.0" 4255 + source = "registry+https://github.com/rust-lang/crates.io-index" 4256 + checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 4095 4257 4096 4258 [[package]] 4097 4259 name = "windows-core" ··· 4210 4372 version = "0.52.0" 4211 4373 source = "registry+https://github.com/rust-lang/crates.io-index" 4212 4374 checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 4375 + dependencies = [ 4376 + "windows-targets 0.52.6", 4377 + ] 4378 + 4379 + [[package]] 4380 + name = "windows-sys" 4381 + version = "0.59.0" 4382 + source = "registry+https://github.com/rust-lang/crates.io-index" 4383 + checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 4213 4384 dependencies = [ 4214 4385 "windows-targets 0.52.6", 4215 4386 ] ··· 4484 4655 "cfg-if", 4485 4656 "windows-sys 0.48.0", 4486 4657 ] 4658 + 4659 + [[package]] 4660 + name = "winreg" 4661 + version = "0.55.0" 4662 + source = "registry+https://github.com/rust-lang/crates.io-index" 4663 + checksum = "cb5a765337c50e9ec252c2069be9bf91c7df47afb103b642ba3a53bf8101be97" 4664 + dependencies = [ 4665 + "cfg-if", 4666 + "windows-sys 0.59.0", 4667 + ] 4668 + 4669 + [[package]] 4670 + name = "winsafe" 4671 + version = "0.0.19" 4672 + source = "registry+https://github.com/rust-lang/crates.io-index" 4673 + checksum 
= "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" 4487 4674 4488 4675 [[package]] 4489 4676 name = "wit-bindgen"
+2
Cargo.toml
··· 4 4 edition = "2024" 5 5 6 6 [dependencies] 7 + anyhow = "1.0.100" 7 8 chrono = "0.4.42" 8 9 futures = "0.3.31" 10 + headless_chrome = "1.0.18" 9 11 jacquard = "0.9.0" 10 12 lazy_static = "1.5.0" 11 13 miette = "7.6.0"
+72 -27
src/main.rs
··· 1 1 use futures::future::join_all; 2 + use headless_chrome::Browser; 3 + use headless_chrome::Tab; 4 + use headless_chrome::protocol::cdp::Target::CreateTarget; 2 5 use jacquard::api::app_bsky::embed::record_with_media::ViewMedia; 3 6 use jacquard::api::app_bsky::embed::record::ViewUnionRecord; 4 7 use jacquard::api::app_bsky::feed::get_feed::{GetFeed, GetFeedResponse}; ··· 61 64 } 62 65 } 63 66 64 - async fn get_posts() -> miette::Result<()> { 67 + async fn get_posts(maybe_browser : Option<Browser> ) -> miette::Result<()> { 65 68 lazy_static! { 66 69 static ref RESOLVER: PublicResolver = PublicResolver::default(); 67 70 static ref PROFILE_RE: Regex = Regex::new(r"^(?P<app>https://bsky.app)?/profile/(?P<user>[^/]+)/feed/(?P<rkey>[^/?]+)").unwrap_or_else(|e| panic!("Invalid regex: {}", e)); ··· 75 78 let response = HTTP.xrpc(API_DOMAIN.clone()).send(&request).await?; 76 79 let output = response.into_output()?; 77 80 81 + let maybe_browser = &maybe_browser; 78 82 eprintln!("Current trending topics from Bluesky:"); 79 83 let future_feed_items = output.topics.iter().enumerate().flat_map(|(i, topic)| { 80 84 PROFILE_RE.captures(&topic.link).map(|groups| { ··· 103 107 let output: RespOutput<'static, GetFeedResponse> = response.into_output()?; 104 108 let future_posts = output.feed.iter().enumerate().into_iter().map(|(j, item) : (usize, &FeedViewPost)| /*-> impl Future<Output = miette::Result<rss:Item>> /* why rust is lame?? 
*/ */ { 105 109 let feed_category = feed_category.clone(); 110 + let maybe_browser = &maybe_browser; 106 111 async move { 107 112 let post_view: &PostView = &item.post; 108 113 // Deserialize the post record from the Data type ··· 110 115 let post: Post = de_post.into_diagnostic()?; 111 116 eprintln!("\t{:02}.(@{})\n\t{} ", j + 1, post_view.author.handle, post.text); 112 117 113 - let post_web_url = format!("https://bsky.app/profile/{handle}/post/{rkey}", handle=post_view.author.handle, rkey=post_view.uri.path().as_ref().map(|path| path.rkey.as_ref()).flatten().unwrap().as_ref()); 114 - let mut query_url = O_EMBED_ENDPOINT.clone(); 115 - query_url.query_pairs_mut().append_pair("url", &post_web_url); 116 - 117 - let o_embed_response: reqwest::Response = HTTP.get(query_url).send().await.into_diagnostic()?; 118 - let o_embed_html: String = match o_embed_response.error_for_status() { 119 - Err(e) => { 120 - match e.status() { 121 - Some(code) => { 122 - match code { 123 - reqwest::StatusCode::FORBIDDEN => String::from("<h1>You must be logged in to view this content</h1>"), 124 - reqwest::StatusCode::NOT_FOUND => String::from("<h1>Post not found??!</h1>"), 125 - _ => format!("Unexpected HTTP status {} on error {:?}", code, e) 118 + let post_handle = &post_view.author.handle; 119 + let post_rkey = post_view.uri.path().as_ref().map(|path| path.rkey.as_ref()).flatten().unwrap().as_ref(); 120 + 121 + let post_web_url = format!("https://bsky.app/profile/{handle}/post/{rkey}", handle=post_handle, rkey=post_rkey); 122 + let get_o_embed_html = async || { 123 + let mut query_url = O_EMBED_ENDPOINT.clone(); 124 + query_url.query_pairs_mut().append_pair("url", &post_web_url); 125 + let o_embed_response: reqwest::Response = HTTP.get(query_url).send().await.into_diagnostic()?; 126 + Ok::<String, miette::Error>(match o_embed_response.error_for_status() { 127 + Err(e) => { 128 + match e.status() { 129 + Some(code) => { 130 + match code { 131 + reqwest::StatusCode::FORBIDDEN => 
String::from("<h1>You must be logged in to view this content</h1>"), 132 + reqwest::StatusCode::NOT_FOUND => String::from("<h1>Post not found??!</h1>"), 133 + _ => format!("Unexpected HTTP status {} on error {:?}", code, e) 134 + } 126 135 } 136 + _ => format!("Unknown error: {:?}", e) 127 137 } 128 - _ => format!("Unknown error: {:?}", e) 129 138 } 130 - } 131 - Ok(response) => { 132 - let o_embed_bytes: jacquard::bytes::Bytes = response.bytes().await.into_diagnostic()?; 133 - let o_embed_json: EmbedResponse = serde_json::from_slice::<EmbedResponse>(o_embed_bytes.as_ref()).into_diagnostic()?; 134 - 135 - match o_embed_json.oembed_type { 136 - EmbedType::Rich(rich) => rich.html, 137 - embed => { 138 - eprintln!("Bluesky embed server sends rich embeds, but we got: {:?}", embed); 139 - String::from("<h1>Unexpected oEmbed Response??</h1>") 139 + Ok(response) => { 140 + let o_embed_bytes: jacquard::bytes::Bytes = response.bytes().await.into_diagnostic()?; 141 + let o_embed_json: EmbedResponse = serde_json::from_slice::<EmbedResponse>(o_embed_bytes.as_ref()).into_diagnostic()?; 142 + 143 + match o_embed_json.oembed_type { 144 + EmbedType::Rich(rich) => rich.html, 145 + embed => { 146 + eprintln!("Bluesky embed server sends rich embeds, but we got: {:?}", embed); 147 + String::from("<h1>Unexpected oEmbed Response??</h1>") 148 + } 140 149 } 141 150 } 142 - } 151 + }) 143 152 }; 153 + 154 + 144 155 156 + let o_embed_html: String = match maybe_browser { 157 + Some(browser) => match (|| -> anyhow::Result<String> { 158 + let embed_url = format!("https://embed.bsky.app/embed/{authority}/app.bsky.feed.post/{rkey}", authority=post_view.uri.authority(), rkey=post_rkey); 159 + eprintln!("opening tab on {}", &embed_url); 160 + let tab: std::sync::Arc<Tab> = browser.new_tab_with_options(CreateTarget { 161 + url: embed_url, 162 + width: None, 163 + height: None, 164 + browser_context_id: None, 165 + enable_begin_frame_control: None, 166 + new_window: None, 167 + background: None, 168 
+ for_tab: None 169 + })?; 170 + let tab = tab.wait_until_navigated()?; 171 + eprintln!("!!PSA!! page finished loading"); 172 + let embed_div = tab.wait_for_element("div#app div")?; 173 + eprintln!("!!PSA!! embed div loaded"); 174 + let _ = embed_div.wait_for_element("p > span")?; /* has the page *REALLY* finished rendering? */ 175 + let result: String = embed_div.get_content()?; 176 + tab.close_target()?; 177 + Ok(result) 178 + })() { 179 + Ok(html) => html, 180 + Err(e) => { 181 + eprintln!("Browser failed with error {:?}, falling back to oEmbed", e); 182 + get_o_embed_html().await? 183 + } 184 + } 185 + _ => get_o_embed_html().await? 186 + }; 145 187 146 188 Ok(ItemBuilder::default() 147 189 .title(match &post_view.author.display_name { ··· 315 357 .enable_all() 316 358 .build() 317 359 .unwrap() 318 - .block_on(get_posts()); 360 + .block_on(get_posts(match Browser::default() { 361 + Ok(browser) => Some(browser), 362 + Err(e) => { eprintln!("Failed to construct browser due to {:?}, will proceed in fallback mode.", e); None } 363 + })); 319 364 }