My personal site cherry.computer
htmx tailwind axum askama

feat: get latest film watched from Letterboxd

cherry.computer 65ff1b93 0a3ff8e2

verified
+124 -4
+1
server/Cargo.lock
··· 1149 1149 "cached", 1150 1150 "reqwest", 1151 1151 "scraper", 1152 + "serde", 1152 1153 "tokio", 1153 1154 "tower", 1154 1155 "tower-http",
+2 -1
server/Cargo.toml
··· 10 10 askama = "0.14.0" 11 11 axum = "0.8.1" 12 12 cached = { version = "0.56.0", features = ["async"] } 13 - reqwest = "0.12.23" 13 + reqwest = { version = "0.12.23", features = ["json"] } 14 14 scraper = "0.24.0" 15 + serde = { version = "1.0.219", features = ["derive"] } 15 16 tokio = { version = "1.18.2", features = ["full"] } 16 17 tower = "0.5.2" 17 18 tower-http = { version = "0.6.2", features = ["compression-full", "fs", "trace", "set-header"] }
+4 -3
server/src/index.rs
··· 1 1 use crate::scrapers::backloggd::{self, Backloggd}; 2 + use crate::scrapers::letterboxd::{self, Letterboxd}; 2 3 3 4 use askama::Template; 4 5 ··· 6 7 #[template(path = "index.html")] 7 8 pub struct RootTemplate { 8 9 game: Option<Backloggd>, 10 + movie: Option<Letterboxd>, 9 11 } 10 12 11 13 impl RootTemplate { 12 14 pub async fn new() -> RootTemplate { 13 - RootTemplate { 14 - game: backloggd::cached_fetch().await, 15 - } 15 + let (game, movie) = tokio::join!(backloggd::cached_fetch(), letterboxd::cached_fetch(),); 16 + RootTemplate { game, movie } 16 17 } 17 18 }
+1
server/src/scrapers.rs
··· 1 1 pub mod backloggd; 2 + pub mod letterboxd;
+110
server/src/scrapers/letterboxd.rs
··· 1 + use std::{sync::LazyLock, time::Duration}; 2 + 3 + use anyhow::Context; 4 + use cached::proc_macro::once; 5 + use reqwest::{Client, Url}; 6 + use scraper::{ElementRef, Html, Selector}; 7 + use serde::Deserialize; 8 + 9 + #[derive(Debug, Clone)] 10 + pub struct Letterboxd { 11 + pub name: String, 12 + pub poster: String, 13 + } 14 + 15 + #[derive(Deserialize, Debug, Clone)] 16 + pub struct ImageUrlMetadata { 17 + url: String, 18 + } 19 + 20 + struct Extracted { 21 + name: String, 22 + image_url: Url, 23 + } 24 + 25 + impl Letterboxd { 26 + pub async fn fetch() -> anyhow::Result<Self> { 27 + let client = Client::new(); 28 + let html = client 29 + .get("https://letterboxd.com/ivom/films/diary/") 30 + .send() 31 + .await 32 + .context("failed to fetch Letterboxd page")? 33 + .text() 34 + .await 35 + .context("failed to get HTML text")?; 36 + let Extracted { name, image_url } = Self::parse_html(&html)?; 37 + 38 + let image_url_data: ImageUrlMetadata = client 39 + .get(image_url.clone()) 40 + .send() 41 + .await 42 + .with_context(|| format!("failed to fetch image metadata from URL {}", image_url))? 43 + .json() 44 + .await 45 + .context("failed to parse image metadata")?; 46 + 47 + Ok(Self { 48 + name, 49 + poster: image_url_data.url, 50 + }) 51 + } 52 + 53 + fn parse_html(html: &str) -> anyhow::Result<Extracted> { 54 + static FIRST_ENTRY_SEL: LazyLock<Selector> = 55 + LazyLock::new(|| Selector::parse(".diary-entry-row:first-child").unwrap()); 56 + static NAME_SEL: LazyLock<Selector> = LazyLock::new(|| Selector::parse(".name").unwrap()); 57 + static POSTER_COMPONENT_SEL: LazyLock<Selector> = 58 + LazyLock::new(|| Selector::parse(".react-component:has(> .poster)").unwrap()); 59 + 60 + let document = Html::parse_document(html); 61 + 62 + let first_entry = document 63 + .select(&FIRST_ENTRY_SEL) 64 + .next() 65 + .context("couldn't find any journal entries")?; 66 + let name = first_entry 67 + .select(&NAME_SEL) 68 + .next() 69 + .context("couldn't find name element")? 70 + .text() 71 + .next() 72 + .context("name element didn't have any text")? 73 + .to_owned(); 74 + let poster_component = first_entry 75 + .select(&POSTER_COMPONENT_SEL) 76 + .next() 77 + .context("couldn't find post component")?; 78 + 79 + let image_url = Self::build_image_url(poster_component)?; 80 + 81 + Ok(Extracted { name, image_url }) 82 + } 83 + 84 + fn build_image_url(poster_component: ElementRef) -> anyhow::Result<Url> { 85 + let film_path = poster_component 86 + .attr("data-item-link") 87 + .context("poster component didn't have an image URL path")?; 88 + let cache_key = poster_component.attr("data-cache-busting-key"); 89 + let image_size = 230; 90 + let image_url = format!( 91 + "https://letterboxd.com{}/poster/std/{}/", 92 + film_path, image_size 93 + ); 94 + let mut image_url = 95 + Url::parse(&image_url).with_context(|| format!("failed to parse URL {}", image_url))?; 96 + if let Some(cache_key) = cache_key { 97 + image_url.query_pairs_mut().append_pair("k", cache_key); 98 + } 99 + 100 + Ok(image_url) 101 + } 102 + } 103 + 104 + #[once(time = 1800, option = false)] 105 + pub async fn cached_fetch() -> Option<Letterboxd> { 106 + Letterboxd::fetch() 107 + .await 108 + .map_err(|error| tracing::warn!(?error, "failed to scrape Letterboxd")) 109 + .ok() 110 + }
+6
server/templates/index.html
··· 24 24 src="{{ game.image }}" 25 25 alt="Cover art for {{ game.name }}" 26 26 /> 27 + {%- endif %} {% if let Some(movie) = movie -%} 28 + <img 29 + class="p-3" 30 + src="{{ movie.poster }}" 31 + alt="Poster for {{ movie.name }}" 32 + /> 27 33 {%- endif %} 28 34 </div> 29 35 </div>