My personal site cherry.computer
htmx tailwind axum askama

fix: set headers to try and get through Letterboxd's CF bot detection

This will likely end up being a game of whack-a-mole in the long run but
setting these headers seems to be sufficient to avoid getting a JS
challenge for now.

cherry.computer 2bd028ae a73b55c4

verified
+10 -1
server/src/scrapers/letterboxd.rs
··· 27 url: String, 28 } 29 30 pub async fn fetch() -> anyhow::Result<Media> { 31 - let client = Client::new(); 32 let page_url = Url::parse("https://letterboxd.com/ivom/films/diary/") 33 .context("wrote invalid Letterboxd URL")?; 34 let html = client 35 .get(page_url.clone()) 36 .send() 37 .await 38 .context("failed to fetch Letterboxd page")?
··· 27 url: String, 28 } 29 30 + // CloudFlare's bot detection seems to be more generous towards user agents that don't include 31 + // known HTTP clients, like reqwest or curl. 32 + const USER_AGENT: &str = "myivo/1.0.0"; 33 + 34 pub async fn fetch() -> anyhow::Result<Media> { 35 + let client = Client::builder() 36 + .user_agent(USER_AGENT) 37 + .build() 38 + .context("failed to build client")?; 39 let page_url = Url::parse("https://letterboxd.com/ivom/films/diary/") 40 .context("wrote invalid Letterboxd URL")?; 41 let html = client 42 .get(page_url.clone()) 43 + // including this header seems to contribute to getting past CloudFlare's bot detection. 44 + .header("priority", "u=0, i") 45 .send() 46 .await 47 .context("failed to fetch Letterboxd page")?