My personal site cherry.computer
htmx tailwind axum askama

fix: set headers to try and get through Letterboxd's CF bot detection

This will likely end up being a game of whack-a-mole in the long run but
setting these headers seems to be sufficient to avoid getting a JS
challenge for now.

cherry.computer 2bd028ae a73b55c4

verified
+10 -1
server/src/scrapers/letterboxd.rs
··· 27 url: String, 28 } 29 30 pub async fn fetch() -> anyhow::Result<Media> { 31 - let client = Client::new(); 32 let page_url = Url::parse("https://letterboxd.com/ivom/films/diary/") 33 .context("wrote invalid Letterboxd URL")?; 34 let html = client 35 .get(page_url.clone()) 36 .send() 37 .await 38 .context("failed to fetch Letterboxd page")?
··· 27 url: String, 28 } 29 30 + // CloudFlare's bot detection seems to be more generous towards user agents that don't include 31 + // known HTTP clients, like reqwest or curl. 32 + const USER_AGENT: &str = "myivo/1.0.0"; 33 + 34 pub async fn fetch() -> anyhow::Result<Media> { 35 + let client = Client::builder() 36 + .user_agent(USER_AGENT) 37 + .build() 38 + .context("failed to build client")?; 39 let page_url = Url::parse("https://letterboxd.com/ivom/films/diary/") 40 .context("wrote invalid Letterboxd URL")?; 41 let html = client 42 .get(page_url.clone()) 43 + // including this header seems to contribute to getting past CloudFlare's bot detection. 44 + .header("priority", "u=0, i") 45 .send() 46 .await 47 .context("failed to fetch Letterboxd page")?