pstream is dead; long live pstream taciturnaxolotl.github.io/pstream-ng/
at main 293 lines 9.0 kB view raw
/* eslint-disable no-console */
import { isExtensionActive } from "@/backend/extension/messaging";
import { proxiedFetch } from "@/backend/helpers/fetch";
import { makeExtensionFetcher } from "@/backend/providers/fetchers";
import { useAuthStore } from "@/stores/auth";
import { useLanguageStore } from "@/stores/language";

import { getTmdbLanguageCode } from "./language";

// IMDb language code mapping (differs from TMDB format)
// Map from ISO language code to IMDb language parameter
const imdbLanguageMap: Record<string, string> = {
  "en-US": "en-US",
  "es-ES": "es-ES",
  "fr-FR": "fr-FR",
  "de-DE": "de-DE",
  "it-IT": "it-IT",
  "pt-PT": "pt-PT",
  "ru-RU": "ru-RU",
  "ja-JP": "ja-JP",
  "zh-CN": "zh-CN",
  "ko-KR": "ko-KR",
  "ar-SA": "ar-SA",
  "hi-IN": "hi-IN",
  "el-GR": "el-GR",
  // Add more mappings as needed
};

/**
 * Convert a TMDB-style language code to an IMDb language code.
 *
 * Only keys defined directly on `imdbLanguageMap` are considered, so inputs
 * that happen to match inherited object members (e.g. "constructor") fall
 * back to the default instead of returning a non-string value.
 *
 * @param language TMDB-style language code (e.g., "en-US")
 * @returns IMDb language code or default "en-US"
 */
function getImdbLanguageCode(language: string): string {
  const mapped = Object.prototype.hasOwnProperty.call(imdbLanguageMap, language)
    ? imdbLanguageMap[language]
    : undefined;

  // Unknown (or empty) mappings default to English
  return mapped || "en-US";
}

/**
 * Metadata assembled by `scrapeIMDb`. Every field is optional because the
 * trailer-only fallback returns an object containing just `trailer_url`.
 */
interface IMDbMetadata {
  title?: string;
  original_title?: string;
  title_type?: string;
  year?: number | null;
  end_year?: number | null;
  day?: number | null;
  // Used as a 1-based index into `months` when building `date`
  month?: number | null;
  // Formatted as "<Month name> <day>, <year>" when day, month and year are all set
  date?: string;
  // Read from the page's `runtime.seconds` value
  runtime?: number | null;
  age_rating?: string;
  imdb_rating?: number | null;
  votes?: number | null;
  plot?: string;
  poster_url?: string;
  trailer_url?: string;
  trailer_thumbnail?: string;
  // The IMDb URL that was requested
  url?: string;
  genre?: string[];
  cast?: string[];
  directors?: string[];
  writers?: string[];
  keywords?: string[];
  countries?: string[];
  languages?: string[];
  locations?: string[];
  season?: number;
  episode?: number;
  episode_title?: string;
  episode_plot?: string;
  episode_rating?: number;
  episode_votes?: number;
}

// English month names, indexed by (month number - 1)
const months = [
  "January",
  "February",
  "March",
  "April",
  "May",
  "June",
  "July",
  "August",
  "September",
  "October",
  "November",
  "December",
];

// Pool of User-Agent header values; one is picked at random per request
const userAgents = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
];

/**
 * Pick one entry of `userAgents` uniformly at random.
 * @returns A User-Agent header value
 */
function getRandomUserAgent(): string {
  return userAgents[Math.floor(Math.random() * userAgents.length)];
}

/**
 * Fetch an IMDb title page and extract metadata from its embedded
 * `__NEXT_DATA__` JSON.
 *
 * Requests go through the browser extension when it is active, otherwise
 * through the configured proxy. When neither is available, a trailer-only
 * lookup against the custom trailer API is attempted instead; if that yields
 * an embed URL, an object containing only `trailer_url` is returned.
 *
 * @param imdbId IMDb title id, inserted into the request URL as-is
 * @param season Season number; together with `episode` switches the request
 *   to the title's episodes page and enables episode-specific fields
 * @param episode Episode number to look up within the fetched season
 * @param language TMDB-style language code; when empty or omitted, the
 *   language from the language store is used
 * @param type Title kind; only used to choose between the "movie" and "tv"
 *   paths of the trailer API
 * @returns The scraped metadata, or `{ trailer_url }` from the fallback
 * @throws Error when neither extension nor proxy is available and the
 *   trailer fallback produced no embed URL
 * @throws Error when the page contains no `__NEXT_DATA__` script tag
 * @throws Any error raised while parsing the embedded JSON or reading its
 *   fields (field-extraction errors are logged before being rethrown)
 */
export async function scrapeIMDb(
  imdbId: string,
  season?: number,
  episode?: number,
  language?: string,
  type?: "movie" | "show",
): Promise<IMDbMetadata> {
  // Check if we have a proxy or extension
  const hasExtension = await isExtensionActive();
  const hasProxy = Boolean(useAuthStore.getState().proxySet);

  if (!hasExtension && !hasProxy) {
    // Custom API for trailers:
    // Any failure here (network error, non-JSON body, unexpected shape) is
    // logged and ignored so the caller still receives the actionable
    // "extension or proxy required" error below.
    try {
      const trailerRes = await fetch(
        `https://fed-trailers.pstream.mov/${type === "movie" ? "movie" : "tv"}/${imdbId}`,
      );
      const trailerResponse: { trailer?: { embed_url?: string } } | null =
        await trailerRes.json();
      const embedUrl = trailerResponse?.trailer?.embed_url;
      if (embedUrl) {
        return {
          trailer_url: embedUrl,
        };
      }
    } catch (error) {
      console.warn("[IMDb Scraper] Trailer fallback request failed:", error);
    }
    // END CUSTOM API
    throw new Error(
      "IMDb scraping requires either the browser extension or a custom proxy to be set up. " +
        "Please install the extension or set up a proxy in the settings.",
    );
  }

  console.log(
    `[IMDb Scraper] Using ${hasExtension ? "browser extension" : "custom proxy"} for requests`,
  );

  // Get user language if not provided
  const tmdbLanguage =
    language || getTmdbLanguageCode(useLanguageStore.getState().language);

  // Get IMDb language format
  const imdbLanguage = getImdbLanguageCode(tmdbLanguage);

  // Construct IMDb URL with language parameter
  let imdbUrl = `https://www.imdb.com/title/${imdbId}/`;
  if (season && episode) {
    imdbUrl += `episodes?season=${season}`;
  }

  // Add language parameter to URL
  const separator = imdbUrl.includes("?") ? "&" : "?";
  imdbUrl += `${separator}locale=${imdbLanguage}`;

  // Add random delay (69-196 ms) to avoid rate limiting
  const delay = Math.floor(Math.random() * (197 - 69) + 69);
  await new Promise<void>((resolve) => {
    setTimeout(resolve, delay);
  });

  // Fetch IMDb page using appropriate fetcher
  let response: string;
  if (hasExtension) {
    const extensionFetcher = makeExtensionFetcher();
    const result = await extensionFetcher(imdbUrl, {
      headers: {
        "User-Agent": getRandomUserAgent(),
        "Accept-Language": imdbLanguage,
      },
      method: "GET",
      query: {},
      readHeaders: [],
    });
    response = result.body as string;
  } else {
    response = await proxiedFetch<string>(imdbUrl, {
      headers: {
        "User-Agent": getRandomUserAgent(),
        "Accept-Language": imdbLanguage,
      },
    });
  }

  // Extract JSON data from the page
  const jsonMatch = response.match(
    /<script id="__NEXT_DATA__" type="application\/json">(.*?)<\/script>/,
  );
  if (!jsonMatch) {
    throw new Error("Could not find IMDb data on the page");
  }

  const data = JSON.parse(jsonMatch[1]);
  const metadata: IMDbMetadata = {
    title: "",
    original_title: "",
    title_type: "",
    year: null,
    end_year: null,
    day: null,
    month: null,
    date: "",
    runtime: null,
    age_rating: "",
    imdb_rating: null,
    votes: null,
    plot: "",
    poster_url: "",
    trailer_url: "",
    url: imdbUrl,
    genre: [],
    cast: [],
    directors: [],
    writers: [],
    keywords: [],
    countries: [],
    languages: [],
    locations: [],
    season,
    episode,
  };

  try {
    // Extract all the metadata
    const aboveTheFold = data.props.pageProps.aboveTheFoldData;
    const mainColumn = data.props.pageProps.mainColumnData;

    metadata.title = aboveTheFold.titleText?.text || "";
    metadata.original_title = aboveTheFold.originalTitleText?.text || "";
    metadata.title_type = aboveTheFold.titleType?.text || "";
    metadata.age_rating = aboveTheFold.certificate?.rating || "";
    metadata.year = aboveTheFold.releaseYear?.year || null;
    metadata.end_year = aboveTheFold.releaseYear?.endYear || null;
    metadata.day = aboveTheFold.releaseDate?.day || null;
    metadata.month = aboveTheFold.releaseDate?.month || null;

    // Only build a display date when all three components are present
    if (metadata.month && metadata.day && metadata.year) {
      metadata.date = `${months[metadata.month - 1]} ${metadata.day}, ${metadata.year}`;
    }

    metadata.runtime = aboveTheFold.runtime?.seconds || null;
    metadata.plot = aboveTheFold.plot?.plotText?.plainText || "";
    metadata.imdb_rating = aboveTheFold.ratingsSummary?.aggregateRating || null;
    metadata.votes = aboveTheFold.ratingsSummary?.voteCount || null;
    metadata.poster_url = aboveTheFold.primaryImage?.url || "";
    // The first primary video, when present, is used as the trailer
    const trailerNode = aboveTheFold.primaryVideos?.edges?.[0]?.node;
    metadata.trailer_url = trailerNode?.playbackURLs?.[0]?.url || "";
    metadata.trailer_thumbnail = trailerNode?.thumbnail?.url || "";

    // Extract arrays
    metadata.genre = aboveTheFold.genres?.genres?.map((g: any) => g.text) || [];
    metadata.cast =
      aboveTheFold.castPageTitle?.edges?.map(
        (e: any) => e.node.name.nameText.text,
      ) || [];
    metadata.directors =
      aboveTheFold.directorsPageTitle?.[0]?.credits?.map(
        (c: any) => c.name.nameText.text,
      ) || [];
    metadata.writers =
      mainColumn.writers?.[0]?.credits?.map((c: any) => c.name.nameText.text) ||
      [];
    metadata.keywords =
      aboveTheFold.keywords?.edges?.map((e: any) => e.node.text) || [];
    metadata.countries =
      mainColumn.countriesOfOrigin?.countries?.map((c: any) => c.text) || [];
    metadata.languages =
      mainColumn.spokenLanguages?.spokenLanguages?.map((l: any) => l.text) ||
      [];
    metadata.locations =
      mainColumn.filmingLocations?.edges?.map((e: any) => e.node.text) || [];

    // If season and episode are provided, get episode-specific data
    if (season && episode) {
      const episodeData =
        data.props.pageProps.mainColumnData.episodes?.edges?.find(
          (e: any) => e.node.episodeNumber === episode,
        );

      if (episodeData) {
        metadata.episode_title = episodeData.node.titleText?.text || "";
        metadata.episode_plot =
          episodeData.node.plot?.plotText?.plainText || "";
        // NOTE(review): these two fields are declared as `number` on
        // IMDbMetadata but receive `null` when the value is missing; the
        // untyped JSON hides the mismatch from the compiler.
        metadata.episode_rating =
          episodeData.node.ratingsSummary?.aggregateRating || null;
        metadata.episode_votes =
          episodeData.node.ratingsSummary?.voteCount || null;
      }
    }
  } catch (error) {
    console.error("Error parsing IMDb data:", error);
    throw error;
  }

  return metadata;
}