pstream is dead; long live pstream
taciturnaxolotl.github.io/pstream-ng/
1/* eslint-disable no-console */
2import { isExtensionActive } from "@/backend/extension/messaging";
3import { proxiedFetch } from "@/backend/helpers/fetch";
4import { makeExtensionFetcher } from "@/backend/providers/fetchers";
5import { useAuthStore } from "@/stores/auth";
6import { useLanguageStore } from "@/stores/language";
7
8import { getTmdbLanguageCode } from "./language";
9
/**
 * Locale codes that may be forwarded to IMDb, keyed by the TMDB-style code
 * used elsewhere in the app.
 *
 * Every entry currently maps a code to itself, so in practice this table acts
 * as an allow-list: any code that is not a key here falls back to English in
 * `getImdbLanguageCode`. Add more entries as needed.
 */
const imdbLanguageMap: Record<string, string> = {
  "en-US": "en-US",
  "es-ES": "es-ES",
  "fr-FR": "fr-FR",
  "de-DE": "de-DE",
  "it-IT": "it-IT",
  "pt-PT": "pt-PT",
  "ru-RU": "ru-RU",
  "ja-JP": "ja-JP",
  "zh-CN": "zh-CN",
  "ko-KR": "ko-KR",
  "ar-SA": "ar-SA",
  "hi-IN": "hi-IN",
  "el-GR": "el-GR",
};

/**
 * Convert a TMDB-style language code to an IMDb language code.
 *
 * @param language TMDB-style language code (e.g., "en-US")
 * @returns The mapped IMDb language code, or "en-US" when the code is not in
 *          `imdbLanguageMap`.
 */
function getImdbLanguageCode(language: string): string {
  // Only consult the table's own keys. A bare `imdbLanguageMap[language]`
  // also resolves inherited members such as "toString" or "constructor",
  // which are truthy functions and would be returned instead of a locale.
  const mapped = Object.prototype.hasOwnProperty.call(imdbLanguageMap, language)
    ? imdbLanguageMap[language]
    : undefined;

  // Unknown (or empty) mappings default to English.
  return mapped || "en-US";
}
41
/**
 * Metadata returned by `scrapeIMDb`.
 *
 * Every member is optional: the no-extension/no-proxy fallback path returns an
 * object carrying only `trailer_url`.
 */
interface IMDbMetadata {
  // --- Titles -------------------------------------------------------------
  title?: string;
  original_title?: string;
  title_type?: string;

  // --- Release information ------------------------------------------------
  year?: number | null;
  end_year?: number | null;
  day?: number | null;
  /** 1-based month number (it is used to index the month-name table at `month - 1`). */
  month?: number | null;
  /** Display date in the form "<Month name> <day>, <year>"; left empty when a part is missing. */
  date?: string;

  // --- Ratings and summary ------------------------------------------------
  /** Runtime in seconds (read from the page's `runtime.seconds`). */
  runtime?: number | null;
  age_rating?: string;
  imdb_rating?: number | null;
  votes?: number | null;
  plot?: string;

  // --- Media and links ----------------------------------------------------
  poster_url?: string;
  trailer_url?: string;
  trailer_thumbnail?: string;
  /** The IMDb page URL that was requested, including the locale query parameter. */
  url?: string;

  // --- Lists --------------------------------------------------------------
  genre?: string[];
  cast?: string[];
  directors?: string[];
  writers?: string[];
  keywords?: string[];
  countries?: string[];
  languages?: string[];
  locations?: string[];

  // --- Episode-specific data (only when a season and episode are requested) -
  season?: number;
  episode?: number;
  episode_title?: string;
  episode_plot?: string;
  // NOTE(review): scrapeIMDb assigns `null` to the two fields below when the
  // episode has no rating data, although `null` is not part of their declared
  // type — confirm whether callers expect `null` or `undefined`.
  episode_rating?: number;
  episode_votes?: number;
}
75
/**
 * English month names in calendar order, so index 0 is "January" and index 11
 * is "December". Callers holding a 1-based month number look up `month - 1`.
 */
const months: string[] = [
  "January",
  "February",
  "March",
  "April",
  "May",
  "June",
  "July",
  "August",
  "September",
  "October",
  "November",
  "December",
];
90
/**
 * Pool of desktop browser User-Agent strings; one is chosen at random for each
 * outgoing IMDb request.
 */
const userAgents: string[] = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
];

/**
 * Pick one entry of `userAgents` uniformly at random.
 *
 * @returns A User-Agent header value taken from the pool above.
 */
function getRandomUserAgent(): string {
  // Math.random() is in [0, 1), so the floored product is always a valid index.
  const index = Math.floor(Math.random() * userAgents.length);
  return userAgents[index];
}
101
/**
 * Ask the fallback trailer API for an embeddable trailer URL.
 *
 * Used only when neither the browser extension nor a custom proxy is
 * available. This helper never throws: a network failure, a non-JSON body or
 * an unexpected payload shape is logged and reported as `undefined`, so the
 * caller can raise its own, more actionable error.
 *
 * @param imdbId IMDb title id, placed in the request path as-is
 * @param type   "movie" selects the movie endpoint; anything else selects "tv"
 * @returns The trailer's `embed_url` when the API provides one, else `undefined`
 */
async function fetchFallbackTrailerUrl(
  imdbId: string,
  type?: "movie" | "show",
): Promise<string | undefined> {
  try {
    const res = await fetch(
      `https://fed-trailers.pstream.mov/${type === "movie" ? "movie" : "tv"}/${imdbId}`,
    );
    const payload = await res.json();
    // Optional chaining also covers a `null` or trailer-less JSON body.
    return payload?.trailer?.embed_url || undefined;
  } catch (error) {
    console.warn("[IMDb Scraper] Fallback trailer lookup failed:", error);
    return undefined;
  }
}

/**
 * Scrape metadata for a title from its IMDb page.
 *
 * The page is requested through the browser extension when it is active,
 * otherwise through the user's custom proxy. When neither is available, only
 * a trailer lookup against the fallback trailer API is attempted.
 *
 * NOTE(review): when `season` and `episode` are given, the episodes listing
 * page is requested, yet the parser still reads `aboveTheFoldData` and
 * `mainColumnData` — confirm that page exposes those keys.
 *
 * @param imdbId   IMDb title id used to build the page URL
 * @param season   Season number; episode data is only looked up when both
 *                 `season` and `episode` are truthy
 * @param episode  Episode number within `season`
 * @param language TMDB-style language code; defaults to the user's language
 *                 from the language store
 * @param type     Media type, used only to pick the fallback trailer endpoint
 * @returns The scraped metadata, or an object holding only `trailer_url` on
 *          the fallback path
 * @throws Error when neither extension nor proxy is available and no fallback
 *         trailer was found, when the embedded page data cannot be located,
 *         or when the embedded JSON is malformed or lacks the expected shape
 */
export async function scrapeIMDb(
  imdbId: string,
  season?: number,
  episode?: number,
  language?: string,
  type?: "movie" | "show",
): Promise<IMDbMetadata> {
  // Check if we have a proxy or extension
  const hasExtension = await isExtensionActive();
  const hasProxy = Boolean(useAuthStore.getState().proxySet);

  if (!hasExtension && !hasProxy) {
    // Without a transport we cannot reach IMDb; the custom trailer API is the
    // only thing we can still offer.
    const trailerUrl = await fetchFallbackTrailerUrl(imdbId, type);
    if (trailerUrl) {
      return {
        trailer_url: trailerUrl,
      };
    }
    throw new Error(
      "IMDb scraping requires either the browser extension or a custom proxy to be set up. " +
        "Please install the extension or set up a proxy in the settings.",
    );
  }

  console.log(
    `[IMDb Scraper] Using ${hasExtension ? "browser extension" : "custom proxy"} for requests`,
  );

  // Fall back to the user's configured language when none was provided
  const requestedLanguage =
    language || getTmdbLanguageCode(useLanguageStore.getState().language);

  // Get IMDb language format
  const imdbLanguage = getImdbLanguageCode(requestedLanguage);

  // Construct IMDb URL with language parameter
  let imdbUrl = `https://www.imdb.com/title/${imdbId}/`;
  if (season && episode) {
    imdbUrl += `episodes?season=${season}`;
  }
  const separator = imdbUrl.includes("?") ? "&" : "?";
  imdbUrl += `${separator}locale=${imdbLanguage}`;

  // Add a short random delay (69–196 ms) to avoid rate limiting
  const delay = Math.floor(Math.random() * (197 - 69) + 69);
  await new Promise<void>((resolve) => {
    setTimeout(resolve, delay);
  });

  // Fetch IMDb page using appropriate fetcher
  const requestHeaders = {
    "User-Agent": getRandomUserAgent(),
    "Accept-Language": imdbLanguage,
  };
  let response: string;
  if (hasExtension) {
    const extensionFetcher = makeExtensionFetcher();
    const result = await extensionFetcher(imdbUrl, {
      headers: requestHeaders,
      method: "GET",
      query: {},
      readHeaders: [],
    });
    response = result.body as string;
  } else {
    response = await proxiedFetch<string>(imdbUrl, {
      headers: requestHeaders,
    });
  }

  // Extract JSON data from the page. `[\s\S]` lets the payload span lines and
  // `[^>]*` tolerates extra attributes after `type` on the script tag. The
  // typeof guard covers a fetcher handing back a non-string body despite the
  // declared type.
  const nextDataPattern =
    /<script id="__NEXT_DATA__" type="application\/json"[^>]*>([\s\S]*?)<\/script>/;
  const jsonMatch =
    typeof response === "string" ? response.match(nextDataPattern) : null;
  if (!jsonMatch) {
    throw new Error("Could not find IMDb data on the page");
  }

  const data = JSON.parse(jsonMatch[1]);
  const metadata: IMDbMetadata = {
    title: "",
    original_title: "",
    title_type: "",
    year: null,
    end_year: null,
    day: null,
    month: null,
    date: "",
    runtime: null,
    age_rating: "",
    imdb_rating: null,
    votes: null,
    plot: "",
    poster_url: "",
    trailer_url: "",
    url: imdbUrl,
    genre: [],
    cast: [],
    directors: [],
    writers: [],
    keywords: [],
    countries: [],
    languages: [],
    locations: [],
    season,
    episode,
  };

  try {
    // Extract all the metadata
    const aboveTheFold = data.props.pageProps.aboveTheFoldData;
    const mainColumn = data.props.pageProps.mainColumnData;

    metadata.title = aboveTheFold.titleText?.text || "";
    metadata.original_title = aboveTheFold.originalTitleText?.text || "";
    metadata.title_type = aboveTheFold.titleType?.text || "";
    metadata.age_rating = aboveTheFold.certificate?.rating || "";
    metadata.year = aboveTheFold.releaseYear?.year || null;
    metadata.end_year = aboveTheFold.releaseYear?.endYear || null;
    metadata.day = aboveTheFold.releaseDate?.day || null;
    metadata.month = aboveTheFold.releaseDate?.month || null;

    // Only build the display date when the month resolves to a real name, so
    // an out-of-range month never yields "undefined 5, 2020".
    const monthName = metadata.month ? months[metadata.month - 1] : undefined;
    if (monthName && metadata.day && metadata.year) {
      metadata.date = `${monthName} ${metadata.day}, ${metadata.year}`;
    }

    metadata.runtime = aboveTheFold.runtime?.seconds || null;
    metadata.plot = aboveTheFold.plot?.plotText?.plainText || "";
    metadata.imdb_rating = aboveTheFold.ratingsSummary?.aggregateRating || null;
    metadata.votes = aboveTheFold.ratingsSummary?.voteCount || null;
    metadata.poster_url = aboveTheFold.primaryImage?.url || "";
    const trailerNode = aboveTheFold.primaryVideos?.edges?.[0]?.node;
    metadata.trailer_url = trailerNode?.playbackURLs?.[0]?.url || "";
    metadata.trailer_thumbnail = trailerNode?.thumbnail?.url || "";

    // Extract arrays
    metadata.genre = aboveTheFold.genres?.genres?.map((g: any) => g.text) || [];
    metadata.cast =
      aboveTheFold.castPageTitle?.edges?.map(
        (e: any) => e.node.name.nameText.text,
      ) || [];
    metadata.directors =
      aboveTheFold.directorsPageTitle?.[0]?.credits?.map(
        (c: any) => c.name.nameText.text,
      ) || [];
    metadata.writers =
      mainColumn.writers?.[0]?.credits?.map((c: any) => c.name.nameText.text) ||
      [];
    metadata.keywords =
      aboveTheFold.keywords?.edges?.map((e: any) => e.node.text) || [];
    metadata.countries =
      mainColumn.countriesOfOrigin?.countries?.map((c: any) => c.text) || [];
    metadata.languages =
      mainColumn.spokenLanguages?.spokenLanguages?.map((l: any) => l.text) ||
      [];
    metadata.locations =
      mainColumn.filmingLocations?.edges?.map((e: any) => e.node.text) || [];

    // If season and episode are provided, get episode-specific data
    if (season && episode) {
      const episodeData = mainColumn.episodes?.edges?.find(
        (e: any) => e.node.episodeNumber === episode,
      );

      if (episodeData) {
        metadata.episode_title = episodeData.node.titleText?.text || "";
        metadata.episode_plot =
          episodeData.node.plot?.plotText?.plainText || "";
        metadata.episode_rating =
          episodeData.node.ratingsSummary?.aggregateRating || null;
        metadata.episode_votes =
          episodeData.node.ratingsSummary?.voteCount || null;
      }
    }
  } catch (error) {
    // Log for diagnosis, then let the caller decide how to recover.
    console.error("Error parsing IMDb data:", error);
    throw error;
  }

  return metadata;
}