pstream is dead; long live pstream taciturnaxolotl.github.io/pstream-ng/
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Refactor translator service to be less jank and more modular

vlOd2 81f1272f 5539061a

+361 -224
+73 -2
src/pages/developer/TestView.tsx
import { useCallback, useState } from "react";

import { Button } from "@/components/buttons/Button";
import { usePlayer } from "@/components/player/hooks/usePlayer";
import { PlaybackErrorPart } from "@/pages/parts/player/PlaybackErrorPart";
import { PlayerPart } from "@/pages/parts/player/PlayerPart";
import {
  CaptionListItem,
  PlayerMeta,
  playerStatus,
} from "@/stores/player/slices/source";
import { SourceSliceSource } from "@/stores/player/utils/qualities";

// Placeholder metadata handed to the player for the subtitles test.
const subtitlesTestMeta: PlayerMeta = {
  type: "movie",
  title: "Subtitles Test",
  releaseYear: 2024,
  tmdbId: "0",
};

// HLS stream expected to be served from a local server on port 8000.
const subtitlesTestSource: SourceSliceSource = {
  type: "hls",
  url: "http://localhost:8000/media/master.m3u8",
};

// Caption tracks served next to the test stream; each id doubles as its URL.
const subtitlesTestSubs: CaptionListItem[] = [
  {
    id: "http://localhost:8000/subs/en.srt",
    display: "English",
    language: "en",
    url: "http://localhost:8000/subs/en.srt",
    needsProxy: false,
  },
  {
    id: "http://localhost:8000/subs/en-small.srt",
    display: "English Small",
    language: "en",
    url: "http://localhost:8000/subs/en-small.srt",
    needsProxy: false,
  },
  {
    id: "http://localhost:8000/subs/ro.srt",
    display: "Romanian",
    language: "ro",
    url: "http://localhost:8000/subs/ro.srt",
    needsProxy: false,
  },
];

/**
 * Developer scratch view ("mostly empty view, add whatever you need").
 *
 * Renders two buttons: one throws during render, the other swaps the view for
 * the player loaded with the local subtitles test stream and caption tracks.
 */
export default function TestView() {
  const player = usePlayer();
  const [showPlayer, setShowPlayer] = useState(false);
  const [shouldCrash, setShouldCrash] = useState(false);

  // Throwing during render is the whole point of the "Crash me!" button.
  if (shouldCrash) throw new Error("I crashed");

  const subtitlesTest = useCallback(async () => {
    setShowPlayer(true);
    player.reset();
    // NOTE(review): presumably gives the reset/mount a moment to settle before
    // media is loaded - confirm whether the 100ms wait is still needed.
    await new Promise((done) => {
      setTimeout(done, 100);
    });
    player.setShouldStartFromBeginning(true);
    player.setMeta(subtitlesTestMeta);
    player.playMedia(subtitlesTestSource, subtitlesTestSubs, null);
  }, [player]);

  if (!showPlayer) {
    return (
      <>
        <Button onClick={() => setShouldCrash(true)}>Crash me!</Button>
        <Button onClick={() => subtitlesTest()}>Subtitles test</Button>
      </>
    );
  }

  const hasPlaybackError =
    player && (player as any).status === playerStatus.PLAYBACK_ERROR;

  return (
    <PlayerPart backUrl="/dev/">
      {hasPlaybackError ? <PlaybackErrorPart /> : null}
    </PlayerPart>
  );
}
+5 -5
src/stores/player/slices/source.ts
··· 9 9 selectQuality, 10 10 } from "@/stores/player/utils/qualities"; 11 11 import { useQualityStore } from "@/stores/quality"; 12 + import googletranslate from "@/utils/translation/googletranslate"; 13 + import { translate } from "@/utils/translation/index"; 12 14 import { ValuesOf } from "@/utils/typeguard"; 13 - 14 - import { translateSubtitle } from "../utils/captionstranslation"; 15 15 16 16 export const playerStatus = { 17 17 IDLE: "idle", ··· 515 515 } 516 516 517 517 try { 518 - const result = await translateSubtitle( 519 - targetCaption.id, 520 - store.caption.translateTask!.fetchedTargetCaption!.srtData, 518 + const result = await translate( 519 + store.caption.translateTask!.fetchedTargetCaption!, 521 520 targetLanguage, 521 + googletranslate, 522 522 ); 523 523 if (cancelled) { 524 524 return;
-217
src/stores/player/utils/captionstranslation.ts
··· 1 - import subsrt from "subsrt-ts"; 2 - import { Caption, ContentCaption } from "subsrt-ts/dist/types/handler"; 3 - 4 - const API_URL = 5 - "https://translate.googleapis.com/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&oe=UTF-8&sl=auto"; 6 - const RETRY_COUNT = 3; 7 - const FETCH_RATE = 100; 8 - const SUBTITLES_CACHE: Map<string, ArrayBuffer> = new Map< 9 - string, 10 - ArrayBuffer 11 - >(); 12 - 13 - async function compressStr(string: string): Promise<ArrayBuffer> { 14 - const byteArray = new TextEncoder().encode(string); 15 - const cs = new CompressionStream("deflate"); 16 - const writer = cs.writable.getWriter(); 17 - writer.write(byteArray); 18 - writer.close(); 19 - return new Response(cs.readable).arrayBuffer(); 20 - } 21 - 22 - async function decompressStr(byteArray: ArrayBuffer): Promise<string> { 23 - const cs = new DecompressionStream("deflate"); 24 - const writer = cs.writable.getWriter(); 25 - writer.write(byteArray); 26 - writer.close(); 27 - return new Response(cs.readable).arrayBuffer().then((arrayBuffer) => { 28 - return new TextDecoder().decode(arrayBuffer); 29 - }); 30 - } 31 - 32 - function tryUseCachedCaption( 33 - caption: ContentCaption, 34 - cache: Map<string, string>, 35 - ): boolean { 36 - const text: string | undefined = cache.get(caption.text); 37 - if (text) { 38 - caption.text = text; 39 - return true; 40 - } 41 - return false; 42 - } 43 - 44 - async function translateText( 45 - text: string, 46 - targetLang: string, 47 - ): Promise<string | undefined> { 48 - if (!text) { 49 - return ""; 50 - } 51 - 52 - const response = await ( 53 - await fetch(`${API_URL}&tl=${targetLang}&q=${encodeURIComponent(text)}`, { 54 - method: "GET", 55 - headers: { 56 - Accept: "application/json", 57 - }, 58 - }) 59 - ).json(); 60 - 61 - if (!response) { 62 - throw new Error("Empty response"); 63 - } 64 - 65 - return (response.sentences as any[]) 66 - .map((s: any) => s.trans as string) 67 - .join(""); 68 - } 69 - 70 - async function translateCaption( 
71 - caption: ContentCaption, 72 - targetLang: string, 73 - ): Promise<boolean> { 74 - (caption as any).oldText = caption.text; 75 - let text: string | undefined; 76 - for (let i = 0; i < RETRY_COUNT; i += 1) { 77 - try { 78 - text = await translateText( 79 - caption.text.replaceAll("\n", "<br>"), 80 - targetLang, 81 - ); 82 - if (text) { 83 - text = text.replaceAll("<br>", "\n"); 84 - break; 85 - } 86 - } catch (error) { 87 - console.warn("Re-trying caption translation", caption, error); 88 - } 89 - } 90 - if (!text) { 91 - console.error("Failed to translate caption"); 92 - caption.text = `(CAPTION COULD NOT BE TRANSLATED)\n${caption.text}`; 93 - return false; 94 - } 95 - caption.text = text.trim(); 96 - return true; 97 - } 98 - 99 - async function translateCaptions( 100 - captions: ContentCaption[], 101 - targetLang: string, 102 - ): Promise<boolean> { 103 - // console.log("Translating", captions.length, "captions"); 104 - try { 105 - const results: boolean[] = await Promise.all( 106 - captions.map((c) => translateCaption(c, targetLang)), 107 - ); 108 - 109 - const successCount = results.filter((v) => v).length; 110 - const failedCount = results.length - successCount; 111 - const successPercentage = (successCount / results.length) * 100; 112 - const failedPercentage = (failedCount / results.length) * 100; 113 - // console.log( 114 - // "Done translating captions", 115 - // results.length, 116 - // successCount, 117 - // failedCount, 118 - // successPercentage, 119 - // failedPercentage, 120 - // ); 121 - 122 - if (failedPercentage > successPercentage) { 123 - throw new Error("Success percentage is not acceptable"); 124 - } 125 - } catch (error) { 126 - console.error("Could not translate", captions.length, "captions", error); 127 - return false; 128 - } 129 - return true; 130 - } 131 - 132 - async function translateSRTData( 133 - data: string, 134 - targetLang: string, 135 - ): Promise<string | undefined> { 136 - let captions: Caption[]; 137 - try { 138 - captions 
= subsrt.parse(data); 139 - } catch (error) { 140 - console.error("Failed to parse subtitle data", error); 141 - return undefined; 142 - } 143 - 144 - let translatedCaptions: Caption[] | undefined = []; 145 - const contentCaptions: ContentCaption[] = []; 146 - const translatedCache: Map<string, string> = new Map<string, string>(); 147 - 148 - for (const caption of captions) { 149 - translatedCaptions.push(caption); 150 - if (caption.type !== "caption") { 151 - continue; 152 - } 153 - caption.text = caption.text 154 - .trim() 155 - .replace("\r\n", "\n") 156 - .replace("\r", "\n"); 157 - contentCaptions.push(caption); 158 - } 159 - 160 - for (let i = 0; i < contentCaptions.length; i += 1) { 161 - if (tryUseCachedCaption(contentCaptions[i], translatedCache)) { 162 - continue; 163 - } 164 - const batch: ContentCaption[] = [contentCaptions[i]]; 165 - 166 - let j; 167 - for (j = 1; j < FETCH_RATE; j += 1) { 168 - if (i + j >= contentCaptions.length) { 169 - break; 170 - } 171 - if (tryUseCachedCaption(contentCaptions[i + j], translatedCache)) { 172 - continue; 173 - } 174 - batch.push(contentCaptions[i + j]); 175 - } 176 - i += j; 177 - 178 - if (!(await translateCaptions(batch, targetLang))) { 179 - translatedCaptions = undefined; 180 - break; 181 - } 182 - 183 - batch.forEach((c) => translatedCache.set((c as any).oldText!, c.text)); 184 - } 185 - 186 - return translatedCaptions 187 - ? 
subsrt.build(translatedCaptions, { format: "srt" }) 188 - : undefined; 189 - } 190 - 191 - // TODO: make this support multiple providers rather than just google translate 192 - export async function translateSubtitle( 193 - id: string, 194 - srtData: string, 195 - targetLang: string, 196 - ): Promise<string | undefined> { 197 - const cacheID = `${id}_${targetLang}`; 198 - 199 - const cachedData: ArrayBuffer | undefined = SUBTITLES_CACHE.get(cacheID); 200 - if (cachedData) { 201 - // console.log("Using cached translation for", id, cacheID); 202 - return decompressStr(cachedData); 203 - } 204 - 205 - // console.log("Translating", id); 206 - const translatedData: string | undefined = await translateSRTData( 207 - srtData, 208 - targetLang, 209 - ); 210 - if (!translatedData) { 211 - return undefined; 212 - } 213 - 214 - // console.log("Caching translation for", id, cacheID); 215 - SUBTITLES_CACHE.set(cacheID, await compressStr(translatedData)); 216 - return translatedData; 217 - }
+72
src/utils/translation/googletranslate.ts
import type { TranslateService } from ".";

const SINGLE_API_URL =
  "https://translate.googleapis.com/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&oe=UTF-8&sl=auto";
const BATCH_API_URL = "https://translate-pa.googleapis.com/v1/translateHtml";
// NOTE(review): this key is committed in source and sent from the client on
// every batch request - confirm it is intended to be public.
const BATCH_API_KEY = "AIzaSyATBXajvzQLTDHEQbcpq0Ihe0vWDHmO520";

/** Narrows an unknown JSON value to a plain key/value object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/**
 * Performs a request and parses the body as JSON.
 *
 * @throws Error when the server answers with a non-2xx status, so error
 * pages and rate-limit responses are reported as failures instead of being
 * parsed as if they were translations.
 */
async function requestJson(url: string, init: RequestInit): Promise<unknown> {
  const res = await fetch(url, init);
  if (!res.ok) {
    throw new Error(`Request failed with status ${res.status}`);
  }
  return res.json();
}

/**
 * Google Translate backed implementation of {@link TranslateService}.
 */
const googleTranslate = {
  /** Human readable name of this provider. */
  getName(): string {
    return "Google Translate";
  },

  /** Batch sizes, retry count and delay between batches for this provider. */
  getConfig() {
    return {
      singleBatchSize: 15,
      multiBatchSize: 80,
      maxRetryCount: 3,
      batchSleepMs: 200,
    };
  },

  /**
   * Translates a single string through the "single" endpoint.
   *
   * @param str Text to translate; an empty string resolves to "" without a request.
   * @param targetLang Target language code, passed as the `tl` query parameter.
   * @returns The translated sentences joined into one string.
   * @throws Error on HTTP failure or when the response has no `sentences` array.
   */
  async translate(str: string, targetLang: string): Promise<string> {
    if (!str) {
      return "";
    }

    const query = `&tl=${encodeURIComponent(targetLang)}&q=${encodeURIComponent(str)}`;
    const response = await requestJson(`${SINGLE_API_URL}${query}`, {
      method: "GET",
      headers: {
        Accept: "application/json",
      },
    });

    const sentences = isRecord(response) ? response.sentences : undefined;
    if (!Array.isArray(sentences)) {
      console.warn("Invalid gt response", response);
      throw new Error("Invalid response");
    }

    // Entries without a string `trans` contribute nothing to the result.
    return sentences
      .map((s: unknown) =>
        isRecord(s) && typeof s.trans === "string" ? s.trans : "",
      )
      .join("");
  },

  /**
   * Translates several strings with one request to the batch endpoint.
   *
   * @param batch Texts to translate; an empty batch resolves to [] without a request.
   * @param targetLang Target language code.
   * @returns The first element of the response array, which must be an array of strings.
   * @throws Error on HTTP failure or when the response does not have that shape.
   */
  async translateMulti(batch: string[], targetLang: string): Promise<string[]> {
    if (!batch || batch.length === 0) {
      return [];
    }

    const response = await requestJson(BATCH_API_URL, {
      method: "POST",
      headers: {
        "Content-Type": "application/json+protobuf",
        "X-goog-api-key": BATCH_API_KEY,
      },
      body: JSON.stringify([[batch, "auto", targetLang], "te"]),
    });

    const translations: unknown = Array.isArray(response)
      ? response[0]
      : undefined;
    if (
      !Array.isArray(translations) ||
      !translations.every((t): t is string => typeof t === "string")
    ) {
      console.warn("Invalid gt batch response", response);
      throw new Error("Invalid response");
    }

    return translations;
  },
} satisfies TranslateService;

export default googleTranslate;
+187
src/utils/translation/index.ts
import subsrt from "subsrt-ts";
import type { Caption, ContentCaption } from "subsrt-ts/dist/types/handler";

// Type-only import: the player source slice imports this module, so a value
// import here would create a runtime import cycle.
import type { Caption as PlayerCaption } from "@/stores/player/slices/source";

import { compressStr, decompressStr, sleep } from "./utils";

/** Pause between retries of a failed single-caption translation. */
const RETRY_DELAY_MS = 500;

/** Compressed translated SRT documents, keyed by `<caption id>_<target language>`. */
const CAPTIONS_CACHE: Map<string, ArrayBuffer> = new Map<string, ArrayBuffer>();

/** Contract every translation provider has to implement. */
export interface TranslateService {
  /** Human readable provider name. */
  getName(): string;
  getConfig(): {
    /** Captions per batch when each caption is translated with `translate`. */
    singleBatchSize: number;
    /** Captions per `translateMulti` request. */
    multiBatchSize: number; // -1 = unsupported
    /** Number of attempts made for a single caption before giving up. */
    maxRetryCount: number;
    /** Pause between two consecutive batches, in milliseconds. */
    batchSleepMs: number;
  };
  /** Translates one string into `targetLang`. */
  translate(str: string, targetLang: string): Promise<string>;
  /** Translates all strings of `batch`; the result must have the same length and order. */
  translateMulti(batch: string[], targetLang: string): Promise<string[]>;
}

type TranslateServiceConfig = ReturnType<TranslateService["getConfig"]>;

/**
 * Translates the captions of one SRT document in place, batch by batch.
 *
 * The constructor parses the document, so it throws whatever `subsrt.parse`
 * throws for malformed input.
 */
class Translator {
  private readonly captions: Caption[];

  /** Captions still waiting to be translated (consumed from the front). */
  private readonly contentCaptions: ContentCaption[] = [];

  /** Original caption text -> translated text, to skip repeated lines. */
  private readonly contentCache: Map<string, string> = new Map<string, string>();

  private readonly targetLang: string;

  private readonly service: TranslateService;

  private readonly config: TranslateServiceConfig;

  constructor(srtData: string, targetLang: string, service: TranslateService) {
    this.captions = subsrt.parse(srtData);
    this.targetLang = targetLang;
    this.service = service;
    this.config = service.getConfig();

    for (const caption of this.captions) {
      if (caption.type !== "caption") {
        continue;
      }
      // Normalize line endings
      caption.text = caption.text
        .trim()
        .replaceAll("\r\n", "\n")
        .replaceAll("\r", "\n");
      this.contentCaptions.push(caption);
    }
  }

  /** True when only per-caption translation is available for this service. */
  private get singleMode(): boolean {
    return this.config.multiBatchSize === -1;
  }

  /**
   * Replaces the caption text with an already known translation.
   *
   * @returns true when the cache had an entry for the caption's text.
   */
  fillContentFromCache(content: ContentCaption): boolean {
    const text = this.contentCache.get(content.text);
    if (text === undefined) {
      return false;
    }
    content.text = text;
    return true;
  }

  /**
   * Translates one caption with `service.translate`, retrying on failure.
   *
   * An empty translation of a non-empty caption counts as a failed attempt.
   * Every attempt is counted, so the loop always terminates.
   *
   * @returns true when the caption text was replaced by its translation.
   */
  async translateContent(content: ContentCaption): Promise<boolean> {
    // Keep the source text: it is the cache key and the input of every retry.
    const original = content.text;
    const maxAttempts = Math.max(1, this.config.maxRetryCount);
    const errors: unknown[] = [];

    for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
      try {
        const translated = await this.service.translate(
          original,
          this.targetLang,
        );
        if (!translated && original) {
          throw new Error("Empty translation result");
        }
        content.text = translated;
        this.contentCache.set(original, translated);
        return true;
      } catch (err) {
        console.warn("Translation attempt failed");
        errors.push(err);
        if (attempt < maxAttempts) {
          await sleep(RETRY_DELAY_MS);
        }
      }
    }

    console.warn("Translation failed", errors);
    return false;
  }

  /**
   * Translates a whole batch with one `service.translateMulti` call.
   *
   * @returns false when the call throws or returns a different number of
   * entries than were sent; the captions are left untouched in that case.
   */
  async translateContentBatch(batch: ContentCaption[]): Promise<boolean> {
    // Snapshot the source texts before any caption is overwritten so the
    // cache is keyed by the original text, not by the translation.
    const originals = batch.map((content) => content.text);

    try {
      const result = await this.service.translateMulti(
        originals,
        this.targetLang,
      );

      if (result.length !== batch.length) {
        console.warn(
          "Batch translation size mismatch",
          result.length,
          batch.length,
        );
        return false;
      }

      batch.forEach((content, i) => {
        content.text = result[i];
        this.contentCache.set(originals[i], result[i]);
      });

      return true;
    } catch (err) {
      console.warn("Batch translation failed", err);
      return false;
    }
  }

  /**
   * Removes the next captions from the queue and returns those that still
   * need a translation; captions answered by the cache are filled in and
   * do not count towards the batch size.
   */
  takeBatch(): ContentCaption[] {
    const batch: ContentCaption[] = [];
    const configuredSize = this.singleMode
      ? this.config.singleBatchSize
      : this.config.multiBatchSize;
    // A size below 1 would yield an empty batch and end the translation
    // early with untranslated captions, so always take at least one.
    const batchSize = Math.max(1, configuredSize);

    while (batch.length < batchSize && this.contentCaptions.length > 0) {
      const content: ContentCaption = this.contentCaptions.shift()!;
      if (this.fillContentFromCache(content)) {
        continue;
      }
      batch.push(content);
    }

    return batch;
  }

  /**
   * Translates every caption and rebuilds the document.
   *
   * @returns the translated document in SRT format, or undefined as soon as
   * one batch fails.
   */
  async translate(): Promise<string | undefined> {
    let batch: ContentCaption[] = this.takeBatch();
    while (batch.length > 0) {
      console.info("Translating captions batch", batch.length, batch);

      let result: boolean;
      if (this.singleMode) {
        const outcomes = await Promise.all(
          batch.map((content) => this.translateContent(content)),
        );
        result = outcomes.every((res) => res);
      } else {
        result = await this.translateContentBatch(batch);
      }

      if (!result) {
        console.error(
          "Failed to translate captions batch",
          batch.length,
          batch,
        );
        return undefined;
      }

      batch = this.takeBatch();
      // Only pause when another request is actually going to be sent.
      if (batch.length > 0) {
        await sleep(this.config.batchSleepMs);
      }
    }
    return subsrt.build(this.captions, { format: "srt" });
  }
}

/**
 * Translates a caption track into `targetLang` using `service`.
 *
 * Successful results are stored compressed in an in-memory cache keyed by
 * caption id and target language, and served from there on later calls.
 *
 * @param caption Caption whose `srtData` is translated and whose `id` keys the cache.
 * @param targetLang Target language code handed to the service.
 * @param service Translation provider to use.
 * @returns The translated SRT document, or undefined when translation failed.
 * @throws Whatever `subsrt.parse` throws when `caption.srtData` cannot be parsed.
 */
export async function translate(
  caption: PlayerCaption,
  targetLang: string,
  service: TranslateService,
): Promise<string | undefined> {
  const cacheID = `${caption.id}_${targetLang}`;

  const cachedData: ArrayBuffer | undefined = CAPTIONS_CACHE.get(cacheID);
  if (cachedData) {
    return decompressStr(cachedData);
  }

  const translator = new Translator(caption.srtData, targetLang, service);

  const result = await translator.translate();
  if (!result) {
    return undefined;
  }

  CAPTIONS_CACHE.set(cacheID, await compressStr(result));
  return result;
}
+24
src/utils/translation/utils.ts
/**
 * Pushes `input` through a compression or decompression stream and collects
 * everything the stream produces.
 *
 * Writing, closing and reading are awaited together: reading has to run
 * while the data is written (otherwise backpressure could stall the write),
 * and awaiting the write/close promises makes a failing stream reject the
 * returned promise instead of leaving unhandled rejections behind.
 */
async function pumpThrough(
  stream: CompressionStream | DecompressionStream,
  input: BufferSource,
): Promise<ArrayBuffer> {
  const writer = stream.writable.getWriter();
  const [, , output] = await Promise.all([
    writer.write(input),
    writer.close(),
    new Response(stream.readable).arrayBuffer(),
  ]);
  return output;
}

/**
 * Compresses a string with "deflate".
 *
 * @param string Text to compress; it is encoded with TextEncoder first.
 * @returns The compressed bytes.
 */
export async function compressStr(string: string): Promise<ArrayBuffer> {
  const byteArray = new TextEncoder().encode(string);
  return pumpThrough(new CompressionStream("deflate"), byteArray);
}

/**
 * Reverses {@link compressStr}.
 *
 * @param byteArray Bytes produced by a "deflate" compression.
 * @returns The decompressed bytes decoded with TextDecoder.
 * @throws Rejects when the data cannot be decompressed.
 */
export async function decompressStr(byteArray: ArrayBuffer): Promise<string> {
  const arrayBuffer = await pumpThrough(
    new DecompressionStream("deflate"),
    byteArray,
  );
  return new TextDecoder().decode(arrayBuffer);
}

/**
 * Resolves after `ms` milliseconds (via setTimeout).
 */
export function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}