// A browser extension that lets you summarize any webpage and ask questions using AI.
// Background script - handles API communication
// Uses centralized CONFIG from config.js

// Chrome MV3: service worker — load config via importScripts.
// Firefox MV3: event-page scripts — config.js is listed first in manifest "scripts".
// (importScripts only exists in worker contexts, hence the feature check.)
if (typeof importScripts === "function") {
  importScripts("config.js");
}

// Cache key prefixes from CONFIG; per-tab session-storage keys are built as
// prefix + tabId (see clearTabCache below).
const QUICK_SUMMARY_CACHE_PREFIX = CONFIG.CACHE.QUICK_SUMMARY;
const DETAILED_SUMMARY_CACHE_PREFIX = CONFIG.CACHE.DETAILED_SUMMARY;
const CONTENT_CACHE_PREFIX = CONFIG.CACHE.CONTENT;
const CHAT_CACHE_PREFIX = CONFIG.CACHE.CHAT;
15
16/**
17 * Per-tab inflight Ollama/API state: one slot for streaming, one for non-stream (chat/suggestions).
18 * - New stream aborts everything (user superseded or regenerate).
19 * - New non-stream only aborts prior non-stream, never an active stream (avoids races with post-stream work).
20 */
21const inflightByTab = new Map();
22
/**
 * Abort every in-flight request for a tab (stream and non-stream alike) and
 * drop the tab's slot entry. No-op when tabId is null/undefined or untracked.
 */
function abortAllInflightForTab(tabId) {
  if (tabId == null) return;
  const entry = inflightByTab.get(tabId);
  if (!entry) return;
  entry.stream?.abort();
  entry.other?.abort();
  inflightByTab.delete(tabId);
}
31
/**
 * Claim the per-tab streaming slot: cancels ALL prior work for the tab
 * (stream and non-stream), then registers a fresh AbortController as the
 * active stream. A null tabId yields an untracked controller.
 */
function takeStreamSlot(tabId) {
  if (tabId == null) return new AbortController();
  abortAllInflightForTab(tabId);
  const controller = new AbortController();
  inflightByTab.set(tabId, { stream: controller });
  return controller;
}
39
/**
 * Release the streaming slot, but only if `controller` is still the one that
 * claimed it (a newer stream may have replaced us). Keeps a live non-stream
 * slot; deletes the entry when nothing remains.
 */
function releaseStreamSlot(tabId, controller) {
  if (tabId == null) return;
  const entry = inflightByTab.get(tabId);
  if (!entry || entry.stream !== controller) return;
  if (entry.other) {
    inflightByTab.set(tabId, { other: entry.other });
  } else {
    inflightByTab.delete(tabId);
  }
}
51
/**
 * Claim the per-tab non-stream slot: aborts only a prior non-stream request,
 * never an active stream. A null tabId yields an untracked controller.
 */
function takeOtherSlot(tabId) {
  if (tabId == null) return new AbortController();
  const entry = inflightByTab.get(tabId) ?? {};
  entry.other?.abort();
  const controller = new AbortController();
  inflightByTab.set(tabId, { ...entry, other: controller });
  return controller;
}
60
/**
 * Release the non-stream slot, but only if `controller` still owns it.
 * Keeps a live stream slot; deletes the entry when nothing remains.
 */
function releaseOtherSlot(tabId, controller) {
  if (tabId == null) return;
  const entry = inflightByTab.get(tabId);
  if (!entry || entry.other !== controller) return;
  if (entry.stream) {
    inflightByTab.set(tabId, { stream: entry.stream });
  } else {
    inflightByTab.delete(tabId);
  }
}
72
/** Coalesce tiny stream tokens into fewer runtime messages (easier on SW + popup; less backpressure). */
const CHUNK_BATCH_MAX_CHARS = 512; // flush immediately once this many chars are buffered
const CHUNK_BATCH_MS = 20; // otherwise flush on this debounce timer

// Shared batching state. NOTE(review): this is module-level, not per-tab —
// assumes only one stream is active at a time (popup-driven); confirm.
let streamChunkBuffer = ""; // text accumulated since the last flush
let streamChunkTimer = null; // pending setTimeout id, or null
79
/** Discard any buffered stream text and cancel the pending flush timer (used when a stream starts or fails). */
function resetStreamChunkBatching() {
  if (streamChunkTimer !== null) {
    clearTimeout(streamChunkTimer);
    streamChunkTimer = null;
  }
  streamChunkBuffer = "";
}
87
/** Send all buffered text to the popup as a single streamChunk message; no-op when the buffer is empty. */
function flushStreamChunks() {
  streamChunkTimer = null;
  if (streamChunkBuffer === "") return;
  const chunk = streamChunkBuffer;
  streamChunkBuffer = "";
  // The popup may have closed; swallow "no receiving end" rejections.
  chrome.runtime
    .sendMessage({ action: "streamChunk", chunk, done: false })
    .catch(() => {});
}
97
/**
 * Append one stream token to the batch buffer. Flushes immediately once the
 * buffer reaches CHUNK_BATCH_MAX_CHARS; otherwise arms a debounce timer so
 * small tokens are coalesced into one runtime message.
 */
function queueStreamChunk(piece) {
  streamChunkBuffer += piece;
  if (streamChunkBuffer.length >= CHUNK_BATCH_MAX_CHARS) {
    if (streamChunkTimer) {
      clearTimeout(streamChunkTimer);
      streamChunkTimer = null;
    }
    flushStreamChunks();
    return;
  }
  if (!streamChunkTimer) {
    streamChunkTimer = setTimeout(flushStreamChunks, CHUNK_BATCH_MS);
  }
}
110
/** End-of-stream: cancel the debounce timer and flush whatever is still buffered so no tail text is lost. */
function finalizeStreamChunkBatching() {
  if (streamChunkTimer !== null) {
    clearTimeout(streamChunkTimer);
    streamChunkTimer = null;
  }
  flushStreamChunks();
}
118
/**
 * Normalize a user-configured Ollama keep_alive value to a trimmed string.
 * Returns null for null/undefined/blank input, meaning "omit the field so the
 * server applies its own default".
 */
function normalizeOllamaKeepAlive(keepAlive) {
  if (keepAlive == null) return null;
  const trimmed = String(keepAlive).trim();
  return trimmed === "" ? null : trimmed;
}
125
/** Mutates requestBody in place: sets keep_alive only when a non-empty value was configured. */
function attachOllamaKeepAlive(requestBody, keepAlive) {
  const normalized = normalizeOllamaKeepAlive(keepAlive);
  if (normalized !== null) requestBody.keep_alive = normalized;
}
130
// ── Prompt templates from CONFIG ─────────────────────────────────────────
// Plain string with literal ${context} / ${userMessage} placeholders that the
// callOllama* helpers substitute via String.replace (NOT template-literal interpolation).
const OLLAMA_CONTEXT_TEMPLATE = CONFIG.OLLAMA.CONTEXT_TEMPLATE;
133
// First-install setup: seed default settings and (re)create the context menu.
// Note: onInstalled also fires on extension update and browser update.
chrome.runtime.onInstalled.addListener(() => {
  // Seed defaults from CONFIG only when no settings exist yet (apiMode is the
  // sentinel key) so a user's customized settings are never overwritten.
  chrome.storage.sync
    .get(["apiMode"])
    .then((result) => {
      if (!result.apiMode) {
        return chrome.storage.sync.set({
          apiMode: CONFIG.API.MODE,
          apiBaseUrl: CONFIG.API.BASE_URL,
          model: CONFIG.API.MODEL,
          apiKey: CONFIG.API.KEY,
          disableThinking: CONFIG.API.DISABLE_THINKING,
          autoSummarize: CONFIG.API.AUTO_SUMMARIZE,
          keepAlive: CONFIG.API.KEEP_ALIVE,
        });
      }
    })
    .catch((e) => console.error("[Lede] Failed to seed default settings:", e));

  // Create context menu item. Because onInstalled re-fires on update,
  // creating an id that already exists raises a "duplicate id" error —
  // clear existing menus first.
  chrome.contextMenus.removeAll(() => {
    chrome.contextMenus.create({
      id: "summarize-page",
      title: "Open Lede for this page",
      contexts: ["page", "selection"],
    });
  });
});
158
// Drop a tab's cached summaries/content when the tab is closed…
chrome.tabs.onRemoved.addListener((closedTabId) => {
  clearTabCache(closedTabId);
});

// …or when it navigates to a different URL (the cache is page-specific).
chrome.tabs.onUpdated.addListener((tabId, changeInfo) => {
  if (changeInfo.url) {
    clearTabCache(tabId);
  }
});
171
/**
 * Remove every session-cache entry keyed to this tab (quick/detailed summary,
 * extracted content, chat history, suggestions). Best-effort: failures are
 * logged and swallowed.
 */
async function clearTabCache(tabId) {
  const keys = [
    QUICK_SUMMARY_CACHE_PREFIX,
    DETAILED_SUMMARY_CACHE_PREFIX,
    CONTENT_CACHE_PREFIX,
    CHAT_CACHE_PREFIX,
    CONFIG.CACHE.SUGGESTIONS,
  ].map((prefix) => prefix + tabId);
  try {
    await chrome.storage.session.remove(keys);
  } catch (e) {
    console.error("[Lede] Error clearing cache:", e);
  }
}
185
// Context menu entry → open the extension UI for the clicked tab.
chrome.contextMenus.onClicked.addListener((info, tab) => {
  if (info.menuItemId !== "summarize-page") return;
  triggerSummarizeForTab(tab.id);
});

// Keyboard shortcut (manifest command plus a legacy alias) → UI for the active tab.
chrome.commands.onCommand.addListener((command) => {
  if (command !== "summarize-page" && command !== "open-summarizer") return;
  chrome.tabs.query({ active: true, currentWindow: true }).then((tabs) => {
    const activeTab = tabs[0];
    if (activeTab) {
      triggerSummarizeForTab(activeTab.id);
    }
  });
});
203
/**
 * Open the extension UI for a tab, setting a session-storage flag that the
 * popup reads on startup to auto-run summarization for that tab.
 *
 * Firefox has no chrome.action.openPopup(), so a standalone popup window is
 * created there; Chrome opens the toolbar popup programmatically. All three
 * chrome.* calls return promises — attach .catch so failures (e.g. openPopup
 * without an active window) are logged instead of becoming unhandled rejections.
 */
function triggerSummarizeForTab(tabId) {
  // Store a flag to trigger summarize when popup opens
  chrome.storage.session
    .set({ triggerSummarize: true, targetTabId: tabId })
    .catch((e) => console.error("[Lede] Failed to set trigger flag:", e));

  if (typeof browser !== "undefined") {
    // Firefox: Create a popup window matching the UI size (extra height for browser chrome)
    chrome.windows
      .create({
        url: chrome.runtime.getURL("popup/popup.html"),
        type: "popup",
        width: 400,
        height: 600,
        focused: true,
      })
      .catch((e) => console.error("[Lede] Failed to open popup window:", e));
  } else {
    // Chrome: Programmatically open the popup. Wrap in Promise.resolve in case
    // an older Chrome returns undefined instead of a promise.
    Promise.resolve(chrome.action.openPopup()).catch((e) =>
      console.error("[Lede] Failed to open popup:", e),
    );
  }
}
224
// Single message router for the popup. A handler that responds asynchronously
// MUST return true to keep the sendResponse channel open past this tick;
// returning false (or nothing) closes it immediately.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  // Liveness probe — popup checks the background worker is awake.
  if (request.action === "ping") {
    sendResponse({ success: true, message: "pong" });
    return true;
  }

  // Inject extraction scripts into the tab and return the page text.
  if (request.action === "extractPageContent") {
    extractPageContentForTab(request.tabId)
      .then((result) => sendResponse({ success: true, ...result }))
      .catch((error) => {
        sendResponse({
          success: false,
          error: error.message || "Failed to extract page content",
        });
      });
    return true;
  }

  // Non-streaming chat (also used for suggestions); result via sendResponse.
  if (request.action === "chat") {
    handleChatRequest(request.data)
      .then((response) => {
        sendResponse({ success: true, data: response });
      })
      .catch((error) => {
        console.error("Background script error:", error);
        sendResponse({
          success: false,
          error: error.message || "Unknown error occurred",
        });
      });
    return true; // Keep channel open for async
  }

  // Streaming chat: fire-and-forget here; the stream helpers push results
  // back through streamChunk/streamDone broadcast messages instead of
  // sendResponse, so the channel is NOT kept open.
  if (request.action === "streamChat") {
    const { tabId } = request;
    handleStreamChatRequest(request.data, tabId).catch((error) => {
      console.error("Stream chat error:", error);
      sendStreamDoneToExtension({ error: error.message });
    });
    return false; // Popup receives streamChunk/streamDone via runtime messages
  }

  // User cancelled: abort both the stream and any non-stream request for the tab.
  if (request.action === "cancelStream") {
    const tabId = request.tabId;
    if (tabId != null) {
      abortAllInflightForTab(tabId);
    }
    return false;
  }

  // Settings-page connectivity check against the local Ollama server.
  if (request.action === "testOllama") {
    testOllamaConnection()
      .then(() => sendResponse({ success: true }))
      .catch((err) => sendResponse({ success: false, error: err.message }));
    return true;
  }
});
282
/**
 * True when the URL cannot host content scripts (browser-internal and
 * extension pages) or is missing/empty.
 */
function isRestrictedUrl(url) {
  if (!url) return true;
  const blockedPrefixes = [
    "chrome://",
    "chrome-extension://",
    "edge://",
    "about:",
    "moz-extension://",
    "resource://",
  ];
  return blockedPrefixes.some((prefix) => url.startsWith(prefix));
}
294
/**
 * Inject the extraction scripts into a tab and ask its content script for the
 * page text.
 *
 * @returns {Promise<{content: string, wasTruncated: boolean, extractionSource?: string, unsupportedReason?: (string|null)}>}
 *   Empty content for missing tab ids, restricted URLs, or an unresponsive
 *   content script.
 */
async function extractPageContentForTab(tabId) {
  // tabId 0 is falsy but not "missing" — check null/undefined explicitly,
  // matching the `tabId == null` convention used by the slot helpers above.
  if (tabId == null) {
    return { content: "", wasTruncated: false };
  }

  const tab = await chrome.tabs.get(tabId);
  if (isRestrictedUrl(tab.url)) {
    return { content: "", wasTruncated: false };
  }

  // Readability must load before content.js (content.js is not bundled with it).
  // reddit-new.js registers window.__webaiTryRedditNew for new-Reddit thread extraction.
  await chrome.scripting.executeScript({
    target: { tabId },
    files: ["scripts/Readability.js", "scripts/reddit-new.js", "scripts/content.js"],
  });

  const response = await chrome.tabs.sendMessage(tabId, { action: "extract" });
  if (!response) {
    return { content: "", wasTruncated: false };
  }

  return {
    content: response.content ?? "",
    wasTruncated: Boolean(response.wasTruncated),
    extractionSource: response.extractionSource ?? "unknown",
    unsupportedReason: response.unsupportedReason ?? null,
  };
}
324
/**
 * Probe the default local Ollama server by listing its models.
 * Resolves with the /api/tags payload; throws `HTTP <status>` on failure.
 */
async function testOllamaConnection() {
  const response = await fetch("http://localhost:11434/api/tags");
  if (!response.ok) {
    throw new Error(`HTTP ${response.status}`);
  }
  return response.json();
}
333
334/**
335 * OpenAI-compatible APIs differ: `message.content` may be a string, a parts array (Responses / some
336 * gateways), or text may appear on `choices[0].text` or `output_text`.
337 */
338function normalizeMessageContent(content) {
339 if (content == null) return "";
340 if (typeof content === "string") return content;
341 if (Array.isArray(content)) {
342 let out = "";
343 for (const part of content) {
344 if (typeof part === "string") {
345 out += part;
346 } else if (part && typeof part === "object") {
347 if (typeof part.text === "string") out += part.text;
348 else if (typeof part.content === "string") out += part.content;
349 }
350 }
351 return out;
352 }
353 return "";
354}
355
/**
 * Pull the assistant text out of a non-streaming chat-completion response.
 * Priority: choices[0].message.content (normalized), then the legacy
 * choices[0].text, then a top-level output_text. Returns "" when nothing fits.
 */
function extractOpenAIChatCompletionText(data) {
  if (!data || typeof data !== "object") return "";
  const firstChoice = data.choices?.[0];
  if (!firstChoice) return "";
  const messageText = normalizeMessageContent(firstChoice.message?.content);
  if (messageText) return messageText;
  if (typeof firstChoice.text === "string" && firstChoice.text) return firstChoice.text;
  if (typeof data.output_text === "string" && data.output_text) return data.output_text;
  return "";
}
366
/**
 * Rewrite a chat response so choices[0].message.content is always a plain
 * string (parts arrays / alternate text locations merged). Returns the input
 * untouched when no text could be extracted or there is no first choice.
 */
function normalizeOpenAIChatResponse(data) {
  const merged = extractOpenAIChatCompletionText(data);
  if (!merged || !data?.choices?.[0]) return data;

  const [first, ...remainingChoices] = data.choices;
  const normalizedFirst = {
    ...first,
    message: {
      ...(first.message || {}),
      role: first.message?.role || "assistant",
      content: merged,
    },
  };
  return { ...data, choices: [normalizedFirst, ...remainingChoices] };
}
386
/** Extract one streaming text fragment from an SSE JSON payload (delta.content string or parts array). */
function extractOpenAIStreamDeltaChunk(parsed) {
  if (!parsed || typeof parsed !== "object") return "";
  const choice = parsed.choices?.[0];
  if (!choice) return "";

  // Preferred: the standard streaming delta — content first, then reasoning text.
  const delta = choice.delta;
  if (delta && typeof delta === "object") {
    const deltaText = normalizeMessageContent(delta.content);
    if (deltaText) return deltaText;
    if (typeof delta.reasoning_content === "string" && delta.reasoning_content) {
      return delta.reasoning_content;
    }
  }

  // Fallbacks seen from some proxies: legacy completions `text`, or a whole message.
  if (typeof choice.text === "string" && choice.text) return choice.text;
  if (choice.message?.content) {
    const messageText = normalizeMessageContent(choice.message.content);
    if (messageText) return messageText;
  }

  return "";
}
411
412/**
413 * Resolve API base URL to POST /.../chat/completions once. Avoids broken URLs when the user pastes
414 * a full endpoint, uses Azure (?api-version=…), or Google-style /v1beta/openai (substring "v1"
415 * must not trigger a bogus extra /v1 segment).
416 */
417function resolveOpenAICompatChatUrl(raw) {
418 const trimmed = String(raw || "").trim();
419 if (!trimmed) return trimmed;
420 try {
421 const u = new URL(trimmed);
422 let path = (u.pathname || "").replace(/\/+$/, "") || "";
423
424 if (/\/chat\/completions$/i.test(path)) {
425 return u.toString();
426 }
427
428 if (/\/openai\/deployments\/[^/]+$/i.test(path)) {
429 u.pathname = `${path}/chat/completions`;
430 return u.toString();
431 }
432
433 if (/\/v1$/i.test(path)) {
434 u.pathname = `${path}/chat/completions`;
435 return u.toString();
436 }
437
438 if (/\/v1beta\/openai$/i.test(path)) {
439 u.pathname = `${path}/chat/completions`;
440 return u.toString();
441 }
442
443 if (!/\bv1\b/i.test(path)) {
444 const base = path || "";
445 u.pathname = `${base}/v1/chat/completions`.replace(/\/{2,}/g, "/");
446 return u.toString();
447 }
448
449 u.pathname = `${path}/chat/completions`.replace(/\/{2,}/g, "/");
450 return u.toString();
451 } catch {
452 return trimmed;
453 }
454}
455
/**
 * Build JSON request headers with the right auth scheme: Azure OpenAI hosts
 * expect `api-key`, everything else gets `Authorization: Bearer`. With no key,
 * only Content-Type is sent. Unparseable URLs fall back to Bearer auth.
 */
function openAICompatFetchHeaders(apiKey, urlString) {
  const headers = { "Content-Type": "application/json" };
  if (!apiKey) return headers;

  let isAzureHost = false;
  try {
    isAzureHost = /\.openai\.azure\.com$/i.test(new URL(urlString).hostname);
  } catch {
    isAzureHost = false;
  }

  if (isAzureHost) {
    headers["api-key"] = apiKey;
  } else {
    headers.Authorization = `Bearer ${apiKey}`;
  }
  return headers;
}
471
/**
 * Non-streaming chat request (chat fallback + suggestion generation).
 *
 * Claims the per-tab "other" slot so a newer non-stream request aborts this
 * one, while an active stream on the same tab is left untouched. Dispatches to
 * native Ollama or an OpenAI-compatible endpoint; the latter's response is
 * normalized so choices[0].message.content is always a string.
 *
 * @param {object} data - settings + messages forwarded from the popup.
 * @returns {Promise<object>} OpenAI-style { choices: [{ message }] } payload.
 * @throws {Error} "Request cancelled" when aborted; otherwise the backend error.
 */
async function handleChatRequest(data) {
  const {
    tabId,
    apiBaseUrl,
    model,
    apiKey,
    messages,
    apiMode,
    disableThinking,
    maxOutputTokens,
    keepAlive,
  } = data;

  // Caller may cap output tokens per request; fall back to the global default.
  const tokenCap =
    typeof maxOutputTokens === "number" && maxOutputTokens > 0
      ? maxOutputTokens
      : CONFIG.API.MAX_TOKENS;

  const controller = takeOtherSlot(tabId);
  const signal = controller.signal;

  try {
    const useNativeOllama = apiMode === "ollama";

    if (useNativeOllama) {
      return await callOllamaNative(
        apiBaseUrl,
        model,
        messages,
        disableThinking,
        tokenCap,
        signal,
        keepAlive,
      );
    } else {
      const raw = await callOpenAICompatible(
        apiBaseUrl,
        model,
        apiKey,
        messages,
        tokenCap,
        signal,
      );
      return normalizeOpenAIChatResponse(raw);
    }
  } catch (error) {
    // Map both fetch's AbortError and an already-aborted signal to one message
    // the popup treats as a user cancellation rather than a failure.
    if (error.name === "AbortError" || signal.aborted) {
      throw new Error("Request cancelled");
    }
    throw error;
  } finally {
    // Release only if we still own the slot (a newer request may have taken it).
    releaseOtherSlot(tabId, controller);
  }
}
526
/**
 * Streaming chat entry point. Claims the per-tab stream slot (cancelling ALL
 * prior work for the tab), dispatches to the Ollama-native or
 * OpenAI-compatible streamer, and always releases the slot afterwards.
 * Output reaches the popup via streamChunk/streamDone runtime messages.
 */
async function handleStreamChatRequest(data, tabId) {
  const {
    apiBaseUrl,
    model,
    apiKey,
    messages,
    apiMode,
    disableThinking,
    keepAlive,
  } = data;

  const controller = takeStreamSlot(tabId);
  const { signal } = controller;

  try {
    if (apiMode === "ollama") {
      await callOllamaNativeStream(
        apiBaseUrl,
        model,
        messages,
        disableThinking,
        signal,
        keepAlive,
      );
    } else {
      await callOpenAICompatibleStream(apiBaseUrl, model, apiKey, messages, signal);
    }
  } finally {
    releaseStreamSlot(tabId, controller);
  }
}
566
/**
 * Non-streaming completion via Ollama's native /api/generate endpoint.
 *
 * /api/generate takes a single prompt + system string rather than a messages
 * array, so: all system messages are merged (none dropped), the last user
 * message becomes the question, and earlier turns are folded into the prompt
 * via OLLAMA_CONTEXT_TEMPLATE.
 *
 * @returns {Promise<object>} OpenAI-style { choices: [{ message }], model }.
 * @throws {Error} friendly message on HTTP failure (403 → origin-allow-list hint).
 */
async function callOllamaNative(
  baseUrl,
  model,
  messages,
  disableThinking,
  maxTokens = CONFIG.API.MAX_TOKENS,
  signal,
  keepAlive,
) {
  // Merge all system messages into one so none are dropped
  const systemMsgs = messages.filter((m) => m.role === "system");
  const systemContent = systemMsgs.map((m) => m.content).join("\n\n");
  const otherMessages = messages.filter((m) => m.role !== "system");
  const lastUserMsg = otherMessages.filter((m) => m.role === "user").pop();

  // Build conversation context: earlier turns go into the template; the last
  // user message is the actual question.
  let prompt;
  if (otherMessages.length > 1) {
    const context = otherMessages
      .slice(0, -1)
      .map((m) => `${m.role}: ${m.content}`)
      .join("\n");
    prompt = OLLAMA_CONTEXT_TEMPLATE.replace("${context}", context).replace(
      "${userMessage}",
      lastUserMsg?.content || "",
    );
  } else {
    prompt = lastUserMsg?.content || "";
  }

  const url = baseUrl.replace(/\/$/, "") + "/api/generate";

  // Build request body
  const requestBody = {
    model: model,
    prompt: prompt,
    system: systemContent,
    stream: false,
    options: {
      temperature: CONFIG.API.TEMPERATURE,
      num_predict: maxTokens,
    },
  };

  // Only include think: false when user explicitly disables thinking
  // (thinking is enabled by default in Ollama, and not all models support it)
  if (disableThinking === true) {
    requestBody.think = false;
  }

  attachOllamaKeepAlive(requestBody, keepAlive);

  const fetchOpts = {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Connection: "close",
    },
    body: JSON.stringify(requestBody),
  };
  if (signal) fetchOpts.signal = signal;

  const response = await fetch(url, fetchOpts);

  if (!response.ok) {
    const text = await response.text();
    let errorMsg = `HTTP ${response.status}`;
    if (response.status === 403) {
      // 403 from Ollama is almost always its origin allow-list, not auth.
      errorMsg =
        "403 Forbidden. Ollama is rejecting the request origin. Fix: restart Ollama with OLLAMA_ORIGINS=* (e.g. OLLAMA_ORIGINS=* ollama serve).";
    } else {
      try {
        const err = JSON.parse(text);
        errorMsg = err.error || err.message || errorMsg;
      } catch (e) {
        errorMsg = text || errorMsg;
      }
    }
    throw new Error(errorMsg);
  }

  const data = await response.json();

  // Adapt to the OpenAI shape the popup expects. data.response can be absent
  // on unusual server replies — default to "" so content is always a string.
  return {
    choices: [
      {
        message: {
          role: "assistant",
          content: data.response ?? "",
        },
      },
    ],
    model: model,
  };
}
662
/**
 * Non-streaming chat completion against an OpenAI-compatible endpoint.
 * Resolves the base URL to /chat/completions, picks Azure vs Bearer auth, and
 * maps common HTTP failures (403/405) to actionable messages.
 *
 * @returns {Promise<object>} raw JSON response body.
 * @throws {Error} with a user-facing message when the request fails.
 */
async function callOpenAICompatible(
  baseUrl,
  model,
  apiKey,
  messages,
  maxTokens = CONFIG.API.MAX_TOKENS,
  signal,
) {
  const url = resolveOpenAICompatChatUrl(baseUrl);

  const fetchOpts = {
    method: "POST",
    headers: openAICompatFetchHeaders(apiKey, url),
    body: JSON.stringify({
      model: model,
      messages: messages,
      stream: false,
      max_tokens: maxTokens,
    }),
  };
  if (signal) fetchOpts.signal = signal;

  const response = await fetch(url, fetchOpts);
  if (response.ok) {
    return await response.json();
  }

  // Failure path: turn the HTTP status/body into a helpful error message.
  const text = await response.text();
  let errorMsg = `HTTP ${response.status}`;

  if (response.status === 403) {
    errorMsg = url.includes("/v1")
      ? "403 Forbidden. This often means: invalid API key, API key lacks permissions, or the server rejected the request origin."
      : "403 Forbidden. If using Ollama, ensure it's running with: ollama serve";
  } else if (response.status === 405) {
    errorMsg =
      "405 Method not allowed. Check if the API URL is correct for your API mode (Native vs OpenAI-compatible).";
  } else {
    try {
      const parsed = JSON.parse(text);
      errorMsg = parsed.error?.message || parsed.message || errorMsg;
    } catch (e) {
      errorMsg = text || errorMsg;
    }
  }

  throw new Error(errorMsg);
}
716
/**
 * Notify the popup the stream ended. On error the batched buffer is discarded
 * (its partial text is stale); otherwise any remaining buffered text is
 * flushed first so no tail characters are lost.
 */
function sendStreamDoneToExtension(payload = {}) {
  const failed = Boolean(payload.error);
  if (failed) {
    resetStreamChunkBatching();
  } else {
    finalizeStreamChunkBatching();
  }
  chrome.runtime.sendMessage({ action: "streamDone", ...payload }).catch(() => {});
}
725
/**
 * Streaming completion via Ollama's native /api/generate (NDJSON stream).
 *
 * Prompt assembly mirrors callOllamaNative: system messages merged, earlier
 * turns folded into OLLAMA_CONTEXT_TEMPLATE, last user message is the question.
 * Tokens are forwarded to the popup through queueStreamChunk; completion,
 * cancellation, truncation, and errors are ALL reported via
 * sendStreamDoneToExtension — this function never throws to its caller.
 */
async function callOllamaNativeStream(
  baseUrl,
  model,
  messages,
  disableThinking,
  signal,
  keepAlive,
) {
  // Merge all system messages so none are dropped (generate takes one string).
  const systemMsgs = messages.filter((m) => m.role === "system");
  const systemContent = systemMsgs.map((m) => m.content).join("\n\n");
  const otherMessages = messages.filter((m) => m.role !== "system");
  const lastUserMsg = otherMessages.filter((m) => m.role === "user").pop();

  // Earlier turns go into the template; the last user message is the question.
  let prompt;
  if (otherMessages.length > 1) {
    const context = otherMessages
      .slice(0, -1)
      .map((m) => `${m.role}: ${m.content}`)
      .join("\n");
    prompt = OLLAMA_CONTEXT_TEMPLATE.replace("${context}", context).replace(
      "${userMessage}",
      lastUserMsg?.content || "",
    );
  } else {
    prompt = lastUserMsg?.content || "";
  }

  const url = baseUrl.replace(/\/$/, "") + "/api/generate";

  const requestBody = {
    model: model,
    prompt: prompt,
    system: systemContent,
    stream: true,
    options: {
      temperature: CONFIG.API.TEMPERATURE,
      num_predict: CONFIG.API.MAX_TOKENS,
    },
  };

  // Only sent when the user explicitly disables thinking — not all models support it.
  if (disableThinking === true) {
    requestBody.think = false;
  }

  attachOllamaKeepAlive(requestBody, keepAlive);

  // Fresh batching state; any stale buffered text belongs to a superseded stream.
  resetStreamChunkBatching();
  let hitMaxChars = false;

  try {
    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Connection: "close",
      },
      body: JSON.stringify(requestBody),
      signal,
    });

    if (!response.ok) {
      const text = await response.text();
      let errorMsg = `HTTP ${response.status}`;
      if (response.status === 403) {
        // 403 from Ollama is almost always its origin allow-list, not auth.
        errorMsg =
          "403 Forbidden. Ollama is rejecting the request origin. Fix: restart Ollama with OLLAMA_ORIGINS=* (e.g. OLLAMA_ORIGINS=* ollama serve).";
      } else {
        try {
          const err = JSON.parse(text);
          errorMsg = err.error || err.message || errorMsg;
        } catch (e) {
          errorMsg = text || errorMsg;
        }
      }
      throw new Error(errorMsg);
    }

    const reader = response.body.getReader();
    try {
      const decoder = new TextDecoder();
      let buffer = "";
      let streamedChars = 0;
      const maxChars = CONFIG.API.STREAM_MAX_OUTPUT_CHARS;

      while (true) {
        let readResult;
        try {
          // reader.read() rejects (not just resolves done) when the request
          // is aborted mid-stream — treat that as a user cancellation.
          readResult = await reader.read();
        } catch (readErr) {
          if (readErr.name === "AbortError" || signal.aborted) {
            sendStreamDoneToExtension({ cancelled: true });
            return;
          }
          throw readErr;
        }

        const { done, value } = readResult;
        if (done) break;

        // NDJSON: one JSON object per line; keep the trailing partial line buffered.
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        buffer = lines.pop() || "";

        for (const line of lines) {
          if (line.trim()) {
            try {
              const json = JSON.parse(line);
              if (json.response) {
                const piece = json.response;
                // Hard output cap: stop reading rather than flood the popup.
                if (streamedChars + piece.length > maxChars) {
                  hitMaxChars = true;
                  reader.cancel().catch(() => {});
                  break;
                }
                streamedChars += piece.length;
                queueStreamChunk(piece);
              }
            } catch (e) {
              // Skip invalid JSON lines
            }
          }
        }

        if (hitMaxChars) break;
      }

      if (hitMaxChars) {
        sendStreamDoneToExtension({ truncated: true });
      } else {
        sendStreamDoneToExtension();
      }
    } finally {
      try {
        await reader.cancel();
      } catch (e) {
        /* stream may already be closed */
      }
    }
  } catch (error) {
    if (error.name === "AbortError" || signal.aborted) {
      sendStreamDoneToExtension({ cancelled: true });
      return;
    }
    sendStreamDoneToExtension({ error: error.message });
  }
}
872
/**
 * Streaming chat completion against an OpenAI-compatible endpoint (SSE, plus
 * an NDJSON fallback some proxies use). Tokens are forwarded to the popup via
 * queueStreamChunk; completion, cancellation, truncation, and errors are ALL
 * reported via sendStreamDoneToExtension — this function never throws to its
 * caller.
 */
async function callOpenAICompatibleStream(
  baseUrl,
  model,
  apiKey,
  messages,
  signal,
) {
  const url = resolveOpenAICompatChatUrl(baseUrl);

  // Fresh batching state; any stale buffered text belongs to a superseded stream.
  resetStreamChunkBatching();
  let hitMaxChars = false;

  try {
    const response = await fetch(url, {
      method: "POST",
      headers: openAICompatFetchHeaders(apiKey, url),
      body: JSON.stringify({
        model: model,
        messages: messages,
        stream: true,
        max_tokens: CONFIG.API.MAX_TOKENS,
      }),
      signal,
    });

    if (!response.ok) {
      const text = await response.text();
      let errorMsg = `HTTP ${response.status}`;

      if (response.status === 403) {
        if (url.includes("/v1")) {
          errorMsg =
            "403 Forbidden. This often means: invalid API key, API key lacks permissions, or the server rejected the request origin.";
        } else {
          errorMsg =
            "403 Forbidden. If using Ollama, ensure it's running with: ollama serve";
        }
      } else if (response.status === 405) {
        errorMsg =
          "405 Method not allowed. Check if the API URL is correct for your API mode (Native vs OpenAI-compatible).";
      } else {
        try {
          const err = JSON.parse(text);
          errorMsg = err.error?.message || err.message || errorMsg;
        } catch (e) {
          errorMsg = text || errorMsg;
        }
      }
      throw new Error(errorMsg);
    }

    const reader = response.body.getReader();
    try {
      const decoder = new TextDecoder();
      let buffer = "";
      let streamedChars = 0;
      const maxChars = CONFIG.API.STREAM_MAX_OUTPUT_CHARS;

      while (true) {
        let readResult;
        try {
          // reader.read() rejects (not just resolves done) when the request
          // is aborted mid-stream — treat that as a user cancellation.
          readResult = await reader.read();
        } catch (readErr) {
          if (readErr.name === "AbortError" || signal.aborted) {
            sendStreamDoneToExtension({ cancelled: true });
            return;
          }
          throw readErr;
        }

        const { done, value } = readResult;
        if (done) break;

        // Line-oriented parse; keep the trailing partial line buffered.
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        buffer = lines.pop() || "";

        for (const line of lines) {
          const trimmed = line.trim();
          if (!trimmed) continue;
          let payload = null;
          if (trimmed.startsWith("data:")) {
            // SSE event: strip the prefix, a possible BOM, and leading space.
            payload = trimmed.slice(5).replace(/^\uFEFF/, "").trimStart();
            if (payload === "[DONE]") continue;
          } else if (trimmed.startsWith("{") && /"choices"\s*:/i.test(trimmed)) {
            // Some OpenAI-compatible proxies stream NDJSON without a `data:` prefix.
            payload = trimmed;
          } else {
            continue;
          }
          try {
            const json = JSON.parse(payload);
            const content = extractOpenAIStreamDeltaChunk(json);
            if (content) {
              // Hard output cap: stop reading rather than flood the popup.
              if (streamedChars + content.length > maxChars) {
                hitMaxChars = true;
                reader.cancel().catch(() => {});
                break;
              }
              streamedChars += content.length;
              queueStreamChunk(content);
            }
          } catch (e) {
            // Skip invalid JSON lines
          }
        }

        if (hitMaxChars) break;
      }

      if (hitMaxChars) {
        sendStreamDoneToExtension({ truncated: true });
      } else {
        sendStreamDoneToExtension();
      }
    } finally {
      try {
        await reader.cancel();
      } catch (e) {
        /* stream may already be closed */
      }
    }
  } catch (error) {
    if (error.name === "AbortError" || signal.aborted) {
      sendStreamDoneToExtension({ cancelled: true });
      return;
    }
    sendStreamDoneToExtension({ error: error.message });
  }
}