A browser extension that lets you summarize any webpage and ask questions using AI.
at main 1002 lines 27 kB view raw
1// Background script - handles API communication 2// Uses centralized CONFIG from config.js 3 4// Chrome MV3: service worker — load config via importScripts. 5// Firefox MV3: event-page scripts — config.js is listed first in manifest "scripts". 6if (typeof importScripts === "function") { 7 importScripts("config.js"); 8} 9 10// Cache key prefixes from CONFIG 11const QUICK_SUMMARY_CACHE_PREFIX = CONFIG.CACHE.QUICK_SUMMARY; 12const DETAILED_SUMMARY_CACHE_PREFIX = CONFIG.CACHE.DETAILED_SUMMARY; 13const CONTENT_CACHE_PREFIX = CONFIG.CACHE.CONTENT; 14const CHAT_CACHE_PREFIX = CONFIG.CACHE.CHAT; 15 16/** 17 * Per-tab inflight Ollama/API state: one slot for streaming, one for non-stream (chat/suggestions). 18 * - New stream aborts everything (user superseded or regenerate). 19 * - New non-stream only aborts prior non-stream, never an active stream (avoids races with post-stream work). 20 */ 21const inflightByTab = new Map(); 22 23function abortAllInflightForTab(tabId) { 24 if (tabId == null) return; 25 const e = inflightByTab.get(tabId); 26 if (!e) return; 27 if (e.stream) e.stream.abort(); 28 if (e.other) e.other.abort(); 29 inflightByTab.delete(tabId); 30} 31 32function takeStreamSlot(tabId) { 33 if (tabId == null) return new AbortController(); 34 abortAllInflightForTab(tabId); 35 const c = new AbortController(); 36 inflightByTab.set(tabId, { stream: c }); 37 return c; 38} 39 40function releaseStreamSlot(tabId, controller) { 41 if (tabId == null) return; 42 const e = inflightByTab.get(tabId); 43 if (e && e.stream === controller) { 44 if (e.other) { 45 inflightByTab.set(tabId, { other: e.other }); 46 } else { 47 inflightByTab.delete(tabId); 48 } 49 } 50} 51 52function takeOtherSlot(tabId) { 53 if (tabId == null) return new AbortController(); 54 const e = inflightByTab.get(tabId) || {}; 55 if (e.other) e.other.abort(); 56 const c = new AbortController(); 57 inflightByTab.set(tabId, { ...e, other: c }); 58 return c; 59} 60 61function releaseOtherSlot(tabId, controller) { 62 if (tabId == null) return; 63 const e = inflightByTab.get(tabId); 64 if (e && e.other === controller) { 65 if (e.stream) { 66 inflightByTab.set(tabId, { stream: e.stream }); 67 } else { 68 inflightByTab.delete(tabId); 69 } 70 } 71} 72 73/** Coalesce tiny stream tokens into fewer runtime messages (easier on SW + popup; less backpressure). */ 74const CHUNK_BATCH_MAX_CHARS = 512; 75const CHUNK_BATCH_MS = 20; 76 77let streamChunkBuffer = ""; 78let streamChunkTimer = null; 79 80function resetStreamChunkBatching() { 81 if (streamChunkTimer) { 82 clearTimeout(streamChunkTimer); 83 streamChunkTimer = null; 84 } 85 streamChunkBuffer = ""; 86} 87 88function flushStreamChunks() { 89 streamChunkTimer = null; 90 if (!streamChunkBuffer) return; 91 const chunk = streamChunkBuffer; 92 streamChunkBuffer = ""; 93 chrome.runtime 94 .sendMessage({ action: "streamChunk", chunk, done: false }) 95 .catch(() => {}); 96} 97 98function queueStreamChunk(piece) { 99 streamChunkBuffer += piece; 100 if (streamChunkBuffer.length >= CHUNK_BATCH_MAX_CHARS) { 101 if (streamChunkTimer) { 102 clearTimeout(streamChunkTimer); 103 streamChunkTimer = null; 104 } 105 flushStreamChunks(); 106 } else if (!streamChunkTimer) { 107 streamChunkTimer = setTimeout(flushStreamChunks, CHUNK_BATCH_MS); 108 } 109} 110 111function finalizeStreamChunkBatching() { 112 if (streamChunkTimer) { 113 clearTimeout(streamChunkTimer); 114 streamChunkTimer = null; 115 } 116 flushStreamChunks(); 117} 118 119/** Ollama /api/generate keep_alive — null means omit so the server uses its default. */ 120function normalizeOllamaKeepAlive(keepAlive) { 121 if (keepAlive == null) return null; 122 const s = String(keepAlive).trim(); 123 return s || null; 124} 125 126function attachOllamaKeepAlive(requestBody, keepAlive) { 127 const v = normalizeOllamaKeepAlive(keepAlive); 128 if (v) requestBody.keep_alive = v; 129} 130 131// ── Prompt templates from CONFIG ───────────────────────────────────────── 132const OLLAMA_CONTEXT_TEMPLATE = CONFIG.OLLAMA.CONTEXT_TEMPLATE; 133 134chrome.runtime.onInstalled.addListener(() => { 135 // Set default settings only if they don't already exist 136 chrome.storage.sync.get(["apiMode"]).then((result) => { 137 if (!result.apiMode) { 138 // Settings don't exist yet, set defaults from CONFIG 139 chrome.storage.sync.set({ 140 apiMode: CONFIG.API.MODE, 141 apiBaseUrl: CONFIG.API.BASE_URL, 142 model: CONFIG.API.MODEL, 143 apiKey: CONFIG.API.KEY, 144 disableThinking: CONFIG.API.DISABLE_THINKING, 145 autoSummarize: CONFIG.API.AUTO_SUMMARIZE, 146 keepAlive: CONFIG.API.KEEP_ALIVE, 147 }); 148 } 149 }); 150 151 // Create context menu item 152 chrome.contextMenus.create({ 153 id: "summarize-page", 154 title: "Open Lede for this page", 155 contexts: ["page", "selection"], 156 }); 157}); 158 159// Clear cache when a tab is closed 160chrome.tabs.onRemoved.addListener((tabId) => { 161 clearTabCache(tabId); 162}); 163 164// Clear cache when a tab navigates to a new URL 165chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => { 166 if (changeInfo.url) { 167 // URL changed, clear the cache for this tab 168 clearTabCache(tabId); 169 } 170}); 171 172async function clearTabCache(tabId) { 173 try { 174 await chrome.storage.session.remove([ 175 QUICK_SUMMARY_CACHE_PREFIX + tabId, 176 DETAILED_SUMMARY_CACHE_PREFIX + tabId, 177 CONTENT_CACHE_PREFIX + tabId, 178 CHAT_CACHE_PREFIX + tabId, 179 CONFIG.CACHE.SUGGESTIONS + tabId, 180 ]); 181 } catch (e) { 182 console.error("[Lede] Error clearing cache:", e); 183 } 184} 185 186// Handle context menu clicks 187chrome.contextMenus.onClicked.addListener((info, tab) => { 188 if (info.menuItemId === "summarize-page") { 189 triggerSummarizeForTab(tab.id); 190 } 191}); 192 193// Handle keyboard shortcut (manifest command + optional legacy alias) 194chrome.commands.onCommand.addListener((command) => { 195 if (command === "summarize-page" || command === "open-summarizer") { 196 chrome.tabs.query({ active: true, currentWindow: true }).then((tabs) => { 197 if (tabs[0]) { 198 triggerSummarizeForTab(tabs[0].id); 199 } 200 }); 201 } 202}); 203 204function triggerSummarizeForTab(tabId) { 205 // Store a flag to trigger summarize when popup opens 206 chrome.storage.session.set({ triggerSummarize: true, targetTabId: tabId }); 207 208 // Firefox: Create a popup window 209 // Chrome: Use action.openPopup() for toolbar popup 210 if (typeof browser !== "undefined") { 211 // Firefox: Create a popup window matching the UI size (extra height for browser chrome) 212 chrome.windows.create({ 213 url: chrome.runtime.getURL("popup/popup.html"), 214 type: "popup", 215 width: 400, 216 height: 600, 217 focused: true, 218 }); 219 } else { 220 // Chrome: Programmatically open the popup 221 chrome.action.openPopup(); 222 } 223} 224 225chrome.runtime.onMessage.addListener((request, sender, sendResponse) => { 226 if (request.action === "ping") { 227 sendResponse({ success: true, message: "pong" }); 228 return true; 229 } 230 231 if (request.action === "extractPageContent") { 232 extractPageContentForTab(request.tabId) 233 .then((result) => sendResponse({ success: true, ...result })) 234 .catch((error) => { 235 sendResponse({ 236 success: false, 237 error: error.message || "Failed to extract page content", 238 }); 239 }); 240 return true; 241 } 242 243 if (request.action === "chat") { 244 handleChatRequest(request.data) 245 .then((response) => { 246 sendResponse({ success: true, data: response }); 247 }) 248 .catch((error) => { 249 console.error("Background script error:", error); 250 sendResponse({ 251 success: false, 252 error: error.message || "Unknown error occurred", 253 }); 254 }); 255 return true; // Keep channel open for async 256 } 257 258 if (request.action === "streamChat") { 259 const { tabId } = request; 260 handleStreamChatRequest(request.data, tabId).catch((error) => { 261 console.error("Stream chat error:", error); 262 sendStreamDoneToExtension({ error: error.message }); 263 }); 264 return false; // Popup receives streamChunk/streamDone via runtime messages 265 } 266 267 if (request.action === "cancelStream") { 268 const tabId = request.tabId; 269 if (tabId != null) { 270 abortAllInflightForTab(tabId); 271 } 272 return false; 273 } 274 275 if (request.action === "testOllama") { 276 testOllamaConnection() 277 .then(() => sendResponse({ success: true })) 278 .catch((err) => sendResponse({ success: false, error: err.message })); 279 return true; 280 } 281}); 282 283function isRestrictedUrl(url) { 284 return ( 285 !url || 286 url.startsWith("chrome://") || 287 url.startsWith("chrome-extension://") || 288 url.startsWith("edge://") || 289 url.startsWith("about:") || 290 url.startsWith("moz-extension://") || 291 url.startsWith("resource://") 292 ); 293} 294 295async function extractPageContentForTab(tabId) { 296 if (!tabId) { 297 return { content: "", wasTruncated: false }; 298 } 299 300 const tab = await chrome.tabs.get(tabId); 301 if (isRestrictedUrl(tab.url)) { 302 return { content: "", wasTruncated: false }; 303 } 304 305 // Readability must load before content.js (content.js is not bundled with it). 306 // reddit-new.js registers window.__webaiTryRedditNew for new-Reddit thread extraction. 307 await chrome.scripting.executeScript({ 308 target: { tabId }, 309 files: ["scripts/Readability.js", "scripts/reddit-new.js", "scripts/content.js"], 310 }); 311 312 const response = await chrome.tabs.sendMessage(tabId, { action: "extract" }); 313 if (!response) { 314 return { content: "", wasTruncated: false }; 315 } 316 317 return { 318 content: response.content ?? "", 319 wasTruncated: Boolean(response.wasTruncated), 320 extractionSource: response.extractionSource ?? "unknown", 321 unsupportedReason: response.unsupportedReason ?? null, 322 }; 323} 324 325async function testOllamaConnection() { 326 const response = await fetch("http://localhost:11434/api/tags"); 327 if (!response.ok) { 328 throw new Error(`HTTP ${response.status}`); 329 } 330 const data = await response.json(); 331 return data; 332} 333 334/** 335 * OpenAI-compatible APIs differ: `message.content` may be a string, a parts array (Responses / some 336 * gateways), or text may appear on `choices[0].text` or `output_text`. 337 */ 338function normalizeMessageContent(content) { 339 if (content == null) return ""; 340 if (typeof content === "string") return content; 341 if (Array.isArray(content)) { 342 let out = ""; 343 for (const part of content) { 344 if (typeof part === "string") { 345 out += part; 346 } else if (part && typeof part === "object") { 347 if (typeof part.text === "string") out += part.text; 348 else if (typeof part.content === "string") out += part.content; 349 } 350 } 351 return out; 352 } 353 return ""; 354} 355 356function extractOpenAIChatCompletionText(data) { 357 if (!data || typeof data !== "object") return ""; 358 const c0 = data.choices?.[0]; 359 if (!c0) return ""; 360 const fromMsg = normalizeMessageContent(c0.message?.content); 361 if (fromMsg) return fromMsg; 362 if (typeof c0.text === "string" && c0.text) return c0.text; 363 if (typeof data.output_text === "string" && data.output_text) return data.output_text; 364 return ""; 365} 366 367function normalizeOpenAIChatResponse(data) { 368 const merged = extractOpenAIChatCompletionText(data); 369 if (!merged || !data?.choices?.[0]) return data; 370 const first = data.choices[0]; 371 return { 372 ...data, 373 choices: [ 374 { 375 ...first, 376 message: { 377 ...(first.message || {}), 378 role: first.message?.role || "assistant", 379 content: merged, 380 }, 381 }, 382 ...data.choices.slice(1), 383 ], 384 }; 385} 386 387/** Extract one streaming text fragment from an SSE JSON payload (delta.content string or parts array). */ 388function extractOpenAIStreamDeltaChunk(parsed) { 389 if (!parsed || typeof parsed !== "object") return ""; 390 const c0 = parsed.choices?.[0]; 391 if (!c0) return ""; 392 393 const delta = c0.delta; 394 if (delta && typeof delta === "object") { 395 const fromDelta = normalizeMessageContent(delta.content); 396 if (fromDelta) return fromDelta; 397 if (typeof delta.reasoning_content === "string" && delta.reasoning_content) { 398 return delta.reasoning_content; 399 } 400 } 401 402 if (typeof c0.text === "string" && c0.text) return c0.text; 403 404 if (c0.message?.content) { 405 const t = normalizeMessageContent(c0.message.content); 406 if (t) return t; 407 } 408 409 return ""; 410} 411 412/** 413 * Resolve API base URL to POST /.../chat/completions once. Avoids broken URLs when the user pastes 414 * a full endpoint, uses Azure (?api-version=…), or Google-style /v1beta/openai (substring "v1" 415 * must not trigger a bogus extra /v1 segment). 416 */ 417function resolveOpenAICompatChatUrl(raw) { 418 const trimmed = String(raw || "").trim(); 419 if (!trimmed) return trimmed; 420 try { 421 const u = new URL(trimmed); 422 let path = (u.pathname || "").replace(/\/+$/, "") || ""; 423 424 if (/\/chat\/completions$/i.test(path)) { 425 return u.toString(); 426 } 427 428 if (/\/openai\/deployments\/[^/]+$/i.test(path)) { 429 u.pathname = `${path}/chat/completions`; 430 return u.toString(); 431 } 432 433 if (/\/v1$/i.test(path)) { 434 u.pathname = `${path}/chat/completions`; 435 return u.toString(); 436 } 437 438 if (/\/v1beta\/openai$/i.test(path)) { 439 u.pathname = `${path}/chat/completions`; 440 return u.toString(); 441 } 442 443 if (!/\bv1\b/i.test(path)) { 444 const base = path || ""; 445 u.pathname = `${base}/v1/chat/completions`.replace(/\/{2,}/g, "/"); 446 return u.toString(); 447 } 448 449 u.pathname = `${path}/chat/completions`.replace(/\/{2,}/g, "/"); 450 return u.toString(); 451 } catch { 452 return trimmed; 453 } 454} 455 456function openAICompatFetchHeaders(apiKey, urlString) { 457 const headers = { "Content-Type": "application/json" }; 458 if (!apiKey) return headers; 459 try { 460 const host = new URL(urlString).hostname; 461 if (/\.openai\.azure\.com$/i.test(host)) { 462 headers["api-key"] = apiKey; 463 } else { 464 headers.Authorization = `Bearer ${apiKey}`; 465 } 466 } catch { 467 headers.Authorization = `Bearer ${apiKey}`; 468 } 469 return headers; 470} 471 472async function handleChatRequest(data) { 473 const { 474 tabId, 475 apiBaseUrl, 476 model, 477 apiKey, 478 messages, 479 apiMode, 480 disableThinking, 481 maxOutputTokens, 482 keepAlive, 483 } = data; 484 485 const tokenCap = 486 typeof maxOutputTokens === "number" && maxOutputTokens > 0 487 ? maxOutputTokens 488 : CONFIG.API.MAX_TOKENS; 489 490 const controller = takeOtherSlot(tabId); 491 const signal = controller.signal; 492 493 try { 494 const useNativeOllama = apiMode === "ollama"; 495 496 if (useNativeOllama) { 497 return await callOllamaNative( 498 apiBaseUrl, 499 model, 500 messages, 501 disableThinking, 502 tokenCap, 503 signal, 504 keepAlive, 505 ); 506 } else { 507 const raw = await callOpenAICompatible( 508 apiBaseUrl, 509 model, 510 apiKey, 511 messages, 512 tokenCap, 513 signal, 514 ); 515 return normalizeOpenAIChatResponse(raw); 516 } 517 } catch (error) { 518 if (error.name === "AbortError" || signal.aborted) { 519 throw new Error("Request cancelled"); 520 } 521 throw error; 522 } finally { 523 releaseOtherSlot(tabId, controller); 524 } 525} 526 527async function handleStreamChatRequest(data, tabId) { 528 const { 529 apiBaseUrl, 530 model, 531 apiKey, 532 messages, 533 apiMode, 534 disableThinking, 535 keepAlive, 536 } = data; 537 538 const controller = takeStreamSlot(tabId); 539 const signal = controller.signal; 540 541 try { 542 const useNativeOllama = apiMode === "ollama"; 543 544 if (useNativeOllama) { 545 await callOllamaNativeStream( 546 apiBaseUrl, 547 model, 548 messages, 549 disableThinking, 550 signal, 551 keepAlive, 552 ); 553 } else { 554 await callOpenAICompatibleStream( 555 apiBaseUrl, 556 model, 557 apiKey, 558 messages, 559 signal, 560 ); 561 } 562 } finally { 563 releaseStreamSlot(tabId, controller); 564 } 565} 566 567async function callOllamaNative( 568 baseUrl, 569 model, 570 messages, 571 disableThinking, 572 maxTokens = CONFIG.API.MAX_TOKENS, 573 signal, 574 keepAlive, 575) { 576 // Merge all system messages into one so none are dropped 577 const systemMsgs = messages.filter((m) => m.role === "system"); 578 const systemContent = systemMsgs.map((m) => m.content).join("\n\n"); 579 const otherMessages = messages.filter((m) => m.role !== "system"); 580 const lastUserMsg = otherMessages.filter((m) => m.role === "user").pop(); 581 582 // Build conversation context 583 let prompt; 584 if (otherMessages.length > 1) { 585 const context = otherMessages 586 .slice(0, -1) 587 .map((m) => `${m.role}: ${m.content}`) 588 .join("\n"); 589 prompt = OLLAMA_CONTEXT_TEMPLATE.replace("${context}", context).replace( 590 "${userMessage}", 591 lastUserMsg?.content || "", 592 ); 593 } else { 594 prompt = lastUserMsg?.content || ""; 595 } 596 597 const url = baseUrl.replace(/\/$/, "") + "/api/generate"; 598 599 // Build request body 600 const requestBody = { 601 model: model, 602 prompt: prompt, 603 system: systemContent, 604 stream: false, 605 options: { 606 temperature: CONFIG.API.TEMPERATURE, 607 num_predict: maxTokens, 608 }, 609 }; 610 611 // Only include think: false when user explicitly disables thinking 612 // (thinking is enabled by default in Ollama, and not all models support it) 613 if (disableThinking === true) { 614 requestBody.think = false; 615 } 616 617 attachOllamaKeepAlive(requestBody, keepAlive); 618 619 const fetchOpts = { 620 method: "POST", 621 headers: { 622 "Content-Type": "application/json", 623 Connection: "close", 624 }, 625 body: JSON.stringify(requestBody), 626 }; 627 if (signal) fetchOpts.signal = signal; 628 629 const response = await fetch(url, fetchOpts); 630 631 if (!response.ok) { 632 const text = await response.text(); 633 let errorMsg = `HTTP ${response.status}`; 634 if (response.status === 403) { 635 errorMsg = 636 "403 Forbidden. Ollama is rejecting the request origin. Fix: restart Ollama with OLLAMA_ORIGINS=* (e.g. OLLAMA_ORIGINS=* ollama serve)."; 637 } else { 638 try { 639 const err = JSON.parse(text); 640 errorMsg = err.error || err.message || errorMsg; 641 } catch (e) { 642 errorMsg = text || errorMsg; 643 } 644 } 645 throw new Error(errorMsg); 646 } 647 648 const data = await response.json(); 649 650 return { 651 choices: [ 652 { 653 message: { 654 role: "assistant", 655 content: data.response, 656 }, 657 }, 658 ], 659 model: model, 660 }; 661} 662 663async function callOpenAICompatible( 664 baseUrl, 665 model, 666 apiKey, 667 messages, 668 maxTokens = CONFIG.API.MAX_TOKENS, 669 signal, 670) { 671 const url = resolveOpenAICompatChatUrl(baseUrl); 672 673 const fetchOpts = { 674 method: "POST", 675 headers: openAICompatFetchHeaders(apiKey, url), 676 body: JSON.stringify({ 677 model: model, 678 messages: messages, 679 stream: false, 680 max_tokens: maxTokens, 681 }), 682 }; 683 if (signal) fetchOpts.signal = signal; 684 685 const response = await fetch(url, fetchOpts); 686 687 if (!response.ok) { 688 const text = await response.text(); 689 let errorMsg = `HTTP ${response.status}`; 690 691 if (response.status === 403) { 692 if (url.includes("/v1")) { 693 errorMsg = 694 "403 Forbidden. This often means: invalid API key, API key lacks permissions, or the server rejected the request origin."; 695 } else { 696 errorMsg = 697 "403 Forbidden. If using Ollama, ensure it's running with: ollama serve"; 698 } 699 } else if (response.status === 405) { 700 errorMsg = 701 "405 Method not allowed. Check if the API URL is correct for your API mode (Native vs OpenAI-compatible)."; 702 } else { 703 try { 704 const err = JSON.parse(text); 705 errorMsg = err.error?.message || err.message || errorMsg; 706 } catch (e) { 707 errorMsg = text || errorMsg; 708 } 709 } 710 711 throw new Error(errorMsg); 712 } 713 714 return await response.json(); 715} 716 717function sendStreamDoneToExtension(payload = {}) { 718 if (payload.error) { 719 resetStreamChunkBatching(); 720 } else { 721 finalizeStreamChunkBatching(); 722 } 723 chrome.runtime.sendMessage({ action: "streamDone", ...payload }).catch(() => {}); 724} 725 726async function callOllamaNativeStream( 727 baseUrl, 728 model, 729 messages, 730 disableThinking, 731 signal, 732 keepAlive, 733) { 734 const systemMsgs = messages.filter((m) => m.role === "system"); 735 const systemContent = systemMsgs.map((m) => m.content).join("\n\n"); 736 const otherMessages = messages.filter((m) => m.role !== "system"); 737 const lastUserMsg = otherMessages.filter((m) => m.role === "user").pop(); 738 739 let prompt; 740 if (otherMessages.length > 1) { 741 const context = otherMessages 742 .slice(0, -1) 743 .map((m) => `${m.role}: ${m.content}`) 744 .join("\n"); 745 prompt = OLLAMA_CONTEXT_TEMPLATE.replace("${context}", context).replace( 746 "${userMessage}", 747 lastUserMsg?.content || "", 748 ); 749 } else { 750 prompt = lastUserMsg?.content || ""; 751 } 752 753 const url = baseUrl.replace(/\/$/, "") + "/api/generate"; 754 755 const requestBody = { 756 model: model, 757 prompt: prompt, 758 system: systemContent, 759 stream: true, 760 options: { 761 temperature: CONFIG.API.TEMPERATURE, 762 num_predict: CONFIG.API.MAX_TOKENS, 763 }, 764 }; 765 766 if (disableThinking === true) { 767 requestBody.think = false; 768 } 769 770 attachOllamaKeepAlive(requestBody, keepAlive); 771 772 resetStreamChunkBatching(); 773 let hitMaxChars = false; 774 775 try { 776 const response = await fetch(url, { 777 method: "POST", 778 headers: { 779 "Content-Type": "application/json", 780 Connection: "close", 781 }, 782 body: JSON.stringify(requestBody), 783 signal, 784 }); 785 786 if (!response.ok) { 787 const text = await response.text(); 788 let errorMsg = `HTTP ${response.status}`; 789 if (response.status === 403) { 790 errorMsg = 791 "403 Forbidden. Ollama is rejecting the request origin. Fix: restart Ollama with OLLAMA_ORIGINS=* (e.g. OLLAMA_ORIGINS=* ollama serve)."; 792 } else { 793 try { 794 const err = JSON.parse(text); 795 errorMsg = err.error || err.message || errorMsg; 796 } catch (e) { 797 errorMsg = text || errorMsg; 798 } 799 } 800 throw new Error(errorMsg); 801 } 802 803 const reader = response.body.getReader(); 804 try { 805 const decoder = new TextDecoder(); 806 let buffer = ""; 807 let streamedChars = 0; 808 const maxChars = CONFIG.API.STREAM_MAX_OUTPUT_CHARS; 809 810 while (true) { 811 let readResult; 812 try { 813 readResult = await reader.read(); 814 } catch (readErr) { 815 if (readErr.name === "AbortError" || signal.aborted) { 816 sendStreamDoneToExtension({ cancelled: true }); 817 return; 818 } 819 throw readErr; 820 } 821 822 const { done, value } = readResult; 823 if (done) break; 824 825 buffer += decoder.decode(value, { stream: true }); 826 const lines = buffer.split("\n"); 827 buffer = lines.pop() || ""; 828 829 for (const line of lines) { 830 if (line.trim()) { 831 try { 832 const json = JSON.parse(line); 833 if (json.response) { 834 const piece = json.response; 835 if (streamedChars + piece.length > maxChars) { 836 hitMaxChars = true; 837 reader.cancel().catch(() => {}); 838 break; 839 } 840 streamedChars += piece.length; 841 queueStreamChunk(piece); 842 } 843 } catch (e) { 844 // Skip invalid JSON lines 845 } 846 } 847 } 848 849 if (hitMaxChars) break; 850 } 851 852 if (hitMaxChars) { 853 sendStreamDoneToExtension({ truncated: true }); 854 } else { 855 sendStreamDoneToExtension(); 856 } 857 } finally { 858 try { 859 await reader.cancel(); 860 } catch (e) { 861 /* stream may already be closed */ 862 } 863 } 864 } catch (error) { 865 if (error.name === "AbortError" || signal.aborted) { 866 sendStreamDoneToExtension({ cancelled: true }); 867 return; 868 } 869 sendStreamDoneToExtension({ error: error.message }); 870 } 871} 872 873async function callOpenAICompatibleStream( 874 baseUrl, 875 model, 876 apiKey, 877 messages, 878 signal, 879) { 880 const url = resolveOpenAICompatChatUrl(baseUrl); 881 882 resetStreamChunkBatching(); 883 let hitMaxChars = false; 884 885 try { 886 const response = await fetch(url, { 887 method: "POST", 888 headers: openAICompatFetchHeaders(apiKey, url), 889 body: JSON.stringify({ 890 model: model, 891 messages: messages, 892 stream: true, 893 max_tokens: CONFIG.API.MAX_TOKENS, 894 }), 895 signal, 896 }); 897 898 if (!response.ok) { 899 const text = await response.text(); 900 let errorMsg = `HTTP ${response.status}`; 901 902 if (response.status === 403) { 903 if (url.includes("/v1")) { 904 errorMsg = 905 "403 Forbidden. This often means: invalid API key, API key lacks permissions, or the server rejected the request origin."; 906 } else { 907 errorMsg = 908 "403 Forbidden. If using Ollama, ensure it's running with: ollama serve"; 909 } 910 } else if (response.status === 405) { 911 errorMsg = 912 "405 Method not allowed. Check if the API URL is correct for your API mode (Native vs OpenAI-compatible)."; 913 } else { 914 try { 915 const err = JSON.parse(text); 916 errorMsg = err.error?.message || err.message || errorMsg; 917 } catch (e) { 918 errorMsg = text || errorMsg; 919 } 920 } 921 throw new Error(errorMsg); 922 } 923 924 const reader = response.body.getReader(); 925 try { 926 const decoder = new TextDecoder(); 927 let buffer = ""; 928 let streamedChars = 0; 929 const maxChars = CONFIG.API.STREAM_MAX_OUTPUT_CHARS; 930 931 while (true) { 932 let readResult; 933 try { 934 readResult = await reader.read(); 935 } catch (readErr) { 936 if (readErr.name === "AbortError" || signal.aborted) { 937 sendStreamDoneToExtension({ cancelled: true }); 938 return; 939 } 940 throw readErr; 941 } 942 943 const { done, value } = readResult; 944 if (done) break; 945 946 buffer += decoder.decode(value, { stream: true }); 947 const lines = buffer.split("\n"); 948 buffer = lines.pop() || ""; 949 950 for (const line of lines) { 951 const trimmed = line.trim(); 952 if (!trimmed) continue; 953 let payload = null; 954 if (trimmed.startsWith("data:")) { 955 payload = trimmed.slice(5).replace(/^\uFEFF/, "").trimStart(); 956 if (payload === "[DONE]") continue; 957 } else if (trimmed.startsWith("{") && /"choices"\s*:/i.test(trimmed)) { 958 // Some OpenAI-compatible proxies stream NDJSON without a `data:` prefix. 959 payload = trimmed; 960 } else { 961 continue; 962 } 963 try { 964 const json = JSON.parse(payload); 965 const content = extractOpenAIStreamDeltaChunk(json); 966 if (content) { 967 if (streamedChars + content.length > maxChars) { 968 hitMaxChars = true; 969 reader.cancel().catch(() => {}); 970 break; 971 } 972 streamedChars += content.length; 973 queueStreamChunk(content); 974 } 975 } catch (e) { 976 // Skip invalid JSON lines 977 } 978 } 979 980 if (hitMaxChars) break; 981 } 982 983 if (hitMaxChars) { 984 sendStreamDoneToExtension({ truncated: true }); 985 } else { 986 sendStreamDoneToExtension(); 987 } 988 } finally { 989 try { 990 await reader.cancel(); 991 } catch (e) { 992 /* stream may already be closed */ 993 } 994 } 995 } catch (error) { 996 if (error.name === "AbortError" || signal.aborted) { 997 sendStreamDoneToExtension({ cancelled: true }); 998 return; 999 } 1000 sendStreamDoneToExtension({ error: error.message }); 1001 } 1002}