scripts/background.js at main · ellioth.co/summarizer-extension

ellioth.co / summarizer-extension
fork atom
A browser extension that lets you summarize any webpage and ask questions using AI.
fork atom
summarizer-extension / scripts / background.js
at main 1002 lines 27 kB view raw
wrap content
ellioth.co Rebrand extension to Lede (page summarizer & chat) 2d ago
94a36526
   1// Background script - handles API communication
   2// Uses centralized CONFIG from config.js
   3
   4// Chrome MV3: service worker — load config via importScripts.
   5// Firefox MV3: event-page scripts — config.js is listed first in manifest "scripts".
   6if (typeof importScripts === "function") {
   7  importScripts("config.js");
   8}
   9
  10// Cache key prefixes from CONFIG
  11const QUICK_SUMMARY_CACHE_PREFIX = CONFIG.CACHE.QUICK_SUMMARY;
  12const DETAILED_SUMMARY_CACHE_PREFIX = CONFIG.CACHE.DETAILED_SUMMARY;
  13const CONTENT_CACHE_PREFIX = CONFIG.CACHE.CONTENT;
  14const CHAT_CACHE_PREFIX = CONFIG.CACHE.CHAT;
  15
  16/**
  17 * Per-tab inflight Ollama/API state: one slot for streaming, one for non-stream (chat/suggestions).
  18 * - New stream aborts everything (user superseded or regenerate).
  19 * - New non-stream only aborts prior non-stream, never an active stream (avoids races with post-stream work).
  20 */
  21const inflightByTab = new Map();
  22
  23function abortAllInflightForTab(tabId) {
  24  if (tabId == null) return;
  25  const e = inflightByTab.get(tabId);
  26  if (!e) return;
  27  if (e.stream) e.stream.abort();
  28  if (e.other) e.other.abort();
  29  inflightByTab.delete(tabId);
  30}
  31
  32function takeStreamSlot(tabId) {
  33  if (tabId == null) return new AbortController();
  34  abortAllInflightForTab(tabId);
  35  const c = new AbortController();
  36  inflightByTab.set(tabId, { stream: c });
  37  return c;
  38}
  39
  40function releaseStreamSlot(tabId, controller) {
  41  if (tabId == null) return;
  42  const e = inflightByTab.get(tabId);
  43  if (e && e.stream === controller) {
  44    if (e.other) {
  45      inflightByTab.set(tabId, { other: e.other });
  46    } else {
  47      inflightByTab.delete(tabId);
  48    }
  49  }
  50}
  51
  52function takeOtherSlot(tabId) {
  53  if (tabId == null) return new AbortController();
  54  const e = inflightByTab.get(tabId) || {};
  55  if (e.other) e.other.abort();
  56  const c = new AbortController();
  57  inflightByTab.set(tabId, { ...e, other: c });
  58  return c;
  59}
  60
  61function releaseOtherSlot(tabId, controller) {
  62  if (tabId == null) return;
  63  const e = inflightByTab.get(tabId);
  64  if (e && e.other === controller) {
  65    if (e.stream) {
  66      inflightByTab.set(tabId, { stream: e.stream });
  67    } else {
  68      inflightByTab.delete(tabId);
  69    }
  70  }
  71}
  72
  73/** Coalesce tiny stream tokens into fewer runtime messages (easier on SW + popup; less backpressure). */
  74const CHUNK_BATCH_MAX_CHARS = 512;
  75const CHUNK_BATCH_MS = 20;
  76
  77let streamChunkBuffer = "";
  78let streamChunkTimer = null;
  79
  80function resetStreamChunkBatching() {
  81  if (streamChunkTimer) {
  82    clearTimeout(streamChunkTimer);
  83    streamChunkTimer = null;
  84  }
  85  streamChunkBuffer = "";
  86}
  87
  88function flushStreamChunks() {
  89  streamChunkTimer = null;
  90  if (!streamChunkBuffer) return;
  91  const chunk = streamChunkBuffer;
  92  streamChunkBuffer = "";
  93  chrome.runtime
  94    .sendMessage({ action: "streamChunk", chunk, done: false })
  95    .catch(() => {});
  96}
  97
  98function queueStreamChunk(piece) {
  99  streamChunkBuffer += piece;
 100  if (streamChunkBuffer.length >= CHUNK_BATCH_MAX_CHARS) {
 101    if (streamChunkTimer) {
 102      clearTimeout(streamChunkTimer);
 103      streamChunkTimer = null;
 104    }
 105    flushStreamChunks();
 106  } else if (!streamChunkTimer) {
 107    streamChunkTimer = setTimeout(flushStreamChunks, CHUNK_BATCH_MS);
 108  }
 109}
 110
 111function finalizeStreamChunkBatching() {
 112  if (streamChunkTimer) {
 113    clearTimeout(streamChunkTimer);
 114    streamChunkTimer = null;
 115  }
 116  flushStreamChunks();
 117}
 118
 119/** Ollama /api/generate keep_alive — null means omit so the server uses its default. */
 120function normalizeOllamaKeepAlive(keepAlive) {
 121  if (keepAlive == null) return null;
 122  const s = String(keepAlive).trim();
 123  return s || null;
 124}
 125
 126function attachOllamaKeepAlive(requestBody, keepAlive) {
 127  const v = normalizeOllamaKeepAlive(keepAlive);
 128  if (v) requestBody.keep_alive = v;
 129}
 130
 131// ── Prompt templates from CONFIG ─────────────────────────────────────────
 132const OLLAMA_CONTEXT_TEMPLATE = CONFIG.OLLAMA.CONTEXT_TEMPLATE;
 133
 134chrome.runtime.onInstalled.addListener(() => {
 135  // Set default settings only if they don't already exist
 136  chrome.storage.sync.get(["apiMode"]).then((result) => {
 137    if (!result.apiMode) {
 138      // Settings don't exist yet, set defaults from CONFIG
 139      chrome.storage.sync.set({
 140        apiMode: CONFIG.API.MODE,
 141        apiBaseUrl: CONFIG.API.BASE_URL,
 142        model: CONFIG.API.MODEL,
 143        apiKey: CONFIG.API.KEY,
 144        disableThinking: CONFIG.API.DISABLE_THINKING,
 145        autoSummarize: CONFIG.API.AUTO_SUMMARIZE,
 146        keepAlive: CONFIG.API.KEEP_ALIVE,
 147      });
 148    }
 149  });
 150
 151  // Create context menu item
 152  chrome.contextMenus.create({
 153    id: "summarize-page",
 154    title: "Open Lede for this page",
 155    contexts: ["page", "selection"],
 156  });
 157});
 158
 159// Clear cache when a tab is closed
 160chrome.tabs.onRemoved.addListener((tabId) => {
 161  clearTabCache(tabId);
 162});
 163
 164// Clear cache when a tab navigates to a new URL
 165chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => {
 166  if (changeInfo.url) {
 167    // URL changed, clear the cache for this tab
 168    clearTabCache(tabId);
 169  }
 170});
 171
 172async function clearTabCache(tabId) {
 173  try {
 174    await chrome.storage.session.remove([
 175      QUICK_SUMMARY_CACHE_PREFIX + tabId,
 176      DETAILED_SUMMARY_CACHE_PREFIX + tabId,
 177      CONTENT_CACHE_PREFIX + tabId,
 178      CHAT_CACHE_PREFIX + tabId,
 179      CONFIG.CACHE.SUGGESTIONS + tabId,
 180    ]);
 181  } catch (e) {
 182    console.error("[Lede] Error clearing cache:", e);
 183  }
 184}
 185
 186// Handle context menu clicks
 187chrome.contextMenus.onClicked.addListener((info, tab) => {
 188  if (info.menuItemId === "summarize-page") {
 189    triggerSummarizeForTab(tab.id);
 190  }
 191});
 192
 193// Handle keyboard shortcut (manifest command + optional legacy alias)
 194chrome.commands.onCommand.addListener((command) => {
 195  if (command === "summarize-page" || command === "open-summarizer") {
 196    chrome.tabs.query({ active: true, currentWindow: true }).then((tabs) => {
 197      if (tabs[0]) {
 198        triggerSummarizeForTab(tabs[0].id);
 199      }
 200    });
 201  }
 202});
 203
 204function triggerSummarizeForTab(tabId) {
 205  // Store a flag to trigger summarize when popup opens
 206  chrome.storage.session.set({ triggerSummarize: true, targetTabId: tabId });
 207
 208  // Firefox: Create a popup window
 209  // Chrome: Use action.openPopup() for toolbar popup
 210  if (typeof browser !== "undefined") {
 211    // Firefox: Create a popup window matching the UI size (extra height for browser chrome)
 212    chrome.windows.create({
 213      url: chrome.runtime.getURL("popup/popup.html"),
 214      type: "popup",
 215      width: 400,
 216      height: 600,
 217      focused: true,
 218    });
 219  } else {
 220    // Chrome: Programmatically open the popup
 221    chrome.action.openPopup();
 222  }
 223}
 224
 225chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
 226  if (request.action === "ping") {
 227    sendResponse({ success: true, message: "pong" });
 228    return true;
 229  }
 230
 231  if (request.action === "extractPageContent") {
 232    extractPageContentForTab(request.tabId)
 233      .then((result) => sendResponse({ success: true, ...result }))
 234      .catch((error) => {
 235        sendResponse({
 236          success: false,
 237          error: error.message || "Failed to extract page content",
 238        });
 239      });
 240    return true;
 241  }
 242
 243  if (request.action === "chat") {
 244    handleChatRequest(request.data)
 245      .then((response) => {
 246        sendResponse({ success: true, data: response });
 247      })
 248      .catch((error) => {
 249        console.error("Background script error:", error);
 250        sendResponse({
 251          success: false,
 252          error: error.message || "Unknown error occurred",
 253        });
 254      });
 255    return true; // Keep channel open for async
 256  }
 257
 258  if (request.action === "streamChat") {
 259    const { tabId } = request;
 260    handleStreamChatRequest(request.data, tabId).catch((error) => {
 261      console.error("Stream chat error:", error);
 262      sendStreamDoneToExtension({ error: error.message });
 263    });
 264    return false; // Popup receives streamChunk/streamDone via runtime messages
 265  }
 266
 267  if (request.action === "cancelStream") {
 268    const tabId = request.tabId;
 269    if (tabId != null) {
 270      abortAllInflightForTab(tabId);
 271    }
 272    return false;
 273  }
 274
 275  if (request.action === "testOllama") {
 276    testOllamaConnection()
 277      .then(() => sendResponse({ success: true }))
 278      .catch((err) => sendResponse({ success: false, error: err.message }));
 279    return true;
 280  }
 281});
 282
 283function isRestrictedUrl(url) {
 284  return (
 285    !url ||
 286    url.startsWith("chrome://") ||
 287    url.startsWith("chrome-extension://") ||
 288    url.startsWith("edge://") ||
 289    url.startsWith("about:") ||
 290    url.startsWith("moz-extension://") ||
 291    url.startsWith("resource://")
 292  );
 293}
 294
 295async function extractPageContentForTab(tabId) {
 296  if (!tabId) {
 297    return { content: "", wasTruncated: false };
 298  }
 299
 300  const tab = await chrome.tabs.get(tabId);
 301  if (isRestrictedUrl(tab.url)) {
 302    return { content: "", wasTruncated: false };
 303  }
 304
 305  // Readability must load before content.js (content.js is not bundled with it).
 306  // reddit-new.js registers window.__webaiTryRedditNew for new-Reddit thread extraction.
 307  await chrome.scripting.executeScript({
 308    target: { tabId },
 309    files: ["scripts/Readability.js", "scripts/reddit-new.js", "scripts/content.js"],
 310  });
 311
 312  const response = await chrome.tabs.sendMessage(tabId, { action: "extract" });
 313  if (!response) {
 314    return { content: "", wasTruncated: false };
 315  }
 316
 317  return {
 318    content: response.content ?? "",
 319    wasTruncated: Boolean(response.wasTruncated),
 320    extractionSource: response.extractionSource ?? "unknown",
 321    unsupportedReason: response.unsupportedReason ?? null,
 322  };
 323}
 324
 325async function testOllamaConnection() {
 326  const response = await fetch("http://localhost:11434/api/tags");
 327  if (!response.ok) {
 328    throw new Error(`HTTP ${response.status}`);
 329  }
 330  const data = await response.json();
 331  return data;
 332}
 333
 334/**
 335 * OpenAI-compatible APIs differ: `message.content` may be a string, a parts array (Responses / some
 336 * gateways), or text may appear on `choices[0].text` or `output_text`.
 337 */
 338function normalizeMessageContent(content) {
 339  if (content == null) return "";
 340  if (typeof content === "string") return content;
 341  if (Array.isArray(content)) {
 342    let out = "";
 343    for (const part of content) {
 344      if (typeof part === "string") {
 345        out += part;
 346      } else if (part && typeof part === "object") {
 347        if (typeof part.text === "string") out += part.text;
 348        else if (typeof part.content === "string") out += part.content;
 349      }
 350    }
 351    return out;
 352  }
 353  return "";
 354}
 355
 356function extractOpenAIChatCompletionText(data) {
 357  if (!data || typeof data !== "object") return "";
 358  const c0 = data.choices?.[0];
 359  if (!c0) return "";
 360  const fromMsg = normalizeMessageContent(c0.message?.content);
 361  if (fromMsg) return fromMsg;
 362  if (typeof c0.text === "string" && c0.text) return c0.text;
 363  if (typeof data.output_text === "string" && data.output_text) return data.output_text;
 364  return "";
 365}
 366
 367function normalizeOpenAIChatResponse(data) {
 368  const merged = extractOpenAIChatCompletionText(data);
 369  if (!merged || !data?.choices?.[0]) return data;
 370  const first = data.choices[0];
 371  return {
 372    ...data,
 373    choices: [
 374      {
 375        ...first,
 376        message: {
 377          ...(first.message || {}),
 378          role: first.message?.role || "assistant",
 379          content: merged,
 380        },
 381      },
 382      ...data.choices.slice(1),
 383    ],
 384  };
 385}
 386
 387/** Extract one streaming text fragment from an SSE JSON payload (delta.content string or parts array). */
 388function extractOpenAIStreamDeltaChunk(parsed) {
 389  if (!parsed || typeof parsed !== "object") return "";
 390  const c0 = parsed.choices?.[0];
 391  if (!c0) return "";
 392
 393  const delta = c0.delta;
 394  if (delta && typeof delta === "object") {
 395    const fromDelta = normalizeMessageContent(delta.content);
 396    if (fromDelta) return fromDelta;
 397    if (typeof delta.reasoning_content === "string" && delta.reasoning_content) {
 398      return delta.reasoning_content;
 399    }
 400  }
 401
 402  if (typeof c0.text === "string" && c0.text) return c0.text;
 403
 404  if (c0.message?.content) {
 405    const t = normalizeMessageContent(c0.message.content);
 406    if (t) return t;
 407  }
 408
 409  return "";
 410}
 411
 412/**
 413 * Resolve API base URL to POST /.../chat/completions once. Avoids broken URLs when the user pastes
 414 * a full endpoint, uses Azure (?api-version=…), or Google-style /v1beta/openai (substring "v1"
 415 * must not trigger a bogus extra /v1 segment).
 416 */
 417function resolveOpenAICompatChatUrl(raw) {
 418  const trimmed = String(raw || "").trim();
 419  if (!trimmed) return trimmed;
 420  try {
 421    const u = new URL(trimmed);
 422    let path = (u.pathname || "").replace(/\/+$/, "") || "";
 423
 424    if (/\/chat\/completions$/i.test(path)) {
 425      return u.toString();
 426    }
 427
 428    if (/\/openai\/deployments\/[^/]+$/i.test(path)) {
 429      u.pathname = `${path}/chat/completions`;
 430      return u.toString();
 431    }
 432
 433    if (/\/v1$/i.test(path)) {
 434      u.pathname = `${path}/chat/completions`;
 435      return u.toString();
 436    }
 437
 438    if (/\/v1beta\/openai$/i.test(path)) {
 439      u.pathname = `${path}/chat/completions`;
 440      return u.toString();
 441    }
 442
 443    if (!/\bv1\b/i.test(path)) {
 444      const base = path || "";
 445      u.pathname = `${base}/v1/chat/completions`.replace(/\/{2,}/g, "/");
 446      return u.toString();
 447    }
 448
 449    u.pathname = `${path}/chat/completions`.replace(/\/{2,}/g, "/");
 450    return u.toString();
 451  } catch {
 452    return trimmed;
 453  }
 454}
 455
 456function openAICompatFetchHeaders(apiKey, urlString) {
 457  const headers = { "Content-Type": "application/json" };
 458  if (!apiKey) return headers;
 459  try {
 460    const host = new URL(urlString).hostname;
 461    if (/\.openai\.azure\.com$/i.test(host)) {
 462      headers["api-key"] = apiKey;
 463    } else {
 464      headers.Authorization = `Bearer ${apiKey}`;
 465    }
 466  } catch {
 467    headers.Authorization = `Bearer ${apiKey}`;
 468  }
 469  return headers;
 470}
 471
 472async function handleChatRequest(data) {
 473  const {
 474    tabId,
 475    apiBaseUrl,
 476    model,
 477    apiKey,
 478    messages,
 479    apiMode,
 480    disableThinking,
 481    maxOutputTokens,
 482    keepAlive,
 483  } = data;
 484
 485  const tokenCap =
 486    typeof maxOutputTokens === "number" && maxOutputTokens > 0
 487      ? maxOutputTokens
 488      : CONFIG.API.MAX_TOKENS;
 489
 490  const controller = takeOtherSlot(tabId);
 491  const signal = controller.signal;
 492
 493  try {
 494    const useNativeOllama = apiMode === "ollama";
 495
 496    if (useNativeOllama) {
 497      return await callOllamaNative(
 498        apiBaseUrl,
 499        model,
 500        messages,
 501        disableThinking,
 502        tokenCap,
 503        signal,
 504        keepAlive,
 505      );
 506    } else {
 507      const raw = await callOpenAICompatible(
 508        apiBaseUrl,
 509        model,
 510        apiKey,
 511        messages,
 512        tokenCap,
 513        signal,
 514      );
 515      return normalizeOpenAIChatResponse(raw);
 516    }
 517  } catch (error) {
 518    if (error.name === "AbortError" || signal.aborted) {
 519      throw new Error("Request cancelled");
 520    }
 521    throw error;
 522  } finally {
 523    releaseOtherSlot(tabId, controller);
 524  }
 525}
 526
 527async function handleStreamChatRequest(data, tabId) {
 528  const {
 529    apiBaseUrl,
 530    model,
 531    apiKey,
 532    messages,
 533    apiMode,
 534    disableThinking,
 535    keepAlive,
 536  } = data;
 537
 538  const controller = takeStreamSlot(tabId);
 539  const signal = controller.signal;
 540
 541  try {
 542    const useNativeOllama = apiMode === "ollama";
 543
 544    if (useNativeOllama) {
 545      await callOllamaNativeStream(
 546        apiBaseUrl,
 547        model,
 548        messages,
 549        disableThinking,
 550        signal,
 551        keepAlive,
 552      );
 553    } else {
 554      await callOpenAICompatibleStream(
 555        apiBaseUrl,
 556        model,
 557        apiKey,
 558        messages,
 559        signal,
 560      );
 561    }
 562  } finally {
 563    releaseStreamSlot(tabId, controller);
 564  }
 565}
 566
 567async function callOllamaNative(
 568  baseUrl,
 569  model,
 570  messages,
 571  disableThinking,
 572  maxTokens = CONFIG.API.MAX_TOKENS,
 573  signal,
 574  keepAlive,
 575) {
 576  // Merge all system messages into one so none are dropped
 577  const systemMsgs = messages.filter((m) => m.role === "system");
 578  const systemContent = systemMsgs.map((m) => m.content).join("\n\n");
 579  const otherMessages = messages.filter((m) => m.role !== "system");
 580  const lastUserMsg = otherMessages.filter((m) => m.role === "user").pop();
 581
 582  // Build conversation context
 583  let prompt;
 584  if (otherMessages.length > 1) {
 585    const context = otherMessages
 586      .slice(0, -1)
 587      .map((m) => `${m.role}: ${m.content}`)
 588      .join("\n");
 589    prompt = OLLAMA_CONTEXT_TEMPLATE.replace("${context}", context).replace(
 590      "${userMessage}",
 591      lastUserMsg?.content || "",
 592    );
 593  } else {
 594    prompt = lastUserMsg?.content || "";
 595  }
 596
 597  const url = baseUrl.replace(/\/$/, "") + "/api/generate";
 598
 599  // Build request body
 600  const requestBody = {
 601    model: model,
 602    prompt: prompt,
 603    system: systemContent,
 604    stream: false,
 605    options: {
 606      temperature: CONFIG.API.TEMPERATURE,
 607      num_predict: maxTokens,
 608    },
 609  };
 610
 611  // Only include think: false when user explicitly disables thinking
 612  // (thinking is enabled by default in Ollama, and not all models support it)
 613  if (disableThinking === true) {
 614    requestBody.think = false;
 615  }
 616
 617  attachOllamaKeepAlive(requestBody, keepAlive);
 618
 619  const fetchOpts = {
 620    method: "POST",
 621    headers: {
 622      "Content-Type": "application/json",
 623      Connection: "close",
 624    },
 625    body: JSON.stringify(requestBody),
 626  };
 627  if (signal) fetchOpts.signal = signal;
 628
 629  const response = await fetch(url, fetchOpts);
 630
 631  if (!response.ok) {
 632    const text = await response.text();
 633    let errorMsg = `HTTP ${response.status}`;
 634    if (response.status === 403) {
 635      errorMsg =
 636        "403 Forbidden. Ollama is rejecting the request origin. Fix: restart Ollama with OLLAMA_ORIGINS=* (e.g. OLLAMA_ORIGINS=* ollama serve).";
 637    } else {
 638      try {
 639        const err = JSON.parse(text);
 640        errorMsg = err.error || err.message || errorMsg;
 641      } catch (e) {
 642        errorMsg = text || errorMsg;
 643      }
 644    }
 645    throw new Error(errorMsg);
 646  }
 647
 648  const data = await response.json();
 649
 650  return {
 651    choices: [
 652      {
 653        message: {
 654          role: "assistant",
 655          content: data.response,
 656        },
 657      },
 658    ],
 659    model: model,
 660  };
 661}
 662
 663async function callOpenAICompatible(
 664  baseUrl,
 665  model,
 666  apiKey,
 667  messages,
 668  maxTokens = CONFIG.API.MAX_TOKENS,
 669  signal,
 670) {
 671  const url = resolveOpenAICompatChatUrl(baseUrl);
 672
 673  const fetchOpts = {
 674    method: "POST",
 675    headers: openAICompatFetchHeaders(apiKey, url),
 676    body: JSON.stringify({
 677      model: model,
 678      messages: messages,
 679      stream: false,
 680      max_tokens: maxTokens,
 681    }),
 682  };
 683  if (signal) fetchOpts.signal = signal;
 684
 685  const response = await fetch(url, fetchOpts);
 686
 687  if (!response.ok) {
 688    const text = await response.text();
 689    let errorMsg = `HTTP ${response.status}`;
 690
 691    if (response.status === 403) {
 692      if (url.includes("/v1")) {
 693        errorMsg =
 694          "403 Forbidden. This often means: invalid API key, API key lacks permissions, or the server rejected the request origin.";
 695      } else {
 696        errorMsg =
 697          "403 Forbidden. If using Ollama, ensure it's running with: ollama serve";
 698      }
 699    } else if (response.status === 405) {
 700      errorMsg =
 701        "405 Method not allowed. Check if the API URL is correct for your API mode (Native vs OpenAI-compatible).";
 702    } else {
 703      try {
 704        const err = JSON.parse(text);
 705        errorMsg = err.error?.message || err.message || errorMsg;
 706      } catch (e) {
 707        errorMsg = text || errorMsg;
 708      }
 709    }
 710
 711    throw new Error(errorMsg);
 712  }
 713
 714  return await response.json();
 715}
 716
 717function sendStreamDoneToExtension(payload = {}) {
 718  if (payload.error) {
 719    resetStreamChunkBatching();
 720  } else {
 721    finalizeStreamChunkBatching();
 722  }
 723  chrome.runtime.sendMessage({ action: "streamDone", ...payload }).catch(() => {});
 724}
 725
 726async function callOllamaNativeStream(
 727  baseUrl,
 728  model,
 729  messages,
 730  disableThinking,
 731  signal,
 732  keepAlive,
 733) {
 734  const systemMsgs = messages.filter((m) => m.role === "system");
 735  const systemContent = systemMsgs.map((m) => m.content).join("\n\n");
 736  const otherMessages = messages.filter((m) => m.role !== "system");
 737  const lastUserMsg = otherMessages.filter((m) => m.role === "user").pop();
 738
 739  let prompt;
 740  if (otherMessages.length > 1) {
 741    const context = otherMessages
 742      .slice(0, -1)
 743      .map((m) => `${m.role}: ${m.content}`)
 744      .join("\n");
 745    prompt = OLLAMA_CONTEXT_TEMPLATE.replace("${context}", context).replace(
 746      "${userMessage}",
 747      lastUserMsg?.content || "",
 748    );
 749  } else {
 750    prompt = lastUserMsg?.content || "";
 751  }
 752
 753  const url = baseUrl.replace(/\/$/, "") + "/api/generate";
 754
 755  const requestBody = {
 756    model: model,
 757    prompt: prompt,
 758    system: systemContent,
 759    stream: true,
 760    options: {
 761      temperature: CONFIG.API.TEMPERATURE,
 762      num_predict: CONFIG.API.MAX_TOKENS,
 763    },
 764  };
 765
 766  if (disableThinking === true) {
 767    requestBody.think = false;
 768  }
 769
 770  attachOllamaKeepAlive(requestBody, keepAlive);
 771
 772  resetStreamChunkBatching();
 773  let hitMaxChars = false;
 774
 775  try {
 776    const response = await fetch(url, {
 777      method: "POST",
 778      headers: {
 779        "Content-Type": "application/json",
 780        Connection: "close",
 781      },
 782      body: JSON.stringify(requestBody),
 783      signal,
 784    });
 785
 786    if (!response.ok) {
 787      const text = await response.text();
 788      let errorMsg = `HTTP ${response.status}`;
 789      if (response.status === 403) {
 790        errorMsg =
 791          "403 Forbidden. Ollama is rejecting the request origin. Fix: restart Ollama with OLLAMA_ORIGINS=* (e.g. OLLAMA_ORIGINS=* ollama serve).";
 792      } else {
 793        try {
 794          const err = JSON.parse(text);
 795          errorMsg = err.error || err.message || errorMsg;
 796        } catch (e) {
 797          errorMsg = text || errorMsg;
 798        }
 799      }
 800      throw new Error(errorMsg);
 801    }
 802
 803    const reader = response.body.getReader();
 804    try {
 805      const decoder = new TextDecoder();
 806      let buffer = "";
 807      let streamedChars = 0;
 808      const maxChars = CONFIG.API.STREAM_MAX_OUTPUT_CHARS;
 809
 810      while (true) {
 811        let readResult;
 812        try {
 813          readResult = await reader.read();
 814        } catch (readErr) {
 815          if (readErr.name === "AbortError" || signal.aborted) {
 816            sendStreamDoneToExtension({ cancelled: true });
 817            return;
 818          }
 819          throw readErr;
 820        }
 821
 822        const { done, value } = readResult;
 823        if (done) break;
 824
 825        buffer += decoder.decode(value, { stream: true });
 826        const lines = buffer.split("\n");
 827        buffer = lines.pop() || "";
 828
 829        for (const line of lines) {
 830          if (line.trim()) {
 831            try {
 832              const json = JSON.parse(line);
 833              if (json.response) {
 834                const piece = json.response;
 835                if (streamedChars + piece.length > maxChars) {
 836                  hitMaxChars = true;
 837                  reader.cancel().catch(() => {});
 838                  break;
 839                }
 840                streamedChars += piece.length;
 841                queueStreamChunk(piece);
 842              }
 843            } catch (e) {
 844              // Skip invalid JSON lines
 845            }
 846          }
 847        }
 848
 849        if (hitMaxChars) break;
 850      }
 851
 852      if (hitMaxChars) {
 853        sendStreamDoneToExtension({ truncated: true });
 854      } else {
 855        sendStreamDoneToExtension();
 856      }
 857    } finally {
 858      try {
 859        await reader.cancel();
 860      } catch (e) {
 861        /* stream may already be closed */
 862      }
 863    }
 864  } catch (error) {
 865    if (error.name === "AbortError" || signal.aborted) {
 866      sendStreamDoneToExtension({ cancelled: true });
 867      return;
 868    }
 869    sendStreamDoneToExtension({ error: error.message });
 870  }
 871}
 872
 873async function callOpenAICompatibleStream(
 874  baseUrl,
 875  model,
 876  apiKey,
 877  messages,
 878  signal,
 879) {
 880  const url = resolveOpenAICompatChatUrl(baseUrl);
 881
 882  resetStreamChunkBatching();
 883  let hitMaxChars = false;
 884
 885  try {
 886    const response = await fetch(url, {
 887      method: "POST",
 888      headers: openAICompatFetchHeaders(apiKey, url),
 889      body: JSON.stringify({
 890        model: model,
 891        messages: messages,
 892        stream: true,
 893        max_tokens: CONFIG.API.MAX_TOKENS,
 894      }),
 895      signal,
 896    });
 897
 898    if (!response.ok) {
 899      const text = await response.text();
 900      let errorMsg = `HTTP ${response.status}`;
 901
 902      if (response.status === 403) {
 903        if (url.includes("/v1")) {
 904          errorMsg =
 905            "403 Forbidden. This often means: invalid API key, API key lacks permissions, or the server rejected the request origin.";
 906        } else {
 907          errorMsg =
 908            "403 Forbidden. If using Ollama, ensure it's running with: ollama serve";
 909        }
 910      } else if (response.status === 405) {
 911        errorMsg =
 912          "405 Method not allowed. Check if the API URL is correct for your API mode (Native vs OpenAI-compatible).";
 913      } else {
 914        try {
 915          const err = JSON.parse(text);
 916          errorMsg = err.error?.message || err.message || errorMsg;
 917        } catch (e) {
 918          errorMsg = text || errorMsg;
 919        }
 920      }
 921      throw new Error(errorMsg);
 922    }
 923
 924    const reader = response.body.getReader();
 925    try {
 926      const decoder = new TextDecoder();
 927      let buffer = "";
 928      let streamedChars = 0;
 929      const maxChars = CONFIG.API.STREAM_MAX_OUTPUT_CHARS;
 930
 931      while (true) {
 932        let readResult;
 933        try {
 934          readResult = await reader.read();
 935        } catch (readErr) {
 936          if (readErr.name === "AbortError" || signal.aborted) {
 937            sendStreamDoneToExtension({ cancelled: true });
 938            return;
 939          }
 940          throw readErr;
 941        }
 942
 943        const { done, value } = readResult;
 944        if (done) break;
 945
 946        buffer += decoder.decode(value, { stream: true });
 947        const lines = buffer.split("\n");
 948        buffer = lines.pop() || "";
 949
 950        for (const line of lines) {
 951          const trimmed = line.trim();
 952          if (!trimmed) continue;
 953          let payload = null;
 954          if (trimmed.startsWith("data:")) {
 955            payload = trimmed.slice(5).replace(/^\uFEFF/, "").trimStart();
 956            if (payload === "[DONE]") continue;
 957          } else if (trimmed.startsWith("{") && /"choices"\s*:/i.test(trimmed)) {
 958            // Some OpenAI-compatible proxies stream NDJSON without a `data:` prefix.
 959            payload = trimmed;
 960          } else {
 961            continue;
 962          }
 963          try {
 964            const json = JSON.parse(payload);
 965            const content = extractOpenAIStreamDeltaChunk(json);
 966            if (content) {
 967              if (streamedChars + content.length > maxChars) {
 968                hitMaxChars = true;
 969                reader.cancel().catch(() => {});
 970                break;
 971              }
 972              streamedChars += content.length;
 973              queueStreamChunk(content);
 974            }
 975          } catch (e) {
 976            // Skip invalid JSON lines
 977          }
 978        }
 979
 980        if (hitMaxChars) break;
 981      }
 982
 983      if (hitMaxChars) {
 984        sendStreamDoneToExtension({ truncated: true });
 985      } else {
 986        sendStreamDoneToExtension();
 987      }
 988    } finally {
 989      try {
 990        await reader.cancel();
 991      } catch (e) {
 992        /* stream may already be closed */
 993      }
 994    }
 995  } catch (error) {
 996    if (error.name === "AbortError" || signal.aborted) {
 997      sendStreamDoneToExtension({ cancelled: true });
 998      return;
 999    }
1000    sendStreamDoneToExtension({ error: error.message });
1001  }
1002}