// Background script - handles API communication
// Uses centralized CONFIG from config.js
// Chrome MV3: service worker — load config via importScripts.
// Firefox MV3: event-page scripts — config.js is listed first in manifest "scripts".
if (typeof importScripts === "function") {
  importScripts("config.js");
}

// Cache key prefixes from CONFIG. Session-storage keys are formed as
// <prefix><tabId>; see clearTabCache().
const QUICK_SUMMARY_CACHE_PREFIX = CONFIG.CACHE.QUICK_SUMMARY;
const DETAILED_SUMMARY_CACHE_PREFIX = CONFIG.CACHE.DETAILED_SUMMARY;
const CONTENT_CACHE_PREFIX = CONFIG.CACHE.CONTENT;
const CHAT_CACHE_PREFIX = CONFIG.CACHE.CHAT;
const SUGGESTIONS_CACHE_PREFIX = CONFIG.CACHE.SUGGESTIONS;

/**
 * Per-tab inflight Ollama/API state: one slot for streaming, one for non-stream (chat/suggestions).
 * - New stream aborts everything (user superseded or regenerate).
 * - New non-stream only aborts prior non-stream, never an active stream (avoids races with post-stream work).
 * Shape: Map<tabId, { stream?: AbortController, other?: AbortController }>
 */
const inflightByTab = new Map();

/** Abort both slots for a tab and forget its entry (tab closed, superseded, or user cancel). */
function abortAllInflightForTab(tabId) {
  if (tabId == null) return;
  const entry = inflightByTab.get(tabId);
  if (!entry) return;
  if (entry.stream) entry.stream.abort();
  if (entry.other) entry.other.abort();
  inflightByTab.delete(tabId);
}

/**
 * Claim the stream slot for a tab, aborting ALL prior inflight work for that tab first.
 * @returns {AbortController} controller whose signal the new stream request must use.
 */
function takeStreamSlot(tabId) {
  if (tabId == null) return new AbortController();
  abortAllInflightForTab(tabId);
  const controller = new AbortController();
  inflightByTab.set(tabId, { stream: controller });
  return controller;
}

/** Release the stream slot — but only if `controller` still owns it (a newer stream may have superseded it). */
function releaseStreamSlot(tabId, controller) {
  if (tabId == null) return;
  const entry = inflightByTab.get(tabId);
  if (entry && entry.stream === controller) {
    if (entry.other) {
      inflightByTab.set(tabId, { other: entry.other });
    } else {
      inflightByTab.delete(tabId);
    }
  }
}

/** Claim the non-stream slot, aborting only a prior non-stream request (never an active stream). */
function takeOtherSlot(tabId) {
  if (tabId == null) return new AbortController();
  const entry = inflightByTab.get(tabId) || {};
  if (entry.other) entry.other.abort();
  const controller = new AbortController();
  inflightByTab.set(tabId, { ...entry, other: controller });
  return controller;
}

/** Release the non-stream slot if `controller` still owns it, preserving any active stream. */
function releaseOtherSlot(tabId, controller) {
  if (tabId == null) return;
  const entry = inflightByTab.get(tabId);
  if (entry && entry.other === controller) {
    if (entry.stream) {
      inflightByTab.set(tabId, { stream: entry.stream });
    } else {
      inflightByTab.delete(tabId);
    }
  }
}

/** Coalesce tiny stream tokens into fewer runtime messages (easier on SW + popup; less backpressure). */
const CHUNK_BATCH_MAX_CHARS = 512;
const CHUNK_BATCH_MS = 20;
let streamChunkBuffer = "";
let streamChunkTimer = null;

/** Drop any pending batched chunks (a new stream is starting, or the current one errored). */
function resetStreamChunkBatching() {
  if (streamChunkTimer) {
    clearTimeout(streamChunkTimer);
    streamChunkTimer = null;
  }
  streamChunkBuffer = "";
}

/** Send everything buffered as one streamChunk message; no-op when the buffer is empty. */
function flushStreamChunks() {
  streamChunkTimer = null;
  if (!streamChunkBuffer) return;
  const chunk = streamChunkBuffer;
  streamChunkBuffer = "";
  // The popup may already be closed — swallow "no receiver" rejections.
  chrome.runtime
    .sendMessage({ action: "streamChunk", chunk, done: false })
    .catch(() => {});
}

/** Buffer a token; flush immediately once large enough, otherwise after CHUNK_BATCH_MS. */
function queueStreamChunk(piece) {
  streamChunkBuffer += piece;
  if (streamChunkBuffer.length >= CHUNK_BATCH_MAX_CHARS) {
    if (streamChunkTimer) {
      clearTimeout(streamChunkTimer);
      streamChunkTimer = null;
    }
    flushStreamChunks();
  } else if (!streamChunkTimer) {
    streamChunkTimer = setTimeout(flushStreamChunks, CHUNK_BATCH_MS);
  }
}

/** Flush the remainder synchronously at end of stream (called before streamDone is sent). */
function finalizeStreamChunkBatching() {
  if (streamChunkTimer) {
    clearTimeout(streamChunkTimer);
    streamChunkTimer = null;
  }
  flushStreamChunks();
}

/**
 * Ollama /api/generate keep_alive — null means omit so the server uses its default.
 */
function normalizeOllamaKeepAlive(keepAlive) {
  if (keepAlive == null) return null;
  const s = String(keepAlive).trim();
  return s || null;
}

/** Attach keep_alive to an /api/generate body only when the user configured a non-empty value. */
function attachOllamaKeepAlive(requestBody, keepAlive) {
  const v = normalizeOllamaKeepAlive(keepAlive);
  if (v) requestBody.keep_alive = v;
}

// ── Prompt templates from CONFIG ─────────────────────────────────────────
const OLLAMA_CONTEXT_TEMPLATE = CONFIG.OLLAMA.CONTEXT_TEMPLATE;

chrome.runtime.onInstalled.addListener(() => {
  // Seed default settings only on first install (apiMode doubles as the "configured" marker).
  chrome.storage.sync
    .get(["apiMode"])
    .then((result) => {
      if (!result.apiMode) {
        // Settings don't exist yet, set defaults from CONFIG
        chrome.storage.sync.set({
          apiMode: CONFIG.API.MODE,
          apiBaseUrl: CONFIG.API.BASE_URL,
          model: CONFIG.API.MODEL,
          apiKey: CONFIG.API.KEY,
          disableThinking: CONFIG.API.DISABLE_THINKING,
          autoSummarize: CONFIG.API.AUTO_SUMMARIZE,
          keepAlive: CONFIG.API.KEEP_ALIVE,
        });
      }
    })
    .catch((e) => console.error("[Lede] Error seeding default settings:", e));

  // Create context menu item. onInstalled also fires on extension/browser
  // updates while menus persist, so create() with the same id would otherwise
  // fail with "Cannot create item with duplicate id" — removeAll() first makes
  // this idempotent.
  chrome.contextMenus.removeAll(() => {
    chrome.contextMenus.create({
      id: "summarize-page",
      title: "Open Lede for this page",
      contexts: ["page", "selection"],
    });
  });
});

// Clear cache when a tab is closed
chrome.tabs.onRemoved.addListener((tabId) => {
  clearTabCache(tabId);
});

// Clear cache when a tab navigates to a new URL
chrome.tabs.onUpdated.addListener((tabId, changeInfo) => {
  if (changeInfo.url) {
    // URL changed, clear the cache for this tab
    clearTabCache(tabId);
  }
});

/** Remove every session-storage cache entry associated with `tabId`. Best-effort: failures are logged. */
async function clearTabCache(tabId) {
  try {
    await chrome.storage.session.remove([
      QUICK_SUMMARY_CACHE_PREFIX + tabId,
      DETAILED_SUMMARY_CACHE_PREFIX + tabId,
      CONTENT_CACHE_PREFIX + tabId,
      CHAT_CACHE_PREFIX + tabId,
      SUGGESTIONS_CACHE_PREFIX + tabId,
    ]);
  } catch (e) {
    console.error("[Lede] Error clearing cache:", e);
  }
}

// Handle context menu clicks
chrome.contextMenus.onClicked.addListener((info, tab) => {
  if (info.menuItemId === "summarize-page") {
    triggerSummarizeForTab(tab.id);
  }
});

// Handle keyboard shortcut (manifest command + optional legacy alias)
chrome.commands.onCommand.addListener((command) => { if (command === "summarize-page" || command === "open-summarizer") { chrome.tabs.query({ active: true, currentWindow: true }).then((tabs) => { if (tabs[0]) { triggerSummarizeForTab(tabs[0].id); } }); } }); function triggerSummarizeForTab(tabId) { // Store a flag to trigger summarize when popup opens chrome.storage.session.set({ triggerSummarize: true, targetTabId: tabId }); // Firefox: Create a popup window // Chrome: Use action.openPopup() for toolbar popup if (typeof browser !== "undefined") { // Firefox: Create a popup window matching the UI size (extra height for browser chrome) chrome.windows.create({ url: chrome.runtime.getURL("popup/popup.html"), type: "popup", width: 400, height: 600, focused: true, }); } else { // Chrome: Programmatically open the popup chrome.action.openPopup(); } } chrome.runtime.onMessage.addListener((request, sender, sendResponse) => { if (request.action === "ping") { sendResponse({ success: true, message: "pong" }); return true; } if (request.action === "extractPageContent") { extractPageContentForTab(request.tabId) .then((result) => sendResponse({ success: true, ...result })) .catch((error) => { sendResponse({ success: false, error: error.message || "Failed to extract page content", }); }); return true; } if (request.action === "chat") { handleChatRequest(request.data) .then((response) => { sendResponse({ success: true, data: response }); }) .catch((error) => { console.error("Background script error:", error); sendResponse({ success: false, error: error.message || "Unknown error occurred", }); }); return true; // Keep channel open for async } if (request.action === "streamChat") { const { tabId } = request; handleStreamChatRequest(request.data, tabId).catch((error) => { console.error("Stream chat error:", error); sendStreamDoneToExtension({ error: error.message }); }); return false; // Popup receives streamChunk/streamDone via runtime messages } if (request.action === 
"cancelStream") { const tabId = request.tabId; if (tabId != null) { abortAllInflightForTab(tabId); } return false; } if (request.action === "testOllama") { testOllamaConnection() .then(() => sendResponse({ success: true })) .catch((err) => sendResponse({ success: false, error: err.message })); return true; } }); function isRestrictedUrl(url) { return ( !url || url.startsWith("chrome://") || url.startsWith("chrome-extension://") || url.startsWith("edge://") || url.startsWith("about:") || url.startsWith("moz-extension://") || url.startsWith("resource://") ); } async function extractPageContentForTab(tabId) { if (!tabId) { return { content: "", wasTruncated: false }; } const tab = await chrome.tabs.get(tabId); if (isRestrictedUrl(tab.url)) { return { content: "", wasTruncated: false }; } // Readability must load before content.js (content.js is not bundled with it). // reddit-new.js registers window.__webaiTryRedditNew for new-Reddit thread extraction. await chrome.scripting.executeScript({ target: { tabId }, files: ["scripts/Readability.js", "scripts/reddit-new.js", "scripts/content.js"], }); const response = await chrome.tabs.sendMessage(tabId, { action: "extract" }); if (!response) { return { content: "", wasTruncated: false }; } return { content: response.content ?? "", wasTruncated: Boolean(response.wasTruncated), extractionSource: response.extractionSource ?? "unknown", unsupportedReason: response.unsupportedReason ?? null, }; } async function testOllamaConnection() { const response = await fetch("http://localhost:11434/api/tags"); if (!response.ok) { throw new Error(`HTTP ${response.status}`); } const data = await response.json(); return data; } /** * OpenAI-compatible APIs differ: `message.content` may be a string, a parts array (Responses / some * gateways), or text may appear on `choices[0].text` or `output_text`. 
*/
function normalizeMessageContent(content) {
  // null/undefined collapses to the empty string.
  if (content == null) return "";
  if (typeof content === "string") return content;
  if (Array.isArray(content)) {
    // Parts array: concatenate every textual fragment, whether the part is a
    // bare string or an object carrying `text`/`content`.
    let out = "";
    for (const part of content) {
      if (typeof part === "string") {
        out += part;
      } else if (part && typeof part === "object") {
        if (typeof part.text === "string") out += part.text;
        else if (typeof part.content === "string") out += part.content;
      }
    }
    return out;
  }
  // Any other shape (number, plain object, …) carries no usable text.
  return "";
}

/**
 * Best-effort extraction of the assistant text from a NON-streaming response.
 * Tries, in order: choices[0].message.content, choices[0].text, output_text.
 * Returns "" when nothing textual is found.
 */
function extractOpenAIChatCompletionText(data) {
  if (!data || typeof data !== "object") return "";
  const c0 = data.choices?.[0];
  if (!c0) return "";
  const fromMsg = normalizeMessageContent(c0.message?.content);
  if (fromMsg) return fromMsg;
  if (typeof c0.text === "string" && c0.text) return c0.text;
  if (typeof data.output_text === "string" && data.output_text) return data.output_text;
  return "";
}

/**
 * Rebuild a chat response so choices[0].message.content is always a plain
 * string (merged from whatever variant the server used). Non-conforming or
 * text-less responses are returned unchanged; the original is never mutated.
 */
function normalizeOpenAIChatResponse(data) {
  const merged = extractOpenAIChatCompletionText(data);
  if (!merged || !data?.choices?.[0]) return data;
  const first = data.choices[0];
  return {
    ...data,
    choices: [
      {
        ...first,
        message: {
          ...(first.message || {}),
          // Some servers omit the role on the final message — default to assistant.
          role: first.message?.role || "assistant",
          content: merged,
        },
      },
      // Preserve any additional choices untouched.
      ...data.choices.slice(1),
    ],
  };
}

/** Extract one streaming text fragment from an SSE JSON payload (delta.content string or parts array). */
function extractOpenAIStreamDeltaChunk(parsed) {
  if (!parsed || typeof parsed !== "object") return "";
  const c0 = parsed.choices?.[0];
  if (!c0) return "";
  const delta = c0.delta;
  if (delta && typeof delta === "object") {
    const fromDelta = normalizeMessageContent(delta.content);
    if (fromDelta) return fromDelta;
    // Some servers stream reasoning text separately; surface it as output too.
    if (typeof delta.reasoning_content === "string" && delta.reasoning_content) {
      return delta.reasoning_content;
    }
  }
  // Fallbacks for gateways that stream completion-style `text` or whole messages.
  if (typeof c0.text === "string" && c0.text) return c0.text;
  if (c0.message?.content) {
    const t = normalizeMessageContent(c0.message.content);
    if (t) return t;
  }
  return "";
}

/**
 * Resolve API base URL to POST /.../chat/completions once.
Avoids broken URLs when the user pastes * a full endpoint, uses Azure (?api-version=…), or Google-style /v1beta/openai (substring "v1" * must not trigger a bogus extra /v1 segment). */ function resolveOpenAICompatChatUrl(raw) { const trimmed = String(raw || "").trim(); if (!trimmed) return trimmed; try { const u = new URL(trimmed); let path = (u.pathname || "").replace(/\/+$/, "") || ""; if (/\/chat\/completions$/i.test(path)) { return u.toString(); } if (/\/openai\/deployments\/[^/]+$/i.test(path)) { u.pathname = `${path}/chat/completions`; return u.toString(); } if (/\/v1$/i.test(path)) { u.pathname = `${path}/chat/completions`; return u.toString(); } if (/\/v1beta\/openai$/i.test(path)) { u.pathname = `${path}/chat/completions`; return u.toString(); } if (!/\bv1\b/i.test(path)) { const base = path || ""; u.pathname = `${base}/v1/chat/completions`.replace(/\/{2,}/g, "/"); return u.toString(); } u.pathname = `${path}/chat/completions`.replace(/\/{2,}/g, "/"); return u.toString(); } catch { return trimmed; } } function openAICompatFetchHeaders(apiKey, urlString) { const headers = { "Content-Type": "application/json" }; if (!apiKey) return headers; try { const host = new URL(urlString).hostname; if (/\.openai\.azure\.com$/i.test(host)) { headers["api-key"] = apiKey; } else { headers.Authorization = `Bearer ${apiKey}`; } } catch { headers.Authorization = `Bearer ${apiKey}`; } return headers; } async function handleChatRequest(data) { const { tabId, apiBaseUrl, model, apiKey, messages, apiMode, disableThinking, maxOutputTokens, keepAlive, } = data; const tokenCap = typeof maxOutputTokens === "number" && maxOutputTokens > 0 ? 
maxOutputTokens : CONFIG.API.MAX_TOKENS; const controller = takeOtherSlot(tabId); const signal = controller.signal; try { const useNativeOllama = apiMode === "ollama"; if (useNativeOllama) { return await callOllamaNative( apiBaseUrl, model, messages, disableThinking, tokenCap, signal, keepAlive, ); } else { const raw = await callOpenAICompatible( apiBaseUrl, model, apiKey, messages, tokenCap, signal, ); return normalizeOpenAIChatResponse(raw); } } catch (error) { if (error.name === "AbortError" || signal.aborted) { throw new Error("Request cancelled"); } throw error; } finally { releaseOtherSlot(tabId, controller); } } async function handleStreamChatRequest(data, tabId) { const { apiBaseUrl, model, apiKey, messages, apiMode, disableThinking, keepAlive, } = data; const controller = takeStreamSlot(tabId); const signal = controller.signal; try { const useNativeOllama = apiMode === "ollama"; if (useNativeOllama) { await callOllamaNativeStream( apiBaseUrl, model, messages, disableThinking, signal, keepAlive, ); } else { await callOpenAICompatibleStream( apiBaseUrl, model, apiKey, messages, signal, ); } } finally { releaseStreamSlot(tabId, controller); } } async function callOllamaNative( baseUrl, model, messages, disableThinking, maxTokens = CONFIG.API.MAX_TOKENS, signal, keepAlive, ) { // Merge all system messages into one so none are dropped const systemMsgs = messages.filter((m) => m.role === "system"); const systemContent = systemMsgs.map((m) => m.content).join("\n\n"); const otherMessages = messages.filter((m) => m.role !== "system"); const lastUserMsg = otherMessages.filter((m) => m.role === "user").pop(); // Build conversation context let prompt; if (otherMessages.length > 1) { const context = otherMessages .slice(0, -1) .map((m) => `${m.role}: ${m.content}`) .join("\n"); prompt = OLLAMA_CONTEXT_TEMPLATE.replace("${context}", context).replace( "${userMessage}", lastUserMsg?.content || "", ); } else { prompt = lastUserMsg?.content || ""; } const url = 
baseUrl.replace(/\/$/, "") + "/api/generate"; // Build request body const requestBody = { model: model, prompt: prompt, system: systemContent, stream: false, options: { temperature: CONFIG.API.TEMPERATURE, num_predict: maxTokens, }, }; // Only include think: false when user explicitly disables thinking // (thinking is enabled by default in Ollama, and not all models support it) if (disableThinking === true) { requestBody.think = false; } attachOllamaKeepAlive(requestBody, keepAlive); const fetchOpts = { method: "POST", headers: { "Content-Type": "application/json", Connection: "close", }, body: JSON.stringify(requestBody), }; if (signal) fetchOpts.signal = signal; const response = await fetch(url, fetchOpts); if (!response.ok) { const text = await response.text(); let errorMsg = `HTTP ${response.status}`; if (response.status === 403) { errorMsg = "403 Forbidden. Ollama is rejecting the request origin. Fix: restart Ollama with OLLAMA_ORIGINS=* (e.g. OLLAMA_ORIGINS=* ollama serve)."; } else { try { const err = JSON.parse(text); errorMsg = err.error || err.message || errorMsg; } catch (e) { errorMsg = text || errorMsg; } } throw new Error(errorMsg); } const data = await response.json(); return { choices: [ { message: { role: "assistant", content: data.response, }, }, ], model: model, }; } async function callOpenAICompatible( baseUrl, model, apiKey, messages, maxTokens = CONFIG.API.MAX_TOKENS, signal, ) { const url = resolveOpenAICompatChatUrl(baseUrl); const fetchOpts = { method: "POST", headers: openAICompatFetchHeaders(apiKey, url), body: JSON.stringify({ model: model, messages: messages, stream: false, max_tokens: maxTokens, }), }; if (signal) fetchOpts.signal = signal; const response = await fetch(url, fetchOpts); if (!response.ok) { const text = await response.text(); let errorMsg = `HTTP ${response.status}`; if (response.status === 403) { if (url.includes("/v1")) { errorMsg = "403 Forbidden. 
This often means: invalid API key, API key lacks permissions, or the server rejected the request origin."; } else { errorMsg = "403 Forbidden. If using Ollama, ensure it's running with: ollama serve"; } } else if (response.status === 405) { errorMsg = "405 Method not allowed. Check if the API URL is correct for your API mode (Native vs OpenAI-compatible)."; } else { try { const err = JSON.parse(text); errorMsg = err.error?.message || err.message || errorMsg; } catch (e) { errorMsg = text || errorMsg; } } throw new Error(errorMsg); } return await response.json(); } function sendStreamDoneToExtension(payload = {}) { if (payload.error) { resetStreamChunkBatching(); } else { finalizeStreamChunkBatching(); } chrome.runtime.sendMessage({ action: "streamDone", ...payload }).catch(() => {}); } async function callOllamaNativeStream( baseUrl, model, messages, disableThinking, signal, keepAlive, ) { const systemMsgs = messages.filter((m) => m.role === "system"); const systemContent = systemMsgs.map((m) => m.content).join("\n\n"); const otherMessages = messages.filter((m) => m.role !== "system"); const lastUserMsg = otherMessages.filter((m) => m.role === "user").pop(); let prompt; if (otherMessages.length > 1) { const context = otherMessages .slice(0, -1) .map((m) => `${m.role}: ${m.content}`) .join("\n"); prompt = OLLAMA_CONTEXT_TEMPLATE.replace("${context}", context).replace( "${userMessage}", lastUserMsg?.content || "", ); } else { prompt = lastUserMsg?.content || ""; } const url = baseUrl.replace(/\/$/, "") + "/api/generate"; const requestBody = { model: model, prompt: prompt, system: systemContent, stream: true, options: { temperature: CONFIG.API.TEMPERATURE, num_predict: CONFIG.API.MAX_TOKENS, }, }; if (disableThinking === true) { requestBody.think = false; } attachOllamaKeepAlive(requestBody, keepAlive); resetStreamChunkBatching(); let hitMaxChars = false; try { const response = await fetch(url, { method: "POST", headers: { "Content-Type": "application/json", 
Connection: "close", }, body: JSON.stringify(requestBody), signal, }); if (!response.ok) { const text = await response.text(); let errorMsg = `HTTP ${response.status}`; if (response.status === 403) { errorMsg = "403 Forbidden. Ollama is rejecting the request origin. Fix: restart Ollama with OLLAMA_ORIGINS=* (e.g. OLLAMA_ORIGINS=* ollama serve)."; } else { try { const err = JSON.parse(text); errorMsg = err.error || err.message || errorMsg; } catch (e) { errorMsg = text || errorMsg; } } throw new Error(errorMsg); } const reader = response.body.getReader(); try { const decoder = new TextDecoder(); let buffer = ""; let streamedChars = 0; const maxChars = CONFIG.API.STREAM_MAX_OUTPUT_CHARS; while (true) { let readResult; try { readResult = await reader.read(); } catch (readErr) { if (readErr.name === "AbortError" || signal.aborted) { sendStreamDoneToExtension({ cancelled: true }); return; } throw readErr; } const { done, value } = readResult; if (done) break; buffer += decoder.decode(value, { stream: true }); const lines = buffer.split("\n"); buffer = lines.pop() || ""; for (const line of lines) { if (line.trim()) { try { const json = JSON.parse(line); if (json.response) { const piece = json.response; if (streamedChars + piece.length > maxChars) { hitMaxChars = true; reader.cancel().catch(() => {}); break; } streamedChars += piece.length; queueStreamChunk(piece); } } catch (e) { // Skip invalid JSON lines } } } if (hitMaxChars) break; } if (hitMaxChars) { sendStreamDoneToExtension({ truncated: true }); } else { sendStreamDoneToExtension(); } } finally { try { await reader.cancel(); } catch (e) { /* stream may already be closed */ } } } catch (error) { if (error.name === "AbortError" || signal.aborted) { sendStreamDoneToExtension({ cancelled: true }); return; } sendStreamDoneToExtension({ error: error.message }); } } async function callOpenAICompatibleStream( baseUrl, model, apiKey, messages, signal, ) { const url = resolveOpenAICompatChatUrl(baseUrl); 
resetStreamChunkBatching(); let hitMaxChars = false; try { const response = await fetch(url, { method: "POST", headers: openAICompatFetchHeaders(apiKey, url), body: JSON.stringify({ model: model, messages: messages, stream: true, max_tokens: CONFIG.API.MAX_TOKENS, }), signal, }); if (!response.ok) { const text = await response.text(); let errorMsg = `HTTP ${response.status}`; if (response.status === 403) { if (url.includes("/v1")) { errorMsg = "403 Forbidden. This often means: invalid API key, API key lacks permissions, or the server rejected the request origin."; } else { errorMsg = "403 Forbidden. If using Ollama, ensure it's running with: ollama serve"; } } else if (response.status === 405) { errorMsg = "405 Method not allowed. Check if the API URL is correct for your API mode (Native vs OpenAI-compatible)."; } else { try { const err = JSON.parse(text); errorMsg = err.error?.message || err.message || errorMsg; } catch (e) { errorMsg = text || errorMsg; } } throw new Error(errorMsg); } const reader = response.body.getReader(); try { const decoder = new TextDecoder(); let buffer = ""; let streamedChars = 0; const maxChars = CONFIG.API.STREAM_MAX_OUTPUT_CHARS; while (true) { let readResult; try { readResult = await reader.read(); } catch (readErr) { if (readErr.name === "AbortError" || signal.aborted) { sendStreamDoneToExtension({ cancelled: true }); return; } throw readErr; } const { done, value } = readResult; if (done) break; buffer += decoder.decode(value, { stream: true }); const lines = buffer.split("\n"); buffer = lines.pop() || ""; for (const line of lines) { const trimmed = line.trim(); if (!trimmed) continue; let payload = null; if (trimmed.startsWith("data:")) { payload = trimmed.slice(5).replace(/^\uFEFF/, "").trimStart(); if (payload === "[DONE]") continue; } else if (trimmed.startsWith("{") && /"choices"\s*:/i.test(trimmed)) { // Some OpenAI-compatible proxies stream NDJSON without a `data:` prefix. 
payload = trimmed; } else { continue; } try { const json = JSON.parse(payload); const content = extractOpenAIStreamDeltaChunk(json); if (content) { if (streamedChars + content.length > maxChars) { hitMaxChars = true; reader.cancel().catch(() => {}); break; } streamedChars += content.length; queueStreamChunk(content); } } catch (e) { // Skip invalid JSON lines } } if (hitMaxChars) break; } if (hitMaxChars) { sendStreamDoneToExtension({ truncated: true }); } else { sendStreamDoneToExtension(); } } finally { try { await reader.cancel(); } catch (e) { /* stream may already be closed */ } } } catch (error) { if (error.name === "AbortError" || signal.aborted) { sendStreamDoneToExtension({ cancelled: true }); return; } sendStreamDoneToExtension({ error: error.message }); } }