import { feature } from 'bun:bundle' import type { Anthropic } from '@anthropic-ai/sdk' import { getSystemPrompt, SYSTEM_PROMPT_DYNAMIC_BOUNDARY, } from 'src/constants/prompts.js' import { microcompactMessages } from 'src/services/compact/microCompact.js' import { getSdkBetas } from '../bootstrap/state.js' import { getCommandName } from '../commands.js' import { getSystemContext } from '../context.js' import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js' import { AUTOCOMPACT_BUFFER_TOKENS, getEffectiveContextWindowSize, isAutoCompactEnabled, MANUAL_COMPACT_BUFFER_TOKENS, } from '../services/compact/autoCompact.js' import { countMessagesTokensWithAPI, countTokensViaHaikuFallback, roughTokenCountEstimation, } from '../services/tokenEstimation.js' import { estimateSkillFrontmatterTokens } from '../skills/loadSkillsDir.js' import { findToolByName, type Tool, type ToolPermissionContext, type Tools, type ToolUseContext, toolMatchesName, } from '../Tool.js' import type { AgentDefinition, AgentDefinitionsResult, } from '../tools/AgentTool/loadAgentsDir.js' import { SKILL_TOOL_NAME } from '../tools/SkillTool/constants.js' import { getLimitedSkillToolCommands, getSkillToolInfo as getSlashCommandInfo, } from '../tools/SkillTool/prompt.js' import type { AssistantMessage, AttachmentMessage, Message, NormalizedAssistantMessage, NormalizedUserMessage, UserMessage, } from '../types/message.js' import { toolToAPISchema } from './api.js' import { filterInjectedMemoryFiles, getMemoryFiles } from './claudemd.js' import { getContextWindowForModel } from './context.js' import { getCwd } from './cwd.js' import { logForDebugging } from './debug.js' import { isEnvTruthy } from './envUtils.js' import { errorMessage, toError } from './errors.js' import { logError } from './log.js' import { normalizeMessagesForAPI } from './messages.js' import { getRuntimeMainLoopModel } from './model/model.js' import type { SettingSource } from './settings/constants.js' 
import { jsonStringify } from './slowOperations.js'
import { buildEffectiveSystemPrompt } from './systemPrompt.js'
import type { Theme } from './theme.js'
import { getCurrentUsage } from './tokens.js'

const RESERVED_CATEGORY_NAME = 'Autocompact buffer'
const MANUAL_COMPACT_BUFFER_NAME = 'Compact buffer'

/**
 * Fixed token overhead added by the API when tools are present.
 * The API adds a tool prompt preamble (~500 tokens) once per API call when tools are present.
 * When we count tools individually via the token counting API, each call includes this overhead,
 * leading to N × overhead instead of 1 × overhead for N tools.
 * We subtract this overhead from per-tool counts to show accurate tool content sizes.
 */
export const TOOL_TOKEN_COUNT_OVERHEAD = 500

/**
 * Counts tokens for the given messages and tool schemas.
 *
 * Tries `countMessagesTokensWithAPI` first. If that call returns `null` or
 * throws, the request is retried through `countTokensViaHaikuFallback`.
 * Failures of either path are logged (debug log + error log) and never
 * propagate to the caller.
 *
 * @param messages - Messages to include in the count (may be empty).
 * @param tools - Tool schemas to include in the count (may be empty).
 * @returns The token count, or `null` when both the primary call and the
 *   fallback produced no result.
 */
async function countTokensWithFallback(
  messages: Anthropic.Beta.Messages.BetaMessageParam[],
  tools: Anthropic.Beta.Messages.BetaToolUnion[],
): Promise<number | null> {
  // Primary path: token counting API.
  try {
    const result = await countMessagesTokensWithAPI(messages, tools)
    if (result !== null) {
      return result
    }
    logForDebugging(
      `countTokensWithFallback: API returned null, trying haiku fallback (${tools.length} tools)`,
    )
  } catch (err) {
    logForDebugging(`countTokensWithFallback: API failed: ${errorMessage(err)}`)
    logError(err)
  }

  // Secondary path: only reached when the primary path returned null or threw.
  try {
    const fallbackResult = await countTokensViaHaikuFallback(messages, tools)
    if (fallbackResult === null) {
      logForDebugging(
        `countTokensWithFallback: haiku fallback also returned null (${tools.length} tools)`,
      )
    }
    return fallbackResult
  } catch (err) {
    logForDebugging(
      `countTokensWithFallback: haiku fallback failed: ${errorMessage(err)}`,
    )
    logError(err)
    return null
  }
}

/** One named slice of the context window (system prompt, tools, messages, ...). */
interface ContextCategory {
  name: string
  tokens: number
  color: keyof Theme
  /** When true, these tokens are deferred and don't count toward context usage */
  isDeferred?: boolean
}

/** A single cell of the context-usage grid. */
interface GridSquare {
  color: keyof Theme
  isFilled: boolean
  categoryName: string
  tokens: number
  percentage: number
  squareFullness: number // 0-1 representing how full this individual square is
}
interface MemoryFile {
  path: string
  type: string
  tokens: number
}

interface McpTool {
  name: string
  serverName: string
  tokens: number
  isLoaded?: boolean
}

export interface DeferredBuiltinTool {
  name: string
  tokens: number
  isLoaded: boolean
}

export interface SystemToolDetail {
  name: string
  tokens: number
}

export interface SystemPromptSectionDetail {
  name: string
  tokens: number
}

interface Agent {
  agentType: string
  source: SettingSource | 'built-in' | 'plugin'
  tokens: number
}

interface SlashCommandInfo {
  readonly totalCommands: number
  readonly includedCommands: number
  readonly tokens: number
}

/** Individual skill detail for context display */
interface SkillFrontmatter {
  name: string
  source: SettingSource | 'plugin'
  tokens: number
}

/**
 * Information about skills included in the context window.
 */
interface SkillInfo {
  /** Total number of available skills */
  readonly totalSkills: number
  /** Number of skills included within token budget */
  readonly includedSkills: number
  /** Total tokens consumed by skills */
  readonly tokens: number
  /** Individual skill details */
  readonly skillFrontmatter: SkillFrontmatter[]
}

export interface ContextData {
  readonly categories: ContextCategory[]
  readonly totalTokens: number
  readonly maxTokens: number
  readonly rawMaxTokens: number
  readonly percentage: number
  readonly gridRows: GridSquare[][]
  readonly model: string
  readonly memoryFiles: MemoryFile[]
  readonly mcpTools: McpTool[]
  /** Ant-only: per-tool breakdown of deferred built-in tools */
  readonly deferredBuiltinTools?: DeferredBuiltinTool[]
  /** Ant-only: per-tool breakdown of always-loaded built-in tools */
  readonly systemTools?: SystemToolDetail[]
  /** Ant-only: per-section breakdown of system prompt */
  readonly systemPromptSections?: SystemPromptSectionDetail[]
  readonly agents: Agent[]
  readonly slashCommands?: SlashCommandInfo
  /** Skill statistics */
  readonly skills?: SkillInfo
  readonly autoCompactThreshold?: number
  readonly isAutoCompactEnabled: boolean
  messageBreakdown?: {
    toolCallTokens: number
    toolResultTokens: number
    attachmentTokens: number
    assistantMessageTokens: number
    userMessageTokens: number
    toolCallsByType: Array<{
      name: string
      callTokens: number
      resultTokens: number
    }>
    attachmentsByType: Array<{ name: string; tokens: number }>
  }
  /** Actual token usage from last API response (if available) */
  readonly apiUsage: {
    input_tokens: number
    output_tokens: number
    cache_creation_input_tokens: number
    cache_read_input_tokens: number
  } | null
}

/**
 * Counts the tokens taken by the API schemas of `tools` in one bulk request.
 *
 * Each tool is converted with `toolToAPISchema` and the schemas are counted
 * together with an empty message list. A `null` or `0` result is logged for
 * debugging (tool names truncated to 100 characters).
 *
 * @returns The counted tokens, or `0` when no count could be obtained.
 */
export async function countToolDefinitionTokens(
  tools: Tools,
  getToolPermissionContext: () => Promise<ToolPermissionContext>,
  agentInfo: AgentDefinitionsResult | null,
  model?: string,
): Promise<number> {
  const toolSchemas = await Promise.all(
    tools.map(tool =>
      toolToAPISchema(tool, {
        getToolPermissionContext,
        tools,
        agents: agentInfo?.activeAgents ?? [],
        model,
      }),
    ),
  )
  const result = await countTokensWithFallback([], toolSchemas)
  if (result === null || result === 0) {
    const toolNames = tools.map(t => t.name).join(', ')
    logForDebugging(
      `countToolDefinitionTokens returned ${result} for ${tools.length} tools: ${toolNames.slice(0, 100)}${toolNames.length > 100 ? '...' : ''}`,
    )
  }
  return result ?? 0
}

/** Extract a human-readable name from a system prompt section's content */
function extractSectionName(content: string): string {
  // Try to find first markdown heading
  const headingMatch = content.match(/^#+\s+(.+)$/m)
  if (headingMatch) {
    return headingMatch[1]!.trim()
  }
  // Fall back to a truncated preview of the first non-empty line
  const firstLine = content.split('\n').find(l => l.trim().length > 0) ?? ''
  return firstLine.length > 40 ? firstLine.slice(0, 40) + '…' : firstLine
}

/**
 * Counts tokens for every system prompt section plus every system context
 * value, one counting request per entry.
 *
 * @param effectiveSystemPrompt - System prompt parts; empty strings and the
 *   `SYSTEM_PROMPT_DYNAMIC_BOUNDARY` marker are ignored.
 * @returns The summed token count and a per-section breakdown. Entries whose
 *   count could not be obtained contribute `0`.
 */
async function countSystemTokens(
  effectiveSystemPrompt: readonly string[],
): Promise<{
  systemPromptTokens: number
  systemPromptSections: SystemPromptSectionDetail[]
}> {
  // Get system context (gitStatus, etc.) which is always included
  const systemContext = await getSystemContext()

  // Build named entries: system prompt parts + system context values
  // Skip empty strings and the global-cache boundary marker
  const namedEntries: Array<{ name: string; content: string }> = [
    ...effectiveSystemPrompt
      .filter(
        content =>
          content.length > 0 && content !== SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
      )
      .map(content => ({ name: extractSectionName(content), content })),
    ...Object.entries(systemContext)
      .filter(([, content]) => content.length > 0)
      .map(([name, content]) => ({ name, content })),
  ]

  if (namedEntries.length < 1) {
    return { systemPromptTokens: 0, systemPromptSections: [] }
  }

  const systemTokenCounts = await Promise.all(
    namedEntries.map(({ content }) =>
      countTokensWithFallback([{ role: 'user', content }], []),
    ),
  )

  const systemPromptSections: SystemPromptSectionDetail[] = namedEntries.map(
    (entry, i) => ({
      name: entry.name,
      tokens: systemTokenCounts[i] || 0,
    }),
  )

  const systemPromptTokens = systemTokenCounts.reduce(
    (sum: number, tokens) => sum + (tokens || 0),
    0,
  )

  return { systemPromptTokens, systemPromptSections }
}

/**
 * Counts tokens for each memory file returned by `getMemoryFiles` (after
 * `filterInjectedMemoryFiles`), one counting request per file.
 *
 * @returns Per-file details and their summed token count; both empty/zero
 *   when `CLAUDE_CODE_SIMPLE` is truthy or there are no memory files.
 */
async function countMemoryFileTokens(): Promise<{
  memoryFileDetails: MemoryFile[]
  claudeMdTokens: number
}> {
  // Simple mode disables CLAUDE.md loading, so don't report tokens for them
  if (isEnvTruthy(process.env.CLAUDE_CODE_SIMPLE)) {
    return { memoryFileDetails: [], claudeMdTokens: 0 }
  }

  const memoryFilesData = filterInjectedMemoryFiles(await getMemoryFiles())
  const memoryFileDetails: MemoryFile[] = []
  let claudeMdTokens = 0

  if (memoryFilesData.length < 1) {
    return {
      memoryFileDetails: [],
      claudeMdTokens: 0,
    }
  }

  const claudeMdTokenCounts = await Promise.all(
    memoryFilesData.map(async file => {
      const tokens = await countTokensWithFallback(
        [{ role: 'user', content: file.content }],
        [],
      )
      return { file, tokens: tokens || 0 }
    }),
  )

  for (const { file, tokens } of claudeMdTokenCounts) {
    claudeMdTokens += tokens
    memoryFileDetails.push({
      path: file.path,
      type: file.type,
      tokens,
    })
  }

  return { claudeMdTokens, memoryFileDetails }
}

/**
 * Counts tokens for built-in (non-MCP) tools, separating always-loaded tools
 * from deferred ones.
 *
 * - Always-loaded tools are counted in a single bulk request.
 * - When tool search is enabled, each deferred tool is counted individually
 *   (minus `TOOL_TOKEN_COUNT_OVERHEAD`); only deferred tools that appear in a
 *   `tool_use` block of `messages` are added to `builtInToolTokens`.
 * - When tool search is not enabled, deferred tools are counted in bulk and
 *   added to `builtInToolTokens`.
 *
 * `systemToolDetails` is only populated when `USER_TYPE === 'ant'`.
 */
async function countBuiltInToolTokens(
  tools: Tools,
  getToolPermissionContext: () => Promise<ToolPermissionContext>,
  agentInfo: AgentDefinitionsResult | null,
  model?: string,
  messages?: Message[],
): Promise<{
  builtInToolTokens: number
  deferredBuiltinDetails: DeferredBuiltinTool[]
  deferredBuiltinTokens: number
  systemToolDetails: SystemToolDetail[]
}> {
  const builtInTools = tools.filter(tool => !tool.isMcp)
  if (builtInTools.length < 1) {
    return {
      builtInToolTokens: 0,
      deferredBuiltinDetails: [],
      deferredBuiltinTokens: 0,
      systemToolDetails: [],
    }
  }

  // Check if tool search is enabled
  const { isToolSearchEnabled } = await import('./toolSearch.js')
  const { isDeferredTool } = await import('../tools/ToolSearchTool/prompt.js')
  const isDeferred = await isToolSearchEnabled(
    model ?? '',
    tools,
    getToolPermissionContext,
    agentInfo?.activeAgents ?? [],
    'analyzeBuiltIn',
  )

  // Separate always-loaded and deferred builtin tools using dynamic isDeferredTool check
  const alwaysLoadedTools = builtInTools.filter(t => !isDeferredTool(t))
  const deferredBuiltinTools = builtInTools.filter(t => isDeferredTool(t))

  // Count always-loaded tools
  const alwaysLoadedTokens =
    alwaysLoadedTools.length > 0
      ? await countToolDefinitionTokens(
          alwaysLoadedTools,
          getToolPermissionContext,
          agentInfo,
          model,
        )
      : 0

  // Build per-tool breakdown for always-loaded tools (ant-only, proportional
  // split of the bulk count based on rough schema size estimation). Excludes
  // SkillTool since its tokens are shown in the separate Skills category.
  let systemToolDetails: SystemToolDetail[] = []
  if (process.env.USER_TYPE === 'ant') {
    const toolsForBreakdown = alwaysLoadedTools.filter(
      t => !toolMatchesName(t, SKILL_TOOL_NAME),
    )
    if (toolsForBreakdown.length > 0) {
      const estimates = toolsForBreakdown.map(t =>
        roughTokenCountEstimation(jsonStringify(t.inputSchema ?? {})),
      )
      // `|| 1` avoids dividing by zero when every estimate is 0
      const estimateTotal = estimates.reduce((s, e) => s + e, 0) || 1
      const distributable = Math.max(
        0,
        alwaysLoadedTokens - TOOL_TOKEN_COUNT_OVERHEAD,
      )
      systemToolDetails = toolsForBreakdown
        .map((t, i) => ({
          name: t.name,
          tokens: Math.round((estimates[i]! / estimateTotal) * distributable),
        }))
        .sort((a, b) => b.tokens - a.tokens)
    }
  }

  // Count deferred builtin tools individually for details
  const deferredBuiltinDetails: DeferredBuiltinTool[] = []
  let loadedDeferredTokens = 0
  let totalDeferredTokens = 0

  if (deferredBuiltinTools.length > 0 && isDeferred) {
    // Find which deferred tools have been used in messages
    const loadedToolNames = new Set<string>()
    if (messages) {
      const deferredToolNameSet = new Set(deferredBuiltinTools.map(t => t.name))
      for (const msg of messages) {
        if (msg.type === 'assistant') {
          for (const block of msg.message.content) {
            if (
              'type' in block &&
              block.type === 'tool_use' &&
              'name' in block &&
              typeof block.name === 'string' &&
              deferredToolNameSet.has(block.name)
            ) {
              loadedToolNames.add(block.name)
            }
          }
        }
      }
    }

    // Count each deferred tool
    const tokensByTool = await Promise.all(
      deferredBuiltinTools.map(t =>
        countToolDefinitionTokens(
          [t],
          getToolPermissionContext,
          agentInfo,
          model,
        ),
      ),
    )

    for (const [i, tool] of deferredBuiltinTools.entries()) {
      const tokens = Math.max(
        0,
        (tokensByTool[i] || 0) - TOOL_TOKEN_COUNT_OVERHEAD,
      )
      const isLoaded = loadedToolNames.has(tool.name)
      deferredBuiltinDetails.push({
        name: tool.name,
        tokens,
        isLoaded,
      })
      totalDeferredTokens += tokens
      if (isLoaded) {
        loadedDeferredTokens += tokens
      }
    }
  } else if (deferredBuiltinTools.length > 0) {
    // Tool search not enabled - count deferred tools as regular
    const deferredTokens = await countToolDefinitionTokens(
      deferredBuiltinTools,
      getToolPermissionContext,
      agentInfo,
      model,
    )
    return {
      builtInToolTokens: alwaysLoadedTokens + deferredTokens,
      deferredBuiltinDetails: [],
      deferredBuiltinTokens: 0,
      systemToolDetails,
    }
  }

  return {
    // When deferred, only count always-loaded tools + any loaded deferred tools
    builtInToolTokens: alwaysLoadedTokens + loadedDeferredTokens,
    deferredBuiltinDetails,
    deferredBuiltinTokens: totalDeferredTokens - loadedDeferredTokens,
    systemToolDetails,
  }
}

/** Looks up the tool named `SKILL_TOOL_NAME` in `tools`. */
function findSkillTool(tools: Tools): Tool | undefined {
  return findToolByName(tools, SKILL_TOOL_NAME)
}

/**
 * Counts the tokens of the Skill tool definition and reports how many
 * commands it exposes.
 *
 * @returns Zero values when `tools` does not contain the Skill tool.
 */
async function countSlashCommandTokens(
  tools: Tools,
  getToolPermissionContext: () => Promise<ToolPermissionContext>,
  agentInfo: AgentDefinitionsResult | null,
): Promise<{
  slashCommandTokens: number
  commandInfo: { totalCommands: number; includedCommands: number }
}> {
  // Guard first so command info is not loaded when its result would be unused.
  const slashCommandTool = findSkillTool(tools)
  if (!slashCommandTool) {
    return {
      slashCommandTokens: 0,
      commandInfo: { totalCommands: 0, includedCommands: 0 },
    }
  }

  const info = await getSlashCommandInfo(getCwd())
  const slashCommandTokens = await countToolDefinitionTokens(
    [slashCommandTool],
    getToolPermissionContext,
    agentInfo,
  )

  return {
    slashCommandTokens,
    commandInfo: {
      totalCommands: info.totalCommands,
      includedCommands: info.includedCommands,
    },
  }
}

/**
 * Counts the tokens of the Skill tool definition and builds a per-skill
 * frontmatter token estimate.
 *
 * Any error is logged and converted into zero values so that a skills
 * failure does not fail the whole context analysis.
 */
async function countSkillTokens(
  tools: Tools,
  getToolPermissionContext: () => Promise<ToolPermissionContext>,
  agentInfo: AgentDefinitionsResult | null,
): Promise<{
  skillTokens: number
  skillInfo: {
    totalSkills: number
    includedSkills: number
    skillFrontmatter: SkillFrontmatter[]
  }
}> {
  try {
    // Guard first so skills are not loaded when their result would be unused.
    const slashCommandTool = findSkillTool(tools)
    if (!slashCommandTool) {
      return {
        skillTokens: 0,
        skillInfo: { totalSkills: 0, includedSkills: 0, skillFrontmatter: [] },
      }
    }

    const skills = await getLimitedSkillToolCommands(getCwd())

    // NOTE: This counts the entire SlashCommandTool (which includes both commands AND skills).
    // This is the same tool counted by countSlashCommandTokens(), but we track it separately
    // here for display purposes. These tokens should NOT be added to context categories
    // to avoid double-counting.
    const skillTokens = await countToolDefinitionTokens(
      [slashCommandTool],
      getToolPermissionContext,
      agentInfo,
    )

    // Calculate per-skill token estimates based on frontmatter only
    // (name, description, whenToUse) since full content is only loaded on invocation
    const skillFrontmatter: SkillFrontmatter[] = skills.map(skill => ({
      name: getCommandName(skill),
      source: (skill.type === 'prompt' ? skill.source : 'plugin') as
        | SettingSource
        | 'plugin',
      tokens: estimateSkillFrontmatterTokens(skill),
    }))

    return {
      skillTokens,
      skillInfo: {
        totalSkills: skills.length,
        includedSkills: skills.length,
        skillFrontmatter,
      },
    }
  } catch (error) {
    logError(toError(error))
    // Return zero values rather than failing the entire context analysis
    return {
      skillTokens: 0,
      skillInfo: { totalSkills: 0, includedSkills: 0, skillFrontmatter: [] },
    }
  }
}

/**
 * Counts tokens for MCP tools with one bulk request and splits the total
 * across tools proportionally to a local size estimate.
 *
 * When tool search is enabled, a tool counts as loaded if it appears in a
 * `tool_use` block of `messages` or if `isDeferredTool` is false for it;
 * all other tools are reported as deferred.
 *
 * @returns Zero/empty values without issuing any request when `tools`
 *   contains no MCP tools.
 */
export async function countMcpToolTokens(
  tools: Tools,
  getToolPermissionContext: () => Promise<ToolPermissionContext>,
  agentInfo: AgentDefinitionsResult | null,
  model: string,
  messages?: Message[],
): Promise<{
  mcpToolTokens: number
  mcpToolDetails: McpTool[]
  deferredToolTokens: number
  loadedMcpToolNames: Set<string>
}> {
  const mcpTools = tools.filter(tool => tool.isMcp)

  // Nothing to count: skip the token-count request and tool-search check,
  // mirroring the empty-input guard in countBuiltInToolTokens.
  if (mcpTools.length < 1) {
    return {
      mcpToolTokens: 0,
      mcpToolDetails: [],
      deferredToolTokens: 0,
      loadedMcpToolNames: new Set<string>(),
    }
  }

  const mcpToolDetails: McpTool[] = []

  // Single bulk API call for all MCP tools (instead of N individual calls)
  const totalTokensRaw = await countToolDefinitionTokens(
    mcpTools,
    getToolPermissionContext,
    agentInfo,
    model,
  )

  // Subtract the single overhead since we made one bulk call
  const totalTokens = Math.max(
    0,
    (totalTokensRaw || 0) - TOOL_TOKEN_COUNT_OVERHEAD,
  )

  // Estimate per-tool proportions for display using local estimation.
  // Include name + description + input schema to match what toolToAPISchema
  // sends — otherwise tools with similar schemas but different descriptions
  // get identical counts (MCP tools share the same base Zod inputSchema).
  const estimates = await Promise.all(
    mcpTools.map(async t =>
      roughTokenCountEstimation(
        jsonStringify({
          name: t.name,
          description: await t.prompt({
            getToolPermissionContext,
            tools,
            agents: agentInfo?.activeAgents ?? [],
          }),
          input_schema: t.inputJSONSchema ?? {},
        }),
      ),
    ),
  )
  // `|| 1` avoids dividing by zero when every estimate is 0
  const estimateTotal = estimates.reduce((s, e) => s + e, 0) || 1
  const mcpToolTokensByTool = estimates.map(e =>
    Math.round((e / estimateTotal) * totalTokens),
  )

  // Check if tool search is enabled - if so, MCP tools are deferred
  // isToolSearchEnabled handles threshold calculation internally for TstAuto mode
  const { isToolSearchEnabled } = await import('./toolSearch.js')
  const { isDeferredTool } = await import('../tools/ToolSearchTool/prompt.js')
  const isDeferred = await isToolSearchEnabled(
    model,
    tools,
    getToolPermissionContext,
    agentInfo?.activeAgents ?? [],
    'analyzeMcp',
  )

  // Find MCP tools that have been used in messages (loaded via ToolSearchTool)
  const loadedMcpToolNames = new Set<string>()
  if (isDeferred && messages) {
    const mcpToolNameSet = new Set(mcpTools.map(t => t.name))
    for (const msg of messages) {
      if (msg.type === 'assistant') {
        for (const block of msg.message.content) {
          if (
            'type' in block &&
            block.type === 'tool_use' &&
            'name' in block &&
            typeof block.name === 'string' &&
            mcpToolNameSet.has(block.name)
          ) {
            loadedMcpToolNames.add(block.name)
          }
        }
      }
    }
  }

  // Build tool details with isLoaded flag
  for (const [i, tool] of mcpTools.entries()) {
    mcpToolDetails.push({
      name: tool.name,
      serverName: tool.name.split('__')[1] || 'unknown',
      tokens: mcpToolTokensByTool[i]!,
      isLoaded: loadedMcpToolNames.has(tool.name) || !isDeferredTool(tool),
    })
  }

  // Calculate loaded vs deferred tokens
  let loadedTokens = 0
  let deferredTokens = 0
  for (const detail of mcpToolDetails) {
    if (detail.isLoaded) {
      loadedTokens += detail.tokens
    } else if (isDeferred) {
      deferredTokens += detail.tokens
    }
  }

  return {
    // When deferred but some tools are loaded, count loaded tokens
    mcpToolTokens: isDeferred ? loadedTokens : totalTokens,
    mcpToolDetails,
    // Track deferred tokens separately for display
    deferredToolTokens: deferredTokens,
    loadedMcpToolNames,
  }
}

/**
 * Counts tokens for every active agent whose source is not `'built-in'`,
 * using the text `"<agentType> <whenToUse>"` as the counted content.
 *
 * @returns The summed token count and per-agent details; an agent whose
 *   count could not be obtained contributes `0`.
 */
async function countCustomAgentTokens(agentDefinitions: {
  activeAgents: AgentDefinition[]
}): Promise<{
  agentTokens: number
  agentDetails: Agent[]
}> {
  const customAgents = agentDefinitions.activeAgents.filter(
    a => a.source !== 'built-in',
  )
  const agentDetails: Agent[] = []
  let agentTokens = 0

  const tokenCounts = await Promise.all(
    customAgents.map(agent =>
      countTokensWithFallback(
        [
          {
            role: 'user',
            content: [agent.agentType, agent.whenToUse].join(' '),
          },
        ],
        [],
      ),
    ),
  )

  for (const [i, agent] of customAgents.entries()) {
    // Normalize null/undefined counts once; `tokens` is a plain number below.
    const tokens = tokenCounts[i] || 0
    agentTokens += tokens
    agentDetails.push({
      agentType: agent.agentType,
      source: agent.source,
      tokens,
    })
  }

  return { agentTokens, agentDetails }
}

/** Running totals accumulated while walking the conversation messages. */
type MessageBreakdown = {
  totalTokens: number
  toolCallTokens: number
  toolResultTokens: number
  attachmentTokens: number
  assistantMessageTokens: number
  userMessageTokens: number
  toolCallsByType: Map<string, number>
  toolResultsByType: Map<string, number>
  attachmentsByType: Map<string, number>
}

/**
 * Adds a rough token estimate for each content block of an assistant message
 * to `breakdown`: `tool_use` blocks go to the tool-call totals (keyed by tool
 * name), everything else to `assistantMessageTokens`.
 */
function processAssistantMessage(
  msg: AssistantMessage | NormalizedAssistantMessage,
  breakdown: MessageBreakdown,
): void {
  // Process each content block individually
  for (const block of msg.message.content) {
    const blockStr = jsonStringify(block)
    const blockTokens = roughTokenCountEstimation(blockStr)

    if ('type' in block && block.type === 'tool_use') {
      breakdown.toolCallTokens += blockTokens
      const toolName = ('name' in block ? block.name : undefined) || 'unknown'
      breakdown.toolCallsByType.set(
        toolName,
        (breakdown.toolCallsByType.get(toolName) || 0) + blockTokens,
      )
    } else {
      // Text blocks or other non-tool content
      breakdown.assistantMessageTokens += blockTokens
    }
  }
}

/**
 * Adds a rough token estimate for a user message to `breakdown`:
 * `tool_result` blocks go to the tool-result totals (keyed by the tool name
 * resolved through `toolUseIdToName`, or `'unknown'`), everything else to
 * `userMessageTokens`.
 */
function processUserMessage(
  msg: UserMessage | NormalizedUserMessage,
  breakdown: MessageBreakdown,
  toolUseIdToName: Map<string, string>,
): void {
  // Handle both string and array content
  if (typeof msg.message.content === 'string') {
    // Simple string content
    const tokens = roughTokenCountEstimation(msg.message.content)
    breakdown.userMessageTokens += tokens
    return
  }

  // Process each content block individually
  for (const block of msg.message.content) {
    const blockStr = jsonStringify(block)
    const blockTokens = roughTokenCountEstimation(blockStr)

    if ('type' in block && block.type === 'tool_result') {
      breakdown.toolResultTokens += blockTokens
      const toolUseId = 'tool_use_id' in block ? block.tool_use_id : undefined
      const toolName =
        (toolUseId ? toolUseIdToName.get(toolUseId) : undefined) || 'unknown'
      breakdown.toolResultsByType.set(
        toolName,
        (breakdown.toolResultsByType.get(toolName) || 0) + blockTokens,
      )
    } else {
      // Text blocks or other non-tool content
      breakdown.userMessageTokens += blockTokens
    }
  }
}

/**
 * Adds a rough token estimate for an attachment message to `breakdown`,
 * keyed by the attachment's `type` (or `'unknown'`).
 */
function processAttachment(
  msg: AttachmentMessage,
  breakdown: MessageBreakdown,
): void {
  const contentStr = jsonStringify(msg.attachment)
  const tokens = roughTokenCountEstimation(contentStr)
  breakdown.attachmentTokens += tokens
  const attachType = msg.attachment.type || 'unknown'
  breakdown.attachmentsByType.set(
    attachType,
    (breakdown.attachmentsByType.get(attachType) || 0) + tokens,
  )
}

/**
 * Builds a token breakdown for `messages` after running them through
 * `microcompactMessages`.
 *
 * Per-category figures are rough local estimates; `totalTokens` comes from
 * `countTokensWithFallback` over the API-normalized messages and is `0` when
 * no count could be obtained.
 */
async function approximateMessageTokens(
  messages: Message[],
): Promise<MessageBreakdown> {
  const microcompactResult = await microcompactMessages(messages)

  // Initialize tracking
  const breakdown: MessageBreakdown = {
    totalTokens: 0,
    toolCallTokens: 0,
    toolResultTokens: 0,
    attachmentTokens: 0,
    assistantMessageTokens: 0,
    userMessageTokens: 0,
    toolCallsByType: new Map(),
    toolResultsByType: new Map(),
    attachmentsByType: new Map(),
  }

  // Build a map of tool_use_id to tool_name for easier lookup
  const toolUseIdToName = new Map<string, string>()
  for (const msg of microcompactResult.messages) {
    if (msg.type === 'assistant') {
      for (const block of msg.message.content) {
        if ('type' in block && block.type === 'tool_use') {
          const toolUseId = 'id' in block ? block.id : undefined
          const toolName =
            ('name' in block ? block.name : undefined) || 'unknown'
          if (toolUseId) {
            toolUseIdToName.set(toolUseId, toolName)
          }
        }
      }
    }
  }

  // Process each message for detailed breakdown
  for (const msg of microcompactResult.messages) {
    if (msg.type === 'assistant') {
      processAssistantMessage(msg, breakdown)
    } else if (msg.type === 'user') {
      processUserMessage(msg, breakdown, toolUseIdToName)
    } else if (msg.type === 'attachment') {
      processAttachment(msg, breakdown)
    }
  }

  // Calculate total tokens using the API for accuracy
  const approximateMessageTokens = await countTokensWithFallback(
    normalizeMessagesForAPI(microcompactResult.messages).map(_ => {
      if (_.type === 'assistant') {
        return {
          // Important: strip out fields like id, etc. -- the counting API errors if they're present
          role: 'assistant',
          content: _.message.content,
        }
      }
      return _.message
    }),
    [],
  )

  breakdown.totalTokens = approximateMessageTokens ??
0 return breakdown } export async function analyzeContextUsage( messages: Message[], model: string, getToolPermissionContext: () => Promise, tools: Tools, agentDefinitions: AgentDefinitionsResult, terminalWidth?: number, toolUseContext?: Pick, mainThreadAgentDefinition?: AgentDefinition, /** Original messages before microcompact, used to extract API usage */ originalMessages?: Message[], ): Promise { const runtimeModel = getRuntimeMainLoopModel({ permissionMode: (await getToolPermissionContext()).mode, mainLoopModel: model, }) // Get context window size const contextWindow = getContextWindowForModel(runtimeModel, getSdkBetas()) // Build the effective system prompt using the shared utility const defaultSystemPrompt = await getSystemPrompt(tools, runtimeModel) const effectiveSystemPrompt = buildEffectiveSystemPrompt({ mainThreadAgentDefinition, toolUseContext: toolUseContext ?? { options: {} as ToolUseContext['options'], }, customSystemPrompt: toolUseContext?.options.customSystemPrompt, defaultSystemPrompt, appendSystemPrompt: toolUseContext?.options.appendSystemPrompt, }) // Critical operations that should not fail due to skills const [ { systemPromptTokens, systemPromptSections }, { claudeMdTokens, memoryFileDetails }, { builtInToolTokens, deferredBuiltinDetails, deferredBuiltinTokens, systemToolDetails, }, { mcpToolTokens, mcpToolDetails, deferredToolTokens }, { agentTokens, agentDetails }, { slashCommandTokens, commandInfo }, messageBreakdown, ] = await Promise.all([ countSystemTokens(effectiveSystemPrompt), countMemoryFileTokens(), countBuiltInToolTokens( tools, getToolPermissionContext, agentDefinitions, runtimeModel, messages, ), countMcpToolTokens( tools, getToolPermissionContext, agentDefinitions, runtimeModel, messages, ), countCustomAgentTokens(agentDefinitions), countSlashCommandTokens(tools, getToolPermissionContext, agentDefinitions), approximateMessageTokens(messages), ]) // Count skills separately with error isolation const skillResult = await 
countSkillTokens( tools, getToolPermissionContext, agentDefinitions, ) const skillInfo = skillResult.skillInfo // Use sum of individual skill token estimates (matches what's shown in details) // rather than skillResult.skillTokens which includes tool schema overhead const skillFrontmatterTokens = skillInfo.skillFrontmatter.reduce( (sum, skill) => sum + skill.tokens, 0, ) const messageTokens = messageBreakdown.totalTokens // Check if autocompact is enabled and calculate threshold const isAutoCompact = isAutoCompactEnabled() const autoCompactThreshold = isAutoCompact ? getEffectiveContextWindowSize(model) - AUTOCOMPACT_BUFFER_TOKENS : undefined // Create categories const cats: ContextCategory[] = [] // System prompt is always shown first (fixed overhead) if (systemPromptTokens > 0) { cats.push({ name: 'System prompt', tokens: systemPromptTokens, color: 'promptBorder', }) } // Built-in tools right after system prompt (skills shown separately below) // Ant users get a per-tool breakdown via systemToolDetails const systemToolsTokens = builtInToolTokens - skillFrontmatterTokens if (systemToolsTokens > 0) { cats.push({ name: process.env.USER_TYPE === 'ant' ? 
'[ANT-ONLY] System tools' : 'System tools', tokens: systemToolsTokens, color: 'inactive', }) } // MCP tools after system tools if (mcpToolTokens > 0) { cats.push({ name: 'MCP tools', tokens: mcpToolTokens, color: 'cyan_FOR_SUBAGENTS_ONLY', }) } // Show deferred MCP tools (when tool search is enabled) // These don't count toward context usage but we show them for visibility if (deferredToolTokens > 0) { cats.push({ name: 'MCP tools (deferred)', tokens: deferredToolTokens, color: 'inactive', isDeferred: true, }) } // Show deferred builtin tools (when tool search is enabled) if (deferredBuiltinTokens > 0) { cats.push({ name: 'System tools (deferred)', tokens: deferredBuiltinTokens, color: 'inactive', isDeferred: true, }) } // Custom agents after MCP tools if (agentTokens > 0) { cats.push({ name: 'Custom agents', tokens: agentTokens, color: 'permission', }) } // Memory files after custom agents if (claudeMdTokens > 0) { cats.push({ name: 'Memory files', tokens: claudeMdTokens, color: 'claude', }) } // Skills after memory files if (skillFrontmatterTokens > 0) { cats.push({ name: 'Skills', tokens: skillFrontmatterTokens, color: 'warning', }) } if (messageTokens !== null && messageTokens > 0) { cats.push({ name: 'Messages', tokens: messageTokens, color: 'purple_FOR_SUBAGENTS_ONLY', }) } // Calculate actual content usage (before adding reserved buffers) // Exclude deferred categories from the usage calculation const actualUsage = cats.reduce( (sum, cat) => sum + (cat.isDeferred ? 0 : cat.tokens), 0, ) // Reserved space after messages (not counted in actualUsage shown to user). // Under reactive-only mode (cobalt_raccoon), proactive autocompact never // fires and the reserved buffer is a lie — skip it entirely and let Free // space fill the grid. feature() guard keeps the flag string out of // external builds. 
Same for context-collapse (marble_origami) — collapse // owns the threshold ladder and autocompact is suppressed in // shouldAutoCompact, so the 33k buffer shown here would be a lie too. let reservedTokens = 0 let skipReservedBuffer = false if (feature('REACTIVE_COMPACT')) { if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_raccoon', false)) { skipReservedBuffer = true } } if (feature('CONTEXT_COLLAPSE')) { /* eslint-disable @typescript-eslint/no-require-imports */ const { isContextCollapseEnabled } = require('../services/contextCollapse/index.js') as typeof import('../services/contextCollapse/index.js') /* eslint-enable @typescript-eslint/no-require-imports */ if (isContextCollapseEnabled()) { skipReservedBuffer = true } } if (skipReservedBuffer) { // No buffer category pushed — reactive compaction is transparent and // doesn't need a visible reservation in the grid. } else if (isAutoCompact && autoCompactThreshold !== undefined) { // Autocompact buffer (from effective context) reservedTokens = contextWindow - autoCompactThreshold cats.push({ name: RESERVED_CATEGORY_NAME, tokens: reservedTokens, color: 'inactive', }) } else if (!isAutoCompact) { // Compact buffer reserve (3k from actual context limit) reservedTokens = MANUAL_COMPACT_BUFFER_TOKENS cats.push({ name: MANUAL_COMPACT_BUFFER_NAME, tokens: reservedTokens, color: 'inactive', }) } // Calculate free space (subtract both actual usage and reserved buffer) const freeTokens = Math.max(0, contextWindow - actualUsage - reservedTokens) cats.push({ name: 'Free space', tokens: freeTokens, color: 'promptBorder', }) // Total for display (everything except free space) const totalIncludingReserved = actualUsage // Extract API usage from original messages (if provided) to match status line // This uses the same source of truth as the status line for consistency const apiUsage = getCurrentUsage(originalMessages ?? 
messages) // When API usage is available, use it for total to match status line calculation // Status line uses: input_tokens + cache_creation_input_tokens + cache_read_input_tokens const totalFromAPI = apiUsage ? apiUsage.input_tokens + apiUsage.cache_creation_input_tokens + apiUsage.cache_read_input_tokens : null // Use API total if available, otherwise fall back to estimated total const finalTotalTokens = totalFromAPI ?? totalIncludingReserved // Pre-calculate grid based on model context window and terminal width // For narrow screens (< 80 cols), use 5x5 for 200k models, 5x10 for 1M+ models // For normal screens, use 10x10 for 200k models, 20x10 for 1M+ models const isNarrowScreen = terminalWidth && terminalWidth < 80 const GRID_WIDTH = contextWindow >= 1000000 ? isNarrowScreen ? 5 : 20 : isNarrowScreen ? 5 : 10 const GRID_HEIGHT = contextWindow >= 1000000 ? 10 : isNarrowScreen ? 5 : 10 const TOTAL_SQUARES = GRID_WIDTH * GRID_HEIGHT // Filter out deferred categories - they don't take up actual context space // (e.g., MCP tools when tool search is enabled) const nonDeferredCats = cats.filter(cat => !cat.isDeferred) // Calculate squares per category (use rawEffectiveMax for visualization to show full context) const categorySquares = nonDeferredCats.map(cat => ({ ...cat, squares: cat.name === 'Free space' ? 
Math.round((cat.tokens / contextWindow) * TOTAL_SQUARES) : Math.max(1, Math.round((cat.tokens / contextWindow) * TOTAL_SQUARES)), percentageOfTotal: Math.round((cat.tokens / contextWindow) * 100), })) // Helper function to create grid squares for a category function createCategorySquares( category: (typeof categorySquares)[0], ): GridSquare[] { const squares: GridSquare[] = [] const exactSquares = (category.tokens / contextWindow) * TOTAL_SQUARES const wholeSquares = Math.floor(exactSquares) const fractionalPart = exactSquares - wholeSquares for (let i = 0; i < category.squares; i++) { // Determine fullness: full squares get 1.0, partial square gets fractional amount let squareFullness = 1.0 if (i === wholeSquares && fractionalPart > 0) { // This is the partial square squareFullness = fractionalPart } squares.push({ color: category.color, isFilled: true, categoryName: category.name, tokens: category.tokens, percentage: category.percentageOfTotal, squareFullness, }) } return squares } // Build the grid as an array of squares with full metadata const gridSquares: GridSquare[] = [] // Separate reserved category for end placement (either autocompact or manual compact buffer) const reservedCategory = categorySquares.find( cat => cat.name === RESERVED_CATEGORY_NAME || cat.name === MANUAL_COMPACT_BUFFER_NAME, ) const nonReservedCategories = categorySquares.filter( cat => cat.name !== RESERVED_CATEGORY_NAME && cat.name !== MANUAL_COMPACT_BUFFER_NAME && cat.name !== 'Free space', ) // Add all non-reserved, non-free-space squares first for (const cat of nonReservedCategories) { const squares = createCategorySquares(cat) for (const square of squares) { if (gridSquares.length < TOTAL_SQUARES) { gridSquares.push(square) } } } // Calculate how many squares are needed for reserved const reservedSquareCount = reservedCategory ? 
reservedCategory.squares : 0 // Fill with free space, leaving room for reserved at the end const freeSpaceCat = cats.find(c => c.name === 'Free space') const freeSpaceTarget = TOTAL_SQUARES - reservedSquareCount while (gridSquares.length < freeSpaceTarget) { gridSquares.push({ color: 'promptBorder', isFilled: true, categoryName: 'Free space', tokens: freeSpaceCat?.tokens || 0, percentage: freeSpaceCat ? Math.round((freeSpaceCat.tokens / contextWindow) * 100) : 0, squareFullness: 1.0, // Free space is always "full" }) } // Add reserved squares at the end if (reservedCategory) { const squares = createCategorySquares(reservedCategory) for (const square of squares) { if (gridSquares.length < TOTAL_SQUARES) { gridSquares.push(square) } } } // Convert to rows for rendering const gridRows: GridSquare[][] = [] for (let i = 0; i < GRID_HEIGHT; i++) { gridRows.push(gridSquares.slice(i * GRID_WIDTH, (i + 1) * GRID_WIDTH)) } // Format message breakdown (used by context suggestions for all users) // Combine tool calls and results, then get top 5 const toolsMap = new Map< string, { callTokens: number; resultTokens: number } >() // Add call tokens for (const [name, tokens] of messageBreakdown.toolCallsByType.entries()) { const existing = toolsMap.get(name) || { callTokens: 0, resultTokens: 0 } toolsMap.set(name, { ...existing, callTokens: tokens }) } // Add result tokens for (const [name, tokens] of messageBreakdown.toolResultsByType.entries()) { const existing = toolsMap.get(name) || { callTokens: 0, resultTokens: 0 } toolsMap.set(name, { ...existing, resultTokens: tokens }) } // Convert to array and sort by total tokens (calls + results) const toolsByTypeArray = Array.from(toolsMap.entries()) .map(([name, { callTokens, resultTokens }]) => ({ name, callTokens, resultTokens, })) .sort( (a, b) => b.callTokens + b.resultTokens - (a.callTokens + a.resultTokens), ) const attachmentsByTypeArray = Array.from( messageBreakdown.attachmentsByType.entries(), ) .map(([name, tokens]) => ({ 
name, tokens })) .sort((a, b) => b.tokens - a.tokens) const formattedMessageBreakdown = { toolCallTokens: messageBreakdown.toolCallTokens, toolResultTokens: messageBreakdown.toolResultTokens, attachmentTokens: messageBreakdown.attachmentTokens, assistantMessageTokens: messageBreakdown.assistantMessageTokens, userMessageTokens: messageBreakdown.userMessageTokens, toolCallsByType: toolsByTypeArray, attachmentsByType: attachmentsByTypeArray, } return { categories: cats, totalTokens: finalTotalTokens, maxTokens: contextWindow, rawMaxTokens: contextWindow, percentage: Math.round((finalTotalTokens / contextWindow) * 100), gridRows, model: runtimeModel, memoryFiles: memoryFileDetails, mcpTools: mcpToolDetails, deferredBuiltinTools: process.env.USER_TYPE === 'ant' ? deferredBuiltinDetails : undefined, systemTools: process.env.USER_TYPE === 'ant' ? systemToolDetails : undefined, systemPromptSections: process.env.USER_TYPE === 'ant' ? systemPromptSections : undefined, agents: agentDetails, slashCommands: slashCommandTokens > 0 ? { totalCommands: commandInfo.totalCommands, includedCommands: commandInfo.includedCommands, tokens: slashCommandTokens, } : undefined, skills: skillFrontmatterTokens > 0 ? { totalSkills: skillInfo.totalSkills, includedSkills: skillInfo.includedSkills, tokens: skillFrontmatterTokens, skillFrontmatter: skillInfo.skillFrontmatter, } : undefined, autoCompactThreshold, isAutoCompactEnabled: isAutoCompact, messageBreakdown: formattedMessageBreakdown, apiUsage, } }