source dump of claude code
at main 1382 lines 43 kB view raw
1import { feature } from 'bun:bundle' 2import type { Anthropic } from '@anthropic-ai/sdk' 3import { 4 getSystemPrompt, 5 SYSTEM_PROMPT_DYNAMIC_BOUNDARY, 6} from 'src/constants/prompts.js' 7import { microcompactMessages } from 'src/services/compact/microCompact.js' 8import { getSdkBetas } from '../bootstrap/state.js' 9import { getCommandName } from '../commands.js' 10import { getSystemContext } from '../context.js' 11import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js' 12import { 13 AUTOCOMPACT_BUFFER_TOKENS, 14 getEffectiveContextWindowSize, 15 isAutoCompactEnabled, 16 MANUAL_COMPACT_BUFFER_TOKENS, 17} from '../services/compact/autoCompact.js' 18import { 19 countMessagesTokensWithAPI, 20 countTokensViaHaikuFallback, 21 roughTokenCountEstimation, 22} from '../services/tokenEstimation.js' 23import { estimateSkillFrontmatterTokens } from '../skills/loadSkillsDir.js' 24import { 25 findToolByName, 26 type Tool, 27 type ToolPermissionContext, 28 type Tools, 29 type ToolUseContext, 30 toolMatchesName, 31} from '../Tool.js' 32import type { 33 AgentDefinition, 34 AgentDefinitionsResult, 35} from '../tools/AgentTool/loadAgentsDir.js' 36import { SKILL_TOOL_NAME } from '../tools/SkillTool/constants.js' 37import { 38 getLimitedSkillToolCommands, 39 getSkillToolInfo as getSlashCommandInfo, 40} from '../tools/SkillTool/prompt.js' 41import type { 42 AssistantMessage, 43 AttachmentMessage, 44 Message, 45 NormalizedAssistantMessage, 46 NormalizedUserMessage, 47 UserMessage, 48} from '../types/message.js' 49import { toolToAPISchema } from './api.js' 50import { filterInjectedMemoryFiles, getMemoryFiles } from './claudemd.js' 51import { getContextWindowForModel } from './context.js' 52import { getCwd } from './cwd.js' 53import { logForDebugging } from './debug.js' 54import { isEnvTruthy } from './envUtils.js' 55import { errorMessage, toError } from './errors.js' 56import { logError } from './log.js' 57import { normalizeMessagesForAPI } from './messages.js' 58import { getRuntimeMainLoopModel } from './model/model.js' 59import type { SettingSource } from './settings/constants.js' 60import { jsonStringify } from './slowOperations.js' 61import { buildEffectiveSystemPrompt } from './systemPrompt.js' 62import type { Theme } from './theme.js' 63import { getCurrentUsage } from './tokens.js' 64 65const RESERVED_CATEGORY_NAME = 'Autocompact buffer' 66const MANUAL_COMPACT_BUFFER_NAME = 'Compact buffer' 67 68/** 69 * Fixed token overhead added by the API when tools are present. 70 * The API adds a tool prompt preamble (~500 tokens) once per API call when tools are present. 71 * When we count tools individually via the token counting API, each call includes this overhead, 72 * leading to N × overhead instead of 1 × overhead for N tools. 73 * We subtract this overhead from per-tool counts to show accurate tool content sizes. 74 */ 75export const TOOL_TOKEN_COUNT_OVERHEAD = 500 76 77async function countTokensWithFallback( 78 messages: Anthropic.Beta.Messages.BetaMessageParam[], 79 tools: Anthropic.Beta.Messages.BetaToolUnion[], 80): Promise<number | null> { 81 try { 82 const result = await countMessagesTokensWithAPI(messages, tools) 83 if (result !== null) { 84 return result 85 } 86 logForDebugging( 87 `countTokensWithFallback: API returned null, trying haiku fallback (${tools.length} tools)`, 88 ) 89 } catch (err) { 90 logForDebugging(`countTokensWithFallback: API failed: ${errorMessage(err)}`) 91 logError(err) 92 } 93 94 try { 95 const fallbackResult = await countTokensViaHaikuFallback(messages, tools) 96 if (fallbackResult === null) { 97 logForDebugging( 98 `countTokensWithFallback: haiku fallback also returned null (${tools.length} tools)`, 99 ) 100 } 101 return fallbackResult 102 } catch (err) { 103 logForDebugging( 104 `countTokensWithFallback: haiku fallback failed: ${errorMessage(err)}`, 105 ) 106 logError(err) 107 return null 108 } 109} 110 111interface ContextCategory { 112 name: string 113 tokens: number 114 color: keyof Theme 115 /** When true, these tokens are deferred and don't count toward context usage */ 116 isDeferred?: boolean 117} 118 119interface GridSquare { 120 color: keyof Theme 121 isFilled: boolean 122 categoryName: string 123 tokens: number 124 percentage: number 125 squareFullness: number // 0-1 representing how full this individual square is 126} 127 128interface MemoryFile { 129 path: string 130 type: string 131 tokens: number 132} 133 134interface McpTool { 135 name: string 136 serverName: string 137 tokens: number 138 isLoaded?: boolean 139} 140 141export interface DeferredBuiltinTool { 142 name: string 143 tokens: number 144 isLoaded: boolean 145} 146 147export interface SystemToolDetail { 148 name: string 149 tokens: number 150} 151 152export interface SystemPromptSectionDetail { 153 name: string 154 tokens: number 155} 156 157interface Agent { 158 agentType: string 159 source: SettingSource | 'built-in' | 'plugin' 160 tokens: number 161} 162 163interface SlashCommandInfo { 164 readonly totalCommands: number 165 readonly includedCommands: number 166 readonly tokens: number 167} 168 169/** Individual skill detail for context display */ 170interface SkillFrontmatter { 171 name: string 172 source: SettingSource | 'plugin' 173 tokens: number 174} 175 176/** 177 * Information about skills included in the context window. 178 */ 179interface SkillInfo { 180 /** Total number of available skills */ 181 readonly totalSkills: number 182 /** Number of skills included within token budget */ 183 readonly includedSkills: number 184 /** Total tokens consumed by skills */ 185 readonly tokens: number 186 /** Individual skill details */ 187 readonly skillFrontmatter: SkillFrontmatter[] 188} 189 190export interface ContextData { 191 readonly categories: ContextCategory[] 192 readonly totalTokens: number 193 readonly maxTokens: number 194 readonly rawMaxTokens: number 195 readonly percentage: number 196 readonly gridRows: GridSquare[][] 197 readonly model: string 198 readonly memoryFiles: MemoryFile[] 199 readonly mcpTools: McpTool[] 200 /** Ant-only: per-tool breakdown of deferred built-in tools */ 201 readonly deferredBuiltinTools?: DeferredBuiltinTool[] 202 /** Ant-only: per-tool breakdown of always-loaded built-in tools */ 203 readonly systemTools?: SystemToolDetail[] 204 /** Ant-only: per-section breakdown of system prompt */ 205 readonly systemPromptSections?: SystemPromptSectionDetail[] 206 readonly agents: Agent[] 207 readonly slashCommands?: SlashCommandInfo 208 /** Skill statistics */ 209 readonly skills?: SkillInfo 210 readonly autoCompactThreshold?: number 211 readonly isAutoCompactEnabled: boolean 212 messageBreakdown?: { 213 toolCallTokens: number 214 toolResultTokens: number 215 attachmentTokens: number 216 assistantMessageTokens: number 217 userMessageTokens: number 218 toolCallsByType: Array<{ 219 name: string 220 callTokens: number 221 resultTokens: number 222 }> 223 attachmentsByType: Array<{ name: string; tokens: number }> 224 } 225 /** Actual token usage from last API response (if available) */ 226 readonly apiUsage: { 227 input_tokens: number 228 output_tokens: number 229 cache_creation_input_tokens: number 230 cache_read_input_tokens: number 231 } | null 232} 233 234export async function countToolDefinitionTokens( 235 tools: Tools, 236 getToolPermissionContext: () => Promise<ToolPermissionContext>, 237 agentInfo: AgentDefinitionsResult | null, 238 model?: string, 239): Promise<number> { 240 const toolSchemas = await Promise.all( 241 tools.map(tool => 242 toolToAPISchema(tool, { 243 getToolPermissionContext, 244 tools, 245 agents: agentInfo?.activeAgents ?? [], 246 model, 247 }), 248 ), 249 ) 250 const result = await countTokensWithFallback([], toolSchemas) 251 if (result === null || result === 0) { 252 const toolNames = tools.map(t => t.name).join(', ') 253 logForDebugging( 254 `countToolDefinitionTokens returned ${result} for ${tools.length} tools: ${toolNames.slice(0, 100)}${toolNames.length > 100 ? '...' : ''}`, 255 ) 256 } 257 return result ?? 0 258} 259 260/** Extract a human-readable name from a system prompt section's content */ 261function extractSectionName(content: string): string { 262 // Try to find first markdown heading 263 const headingMatch = content.match(/^#+\s+(.+)$/m) 264 if (headingMatch) { 265 return headingMatch[1]!.trim() 266 } 267 // Fall back to a truncated preview of the first non-empty line 268 const firstLine = content.split('\n').find(l => l.trim().length > 0) ?? '' 269 return firstLine.length > 40 ? firstLine.slice(0, 40) + '…' : firstLine 270} 271 272async function countSystemTokens( 273 effectiveSystemPrompt: readonly string[], 274): Promise<{ 275 systemPromptTokens: number 276 systemPromptSections: SystemPromptSectionDetail[] 277}> { 278 // Get system context (gitStatus, etc.) which is always included 279 const systemContext = await getSystemContext() 280 281 // Build named entries: system prompt parts + system context values 282 // Skip empty strings and the global-cache boundary marker 283 const namedEntries: Array<{ name: string; content: string }> = [ 284 ...effectiveSystemPrompt 285 .filter( 286 content => 287 content.length > 0 && content !== SYSTEM_PROMPT_DYNAMIC_BOUNDARY, 288 ) 289 .map(content => ({ name: extractSectionName(content), content })), 290 ...Object.entries(systemContext) 291 .filter(([, content]) => content.length > 0) 292 .map(([name, content]) => ({ name, content })), 293 ] 294 295 if (namedEntries.length < 1) { 296 return { systemPromptTokens: 0, systemPromptSections: [] } 297 } 298 299 const systemTokenCounts = await Promise.all( 300 namedEntries.map(({ content }) => 301 countTokensWithFallback([{ role: 'user', content }], []), 302 ), 303 ) 304 305 const systemPromptSections: SystemPromptSectionDetail[] = namedEntries.map( 306 (entry, i) => ({ 307 name: entry.name, 308 tokens: systemTokenCounts[i] || 0, 309 }), 310 ) 311 312 const systemPromptTokens = systemTokenCounts.reduce( 313 (sum: number, tokens) => sum + (tokens || 0), 314 0, 315 ) 316 317 return { systemPromptTokens, systemPromptSections } 318} 319 320async function countMemoryFileTokens(): Promise<{ 321 memoryFileDetails: MemoryFile[] 322 claudeMdTokens: number 323}> { 324 // Simple mode disables CLAUDE.md loading, so don't report tokens for them 325 if (isEnvTruthy(process.env.CLAUDE_CODE_SIMPLE)) { 326 return { memoryFileDetails: [], claudeMdTokens: 0 } 327 } 328 329 const memoryFilesData = filterInjectedMemoryFiles(await getMemoryFiles()) 330 const memoryFileDetails: MemoryFile[] = [] 331 let claudeMdTokens = 0 332 333 if (memoryFilesData.length < 1) { 334 return { 335 memoryFileDetails: [], 336 claudeMdTokens: 0, 337 } 338 } 339 340 const claudeMdTokenCounts = await Promise.all( 341 memoryFilesData.map(async file => { 342 const tokens = await countTokensWithFallback( 343 [{ role: 'user', content: file.content }], 344 [], 345 ) 346 347 return { file, tokens: tokens || 0 } 348 }), 349 ) 350 351 for (const { file, tokens } of claudeMdTokenCounts) { 352 claudeMdTokens += tokens 353 memoryFileDetails.push({ 354 path: file.path, 355 type: file.type, 356 tokens, 357 }) 358 } 359 360 return { claudeMdTokens, memoryFileDetails } 361} 362 363async function countBuiltInToolTokens( 364 tools: Tools, 365 getToolPermissionContext: () => Promise<ToolPermissionContext>, 366 agentInfo: AgentDefinitionsResult | null, 367 model?: string, 368 messages?: Message[], 369): Promise<{ 370 builtInToolTokens: number 371 deferredBuiltinDetails: DeferredBuiltinTool[] 372 deferredBuiltinTokens: number 373 systemToolDetails: SystemToolDetail[] 374}> { 375 const builtInTools = tools.filter(tool => !tool.isMcp) 376 if (builtInTools.length < 1) { 377 return { 378 builtInToolTokens: 0, 379 deferredBuiltinDetails: [], 380 deferredBuiltinTokens: 0, 381 systemToolDetails: [], 382 } 383 } 384 385 // Check if tool search is enabled 386 const { isToolSearchEnabled } = await import('./toolSearch.js') 387 const { isDeferredTool } = await import('../tools/ToolSearchTool/prompt.js') 388 const isDeferred = await isToolSearchEnabled( 389 model ?? '', 390 tools, 391 getToolPermissionContext, 392 agentInfo?.activeAgents ?? [], 393 'analyzeBuiltIn', 394 ) 395 396 // Separate always-loaded and deferred builtin tools using dynamic isDeferredTool check 397 const alwaysLoadedTools = builtInTools.filter(t => !isDeferredTool(t)) 398 const deferredBuiltinTools = builtInTools.filter(t => isDeferredTool(t)) 399 400 // Count always-loaded tools 401 const alwaysLoadedTokens = 402 alwaysLoadedTools.length > 0 403 ? await countToolDefinitionTokens( 404 alwaysLoadedTools, 405 getToolPermissionContext, 406 agentInfo, 407 model, 408 ) 409 : 0 410 411 // Build per-tool breakdown for always-loaded tools (ant-only, proportional 412 // split of the bulk count based on rough schema size estimation). Excludes 413 // SkillTool since its tokens are shown in the separate Skills category. 414 let systemToolDetails: SystemToolDetail[] = [] 415 if (process.env.USER_TYPE === 'ant') { 416 const toolsForBreakdown = alwaysLoadedTools.filter( 417 t => !toolMatchesName(t, SKILL_TOOL_NAME), 418 ) 419 if (toolsForBreakdown.length > 0) { 420 const estimates = toolsForBreakdown.map(t => 421 roughTokenCountEstimation(jsonStringify(t.inputSchema ?? {})), 422 ) 423 const estimateTotal = estimates.reduce((s, e) => s + e, 0) || 1 424 const distributable = Math.max( 425 0, 426 alwaysLoadedTokens - TOOL_TOKEN_COUNT_OVERHEAD, 427 ) 428 systemToolDetails = toolsForBreakdown 429 .map((t, i) => ({ 430 name: t.name, 431 tokens: Math.round((estimates[i]! / estimateTotal) * distributable), 432 })) 433 .sort((a, b) => b.tokens - a.tokens) 434 } 435 } 436 437 // Count deferred builtin tools individually for details 438 const deferredBuiltinDetails: DeferredBuiltinTool[] = [] 439 let loadedDeferredTokens = 0 440 let totalDeferredTokens = 0 441 442 if (deferredBuiltinTools.length > 0 && isDeferred) { 443 // Find which deferred tools have been used in messages 444 const loadedToolNames = new Set<string>() 445 if (messages) { 446 const deferredToolNameSet = new Set(deferredBuiltinTools.map(t => t.name)) 447 for (const msg of messages) { 448 if (msg.type === 'assistant') { 449 for (const block of msg.message.content) { 450 if ( 451 'type' in block && 452 block.type === 'tool_use' && 453 'name' in block && 454 typeof block.name === 'string' && 455 deferredToolNameSet.has(block.name) 456 ) { 457 loadedToolNames.add(block.name) 458 } 459 } 460 } 461 } 462 } 463 464 // Count each deferred tool 465 const tokensByTool = await Promise.all( 466 deferredBuiltinTools.map(t => 467 countToolDefinitionTokens( 468 [t], 469 getToolPermissionContext, 470 agentInfo, 471 model, 472 ), 473 ), 474 ) 475 476 for (const [i, tool] of deferredBuiltinTools.entries()) { 477 const tokens = Math.max( 478 0, 479 (tokensByTool[i] || 0) - TOOL_TOKEN_COUNT_OVERHEAD, 480 ) 481 const isLoaded = loadedToolNames.has(tool.name) 482 deferredBuiltinDetails.push({ 483 name: tool.name, 484 tokens, 485 isLoaded, 486 }) 487 totalDeferredTokens += tokens 488 if (isLoaded) { 489 loadedDeferredTokens += tokens 490 } 491 } 492 } else if (deferredBuiltinTools.length > 0) { 493 // Tool search not enabled - count deferred tools as regular 494 const deferredTokens = await countToolDefinitionTokens( 495 deferredBuiltinTools, 496 getToolPermissionContext, 497 agentInfo, 498 model, 499 ) 500 return { 501 builtInToolTokens: alwaysLoadedTokens + deferredTokens, 502 deferredBuiltinDetails: [], 503 deferredBuiltinTokens: 0, 504 systemToolDetails, 505 } 506 } 507 508 return { 509 // When deferred, only count always-loaded tools + any loaded deferred tools 510 builtInToolTokens: alwaysLoadedTokens + loadedDeferredTokens, 511 deferredBuiltinDetails, 512 deferredBuiltinTokens: totalDeferredTokens - loadedDeferredTokens, 513 systemToolDetails, 514 } 515} 516 517function findSkillTool(tools: Tools): Tool | undefined { 518 return findToolByName(tools, SKILL_TOOL_NAME) 519} 520 521async function countSlashCommandTokens( 522 tools: Tools, 523 getToolPermissionContext: () => Promise<ToolPermissionContext>, 524 agentInfo: AgentDefinitionsResult | null, 525): Promise<{ 526 slashCommandTokens: number 527 commandInfo: { totalCommands: number; includedCommands: number } 528}> { 529 const info = await getSlashCommandInfo(getCwd()) 530 531 const slashCommandTool = findSkillTool(tools) 532 if (!slashCommandTool) { 533 return { 534 slashCommandTokens: 0, 535 commandInfo: { totalCommands: 0, includedCommands: 0 }, 536 } 537 } 538 539 const slashCommandTokens = await countToolDefinitionTokens( 540 [slashCommandTool], 541 getToolPermissionContext, 542 agentInfo, 543 ) 544 545 return { 546 slashCommandTokens, 547 commandInfo: { 548 totalCommands: info.totalCommands, 549 includedCommands: info.includedCommands, 550 }, 551 } 552} 553 554async function countSkillTokens( 555 tools: Tools, 556 getToolPermissionContext: () => Promise<ToolPermissionContext>, 557 agentInfo: AgentDefinitionsResult | null, 558): Promise<{ 559 skillTokens: number 560 skillInfo: { 561 totalSkills: number 562 includedSkills: number 563 skillFrontmatter: SkillFrontmatter[] 564 } 565}> { 566 try { 567 const skills = await getLimitedSkillToolCommands(getCwd()) 568 569 const slashCommandTool = findSkillTool(tools) 570 if (!slashCommandTool) { 571 return { 572 skillTokens: 0, 573 skillInfo: { totalSkills: 0, includedSkills: 0, skillFrontmatter: [] }, 574 } 575 } 576 577 // NOTE: This counts the entire SlashCommandTool (which includes both commands AND skills). 578 // This is the same tool counted by countSlashCommandTokens(), but we track it separately 579 // here for display purposes. These tokens should NOT be added to context categories 580 // to avoid double-counting. 581 const skillTokens = await countToolDefinitionTokens( 582 [slashCommandTool], 583 getToolPermissionContext, 584 agentInfo, 585 ) 586 587 // Calculate per-skill token estimates based on frontmatter only 588 // (name, description, whenToUse) since full content is only loaded on invocation 589 const skillFrontmatter: SkillFrontmatter[] = skills.map(skill => ({ 590 name: getCommandName(skill), 591 source: (skill.type === 'prompt' ? skill.source : 'plugin') as 592 | SettingSource 593 | 'plugin', 594 tokens: estimateSkillFrontmatterTokens(skill), 595 })) 596 597 return { 598 skillTokens, 599 skillInfo: { 600 totalSkills: skills.length, 601 includedSkills: skills.length, 602 skillFrontmatter, 603 }, 604 } 605 } catch (error) { 606 logError(toError(error)) 607 608 // Return zero values rather than failing the entire context analysis 609 return { 610 skillTokens: 0, 611 skillInfo: { totalSkills: 0, includedSkills: 0, skillFrontmatter: [] }, 612 } 613 } 614} 615 616export async function countMcpToolTokens( 617 tools: Tools, 618 getToolPermissionContext: () => Promise<ToolPermissionContext>, 619 agentInfo: AgentDefinitionsResult | null, 620 model: string, 621 messages?: Message[], 622): Promise<{ 623 mcpToolTokens: number 624 mcpToolDetails: McpTool[] 625 deferredToolTokens: number 626 loadedMcpToolNames: Set<string> 627}> { 628 const mcpTools = tools.filter(tool => tool.isMcp) 629 const mcpToolDetails: McpTool[] = [] 630 // Single bulk API call for all MCP tools (instead of N individual calls) 631 const totalTokensRaw = await countToolDefinitionTokens( 632 mcpTools, 633 getToolPermissionContext, 634 agentInfo, 635 model, 636 ) 637 // Subtract the single overhead since we made one bulk call 638 const totalTokens = Math.max( 639 0, 640 (totalTokensRaw || 0) - TOOL_TOKEN_COUNT_OVERHEAD, 641 ) 642 643 // Estimate per-tool proportions for display using local estimation. 644 // Include name + description + input schema to match what toolToAPISchema 645 // sends — otherwise tools with similar schemas but different descriptions 646 // get identical counts (MCP tools share the same base Zod inputSchema). 647 const estimates = await Promise.all( 648 mcpTools.map(async t => 649 roughTokenCountEstimation( 650 jsonStringify({ 651 name: t.name, 652 description: await t.prompt({ 653 getToolPermissionContext, 654 tools, 655 agents: agentInfo?.activeAgents ?? [], 656 }), 657 input_schema: t.inputJSONSchema ?? {}, 658 }), 659 ), 660 ), 661 ) 662 const estimateTotal = estimates.reduce((s, e) => s + e, 0) || 1 663 const mcpToolTokensByTool = estimates.map(e => 664 Math.round((e / estimateTotal) * totalTokens), 665 ) 666 667 // Check if tool search is enabled - if so, MCP tools are deferred 668 // isToolSearchEnabled handles threshold calculation internally for TstAuto mode 669 const { isToolSearchEnabled } = await import('./toolSearch.js') 670 const { isDeferredTool } = await import('../tools/ToolSearchTool/prompt.js') 671 672 const isDeferred = await isToolSearchEnabled( 673 model, 674 tools, 675 getToolPermissionContext, 676 agentInfo?.activeAgents ?? [], 677 'analyzeMcp', 678 ) 679 680 // Find MCP tools that have been used in messages (loaded via ToolSearchTool) 681 const loadedMcpToolNames = new Set<string>() 682 if (isDeferred && messages) { 683 const mcpToolNameSet = new Set(mcpTools.map(t => t.name)) 684 for (const msg of messages) { 685 if (msg.type === 'assistant') { 686 for (const block of msg.message.content) { 687 if ( 688 'type' in block && 689 block.type === 'tool_use' && 690 'name' in block && 691 typeof block.name === 'string' && 692 mcpToolNameSet.has(block.name) 693 ) { 694 loadedMcpToolNames.add(block.name) 695 } 696 } 697 } 698 } 699 } 700 701 // Build tool details with isLoaded flag 702 for (const [i, tool] of mcpTools.entries()) { 703 mcpToolDetails.push({ 704 name: tool.name, 705 serverName: tool.name.split('__')[1] || 'unknown', 706 tokens: mcpToolTokensByTool[i]!, 707 isLoaded: loadedMcpToolNames.has(tool.name) || !isDeferredTool(tool), 708 }) 709 } 710 711 // Calculate loaded vs deferred tokens 712 let loadedTokens = 0 713 let deferredTokens = 0 714 for (const detail of mcpToolDetails) { 715 if (detail.isLoaded) { 716 loadedTokens += detail.tokens 717 } else if (isDeferred) { 718 deferredTokens += detail.tokens 719 } 720 } 721 722 return { 723 // When deferred but some tools are loaded, count loaded tokens 724 mcpToolTokens: isDeferred ? loadedTokens : totalTokens, 725 mcpToolDetails, 726 // Track deferred tokens separately for display 727 deferredToolTokens: deferredTokens, 728 loadedMcpToolNames, 729 } 730} 731 732async function countCustomAgentTokens(agentDefinitions: { 733 activeAgents: AgentDefinition[] 734}): Promise<{ 735 agentTokens: number 736 agentDetails: Agent[] 737}> { 738 const customAgents = agentDefinitions.activeAgents.filter( 739 a => a.source !== 'built-in', 740 ) 741 const agentDetails: Agent[] = [] 742 let agentTokens = 0 743 744 const tokenCounts = await Promise.all( 745 customAgents.map(agent => 746 countTokensWithFallback( 747 [ 748 { 749 role: 'user', 750 content: [agent.agentType, agent.whenToUse].join(' '), 751 }, 752 ], 753 [], 754 ), 755 ), 756 ) 757 758 for (const [i, agent] of customAgents.entries()) { 759 const tokens = tokenCounts[i] || 0 760 agentTokens += tokens || 0 761 agentDetails.push({ 762 agentType: agent.agentType, 763 source: agent.source, 764 tokens: tokens || 0, 765 }) 766 } 767 return { agentTokens, agentDetails } 768} 769 770type MessageBreakdown = { 771 totalTokens: number 772 toolCallTokens: number 773 toolResultTokens: number 774 attachmentTokens: number 775 assistantMessageTokens: number 776 userMessageTokens: number 777 toolCallsByType: Map<string, number> 778 toolResultsByType: Map<string, number> 779 attachmentsByType: Map<string, number> 780} 781 782function processAssistantMessage( 783 msg: AssistantMessage | NormalizedAssistantMessage, 784 breakdown: MessageBreakdown, 785): void { 786 // Process each content block individually 787 for (const block of msg.message.content) { 788 const blockStr = jsonStringify(block) 789 const blockTokens = roughTokenCountEstimation(blockStr) 790 791 if ('type' in block && block.type === 'tool_use') { 792 breakdown.toolCallTokens += blockTokens 793 const toolName = ('name' in block ? block.name : undefined) || 'unknown' 794 breakdown.toolCallsByType.set( 795 toolName, 796 (breakdown.toolCallsByType.get(toolName) || 0) + blockTokens, 797 ) 798 } else { 799 // Text blocks or other non-tool content 800 breakdown.assistantMessageTokens += blockTokens 801 } 802 } 803} 804 805function processUserMessage( 806 msg: UserMessage | NormalizedUserMessage, 807 breakdown: MessageBreakdown, 808 toolUseIdToName: Map<string, string>, 809): void { 810 // Handle both string and array content 811 if (typeof msg.message.content === 'string') { 812 // Simple string content 813 const tokens = roughTokenCountEstimation(msg.message.content) 814 breakdown.userMessageTokens += tokens 815 return 816 } 817 818 // Process each content block individually 819 for (const block of msg.message.content) { 820 const blockStr = jsonStringify(block) 821 const blockTokens = roughTokenCountEstimation(blockStr) 822 823 if ('type' in block && block.type === 'tool_result') { 824 breakdown.toolResultTokens += blockTokens 825 const toolUseId = 'tool_use_id' in block ? block.tool_use_id : undefined 826 const toolName = 827 (toolUseId ? toolUseIdToName.get(toolUseId) : undefined) || 'unknown' 828 breakdown.toolResultsByType.set( 829 toolName, 830 (breakdown.toolResultsByType.get(toolName) || 0) + blockTokens, 831 ) 832 } else { 833 // Text blocks or other non-tool content 834 breakdown.userMessageTokens += blockTokens 835 } 836 } 837} 838 839function processAttachment( 840 msg: AttachmentMessage, 841 breakdown: MessageBreakdown, 842): void { 843 const contentStr = jsonStringify(msg.attachment) 844 const tokens = roughTokenCountEstimation(contentStr) 845 breakdown.attachmentTokens += tokens 846 const attachType = msg.attachment.type || 'unknown' 847 breakdown.attachmentsByType.set( 848 attachType, 849 (breakdown.attachmentsByType.get(attachType) || 0) + tokens, 850 ) 851} 852 853async function approximateMessageTokens( 854 messages: Message[], 855): Promise<MessageBreakdown> { 856 const microcompactResult = await microcompactMessages(messages) 857 858 // Initialize tracking 859 const breakdown: MessageBreakdown = { 860 totalTokens: 0, 861 toolCallTokens: 0, 862 toolResultTokens: 0, 863 attachmentTokens: 0, 864 assistantMessageTokens: 0, 865 userMessageTokens: 0, 866 toolCallsByType: new Map<string, number>(), 867 toolResultsByType: new Map<string, number>(), 868 attachmentsByType: new Map<string, number>(), 869 } 870 871 // Build a map of tool_use_id to tool_name for easier lookup 872 const toolUseIdToName = new Map<string, string>() 873 for (const msg of microcompactResult.messages) { 874 if (msg.type === 'assistant') { 875 for (const block of msg.message.content) { 876 if ('type' in block && block.type === 'tool_use') { 877 const toolUseId = 'id' in block ? block.id : undefined 878 const toolName = 879 ('name' in block ? block.name : undefined) || 'unknown' 880 if (toolUseId) { 881 toolUseIdToName.set(toolUseId, toolName) 882 } 883 } 884 } 885 } 886 } 887 888 // Process each message for detailed breakdown 889 for (const msg of microcompactResult.messages) { 890 if (msg.type === 'assistant') { 891 processAssistantMessage(msg, breakdown) 892 } else if (msg.type === 'user') { 893 processUserMessage(msg, breakdown, toolUseIdToName) 894 } else if (msg.type === 'attachment') { 895 processAttachment(msg, breakdown) 896 } 897 } 898 899 // Calculate total tokens using the API for accuracy 900 const approximateMessageTokens = await countTokensWithFallback( 901 normalizeMessagesForAPI(microcompactResult.messages).map(_ => { 902 if (_.type === 'assistant') { 903 return { 904 // Important: strip out fields like id, etc. -- the counting API errors if they're present 905 role: 'assistant', 906 content: _.message.content, 907 } 908 } 909 return _.message 910 }), 911 [], 912 ) 913 914 breakdown.totalTokens = approximateMessageTokens ?? 0 915 return breakdown 916} 917 918export async function analyzeContextUsage( 919 messages: Message[], 920 model: string, 921 getToolPermissionContext: () => Promise<ToolPermissionContext>, 922 tools: Tools, 923 agentDefinitions: AgentDefinitionsResult, 924 terminalWidth?: number, 925 toolUseContext?: Pick<ToolUseContext, 'options'>, 926 mainThreadAgentDefinition?: AgentDefinition, 927 /** Original messages before microcompact, used to extract API usage */ 928 originalMessages?: Message[], 929): Promise<ContextData> { 930 const runtimeModel = getRuntimeMainLoopModel({ 931 permissionMode: (await getToolPermissionContext()).mode, 932 mainLoopModel: model, 933 }) 934 // Get context window size 935 const contextWindow = getContextWindowForModel(runtimeModel, getSdkBetas()) 936 937 // Build the effective system prompt using the shared utility 938 const defaultSystemPrompt = await getSystemPrompt(tools, runtimeModel) 939 const effectiveSystemPrompt = buildEffectiveSystemPrompt({ 940 mainThreadAgentDefinition, 941 toolUseContext: toolUseContext ?? { 942 options: {} as ToolUseContext['options'], 943 }, 944 customSystemPrompt: toolUseContext?.options.customSystemPrompt, 945 defaultSystemPrompt, 946 appendSystemPrompt: toolUseContext?.options.appendSystemPrompt, 947 }) 948 949 // Critical operations that should not fail due to skills 950 const [ 951 { systemPromptTokens, systemPromptSections }, 952 { claudeMdTokens, memoryFileDetails }, 953 { 954 builtInToolTokens, 955 deferredBuiltinDetails, 956 deferredBuiltinTokens, 957 systemToolDetails, 958 }, 959 { mcpToolTokens, mcpToolDetails, deferredToolTokens }, 960 { agentTokens, agentDetails }, 961 { slashCommandTokens, commandInfo }, 962 messageBreakdown, 963 ] = await Promise.all([ 964 countSystemTokens(effectiveSystemPrompt), 965 countMemoryFileTokens(), 966 countBuiltInToolTokens( 967 tools, 968 getToolPermissionContext, 969 agentDefinitions, 970 runtimeModel, 971 messages, 972 ), 973 countMcpToolTokens( 974 tools, 975 getToolPermissionContext, 976 agentDefinitions, 977 runtimeModel, 978 messages, 979 ), 980 countCustomAgentTokens(agentDefinitions), 981 countSlashCommandTokens(tools, getToolPermissionContext, agentDefinitions), 982 approximateMessageTokens(messages), 983 ]) 984 985 // Count skills separately with error isolation 986 const skillResult = await countSkillTokens( 987 tools, 988 getToolPermissionContext, 989 agentDefinitions, 990 ) 991 const skillInfo = skillResult.skillInfo 992 // Use sum of individual skill token estimates (matches what's shown in details) 993 // rather than skillResult.skillTokens which includes tool schema overhead 994 const skillFrontmatterTokens = skillInfo.skillFrontmatter.reduce( 995 (sum, skill) => sum + skill.tokens, 996 0, 997 ) 998 999 const messageTokens = messageBreakdown.totalTokens 1000 1001 // Check if autocompact is enabled and calculate threshold 1002 const isAutoCompact = isAutoCompactEnabled() 1003 const autoCompactThreshold = isAutoCompact 1004 ? getEffectiveContextWindowSize(model) - AUTOCOMPACT_BUFFER_TOKENS 1005 : undefined 1006 1007 // Create categories 1008 const cats: ContextCategory[] = [] 1009 1010 // System prompt is always shown first (fixed overhead) 1011 if (systemPromptTokens > 0) { 1012 cats.push({ 1013 name: 'System prompt', 1014 tokens: systemPromptTokens, 1015 color: 'promptBorder', 1016 }) 1017 } 1018 1019 // Built-in tools right after system prompt (skills shown separately below) 1020 // Ant users get a per-tool breakdown via systemToolDetails 1021 const systemToolsTokens = builtInToolTokens - skillFrontmatterTokens 1022 if (systemToolsTokens > 0) { 1023 cats.push({ 1024 name: 1025 process.env.USER_TYPE === 'ant' 1026 ? '[ANT-ONLY] System tools' 1027 : 'System tools', 1028 tokens: systemToolsTokens, 1029 color: 'inactive', 1030 }) 1031 } 1032 1033 // MCP tools after system tools 1034 if (mcpToolTokens > 0) { 1035 cats.push({ 1036 name: 'MCP tools', 1037 tokens: mcpToolTokens, 1038 color: 'cyan_FOR_SUBAGENTS_ONLY', 1039 }) 1040 } 1041 1042 // Show deferred MCP tools (when tool search is enabled) 1043 // These don't count toward context usage but we show them for visibility 1044 if (deferredToolTokens > 0) { 1045 cats.push({ 1046 name: 'MCP tools (deferred)', 1047 tokens: deferredToolTokens, 1048 color: 'inactive', 1049 isDeferred: true, 1050 }) 1051 } 1052 1053 // Show deferred builtin tools (when tool search is enabled) 1054 if (deferredBuiltinTokens > 0) { 1055 cats.push({ 1056 name: 'System tools (deferred)', 1057 tokens: deferredBuiltinTokens, 1058 color: 'inactive', 1059 isDeferred: true, 1060 }) 1061 } 1062 1063 // Custom agents after MCP tools 1064 if (agentTokens > 0) { 1065 cats.push({ 1066 name: 'Custom agents', 1067 tokens: agentTokens, 1068 color: 'permission', 1069 }) 1070 } 1071 1072 // Memory files after custom agents 1073 if (claudeMdTokens > 0) { 1074 cats.push({ 1075 name: 'Memory files', 1076 tokens: claudeMdTokens, 1077 color: 'claude', 1078 }) 1079 } 1080 1081 // Skills after memory files 1082 if (skillFrontmatterTokens > 0) { 1083 cats.push({ 1084 name: 'Skills', 1085 tokens: skillFrontmatterTokens, 1086 color: 'warning', 1087 }) 1088 } 1089 1090 if (messageTokens !== null && messageTokens > 0) { 1091 cats.push({ 1092 name: 'Messages', 1093 tokens: messageTokens, 1094 color: 'purple_FOR_SUBAGENTS_ONLY', 1095 }) 1096 } 1097 1098 // Calculate actual content usage (before adding reserved buffers) 1099 // Exclude deferred categories from the usage calculation 1100 const actualUsage = cats.reduce( 1101 (sum, cat) => sum + (cat.isDeferred ? 0 : cat.tokens), 1102 0, 1103 ) 1104 1105 // Reserved space after messages (not counted in actualUsage shown to user). 1106 // Under reactive-only mode (cobalt_raccoon), proactive autocompact never 1107 // fires and the reserved buffer is a lie — skip it entirely and let Free 1108 // space fill the grid. feature() guard keeps the flag string out of 1109 // external builds. Same for context-collapse (marble_origami) — collapse 1110 // owns the threshold ladder and autocompact is suppressed in 1111 // shouldAutoCompact, so the 33k buffer shown here would be a lie too. 1112 let reservedTokens = 0 1113 let skipReservedBuffer = false 1114 if (feature('REACTIVE_COMPACT')) { 1115 if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_raccoon', false)) { 1116 skipReservedBuffer = true 1117 } 1118 } 1119 if (feature('CONTEXT_COLLAPSE')) { 1120 /* eslint-disable @typescript-eslint/no-require-imports */ 1121 const { isContextCollapseEnabled } = 1122 require('../services/contextCollapse/index.js') as typeof import('../services/contextCollapse/index.js') 1123 /* eslint-enable @typescript-eslint/no-require-imports */ 1124 if (isContextCollapseEnabled()) { 1125 skipReservedBuffer = true 1126 } 1127 } 1128 if (skipReservedBuffer) { 1129 // No buffer category pushed — reactive compaction is transparent and 1130 // doesn't need a visible reservation in the grid. 1131 } else if (isAutoCompact && autoCompactThreshold !== undefined) { 1132 // Autocompact buffer (from effective context) 1133 reservedTokens = contextWindow - autoCompactThreshold 1134 cats.push({ 1135 name: RESERVED_CATEGORY_NAME, 1136 tokens: reservedTokens, 1137 color: 'inactive', 1138 }) 1139 } else if (!isAutoCompact) { 1140 // Compact buffer reserve (3k from actual context limit) 1141 reservedTokens = MANUAL_COMPACT_BUFFER_TOKENS 1142 cats.push({ 1143 name: MANUAL_COMPACT_BUFFER_NAME, 1144 tokens: reservedTokens, 1145 color: 'inactive', 1146 }) 1147 } 1148 1149 // Calculate free space (subtract both actual usage and reserved buffer) 1150 const freeTokens = Math.max(0, contextWindow - actualUsage - reservedTokens) 1151 1152 cats.push({ 1153 name: 'Free space', 1154 tokens: freeTokens, 1155 color: 'promptBorder', 1156 }) 1157 1158 // Total for display (everything except free space) 1159 const totalIncludingReserved = actualUsage 1160 1161 // Extract API usage from original messages (if provided) to match status line 1162 // This uses the same source of truth as the status line for consistency 1163 const apiUsage = getCurrentUsage(originalMessages ?? messages) 1164 1165 // When API usage is available, use it for total to match status line calculation 1166 // Status line uses: input_tokens + cache_creation_input_tokens + cache_read_input_tokens 1167 const totalFromAPI = apiUsage 1168 ? apiUsage.input_tokens + 1169 apiUsage.cache_creation_input_tokens + 1170 apiUsage.cache_read_input_tokens 1171 : null 1172 1173 // Use API total if available, otherwise fall back to estimated total 1174 const finalTotalTokens = totalFromAPI ?? totalIncludingReserved 1175 1176 // Pre-calculate grid based on model context window and terminal width 1177 // For narrow screens (< 80 cols), use 5x5 for 200k models, 5x10 for 1M+ models 1178 // For normal screens, use 10x10 for 200k models, 20x10 for 1M+ models 1179 const isNarrowScreen = terminalWidth && terminalWidth < 80 1180 const GRID_WIDTH = 1181 contextWindow >= 1000000 1182 ? isNarrowScreen 1183 ? 5 1184 : 20 1185 : isNarrowScreen 1186 ? 5 1187 : 10 1188 const GRID_HEIGHT = contextWindow >= 1000000 ? 10 : isNarrowScreen ? 5 : 10 1189 const TOTAL_SQUARES = GRID_WIDTH * GRID_HEIGHT 1190 1191 // Filter out deferred categories - they don't take up actual context space 1192 // (e.g., MCP tools when tool search is enabled) 1193 const nonDeferredCats = cats.filter(cat => !cat.isDeferred) 1194 1195 // Calculate squares per category (use rawEffectiveMax for visualization to show full context) 1196 const categorySquares = nonDeferredCats.map(cat => ({ 1197 ...cat, 1198 squares: 1199 cat.name === 'Free space' 1200 ? Math.round((cat.tokens / contextWindow) * TOTAL_SQUARES) 1201 : Math.max(1, Math.round((cat.tokens / contextWindow) * TOTAL_SQUARES)), 1202 percentageOfTotal: Math.round((cat.tokens / contextWindow) * 100), 1203 })) 1204 1205 // Helper function to create grid squares for a category 1206 function createCategorySquares( 1207 category: (typeof categorySquares)[0], 1208 ): GridSquare[] { 1209 const squares: GridSquare[] = [] 1210 const exactSquares = (category.tokens / contextWindow) * TOTAL_SQUARES 1211 const wholeSquares = Math.floor(exactSquares) 1212 const fractionalPart = exactSquares - wholeSquares 1213 1214 for (let i = 0; i < category.squares; i++) { 1215 // Determine fullness: full squares get 1.0, partial square gets fractional amount 1216 let squareFullness = 1.0 1217 if (i === wholeSquares && fractionalPart > 0) { 1218 // This is the partial square 1219 squareFullness = fractionalPart 1220 } 1221 1222 squares.push({ 1223 color: category.color, 1224 isFilled: true, 1225 categoryName: category.name, 1226 tokens: category.tokens, 1227 percentage: category.percentageOfTotal, 1228 squareFullness, 1229 }) 1230 } 1231 1232 return squares 1233 } 1234 1235 // Build the grid as an array of squares with full metadata 1236 const gridSquares: GridSquare[] = [] 1237 1238 // Separate reserved category for end placement (either autocompact or manual compact buffer) 1239 const reservedCategory = categorySquares.find( 1240 cat => 1241 cat.name === RESERVED_CATEGORY_NAME || 1242 cat.name === MANUAL_COMPACT_BUFFER_NAME, 1243 ) 1244 const nonReservedCategories = categorySquares.filter( 1245 cat => 1246 cat.name !== RESERVED_CATEGORY_NAME && 1247 cat.name !== MANUAL_COMPACT_BUFFER_NAME && 1248 cat.name !== 'Free space', 1249 ) 1250 1251 // Add all non-reserved, non-free-space squares first 1252 for (const cat of nonReservedCategories) { 1253 const squares = createCategorySquares(cat) 1254 for (const square of squares) { 1255 if (gridSquares.length < TOTAL_SQUARES) { 1256 gridSquares.push(square) 1257 } 1258 } 1259 } 1260 1261 // Calculate how many squares are needed for reserved 1262 const reservedSquareCount = reservedCategory ? reservedCategory.squares : 0 1263 1264 // Fill with free space, leaving room for reserved at the end 1265 const freeSpaceCat = cats.find(c => c.name === 'Free space') 1266 const freeSpaceTarget = TOTAL_SQUARES - reservedSquareCount 1267 1268 while (gridSquares.length < freeSpaceTarget) { 1269 gridSquares.push({ 1270 color: 'promptBorder', 1271 isFilled: true, 1272 categoryName: 'Free space', 1273 tokens: freeSpaceCat?.tokens || 0, 1274 percentage: freeSpaceCat 1275 ? Math.round((freeSpaceCat.tokens / contextWindow) * 100) 1276 : 0, 1277 squareFullness: 1.0, // Free space is always "full" 1278 }) 1279 } 1280 1281 // Add reserved squares at the end 1282 if (reservedCategory) { 1283 const squares = createCategorySquares(reservedCategory) 1284 for (const square of squares) { 1285 if (gridSquares.length < TOTAL_SQUARES) { 1286 gridSquares.push(square) 1287 } 1288 } 1289 } 1290 1291 // Convert to rows for rendering 1292 const gridRows: GridSquare[][] = [] 1293 for (let i = 0; i < GRID_HEIGHT; i++) { 1294 gridRows.push(gridSquares.slice(i * GRID_WIDTH, (i + 1) * GRID_WIDTH)) 1295 } 1296 1297 // Format message breakdown (used by context suggestions for all users) 1298 // Combine tool calls and results, then get top 5 1299 const toolsMap = new Map< 1300 string, 1301 { callTokens: number; resultTokens: number } 1302 >() 1303 1304 // Add call tokens 1305 for (const [name, tokens] of messageBreakdown.toolCallsByType.entries()) { 1306 const existing = toolsMap.get(name) || { callTokens: 0, resultTokens: 0 } 1307 toolsMap.set(name, { ...existing, callTokens: tokens }) 1308 } 1309 1310 // Add result tokens 1311 for (const [name, tokens] of messageBreakdown.toolResultsByType.entries()) { 1312 const existing = toolsMap.get(name) || { callTokens: 0, resultTokens: 0 } 1313 toolsMap.set(name, { ...existing, resultTokens: tokens }) 1314 } 1315 1316 // Convert to array and sort by total tokens (calls + results) 1317 const toolsByTypeArray = Array.from(toolsMap.entries()) 1318 .map(([name, { callTokens, resultTokens }]) => ({ 1319 name, 1320 callTokens, 1321 resultTokens, 1322 })) 1323 .sort( 1324 (a, b) => b.callTokens + b.resultTokens - (a.callTokens + a.resultTokens), 1325 ) 1326 1327 const attachmentsByTypeArray = Array.from( 1328 messageBreakdown.attachmentsByType.entries(), 1329 ) 1330 .map(([name, tokens]) => ({ name, tokens })) 1331 .sort((a, b) => b.tokens - a.tokens) 1332 1333 const formattedMessageBreakdown = { 1334 toolCallTokens: messageBreakdown.toolCallTokens, 1335 toolResultTokens: messageBreakdown.toolResultTokens, 1336 attachmentTokens: messageBreakdown.attachmentTokens, 1337 assistantMessageTokens: messageBreakdown.assistantMessageTokens, 1338 userMessageTokens: messageBreakdown.userMessageTokens, 1339 toolCallsByType: toolsByTypeArray, 1340 attachmentsByType: attachmentsByTypeArray, 1341 } 1342 1343 return { 1344 categories: cats, 1345 totalTokens: finalTotalTokens, 1346 maxTokens: contextWindow, 1347 rawMaxTokens: contextWindow, 1348 percentage: Math.round((finalTotalTokens / contextWindow) * 100), 1349 gridRows, 1350 model: runtimeModel, 1351 memoryFiles: memoryFileDetails, 1352 mcpTools: mcpToolDetails, 1353 deferredBuiltinTools: 1354 process.env.USER_TYPE === 'ant' ? deferredBuiltinDetails : undefined, 1355 systemTools: 1356 process.env.USER_TYPE === 'ant' ? systemToolDetails : undefined, 1357 systemPromptSections: 1358 process.env.USER_TYPE === 'ant' ? systemPromptSections : undefined, 1359 agents: agentDetails, 1360 slashCommands: 1361 slashCommandTokens > 0 1362 ? { 1363 totalCommands: commandInfo.totalCommands, 1364 includedCommands: commandInfo.includedCommands, 1365 tokens: slashCommandTokens, 1366 } 1367 : undefined, 1368 skills: 1369 skillFrontmatterTokens > 0 1370 ? { 1371 totalSkills: skillInfo.totalSkills, 1372 includedSkills: skillInfo.includedSkills, 1373 tokens: skillFrontmatterTokens, 1374 skillFrontmatter: skillInfo.skillFrontmatter, 1375 } 1376 : undefined, 1377 autoCompactThreshold, 1378 isAutoCompactEnabled: isAutoCompact, 1379 messageBreakdown: formattedMessageBreakdown, 1380 apiUsage, 1381 } 1382}