// Source dump of Claude Code (main branch, 530 lines, 20 kB) — viewer chrome
// from the original "view raw" page converted to a comment so the file parses.
1import { feature } from 'bun:bundle' 2import type { ToolResultBlockParam } from '@anthropic-ai/sdk/resources/index.mjs' 3import type { QuerySource } from '../../constants/querySource.js' 4import type { ToolUseContext } from '../../Tool.js' 5import { FILE_EDIT_TOOL_NAME } from '../../tools/FileEditTool/constants.js' 6import { FILE_READ_TOOL_NAME } from '../../tools/FileReadTool/prompt.js' 7import { FILE_WRITE_TOOL_NAME } from '../../tools/FileWriteTool/prompt.js' 8import { GLOB_TOOL_NAME } from '../../tools/GlobTool/prompt.js' 9import { GREP_TOOL_NAME } from '../../tools/GrepTool/prompt.js' 10import { WEB_FETCH_TOOL_NAME } from '../../tools/WebFetchTool/prompt.js' 11import { WEB_SEARCH_TOOL_NAME } from '../../tools/WebSearchTool/prompt.js' 12import type { Message } from '../../types/message.js' 13import { logForDebugging } from '../../utils/debug.js' 14import { getMainLoopModel } from '../../utils/model/model.js' 15import { SHELL_TOOL_NAMES } from '../../utils/shell/shellToolUtils.js' 16import { jsonStringify } from '../../utils/slowOperations.js' 17import { 18 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 19 logEvent, 20} from '../analytics/index.js' 21import { notifyCacheDeletion } from '../api/promptCacheBreakDetection.js' 22import { roughTokenCountEstimation } from '../tokenEstimation.js' 23import { 24 clearCompactWarningSuppression, 25 suppressCompactWarning, 26} from './compactWarningState.js' 27import { 28 getTimeBasedMCConfig, 29 type TimeBasedMCConfig, 30} from './timeBasedMCConfig.js' 31 32// Inline from utils/toolResultStorage.ts — importing that file pulls in 33// sessionStorage → utils/messages → services/api/errors, completing a 34// circular-deps loop back through this file via promptCacheBreakDetection. 35// Drift is caught by a test asserting equality with the source-of-truth. 
// Sentinel written into cleared tool_result blocks by the time-based path.
// Inlined (see note above) to avoid a circular-dependency chain; a test
// asserts it stays equal to the copy in utils/toolResultStorage.ts.
export const TIME_BASED_MC_CLEARED_MESSAGE = '[Old tool result content cleared]'

// Flat per-image/document token estimate used by both estimation helpers.
const IMAGE_MAX_TOKEN_SIZE = 2000

// Only compact these tools
const COMPACTABLE_TOOLS = new Set<string>([
  FILE_READ_TOOL_NAME,
  ...SHELL_TOOL_NAMES,
  GREP_TOOL_NAME,
  GLOB_TOOL_NAME,
  WEB_SEARCH_TOOL_NAME,
  WEB_FETCH_TOOL_NAME,
  FILE_EDIT_TOOL_NAME,
  FILE_WRITE_TOOL_NAME,
])

// --- Cached microcompact state (ant-only, gated by feature('CACHED_MICROCOMPACT')) ---

// Lazy-initialized cached MC module and state to avoid importing in external builds.
// The imports and state live inside feature() checks for dead code elimination.
// NOTE: this state is module-global — shared across all conversations in the
// process — which is why cached MC is restricted to the main thread below.
let cachedMCModule: typeof import('./cachedMicrocompact.js') | null = null
let cachedMCState: import('./cachedMicrocompact.js').CachedMCState | null = null
// Cache-edits block queued for the next API request; consumed (and cleared)
// by consumePendingCacheEdits().
let pendingCacheEdits:
  | import('./cachedMicrocompact.js').CacheEditsBlock
  | null = null

// Dynamically import (and memoize) the cached-microcompact module.
async function getCachedMCModule(): Promise<
  typeof import('./cachedMicrocompact.js')
> {
  if (!cachedMCModule) {
    cachedMCModule = await import('./cachedMicrocompact.js')
  }
  return cachedMCModule
}

// Lazily create the cached-MC state. Throws if the module was never loaded —
// callers must await getCachedMCModule() before calling this.
function ensureCachedMCState(): import('./cachedMicrocompact.js').CachedMCState {
  if (!cachedMCState && cachedMCModule) {
    cachedMCState = cachedMCModule.createCachedMCState()
  }
  if (!cachedMCState) {
    throw new Error(
      'cachedMCState not initialized — getCachedMCModule() must be called first',
    )
  }
  return cachedMCState
}

/**
 * Get new pending cache edits to be included in the next API request.
 * Returns null if there are no new pending edits.
 * Clears the pending state (caller must pin them after insertion).
 */
export function consumePendingCacheEdits():
  | import('./cachedMicrocompact.js').CacheEditsBlock
  | null {
  const edits = pendingCacheEdits
  pendingCacheEdits = null
  return edits
}

/**
 * Get all previously-pinned cache edits that must be re-sent at their
 * original positions for cache hits.
 *
 * Returns the live array from cachedMCState (empty when cached MC was never
 * initialized) — callers should treat it as read-only.
 */
export function getPinnedCacheEdits(): import('./cachedMicrocompact.js').PinnedCacheEdits[] {
  if (!cachedMCState) {
    return []
  }
  return cachedMCState.pinnedEdits
}

/**
 * Pin a new cache_edits block to a specific user message position.
 * Called after inserting new edits so they are re-sent in subsequent calls.
 * No-op when cached-MC state was never initialized.
 */
export function pinCacheEdits(
  userMessageIndex: number,
  block: import('./cachedMicrocompact.js').CacheEditsBlock,
): void {
  if (cachedMCState) {
    cachedMCState.pinnedEdits.push({ userMessageIndex, block })
  }
}

/**
 * Marks all registered tools as sent to the API.
 * Called after a successful API response.
 */
export function markToolsSentToAPIState(): void {
  if (cachedMCState && cachedMCModule) {
    cachedMCModule.markToolsSentToAPI(cachedMCState)
  }
}

/**
 * Reset all module-level microcompact state: clears the cached-MC tool
 * registry (when initialized) and drops any queued-but-unconsumed cache edits.
 */
export function resetMicrocompactState(): void {
  if (cachedMCState && cachedMCModule) {
    cachedMCModule.resetCachedMCState(cachedMCState)
  }
  pendingCacheEdits = null
}

// Helper to calculate tool result tokens. Returns 0 for empty content;
// strings are estimated directly, block arrays are summed per item.
function calculateToolResultTokens(block: ToolResultBlockParam): number {
  if (!block.content) {
    return 0
  }

  if (typeof block.content === 'string') {
    return roughTokenCountEstimation(block.content)
  }

  // Array of TextBlockParam | ImageBlockParam | DocumentBlockParam
  return block.content.reduce((sum, item) => {
    if (item.type === 'text') {
      return sum + roughTokenCountEstimation(item.text)
    } else if (item.type === 'image' || item.type === 'document') {
      // Images/documents are approximately 2000 tokens regardless of format
      return sum + IMAGE_MAX_TOKEN_SIZE
    }
    return sum
  }, 0)
}

/**
 * Estimate token count for messages by extracting text content.
 * Used for rough token estimation when we don't have accurate API counts.
 * Pads estimate by 4/3 to be conservative since we're approximating.
 * Non-user/assistant messages and string-content messages are skipped.
 */
export function estimateMessageTokens(messages: Message[]): number {
  let totalTokens = 0

  for (const message of messages) {
    if (message.type !== 'user' && message.type !== 'assistant') {
      continue
    }

    if (!Array.isArray(message.message.content)) {
      continue
    }

    for (const block of message.message.content) {
      if (block.type === 'text') {
        totalTokens += roughTokenCountEstimation(block.text)
      } else if (block.type === 'tool_result') {
        totalTokens += calculateToolResultTokens(block)
      } else if (block.type === 'image' || block.type === 'document') {
        totalTokens += IMAGE_MAX_TOKEN_SIZE
      } else if (block.type === 'thinking') {
        // Match roughTokenCountEstimationForBlock: count only the thinking
        // text, not the JSON wrapper or signature (signature is metadata,
        // not model-tokenized content).
        totalTokens += roughTokenCountEstimation(block.thinking)
      } else if (block.type === 'redacted_thinking') {
        totalTokens += roughTokenCountEstimation(block.data)
      } else if (block.type === 'tool_use') {
        // Match roughTokenCountEstimationForBlock: count name + input,
        // not the JSON wrapper or id field.
        totalTokens += roughTokenCountEstimation(
          block.name + jsonStringify(block.input ?? {}),
        )
      } else {
        // server_tool_use, web_search_tool_result, etc.
        totalTokens += roughTokenCountEstimation(jsonStringify(block))
      }
    }
  }

  // Pad estimate by 4/3 to be conservative since we're approximating
  return Math.ceil(totalTokens * (4 / 3))
}

// Deferred-compaction info returned to the caller so the boundary message can
// be built after the API response (see cachedMicrocompactPath).
export type PendingCacheEdits = {
  trigger: 'auto'
  deletedToolIds: string[]
  // Baseline cumulative cache_deleted_input_tokens from the previous API response,
  // used to compute the per-operation delta (the API value is sticky/cumulative)
  baselineCacheDeletedTokens: number
}

export type MicrocompactResult = {
  messages: Message[]
  compactionInfo?: {
    pendingCacheEdits?: PendingCacheEdits
  }
}

/**
 * Walk messages and collect tool_use IDs whose tool name is in
 * COMPACTABLE_TOOLS, in encounter order. Shared by both microcompact paths.
 */
function collectCompactableToolIds(messages: Message[]): string[] {
  const ids: string[] = []
  for (const message of messages) {
    if (
      message.type === 'assistant' &&
      Array.isArray(message.message.content)
    ) {
      for (const block of message.message.content) {
        if (block.type === 'tool_use' && COMPACTABLE_TOOLS.has(block.name)) {
          ids.push(block.id)
        }
      }
    }
  }
  return ids
}

// Prefix-match because promptCategory.ts sets the querySource to
// 'repl_main_thread:outputStyle:<style>' when a non-default output style
// is active. The bare 'repl_main_thread' is only used for the default style.
// query.ts:350/1451 use the same startsWith pattern; the pre-existing
// cached-MC `=== 'repl_main_thread'` check was a latent bug — users with a
// non-default output style were silently excluded from cached MC.
// NOTE: undefined is treated as main-thread (cached-MC backward-compat);
// evaluateTimeBasedTrigger additionally requires an explicit source.
function isMainThreadSource(querySource: QuerySource | undefined): boolean {
  return !querySource || querySource.startsWith('repl_main_thread')
}

/**
 * Entry point: run at most one microcompact strategy for this request.
 * Order: time-based (cold cache) first, then cached MC (warm cache,
 * feature-gated), else return messages unchanged.
 */
export async function microcompactMessages(
  messages: Message[],
  toolUseContext?: ToolUseContext,
  querySource?: QuerySource,
): Promise<MicrocompactResult> {
  // Clear suppression flag at start of new microcompact attempt
  clearCompactWarningSuppression()

  // Time-based trigger runs first and short-circuits. If the gap since the
  // last assistant message exceeds the threshold, the server cache has expired
  // and the full prefix will be rewritten regardless — so content-clear old
  // tool results now, before the request, to shrink what gets rewritten.
  // Cached MC (cache-editing) is skipped when this fires: editing assumes a
  // warm cache, and we just established it's cold.
  const timeBasedResult = maybeTimeBasedMicrocompact(messages, querySource)
  if (timeBasedResult) {
    return timeBasedResult
  }

  // Only run cached MC for the main thread to prevent forked agents
  // (session_memory, prompt_suggestion, etc.) from registering their
  // tool_results in the global cachedMCState, which would cause the main
  // thread to try deleting tools that don't exist in its own conversation.
  if (feature('CACHED_MICROCOMPACT')) {
    const mod = await getCachedMCModule()
    const model = toolUseContext?.options.mainLoopModel ?? getMainLoopModel()
    if (
      mod.isCachedMicrocompactEnabled() &&
      mod.isModelSupportedForCacheEditing(model) &&
      isMainThreadSource(querySource)
    ) {
      return await cachedMicrocompactPath(messages, querySource)
    }
  }

  // Legacy microcompact path removed — tengu_cache_plum_violet is always true.
  // For contexts where cached microcompact is not available (external builds,
  // non-ant users, unsupported models, sub-agents), no compaction happens here;
  // autocompact handles context pressure instead.
  return { messages }
}

/**
 * Cached microcompact path - uses cache editing API to remove tool results
 * without invalidating the cached prefix.
 *
 * Key differences from regular microcompact:
 * - Does NOT modify local message content (cache_reference and cache_edits are added at API layer)
 * - Uses count-based trigger/keep thresholds from GrowthBook config
 * - Takes precedence over regular microcompact (no disk persistence)
 * - Tracks tool results and queues cache edits for the API layer
 */
async function cachedMicrocompactPath(
  messages: Message[],
  querySource: QuerySource | undefined,
): Promise<MicrocompactResult> {
  const mod = await getCachedMCModule()
  const state = ensureCachedMCState()
  const config = mod.getCachedMCConfig()

  const compactableToolIds = new Set(collectCompactableToolIds(messages))
  // Second pass: register tool results grouped by user message
  for (const message of messages) {
    if (message.type === 'user' && Array.isArray(message.message.content)) {
      const groupIds: string[] = []
      for (const block of message.message.content) {
        if (
          block.type === 'tool_result' &&
          compactableToolIds.has(block.tool_use_id) &&
          !state.registeredTools.has(block.tool_use_id)
        ) {
          mod.registerToolResult(state, block.tool_use_id)
          groupIds.push(block.tool_use_id)
        }
      }
      // Called even with an empty groupIds — presumably the module tracks
      // message positions as well as tool IDs; confirm against
      // cachedMicrocompact.ts before changing.
      mod.registerToolMessage(state, groupIds)
    }
  }

  const toolsToDelete = mod.getToolResultsToDelete(state)

  if (toolsToDelete.length > 0) {
    // Create and queue the cache_edits block for the API layer
    const cacheEdits = mod.createCacheEditsBlock(state, toolsToDelete)
    if (cacheEdits) {
      pendingCacheEdits = cacheEdits
    }

    logForDebugging(
      `Cached MC deleting ${toolsToDelete.length} tool(s): ${toolsToDelete.join(', ')}`,
    )

    // Log the event
    logEvent('tengu_cached_microcompact', {
      toolsDeleted: toolsToDelete.length,
      deletedToolIds: toolsToDelete.join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      activeToolCount: state.toolOrder.length - state.deletedRefs.size,
      triggerType:
        'auto' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      threshold: config.triggerThreshold,
      keepRecent: config.keepRecent,
    })

    // Suppress warning after successful compaction
    suppressCompactWarning()

    // Notify cache break detection that cache reads will legitimately drop
    if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
      // Pass the actual querySource — isMainThreadSource now prefix-matches
      // so output-style variants enter here, and getTrackingKey keys on the
      // full source string, not the 'repl_main_thread' prefix.
      notifyCacheDeletion(querySource ?? 'repl_main_thread')
    }

    // Return messages unchanged - cache_reference and cache_edits are added at API layer
    // Boundary message is deferred until after API response so we can use
    // actual cache_deleted_input_tokens from the API instead of client-side estimates
    // Capture the baseline cumulative cache_deleted_input_tokens from the last
    // assistant message so we can compute a per-operation delta after the API call
    const lastAsst = messages.findLast(m => m.type === 'assistant')
    const baseline =
      lastAsst?.type === 'assistant'
        ? ((
            // cache_deleted_input_tokens is not in the SDK's Usage type yet,
            // hence the Record cast rather than a direct property access.
            lastAsst.message.usage as unknown as Record<
              string,
              number | undefined
            >
          )?.cache_deleted_input_tokens ?? 0)
        : 0

    return {
      messages,
      compactionInfo: {
        pendingCacheEdits: {
          trigger: 'auto',
          deletedToolIds: toolsToDelete,
          baselineCacheDeletedTokens: baseline,
        },
      },
    }
  }

  // No compaction needed, return messages unchanged
  return { messages }
}

/**
 * Check whether the time-based trigger should fire for this request.
 *
 * Returns the measured gap (minutes since last assistant message) when the
 * trigger fires, or null when it doesn't (disabled, wrong source, under
 * threshold, no prior assistant, unparseable timestamp).
 *
 * Extracted so other pre-request paths (e.g. snip force-apply) can consult
 * the same predicate without coupling to the tool-result clearing action.
 */
export function evaluateTimeBasedTrigger(
  messages: Message[],
  querySource: QuerySource | undefined,
): { gapMinutes: number; config: TimeBasedMCConfig } | null {
  const config = getTimeBasedMCConfig()
  // Require an explicit main-thread querySource. isMainThreadSource treats
  // undefined as main-thread (for cached-MC backward-compat), but several
  // callers (/context, /compact, analyzeContext) invoke microcompactMessages
  // without a source for analysis-only purposes — they should not trigger.
  if (!config.enabled || !querySource || !isMainThreadSource(querySource)) {
    return null
  }
  const lastAssistant = messages.findLast(m => m.type === 'assistant')
  if (!lastAssistant) {
    return null
  }
  const gapMinutes =
    (Date.now() - new Date(lastAssistant.timestamp).getTime()) / 60_000
  // isFinite also rejects NaN from an unparseable timestamp.
  if (!Number.isFinite(gapMinutes) || gapMinutes < config.gapThresholdMinutes) {
    return null
  }
  return { gapMinutes, config }
}

/**
 * Time-based microcompact: when the gap since the last main-loop assistant
 * message exceeds the configured threshold, content-clear all but the most
 * recent N compactable tool results.
 *
 * Returns null when the trigger doesn't fire (disabled, wrong source, gap
 * under threshold, nothing to clear) — caller falls through to other paths.
 *
 * Unlike cached MC, this mutates message content directly. The cache is cold,
 * so there's no cached prefix to preserve via cache_edits.
 */
function maybeTimeBasedMicrocompact(
  messages: Message[],
  querySource: QuerySource | undefined,
): MicrocompactResult | null {
  const trigger = evaluateTimeBasedTrigger(messages, querySource)
  if (!trigger) {
    return null
  }
  const { gapMinutes, config } = trigger

  const compactableIds = collectCompactableToolIds(messages)

  // Floor at 1: slice(-0) returns the full array (paradoxically keeps
  // everything), and clearing ALL results leaves the model with zero working
  // context. Neither degenerate is sensible — always keep at least the last.
  const keepRecent = Math.max(1, config.keepRecent)
  const keepSet = new Set(compactableIds.slice(-keepRecent))
  const clearSet = new Set(compactableIds.filter(id => !keepSet.has(id)))

  if (clearSet.size === 0) {
    return null
  }

  // Rebuild only the user messages that contain a to-be-cleared tool_result;
  // untouched messages are returned by reference.
  let tokensSaved = 0
  const result: Message[] = messages.map(message => {
    if (message.type !== 'user' || !Array.isArray(message.message.content)) {
      return message
    }
    let touched = false
    const newContent = message.message.content.map(block => {
      if (
        block.type === 'tool_result' &&
        clearSet.has(block.tool_use_id) &&
        // Skip blocks already cleared on a previous pass.
        block.content !== TIME_BASED_MC_CLEARED_MESSAGE
      ) {
        tokensSaved += calculateToolResultTokens(block)
        touched = true
        return { ...block, content: TIME_BASED_MC_CLEARED_MESSAGE }
      }
      return block
    })
    if (!touched) return message
    return {
      ...message,
      message: { ...message.message, content: newContent },
    }
  })

  // Nothing of substance was cleared (all candidates empty or already
  // cleared) — discard the rewrite and report "didn't fire".
  if (tokensSaved === 0) {
    return null
  }

  logEvent('tengu_time_based_microcompact', {
    gapMinutes: Math.round(gapMinutes),
    gapThresholdMinutes: config.gapThresholdMinutes,
    toolsCleared: clearSet.size,
    toolsKept: keepSet.size,
    keepRecent: config.keepRecent,
    tokensSaved,
  })

  logForDebugging(
    `[TIME-BASED MC] gap ${Math.round(gapMinutes)}min > ${config.gapThresholdMinutes}min, cleared ${clearSet.size} tool results (~${tokensSaved} tokens), kept last ${keepSet.size}`,
  )

  suppressCompactWarning()
  // Cached-MC state (module-level) holds tool IDs registered on prior turns.
  // We just content-cleared some of those tools AND invalidated the server
  // cache by changing prompt content. If cached-MC runs next turn with the
  // stale state, it would try to cache_edit tools whose server-side entries
  // no longer exist. Reset it.
  resetMicrocompactState()
  // We just changed the prompt content — the next response's cache read will
  // be low, but that's us, not a break. Tell the detector to expect a drop.
  // notifyCacheDeletion (not notifyCompaction) because it's already imported
  // here and achieves the same false-positive suppression — adding the second
  // symbol to the import was flagged by the circular-deps check.
  // Pass the actual querySource: getTrackingKey returns the full source string
  // (e.g. 'repl_main_thread:outputStyle:custom'), not just the prefix.
  if (feature('PROMPT_CACHE_BREAK_DETECTION') && querySource) {
    notifyCacheDeletion(querySource)
  }

  return { messages: result }
}