// source dump of claude code (main branch, 727 lines, 26 kB)
1import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs' 2import type { TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs' 3import { createPatch } from 'diff' 4import { mkdir, writeFile } from 'fs/promises' 5import { join } from 'path' 6import type { AgentId } from 'src/types/ids.js' 7import type { Message } from 'src/types/message.js' 8import { logForDebugging } from 'src/utils/debug.js' 9import { djb2Hash } from 'src/utils/hash.js' 10import { logError } from 'src/utils/log.js' 11import { getClaudeTempDir } from 'src/utils/permissions/filesystem.js' 12import { jsonStringify } from 'src/utils/slowOperations.js' 13import type { QuerySource } from '../../constants/querySource.js' 14import { 15 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 16 logEvent, 17} from '../analytics/index.js' 18 19function getCacheBreakDiffPath(): string { 20 const chars = 'abcdefghijklmnopqrstuvwxyz0123456789' 21 let suffix = '' 22 for (let i = 0; i < 4; i++) { 23 suffix += chars[Math.floor(Math.random() * chars.length)] 24 } 25 return join(getClaudeTempDir(), `cache-break-${suffix}.diff`) 26} 27 28type PreviousState = { 29 systemHash: number 30 toolsHash: number 31 /** Hash of system blocks WITH cache_control intact. Catches scope/TTL flips 32 * (global↔org, 1h↔5m) that stripCacheControl erases from systemHash. */ 33 cacheControlHash: number 34 toolNames: string[] 35 /** Per-tool schema hash. Diffed to name which tool's description changed 36 * when toolSchemasChanged but added=removed=0 (77% of tool breaks per 37 * BQ 2026-03-22). AgentTool/SkillTool embed dynamic agent/command lists. */ 38 perToolHashes: Record<string, number> 39 systemCharCount: number 40 model: string 41 fastMode: boolean 42 /** 'tool_based' | 'system_prompt' | 'none' — flips when MCP tools are 43 * discovered/removed. */ 44 globalCacheStrategy: string 45 /** Sorted beta header list. Diffed to show which headers were added/removed. 
*/ 46 betas: string[] 47 /** AFK_MODE_BETA_HEADER presence — should NOT break cache anymore 48 * (sticky-on latched in claude.ts). Tracked to verify the fix. */ 49 autoModeActive: boolean 50 /** Overage state flip — should NOT break cache anymore (eligibility is 51 * latched session-stable in should1hCacheTTL). Tracked to verify the fix. */ 52 isUsingOverage: boolean 53 /** Cache-editing beta header presence — should NOT break cache anymore 54 * (sticky-on latched in claude.ts). Tracked to verify the fix. */ 55 cachedMCEnabled: boolean 56 /** Resolved effort (env → options → model default). Goes into output_config 57 * or anthropic_internal.effort_override. */ 58 effortValue: string 59 /** Hash of getExtraBodyParams() — catches CLAUDE_CODE_EXTRA_BODY and 60 * anthropic_internal changes. */ 61 extraBodyHash: number 62 callCount: number 63 pendingChanges: PendingChanges | null 64 prevCacheReadTokens: number | null 65 /** Set when cached microcompact sends cache_edits deletions. Cache reads 66 * will legitimately drop — this is expected, not a break. 
*/ 67 cacheDeletionsPending: boolean 68 buildDiffableContent: () => string 69} 70 71type PendingChanges = { 72 systemPromptChanged: boolean 73 toolSchemasChanged: boolean 74 modelChanged: boolean 75 fastModeChanged: boolean 76 cacheControlChanged: boolean 77 globalCacheStrategyChanged: boolean 78 betasChanged: boolean 79 autoModeChanged: boolean 80 overageChanged: boolean 81 cachedMCChanged: boolean 82 effortChanged: boolean 83 extraBodyChanged: boolean 84 addedToolCount: number 85 removedToolCount: number 86 systemCharDelta: number 87 addedTools: string[] 88 removedTools: string[] 89 changedToolSchemas: string[] 90 previousModel: string 91 newModel: string 92 prevGlobalCacheStrategy: string 93 newGlobalCacheStrategy: string 94 addedBetas: string[] 95 removedBetas: string[] 96 prevEffortValue: string 97 newEffortValue: string 98 buildPrevDiffableContent: () => string 99} 100 101const previousStateBySource = new Map<string, PreviousState>() 102 103// Cap the number of tracked sources to prevent unbounded memory growth. 104// Each entry stores a ~300KB+ diffableContent string (serialized system prompt 105// + tool schemas). Without a cap, spawning many subagents (each with a unique 106// agentId key) causes the map to grow indefinitely. 107const MAX_TRACKED_SOURCES = 10 108 109const TRACKED_SOURCE_PREFIXES = [ 110 'repl_main_thread', 111 'sdk', 112 'agent:custom', 113 'agent:default', 114 'agent:builtin', 115] 116 117// Minimum absolute token drop required to trigger a cache break warning. 118// Small drops (e.g., a few thousand tokens) can happen due to normal variation 119// and aren't worth alerting on. 120const MIN_CACHE_MISS_TOKENS = 2_000 121 122// Anthropic's server-side prompt cache TTL thresholds to test. 123// Cache breaks after these durations are likely due to TTL expiration 124// rather than client-side changes. 
125const CACHE_TTL_5MIN_MS = 5 * 60 * 1000 126export const CACHE_TTL_1HOUR_MS = 60 * 60 * 1000 127 128// Models to exclude from cache break detection (e.g., haiku has different caching behavior) 129function isExcludedModel(model: string): boolean { 130 return model.includes('haiku') 131} 132 133/** 134 * Returns the tracking key for a querySource, or null if untracked. 135 * Compact shares the same server-side cache as repl_main_thread 136 * (same cacheSafeParams), so they share tracking state. 137 * 138 * For subagents with a tracked querySource, uses the unique agentId to 139 * isolate tracking state. This prevents false positive cache break 140 * notifications when multiple instances of the same agent type run 141 * concurrently. 142 * 143 * Untracked sources (speculation, session_memory, prompt_suggestion, etc.) 144 * are short-lived forked agents where cache break detection provides no 145 * value — they run 1-3 turns with a fresh agentId each time, so there's 146 * nothing meaningful to compare against. Their cache metrics are still 147 * logged via tengu_api_success for analytics. 148 */ 149function getTrackingKey( 150 querySource: QuerySource, 151 agentId?: AgentId, 152): string | null { 153 if (querySource === 'compact') return 'repl_main_thread' 154 for (const prefix of TRACKED_SOURCE_PREFIXES) { 155 if (querySource.startsWith(prefix)) return agentId || querySource 156 } 157 return null 158} 159 160function stripCacheControl( 161 items: ReadonlyArray<Record<string, unknown>>, 162): unknown[] { 163 return items.map(item => { 164 if (!('cache_control' in item)) return item 165 const { cache_control: _, ...rest } = item 166 return rest 167 }) 168} 169 170function computeHash(data: unknown): number { 171 const str = jsonStringify(data) 172 if (typeof Bun !== 'undefined') { 173 const hash = Bun.hash(str) 174 // Bun.hash can return bigint for large inputs; convert to number safely 175 return typeof hash === 'bigint' ? 
Number(hash & 0xffffffffn) : hash 176 } 177 // Fallback for non-Bun runtimes (e.g. Node.js via npm global install) 178 return djb2Hash(str) 179} 180 181/** MCP tool names are user-controlled (server config) and may leak filepaths. 182 * Collapse them to 'mcp'; built-in names are a fixed vocabulary. */ 183function sanitizeToolName(name: string): string { 184 return name.startsWith('mcp__') ? 'mcp' : name 185} 186 187function computePerToolHashes( 188 strippedTools: ReadonlyArray<unknown>, 189 names: string[], 190): Record<string, number> { 191 const hashes: Record<string, number> = {} 192 for (let i = 0; i < strippedTools.length; i++) { 193 hashes[names[i] ?? `__idx_${i}`] = computeHash(strippedTools[i]) 194 } 195 return hashes 196} 197 198function getSystemCharCount(system: TextBlockParam[]): number { 199 let total = 0 200 for (const block of system) { 201 total += block.text.length 202 } 203 return total 204} 205 206function buildDiffableContent( 207 system: TextBlockParam[], 208 tools: BetaToolUnion[], 209 model: string, 210): string { 211 const systemText = system.map(b => b.text).join('\n\n') 212 const toolDetails = tools 213 .map(t => { 214 if (!('name' in t)) return 'unknown' 215 const desc = 'description' in t ? t.description : '' 216 const schema = 'input_schema' in t ? jsonStringify(t.input_schema) : '' 217 return `${t.name}\n description: ${desc}\n input_schema: ${schema}` 218 }) 219 .sort() 220 .join('\n\n') 221 return `Model: ${model}\n\n=== System Prompt ===\n\n${systemText}\n\n=== Tools (${tools.length}) ===\n\n${toolDetails}\n` 222} 223 224/** Extended tracking snapshot — everything that could affect the server-side 225 * cache key that we can observe from the client. All fields are optional so 226 * the call site can add incrementally; undefined fields compare as stable. 
 */
export type PromptStateSnapshot = {
  system: TextBlockParam[]
  toolSchemas: BetaToolUnion[]
  querySource: QuerySource
  model: string
  agentId?: AgentId
  fastMode?: boolean
  globalCacheStrategy?: string
  betas?: readonly string[]
  autoModeActive?: boolean
  isUsingOverage?: boolean
  cachedMCEnabled?: boolean
  // Normalized to a string internally before comparison.
  effortValue?: string | number
  cachedMCEnabled above and extraBodyParams below are hashed/compared, never logged raw.
  extraBodyParams?: unknown
}

/**
 * Phase 1 (pre-call): Record the current prompt/tool state and detect what changed.
 * Does NOT fire events — just stores pending changes for phase 2 to use.
 */
export function recordPromptState(snapshot: PromptStateSnapshot): void {
  try {
    const {
      system,
      toolSchemas,
      querySource,
      model,
      agentId,
      fastMode,
      globalCacheStrategy = '',
      betas = [],
      autoModeActive = false,
      isUsingOverage = false,
      cachedMCEnabled = false,
      effortValue,
      extraBodyParams,
    } = snapshot
    const key = getTrackingKey(querySource, agentId)
    if (!key) return

    // Hash content with cache_control removed so scope/TTL annotations don't
    // pollute the content hashes; cache_control is hashed separately below.
    const strippedSystem = stripCacheControl(
      system as unknown as ReadonlyArray<Record<string, unknown>>,
    )
    const strippedTools = stripCacheControl(
      toolSchemas as unknown as ReadonlyArray<Record<string, unknown>>,
    )

    const systemHash = computeHash(strippedSystem)
    const toolsHash = computeHash(strippedTools)
    // Hash the full system array INCLUDING cache_control — this catches
    // scope flips (global↔org/none) and TTL flips (1h↔5m) that the stripped
    // hash can't see because the text content is identical.
    const cacheControlHash = computeHash(
      system.map(b => ('cache_control' in b ? b.cache_control : null)),
    )
    const toolNames = toolSchemas.map(t => ('name' in t ? t.name : 'unknown'))
    // Only compute per-tool hashes when the aggregate changed — common case
    // (tools unchanged) skips N extra jsonStringify calls.
    const computeToolHashes = () =>
      computePerToolHashes(strippedTools, toolNames)
    const systemCharCount = getSystemCharCount(system)
    // Lazy: serializing the full prompt state is expensive; only done if a
    // break is actually detected in phase 2.
    const lazyDiffableContent = () =>
      buildDiffableContent(system, toolSchemas, model)
    const isFastMode = fastMode ?? false
    const sortedBetas = [...betas].sort()
    const effortStr = effortValue === undefined ? '' : String(effortValue)
    const extraBodyHash =
      extraBodyParams === undefined ? 0 : computeHash(extraBodyParams)

    const prev = previousStateBySource.get(key)

    if (!prev) {
      // Evict oldest entries if map is at capacity (Map preserves insertion
      // order, so keys().next() is the oldest inserted entry).
      while (previousStateBySource.size >= MAX_TRACKED_SOURCES) {
        const oldest = previousStateBySource.keys().next().value
        if (oldest !== undefined) previousStateBySource.delete(oldest)
      }

      // First observation for this key: seed the baseline, nothing to diff.
      previousStateBySource.set(key, {
        systemHash,
        toolsHash,
        cacheControlHash,
        toolNames,
        systemCharCount,
        model,
        fastMode: isFastMode,
        globalCacheStrategy,
        betas: sortedBetas,
        autoModeActive,
        isUsingOverage,
        cachedMCEnabled,
        effortValue: effortStr,
        extraBodyHash,
        callCount: 1,
        pendingChanges: null,
        prevCacheReadTokens: null,
        cacheDeletionsPending: false,
        buildDiffableContent: lazyDiffableContent,
        perToolHashes: computeToolHashes(),
      })
      return
    }

    prev.callCount++

    const systemPromptChanged = systemHash !== prev.systemHash
    const toolSchemasChanged = toolsHash !== prev.toolsHash
    const modelChanged = model !== prev.model
    const fastModeChanged = isFastMode !== prev.fastMode
    const cacheControlChanged = cacheControlHash !== prev.cacheControlHash
    const globalCacheStrategyChanged =
      globalCacheStrategy !== prev.globalCacheStrategy
    // Both sides are sorted, so an element-wise compare is an order-insensitive
    // set comparison here.
    const betasChanged =
      sortedBetas.length !== prev.betas.length ||
      sortedBetas.some((b, i) => b !== prev.betas[i])
    const autoModeChanged = autoModeActive !== prev.autoModeActive
    const overageChanged = isUsingOverage !== prev.isUsingOverage
    const cachedMCChanged = cachedMCEnabled !== prev.cachedMCEnabled
    const effortChanged = effortStr !== prev.effortValue
    const extraBodyChanged = extraBodyHash !== prev.extraBodyHash

    if (
      systemPromptChanged ||
      toolSchemasChanged ||
      modelChanged ||
      fastModeChanged ||
      cacheControlChanged ||
      globalCacheStrategyChanged ||
      betasChanged ||
      autoModeChanged ||
      overageChanged ||
      cachedMCChanged ||
      effortChanged ||
      extraBodyChanged
    ) {
      const prevToolSet = new Set(prev.toolNames)
      const newToolSet = new Set(toolNames)
      const prevBetaSet = new Set(prev.betas)
      const newBetaSet = new Set(sortedBetas)
      const addedTools = toolNames.filter(n => !prevToolSet.has(n))
      const removedTools = prev.toolNames.filter(n => !newToolSet.has(n))
      const changedToolSchemas: string[] = []
      if (toolSchemasChanged) {
        // Name the specific tools whose schema hash moved (only tools present
        // in both snapshots — added tools are reported separately).
        const newHashes = computeToolHashes()
        for (const name of toolNames) {
          if (!prevToolSet.has(name)) continue
          if (newHashes[name] !== prev.perToolHashes[name]) {
            changedToolSchemas.push(name)
          }
        }
        prev.perToolHashes = newHashes
      }
      prev.pendingChanges = {
        systemPromptChanged,
        toolSchemasChanged,
        modelChanged,
        fastModeChanged,
        cacheControlChanged,
        globalCacheStrategyChanged,
        betasChanged,
        autoModeChanged,
        overageChanged,
        cachedMCChanged,
        effortChanged,
        extraBodyChanged,
        addedToolCount: addedTools.length,
        removedToolCount: removedTools.length,
        addedTools,
        removedTools,
        changedToolSchemas,
        systemCharDelta: systemCharCount - prev.systemCharCount,
        previousModel: prev.model,
        newModel: model,
        prevGlobalCacheStrategy: prev.globalCacheStrategy,
        newGlobalCacheStrategy: globalCacheStrategy,
        addedBetas: sortedBetas.filter(b => !prevBetaSet.has(b)),
        removedBetas: prev.betas.filter(b => !newBetaSet.has(b)),
        prevEffortValue: prev.effortValue,
        newEffortValue: effortStr,
        // Capture the OLD snapshot's renderer before it is overwritten below,
        // so phase 2 can diff before vs. after.
        buildPrevDiffableContent: prev.buildDiffableContent,
      }
    } else {
      prev.pendingChanges = null
    }

    // Roll the baseline forward regardless of whether anything changed.
    prev.systemHash = systemHash
    prev.toolsHash = toolsHash
    prev.cacheControlHash = cacheControlHash
    prev.toolNames = toolNames
    prev.systemCharCount = systemCharCount
    prev.model = model
    prev.fastMode = isFastMode
    prev.globalCacheStrategy = globalCacheStrategy
    prev.betas = sortedBetas
    prev.autoModeActive = autoModeActive
    prev.isUsingOverage = isUsingOverage
    prev.cachedMCEnabled = cachedMCEnabled
    prev.effortValue = effortStr
    prev.extraBodyHash = extraBodyHash
    prev.buildDiffableContent = lazyDiffableContent
  } catch (e: unknown) {
    // Detection is best-effort telemetry — never let it break the request path.
    logError(e)
  }
}

/**
 * Phase 2 (post-call): Check the API response's cache tokens to determine
 * if a cache break actually occurred. If it did, use the pending changes
 * from phase 1 to explain why.
 */
export async function checkResponseForCacheBreak(
  querySource: QuerySource,
  cacheReadTokens: number,
  cacheCreationTokens: number,
  messages: Message[],
  agentId?: AgentId,
  requestId?: string | null,
): Promise<void> {
  try {
    const key = getTrackingKey(querySource, agentId)
    if (!key) return

    const state = previousStateBySource.get(key)
    if (!state) return

    // Skip excluded models (e.g., haiku has different caching behavior)
    if (isExcludedModel(state.model)) return

    const prevCacheRead = state.prevCacheReadTokens
    state.prevCacheReadTokens = cacheReadTokens

    // Calculate time since last call for TTL detection by finding the most recent
    // assistant message timestamp in the messages array (before the current response)
    const lastAssistantMessage = messages.findLast(m => m.type === 'assistant')
    const timeSinceLastAssistantMsg = lastAssistantMessage
      ? Date.now() - new Date(lastAssistantMessage.timestamp).getTime()
      : null

    // Skip the first call — no previous value to compare against
    if (prevCacheRead === null) return

    const changes = state.pendingChanges

    // Cache deletions via cached microcompact intentionally reduce the cached
    // prefix. The drop in cache read tokens is expected — reset the baseline
    // so we don't false-positive on the next call.
    if (state.cacheDeletionsPending) {
      state.cacheDeletionsPending = false
      // NOTE(review): prev/new values appear concatenated without a separator
      // here and in similar templates below — a separator character (likely an
      // arrow) may have been lost in this dump; confirm against upstream.
      logForDebugging(
        `[PROMPT CACHE] cache deletion applied, cache read: ${prevCacheRead}${cacheReadTokens} (expected drop)`,
      )
      // Don't flag as a break — the remaining state is still valid
      state.pendingChanges = null
      return
    }

    // Detect a cache break: cache read dropped >5% from previous AND
    // the absolute drop exceeds the minimum threshold.
    const tokenDrop = prevCacheRead - cacheReadTokens
    if (
      cacheReadTokens >= prevCacheRead * 0.95 ||
      tokenDrop < MIN_CACHE_MISS_TOKENS
    ) {
      state.pendingChanges = null
      return
    }

    // Build explanation from pending changes (if any)
    const parts: string[] = []
    if (changes) {
      if (changes.modelChanged) {
        parts.push(
          `model changed (${changes.previousModel}${changes.newModel})`,
        )
      }
      if (changes.systemPromptChanged) {
        const charDelta = changes.systemCharDelta
        const charInfo =
          charDelta === 0
            ? ''
            : charDelta > 0
              ? ` (+${charDelta} chars)`
              : ` (${charDelta} chars)`
        parts.push(`system prompt changed${charInfo}`)
      }
      if (changes.toolSchemasChanged) {
        const toolDiff =
          changes.addedToolCount > 0 || changes.removedToolCount > 0
            ? ` (+${changes.addedToolCount}/-${changes.removedToolCount} tools)`
            : ' (tool prompt/schema changed, same tool set)'
        parts.push(`tools changed${toolDiff}`)
      }
      if (changes.fastModeChanged) {
        parts.push('fast mode toggled')
      }
      if (changes.globalCacheStrategyChanged) {
        parts.push(
          `global cache strategy changed (${changes.prevGlobalCacheStrategy || 'none'}${changes.newGlobalCacheStrategy || 'none'})`,
        )
      }
      if (
        changes.cacheControlChanged &&
        !changes.globalCacheStrategyChanged &&
        !changes.systemPromptChanged
      ) {
        // Only report as standalone cause if nothing else explains it —
        // otherwise the scope/TTL flip is a consequence, not the root cause.
        parts.push('cache_control changed (scope or TTL)')
      }
      if (changes.betasChanged) {
        const added = changes.addedBetas.length
          ? `+${changes.addedBetas.join(',')}`
          : ''
        const removed = changes.removedBetas.length
          ? `-${changes.removedBetas.join(',')}`
          : ''
        const diff = [added, removed].filter(Boolean).join(' ')
        parts.push(`betas changed${diff ? ` (${diff})` : ''}`)
      }
      if (changes.autoModeChanged) {
        parts.push('auto mode toggled')
      }
      if (changes.overageChanged) {
        parts.push('overage state changed (TTL latched, no flip)')
      }
      if (changes.cachedMCChanged) {
        parts.push('cached microcompact toggled')
      }
      if (changes.effortChanged) {
        parts.push(
          `effort changed (${changes.prevEffortValue || 'default'}${changes.newEffortValue || 'default'})`,
        )
      }
      if (changes.extraBodyChanged) {
        parts.push('extra body params changed')
      }
    }

    // Check if time gap suggests TTL expiration
    const lastAssistantMsgOver5minAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_5MIN_MS
    const lastAssistantMsgOver1hAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_1HOUR_MS

    // Post PR #19823 BQ analysis (bq-queries/prompt-caching/cache_break_pr19823_analysis.sql):
    // when all client-side flags are false and the gap is under TTL, ~90% of breaks
    // are server-side routing/eviction or billed/inference disagreement. Label
    // accordingly instead of implying a CC bug hunt.
    let reason: string
    if (parts.length > 0) {
      reason = parts.join(', ')
    } else if (lastAssistantMsgOver1hAgo) {
      reason = 'possible 1h TTL expiry (prompt unchanged)'
    } else if (lastAssistantMsgOver5minAgo) {
      reason = 'possible 5min TTL expiry (prompt unchanged)'
    } else if (timeSinceLastAssistantMsg !== null) {
      reason = 'likely server-side (prompt unchanged, <5min gap)'
    } else {
      reason = 'unknown cause'
    }

    logEvent('tengu_prompt_cache_break', {
      systemPromptChanged: changes?.systemPromptChanged ?? false,
      toolSchemasChanged: changes?.toolSchemasChanged ?? false,
      modelChanged: changes?.modelChanged ?? false,
      fastModeChanged: changes?.fastModeChanged ?? false,
      cacheControlChanged: changes?.cacheControlChanged ?? false,
      globalCacheStrategyChanged: changes?.globalCacheStrategyChanged ?? false,
      betasChanged: changes?.betasChanged ?? false,
      autoModeChanged: changes?.autoModeChanged ?? false,
      overageChanged: changes?.overageChanged ?? false,
      cachedMCChanged: changes?.cachedMCChanged ?? false,
      effortChanged: changes?.effortChanged ?? false,
      extraBodyChanged: changes?.extraBodyChanged ?? false,
      addedToolCount: changes?.addedToolCount ?? 0,
      removedToolCount: changes?.removedToolCount ?? 0,
      systemCharDelta: changes?.systemCharDelta ?? 0,
      // Tool names are sanitized: built-in names are a fixed vocabulary,
      // MCP tools collapse to 'mcp' (user-configured, could leak paths).
      addedTools: (changes?.addedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedTools: (changes?.removedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      changedToolSchemas: (changes?.changedToolSchemas ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      // Beta header names and cache strategy are fixed enum-like values,
      // not code or filepaths. requestId is an opaque server-generated ID.
      addedBetas: (changes?.addedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedBetas: (changes?.removedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      prevGlobalCacheStrategy: (changes?.prevGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      newGlobalCacheStrategy: (changes?.newGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      callNumber: state.callCount,
      prevCacheReadTokens: prevCacheRead,
      cacheReadTokens,
      cacheCreationTokens,
      timeSinceLastAssistantMsg: timeSinceLastAssistantMsg ?? -1,
      lastAssistantMsgOver5minAgo,
      lastAssistantMsgOver1hAgo,
      requestId: (requestId ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })

    // Write diff file for ant debugging via --debug. The path is included in
    // the summary log so ants can find it (DevBar UI removed — event data
    // flows reliably to BQ for analytics).
    let diffPath: string | undefined
    if (changes?.buildPrevDiffableContent) {
      diffPath = await writeCacheBreakDiff(
        changes.buildPrevDiffableContent(),
        state.buildDiffableContent(),
      )
    }

    const diffSuffix = diffPath ? `, diff: ${diffPath}` : ''
    const summary = `[PROMPT CACHE BREAK] ${reason} [source=${querySource}, call #${state.callCount}, cache read: ${prevCacheRead}${cacheReadTokens}, creation: ${cacheCreationTokens}${diffSuffix}]`

    logForDebugging(summary, { level: 'warn' })

    state.pendingChanges = null
  } catch (e: unknown) {
    // Telemetry only — never propagate failures to the caller.
    logError(e)
  }
}

/**
 * Call when cached microcompact sends cache_edits deletions.
 * The next API response will have lower cache read tokens — that's
 * expected, not a cache break.
 */
export function notifyCacheDeletion(
  querySource: QuerySource,
  agentId?: AgentId,
): void {
  const key = getTrackingKey(querySource, agentId)
  const state = key ? previousStateBySource.get(key) : undefined
  if (state) {
    state.cacheDeletionsPending = true
  }
}

/**
 * Call after compaction to reset the cache read baseline.
 * Compaction legitimately reduces message count, so cache read tokens
 * will naturally drop on the next call — that's not a break.
688 */ 689export function notifyCompaction( 690 querySource: QuerySource, 691 agentId?: AgentId, 692): void { 693 const key = getTrackingKey(querySource, agentId) 694 const state = key ? previousStateBySource.get(key) : undefined 695 if (state) { 696 state.prevCacheReadTokens = null 697 } 698} 699 700export function cleanupAgentTracking(agentId: AgentId): void { 701 previousStateBySource.delete(agentId) 702} 703 704export function resetPromptCacheBreakDetection(): void { 705 previousStateBySource.clear() 706} 707 708async function writeCacheBreakDiff( 709 prevContent: string, 710 newContent: string, 711): Promise<string | undefined> { 712 try { 713 const diffPath = getCacheBreakDiffPath() 714 await mkdir(getClaudeTempDir(), { recursive: true }) 715 const patch = createPatch( 716 'prompt-state', 717 prevContent, 718 newContent, 719 'before', 720 'after', 721 ) 722 await writeFile(diffPath, patch) 723 return diffPath 724 } catch { 725 return undefined 726 } 727}