services/api/promptCacheBreakDetection.ts at main

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / services / api / promptCacheBreakDetection.ts
at main 727 lines 26 kB view raw
wrap content
oppi.li dump from zip 11d ago
63aada3f
  1import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
  2import type { TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
  3import { createPatch } from 'diff'
  4import { mkdir, writeFile } from 'fs/promises'
  5import { join } from 'path'
  6import type { AgentId } from 'src/types/ids.js'
  7import type { Message } from 'src/types/message.js'
  8import { logForDebugging } from 'src/utils/debug.js'
  9import { djb2Hash } from 'src/utils/hash.js'
 10import { logError } from 'src/utils/log.js'
 11import { getClaudeTempDir } from 'src/utils/permissions/filesystem.js'
 12import { jsonStringify } from 'src/utils/slowOperations.js'
 13import type { QuerySource } from '../../constants/querySource.js'
 14import {
 15  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 16  logEvent,
 17} from '../analytics/index.js'
 18
 19function getCacheBreakDiffPath(): string {
 20  const chars = 'abcdefghijklmnopqrstuvwxyz0123456789'
 21  let suffix = ''
 22  for (let i = 0; i < 4; i++) {
 23    suffix += chars[Math.floor(Math.random() * chars.length)]
 24  }
 25  return join(getClaudeTempDir(), `cache-break-${suffix}.diff`)
 26}
 27
 28type PreviousState = {
 29  systemHash: number
 30  toolsHash: number
 31  /** Hash of system blocks WITH cache_control intact. Catches scope/TTL flips
 32   *  (global↔org, 1h↔5m) that stripCacheControl erases from systemHash. */
 33  cacheControlHash: number
 34  toolNames: string[]
 35  /** Per-tool schema hash. Diffed to name which tool's description changed
 36   *  when toolSchemasChanged but added=removed=0 (77% of tool breaks per
 37   *  BQ 2026-03-22). AgentTool/SkillTool embed dynamic agent/command lists. */
 38  perToolHashes: Record<string, number>
 39  systemCharCount: number
 40  model: string
 41  fastMode: boolean
 42  /** 'tool_based' | 'system_prompt' | 'none' — flips when MCP tools are
 43   *  discovered/removed. */
 44  globalCacheStrategy: string
 45  /** Sorted beta header list. Diffed to show which headers were added/removed. */
 46  betas: string[]
 47  /** AFK_MODE_BETA_HEADER presence — should NOT break cache anymore
 48   *  (sticky-on latched in claude.ts). Tracked to verify the fix. */
 49  autoModeActive: boolean
 50  /** Overage state flip — should NOT break cache anymore (eligibility is
 51   *  latched session-stable in should1hCacheTTL). Tracked to verify the fix. */
 52  isUsingOverage: boolean
 53  /** Cache-editing beta header presence — should NOT break cache anymore
 54   *  (sticky-on latched in claude.ts). Tracked to verify the fix. */
 55  cachedMCEnabled: boolean
 56  /** Resolved effort (env → options → model default). Goes into output_config
 57   *  or anthropic_internal.effort_override. */
 58  effortValue: string
 59  /** Hash of getExtraBodyParams() — catches CLAUDE_CODE_EXTRA_BODY and
 60   *  anthropic_internal changes. */
 61  extraBodyHash: number
 62  callCount: number
 63  pendingChanges: PendingChanges | null
 64  prevCacheReadTokens: number | null
 65  /** Set when cached microcompact sends cache_edits deletions. Cache reads
 66   *  will legitimately drop — this is expected, not a break. */
 67  cacheDeletionsPending: boolean
 68  buildDiffableContent: () => string
 69}
 70
 71type PendingChanges = {
 72  systemPromptChanged: boolean
 73  toolSchemasChanged: boolean
 74  modelChanged: boolean
 75  fastModeChanged: boolean
 76  cacheControlChanged: boolean
 77  globalCacheStrategyChanged: boolean
 78  betasChanged: boolean
 79  autoModeChanged: boolean
 80  overageChanged: boolean
 81  cachedMCChanged: boolean
 82  effortChanged: boolean
 83  extraBodyChanged: boolean
 84  addedToolCount: number
 85  removedToolCount: number
 86  systemCharDelta: number
 87  addedTools: string[]
 88  removedTools: string[]
 89  changedToolSchemas: string[]
 90  previousModel: string
 91  newModel: string
 92  prevGlobalCacheStrategy: string
 93  newGlobalCacheStrategy: string
 94  addedBetas: string[]
 95  removedBetas: string[]
 96  prevEffortValue: string
 97  newEffortValue: string
 98  buildPrevDiffableContent: () => string
 99}
100
101const previousStateBySource = new Map<string, PreviousState>()
102
103// Cap the number of tracked sources to prevent unbounded memory growth.
104// Each entry stores a ~300KB+ diffableContent string (serialized system prompt
105// + tool schemas). Without a cap, spawning many subagents (each with a unique
106// agentId key) causes the map to grow indefinitely.
107const MAX_TRACKED_SOURCES = 10
108
109const TRACKED_SOURCE_PREFIXES = [
110  'repl_main_thread',
111  'sdk',
112  'agent:custom',
113  'agent:default',
114  'agent:builtin',
115]
116
117// Minimum absolute token drop required to trigger a cache break warning.
118// Small drops (e.g., a few thousand tokens) can happen due to normal variation
119// and aren't worth alerting on.
120const MIN_CACHE_MISS_TOKENS = 2_000
121
122// Anthropic's server-side prompt cache TTL thresholds to test.
123// Cache breaks after these durations are likely due to TTL expiration
124// rather than client-side changes.
125const CACHE_TTL_5MIN_MS = 5 * 60 * 1000
126export const CACHE_TTL_1HOUR_MS = 60 * 60 * 1000
127
128// Models to exclude from cache break detection (e.g., haiku has different caching behavior)
129function isExcludedModel(model: string): boolean {
130  return model.includes('haiku')
131}
132
133/**
134 * Returns the tracking key for a querySource, or null if untracked.
135 * Compact shares the same server-side cache as repl_main_thread
136 * (same cacheSafeParams), so they share tracking state.
137 *
138 * For subagents with a tracked querySource, uses the unique agentId to
139 * isolate tracking state. This prevents false positive cache break
140 * notifications when multiple instances of the same agent type run
141 * concurrently.
142 *
143 * Untracked sources (speculation, session_memory, prompt_suggestion, etc.)
144 * are short-lived forked agents where cache break detection provides no
145 * value — they run 1-3 turns with a fresh agentId each time, so there's
146 * nothing meaningful to compare against. Their cache metrics are still
147 * logged via tengu_api_success for analytics.
148 */
149function getTrackingKey(
150  querySource: QuerySource,
151  agentId?: AgentId,
152): string | null {
153  if (querySource === 'compact') return 'repl_main_thread'
154  for (const prefix of TRACKED_SOURCE_PREFIXES) {
155    if (querySource.startsWith(prefix)) return agentId || querySource
156  }
157  return null
158}
159
160function stripCacheControl(
161  items: ReadonlyArray<Record<string, unknown>>,
162): unknown[] {
163  return items.map(item => {
164    if (!('cache_control' in item)) return item
165    const { cache_control: _, ...rest } = item
166    return rest
167  })
168}
169
170function computeHash(data: unknown): number {
171  const str = jsonStringify(data)
172  if (typeof Bun !== 'undefined') {
173    const hash = Bun.hash(str)
174    // Bun.hash can return bigint for large inputs; convert to number safely
175    return typeof hash === 'bigint' ? Number(hash & 0xffffffffn) : hash
176  }
177  // Fallback for non-Bun runtimes (e.g. Node.js via npm global install)
178  return djb2Hash(str)
179}
180
181/** MCP tool names are user-controlled (server config) and may leak filepaths.
182 *  Collapse them to 'mcp'; built-in names are a fixed vocabulary. */
183function sanitizeToolName(name: string): string {
184  return name.startsWith('mcp__') ? 'mcp' : name
185}
186
187function computePerToolHashes(
188  strippedTools: ReadonlyArray<unknown>,
189  names: string[],
190): Record<string, number> {
191  const hashes: Record<string, number> = {}
192  for (let i = 0; i < strippedTools.length; i++) {
193    hashes[names[i] ?? `__idx_${i}`] = computeHash(strippedTools[i])
194  }
195  return hashes
196}
197
198function getSystemCharCount(system: TextBlockParam[]): number {
199  let total = 0
200  for (const block of system) {
201    total += block.text.length
202  }
203  return total
204}
205
206function buildDiffableContent(
207  system: TextBlockParam[],
208  tools: BetaToolUnion[],
209  model: string,
210): string {
211  const systemText = system.map(b => b.text).join('\n\n')
212  const toolDetails = tools
213    .map(t => {
214      if (!('name' in t)) return 'unknown'
215      const desc = 'description' in t ? t.description : ''
216      const schema = 'input_schema' in t ? jsonStringify(t.input_schema) : ''
217      return `${t.name}\n  description: ${desc}\n  input_schema: ${schema}`
218    })
219    .sort()
220    .join('\n\n')
221  return `Model: ${model}\n\n=== System Prompt ===\n\n${systemText}\n\n=== Tools (${tools.length}) ===\n\n${toolDetails}\n`
222}
223
224/** Extended tracking snapshot — everything that could affect the server-side
225 *  cache key that we can observe from the client. All fields are optional so
226 *  the call site can add incrementally; undefined fields compare as stable. */
227export type PromptStateSnapshot = {
228  system: TextBlockParam[]
229  toolSchemas: BetaToolUnion[]
230  querySource: QuerySource
231  model: string
232  agentId?: AgentId
233  fastMode?: boolean
234  globalCacheStrategy?: string
235  betas?: readonly string[]
236  autoModeActive?: boolean
237  isUsingOverage?: boolean
238  cachedMCEnabled?: boolean
239  effortValue?: string | number
240  extraBodyParams?: unknown
241}
242
243/**
244 * Phase 1 (pre-call): Record the current prompt/tool state and detect what changed.
245 * Does NOT fire events — just stores pending changes for phase 2 to use.
246 */
247export function recordPromptState(snapshot: PromptStateSnapshot): void {
248  try {
249    const {
250      system,
251      toolSchemas,
252      querySource,
253      model,
254      agentId,
255      fastMode,
256      globalCacheStrategy = '',
257      betas = [],
258      autoModeActive = false,
259      isUsingOverage = false,
260      cachedMCEnabled = false,
261      effortValue,
262      extraBodyParams,
263    } = snapshot
264    const key = getTrackingKey(querySource, agentId)
265    if (!key) return
266
267    const strippedSystem = stripCacheControl(
268      system as unknown as ReadonlyArray<Record<string, unknown>>,
269    )
270    const strippedTools = stripCacheControl(
271      toolSchemas as unknown as ReadonlyArray<Record<string, unknown>>,
272    )
273
274    const systemHash = computeHash(strippedSystem)
275    const toolsHash = computeHash(strippedTools)
276    // Hash the full system array INCLUDING cache_control — this catches
277    // scope flips (global↔org/none) and TTL flips (1h↔5m) that the stripped
278    // hash can't see because the text content is identical.
279    const cacheControlHash = computeHash(
280      system.map(b => ('cache_control' in b ? b.cache_control : null)),
281    )
282    const toolNames = toolSchemas.map(t => ('name' in t ? t.name : 'unknown'))
283    // Only compute per-tool hashes when the aggregate changed — common case
284    // (tools unchanged) skips N extra jsonStringify calls.
285    const computeToolHashes = () =>
286      computePerToolHashes(strippedTools, toolNames)
287    const systemCharCount = getSystemCharCount(system)
288    const lazyDiffableContent = () =>
289      buildDiffableContent(system, toolSchemas, model)
290    const isFastMode = fastMode ?? false
291    const sortedBetas = [...betas].sort()
292    const effortStr = effortValue === undefined ? '' : String(effortValue)
293    const extraBodyHash =
294      extraBodyParams === undefined ? 0 : computeHash(extraBodyParams)
295
296    const prev = previousStateBySource.get(key)
297
298    if (!prev) {
299      // Evict oldest entries if map is at capacity
300      while (previousStateBySource.size >= MAX_TRACKED_SOURCES) {
301        const oldest = previousStateBySource.keys().next().value
302        if (oldest !== undefined) previousStateBySource.delete(oldest)
303      }
304
305      previousStateBySource.set(key, {
306        systemHash,
307        toolsHash,
308        cacheControlHash,
309        toolNames,
310        systemCharCount,
311        model,
312        fastMode: isFastMode,
313        globalCacheStrategy,
314        betas: sortedBetas,
315        autoModeActive,
316        isUsingOverage,
317        cachedMCEnabled,
318        effortValue: effortStr,
319        extraBodyHash,
320        callCount: 1,
321        pendingChanges: null,
322        prevCacheReadTokens: null,
323        cacheDeletionsPending: false,
324        buildDiffableContent: lazyDiffableContent,
325        perToolHashes: computeToolHashes(),
326      })
327      return
328    }
329
330    prev.callCount++
331
332    const systemPromptChanged = systemHash !== prev.systemHash
333    const toolSchemasChanged = toolsHash !== prev.toolsHash
334    const modelChanged = model !== prev.model
335    const fastModeChanged = isFastMode !== prev.fastMode
336    const cacheControlChanged = cacheControlHash !== prev.cacheControlHash
337    const globalCacheStrategyChanged =
338      globalCacheStrategy !== prev.globalCacheStrategy
339    const betasChanged =
340      sortedBetas.length !== prev.betas.length ||
341      sortedBetas.some((b, i) => b !== prev.betas[i])
342    const autoModeChanged = autoModeActive !== prev.autoModeActive
343    const overageChanged = isUsingOverage !== prev.isUsingOverage
344    const cachedMCChanged = cachedMCEnabled !== prev.cachedMCEnabled
345    const effortChanged = effortStr !== prev.effortValue
346    const extraBodyChanged = extraBodyHash !== prev.extraBodyHash
347
348    if (
349      systemPromptChanged ||
350      toolSchemasChanged ||
351      modelChanged ||
352      fastModeChanged ||
353      cacheControlChanged ||
354      globalCacheStrategyChanged ||
355      betasChanged ||
356      autoModeChanged ||
357      overageChanged ||
358      cachedMCChanged ||
359      effortChanged ||
360      extraBodyChanged
361    ) {
362      const prevToolSet = new Set(prev.toolNames)
363      const newToolSet = new Set(toolNames)
364      const prevBetaSet = new Set(prev.betas)
365      const newBetaSet = new Set(sortedBetas)
366      const addedTools = toolNames.filter(n => !prevToolSet.has(n))
367      const removedTools = prev.toolNames.filter(n => !newToolSet.has(n))
368      const changedToolSchemas: string[] = []
369      if (toolSchemasChanged) {
370        const newHashes = computeToolHashes()
371        for (const name of toolNames) {
372          if (!prevToolSet.has(name)) continue
373          if (newHashes[name] !== prev.perToolHashes[name]) {
374            changedToolSchemas.push(name)
375          }
376        }
377        prev.perToolHashes = newHashes
378      }
379      prev.pendingChanges = {
380        systemPromptChanged,
381        toolSchemasChanged,
382        modelChanged,
383        fastModeChanged,
384        cacheControlChanged,
385        globalCacheStrategyChanged,
386        betasChanged,
387        autoModeChanged,
388        overageChanged,
389        cachedMCChanged,
390        effortChanged,
391        extraBodyChanged,
392        addedToolCount: addedTools.length,
393        removedToolCount: removedTools.length,
394        addedTools,
395        removedTools,
396        changedToolSchemas,
397        systemCharDelta: systemCharCount - prev.systemCharCount,
398        previousModel: prev.model,
399        newModel: model,
400        prevGlobalCacheStrategy: prev.globalCacheStrategy,
401        newGlobalCacheStrategy: globalCacheStrategy,
402        addedBetas: sortedBetas.filter(b => !prevBetaSet.has(b)),
403        removedBetas: prev.betas.filter(b => !newBetaSet.has(b)),
404        prevEffortValue: prev.effortValue,
405        newEffortValue: effortStr,
406        buildPrevDiffableContent: prev.buildDiffableContent,
407      }
408    } else {
409      prev.pendingChanges = null
410    }
411
412    prev.systemHash = systemHash
413    prev.toolsHash = toolsHash
414    prev.cacheControlHash = cacheControlHash
415    prev.toolNames = toolNames
416    prev.systemCharCount = systemCharCount
417    prev.model = model
418    prev.fastMode = isFastMode
419    prev.globalCacheStrategy = globalCacheStrategy
420    prev.betas = sortedBetas
421    prev.autoModeActive = autoModeActive
422    prev.isUsingOverage = isUsingOverage
423    prev.cachedMCEnabled = cachedMCEnabled
424    prev.effortValue = effortStr
425    prev.extraBodyHash = extraBodyHash
426    prev.buildDiffableContent = lazyDiffableContent
427  } catch (e: unknown) {
428    logError(e)
429  }
430}
431
432/**
433 * Phase 2 (post-call): Check the API response's cache tokens to determine
434 * if a cache break actually occurred. If it did, use the pending changes
435 * from phase 1 to explain why.
436 */
437export async function checkResponseForCacheBreak(
438  querySource: QuerySource,
439  cacheReadTokens: number,
440  cacheCreationTokens: number,
441  messages: Message[],
442  agentId?: AgentId,
443  requestId?: string | null,
444): Promise<void> {
445  try {
446    const key = getTrackingKey(querySource, agentId)
447    if (!key) return
448
449    const state = previousStateBySource.get(key)
450    if (!state) return
451
452    // Skip excluded models (e.g., haiku has different caching behavior)
453    if (isExcludedModel(state.model)) return
454
455    const prevCacheRead = state.prevCacheReadTokens
456    state.prevCacheReadTokens = cacheReadTokens
457
458    // Calculate time since last call for TTL detection by finding the most recent
459    // assistant message timestamp in the messages array (before the current response)
460    const lastAssistantMessage = messages.findLast(m => m.type === 'assistant')
461    const timeSinceLastAssistantMsg = lastAssistantMessage
462      ? Date.now() - new Date(lastAssistantMessage.timestamp).getTime()
463      : null
464
465    // Skip the first call — no previous value to compare against
466    if (prevCacheRead === null) return
467
468    const changes = state.pendingChanges
469
470    // Cache deletions via cached microcompact intentionally reduce the cached
471    // prefix. The drop in cache read tokens is expected — reset the baseline
472    // so we don't false-positive on the next call.
473    if (state.cacheDeletionsPending) {
474      state.cacheDeletionsPending = false
475      logForDebugging(
476        `[PROMPT CACHE] cache deletion applied, cache read: ${prevCacheRead} → ${cacheReadTokens} (expected drop)`,
477      )
478      // Don't flag as a break — the remaining state is still valid
479      state.pendingChanges = null
480      return
481    }
482
483    // Detect a cache break: cache read dropped >5% from previous AND
484    // the absolute drop exceeds the minimum threshold.
485    const tokenDrop = prevCacheRead - cacheReadTokens
486    if (
487      cacheReadTokens >= prevCacheRead * 0.95 ||
488      tokenDrop < MIN_CACHE_MISS_TOKENS
489    ) {
490      state.pendingChanges = null
491      return
492    }
493
494    // Build explanation from pending changes (if any)
495    const parts: string[] = []
496    if (changes) {
497      if (changes.modelChanged) {
498        parts.push(
499          `model changed (${changes.previousModel} → ${changes.newModel})`,
500        )
501      }
502      if (changes.systemPromptChanged) {
503        const charDelta = changes.systemCharDelta
504        const charInfo =
505          charDelta === 0
506            ? ''
507            : charDelta > 0
508              ? ` (+${charDelta} chars)`
509              : ` (${charDelta} chars)`
510        parts.push(`system prompt changed${charInfo}`)
511      }
512      if (changes.toolSchemasChanged) {
513        const toolDiff =
514          changes.addedToolCount > 0 || changes.removedToolCount > 0
515            ? ` (+${changes.addedToolCount}/-${changes.removedToolCount} tools)`
516            : ' (tool prompt/schema changed, same tool set)'
517        parts.push(`tools changed${toolDiff}`)
518      }
519      if (changes.fastModeChanged) {
520        parts.push('fast mode toggled')
521      }
522      if (changes.globalCacheStrategyChanged) {
523        parts.push(
524          `global cache strategy changed (${changes.prevGlobalCacheStrategy || 'none'} → ${changes.newGlobalCacheStrategy || 'none'})`,
525        )
526      }
527      if (
528        changes.cacheControlChanged &&
529        !changes.globalCacheStrategyChanged &&
530        !changes.systemPromptChanged
531      ) {
532        // Only report as standalone cause if nothing else explains it —
533        // otherwise the scope/TTL flip is a consequence, not the root cause.
534        parts.push('cache_control changed (scope or TTL)')
535      }
536      if (changes.betasChanged) {
537        const added = changes.addedBetas.length
538          ? `+${changes.addedBetas.join(',')}`
539          : ''
540        const removed = changes.removedBetas.length
541          ? `-${changes.removedBetas.join(',')}`
542          : ''
543        const diff = [added, removed].filter(Boolean).join(' ')
544        parts.push(`betas changed${diff ? ` (${diff})` : ''}`)
545      }
546      if (changes.autoModeChanged) {
547        parts.push('auto mode toggled')
548      }
549      if (changes.overageChanged) {
550        parts.push('overage state changed (TTL latched, no flip)')
551      }
552      if (changes.cachedMCChanged) {
553        parts.push('cached microcompact toggled')
554      }
555      if (changes.effortChanged) {
556        parts.push(
557          `effort changed (${changes.prevEffortValue || 'default'} → ${changes.newEffortValue || 'default'})`,
558        )
559      }
560      if (changes.extraBodyChanged) {
561        parts.push('extra body params changed')
562      }
563    }
564
565    // Check if time gap suggests TTL expiration
566    const lastAssistantMsgOver5minAgo =
567      timeSinceLastAssistantMsg !== null &&
568      timeSinceLastAssistantMsg > CACHE_TTL_5MIN_MS
569    const lastAssistantMsgOver1hAgo =
570      timeSinceLastAssistantMsg !== null &&
571      timeSinceLastAssistantMsg > CACHE_TTL_1HOUR_MS
572
573    // Post PR #19823 BQ analysis (bq-queries/prompt-caching/cache_break_pr19823_analysis.sql):
574    // when all client-side flags are false and the gap is under TTL, ~90% of breaks
575    // are server-side routing/eviction or billed/inference disagreement. Label
576    // accordingly instead of implying a CC bug hunt.
577    let reason: string
578    if (parts.length > 0) {
579      reason = parts.join(', ')
580    } else if (lastAssistantMsgOver1hAgo) {
581      reason = 'possible 1h TTL expiry (prompt unchanged)'
582    } else if (lastAssistantMsgOver5minAgo) {
583      reason = 'possible 5min TTL expiry (prompt unchanged)'
584    } else if (timeSinceLastAssistantMsg !== null) {
585      reason = 'likely server-side (prompt unchanged, <5min gap)'
586    } else {
587      reason = 'unknown cause'
588    }
589
590    logEvent('tengu_prompt_cache_break', {
591      systemPromptChanged: changes?.systemPromptChanged ?? false,
592      toolSchemasChanged: changes?.toolSchemasChanged ?? false,
593      modelChanged: changes?.modelChanged ?? false,
594      fastModeChanged: changes?.fastModeChanged ?? false,
595      cacheControlChanged: changes?.cacheControlChanged ?? false,
596      globalCacheStrategyChanged: changes?.globalCacheStrategyChanged ?? false,
597      betasChanged: changes?.betasChanged ?? false,
598      autoModeChanged: changes?.autoModeChanged ?? false,
599      overageChanged: changes?.overageChanged ?? false,
600      cachedMCChanged: changes?.cachedMCChanged ?? false,
601      effortChanged: changes?.effortChanged ?? false,
602      extraBodyChanged: changes?.extraBodyChanged ?? false,
603      addedToolCount: changes?.addedToolCount ?? 0,
604      removedToolCount: changes?.removedToolCount ?? 0,
605      systemCharDelta: changes?.systemCharDelta ?? 0,
606      // Tool names are sanitized: built-in names are a fixed vocabulary,
607      // MCP tools collapse to 'mcp' (user-configured, could leak paths).
608      addedTools: (changes?.addedTools ?? [])
609        .map(sanitizeToolName)
610        .join(
611          ',',
612        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
613      removedTools: (changes?.removedTools ?? [])
614        .map(sanitizeToolName)
615        .join(
616          ',',
617        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
618      changedToolSchemas: (changes?.changedToolSchemas ?? [])
619        .map(sanitizeToolName)
620        .join(
621          ',',
622        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
623      // Beta header names and cache strategy are fixed enum-like values,
624      // not code or filepaths. requestId is an opaque server-generated ID.
625      addedBetas: (changes?.addedBetas ?? []).join(
626        ',',
627      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
628      removedBetas: (changes?.removedBetas ?? []).join(
629        ',',
630      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
631      prevGlobalCacheStrategy: (changes?.prevGlobalCacheStrategy ??
632        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
633      newGlobalCacheStrategy: (changes?.newGlobalCacheStrategy ??
634        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
635      callNumber: state.callCount,
636      prevCacheReadTokens: prevCacheRead,
637      cacheReadTokens,
638      cacheCreationTokens,
639      timeSinceLastAssistantMsg: timeSinceLastAssistantMsg ?? -1,
640      lastAssistantMsgOver5minAgo,
641      lastAssistantMsgOver1hAgo,
642      requestId: (requestId ??
643        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
644    })
645
646    // Write diff file for ant debugging via --debug. The path is included in
647    // the summary log so ants can find it (DevBar UI removed — event data
648    // flows reliably to BQ for analytics).
649    let diffPath: string | undefined
650    if (changes?.buildPrevDiffableContent) {
651      diffPath = await writeCacheBreakDiff(
652        changes.buildPrevDiffableContent(),
653        state.buildDiffableContent(),
654      )
655    }
656
657    const diffSuffix = diffPath ? `, diff: ${diffPath}` : ''
658    const summary = `[PROMPT CACHE BREAK] ${reason} [source=${querySource}, call #${state.callCount}, cache read: ${prevCacheRead} → ${cacheReadTokens}, creation: ${cacheCreationTokens}${diffSuffix}]`
659
660    logForDebugging(summary, { level: 'warn' })
661
662    state.pendingChanges = null
663  } catch (e: unknown) {
664    logError(e)
665  }
666}
667
668/**
669 * Call when cached microcompact sends cache_edits deletions.
670 * The next API response will have lower cache read tokens — that's
671 * expected, not a cache break.
672 */
673export function notifyCacheDeletion(
674  querySource: QuerySource,
675  agentId?: AgentId,
676): void {
677  const key = getTrackingKey(querySource, agentId)
678  const state = key ? previousStateBySource.get(key) : undefined
679  if (state) {
680    state.cacheDeletionsPending = true
681  }
682}
683
684/**
685 * Call after compaction to reset the cache read baseline.
686 * Compaction legitimately reduces message count, so cache read tokens
687 * will naturally drop on the next call — that's not a break.
688 */
689export function notifyCompaction(
690  querySource: QuerySource,
691  agentId?: AgentId,
692): void {
693  const key = getTrackingKey(querySource, agentId)
694  const state = key ? previousStateBySource.get(key) : undefined
695  if (state) {
696    state.prevCacheReadTokens = null
697  }
698}
699
700export function cleanupAgentTracking(agentId: AgentId): void {
701  previousStateBySource.delete(agentId)
702}
703
704export function resetPromptCacheBreakDetection(): void {
705  previousStateBySource.clear()
706}
707
708async function writeCacheBreakDiff(
709  prevContent: string,
710  newContent: string,
711): Promise<string | undefined> {
712  try {
713    const diffPath = getCacheBreakDiffPath()
714    await mkdir(getClaudeTempDir(), { recursive: true })
715    const patch = createPatch(
716      'prompt-state',
717      prevContent,
718      newContent,
719      'before',
720      'after',
721    )
722    await writeFile(diffPath, patch)
723    return diffPath
724  } catch {
725    return undefined
726  }
727}