// Source dump of Claude Code (main branch, 530 lines, 20 kB) — viewer chrome
// from the original "view raw" page converted to a comment so the file parses.
1import { feature } from 'bun:bundle' 2import type { ToolResultBlockParam } from '@anthropic-ai/sdk/resources/index.mjs' 3import type { QuerySource } from '../../constants/querySource.js' 4import type { ToolUseContext } from '../../Tool.js' 5import { FILE_EDIT_TOOL_NAME } from '../../tools/FileEditTool/constants.js' 6import { FILE_READ_TOOL_NAME } from '../../tools/FileReadTool/prompt.js' 7import { FILE_WRITE_TOOL_NAME } from '../../tools/FileWriteTool/prompt.js' 8import { GLOB_TOOL_NAME } from '../../tools/GlobTool/prompt.js' 9import { GREP_TOOL_NAME } from '../../tools/GrepTool/prompt.js' 10import { WEB_FETCH_TOOL_NAME } from '../../tools/WebFetchTool/prompt.js' 11import { WEB_SEARCH_TOOL_NAME } from '../../tools/WebSearchTool/prompt.js' 12import type { Message } from '../../types/message.js' 13import { logForDebugging } from '../../utils/debug.js' 14import { getMainLoopModel } from '../../utils/model/model.js' 15import { SHELL_TOOL_NAMES } from '../../utils/shell/shellToolUtils.js' 16import { jsonStringify } from '../../utils/slowOperations.js' 17import { 18 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 19 logEvent, 20} from '../analytics/index.js' 21import { notifyCacheDeletion } from '../api/promptCacheBreakDetection.js' 22import { roughTokenCountEstimation } from '../tokenEstimation.js' 23import { 24 clearCompactWarningSuppression, 25 suppressCompactWarning, 26} from './compactWarningState.js' 27import { 28 getTimeBasedMCConfig, 29 type TimeBasedMCConfig, 30} from './timeBasedMCConfig.js' 31 32// Inline from utils/toolResultStorage.ts — importing that file pulls in 33// sessionStorage → utils/messages → services/api/errors, completing a 34// circular-deps loop back through this file via promptCacheBreakDetection. 35// Drift is caught by a test asserting equality with the source-of-truth. 
// Sentinel written into cleared tool_result blocks by the time-based path.
// Inlined (see note above) to avoid a circular-dependency chain; a test
// asserts it stays equal to the copy in utils/toolResultStorage.ts.
export const TIME_BASED_MC_CLEARED_MESSAGE = '[Old tool result content cleared]'

// Flat per-image/document token estimate used by both estimation helpers.
const IMAGE_MAX_TOKEN_SIZE = 2000

// Only compact these tools
const COMPACTABLE_TOOLS = new Set<string>([
  FILE_READ_TOOL_NAME,
  ...SHELL_TOOL_NAMES,
  GREP_TOOL_NAME,
  GLOB_TOOL_NAME,
  WEB_SEARCH_TOOL_NAME,
  WEB_FETCH_TOOL_NAME,
  FILE_EDIT_TOOL_NAME,
  FILE_WRITE_TOOL_NAME,
])

// --- Cached microcompact state (ant-only, gated by feature('CACHED_MICROCOMPACT')) ---

// Lazy-initialized cached MC module and state to avoid importing in external builds.
// The imports and state live inside feature() checks for dead code elimination.
// NOTE: this state is module-global — shared across all conversations in the
// process — which is why cached MC is restricted to the main thread below.
let cachedMCModule: typeof import('./cachedMicrocompact.js') | null = null
let cachedMCState: import('./cachedMicrocompact.js').CachedMCState | null = null
// Cache-edits block queued for the next API request; consumed (and cleared)
// by consumePendingCacheEdits().
let pendingCacheEdits:
  | import('./cachedMicrocompact.js').CacheEditsBlock
  | null = null

// Dynamically import (and memoize) the cached-microcompact module.
async function getCachedMCModule(): Promise<
  typeof import('./cachedMicrocompact.js')
> {
  if (!cachedMCModule) {
    cachedMCModule = await import('./cachedMicrocompact.js')
  }
  return cachedMCModule
}

// Lazily create the cached-MC state. Throws if the module was never loaded —
// callers must await getCachedMCModule() before calling this.
function ensureCachedMCState(): import('./cachedMicrocompact.js').CachedMCState {
  if (!cachedMCState && cachedMCModule) {
    cachedMCState = cachedMCModule.createCachedMCState()
  }
  if (!cachedMCState) {
    throw new Error(
      'cachedMCState not initialized — getCachedMCModule() must be called first',
    )
  }
  return cachedMCState
}

/**
 * Get new pending cache edits to be included in the next API request.
 * Returns null if there are no new pending edits.
 * Clears the pending state (caller must pin them after insertion).
 */
export function consumePendingCacheEdits():
  | import('./cachedMicrocompact.js').CacheEditsBlock
  | null {
  const edits = pendingCacheEdits
  pendingCacheEdits = null
  return edits
}

/**
 * Get all previously-pinned cache edits that must be re-sent at their
 * original positions for cache hits.
 *
 * Returns the live array from cachedMCState (empty when cached MC was never
 * initialized) — callers should treat it as read-only.
 */
export function getPinnedCacheEdits(): import('./cachedMicrocompact.js').PinnedCacheEdits[] {
  if (!cachedMCState) {
    return []
  }
  return cachedMCState.pinnedEdits
}

/**
 * Pin a new cache_edits block to a specific user message position.
 * Called after inserting new edits so they are re-sent in subsequent calls.
 * No-op when cached-MC state was never initialized.
 */
export function pinCacheEdits(
  userMessageIndex: number,
  block: import('./cachedMicrocompact.js').CacheEditsBlock,
): void {
  if (cachedMCState) {
    cachedMCState.pinnedEdits.push({ userMessageIndex, block })
  }
}

/**
 * Marks all registered tools as sent to the API.
 * Called after a successful API response.
 */
export function markToolsSentToAPIState(): void {
  if (cachedMCState && cachedMCModule) {
    cachedMCModule.markToolsSentToAPI(cachedMCState)
  }
}

/**
 * Reset all module-level microcompact state: clears the cached-MC tool
 * registry (when initialized) and drops any queued-but-unconsumed cache edits.
 */
export function resetMicrocompactState(): void {
  if (cachedMCState && cachedMCModule) {
    cachedMCModule.resetCachedMCState(cachedMCState)
  }
  pendingCacheEdits = null
}

// Helper to calculate tool result tokens. Returns 0 for empty content;
// strings are estimated directly, block arrays are summed per item.
function calculateToolResultTokens(block: ToolResultBlockParam): number {
  if (!block.content) {
    return 0
  }

  if (typeof block.content === 'string') {
    return roughTokenCountEstimation(block.content)
  }

  // Array of TextBlockParam | ImageBlockParam | DocumentBlockParam
  return block.content.reduce((sum, item) => {
    if (item.type === 'text') {
      return sum + roughTokenCountEstimation(item.text)
    } else if (item.type === 'image' || item.type === 'document') {
      // Images/documents are approximately 2000 tokens regardless of format
      return sum + IMAGE_MAX_TOKEN_SIZE
    }
    return sum
  }, 0)
}

/**
 * Estimate token count for messages by extracting text content.
 * Used for rough token estimation when we don't have accurate API counts.
 * Pads estimate by 4/3 to be conservative since we're approximating.
 * Non-user/assistant messages and string-content messages are skipped.
 */
export function estimateMessageTokens(messages: Message[]): number {
  let totalTokens = 0

  for (const message of messages) {
    if (message.type !== 'user' && message.type !== 'assistant') {
      continue
    }

    if (!Array.isArray(message.message.content)) {
      continue
    }

    for (const block of message.message.content) {
      if (block.type === 'text') {
        totalTokens += roughTokenCountEstimation(block.text)
      } else if (block.type === 'tool_result') {
        totalTokens += calculateToolResultTokens(block)
      } else if (block.type === 'image' || block.type === 'document') {
        totalTokens += IMAGE_MAX_TOKEN_SIZE
      } else if (block.type === 'thinking') {
        // Match roughTokenCountEstimationForBlock: count only the thinking
        // text, not the JSON wrapper or signature (signature is metadata,
        // not model-tokenized content).
        totalTokens += roughTokenCountEstimation(block.thinking)
      } else if (block.type === 'redacted_thinking') {
        totalTokens += roughTokenCountEstimation(block.data)
      } else if (block.type === 'tool_use') {
        // Match roughTokenCountEstimationForBlock: count name + input,
        // not the JSON wrapper or id field.
        totalTokens += roughTokenCountEstimation(
          block.name + jsonStringify(block.input ?? {}),
        )
      } else {
        // server_tool_use, web_search_tool_result, etc.
        totalTokens += roughTokenCountEstimation(jsonStringify(block))
      }
    }
  }

  // Pad estimate by 4/3 to be conservative since we're approximating
  return Math.ceil(totalTokens * (4 / 3))
}

// Deferred-compaction info returned to the caller so the boundary message can
// be built after the API response (see cachedMicrocompactPath).
export type PendingCacheEdits = {
  trigger: 'auto'
  deletedToolIds: string[]
  // Baseline cumulative cache_deleted_input_tokens from the previous API response,
  // used to compute the per-operation delta (the API value is sticky/cumulative)
  baselineCacheDeletedTokens: number
}

export type MicrocompactResult = {
  messages: Message[]
  compactionInfo?: {
    pendingCacheEdits?: PendingCacheEdits
  }
}

/**
 * Walk messages and collect tool_use IDs whose tool name is in
 * COMPACTABLE_TOOLS, in encounter order. Shared by both microcompact paths.
 */
function collectCompactableToolIds(messages: Message[]): string[] {
  const ids: string[] = []
  for (const message of messages) {
    if (
      message.type === 'assistant' &&
      Array.isArray(message.message.content)
    ) {
      for (const block of message.message.content) {
        if (block.type === 'tool_use' && COMPACTABLE_TOOLS.has(block.name)) {
          ids.push(block.id)
        }
      }
    }
  }
  return ids
}

// Prefix-match because promptCategory.ts sets the querySource to
// 'repl_main_thread:outputStyle:<style>' when a non-default output style
// is active. The bare 'repl_main_thread' is only used for the default style.
// query.ts:350/1451 use the same startsWith pattern; the pre-existing
// cached-MC `=== 'repl_main_thread'` check was a latent bug — users with a
// non-default output style were silently excluded from cached MC.
// NOTE: undefined is treated as main-thread (cached-MC backward-compat);
// evaluateTimeBasedTrigger additionally requires an explicit source.
function isMainThreadSource(querySource: QuerySource | undefined): boolean {
  return !querySource || querySource.startsWith('repl_main_thread')
}

/**
 * Entry point: run at most one microcompact strategy for this request.
 * Order: time-based (cold cache) first, then cached MC (warm cache,
 * feature-gated), else return messages unchanged.
 */
export async function microcompactMessages(
  messages: Message[],
  toolUseContext?: ToolUseContext,
  querySource?: QuerySource,
): Promise<MicrocompactResult> {
  // Clear suppression flag at start of new microcompact attempt
  clearCompactWarningSuppression()

  // Time-based trigger runs first and short-circuits. If the gap since the
  // last assistant message exceeds the threshold, the server cache has expired
  // and the full prefix will be rewritten regardless — so content-clear old
  // tool results now, before the request, to shrink what gets rewritten.
  // Cached MC (cache-editing) is skipped when this fires: editing assumes a
  // warm cache, and we just established it's cold.
  const timeBasedResult = maybeTimeBasedMicrocompact(messages, querySource)
  if (timeBasedResult) {
    return timeBasedResult
  }

  // Only run cached MC for the main thread to prevent forked agents
  // (session_memory, prompt_suggestion, etc.) from registering their
  // tool_results in the global cachedMCState, which would cause the main
  // thread to try deleting tools that don't exist in its own conversation.
  if (feature('CACHED_MICROCOMPACT')) {
    const mod = await getCachedMCModule()
    const model = toolUseContext?.options.mainLoopModel ?? getMainLoopModel()
    if (
      mod.isCachedMicrocompactEnabled() &&
      mod.isModelSupportedForCacheEditing(model) &&
      isMainThreadSource(querySource)
    ) {
      return await cachedMicrocompactPath(messages, querySource)
    }
  }

  // Legacy microcompact path removed — tengu_cache_plum_violet is always true.
  // For contexts where cached microcompact is not available (external builds,
  // non-ant users, unsupported models, sub-agents), no compaction happens here;
  // autocompact handles context pressure instead.
  return { messages }
}

/**
 * Cached microcompact path - uses cache editing API to remove tool results
 * without invalidating the cached prefix.
 *
 * Key differences from regular microcompact:
 * - Does NOT modify local message content (cache_reference and cache_edits are added at API layer)
 * - Uses count-based trigger/keep thresholds from GrowthBook config
 * - Takes precedence over regular microcompact (no disk persistence)
 * - Tracks tool results and queues cache edits for the API layer
 */
async function cachedMicrocompactPath(
  messages: Message[],
  querySource: QuerySource | undefined,
): Promise<MicrocompactResult> {
  const mod = await getCachedMCModule()
  const state = ensureCachedMCState()
  const config = mod.getCachedMCConfig()

  const compactableToolIds = new Set(collectCompactableToolIds(messages))
  // Second pass: register tool results grouped by user message
  for (const message of messages) {
    if (message.type === 'user' && Array.isArray(message.message.content)) {
      const groupIds: string[] = []
      for (const block of message.message.content) {
        if (
          block.type === 'tool_result' &&
          compactableToolIds.has(block.tool_use_id) &&
          !state.registeredTools.has(block.tool_use_id)
        ) {
          mod.registerToolResult(state, block.tool_use_id)
          groupIds.push(block.tool_use_id)
        }
      }
      // Called even with an empty groupIds — presumably the module tracks
      // message positions as well as tool IDs; confirm against
      // cachedMicrocompact.ts before changing.
      mod.registerToolMessage(state, groupIds)
    }
  }

  const toolsToDelete = mod.getToolResultsToDelete(state)

  if (toolsToDelete.length > 0) {
    // Create and queue the cache_edits block for the API layer
    const cacheEdits = mod.createCacheEditsBlock(state, toolsToDelete)
    if (cacheEdits) {
      pendingCacheEdits = cacheEdits
    }

    logForDebugging(
      `Cached MC deleting ${toolsToDelete.length} tool(s): ${toolsToDelete.join(', ')}`,
    )

    // Log the event
    logEvent('tengu_cached_microcompact', {
      toolsDeleted: toolsToDelete.length,
      deletedToolIds: toolsToDelete.join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      activeToolCount: state.toolOrder.length - state.deletedRefs.size,
      triggerType:
        'auto' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      threshold: config.triggerThreshold,
      keepRecent: config.keepRecent,
    })

    // Suppress warning after successful compaction
    suppressCompactWarning()

    // Notify cache break detection that cache reads will legitimately drop
    if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
      // Pass the actual querySource — isMainThreadSource now prefix-matches
      // so output-style variants enter here, and getTrackingKey keys on the
      // full source string, not the 'repl_main_thread' prefix.
      notifyCacheDeletion(querySource ?? 'repl_main_thread')
    }

    // Return messages unchanged - cache_reference and cache_edits are added at API layer
    // Boundary message is deferred until after API response so we can use
    // actual cache_deleted_input_tokens from the API instead of client-side estimates
    // Capture the baseline cumulative cache_deleted_input_tokens from the last
    // assistant message so we can compute a per-operation delta after the API call
    const lastAsst = messages.findLast(m => m.type === 'assistant')
    const baseline =
      lastAsst?.type === 'assistant'
        ? ((
            // cache_deleted_input_tokens is not in the SDK's Usage type yet,
            // hence the Record cast rather than a direct property access.
            lastAsst.message.usage as unknown as Record<
              string,
              number | undefined
            >
          )?.cache_deleted_input_tokens ?? 0)
        : 0

    return {
      messages,
      compactionInfo: {
        pendingCacheEdits: {
          trigger: 'auto',
          deletedToolIds: toolsToDelete,
          baselineCacheDeletedTokens: baseline,
        },
      },
    }
  }

  // No compaction needed, return messages unchanged
  return { messages }
}

/**
 * Check whether the time-based trigger should fire for this request.
 *
 * Returns the measured gap (minutes since last assistant message) when the
 * trigger fires, or null when it doesn't (disabled, wrong source, under
 * threshold, no prior assistant, unparseable timestamp).
 *
 * Extracted so other pre-request paths (e.g. snip force-apply) can consult
 * the same predicate without coupling to the tool-result clearing action.
 */
export function evaluateTimeBasedTrigger(
  messages: Message[],
  querySource: QuerySource | undefined,
): { gapMinutes: number; config: TimeBasedMCConfig } | null {
  const config = getTimeBasedMCConfig()
  // Require an explicit main-thread querySource. isMainThreadSource treats
  // undefined as main-thread (for cached-MC backward-compat), but several
  // callers (/context, /compact, analyzeContext) invoke microcompactMessages
  // without a source for analysis-only purposes — they should not trigger.
  if (!config.enabled || !querySource || !isMainThreadSource(querySource)) {
    return null
  }
  const lastAssistant = messages.findLast(m => m.type === 'assistant')
  if (!lastAssistant) {
    return null
  }
  const gapMinutes =
    (Date.now() - new Date(lastAssistant.timestamp).getTime()) / 60_000
  // isFinite also rejects NaN from an unparseable timestamp.
  if (!Number.isFinite(gapMinutes) || gapMinutes < config.gapThresholdMinutes) {
    return null
  }
  return { gapMinutes, config }
}

/**
 * Time-based microcompact: when the gap since the last main-loop assistant
 * message exceeds the configured threshold, content-clear all but the most
 * recent N compactable tool results.
 *
 * Returns null when the trigger doesn't fire (disabled, wrong source, gap
 * under threshold, nothing to clear) — caller falls through to other paths.
 *
 * Unlike cached MC, this mutates message content directly. The cache is cold,
 * so there's no cached prefix to preserve via cache_edits.
 */
function maybeTimeBasedMicrocompact(
  messages: Message[],
  querySource: QuerySource | undefined,
): MicrocompactResult | null {
  const trigger = evaluateTimeBasedTrigger(messages, querySource)
  if (!trigger) {
    return null
  }
  const { gapMinutes, config } = trigger

  const compactableIds = collectCompactableToolIds(messages)

  // Floor at 1: slice(-0) returns the full array (paradoxically keeps
  // everything), and clearing ALL results leaves the model with zero working
  // context. Neither degenerate is sensible — always keep at least the last.
  const keepRecent = Math.max(1, config.keepRecent)
  const keepSet = new Set(compactableIds.slice(-keepRecent))
  const clearSet = new Set(compactableIds.filter(id => !keepSet.has(id)))

  if (clearSet.size === 0) {
    return null
  }

  // Rebuild only the user messages that contain a to-be-cleared tool_result;
  // untouched messages are returned by reference.
  let tokensSaved = 0
  const result: Message[] = messages.map(message => {
    if (message.type !== 'user' || !Array.isArray(message.message.content)) {
      return message
    }
    let touched = false
    const newContent = message.message.content.map(block => {
      if (
        block.type === 'tool_result' &&
        clearSet.has(block.tool_use_id) &&
        // Skip blocks already cleared on a previous pass.
        block.content !== TIME_BASED_MC_CLEARED_MESSAGE
      ) {
        tokensSaved += calculateToolResultTokens(block)
        touched = true
        return { ...block, content: TIME_BASED_MC_CLEARED_MESSAGE }
      }
      return block
    })
    if (!touched) return message
    return {
      ...message,
      message: { ...message.message, content: newContent },
    }
  })

  // Nothing of substance was cleared (all candidates empty or already
  // cleared) — discard the rewrite and report "didn't fire".
  if (tokensSaved === 0) {
    return null
  }

  logEvent('tengu_time_based_microcompact', {
    gapMinutes: Math.round(gapMinutes),
    gapThresholdMinutes: config.gapThresholdMinutes,
    toolsCleared: clearSet.size,
    toolsKept: keepSet.size,
    keepRecent: config.keepRecent,
    tokensSaved,
  })

  logForDebugging(
    `[TIME-BASED MC] gap ${Math.round(gapMinutes)}min > ${config.gapThresholdMinutes}min, cleared ${clearSet.size} tool results (~${tokensSaved} tokens), kept last ${keepSet.size}`,
  )

  suppressCompactWarning()
  // Cached-MC state (module-level) holds tool IDs registered on prior turns.
  // We just content-cleared some of those tools AND invalidated the server
  // cache by changing prompt content. If cached-MC runs next turn with the
  // stale state, it would try to cache_edit tools whose server-side entries
  // no longer exist. Reset it.
  resetMicrocompactState()
  // We just changed the prompt content — the next response's cache read will
  // be low, but that's us, not a break. Tell the detector to expect a drop.
  // notifyCacheDeletion (not notifyCompaction) because it's already imported
  // here and achieves the same false-positive suppression — adding the second
  // symbol to the import was flagged by the circular-deps check.
  // Pass the actual querySource: getTrackingKey returns the full source string
  // (e.g. 'repl_main_thread:outputStyle:custom'), not just the prefix.
  if (feature('PROMPT_CACHE_BREAK_DETECTION') && querySource) {
    notifyCacheDeletion(querySource)
  }

  return { messages: result }
}