services/compact/autoCompact.ts at main

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / services / compact / autoCompact.ts
at main 351 lines 13 kB view raw
wrap content
oppi.li dump from zip 10d ago
63aada3f
  1import { feature } from 'bun:bundle'
  2import { markPostCompaction } from 'src/bootstrap/state.js'
  3import { getSdkBetas } from '../../bootstrap/state.js'
  4import type { QuerySource } from '../../constants/querySource.js'
  5import type { ToolUseContext } from '../../Tool.js'
  6import type { Message } from '../../types/message.js'
  7import { getGlobalConfig } from '../../utils/config.js'
  8import { getContextWindowForModel } from '../../utils/context.js'
  9import { logForDebugging } from '../../utils/debug.js'
 10import { isEnvTruthy } from '../../utils/envUtils.js'
 11import { hasExactErrorMessage } from '../../utils/errors.js'
 12import type { CacheSafeParams } from '../../utils/forkedAgent.js'
 13import { logError } from '../../utils/log.js'
 14import { tokenCountWithEstimation } from '../../utils/tokens.js'
 15import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
 16import { getMaxOutputTokensForModel } from '../api/claude.js'
 17import { notifyCompaction } from '../api/promptCacheBreakDetection.js'
 18import { setLastSummarizedMessageId } from '../SessionMemory/sessionMemoryUtils.js'
 19import {
 20  type CompactionResult,
 21  compactConversation,
 22  ERROR_MESSAGE_USER_ABORT,
 23  type RecompactionInfo,
 24} from './compact.js'
 25import { runPostCompactCleanup } from './postCompactCleanup.js'
 26import { trySessionMemoryCompaction } from './sessionMemoryCompact.js'
 27
 28// Reserve this many tokens for output during compaction
 29// Based on p99.99 of compact summary output being 17,387 tokens.
 30const MAX_OUTPUT_TOKENS_FOR_SUMMARY = 20_000
 31
 32// Returns the context window size minus the max output tokens for the model
 33export function getEffectiveContextWindowSize(model: string): number {
 34  const reservedTokensForSummary = Math.min(
 35    getMaxOutputTokensForModel(model),
 36    MAX_OUTPUT_TOKENS_FOR_SUMMARY,
 37  )
 38  let contextWindow = getContextWindowForModel(model, getSdkBetas())
 39
 40  const autoCompactWindow = process.env.CLAUDE_CODE_AUTO_COMPACT_WINDOW
 41  if (autoCompactWindow) {
 42    const parsed = parseInt(autoCompactWindow, 10)
 43    if (!isNaN(parsed) && parsed > 0) {
 44      contextWindow = Math.min(contextWindow, parsed)
 45    }
 46  }
 47
 48  return contextWindow - reservedTokensForSummary
 49}
 50
 51export type AutoCompactTrackingState = {
 52  compacted: boolean
 53  turnCounter: number
 54  // Unique ID per turn
 55  turnId: string
 56  // Consecutive autocompact failures. Reset on success.
 57  // Used as a circuit breaker to stop retrying when the context is
 58  // irrecoverably over the limit (e.g., prompt_too_long).
 59  consecutiveFailures?: number
 60}
 61
 62export const AUTOCOMPACT_BUFFER_TOKENS = 13_000
 63export const WARNING_THRESHOLD_BUFFER_TOKENS = 20_000
 64export const ERROR_THRESHOLD_BUFFER_TOKENS = 20_000
 65export const MANUAL_COMPACT_BUFFER_TOKENS = 3_000
 66
 67// Stop trying autocompact after this many consecutive failures.
 68// BQ 2026-03-10: 1,279 sessions had 50+ consecutive failures (up to 3,272)
 69// in a single session, wasting ~250K API calls/day globally.
 70const MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES = 3
 71
 72export function getAutoCompactThreshold(model: string): number {
 73  const effectiveContextWindow = getEffectiveContextWindowSize(model)
 74
 75  const autocompactThreshold =
 76    effectiveContextWindow - AUTOCOMPACT_BUFFER_TOKENS
 77
 78  // Override for easier testing of autocompact
 79  const envPercent = process.env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE
 80  if (envPercent) {
 81    const parsed = parseFloat(envPercent)
 82    if (!isNaN(parsed) && parsed > 0 && parsed <= 100) {
 83      const percentageThreshold = Math.floor(
 84        effectiveContextWindow * (parsed / 100),
 85      )
 86      return Math.min(percentageThreshold, autocompactThreshold)
 87    }
 88  }
 89
 90  return autocompactThreshold
 91}
 92
 93export function calculateTokenWarningState(
 94  tokenUsage: number,
 95  model: string,
 96): {
 97  percentLeft: number
 98  isAboveWarningThreshold: boolean
 99  isAboveErrorThreshold: boolean
100  isAboveAutoCompactThreshold: boolean
101  isAtBlockingLimit: boolean
102} {
103  const autoCompactThreshold = getAutoCompactThreshold(model)
104  const threshold = isAutoCompactEnabled()
105    ? autoCompactThreshold
106    : getEffectiveContextWindowSize(model)
107
108  const percentLeft = Math.max(
109    0,
110    Math.round(((threshold - tokenUsage) / threshold) * 100),
111  )
112
113  const warningThreshold = threshold - WARNING_THRESHOLD_BUFFER_TOKENS
114  const errorThreshold = threshold - ERROR_THRESHOLD_BUFFER_TOKENS
115
116  const isAboveWarningThreshold = tokenUsage >= warningThreshold
117  const isAboveErrorThreshold = tokenUsage >= errorThreshold
118
119  const isAboveAutoCompactThreshold =
120    isAutoCompactEnabled() && tokenUsage >= autoCompactThreshold
121
122  const actualContextWindow = getEffectiveContextWindowSize(model)
123  const defaultBlockingLimit =
124    actualContextWindow - MANUAL_COMPACT_BUFFER_TOKENS
125
126  // Allow override for testing
127  const blockingLimitOverride = process.env.CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE
128  const parsedOverride = blockingLimitOverride
129    ? parseInt(blockingLimitOverride, 10)
130    : NaN
131  const blockingLimit =
132    !isNaN(parsedOverride) && parsedOverride > 0
133      ? parsedOverride
134      : defaultBlockingLimit
135
136  const isAtBlockingLimit = tokenUsage >= blockingLimit
137
138  return {
139    percentLeft,
140    isAboveWarningThreshold,
141    isAboveErrorThreshold,
142    isAboveAutoCompactThreshold,
143    isAtBlockingLimit,
144  }
145}
146
/**
 * Reports whether automatic compaction is currently allowed.
 *
 * DISABLE_COMPACT turns compaction off entirely; DISABLE_AUTO_COMPACT turns
 * off only the automatic variant (manual /compact keeps working). When
 * neither env var is truthy, the user's `autoCompactEnabled` global config
 * setting decides.
 *
 * @returns `true` when autocompact may run.
 */
export function isAutoCompactEnabled(): boolean {
  const disabledByEnv =
    isEnvTruthy(process.env.DISABLE_COMPACT) ||
    isEnvTruthy(process.env.DISABLE_AUTO_COMPACT)
  if (disabledByEnv) {
    return false
  }

  // No env kill-switch set: defer to the user's saved preference.
  return getGlobalConfig().autoCompactEnabled
}
159
/**
 * Decides whether the conversation has grown enough that autocompact should
 * run before the next query.
 *
 * @param messages - Conversation whose token footprint is estimated.
 * @param model - Model identifier used to resolve the autocompact threshold.
 * @param querySource - Origin of the query; some sources never autocompact.
 * @param snipTokensFreed - Rough token delta already removed by snip. Snip
 *   drops messages, but the surviving assistant's usage still reflects the
 *   pre-snip context, so tokenCountWithEstimation can't see the savings and
 *   the delta is subtracted here instead.
 * @returns `true` when the estimated usage is at or above the autocompact
 *   threshold and no guard below suppresses autocompact.
 */
export async function shouldAutoCompact(
  messages: Message[],
  model: string,
  querySource?: QuerySource,
  snipTokensFreed = 0,
): Promise<boolean> {
  // Recursion guard: session_memory and compact are forked agents, and
  // compacting from inside them would deadlock.
  const isForkedCompactor =
    querySource === 'session_memory' || querySource === 'compact'
  if (isForkedCompactor) {
    return false
  }

  // marble_origami is the ctx-agent. If its own context overflowed and
  // autocompact ran, runPostCompactCleanup would call resetContextCollapse(),
  // wiping the MAIN thread's committed log (module-level state shared across
  // forks). The comparison stays inside the feature() block so the string is
  // dead-code-eliminated from external builds (it's in excluded-strings.txt).
  if (feature('CONTEXT_COLLAPSE')) {
    if (querySource === 'marble_origami') {
      return false
    }
  }

  if (!isAutoCompactEnabled()) {
    return false
  }

  // Reactive-only mode: no proactive autocompact; reactive compact handles
  // the API's prompt-too-long instead. The feature() wrapper keeps the flag
  // string out of external builds (REACTIVE_COMPACT is ant-only).
  // Note: bailing out here also means autoCompactIfNeeded never reaches
  // trySessionMemoryCompaction in the query loop — the /compact call site
  // still tries session memory first. Revisit if reactive-only graduates.
  if (feature('REACTIVE_COMPACT')) {
    if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_raccoon', false)) {
      return false
    }
  }

  // Context-collapse mode gets the same suppression. When collapse is on it
  // IS the context management system — its 90% commit / 95% blocking-spawn
  // flow owns the headroom problem. Autocompact firing at effective-13k
  // (~93% of effective) lands between collapse's commit-start (90%) and
  // blocking (95%), so it would race collapse and usually win, destroying
  // granular context collapse was about to save. Gating here rather than in
  // isAutoCompactEnabled() keeps reactiveCompact alive as the 413 fallback
  // (it consults isAutoCompactEnabled directly) and leaves sessionMemory and
  // manual /compact working.
  //
  // isContextCollapseEnabled is consulted (not the raw gate) so the
  // CLAUDE_CONTEXT_COLLAPSE env override is honored here too. The require()
  // inside the block breaks the init-time cycle (this file exports
  // getEffectiveContextWindowSize, which collapse's index imports).
  if (feature('CONTEXT_COLLAPSE')) {
    /* eslint-disable @typescript-eslint/no-require-imports */
    const { isContextCollapseEnabled } =
      require('../contextCollapse/index.js') as typeof import('../contextCollapse/index.js')
    /* eslint-enable @typescript-eslint/no-require-imports */
    if (isContextCollapseEnabled()) {
      return false
    }
  }

  const estimatedTokens = tokenCountWithEstimation(messages) - snipTokensFreed
  const compactAt = getAutoCompactThreshold(model)
  const usableWindow = getEffectiveContextWindowSize(model)
  // Only mention the snip delta in the debug line when snip freed something.
  const snipSuffix = snipTokensFreed > 0 ? ` snipFreed=${snipTokensFreed}` : ''

  logForDebugging(
    `autocompact: tokens=${estimatedTokens} threshold=${compactAt} effectiveWindow=${usableWindow}${snipSuffix}`,
  )

  const warningState = calculateTokenWarningState(estimatedTokens, model)
  return warningState.isAboveAutoCompactThreshold
}
240
/**
 * Runs autocompact when the conversation has crossed the autocompact
 * threshold, trying session memory compaction first and falling back to
 * compactConversation.
 *
 * @param messages - Conversation to (possibly) compact.
 * @param toolUseContext - Supplies the main loop model and agent id, and is
 *   forwarded to compactConversation.
 * @param cacheSafeParams - Forwarded unchanged to compactConversation.
 * @param querySource - Origin of the query; forwarded to the autocompact
 *   decision, post-compact cleanup and recompaction info.
 * @param tracking - State from earlier attempts; drives the circuit breaker
 *   and the recompaction info.
 * @param snipTokensFreed - Tokens already freed by snip, forwarded to
 *   shouldAutoCompact.
 * @returns `wasCompacted` plus the compaction result on success.
 *   `consecutiveFailures` is 0 after any successful compaction, the
 *   incremented count after a failed compactConversation, and omitted when no
 *   attempt was made.
 */
export async function autoCompactIfNeeded(
  messages: Message[],
  toolUseContext: ToolUseContext,
  cacheSafeParams: CacheSafeParams,
  querySource?: QuerySource,
  tracking?: AutoCompactTrackingState,
  snipTokensFreed?: number,
): Promise<{
  wasCompacted: boolean
  compactionResult?: CompactionResult
  consecutiveFailures?: number
}> {
  if (isEnvTruthy(process.env.DISABLE_COMPACT)) {
    return { wasCompacted: false }
  }

  // Circuit breaker: stop retrying after N consecutive failures.
  // Without this, sessions where context is irrecoverably over the limit
  // hammer the API with doomed compaction attempts on every turn.
  if (
    tracking?.consecutiveFailures !== undefined &&
    tracking.consecutiveFailures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES
  ) {
    return { wasCompacted: false }
  }

  const model = toolUseContext.options.mainLoopModel
  const shouldCompact = await shouldAutoCompact(
    messages,
    model,
    querySource,
    snipTokensFreed,
  )

  if (!shouldCompact) {
    return { wasCompacted: false }
  }

  const recompactionInfo: RecompactionInfo = {
    isRecompactionInChain: tracking?.compacted === true,
    turnsSincePreviousCompact: tracking?.turnCounter ?? -1,
    previousCompactTurnId: tracking?.turnId,
    autoCompactThreshold: getAutoCompactThreshold(model),
    querySource,
  }

  // EXPERIMENT: Try session memory compaction first
  const sessionMemoryResult = await trySessionMemoryCompaction(
    messages,
    toolUseContext.agentId,
    recompactionInfo.autoCompactThreshold,
  )
  if (sessionMemoryResult) {
    // Reset lastSummarizedMessageId since session memory compaction prunes messages
    // and the old message UUID will no longer exist after the REPL replaces messages
    setLastSummarizedMessageId(undefined)
    runPostCompactCleanup(querySource)
    // Reset cache read baseline so the post-compact drop isn't flagged as a
    // break. compactConversation does this internally; SM-compact doesn't.
    // BQ 2026-03-01: missing this made 20% of tengu_prompt_cache_break events
    // false positives (systemPromptChanged=true, timeSinceLastAssistantMsg=-1).
    if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
      notifyCompaction(querySource ?? 'compact', toolUseContext.agentId)
    }
    markPostCompaction()
    return {
      wasCompacted: true,
      compactionResult: sessionMemoryResult,
      // A session memory compaction is a success too, so report the reset
      // failure count just like the compactConversation path does.
      consecutiveFailures: 0,
    }
  }

  try {
    const compactionResult = await compactConversation(
      messages,
      toolUseContext,
      cacheSafeParams,
      true, // Suppress user questions for autocompact
      undefined, // No custom instructions for autocompact
      true, // isAutoCompact
      recompactionInfo,
    )

    // Reset lastSummarizedMessageId since legacy compaction replaces all messages
    // and the old message UUID will no longer exist in the new messages array
    setLastSummarizedMessageId(undefined)
    runPostCompactCleanup(querySource)

    return {
      wasCompacted: true,
      compactionResult,
      // Reset failure count on success
      consecutiveFailures: 0,
    }
  } catch (error) {
    // User aborts are expected; everything else is logged.
    if (!hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT)) {
      logError(error)
    }
    // Increment consecutive failure count for circuit breaker.
    // The caller threads this through autoCompactTracking so the
    // next query loop iteration can skip futile retry attempts.
    const prevFailures = tracking?.consecutiveFailures ?? 0
    const nextFailures = prevFailures + 1
    if (nextFailures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES) {
      logForDebugging(
        `autocompact: circuit breaker tripped after ${nextFailures} consecutive failures — skipping future attempts this session`,
        { level: 'warn' },
      )
    }
    return { wasCompacted: false, consecutiveFailures: nextFailures }
  }
}