source dump of claude code
at main 351 lines 13 kB view raw
1import { feature } from 'bun:bundle' 2import { markPostCompaction } from 'src/bootstrap/state.js' 3import { getSdkBetas } from '../../bootstrap/state.js' 4import type { QuerySource } from '../../constants/querySource.js' 5import type { ToolUseContext } from '../../Tool.js' 6import type { Message } from '../../types/message.js' 7import { getGlobalConfig } from '../../utils/config.js' 8import { getContextWindowForModel } from '../../utils/context.js' 9import { logForDebugging } from '../../utils/debug.js' 10import { isEnvTruthy } from '../../utils/envUtils.js' 11import { hasExactErrorMessage } from '../../utils/errors.js' 12import type { CacheSafeParams } from '../../utils/forkedAgent.js' 13import { logError } from '../../utils/log.js' 14import { tokenCountWithEstimation } from '../../utils/tokens.js' 15import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js' 16import { getMaxOutputTokensForModel } from '../api/claude.js' 17import { notifyCompaction } from '../api/promptCacheBreakDetection.js' 18import { setLastSummarizedMessageId } from '../SessionMemory/sessionMemoryUtils.js' 19import { 20 type CompactionResult, 21 compactConversation, 22 ERROR_MESSAGE_USER_ABORT, 23 type RecompactionInfo, 24} from './compact.js' 25import { runPostCompactCleanup } from './postCompactCleanup.js' 26import { trySessionMemoryCompaction } from './sessionMemoryCompact.js' 27 28// Reserve this many tokens for output during compaction 29// Based on p99.99 of compact summary output being 17,387 tokens. 
const MAX_OUTPUT_TOKENS_FOR_SUMMARY = 20_000

/**
 * Returns the context window size for `model` minus the tokens reserved for
 * the compaction summary output.
 *
 * The reservation is the model's max output tokens, capped at
 * MAX_OUTPUT_TOKENS_FOR_SUMMARY. CLAUDE_CODE_AUTO_COMPACT_WINDOW may lower
 * (never raise) the context window used in the calculation.
 *
 * @param model Model identifier passed through to the context-window and
 *   max-output-token lookups.
 * @returns The context window minus the summary reservation. The result is
 *   not clamped, so it can be negative when the window is smaller than the
 *   reservation.
 */
export function getEffectiveContextWindowSize(model: string): number {
  const reservedTokensForSummary = Math.min(
    getMaxOutputTokensForModel(model),
    MAX_OUTPUT_TOKENS_FOR_SUMMARY,
  )
  let contextWindow = getContextWindowForModel(model, getSdkBetas())

  // Only accept an override that is entirely a positive decimal integer.
  // A lenient parseInt would read "150k" or "150,000" as 150 and "1e6" as 1,
  // silently shrinking the window to a handful of tokens; such values are
  // ignored instead.
  const autoCompactWindow = process.env.CLAUDE_CODE_AUTO_COMPACT_WINDOW?.trim()
  if (autoCompactWindow && /^\d+$/.test(autoCompactWindow)) {
    const parsed = Number.parseInt(autoCompactWindow, 10)
    if (parsed > 0) {
      contextWindow = Math.min(contextWindow, parsed)
    }
  }

  return contextWindow - reservedTokensForSummary
}

/** Autocompact bookkeeping carried between calls by the caller. */
export type AutoCompactTrackingState = {
  compacted: boolean
  turnCounter: number
  // Unique ID per turn
  turnId: string
  // Consecutive autocompact failures. Reset on success.
  // Used as a circuit breaker to stop retrying when the context is
  // irrecoverably over the limit (e.g., prompt_too_long).
  consecutiveFailures?: number
}

export const AUTOCOMPACT_BUFFER_TOKENS = 13_000
export const WARNING_THRESHOLD_BUFFER_TOKENS = 20_000
export const ERROR_THRESHOLD_BUFFER_TOKENS = 20_000
export const MANUAL_COMPACT_BUFFER_TOKENS = 3_000

// Stop trying autocompact after this many consecutive failures.
// BQ 2026-03-10: 1,279 sessions had 50+ consecutive failures (up to 3,272)
// in a single session, wasting ~250K API calls/day globally.
const MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES = 3

/**
 * Returns the token count at which proactive autocompact fires for `model`:
 * the effective context window minus AUTOCOMPACT_BUFFER_TOKENS.
 *
 * CLAUDE_AUTOCOMPACT_PCT_OVERRIDE (a percentage in (0, 100] of the effective
 * window) can lower the threshold but never raise it above the default.
 */
export function getAutoCompactThreshold(model: string): number {
  const effectiveContextWindow = getEffectiveContextWindowSize(model)

  const autocompactThreshold =
    effectiveContextWindow - AUTOCOMPACT_BUFFER_TOKENS

  // Override for easier testing of autocompact
  const envPercent = process.env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE
  if (envPercent) {
    const parsed = parseFloat(envPercent)
    if (!isNaN(parsed) && parsed > 0 && parsed <= 100) {
      const percentageThreshold = Math.floor(
        effectiveContextWindow * (parsed / 100),
      )
      return Math.min(percentageThreshold, autocompactThreshold)
    }
  }

  return autocompactThreshold
}

/**
 * Classifies `tokenUsage` against the warning, error, autocompact and
 * blocking limits for `model`.
 *
 * The reference threshold is the autocompact threshold when autocompact is
 * enabled, otherwise the effective context window.
 *
 * @param tokenUsage Current token count of the conversation.
 * @param model Model identifier used to look up window sizes.
 * @returns `percentLeft` as an integer in [0, 100] relative to the reference
 *   threshold, plus one flag per limit that `tokenUsage` has reached.
 */
export function calculateTokenWarningState(
  tokenUsage: number,
  model: string,
): {
  percentLeft: number
  isAboveWarningThreshold: boolean
  isAboveErrorThreshold: boolean
  isAboveAutoCompactThreshold: boolean
  isAtBlockingLimit: boolean
} {
  // Each of these is looked up once and reused below.
  const autoCompactEnabled = isAutoCompactEnabled()
  const effectiveContextWindow = getEffectiveContextWindowSize(model)
  const autoCompactThreshold = getAutoCompactThreshold(model)
  const threshold = autoCompactEnabled
    ? autoCompactThreshold
    : effectiveContextWindow

  // A non-positive threshold (e.g. a small CLAUDE_CODE_AUTO_COMPACT_WINDOW)
  // means no headroom at all. Dividing by it would report values above 100%
  // or NaN, so report 0 instead. The upper clamp covers a negative
  // `tokenUsage`.
  const percentLeft =
    threshold > 0
      ? Math.min(
          100,
          Math.max(
            0,
            Math.round(((threshold - tokenUsage) / threshold) * 100),
          ),
        )
      : 0

  const warningThreshold = threshold - WARNING_THRESHOLD_BUFFER_TOKENS
  const errorThreshold = threshold - ERROR_THRESHOLD_BUFFER_TOKENS

  const isAboveWarningThreshold = tokenUsage >= warningThreshold
  const isAboveErrorThreshold = tokenUsage >= errorThreshold

  const isAboveAutoCompactThreshold =
    autoCompactEnabled && tokenUsage >= autoCompactThreshold

  const defaultBlockingLimit =
    effectiveContextWindow - MANUAL_COMPACT_BUFFER_TOKENS

  // Allow override for testing
  const blockingLimitOverride = process.env.CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE
  const parsedOverride = blockingLimitOverride
    ? parseInt(blockingLimitOverride, 10)
    : NaN
  const blockingLimit =
    !isNaN(parsedOverride) && parsedOverride > 0
      ? parsedOverride
      : defaultBlockingLimit

  const isAtBlockingLimit = tokenUsage >= blockingLimit

  return {
    percentLeft,
    isAboveWarningThreshold,
    isAboveErrorThreshold,
    isAboveAutoCompactThreshold,
    isAtBlockingLimit,
  }
}

/**
 * Reports whether proactive autocompact is enabled.
 *
 * Returns false when DISABLE_COMPACT or DISABLE_AUTO_COMPACT is truthy;
 * otherwise returns the `autoCompactEnabled` value from the global config.
 */
export function isAutoCompactEnabled(): boolean {
  if (isEnvTruthy(process.env.DISABLE_COMPACT)) {
    return false
  }
  // Allow disabling just auto-compact (keeps manual /compact working)
  if (isEnvTruthy(process.env.DISABLE_AUTO_COMPACT)) {
    return false
  }
  // Check if user has disabled auto-compact in their settings
  const userConfig = getGlobalConfig()
  return userConfig.autoCompactEnabled
}

/**
 * Decides whether proactive autocompact should run for this conversation.
 *
 * Resolves to false for the guarded query sources, when autocompact is
 * disabled, and in reactive-only or context-collapse mode. Otherwise it
 * resolves to whether the estimated token count (minus `snipTokensFreed`)
 * has reached the autocompact threshold for `model`.
 *
 * @param messages Conversation messages to estimate tokens for.
 * @param model Model identifier used to look up the threshold.
 * @param querySource Origin of the query; some sources never autocompact.
 * @param snipTokensFreed Tokens already freed by snip, subtracted from the
 *   estimate.
 */
export async function shouldAutoCompact(
  messages: Message[],
  model: string,
  querySource?: QuerySource,
  // Snip removes messages but the surviving assistant's usage still reflects
  // pre-snip context, so tokenCountWithEstimation can't see the savings.
  // Subtract the rough-delta that snip already computed.
  snipTokensFreed = 0,
): Promise<boolean> {
  // Recursion guards. session_memory and compact are forked agents that
  // would deadlock.
  if (querySource === 'session_memory' || querySource === 'compact') {
    return false
  }
  // marble_origami is the ctx-agent — if ITS context blows up and
  // autocompact fires, runPostCompactCleanup calls resetContextCollapse()
  // which destroys the MAIN thread's committed log (module-level state
  // shared across forks). Inside feature() so the string DCEs from
  // external builds (it's in excluded-strings.txt).
  if (feature('CONTEXT_COLLAPSE')) {
    if (querySource === 'marble_origami') {
      return false
    }
  }

  if (!isAutoCompactEnabled()) {
    return false
  }

  // Reactive-only mode: suppress proactive autocompact, let reactive compact
  // catch the API's prompt-too-long. feature() wrapper keeps the flag string
  // out of external builds (REACTIVE_COMPACT is ant-only).
  // Note: returning false here also means autoCompactIfNeeded never reaches
  // trySessionMemoryCompaction in the query loop — the /compact call site
  // still tries session memory first. Revisit if reactive-only graduates.
  if (feature('REACTIVE_COMPACT')) {
    if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_raccoon', false)) {
      return false
    }
  }

  // Context-collapse mode: same suppression. Collapse IS the context
  // management system when it's on — the 90% commit / 95% blocking-spawn
  // flow owns the headroom problem. Autocompact firing at effective-13k
  // (~93% of effective) sits right between collapse's commit-start (90%)
  // and blocking (95%), so it would race collapse and usually win, nuking
  // granular context that collapse was about to save. Gating here rather
  // than in isAutoCompactEnabled() keeps reactiveCompact alive as the 413
  // fallback (it consults isAutoCompactEnabled directly) and leaves
  // sessionMemory + manual /compact working.
  //
  // Consult isContextCollapseEnabled (not the raw gate) so the
  // CLAUDE_CONTEXT_COLLAPSE env override is honored here too. require()
  // inside the block breaks the init-time cycle (this file exports
  // getEffectiveContextWindowSize which collapse's index imports).
  if (feature('CONTEXT_COLLAPSE')) {
    /* eslint-disable @typescript-eslint/no-require-imports */
    const { isContextCollapseEnabled } =
      require('../contextCollapse/index.js') as typeof import('../contextCollapse/index.js')
    /* eslint-enable @typescript-eslint/no-require-imports */
    if (isContextCollapseEnabled()) {
      return false
    }
  }

  const tokenCount = tokenCountWithEstimation(messages) - snipTokensFreed
  // These two values are computed for the debug log line only.
  const threshold = getAutoCompactThreshold(model)
  const effectiveWindow = getEffectiveContextWindowSize(model)

  logForDebugging(
    `autocompact: tokens=${tokenCount} threshold=${threshold} effectiveWindow=${effectiveWindow}${snipTokensFreed > 0 ? ` snipFreed=${snipTokensFreed}` : ''}`,
  )

  const { isAboveAutoCompactThreshold } = calculateTokenWarningState(
    tokenCount,
    model,
  )

  return isAboveAutoCompactThreshold
}

/**
 * Runs autocompact when the conversation has reached the threshold.
 *
 * Session-memory compaction is tried first; if it produces no result, the
 * conversation is compacted with `compactConversation`. After
 * MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES consecutive failures recorded in
 * `tracking`, no further attempt is made.
 *
 * @param messages Conversation messages to compact.
 * @param toolUseContext Supplies the main-loop model and agent id.
 * @param cacheSafeParams Passed through to `compactConversation`.
 * @param querySource Origin of the query, used for guards and cleanup.
 * @param tracking State from earlier calls, including the failure count.
 * @param snipTokensFreed Tokens already freed by snip.
 * @returns `wasCompacted`, the compaction result when one was produced, and
 *   `consecutiveFailures` (0 after any successful compaction, previous + 1
 *   after a failed one, omitted when no compaction was attempted).
 */
export async function autoCompactIfNeeded(
  messages: Message[],
  toolUseContext: ToolUseContext,
  cacheSafeParams: CacheSafeParams,
  querySource?: QuerySource,
  tracking?: AutoCompactTrackingState,
  snipTokensFreed?: number,
): Promise<{
  wasCompacted: boolean
  compactionResult?: CompactionResult
  consecutiveFailures?: number
}> {
  if (isEnvTruthy(process.env.DISABLE_COMPACT)) {
    return { wasCompacted: false }
  }

  // Circuit breaker: stop retrying after N consecutive failures.
  // Without this, sessions where context is irrecoverably over the limit
  // hammer the API with doomed compaction attempts on every turn.
  if (
    (tracking?.consecutiveFailures ?? 0) >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES
  ) {
    return { wasCompacted: false }
  }

  const model = toolUseContext.options.mainLoopModel
  const shouldCompact = await shouldAutoCompact(
    messages,
    model,
    querySource,
    snipTokensFreed,
  )

  if (!shouldCompact) {
    return { wasCompacted: false }
  }

  const recompactionInfo: RecompactionInfo = {
    isRecompactionInChain: tracking?.compacted === true,
    turnsSincePreviousCompact: tracking?.turnCounter ?? -1,
    previousCompactTurnId: tracking?.turnId,
    autoCompactThreshold: getAutoCompactThreshold(model),
    querySource,
  }

  // EXPERIMENT: Try session memory compaction first
  const sessionMemoryResult = await trySessionMemoryCompaction(
    messages,
    toolUseContext.agentId,
    recompactionInfo.autoCompactThreshold,
  )
  if (sessionMemoryResult) {
    // Reset lastSummarizedMessageId since session memory compaction prunes messages
    // and the old message UUID will no longer exist after the REPL replaces messages
    setLastSummarizedMessageId(undefined)
    runPostCompactCleanup(querySource)
    // Reset cache read baseline so the post-compact drop isn't flagged as a
    // break. compactConversation does this internally; SM-compact doesn't.
    // BQ 2026-03-01: missing this made 20% of tengu_prompt_cache_break events
    // false positives (systemPromptChanged=true, timeSinceLastAssistantMsg=-1).
    if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
      notifyCompaction(querySource ?? 'compact', toolUseContext.agentId)
    }
    markPostCompaction()
    return {
      wasCompacted: true,
      compactionResult: sessionMemoryResult,
      // Reset failure count on success, matching the legacy compaction path
      // below, so earlier failures don't count as "consecutive" afterwards.
      consecutiveFailures: 0,
    }
  }

  try {
    const compactionResult = await compactConversation(
      messages,
      toolUseContext,
      cacheSafeParams,
      true, // Suppress user questions for autocompact
      undefined, // No custom instructions for autocompact
      true, // isAutoCompact
      recompactionInfo,
    )

    // Reset lastSummarizedMessageId since legacy compaction replaces all messages
    // and the old message UUID will no longer exist in the new messages array
    setLastSummarizedMessageId(undefined)
    runPostCompactCleanup(querySource)

    return {
      wasCompacted: true,
      compactionResult,
      // Reset failure count on success
      consecutiveFailures: 0,
    }
  } catch (error) {
    // NOTE(review): a user abort is not logged but still increments the
    // failure count below — confirm aborts are meant to count toward the
    // circuit breaker.
    if (!hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT)) {
      logError(error)
    }
    // Increment consecutive failure count for circuit breaker.
    // The caller threads this through autoCompactTracking so the
    // next query loop iteration can skip futile retry attempts.
    const prevFailures = tracking?.consecutiveFailures ?? 0
    const nextFailures = prevFailures + 1
    if (nextFailures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES) {
      logForDebugging(
        `autocompact: circuit breaker tripped after ${nextFailures} consecutive failures — skipping future attempts this session`,
        { level: 'warn' },
      )
    }
    return { wasCompacted: false, consecutiveFailures: nextFailures }
  }
}