// Source dump of Claude Code.
// Source: main branch (original file: 630 lines, 21 kB).
/**
 * EXPERIMENT: Session memory compaction
 */

import type { AgentId } from '../../types/ids.js'
import type { HookResultMessage, Message } from '../../types/message.js'
import { logForDebugging } from '../../utils/debug.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { errorMessage } from '../../utils/errors.js'
import {
  createCompactBoundaryMessage,
  createUserMessage,
  isCompactBoundaryMessage,
} from '../../utils/messages.js'
import { getMainLoopModel } from '../../utils/model/model.js'
import { getSessionMemoryPath } from '../../utils/permissions/filesystem.js'
import { processSessionStartHooks } from '../../utils/sessionStart.js'
import { getTranscriptPath } from '../../utils/sessionStorage.js'
import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
import { extractDiscoveredToolNames } from '../../utils/toolSearch.js'
import {
  getDynamicConfig_BLOCKS_ON_INIT,
  getFeatureValue_CACHED_MAY_BE_STALE,
} from '../analytics/growthbook.js'
import { logEvent } from '../analytics/index.js'
import {
  isSessionMemoryEmpty,
  truncateSessionMemoryForCompact,
} from '../SessionMemory/prompts.js'
import {
  getLastSummarizedMessageId,
  getSessionMemoryContent,
  waitForSessionMemoryExtraction,
} from '../SessionMemory/sessionMemoryUtils.js'
import {
  annotateBoundaryWithPreservedSegment,
  buildPostCompactMessages,
  type CompactionResult,
  createPlanAttachmentIfNeeded,
} from './compact.js'
import { estimateMessageTokens } from './microCompact.js'
import { getCompactUserSummaryMessage } from './prompt.js'

/**
 * Configuration for session memory compaction thresholds
 */
export type SessionMemoryCompactConfig = {
  /** Minimum tokens to preserve after compaction */
  minTokens: number
  /** Minimum number of messages with text blocks to keep */
  minTextBlockMessages: number
  /** Maximum tokens to preserve after compaction (hard cap) */
  maxTokens: number
}

// Default configuration values (exported for use in tests)
export const DEFAULT_SM_COMPACT_CONFIG: SessionMemoryCompactConfig = {
  minTokens: 10_000,
  minTextBlockMessages: 5,
  maxTokens: 40_000,
}

// Current configuration (starts with defaults)
let smCompactConfig: SessionMemoryCompactConfig = {
  ...DEFAULT_SM_COMPACT_CONFIG,
}

// Track whether config has been initialized from remote
let configInitialized = false

/**
 * Set the session memory compact configuration.
 *
 * @param config - Fields to override; fields that are omitted keep their
 *   current value.
 */
export function setSessionMemoryCompactConfig(
  config: Partial<SessionMemoryCompactConfig>,
): void {
  smCompactConfig = {
    ...smCompactConfig,
    ...config,
  }
}

/**
 * Get the current session memory compact configuration.
 *
 * @returns A shallow copy, so callers cannot mutate the module-level state.
 */
export function getSessionMemoryCompactConfig(): SessionMemoryCompactConfig {
  return { ...smCompactConfig }
}

/**
 * Reset config state (useful for testing).
 * Restores the defaults and clears the "already initialized" flag so the next
 * initialization fetches the remote config again.
 */
export function resetSessionMemoryCompactConfig(): void {
  smCompactConfig = { ...DEFAULT_SM_COMPACT_CONFIG }
  configInitialized = false
}

/**
 * Return `value` when it is a finite number greater than zero, otherwise
 * `fallback`. Remote config is untyped at runtime, so the type is checked
 * explicitly instead of relying on truthiness.
 */
function positiveNumberOr(value: unknown, fallback: number): number {
  return typeof value === 'number' && Number.isFinite(value) && value > 0
    ? value
    : fallback
}

/**
 * Initialize configuration from remote config (GrowthBook).
 * Only fetches once per session - subsequent calls return immediately.
 */
async function initSessionMemoryCompactConfig(): Promise<void> {
  if (configInitialized) {
    return
  }
  // The flag is set before the await, so a call arriving while the fetch is
  // still in flight returns immediately and sees whatever config is current.
  configInitialized = true

  // Load config from GrowthBook, merging with defaults
  const remoteConfig = await getDynamicConfig_BLOCKS_ON_INIT<
    Partial<SessionMemoryCompactConfig>
  >('tengu_sm_compact_config', {})

  // Only use remote values if they are explicitly set (positive numbers).
  // This ensures sensible defaults aren't overridden by zero, missing, or
  // wrongly-typed values.
  const config: SessionMemoryCompactConfig = {
    minTokens: positiveNumberOr(
      remoteConfig.minTokens,
      DEFAULT_SM_COMPACT_CONFIG.minTokens,
    ),
    minTextBlockMessages: positiveNumberOr(
      remoteConfig.minTextBlockMessages,
      DEFAULT_SM_COMPACT_CONFIG.minTextBlockMessages,
    ),
    maxTokens: positiveNumberOr(
      remoteConfig.maxTokens,
      DEFAULT_SM_COMPACT_CONFIG.maxTokens,
    ),
  }
  setSessionMemoryCompactConfig(config)
}

/**
 * Check if a message contains text blocks (text content for user/assistant interaction).
 *
 * @returns true for an assistant message with at least one `text` block, or a
 *   user message whose content is a non-empty string or contains a `text`
 *   block; false for every other message.
 */
export function hasTextBlocks(message: Message): boolean {
  if (message.type === 'assistant') {
    const content = message.message.content
    return content.some(block => block.type === 'text')
  }
  if (message.type === 'user') {
    const content = message.message.content
    if (typeof content === 'string') {
      return content.length > 0
    }
    if (Array.isArray(content)) {
      return content.some(block => block.type === 'text')
    }
  }
  return false
}

/**
 * Check if a message contains tool_result blocks and return their tool_use_ids.
 *
 * @returns The `tool_use_id` of every `tool_result` block, in order; an empty
 *   array for non-user messages and for user messages with non-array content.
 */
function getToolResultIds(message: Message): string[] {
  if (message.type !== 'user') {
    return []
  }
  const content = message.message.content
  if (!Array.isArray(content)) {
    return []
  }
  const ids: string[] = []
  for (const block of content) {
    if (block.type === 'tool_result') {
      ids.push(block.tool_use_id)
    }
  }
  return ids
}

/**
 * Check if a message contains tool_use blocks with any of the given ids.
 * Only assistant messages with array content can match.
 */
function hasToolUseWithIds(message: Message, toolUseIds: Set<string>): boolean {
  if (message.type !== 'assistant') {
    return false
  }
  const content = message.message.content
  if (!Array.isArray(content)) {
    return false
  }
  return content.some(
    block => block.type === 'tool_use' && toolUseIds.has(block.id),
  )
}

/**
 * Adjust the start index to ensure we don't split tool_use/tool_result pairs
 * or thinking blocks that share the same message.id with kept assistant messages.
 *
 * If ANY message we're keeping contains tool_result blocks, we need to
 * include the preceding assistant message(s) that contain the matching tool_use blocks.
 *
 * Additionally, if ANY assistant message in the kept range has the same message.id
 * as a preceding assistant message (which may contain thinking blocks), we need to
 * include those messages so they can be properly merged by normalizeMessagesForAPI.
 *
 * This handles the case where streaming yields separate messages per content block
 * (thinking, tool_use, etc.) with the same message.id but different uuids. If the
 * startIndex lands on one of these streaming messages, we need to look at ALL kept
 * messages for tool_results, not just the first one.
 *
 * Example bug scenarios this fixes:
 *
 * Tool pair scenario:
 *   Session storage (before compaction):
 *     Index N:   assistant, message.id: X, content: [thinking]
 *     Index N+1: assistant, message.id: X, content: [tool_use: ORPHAN_ID]
 *     Index N+2: assistant, message.id: X, content: [tool_use: VALID_ID]
 *     Index N+3: user, content: [tool_result: ORPHAN_ID, tool_result: VALID_ID]
 *
 *   If startIndex = N+2:
 *     - Old code: checked only message N+2 for tool_results, found none, returned N+2
 *     - After slicing and normalizeMessagesForAPI merging by message.id:
 *         msg[1]: assistant with [tool_use: VALID_ID] (ORPHAN tool_use was excluded!)
 *         msg[2]: user with [tool_result: ORPHAN_ID, tool_result: VALID_ID]
 *     - API error: orphan tool_result references non-existent tool_use
 *
 * Thinking block scenario:
 *   Session storage (before compaction):
 *     Index N:   assistant, message.id: X, content: [thinking]
 *     Index N+1: assistant, message.id: X, content: [tool_use: ID]
 *     Index N+2: user, content: [tool_result: ID]
 *
 *   If startIndex = N+1:
 *     - Without this fix: thinking block at N is excluded
 *     - After normalizeMessagesForAPI: thinking block is lost (no message to merge with)
 *
 * Fixed code: detects that message N+1 has same message.id as N, adjusts to N.
 *
 * @param messages - Full message list being compacted.
 * @param startIndex - Proposed index of the first message to keep.
 * @returns An index less than or equal to `startIndex`. `startIndex` is
 *   returned unchanged when it is `<= 0` or `>= messages.length`.
 */
export function adjustIndexToPreserveAPIInvariants(
  messages: Message[],
  startIndex: number,
): number {
  if (startIndex <= 0 || startIndex >= messages.length) {
    return startIndex
  }

  let adjustedIndex = startIndex

  // Step 1: Handle tool_use/tool_result pairs
  // Collect tool_result IDs from ALL messages in the kept range
  const allToolResultIds: string[] = []
  for (let i = startIndex; i < messages.length; i++) {
    allToolResultIds.push(...getToolResultIds(messages[i]!))
  }

  if (allToolResultIds.length > 0) {
    // Collect tool_use IDs already in the kept range
    const toolUseIdsInKeptRange = new Set<string>()
    for (let i = adjustedIndex; i < messages.length; i++) {
      const msg = messages[i]!
      if (msg.type === 'assistant' && Array.isArray(msg.message.content)) {
        for (const block of msg.message.content) {
          if (block.type === 'tool_use') {
            toolUseIdsInKeptRange.add(block.id)
          }
        }
      }
    }

    // Only look for tool_uses that are NOT already in the kept range
    const neededToolUseIds = new Set(
      allToolResultIds.filter(id => !toolUseIdsInKeptRange.has(id)),
    )

    // Find the assistant message(s) with matching tool_use blocks.
    // The scan stops early once every needed id has been found.
    for (let i = adjustedIndex - 1; i >= 0 && neededToolUseIds.size > 0; i--) {
      const message = messages[i]!
      if (hasToolUseWithIds(message, neededToolUseIds)) {
        adjustedIndex = i
        // Remove found tool_use_ids from the set
        if (
          message.type === 'assistant' &&
          Array.isArray(message.message.content)
        ) {
          for (const block of message.message.content) {
            if (block.type === 'tool_use' && neededToolUseIds.has(block.id)) {
              neededToolUseIds.delete(block.id)
            }
          }
        }
      }
    }
  }

  // Step 2: Handle thinking blocks that share message.id with kept assistant messages
  // Collect all message.ids from assistant messages in the kept range
  const messageIdsInKeptRange = new Set<string>()
  for (let i = adjustedIndex; i < messages.length; i++) {
    const msg = messages[i]!
    if (msg.type === 'assistant' && msg.message.id) {
      messageIdsInKeptRange.add(msg.message.id)
    }
  }

  // Look backwards for assistant messages with the same message.id that are not in the kept range
  // These may contain thinking blocks that need to be merged by normalizeMessagesForAPI
  for (let i = adjustedIndex - 1; i >= 0; i--) {
    const message = messages[i]!
    if (
      message.type === 'assistant' &&
      message.message.id &&
      messageIdsInKeptRange.has(message.message.id)
    ) {
      // This message has the same message.id as one in the kept range
      // Include it so thinking blocks can be properly merged
      adjustedIndex = i
    }
  }

  return adjustedIndex
}

/**
 * Calculate the starting index for messages to keep after compaction.
 * Starts from lastSummarizedMessageId, then expands backwards to meet minimums:
 * - At least config.minTokens tokens
 * - At least config.minTextBlockMessages messages with text blocks
 * Stops expanding if config.maxTokens is reached.
 * Also ensures tool_use/tool_result pairs are not split.
 *
 * @param messages - Full message list being compacted.
 * @param lastSummarizedIndex - Index of the last message already covered by
 *   session memory. A negative value, or any value `>= messages.length - 1`,
 *   means no messages are kept initially.
 * @returns Index of the first message to keep (0 when `messages` is empty).
 */
export function calculateMessagesToKeepIndex(
  messages: Message[],
  lastSummarizedIndex: number,
): number {
  if (messages.length === 0) {
    return 0
  }

  const config = getSessionMemoryCompactConfig()

  // Start from the message after lastSummarizedIndex.
  // If lastSummarizedIndex is negative (not found) we start with no messages
  // kept. The result is clamped to messages.length so that an index at or past
  // the end can never make the backward expansion below read past the array.
  let startIndex =
    lastSummarizedIndex >= 0
      ? Math.min(lastSummarizedIndex + 1, messages.length)
      : messages.length

  // Calculate current tokens and text-block message count from startIndex to end
  let totalTokens = 0
  let textBlockMessageCount = 0
  for (let i = startIndex; i < messages.length; i++) {
    const msg = messages[i]!
    totalTokens += estimateMessageTokens([msg])
    if (hasTextBlocks(msg)) {
      textBlockMessageCount++
    }
  }

  // Check if we already hit the max cap
  if (totalTokens >= config.maxTokens) {
    return adjustIndexToPreserveAPIInvariants(messages, startIndex)
  }

  // Check if we already meet both minimums
  if (
    totalTokens >= config.minTokens &&
    textBlockMessageCount >= config.minTextBlockMessages
  ) {
    return adjustIndexToPreserveAPIInvariants(messages, startIndex)
  }

  // Expand backwards until we meet both minimums or hit max cap.
  // Floor at the last boundary: the preserved-segment chain has a disk
  // discontinuity there (att[0]→summary shortcut from dedup-skip), which
  // would let the loader's tail→head walk bypass inner preserved messages
  // and then prune them. Reactive compact already slices at the boundary
  // via getMessagesAfterCompactBoundary; this is the same invariant.
  const idx = messages.findLastIndex(m => isCompactBoundaryMessage(m))
  const floor = idx === -1 ? 0 : idx + 1
  for (let i = startIndex - 1; i >= floor; i--) {
    const msg = messages[i]!
    const msgTokens = estimateMessageTokens([msg])
    totalTokens += msgTokens
    if (hasTextBlocks(msg)) {
      textBlockMessageCount++
    }
    startIndex = i

    // Stop if we hit the max cap
    if (totalTokens >= config.maxTokens) {
      break
    }

    // Stop if we meet both minimums
    if (
      totalTokens >= config.minTokens &&
      textBlockMessageCount >= config.minTextBlockMessages
    ) {
      break
    }
  }

  // Adjust for tool pairs
  return adjustIndexToPreserveAPIInvariants(messages, startIndex)
}

/**
 * Check if we should use session memory for compaction.
 * Uses cached (possibly stale) feature-flag values so the check does not block
 * on feature-flag initialization.
 *
 * The ENABLE_CLAUDE_CODE_SM_COMPACT env var is checked first and wins over
 * DISABLE_CLAUDE_CODE_SM_COMPACT when both are truthy.
 */
export function shouldUseSessionMemoryCompaction(): boolean {
  // Allow env var override for eval runs and testing
  if (isEnvTruthy(process.env.ENABLE_CLAUDE_CODE_SM_COMPACT)) {
    return true
  }
  if (isEnvTruthy(process.env.DISABLE_CLAUDE_CODE_SM_COMPACT)) {
    return false
  }

  const sessionMemoryFlag = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_session_memory',
    false,
  )
  const smCompactFlag = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_sm_compact',
    false,
  )
  const shouldUse = sessionMemoryFlag && smCompactFlag

  // Log flag states for debugging (ant-only to avoid noise in external logs)
  if (process.env.USER_TYPE === 'ant') {
    logEvent('tengu_sm_compact_flag_check', {
      tengu_session_memory: sessionMemoryFlag,
      tengu_sm_compact: smCompactFlag,
      should_use: shouldUse,
    })
  }

  return shouldUse
}

/**
 * Create a CompactionResult from session memory.
 *
 * @param messages - Full pre-compaction message list (used for the token
 *   count, the boundary's last-message uuid, and discovered tool names).
 * @param sessionMemory - Session memory text used as the summary.
 * @param messagesToKeep - Messages preserved verbatim after the summary.
 * @param hookResults - Session-start hook results to carry into the result.
 * @param transcriptPath - Transcript path passed to the summary message builder.
 * @param agentId - Optional agent id used when creating the plan attachment.
 */
function createCompactionResultFromSessionMemory(
  messages: Message[],
  sessionMemory: string,
  messagesToKeep: Message[],
  hookResults: HookResultMessage[],
  transcriptPath: string,
  agentId?: AgentId,
): CompactionResult {
  const preCompactTokenCount = tokenCountFromLastAPIResponse(messages)

  const boundaryMarker = createCompactBoundaryMessage(
    'auto',
    preCompactTokenCount ?? 0,
    messages[messages.length - 1]?.uuid,
  )
  const preCompactDiscovered = extractDiscoveredToolNames(messages)
  if (preCompactDiscovered.size > 0) {
    boundaryMarker.compactMetadata.preCompactDiscoveredTools = [
      ...preCompactDiscovered,
    ].sort()
  }

  // Truncate oversized sections to prevent session memory from consuming
  // the entire post-compact token budget
  const { truncatedContent, wasTruncated } =
    truncateSessionMemoryForCompact(sessionMemory)

  let summaryContent = getCompactUserSummaryMessage(
    truncatedContent,
    true,
    transcriptPath,
    true,
  )

  if (wasTruncated) {
    const memoryPath = getSessionMemoryPath()
    summaryContent += `\n\nSome session memory sections were truncated for length. The full session memory can be viewed at: ${memoryPath}`
  }

  const summaryMessages = [
    createUserMessage({
      content: summaryContent,
      isCompactSummary: true,
      isVisibleInTranscriptOnly: true,
    }),
  ]

  const planAttachment = createPlanAttachmentIfNeeded(agentId)
  const attachments = planAttachment ? [planAttachment] : []

  // SM-compact has no compact-API-call, so postCompactTokenCount (kept for
  // event continuity) and truePostCompactTokenCount converge to the same value.
  // Estimated once and reused for both fields.
  const summaryTokenCount = estimateMessageTokens(summaryMessages)

  return {
    boundaryMarker: annotateBoundaryWithPreservedSegment(
      boundaryMarker,
      summaryMessages[summaryMessages.length - 1]!.uuid,
      messagesToKeep,
    ),
    summaryMessages,
    attachments,
    hookResults,
    messagesToKeep,
    preCompactTokenCount,
    postCompactTokenCount: summaryTokenCount,
    truePostCompactTokenCount: summaryTokenCount,
  }
}

/**
 * Try to use session memory for compaction instead of traditional compaction.
 * Returns null if session memory compaction cannot be used.
 *
 * Handles two scenarios:
 * 1. Normal case: lastSummarizedMessageId is set, keep only messages after that ID
 *    (expanded backwards as needed to meet the configured minimums)
 * 2. Resumed session: lastSummarizedMessageId is not set but session memory has content;
 *    start with no kept messages, expand backwards to meet the configured minimums,
 *    and use session memory as the summary
 *
 * @param messages - Current conversation messages.
 * @param agentId - Optional agent id forwarded to plan-attachment creation.
 * @param autoCompactThreshold - When provided, the result is rejected (null)
 *   if the estimated post-compact token count is at or above this value.
 * @returns The compaction result, or null when the feature is disabled, no
 *   usable session memory exists, the summarized message id cannot be found,
 *   the threshold is exceeded, or an error occurs while building the result.
 */
export async function trySessionMemoryCompaction(
  messages: Message[],
  agentId?: AgentId,
  autoCompactThreshold?: number,
): Promise<CompactionResult | null> {
  if (!shouldUseSessionMemoryCompaction()) {
    return null
  }

  // Initialize config from remote (only fetches once)
  await initSessionMemoryCompactConfig()

  // Wait for any in-progress session memory extraction to complete (with timeout)
  await waitForSessionMemoryExtraction()

  const lastSummarizedMessageId = getLastSummarizedMessageId()
  const sessionMemory = await getSessionMemoryContent()

  // No session memory file exists at all
  if (!sessionMemory) {
    logEvent('tengu_sm_compact_no_session_memory', {})
    return null
  }

  // Session memory exists but matches the template (no actual content extracted)
  // Fall back to legacy compact behavior
  if (await isSessionMemoryEmpty(sessionMemory)) {
    logEvent('tengu_sm_compact_empty_template', {})
    return null
  }

  try {
    let lastSummarizedIndex: number

    if (lastSummarizedMessageId) {
      // Normal case: we know exactly which messages have been summarized
      lastSummarizedIndex = messages.findIndex(
        msg => msg.uuid === lastSummarizedMessageId,
      )

      if (lastSummarizedIndex === -1) {
        // The summarized message ID doesn't exist in current messages
        // This can happen if messages were modified - fall back to legacy compact
        // since we can't determine the boundary between summarized and unsummarized messages
        logEvent('tengu_sm_compact_summarized_id_not_found', {})
        return null
      }
    } else {
      // Resumed session case: session memory has content but we don't know the boundary
      // Set lastSummarizedIndex to last message so startIndex becomes messages.length (no messages kept initially)
      lastSummarizedIndex = messages.length - 1
      logEvent('tengu_sm_compact_resumed_session', {})
    }

    // Calculate the starting index for messages to keep
    // This starts from lastSummarizedIndex, expands to meet minimums,
    // and adjusts to not split tool_use/tool_result pairs
    const startIndex = calculateMessagesToKeepIndex(
      messages,
      lastSummarizedIndex,
    )
    // Filter out old compact boundary messages from messagesToKeep.
    // After REPL pruning, old boundaries re-yielded from messagesToKeep would
    // trigger an unwanted second prune (isCompactBoundaryMessage returns true),
    // discarding the new boundary and summary.
    const messagesToKeep = messages
      .slice(startIndex)
      .filter(m => !isCompactBoundaryMessage(m))

    // Run session start hooks to restore CLAUDE.md and other context
    const hookResults = await processSessionStartHooks('compact', {
      model: getMainLoopModel(),
    })

    // Get transcript path for the summary message
    const transcriptPath = getTranscriptPath()

    const compactionResult = createCompactionResultFromSessionMemory(
      messages,
      sessionMemory,
      messagesToKeep,
      hookResults,
      transcriptPath,
      agentId,
    )

    const postCompactMessages = buildPostCompactMessages(compactionResult)

    const postCompactTokenCount = estimateMessageTokens(postCompactMessages)

    // Only check threshold if one was provided (for autocompact)
    if (
      autoCompactThreshold !== undefined &&
      postCompactTokenCount >= autoCompactThreshold
    ) {
      logEvent('tengu_sm_compact_threshold_exceeded', {
        postCompactTokenCount,
        autoCompactThreshold,
      })
      return null
    }

    // Override the summary-only estimates with the count for the full
    // post-compact message list.
    return {
      ...compactionResult,
      postCompactTokenCount,
      truePostCompactTokenCount: postCompactTokenCount,
    }
  } catch (error) {
    // Use logEvent instead of logError since errors here are expected
    // (e.g., file not found, path issues) and shouldn't go to error logs
    logEvent('tengu_sm_compact_error', {})
    if (process.env.USER_TYPE === 'ant') {
      logForDebugging(`Session memory compaction error: ${errorMessage(error)}`)
    }
    return null
  }
}