services/compact/sessionMemoryCompact.ts at main

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / services / compact / sessionMemoryCompact.ts
at main 630 lines 21 kB view raw
wrap content
oppi.li dump from zip 9d ago
63aada3f
  1/**
  2 * EXPERIMENT: Session memory compaction
  3 */
  4
  5import type { AgentId } from '../../types/ids.js'
  6import type { HookResultMessage, Message } from '../../types/message.js'
  7import { logForDebugging } from '../../utils/debug.js'
  8import { isEnvTruthy } from '../../utils/envUtils.js'
  9import { errorMessage } from '../../utils/errors.js'
 10import {
 11  createCompactBoundaryMessage,
 12  createUserMessage,
 13  isCompactBoundaryMessage,
 14} from '../../utils/messages.js'
 15import { getMainLoopModel } from '../../utils/model/model.js'
 16import { getSessionMemoryPath } from '../../utils/permissions/filesystem.js'
 17import { processSessionStartHooks } from '../../utils/sessionStart.js'
 18import { getTranscriptPath } from '../../utils/sessionStorage.js'
 19import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
 20import { extractDiscoveredToolNames } from '../../utils/toolSearch.js'
 21import {
 22  getDynamicConfig_BLOCKS_ON_INIT,
 23  getFeatureValue_CACHED_MAY_BE_STALE,
 24} from '../analytics/growthbook.js'
 25import { logEvent } from '../analytics/index.js'
 26import {
 27  isSessionMemoryEmpty,
 28  truncateSessionMemoryForCompact,
 29} from '../SessionMemory/prompts.js'
 30import {
 31  getLastSummarizedMessageId,
 32  getSessionMemoryContent,
 33  waitForSessionMemoryExtraction,
 34} from '../SessionMemory/sessionMemoryUtils.js'
 35import {
 36  annotateBoundaryWithPreservedSegment,
 37  buildPostCompactMessages,
 38  type CompactionResult,
 39  createPlanAttachmentIfNeeded,
 40} from './compact.js'
 41import { estimateMessageTokens } from './microCompact.js'
 42import { getCompactUserSummaryMessage } from './prompt.js'
 43
 44/**
 45 * Configuration for session memory compaction thresholds
 46 */
 47export type SessionMemoryCompactConfig = {
 48  /** Minimum tokens to preserve after compaction */
 49  minTokens: number
 50  /** Minimum number of messages with text blocks to keep */
 51  minTextBlockMessages: number
 52  /** Maximum tokens to preserve after compaction (hard cap) */
 53  maxTokens: number
 54}
 55
 56// Default configuration values (exported for use in tests)
 57export const DEFAULT_SM_COMPACT_CONFIG: SessionMemoryCompactConfig = {
 58  minTokens: 10_000,
 59  minTextBlockMessages: 5,
 60  maxTokens: 40_000,
 61}
 62
 63// Current configuration (starts with defaults)
 64let smCompactConfig: SessionMemoryCompactConfig = {
 65  ...DEFAULT_SM_COMPACT_CONFIG,
 66}
 67
 68// Track whether config has been initialized from remote
 69let configInitialized = false
 70
 71/**
 72 * Set the session memory compact configuration
 73 */
 74export function setSessionMemoryCompactConfig(
 75  config: Partial<SessionMemoryCompactConfig>,
 76): void {
 77  smCompactConfig = {
 78    ...smCompactConfig,
 79    ...config,
 80  }
 81}
 82
 83/**
 84 * Get the current session memory compact configuration
 85 */
 86export function getSessionMemoryCompactConfig(): SessionMemoryCompactConfig {
 87  return { ...smCompactConfig }
 88}
 89
 90/**
 91 * Reset config state (useful for testing)
 92 */
 93export function resetSessionMemoryCompactConfig(): void {
 94  smCompactConfig = { ...DEFAULT_SM_COMPACT_CONFIG }
 95  configInitialized = false
 96}
 97
 98/**
 99 * Initialize configuration from remote config (GrowthBook).
100 * Only fetches once per session - subsequent calls return immediately.
101 */
102async function initSessionMemoryCompactConfig(): Promise<void> {
103  if (configInitialized) {
104    return
105  }
106  configInitialized = true
107
108  // Load config from GrowthBook, merging with defaults
109  const remoteConfig = await getDynamicConfig_BLOCKS_ON_INIT<
110    Partial<SessionMemoryCompactConfig>
111  >('tengu_sm_compact_config', {})
112
113  // Only use remote values if they are explicitly set (positive numbers)
114  // This ensures sensible defaults aren't overridden by zero values
115  const config: SessionMemoryCompactConfig = {
116    minTokens:
117      remoteConfig.minTokens && remoteConfig.minTokens > 0
118        ? remoteConfig.minTokens
119        : DEFAULT_SM_COMPACT_CONFIG.minTokens,
120    minTextBlockMessages:
121      remoteConfig.minTextBlockMessages && remoteConfig.minTextBlockMessages > 0
122        ? remoteConfig.minTextBlockMessages
123        : DEFAULT_SM_COMPACT_CONFIG.minTextBlockMessages,
124    maxTokens:
125      remoteConfig.maxTokens && remoteConfig.maxTokens > 0
126        ? remoteConfig.maxTokens
127        : DEFAULT_SM_COMPACT_CONFIG.maxTokens,
128  }
129  setSessionMemoryCompactConfig(config)
130}
131
/**
 * Report whether a message carries text content.
 *
 * Only `user` and `assistant` messages are considered; every other message
 * type yields `false`. For those two types the content is handled uniformly:
 * - a string counts as text when it is non-empty
 * - an array counts as text when at least one block has `type === 'text'`
 * - anything else (e.g. missing content) yields `false`
 *
 * The assistant branch guards with `Array.isArray` like the other helpers in
 * this file, so unexpected non-array assistant content no longer throws.
 *
 * @param message - The message to inspect.
 * @returns `true` if the message contains text, otherwise `false`.
 */
export function hasTextBlocks(message: Message): boolean {
  if (message.type !== 'assistant' && message.type !== 'user') {
    return false
  }
  const content: unknown = message.message.content
  if (typeof content === 'string') {
    return content.length > 0
  }
  if (Array.isArray(content)) {
    return content.some(block => block.type === 'text')
  }
  return false
}
151
/**
 * Collect the `tool_use_id` of every `tool_result` block in a user message,
 * in block order. Non-user messages and user messages whose content is not an
 * array produce an empty list.
 */
function getToolResultIds(message: Message): string[] {
  const blocks = message.type === 'user' ? message.message.content : undefined
  if (!Array.isArray(blocks)) {
    return []
  }
  return blocks.flatMap(block =>
    block.type === 'tool_result' ? [block.tool_use_id] : [],
  )
}
171
/**
 * Report whether an assistant message contains a `tool_use` block whose id is
 * in `toolUseIds`. Non-assistant messages and non-array content never match.
 */
function hasToolUseWithIds(message: Message, toolUseIds: Set<string>): boolean {
  if (message.type === 'assistant') {
    const blocks = message.message.content
    if (Array.isArray(blocks)) {
      for (const block of blocks) {
        if (block.type === 'tool_use' && toolUseIds.has(block.id)) {
          return true
        }
      }
    }
  }
  return false
}
187
/**
 * Move a proposed "keep from here" index backwards so the kept slice
 * `messages.slice(index)` does not cut through structures that must stay
 * together.
 *
 * Two passes are made, in order:
 *
 * 1. Tool pairs. Every tool_result in the kept range (not just the first kept
 *    message) is collected. For each one whose tool_use is not also in the
 *    kept range, earlier messages are scanned backwards and the index is
 *    moved to the assistant message holding that tool_use.
 *
 *      N:   assistant, message.id X, [thinking]
 *      N+1: assistant, message.id X, [tool_use: ORPHAN_ID]
 *      N+2: assistant, message.id X, [tool_use: VALID_ID]
 *      N+3: user, [tool_result: ORPHAN_ID, tool_result: VALID_ID]
 *
 *    With a proposed index of N+2, the tool_result for ORPHAN_ID at N+3 would
 *    reference a tool_use that was cut away; this pass moves the index to N+1.
 *
 * 2. Shared message.id. Streaming can yield several assistant messages (one
 *    per content block: thinking, tool_use, ...) that share a message.id but
 *    have different uuids. If an assistant message before the index has the
 *    same message.id as one in the kept range, the index moves back to the
 *    earliest such message so normalizeMessagesForAPI can merge them (in the
 *    example above, the index ends up at N so the thinking block is kept).
 *
 * @param messages - Full message list.
 * @param startIndex - Proposed first index to keep.
 * @returns An index less than or equal to `startIndex`. `startIndex` is
 *   returned unchanged when it is `<= 0` or `>= messages.length`.
 */
export function adjustIndexToPreserveAPIInvariants(
  messages: Message[],
  startIndex: number,
): number {
  if (startIndex <= 0 || startIndex >= messages.length) {
    return startIndex
  }

  let keepFrom = startIndex

  // Pass 1: make sure every kept tool_result still has its tool_use
  const keptTail = messages.slice(startIndex)
  const keptToolResultIds = keptTail.flatMap(msg => getToolResultIds(msg))

  if (keptToolResultIds.length > 0) {
    // Start from every referenced id, then drop the ones whose tool_use is
    // already inside the kept range; what remains must be found earlier.
    const missingToolUseIds = new Set(keptToolResultIds)
    for (const msg of keptTail) {
      if (msg.type !== 'assistant' || !Array.isArray(msg.message.content)) {
        continue
      }
      for (const block of msg.message.content) {
        if (block.type === 'tool_use') {
          missingToolUseIds.delete(block.id)
        }
      }
    }

    // Walk backwards until every missing tool_use has been located
    let cursor = keepFrom - 1
    while (cursor >= 0 && missingToolUseIds.size > 0) {
      const candidate = messages[cursor]!
      if (hasToolUseWithIds(candidate, missingToolUseIds)) {
        keepFrom = cursor
        if (
          candidate.type === 'assistant' &&
          Array.isArray(candidate.message.content)
        ) {
          for (const block of candidate.message.content) {
            if (block.type === 'tool_use') {
              missingToolUseIds.delete(block.id)
            }
          }
        }
      }
      cursor--
    }
  }

  // Pass 2: pull in earlier assistant fragments that share a message.id with
  // an assistant message in the (possibly widened) kept range
  const keptAssistantMessageIds = new Set<string>()
  for (const msg of messages.slice(keepFrom)) {
    if (msg.type === 'assistant' && msg.message.id) {
      keptAssistantMessageIds.add(msg.message.id)
    }
  }

  // The earliest matching fragment wins, so scan forward and stop at the first
  for (let i = 0; i < keepFrom; i++) {
    const earlier = messages[i]!
    if (
      earlier.type === 'assistant' &&
      earlier.message.id &&
      keptAssistantMessageIds.has(earlier.message.id)
    ) {
      keepFrom = i
      break
    }
  }

  return keepFrom
}
315
/**
 * Calculate the index of the first message to keep after compaction.
 *
 * The kept range initially starts right after `lastSummarizedIndex` and is
 * then expanded backwards, one message at a time, until either
 * - the kept token estimate reaches `config.maxTokens`, or
 * - both minimums are met: at least `config.minTokens` tokens and at least
 *   `config.minTextBlockMessages` messages with text blocks.
 *
 * Expansion never goes past the message following the last compact boundary.
 * The resulting index is finally passed through
 * `adjustIndexToPreserveAPIInvariants` so tool_use/tool_result pairs (and
 * assistant fragments sharing a message.id) are not split.
 *
 * @param messages - Full message list.
 * @param lastSummarizedIndex - Index of the last message already covered by
 *   session memory. A negative value, or any value `>= messages.length - 1`,
 *   means the kept range starts out empty.
 * @returns Index of the first message to keep (0 for an empty list).
 */
export function calculateMessagesToKeepIndex(
  messages: Message[],
  lastSummarizedIndex: number,
): number {
  if (messages.length === 0) {
    return 0
  }

  const config = getSessionMemoryCompactConfig()

  // Start from the message after lastSummarizedIndex. The value is clamped to
  // messages.length so an index at or past the end yields an empty kept range
  // instead of making the backward expansion read past the array.
  let startIndex =
    lastSummarizedIndex >= 0
      ? Math.min(lastSummarizedIndex + 1, messages.length)
      : messages.length

  let totalTokens = 0
  let textBlockMessageCount = 0

  // Fold one message into the running totals for the kept range
  const includeInTotals = (msg: Message): void => {
    totalTokens += estimateMessageTokens([msg])
    if (hasTextBlocks(msg)) {
      textBlockMessageCount++
    }
  }

  // Expansion ends at the max cap, or once both minimums are satisfied
  const shouldStopExpanding = (): boolean =>
    totalTokens >= config.maxTokens ||
    (totalTokens >= config.minTokens &&
      textBlockMessageCount >= config.minTextBlockMessages)

  // Account for everything already in the kept range
  for (let i = startIndex; i < messages.length; i++) {
    includeInTotals(messages[i]!)
  }

  if (!shouldStopExpanding()) {
    // Expand backwards until we meet both minimums or hit max cap.
    // Floor at the last boundary: the preserved-segment chain has a disk
    // discontinuity there (att[0]→summary shortcut from dedup-skip), which
    // would let the loader's tail→head walk bypass inner preserved messages
    // and then prune them. Reactive compact already slices at the boundary
    // via getMessagesAfterCompactBoundary; this is the same invariant.
    const lastBoundaryIndex = messages.findLastIndex(m =>
      isCompactBoundaryMessage(m),
    )
    const floor = lastBoundaryIndex === -1 ? 0 : lastBoundaryIndex + 1
    for (let i = startIndex - 1; i >= floor; i--) {
      includeInTotals(messages[i]!)
      startIndex = i
      if (shouldStopExpanding()) {
        break
      }
    }
  }

  // Adjust so tool pairs / shared-id assistant fragments are not split
  return adjustIndexToPreserveAPIInvariants(messages, startIndex)
}
398
/**
 * Decide whether compaction should go through session memory.
 *
 * Precedence:
 * 1. `ENABLE_CLAUDE_CODE_SM_COMPACT` truthy -> enabled (eval runs / testing)
 * 2. `DISABLE_CLAUDE_CODE_SM_COMPACT` truthy -> disabled
 * 3. otherwise both the `tengu_session_memory` and `tengu_sm_compact` feature
 *    values must be on. They are read through the cached accessor, so the
 *    values may be stale.
 */
export function shouldUseSessionMemoryCompaction(): boolean {
  // Environment overrides win; enable is consulted before disable
  const forcedOn = isEnvTruthy(process.env.ENABLE_CLAUDE_CODE_SM_COMPACT)
  if (forcedOn) {
    return true
  }
  const forcedOff = isEnvTruthy(process.env.DISABLE_CLAUDE_CODE_SM_COMPACT)
  if (forcedOff) {
    return false
  }

  const flags = {
    tengu_session_memory: getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_session_memory',
      false,
    ),
    tengu_sm_compact: getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_sm_compact',
      false,
    ),
  }
  const shouldUse = flags.tengu_session_memory && flags.tengu_sm_compact

  // Record the flag states for debugging, for ant users only
  if (process.env.USER_TYPE === 'ant') {
    logEvent('tengu_sm_compact_flag_check', {
      ...flags,
      should_use: shouldUse,
    })
  }

  return shouldUse
}
433
/**
 * Assemble a CompactionResult whose summary is the session memory text rather
 * than the output of a summarization API call.
 *
 * @param messages - Messages as they were before compaction; used for the
 *   pre-compact token count, the boundary's reference to the last message
 *   uuid, and the discovered-tool names recorded on the boundary.
 * @param sessionMemory - Raw session memory content to use as the summary.
 * @param messagesToKeep - Messages that survive compaction verbatim.
 * @param hookResults - Hook result messages to carry in the result.
 * @param transcriptPath - Transcript path passed to the summary message builder.
 * @param agentId - Optional agent id used when looking for a plan attachment.
 */
function createCompactionResultFromSessionMemory(
  messages: Message[],
  sessionMemory: string,
  messagesToKeep: Message[],
  hookResults: HookResultMessage[],
  transcriptPath: string,
  agentId?: AgentId,
): CompactionResult {
  const preCompactTokenCount = tokenCountFromLastAPIResponse(messages)
  const lastMessage = messages[messages.length - 1]

  const boundaryMarker = createCompactBoundaryMessage(
    'auto',
    preCompactTokenCount ?? 0,
    lastMessage?.uuid,
  )

  // Record the discovered tool names (sorted) on the boundary, if any
  const discoveredToolNames = extractDiscoveredToolNames(messages)
  if (discoveredToolNames.size > 0) {
    boundaryMarker.compactMetadata.preCompactDiscoveredTools =
      Array.from(discoveredToolNames).sort()
  }

  // The memory may be shortened here so that oversized sections do not eat
  // the whole post-compact token budget
  const truncation = truncateSessionMemoryForCompact(sessionMemory)

  const baseSummary = getCompactUserSummaryMessage(
    truncation.truncatedContent,
    true,
    transcriptPath,
    true,
  )
  // When something was cut, tell the reader where the full memory lives
  const summaryContent = truncation.wasTruncated
    ? baseSummary +
      `\n\nSome session memory sections were truncated for length. The full session memory can be viewed at: ${getSessionMemoryPath()}`
    : baseSummary

  const summaryMessage = createUserMessage({
    content: summaryContent,
    isCompactSummary: true,
    isVisibleInTranscriptOnly: true,
  })
  const summaryMessages = [summaryMessage]

  const planAttachment = createPlanAttachmentIfNeeded(agentId)
  const attachments = planAttachment ? [planAttachment] : []

  const annotatedBoundary = annotateBoundaryWithPreservedSegment(
    boundaryMarker,
    summaryMessage.uuid,
    messagesToKeep,
  )

  // SM-compact has no compact-API-call, so postCompactTokenCount (kept for
  // event continuity) and truePostCompactTokenCount are the same estimate.
  const summaryTokenEstimate = estimateMessageTokens(summaryMessages)

  return {
    boundaryMarker: annotatedBoundary,
    summaryMessages,
    attachments,
    hookResults,
    messagesToKeep,
    preCompactTokenCount,
    postCompactTokenCount: summaryTokenEstimate,
    truePostCompactTokenCount: summaryTokenEstimate,
  }
}
504
/**
 * Attempt to compact using session memory as the summary, instead of running
 * a traditional compaction.
 *
 * Resolves to `null` (meaning: use another compaction path) when
 * - session memory compaction is not enabled,
 * - there is no session memory content, or it is still the empty template,
 * - a last-summarized message id is recorded but is not present in `messages`,
 * - `autoCompactThreshold` is given and the estimated post-compact size is
 *   not below it, or
 * - anything throws while the result is being built.
 *
 * Where the kept range starts:
 * 1. A last-summarized message id is recorded: right after that message.
 * 2. No id is recorded (e.g. a resumed session with existing memory): the
 *    kept range starts out empty, as if everything were already summarized.
 * In both cases calculateMessagesToKeepIndex may then widen the range to
 * satisfy the configured minimums.
 *
 * @param messages - Current conversation messages.
 * @param agentId - Optional agent id, forwarded when building the result.
 * @param autoCompactThreshold - Optional token threshold; only checked when
 *   provided.
 */
export async function trySessionMemoryCompaction(
  messages: Message[],
  agentId?: AgentId,
  autoCompactThreshold?: number,
): Promise<CompactionResult | null> {
  if (!shouldUseSessionMemoryCompaction()) {
    return null
  }

  // Remote config is fetched at most once; later calls return immediately
  await initSessionMemoryCompactConfig()

  // Let any in-progress session memory extraction settle first
  await waitForSessionMemoryExtraction()

  const lastSummarizedMessageId = getLastSummarizedMessageId()
  const sessionMemory = await getSessionMemoryContent()

  if (!sessionMemory) {
    // No session memory content available at all
    logEvent('tengu_sm_compact_no_session_memory', {})
    return null
  }

  const memoryIsJustTemplate = await isSessionMemoryEmpty(sessionMemory)
  if (memoryIsJustTemplate) {
    // Nothing has been extracted yet, so there is nothing to summarize with
    logEvent('tengu_sm_compact_empty_template', {})
    return null
  }

  try {
    let lastSummarizedIndex: number

    if (!lastSummarizedMessageId) {
      // Boundary unknown: point at the last message so the kept range
      // initially starts at messages.length (nothing kept yet)
      lastSummarizedIndex = messages.length - 1
      logEvent('tengu_sm_compact_resumed_session', {})
    } else {
      lastSummarizedIndex = messages.findIndex(
        msg => msg.uuid === lastSummarizedMessageId,
      )
      if (lastSummarizedIndex === -1) {
        // The recorded id is not among the current messages, so the split
        // between summarized and unsummarized messages cannot be determined
        logEvent('tengu_sm_compact_summarized_id_not_found', {})
        return null
      }
    }

    const keepFromIndex = calculateMessagesToKeepIndex(
      messages,
      lastSummarizedIndex,
    )

    // Drop old compact boundaries from the kept messages. After REPL pruning,
    // an old boundary re-yielded from messagesToKeep would trigger an unwanted
    // second prune and discard the new boundary and summary.
    const messagesToKeep: Message[] = []
    for (const candidate of messages.slice(keepFromIndex)) {
      if (!isCompactBoundaryMessage(candidate)) {
        messagesToKeep.push(candidate)
      }
    }

    // Session start hooks restore CLAUDE.md and other context
    const hookResults = await processSessionStartHooks('compact', {
      model: getMainLoopModel(),
    })

    const compactionResult = createCompactionResultFromSessionMemory(
      messages,
      sessionMemory,
      messagesToKeep,
      hookResults,
      getTranscriptPath(),
      agentId,
    )

    const postCompactTokenCount = estimateMessageTokens(
      buildPostCompactMessages(compactionResult),
    )

    // The threshold only applies when a caller supplies one (autocompact)
    const exceedsThreshold =
      autoCompactThreshold !== undefined &&
      postCompactTokenCount >= autoCompactThreshold
    if (exceedsThreshold) {
      logEvent('tengu_sm_compact_threshold_exceeded', {
        postCompactTokenCount,
        autoCompactThreshold,
      })
      return null
    }

    return {
      ...compactionResult,
      postCompactTokenCount,
      truePostCompactTokenCount: postCompactTokenCount,
    }
  } catch (error) {
    // Failures here are treated as expected (e.g. file not found, path
    // issues), so they are recorded as an event rather than in error logs
    logEvent('tengu_sm_compact_error', {})
    if (process.env.USER_TYPE === 'ant') {
      logForDebugging(`Session memory compaction error: ${errorMessage(error)}`)
    }
    return null
  }
}