source dump of claude code
at main 1705 lines 61 kB view raw
1import { feature } from 'bun:bundle' 2import type { UUID } from 'crypto' 3import uniqBy from 'lodash-es/uniqBy.js' 4 5/* eslint-disable @typescript-eslint/no-require-imports */ 6const sessionTranscriptModule = feature('KAIROS') 7 ? (require('../sessionTranscript/sessionTranscript.js') as typeof import('../sessionTranscript/sessionTranscript.js')) 8 : null 9 10import { APIUserAbortError } from '@anthropic-ai/sdk' 11import { markPostCompaction } from 'src/bootstrap/state.js' 12import { getInvokedSkillsForAgent } from '../../bootstrap/state.js' 13import type { QuerySource } from '../../constants/querySource.js' 14import type { CanUseToolFn } from '../../hooks/useCanUseTool.js' 15import type { Tool, ToolUseContext } from '../../Tool.js' 16import type { LocalAgentTaskState } from '../../tasks/LocalAgentTask/LocalAgentTask.js' 17import { FileReadTool } from '../../tools/FileReadTool/FileReadTool.js' 18import { 19 FILE_READ_TOOL_NAME, 20 FILE_UNCHANGED_STUB, 21} from '../../tools/FileReadTool/prompt.js' 22import { ToolSearchTool } from '../../tools/ToolSearchTool/ToolSearchTool.js' 23import type { AgentId } from '../../types/ids.js' 24import type { 25 AssistantMessage, 26 AttachmentMessage, 27 HookResultMessage, 28 Message, 29 PartialCompactDirection, 30 SystemCompactBoundaryMessage, 31 SystemMessage, 32 UserMessage, 33} from '../../types/message.js' 34import { 35 createAttachmentMessage, 36 generateFileAttachment, 37 getAgentListingDeltaAttachment, 38 getDeferredToolsDeltaAttachment, 39 getMcpInstructionsDeltaAttachment, 40} from '../../utils/attachments.js' 41import { getMemoryPath } from '../../utils/config.js' 42import { COMPACT_MAX_OUTPUT_TOKENS } from '../../utils/context.js' 43import { 44 analyzeContext, 45 tokenStatsToStatsigMetrics, 46} from '../../utils/contextAnalysis.js' 47import { logForDebugging } from '../../utils/debug.js' 48import { hasExactErrorMessage } from '../../utils/errors.js' 49import { cacheToObject } from '../../utils/fileStateCache.js' 
50import { 51 type CacheSafeParams, 52 runForkedAgent, 53} from '../../utils/forkedAgent.js' 54import { 55 executePostCompactHooks, 56 executePreCompactHooks, 57} from '../../utils/hooks.js' 58import { logError } from '../../utils/log.js' 59import { MEMORY_TYPE_VALUES } from '../../utils/memory/types.js' 60import { 61 createCompactBoundaryMessage, 62 createUserMessage, 63 getAssistantMessageText, 64 getLastAssistantMessage, 65 getMessagesAfterCompactBoundary, 66 isCompactBoundaryMessage, 67 normalizeMessagesForAPI, 68} from '../../utils/messages.js' 69import { expandPath } from '../../utils/path.js' 70import { getPlan, getPlanFilePath } from '../../utils/plans.js' 71import { 72 isSessionActivityTrackingActive, 73 sendSessionActivitySignal, 74} from '../../utils/sessionActivity.js' 75import { processSessionStartHooks } from '../../utils/sessionStart.js' 76import { 77 getTranscriptPath, 78 reAppendSessionMetadata, 79} from '../../utils/sessionStorage.js' 80import { sleep } from '../../utils/sleep.js' 81import { jsonStringify } from '../../utils/slowOperations.js' 82/* eslint-enable @typescript-eslint/no-require-imports */ 83import { asSystemPrompt } from '../../utils/systemPromptType.js' 84import { getTaskOutputPath } from '../../utils/task/diskOutput.js' 85import { 86 getTokenUsage, 87 tokenCountFromLastAPIResponse, 88 tokenCountWithEstimation, 89} from '../../utils/tokens.js' 90import { 91 extractDiscoveredToolNames, 92 isToolSearchEnabled, 93} from '../../utils/toolSearch.js' 94import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js' 95import { 96 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 97 logEvent, 98} from '../analytics/index.js' 99import { 100 getMaxOutputTokensForModel, 101 queryModelWithStreaming, 102} from '../api/claude.js' 103import { 104 getPromptTooLongTokenGap, 105 PROMPT_TOO_LONG_ERROR_MESSAGE, 106 startsWithApiErrorPrefix, 107} from '../api/errors.js' 108import { notifyCompaction } from 
'../api/promptCacheBreakDetection.js' 109import { getRetryDelay } from '../api/withRetry.js' 110import { logPermissionContextForAnts } from '../internalLogging.js' 111import { 112 roughTokenCountEstimation, 113 roughTokenCountEstimationForMessages, 114} from '../tokenEstimation.js' 115import { groupMessagesByApiRound } from './grouping.js' 116import { 117 getCompactPrompt, 118 getCompactUserSummaryMessage, 119 getPartialCompactPrompt, 120} from './prompt.js' 121 122export const POST_COMPACT_MAX_FILES_TO_RESTORE = 5 123export const POST_COMPACT_TOKEN_BUDGET = 50_000 124export const POST_COMPACT_MAX_TOKENS_PER_FILE = 5_000 125// Skills can be large (verify=18.7KB, claude-api=20.1KB). Previously re-injected 126// unbounded on every compact → 5-10K tok/compact. Per-skill truncation beats 127// dropping — instructions at the top of a skill file are usually the critical 128// part. Budget sized to hold ~5 skills at the per-skill cap. 129export const POST_COMPACT_MAX_TOKENS_PER_SKILL = 5_000 130export const POST_COMPACT_SKILLS_TOKEN_BUDGET = 25_000 131const MAX_COMPACT_STREAMING_RETRIES = 2 132 133/** 134 * Strip image blocks from user messages before sending for compaction. 135 * Images are not needed for generating a conversation summary and can 136 * cause the compaction API call itself to hit the prompt-too-long limit, 137 * especially in CCD sessions where users frequently attach images. 138 * Replaces image blocks with a text marker so the summary still notes 139 * that an image was shared. 140 * 141 * Note: Only user messages contain images (either directly attached or within 142 * tool_result content from tools). Assistant messages contain text, tool_use, 143 * and thinking blocks but not images. 
144 */ 145export function stripImagesFromMessages(messages: Message[]): Message[] { 146 return messages.map(message => { 147 if (message.type !== 'user') { 148 return message 149 } 150 151 const content = message.message.content 152 if (!Array.isArray(content)) { 153 return message 154 } 155 156 let hasMediaBlock = false 157 const newContent = content.flatMap(block => { 158 if (block.type === 'image') { 159 hasMediaBlock = true 160 return [{ type: 'text' as const, text: '[image]' }] 161 } 162 if (block.type === 'document') { 163 hasMediaBlock = true 164 return [{ type: 'text' as const, text: '[document]' }] 165 } 166 // Also strip images/documents nested inside tool_result content arrays 167 if (block.type === 'tool_result' && Array.isArray(block.content)) { 168 let toolHasMedia = false 169 const newToolContent = block.content.map(item => { 170 if (item.type === 'image') { 171 toolHasMedia = true 172 return { type: 'text' as const, text: '[image]' } 173 } 174 if (item.type === 'document') { 175 toolHasMedia = true 176 return { type: 'text' as const, text: '[document]' } 177 } 178 return item 179 }) 180 if (toolHasMedia) { 181 hasMediaBlock = true 182 return [{ ...block, content: newToolContent }] 183 } 184 } 185 return [block] 186 }) 187 188 if (!hasMediaBlock) { 189 return message 190 } 191 192 return { 193 ...message, 194 message: { 195 ...message.message, 196 content: newContent, 197 }, 198 } as typeof message 199 }) 200} 201 202/** 203 * Strip attachment types that are re-injected post-compaction anyway. 204 * skill_discovery/skill_listing are re-surfaced by resetSentSkillNames() 205 * + the next turn's discovery signal, so feeding them to the summarizer 206 * wastes tokens and pollutes the summary with stale skill suggestions. 207 * 208 * No-op when EXPERIMENTAL_SKILL_SEARCH is off (the attachment types 209 * don't exist on external builds). 
210 */ 211export function stripReinjectedAttachments(messages: Message[]): Message[] { 212 if (feature('EXPERIMENTAL_SKILL_SEARCH')) { 213 return messages.filter( 214 m => 215 !( 216 m.type === 'attachment' && 217 (m.attachment.type === 'skill_discovery' || 218 m.attachment.type === 'skill_listing') 219 ), 220 ) 221 } 222 return messages 223} 224 225export const ERROR_MESSAGE_NOT_ENOUGH_MESSAGES = 226 'Not enough messages to compact.' 227const MAX_PTL_RETRIES = 3 228const PTL_RETRY_MARKER = '[earlier conversation truncated for compaction retry]' 229 230/** 231 * Drops the oldest API-round groups from messages until tokenGap is covered. 232 * Falls back to dropping 20% of groups when the gap is unparseable (some 233 * Vertex/Bedrock error formats). Returns null when nothing can be dropped 234 * without leaving an empty summarize set. 235 * 236 * This is the last-resort escape hatch for CC-1180 — when the compact request 237 * itself hits prompt-too-long, the user is otherwise stuck. Dropping the 238 * oldest context is lossy but unblocks them. The reactive-compact path 239 * (compactMessages.ts) has the proper retry loop that peels from the tail; 240 * this helper is the dumb-but-safe fallback for the proactive/manual path 241 * that wasn't migrated in bfdb472f's unification. 242 */ 243export function truncateHeadForPTLRetry( 244 messages: Message[], 245 ptlResponse: AssistantMessage, 246): Message[] | null { 247 // Strip our own synthetic marker from a previous retry before grouping. 248 // Otherwise it becomes its own group 0 and the 20% fallback stalls 249 // (drops only the marker, re-adds it, zero progress on retry 2+). 250 const input = 251 messages[0]?.type === 'user' && 252 messages[0].isMeta && 253 messages[0].message.content === PTL_RETRY_MARKER 254 ? 
messages.slice(1) 255 : messages 256 257 const groups = groupMessagesByApiRound(input) 258 if (groups.length < 2) return null 259 260 const tokenGap = getPromptTooLongTokenGap(ptlResponse) 261 let dropCount: number 262 if (tokenGap !== undefined) { 263 let acc = 0 264 dropCount = 0 265 for (const g of groups) { 266 acc += roughTokenCountEstimationForMessages(g) 267 dropCount++ 268 if (acc >= tokenGap) break 269 } 270 } else { 271 dropCount = Math.max(1, Math.floor(groups.length * 0.2)) 272 } 273 274 // Keep at least one group so there's something to summarize. 275 dropCount = Math.min(dropCount, groups.length - 1) 276 if (dropCount < 1) return null 277 278 const sliced = groups.slice(dropCount).flat() 279 // groupMessagesByApiRound puts the preamble in group 0 and starts every 280 // subsequent group with an assistant message. Dropping group 0 leaves an 281 // assistant-first sequence which the API rejects (first message must be 282 // role=user). Prepend a synthetic user marker — ensureToolResultPairing 283 // already handles any orphaned tool_results this creates. 284 if (sliced[0]?.type === 'assistant') { 285 return [ 286 createUserMessage({ content: PTL_RETRY_MARKER, isMeta: true }), 287 ...sliced, 288 ] 289 } 290 return sliced 291} 292 293export const ERROR_MESSAGE_PROMPT_TOO_LONG = 294 'Conversation too long. Press esc twice to go up a few messages and try again.' 295export const ERROR_MESSAGE_USER_ABORT = 'API Error: Request was aborted.' 296export const ERROR_MESSAGE_INCOMPLETE_RESPONSE = 297 'Compaction interrupted · This may be due to network issues — please try again.' 

export interface CompactionResult {
  boundaryMarker: SystemMessage
  summaryMessages: UserMessage[]
  attachments: AttachmentMessage[]
  hookResults: HookResultMessage[]
  messagesToKeep?: Message[]
  userDisplayMessage?: string
  preCompactTokenCount?: number
  postCompactTokenCount?: number
  truePostCompactTokenCount?: number
  compactionUsage?: ReturnType<typeof getTokenUsage>
}

/**
 * Diagnosis context passed from autoCompactIfNeeded into compactConversation.
 * Lets the tengu_compact event disambiguate same-chain loops (H2) from
 * cross-agent (H1/H5) and manual-vs-auto (H3) compactions without joins.
 */
export type RecompactionInfo = {
  isRecompactionInChain: boolean
  turnsSincePreviousCompact: number
  previousCompactTurnId?: string
  autoCompactThreshold: number
  querySource?: QuerySource
}

/**
 * Assemble the base post-compact message array from a CompactionResult so
 * every compaction path produces the same ordering:
 * boundaryMarker, summaryMessages, messagesToKeep, attachments, hookResults.
 */
export function buildPostCompactMessages(result: CompactionResult): Message[] {
  const kept = result.messagesToKeep ?? []
  return [
    result.boundaryMarker,
    ...result.summaryMessages,
    ...kept,
    ...result.attachments,
    ...result.hookResults,
  ]
}

/**
 * Annotate a compact boundary with relink metadata for messagesToKeep.
 * Preserved messages keep their original parentUuids on disk (dedup-skipped);
 * the loader uses this to patch head→anchor and anchor's-other-children→tail.
 *
 * `anchorUuid` = what sits immediately before keep[0] in the desired chain:
 * - suffix-preserving (reactive/session-memory): last summary message
 * - prefix-preserving (partial compact): the boundary itself
 */
export function annotateBoundaryWithPreservedSegment(
  boundary: SystemCompactBoundaryMessage,
  anchorUuid: UUID,
  messagesToKeep: readonly Message[] | undefined,
): SystemCompactBoundaryMessage {
  if (!messagesToKeep || messagesToKeep.length === 0) {
    return boundary
  }
  const headUuid = messagesToKeep[0]!.uuid
  const tailUuid = messagesToKeep[messagesToKeep.length - 1]!.uuid
  return {
    ...boundary,
    compactMetadata: {
      ...boundary.compactMetadata,
      preservedSegment: { headUuid, anchorUuid, tailUuid },
    },
  }
}

/**
 * Merges user-supplied custom instructions with hook-provided instructions.
 * User instructions come first; hook instructions are appended.
 * Empty strings normalize to undefined.
 */
export function mergeHookInstructions(
  userInstructions: string | undefined,
  hookInstructions: string | undefined,
): string | undefined {
  const user = userInstructions || undefined
  const hook = hookInstructions || undefined
  if (user && hook) {
    return `${user}\n\n${hook}`
  }
  return user ?? hook
}

/**
 * Creates a compact version of a conversation by summarizing older messages
 * and preserving recent conversation history.
386 */ 387export async function compactConversation( 388 messages: Message[], 389 context: ToolUseContext, 390 cacheSafeParams: CacheSafeParams, 391 suppressFollowUpQuestions: boolean, 392 customInstructions?: string, 393 isAutoCompact: boolean = false, 394 recompactionInfo?: RecompactionInfo, 395): Promise<CompactionResult> { 396 try { 397 if (messages.length === 0) { 398 throw new Error(ERROR_MESSAGE_NOT_ENOUGH_MESSAGES) 399 } 400 401 const preCompactTokenCount = tokenCountWithEstimation(messages) 402 403 const appState = context.getAppState() 404 void logPermissionContextForAnts(appState.toolPermissionContext, 'summary') 405 406 context.onCompactProgress?.({ 407 type: 'hooks_start', 408 hookType: 'pre_compact', 409 }) 410 411 // Execute PreCompact hooks 412 context.setSDKStatus?.('compacting') 413 const hookResult = await executePreCompactHooks( 414 { 415 trigger: isAutoCompact ? 'auto' : 'manual', 416 customInstructions: customInstructions ?? null, 417 }, 418 context.abortController.signal, 419 ) 420 customInstructions = mergeHookInstructions( 421 customInstructions, 422 hookResult.newCustomInstructions, 423 ) 424 const userDisplayMessage = hookResult.userDisplayMessage 425 426 // Show requesting mode with up arrow and custom message 427 context.setStreamMode?.('requesting') 428 context.setResponseLength?.(() => 0) 429 context.onCompactProgress?.({ type: 'compact_start' }) 430 431 // 3P default: true — forked-agent path reuses main conversation's prompt cache. 432 // Experiment (Jan 2026) confirmed: false path is 98% cache miss, costs ~0.76% of 433 // fleet cache_creation (~38B tok/day), concentrated in ephemeral envs (CCR/GHA/SDK) 434 // with cold GB cache and 3P providers where GB is disabled. GB gate kept as kill-switch. 
435 const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE( 436 'tengu_compact_cache_prefix', 437 true, 438 ) 439 440 const compactPrompt = getCompactPrompt(customInstructions) 441 const summaryRequest = createUserMessage({ 442 content: compactPrompt, 443 }) 444 445 let messagesToSummarize = messages 446 let retryCacheSafeParams = cacheSafeParams 447 let summaryResponse: AssistantMessage 448 let summary: string | null 449 let ptlAttempts = 0 450 for (;;) { 451 summaryResponse = await streamCompactSummary({ 452 messages: messagesToSummarize, 453 summaryRequest, 454 appState, 455 context, 456 preCompactTokenCount, 457 cacheSafeParams: retryCacheSafeParams, 458 }) 459 summary = getAssistantMessageText(summaryResponse) 460 if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break 461 462 // CC-1180: compact request itself hit prompt-too-long. Truncate the 463 // oldest API-round groups and retry rather than leaving the user stuck. 464 ptlAttempts++ 465 const truncated = 466 ptlAttempts <= MAX_PTL_RETRIES 467 ? truncateHeadForPTLRetry(messagesToSummarize, summaryResponse) 468 : null 469 if (!truncated) { 470 logEvent('tengu_compact_failed', { 471 reason: 472 'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 473 preCompactTokenCount, 474 promptCacheSharingEnabled, 475 ptlAttempts, 476 }) 477 throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG) 478 } 479 logEvent('tengu_compact_ptl_retry', { 480 attempt: ptlAttempts, 481 droppedMessages: messagesToSummarize.length - truncated.length, 482 remainingMessages: truncated.length, 483 }) 484 messagesToSummarize = truncated 485 // The forked-agent path reads from cacheSafeParams.forkContextMessages, 486 // not the messages param — thread the truncated set through both paths. 487 retryCacheSafeParams = { 488 ...retryCacheSafeParams, 489 forkContextMessages: truncated, 490 } 491 } 492 493 if (!summary) { 494 logForDebugging( 495 `Compact failed: no summary text in response. 
Response: ${jsonStringify(summaryResponse)}`, 496 { level: 'error' }, 497 ) 498 logEvent('tengu_compact_failed', { 499 reason: 500 'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 501 preCompactTokenCount, 502 promptCacheSharingEnabled, 503 }) 504 throw new Error( 505 `Failed to generate conversation summary - response did not contain valid text content`, 506 ) 507 } else if (startsWithApiErrorPrefix(summary)) { 508 logEvent('tengu_compact_failed', { 509 reason: 510 'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 511 preCompactTokenCount, 512 promptCacheSharingEnabled, 513 }) 514 throw new Error(summary) 515 } 516 517 // Store the current file state before clearing 518 const preCompactReadFileState = cacheToObject(context.readFileState) 519 520 // Clear the cache 521 context.readFileState.clear() 522 context.loadedNestedMemoryPaths?.clear() 523 524 // Intentionally NOT resetting sentSkillNames: re-injecting the full 525 // skill_listing (~4K tokens) post-compact is pure cache_creation with 526 // marginal benefit. The model still has SkillTool in its schema and 527 // invoked_skills attachment (below) preserves used-skill content. Ants 528 // with EXPERIMENTAL_SKILL_SEARCH already skip re-injection via the 529 // early-return in getSkillListingAttachments. 
530 531 // Run async attachment generation in parallel 532 const [fileAttachments, asyncAgentAttachments] = await Promise.all([ 533 createPostCompactFileAttachments( 534 preCompactReadFileState, 535 context, 536 POST_COMPACT_MAX_FILES_TO_RESTORE, 537 ), 538 createAsyncAgentAttachmentsIfNeeded(context), 539 ]) 540 541 const postCompactFileAttachments: AttachmentMessage[] = [ 542 ...fileAttachments, 543 ...asyncAgentAttachments, 544 ] 545 const planAttachment = createPlanAttachmentIfNeeded(context.agentId) 546 if (planAttachment) { 547 postCompactFileAttachments.push(planAttachment) 548 } 549 550 // Add plan mode instructions if currently in plan mode, so the model 551 // continues operating in plan mode after compaction 552 const planModeAttachment = await createPlanModeAttachmentIfNeeded(context) 553 if (planModeAttachment) { 554 postCompactFileAttachments.push(planModeAttachment) 555 } 556 557 // Add skill attachment if skills were invoked in this session 558 const skillAttachment = createSkillAttachmentIfNeeded(context.agentId) 559 if (skillAttachment) { 560 postCompactFileAttachments.push(skillAttachment) 561 } 562 563 // Compaction ate prior delta attachments. Re-announce from the current 564 // state so the model has tool/instruction context on the first 565 // post-compact turn. Empty message history → diff against nothing → 566 // announces the full set. 
567 for (const att of getDeferredToolsDeltaAttachment( 568 context.options.tools, 569 context.options.mainLoopModel, 570 [], 571 { callSite: 'compact_full' }, 572 )) { 573 postCompactFileAttachments.push(createAttachmentMessage(att)) 574 } 575 for (const att of getAgentListingDeltaAttachment(context, [])) { 576 postCompactFileAttachments.push(createAttachmentMessage(att)) 577 } 578 for (const att of getMcpInstructionsDeltaAttachment( 579 context.options.mcpClients, 580 context.options.tools, 581 context.options.mainLoopModel, 582 [], 583 )) { 584 postCompactFileAttachments.push(createAttachmentMessage(att)) 585 } 586 587 context.onCompactProgress?.({ 588 type: 'hooks_start', 589 hookType: 'session_start', 590 }) 591 // Execute SessionStart hooks after successful compaction 592 const hookMessages = await processSessionStartHooks('compact', { 593 model: context.options.mainLoopModel, 594 }) 595 596 // Create the compact boundary marker and summary messages before the 597 // event so we can compute the true resulting-context size. 598 const boundaryMarker = createCompactBoundaryMessage( 599 isAutoCompact ? 'auto' : 'manual', 600 preCompactTokenCount ?? 0, 601 messages.at(-1)?.uuid, 602 ) 603 // Carry loaded-tool state — the summary doesn't preserve tool_reference 604 // blocks, so the post-compact schema filter needs this to keep sending 605 // already-loaded deferred tool schemas to the API. 
606 const preCompactDiscovered = extractDiscoveredToolNames(messages) 607 if (preCompactDiscovered.size > 0) { 608 boundaryMarker.compactMetadata.preCompactDiscoveredTools = [ 609 ...preCompactDiscovered, 610 ].sort() 611 } 612 613 const transcriptPath = getTranscriptPath() 614 const summaryMessages: UserMessage[] = [ 615 createUserMessage({ 616 content: getCompactUserSummaryMessage( 617 summary, 618 suppressFollowUpQuestions, 619 transcriptPath, 620 ), 621 isCompactSummary: true, 622 isVisibleInTranscriptOnly: true, 623 }), 624 ] 625 626 // Previously "postCompactTokenCount" — renamed because this is the 627 // compact API call's total usage (input_tokens ≈ preCompactTokenCount), 628 // NOT the size of the resulting context. Kept for event-field continuity. 629 const compactionCallTotalTokens = tokenCountFromLastAPIResponse([ 630 summaryResponse, 631 ]) 632 633 // Message-payload estimate of the resulting context. The next iteration's 634 // shouldAutoCompact will see this PLUS ~20-40K for system prompt + tools + 635 // userContext (via API usage.input_tokens). So `willRetriggerNextTurn: true` 636 // is a strong signal; `false` may still retrigger when this is close to threshold. 637 const truePostCompactTokenCount = roughTokenCountEstimationForMessages([ 638 boundaryMarker, 639 ...summaryMessages, 640 ...postCompactFileAttachments, 641 ...hookMessages, 642 ]) 643 644 // Extract compaction API usage metrics 645 const compactionUsage = getTokenUsage(summaryResponse) 646 647 const querySourceForEvent = 648 recompactionInfo?.querySource ?? context.options.querySource ?? 'unknown' 649 650 logEvent('tengu_compact', { 651 preCompactTokenCount, 652 // Kept for continuity — semantically the compact API call's total usage 653 postCompactTokenCount: compactionCallTotalTokens, 654 truePostCompactTokenCount, 655 autoCompactThreshold: recompactionInfo?.autoCompactThreshold ?? 
-1, 656 willRetriggerNextTurn: 657 recompactionInfo !== undefined && 658 truePostCompactTokenCount >= recompactionInfo.autoCompactThreshold, 659 isAutoCompact, 660 querySource: 661 querySourceForEvent as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 662 queryChainId: (context.queryTracking?.chainId ?? 663 '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 664 queryDepth: context.queryTracking?.depth ?? -1, 665 isRecompactionInChain: recompactionInfo?.isRecompactionInChain ?? false, 666 turnsSincePreviousCompact: 667 recompactionInfo?.turnsSincePreviousCompact ?? -1, 668 previousCompactTurnId: (recompactionInfo?.previousCompactTurnId ?? 669 '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 670 compactionInputTokens: compactionUsage?.input_tokens, 671 compactionOutputTokens: compactionUsage?.output_tokens, 672 compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0, 673 compactionCacheCreationTokens: 674 compactionUsage?.cache_creation_input_tokens ?? 0, 675 compactionTotalTokens: compactionUsage 676 ? compactionUsage.input_tokens + 677 (compactionUsage.cache_creation_input_tokens ?? 0) + 678 (compactionUsage.cache_read_input_tokens ?? 0) + 679 compactionUsage.output_tokens 680 : 0, 681 promptCacheSharingEnabled, 682 // analyzeContext walks every content block (~11ms on a 4.5K-message 683 // session) purely for this telemetry breakdown. Computed here, past 684 // the compaction-API await, so the sync walk doesn't starve the 685 // render loop before compaction even starts. Same deferral pattern 686 // as reactiveCompact.ts. 687 ...(() => { 688 try { 689 return tokenStatsToStatsigMetrics(analyzeContext(messages)) 690 } catch (error) { 691 logError(error as Error) 692 return {} 693 } 694 })(), 695 }) 696 697 // Reset cache read baseline so the post-compact drop isn't flagged as a break 698 if (feature('PROMPT_CACHE_BREAK_DETECTION')) { 699 notifyCompaction( 700 context.options.querySource ?? 
'compact', 701 context.agentId, 702 ) 703 } 704 markPostCompaction() 705 706 // Re-append session metadata (custom title, tag) so it stays within 707 // the 16KB tail window that readLiteMetadata reads for --resume display. 708 // Without this, enough post-compaction messages push the metadata entry 709 // out of the window, causing --resume to show the auto-generated title 710 // instead of the user-set session name. 711 reAppendSessionMetadata() 712 713 // Write a reduced transcript segment for the pre-compaction messages 714 // (assistant mode only). Fire-and-forget — errors are logged internally. 715 if (feature('KAIROS')) { 716 void sessionTranscriptModule?.writeSessionTranscriptSegment(messages) 717 } 718 719 context.onCompactProgress?.({ 720 type: 'hooks_start', 721 hookType: 'post_compact', 722 }) 723 const postCompactHookResult = await executePostCompactHooks( 724 { 725 trigger: isAutoCompact ? 'auto' : 'manual', 726 compactSummary: summary, 727 }, 728 context.abortController.signal, 729 ) 730 731 const combinedUserDisplayMessage = [ 732 userDisplayMessage, 733 postCompactHookResult.userDisplayMessage, 734 ] 735 .filter(Boolean) 736 .join('\n') 737 738 return { 739 boundaryMarker, 740 summaryMessages, 741 attachments: postCompactFileAttachments, 742 hookResults: hookMessages, 743 userDisplayMessage: combinedUserDisplayMessage || undefined, 744 preCompactTokenCount, 745 postCompactTokenCount: compactionCallTotalTokens, 746 truePostCompactTokenCount, 747 compactionUsage, 748 } 749 } catch (error) { 750 // Only show the error notification for manual /compact. 751 // Auto-compact failures are retried on the next turn and the 752 // notification is confusing when compaction eventually succeeds. 
753 if (!isAutoCompact) { 754 addErrorNotificationIfNeeded(error, context) 755 } 756 throw error 757 } finally { 758 context.setStreamMode?.('requesting') 759 context.setResponseLength?.(() => 0) 760 context.onCompactProgress?.({ type: 'compact_end' }) 761 context.setSDKStatus?.(null) 762 } 763} 764 765/** 766 * Performs a partial compaction around the selected message index. 767 * Direction 'from': summarizes messages after the index, keeps earlier ones. 768 * Prompt cache for kept (earlier) messages is preserved. 769 * Direction 'up_to': summarizes messages before the index, keeps later ones. 770 * Prompt cache is invalidated since the summary precedes the kept messages. 771 */ 772export async function partialCompactConversation( 773 allMessages: Message[], 774 pivotIndex: number, 775 context: ToolUseContext, 776 cacheSafeParams: CacheSafeParams, 777 userFeedback?: string, 778 direction: PartialCompactDirection = 'from', 779): Promise<CompactionResult> { 780 try { 781 const messagesToSummarize = 782 direction === 'up_to' 783 ? allMessages.slice(0, pivotIndex) 784 : allMessages.slice(pivotIndex) 785 // 'up_to' must strip old compact boundaries/summaries: for 'up_to', 786 // summary_B sits BEFORE kept, so a stale boundary_A in kept wins 787 // findLastCompactBoundaryIndex's backward scan and drops summary_B. 788 // 'from' keeps them: summary_B sits AFTER kept (backward scan still 789 // works), and removing an old summary would lose its covered history. 790 const messagesToKeep = 791 direction === 'up_to' 792 ? allMessages 793 .slice(pivotIndex) 794 .filter( 795 m => 796 m.type !== 'progress' && 797 !isCompactBoundaryMessage(m) && 798 !(m.type === 'user' && m.isCompactSummary), 799 ) 800 : allMessages.slice(0, pivotIndex).filter(m => m.type !== 'progress') 801 802 if (messagesToSummarize.length === 0) { 803 throw new Error( 804 direction === 'up_to' 805 ? 'Nothing to summarize before the selected message.' 
806 : 'Nothing to summarize after the selected message.', 807 ) 808 } 809 810 const preCompactTokenCount = tokenCountWithEstimation(allMessages) 811 812 context.onCompactProgress?.({ 813 type: 'hooks_start', 814 hookType: 'pre_compact', 815 }) 816 817 context.setSDKStatus?.('compacting') 818 const hookResult = await executePreCompactHooks( 819 { 820 trigger: 'manual', 821 customInstructions: null, 822 }, 823 context.abortController.signal, 824 ) 825 826 // Merge hook instructions with user feedback 827 let customInstructions: string | undefined 828 if (hookResult.newCustomInstructions && userFeedback) { 829 customInstructions = `${hookResult.newCustomInstructions}\n\nUser context: ${userFeedback}` 830 } else if (hookResult.newCustomInstructions) { 831 customInstructions = hookResult.newCustomInstructions 832 } else if (userFeedback) { 833 customInstructions = `User context: ${userFeedback}` 834 } 835 836 context.setStreamMode?.('requesting') 837 context.setResponseLength?.(() => 0) 838 context.onCompactProgress?.({ type: 'compact_start' }) 839 840 const compactPrompt = getPartialCompactPrompt(customInstructions, direction) 841 const summaryRequest = createUserMessage({ 842 content: compactPrompt, 843 }) 844 845 const failureMetadata = { 846 preCompactTokenCount, 847 direction: 848 direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 849 messagesSummarized: messagesToSummarize.length, 850 } 851 852 // 'up_to' prefix hits cache directly; 'from' sends all (tail wouldn't cache). 853 // PTL retry breaks the cache prefix but unblocks the user (CC-1180). 854 let apiMessages = direction === 'up_to' ? messagesToSummarize : allMessages 855 let retryCacheSafeParams = 856 direction === 'up_to' 857 ? 
{ ...cacheSafeParams, forkContextMessages: messagesToSummarize } 858 : cacheSafeParams 859 let summaryResponse: AssistantMessage 860 let summary: string | null 861 let ptlAttempts = 0 862 for (;;) { 863 summaryResponse = await streamCompactSummary({ 864 messages: apiMessages, 865 summaryRequest, 866 appState: context.getAppState(), 867 context, 868 preCompactTokenCount, 869 cacheSafeParams: retryCacheSafeParams, 870 }) 871 summary = getAssistantMessageText(summaryResponse) 872 if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break 873 874 ptlAttempts++ 875 const truncated = 876 ptlAttempts <= MAX_PTL_RETRIES 877 ? truncateHeadForPTLRetry(apiMessages, summaryResponse) 878 : null 879 if (!truncated) { 880 logEvent('tengu_partial_compact_failed', { 881 reason: 882 'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 883 ...failureMetadata, 884 ptlAttempts, 885 }) 886 throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG) 887 } 888 logEvent('tengu_compact_ptl_retry', { 889 attempt: ptlAttempts, 890 droppedMessages: apiMessages.length - truncated.length, 891 remainingMessages: truncated.length, 892 path: 'partial' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 893 }) 894 apiMessages = truncated 895 retryCacheSafeParams = { 896 ...retryCacheSafeParams, 897 forkContextMessages: truncated, 898 } 899 } 900 if (!summary) { 901 logEvent('tengu_partial_compact_failed', { 902 reason: 903 'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 904 ...failureMetadata, 905 }) 906 throw new Error( 907 'Failed to generate conversation summary - response did not contain valid text content', 908 ) 909 } else if (startsWithApiErrorPrefix(summary)) { 910 logEvent('tengu_partial_compact_failed', { 911 reason: 912 'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 913 ...failureMetadata, 914 }) 915 throw new Error(summary) 916 } 917 918 // Store the current file state before clearing 919 const 
preCompactReadFileState = cacheToObject(context.readFileState) 920 context.readFileState.clear() 921 context.loadedNestedMemoryPaths?.clear() 922 // Intentionally NOT resetting sentSkillNames — see compactConversation() 923 // for rationale (~4K tokens saved per compact event). 924 925 const [fileAttachments, asyncAgentAttachments] = await Promise.all([ 926 createPostCompactFileAttachments( 927 preCompactReadFileState, 928 context, 929 POST_COMPACT_MAX_FILES_TO_RESTORE, 930 messagesToKeep, 931 ), 932 createAsyncAgentAttachmentsIfNeeded(context), 933 ]) 934 935 const postCompactFileAttachments: AttachmentMessage[] = [ 936 ...fileAttachments, 937 ...asyncAgentAttachments, 938 ] 939 const planAttachment = createPlanAttachmentIfNeeded(context.agentId) 940 if (planAttachment) { 941 postCompactFileAttachments.push(planAttachment) 942 } 943 944 // Add plan mode instructions if currently in plan mode 945 const planModeAttachment = await createPlanModeAttachmentIfNeeded(context) 946 if (planModeAttachment) { 947 postCompactFileAttachments.push(planModeAttachment) 948 } 949 950 const skillAttachment = createSkillAttachmentIfNeeded(context.agentId) 951 if (skillAttachment) { 952 postCompactFileAttachments.push(skillAttachment) 953 } 954 955 // Re-announce only what was in the summarized portion — messagesToKeep 956 // is scanned, so anything already announced there is skipped. 
957 for (const att of getDeferredToolsDeltaAttachment( 958 context.options.tools, 959 context.options.mainLoopModel, 960 messagesToKeep, 961 { callSite: 'compact_partial' }, 962 )) { 963 postCompactFileAttachments.push(createAttachmentMessage(att)) 964 } 965 for (const att of getAgentListingDeltaAttachment(context, messagesToKeep)) { 966 postCompactFileAttachments.push(createAttachmentMessage(att)) 967 } 968 for (const att of getMcpInstructionsDeltaAttachment( 969 context.options.mcpClients, 970 context.options.tools, 971 context.options.mainLoopModel, 972 messagesToKeep, 973 )) { 974 postCompactFileAttachments.push(createAttachmentMessage(att)) 975 } 976 977 context.onCompactProgress?.({ 978 type: 'hooks_start', 979 hookType: 'session_start', 980 }) 981 const hookMessages = await processSessionStartHooks('compact', { 982 model: context.options.mainLoopModel, 983 }) 984 985 const postCompactTokenCount = tokenCountFromLastAPIResponse([ 986 summaryResponse, 987 ]) 988 const compactionUsage = getTokenUsage(summaryResponse) 989 990 logEvent('tengu_partial_compact', { 991 preCompactTokenCount, 992 postCompactTokenCount, 993 messagesKept: messagesToKeep.length, 994 messagesSummarized: messagesToSummarize.length, 995 direction: 996 direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 997 hasUserFeedback: !!userFeedback, 998 trigger: 999 'message_selector' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 1000 compactionInputTokens: compactionUsage?.input_tokens, 1001 compactionOutputTokens: compactionUsage?.output_tokens, 1002 compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0, 1003 compactionCacheCreationTokens: 1004 compactionUsage?.cache_creation_input_tokens ?? 0, 1005 }) 1006 1007 // Progress messages aren't loggable, so forkSessionImpl would null out 1008 // a logicalParentUuid pointing at one. Both directions skip them. 1009 const lastPreCompactUuid = 1010 direction === 'up_to' 1011 ? 
allMessages.slice(0, pivotIndex).findLast(m => m.type !== 'progress') 1012 ?.uuid 1013 : messagesToKeep.at(-1)?.uuid 1014 const boundaryMarker = createCompactBoundaryMessage( 1015 'manual', 1016 preCompactTokenCount ?? 0, 1017 lastPreCompactUuid, 1018 userFeedback, 1019 messagesToSummarize.length, 1020 ) 1021 // allMessages not just messagesToSummarize — set union is idempotent, 1022 // simpler than tracking which half each tool lived in. 1023 const preCompactDiscovered = extractDiscoveredToolNames(allMessages) 1024 if (preCompactDiscovered.size > 0) { 1025 boundaryMarker.compactMetadata.preCompactDiscoveredTools = [ 1026 ...preCompactDiscovered, 1027 ].sort() 1028 } 1029 1030 const transcriptPath = getTranscriptPath() 1031 const summaryMessages: UserMessage[] = [ 1032 createUserMessage({ 1033 content: getCompactUserSummaryMessage(summary, false, transcriptPath), 1034 isCompactSummary: true, 1035 ...(messagesToKeep.length > 0 1036 ? { 1037 summarizeMetadata: { 1038 messagesSummarized: messagesToSummarize.length, 1039 userContext: userFeedback, 1040 direction, 1041 }, 1042 } 1043 : { isVisibleInTranscriptOnly: true as const }), 1044 }), 1045 ] 1046 1047 if (feature('PROMPT_CACHE_BREAK_DETECTION')) { 1048 notifyCompaction( 1049 context.options.querySource ?? 'compact', 1050 context.agentId, 1051 ) 1052 } 1053 markPostCompaction() 1054 1055 // Re-append session metadata (custom title, tag) so it stays within 1056 // the 16KB tail window that readLiteMetadata reads for --resume display. 
    reAppendSessionMetadata()

    if (feature('KAIROS')) {
      // Fire-and-forget: transcript persistence must not block compaction.
      void sessionTranscriptModule?.writeSessionTranscriptSegment(
        messagesToSummarize,
      )
    }

    context.onCompactProgress?.({
      type: 'hooks_start',
      hookType: 'post_compact',
    })
    const postCompactHookResult = await executePostCompactHooks(
      {
        trigger: 'manual',
        compactSummary: summary,
      },
      context.abortController.signal,
    )

    // 'from': prefix-preserving → boundary; 'up_to': suffix → last summary
    const anchorUuid =
      direction === 'up_to'
        ? (summaryMessages.at(-1)?.uuid ?? boundaryMarker.uuid)
        : boundaryMarker.uuid
    return {
      boundaryMarker: annotateBoundaryWithPreservedSegment(
        boundaryMarker,
        anchorUuid,
        messagesToKeep,
      ),
      summaryMessages,
      messagesToKeep,
      attachments: postCompactFileAttachments,
      hookResults: hookMessages,
      userDisplayMessage: postCompactHookResult.userDisplayMessage,
      preCompactTokenCount,
      postCompactTokenCount,
      compactionUsage,
    }
  } catch (error) {
    addErrorNotificationIfNeeded(error, context)
    throw error
  } finally {
    // Always restore stream/status UI state, whether compaction succeeded,
    // failed, or was aborted.
    context.setStreamMode?.('requesting')
    context.setResponseLength?.(() => 0)
    context.onCompactProgress?.({ type: 'compact_end' })
    context.setSDKStatus?.(null)
  }
}

/**
 * Surfaces a generic "Error compacting conversation" notification unless the
 * failure is a deliberate user abort or the known "not enough messages" case,
 * neither of which warrants an error banner.
 */
function addErrorNotificationIfNeeded(
  error: unknown,
  context: Pick<ToolUseContext, 'addNotification'>,
) {
  if (
    !hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT) &&
    !hasExactErrorMessage(error, ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)
  ) {
    context.addNotification?.({
      key: 'error-compacting-conversation',
      text: 'Error compacting conversation',
      priority: 'immediate',
      color: 'error',
    })
  }
}

/**
 * Returns a CanUseToolFn that denies every tool call: the compaction agent
 * should only ever produce a text summary, never execute tools.
 */
export function createCompactCanUseTool(): CanUseToolFn {
  return async () => ({
    behavior: 'deny' as const,
    message: 'Tool use is not allowed during compaction',
    decisionReason: {
      type: 'other' as const,
      reason: 'compaction agent should only produce text summary',
    },
  })
}

/**
 * Produces the compaction summary from the model. Tries the prompt-cache-
 * sharing forked-agent path first (behind the tengu_compact_cache_prefix
 * flag), then falls back to a direct streaming request with optional retries.
 *
 * @returns The assistant message containing the summary text. May contain a
 *   PROMPT_TOO_LONG_ERROR_MESSAGE body — the caller's retry loop handles that.
 * @throws ERROR_MESSAGE_INCOMPLETE_RESPONSE when streaming yields no
 *   assistant message after all attempts.
 */
async function streamCompactSummary({
  messages,
  summaryRequest,
  appState,
  context,
  preCompactTokenCount,
  cacheSafeParams,
}: {
  messages: Message[]
  summaryRequest: UserMessage
  appState: Awaited<ReturnType<ToolUseContext['getAppState']>>
  context: ToolUseContext
  preCompactTokenCount: number
  cacheSafeParams: CacheSafeParams
}): Promise<AssistantMessage> {
  // When prompt cache sharing is enabled, use forked agent to reuse the
  // main conversation's cached prefix (system prompt, tools, context messages).
  // Falls back to regular streaming path on failure.
  // 3P default: true — see comment at the other tengu_compact_cache_prefix read above.
  const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_compact_cache_prefix',
    true,
  )
  // Send keep-alive signals during compaction to prevent remote session
  // WebSocket idle timeouts from dropping bridge connections. Compaction
  // API calls can take 5-10+ seconds, during which no other messages
  // flow through the transport — without keep-alives, the server may
  // close the WebSocket for inactivity.
  // Two signals: (1) PUT /worker heartbeat via sessionActivity, and
  // (2) re-emit 'compacting' status so the SDK event stream stays active
  // and the server doesn't consider the session stale.
  // (setIntervals's trailing args are forwarded to the callback, so the
  // callback never closes over `context` directly.)
  const activityInterval = isSessionActivityTrackingActive()
    ? setInterval(
        (statusSetter?: (status: 'compacting' | null) => void) => {
          sendSessionActivitySignal()
          statusSetter?.('compacting')
        },
        30_000,
        context.setSDKStatus,
      )
    : undefined

  try {
    if (promptCacheSharingEnabled) {
      try {
        // DO NOT set maxOutputTokens here. The fork piggybacks on the main thread's
        // prompt cache by sending identical cache-key params (system, tools, model,
        // messages prefix, thinking config). Setting maxOutputTokens would clamp
        // budget_tokens via Math.min(budget, maxOutputTokens-1) in claude.ts,
        // creating a thinking config mismatch that invalidates the cache.
        // The streaming fallback path (below) can safely set maxOutputTokensOverride
        // since it doesn't share cache with the main thread.
        const result = await runForkedAgent({
          promptMessages: [summaryRequest],
          cacheSafeParams,
          canUseTool: createCompactCanUseTool(),
          querySource: 'compact',
          forkLabel: 'compact',
          maxTurns: 1,
          skipCacheWrite: true,
          // Pass the compact context's abortController so user Esc aborts the
          // fork — same signal the streaming fallback uses at
          // `signal: context.abortController.signal` below.
          overrides: { abortController: context.abortController },
        })
        const assistantMsg = getLastAssistantMessage(result.messages)
        const assistantText = assistantMsg
          ? getAssistantMessageText(assistantMsg)
          : null
        // Guard isApiErrorMessage: query() catches API errors (including
        // APIUserAbortError on ESC) and yields them as synthetic assistant
        // messages. Without this check, an aborted compact "succeeds" with
        // "Request was aborted." as the summary — the text doesn't start with
        // "API Error" so the caller's startsWithApiErrorPrefix guard misses it.
        if (assistantMsg && assistantText && !assistantMsg.isApiErrorMessage) {
          // Skip success logging for PTL error text — it's returned so the
          // caller's retry loop catches it, but it's not a successful summary.
          if (!assistantText.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) {
            logEvent('tengu_compact_cache_sharing_success', {
              preCompactTokenCount,
              outputTokens: result.totalUsage.output_tokens,
              cacheReadInputTokens: result.totalUsage.cache_read_input_tokens,
              cacheCreationInputTokens:
                result.totalUsage.cache_creation_input_tokens,
              cacheHitRate:
                result.totalUsage.cache_read_input_tokens > 0
                  ? result.totalUsage.cache_read_input_tokens /
                    (result.totalUsage.cache_read_input_tokens +
                      result.totalUsage.cache_creation_input_tokens +
                      result.totalUsage.input_tokens)
                  : 0,
            })
          }
          return assistantMsg
        }
        logForDebugging(
          `Compact cache sharing: no text in response, falling back. Response: ${jsonStringify(assistantMsg)}`,
          { level: 'warn' },
        )
        logEvent('tengu_compact_cache_sharing_fallback', {
          reason:
            'no_text_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
        })
      } catch (error) {
        logError(error)
        logEvent('tengu_compact_cache_sharing_fallback', {
          reason:
            'error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
        })
      }
    }

    // Regular streaming path (fallback when cache sharing fails or is disabled)
    const retryEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_compact_streaming_retry',
      false,
    )
    const maxAttempts = retryEnabled ? MAX_COMPACT_STREAMING_RETRIES : 1

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      // Reset state for retry
      let hasStartedStreaming = false
      let response: AssistantMessage | undefined
      context.setResponseLength?.(() => 0)

      // Check if tool search is enabled using the main loop's tools list.
      // context.options.tools includes MCP tools merged via useMergedTools.
      const useToolSearch = await isToolSearchEnabled(
        context.options.mainLoopModel,
        context.options.tools,
        async () => appState.toolPermissionContext,
        context.options.agentDefinitions.activeAgents,
        'compact',
      )

      // When tool search is enabled, include ToolSearchTool and MCP tools. They get
      // defer_loading: true and don't count against context - the API filters them out
      // of system_prompt_tools before token counting (see api/token_count_api/counting.py:188
      // and api/public_api/messages/handler.py:324).
      // Filter MCP tools from context.options.tools (not appState.mcp.tools) so we
      // get the permission-filtered set from useMergedTools — same source used for
      // isToolSearchEnabled above and normalizeMessagesForAPI below.
      // Deduplicate by name to avoid API errors when MCP tools share names with built-in tools.
      const tools: Tool[] = useToolSearch
        ? uniqBy(
            [
              FileReadTool,
              ToolSearchTool,
              ...context.options.tools.filter(t => t.isMcp),
            ],
            'name',
          )
        : [FileReadTool]

      const streamingGen = queryModelWithStreaming({
        messages: normalizeMessagesForAPI(
          stripImagesFromMessages(
            stripReinjectedAttachments([
              ...getMessagesAfterCompactBoundary(messages),
              summaryRequest,
            ]),
          ),
          context.options.tools,
        ),
        systemPrompt: asSystemPrompt([
          'You are a helpful AI assistant tasked with summarizing conversations.',
        ]),
        thinkingConfig: { type: 'disabled' as const },
        tools,
        signal: context.abortController.signal,
        options: {
          async getToolPermissionContext() {
            const appState = context.getAppState()
            return appState.toolPermissionContext
          },
          model: context.options.mainLoopModel,
          toolChoice: undefined,
          isNonInteractiveSession: context.options.isNonInteractiveSession,
          hasAppendSystemPrompt: !!context.options.appendSystemPrompt,
          maxOutputTokensOverride: Math.min(
            COMPACT_MAX_OUTPUT_TOKENS,
            getMaxOutputTokensForModel(context.options.mainLoopModel),
          ),
          querySource: 'compact',
          agents: context.options.agentDefinitions.activeAgents,
          mcpTools: [],
          effortValue: appState.effortValue,
        },
      })
      const streamIter = streamingGen[Symbol.asyncIterator]()
      let next = await streamIter.next()

      while (!next.done) {
        const event = next.value

        // First text block starting → flip the UI into 'responding' mode once.
        if (
          !hasStartedStreaming &&
          event.type === 'stream_event' &&
          event.event.type === 'content_block_start' &&
          event.event.content_block.type === 'text'
        ) {
          hasStartedStreaming = true
          context.setStreamMode?.('responding')
        }

        // Accumulate streamed character count for the progress indicator.
        if (
          event.type === 'stream_event' &&
          event.event.type === 'content_block_delta' &&
          event.event.delta.type === 'text_delta'
        ) {
          const charactersStreamed = event.event.delta.text.length
          context.setResponseLength?.(length => length + charactersStreamed)
        }

        if (event.type === 'assistant') {
          response = event
        }

        next = await streamIter.next()
      }

      if (response) {
        return response
      }

      if (attempt < maxAttempts) {
        logEvent('tengu_compact_streaming_retry', {
          attempt,
          preCompactTokenCount,
          hasStartedStreaming,
        })
        // Backoff between attempts; abort during sleep surfaces as a user abort.
        await sleep(getRetryDelay(attempt), context.abortController.signal, {
          abortError: () => new APIUserAbortError(),
        })
        continue
      }

      logForDebugging(
        `Compact streaming failed after ${attempt} attempts. hasStartedStreaming=${hasStartedStreaming}`,
        { level: 'error' },
      )
      logEvent('tengu_compact_failed', {
        reason:
          'no_streaming_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        preCompactTokenCount,
        hasStartedStreaming,
        retryEnabled,
        attempts: attempt,
        promptCacheSharingEnabled,
      })
      throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
    }

    // This should never be reached due to the throw above, but TypeScript needs it
    throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
  } finally {
    // Safe when activityInterval is undefined — clearInterval ignores it.
    clearInterval(activityInterval)
  }
}

/**
 * Creates attachment messages for recently accessed files to restore them after compaction.
 * This prevents the model from having to re-read files that were recently accessed.
 * Re-reads files using FileReadTool to get fresh content with proper validation.
 * Files are selected based on recency, but constrained by both file count and token budget limits.
 *
 * Files already present as Read tool results in preservedMessages are skipped —
 * re-injecting identical content the model can already see in the preserved tail
 * is pure waste (up to 25K tok/compact). Mirrors the diff-against-preserved
 * pattern that getDeferredToolsDeltaAttachment uses at the same call sites.
 *
 * @param readFileState The current file state tracking recently read files
 * @param toolUseContext The tool use context for calling FileReadTool
 * @param maxFiles Maximum number of files to restore (default: 5)
 * @param preservedMessages Messages kept post-compact; Read results here are skipped
 * @returns Array of attachment messages for the most recently accessed files that fit within token budget
 */
export async function createPostCompactFileAttachments(
  readFileState: Record<string, { content: string; timestamp: number }>,
  toolUseContext: ToolUseContext,
  maxFiles: number,
  preservedMessages: Message[] = [],
): Promise<AttachmentMessage[]> {
  const preservedReadPaths = collectReadToolFilePaths(preservedMessages)
  // Most-recently-read first, capped at maxFiles before any file I/O happens.
  const recentFiles = Object.entries(readFileState)
    .map(([filename, state]) => ({ filename, ...state }))
    .filter(
      file =>
        !shouldExcludeFromPostCompactRestore(
          file.filename,
          toolUseContext.agentId,
        ) && !preservedReadPaths.has(expandPath(file.filename)),
    )
    .sort((a, b) => b.timestamp - a.timestamp)
    .slice(0, maxFiles)

  const results = await Promise.all(
    recentFiles.map(async file => {
      const attachment = await generateFileAttachment(
        file.filename,
        {
          ...toolUseContext,
          fileReadingLimits: {
            maxTokens: POST_COMPACT_MAX_TOKENS_PER_FILE,
          },
        },
        'tengu_post_compact_file_restore_success',
        'tengu_post_compact_file_restore_error',
        'compact',
      )
      return attachment ? createAttachmentMessage(attachment) : null
    }),
  )

  // Greedy budget filter in recency order: an attachment that would overflow
  // the budget is dropped, but later (smaller) ones may still fit.
  let usedTokens = 0
  return results.filter((result): result is AttachmentMessage => {
    if (result === null) {
      return false
    }
    const attachmentTokens = roughTokenCountEstimation(jsonStringify(result))
    if (usedTokens + attachmentTokens <= POST_COMPACT_TOKEN_BUDGET) {
      usedTokens += attachmentTokens
      return true
    }
    return false
  })
}

/**
 * Creates a plan file attachment if a plan file exists for the current session.
 * This ensures the plan is preserved after compaction.
 */
export function createPlanAttachmentIfNeeded(
  agentId?: AgentId,
): AttachmentMessage | null {
  const planContent = getPlan(agentId)

  if (!planContent) {
    return null
  }

  const planFilePath = getPlanFilePath(agentId)

  return createAttachmentMessage({
    type: 'plan_file_reference',
    planFilePath,
    planContent,
  })
}

/**
 * Creates an attachment for invoked skills to preserve their content across compaction.
 * Only includes skills scoped to the given agent (or main session when agentId is null/undefined).
 * This ensures skill guidelines remain available after the conversation is summarized
 * without leaking skills from other agent contexts.
 */
export function createSkillAttachmentIfNeeded(
  agentId?: string,
): AttachmentMessage | null {
  const invokedSkills = getInvokedSkillsForAgent(agentId)

  if (invokedSkills.size === 0) {
    return null
  }

  // Sorted most-recent-first so budget pressure drops the least-relevant skills.
  // Per-skill truncation keeps the head of each file (where setup/usage
  // instructions typically live) rather than dropping whole skills.
  let usedTokens = 0
  const skills = Array.from(invokedSkills.values())
    .sort((a, b) => b.invokedAt - a.invokedAt)
    .map(skill => ({
      name: skill.skillName,
      path: skill.skillPath,
      content: truncateToTokens(
        skill.content,
        POST_COMPACT_MAX_TOKENS_PER_SKILL,
      ),
    }))
    .filter(skill => {
      const tokens = roughTokenCountEstimation(skill.content)
      if (usedTokens + tokens > POST_COMPACT_SKILLS_TOKEN_BUDGET) {
        return false
      }
      usedTokens += tokens
      return true
    })

  if (skills.length === 0) {
    return null
  }

  return createAttachmentMessage({
    type: 'invoked_skills',
    skills,
  })
}

/**
 * Creates a plan_mode attachment if the user is currently in plan mode.
 * This ensures the model continues to operate in plan mode after compaction
 * (otherwise it would lose the plan mode instructions since those are
 * normally only injected on tool-use turns via getAttachmentMessages).
 */
export async function createPlanModeAttachmentIfNeeded(
  context: ToolUseContext,
): Promise<AttachmentMessage | null> {
  const appState = context.getAppState()
  if (appState.toolPermissionContext.mode !== 'plan') {
    return null
  }

  const planFilePath = getPlanFilePath(context.agentId)
  const planExists = getPlan(context.agentId) !== null

  return createAttachmentMessage({
    type: 'plan_mode',
    reminderType: 'full',
    isSubAgent: !!context.agentId,
    planFilePath,
    planExists,
  })
}

/**
 * Creates attachments for async agents so the model knows about them after
 * compaction. Covers both agents still running in the background (so the model
 * doesn't spawn a duplicate) and agents that have finished but whose results
 * haven't been retrieved yet.
 */
export async function createAsyncAgentAttachmentsIfNeeded(
  context: ToolUseContext,
): Promise<AttachmentMessage[]> {
  const appState = context.getAppState()
  const asyncAgents = Object.values(appState.tasks).filter(
    (task): task is LocalAgentTaskState => task.type === 'local_agent',
  )

  return asyncAgents.flatMap(agent => {
    // Skip already-retrieved results, not-yet-started agents, and this
    // agent's own task entry.
    if (
      agent.retrieved ||
      agent.status === 'pending' ||
      agent.agentId === context.agentId
    ) {
      return []
    }
    return [
      createAttachmentMessage({
        type: 'task_status',
        taskId: agent.agentId,
        taskType: 'local_agent',
        description: agent.description,
        status: agent.status,
        deltaSummary:
          agent.status === 'running'
            ? (agent.progress?.summary ?? null)
            : (agent.error ?? null),
        outputFilePath: getTaskOutputPath(agent.agentId),
      }),
    ]
  })
}

/**
 * Scan messages for Read tool_use blocks and collect their file_path inputs
 * (normalized via expandPath). Used to dedup post-compact file restoration
 * against what's already visible in the preserved tail.
 *
 * Skips Reads whose tool_result is a dedup stub — the stub points at an
 * earlier full Read that may have been compacted away, so we want
 * createPostCompactFileAttachments to re-inject the real content.
 */
function collectReadToolFilePaths(messages: Message[]): Set<string> {
  // Pass 1: collect tool_use_ids whose results are FILE_UNCHANGED_STUB stubs.
  const stubIds = new Set<string>()
  for (const message of messages) {
    if (message.type !== 'user' || !Array.isArray(message.message.content)) {
      continue
    }
    for (const block of message.message.content) {
      if (
        block.type === 'tool_result' &&
        typeof block.content === 'string' &&
        block.content.startsWith(FILE_UNCHANGED_STUB)
      ) {
        stubIds.add(block.tool_use_id)
      }
    }
  }

  // Pass 2: collect file_path inputs from non-stubbed Read tool_use blocks.
  const paths = new Set<string>()
  for (const message of messages) {
    if (
      message.type !== 'assistant' ||
      !Array.isArray(message.message.content)
    ) {
      continue
    }
    for (const block of message.message.content) {
      if (
        block.type !== 'tool_use' ||
        block.name !== FILE_READ_TOOL_NAME ||
        stubIds.has(block.id)
      ) {
        continue
      }
      const input = block.input
      if (
        input &&
        typeof input === 'object' &&
        'file_path' in input &&
        typeof input.file_path === 'string'
      ) {
        paths.add(expandPath(input.file_path))
      }
    }
  }
  return paths
}

// Appended to skill content that truncateToTokens cut to fit the per-skill
// token budget; tells the model how to recover the full text.
const SKILL_TRUNCATION_MARKER =
  '\n\n[... skill content truncated for compaction; use Read on the skill path if you need the full text]'

/**
 * Truncate content to roughly maxTokens, keeping the head. roughTokenCountEstimation
 * uses ~4 chars/token (its default bytesPerToken), so char budget = maxTokens * 4
 * minus the marker so the result stays within budget. Marker tells the model it
 * can Read the full file if needed.
 */
function truncateToTokens(content: string, maxTokens: number): string {
  if (roughTokenCountEstimation(content) <= maxTokens) {
    return content
  }
  const charBudget = maxTokens * 4 - SKILL_TRUNCATION_MARKER.length
  return content.slice(0, charBudget) + SKILL_TRUNCATION_MARKER
}

/**
 * True for files that must not be re-injected as post-compact attachments:
 * the session's plan file (restored separately via createPlanAttachmentIfNeeded)
 * and claude.md-style memory files (presumably re-supplied by the memory
 * system — NOTE(review): confirm against the memory injection path).
 */
function shouldExcludeFromPostCompactRestore(
  filename: string,
  agentId?: AgentId,
): boolean {
  const normalizedFilename = expandPath(filename)
  // Exclude plan files
  try {
    const planFilePath = expandPath(getPlanFilePath(agentId))
    if (normalizedFilename === planFilePath) {
      return true
    }
  } catch {
    // If we can't get plan file path, continue with other checks
  }

  // Exclude all types of claude.md files
  // TODO: Refactor to use isMemoryFilePath() from claudemd.ts for consistency
  // and to also match child directory memory files (.claude/rules/*.md, etc.)
  try {
    const normalizedMemoryPaths = new Set(
      MEMORY_TYPE_VALUES.map(type => expandPath(getMemoryPath(type))),
    )

    if (normalizedMemoryPaths.has(normalizedFilename)) {
      return true
    }
  } catch {
    // If we can't get memory paths, continue
  }

  return false
}