source dump of claude code
at main 1705 lines 61 kB view raw
1import { feature } from 'bun:bundle' 2import type { UUID } from 'crypto' 3import uniqBy from 'lodash-es/uniqBy.js' 4 5/* eslint-disable @typescript-eslint/no-require-imports */ 6const sessionTranscriptModule = feature('KAIROS') 7 ? (require('../sessionTranscript/sessionTranscript.js') as typeof import('../sessionTranscript/sessionTranscript.js')) 8 : null 9 10import { APIUserAbortError } from '@anthropic-ai/sdk' 11import { markPostCompaction } from 'src/bootstrap/state.js' 12import { getInvokedSkillsForAgent } from '../../bootstrap/state.js' 13import type { QuerySource } from '../../constants/querySource.js' 14import type { CanUseToolFn } from '../../hooks/useCanUseTool.js' 15import type { Tool, ToolUseContext } from '../../Tool.js' 16import type { LocalAgentTaskState } from '../../tasks/LocalAgentTask/LocalAgentTask.js' 17import { FileReadTool } from '../../tools/FileReadTool/FileReadTool.js' 18import { 19 FILE_READ_TOOL_NAME, 20 FILE_UNCHANGED_STUB, 21} from '../../tools/FileReadTool/prompt.js' 22import { ToolSearchTool } from '../../tools/ToolSearchTool/ToolSearchTool.js' 23import type { AgentId } from '../../types/ids.js' 24import type { 25 AssistantMessage, 26 AttachmentMessage, 27 HookResultMessage, 28 Message, 29 PartialCompactDirection, 30 SystemCompactBoundaryMessage, 31 SystemMessage, 32 UserMessage, 33} from '../../types/message.js' 34import { 35 createAttachmentMessage, 36 generateFileAttachment, 37 getAgentListingDeltaAttachment, 38 getDeferredToolsDeltaAttachment, 39 getMcpInstructionsDeltaAttachment, 40} from '../../utils/attachments.js' 41import { getMemoryPath } from '../../utils/config.js' 42import { COMPACT_MAX_OUTPUT_TOKENS } from '../../utils/context.js' 43import { 44 analyzeContext, 45 tokenStatsToStatsigMetrics, 46} from '../../utils/contextAnalysis.js' 47import { logForDebugging } from '../../utils/debug.js' 48import { hasExactErrorMessage } from '../../utils/errors.js' 49import { cacheToObject } from '../../utils/fileStateCache.js' 
50import { 51 type CacheSafeParams, 52 runForkedAgent, 53} from '../../utils/forkedAgent.js' 54import { 55 executePostCompactHooks, 56 executePreCompactHooks, 57} from '../../utils/hooks.js' 58import { logError } from '../../utils/log.js' 59import { MEMORY_TYPE_VALUES } from '../../utils/memory/types.js' 60import { 61 createCompactBoundaryMessage, 62 createUserMessage, 63 getAssistantMessageText, 64 getLastAssistantMessage, 65 getMessagesAfterCompactBoundary, 66 isCompactBoundaryMessage, 67 normalizeMessagesForAPI, 68} from '../../utils/messages.js' 69import { expandPath } from '../../utils/path.js' 70import { getPlan, getPlanFilePath } from '../../utils/plans.js' 71import { 72 isSessionActivityTrackingActive, 73 sendSessionActivitySignal, 74} from '../../utils/sessionActivity.js' 75import { processSessionStartHooks } from '../../utils/sessionStart.js' 76import { 77 getTranscriptPath, 78 reAppendSessionMetadata, 79} from '../../utils/sessionStorage.js' 80import { sleep } from '../../utils/sleep.js' 81import { jsonStringify } from '../../utils/slowOperations.js' 82/* eslint-enable @typescript-eslint/no-require-imports */ 83import { asSystemPrompt } from '../../utils/systemPromptType.js' 84import { getTaskOutputPath } from '../../utils/task/diskOutput.js' 85import { 86 getTokenUsage, 87 tokenCountFromLastAPIResponse, 88 tokenCountWithEstimation, 89} from '../../utils/tokens.js' 90import { 91 extractDiscoveredToolNames, 92 isToolSearchEnabled, 93} from '../../utils/toolSearch.js' 94import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js' 95import { 96 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 97 logEvent, 98} from '../analytics/index.js' 99import { 100 getMaxOutputTokensForModel, 101 queryModelWithStreaming, 102} from '../api/claude.js' 103import { 104 getPromptTooLongTokenGap, 105 PROMPT_TOO_LONG_ERROR_MESSAGE, 106 startsWithApiErrorPrefix, 107} from '../api/errors.js' 108import { notifyCompaction } from 
'../api/promptCacheBreakDetection.js' 109import { getRetryDelay } from '../api/withRetry.js' 110import { logPermissionContextForAnts } from '../internalLogging.js' 111import { 112 roughTokenCountEstimation, 113 roughTokenCountEstimationForMessages, 114} from '../tokenEstimation.js' 115import { groupMessagesByApiRound } from './grouping.js' 116import { 117 getCompactPrompt, 118 getCompactUserSummaryMessage, 119 getPartialCompactPrompt, 120} from './prompt.js' 121 122export const POST_COMPACT_MAX_FILES_TO_RESTORE = 5 123export const POST_COMPACT_TOKEN_BUDGET = 50_000 124export const POST_COMPACT_MAX_TOKENS_PER_FILE = 5_000 125// Skills can be large (verify=18.7KB, claude-api=20.1KB). Previously re-injected 126// unbounded on every compact → 5-10K tok/compact. Per-skill truncation beats 127// dropping — instructions at the top of a skill file are usually the critical 128// part. Budget sized to hold ~5 skills at the per-skill cap. 129export const POST_COMPACT_MAX_TOKENS_PER_SKILL = 5_000 130export const POST_COMPACT_SKILLS_TOKEN_BUDGET = 25_000 131const MAX_COMPACT_STREAMING_RETRIES = 2 132 133/** 134 * Strip image blocks from user messages before sending for compaction. 135 * Images are not needed for generating a conversation summary and can 136 * cause the compaction API call itself to hit the prompt-too-long limit, 137 * especially in CCD sessions where users frequently attach images. 138 * Replaces image blocks with a text marker so the summary still notes 139 * that an image was shared. 140 * 141 * Note: Only user messages contain images (either directly attached or within 142 * tool_result content from tools). Assistant messages contain text, tool_use, 143 * and thinking blocks but not images. 
144 */ 145export function stripImagesFromMessages(messages: Message[]): Message[] { 146 return messages.map(message => { 147 if (message.type !== 'user') { 148 return message 149 } 150 151 const content = message.message.content 152 if (!Array.isArray(content)) { 153 return message 154 } 155 156 let hasMediaBlock = false 157 const newContent = content.flatMap(block => { 158 if (block.type === 'image') { 159 hasMediaBlock = true 160 return [{ type: 'text' as const, text: '[image]' }] 161 } 162 if (block.type === 'document') { 163 hasMediaBlock = true 164 return [{ type: 'text' as const, text: '[document]' }] 165 } 166 // Also strip images/documents nested inside tool_result content arrays 167 if (block.type === 'tool_result' && Array.isArray(block.content)) { 168 let toolHasMedia = false 169 const newToolContent = block.content.map(item => { 170 if (item.type === 'image') { 171 toolHasMedia = true 172 return { type: 'text' as const, text: '[image]' } 173 } 174 if (item.type === 'document') { 175 toolHasMedia = true 176 return { type: 'text' as const, text: '[document]' } 177 } 178 return item 179 }) 180 if (toolHasMedia) { 181 hasMediaBlock = true 182 return [{ ...block, content: newToolContent }] 183 } 184 } 185 return [block] 186 }) 187 188 if (!hasMediaBlock) { 189 return message 190 } 191 192 return { 193 ...message, 194 message: { 195 ...message.message, 196 content: newContent, 197 }, 198 } as typeof message 199 }) 200} 201 202/** 203 * Strip attachment types that are re-injected post-compaction anyway. 204 * skill_discovery/skill_listing are re-surfaced by resetSentSkillNames() 205 * + the next turn's discovery signal, so feeding them to the summarizer 206 * wastes tokens and pollutes the summary with stale skill suggestions. 207 * 208 * No-op when EXPERIMENTAL_SKILL_SEARCH is off (the attachment types 209 * don't exist on external builds). 
210 */ 211export function stripReinjectedAttachments(messages: Message[]): Message[] { 212 if (feature('EXPERIMENTAL_SKILL_SEARCH')) { 213 return messages.filter( 214 m => 215 !( 216 m.type === 'attachment' && 217 (m.attachment.type === 'skill_discovery' || 218 m.attachment.type === 'skill_listing') 219 ), 220 ) 221 } 222 return messages 223} 224 225export const ERROR_MESSAGE_NOT_ENOUGH_MESSAGES = 226 'Not enough messages to compact.' 227const MAX_PTL_RETRIES = 3 228const PTL_RETRY_MARKER = '[earlier conversation truncated for compaction retry]' 229 230/** 231 * Drops the oldest API-round groups from messages until tokenGap is covered. 232 * Falls back to dropping 20% of groups when the gap is unparseable (some 233 * Vertex/Bedrock error formats). Returns null when nothing can be dropped 234 * without leaving an empty summarize set. 235 * 236 * This is the last-resort escape hatch for CC-1180 — when the compact request 237 * itself hits prompt-too-long, the user is otherwise stuck. Dropping the 238 * oldest context is lossy but unblocks them. The reactive-compact path 239 * (compactMessages.ts) has the proper retry loop that peels from the tail; 240 * this helper is the dumb-but-safe fallback for the proactive/manual path 241 * that wasn't migrated in bfdb472f's unification. 242 */ 243export function truncateHeadForPTLRetry( 244 messages: Message[], 245 ptlResponse: AssistantMessage, 246): Message[] | null { 247 // Strip our own synthetic marker from a previous retry before grouping. 248 // Otherwise it becomes its own group 0 and the 20% fallback stalls 249 // (drops only the marker, re-adds it, zero progress on retry 2+). 250 const input = 251 messages[0]?.type === 'user' && 252 messages[0].isMeta && 253 messages[0].message.content === PTL_RETRY_MARKER 254 ? 
messages.slice(1) 255 : messages 256 257 const groups = groupMessagesByApiRound(input) 258 if (groups.length < 2) return null 259 260 const tokenGap = getPromptTooLongTokenGap(ptlResponse) 261 let dropCount: number 262 if (tokenGap !== undefined) { 263 let acc = 0 264 dropCount = 0 265 for (const g of groups) { 266 acc += roughTokenCountEstimationForMessages(g) 267 dropCount++ 268 if (acc >= tokenGap) break 269 } 270 } else { 271 dropCount = Math.max(1, Math.floor(groups.length * 0.2)) 272 } 273 274 // Keep at least one group so there's something to summarize. 275 dropCount = Math.min(dropCount, groups.length - 1) 276 if (dropCount < 1) return null 277 278 const sliced = groups.slice(dropCount).flat() 279 // groupMessagesByApiRound puts the preamble in group 0 and starts every 280 // subsequent group with an assistant message. Dropping group 0 leaves an 281 // assistant-first sequence which the API rejects (first message must be 282 // role=user). Prepend a synthetic user marker — ensureToolResultPairing 283 // already handles any orphaned tool_results this creates. 284 if (sliced[0]?.type === 'assistant') { 285 return [ 286 createUserMessage({ content: PTL_RETRY_MARKER, isMeta: true }), 287 ...sliced, 288 ] 289 } 290 return sliced 291} 292 293export const ERROR_MESSAGE_PROMPT_TOO_LONG = 294 'Conversation too long. Press esc twice to go up a few messages and try again.' 295export const ERROR_MESSAGE_USER_ABORT = 'API Error: Request was aborted.' 296export const ERROR_MESSAGE_INCOMPLETE_RESPONSE = 297 'Compaction interrupted · This may be due to network issues — please try again.' 

export interface CompactionResult {
  boundaryMarker: SystemMessage
  summaryMessages: UserMessage[]
  attachments: AttachmentMessage[]
  hookResults: HookResultMessage[]
  messagesToKeep?: Message[]
  userDisplayMessage?: string
  preCompactTokenCount?: number
  postCompactTokenCount?: number
  truePostCompactTokenCount?: number
  compactionUsage?: ReturnType<typeof getTokenUsage>
}

/**
 * Diagnosis context passed from autoCompactIfNeeded into compactConversation.
 * Lets the tengu_compact event disambiguate same-chain loops (H2) from
 * cross-agent (H1/H5) and manual-vs-auto (H3) compactions without joins.
 */
export type RecompactionInfo = {
  isRecompactionInChain: boolean
  turnsSincePreviousCompact: number
  previousCompactTurnId?: string
  autoCompactThreshold: number
  querySource?: QuerySource
}

/**
 * Assemble the base post-compact message array from a CompactionResult so
 * every compaction path produces the same ordering:
 * boundaryMarker, summaryMessages, messagesToKeep, attachments, hookResults.
 */
export function buildPostCompactMessages(result: CompactionResult): Message[] {
  const kept = result.messagesToKeep ?? []
  return [
    result.boundaryMarker,
    ...result.summaryMessages,
    ...kept,
    ...result.attachments,
    ...result.hookResults,
  ]
}

/**
 * Annotate a compact boundary with relink metadata for messagesToKeep.
 * Preserved messages keep their original parentUuids on disk (dedup-skipped);
 * the loader uses this to patch head→anchor and anchor's-other-children→tail.
 *
 * `anchorUuid` = what sits immediately before keep[0] in the desired chain:
 * - suffix-preserving (reactive/session-memory): last summary message
 * - prefix-preserving (partial compact): the boundary itself
 */
export function annotateBoundaryWithPreservedSegment(
  boundary: SystemCompactBoundaryMessage,
  anchorUuid: UUID,
  messagesToKeep: readonly Message[] | undefined,
): SystemCompactBoundaryMessage {
  if (!messagesToKeep || messagesToKeep.length === 0) {
    return boundary
  }
  const headUuid = messagesToKeep[0]!.uuid
  const tailUuid = messagesToKeep[messagesToKeep.length - 1]!.uuid
  return {
    ...boundary,
    compactMetadata: {
      ...boundary.compactMetadata,
      preservedSegment: { headUuid, anchorUuid, tailUuid },
    },
  }
}

/**
 * Merges user-supplied custom instructions with hook-provided instructions.
 * User instructions come first; hook instructions are appended.
 * Empty strings normalize to undefined.
 */
export function mergeHookInstructions(
  userInstructions: string | undefined,
  hookInstructions: string | undefined,
): string | undefined {
  const user = userInstructions || undefined
  const hook = hookInstructions || undefined
  if (user && hook) {
    return `${user}\n\n${hook}`
  }
  return user ?? hook
}

/**
 * Creates a compact version of a conversation by summarizing older messages
 * and preserving recent conversation history.
386 */ 387export async function compactConversation( 388 messages: Message[], 389 context: ToolUseContext, 390 cacheSafeParams: CacheSafeParams, 391 suppressFollowUpQuestions: boolean, 392 customInstructions?: string, 393 isAutoCompact: boolean = false, 394 recompactionInfo?: RecompactionInfo, 395): Promise<CompactionResult> { 396 try { 397 if (messages.length === 0) { 398 throw new Error(ERROR_MESSAGE_NOT_ENOUGH_MESSAGES) 399 } 400 401 const preCompactTokenCount = tokenCountWithEstimation(messages) 402 403 const appState = context.getAppState() 404 void logPermissionContextForAnts(appState.toolPermissionContext, 'summary') 405 406 context.onCompactProgress?.({ 407 type: 'hooks_start', 408 hookType: 'pre_compact', 409 }) 410 411 // Execute PreCompact hooks 412 context.setSDKStatus?.('compacting') 413 const hookResult = await executePreCompactHooks( 414 { 415 trigger: isAutoCompact ? 'auto' : 'manual', 416 customInstructions: customInstructions ?? null, 417 }, 418 context.abortController.signal, 419 ) 420 customInstructions = mergeHookInstructions( 421 customInstructions, 422 hookResult.newCustomInstructions, 423 ) 424 const userDisplayMessage = hookResult.userDisplayMessage 425 426 // Show requesting mode with up arrow and custom message 427 context.setStreamMode?.('requesting') 428 context.setResponseLength?.(() => 0) 429 context.onCompactProgress?.({ type: 'compact_start' }) 430 431 // 3P default: true — forked-agent path reuses main conversation's prompt cache. 432 // Experiment (Jan 2026) confirmed: false path is 98% cache miss, costs ~0.76% of 433 // fleet cache_creation (~38B tok/day), concentrated in ephemeral envs (CCR/GHA/SDK) 434 // with cold GB cache and 3P providers where GB is disabled. GB gate kept as kill-switch. 
435 const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE( 436 'tengu_compact_cache_prefix', 437 true, 438 ) 439 440 const compactPrompt = getCompactPrompt(customInstructions) 441 const summaryRequest = createUserMessage({ 442 content: compactPrompt, 443 }) 444 445 let messagesToSummarize = messages 446 let retryCacheSafeParams = cacheSafeParams 447 let summaryResponse: AssistantMessage 448 let summary: string | null 449 let ptlAttempts = 0 450 for (;;) { 451 summaryResponse = await streamCompactSummary({ 452 messages: messagesToSummarize, 453 summaryRequest, 454 appState, 455 context, 456 preCompactTokenCount, 457 cacheSafeParams: retryCacheSafeParams, 458 }) 459 summary = getAssistantMessageText(summaryResponse) 460 if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break 461 462 // CC-1180: compact request itself hit prompt-too-long. Truncate the 463 // oldest API-round groups and retry rather than leaving the user stuck. 464 ptlAttempts++ 465 const truncated = 466 ptlAttempts <= MAX_PTL_RETRIES 467 ? truncateHeadForPTLRetry(messagesToSummarize, summaryResponse) 468 : null 469 if (!truncated) { 470 logEvent('tengu_compact_failed', { 471 reason: 472 'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 473 preCompactTokenCount, 474 promptCacheSharingEnabled, 475 ptlAttempts, 476 }) 477 throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG) 478 } 479 logEvent('tengu_compact_ptl_retry', { 480 attempt: ptlAttempts, 481 droppedMessages: messagesToSummarize.length - truncated.length, 482 remainingMessages: truncated.length, 483 }) 484 messagesToSummarize = truncated 485 // The forked-agent path reads from cacheSafeParams.forkContextMessages, 486 // not the messages param — thread the truncated set through both paths. 487 retryCacheSafeParams = { 488 ...retryCacheSafeParams, 489 forkContextMessages: truncated, 490 } 491 } 492 493 if (!summary) { 494 logForDebugging( 495 `Compact failed: no summary text in response. 
Response: ${jsonStringify(summaryResponse)}`, 496 { level: 'error' }, 497 ) 498 logEvent('tengu_compact_failed', { 499 reason: 500 'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 501 preCompactTokenCount, 502 promptCacheSharingEnabled, 503 }) 504 throw new Error( 505 `Failed to generate conversation summary - response did not contain valid text content`, 506 ) 507 } else if (startsWithApiErrorPrefix(summary)) { 508 logEvent('tengu_compact_failed', { 509 reason: 510 'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 511 preCompactTokenCount, 512 promptCacheSharingEnabled, 513 }) 514 throw new Error(summary) 515 } 516 517 // Store the current file state before clearing 518 const preCompactReadFileState = cacheToObject(context.readFileState) 519 520 // Clear the cache 521 context.readFileState.clear() 522 context.loadedNestedMemoryPaths?.clear() 523 524 // Intentionally NOT resetting sentSkillNames: re-injecting the full 525 // skill_listing (~4K tokens) post-compact is pure cache_creation with 526 // marginal benefit. The model still has SkillTool in its schema and 527 // invoked_skills attachment (below) preserves used-skill content. Ants 528 // with EXPERIMENTAL_SKILL_SEARCH already skip re-injection via the 529 // early-return in getSkillListingAttachments. 
530 531 // Run async attachment generation in parallel 532 const [fileAttachments, asyncAgentAttachments] = await Promise.all([ 533 createPostCompactFileAttachments( 534 preCompactReadFileState, 535 context, 536 POST_COMPACT_MAX_FILES_TO_RESTORE, 537 ), 538 createAsyncAgentAttachmentsIfNeeded(context), 539 ]) 540 541 const postCompactFileAttachments: AttachmentMessage[] = [ 542 ...fileAttachments, 543 ...asyncAgentAttachments, 544 ] 545 const planAttachment = createPlanAttachmentIfNeeded(context.agentId) 546 if (planAttachment) { 547 postCompactFileAttachments.push(planAttachment) 548 } 549 550 // Add plan mode instructions if currently in plan mode, so the model 551 // continues operating in plan mode after compaction 552 const planModeAttachment = await createPlanModeAttachmentIfNeeded(context) 553 if (planModeAttachment) { 554 postCompactFileAttachments.push(planModeAttachment) 555 } 556 557 // Add skill attachment if skills were invoked in this session 558 const skillAttachment = createSkillAttachmentIfNeeded(context.agentId) 559 if (skillAttachment) { 560 postCompactFileAttachments.push(skillAttachment) 561 } 562 563 // Compaction ate prior delta attachments. Re-announce from the current 564 // state so the model has tool/instruction context on the first 565 // post-compact turn. Empty message history → diff against nothing → 566 // announces the full set. 
567 for (const att of getDeferredToolsDeltaAttachment( 568 context.options.tools, 569 context.options.mainLoopModel, 570 [], 571 { callSite: 'compact_full' }, 572 )) { 573 postCompactFileAttachments.push(createAttachmentMessage(att)) 574 } 575 for (const att of getAgentListingDeltaAttachment(context, [])) { 576 postCompactFileAttachments.push(createAttachmentMessage(att)) 577 } 578 for (const att of getMcpInstructionsDeltaAttachment( 579 context.options.mcpClients, 580 context.options.tools, 581 context.options.mainLoopModel, 582 [], 583 )) { 584 postCompactFileAttachments.push(createAttachmentMessage(att)) 585 } 586 587 context.onCompactProgress?.({ 588 type: 'hooks_start', 589 hookType: 'session_start', 590 }) 591 // Execute SessionStart hooks after successful compaction 592 const hookMessages = await processSessionStartHooks('compact', { 593 model: context.options.mainLoopModel, 594 }) 595 596 // Create the compact boundary marker and summary messages before the 597 // event so we can compute the true resulting-context size. 598 const boundaryMarker = createCompactBoundaryMessage( 599 isAutoCompact ? 'auto' : 'manual', 600 preCompactTokenCount ?? 0, 601 messages.at(-1)?.uuid, 602 ) 603 // Carry loaded-tool state — the summary doesn't preserve tool_reference 604 // blocks, so the post-compact schema filter needs this to keep sending 605 // already-loaded deferred tool schemas to the API. 
606 const preCompactDiscovered = extractDiscoveredToolNames(messages) 607 if (preCompactDiscovered.size > 0) { 608 boundaryMarker.compactMetadata.preCompactDiscoveredTools = [ 609 ...preCompactDiscovered, 610 ].sort() 611 } 612 613 const transcriptPath = getTranscriptPath() 614 const summaryMessages: UserMessage[] = [ 615 createUserMessage({ 616 content: getCompactUserSummaryMessage( 617 summary, 618 suppressFollowUpQuestions, 619 transcriptPath, 620 ), 621 isCompactSummary: true, 622 isVisibleInTranscriptOnly: true, 623 }), 624 ] 625 626 // Previously "postCompactTokenCount" — renamed because this is the 627 // compact API call's total usage (input_tokens ≈ preCompactTokenCount), 628 // NOT the size of the resulting context. Kept for event-field continuity. 629 const compactionCallTotalTokens = tokenCountFromLastAPIResponse([ 630 summaryResponse, 631 ]) 632 633 // Message-payload estimate of the resulting context. The next iteration's 634 // shouldAutoCompact will see this PLUS ~20-40K for system prompt + tools + 635 // userContext (via API usage.input_tokens). So `willRetriggerNextTurn: true` 636 // is a strong signal; `false` may still retrigger when this is close to threshold. 637 const truePostCompactTokenCount = roughTokenCountEstimationForMessages([ 638 boundaryMarker, 639 ...summaryMessages, 640 ...postCompactFileAttachments, 641 ...hookMessages, 642 ]) 643 644 // Extract compaction API usage metrics 645 const compactionUsage = getTokenUsage(summaryResponse) 646 647 const querySourceForEvent = 648 recompactionInfo?.querySource ?? context.options.querySource ?? 'unknown' 649 650 logEvent('tengu_compact', { 651 preCompactTokenCount, 652 // Kept for continuity — semantically the compact API call's total usage 653 postCompactTokenCount: compactionCallTotalTokens, 654 truePostCompactTokenCount, 655 autoCompactThreshold: recompactionInfo?.autoCompactThreshold ?? 
-1, 656 willRetriggerNextTurn: 657 recompactionInfo !== undefined && 658 truePostCompactTokenCount >= recompactionInfo.autoCompactThreshold, 659 isAutoCompact, 660 querySource: 661 querySourceForEvent as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 662 queryChainId: (context.queryTracking?.chainId ?? 663 '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 664 queryDepth: context.queryTracking?.depth ?? -1, 665 isRecompactionInChain: recompactionInfo?.isRecompactionInChain ?? false, 666 turnsSincePreviousCompact: 667 recompactionInfo?.turnsSincePreviousCompact ?? -1, 668 previousCompactTurnId: (recompactionInfo?.previousCompactTurnId ?? 669 '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 670 compactionInputTokens: compactionUsage?.input_tokens, 671 compactionOutputTokens: compactionUsage?.output_tokens, 672 compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0, 673 compactionCacheCreationTokens: 674 compactionUsage?.cache_creation_input_tokens ?? 0, 675 compactionTotalTokens: compactionUsage 676 ? compactionUsage.input_tokens + 677 (compactionUsage.cache_creation_input_tokens ?? 0) + 678 (compactionUsage.cache_read_input_tokens ?? 0) + 679 compactionUsage.output_tokens 680 : 0, 681 promptCacheSharingEnabled, 682 // analyzeContext walks every content block (~11ms on a 4.5K-message 683 // session) purely for this telemetry breakdown. Computed here, past 684 // the compaction-API await, so the sync walk doesn't starve the 685 // render loop before compaction even starts. Same deferral pattern 686 // as reactiveCompact.ts. 687 ...(() => { 688 try { 689 return tokenStatsToStatsigMetrics(analyzeContext(messages)) 690 } catch (error) { 691 logError(error as Error) 692 return {} 693 } 694 })(), 695 }) 696 697 // Reset cache read baseline so the post-compact drop isn't flagged as a break 698 if (feature('PROMPT_CACHE_BREAK_DETECTION')) { 699 notifyCompaction( 700 context.options.querySource ?? 
'compact', 701 context.agentId, 702 ) 703 } 704 markPostCompaction() 705 706 // Re-append session metadata (custom title, tag) so it stays within 707 // the 16KB tail window that readLiteMetadata reads for --resume display. 708 // Without this, enough post-compaction messages push the metadata entry 709 // out of the window, causing --resume to show the auto-generated title 710 // instead of the user-set session name. 711 reAppendSessionMetadata() 712 713 // Write a reduced transcript segment for the pre-compaction messages 714 // (assistant mode only). Fire-and-forget — errors are logged internally. 715 if (feature('KAIROS')) { 716 void sessionTranscriptModule?.writeSessionTranscriptSegment(messages) 717 } 718 719 context.onCompactProgress?.({ 720 type: 'hooks_start', 721 hookType: 'post_compact', 722 }) 723 const postCompactHookResult = await executePostCompactHooks( 724 { 725 trigger: isAutoCompact ? 'auto' : 'manual', 726 compactSummary: summary, 727 }, 728 context.abortController.signal, 729 ) 730 731 const combinedUserDisplayMessage = [ 732 userDisplayMessage, 733 postCompactHookResult.userDisplayMessage, 734 ] 735 .filter(Boolean) 736 .join('\n') 737 738 return { 739 boundaryMarker, 740 summaryMessages, 741 attachments: postCompactFileAttachments, 742 hookResults: hookMessages, 743 userDisplayMessage: combinedUserDisplayMessage || undefined, 744 preCompactTokenCount, 745 postCompactTokenCount: compactionCallTotalTokens, 746 truePostCompactTokenCount, 747 compactionUsage, 748 } 749 } catch (error) { 750 // Only show the error notification for manual /compact. 751 // Auto-compact failures are retried on the next turn and the 752 // notification is confusing when compaction eventually succeeds. 
753 if (!isAutoCompact) { 754 addErrorNotificationIfNeeded(error, context) 755 } 756 throw error 757 } finally { 758 context.setStreamMode?.('requesting') 759 context.setResponseLength?.(() => 0) 760 context.onCompactProgress?.({ type: 'compact_end' }) 761 context.setSDKStatus?.(null) 762 } 763} 764 765/** 766 * Performs a partial compaction around the selected message index. 767 * Direction 'from': summarizes messages after the index, keeps earlier ones. 768 * Prompt cache for kept (earlier) messages is preserved. 769 * Direction 'up_to': summarizes messages before the index, keeps later ones. 770 * Prompt cache is invalidated since the summary precedes the kept messages. 771 */ 772export async function partialCompactConversation( 773 allMessages: Message[], 774 pivotIndex: number, 775 context: ToolUseContext, 776 cacheSafeParams: CacheSafeParams, 777 userFeedback?: string, 778 direction: PartialCompactDirection = 'from', 779): Promise<CompactionResult> { 780 try { 781 const messagesToSummarize = 782 direction === 'up_to' 783 ? allMessages.slice(0, pivotIndex) 784 : allMessages.slice(pivotIndex) 785 // 'up_to' must strip old compact boundaries/summaries: for 'up_to', 786 // summary_B sits BEFORE kept, so a stale boundary_A in kept wins 787 // findLastCompactBoundaryIndex's backward scan and drops summary_B. 788 // 'from' keeps them: summary_B sits AFTER kept (backward scan still 789 // works), and removing an old summary would lose its covered history. 790 const messagesToKeep = 791 direction === 'up_to' 792 ? allMessages 793 .slice(pivotIndex) 794 .filter( 795 m => 796 m.type !== 'progress' && 797 !isCompactBoundaryMessage(m) && 798 !(m.type === 'user' && m.isCompactSummary), 799 ) 800 : allMessages.slice(0, pivotIndex).filter(m => m.type !== 'progress') 801 802 if (messagesToSummarize.length === 0) { 803 throw new Error( 804 direction === 'up_to' 805 ? 'Nothing to summarize before the selected message.' 
806 : 'Nothing to summarize after the selected message.', 807 ) 808 } 809 810 const preCompactTokenCount = tokenCountWithEstimation(allMessages) 811 812 context.onCompactProgress?.({ 813 type: 'hooks_start', 814 hookType: 'pre_compact', 815 }) 816 817 context.setSDKStatus?.('compacting') 818 const hookResult = await executePreCompactHooks( 819 { 820 trigger: 'manual', 821 customInstructions: null, 822 }, 823 context.abortController.signal, 824 ) 825 826 // Merge hook instructions with user feedback 827 let customInstructions: string | undefined 828 if (hookResult.newCustomInstructions && userFeedback) { 829 customInstructions = `${hookResult.newCustomInstructions}\n\nUser context: ${userFeedback}` 830 } else if (hookResult.newCustomInstructions) { 831 customInstructions = hookResult.newCustomInstructions 832 } else if (userFeedback) { 833 customInstructions = `User context: ${userFeedback}` 834 } 835 836 context.setStreamMode?.('requesting') 837 context.setResponseLength?.(() => 0) 838 context.onCompactProgress?.({ type: 'compact_start' }) 839 840 const compactPrompt = getPartialCompactPrompt(customInstructions, direction) 841 const summaryRequest = createUserMessage({ 842 content: compactPrompt, 843 }) 844 845 const failureMetadata = { 846 preCompactTokenCount, 847 direction: 848 direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 849 messagesSummarized: messagesToSummarize.length, 850 } 851 852 // 'up_to' prefix hits cache directly; 'from' sends all (tail wouldn't cache). 853 // PTL retry breaks the cache prefix but unblocks the user (CC-1180). 854 let apiMessages = direction === 'up_to' ? messagesToSummarize : allMessages 855 let retryCacheSafeParams = 856 direction === 'up_to' 857 ? 
{ ...cacheSafeParams, forkContextMessages: messagesToSummarize } 858 : cacheSafeParams 859 let summaryResponse: AssistantMessage 860 let summary: string | null 861 let ptlAttempts = 0 862 for (;;) { 863 summaryResponse = await streamCompactSummary({ 864 messages: apiMessages, 865 summaryRequest, 866 appState: context.getAppState(), 867 context, 868 preCompactTokenCount, 869 cacheSafeParams: retryCacheSafeParams, 870 }) 871 summary = getAssistantMessageText(summaryResponse) 872 if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break 873 874 ptlAttempts++ 875 const truncated = 876 ptlAttempts <= MAX_PTL_RETRIES 877 ? truncateHeadForPTLRetry(apiMessages, summaryResponse) 878 : null 879 if (!truncated) { 880 logEvent('tengu_partial_compact_failed', { 881 reason: 882 'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 883 ...failureMetadata, 884 ptlAttempts, 885 }) 886 throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG) 887 } 888 logEvent('tengu_compact_ptl_retry', { 889 attempt: ptlAttempts, 890 droppedMessages: apiMessages.length - truncated.length, 891 remainingMessages: truncated.length, 892 path: 'partial' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 893 }) 894 apiMessages = truncated 895 retryCacheSafeParams = { 896 ...retryCacheSafeParams, 897 forkContextMessages: truncated, 898 } 899 } 900 if (!summary) { 901 logEvent('tengu_partial_compact_failed', { 902 reason: 903 'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 904 ...failureMetadata, 905 }) 906 throw new Error( 907 'Failed to generate conversation summary - response did not contain valid text content', 908 ) 909 } else if (startsWithApiErrorPrefix(summary)) { 910 logEvent('tengu_partial_compact_failed', { 911 reason: 912 'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 913 ...failureMetadata, 914 }) 915 throw new Error(summary) 916 } 917 918 // Store the current file state before clearing 919 const 
preCompactReadFileState = cacheToObject(context.readFileState) 920 context.readFileState.clear() 921 context.loadedNestedMemoryPaths?.clear() 922 // Intentionally NOT resetting sentSkillNames — see compactConversation() 923 // for rationale (~4K tokens saved per compact event). 924 925 const [fileAttachments, asyncAgentAttachments] = await Promise.all([ 926 createPostCompactFileAttachments( 927 preCompactReadFileState, 928 context, 929 POST_COMPACT_MAX_FILES_TO_RESTORE, 930 messagesToKeep, 931 ), 932 createAsyncAgentAttachmentsIfNeeded(context), 933 ]) 934 935 const postCompactFileAttachments: AttachmentMessage[] = [ 936 ...fileAttachments, 937 ...asyncAgentAttachments, 938 ] 939 const planAttachment = createPlanAttachmentIfNeeded(context.agentId) 940 if (planAttachment) { 941 postCompactFileAttachments.push(planAttachment) 942 } 943 944 // Add plan mode instructions if currently in plan mode 945 const planModeAttachment = await createPlanModeAttachmentIfNeeded(context) 946 if (planModeAttachment) { 947 postCompactFileAttachments.push(planModeAttachment) 948 } 949 950 const skillAttachment = createSkillAttachmentIfNeeded(context.agentId) 951 if (skillAttachment) { 952 postCompactFileAttachments.push(skillAttachment) 953 } 954 955 // Re-announce only what was in the summarized portion — messagesToKeep 956 // is scanned, so anything already announced there is skipped. 
957 for (const att of getDeferredToolsDeltaAttachment( 958 context.options.tools, 959 context.options.mainLoopModel, 960 messagesToKeep, 961 { callSite: 'compact_partial' }, 962 )) { 963 postCompactFileAttachments.push(createAttachmentMessage(att)) 964 } 965 for (const att of getAgentListingDeltaAttachment(context, messagesToKeep)) { 966 postCompactFileAttachments.push(createAttachmentMessage(att)) 967 } 968 for (const att of getMcpInstructionsDeltaAttachment( 969 context.options.mcpClients, 970 context.options.tools, 971 context.options.mainLoopModel, 972 messagesToKeep, 973 )) { 974 postCompactFileAttachments.push(createAttachmentMessage(att)) 975 } 976 977 context.onCompactProgress?.({ 978 type: 'hooks_start', 979 hookType: 'session_start', 980 }) 981 const hookMessages = await processSessionStartHooks('compact', { 982 model: context.options.mainLoopModel, 983 }) 984 985 const postCompactTokenCount = tokenCountFromLastAPIResponse([ 986 summaryResponse, 987 ]) 988 const compactionUsage = getTokenUsage(summaryResponse) 989 990 logEvent('tengu_partial_compact', { 991 preCompactTokenCount, 992 postCompactTokenCount, 993 messagesKept: messagesToKeep.length, 994 messagesSummarized: messagesToSummarize.length, 995 direction: 996 direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 997 hasUserFeedback: !!userFeedback, 998 trigger: 999 'message_selector' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 1000 compactionInputTokens: compactionUsage?.input_tokens, 1001 compactionOutputTokens: compactionUsage?.output_tokens, 1002 compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0, 1003 compactionCacheCreationTokens: 1004 compactionUsage?.cache_creation_input_tokens ?? 0, 1005 }) 1006 1007 // Progress messages aren't loggable, so forkSessionImpl would null out 1008 // a logicalParentUuid pointing at one. Both directions skip them. 1009 const lastPreCompactUuid = 1010 direction === 'up_to' 1011 ? 
allMessages.slice(0, pivotIndex).findLast(m => m.type !== 'progress') 1012 ?.uuid 1013 : messagesToKeep.at(-1)?.uuid 1014 const boundaryMarker = createCompactBoundaryMessage( 1015 'manual', 1016 preCompactTokenCount ?? 0, 1017 lastPreCompactUuid, 1018 userFeedback, 1019 messagesToSummarize.length, 1020 ) 1021 // allMessages not just messagesToSummarize — set union is idempotent, 1022 // simpler than tracking which half each tool lived in. 1023 const preCompactDiscovered = extractDiscoveredToolNames(allMessages) 1024 if (preCompactDiscovered.size > 0) { 1025 boundaryMarker.compactMetadata.preCompactDiscoveredTools = [ 1026 ...preCompactDiscovered, 1027 ].sort() 1028 } 1029 1030 const transcriptPath = getTranscriptPath() 1031 const summaryMessages: UserMessage[] = [ 1032 createUserMessage({ 1033 content: getCompactUserSummaryMessage(summary, false, transcriptPath), 1034 isCompactSummary: true, 1035 ...(messagesToKeep.length > 0 1036 ? { 1037 summarizeMetadata: { 1038 messagesSummarized: messagesToSummarize.length, 1039 userContext: userFeedback, 1040 direction, 1041 }, 1042 } 1043 : { isVisibleInTranscriptOnly: true as const }), 1044 }), 1045 ] 1046 1047 if (feature('PROMPT_CACHE_BREAK_DETECTION')) { 1048 notifyCompaction( 1049 context.options.querySource ?? 'compact', 1050 context.agentId, 1051 ) 1052 } 1053 markPostCompaction() 1054 1055 // Re-append session metadata (custom title, tag) so it stays within 1056 // the 16KB tail window that readLiteMetadata reads for --resume display. 
    reAppendSessionMetadata()

    if (feature('KAIROS')) {
      // Fire-and-forget: transcript persistence must not block compaction.
      void sessionTranscriptModule?.writeSessionTranscriptSegment(
        messagesToSummarize,
      )
    }

    context.onCompactProgress?.({
      type: 'hooks_start',
      hookType: 'post_compact',
    })
    const postCompactHookResult = await executePostCompactHooks(
      {
        trigger: 'manual',
        compactSummary: summary,
      },
      context.abortController.signal,
    )

    // 'from': prefix-preserving → boundary; 'up_to': suffix → last summary
    const anchorUuid =
      direction === 'up_to'
        ? (summaryMessages.at(-1)?.uuid ?? boundaryMarker.uuid)
        : boundaryMarker.uuid
    return {
      boundaryMarker: annotateBoundaryWithPreservedSegment(
        boundaryMarker,
        anchorUuid,
        messagesToKeep,
      ),
      summaryMessages,
      messagesToKeep,
      attachments: postCompactFileAttachments,
      hookResults: hookMessages,
      userDisplayMessage: postCompactHookResult.userDisplayMessage,
      preCompactTokenCount,
      postCompactTokenCount,
      compactionUsage,
    }
  } catch (error) {
    addErrorNotificationIfNeeded(error, context)
    throw error
  } finally {
    // Always restore stream/status UI state, whether compaction succeeded,
    // failed, or was aborted.
    context.setStreamMode?.('requesting')
    context.setResponseLength?.(() => 0)
    context.onCompactProgress?.({ type: 'compact_end' })
    context.setSDKStatus?.(null)
  }
}

/**
 * Surfaces a generic "Error compacting conversation" notification unless the
 * failure is a deliberate user abort or the known "not enough messages" case,
 * neither of which warrants an error banner.
 */
function addErrorNotificationIfNeeded(
  error: unknown,
  context: Pick<ToolUseContext, 'addNotification'>,
) {
  if (
    !hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT) &&
    !hasExactErrorMessage(error, ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)
  ) {
    context.addNotification?.({
      key: 'error-compacting-conversation',
      text: 'Error compacting conversation',
      priority: 'immediate',
      color: 'error',
    })
  }
}

/**
 * Returns a CanUseToolFn that denies every tool call: the compaction agent
 * should only ever produce a text summary, never execute tools.
 */
export function createCompactCanUseTool(): CanUseToolFn {
  return async () => ({
    behavior: 'deny' as const,
    message: 'Tool use is not allowed during compaction',
    decisionReason: {
      type: 'other' as const,
      reason: 'compaction agent should only produce text summary',
    },
  })
}

/**
 * Produces the compaction summary from the model. Tries the prompt-cache-
 * sharing forked-agent path first (behind the tengu_compact_cache_prefix
 * flag), then falls back to a direct streaming request with optional retries.
 *
 * @returns The assistant message containing the summary text. May contain a
 *   PROMPT_TOO_LONG_ERROR_MESSAGE body — the caller's retry loop handles that.
 * @throws ERROR_MESSAGE_INCOMPLETE_RESPONSE when streaming yields no
 *   assistant message after all attempts.
 */
async function streamCompactSummary({
  messages,
  summaryRequest,
  appState,
  context,
  preCompactTokenCount,
  cacheSafeParams,
}: {
  messages: Message[]
  summaryRequest: UserMessage
  appState: Awaited<ReturnType<ToolUseContext['getAppState']>>
  context: ToolUseContext
  preCompactTokenCount: number
  cacheSafeParams: CacheSafeParams
}): Promise<AssistantMessage> {
  // When prompt cache sharing is enabled, use forked agent to reuse the
  // main conversation's cached prefix (system prompt, tools, context messages).
  // Falls back to regular streaming path on failure.
  // 3P default: true — see comment at the other tengu_compact_cache_prefix read above.
  const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_compact_cache_prefix',
    true,
  )
  // Send keep-alive signals during compaction to prevent remote session
  // WebSocket idle timeouts from dropping bridge connections. Compaction
  // API calls can take 5-10+ seconds, during which no other messages
  // flow through the transport — without keep-alives, the server may
  // close the WebSocket for inactivity.
  // Two signals: (1) PUT /worker heartbeat via sessionActivity, and
  // (2) re-emit 'compacting' status so the SDK event stream stays active
  // and the server doesn't consider the session stale.
  // (setIntervals's trailing args are forwarded to the callback, so the
  // callback never closes over `context` directly.)
  const activityInterval = isSessionActivityTrackingActive()
    ? setInterval(
        (statusSetter?: (status: 'compacting' | null) => void) => {
          sendSessionActivitySignal()
          statusSetter?.('compacting')
        },
        30_000,
        context.setSDKStatus,
      )
    : undefined

  try {
    if (promptCacheSharingEnabled) {
      try {
        // DO NOT set maxOutputTokens here. The fork piggybacks on the main thread's
        // prompt cache by sending identical cache-key params (system, tools, model,
        // messages prefix, thinking config). Setting maxOutputTokens would clamp
        // budget_tokens via Math.min(budget, maxOutputTokens-1) in claude.ts,
        // creating a thinking config mismatch that invalidates the cache.
        // The streaming fallback path (below) can safely set maxOutputTokensOverride
        // since it doesn't share cache with the main thread.
        const result = await runForkedAgent({
          promptMessages: [summaryRequest],
          cacheSafeParams,
          canUseTool: createCompactCanUseTool(),
          querySource: 'compact',
          forkLabel: 'compact',
          maxTurns: 1,
          skipCacheWrite: true,
          // Pass the compact context's abortController so user Esc aborts the
          // fork — same signal the streaming fallback uses at
          // `signal: context.abortController.signal` below.
          overrides: { abortController: context.abortController },
        })
        const assistantMsg = getLastAssistantMessage(result.messages)
        const assistantText = assistantMsg
          ? getAssistantMessageText(assistantMsg)
          : null
        // Guard isApiErrorMessage: query() catches API errors (including
        // APIUserAbortError on ESC) and yields them as synthetic assistant
        // messages. Without this check, an aborted compact "succeeds" with
        // "Request was aborted." as the summary — the text doesn't start with
        // "API Error" so the caller's startsWithApiErrorPrefix guard misses it.
        if (assistantMsg && assistantText && !assistantMsg.isApiErrorMessage) {
          // Skip success logging for PTL error text — it's returned so the
          // caller's retry loop catches it, but it's not a successful summary.
          if (!assistantText.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) {
            logEvent('tengu_compact_cache_sharing_success', {
              preCompactTokenCount,
              outputTokens: result.totalUsage.output_tokens,
              cacheReadInputTokens: result.totalUsage.cache_read_input_tokens,
              cacheCreationInputTokens:
                result.totalUsage.cache_creation_input_tokens,
              cacheHitRate:
                result.totalUsage.cache_read_input_tokens > 0
                  ? result.totalUsage.cache_read_input_tokens /
                    (result.totalUsage.cache_read_input_tokens +
                      result.totalUsage.cache_creation_input_tokens +
                      result.totalUsage.input_tokens)
                  : 0,
            })
          }
          return assistantMsg
        }
        logForDebugging(
          `Compact cache sharing: no text in response, falling back. Response: ${jsonStringify(assistantMsg)}`,
          { level: 'warn' },
        )
        logEvent('tengu_compact_cache_sharing_fallback', {
          reason:
            'no_text_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
        })
      } catch (error) {
        logError(error)
        logEvent('tengu_compact_cache_sharing_fallback', {
          reason:
            'error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
        })
      }
    }

    // Regular streaming path (fallback when cache sharing fails or is disabled)
    const retryEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_compact_streaming_retry',
      false,
    )
    const maxAttempts = retryEnabled ? MAX_COMPACT_STREAMING_RETRIES : 1

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      // Reset state for retry
      let hasStartedStreaming = false
      let response: AssistantMessage | undefined
      context.setResponseLength?.(() => 0)

      // Check if tool search is enabled using the main loop's tools list.
      // context.options.tools includes MCP tools merged via useMergedTools.
      const useToolSearch = await isToolSearchEnabled(
        context.options.mainLoopModel,
        context.options.tools,
        async () => appState.toolPermissionContext,
        context.options.agentDefinitions.activeAgents,
        'compact',
      )

      // When tool search is enabled, include ToolSearchTool and MCP tools. They get
      // defer_loading: true and don't count against context - the API filters them out
      // of system_prompt_tools before token counting (see api/token_count_api/counting.py:188
      // and api/public_api/messages/handler.py:324).
      // Filter MCP tools from context.options.tools (not appState.mcp.tools) so we
      // get the permission-filtered set from useMergedTools — same source used for
      // isToolSearchEnabled above and normalizeMessagesForAPI below.
      // Deduplicate by name to avoid API errors when MCP tools share names with built-in tools.
      const tools: Tool[] = useToolSearch
        ? uniqBy(
            [
              FileReadTool,
              ToolSearchTool,
              ...context.options.tools.filter(t => t.isMcp),
            ],
            'name',
          )
        : [FileReadTool]

      const streamingGen = queryModelWithStreaming({
        messages: normalizeMessagesForAPI(
          stripImagesFromMessages(
            stripReinjectedAttachments([
              ...getMessagesAfterCompactBoundary(messages),
              summaryRequest,
            ]),
          ),
          context.options.tools,
        ),
        systemPrompt: asSystemPrompt([
          'You are a helpful AI assistant tasked with summarizing conversations.',
        ]),
        thinkingConfig: { type: 'disabled' as const },
        tools,
        signal: context.abortController.signal,
        options: {
          async getToolPermissionContext() {
            const appState = context.getAppState()
            return appState.toolPermissionContext
          },
          model: context.options.mainLoopModel,
          toolChoice: undefined,
          isNonInteractiveSession: context.options.isNonInteractiveSession,
          hasAppendSystemPrompt: !!context.options.appendSystemPrompt,
          maxOutputTokensOverride: Math.min(
            COMPACT_MAX_OUTPUT_TOKENS,
            getMaxOutputTokensForModel(context.options.mainLoopModel),
          ),
          querySource: 'compact',
          agents: context.options.agentDefinitions.activeAgents,
          mcpTools: [],
          effortValue: appState.effortValue,
        },
      })
      const streamIter = streamingGen[Symbol.asyncIterator]()
      let next = await streamIter.next()

      while (!next.done) {
        const event = next.value

        // First text block starting → flip the UI into 'responding' mode once.
        if (
          !hasStartedStreaming &&
          event.type === 'stream_event' &&
          event.event.type === 'content_block_start' &&
          event.event.content_block.type === 'text'
        ) {
          hasStartedStreaming = true
          context.setStreamMode?.('responding')
        }

        // Accumulate streamed character count for the progress indicator.
        if (
          event.type === 'stream_event' &&
          event.event.type === 'content_block_delta' &&
          event.event.delta.type === 'text_delta'
        ) {
          const charactersStreamed = event.event.delta.text.length
          context.setResponseLength?.(length => length + charactersStreamed)
        }

        if (event.type === 'assistant') {
          response = event
        }

        next = await streamIter.next()
      }

      if (response) {
        return response
      }

      if (attempt < maxAttempts) {
        logEvent('tengu_compact_streaming_retry', {
          attempt,
          preCompactTokenCount,
          hasStartedStreaming,
        })
        // Backoff between attempts; abort during sleep surfaces as a user abort.
        await sleep(getRetryDelay(attempt), context.abortController.signal, {
          abortError: () => new APIUserAbortError(),
        })
        continue
      }

      logForDebugging(
        `Compact streaming failed after ${attempt} attempts. hasStartedStreaming=${hasStartedStreaming}`,
        { level: 'error' },
      )
      logEvent('tengu_compact_failed', {
        reason:
          'no_streaming_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        preCompactTokenCount,
        hasStartedStreaming,
        retryEnabled,
        attempts: attempt,
        promptCacheSharingEnabled,
      })
      throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
    }

    // This should never be reached due to the throw above, but TypeScript needs it
    throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
  } finally {
    // Safe when activityInterval is undefined — clearInterval ignores it.
    clearInterval(activityInterval)
  }
}

/**
 * Creates attachment messages for recently accessed files to restore them after compaction.
 * This prevents the model from having to re-read files that were recently accessed.
 * Re-reads files using FileReadTool to get fresh content with proper validation.
 * Files are selected based on recency, but constrained by both file count and token budget limits.
 *
 * Files already present as Read tool results in preservedMessages are skipped —
 * re-injecting identical content the model can already see in the preserved tail
 * is pure waste (up to 25K tok/compact). Mirrors the diff-against-preserved
 * pattern that getDeferredToolsDeltaAttachment uses at the same call sites.
 *
 * @param readFileState The current file state tracking recently read files
 * @param toolUseContext The tool use context for calling FileReadTool
 * @param maxFiles Maximum number of files to restore (default: 5)
 * @param preservedMessages Messages kept post-compact; Read results here are skipped
 * @returns Array of attachment messages for the most recently accessed files that fit within token budget
 */
export async function createPostCompactFileAttachments(
  readFileState: Record<string, { content: string; timestamp: number }>,
  toolUseContext: ToolUseContext,
  maxFiles: number,
  preservedMessages: Message[] = [],
): Promise<AttachmentMessage[]> {
  const preservedReadPaths = collectReadToolFilePaths(preservedMessages)
  // Most-recently-read first, capped at maxFiles before any file I/O happens.
  const recentFiles = Object.entries(readFileState)
    .map(([filename, state]) => ({ filename, ...state }))
    .filter(
      file =>
        !shouldExcludeFromPostCompactRestore(
          file.filename,
          toolUseContext.agentId,
        ) && !preservedReadPaths.has(expandPath(file.filename)),
    )
    .sort((a, b) => b.timestamp - a.timestamp)
    .slice(0, maxFiles)

  const results = await Promise.all(
    recentFiles.map(async file => {
      const attachment = await generateFileAttachment(
        file.filename,
        {
          ...toolUseContext,
          fileReadingLimits: {
            maxTokens: POST_COMPACT_MAX_TOKENS_PER_FILE,
          },
        },
        'tengu_post_compact_file_restore_success',
        'tengu_post_compact_file_restore_error',
        'compact',
      )
      return attachment ? createAttachmentMessage(attachment) : null
    }),
  )

  // Greedy budget filter in recency order: an attachment that would overflow
  // the budget is dropped, but later (smaller) ones may still fit.
  let usedTokens = 0
  return results.filter((result): result is AttachmentMessage => {
    if (result === null) {
      return false
    }
    const attachmentTokens = roughTokenCountEstimation(jsonStringify(result))
    if (usedTokens + attachmentTokens <= POST_COMPACT_TOKEN_BUDGET) {
      usedTokens += attachmentTokens
      return true
    }
    return false
  })
}

/**
 * Creates a plan file attachment if a plan file exists for the current session.
 * This ensures the plan is preserved after compaction.
 */
export function createPlanAttachmentIfNeeded(
  agentId?: AgentId,
): AttachmentMessage | null {
  const planContent = getPlan(agentId)

  if (!planContent) {
    return null
  }

  const planFilePath = getPlanFilePath(agentId)

  return createAttachmentMessage({
    type: 'plan_file_reference',
    planFilePath,
    planContent,
  })
}

/**
 * Creates an attachment for invoked skills to preserve their content across compaction.
 * Only includes skills scoped to the given agent (or main session when agentId is null/undefined).
 * This ensures skill guidelines remain available after the conversation is summarized
 * without leaking skills from other agent contexts.
 */
export function createSkillAttachmentIfNeeded(
  agentId?: string,
): AttachmentMessage | null {
  const invokedSkills = getInvokedSkillsForAgent(agentId)

  if (invokedSkills.size === 0) {
    return null
  }

  // Sorted most-recent-first so budget pressure drops the least-relevant skills.
  // Per-skill truncation keeps the head of each file (where setup/usage
  // instructions typically live) rather than dropping whole skills.
  let usedTokens = 0
  const skills = Array.from(invokedSkills.values())
    .sort((a, b) => b.invokedAt - a.invokedAt)
    .map(skill => ({
      name: skill.skillName,
      path: skill.skillPath,
      content: truncateToTokens(
        skill.content,
        POST_COMPACT_MAX_TOKENS_PER_SKILL,
      ),
    }))
    .filter(skill => {
      const tokens = roughTokenCountEstimation(skill.content)
      if (usedTokens + tokens > POST_COMPACT_SKILLS_TOKEN_BUDGET) {
        return false
      }
      usedTokens += tokens
      return true
    })

  if (skills.length === 0) {
    return null
  }

  return createAttachmentMessage({
    type: 'invoked_skills',
    skills,
  })
}

/**
 * Creates a plan_mode attachment if the user is currently in plan mode.
 * This ensures the model continues to operate in plan mode after compaction
 * (otherwise it would lose the plan mode instructions since those are
 * normally only injected on tool-use turns via getAttachmentMessages).
 */
export async function createPlanModeAttachmentIfNeeded(
  context: ToolUseContext,
): Promise<AttachmentMessage | null> {
  const appState = context.getAppState()
  if (appState.toolPermissionContext.mode !== 'plan') {
    return null
  }

  const planFilePath = getPlanFilePath(context.agentId)
  const planExists = getPlan(context.agentId) !== null

  return createAttachmentMessage({
    type: 'plan_mode',
    reminderType: 'full',
    isSubAgent: !!context.agentId,
    planFilePath,
    planExists,
  })
}

/**
 * Creates attachments for async agents so the model knows about them after
 * compaction. Covers both agents still running in the background (so the model
 * doesn't spawn a duplicate) and agents that have finished but whose results
 * haven't been retrieved yet.
 */
export async function createAsyncAgentAttachmentsIfNeeded(
  context: ToolUseContext,
): Promise<AttachmentMessage[]> {
  const appState = context.getAppState()
  const asyncAgents = Object.values(appState.tasks).filter(
    (task): task is LocalAgentTaskState => task.type === 'local_agent',
  )

  return asyncAgents.flatMap(agent => {
    // Skip already-retrieved results, not-yet-started agents, and this
    // agent's own task entry.
    if (
      agent.retrieved ||
      agent.status === 'pending' ||
      agent.agentId === context.agentId
    ) {
      return []
    }
    return [
      createAttachmentMessage({
        type: 'task_status',
        taskId: agent.agentId,
        taskType: 'local_agent',
        description: agent.description,
        status: agent.status,
        deltaSummary:
          agent.status === 'running'
            ? (agent.progress?.summary ?? null)
            : (agent.error ?? null),
        outputFilePath: getTaskOutputPath(agent.agentId),
      }),
    ]
  })
}

/**
 * Scan messages for Read tool_use blocks and collect their file_path inputs
 * (normalized via expandPath). Used to dedup post-compact file restoration
 * against what's already visible in the preserved tail.
 *
 * Skips Reads whose tool_result is a dedup stub — the stub points at an
 * earlier full Read that may have been compacted away, so we want
 * createPostCompactFileAttachments to re-inject the real content.
 */
function collectReadToolFilePaths(messages: Message[]): Set<string> {
  // Pass 1: collect tool_use_ids whose results are FILE_UNCHANGED_STUB stubs.
  const stubIds = new Set<string>()
  for (const message of messages) {
    if (message.type !== 'user' || !Array.isArray(message.message.content)) {
      continue
    }
    for (const block of message.message.content) {
      if (
        block.type === 'tool_result' &&
        typeof block.content === 'string' &&
        block.content.startsWith(FILE_UNCHANGED_STUB)
      ) {
        stubIds.add(block.tool_use_id)
      }
    }
  }

  // Pass 2: collect file_path inputs from non-stubbed Read tool_use blocks.
  const paths = new Set<string>()
  for (const message of messages) {
    if (
      message.type !== 'assistant' ||
      !Array.isArray(message.message.content)
    ) {
      continue
    }
    for (const block of message.message.content) {
      if (
        block.type !== 'tool_use' ||
        block.name !== FILE_READ_TOOL_NAME ||
        stubIds.has(block.id)
      ) {
        continue
      }
      const input = block.input
      if (
        input &&
        typeof input === 'object' &&
        'file_path' in input &&
        typeof input.file_path === 'string'
      ) {
        paths.add(expandPath(input.file_path))
      }
    }
  }
  return paths
}

// Appended to skill content that truncateToTokens cut to fit the per-skill
// token budget; tells the model how to recover the full text.
const SKILL_TRUNCATION_MARKER =
  '\n\n[... skill content truncated for compaction; use Read on the skill path if you need the full text]'

/**
 * Truncate content to roughly maxTokens, keeping the head. roughTokenCountEstimation
 * uses ~4 chars/token (its default bytesPerToken), so char budget = maxTokens * 4
 * minus the marker so the result stays within budget. Marker tells the model it
 * can Read the full file if needed.
 */
function truncateToTokens(content: string, maxTokens: number): string {
  if (roughTokenCountEstimation(content) <= maxTokens) {
    return content
  }
  const charBudget = maxTokens * 4 - SKILL_TRUNCATION_MARKER.length
  return content.slice(0, charBudget) + SKILL_TRUNCATION_MARKER
}

/**
 * True for files that must not be re-injected as post-compact attachments:
 * the session's plan file (restored separately via createPlanAttachmentIfNeeded)
 * and claude.md-style memory files (presumably re-supplied by the memory
 * system — NOTE(review): confirm against the memory injection path).
 */
function shouldExcludeFromPostCompactRestore(
  filename: string,
  agentId?: AgentId,
): boolean {
  const normalizedFilename = expandPath(filename)
  // Exclude plan files
  try {
    const planFilePath = expandPath(getPlanFilePath(agentId))
    if (normalizedFilename === planFilePath) {
      return true
    }
  } catch {
    // If we can't get plan file path, continue with other checks
  }

  // Exclude all types of claude.md files
  // TODO: Refactor to use isMemoryFilePath() from claudemd.ts for consistency
  // and to also match child directory memory files (.claude/rules/*.md, etc.)
  try {
    const normalizedMemoryPaths = new Set(
      MEMORY_TYPE_VALUES.map(type => expandPath(getMemoryPath(type))),
    )

    if (normalizedMemoryPaths.has(normalizedFilename)) {
      return true
    }
  } catch {
    // If we can't get memory paths, continue
  }

  return false
}