// Source dump of the Claude Code session-storage module
// (main branch, 5105 lines, 181 kB — raw view)
1import { feature } from 'bun:bundle' 2import type { UUID } from 'crypto' 3import type { Dirent } from 'fs' 4// Sync fs primitives for readFileTailSync — separate from fs/promises 5// imports above. Named (not wildcard) per CLAUDE.md style; no collisions 6// with the async-suffixed names. 7import { closeSync, fstatSync, openSync, readSync } from 'fs' 8import { 9 appendFile as fsAppendFile, 10 open as fsOpen, 11 mkdir, 12 readdir, 13 readFile, 14 stat, 15 unlink, 16 writeFile, 17} from 'fs/promises' 18import memoize from 'lodash-es/memoize.js' 19import { basename, dirname, join } from 'path' 20import { 21 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 22 logEvent, 23} from 'src/services/analytics/index.js' 24import { 25 getOriginalCwd, 26 getPlanSlugCache, 27 getPromptId, 28 getSessionId, 29 getSessionProjectDir, 30 isSessionPersistenceDisabled, 31 switchSession, 32} from '../bootstrap/state.js' 33import { builtInCommandNames } from '../commands.js' 34import { COMMAND_NAME_TAG, TICK_TAG } from '../constants/xml.js' 35import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js' 36import * as sessionIngress from '../services/api/sessionIngress.js' 37import { REPL_TOOL_NAME } from '../tools/REPLTool/constants.js' 38import { 39 type AgentId, 40 asAgentId, 41 asSessionId, 42 type SessionId, 43} from '../types/ids.js' 44import type { AttributionSnapshotMessage } from '../types/logs.js' 45import { 46 type ContentReplacementEntry, 47 type ContextCollapseCommitEntry, 48 type ContextCollapseSnapshotEntry, 49 type Entry, 50 type FileHistorySnapshotMessage, 51 type LogOption, 52 type PersistedWorktreeSession, 53 type SerializedMessage, 54 sortLogs, 55 type TranscriptMessage, 56} from '../types/logs.js' 57import type { 58 AssistantMessage, 59 AttachmentMessage, 60 Message, 61 SystemCompactBoundaryMessage, 62 SystemMessage, 63 UserMessage, 64} from '../types/message.js' 65import type { QueueOperationMessage } from 
'../types/messageQueueTypes.js' 66import { uniq } from './array.js' 67import { registerCleanup } from './cleanupRegistry.js' 68import { updateSessionName } from './concurrentSessions.js' 69import { getCwd } from './cwd.js' 70import { logForDebugging } from './debug.js' 71import { logForDiagnosticsNoPII } from './diagLogs.js' 72import { getClaudeConfigHomeDir, isEnvTruthy } from './envUtils.js' 73import { isFsInaccessible } from './errors.js' 74import type { FileHistorySnapshot } from './fileHistory.js' 75import { formatFileSize } from './format.js' 76import { getFsImplementation } from './fsOperations.js' 77import { getWorktreePaths } from './getWorktreePaths.js' 78import { getBranch } from './git.js' 79import { gracefulShutdownSync, isShuttingDown } from './gracefulShutdown.js' 80import { parseJSONL } from './json.js' 81import { logError } from './log.js' 82import { extractTag, isCompactBoundaryMessage } from './messages.js' 83import { sanitizePath } from './path.js' 84import { 85 extractJsonStringField, 86 extractLastJsonStringField, 87 LITE_READ_BUF_SIZE, 88 readHeadAndTail, 89 readTranscriptForLoad, 90 SKIP_PRECOMPACT_THRESHOLD, 91} from './sessionStoragePortable.js' 92import { getSettings_DEPRECATED } from './settings/settings.js' 93import { jsonParse, jsonStringify } from './slowOperations.js' 94import type { ContentReplacementRecord } from './toolResultStorage.js' 95import { validateUuid } from './uuid.js' 96 97// Cache MACRO.VERSION at module level to work around bun --define bug in async contexts 98// See: https://github.com/oven-sh/bun/issues/26168 99const VERSION = typeof MACRO !== 'undefined' ? MACRO.VERSION : 'unknown' 100 101type Transcript = ( 102 | UserMessage 103 | AssistantMessage 104 | AttachmentMessage 105 | SystemMessage 106)[] 107 108// Use getOriginalCwd() at each call site instead of capturing at module load 109// time. 
getCwd() at import time may run before bootstrap resolves symlinks via 110// realpathSync, causing a different sanitized project directory than what 111// getOriginalCwd() returns after bootstrap. This split-brain made sessions 112// saved under one path invisible when loaded via the other. 113 114/** 115 * Pre-compiled regex to skip non-meaningful messages when extracting first prompt. 116 * Matches anything starting with a lowercase XML-like tag (IDE context, hook 117 * output, task notifications, channel messages, etc.) or a synthetic interrupt 118 * marker. Kept in sync with sessionStoragePortable.ts — generic pattern avoids 119 * an ever-growing allowlist that falls behind as new notification types ship. 120 */ 121// 50MB — prevents OOM in the tombstone slow path which reads + rewrites the 122// entire session file. Session files can grow to multiple GB (inc-3930). 123const MAX_TOMBSTONE_REWRITE_BYTES = 50 * 1024 * 1024 124 125const SKIP_FIRST_PROMPT_PATTERN = 126 /^(?:\s*<[a-z][\w-]*[\s>]|\[Request interrupted by user[^\]]*\])/ 127 128/** 129 * Type guard to check if an entry is a transcript message. 130 * Transcript messages include user, assistant, attachment, and system messages. 131 * IMPORTANT: This is the single source of truth for what constitutes a transcript message. 132 * loadTranscriptFile() uses this to determine which messages to load into the chain. 133 * 134 * Progress messages are NOT transcript messages. They are ephemeral UI state 135 * and must not be persisted to the JSONL or participate in the parentUuid 136 * chain. Including them caused chain forks that orphaned real conversation 137 * messages on resume (see #14373, #23537). 138 */ 139export function isTranscriptMessage(entry: Entry): entry is TranscriptMessage { 140 return ( 141 entry.type === 'user' || 142 entry.type === 'assistant' || 143 entry.type === 'attachment' || 144 entry.type === 'system' 145 ) 146} 147 148/** 149 * Entries that participate in the parentUuid chain. 
Used on the write path 150 * (insertMessageChain, useLogMessages) to skip progress when assigning 151 * parentUuid. Old transcripts with progress already in the chain are handled 152 * by the progressBridge rewrite in loadTranscriptFile. 153 */ 154export function isChainParticipant(m: Pick<Message, 'type'>): boolean { 155 return m.type !== 'progress' 156} 157 158type LegacyProgressEntry = { 159 type: 'progress' 160 uuid: UUID 161 parentUuid: UUID | null 162} 163 164/** 165 * Progress entries in transcripts written before PR #24099. They are not 166 * in the Entry type union anymore but still exist on disk with uuid and 167 * parentUuid fields. loadTranscriptFile bridges the chain across them. 168 */ 169function isLegacyProgressEntry(entry: unknown): entry is LegacyProgressEntry { 170 return ( 171 typeof entry === 'object' && 172 entry !== null && 173 'type' in entry && 174 entry.type === 'progress' && 175 'uuid' in entry && 176 typeof entry.uuid === 'string' 177 ) 178} 179 180/** 181 * High-frequency tool progress ticks (1/sec for Sleep, per-chunk for Bash). 182 * These are UI-only: not sent to the API, not rendered after the tool 183 * completes. Used by REPL.tsx to replace-in-place instead of appending, and 184 * by loadTranscriptFile to skip legacy entries from old transcripts. 185 */ 186const EPHEMERAL_PROGRESS_TYPES = new Set([ 187 'bash_progress', 188 'powershell_progress', 189 'mcp_progress', 190 ...(feature('PROACTIVE') || feature('KAIROS') 191 ? (['sleep_progress'] as const) 192 : []), 193]) 194export function isEphemeralToolProgress(dataType: unknown): boolean { 195 return typeof dataType === 'string' && EPHEMERAL_PROGRESS_TYPES.has(dataType) 196} 197 198export function getProjectsDir(): string { 199 return join(getClaudeConfigHomeDir(), 'projects') 200} 201 202export function getTranscriptPath(): string { 203 const projectDir = getSessionProjectDir() ?? 
getProjectDir(getOriginalCwd()) 204 return join(projectDir, `${getSessionId()}.jsonl`) 205} 206 207export function getTranscriptPathForSession(sessionId: string): string { 208 // When asking for the CURRENT session's transcript, honor sessionProjectDir 209 // the same way getTranscriptPath() does. Without this, hooks get a 210 // transcript_path computed from originalCwd while the actual file was 211 // written to sessionProjectDir (set by switchActiveSession on resume/branch) 212 // — different directories, so the hook sees MISSING (gh-30217). CC-34 213 // made sessionId + sessionProjectDir atomic precisely to prevent this 214 // kind of drift; this function just wasn't updated to read both. 215 // 216 // For OTHER session IDs we can only guess via originalCwd — we don't 217 // track a sessionId→projectDir map. Callers wanting a specific other 218 // session's path should pass fullPath explicitly (most save* functions 219 // already accept this). 220 if (sessionId === getSessionId()) { 221 return getTranscriptPath() 222 } 223 const projectDir = getProjectDir(getOriginalCwd()) 224 return join(projectDir, `${sessionId}.jsonl`) 225} 226 227// 50 MB — session JSONL can grow to multiple GB (inc-3930). Callers that 228// read the raw transcript must bail out above this threshold to avoid OOM. 229export const MAX_TRANSCRIPT_READ_BYTES = 50 * 1024 * 1024 230 231// In-memory map of agentId → subdirectory for grouping related subagent 232// transcripts (e.g. workflow runs write to subagents/workflows/<runId>/). 233// Populated before the agent runs; consulted by getAgentTranscriptPath. 
234const agentTranscriptSubdirs = new Map<string, string>() 235 236export function setAgentTranscriptSubdir( 237 agentId: string, 238 subdir: string, 239): void { 240 agentTranscriptSubdirs.set(agentId, subdir) 241} 242 243export function clearAgentTranscriptSubdir(agentId: string): void { 244 agentTranscriptSubdirs.delete(agentId) 245} 246 247export function getAgentTranscriptPath(agentId: AgentId): string { 248 // Same sessionProjectDir consistency as getTranscriptPathForSession — 249 // subagent transcripts live under the session dir, so if the session 250 // transcript is at sessionProjectDir, subagent transcripts are too. 251 const projectDir = getSessionProjectDir() ?? getProjectDir(getOriginalCwd()) 252 const sessionId = getSessionId() 253 const subdir = agentTranscriptSubdirs.get(agentId) 254 const base = subdir 255 ? join(projectDir, sessionId, 'subagents', subdir) 256 : join(projectDir, sessionId, 'subagents') 257 return join(base, `agent-${agentId}.jsonl`) 258} 259 260function getAgentMetadataPath(agentId: AgentId): string { 261 return getAgentTranscriptPath(agentId).replace(/\.jsonl$/, '.meta.json') 262} 263 264export type AgentMetadata = { 265 agentType: string 266 /** Worktree path if the agent was spawned with isolation: "worktree" */ 267 worktreePath?: string 268 /** Original task description from the AgentTool input. Persisted so a 269 * resumed agent's notification can show the original description instead 270 * of a placeholder. Optional — older metadata files lack this field. */ 271 description?: string 272} 273 274/** 275 * Persist the agentType used to launch a subagent. Read by resume to 276 * route correctly when subagent_type is omitted — without this, resuming 277 * a fork silently degrades to general-purpose (4KB system prompt, no 278 * inherited history). Sidecar file avoids JSONL schema changes. 279 * 280 * Also stores the worktreePath when the agent was spawned with worktree 281 * isolation, enabling resume to restore the correct cwd. 
282 */ 283export async function writeAgentMetadata( 284 agentId: AgentId, 285 metadata: AgentMetadata, 286): Promise<void> { 287 const path = getAgentMetadataPath(agentId) 288 await mkdir(dirname(path), { recursive: true }) 289 await writeFile(path, JSON.stringify(metadata)) 290} 291 292export async function readAgentMetadata( 293 agentId: AgentId, 294): Promise<AgentMetadata | null> { 295 const path = getAgentMetadataPath(agentId) 296 try { 297 const raw = await readFile(path, 'utf-8') 298 return JSON.parse(raw) as AgentMetadata 299 } catch (e) { 300 if (isFsInaccessible(e)) return null 301 throw e 302 } 303} 304 305export type RemoteAgentMetadata = { 306 taskId: string 307 remoteTaskType: string 308 /** CCR session ID — used to fetch live status from the Sessions API on resume. */ 309 sessionId: string 310 title: string 311 command: string 312 spawnedAt: number 313 toolUseId?: string 314 isLongRunning?: boolean 315 isUltraplan?: boolean 316 isRemoteReview?: boolean 317 remoteTaskMetadata?: Record<string, unknown> 318} 319 320function getRemoteAgentsDir(): string { 321 // Same sessionProjectDir fallback as getAgentTranscriptPath — the project 322 // dir (containing the .jsonl), not the session dir, so sessionId is joined. 323 const projectDir = getSessionProjectDir() ?? getProjectDir(getOriginalCwd()) 324 return join(projectDir, getSessionId(), 'remote-agents') 325} 326 327function getRemoteAgentMetadataPath(taskId: string): string { 328 return join(getRemoteAgentsDir(), `remote-agent-${taskId}.meta.json`) 329} 330 331/** 332 * Persist metadata for a remote-agent task so it can be restored on session 333 * resume. Per-task sidecar file (sibling dir to subagents/) survives 334 * hydrateSessionFromRemote's .jsonl wipe; status is always fetched fresh 335 * from CCR on restore — only identity is persisted locally. 
336 */ 337export async function writeRemoteAgentMetadata( 338 taskId: string, 339 metadata: RemoteAgentMetadata, 340): Promise<void> { 341 const path = getRemoteAgentMetadataPath(taskId) 342 await mkdir(dirname(path), { recursive: true }) 343 await writeFile(path, JSON.stringify(metadata)) 344} 345 346export async function readRemoteAgentMetadata( 347 taskId: string, 348): Promise<RemoteAgentMetadata | null> { 349 const path = getRemoteAgentMetadataPath(taskId) 350 try { 351 const raw = await readFile(path, 'utf-8') 352 return JSON.parse(raw) as RemoteAgentMetadata 353 } catch (e) { 354 if (isFsInaccessible(e)) return null 355 throw e 356 } 357} 358 359export async function deleteRemoteAgentMetadata(taskId: string): Promise<void> { 360 const path = getRemoteAgentMetadataPath(taskId) 361 try { 362 await unlink(path) 363 } catch (e) { 364 if (isFsInaccessible(e)) return 365 throw e 366 } 367} 368 369/** 370 * Scan the remote-agents/ directory for all persisted metadata files. 371 * Used by restoreRemoteAgentTasks to reconnect to still-running CCR sessions. 372 */ 373export async function listRemoteAgentMetadata(): Promise< 374 RemoteAgentMetadata[] 375> { 376 const dir = getRemoteAgentsDir() 377 let entries: Dirent[] 378 try { 379 entries = await readdir(dir, { withFileTypes: true }) 380 } catch (e) { 381 if (isFsInaccessible(e)) return [] 382 throw e 383 } 384 const results: RemoteAgentMetadata[] = [] 385 for (const entry of entries) { 386 if (!entry.isFile() || !entry.name.endsWith('.meta.json')) continue 387 try { 388 const raw = await readFile(join(dir, entry.name), 'utf-8') 389 results.push(JSON.parse(raw) as RemoteAgentMetadata) 390 } catch (e) { 391 // Skip unreadable or corrupt files — a partial write from a crashed 392 // fire-and-forget persist shouldn't take down the whole restore. 
393 logForDebugging( 394 `listRemoteAgentMetadata: skipping ${entry.name}: ${String(e)}`, 395 ) 396 } 397 } 398 return results 399} 400 401export function sessionIdExists(sessionId: string): boolean { 402 const projectDir = getProjectDir(getOriginalCwd()) 403 const sessionFile = join(projectDir, `${sessionId}.jsonl`) 404 const fs = getFsImplementation() 405 try { 406 fs.statSync(sessionFile) 407 return true 408 } catch { 409 return false 410 } 411} 412 413// exported for testing 414export function getNodeEnv(): string { 415 return process.env.NODE_ENV || 'development' 416} 417 418// exported for testing 419export function getUserType(): string { 420 return process.env.USER_TYPE || 'external' 421} 422 423function getEntrypoint(): string | undefined { 424 return process.env.CLAUDE_CODE_ENTRYPOINT 425} 426 427export function isCustomTitleEnabled(): boolean { 428 return true 429} 430 431// Memoized: called 12+ times per turn via hooks.ts createBaseHookInput 432// (PostToolUse path, 5×/turn) + various save* functions. Input is a cwd 433// string; homedir/env/regex are all session-invariant so the result is 434// stable for a given input. Worktree switches just change the key — no 435// cache clear needed. 436export const getProjectDir = memoize((projectDir: string): string => { 437 return join(getProjectsDir(), sanitizePath(projectDir)) 438}) 439 440let project: Project | null = null 441let cleanupRegistered = false 442 443function getProject(): Project { 444 if (!project) { 445 project = new Project() 446 447 // Register flush as a cleanup handler (only once) 448 if (!cleanupRegistered) { 449 registerCleanup(async () => { 450 // Flush queued writes first, then re-append session metadata 451 // (customTitle, tag) so they always appear in the last 64KB tail 452 // window. 
readLiteMetadata only reads the tail to extract these 453 // fields — if enough messages are appended after a /rename, the 454 // custom-title entry gets pushed outside the window and --resume 455 // shows the auto-generated firstPrompt instead. 456 await project?.flush() 457 try { 458 project?.reAppendSessionMetadata() 459 } catch { 460 // Best-effort — don't let metadata re-append crash the cleanup 461 } 462 }) 463 cleanupRegistered = true 464 } 465 } 466 return project 467} 468 469/** 470 * Reset the Project singleton's flush state for testing. 471 * This ensures tests don't interfere with each other via shared counter state. 472 */ 473export function resetProjectFlushStateForTesting(): void { 474 project?._resetFlushState() 475} 476 477/** 478 * Reset the entire Project singleton for testing. 479 * This ensures tests with different CLAUDE_CONFIG_DIR values 480 * don't share stale sessionFile paths. 481 */ 482export function resetProjectForTesting(): void { 483 project = null 484} 485 486export function setSessionFileForTesting(path: string): void { 487 getProject().sessionFile = path 488} 489 490type InternalEventWriter = ( 491 eventType: string, 492 payload: Record<string, unknown>, 493 options?: { isCompaction?: boolean; agentId?: string }, 494) => Promise<void> 495 496/** 497 * Register a CCR v2 internal event writer for transcript persistence. 498 * When set, transcript messages are written as internal worker events 499 * instead of going through v1 Session Ingress. 500 */ 501export function setInternalEventWriter(writer: InternalEventWriter): void { 502 getProject().setInternalEventWriter(writer) 503} 504 505type InternalEventReader = () => Promise< 506 { payload: Record<string, unknown>; agent_id?: string }[] | null 507> 508 509/** 510 * Register a CCR v2 internal event reader for session resume. 511 * When set, hydrateFromCCRv2InternalEvents() can fetch foreground and 512 * subagent internal events to reconstruct conversation state on reconnection. 
513 */ 514export function setInternalEventReader( 515 reader: InternalEventReader, 516 subagentReader: InternalEventReader, 517): void { 518 getProject().setInternalEventReader(reader) 519 getProject().setInternalSubagentEventReader(subagentReader) 520} 521 522/** 523 * Set the remote ingress URL on the current Project for testing. 524 * This simulates what hydrateRemoteSession does in production. 525 */ 526export function setRemoteIngressUrlForTesting(url: string): void { 527 getProject().setRemoteIngressUrl(url) 528} 529 530const REMOTE_FLUSH_INTERVAL_MS = 10 531 532class Project { 533 // Minimal cache for current session only (not all sessions) 534 currentSessionTag: string | undefined 535 currentSessionTitle: string | undefined 536 currentSessionAgentName: string | undefined 537 currentSessionAgentColor: string | undefined 538 currentSessionLastPrompt: string | undefined 539 currentSessionAgentSetting: string | undefined 540 currentSessionMode: 'coordinator' | 'normal' | undefined 541 // Tri-state: undefined = never touched (don't write), null = exited worktree, 542 // object = currently in worktree. reAppendSessionMetadata writes null so 543 // --resume knows the session exited (vs. crashed while inside). 544 currentSessionWorktree: PersistedWorktreeSession | null | undefined 545 currentSessionPrNumber: number | undefined 546 currentSessionPrUrl: string | undefined 547 currentSessionPrRepository: string | undefined 548 549 sessionFile: string | null = null 550 // Entries buffered while sessionFile is null. Flushed by materializeSessionFile 551 // on the first user/assistant message — prevents metadata-only session files. 
552 private pendingEntries: Entry[] = [] 553 private remoteIngressUrl: string | null = null 554 private internalEventWriter: InternalEventWriter | null = null 555 private internalEventReader: InternalEventReader | null = null 556 private internalSubagentEventReader: InternalEventReader | null = null 557 private pendingWriteCount: number = 0 558 private flushResolvers: Array<() => void> = [] 559 // Per-file write queues. Each entry carries a resolve callback so 560 // callers of enqueueWrite can optionally await their specific write. 561 private writeQueues = new Map< 562 string, 563 Array<{ entry: Entry; resolve: () => void }> 564 >() 565 private flushTimer: ReturnType<typeof setTimeout> | null = null 566 private activeDrain: Promise<void> | null = null 567 private FLUSH_INTERVAL_MS = 100 568 private readonly MAX_CHUNK_BYTES = 100 * 1024 * 1024 569 570 constructor() {} 571 572 /** @internal Reset flush/queue state for testing. */ 573 _resetFlushState(): void { 574 this.pendingWriteCount = 0 575 this.flushResolvers = [] 576 if (this.flushTimer) clearTimeout(this.flushTimer) 577 this.flushTimer = null 578 this.activeDrain = null 579 this.writeQueues = new Map() 580 } 581 582 private incrementPendingWrites(): void { 583 this.pendingWriteCount++ 584 } 585 586 private decrementPendingWrites(): void { 587 this.pendingWriteCount-- 588 if (this.pendingWriteCount === 0) { 589 // Resolve all waiting flush promises 590 for (const resolve of this.flushResolvers) { 591 resolve() 592 } 593 this.flushResolvers = [] 594 } 595 } 596 597 private async trackWrite<T>(fn: () => Promise<T>): Promise<T> { 598 this.incrementPendingWrites() 599 try { 600 return await fn() 601 } finally { 602 this.decrementPendingWrites() 603 } 604 } 605 606 private enqueueWrite(filePath: string, entry: Entry): Promise<void> { 607 return new Promise<void>(resolve => { 608 let queue = this.writeQueues.get(filePath) 609 if (!queue) { 610 queue = [] 611 this.writeQueues.set(filePath, queue) 612 } 613 
queue.push({ entry, resolve }) 614 this.scheduleDrain() 615 }) 616 } 617 618 private scheduleDrain(): void { 619 if (this.flushTimer) { 620 return 621 } 622 this.flushTimer = setTimeout(async () => { 623 this.flushTimer = null 624 this.activeDrain = this.drainWriteQueue() 625 await this.activeDrain 626 this.activeDrain = null 627 // If more items arrived during drain, schedule again 628 if (this.writeQueues.size > 0) { 629 this.scheduleDrain() 630 } 631 }, this.FLUSH_INTERVAL_MS) 632 } 633 634 private async appendToFile(filePath: string, data: string): Promise<void> { 635 try { 636 await fsAppendFile(filePath, data, { mode: 0o600 }) 637 } catch { 638 // Directory may not exist — some NFS-like filesystems return 639 // unexpected error codes, so don't discriminate on code. 640 await mkdir(dirname(filePath), { recursive: true, mode: 0o700 }) 641 await fsAppendFile(filePath, data, { mode: 0o600 }) 642 } 643 } 644 645 private async drainWriteQueue(): Promise<void> { 646 for (const [filePath, queue] of this.writeQueues) { 647 if (queue.length === 0) { 648 continue 649 } 650 const batch = queue.splice(0) 651 652 let content = '' 653 const resolvers: Array<() => void> = [] 654 655 for (const { entry, resolve } of batch) { 656 const line = jsonStringify(entry) + '\n' 657 658 if (content.length + line.length >= this.MAX_CHUNK_BYTES) { 659 // Flush chunk and resolve its entries before starting a new one 660 await this.appendToFile(filePath, content) 661 for (const r of resolvers) { 662 r() 663 } 664 resolvers.length = 0 665 content = '' 666 } 667 668 content += line 669 resolvers.push(resolve) 670 } 671 672 if (content.length > 0) { 673 await this.appendToFile(filePath, content) 674 for (const r of resolvers) { 675 r() 676 } 677 } 678 } 679 680 // Clean up empty queues 681 for (const [filePath, queue] of this.writeQueues) { 682 if (queue.length === 0) { 683 this.writeQueues.delete(filePath) 684 } 685 } 686 } 687 688 resetSessionFile(): void { 689 this.sessionFile = null 690 
this.pendingEntries = [] 691 } 692 693 /** 694 * Re-append cached session metadata to the end of the transcript file. 695 * This ensures metadata stays within the tail window that readLiteMetadata 696 * reads during progressive loading. 697 * 698 * Called from two contexts with different file-ordering implications: 699 * - During compaction (compact.ts, reactiveCompact.ts): writes metadata 700 * just before the boundary marker is emitted - these entries end up 701 * before the boundary and are recovered by scanPreBoundaryMetadata. 702 * - On session exit (cleanup handler): writes metadata at EOF after all 703 * boundaries - this is what enables loadTranscriptFile's pre-compact 704 * skip to find metadata without a forward scan. 705 * 706 * External-writer safety for SDK-mutable fields (custom-title, tag): 707 * before re-appending, refresh the cache from the tail scan window. If an 708 * external process (SDK renameSession/tagSession) wrote a fresher value, 709 * our stale cache absorbs it and the re-append below persists it — not 710 * the stale CLI value. If no entry is in the tail (evicted, or never 711 * written by the SDK), the cache is the only source of truth and is 712 * re-appended as-is. 713 * 714 * Re-append is unconditional (even when the value is already in the 715 * tail): during compaction, a title 40KB from EOF is inside the current 716 * tail window but will fall out once the post-compaction session grows. 717 * Skipping the re-append would defeat the purpose of this call. Fields 718 * the SDK cannot touch (last-prompt, agent-*, mode, pr-link) have no 719 * external-writer concern — their caches are authoritative. 720 */ 721 reAppendSessionMetadata(skipTitleRefresh = false): void { 722 if (!this.sessionFile) return 723 const sessionId = getSessionId() as UUID 724 if (!sessionId) return 725 726 // One sync tail read to refresh SDK-mutable fields. Same 727 // LITE_READ_BUF_SIZE window readLiteMetadata uses. 
Empty string on 728 // failure → extract returns null → cache is the only source of truth. 729 const tail = readFileTailSync(this.sessionFile) 730 731 // Absorb any fresher SDK-written title/tag into our cache. If the SDK 732 // wrote while we had the session open, our cache is stale — the tail 733 // value is authoritative. If the tail has nothing (evicted or never 734 // written externally), the cache stands. 735 // 736 // Filter with startsWith to match only top-level JSONL entries (col 0) 737 // and not "type":"tag" appearing inside a nested tool_use input that 738 // happens to be JSON-serialized into a message. 739 const tailLines = tail.split('\n') 740 if (!skipTitleRefresh) { 741 const titleLine = tailLines.findLast(l => 742 l.startsWith('{"type":"custom-title"'), 743 ) 744 if (titleLine) { 745 const tailTitle = extractLastJsonStringField(titleLine, 'customTitle') 746 // `!== undefined` distinguishes no-match from empty-string match. 747 // renameSession rejects empty titles, but the CLI is defensive: an 748 // external writer with customTitle:"" should clear the cache so the 749 // re-append below skips it (instead of resurrecting a stale title). 750 if (tailTitle !== undefined) { 751 this.currentSessionTitle = tailTitle || undefined 752 } 753 } 754 } 755 const tagLine = tailLines.findLast(l => l.startsWith('{"type":"tag"')) 756 if (tagLine) { 757 const tailTag = extractLastJsonStringField(tagLine, 'tag') 758 // Same: tagSession(id, null) writes `tag:""` to clear. 759 if (tailTag !== undefined) { 760 this.currentSessionTag = tailTag || undefined 761 } 762 } 763 764 // lastPrompt is re-appended so readLiteMetadata can show what the 765 // user was most recently doing. Written first so customTitle/tag/etc 766 // land closer to EOF (they're the more critical fields for tail reads). 
767 if (this.currentSessionLastPrompt) { 768 appendEntryToFile(this.sessionFile, { 769 type: 'last-prompt', 770 lastPrompt: this.currentSessionLastPrompt, 771 sessionId, 772 }) 773 } 774 // Unconditional: cache was refreshed from tail above; re-append keeps 775 // the entry at EOF so compaction-pushed content doesn't evict it. 776 if (this.currentSessionTitle) { 777 appendEntryToFile(this.sessionFile, { 778 type: 'custom-title', 779 customTitle: this.currentSessionTitle, 780 sessionId, 781 }) 782 } 783 if (this.currentSessionTag) { 784 appendEntryToFile(this.sessionFile, { 785 type: 'tag', 786 tag: this.currentSessionTag, 787 sessionId, 788 }) 789 } 790 if (this.currentSessionAgentName) { 791 appendEntryToFile(this.sessionFile, { 792 type: 'agent-name', 793 agentName: this.currentSessionAgentName, 794 sessionId, 795 }) 796 } 797 if (this.currentSessionAgentColor) { 798 appendEntryToFile(this.sessionFile, { 799 type: 'agent-color', 800 agentColor: this.currentSessionAgentColor, 801 sessionId, 802 }) 803 } 804 if (this.currentSessionAgentSetting) { 805 appendEntryToFile(this.sessionFile, { 806 type: 'agent-setting', 807 agentSetting: this.currentSessionAgentSetting, 808 sessionId, 809 }) 810 } 811 if (this.currentSessionMode) { 812 appendEntryToFile(this.sessionFile, { 813 type: 'mode', 814 mode: this.currentSessionMode, 815 sessionId, 816 }) 817 } 818 if (this.currentSessionWorktree !== undefined) { 819 appendEntryToFile(this.sessionFile, { 820 type: 'worktree-state', 821 worktreeSession: this.currentSessionWorktree, 822 sessionId, 823 }) 824 } 825 if ( 826 this.currentSessionPrNumber !== undefined && 827 this.currentSessionPrUrl && 828 this.currentSessionPrRepository 829 ) { 830 appendEntryToFile(this.sessionFile, { 831 type: 'pr-link', 832 sessionId, 833 prNumber: this.currentSessionPrNumber, 834 prUrl: this.currentSessionPrUrl, 835 prRepository: this.currentSessionPrRepository, 836 timestamp: new Date().toISOString(), 837 }) 838 } 839 } 840 841 async flush(): 
  /**
   * Flush all pending transcript writes to disk.
   *
   * Cancels the batching timer, waits out any in-flight drain, drains the
   * remaining queues, and then — if tracked non-queue writes are still
   * outstanding — parks on a resolver that trackWrite fires when the
   * counter reaches zero.
   */
  async flush(): Promise<void> {
    // Cancel pending timer
    if (this.flushTimer) {
      clearTimeout(this.flushTimer)
      this.flushTimer = null
    }
    // Wait for any in-flight drain to finish
    if (this.activeDrain) {
      await this.activeDrain
    }
    // Drain anything remaining in the queues
    await this.drainWriteQueue()

    // Wait for non-queue tracked operations (e.g. removeMessageByUuid)
    if (this.pendingWriteCount === 0) {
      return
    }
    return new Promise<void>(resolve => {
      this.flushResolvers.push(resolve)
    })
  }

  /**
   * Remove a message from the transcript by UUID.
   * Used for tombstoning orphaned messages from failed streaming attempts.
   *
   * The target is almost always the most recently appended entry, so we
   * read only the tail, locate the line, and splice it out with a
   * positional write + truncate instead of rewriting the whole file.
   */
  async removeMessageByUuid(targetUuid: UUID): Promise<void> {
    return this.trackWrite(async () => {
      if (this.sessionFile === null) return
      try {
        let fileSize = 0
        const fh = await fsOpen(this.sessionFile, 'r+')
        try {
          const { size } = await fh.stat()
          fileSize = size
          if (size === 0) return

          // Read at most LITE_READ_BUF_SIZE bytes from the end of the file.
          const chunkLen = Math.min(size, LITE_READ_BUF_SIZE)
          const tailStart = size - chunkLen
          const buf = Buffer.allocUnsafe(chunkLen)
          const { bytesRead } = await fh.read(buf, 0, chunkLen, tailStart)
          const tail = buf.subarray(0, bytesRead)

          // Entries are serialized via JSON.stringify (no key-value
          // whitespace). Search for the full `"uuid":"..."` pattern, not
          // just the bare UUID, so we do not match the same value sitting
          // in `parentUuid` of a child entry. UUIDs are pure ASCII so a
          // byte-level search is correct.
          const needle = `"uuid":"${targetUuid}"`
          const matchIdx = tail.lastIndexOf(needle)

          if (matchIdx >= 0) {
            // 0x0a never appears inside a UTF-8 multi-byte sequence, so
            // byte-scanning for line boundaries is safe even if the chunk
            // starts mid-character.
            const prevNl = tail.lastIndexOf(0x0a, matchIdx)
            // If the preceding newline is outside our chunk and we did not
            // read from the start of the file, the line is longer than the
            // window - fall through to the slow path.
            if (prevNl >= 0 || tailStart === 0) {
              const lineStart = prevNl + 1 // 0 when prevNl === -1
              const nextNl = tail.indexOf(0x0a, matchIdx + needle.length)
              const lineEnd = nextNl >= 0 ? nextNl + 1 : bytesRead

              const absLineStart = tailStart + lineStart
              const afterLen = bytesRead - lineEnd
              // Truncate first, then re-append the trailing lines. In the
              // common case (target is the last entry) afterLen is 0 and
              // this is a single ftruncate.
              await fh.truncate(absLineStart)
              if (afterLen > 0) {
                await fh.write(tail, lineEnd, afterLen, absLineStart)
              }
              return
            }
          }
        } finally {
          await fh.close()
        }

        // Slow path: target was not in the last 64KB. Rare - requires many
        // large entries to have landed between the write and the tombstone.
        if (fileSize > MAX_TOMBSTONE_REWRITE_BYTES) {
          logForDebugging(
            `Skipping tombstone removal: session file too large (${formatFileSize(fileSize)})`,
            { level: 'warn' },
          )
          return
        }
        // Full-file rewrite: drop the line whose parsed uuid matches.
        const content = await readFile(this.sessionFile, { encoding: 'utf-8' })
        const lines = content.split('\n').filter((line: string) => {
          if (!line.trim()) return true
          try {
            const entry = jsonParse(line)
            return entry.uuid !== targetUuid
          } catch {
            return true // Keep malformed lines
          }
        })
        await writeFile(this.sessionFile, lines.join('\n'), {
          encoding: 'utf8',
        })
      } catch {
        // Silently ignore errors - the file might not exist yet
      }
    })
  }

  /**
   * True when test env / cleanupPeriodDays=0 / --no-session-persistence /
   * CLAUDE_CODE_SKIP_PROMPT_HISTORY should suppress all transcript writes.
   * Shared guard for appendEntry and materializeSessionFile so both skip
   * consistently. The env var is set by tmuxSocket.ts so Tungsten-spawned
   * test sessions don't pollute the user's --resume list.
   */
  private shouldSkipPersistence(): boolean {
    const allowTestPersistence = isEnvTruthy(
      process.env.TEST_ENABLE_SESSION_PERSISTENCE,
    )
    return (
      (getNodeEnv() === 'test' && !allowTestPersistence) ||
      getSettings_DEPRECATED()?.cleanupPeriodDays === 0 ||
      isSessionPersistenceDisabled() ||
      isEnvTruthy(process.env.CLAUDE_CODE_SKIP_PROMPT_HISTORY)
    )
  }
  /**
   * Create the session file, write cached startup metadata, and flush
   * buffered entries. Called on the first user/assistant message.
   */
  private async materializeSessionFile(): Promise<void> {
    // Guard here too — reAppendSessionMetadata writes via appendEntryToFile
    // (not appendEntry) so it would bypass the per-entry persistence check
    // and create a metadata-only file despite --no-session-persistence.
    if (this.shouldSkipPersistence()) return
    this.ensureCurrentSessionFile()
    // mode/agentSetting are cache-only pre-materialization; write them now.
    this.reAppendSessionMetadata()
    if (this.pendingEntries.length > 0) {
      const buffered = this.pendingEntries
      this.pendingEntries = []
      for (const entry of buffered) {
        await this.appendEntry(entry)
      }
    }
  }

  /**
   * Append a chain of messages to the transcript, stamping each one with
   * session metadata and threading parentUuid links through the chain.
   *
   * @param messages messages to persist, in order
   * @param isSidechain true for subagent sidechain transcripts
   * @param agentId owning agent for sidechain messages
   * @param startingParentUuid parent for the first chain participant
   * @param teamInfo optional team/agent attribution stamped on each message
   */
  async insertMessageChain(
    messages: Transcript,
    isSidechain: boolean = false,
    agentId?: string,
    startingParentUuid?: UUID | null,
    teamInfo?: { teamName?: string; agentName?: string },
  ) {
    return this.trackWrite(async () => {
      let parentUuid: UUID | null = startingParentUuid ?? null

      // First user/assistant message materializes the session file.
      // Hook progress/attachment messages alone stay buffered.
      if (
        this.sessionFile === null &&
        messages.some(m => m.type === 'user' || m.type === 'assistant')
      ) {
        await this.materializeSessionFile()
      }

      // Get current git branch once for this message chain
      let gitBranch: string | undefined
      try {
        gitBranch = await getBranch()
      } catch {
        // Not in a git repo or git command failed
        gitBranch = undefined
      }

      // Get slug if one exists for this session (used for plan files, etc.)
      const sessionId = getSessionId()
      const slug = getPlanSlugCache().get(sessionId)

      for (const message of messages) {
        const isCompactBoundary = isCompactBoundaryMessage(message)

        // For tool_result messages, use the assistant message UUID from the message
        // if available (set at creation time), otherwise fall back to sequential parent
        let effectiveParentUuid = parentUuid
        if (
          message.type === 'user' &&
          'sourceToolAssistantUUID' in message &&
          message.sourceToolAssistantUUID
        ) {
          effectiveParentUuid = message.sourceToolAssistantUUID
        }

        const transcriptMessage: TranscriptMessage = {
          parentUuid: isCompactBoundary ? null : effectiveParentUuid,
          logicalParentUuid: isCompactBoundary ? parentUuid : undefined,
          isSidechain,
          teamName: teamInfo?.teamName,
          agentName: teamInfo?.agentName,
          promptId:
            message.type === 'user' ? (getPromptId() ?? undefined) : undefined,
          agentId,
          ...message,
          // Session-stamp fields MUST come after the spread. On --fork-session
          // and --resume, messages arrive as SerializedMessage (carries source
          // sessionId/cwd/etc. because removeExtraFields only strips parentUuid
          // and isSidechain). If sessionId isn't re-stamped, FRESH.jsonl ends up
          // with messages stamped sessionId=A but content-replacement entries
          // stamped sessionId=FRESH (from insertContentReplacement), and
          // loadFullLog's sessionId-keyed contentReplacements lookup misses →
          // replacement records lost → FROZEN misclassification.
          userType: getUserType(),
          entrypoint: getEntrypoint(),
          cwd: getCwd(),
          sessionId,
          version: VERSION,
          gitBranch,
          slug,
        }
        await this.appendEntry(transcriptMessage)
        if (isChainParticipant(message)) {
          parentUuid = message.uuid
        }
      }

      // Cache this turn's user prompt for reAppendSessionMetadata —
      // the --resume picker shows what the user was last doing.
      // Overwritten every turn by design.
      if (!isSidechain) {
        const text = getFirstMeaningfulUserMessageTextContent(messages)
        if (text) {
          const flat = text.replace(/\n/g, ' ').trim()
          this.currentSessionLastPrompt =
            flat.length > 200 ? flat.slice(0, 200).trim() + '…' : flat
        }
      }
    })
  }

  /** Append a file-history snapshot entry (tracked write). */
  async insertFileHistorySnapshot(
    messageId: UUID,
    snapshot: FileHistorySnapshot,
    isSnapshotUpdate: boolean,
  ) {
    return this.trackWrite(async () => {
      const fileHistoryMessage: FileHistorySnapshotMessage = {
        type: 'file-history-snapshot',
        messageId,
        snapshot,
        isSnapshotUpdate,
      }
      await this.appendEntry(fileHistoryMessage)
    })
  }

  /** Append a queue-operation entry (tracked write). */
  async insertQueueOperation(queueOp: QueueOperationMessage) {
    return this.trackWrite(async () => {
      await this.appendEntry(queueOp)
    })
  }

  /** Append an attribution snapshot entry (tracked write). */
  async insertAttributionSnapshot(snapshot: AttributionSnapshotMessage) {
    return this.trackWrite(async () => {
      await this.appendEntry(snapshot)
    })
  }

  /**
   * Append a content-replacement record, stamped with the CURRENT session
   * id (appendEntry routes subagent records to the agent transcript file).
   */
  async insertContentReplacement(
    replacements: ContentReplacementRecord[],
    agentId?: AgentId,
  ) {
    return this.trackWrite(async () => {
      const entry: ContentReplacementEntry = {
        type: 'content-replacement',
        sessionId: getSessionId() as UUID,
        agentId,
        replacements,
      }
      await this.appendEntry(entry)
    })
  }
(this.shouldSkipPersistence()) { 1130 return 1131 } 1132 1133 const currentSessionId = getSessionId() as UUID 1134 const isCurrentSession = sessionId === currentSessionId 1135 1136 let sessionFile: string 1137 if (isCurrentSession) { 1138 // Buffer until materializeSessionFile runs (first user/assistant message). 1139 if (this.sessionFile === null) { 1140 this.pendingEntries.push(entry) 1141 return 1142 } 1143 sessionFile = this.sessionFile 1144 } else { 1145 const existing = await this.getExistingSessionFile(sessionId) 1146 if (!existing) { 1147 logError( 1148 new Error( 1149 `appendEntry: session file not found for other session ${sessionId}`, 1150 ), 1151 ) 1152 return 1153 } 1154 sessionFile = existing 1155 } 1156 1157 // Only load current session messages if needed 1158 if (entry.type === 'summary') { 1159 // Summaries can always be appended 1160 void this.enqueueWrite(sessionFile, entry) 1161 } else if (entry.type === 'custom-title') { 1162 // Custom titles can always be appended 1163 void this.enqueueWrite(sessionFile, entry) 1164 } else if (entry.type === 'ai-title') { 1165 // AI titles can always be appended 1166 void this.enqueueWrite(sessionFile, entry) 1167 } else if (entry.type === 'last-prompt') { 1168 void this.enqueueWrite(sessionFile, entry) 1169 } else if (entry.type === 'task-summary') { 1170 void this.enqueueWrite(sessionFile, entry) 1171 } else if (entry.type === 'tag') { 1172 // Tags can always be appended 1173 void this.enqueueWrite(sessionFile, entry) 1174 } else if (entry.type === 'agent-name') { 1175 // Agent names can always be appended 1176 void this.enqueueWrite(sessionFile, entry) 1177 } else if (entry.type === 'agent-color') { 1178 // Agent colors can always be appended 1179 void this.enqueueWrite(sessionFile, entry) 1180 } else if (entry.type === 'agent-setting') { 1181 // Agent settings can always be appended 1182 void this.enqueueWrite(sessionFile, entry) 1183 } else if (entry.type === 'pr-link') { 1184 // PR links can always be 
appended 1185 void this.enqueueWrite(sessionFile, entry) 1186 } else if (entry.type === 'file-history-snapshot') { 1187 // File history snapshots can always be appended 1188 void this.enqueueWrite(sessionFile, entry) 1189 } else if (entry.type === 'attribution-snapshot') { 1190 // Attribution snapshots can always be appended 1191 void this.enqueueWrite(sessionFile, entry) 1192 } else if (entry.type === 'speculation-accept') { 1193 // Speculation accept entries can always be appended 1194 void this.enqueueWrite(sessionFile, entry) 1195 } else if (entry.type === 'mode') { 1196 // Mode entries can always be appended 1197 void this.enqueueWrite(sessionFile, entry) 1198 } else if (entry.type === 'worktree-state') { 1199 void this.enqueueWrite(sessionFile, entry) 1200 } else if (entry.type === 'content-replacement') { 1201 // Content replacement records can always be appended. Subagent records 1202 // go to the sidechain file (for AgentTool resume); main-thread 1203 // records go to the session file (for /resume). 1204 const targetFile = entry.agentId 1205 ? getAgentTranscriptPath(entry.agentId) 1206 : sessionFile 1207 void this.enqueueWrite(targetFile, entry) 1208 } else if (entry.type === 'marble-origami-commit') { 1209 // Always append. Commit order matters for restore (later commits may 1210 // reference earlier commits' summary messages), so these must be 1211 // written in the order received and read back sequentially. 1212 void this.enqueueWrite(sessionFile, entry) 1213 } else if (entry.type === 'marble-origami-snapshot') { 1214 // Always append. Last-wins on restore — later entries supersede. 
1215 void this.enqueueWrite(sessionFile, entry) 1216 } else { 1217 const messageSet = await getSessionMessages(sessionId) 1218 if (entry.type === 'queue-operation') { 1219 // Queue operations are always appended to the session file 1220 void this.enqueueWrite(sessionFile, entry) 1221 } else { 1222 // At this point, entry must be a TranscriptMessage (user/assistant/attachment/system) 1223 // All other entry types have been handled above 1224 const isAgentSidechain = 1225 entry.isSidechain && entry.agentId !== undefined 1226 const targetFile = isAgentSidechain 1227 ? getAgentTranscriptPath(asAgentId(entry.agentId!)) 1228 : sessionFile 1229 1230 // For message entries, check if UUID already exists in current session. 1231 // Skip dedup for agent sidechain LOCAL writes — they go to a separate 1232 // file, and fork-inherited parent messages share UUIDs with the main 1233 // session transcript. Deduping against the main session's set would 1234 // drop them, leaving the persisted sidechain transcript incomplete 1235 // (resume-of-fork loads a 10KB file instead of the full 85KB inherited 1236 // context). 1237 // 1238 // The sidechain bypass applies ONLY to the local file write — remote 1239 // persistence (session-ingress) uses a single Last-Uuid chain per 1240 // sessionId, so re-POSTing a UUID it already has 409s and eventually 1241 // exhausts retries → gracefulShutdownSync(1). See inc-4718. 1242 const isNewUuid = !messageSet.has(entry.uuid) 1243 if (isAgentSidechain || isNewUuid) { 1244 // Enqueue write — appendToFile handles ENOENT by creating directories 1245 void this.enqueueWrite(targetFile, entry) 1246 1247 if (!isAgentSidechain) { 1248 // messageSet is main-file-authoritative. Sidechain entries go to a 1249 // separate agent file — adding their UUIDs here causes recordTranscript 1250 // to skip them on the main thread (line ~1270), so the message is never 1251 // written to the main session file. 
The next main-thread message then 1252 // chains its parentUuid to a UUID that only exists in the agent file, 1253 // and --resume's buildConversationChain terminates at the dangling ref. 1254 // Same constraint for remote (inc-4718 above): sidechain persisting a 1255 // UUID the main thread hasn't written yet → 409 when main writes it. 1256 messageSet.add(entry.uuid) 1257 1258 if (isTranscriptMessage(entry)) { 1259 await this.persistToRemote(sessionId, entry) 1260 } 1261 } 1262 } 1263 } 1264 } 1265 } 1266 1267 /** 1268 * Loads the sessionFile variable. 1269 * Do not need to create session files until they are written to. 1270 */ 1271 private ensureCurrentSessionFile(): string { 1272 if (this.sessionFile === null) { 1273 this.sessionFile = getTranscriptPath() 1274 } 1275 1276 return this.sessionFile 1277 } 1278 1279 /** 1280 * Returns the session file path if it exists, null otherwise. 1281 * Used for writing to sessions other than the current one. 1282 * Caches positive results so we only stat once per session. 
  /**
   * Returns the session file path if it exists, null otherwise.
   * Used for writing to sessions other than the current one.
   * Caches positive results so we only stat once per session.
   */
  private existingSessionFiles = new Map<string, string>()
  private async getExistingSessionFile(
    sessionId: UUID,
  ): Promise<string | null> {
    const cached = this.existingSessionFiles.get(sessionId)
    if (cached) return cached

    const targetFile = getTranscriptPathForSession(sessionId)
    try {
      await stat(targetFile)
      this.existingSessionFiles.set(sessionId, targetFile)
      return targetFile
    } catch (e) {
      if (isFsInaccessible(e)) return null
      throw e
    }
  }

  /**
   * Persist one transcript message to remote storage.
   * CCR v2 (internal event writer) takes priority over the v1 Session
   * Ingress path; both are skipped during shutdown. A v1 failure is fatal
   * (graceful shutdown) so local and remote transcripts cannot diverge.
   */
  private async persistToRemote(sessionId: UUID, entry: TranscriptMessage) {
    if (isShuttingDown()) {
      return
    }

    // CCR v2 path: write as internal worker event
    if (this.internalEventWriter) {
      try {
        await this.internalEventWriter(
          'transcript',
          entry as unknown as Record<string, unknown>,
          {
            ...(isCompactBoundaryMessage(entry) && { isCompaction: true }),
            ...(entry.agentId && { agentId: entry.agentId }),
          },
        )
      } catch {
        logEvent('tengu_session_persistence_failed', {})
        logForDebugging('Failed to write transcript as internal event')
      }
      return
    }

    // v1 Session Ingress path
    if (
      !isEnvTruthy(process.env.ENABLE_SESSION_PERSISTENCE) ||
      !this.remoteIngressUrl
    ) {
      return
    }

    const success = await sessionIngress.appendSessionLog(
      sessionId,
      entry,
      this.remoteIngressUrl,
    )

    if (!success) {
      logEvent('tengu_session_persistence_failed', {})
      gracefulShutdownSync(1, 'other')
    }
  }

  /** Enable v1 remote persistence and tighten the flush interval. */
  setRemoteIngressUrl(url: string): void {
    this.remoteIngressUrl = url
    logForDebugging(`Remote persistence enabled with URL: ${url}`)
    if (url) {
      // If using CCR, don't delay messages by any more than 10ms.
      this.FLUSH_INTERVAL_MS = REMOTE_FLUSH_INTERVAL_MS
    }
  }

  /** Register the CCR v2 event writer; supersedes the v1 ingress path. */
  setInternalEventWriter(writer: InternalEventWriter): void {
    this.internalEventWriter = writer
    logForDebugging(
      'CCR v2 internal event writer registered for transcript persistence',
    )
    // Use fast flush interval for CCR v2
    this.FLUSH_INTERVAL_MS = REMOTE_FLUSH_INTERVAL_MS
  }

  /** Register the CCR v2 foreground event reader used on resume. */
  setInternalEventReader(reader: InternalEventReader): void {
    this.internalEventReader = reader
    logForDebugging(
      'CCR v2 internal event reader registered for session resume',
    )
  }

  /** Register the CCR v2 subagent event reader used on resume. */
  setInternalSubagentEventReader(reader: InternalEventReader): void {
    this.internalSubagentEventReader = reader
    logForDebugging(
      'CCR v2 subagent event reader registered for session resume',
    )
  }

  getInternalEventReader(): InternalEventReader | null {
    return this.internalEventReader
  }

  getInternalSubagentEventReader(): InternalEventReader | null {
    return this.internalSubagentEventReader
  }
}

export type TeamInfo = {
  teamName?: string
  agentName?: string
}
// Filter out already-recorded messages before passing to insertMessageChain.
// Without this, after compaction messagesToKeep (same UUIDs as pre-compact
// messages) are dedup-skipped by appendEntry but still advance the parentUuid
// cursor in insertMessageChain, causing new messages to chain from pre-compact
// UUIDs instead of the post-compact summary — orphaning the compact boundary.
//
// `startingParentUuidHint`: used by useLogMessages to pass the parent from
// the previous incremental slice, avoiding an O(n) scan to rediscover it.
//
// Skip-tracking: already-recorded messages are tracked as the parent ONLY if
// they form a PREFIX (appear before any new message). This handles both cases:
// - Growing-array callers (QueryEngine, queryHelpers, LocalMainSessionTask,
//   trajectory): recorded messages are always a prefix → tracked → correct
//   parent chain for new messages.
// - Compaction (useLogMessages): new CB/summary appear FIRST, then recorded
//   messagesToKeep → not a prefix → not tracked → CB gets parentUuid=null
//   (correct: truncates --continue chain at compact boundary).
export async function recordTranscript(
  messages: Message[],
  teamInfo?: TeamInfo,
  startingParentUuidHint?: UUID,
  allMessages?: readonly Message[],
): Promise<UUID | null> {
  const cleanedMessages = cleanMessagesForLogging(messages, allMessages)
  const sessionId = getSessionId() as UUID
  const messageSet = await getSessionMessages(sessionId)
  const newMessages: typeof cleanedMessages = []
  let startingParentUuid: UUID | undefined = startingParentUuidHint
  let seenNewMessage = false
  for (const m of cleanedMessages) {
    if (messageSet.has(m.uuid as UUID)) {
      // Only track skipped messages that form a prefix. After compaction,
      // messagesToKeep appear AFTER new CB/summary, so this skips them.
      if (!seenNewMessage && isChainParticipant(m)) {
        startingParentUuid = m.uuid as UUID
      }
    } else {
      newMessages.push(m)
      seenNewMessage = true
    }
  }
  if (newMessages.length > 0) {
    await getProject().insertMessageChain(
      newMessages,
      false,
      undefined,
      startingParentUuid,
      teamInfo,
    )
  }
  // Return the last ACTUALLY recorded chain-participant's UUID, OR the
  // prefix-tracked UUID if no new chain participants were recorded. This lets
  // callers (useLogMessages) maintain the correct parent chain even when the
  // slice is all-recorded (rewind, /resume scenarios where every message is
  // already in messageSet). Progress is skipped — it's written to the JSONL
  // but nothing chains TO it (see isChainParticipant).
  const lastRecorded = newMessages.findLast(isChainParticipant)
  return (lastRecorded?.uuid as UUID | undefined) ?? startingParentUuid ?? null
}

/** Record a subagent sidechain transcript (no dedup prefix-tracking). */
export async function recordSidechainTranscript(
  messages: Message[],
  agentId?: string,
  startingParentUuid?: UUID | null,
) {
  await getProject().insertMessageChain(
    cleanMessagesForLogging(messages),
    true,
    agentId,
    startingParentUuid,
  )
}

/** Record a queue-operation entry on the current session. */
export async function recordQueueOperation(queueOp: QueueOperationMessage) {
  await getProject().insertQueueOperation(queueOp)
}

/**
 * Remove a message from the transcript by UUID.
 * Used when a tombstone is received for an orphaned message.
 */
export async function removeTranscriptMessage(targetUuid: UUID): Promise<void> {
  await getProject().removeMessageByUuid(targetUuid)
}

/** Record a file-history snapshot tied to a message. */
export async function recordFileHistorySnapshot(
  messageId: UUID,
  snapshot: FileHistorySnapshot,
  isSnapshotUpdate: boolean,
) {
  await getProject().insertFileHistorySnapshot(
    messageId,
    snapshot,
    isSnapshotUpdate,
  )
}

/** Record an attribution snapshot entry. */
export async function recordAttributionSnapshot(
  snapshot: AttributionSnapshotMessage,
) {
  await getProject().insertAttributionSnapshot(snapshot)
}

/** Record content-replacement records (routed per agentId). */
export async function recordContentReplacement(
  replacements: ContentReplacementRecord[],
  agentId?: AgentId,
) {
  await getProject().insertContentReplacement(replacements, agentId)
}
/**
 * Reset the session file pointer after switchSession/regenerateSessionId.
 * The new file is created lazily on the first user/assistant message.
 */
export async function resetSessionFilePointer() {
  getProject().resetSessionFile()
}

/**
 * Adopt the existing session file after --continue/--resume (non-fork).
 * Call after switchSession + resetSessionFilePointer + restoreSessionMetadata:
 * getTranscriptPath() now derives the resumed file's path from the switched
 * sessionId, and the cache holds the final metadata (--name title, resumed
 * mode/tag/agent).
 *
 * Setting sessionFile here — instead of waiting for materializeSessionFile
 * on the first user message — lets the exit cleanup handler's
 * reAppendSessionMetadata run (it bails when sessionFile is null). Without
 * this, `-c -n foo` + quit-before-message drops the title on the floor:
 * the in-memory cache is correct but never written. The resumed file
 * already exists on disk (we loaded from it), so this can't create an
 * orphan the way a fresh --name session would.
 *
 * skipTitleRefresh: restoreSessionMetadata populated the cache from the
 * same disk read microseconds ago, so refreshing from the tail here is a
 * no-op — unless --name was used, in which case it would clobber the fresh
 * CLI title with the stale disk value. After this write, disk == cache and
 * later calls (compaction, exit cleanup) absorb SDK writes normally.
 */
export function adoptResumedSessionFile(): void {
  const project = getProject()
  project.sessionFile = getTranscriptPath()
  project.reAppendSessionMetadata(true)
}
1540 */ 1541export async function recordContextCollapseCommit(commit: { 1542 collapseId: string 1543 summaryUuid: string 1544 summaryContent: string 1545 summary: string 1546 firstArchivedUuid: string 1547 lastArchivedUuid: string 1548}): Promise<void> { 1549 const sessionId = getSessionId() as UUID 1550 if (!sessionId) return 1551 await getProject().appendEntry({ 1552 type: 'marble-origami-commit', 1553 sessionId, 1554 ...commit, 1555 }) 1556} 1557 1558/** 1559 * Snapshot the staged queue + spawn state. Written after each ctx-agent 1560 * spawn resolves (when staged contents may have changed). Last-wins on 1561 * restore — the loader keeps only the most recent snapshot entry. 1562 */ 1563export async function recordContextCollapseSnapshot(snapshot: { 1564 staged: Array<{ 1565 startUuid: string 1566 endUuid: string 1567 summary: string 1568 risk: number 1569 stagedAt: number 1570 }> 1571 armed: boolean 1572 lastSpawnTokens: number 1573}): Promise<void> { 1574 const sessionId = getSessionId() as UUID 1575 if (!sessionId) return 1576 await getProject().appendEntry({ 1577 type: 'marble-origami-snapshot', 1578 sessionId, 1579 ...snapshot, 1580 }) 1581} 1582 1583export async function flushSessionStorage(): Promise<void> { 1584 await getProject().flush() 1585} 1586 1587export async function hydrateRemoteSession( 1588 sessionId: string, 1589 ingressUrl: string, 1590): Promise<boolean> { 1591 switchSession(asSessionId(sessionId)) 1592 1593 const project = getProject() 1594 1595 try { 1596 const remoteLogs = 1597 (await sessionIngress.getSessionLogs(sessionId, ingressUrl)) || [] 1598 1599 // Ensure the project directory and session file exist 1600 const projectDir = getProjectDir(getOriginalCwd()) 1601 await mkdir(projectDir, { recursive: true, mode: 0o700 }) 1602 1603 const sessionFile = getTranscriptPathForSession(sessionId) 1604 1605 // Replace local logs with remote logs. 
writeFile truncates, so no 1606 // unlink is needed; an empty remoteLogs array produces an empty file. 1607 const content = remoteLogs.map(e => jsonStringify(e) + '\n').join('') 1608 await writeFile(sessionFile, content, { encoding: 'utf8', mode: 0o600 }) 1609 1610 logForDebugging(`Hydrated ${remoteLogs.length} entries from remote`) 1611 return remoteLogs.length > 0 1612 } catch (error) { 1613 logForDebugging(`Error hydrating session from remote: ${error}`) 1614 logForDiagnosticsNoPII('error', 'hydrate_remote_session_fail') 1615 return false 1616 } finally { 1617 // Set remote ingress URL after hydrating the remote session 1618 // to ensure we've always synced with the remote session 1619 // prior to enabling persistence 1620 project.setRemoteIngressUrl(ingressUrl) 1621 } 1622} 1623 1624/** 1625 * Hydrate session state from CCR v2 internal events. 1626 * Fetches foreground and subagent events via the registered readers, 1627 * extracts transcript entries from payloads, and writes them to the 1628 * local transcript files (main + per-agent). 1629 * The server handles compaction filtering — it returns events starting 1630 * from the latest compaction boundary. 
1631 */ 1632export async function hydrateFromCCRv2InternalEvents( 1633 sessionId: string, 1634): Promise<boolean> { 1635 const startMs = Date.now() 1636 switchSession(asSessionId(sessionId)) 1637 1638 const project = getProject() 1639 const reader = project.getInternalEventReader() 1640 if (!reader) { 1641 logForDebugging('No internal event reader registered for CCR v2 resume') 1642 return false 1643 } 1644 1645 try { 1646 // Fetch foreground events 1647 const events = await reader() 1648 if (!events) { 1649 logForDebugging('Failed to read internal events for resume') 1650 logForDiagnosticsNoPII('error', 'hydrate_ccr_v2_read_fail') 1651 return false 1652 } 1653 1654 const projectDir = getProjectDir(getOriginalCwd()) 1655 await mkdir(projectDir, { recursive: true, mode: 0o700 }) 1656 1657 // Write foreground transcript 1658 const sessionFile = getTranscriptPathForSession(sessionId) 1659 const fgContent = events.map(e => jsonStringify(e.payload) + '\n').join('') 1660 await writeFile(sessionFile, fgContent, { encoding: 'utf8', mode: 0o600 }) 1661 1662 logForDebugging( 1663 `Hydrated ${events.length} foreground entries from CCR v2 internal events`, 1664 ) 1665 1666 // Fetch and write subagent events 1667 let subagentEventCount = 0 1668 const subagentReader = project.getInternalSubagentEventReader() 1669 if (subagentReader) { 1670 const subagentEvents = await subagentReader() 1671 if (subagentEvents && subagentEvents.length > 0) { 1672 subagentEventCount = subagentEvents.length 1673 // Group by agent_id 1674 const byAgent = new Map<string, Record<string, unknown>[]>() 1675 for (const e of subagentEvents) { 1676 const agentId = e.agent_id || '' 1677 if (!agentId) continue 1678 let list = byAgent.get(agentId) 1679 if (!list) { 1680 list = [] 1681 byAgent.set(agentId, list) 1682 } 1683 list.push(e.payload) 1684 } 1685 1686 // Write each agent's transcript to its own file 1687 for (const [agentId, entries] of byAgent) { 1688 const agentFile = 
getAgentTranscriptPath(asAgentId(agentId)) 1689 await mkdir(dirname(agentFile), { recursive: true, mode: 0o700 }) 1690 const agentContent = entries 1691 .map(p => jsonStringify(p) + '\n') 1692 .join('') 1693 await writeFile(agentFile, agentContent, { 1694 encoding: 'utf8', 1695 mode: 0o600, 1696 }) 1697 } 1698 1699 logForDebugging( 1700 `Hydrated ${subagentEvents.length} subagent entries across ${byAgent.size} agents`, 1701 ) 1702 } 1703 } 1704 1705 logForDiagnosticsNoPII('info', 'hydrate_ccr_v2_completed', { 1706 duration_ms: Date.now() - startMs, 1707 event_count: events.length, 1708 subagent_event_count: subagentEventCount, 1709 }) 1710 return events.length > 0 1711 } catch (error) { 1712 // Re-throw epoch mismatch so the worker doesn't race against gracefulShutdown 1713 if ( 1714 error instanceof Error && 1715 error.message === 'CCRClient: Epoch mismatch (409)' 1716 ) { 1717 throw error 1718 } 1719 logForDebugging(`Error hydrating session from CCR v2: ${error}`) 1720 logForDiagnosticsNoPII('error', 'hydrate_ccr_v2_fail') 1721 return false 1722 } 1723} 1724 1725function extractFirstPrompt(transcript: TranscriptMessage[]): string { 1726 const textContent = getFirstMeaningfulUserMessageTextContent(transcript) 1727 if (textContent) { 1728 let result = textContent.replace(/\n/g, ' ').trim() 1729 1730 // Store a reasonably long version for display-time truncation 1731 // The actual truncation will be applied at display time based on terminal width 1732 if (result.length > 200) { 1733 result = result.slice(0, 200).trim() + '…' 1734 } 1735 1736 return result 1737 } 1738 1739 return 'No prompt' 1740} 1741 1742/** 1743 * Gets the last user message that was processed (i.e., before any non-user message appears). 1744 * Used to determine if a session has valid user interaction. 
 */
export function getFirstMeaningfulUserMessageTextContent<T extends Message>(
  transcript: T[],
): string | undefined {
  for (const msg of transcript) {
    // Only real (non-meta) user messages can supply the first prompt.
    if (msg.type !== 'user' || msg.isMeta) continue
    // Skip compact summary messages - they should not be treated as the first prompt
    if ('isCompactSummary' in msg && msg.isCompactSummary) continue

    const content = msg.message?.content
    if (!content) continue

    // Collect all text values. For array content (common in VS Code where
    // IDE metadata tags come before the user's actual prompt), iterate all
    // text blocks so we don't miss the real prompt hidden behind
    // <ide_selection>/<ide_opened_file> blocks.
    const texts: string[] = []
    if (typeof content === 'string') {
      texts.push(content)
    } else if (Array.isArray(content)) {
      for (const block of content) {
        if (block.type === 'text' && block.text) {
          texts.push(block.text)
        }
      }
    }

    for (const textContent of texts) {
      if (!textContent) continue

      const commandNameTag = extractTag(textContent, COMMAND_NAME_TAG)
      if (commandNameTag) {
        // Normalize "/foo" → "foo" for the built-in lookup below.
        const commandName = commandNameTag.replace(/^\//, '')

        // If it's a built-in command, then it's unlikely to provide
        // meaningful context (e.g. `/model sonnet`)
        if (builtInCommandNames().has(commandName)) {
          continue
        } else {
          // Otherwise, for custom commands, then keep it only if it has
          // arguments (e.g. `/review reticulate splines`)
          const commandArgs = extractTag(textContent, 'command-args')?.trim()
          if (!commandArgs) {
            continue
          }
          // Return clean formatted command instead of raw XML
          return `${commandNameTag} ${commandArgs}`
        }
      }

      // Format bash input with ! prefix (as user typed it). Checked before
      // the generic XML skip so bash-mode sessions get a meaningful title.
      const bashInput = extractTag(textContent, 'bash-input')
      if (bashInput) {
        return `! ${bashInput}`
      }

      // Skip non-meaningful messages (local command output, hook output,
      // autonomous tick prompts, task notifications, pure IDE metadata tags)
      if (SKIP_FIRST_PROMPT_PATTERN.test(textContent)) {
        continue
      }

      return textContent
    }
  }
  // No user message with meaningful text content found.
  return undefined
}

/**
 * Strip persistence-only fields (isSidechain, parentUuid) from transcript
 * messages before they are handed out as serialized messages. The chain
 * topology encoded by parentUuid is already resolved by the time callers
 * receive this array, so the links are dropped rather than exposed.
 */
export function removeExtraFields(
  transcript: TranscriptMessage[],
): SerializedMessage[] {
  return transcript.map(m => {
    const { isSidechain, parentUuid, ...serializedMessage } = m
    return serializedMessage
  })
}

/**
 * Splice the preserved segment back into the chain after compaction.
 *
 * Preserved messages exist in the JSONL with their ORIGINAL pre-compact
 * parentUuids (recordTranscript dedup-skipped them — can't rewrite).
 * The internal chain (keep[i+1]→keep[i]) is intact; only endpoints need
 * patching: head→anchor, and anchor's other children→tail. Anchor is the
 * last summary for suffix-preserving, boundary itself for prefix-preserving.
 *
 * Only the LAST seg-boundary is relinked — earlier segs were summarized
 * into it. Everything physically before the absolute-last boundary (except
 * preservedUuids) is deleted, which handles all multi-boundary shapes
 * without special-casing.
 *
 * Mutates the Map in place.
 */
function applyPreservedSegmentRelinks(
  messages: Map<UUID, TranscriptMessage>,
): void {
  type Seg = NonNullable<
    SystemCompactBoundaryMessage['compactMetadata']['preservedSegment']
  >

  // Find the absolute-last boundary and the last seg-boundary (can differ:
  // manual /compact after reactive compact → seg is stale.
1848 let lastSeg: Seg | undefined 1849 let lastSegBoundaryIdx = -1 1850 let absoluteLastBoundaryIdx = -1 1851 const entryIndex = new Map<UUID, number>() 1852 let i = 0 1853 for (const entry of messages.values()) { 1854 entryIndex.set(entry.uuid, i) 1855 if (isCompactBoundaryMessage(entry)) { 1856 absoluteLastBoundaryIdx = i 1857 const seg = entry.compactMetadata?.preservedSegment 1858 if (seg) { 1859 lastSeg = seg 1860 lastSegBoundaryIdx = i 1861 } 1862 } 1863 i++ 1864 } 1865 // No seg anywhere → no-op. findUnresolvedToolUse etc. read the full map. 1866 if (!lastSeg) return 1867 1868 // Seg stale (no-seg boundary came after): skip relink, still prune at 1869 // absolute — otherwise the stale preserved chain becomes a phantom leaf. 1870 const segIsLive = lastSegBoundaryIdx === absoluteLastBoundaryIdx 1871 1872 // Validate tail→head BEFORE mutating so malformed metadata is a true 1873 // no-op (walk stops at headUuid, doesn't need the relink to run first). 1874 const preservedUuids = new Set<UUID>() 1875 if (segIsLive) { 1876 const walkSeen = new Set<UUID>() 1877 let cur = messages.get(lastSeg.tailUuid) 1878 let reachedHead = false 1879 while (cur && !walkSeen.has(cur.uuid)) { 1880 walkSeen.add(cur.uuid) 1881 preservedUuids.add(cur.uuid) 1882 if (cur.uuid === lastSeg.headUuid) { 1883 reachedHead = true 1884 break 1885 } 1886 cur = cur.parentUuid ? messages.get(cur.parentUuid) : undefined 1887 } 1888 if (!reachedHead) { 1889 // tail→head walk broke — a UUID in the preserved segment isn't in the 1890 // transcript. Returning here skips the prune below, so resume loads 1891 // the full pre-compact history. Known cause: mid-turn-yielded 1892 // attachment pushed to mutableMessages but never recordTranscript'd 1893 // (SDK subprocess restarted before next turn's qe:420 flush). 
1894 logEvent('tengu_relink_walk_broken', { 1895 tailInTranscript: messages.has(lastSeg.tailUuid), 1896 headInTranscript: messages.has(lastSeg.headUuid), 1897 anchorInTranscript: messages.has(lastSeg.anchorUuid), 1898 walkSteps: walkSeen.size, 1899 transcriptSize: messages.size, 1900 }) 1901 return 1902 } 1903 } 1904 1905 if (segIsLive) { 1906 const head = messages.get(lastSeg.headUuid) 1907 if (head) { 1908 messages.set(lastSeg.headUuid, { 1909 ...head, 1910 parentUuid: lastSeg.anchorUuid, 1911 }) 1912 } 1913 // Tail-splice: anchor's other children → tail. No-op if already pointing 1914 // at tail (the useLogMessages race case). 1915 for (const [uuid, msg] of messages) { 1916 if (msg.parentUuid === lastSeg.anchorUuid && uuid !== lastSeg.headUuid) { 1917 messages.set(uuid, { ...msg, parentUuid: lastSeg.tailUuid }) 1918 } 1919 } 1920 // Zero stale usage: on-disk input_tokens reflect pre-compact context 1921 // (~190K) — stripStaleUsage only patched in-memory copies that were 1922 // dedup-skipped. Without this, resume → immediate autocompact spiral. 1923 for (const uuid of preservedUuids) { 1924 const msg = messages.get(uuid) 1925 if (msg?.type !== 'assistant') continue 1926 messages.set(uuid, { 1927 ...msg, 1928 message: { 1929 ...msg.message, 1930 usage: { 1931 ...msg.message.usage, 1932 input_tokens: 0, 1933 output_tokens: 0, 1934 cache_creation_input_tokens: 0, 1935 cache_read_input_tokens: 0, 1936 }, 1937 }, 1938 }) 1939 } 1940 } 1941 1942 // Prune everything physically before the absolute-last boundary that 1943 // isn't preserved. preservedUuids empty when !segIsLive → full prune. 
1944 const toDelete: UUID[] = [] 1945 for (const [uuid] of messages) { 1946 const idx = entryIndex.get(uuid) 1947 if ( 1948 idx !== undefined && 1949 idx < absoluteLastBoundaryIdx && 1950 !preservedUuids.has(uuid) 1951 ) { 1952 toDelete.push(uuid) 1953 } 1954 } 1955 for (const uuid of toDelete) messages.delete(uuid) 1956} 1957 1958/** 1959 * Delete messages that Snip executions removed from the in-memory array, 1960 * and relink parentUuid across the gaps. 1961 * 1962 * Unlike compact_boundary which truncates a prefix, snip removes 1963 * middle ranges. The JSONL is append-only, so removed messages stay on disk 1964 * and the surviving messages' parentUuid chains walk through them. Without 1965 * this filter, buildConversationChain reconstructs the full unsnipped history 1966 * and resume immediately PTLs (adamr-20260320-165831: 397K displayed → 1.65M 1967 * actual). 1968 * 1969 * Deleting alone is not enough: the surviving message AFTER a removed range 1970 * has parentUuid pointing INTO the gap. buildConversationChain would hit 1971 * messages.get(undefined) and stop, orphaning everything before the gap. So 1972 * after delete we relink: for each survivor with a dangling parentUuid, walk 1973 * backward through the removed region's own parent links to the first 1974 * non-removed ancestor. 1975 * 1976 * The boundary records removedUuids at execution time so we can replay the 1977 * exact removal on load. Older boundaries without removedUuids are skipped — 1978 * resume loads their pre-snip history (the pre-fix behavior). 1979 * 1980 * Mutates the Map in place. 1981 */ 1982function applySnipRemovals(messages: Map<UUID, TranscriptMessage>): void { 1983 // Structural check — snipMetadata only exists on the boundary subtype. 1984 // Avoids the subtype literal which is in excluded-strings.txt 1985 // (HISTORY_SNIP is ant-only; the literal must not leak into external builds). 
1986 type WithSnipMeta = { snipMetadata?: { removedUuids?: UUID[] } } 1987 const toDelete = new Set<UUID>() 1988 for (const entry of messages.values()) { 1989 const removedUuids = (entry as WithSnipMeta).snipMetadata?.removedUuids 1990 if (!removedUuids) continue 1991 for (const uuid of removedUuids) toDelete.add(uuid) 1992 } 1993 if (toDelete.size === 0) return 1994 1995 // Capture each to-delete entry's own parentUuid BEFORE deleting so we can 1996 // walk backward through contiguous removed ranges. Entries not in the Map 1997 // (already absent, e.g. from a prior compact_boundary prune) contribute no 1998 // link; the relink walk will stop at the gap and pick up null (chain-root 1999 // behavior — same as if compact truncated there, which it did). 2000 const deletedParent = new Map<UUID, UUID | null>() 2001 let removedCount = 0 2002 for (const uuid of toDelete) { 2003 const entry = messages.get(uuid) 2004 if (!entry) continue 2005 deletedParent.set(uuid, entry.parentUuid) 2006 messages.delete(uuid) 2007 removedCount++ 2008 } 2009 2010 // Relink survivors with dangling parentUuid. Walk backward through 2011 // deletedParent until we hit a UUID not in toDelete (or null). Path 2012 // compression: after resolving, seed the map with the resolved link so 2013 // subsequent survivors sharing the same chain segment don't re-walk. 
2014 const resolve = (start: UUID): UUID | null => { 2015 const path: UUID[] = [] 2016 let cur: UUID | null | undefined = start 2017 while (cur && toDelete.has(cur)) { 2018 path.push(cur) 2019 cur = deletedParent.get(cur) 2020 if (cur === undefined) { 2021 cur = null 2022 break 2023 } 2024 } 2025 for (const p of path) deletedParent.set(p, cur) 2026 return cur 2027 } 2028 let relinkedCount = 0 2029 for (const [uuid, msg] of messages) { 2030 if (!msg.parentUuid || !toDelete.has(msg.parentUuid)) continue 2031 messages.set(uuid, { ...msg, parentUuid: resolve(msg.parentUuid) }) 2032 relinkedCount++ 2033 } 2034 2035 logEvent('tengu_snip_resume_filtered', { 2036 removed_count: removedCount, 2037 relinked_count: relinkedCount, 2038 }) 2039} 2040 2041/** 2042 * O(n) single-pass: find the message with the latest timestamp matching a predicate. 2043 * Replaces the `[...values].filter(pred).sort((a,b) => Date(b)-Date(a))[0]` pattern 2044 * which is O(n log n) + 2n Date allocations. 2045 */ 2046function findLatestMessage<T extends { timestamp: string }>( 2047 messages: Iterable<T>, 2048 predicate: (m: T) => boolean, 2049): T | undefined { 2050 let latest: T | undefined 2051 let maxTime = -Infinity 2052 for (const m of messages) { 2053 if (!predicate(m)) continue 2054 const t = Date.parse(m.timestamp) 2055 if (t > maxTime) { 2056 maxTime = t 2057 latest = m 2058 } 2059 } 2060 return latest 2061} 2062 2063/** 2064 * Builds a conversation chain from a leaf message to root 2065 * @param messages Map of all messages 2066 * @param leafMessage The leaf message to start from 2067 * @returns Array of messages from root to leaf 2068 */ 2069export function buildConversationChain( 2070 messages: Map<UUID, TranscriptMessage>, 2071 leafMessage: TranscriptMessage, 2072): TranscriptMessage[] { 2073 const transcript: TranscriptMessage[] = [] 2074 const seen = new Set<UUID>() 2075 let currentMsg: TranscriptMessage | undefined = leafMessage 2076 while (currentMsg) { 2077 if 
(seen.has(currentMsg.uuid)) { 2078 logError( 2079 new Error( 2080 `Cycle detected in parentUuid chain at message ${currentMsg.uuid}. Returning partial transcript.`, 2081 ), 2082 ) 2083 logEvent('tengu_chain_parent_cycle', {}) 2084 break 2085 } 2086 seen.add(currentMsg.uuid) 2087 transcript.push(currentMsg) 2088 currentMsg = currentMsg.parentUuid 2089 ? messages.get(currentMsg.parentUuid) 2090 : undefined 2091 } 2092 transcript.reverse() 2093 return recoverOrphanedParallelToolResults(messages, transcript, seen) 2094} 2095 2096/** 2097 * Post-pass for buildConversationChain: recover sibling assistant blocks and 2098 * tool_results that the single-parent walk orphaned. 2099 * 2100 * Streaming (claude.ts:~2024) emits one AssistantMessage per content_block_stop 2101 * — N parallel tool_uses → N messages, distinct uuid, same message.id. Each 2102 * tool_result's sourceToolAssistantUUID points to its own one-block assistant, 2103 * so insertMessageChain's override (line ~894) writes each TR's parentUuid to a 2104 * DIFFERENT assistant. The topology is a DAG; the walk above is a linked-list 2105 * traversal and keeps only one branch. 2106 * 2107 * Two loss modes observed in production (both fixed here): 2108 * 1. Sibling assistant orphaned: walk goes prev→asstA→TR_A→next, drops asstB 2109 * (same message.id, chained off asstA) and TR_B. 2110 * 2. Progress-fork (legacy, pre-#23537): each tool_use asst had a progress 2111 * child (continued the write chain) AND a TR child. Walk followed 2112 * progress; TRs were dropped. No longer written (progress removed from 2113 * transcript persistence), but old transcripts still have this shape. 2114 * 2115 * Read-side fix: the write topology is already on disk for old transcripts; 2116 * this recovery pass handles them. 
2117 */ 2118function recoverOrphanedParallelToolResults( 2119 messages: Map<UUID, TranscriptMessage>, 2120 chain: TranscriptMessage[], 2121 seen: Set<UUID>, 2122): TranscriptMessage[] { 2123 type ChainAssistant = Extract<TranscriptMessage, { type: 'assistant' }> 2124 const chainAssistants = chain.filter( 2125 (m): m is ChainAssistant => m.type === 'assistant', 2126 ) 2127 if (chainAssistants.length === 0) return chain 2128 2129 // Anchor = last on-chain member of each sibling group. chainAssistants is 2130 // already in chain order, so later iterations overwrite → last wins. 2131 const anchorByMsgId = new Map<string, ChainAssistant>() 2132 for (const a of chainAssistants) { 2133 if (a.message.id) anchorByMsgId.set(a.message.id, a) 2134 } 2135 2136 // O(n) precompute: sibling groups and TR index. 2137 // TRs indexed by parentUuid — insertMessageChain:~894 already wrote that 2138 // as the srcUUID, and --fork-session strips srcUUID but keeps parentUuid. 2139 const siblingsByMsgId = new Map<string, TranscriptMessage[]>() 2140 const toolResultsByAsst = new Map<UUID, TranscriptMessage[]>() 2141 for (const m of messages.values()) { 2142 if (m.type === 'assistant' && m.message.id) { 2143 const group = siblingsByMsgId.get(m.message.id) 2144 if (group) group.push(m) 2145 else siblingsByMsgId.set(m.message.id, [m]) 2146 } else if ( 2147 m.type === 'user' && 2148 m.parentUuid && 2149 Array.isArray(m.message.content) && 2150 m.message.content.some(b => b.type === 'tool_result') 2151 ) { 2152 const group = toolResultsByAsst.get(m.parentUuid) 2153 if (group) group.push(m) 2154 else toolResultsByAsst.set(m.parentUuid, [m]) 2155 } 2156 } 2157 2158 // For each message.id group touching the chain: collect off-chain siblings, 2159 // then off-chain TRs for ALL members. Splice right after the last on-chain 2160 // member so the group stays contiguous for normalizeMessagesForAPI's merge 2161 // and every TR lands after its tool_use. 
2162 const processedGroups = new Set<string>() 2163 const inserts = new Map<UUID, TranscriptMessage[]>() 2164 let recoveredCount = 0 2165 for (const asst of chainAssistants) { 2166 const msgId = asst.message.id 2167 if (!msgId || processedGroups.has(msgId)) continue 2168 processedGroups.add(msgId) 2169 2170 const group = siblingsByMsgId.get(msgId) ?? [asst] 2171 const orphanedSiblings = group.filter(s => !seen.has(s.uuid)) 2172 const orphanedTRs: TranscriptMessage[] = [] 2173 for (const member of group) { 2174 const trs = toolResultsByAsst.get(member.uuid) 2175 if (!trs) continue 2176 for (const tr of trs) { 2177 if (!seen.has(tr.uuid)) orphanedTRs.push(tr) 2178 } 2179 } 2180 if (orphanedSiblings.length === 0 && orphanedTRs.length === 0) continue 2181 2182 // Timestamp sort keeps content-block / completion order; stable-sort 2183 // preserves JSONL write order on ties. 2184 orphanedSiblings.sort((a, b) => a.timestamp.localeCompare(b.timestamp)) 2185 orphanedTRs.sort((a, b) => a.timestamp.localeCompare(b.timestamp)) 2186 2187 const anchor = anchorByMsgId.get(msgId)! 2188 const recovered = [...orphanedSiblings, ...orphanedTRs] 2189 for (const r of recovered) seen.add(r.uuid) 2190 recoveredCount += recovered.length 2191 inserts.set(anchor.uuid, recovered) 2192 } 2193 2194 if (recoveredCount === 0) return chain 2195 logEvent('tengu_chain_parallel_tr_recovered', { 2196 recovered_count: recoveredCount, 2197 }) 2198 2199 const result: TranscriptMessage[] = [] 2200 for (const m of chain) { 2201 result.push(m) 2202 const toInsert = inserts.get(m.uuid) 2203 if (toInsert) result.push(...toInsert) 2204 } 2205 return result 2206} 2207 2208/** 2209 * Find the latest turn_duration checkpoint in the reconstructed chain and 2210 * compare its recorded messageCount against the chain's position at that 2211 * point. 
Emits tengu_resume_consistency_delta for BigQuery monitoring of 2212 * write→load round-trip drift — the class of bugs where snip/compact/ 2213 * parallel-TR operations mutate in-memory but the parentUuid walk on disk 2214 * reconstructs a different set (adamr-20260320-165831: 397K displayed → 2215 * 1.65M actual on resume). 2216 * 2217 * delta > 0: resume loaded MORE than in-session (the usual failure mode) 2218 * delta < 0: resume loaded FEWER (chain truncation — #22453 class) 2219 * delta = 0: round-trip consistent 2220 * 2221 * Called from loadConversationForResume — fires once per resume, not on 2222 * /share or log-listing chain rebuilds. 2223 */ 2224export function checkResumeConsistency(chain: Message[]): void { 2225 for (let i = chain.length - 1; i >= 0; i--) { 2226 const m = chain[i]! 2227 if (m.type !== 'system' || m.subtype !== 'turn_duration') continue 2228 const expected = m.messageCount 2229 if (expected === undefined) return 2230 // `i` is the 0-based index of the checkpoint in the reconstructed chain. 2231 // The checkpoint was appended AFTER messageCount messages, so its own 2232 // position should be messageCount (i.e., i === expected). 
    // `i` is the checkpoint's position in the reconstructed chain; the
    // delta against the recorded messageCount measures round-trip drift.
    const actual = i
    logEvent('tengu_resume_consistency_delta', {
      expected,
      actual,
      delta: actual - expected,
      chain_length: chain.length,
      checkpoint_age_entries: chain.length - 1 - i,
    })
    return
  }
}

/**
 * Builds a file history snapshot chain from the conversation.
 *
 * Walks the reconstructed conversation in order and collects each message's
 * snapshot. Entries flagged `isSnapshotUpdate` replace the previously
 * collected snapshot for the same `messageId` in place (so updates do not
 * duplicate or reorder), while non-update entries always append.
 */
function buildFileHistorySnapshotChain(
  fileHistorySnapshots: Map<UUID, FileHistorySnapshotMessage>,
  conversation: TranscriptMessage[],
): FileHistorySnapshot[] {
  const snapshots: FileHistorySnapshot[] = []
  // messageId → last index in snapshots[] for O(1) update lookup
  const indexByMessageId = new Map<string, number>()
  for (const message of conversation) {
    const snapshotMessage = fileHistorySnapshots.get(message.uuid)
    if (!snapshotMessage) {
      continue
    }
    const { snapshot, isSnapshotUpdate } = snapshotMessage
    // Only updates look for an existing slot; fresh snapshots always append.
    const existingIndex = isSnapshotUpdate
      ? indexByMessageId.get(snapshot.messageId)
      : undefined
    if (existingIndex === undefined) {
      indexByMessageId.set(snapshot.messageId, snapshots.length)
      snapshots.push(snapshot)
    } else {
      snapshots[existingIndex] = snapshot
    }
  }
  return snapshots
}

/**
 * Builds an attribution snapshot chain from the conversation.
 * Unlike file history snapshots, attribution snapshots are returned in full
 * because they use generated UUIDs (not message UUIDs) and represent
 * cumulative state that should be restored on session resume.
2279 */ 2280function buildAttributionSnapshotChain( 2281 attributionSnapshots: Map<UUID, AttributionSnapshotMessage>, 2282 _conversation: TranscriptMessage[], 2283): AttributionSnapshotMessage[] { 2284 // Return all attribution snapshots - they will be merged during restore 2285 return Array.from(attributionSnapshots.values()) 2286} 2287 2288/** 2289 * Loads a transcript from a JSON or JSONL file and converts it to LogOption format 2290 * @param filePath Path to the transcript file (.json or .jsonl) 2291 * @returns LogOption containing the transcript messages 2292 * @throws Error if file doesn't exist or contains invalid data 2293 */ 2294export async function loadTranscriptFromFile( 2295 filePath: string, 2296): Promise<LogOption> { 2297 if (filePath.endsWith('.jsonl')) { 2298 const { 2299 messages, 2300 summaries, 2301 customTitles, 2302 tags, 2303 fileHistorySnapshots, 2304 attributionSnapshots, 2305 contextCollapseCommits, 2306 contextCollapseSnapshot, 2307 leafUuids, 2308 contentReplacements, 2309 worktreeStates, 2310 } = await loadTranscriptFile(filePath) 2311 2312 if (messages.size === 0) { 2313 throw new Error('No messages found in JSONL file') 2314 } 2315 2316 // Find the most recent leaf message using pre-computed leaf UUIDs 2317 const leafMessage = findLatestMessage(messages.values(), msg => 2318 leafUuids.has(msg.uuid), 2319 ) 2320 2321 if (!leafMessage) { 2322 throw new Error('No valid conversation chain found in JSONL file') 2323 } 2324 2325 // Build the conversation chain backwards from leaf to root 2326 const transcript = buildConversationChain(messages, leafMessage) 2327 2328 const summary = summaries.get(leafMessage.uuid) 2329 const customTitle = customTitles.get(leafMessage.sessionId as UUID) 2330 const tag = tags.get(leafMessage.sessionId as UUID) 2331 const sessionId = leafMessage.sessionId as UUID 2332 return { 2333 ...convertToLogOption( 2334 transcript, 2335 0, 2336 summary, 2337 customTitle, 2338 
buildFileHistorySnapshotChain(fileHistorySnapshots, transcript), 2339 tag, 2340 filePath, 2341 buildAttributionSnapshotChain(attributionSnapshots, transcript), 2342 undefined, 2343 contentReplacements.get(sessionId) ?? [], 2344 ), 2345 contextCollapseCommits: contextCollapseCommits.filter( 2346 e => e.sessionId === sessionId, 2347 ), 2348 contextCollapseSnapshot: 2349 contextCollapseSnapshot?.sessionId === sessionId 2350 ? contextCollapseSnapshot 2351 : undefined, 2352 worktreeSession: worktreeStates.has(sessionId) 2353 ? worktreeStates.get(sessionId) 2354 : undefined, 2355 } 2356 } 2357 2358 // json log files 2359 const content = await readFile(filePath, { encoding: 'utf-8' }) 2360 let parsed: unknown 2361 2362 try { 2363 parsed = jsonParse(content) 2364 } catch (error) { 2365 throw new Error(`Invalid JSON in transcript file: ${error}`) 2366 } 2367 2368 let messages: TranscriptMessage[] 2369 2370 if (Array.isArray(parsed)) { 2371 messages = parsed 2372 } else if (parsed && typeof parsed === 'object' && 'messages' in parsed) { 2373 if (!Array.isArray(parsed.messages)) { 2374 throw new Error('Transcript messages must be an array') 2375 } 2376 messages = parsed.messages 2377 } else { 2378 throw new Error( 2379 'Transcript must be an array of messages or an object with a messages array', 2380 ) 2381 } 2382 2383 return convertToLogOption( 2384 messages, 2385 0, 2386 undefined, 2387 undefined, 2388 undefined, 2389 undefined, 2390 filePath, 2391 ) 2392} 2393 2394/** 2395 * Checks if a user message has visible content (text or image, not just tool_result). 2396 * Tool results are displayed as part of collapsed groups, not as standalone messages. 2397 * Also excludes meta messages which are not shown to the user. 
2398 */ 2399function hasVisibleUserContent(message: TranscriptMessage): boolean { 2400 if (message.type !== 'user') return false 2401 2402 // Meta messages are not shown to the user 2403 if (message.isMeta) return false 2404 2405 const content = message.message?.content 2406 if (!content) return false 2407 2408 // String content is always visible 2409 if (typeof content === 'string') { 2410 return content.trim().length > 0 2411 } 2412 2413 // Array content: check for text or image blocks (not tool_result) 2414 if (Array.isArray(content)) { 2415 return content.some( 2416 block => 2417 block.type === 'text' || 2418 block.type === 'image' || 2419 block.type === 'document', 2420 ) 2421 } 2422 2423 return false 2424} 2425 2426/** 2427 * Checks if an assistant message has visible text content (not just tool_use blocks). 2428 * Tool uses are displayed as grouped/collapsed UI elements, not as standalone messages. 2429 */ 2430function hasVisibleAssistantContent(message: TranscriptMessage): boolean { 2431 if (message.type !== 'assistant') return false 2432 2433 const content = message.message?.content 2434 if (!content || !Array.isArray(content)) return false 2435 2436 // Check for text block (not just tool_use/thinking blocks) 2437 return content.some( 2438 block => 2439 block.type === 'text' && 2440 typeof block.text === 'string' && 2441 block.text.trim().length > 0, 2442 ) 2443} 2444 2445/** 2446 * Counts visible messages that would appear as conversation turns in the UI. 
 * Excludes:
 * - System, attachment, and progress messages
 * - User messages with isMeta flag (hidden from user)
 * - User messages that only contain tool_result blocks (displayed as collapsed groups)
 * - Assistant messages that only contain tool_use blocks (displayed as collapsed groups)
 */
function countVisibleMessages(transcript: TranscriptMessage[]): number {
  let count = 0
  for (const message of transcript) {
    switch (message.type) {
      case 'user':
        // Count user messages with visible content (text, image, not just tool_result or meta)
        if (hasVisibleUserContent(message)) {
          count++
        }
        break
      case 'assistant':
        // Count assistant messages with text content (not just tool_use)
        if (hasVisibleAssistantContent(message)) {
          count++
        }
        break
      case 'attachment':
      case 'system':
      case 'progress':
        // These message types are not counted as visible conversation turns
        break
    }
  }
  return count
}

/**
 * Convert a reconstructed transcript plus its side-channel metadata into a
 * LogOption for the session picker / resume flow.
 *
 * Precondition: `transcript` must be non-empty — the first and last messages
 * are read with non-null assertions to derive created/modified timestamps,
 * the leaf UUID, and the project path.
 *
 * @param transcript Messages in root→leaf order.
 * @param value Picker value/index (defaults to 0).
 * @param summary Optional compaction summary associated with the leaf.
 * @param customTitle Optional user-assigned session title.
 * @param fileHistorySnapshots Optional snapshot chain built for this transcript.
 * @param tag Optional session tag.
 * @param fullPath Optional path to the backing transcript file.
 * @param attributionSnapshots Optional attribution snapshots to restore.
 * @param agentSetting Optional agent setting string.
 * @param contentReplacements Optional content replacement records.
 */
function convertToLogOption(
  transcript: TranscriptMessage[],
  value: number = 0,
  summary?: string,
  customTitle?: string,
  fileHistorySnapshots?: FileHistorySnapshot[],
  tag?: string,
  fullPath?: string,
  attributionSnapshots?: AttributionSnapshotMessage[],
  agentSetting?: string,
  contentReplacements?: ContentReplacementRecord[],
): LogOption {
  const lastMessage = transcript.at(-1)!
  const firstMessage = transcript[0]!

  // Get the first user message for the prompt
  const firstPrompt = extractFirstPrompt(transcript)

  // Create timestamps from message timestamps
  const created = new Date(firstMessage.timestamp)
  const modified = new Date(lastMessage.timestamp)

  return {
    date: lastMessage.timestamp,
    messages: removeExtraFields(transcript),
    fullPath,
    value,
    created,
    modified,
    firstPrompt,
    messageCount: countVisibleMessages(transcript),
    isSidechain: firstMessage.isSidechain,
    teamName: firstMessage.teamName,
    agentName: firstMessage.agentName,
    agentSetting,
    leafUuid: lastMessage.uuid,
    summary,
    customTitle,
    tag,
    fileHistorySnapshots: fileHistorySnapshots,
    attributionSnapshots: attributionSnapshots,
    contentReplacements,
    gitBranch: lastMessage.gitBranch,
    projectPath: firstMessage.cwd,
  }
}

/**
 * Count forked branches across the fetched logs (multiple logs sharing one
 * sessionId) and emit an analytics event when any session has more than one
 * branch. No event is emitted when every sessionId is unique.
 */
async function trackSessionBranchingAnalytics(
  logs: LogOption[],
): Promise<void> {
  const sessionIdCounts = new Map<string, number>()
  let maxCount = 0
  for (const log of logs) {
    const sessionId = getSessionIdFromLog(log)
    if (sessionId) {
      const newCount = (sessionIdCounts.get(sessionId) || 0) + 1
      sessionIdCounts.set(sessionId, newCount)
      maxCount = Math.max(newCount, maxCount)
    }
  }

  // Early exit if no duplicates detected
  if (maxCount <= 1) {
    return
  }

  // Count sessions with branches and calculate stats using functional approach
  const branchCounts = Array.from(sessionIdCounts.values()).filter(c => c > 1)
  const sessionsWithBranches = branchCounts.length
  const totalBranches = branchCounts.reduce((sum, count) => sum + count, 0)

  logEvent('tengu_session_forked_branches_fetched', {
    total_sessions: sessionIdCounts.size,
    sessions_with_branches: sessionsWithBranches,
    max_branches_per_session: Math.max(...branchCounts),
avg_branches_per_session: Math.round(totalBranches / sessionsWithBranches), 2555 total_transcript_count: logs.length, 2556 }) 2557} 2558 2559export async function fetchLogs(limit?: number): Promise<LogOption[]> { 2560 const projectDir = getProjectDir(getOriginalCwd()) 2561 const logs = await getSessionFilesLite(projectDir, limit, getOriginalCwd()) 2562 2563 await trackSessionBranchingAnalytics(logs) 2564 2565 return logs 2566} 2567 2568/** 2569 * Append an entry to a session file. Creates the parent dir if missing. 2570 */ 2571/* eslint-disable custom-rules/no-sync-fs -- sync callers (exit cleanup, materialize) */ 2572function appendEntryToFile( 2573 fullPath: string, 2574 entry: Record<string, unknown>, 2575): void { 2576 const fs = getFsImplementation() 2577 const line = jsonStringify(entry) + '\n' 2578 try { 2579 fs.appendFileSync(fullPath, line, { mode: 0o600 }) 2580 } catch { 2581 fs.mkdirSync(dirname(fullPath), { mode: 0o700 }) 2582 fs.appendFileSync(fullPath, line, { mode: 0o600 }) 2583 } 2584} 2585 2586/** 2587 * Sync tail read for reAppendSessionMetadata's external-writer check. 2588 * fstat on the already-open fd (no extra path lookup); reads the same 2589 * LITE_READ_BUF_SIZE window that readLiteMetadata scans. Returns empty 2590 * string on any error so callers fall through to unconditional behavior. 
 */
function readFileTailSync(fullPath: string): string {
  let fd: number | undefined
  try {
    fd = openSync(fullPath, 'r')
    const st = fstatSync(fd)
    // Read at most the last LITE_READ_BUF_SIZE bytes of the file.
    const tailOffset = Math.max(0, st.size - LITE_READ_BUF_SIZE)
    // allocUnsafe is safe here: toString below is bounded to bytesRead,
    // so uninitialized buffer memory is never exposed.
    const buf = Buffer.allocUnsafe(
      Math.min(LITE_READ_BUF_SIZE, st.size - tailOffset),
    )
    const bytesRead = readSync(fd, buf, 0, buf.length, tailOffset)
    return buf.toString('utf8', 0, bytesRead)
  } catch {
    // Per contract: empty string on any error (missing file, perms, etc.).
    return ''
  } finally {
    if (fd !== undefined) {
      try {
        closeSync(fd)
      } catch {
        // closeSync can throw; swallow to preserve return '' contract
      }
    }
  }
}
/* eslint-enable custom-rules/no-sync-fs */

/**
 * Persist a session title as a `custom-title` JSONL entry and cache it for
 * the current session. Fires the `tengu_session_renamed` analytics event.
 *
 * @param sessionId Session to rename.
 * @param customTitle New title text.
 * @param fullPath Optional transcript path; computed from sessionId if omitted.
 * @param source Whether the rename came from the user or an automatic flow.
 */
export async function saveCustomTitle(
  sessionId: UUID,
  customTitle: string,
  fullPath?: string,
  source: 'user' | 'auto' = 'user',
) {
  // Fall back to computed path if fullPath is not provided
  const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId)
  appendEntryToFile(resolvedPath, {
    type: 'custom-title',
    customTitle,
    sessionId,
  })
  // Cache for current session only (for immediate visibility)
  if (sessionId === getSessionId()) {
    getProject().currentSessionTitle = customTitle
  }
  logEvent('tengu_session_renamed', {
    source:
      source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })
}

/**
 * Persist an AI-generated title to the JSONL as a distinct `ai-title` entry.
 *
 * Writing a separate entry type (vs. reusing `custom-title`) is load-bearing:
 * - Read preference: readers prefer `customTitle` field over `aiTitle`, so
 *   a user rename always wins regardless of append order.
2646 * - Resume safety: `loadTranscriptFile` only populates the `customTitles` 2647 * Map from `custom-title` entries, so `restoreSessionMetadata` never 2648 * caches an AI title and `reAppendSessionMetadata` never re-appends one 2649 * at EOF — avoiding the clobber-on-resume bug where a stale AI title 2650 * overwrites a mid-session user rename. 2651 * - CAS semantics: VS Code's `onlyIfNoCustomTitle` check scans for the 2652 * `customTitle` field only, so AI can overwrite its own previous AI 2653 * title but never a user title. 2654 * - Metrics: `tengu_session_renamed` is not fired for AI titles. 2655 * 2656 * Because the entry is never re-appended, it scrolls out of the 64KB tail 2657 * window once enough messages accumulate. Readers (`readLiteMetadata`, 2658 * `listSessionsImpl`, VS Code `fetchSessions`) fall back to scanning the 2659 * head buffer for `aiTitle` in that case. Both head and tail reads are 2660 * bounded (64KB each via `extractLastJsonStringField`), never a full scan. 2661 * 2662 * Callers with a stale-write guard (e.g., VS Code client) should prefer 2663 * passing `persist: false` to the SDK control request and persisting 2664 * through their own rename path after the guard passes, to avoid a race 2665 * where the AI title lands after a mid-flight user rename. 2666 */ 2667export function saveAiGeneratedTitle(sessionId: UUID, aiTitle: string): void { 2668 appendEntryToFile(getTranscriptPathForSession(sessionId), { 2669 type: 'ai-title', 2670 aiTitle, 2671 sessionId, 2672 }) 2673} 2674 2675/** 2676 * Append a periodic task summary for `claude ps`. Unlike ai-title this is 2677 * not re-appended by reAppendSessionMetadata — it's a rolling snapshot of 2678 * what the agent is doing *now*, so staleness is fine; ps reads the most 2679 * recent one from the tail. 
2680 */ 2681export function saveTaskSummary(sessionId: UUID, summary: string): void { 2682 appendEntryToFile(getTranscriptPathForSession(sessionId), { 2683 type: 'task-summary', 2684 summary, 2685 sessionId, 2686 timestamp: new Date().toISOString(), 2687 }) 2688} 2689 2690export async function saveTag(sessionId: UUID, tag: string, fullPath?: string) { 2691 // Fall back to computed path if fullPath is not provided 2692 const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId) 2693 appendEntryToFile(resolvedPath, { type: 'tag', tag, sessionId }) 2694 // Cache for current session only (for immediate visibility) 2695 if (sessionId === getSessionId()) { 2696 getProject().currentSessionTag = tag 2697 } 2698 logEvent('tengu_session_tagged', {}) 2699} 2700 2701/** 2702 * Link a session to a GitHub pull request. 2703 * This stores the PR number, URL, and repository for tracking and navigation. 2704 */ 2705export async function linkSessionToPR( 2706 sessionId: UUID, 2707 prNumber: number, 2708 prUrl: string, 2709 prRepository: string, 2710 fullPath?: string, 2711): Promise<void> { 2712 const resolvedPath = fullPath ?? 
getTranscriptPathForSession(sessionId) 2713 appendEntryToFile(resolvedPath, { 2714 type: 'pr-link', 2715 sessionId, 2716 prNumber, 2717 prUrl, 2718 prRepository, 2719 timestamp: new Date().toISOString(), 2720 }) 2721 // Cache for current session so reAppendSessionMetadata can re-write after compaction 2722 if (sessionId === getSessionId()) { 2723 const project = getProject() 2724 project.currentSessionPrNumber = prNumber 2725 project.currentSessionPrUrl = prUrl 2726 project.currentSessionPrRepository = prRepository 2727 } 2728 logEvent('tengu_session_linked_to_pr', { prNumber }) 2729} 2730 2731export function getCurrentSessionTag(sessionId: UUID): string | undefined { 2732 // Only returns tag for current session (the only one we cache) 2733 if (sessionId === getSessionId()) { 2734 return getProject().currentSessionTag 2735 } 2736 return undefined 2737} 2738 2739export function getCurrentSessionTitle( 2740 sessionId: SessionId, 2741): string | undefined { 2742 // Only returns title for current session (the only one we cache) 2743 if (sessionId === getSessionId()) { 2744 return getProject().currentSessionTitle 2745 } 2746 return undefined 2747} 2748 2749export function getCurrentSessionAgentColor(): string | undefined { 2750 return getProject().currentSessionAgentColor 2751} 2752 2753/** 2754 * Restore session metadata into in-memory cache on resume. 2755 * Populates the cache so metadata is available for display (e.g. the 2756 * agent banner) and re-appended on session exit via reAppendSessionMetadata. 
2757 */ 2758export function restoreSessionMetadata(meta: { 2759 customTitle?: string 2760 tag?: string 2761 agentName?: string 2762 agentColor?: string 2763 agentSetting?: string 2764 mode?: 'coordinator' | 'normal' 2765 worktreeSession?: PersistedWorktreeSession | null 2766 prNumber?: number 2767 prUrl?: string 2768 prRepository?: string 2769}): void { 2770 const project = getProject() 2771 // ??= so --name (cacheSessionTitle) wins over the resumed 2772 // session's title. REPL.tsx clears before calling, so /resume is unaffected. 2773 if (meta.customTitle) project.currentSessionTitle ??= meta.customTitle 2774 if (meta.tag !== undefined) project.currentSessionTag = meta.tag || undefined 2775 if (meta.agentName) project.currentSessionAgentName = meta.agentName 2776 if (meta.agentColor) project.currentSessionAgentColor = meta.agentColor 2777 if (meta.agentSetting) project.currentSessionAgentSetting = meta.agentSetting 2778 if (meta.mode) project.currentSessionMode = meta.mode 2779 if (meta.worktreeSession !== undefined) 2780 project.currentSessionWorktree = meta.worktreeSession 2781 if (meta.prNumber !== undefined) 2782 project.currentSessionPrNumber = meta.prNumber 2783 if (meta.prUrl) project.currentSessionPrUrl = meta.prUrl 2784 if (meta.prRepository) project.currentSessionPrRepository = meta.prRepository 2785} 2786 2787/** 2788 * Clear all cached session metadata (title, tag, agent name/color). 2789 * Called when /clear creates a new session so stale metadata 2790 * from the previous session does not leak into the new one. 
2791 */ 2792export function clearSessionMetadata(): void { 2793 const project = getProject() 2794 project.currentSessionTitle = undefined 2795 project.currentSessionTag = undefined 2796 project.currentSessionAgentName = undefined 2797 project.currentSessionAgentColor = undefined 2798 project.currentSessionLastPrompt = undefined 2799 project.currentSessionAgentSetting = undefined 2800 project.currentSessionMode = undefined 2801 project.currentSessionWorktree = undefined 2802 project.currentSessionPrNumber = undefined 2803 project.currentSessionPrUrl = undefined 2804 project.currentSessionPrRepository = undefined 2805} 2806 2807/** 2808 * Re-append cached session metadata (custom title, tag) to the end of the 2809 * transcript file. Call this after compaction so the metadata stays within 2810 * the 16KB tail window that readLiteMetadata reads during progressive loading. 2811 * Without this, enough post-compaction messages can push the metadata entry 2812 * out of the window, causing `--resume` to show the auto-generated firstPrompt 2813 * instead of the user-set session name. 2814 */ 2815export function reAppendSessionMetadata(): void { 2816 getProject().reAppendSessionMetadata() 2817} 2818 2819export async function saveAgentName( 2820 sessionId: UUID, 2821 agentName: string, 2822 fullPath?: string, 2823 source: 'user' | 'auto' = 'user', 2824) { 2825 const resolvedPath = fullPath ?? 
getTranscriptPathForSession(sessionId) 2826 appendEntryToFile(resolvedPath, { type: 'agent-name', agentName, sessionId }) 2827 // Cache for current session only (for immediate visibility) 2828 if (sessionId === getSessionId()) { 2829 getProject().currentSessionAgentName = agentName 2830 void updateSessionName(agentName) 2831 } 2832 logEvent('tengu_agent_name_set', { 2833 source: 2834 source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 2835 }) 2836} 2837 2838export async function saveAgentColor( 2839 sessionId: UUID, 2840 agentColor: string, 2841 fullPath?: string, 2842) { 2843 const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId) 2844 appendEntryToFile(resolvedPath, { 2845 type: 'agent-color', 2846 agentColor, 2847 sessionId, 2848 }) 2849 // Cache for current session only (for immediate visibility) 2850 if (sessionId === getSessionId()) { 2851 getProject().currentSessionAgentColor = agentColor 2852 } 2853 logEvent('tengu_agent_color_set', {}) 2854} 2855 2856/** 2857 * Cache the session agent setting. Written to disk by materializeSessionFile 2858 * on the first user message, and re-stamped by reAppendSessionMetadata on exit. 2859 * Cache-only here to avoid creating metadata-only session files at startup. 2860 */ 2861export function saveAgentSetting(agentSetting: string): void { 2862 getProject().currentSessionAgentSetting = agentSetting 2863} 2864 2865/** 2866 * Cache a session title set at startup (--name). Written to disk by 2867 * materializeSessionFile on the first user message. Cache-only here so no 2868 * orphan metadata-only file is created before the session ID is finalized. 2869 */ 2870export function cacheSessionTitle(customTitle: string): void { 2871 getProject().currentSessionTitle = customTitle 2872} 2873 2874/** 2875 * Cache the session mode. Written to disk by materializeSessionFile on the 2876 * first user message, and re-stamped by reAppendSessionMetadata on exit. 
2877 * Cache-only here to avoid creating metadata-only session files at startup. 2878 */ 2879export function saveMode(mode: 'coordinator' | 'normal'): void { 2880 getProject().currentSessionMode = mode 2881} 2882 2883/** 2884 * Record the session's worktree state for --resume. Written to disk by 2885 * materializeSessionFile on the first user message and re-stamped by 2886 * reAppendSessionMetadata on exit. Pass null when exiting a worktree 2887 * so --resume knows not to cd back into it. 2888 */ 2889export function saveWorktreeState( 2890 worktreeSession: PersistedWorktreeSession | null, 2891): void { 2892 // Strip ephemeral fields (creationDurationMs, usedSparsePaths) that callers 2893 // may pass via full WorktreeSession objects — TypeScript structural typing 2894 // allows this, but we don't want them serialized to the transcript. 2895 const stripped: PersistedWorktreeSession | null = worktreeSession 2896 ? { 2897 originalCwd: worktreeSession.originalCwd, 2898 worktreePath: worktreeSession.worktreePath, 2899 worktreeName: worktreeSession.worktreeName, 2900 worktreeBranch: worktreeSession.worktreeBranch, 2901 originalBranch: worktreeSession.originalBranch, 2902 originalHeadCommit: worktreeSession.originalHeadCommit, 2903 sessionId: worktreeSession.sessionId, 2904 tmuxSessionName: worktreeSession.tmuxSessionName, 2905 hookBased: worktreeSession.hookBased, 2906 } 2907 : null 2908 const project = getProject() 2909 project.currentSessionWorktree = stripped 2910 // Write eagerly when the file already exists (mid-session enter/exit). 2911 // For --worktree startup, sessionFile is null — materializeSessionFile 2912 // will write it on the first message via reAppendSessionMetadata. 2913 if (project.sessionFile) { 2914 appendEntryToFile(project.sessionFile, { 2915 type: 'worktree-state', 2916 worktreeSession: stripped, 2917 sessionId: getSessionId(), 2918 }) 2919 } 2920} 2921 2922/** 2923 * Extracts the session ID from a log. 
 * For lite logs, uses the sessionId field directly.
 * For full logs, extracts from the first message.
 */
export function getSessionIdFromLog(log: LogOption): UUID | undefined {
  // For lite logs, use the direct sessionId field
  if (log.sessionId) {
    return log.sessionId as UUID
  }
  // Fall back to extracting from first message (full logs)
  return log.messages[0]?.sessionId as UUID | undefined
}

/**
 * Checks if a log is a lite log that needs full loading.
 * Lite logs have messages: [] and sessionId set.
 */
export function isLiteLog(log: LogOption): boolean {
  return log.messages.length === 0 && log.sessionId !== undefined
}

/**
 * Loads full messages for a lite log by reading its JSONL file.
 * Returns a new LogOption with populated messages array.
 * If the log is already full or loading fails, returns the original log.
 */
export async function loadFullLog(log: LogOption): Promise<LogOption> {
  // If already full, return as-is
  if (!isLiteLog(log)) {
    return log
  }

  // Use the fullPath from the index entry directly
  const sessionFile = log.fullPath
  if (!sessionFile) {
    return log
  }

  try {
    const {
      messages,
      summaries,
      customTitles,
      tags,
      agentNames,
      agentColors,
      agentSettings,
      prNumbers,
      prUrls,
      prRepositories,
      modes,
      worktreeStates,
      fileHistorySnapshots,
      attributionSnapshots,
      contentReplacements,
      contextCollapseCommits,
      contextCollapseSnapshot,
      leafUuids,
    } = await loadTranscriptFile(sessionFile)

    if (messages.size === 0) {
      return log
    }

    // Find the most recent user/assistant leaf message from the transcript
    const mostRecentLeaf = findLatestMessage(
      messages.values(),
      msg =>
        leafUuids.has(msg.uuid) &&
        (msg.type === 'user' || msg.type === 'assistant'),
    )
    if (!mostRecentLeaf) {
      return log
    }

    // Build the conversation chain from this leaf
    const transcript = buildConversationChain(messages, mostRecentLeaf)
    // Leaf's sessionId — forked sessions copy chain[0] from the source, but
    // metadata entries (custom-title etc.) are keyed by the current session.
    const sessionId = mostRecentLeaf.sessionId as UUID | undefined
    // Merge loaded data over the lite index entry; each metadata field falls
    // back to the lite value when the transcript has nothing for this session.
    // NOTE(review): mostRecentLeaf is non-null past the guard above, so the
    // `mostRecentLeaf ?` ternary and `mostRecentLeaf?.` chains below always
    // take the truthy branch — kept as-is here.
    return {
      ...log,
      messages: removeExtraFields(transcript),
      firstPrompt: extractFirstPrompt(transcript),
      messageCount: countVisibleMessages(transcript),
      summary: mostRecentLeaf
        ? summaries.get(mostRecentLeaf.uuid)
        : log.summary,
      customTitle: sessionId ? customTitles.get(sessionId) : log.customTitle,
      tag: sessionId ? tags.get(sessionId) : log.tag,
      agentName: sessionId ? agentNames.get(sessionId) : log.agentName,
      agentColor: sessionId ? agentColors.get(sessionId) : log.agentColor,
      agentSetting: sessionId ? agentSettings.get(sessionId) : log.agentSetting,
      mode: sessionId ? (modes.get(sessionId) as LogOption['mode']) : log.mode,
      // has() check because the stored value may legitimately be null
      // ("exited the worktree") — a plain get() could not distinguish
      // "absent" from "null".
      worktreeSession:
        sessionId && worktreeStates.has(sessionId)
          ? worktreeStates.get(sessionId)
          : log.worktreeSession,
      prNumber: sessionId ? prNumbers.get(sessionId) : log.prNumber,
      prUrl: sessionId ? prUrls.get(sessionId) : log.prUrl,
      prRepository: sessionId
        ? prRepositories.get(sessionId)
        : log.prRepository,
      gitBranch: mostRecentLeaf?.gitBranch ?? log.gitBranch,
      isSidechain: transcript[0]?.isSidechain ?? log.isSidechain,
      teamName: transcript[0]?.teamName ?? log.teamName,
      leafUuid: mostRecentLeaf?.uuid ?? log.leafUuid,
      fileHistorySnapshots: buildFileHistorySnapshotChain(
        fileHistorySnapshots,
        transcript,
      ),
      attributionSnapshots: buildAttributionSnapshotChain(
        attributionSnapshots,
        transcript,
      ),
      contentReplacements: sessionId
        ? (contentReplacements.get(sessionId) ?? [])
        : log.contentReplacements,
      // Filter to the resumed session's entries. loadTranscriptFile reads
      // the file sequentially so the array is already in commit order;
      // filter preserves that.
      contextCollapseCommits: sessionId
        ? contextCollapseCommits.filter(e => e.sessionId === sessionId)
        : undefined,
      contextCollapseSnapshot:
        sessionId && contextCollapseSnapshot?.sessionId === sessionId
          ? contextCollapseSnapshot
          : undefined,
    }
  } catch {
    // If loading fails, return the original log
    return log
  }
}

/**
 * Searches for sessions by custom title match.
 * Returns matches sorted by recency (newest first).
 * Uses case-insensitive matching for better UX.
 * Deduplicates by sessionId (keeps most recent per session).
 * Searches across same-repo worktrees by default.
 */
export async function searchSessionsByCustomTitle(
  query: string,
  options?: { limit?: number; exact?: boolean },
): Promise<LogOption[]> {
  const { limit, exact } = options || {}
  // Use worktree-aware loading to search across same-repo sessions
  const worktreePaths = await getWorktreePaths(getOriginalCwd())
  const allStatLogs = await getStatOnlyLogsForWorktrees(worktreePaths)
  // Enrich all logs to access customTitle metadata
  const { logs } = await enrichLogs(allStatLogs, 0, allStatLogs.length)
  const normalizedQuery = query.toLowerCase().trim()

  const matchingLogs = logs.filter(log => {
    const title = log.customTitle?.toLowerCase().trim()
    if (!title) return false
    return exact ? title === normalizedQuery : title.includes(normalizedQuery)
  })

  // Deduplicate by sessionId - multiple logs can have the same sessionId
  // if they're different branches of the same conversation. Keep most recent.
  const sessionIdToLog = new Map<UUID, LogOption>()
  for (const log of matchingLogs) {
    const sessionId = getSessionIdFromLog(log)
    if (sessionId) {
      const existing = sessionIdToLog.get(sessionId)
      if (!existing || log.modified > existing.modified) {
        sessionIdToLog.set(sessionId, log)
      }
    }
  }
  const deduplicated = Array.from(sessionIdToLog.values())

  // Sort by recency
  deduplicated.sort((a, b) => b.modified.getTime() - a.modified.getTime())

  // Apply limit if specified
  // NOTE(review): a falsy check, so limit === 0 means "no limit" — confirm
  // that is intended rather than "return nothing".
  if (limit) {
    return deduplicated.slice(0, limit)
  }

  return deduplicated
}

/**
 * Metadata entry types that can appear before a compact boundary but must
 * still be loaded (they're session-scoped, not message-scoped).
 * Kept as raw JSON string markers for cheap line filtering during streaming.
 */
const METADATA_TYPE_MARKERS = [
  '"type":"summary"',
  '"type":"custom-title"',
  '"type":"tag"',
  '"type":"agent-name"',
  '"type":"agent-color"',
  '"type":"agent-setting"',
  '"type":"mode"',
  '"type":"worktree-state"',
  '"type":"pr-link"',
]
// Pre-built Buffer forms so scans compare bytes, never decode lines.
const METADATA_MARKER_BUFS = METADATA_TYPE_MARKERS.map(m => Buffer.from(m))
// Longest marker is 22 bytes; +1 for leading `{` = 23.
const METADATA_PREFIX_BOUND = 25

// null = carry spans whole chunk. Skips concat when carry provably isn't
// a metadata line (markers sit at byte 1 after `{`).
3130function resolveMetadataBuf( 3131 carry: Buffer | null, 3132 chunkBuf: Buffer, 3133): Buffer | null { 3134 if (carry === null || carry.length === 0) return chunkBuf 3135 if (carry.length < METADATA_PREFIX_BOUND) { 3136 return Buffer.concat([carry, chunkBuf]) 3137 } 3138 if (carry[0] === 0x7b /* { */) { 3139 for (const m of METADATA_MARKER_BUFS) { 3140 if (carry.compare(m, 0, m.length, 1, 1 + m.length) === 0) { 3141 return Buffer.concat([carry, chunkBuf]) 3142 } 3143 } 3144 } 3145 const firstNl = chunkBuf.indexOf(0x0a) 3146 return firstNl === -1 ? null : chunkBuf.subarray(firstNl + 1) 3147} 3148 3149/** 3150 * Lightweight forward scan of [0, endOffset) collecting only metadata-entry lines. 3151 * Uses raw Buffer chunks and byte-level marker matching — no readline, no per-line 3152 * string conversion for the ~99% of lines that are message content. 3153 * 3154 * Fast path: if a chunk contains zero markers (the common case — metadata entries 3155 * are <50 per session), the entire chunk is skipped without line splitting. 3156 */ 3157async function scanPreBoundaryMetadata( 3158 filePath: string, 3159 endOffset: number, 3160): Promise<string[]> { 3161 const { createReadStream } = await import('fs') 3162 const NEWLINE = 0x0a 3163 3164 const stream = createReadStream(filePath, { end: endOffset - 1 }) 3165 const metadataLines: string[] = [] 3166 let carry: Buffer | null = null 3167 3168 for await (const chunk of stream) { 3169 const chunkBuf = chunk as Buffer 3170 const buf = resolveMetadataBuf(carry, chunkBuf) 3171 if (buf === null) { 3172 carry = null 3173 continue 3174 } 3175 3176 // Fast path: most chunks contain zero metadata markers. Skip line splitting. 
3177 let hasAnyMarker = false 3178 for (const m of METADATA_MARKER_BUFS) { 3179 if (buf.includes(m)) { 3180 hasAnyMarker = true 3181 break 3182 } 3183 } 3184 3185 if (hasAnyMarker) { 3186 let lineStart = 0 3187 let nl = buf.indexOf(NEWLINE) 3188 while (nl !== -1) { 3189 // Bounded marker check: only look within this line's byte range 3190 for (const m of METADATA_MARKER_BUFS) { 3191 const mIdx = buf.indexOf(m, lineStart) 3192 if (mIdx !== -1 && mIdx < nl) { 3193 metadataLines.push(buf.toString('utf-8', lineStart, nl)) 3194 break 3195 } 3196 } 3197 lineStart = nl + 1 3198 nl = buf.indexOf(NEWLINE, lineStart) 3199 } 3200 carry = buf.subarray(lineStart) 3201 } else { 3202 // No markers in this chunk — just preserve the incomplete trailing line 3203 const lastNl = buf.lastIndexOf(NEWLINE) 3204 carry = lastNl >= 0 ? buf.subarray(lastNl + 1) : buf 3205 } 3206 3207 // Guard against quadratic carry growth for pathological huge lines 3208 // (e.g., a 10 MB tool-output line with no newline). Real metadata entries 3209 // are <1 KB, so if carry exceeds this we're mid-message-content — drop it. 3210 if (carry.length > 64 * 1024) carry = null 3211 } 3212 3213 // Final incomplete line (no trailing newline at endOffset) 3214 if (carry !== null && carry.length > 0) { 3215 for (const m of METADATA_MARKER_BUFS) { 3216 if (carry.includes(m)) { 3217 metadataLines.push(carry.toString('utf-8')) 3218 break 3219 } 3220 } 3221 } 3222 3223 return metadataLines 3224} 3225 3226/** 3227 * Byte-level pre-filter that excises dead fork branches before parseJSONL. 3228 * 3229 * Every rewind/ctrl-z leaves an orphaned chain branch in the append-only 3230 * JSONL forever. buildConversationChain walks parentUuid from the latest leaf 3231 * and discards everything else, but by then parseJSONL has already paid to 3232 * JSON.parse all of it. 
 Measured on fork-heavy sessions:
 *
 * 41 MB, 99% dead: parseJSONL 56.0 ms -> 3.9 ms (-93%)
 * 151 MB, 92% dead: 47.3 ms -> 9.4 ms (-80%)
 *
 * Sessions with few dead branches (5-7%) see a small win from the overhead of
 * the index pass roughly canceling the parse savings, so this is gated on
 * buffer size (same threshold as SKIP_PRECOMPACT_THRESHOLD).
 *
 * Relies on two invariants verified across 25k+ message lines in local
 * sessions (0 violations):
 *
 * 1. Transcript messages always serialize with parentUuid as the first key.
 *    JSON.stringify emits keys in insertion order and recordTranscript's
 *    object literal puts parentUuid first. So `{"parentUuid":` is a stable
 *    line prefix that distinguishes transcript messages from metadata.
 *
 * 2. Top-level uuid detection is handled by a suffix check + depth check
 *    (see inline comment in the scan loop). toolUseResult/mcpMeta serialize
 *    AFTER uuid with arbitrary server-controlled objects, and agent_progress
 *    entries serialize a nested Message in data BEFORE uuid — both can
 *    produce nested `"uuid":"<36>","timestamp":"` bytes, so suffix alone
 *    is insufficient. When multiple suffix matches exist, a brace-depth
 *    scan disambiguates.
 *
 * The append-only write discipline guarantees parents appear at earlier file
 * offsets than children, so walking backward from EOF always finds them.
 */

/**
 * Disambiguate multiple `"uuid":"<36>","timestamp":"` matches in one line by
 * finding the one at JSON nesting depth 1. String-aware brace counter:
 * `{`/`}` inside string values don't count; `\"` and `\\` inside strings are
 * handled. Candidates is sorted ascending (the scan loop produces them in
 * byte order). Returns the first depth-1 candidate, or the last candidate if
 * none are at depth 1 (shouldn't happen for well-formed JSONL — depth-1 is
 * where the top-level object's fields live).
 *
 * Only called when ≥2 suffix matches exist (agent_progress with a nested
 * Message, or mcpMeta with a coincidentally-suffixed object). Cost is
 * O(max(candidates) - lineStart) — one forward byte pass, stopping at the
 * first depth-1 hit.
 */
function pickDepthOneUuidCandidate(
  buf: Buffer,
  lineStart: number,
  candidates: number[],
): number {
  const QUOTE = 0x22
  const BACKSLASH = 0x5c
  const OPEN_BRACE = 0x7b
  const CLOSE_BRACE = 0x7d
  let depth = 0
  let inString = false
  let escapeNext = false
  // ci walks the sorted candidate list; the loop exits once all candidates
  // have been passed, so only bytes up to the last candidate are scanned.
  let ci = 0
  for (let i = lineStart; ci < candidates.length; i++) {
    if (i === candidates[ci]) {
      // Candidate byte reached: accept it only at top-level object depth,
      // outside any string value.
      if (depth === 1 && !inString) return candidates[ci]!
      ci++
    }
    const b = buf[i]!
    if (escapeNext) {
      escapeNext = false
    } else if (inString) {
      if (b === BACKSLASH) escapeNext = true
      else if (b === QUOTE) inString = false
    } else if (b === QUOTE) inString = true
    else if (b === OPEN_BRACE) depth++
    else if (b === CLOSE_BRACE) depth--
  }
  return candidates.at(-1)!
}

function walkChainBeforeParse(buf: Buffer): Buffer {
  const NEWLINE = 0x0a
  const OPEN_BRACE = 0x7b
  const QUOTE = 0x22
  const PARENT_PREFIX = Buffer.from('{"parentUuid":')
  const UUID_KEY = Buffer.from('"uuid":"')
  const SIDECHAIN_TRUE = Buffer.from('"isSidechain":true')
  const UUID_LEN = 36
  const TS_SUFFIX = Buffer.from('","timestamp":"')
  const TS_SUFFIX_LEN = TS_SUFFIX.length
  const PREFIX_LEN = PARENT_PREFIX.length
  const KEY_LEN = UUID_KEY.length

  // Stride-3 flat index of transcript messages: [lineStart, lineEnd, parentStart].
  // parentStart is the byte offset of the parent uuid's first char, or -1 for null.
  // Metadata lines (summary, mode, file-history-snapshot, etc.) go in metaRanges
  // unfiltered - they lack the parentUuid prefix and downstream needs all of them.
  const msgIdx: number[] = []
  const metaRanges: number[] = []
  const uuidToSlot = new Map<string, number>()

  let pos = 0
  const len = buf.length
  while (pos < len) {
    const nl = buf.indexOf(NEWLINE, pos)
    const lineEnd = nl === -1 ? len : nl + 1
    if (
      lineEnd - pos > PREFIX_LEN &&
      buf[pos] === OPEN_BRACE &&
      buf.compare(PARENT_PREFIX, 0, PREFIX_LEN, pos, pos + PREFIX_LEN) === 0
    ) {
      // `{"parentUuid":null,` or `{"parentUuid":"<36 chars>",`
      const parentStart =
        buf[pos + PREFIX_LEN] === QUOTE ? pos + PREFIX_LEN + 1 : -1
      // The top-level uuid is immediately followed by `","timestamp":"` in
      // user/assistant/attachment entries (the create* helpers put them
      // adjacent; both always defined). But the suffix is NOT unique:
      // - agent_progress entries carry a nested Message in data.message,
      //   serialized BEFORE top-level uuid — that inner Message has its
      //   own uuid,timestamp adjacent, so its bytes also satisfy the
      //   suffix check.
      // - mcpMeta/toolUseResult come AFTER top-level uuid and hold
      //   server-controlled Record<string,unknown> — a server returning
      //   {uuid:"<36>",timestamp:"..."} would also match.
      // Collect all suffix matches; a single one is unambiguous (common
      // case), multiple need a brace-depth check to pick the one at
      // JSON nesting depth 1. Entries with NO suffix match (some progress
      // variants put timestamp BEFORE uuid → `"uuid":"<36>"}` at EOL)
      // have only one `"uuid":"` and the first-match fallback is sound.
      let firstAny = -1
      let suffix0 = -1
      let suffixN: number[] | undefined
      let from = pos
      for (;;) {
        const next = buf.indexOf(UUID_KEY, from)
        if (next < 0 || next >= lineEnd) break
        if (firstAny < 0) firstAny = next
        const after = next + KEY_LEN + UUID_LEN
        if (
          after + TS_SUFFIX_LEN <= lineEnd &&
          buf.compare(
            TS_SUFFIX,
            0,
            TS_SUFFIX_LEN,
            after,
            after + TS_SUFFIX_LEN,
          ) === 0
        ) {
          // suffixN is only allocated on the second suffix match, seeded
          // with the first — keeps the common single-match case allocation-free.
          if (suffix0 < 0) suffix0 = next
          else (suffixN ??= [suffix0]).push(next)
        }
        from = next + KEY_LEN
      }
      const uk = suffixN
        ? pickDepthOneUuidCandidate(buf, pos, suffixN)
        : suffix0 >= 0
          ? suffix0
          : firstAny
      if (uk >= 0) {
        const uuidStart = uk + KEY_LEN
        // UUIDs are pure ASCII so latin1 avoids UTF-8 decode overhead.
        const uuid = buf.toString('latin1', uuidStart, uuidStart + UUID_LEN)
        uuidToSlot.set(uuid, msgIdx.length)
        msgIdx.push(pos, lineEnd, parentStart)
      } else {
        metaRanges.push(pos, lineEnd)
      }
    } else {
      metaRanges.push(pos, lineEnd)
    }
    pos = lineEnd
  }

  // Leaf = last non-sidechain entry. isSidechain is the 2nd or 3rd key
  // (after parentUuid, maybe logicalParentUuid) so indexOf from lineStart
  // finds it within a few dozen bytes when present; when absent it spills
  // into the next line, caught by the bounds check.
  let leafSlot = -1
  for (let i = msgIdx.length - 3; i >= 0; i -= 3) {
    const sc = buf.indexOf(SIDECHAIN_TRUE, msgIdx[i]!)
    if (sc === -1 || sc >= msgIdx[i + 1]!) {
      leafSlot = i
      break
    }
  }
  if (leafSlot < 0) return buf

  // Walk parentUuid to root. Collect kept-message line starts and sum their
  // byte lengths so we can decide whether the concat is worth it. A dangling
  // parent (uuid not in file) is the normal termination for forked sessions
  // and post-boundary chains -- same semantics as buildConversationChain.
  // Correctness against index poisoning rests on the timestamp suffix check
  // above: a nested `"uuid":"` match without the suffix never becomes uk.
  const seen = new Set<number>()
  const chain = new Set<number>()
  let chainBytes = 0
  let slot: number | undefined = leafSlot
  while (slot !== undefined) {
    // seen guards against a parentUuid cycle (corrupt file) looping forever.
    if (seen.has(slot)) break
    seen.add(slot)
    chain.add(msgIdx[slot]!)
    chainBytes += msgIdx[slot + 1]! - msgIdx[slot]!
    const parentStart = msgIdx[slot + 2]!
    if (parentStart < 0) break
    const parent = buf.toString('latin1', parentStart, parentStart + UUID_LEN)
    slot = uuidToSlot.get(parent)
  }

  // parseJSONL cost scales with bytes, not entry count. A session can have
  // thousands of dead entries by count but only single-digit-% of bytes if
  // the dead branches are short turns and the live chain holds the fat
  // assistant responses (measured: 107 MB session, 69% dead entries, 30%
  // dead bytes - index+concat overhead exceeded parse savings). Gate on
  // bytes: only stitch if we would drop at least half the buffer. Metadata
  // is tiny so len - chainBytes approximates dead bytes closely enough.
  // Near break-even the concat memcpy (copying chainBytes into a fresh
  // allocation) dominates, so a conservative 50% gate stays safely on the
  // winning side.
  if (len - chainBytes < len >> 1) return buf

  // Merge chain entries with metadata in original file order. Both msgIdx and
  // metaRanges are already sorted by offset; interleave them into subarray
  // views and concat once.
  const parts: Buffer[] = []
  let m = 0
  for (let i = 0; i < msgIdx.length; i += 3) {
    const start = msgIdx[i]!
    while (m < metaRanges.length && metaRanges[m]!
< start) {
      parts.push(buf.subarray(metaRanges[m]!, metaRanges[m + 1]!))
      m += 2
    }
    if (chain.has(start)) {
      parts.push(buf.subarray(start, msgIdx[i + 1]!))
    }
  }
  // Flush any metadata ranges that sit after the last message line.
  while (m < metaRanges.length) {
    parts.push(buf.subarray(metaRanges[m]!, metaRanges[m + 1]!))
    m += 2
  }
  return Buffer.concat(parts)
}

/**
 * Loads all messages, summaries, and file history snapshots from a transcript file.
 * Returns the messages, summaries, custom titles, tags, file history snapshots, and attribution snapshots.
 *
 * @param filePath Path to the .jsonl transcript file.
 * @param opts.keepAllLeaves When true, skips the pre-parse dead-branch pruning
 *   so every fork branch survives (e.g. /insights picks the branch with the
 *   most user messages, not the latest).
 * @returns Populated maps/sets; all empty when the file is missing or
 *   unreadable (read errors are swallowed by design — see the catch below).
 */
export async function loadTranscriptFile(
  filePath: string,
  opts?: { keepAllLeaves?: boolean },
): Promise<{
  messages: Map<UUID, TranscriptMessage>
  summaries: Map<UUID, string>
  customTitles: Map<UUID, string>
  tags: Map<UUID, string>
  agentNames: Map<UUID, string>
  agentColors: Map<UUID, string>
  agentSettings: Map<UUID, string>
  prNumbers: Map<UUID, number>
  prUrls: Map<UUID, string>
  prRepositories: Map<UUID, string>
  modes: Map<UUID, string>
  worktreeStates: Map<UUID, PersistedWorktreeSession | null>
  fileHistorySnapshots: Map<UUID, FileHistorySnapshotMessage>
  attributionSnapshots: Map<UUID, AttributionSnapshotMessage>
  contentReplacements: Map<UUID, ContentReplacementRecord[]>
  agentContentReplacements: Map<AgentId, ContentReplacementRecord[]>
  contextCollapseCommits: ContextCollapseCommitEntry[]
  contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined
  leafUuids: Set<UUID>
}> {
  const messages = new Map<UUID, TranscriptMessage>()
  const summaries = new Map<UUID, string>()
  const customTitles = new Map<UUID, string>()
  const tags = new Map<UUID, string>()
  const agentNames = new Map<UUID, string>()
  const agentColors = new Map<UUID, string>()
  const agentSettings = new Map<UUID, string>()
  const prNumbers = new Map<UUID, number>()
  const prUrls = new Map<UUID, string>()
  const prRepositories = new Map<UUID, string>()
  const modes = new Map<UUID, string>()
  const worktreeStates = new Map<UUID, PersistedWorktreeSession | null>()
  const fileHistorySnapshots = new Map<UUID, FileHistorySnapshotMessage>()
  const attributionSnapshots = new Map<UUID, AttributionSnapshotMessage>()
  const contentReplacements = new Map<UUID, ContentReplacementRecord[]>()
  const agentContentReplacements = new Map<
    AgentId,
    ContentReplacementRecord[]
  >()
  // Array, not Map — commit order matters (nested collapses).
  const contextCollapseCommits: ContextCollapseCommitEntry[] = []
  // Last-wins — later entries supersede.
  let contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined

  try {
    // For large transcripts, avoid materializing megabytes of stale content.
    // Single forward chunked read: attribution-snapshot lines are skipped at
    // the fd level (never buffered), compact boundaries truncate the
    // accumulator in-stream. Peak allocation is the OUTPUT size, not the
    // file size — a 151 MB session that is 84% stale attr-snaps allocates
    // ~32 MB instead of 159+64 MB. This matters because mimalloc does not
    // return those pages to the OS even after JS-level GC frees the backing
    // buffers (measured: arrayBuffers=0 after Bun.gc(true) but RSS stuck at
    // ~316 MB on the old scan+strip path vs ~155 MB here).
    //
    // Pre-boundary metadata (agent-setting, mode, pr-link, etc.) is recovered
    // via a cheap byte-level forward scan of [0, boundary).
    let buf: Buffer | null = null
    let metadataLines: string[] | null = null
    let hasPreservedSegment = false
    if (!isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP)) {
      const { size } = await stat(filePath)
      if (size > SKIP_PRECOMPACT_THRESHOLD) {
        const scan = await readTranscriptForLoad(filePath, size)
        buf = scan.postBoundaryBuf
        hasPreservedSegment = scan.hasPreservedSegment
        // >0 means we truncated pre-boundary bytes and must recover
        // session-scoped metadata from that range. A preservedSegment
        // boundary does not truncate (preserved messages are physically
        // pre-boundary), so offset stays 0 unless an EARLIER non-preserved
        // boundary already truncated — in which case the preserved messages
        // for the later boundary are post-that-earlier-boundary and were
        // kept, and we still want the metadata scan.
        if (scan.boundaryStartOffset > 0) {
          metadataLines = await scanPreBoundaryMetadata(
            filePath,
            scan.boundaryStartOffset,
          )
        }
      }
    }
    // Small files (or kill switch active): plain whole-file read.
    buf ??= await readFile(filePath)
    // For large buffers (which here means readTranscriptForLoad output with
    // attr-snaps already stripped at the fd level — the <5MB readFile path
    // falls through the size gate below), the dominant cost is parsing dead
    // fork branches that buildConversationChain would discard anyway. Skip
    // this pruning when the caller needs all leaves
    // (loadAllLogsFromSessionFile for /insights picks the branch with
    // most user messages, not the latest), when the boundary has a
    // preservedSegment (those messages keep their pre-compact parentUuid on
    // disk -- applyPreservedSegmentRelinks splices them in-memory AFTER
    // parse, so a pre-parse chain walk would drop them as orphans), and when
    // CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP is set (that kill switch means
    // "load everything, skip nothing"; this is another skip-before-parse
    // optimization and the scan it depends on for hasPreservedSegment did
    // not run).
    if (
      !opts?.keepAllLeaves &&
      !hasPreservedSegment &&
      !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP) &&
      buf.length > SKIP_PRECOMPACT_THRESHOLD
    ) {
      buf = walkChainBeforeParse(buf)
    }

    // First pass: process metadata-only lines collected during the boundary scan.
    // These populate the session-scoped maps (agentSettings, modes, prNumbers,
    // etc.) for entries written before the compact boundary. Any overlap with
    // the post-boundary buffer is harmless — later values overwrite earlier ones.
    if (metadataLines && metadataLines.length > 0) {
      const metaEntries = parseJSONL<Entry>(
        Buffer.from(metadataLines.join('\n')),
      )
      for (const entry of metaEntries) {
        if (entry.type === 'summary' && entry.leafUuid) {
          summaries.set(entry.leafUuid, entry.summary)
        } else if (entry.type === 'custom-title' && entry.sessionId) {
          customTitles.set(entry.sessionId, entry.customTitle)
        } else if (entry.type === 'tag' && entry.sessionId) {
          tags.set(entry.sessionId, entry.tag)
        } else if (entry.type === 'agent-name' && entry.sessionId) {
          agentNames.set(entry.sessionId, entry.agentName)
        } else if (entry.type === 'agent-color' && entry.sessionId) {
          agentColors.set(entry.sessionId, entry.agentColor)
        } else if (entry.type === 'agent-setting' && entry.sessionId) {
          agentSettings.set(entry.sessionId, entry.agentSetting)
        } else if (entry.type === 'mode' && entry.sessionId) {
          modes.set(entry.sessionId, entry.mode)
        } else if (entry.type === 'worktree-state' && entry.sessionId) {
          worktreeStates.set(entry.sessionId, entry.worktreeSession)
        } else if (entry.type === 'pr-link' && entry.sessionId) {
          prNumbers.set(entry.sessionId, entry.prNumber)
          prUrls.set(entry.sessionId, entry.prUrl)
          prRepositories.set(entry.sessionId, entry.prRepository)
        }
      }
    }

    const entries = parseJSONL<Entry>(buf)

    // Bridge map for legacy progress entries: progress_uuid → progress_parent_uuid.
    // PR #24099 removed progress from isTranscriptMessage, so old transcripts with
    // progress in the parentUuid chain would truncate at buildConversationChain
    // when messages.get(progressUuid) returns undefined. Since transcripts are
    // append-only (parents before children), we record each progress→parent link
    // as we see it, chain-resolving through consecutive progress entries, then
    // rewrite any subsequent message whose parentUuid lands in the bridge.
    const progressBridge = new Map<UUID, UUID | null>()

    for (const entry of entries) {
      // Legacy progress check runs before the Entry-typed else-if chain —
      // progress is not in the Entry union, so checking it after TypeScript
      // has narrowed `entry` intersects to `never`.
      if (isLegacyProgressEntry(entry)) {
        // Chain-resolve through consecutive progress entries so a later
        // message pointing at the tail of a progress run bridges to the
        // nearest non-progress ancestor in one lookup.
        const parent = entry.parentUuid
        progressBridge.set(
          entry.uuid,
          parent && progressBridge.has(parent)
            ? (progressBridge.get(parent) ?? null)
            : parent,
        )
        continue
      }
      if (isTranscriptMessage(entry)) {
        if (entry.parentUuid && progressBridge.has(entry.parentUuid)) {
          entry.parentUuid = progressBridge.get(entry.parentUuid) ?? null
        }
        messages.set(entry.uuid, entry)
        // Compact boundary: prior marble-origami-commit entries reference
        // messages that won't be in the post-boundary chain. The >5MB
        // backward-scan path discards them naturally by never reading the
        // pre-boundary bytes; the <5MB path reads everything, so discard
        // here. Without this, getStats().collapsedSpans in /context
        // overcounts (projectView silently skips the stale commits but
        // they're still in the log).
        if (isCompactBoundaryMessage(entry)) {
          contextCollapseCommits.length = 0
          contextCollapseSnapshot = undefined
        }
      } else if (entry.type === 'summary' && entry.leafUuid) {
        summaries.set(entry.leafUuid, entry.summary)
      } else if (entry.type === 'custom-title' && entry.sessionId) {
        customTitles.set(entry.sessionId, entry.customTitle)
      } else if (entry.type === 'tag' && entry.sessionId) {
        tags.set(entry.sessionId, entry.tag)
      } else if (entry.type === 'agent-name' && entry.sessionId) {
        agentNames.set(entry.sessionId, entry.agentName)
      } else if (entry.type === 'agent-color' && entry.sessionId) {
        agentColors.set(entry.sessionId, entry.agentColor)
      } else if (entry.type === 'agent-setting' && entry.sessionId) {
        agentSettings.set(entry.sessionId, entry.agentSetting)
      } else if (entry.type === 'mode' && entry.sessionId) {
        modes.set(entry.sessionId, entry.mode)
      } else if (entry.type === 'worktree-state' && entry.sessionId) {
        worktreeStates.set(entry.sessionId, entry.worktreeSession)
      } else if (entry.type === 'pr-link' && entry.sessionId) {
        prNumbers.set(entry.sessionId, entry.prNumber)
        prUrls.set(entry.sessionId, entry.prUrl)
        prRepositories.set(entry.sessionId, entry.prRepository)
      } else if (entry.type === 'file-history-snapshot') {
        fileHistorySnapshots.set(entry.messageId, entry)
      } else if (entry.type === 'attribution-snapshot') {
        attributionSnapshots.set(entry.messageId, entry)
      } else if (entry.type === 'content-replacement') {
        // Subagent decisions key by agentId (sidechain resume); main-thread
        // decisions key by sessionId (/resume).
        if (entry.agentId) {
          const existing = agentContentReplacements.get(entry.agentId) ?? []
          agentContentReplacements.set(entry.agentId, existing)
          existing.push(...entry.replacements)
        } else {
          const existing = contentReplacements.get(entry.sessionId) ?? []
          contentReplacements.set(entry.sessionId, existing)
          existing.push(...entry.replacements)
        }
      } else if (entry.type === 'marble-origami-commit') {
        contextCollapseCommits.push(entry)
      } else if (entry.type === 'marble-origami-snapshot') {
        contextCollapseSnapshot = entry
      }
    }
  } catch {
    // File doesn't exist or can't be read
  }

  applyPreservedSegmentRelinks(messages)
  applySnipRemovals(messages)

  // Compute leaf UUIDs once at load time
  // Only user/assistant messages should be considered as leaves for anchoring resume.
  // Other message types (system, attachment) are metadata or auxiliary and shouldn't
  // anchor a conversation chain.
  //
  // We use standard parent relationship for main chain detection, but also need to
  // handle cases where the last message is a system/metadata message.
  // For each conversation chain (identified by following parent links), the leaf
  // is the most recent user/assistant message.
  const allMessages = [...messages.values()]

  // Standard leaf computation using parent relationships
  const parentUuids = new Set(
    allMessages
      .map(msg => msg.parentUuid)
      .filter((uuid): uuid is UUID => uuid !== null),
  )

  // Find all terminal messages (messages with no children)
  const terminalMessages = allMessages.filter(msg => !parentUuids.has(msg.uuid))

  const leafUuids = new Set<UUID>()
  let hasCycle = false

  if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_pebble_leaf_prune', false)) {
    // Build a set of UUIDs that have user/assistant children
    // (these are mid-conversation nodes, not dead ends)
    const hasUserAssistantChild = new Set<UUID>()
    for (const msg of allMessages) {
      if (msg.parentUuid && (msg.type === 'user' || msg.type === 'assistant')) {
        hasUserAssistantChild.add(msg.parentUuid)
      }
    }

    // For each terminal message, walk back to find the nearest user/assistant ancestor.
    // Skip ancestors that already have user/assistant children - those are mid-conversation
    // nodes where the conversation continued (e.g., an assistant tool_use message whose
    // progress child is terminal, but whose tool_result child continues the conversation).
    for (const terminal of terminalMessages) {
      const seen = new Set<UUID>()
      let current: TranscriptMessage | undefined = terminal
      while (current) {
        // seen-set guards against corrupt transcripts with parentUuid cycles.
        if (seen.has(current.uuid)) {
          hasCycle = true
          break
        }
        seen.add(current.uuid)
        if (current.type === 'user' || current.type === 'assistant') {
          if (!hasUserAssistantChild.has(current.uuid)) {
            leafUuids.add(current.uuid)
          }
          break
        }
        current = current.parentUuid
          ? messages.get(current.parentUuid)
          : undefined
      }
    }
  } else {
    // Original leaf computation: walk back from terminal messages to find
    // the nearest user/assistant ancestor unconditionally
    for (const terminal of terminalMessages) {
      const seen = new Set<UUID>()
      let current: TranscriptMessage | undefined = terminal
      while (current) {
        if (seen.has(current.uuid)) {
          hasCycle = true
          break
        }
        seen.add(current.uuid)
        if (current.type === 'user' || current.type === 'assistant') {
          leafUuids.add(current.uuid)
          break
        }
        current = current.parentUuid
          ? messages.get(current.parentUuid)
          : undefined
      }
    }
  }

  if (hasCycle) {
    logEvent('tengu_transcript_parent_cycle', {})
  }

  return {
    messages,
    summaries,
    customTitles,
    tags,
    agentNames,
    agentColors,
    agentSettings,
    prNumbers,
    prUrls,
    prRepositories,
    modes,
    worktreeStates,
    fileHistorySnapshots,
    attributionSnapshots,
    contentReplacements,
    agentContentReplacements,
    contextCollapseCommits,
    contextCollapseSnapshot,
    leafUuids,
  }
}

/**
 * Loads all messages, summaries, file history snapshots, and attribution snapshots from a specific session file.
 */
async function loadSessionFile(sessionId: UUID): Promise<{
  messages: Map<UUID, TranscriptMessage>
  summaries: Map<UUID, string>
  customTitles: Map<UUID, string>
  tags: Map<UUID, string>
  agentSettings: Map<UUID, string>
  worktreeStates: Map<UUID, PersistedWorktreeSession | null>
  fileHistorySnapshots: Map<UUID, FileHistorySnapshotMessage>
  attributionSnapshots: Map<UUID, AttributionSnapshotMessage>
  contentReplacements: Map<UUID, ContentReplacementRecord[]>
  contextCollapseCommits: ContextCollapseCommitEntry[]
  contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined
}> {
  // Session transcripts live at <projectDir>/<sessionId>.jsonl.
  const sessionFile = join(
    getSessionProjectDir() ?? getProjectDir(getOriginalCwd()),
    `${sessionId}.jsonl`,
  )
  return loadTranscriptFile(sessionFile)
}

/**
 * Gets message UUIDs for a specific session without loading all sessions.
 * Memoized to avoid re-reading the same session file multiple times.
 */
const getSessionMessages = memoize(
  async (sessionId: UUID): Promise<Set<UUID>> => {
    const { messages } = await loadSessionFile(sessionId)
    return new Set(messages.keys())
  },
  (sessionId: UUID) => sessionId,
)

/**
 * Clear the memoized session messages cache.
 * Call after compaction when old message UUIDs are no longer valid.
 */
export function clearSessionMessagesCache(): void {
  getSessionMessages.cache.clear?.()
}

/**
 * Check if a message UUID exists in the session storage
 */
export async function doesMessageExistInSession(
  sessionId: UUID,
  messageUuid: UUID,
): Promise<boolean> {
  const messageSet = await getSessionMessages(sessionId)
  return messageSet.has(messageUuid)
}

/**
 * Loads the most recent conversation chain for a session as a LogOption.
 * Returns null when the session file has no messages (or no non-sidechain leaf).
 */
export async function getLastSessionLog(
  sessionId: UUID,
): Promise<LogOption | null> {
  // Single read: load all session data at once instead of reading the file twice
  const {
    messages,
    summaries,
    customTitles,
    tags,
    agentSettings,
    worktreeStates,
    fileHistorySnapshots,
    attributionSnapshots,
    contentReplacements,
    contextCollapseCommits,
    contextCollapseSnapshot,
  } = await loadSessionFile(sessionId)
  if (messages.size === 0) return null
  // Prime getSessionMessages cache so recordTranscript (called after REPL
  // mount on --resume) skips a second full file load. -170~227ms on large sessions.
  // Guard: only prime if cache is empty. Mid-session callers (e.g. IssueFeedback)
  // may call getLastSessionLog on the current session — overwriting a live cache
  // with a stale disk snapshot would lose unflushed UUIDs and break dedup.
  if (!getSessionMessages.cache.has(sessionId)) {
    getSessionMessages.cache.set(
      sessionId,
      Promise.resolve(new Set(messages.keys())),
    )
  }

  // Find the most recent non-sidechain message
  const lastMessage = findLatestMessage(messages.values(), m => !m.isSidechain)
  if (!lastMessage) return null

  // Build the transcript chain from the last message
  const transcript = buildConversationChain(messages, lastMessage)

  const summary = summaries.get(lastMessage.uuid)
  const customTitle = customTitles.get(lastMessage.sessionId as UUID)
  const tag = tags.get(lastMessage.sessionId as UUID)
  const agentSetting = agentSettings.get(sessionId)
  return {
    ...convertToLogOption(
      transcript,
      0,
      summary,
      customTitle,
      buildFileHistorySnapshotChain(fileHistorySnapshots, transcript),
      tag,
      getTranscriptPathForSession(sessionId),
      buildAttributionSnapshotChain(attributionSnapshots, transcript),
      agentSetting,
      contentReplacements.get(sessionId) ?? [],
    ),
    worktreeSession: worktreeStates.get(sessionId),
    // Scope collapse state to this session — the file may contain entries
    // for other session ids.
    contextCollapseCommits: contextCollapseCommits.filter(
      e => e.sessionId === sessionId,
    ),
    contextCollapseSnapshot:
      contextCollapseSnapshot?.sessionId === sessionId
        ? contextCollapseSnapshot
        : undefined,
  }
}

/**
 * Loads the list of message logs
 * @param limit Optional limit on number of session files to load
 * @returns List of message logs sorted by date
 */
export async function loadMessageLogs(limit?: number): Promise<LogOption[]> {
  const sessionLogs = await fetchLogs(limit)
  // fetchLogs returns lite (stat-only) logs — enrich them to get metadata.
  // enrichLogs already filters out sidechains, empty sessions, etc.
  const { logs: enriched } = await enrichLogs(
    sessionLogs,
    0,
    sessionLogs.length,
  )

  // enrichLogs returns fresh unshared objects — mutate in place to avoid
  // re-spreading every 30-field LogOption just to renumber the index.
  const sorted = sortLogs(enriched)
  sorted.forEach((log, i) => {
    log.value = i
  })
  return sorted
}

/**
 * Loads message logs from all project directories.
 * @param limit Optional limit on number of session files to load per project (used when no index exists)
 * @returns List of message logs sorted by date
 */
export async function loadAllProjectsMessageLogs(
  limit?: number,
  options?: { skipIndex?: boolean; initialEnrichCount?: number },
): Promise<LogOption[]> {
  if (options?.skipIndex) {
    // Load all sessions with full message data (e.g. for /insights analysis)
    return loadAllProjectsMessageLogsFull(limit)
  }
  const result = await loadAllProjectsMessageLogsProgressive(
    limit,
    options?.initialEnrichCount ?? INITIAL_ENRICH_COUNT,
  )
  return result.logs
}

/**
 * Full (non-progressive) variant: loads every session in every project dir
 * with complete message data. Used by /insights via skipIndex.
 */
async function loadAllProjectsMessageLogsFull(
  limit?: number,
): Promise<LogOption[]> {
  const projectsDir = getProjectsDir()

  let dirents: Dirent[]
  try {
    dirents = await readdir(projectsDir, { withFileTypes: true })
  } catch {
    // Projects dir missing/unreadable — nothing to load.
    return []
  }

  const projectDirs = dirents
    .filter(dirent => dirent.isDirectory())
    .map(dirent => join(projectsDir, dirent.name))

  const logsPerProject = await Promise.all(
    projectDirs.map(projectDir => getLogsWithoutIndex(projectDir, limit)),
  )
  const allLogs = logsPerProject.flat()

  // Deduplicate — same session+leaf can appear in multiple project dirs.
  // This path creates one LogOption per leaf, so use sessionId+leafUuid key.
  const deduped = new Map<string, LogOption>()
  for (const log of allLogs) {
    const key = `${log.sessionId ?? ''}:${log.leafUuid ?? ''}`
    const existing = deduped.get(key)
    if (!existing || log.modified.getTime() > existing.modified.getTime()) {
      deduped.set(key, log)
    }
  }

  // deduped values are fresh from getLogsWithoutIndex — safe to mutate
  const sorted = sortLogs([...deduped.values()])
  sorted.forEach((log, i) => {
    log.value = i
  })
  return sorted
}

/**
 * Progressive variant: stats every session file cheaply, then enriches only
 * the first `initialEnrichCount` entries. Callers continue enrichment from
 * `nextIndex` over `allStatLogs`.
 */
export async function loadAllProjectsMessageLogsProgressive(
  limit?: number,
  initialEnrichCount: number = INITIAL_ENRICH_COUNT,
): Promise<SessionLogResult> {
  const projectsDir = getProjectsDir()

  let dirents: Dirent[]
  try {
    dirents = await readdir(projectsDir, { withFileTypes: true })
  } catch {
    return { logs: [], allStatLogs: [], nextIndex: 0 }
  }

  const projectDirs = dirents
    .filter(dirent => dirent.isDirectory())
    .map(dirent => join(projectsDir, dirent.name))

  // NOTE(review): per-project reads are sequential here while
  // loadAllProjectsMessageLogsFull uses Promise.all — confirm whether that is
  // deliberate (fd pressure / ordering) before parallelizing.
  const rawLogs: LogOption[] = []
  for (const projectDir of projectDirs) {
    rawLogs.push(...(await getSessionFilesLite(projectDir, limit)))
  }
  // Deduplicate — same session can appear in multiple project dirs
  const sorted = deduplicateLogsBySessionId(rawLogs)

  const { logs, nextIndex } = await enrichLogs(sorted, 0, initialEnrichCount)

  // enrichLogs returns fresh unshared objects — safe to mutate in place
  logs.forEach((log, i) => {
    log.value = i
  })
  return { logs, allStatLogs: sorted, nextIndex }
}

/**
 * Loads message logs from all worktrees of the same git repository.
 * Falls back to loadMessageLogs if no worktrees provided.
 *
 * Uses pure filesystem metadata for fast loading.
 *
 * @param worktreePaths Array of worktree paths (from getWorktreePaths)
 * @param limit Optional limit on number of session files to load per project
 * @returns List of message logs sorted by date
 */
/**
 * Result of loading session logs with progressive enrichment support.
 */
export type SessionLogResult = {
  /** Enriched logs ready for display */
  logs: LogOption[]
  /** Full stat-only list for progressive loading (call enrichLogs to get more) */
  allStatLogs: LogOption[]
  /** Index into allStatLogs where progressive loading should continue from */
  nextIndex: number
}

export async function loadSameRepoMessageLogs(
  worktreePaths: string[],
  limit?: number,
  initialEnrichCount: number = INITIAL_ENRICH_COUNT,
): Promise<LogOption[]> {
  // Convenience wrapper: progressive variant, discarding the stat-only tail.
  const result = await loadSameRepoMessageLogsProgressive(
    worktreePaths,
    limit,
    initialEnrichCount,
  )
  return result.logs
}

export async function loadSameRepoMessageLogsProgressive(
  worktreePaths: string[],
  limit?: number,
  initialEnrichCount: number = INITIAL_ENRICH_COUNT,
): Promise<SessionLogResult> {
  logForDebugging(
    `/resume: loading sessions for cwd=${getOriginalCwd()}, worktrees=[${worktreePaths.join(', ')}]`,
  )
  const allStatLogs = await getStatOnlyLogsForWorktrees(worktreePaths, limit)
  logForDebugging(`/resume: found ${allStatLogs.length} session files on disk`)

  const { logs, nextIndex } = await enrichLogs(
    allStatLogs,
    0,
    initialEnrichCount,
  )

  // enrichLogs returns fresh unshared objects — safe to mutate in place
  logs.forEach((log, i) => {
    log.value = i
  })
  return { logs, allStatLogs, nextIndex }
}

/**
 * Gets stat-only logs for worktree paths (no file reads).
 */
async function getStatOnlyLogsForWorktrees(
  worktreePaths: string[],
  limit?: number,
): Promise<LogOption[]> {
  const projectsDir = getProjectsDir()

  if (worktreePaths.length <= 1) {
    const cwd = getOriginalCwd()
    const projectDir = getProjectDir(cwd)
    // NOTE(review): `limit` is passed as undefined here and in the worktree
    // loop below, but honored in the readdir-failure fallback — confirm
    // whether ignoring it on the main paths is intentional.
    return getSessionFilesLite(projectDir, undefined, cwd)
  }

  // On Windows, drive letter case can differ between git worktree list
  // output (e.g. C:/Users/...) and how paths were stored in project
  // directories (e.g. c:/Users/...). Use case-insensitive comparison.
  const caseInsensitive = process.platform === 'win32'

  // Sort worktree paths by sanitized prefix length (longest first) so
  // more specific matches take priority over shorter ones. Without this,
  // a short prefix like -code-myrepo could match -code-myrepo-worktree1
  // before the longer, more specific prefix gets a chance.
  const indexed = worktreePaths.map(wt => {
    const sanitized = sanitizePath(wt)
    return {
      path: wt,
      prefix: caseInsensitive ? sanitized.toLowerCase() : sanitized,
    }
  })
  indexed.sort((a, b) => b.prefix.length - a.prefix.length)

  const allLogs: LogOption[] = []
  const seenDirs = new Set<string>()

  let allDirents: Dirent[]
  try {
    allDirents = await readdir(projectsDir, { withFileTypes: true })
  } catch (e) {
    // Fall back to current project
    logForDebugging(
      `Failed to read projects dir ${projectsDir}, falling back to current project: ${e}`,
    )
    const projectDir = getProjectDir(getOriginalCwd())
    return getSessionFilesLite(projectDir, limit, getOriginalCwd())
  }

  for (const dirent of allDirents) {
    if (!dirent.isDirectory()) continue
    const dirName = caseInsensitive ? dirent.name.toLowerCase() : dirent.name
    if (seenDirs.has(dirName)) continue

    for (const { path: wtPath, prefix } of indexed) {
      if (dirName === prefix || dirName.startsWith(prefix + '-')) {
        seenDirs.add(dirName)
        allLogs.push(
          ...(await getSessionFilesLite(
            join(projectsDir, dirent.name),
            undefined,
            wtPath,
          )),
        )
        break
      }
    }
  }

  // Deduplicate by sessionId — the same session can appear in multiple
  // worktree project dirs. Keep the entry with the newest modified time.
  return deduplicateLogsBySessionId(allLogs)
}

/**
 * Retrieves the transcript for a specific agent by agentId.
 * Directly loads the agent-specific transcript file.
 * @param agentId The agent ID to search for
 * @returns The conversation chain and budget replacement records for the agent,
 *          or null if not found
 */
export async function getAgentTranscript(agentId: AgentId): Promise<{
  messages: Message[]
  contentReplacements: ContentReplacementRecord[]
} | null> {
  const agentFile = getAgentTranscriptPath(agentId)

  try {
    const { messages, agentContentReplacements } =
      await loadTranscriptFile(agentFile)

    // Find messages with matching agentId
    const agentMessages = Array.from(messages.values()).filter(
      msg => msg.agentId === agentId && msg.isSidechain,
    )

    if (agentMessages.length === 0) {
      return null
    }

    // Find the most recent leaf message with this agentId
    const parentUuids = new Set(agentMessages.map(msg => msg.parentUuid))
    const leafMessage = findLatestMessage(
      agentMessages,
      msg => !parentUuids.has(msg.uuid),
    )

    if (!leafMessage) {
      return null
    }

    // Build the conversation chain
    const transcript = buildConversationChain(messages, leafMessage)

    // Filter to only include messages with this agentId
    const agentTranscript = transcript.filter(msg => msg.agentId === agentId)

    return {
      // Convert TranscriptMessage[] to Message[]
      messages: agentTranscript.map(
        ({ isSidechain, parentUuid, ...msg }) => msg,
      ),
      contentReplacements: agentContentReplacements.get(agentId) ?? [],
    }
  } catch {
    // Missing/unreadable agent transcript — treat as not found.
    return null
  }
}

/**
 * Extract agent IDs from progress messages in the conversation.
 * Agent/skill progress messages have type 'progress' with data.type
 * 'agent_progress' or 'skill_progress' and data.agentId.
 * This captures sync agents that emit progress messages during execution.
 */
export function extractAgentIdsFromMessages(messages: Message[]): string[] {
  const agentIds: string[] = []

  for (const message of messages) {
    if (
      message.type === 'progress' &&
      message.data &&
      typeof message.data === 'object' &&
      'type' in message.data &&
      (message.data.type === 'agent_progress' ||
        message.data.type === 'skill_progress') &&
      'agentId' in message.data &&
      typeof message.data.agentId === 'string'
    ) {
      agentIds.push(message.data.agentId)
    }
  }

  // uniq preserves first-seen order while dropping duplicates.
  return uniq(agentIds)
}

/**
 * Extract teammate transcripts directly from AppState tasks.
 * In-process teammates store their messages in task.messages,
 * which is more reliable than loading from disk since each teammate turn
 * uses a random agentId for transcript storage.
4270 */ 4271export function extractTeammateTranscriptsFromTasks(tasks: { 4272 [taskId: string]: { 4273 type: string 4274 identity?: { agentId: string } 4275 messages?: Message[] 4276 } 4277}): { [agentId: string]: Message[] } { 4278 const transcripts: { [agentId: string]: Message[] } = {} 4279 4280 for (const task of Object.values(tasks)) { 4281 if ( 4282 task.type === 'in_process_teammate' && 4283 task.identity?.agentId && 4284 task.messages && 4285 task.messages.length > 0 4286 ) { 4287 transcripts[task.identity.agentId] = task.messages 4288 } 4289 } 4290 4291 return transcripts 4292} 4293 4294/** 4295 * Load subagent transcripts for the given agent IDs 4296 */ 4297export async function loadSubagentTranscripts( 4298 agentIds: string[], 4299): Promise<{ [agentId: string]: Message[] }> { 4300 const results = await Promise.all( 4301 agentIds.map(async agentId => { 4302 try { 4303 const result = await getAgentTranscript(asAgentId(agentId)) 4304 if (result && result.messages.length > 0) { 4305 return { agentId, transcript: result.messages } 4306 } 4307 return null 4308 } catch { 4309 // Skip if transcript can't be loaded 4310 return null 4311 } 4312 }), 4313 ) 4314 4315 const transcripts: { [agentId: string]: Message[] } = {} 4316 for (const result of results) { 4317 if (result) { 4318 transcripts[result.agentId] = result.transcript 4319 } 4320 } 4321 return transcripts 4322} 4323 4324// Globs the session's subagents dir directly — unlike AppState.tasks, this survives task eviction. 4325export async function loadAllSubagentTranscriptsFromDisk(): Promise<{ 4326 [agentId: string]: Message[] 4327}> { 4328 const subagentsDir = join( 4329 getSessionProjectDir() ?? getProjectDir(getOriginalCwd()), 4330 getSessionId(), 4331 'subagents', 4332 ) 4333 let entries: Dirent[] 4334 try { 4335 entries = await readdir(subagentsDir, { withFileTypes: true }) 4336 } catch { 4337 return {} 4338 } 4339 // Filename format is the inverse of getAgentTranscriptPath() — keep in sync. 
4340 const agentIds = entries 4341 .filter( 4342 d => 4343 d.isFile() && d.name.startsWith('agent-') && d.name.endsWith('.jsonl'), 4344 ) 4345 .map(d => d.name.slice('agent-'.length, -'.jsonl'.length)) 4346 return loadSubagentTranscripts(agentIds) 4347} 4348 4349// Exported so useLogMessages can sync-compute the last loggable uuid 4350// without awaiting recordTranscript's return value (race-free hint tracking). 4351export function isLoggableMessage(m: Message): boolean { 4352 if (m.type === 'progress') return false 4353 // IMPORTANT: We deliberately filter out most attachments for non-ants because 4354 // they have sensitive info for training that we don't want exposed to the public. 4355 // When enabled, we allow hook_additional_context through since it contains 4356 // user-configured hook output that is useful for session context on resume. 4357 if (m.type === 'attachment' && getUserType() !== 'ant') { 4358 if ( 4359 m.attachment.type === 'hook_additional_context' && 4360 isEnvTruthy(process.env.CLAUDE_CODE_SAVE_HOOK_ADDITIONAL_CONTEXT) 4361 ) { 4362 return true 4363 } 4364 return false 4365 } 4366 return true 4367} 4368 4369function collectReplIds(messages: readonly Message[]): Set<string> { 4370 const ids = new Set<string>() 4371 for (const m of messages) { 4372 if (m.type === 'assistant' && Array.isArray(m.message.content)) { 4373 for (const b of m.message.content) { 4374 if (b.type === 'tool_use' && b.name === REPL_TOOL_NAME) { 4375 ids.add(b.id) 4376 } 4377 } 4378 } 4379 } 4380 return ids 4381} 4382 4383/** 4384 * For external users, make REPL invisible in the persisted transcript: strip 4385 * REPL tool_use/tool_result pairs and promote isVirtual messages to real. On 4386 * --resume the model then sees a coherent native-tool-call history (assistant 4387 * called Bash, got result, called Read, got result) without the REPL wrapper. 4388 * Ant transcripts keep the wrapper so /share training data sees REPL usage. 
4389 * 4390 * replIds is pre-collected from the FULL session array, not the slice being 4391 * transformed — recordTranscript receives incremental slices where the REPL 4392 * tool_use (earlier render) and its tool_result (later render, after async 4393 * execution) land in separate calls. A fresh per-call Set would miss the id 4394 * and leave an orphaned tool_result on disk. 4395 */ 4396function transformMessagesForExternalTranscript( 4397 messages: Transcript, 4398 replIds: Set<string>, 4399): Transcript { 4400 return messages.flatMap(m => { 4401 if (m.type === 'assistant' && Array.isArray(m.message.content)) { 4402 const content = m.message.content 4403 const hasRepl = content.some( 4404 b => b.type === 'tool_use' && b.name === REPL_TOOL_NAME, 4405 ) 4406 const filtered = hasRepl 4407 ? content.filter( 4408 b => !(b.type === 'tool_use' && b.name === REPL_TOOL_NAME), 4409 ) 4410 : content 4411 if (filtered.length === 0) return [] 4412 if (m.isVirtual) { 4413 const { isVirtual: _omit, ...rest } = m 4414 return [{ ...rest, message: { ...m.message, content: filtered } }] 4415 } 4416 if (filtered !== content) { 4417 return [{ ...m, message: { ...m.message, content: filtered } }] 4418 } 4419 return [m] 4420 } 4421 if (m.type === 'user' && Array.isArray(m.message.content)) { 4422 const content = m.message.content 4423 const hasRepl = content.some( 4424 b => b.type === 'tool_result' && replIds.has(b.tool_use_id), 4425 ) 4426 const filtered = hasRepl 4427 ? 
content.filter( 4428 b => !(b.type === 'tool_result' && replIds.has(b.tool_use_id)), 4429 ) 4430 : content 4431 if (filtered.length === 0) return [] 4432 if (m.isVirtual) { 4433 const { isVirtual: _omit, ...rest } = m 4434 return [{ ...rest, message: { ...m.message, content: filtered } }] 4435 } 4436 if (filtered !== content) { 4437 return [{ ...m, message: { ...m.message, content: filtered } }] 4438 } 4439 return [m] 4440 } 4441 // string-content user, system, attachment 4442 if ('isVirtual' in m && m.isVirtual) { 4443 const { isVirtual: _omit, ...rest } = m 4444 return [rest] 4445 } 4446 return [m] 4447 }) as Transcript 4448} 4449 4450export function cleanMessagesForLogging( 4451 messages: Message[], 4452 allMessages: readonly Message[] = messages, 4453): Transcript { 4454 const filtered = messages.filter(isLoggableMessage) as Transcript 4455 return getUserType() !== 'ant' 4456 ? transformMessagesForExternalTranscript( 4457 filtered, 4458 collectReplIds(allMessages), 4459 ) 4460 : filtered 4461} 4462 4463/** 4464 * Gets a log by its index 4465 * @param index Index in the sorted list of logs (0-based) 4466 * @returns Log data or null if not found 4467 */ 4468export async function getLogByIndex(index: number): Promise<LogOption | null> { 4469 const logs = await loadMessageLogs() 4470 return logs[index] || null 4471} 4472 4473/** 4474 * Looks up unresolved tool uses in the transcript by tool_use_id. 4475 * Returns the assistant message containing the tool_use, or null if not found 4476 * or the tool call already has a tool_result. 
4477 */ 4478export async function findUnresolvedToolUse( 4479 toolUseId: string, 4480): Promise<AssistantMessage | null> { 4481 try { 4482 const transcriptPath = getTranscriptPath() 4483 const { messages } = await loadTranscriptFile(transcriptPath) 4484 4485 let toolUseMessage = null 4486 4487 // Find the tool use but make sure there's not also a result 4488 for (const message of messages.values()) { 4489 if (message.type === 'assistant') { 4490 const content = message.message.content 4491 if (Array.isArray(content)) { 4492 for (const block of content) { 4493 if (block.type === 'tool_use' && block.id === toolUseId) { 4494 toolUseMessage = message 4495 break 4496 } 4497 } 4498 } 4499 } else if (message.type === 'user') { 4500 const content = message.message.content 4501 if (Array.isArray(content)) { 4502 for (const block of content) { 4503 if ( 4504 block.type === 'tool_result' && 4505 block.tool_use_id === toolUseId 4506 ) { 4507 // Found tool result, bail out 4508 return null 4509 } 4510 } 4511 } 4512 } 4513 } 4514 4515 return toolUseMessage 4516 } catch { 4517 return null 4518 } 4519} 4520 4521/** 4522 * Gets all session JSONL files in a project directory with their stats. 4523 * Returns a map of sessionId → {path, mtime, ctime, size}. 4524 * Stats are batched via Promise.all to avoid serial syscalls in the hot loop. 
4525 */ 4526export async function getSessionFilesWithMtime( 4527 projectDir: string, 4528): Promise< 4529 Map<string, { path: string; mtime: number; ctime: number; size: number }> 4530> { 4531 const sessionFilesMap = new Map< 4532 string, 4533 { path: string; mtime: number; ctime: number; size: number } 4534 >() 4535 4536 let dirents: Dirent[] 4537 try { 4538 dirents = await readdir(projectDir, { withFileTypes: true }) 4539 } catch { 4540 // Directory doesn't exist - return empty map 4541 return sessionFilesMap 4542 } 4543 4544 const candidates: Array<{ sessionId: string; filePath: string }> = [] 4545 for (const dirent of dirents) { 4546 if (!dirent.isFile() || !dirent.name.endsWith('.jsonl')) continue 4547 const sessionId = validateUuid(basename(dirent.name, '.jsonl')) 4548 if (!sessionId) continue 4549 candidates.push({ sessionId, filePath: join(projectDir, dirent.name) }) 4550 } 4551 4552 await Promise.all( 4553 candidates.map(async ({ sessionId, filePath }) => { 4554 try { 4555 const st = await stat(filePath) 4556 sessionFilesMap.set(sessionId, { 4557 path: filePath, 4558 mtime: st.mtime.getTime(), 4559 ctime: st.birthtime.getTime(), 4560 size: st.size, 4561 }) 4562 } catch { 4563 logForDebugging(`Failed to stat session file: ${filePath}`) 4564 } 4565 }), 4566 ) 4567 4568 return sessionFilesMap 4569} 4570 4571/** 4572 * Number of sessions to enrich on the initial load of the resume picker. 4573 * Each enrichment reads up to 128 KB per file (head + tail), so 50 sessions 4574 * means ~6.4 MB of I/O — fast on any modern filesystem while giving users 4575 * a much better initial view than the previous default of 10. 
 */
const INITIAL_ENRICH_COUNT = 50

// Lightweight per-session metadata scraped by readLiteMetadata from the
// head/tail of a session JSONL file via string search (no full JSON parse).
type LiteMetadata = {
  firstPrompt: string
  gitBranch?: string
  isSidechain: boolean
  projectPath?: string
  teamName?: string
  customTitle?: string
  summary?: string
  tag?: string
  agentSetting?: string
  prNumber?: number
  prUrl?: string
  prRepository?: string
}

/**
 * Loads all logs from a single session file with full message data.
 * Builds a LogOption for each leaf message in the file.
 *
 * @param sessionFile Path to the session JSONL file.
 * @param projectPathOverride When set, used instead of the first message's cwd.
 * @returns One LogOption per conversation leaf (empty if the file has no messages).
 */
export async function loadAllLogsFromSessionFile(
  sessionFile: string,
  projectPathOverride?: string,
): Promise<LogOption[]> {
  const {
    messages,
    summaries,
    customTitles,
    tags,
    agentNames,
    agentColors,
    agentSettings,
    prNumbers,
    prUrls,
    prRepositories,
    modes,
    fileHistorySnapshots,
    attributionSnapshots,
    contentReplacements,
    leafUuids,
  } = await loadTranscriptFile(sessionFile, { keepAllLeaves: true })

  if (messages.size === 0) return []

  const leafMessages: TranscriptMessage[] = []
  // Build parentUuid → children index once (O(n)), so trailing-message lookup is O(1) per leaf
  const childrenByParent = new Map<UUID, TranscriptMessage[]>()
  for (const msg of messages.values()) {
    if (leafUuids.has(msg.uuid)) {
      leafMessages.push(msg)
    } else if (msg.parentUuid) {
      const siblings = childrenByParent.get(msg.parentUuid)
      if (siblings) {
        siblings.push(msg)
      } else {
        childrenByParent.set(msg.parentUuid, [msg])
      }
    }
  }

  const logs: LogOption[] = []

  for (const leafMessage of leafMessages) {
    const chain = buildConversationChain(messages, leafMessage)
    if (chain.length === 0) continue

    // Append trailing messages that are children of the leaf
    const trailingMessages = childrenByParent.get(leafMessage.uuid)
    if (trailingMessages) {
      // ISO-8601 UTC timestamps are lexically sortable
      trailingMessages.sort((a, b) =>
        a.timestamp < b.timestamp ? -1 : a.timestamp > b.timestamp ? 1 : 0,
      )
      chain.push(...trailingMessages)
    }

    // Non-null: the chain.length === 0 case was skipped above.
    const firstMessage = chain[0]!
    const sessionId = leafMessage.sessionId as UUID

    // Note the two keying schemes below: summaries are keyed by the leaf
    // message uuid, everything else by sessionId.
    logs.push({
      date: leafMessage.timestamp,
      messages: removeExtraFields(chain),
      fullPath: sessionFile,
      value: 0,
      created: new Date(firstMessage.timestamp),
      modified: new Date(leafMessage.timestamp),
      firstPrompt: extractFirstPrompt(chain),
      messageCount: countVisibleMessages(chain),
      isSidechain: firstMessage.isSidechain ?? false,
      sessionId,
      leafUuid: leafMessage.uuid,
      summary: summaries.get(leafMessage.uuid),
      customTitle: customTitles.get(sessionId),
      tag: tags.get(sessionId),
      agentName: agentNames.get(sessionId),
      agentColor: agentColors.get(sessionId),
      agentSetting: agentSettings.get(sessionId),
      mode: modes.get(sessionId) as LogOption['mode'],
      prNumber: prNumbers.get(sessionId),
      prUrl: prUrls.get(sessionId),
      prRepository: prRepositories.get(sessionId),
      gitBranch: leafMessage.gitBranch,
      projectPath: projectPathOverride ?? firstMessage.cwd,
      fileHistorySnapshots: buildFileHistorySnapshotChain(
        fileHistorySnapshots,
        chain,
      ),
      attributionSnapshots: buildAttributionSnapshotChain(
        attributionSnapshots,
        chain,
      ),
      contentReplacements: contentReplacements.get(sessionId) ?? [],
    })
  }

  return logs
}

/**
 * Gets logs by loading all session files fully, bypassing the session index.
 * Use this when you need full message data (e.g., for /insights analysis).
4699 4700 */ 4701async function getLogsWithoutIndex( 4702 projectDir: string, 4703 limit?: number, 4704): Promise<LogOption[]> { 4705 const sessionFilesMap = await getSessionFilesWithMtime(projectDir) 4706 if (sessionFilesMap.size === 0) return [] 4707 4708 // If limit specified, only load N most recent files by mtime 4709 let filesToProcess: Array<{ path: string; mtime: number }> 4710 if (limit && sessionFilesMap.size > limit) { 4711 filesToProcess = [...sessionFilesMap.values()] 4712 .sort((a, b) => b.mtime - a.mtime) 4713 .slice(0, limit) 4714 } else { 4715 filesToProcess = [...sessionFilesMap.values()] 4716 } 4717 4718 const logs: LogOption[] = [] 4719 for (const fileInfo of filesToProcess) { 4720 try { 4721 const fileLogOptions = await loadAllLogsFromSessionFile(fileInfo.path) 4722 logs.push(...fileLogOptions) 4723 } catch { 4724 logForDebugging(`Failed to load session file: ${fileInfo.path}`) 4725 } 4726 } 4727 4728 return logs 4729} 4730 4731/** 4732 * Reads the first and last ~64KB of a JSONL file and extracts lite metadata. 4733 * 4734 * Head (first 64KB): isSidechain, projectPath, teamName, firstPrompt. 4735 * Tail (last 64KB): customTitle, tag, PR link, latest gitBranch. 4736 * 4737 * Accepts a shared buffer to avoid per-file allocation overhead. 4738 */ 4739async function readLiteMetadata( 4740 filePath: string, 4741 fileSize: number, 4742 buf: Buffer, 4743): Promise<LiteMetadata> { 4744 const { head, tail } = await readHeadAndTail(filePath, fileSize, buf) 4745 if (!head) return { firstPrompt: '', isSidechain: false } 4746 4747 // Extract stable metadata from the first line via string search. 4748 // Works even when the first line is truncated (>64KB message). 
  const isSidechain =
    head.includes('"isSidechain":true') || head.includes('"isSidechain": true')
  const projectPath = extractJsonStringField(head, 'cwd')
  const teamName = extractJsonStringField(head, 'teamName')
  const agentSetting = extractJsonStringField(head, 'agentSetting')

  // Prefer the last-prompt tail entry — captured by extractFirstPrompt at
  // write time (filtered, authoritative) and shows what the user was most
  // recently doing. Head scan is the fallback for sessions written before
  // last-prompt entries existed. Raw string scrapes of head are last resort
  // and catch array-format content blocks (VS Code <ide_selection> metadata).
  const firstPrompt =
    extractLastJsonStringField(tail, 'lastPrompt') ||
    extractFirstPromptFromChunk(head) ||
    extractJsonStringFieldPrefix(head, 'content', 200) ||
    extractJsonStringFieldPrefix(head, 'text', 200) ||
    ''

  // Extract tail metadata via string search (last occurrence wins).
  // User titles (customTitle field, from custom-title entries) win over
  // AI titles (aiTitle field, from ai-title entries). The distinct field
  // names mean extractLastJsonStringField naturally disambiguates.
  const customTitle =
    extractLastJsonStringField(tail, 'customTitle') ??
    extractLastJsonStringField(head, 'customTitle') ??
    extractLastJsonStringField(tail, 'aiTitle') ??
    extractLastJsonStringField(head, 'aiTitle')
  const summary = extractLastJsonStringField(tail, 'summary')
  const tag = extractLastJsonStringField(tail, 'tag')
  const gitBranch =
    extractLastJsonStringField(tail, 'gitBranch') ??
    extractJsonStringField(head, 'gitBranch')

  // PR link fields — prNumber is a number not a string, so try both
  const prUrl = extractLastJsonStringField(tail, 'prUrl')
  const prRepository = extractLastJsonStringField(tail, 'prRepository')
  let prNumber: number | undefined
  const prNumStr = extractLastJsonStringField(tail, 'prNumber')
  if (prNumStr) {
    prNumber = parseInt(prNumStr, 10) || undefined
  }
  // Fallback for the numeric (unquoted) encoding: scan past the key and
  // parse whatever digits follow the colon. 11 = '"prNumber":'.length.
  if (!prNumber) {
    const prNumMatch = tail.lastIndexOf('"prNumber":')
    if (prNumMatch >= 0) {
      const afterColon = tail.slice(prNumMatch + 11, prNumMatch + 25)
      const num = parseInt(afterColon.trim(), 10)
      if (num > 0) prNumber = num
    }
  }

  return {
    firstPrompt,
    gitBranch,
    isSidechain,
    projectPath,
    teamName,
    customTitle,
    summary,
    tag,
    agentSetting,
    prNumber,
    prUrl,
    prRepository,
  }
}

/**
 * Scans a chunk of JSONL text for the first meaningful user prompt.
 *
 * Skips tool_result and isMeta lines, collects text from both string and
 * array-form message content, formats slash commands and bash input, and
 * falls back to a clean command name (or a synthetic proactive-session
 * title) when the chunk contains no real prompt.
 */
function extractFirstPromptFromChunk(chunk: string): string {
  let start = 0
  let hasTickMessages = false
  let firstCommandFallback = ''
  while (start < chunk.length) {
    // Manual line splitting avoids allocating an array for the whole chunk.
    const newlineIdx = chunk.indexOf('\n', start)
    const line =
      newlineIdx >= 0 ? chunk.slice(start, newlineIdx) : chunk.slice(start)
    start = newlineIdx >= 0 ? newlineIdx + 1 : chunk.length

    // Cheap string pre-filters before paying for a JSON parse.
    if (!line.includes('"type":"user"') && !line.includes('"type": "user"')) {
      continue
    }
    if (line.includes('"tool_result"')) continue
    if (line.includes('"isMeta":true') || line.includes('"isMeta": true'))
      continue

    try {
      const entry = jsonParse(line) as Record<string, unknown>
      if (entry.type !== 'user') continue

      const message = entry.message as Record<string, unknown> | undefined
      if (!message) continue

      const content = message.content
      // Collect all text values from the message content. For array content
      // (common in VS Code where IDE metadata tags come before the user's
      // actual prompt), iterate all text blocks so we don't miss the real
      // prompt hidden behind <ide_selection>/<ide_opened_file> blocks.
      const texts: string[] = []
      if (typeof content === 'string') {
        texts.push(content)
      } else if (Array.isArray(content)) {
        for (const block of content) {
          const b = block as Record<string, unknown>
          if (b.type === 'text' && typeof b.text === 'string') {
            texts.push(b.text as string)
          }
        }
      }

      for (const text of texts) {
        if (!text) continue

        let result = text.replace(/\n/g, ' ').trim()

        // Skip command messages (slash commands) but remember the first one
        // as a fallback title. Matches skip logic in
        // getFirstMeaningfulUserMessageTextContent, but instead of discarding
        // command messages entirely, we format them cleanly (e.g. "/clear")
        // so the session still appears in the resume picker.
        const commandNameTag = extractTag(result, COMMAND_NAME_TAG)
        if (commandNameTag) {
          const name = commandNameTag.replace(/^\//, '')
          const commandArgs = extractTag(result, 'command-args')?.trim() || ''
          if (builtInCommandNames().has(name) || !commandArgs) {
            if (!firstCommandFallback) {
              firstCommandFallback = commandNameTag
            }
            continue
          }
          // Custom command with meaningful args — use clean display.
          // (commandArgs is non-empty here, so the false arm is defensive.)
          return commandArgs
            ? `${commandNameTag} ${commandArgs}`
            : commandNameTag
        }

        // Format bash input with ! prefix before the generic XML skip
        const bashInput = extractTag(result, 'bash-input')
        if (bashInput) return `! ${bashInput}`

        if (SKIP_FIRST_PROMPT_PATTERN.test(result)) {
          if (
            (feature('PROACTIVE') || feature('KAIROS')) &&
            result.startsWith(`<${TICK_TAG}>`)
          )
            hasTickMessages = true
          continue
        }
        // Cap the display title at 200 characters.
        if (result.length > 200) {
          result = result.slice(0, 200).trim() + '…'
        }
        return result
      }
    } catch {
      continue
    }
  }
  // Session started with a slash command but had no subsequent real message —
  // use the clean command name so the session still appears in the resume picker
  if (firstCommandFallback) return firstCommandFallback
  // Proactive sessions have only tick messages — give them a synthetic prompt
  // so they're not filtered out by enrichLogs
  if ((feature('PROACTIVE') || feature('KAIROS')) && hasTickMessages)
    return 'Proactive session'
  return ''
}

/**
 * Like extractJsonStringField but returns the first `maxLen` characters of the
 * value even when the closing quote is missing (truncated buffer). Newline
 * escapes are replaced with spaces and the result is trimmed.
4920 */ 4921function extractJsonStringFieldPrefix( 4922 text: string, 4923 key: string, 4924 maxLen: number, 4925): string { 4926 const patterns = [`"${key}":"`, `"${key}": "`] 4927 for (const pattern of patterns) { 4928 const idx = text.indexOf(pattern) 4929 if (idx < 0) continue 4930 4931 const valueStart = idx + pattern.length 4932 // Grab up to maxLen characters from the value, stopping at closing quote 4933 let i = valueStart 4934 let collected = 0 4935 while (i < text.length && collected < maxLen) { 4936 if (text[i] === '\\') { 4937 i += 2 // skip escaped char 4938 collected++ 4939 continue 4940 } 4941 if (text[i] === '"') break 4942 i++ 4943 collected++ 4944 } 4945 const raw = text.slice(valueStart, i) 4946 return raw.replace(/\\n/g, ' ').replace(/\\t/g, ' ').trim() 4947 } 4948 return '' 4949} 4950 4951/** 4952 * Deduplicates logs by sessionId, keeping the entry with the newest 4953 * modified time. Returns sorted logs with sequential value indices. 4954 */ 4955function deduplicateLogsBySessionId(logs: LogOption[]): LogOption[] { 4956 const deduped = new Map<string, LogOption>() 4957 for (const log of logs) { 4958 if (!log.sessionId) continue 4959 const existing = deduped.get(log.sessionId) 4960 if (!existing || log.modified.getTime() > existing.modified.getTime()) { 4961 deduped.set(log.sessionId, log) 4962 } 4963 } 4964 return sortLogs([...deduped.values()]).map((log, i) => ({ 4965 ...log, 4966 value: i, 4967 })) 4968} 4969 4970/** 4971 * Returns lite LogOption[] from pure filesystem metadata (stat only). 4972 * No file reads — instant. Call `enrichLogs` to enrich 4973 * visible sessions with firstPrompt, gitBranch, customTitle, etc. 
4974 */ 4975export async function getSessionFilesLite( 4976 projectDir: string, 4977 limit?: number, 4978 projectPath?: string, 4979): Promise<LogOption[]> { 4980 const sessionFilesMap = await getSessionFilesWithMtime(projectDir) 4981 4982 // Sort by mtime descending and apply limit 4983 let entries = [...sessionFilesMap.entries()].sort( 4984 (a, b) => b[1].mtime - a[1].mtime, 4985 ) 4986 if (limit && entries.length > limit) { 4987 entries = entries.slice(0, limit) 4988 } 4989 4990 const logs: LogOption[] = [] 4991 4992 for (const [sessionId, fileInfo] of entries) { 4993 logs.push({ 4994 date: new Date(fileInfo.mtime).toISOString(), 4995 messages: [], 4996 isLite: true, 4997 fullPath: fileInfo.path, 4998 value: 0, 4999 created: new Date(fileInfo.ctime), 5000 modified: new Date(fileInfo.mtime), 5001 firstPrompt: '', 5002 messageCount: 0, 5003 fileSize: fileInfo.size, 5004 isSidechain: false, 5005 sessionId, 5006 projectPath, 5007 }) 5008 } 5009 5010 // logs are freshly pushed above — safe to mutate in place 5011 const sorted = sortLogs(logs) 5012 sorted.forEach((log, i) => { 5013 log.value = i 5014 }) 5015 return sorted 5016} 5017 5018/** 5019 * Enriches a lite log with metadata from its JSONL file. 5020 * Returns the enriched log, or null if the log has no meaningful content 5021 * (no firstPrompt, no customTitle — e.g., metadata-only session files). 5022 */ 5023async function enrichLog( 5024 log: LogOption, 5025 readBuf: Buffer, 5026): Promise<LogOption | null> { 5027 if (!log.isLite || !log.fullPath) return log 5028 5029 const meta = await readLiteMetadata(log.fullPath, log.fileSize ?? 
0, readBuf) 5030 5031 const enriched: LogOption = { 5032 ...log, 5033 isLite: false, 5034 firstPrompt: meta.firstPrompt, 5035 gitBranch: meta.gitBranch, 5036 isSidechain: meta.isSidechain, 5037 teamName: meta.teamName, 5038 customTitle: meta.customTitle, 5039 summary: meta.summary, 5040 tag: meta.tag, 5041 agentSetting: meta.agentSetting, 5042 prNumber: meta.prNumber, 5043 prUrl: meta.prUrl, 5044 prRepository: meta.prRepository, 5045 projectPath: meta.projectPath ?? log.projectPath, 5046 } 5047 5048 // Provide a fallback title for sessions where we couldn't extract the first 5049 // prompt (e.g., large first messages that exceed the 16KB read buffer). 5050 // Previously these sessions were silently dropped, making them inaccessible 5051 // via /resume after crashes or large-context sessions. 5052 if (!enriched.firstPrompt && !enriched.customTitle) { 5053 enriched.firstPrompt = '(session)' 5054 } 5055 // Filter: skip sidechains and agent sessions 5056 if (enriched.isSidechain) { 5057 logForDebugging( 5058 `Session ${log.sessionId} filtered from /resume: isSidechain=true`, 5059 ) 5060 return null 5061 } 5062 if (enriched.teamName) { 5063 logForDebugging( 5064 `Session ${log.sessionId} filtered from /resume: teamName=${enriched.teamName}`, 5065 ) 5066 return null 5067 } 5068 5069 return enriched 5070} 5071 5072/** 5073 * Enriches enough lite logs from `allLogs` (starting at `startIndex`) to 5074 * produce `count` valid results. Returns the valid enriched logs and the 5075 * index where scanning stopped (for progressive loading to continue from). 5076 */ 5077export async function enrichLogs( 5078 allLogs: LogOption[], 5079 startIndex: number, 5080 count: number, 5081): Promise<{ logs: LogOption[]; nextIndex: number }> { 5082 const result: LogOption[] = [] 5083 const readBuf = Buffer.alloc(LITE_READ_BUF_SIZE) 5084 let i = startIndex 5085 5086 while (i < allLogs.length && result.length < count) { 5087 const log = allLogs[i]! 
5088 i++ 5089 5090 const enriched = await enrichLog(log, readBuf) 5091 if (enriched) { 5092 result.push(enriched) 5093 } 5094 } 5095 5096 const scanned = i - startIndex 5097 const filtered = scanned - result.length 5098 if (filtered > 0) { 5099 logForDebugging( 5100 `/resume: enriched ${scanned} sessions, ${filtered} filtered out, ${result.length} visible (${allLogs.length - i} remaining on disk)`, 5101 ) 5102 } 5103 5104 return { logs: result, nextIndex: i } 5105}