// Source dump of the Claude Code session-storage module
// (main branch, 5105 lines, 181 kB — raw view)
1import { feature } from 'bun:bundle' 2import type { UUID } from 'crypto' 3import type { Dirent } from 'fs' 4// Sync fs primitives for readFileTailSync — separate from fs/promises 5// imports above. Named (not wildcard) per CLAUDE.md style; no collisions 6// with the async-suffixed names. 7import { closeSync, fstatSync, openSync, readSync } from 'fs' 8import { 9 appendFile as fsAppendFile, 10 open as fsOpen, 11 mkdir, 12 readdir, 13 readFile, 14 stat, 15 unlink, 16 writeFile, 17} from 'fs/promises' 18import memoize from 'lodash-es/memoize.js' 19import { basename, dirname, join } from 'path' 20import { 21 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 22 logEvent, 23} from 'src/services/analytics/index.js' 24import { 25 getOriginalCwd, 26 getPlanSlugCache, 27 getPromptId, 28 getSessionId, 29 getSessionProjectDir, 30 isSessionPersistenceDisabled, 31 switchSession, 32} from '../bootstrap/state.js' 33import { builtInCommandNames } from '../commands.js' 34import { COMMAND_NAME_TAG, TICK_TAG } from '../constants/xml.js' 35import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js' 36import * as sessionIngress from '../services/api/sessionIngress.js' 37import { REPL_TOOL_NAME } from '../tools/REPLTool/constants.js' 38import { 39 type AgentId, 40 asAgentId, 41 asSessionId, 42 type SessionId, 43} from '../types/ids.js' 44import type { AttributionSnapshotMessage } from '../types/logs.js' 45import { 46 type ContentReplacementEntry, 47 type ContextCollapseCommitEntry, 48 type ContextCollapseSnapshotEntry, 49 type Entry, 50 type FileHistorySnapshotMessage, 51 type LogOption, 52 type PersistedWorktreeSession, 53 type SerializedMessage, 54 sortLogs, 55 type TranscriptMessage, 56} from '../types/logs.js' 57import type { 58 AssistantMessage, 59 AttachmentMessage, 60 Message, 61 SystemCompactBoundaryMessage, 62 SystemMessage, 63 UserMessage, 64} from '../types/message.js' 65import type { QueueOperationMessage } from 
'../types/messageQueueTypes.js' 66import { uniq } from './array.js' 67import { registerCleanup } from './cleanupRegistry.js' 68import { updateSessionName } from './concurrentSessions.js' 69import { getCwd } from './cwd.js' 70import { logForDebugging } from './debug.js' 71import { logForDiagnosticsNoPII } from './diagLogs.js' 72import { getClaudeConfigHomeDir, isEnvTruthy } from './envUtils.js' 73import { isFsInaccessible } from './errors.js' 74import type { FileHistorySnapshot } from './fileHistory.js' 75import { formatFileSize } from './format.js' 76import { getFsImplementation } from './fsOperations.js' 77import { getWorktreePaths } from './getWorktreePaths.js' 78import { getBranch } from './git.js' 79import { gracefulShutdownSync, isShuttingDown } from './gracefulShutdown.js' 80import { parseJSONL } from './json.js' 81import { logError } from './log.js' 82import { extractTag, isCompactBoundaryMessage } from './messages.js' 83import { sanitizePath } from './path.js' 84import { 85 extractJsonStringField, 86 extractLastJsonStringField, 87 LITE_READ_BUF_SIZE, 88 readHeadAndTail, 89 readTranscriptForLoad, 90 SKIP_PRECOMPACT_THRESHOLD, 91} from './sessionStoragePortable.js' 92import { getSettings_DEPRECATED } from './settings/settings.js' 93import { jsonParse, jsonStringify } from './slowOperations.js' 94import type { ContentReplacementRecord } from './toolResultStorage.js' 95import { validateUuid } from './uuid.js' 96 97// Cache MACRO.VERSION at module level to work around bun --define bug in async contexts 98// See: https://github.com/oven-sh/bun/issues/26168 99const VERSION = typeof MACRO !== 'undefined' ? MACRO.VERSION : 'unknown' 100 101type Transcript = ( 102 | UserMessage 103 | AssistantMessage 104 | AttachmentMessage 105 | SystemMessage 106)[] 107 108// Use getOriginalCwd() at each call site instead of capturing at module load 109// time. 
getCwd() at import time may run before bootstrap resolves symlinks via 110// realpathSync, causing a different sanitized project directory than what 111// getOriginalCwd() returns after bootstrap. This split-brain made sessions 112// saved under one path invisible when loaded via the other. 113 114/** 115 * Pre-compiled regex to skip non-meaningful messages when extracting first prompt. 116 * Matches anything starting with a lowercase XML-like tag (IDE context, hook 117 * output, task notifications, channel messages, etc.) or a synthetic interrupt 118 * marker. Kept in sync with sessionStoragePortable.ts — generic pattern avoids 119 * an ever-growing allowlist that falls behind as new notification types ship. 120 */ 121// 50MB — prevents OOM in the tombstone slow path which reads + rewrites the 122// entire session file. Session files can grow to multiple GB (inc-3930). 123const MAX_TOMBSTONE_REWRITE_BYTES = 50 * 1024 * 1024 124 125const SKIP_FIRST_PROMPT_PATTERN = 126 /^(?:\s*<[a-z][\w-]*[\s>]|\[Request interrupted by user[^\]]*\])/ 127 128/** 129 * Type guard to check if an entry is a transcript message. 130 * Transcript messages include user, assistant, attachment, and system messages. 131 * IMPORTANT: This is the single source of truth for what constitutes a transcript message. 132 * loadTranscriptFile() uses this to determine which messages to load into the chain. 133 * 134 * Progress messages are NOT transcript messages. They are ephemeral UI state 135 * and must not be persisted to the JSONL or participate in the parentUuid 136 * chain. Including them caused chain forks that orphaned real conversation 137 * messages on resume (see #14373, #23537). 138 */ 139export function isTranscriptMessage(entry: Entry): entry is TranscriptMessage { 140 return ( 141 entry.type === 'user' || 142 entry.type === 'assistant' || 143 entry.type === 'attachment' || 144 entry.type === 'system' 145 ) 146} 147 148/** 149 * Entries that participate in the parentUuid chain. 
Used on the write path 150 * (insertMessageChain, useLogMessages) to skip progress when assigning 151 * parentUuid. Old transcripts with progress already in the chain are handled 152 * by the progressBridge rewrite in loadTranscriptFile. 153 */ 154export function isChainParticipant(m: Pick<Message, 'type'>): boolean { 155 return m.type !== 'progress' 156} 157 158type LegacyProgressEntry = { 159 type: 'progress' 160 uuid: UUID 161 parentUuid: UUID | null 162} 163 164/** 165 * Progress entries in transcripts written before PR #24099. They are not 166 * in the Entry type union anymore but still exist on disk with uuid and 167 * parentUuid fields. loadTranscriptFile bridges the chain across them. 168 */ 169function isLegacyProgressEntry(entry: unknown): entry is LegacyProgressEntry { 170 return ( 171 typeof entry === 'object' && 172 entry !== null && 173 'type' in entry && 174 entry.type === 'progress' && 175 'uuid' in entry && 176 typeof entry.uuid === 'string' 177 ) 178} 179 180/** 181 * High-frequency tool progress ticks (1/sec for Sleep, per-chunk for Bash). 182 * These are UI-only: not sent to the API, not rendered after the tool 183 * completes. Used by REPL.tsx to replace-in-place instead of appending, and 184 * by loadTranscriptFile to skip legacy entries from old transcripts. 185 */ 186const EPHEMERAL_PROGRESS_TYPES = new Set([ 187 'bash_progress', 188 'powershell_progress', 189 'mcp_progress', 190 ...(feature('PROACTIVE') || feature('KAIROS') 191 ? (['sleep_progress'] as const) 192 : []), 193]) 194export function isEphemeralToolProgress(dataType: unknown): boolean { 195 return typeof dataType === 'string' && EPHEMERAL_PROGRESS_TYPES.has(dataType) 196} 197 198export function getProjectsDir(): string { 199 return join(getClaudeConfigHomeDir(), 'projects') 200} 201 202export function getTranscriptPath(): string { 203 const projectDir = getSessionProjectDir() ?? 
getProjectDir(getOriginalCwd()) 204 return join(projectDir, `${getSessionId()}.jsonl`) 205} 206 207export function getTranscriptPathForSession(sessionId: string): string { 208 // When asking for the CURRENT session's transcript, honor sessionProjectDir 209 // the same way getTranscriptPath() does. Without this, hooks get a 210 // transcript_path computed from originalCwd while the actual file was 211 // written to sessionProjectDir (set by switchActiveSession on resume/branch) 212 // — different directories, so the hook sees MISSING (gh-30217). CC-34 213 // made sessionId + sessionProjectDir atomic precisely to prevent this 214 // kind of drift; this function just wasn't updated to read both. 215 // 216 // For OTHER session IDs we can only guess via originalCwd — we don't 217 // track a sessionId→projectDir map. Callers wanting a specific other 218 // session's path should pass fullPath explicitly (most save* functions 219 // already accept this). 220 if (sessionId === getSessionId()) { 221 return getTranscriptPath() 222 } 223 const projectDir = getProjectDir(getOriginalCwd()) 224 return join(projectDir, `${sessionId}.jsonl`) 225} 226 227// 50 MB — session JSONL can grow to multiple GB (inc-3930). Callers that 228// read the raw transcript must bail out above this threshold to avoid OOM. 229export const MAX_TRANSCRIPT_READ_BYTES = 50 * 1024 * 1024 230 231// In-memory map of agentId → subdirectory for grouping related subagent 232// transcripts (e.g. workflow runs write to subagents/workflows/<runId>/). 233// Populated before the agent runs; consulted by getAgentTranscriptPath. 
234const agentTranscriptSubdirs = new Map<string, string>() 235 236export function setAgentTranscriptSubdir( 237 agentId: string, 238 subdir: string, 239): void { 240 agentTranscriptSubdirs.set(agentId, subdir) 241} 242 243export function clearAgentTranscriptSubdir(agentId: string): void { 244 agentTranscriptSubdirs.delete(agentId) 245} 246 247export function getAgentTranscriptPath(agentId: AgentId): string { 248 // Same sessionProjectDir consistency as getTranscriptPathForSession — 249 // subagent transcripts live under the session dir, so if the session 250 // transcript is at sessionProjectDir, subagent transcripts are too. 251 const projectDir = getSessionProjectDir() ?? getProjectDir(getOriginalCwd()) 252 const sessionId = getSessionId() 253 const subdir = agentTranscriptSubdirs.get(agentId) 254 const base = subdir 255 ? join(projectDir, sessionId, 'subagents', subdir) 256 : join(projectDir, sessionId, 'subagents') 257 return join(base, `agent-${agentId}.jsonl`) 258} 259 260function getAgentMetadataPath(agentId: AgentId): string { 261 return getAgentTranscriptPath(agentId).replace(/\.jsonl$/, '.meta.json') 262} 263 264export type AgentMetadata = { 265 agentType: string 266 /** Worktree path if the agent was spawned with isolation: "worktree" */ 267 worktreePath?: string 268 /** Original task description from the AgentTool input. Persisted so a 269 * resumed agent's notification can show the original description instead 270 * of a placeholder. Optional — older metadata files lack this field. */ 271 description?: string 272} 273 274/** 275 * Persist the agentType used to launch a subagent. Read by resume to 276 * route correctly when subagent_type is omitted — without this, resuming 277 * a fork silently degrades to general-purpose (4KB system prompt, no 278 * inherited history). Sidecar file avoids JSONL schema changes. 279 * 280 * Also stores the worktreePath when the agent was spawned with worktree 281 * isolation, enabling resume to restore the correct cwd. 
282 */ 283export async function writeAgentMetadata( 284 agentId: AgentId, 285 metadata: AgentMetadata, 286): Promise<void> { 287 const path = getAgentMetadataPath(agentId) 288 await mkdir(dirname(path), { recursive: true }) 289 await writeFile(path, JSON.stringify(metadata)) 290} 291 292export async function readAgentMetadata( 293 agentId: AgentId, 294): Promise<AgentMetadata | null> { 295 const path = getAgentMetadataPath(agentId) 296 try { 297 const raw = await readFile(path, 'utf-8') 298 return JSON.parse(raw) as AgentMetadata 299 } catch (e) { 300 if (isFsInaccessible(e)) return null 301 throw e 302 } 303} 304 305export type RemoteAgentMetadata = { 306 taskId: string 307 remoteTaskType: string 308 /** CCR session ID — used to fetch live status from the Sessions API on resume. */ 309 sessionId: string 310 title: string 311 command: string 312 spawnedAt: number 313 toolUseId?: string 314 isLongRunning?: boolean 315 isUltraplan?: boolean 316 isRemoteReview?: boolean 317 remoteTaskMetadata?: Record<string, unknown> 318} 319 320function getRemoteAgentsDir(): string { 321 // Same sessionProjectDir fallback as getAgentTranscriptPath — the project 322 // dir (containing the .jsonl), not the session dir, so sessionId is joined. 323 const projectDir = getSessionProjectDir() ?? getProjectDir(getOriginalCwd()) 324 return join(projectDir, getSessionId(), 'remote-agents') 325} 326 327function getRemoteAgentMetadataPath(taskId: string): string { 328 return join(getRemoteAgentsDir(), `remote-agent-${taskId}.meta.json`) 329} 330 331/** 332 * Persist metadata for a remote-agent task so it can be restored on session 333 * resume. Per-task sidecar file (sibling dir to subagents/) survives 334 * hydrateSessionFromRemote's .jsonl wipe; status is always fetched fresh 335 * from CCR on restore — only identity is persisted locally. 
336 */ 337export async function writeRemoteAgentMetadata( 338 taskId: string, 339 metadata: RemoteAgentMetadata, 340): Promise<void> { 341 const path = getRemoteAgentMetadataPath(taskId) 342 await mkdir(dirname(path), { recursive: true }) 343 await writeFile(path, JSON.stringify(metadata)) 344} 345 346export async function readRemoteAgentMetadata( 347 taskId: string, 348): Promise<RemoteAgentMetadata | null> { 349 const path = getRemoteAgentMetadataPath(taskId) 350 try { 351 const raw = await readFile(path, 'utf-8') 352 return JSON.parse(raw) as RemoteAgentMetadata 353 } catch (e) { 354 if (isFsInaccessible(e)) return null 355 throw e 356 } 357} 358 359export async function deleteRemoteAgentMetadata(taskId: string): Promise<void> { 360 const path = getRemoteAgentMetadataPath(taskId) 361 try { 362 await unlink(path) 363 } catch (e) { 364 if (isFsInaccessible(e)) return 365 throw e 366 } 367} 368 369/** 370 * Scan the remote-agents/ directory for all persisted metadata files. 371 * Used by restoreRemoteAgentTasks to reconnect to still-running CCR sessions. 372 */ 373export async function listRemoteAgentMetadata(): Promise< 374 RemoteAgentMetadata[] 375> { 376 const dir = getRemoteAgentsDir() 377 let entries: Dirent[] 378 try { 379 entries = await readdir(dir, { withFileTypes: true }) 380 } catch (e) { 381 if (isFsInaccessible(e)) return [] 382 throw e 383 } 384 const results: RemoteAgentMetadata[] = [] 385 for (const entry of entries) { 386 if (!entry.isFile() || !entry.name.endsWith('.meta.json')) continue 387 try { 388 const raw = await readFile(join(dir, entry.name), 'utf-8') 389 results.push(JSON.parse(raw) as RemoteAgentMetadata) 390 } catch (e) { 391 // Skip unreadable or corrupt files — a partial write from a crashed 392 // fire-and-forget persist shouldn't take down the whole restore. 
393 logForDebugging( 394 `listRemoteAgentMetadata: skipping ${entry.name}: ${String(e)}`, 395 ) 396 } 397 } 398 return results 399} 400 401export function sessionIdExists(sessionId: string): boolean { 402 const projectDir = getProjectDir(getOriginalCwd()) 403 const sessionFile = join(projectDir, `${sessionId}.jsonl`) 404 const fs = getFsImplementation() 405 try { 406 fs.statSync(sessionFile) 407 return true 408 } catch { 409 return false 410 } 411} 412 413// exported for testing 414export function getNodeEnv(): string { 415 return process.env.NODE_ENV || 'development' 416} 417 418// exported for testing 419export function getUserType(): string { 420 return process.env.USER_TYPE || 'external' 421} 422 423function getEntrypoint(): string | undefined { 424 return process.env.CLAUDE_CODE_ENTRYPOINT 425} 426 427export function isCustomTitleEnabled(): boolean { 428 return true 429} 430 431// Memoized: called 12+ times per turn via hooks.ts createBaseHookInput 432// (PostToolUse path, 5×/turn) + various save* functions. Input is a cwd 433// string; homedir/env/regex are all session-invariant so the result is 434// stable for a given input. Worktree switches just change the key — no 435// cache clear needed. 436export const getProjectDir = memoize((projectDir: string): string => { 437 return join(getProjectsDir(), sanitizePath(projectDir)) 438}) 439 440let project: Project | null = null 441let cleanupRegistered = false 442 443function getProject(): Project { 444 if (!project) { 445 project = new Project() 446 447 // Register flush as a cleanup handler (only once) 448 if (!cleanupRegistered) { 449 registerCleanup(async () => { 450 // Flush queued writes first, then re-append session metadata 451 // (customTitle, tag) so they always appear in the last 64KB tail 452 // window. 
readLiteMetadata only reads the tail to extract these 453 // fields — if enough messages are appended after a /rename, the 454 // custom-title entry gets pushed outside the window and --resume 455 // shows the auto-generated firstPrompt instead. 456 await project?.flush() 457 try { 458 project?.reAppendSessionMetadata() 459 } catch { 460 // Best-effort — don't let metadata re-append crash the cleanup 461 } 462 }) 463 cleanupRegistered = true 464 } 465 } 466 return project 467} 468 469/** 470 * Reset the Project singleton's flush state for testing. 471 * This ensures tests don't interfere with each other via shared counter state. 472 */ 473export function resetProjectFlushStateForTesting(): void { 474 project?._resetFlushState() 475} 476 477/** 478 * Reset the entire Project singleton for testing. 479 * This ensures tests with different CLAUDE_CONFIG_DIR values 480 * don't share stale sessionFile paths. 481 */ 482export function resetProjectForTesting(): void { 483 project = null 484} 485 486export function setSessionFileForTesting(path: string): void { 487 getProject().sessionFile = path 488} 489 490type InternalEventWriter = ( 491 eventType: string, 492 payload: Record<string, unknown>, 493 options?: { isCompaction?: boolean; agentId?: string }, 494) => Promise<void> 495 496/** 497 * Register a CCR v2 internal event writer for transcript persistence. 498 * When set, transcript messages are written as internal worker events 499 * instead of going through v1 Session Ingress. 500 */ 501export function setInternalEventWriter(writer: InternalEventWriter): void { 502 getProject().setInternalEventWriter(writer) 503} 504 505type InternalEventReader = () => Promise< 506 { payload: Record<string, unknown>; agent_id?: string }[] | null 507> 508 509/** 510 * Register a CCR v2 internal event reader for session resume. 511 * When set, hydrateFromCCRv2InternalEvents() can fetch foreground and 512 * subagent internal events to reconstruct conversation state on reconnection. 
513 */ 514export function setInternalEventReader( 515 reader: InternalEventReader, 516 subagentReader: InternalEventReader, 517): void { 518 getProject().setInternalEventReader(reader) 519 getProject().setInternalSubagentEventReader(subagentReader) 520} 521 522/** 523 * Set the remote ingress URL on the current Project for testing. 524 * This simulates what hydrateRemoteSession does in production. 525 */ 526export function setRemoteIngressUrlForTesting(url: string): void { 527 getProject().setRemoteIngressUrl(url) 528} 529 530const REMOTE_FLUSH_INTERVAL_MS = 10 531 532class Project { 533 // Minimal cache for current session only (not all sessions) 534 currentSessionTag: string | undefined 535 currentSessionTitle: string | undefined 536 currentSessionAgentName: string | undefined 537 currentSessionAgentColor: string | undefined 538 currentSessionLastPrompt: string | undefined 539 currentSessionAgentSetting: string | undefined 540 currentSessionMode: 'coordinator' | 'normal' | undefined 541 // Tri-state: undefined = never touched (don't write), null = exited worktree, 542 // object = currently in worktree. reAppendSessionMetadata writes null so 543 // --resume knows the session exited (vs. crashed while inside). 544 currentSessionWorktree: PersistedWorktreeSession | null | undefined 545 currentSessionPrNumber: number | undefined 546 currentSessionPrUrl: string | undefined 547 currentSessionPrRepository: string | undefined 548 549 sessionFile: string | null = null 550 // Entries buffered while sessionFile is null. Flushed by materializeSessionFile 551 // on the first user/assistant message — prevents metadata-only session files. 
552 private pendingEntries: Entry[] = [] 553 private remoteIngressUrl: string | null = null 554 private internalEventWriter: InternalEventWriter | null = null 555 private internalEventReader: InternalEventReader | null = null 556 private internalSubagentEventReader: InternalEventReader | null = null 557 private pendingWriteCount: number = 0 558 private flushResolvers: Array<() => void> = [] 559 // Per-file write queues. Each entry carries a resolve callback so 560 // callers of enqueueWrite can optionally await their specific write. 561 private writeQueues = new Map< 562 string, 563 Array<{ entry: Entry; resolve: () => void }> 564 >() 565 private flushTimer: ReturnType<typeof setTimeout> | null = null 566 private activeDrain: Promise<void> | null = null 567 private FLUSH_INTERVAL_MS = 100 568 private readonly MAX_CHUNK_BYTES = 100 * 1024 * 1024 569 570 constructor() {} 571 572 /** @internal Reset flush/queue state for testing. */ 573 _resetFlushState(): void { 574 this.pendingWriteCount = 0 575 this.flushResolvers = [] 576 if (this.flushTimer) clearTimeout(this.flushTimer) 577 this.flushTimer = null 578 this.activeDrain = null 579 this.writeQueues = new Map() 580 } 581 582 private incrementPendingWrites(): void { 583 this.pendingWriteCount++ 584 } 585 586 private decrementPendingWrites(): void { 587 this.pendingWriteCount-- 588 if (this.pendingWriteCount === 0) { 589 // Resolve all waiting flush promises 590 for (const resolve of this.flushResolvers) { 591 resolve() 592 } 593 this.flushResolvers = [] 594 } 595 } 596 597 private async trackWrite<T>(fn: () => Promise<T>): Promise<T> { 598 this.incrementPendingWrites() 599 try { 600 return await fn() 601 } finally { 602 this.decrementPendingWrites() 603 } 604 } 605 606 private enqueueWrite(filePath: string, entry: Entry): Promise<void> { 607 return new Promise<void>(resolve => { 608 let queue = this.writeQueues.get(filePath) 609 if (!queue) { 610 queue = [] 611 this.writeQueues.set(filePath, queue) 612 } 613 
queue.push({ entry, resolve }) 614 this.scheduleDrain() 615 }) 616 } 617 618 private scheduleDrain(): void { 619 if (this.flushTimer) { 620 return 621 } 622 this.flushTimer = setTimeout(async () => { 623 this.flushTimer = null 624 this.activeDrain = this.drainWriteQueue() 625 await this.activeDrain 626 this.activeDrain = null 627 // If more items arrived during drain, schedule again 628 if (this.writeQueues.size > 0) { 629 this.scheduleDrain() 630 } 631 }, this.FLUSH_INTERVAL_MS) 632 } 633 634 private async appendToFile(filePath: string, data: string): Promise<void> { 635 try { 636 await fsAppendFile(filePath, data, { mode: 0o600 }) 637 } catch { 638 // Directory may not exist — some NFS-like filesystems return 639 // unexpected error codes, so don't discriminate on code. 640 await mkdir(dirname(filePath), { recursive: true, mode: 0o700 }) 641 await fsAppendFile(filePath, data, { mode: 0o600 }) 642 } 643 } 644 645 private async drainWriteQueue(): Promise<void> { 646 for (const [filePath, queue] of this.writeQueues) { 647 if (queue.length === 0) { 648 continue 649 } 650 const batch = queue.splice(0) 651 652 let content = '' 653 const resolvers: Array<() => void> = [] 654 655 for (const { entry, resolve } of batch) { 656 const line = jsonStringify(entry) + '\n' 657 658 if (content.length + line.length >= this.MAX_CHUNK_BYTES) { 659 // Flush chunk and resolve its entries before starting a new one 660 await this.appendToFile(filePath, content) 661 for (const r of resolvers) { 662 r() 663 } 664 resolvers.length = 0 665 content = '' 666 } 667 668 content += line 669 resolvers.push(resolve) 670 } 671 672 if (content.length > 0) { 673 await this.appendToFile(filePath, content) 674 for (const r of resolvers) { 675 r() 676 } 677 } 678 } 679 680 // Clean up empty queues 681 for (const [filePath, queue] of this.writeQueues) { 682 if (queue.length === 0) { 683 this.writeQueues.delete(filePath) 684 } 685 } 686 } 687 688 resetSessionFile(): void { 689 this.sessionFile = null 690 
this.pendingEntries = [] 691 } 692 693 /** 694 * Re-append cached session metadata to the end of the transcript file. 695 * This ensures metadata stays within the tail window that readLiteMetadata 696 * reads during progressive loading. 697 * 698 * Called from two contexts with different file-ordering implications: 699 * - During compaction (compact.ts, reactiveCompact.ts): writes metadata 700 * just before the boundary marker is emitted - these entries end up 701 * before the boundary and are recovered by scanPreBoundaryMetadata. 702 * - On session exit (cleanup handler): writes metadata at EOF after all 703 * boundaries - this is what enables loadTranscriptFile's pre-compact 704 * skip to find metadata without a forward scan. 705 * 706 * External-writer safety for SDK-mutable fields (custom-title, tag): 707 * before re-appending, refresh the cache from the tail scan window. If an 708 * external process (SDK renameSession/tagSession) wrote a fresher value, 709 * our stale cache absorbs it and the re-append below persists it — not 710 * the stale CLI value. If no entry is in the tail (evicted, or never 711 * written by the SDK), the cache is the only source of truth and is 712 * re-appended as-is. 713 * 714 * Re-append is unconditional (even when the value is already in the 715 * tail): during compaction, a title 40KB from EOF is inside the current 716 * tail window but will fall out once the post-compaction session grows. 717 * Skipping the re-append would defeat the purpose of this call. Fields 718 * the SDK cannot touch (last-prompt, agent-*, mode, pr-link) have no 719 * external-writer concern — their caches are authoritative. 720 */ 721 reAppendSessionMetadata(skipTitleRefresh = false): void { 722 if (!this.sessionFile) return 723 const sessionId = getSessionId() as UUID 724 if (!sessionId) return 725 726 // One sync tail read to refresh SDK-mutable fields. Same 727 // LITE_READ_BUF_SIZE window readLiteMetadata uses. 
Empty string on 728 // failure → extract returns null → cache is the only source of truth. 729 const tail = readFileTailSync(this.sessionFile) 730 731 // Absorb any fresher SDK-written title/tag into our cache. If the SDK 732 // wrote while we had the session open, our cache is stale — the tail 733 // value is authoritative. If the tail has nothing (evicted or never 734 // written externally), the cache stands. 735 // 736 // Filter with startsWith to match only top-level JSONL entries (col 0) 737 // and not "type":"tag" appearing inside a nested tool_use input that 738 // happens to be JSON-serialized into a message. 739 const tailLines = tail.split('\n') 740 if (!skipTitleRefresh) { 741 const titleLine = tailLines.findLast(l => 742 l.startsWith('{"type":"custom-title"'), 743 ) 744 if (titleLine) { 745 const tailTitle = extractLastJsonStringField(titleLine, 'customTitle') 746 // `!== undefined` distinguishes no-match from empty-string match. 747 // renameSession rejects empty titles, but the CLI is defensive: an 748 // external writer with customTitle:"" should clear the cache so the 749 // re-append below skips it (instead of resurrecting a stale title). 750 if (tailTitle !== undefined) { 751 this.currentSessionTitle = tailTitle || undefined 752 } 753 } 754 } 755 const tagLine = tailLines.findLast(l => l.startsWith('{"type":"tag"')) 756 if (tagLine) { 757 const tailTag = extractLastJsonStringField(tagLine, 'tag') 758 // Same: tagSession(id, null) writes `tag:""` to clear. 759 if (tailTag !== undefined) { 760 this.currentSessionTag = tailTag || undefined 761 } 762 } 763 764 // lastPrompt is re-appended so readLiteMetadata can show what the 765 // user was most recently doing. Written first so customTitle/tag/etc 766 // land closer to EOF (they're the more critical fields for tail reads). 
767 if (this.currentSessionLastPrompt) { 768 appendEntryToFile(this.sessionFile, { 769 type: 'last-prompt', 770 lastPrompt: this.currentSessionLastPrompt, 771 sessionId, 772 }) 773 } 774 // Unconditional: cache was refreshed from tail above; re-append keeps 775 // the entry at EOF so compaction-pushed content doesn't evict it. 776 if (this.currentSessionTitle) { 777 appendEntryToFile(this.sessionFile, { 778 type: 'custom-title', 779 customTitle: this.currentSessionTitle, 780 sessionId, 781 }) 782 } 783 if (this.currentSessionTag) { 784 appendEntryToFile(this.sessionFile, { 785 type: 'tag', 786 tag: this.currentSessionTag, 787 sessionId, 788 }) 789 } 790 if (this.currentSessionAgentName) { 791 appendEntryToFile(this.sessionFile, { 792 type: 'agent-name', 793 agentName: this.currentSessionAgentName, 794 sessionId, 795 }) 796 } 797 if (this.currentSessionAgentColor) { 798 appendEntryToFile(this.sessionFile, { 799 type: 'agent-color', 800 agentColor: this.currentSessionAgentColor, 801 sessionId, 802 }) 803 } 804 if (this.currentSessionAgentSetting) { 805 appendEntryToFile(this.sessionFile, { 806 type: 'agent-setting', 807 agentSetting: this.currentSessionAgentSetting, 808 sessionId, 809 }) 810 } 811 if (this.currentSessionMode) { 812 appendEntryToFile(this.sessionFile, { 813 type: 'mode', 814 mode: this.currentSessionMode, 815 sessionId, 816 }) 817 } 818 if (this.currentSessionWorktree !== undefined) { 819 appendEntryToFile(this.sessionFile, { 820 type: 'worktree-state', 821 worktreeSession: this.currentSessionWorktree, 822 sessionId, 823 }) 824 } 825 if ( 826 this.currentSessionPrNumber !== undefined && 827 this.currentSessionPrUrl && 828 this.currentSessionPrRepository 829 ) { 830 appendEntryToFile(this.sessionFile, { 831 type: 'pr-link', 832 sessionId, 833 prNumber: this.currentSessionPrNumber, 834 prUrl: this.currentSessionPrUrl, 835 prRepository: this.currentSessionPrRepository, 836 timestamp: new Date().toISOString(), 837 }) 838 } 839 } 840 841 async flush(): 
  /**
   * Flush all pending transcript writes to disk.
   *
   * Cancels the batching timer, waits out any in-flight drain, drains the
   * remaining queues, and then — if tracked non-queue writes are still
   * outstanding — parks on a resolver that trackWrite fires when the
   * counter reaches zero.
   */
  async flush(): Promise<void> {
    // Cancel pending timer
    if (this.flushTimer) {
      clearTimeout(this.flushTimer)
      this.flushTimer = null
    }
    // Wait for any in-flight drain to finish
    if (this.activeDrain) {
      await this.activeDrain
    }
    // Drain anything remaining in the queues
    await this.drainWriteQueue()

    // Wait for non-queue tracked operations (e.g. removeMessageByUuid)
    if (this.pendingWriteCount === 0) {
      return
    }
    return new Promise<void>(resolve => {
      this.flushResolvers.push(resolve)
    })
  }

  /**
   * Remove a message from the transcript by UUID.
   * Used for tombstoning orphaned messages from failed streaming attempts.
   *
   * The target is almost always the most recently appended entry, so we
   * read only the tail, locate the line, and splice it out with a
   * positional write + truncate instead of rewriting the whole file.
   */
  async removeMessageByUuid(targetUuid: UUID): Promise<void> {
    return this.trackWrite(async () => {
      if (this.sessionFile === null) return
      try {
        let fileSize = 0
        const fh = await fsOpen(this.sessionFile, 'r+')
        try {
          const { size } = await fh.stat()
          fileSize = size
          if (size === 0) return

          // Read at most LITE_READ_BUF_SIZE bytes from the end of the file.
          const chunkLen = Math.min(size, LITE_READ_BUF_SIZE)
          const tailStart = size - chunkLen
          const buf = Buffer.allocUnsafe(chunkLen)
          const { bytesRead } = await fh.read(buf, 0, chunkLen, tailStart)
          const tail = buf.subarray(0, bytesRead)

          // Entries are serialized via JSON.stringify (no key-value
          // whitespace). Search for the full `"uuid":"..."` pattern, not
          // just the bare UUID, so we do not match the same value sitting
          // in `parentUuid` of a child entry. UUIDs are pure ASCII so a
          // byte-level search is correct.
          const needle = `"uuid":"${targetUuid}"`
          const matchIdx = tail.lastIndexOf(needle)

          if (matchIdx >= 0) {
            // 0x0a never appears inside a UTF-8 multi-byte sequence, so
            // byte-scanning for line boundaries is safe even if the chunk
            // starts mid-character.
            const prevNl = tail.lastIndexOf(0x0a, matchIdx)
            // If the preceding newline is outside our chunk and we did not
            // read from the start of the file, the line is longer than the
            // window - fall through to the slow path.
            if (prevNl >= 0 || tailStart === 0) {
              const lineStart = prevNl + 1 // 0 when prevNl === -1
              const nextNl = tail.indexOf(0x0a, matchIdx + needle.length)
              const lineEnd = nextNl >= 0 ? nextNl + 1 : bytesRead

              const absLineStart = tailStart + lineStart
              const afterLen = bytesRead - lineEnd
              // Truncate first, then re-append the trailing lines. In the
              // common case (target is the last entry) afterLen is 0 and
              // this is a single ftruncate.
              await fh.truncate(absLineStart)
              if (afterLen > 0) {
                await fh.write(tail, lineEnd, afterLen, absLineStart)
              }
              return
            }
          }
        } finally {
          await fh.close()
        }

        // Slow path: target was not in the last 64KB. Rare - requires many
        // large entries to have landed between the write and the tombstone.
        if (fileSize > MAX_TOMBSTONE_REWRITE_BYTES) {
          logForDebugging(
            `Skipping tombstone removal: session file too large (${formatFileSize(fileSize)})`,
            { level: 'warn' },
          )
          return
        }
        // Full-file rewrite: drop the line whose parsed uuid matches.
        const content = await readFile(this.sessionFile, { encoding: 'utf-8' })
        const lines = content.split('\n').filter((line: string) => {
          if (!line.trim()) return true
          try {
            const entry = jsonParse(line)
            return entry.uuid !== targetUuid
          } catch {
            return true // Keep malformed lines
          }
        })
        await writeFile(this.sessionFile, lines.join('\n'), {
          encoding: 'utf8',
        })
      } catch {
        // Silently ignore errors - the file might not exist yet
      }
    })
  }

  /**
   * True when test env / cleanupPeriodDays=0 / --no-session-persistence /
   * CLAUDE_CODE_SKIP_PROMPT_HISTORY should suppress all transcript writes.
   * Shared guard for appendEntry and materializeSessionFile so both skip
   * consistently. The env var is set by tmuxSocket.ts so Tungsten-spawned
   * test sessions don't pollute the user's --resume list.
   */
  private shouldSkipPersistence(): boolean {
    const allowTestPersistence = isEnvTruthy(
      process.env.TEST_ENABLE_SESSION_PERSISTENCE,
    )
    return (
      (getNodeEnv() === 'test' && !allowTestPersistence) ||
      getSettings_DEPRECATED()?.cleanupPeriodDays === 0 ||
      isSessionPersistenceDisabled() ||
      isEnvTruthy(process.env.CLAUDE_CODE_SKIP_PROMPT_HISTORY)
    )
  }
  /**
   * Create the session file, write cached startup metadata, and flush
   * buffered entries. Called on the first user/assistant message.
   */
  private async materializeSessionFile(): Promise<void> {
    // Guard here too — reAppendSessionMetadata writes via appendEntryToFile
    // (not appendEntry) so it would bypass the per-entry persistence check
    // and create a metadata-only file despite --no-session-persistence.
    if (this.shouldSkipPersistence()) return
    this.ensureCurrentSessionFile()
    // mode/agentSetting are cache-only pre-materialization; write them now.
    this.reAppendSessionMetadata()
    if (this.pendingEntries.length > 0) {
      const buffered = this.pendingEntries
      this.pendingEntries = []
      for (const entry of buffered) {
        await this.appendEntry(entry)
      }
    }
  }

  /**
   * Append a chain of messages to the transcript, stamping each one with
   * session metadata and threading parentUuid links through the chain.
   *
   * @param messages messages to persist, in order
   * @param isSidechain true for subagent sidechain transcripts
   * @param agentId owning agent for sidechain messages
   * @param startingParentUuid parent for the first chain participant
   * @param teamInfo optional team/agent attribution stamped on each message
   */
  async insertMessageChain(
    messages: Transcript,
    isSidechain: boolean = false,
    agentId?: string,
    startingParentUuid?: UUID | null,
    teamInfo?: { teamName?: string; agentName?: string },
  ) {
    return this.trackWrite(async () => {
      let parentUuid: UUID | null = startingParentUuid ?? null

      // First user/assistant message materializes the session file.
      // Hook progress/attachment messages alone stay buffered.
      if (
        this.sessionFile === null &&
        messages.some(m => m.type === 'user' || m.type === 'assistant')
      ) {
        await this.materializeSessionFile()
      }

      // Get current git branch once for this message chain
      let gitBranch: string | undefined
      try {
        gitBranch = await getBranch()
      } catch {
        // Not in a git repo or git command failed
        gitBranch = undefined
      }

      // Get slug if one exists for this session (used for plan files, etc.)
      const sessionId = getSessionId()
      const slug = getPlanSlugCache().get(sessionId)

      for (const message of messages) {
        const isCompactBoundary = isCompactBoundaryMessage(message)

        // For tool_result messages, use the assistant message UUID from the message
        // if available (set at creation time), otherwise fall back to sequential parent
        let effectiveParentUuid = parentUuid
        if (
          message.type === 'user' &&
          'sourceToolAssistantUUID' in message &&
          message.sourceToolAssistantUUID
        ) {
          effectiveParentUuid = message.sourceToolAssistantUUID
        }

        const transcriptMessage: TranscriptMessage = {
          parentUuid: isCompactBoundary ? null : effectiveParentUuid,
          logicalParentUuid: isCompactBoundary ? parentUuid : undefined,
          isSidechain,
          teamName: teamInfo?.teamName,
          agentName: teamInfo?.agentName,
          promptId:
            message.type === 'user' ? (getPromptId() ?? undefined) : undefined,
          agentId,
          ...message,
          // Session-stamp fields MUST come after the spread. On --fork-session
          // and --resume, messages arrive as SerializedMessage (carries source
          // sessionId/cwd/etc. because removeExtraFields only strips parentUuid
          // and isSidechain). If sessionId isn't re-stamped, FRESH.jsonl ends up
          // with messages stamped sessionId=A but content-replacement entries
          // stamped sessionId=FRESH (from insertContentReplacement), and
          // loadFullLog's sessionId-keyed contentReplacements lookup misses →
          // replacement records lost → FROZEN misclassification.
          userType: getUserType(),
          entrypoint: getEntrypoint(),
          cwd: getCwd(),
          sessionId,
          version: VERSION,
          gitBranch,
          slug,
        }
        await this.appendEntry(transcriptMessage)
        if (isChainParticipant(message)) {
          parentUuid = message.uuid
        }
      }

      // Cache this turn's user prompt for reAppendSessionMetadata —
      // the --resume picker shows what the user was last doing.
      // Overwritten every turn by design.
      if (!isSidechain) {
        const text = getFirstMeaningfulUserMessageTextContent(messages)
        if (text) {
          const flat = text.replace(/\n/g, ' ').trim()
          this.currentSessionLastPrompt =
            flat.length > 200 ? flat.slice(0, 200).trim() + '…' : flat
        }
      }
    })
  }

  /** Append a file-history snapshot entry (tracked write). */
  async insertFileHistorySnapshot(
    messageId: UUID,
    snapshot: FileHistorySnapshot,
    isSnapshotUpdate: boolean,
  ) {
    return this.trackWrite(async () => {
      const fileHistoryMessage: FileHistorySnapshotMessage = {
        type: 'file-history-snapshot',
        messageId,
        snapshot,
        isSnapshotUpdate,
      }
      await this.appendEntry(fileHistoryMessage)
    })
  }

  /** Append a queue-operation entry (tracked write). */
  async insertQueueOperation(queueOp: QueueOperationMessage) {
    return this.trackWrite(async () => {
      await this.appendEntry(queueOp)
    })
  }

  /** Append an attribution snapshot entry (tracked write). */
  async insertAttributionSnapshot(snapshot: AttributionSnapshotMessage) {
    return this.trackWrite(async () => {
      await this.appendEntry(snapshot)
    })
  }

  /**
   * Append a content-replacement record, stamped with the CURRENT session
   * id (appendEntry routes subagent records to the agent transcript file).
   */
  async insertContentReplacement(
    replacements: ContentReplacementRecord[],
    agentId?: AgentId,
  ) {
    return this.trackWrite(async () => {
      const entry: ContentReplacementEntry = {
        type: 'content-replacement',
        sessionId: getSessionId() as UUID,
        agentId,
        replacements,
      }
      await this.appendEntry(entry)
    })
  }
(this.shouldSkipPersistence()) { 1130 return 1131 } 1132 1133 const currentSessionId = getSessionId() as UUID 1134 const isCurrentSession = sessionId === currentSessionId 1135 1136 let sessionFile: string 1137 if (isCurrentSession) { 1138 // Buffer until materializeSessionFile runs (first user/assistant message). 1139 if (this.sessionFile === null) { 1140 this.pendingEntries.push(entry) 1141 return 1142 } 1143 sessionFile = this.sessionFile 1144 } else { 1145 const existing = await this.getExistingSessionFile(sessionId) 1146 if (!existing) { 1147 logError( 1148 new Error( 1149 `appendEntry: session file not found for other session ${sessionId}`, 1150 ), 1151 ) 1152 return 1153 } 1154 sessionFile = existing 1155 } 1156 1157 // Only load current session messages if needed 1158 if (entry.type === 'summary') { 1159 // Summaries can always be appended 1160 void this.enqueueWrite(sessionFile, entry) 1161 } else if (entry.type === 'custom-title') { 1162 // Custom titles can always be appended 1163 void this.enqueueWrite(sessionFile, entry) 1164 } else if (entry.type === 'ai-title') { 1165 // AI titles can always be appended 1166 void this.enqueueWrite(sessionFile, entry) 1167 } else if (entry.type === 'last-prompt') { 1168 void this.enqueueWrite(sessionFile, entry) 1169 } else if (entry.type === 'task-summary') { 1170 void this.enqueueWrite(sessionFile, entry) 1171 } else if (entry.type === 'tag') { 1172 // Tags can always be appended 1173 void this.enqueueWrite(sessionFile, entry) 1174 } else if (entry.type === 'agent-name') { 1175 // Agent names can always be appended 1176 void this.enqueueWrite(sessionFile, entry) 1177 } else if (entry.type === 'agent-color') { 1178 // Agent colors can always be appended 1179 void this.enqueueWrite(sessionFile, entry) 1180 } else if (entry.type === 'agent-setting') { 1181 // Agent settings can always be appended 1182 void this.enqueueWrite(sessionFile, entry) 1183 } else if (entry.type === 'pr-link') { 1184 // PR links can always be 
appended 1185 void this.enqueueWrite(sessionFile, entry) 1186 } else if (entry.type === 'file-history-snapshot') { 1187 // File history snapshots can always be appended 1188 void this.enqueueWrite(sessionFile, entry) 1189 } else if (entry.type === 'attribution-snapshot') { 1190 // Attribution snapshots can always be appended 1191 void this.enqueueWrite(sessionFile, entry) 1192 } else if (entry.type === 'speculation-accept') { 1193 // Speculation accept entries can always be appended 1194 void this.enqueueWrite(sessionFile, entry) 1195 } else if (entry.type === 'mode') { 1196 // Mode entries can always be appended 1197 void this.enqueueWrite(sessionFile, entry) 1198 } else if (entry.type === 'worktree-state') { 1199 void this.enqueueWrite(sessionFile, entry) 1200 } else if (entry.type === 'content-replacement') { 1201 // Content replacement records can always be appended. Subagent records 1202 // go to the sidechain file (for AgentTool resume); main-thread 1203 // records go to the session file (for /resume). 1204 const targetFile = entry.agentId 1205 ? getAgentTranscriptPath(entry.agentId) 1206 : sessionFile 1207 void this.enqueueWrite(targetFile, entry) 1208 } else if (entry.type === 'marble-origami-commit') { 1209 // Always append. Commit order matters for restore (later commits may 1210 // reference earlier commits' summary messages), so these must be 1211 // written in the order received and read back sequentially. 1212 void this.enqueueWrite(sessionFile, entry) 1213 } else if (entry.type === 'marble-origami-snapshot') { 1214 // Always append. Last-wins on restore — later entries supersede. 
1215 void this.enqueueWrite(sessionFile, entry) 1216 } else { 1217 const messageSet = await getSessionMessages(sessionId) 1218 if (entry.type === 'queue-operation') { 1219 // Queue operations are always appended to the session file 1220 void this.enqueueWrite(sessionFile, entry) 1221 } else { 1222 // At this point, entry must be a TranscriptMessage (user/assistant/attachment/system) 1223 // All other entry types have been handled above 1224 const isAgentSidechain = 1225 entry.isSidechain && entry.agentId !== undefined 1226 const targetFile = isAgentSidechain 1227 ? getAgentTranscriptPath(asAgentId(entry.agentId!)) 1228 : sessionFile 1229 1230 // For message entries, check if UUID already exists in current session. 1231 // Skip dedup for agent sidechain LOCAL writes — they go to a separate 1232 // file, and fork-inherited parent messages share UUIDs with the main 1233 // session transcript. Deduping against the main session's set would 1234 // drop them, leaving the persisted sidechain transcript incomplete 1235 // (resume-of-fork loads a 10KB file instead of the full 85KB inherited 1236 // context). 1237 // 1238 // The sidechain bypass applies ONLY to the local file write — remote 1239 // persistence (session-ingress) uses a single Last-Uuid chain per 1240 // sessionId, so re-POSTing a UUID it already has 409s and eventually 1241 // exhausts retries → gracefulShutdownSync(1). See inc-4718. 1242 const isNewUuid = !messageSet.has(entry.uuid) 1243 if (isAgentSidechain || isNewUuid) { 1244 // Enqueue write — appendToFile handles ENOENT by creating directories 1245 void this.enqueueWrite(targetFile, entry) 1246 1247 if (!isAgentSidechain) { 1248 // messageSet is main-file-authoritative. Sidechain entries go to a 1249 // separate agent file — adding their UUIDs here causes recordTranscript 1250 // to skip them on the main thread (line ~1270), so the message is never 1251 // written to the main session file. 
The next main-thread message then 1252 // chains its parentUuid to a UUID that only exists in the agent file, 1253 // and --resume's buildConversationChain terminates at the dangling ref. 1254 // Same constraint for remote (inc-4718 above): sidechain persisting a 1255 // UUID the main thread hasn't written yet → 409 when main writes it. 1256 messageSet.add(entry.uuid) 1257 1258 if (isTranscriptMessage(entry)) { 1259 await this.persistToRemote(sessionId, entry) 1260 } 1261 } 1262 } 1263 } 1264 } 1265 } 1266 1267 /** 1268 * Loads the sessionFile variable. 1269 * Do not need to create session files until they are written to. 1270 */ 1271 private ensureCurrentSessionFile(): string { 1272 if (this.sessionFile === null) { 1273 this.sessionFile = getTranscriptPath() 1274 } 1275 1276 return this.sessionFile 1277 } 1278 1279 /** 1280 * Returns the session file path if it exists, null otherwise. 1281 * Used for writing to sessions other than the current one. 1282 * Caches positive results so we only stat once per session. 
  /**
   * Returns the session file path if it exists, null otherwise.
   * Used for writing to sessions other than the current one.
   * Caches positive results so we only stat once per session.
   */
  private existingSessionFiles = new Map<string, string>()
  private async getExistingSessionFile(
    sessionId: UUID,
  ): Promise<string | null> {
    const cached = this.existingSessionFiles.get(sessionId)
    if (cached) return cached

    const targetFile = getTranscriptPathForSession(sessionId)
    try {
      await stat(targetFile)
      this.existingSessionFiles.set(sessionId, targetFile)
      return targetFile
    } catch (e) {
      if (isFsInaccessible(e)) return null
      throw e
    }
  }

  /**
   * Persist one transcript message to remote storage.
   * CCR v2 (internal event writer) takes priority over the v1 Session
   * Ingress path; both are skipped during shutdown. A v1 failure is fatal
   * (graceful shutdown) so local and remote transcripts cannot diverge.
   */
  private async persistToRemote(sessionId: UUID, entry: TranscriptMessage) {
    if (isShuttingDown()) {
      return
    }

    // CCR v2 path: write as internal worker event
    if (this.internalEventWriter) {
      try {
        await this.internalEventWriter(
          'transcript',
          entry as unknown as Record<string, unknown>,
          {
            ...(isCompactBoundaryMessage(entry) && { isCompaction: true }),
            ...(entry.agentId && { agentId: entry.agentId }),
          },
        )
      } catch {
        logEvent('tengu_session_persistence_failed', {})
        logForDebugging('Failed to write transcript as internal event')
      }
      return
    }

    // v1 Session Ingress path
    if (
      !isEnvTruthy(process.env.ENABLE_SESSION_PERSISTENCE) ||
      !this.remoteIngressUrl
    ) {
      return
    }

    const success = await sessionIngress.appendSessionLog(
      sessionId,
      entry,
      this.remoteIngressUrl,
    )

    if (!success) {
      logEvent('tengu_session_persistence_failed', {})
      gracefulShutdownSync(1, 'other')
    }
  }

  /** Enable v1 remote persistence and tighten the flush interval. */
  setRemoteIngressUrl(url: string): void {
    this.remoteIngressUrl = url
    logForDebugging(`Remote persistence enabled with URL: ${url}`)
    if (url) {
      // If using CCR, don't delay messages by any more than 10ms.
      this.FLUSH_INTERVAL_MS = REMOTE_FLUSH_INTERVAL_MS
    }
  }

  /** Register the CCR v2 event writer; supersedes the v1 ingress path. */
  setInternalEventWriter(writer: InternalEventWriter): void {
    this.internalEventWriter = writer
    logForDebugging(
      'CCR v2 internal event writer registered for transcript persistence',
    )
    // Use fast flush interval for CCR v2
    this.FLUSH_INTERVAL_MS = REMOTE_FLUSH_INTERVAL_MS
  }

  /** Register the CCR v2 foreground event reader used on resume. */
  setInternalEventReader(reader: InternalEventReader): void {
    this.internalEventReader = reader
    logForDebugging(
      'CCR v2 internal event reader registered for session resume',
    )
  }

  /** Register the CCR v2 subagent event reader used on resume. */
  setInternalSubagentEventReader(reader: InternalEventReader): void {
    this.internalSubagentEventReader = reader
    logForDebugging(
      'CCR v2 subagent event reader registered for session resume',
    )
  }

  getInternalEventReader(): InternalEventReader | null {
    return this.internalEventReader
  }

  getInternalSubagentEventReader(): InternalEventReader | null {
    return this.internalSubagentEventReader
  }
}

export type TeamInfo = {
  teamName?: string
  agentName?: string
}
// Filter out already-recorded messages before passing to insertMessageChain.
// Without this, after compaction messagesToKeep (same UUIDs as pre-compact
// messages) are dedup-skipped by appendEntry but still advance the parentUuid
// cursor in insertMessageChain, causing new messages to chain from pre-compact
// UUIDs instead of the post-compact summary — orphaning the compact boundary.
//
// `startingParentUuidHint`: used by useLogMessages to pass the parent from
// the previous incremental slice, avoiding an O(n) scan to rediscover it.
//
// Skip-tracking: already-recorded messages are tracked as the parent ONLY if
// they form a PREFIX (appear before any new message). This handles both cases:
// - Growing-array callers (QueryEngine, queryHelpers, LocalMainSessionTask,
//   trajectory): recorded messages are always a prefix → tracked → correct
//   parent chain for new messages.
// - Compaction (useLogMessages): new CB/summary appear FIRST, then recorded
//   messagesToKeep → not a prefix → not tracked → CB gets parentUuid=null
//   (correct: truncates --continue chain at compact boundary).
export async function recordTranscript(
  messages: Message[],
  teamInfo?: TeamInfo,
  startingParentUuidHint?: UUID,
  allMessages?: readonly Message[],
): Promise<UUID | null> {
  const cleanedMessages = cleanMessagesForLogging(messages, allMessages)
  const sessionId = getSessionId() as UUID
  const messageSet = await getSessionMessages(sessionId)
  const newMessages: typeof cleanedMessages = []
  let startingParentUuid: UUID | undefined = startingParentUuidHint
  let seenNewMessage = false
  for (const m of cleanedMessages) {
    if (messageSet.has(m.uuid as UUID)) {
      // Only track skipped messages that form a prefix. After compaction,
      // messagesToKeep appear AFTER new CB/summary, so this skips them.
      if (!seenNewMessage && isChainParticipant(m)) {
        startingParentUuid = m.uuid as UUID
      }
    } else {
      newMessages.push(m)
      seenNewMessage = true
    }
  }
  if (newMessages.length > 0) {
    await getProject().insertMessageChain(
      newMessages,
      false,
      undefined,
      startingParentUuid,
      teamInfo,
    )
  }
  // Return the last ACTUALLY recorded chain-participant's UUID, OR the
  // prefix-tracked UUID if no new chain participants were recorded. This lets
  // callers (useLogMessages) maintain the correct parent chain even when the
  // slice is all-recorded (rewind, /resume scenarios where every message is
  // already in messageSet). Progress is skipped — it's written to the JSONL
  // but nothing chains TO it (see isChainParticipant).
  const lastRecorded = newMessages.findLast(isChainParticipant)
  return (lastRecorded?.uuid as UUID | undefined) ?? startingParentUuid ?? null
}

/** Record a subagent sidechain transcript (no dedup prefix-tracking). */
export async function recordSidechainTranscript(
  messages: Message[],
  agentId?: string,
  startingParentUuid?: UUID | null,
) {
  await getProject().insertMessageChain(
    cleanMessagesForLogging(messages),
    true,
    agentId,
    startingParentUuid,
  )
}

/** Record a queue-operation entry on the current session. */
export async function recordQueueOperation(queueOp: QueueOperationMessage) {
  await getProject().insertQueueOperation(queueOp)
}

/**
 * Remove a message from the transcript by UUID.
 * Used when a tombstone is received for an orphaned message.
 */
export async function removeTranscriptMessage(targetUuid: UUID): Promise<void> {
  await getProject().removeMessageByUuid(targetUuid)
}

/** Record a file-history snapshot tied to a message. */
export async function recordFileHistorySnapshot(
  messageId: UUID,
  snapshot: FileHistorySnapshot,
  isSnapshotUpdate: boolean,
) {
  await getProject().insertFileHistorySnapshot(
    messageId,
    snapshot,
    isSnapshotUpdate,
  )
}

/** Record an attribution snapshot entry. */
export async function recordAttributionSnapshot(
  snapshot: AttributionSnapshotMessage,
) {
  await getProject().insertAttributionSnapshot(snapshot)
}

/** Record content-replacement records (routed per agentId). */
export async function recordContentReplacement(
  replacements: ContentReplacementRecord[],
  agentId?: AgentId,
) {
  await getProject().insertContentReplacement(replacements, agentId)
}
/**
 * Reset the session file pointer after switchSession/regenerateSessionId.
 * The new file is created lazily on the first user/assistant message.
 */
export async function resetSessionFilePointer() {
  getProject().resetSessionFile()
}

/**
 * Adopt the existing session file after --continue/--resume (non-fork).
 * Call after switchSession + resetSessionFilePointer + restoreSessionMetadata:
 * getTranscriptPath() now derives the resumed file's path from the switched
 * sessionId, and the cache holds the final metadata (--name title, resumed
 * mode/tag/agent).
 *
 * Setting sessionFile here — instead of waiting for materializeSessionFile
 * on the first user message — lets the exit cleanup handler's
 * reAppendSessionMetadata run (it bails when sessionFile is null). Without
 * this, `-c -n foo` + quit-before-message drops the title on the floor:
 * the in-memory cache is correct but never written. The resumed file
 * already exists on disk (we loaded from it), so this can't create an
 * orphan the way a fresh --name session would.
 *
 * skipTitleRefresh: restoreSessionMetadata populated the cache from the
 * same disk read microseconds ago, so refreshing from the tail here is a
 * no-op — unless --name was used, in which case it would clobber the fresh
 * CLI title with the stale disk value. After this write, disk == cache and
 * later calls (compaction, exit cleanup) absorb SDK writes normally.
 */
export function adoptResumedSessionFile(): void {
  const project = getProject()
  project.sessionFile = getTranscriptPath()
  project.reAppendSessionMetadata(true)
}
1540 */ 1541export async function recordContextCollapseCommit(commit: { 1542 collapseId: string 1543 summaryUuid: string 1544 summaryContent: string 1545 summary: string 1546 firstArchivedUuid: string 1547 lastArchivedUuid: string 1548}): Promise<void> { 1549 const sessionId = getSessionId() as UUID 1550 if (!sessionId) return 1551 await getProject().appendEntry({ 1552 type: 'marble-origami-commit', 1553 sessionId, 1554 ...commit, 1555 }) 1556} 1557 1558/** 1559 * Snapshot the staged queue + spawn state. Written after each ctx-agent 1560 * spawn resolves (when staged contents may have changed). Last-wins on 1561 * restore — the loader keeps only the most recent snapshot entry. 1562 */ 1563export async function recordContextCollapseSnapshot(snapshot: { 1564 staged: Array<{ 1565 startUuid: string 1566 endUuid: string 1567 summary: string 1568 risk: number 1569 stagedAt: number 1570 }> 1571 armed: boolean 1572 lastSpawnTokens: number 1573}): Promise<void> { 1574 const sessionId = getSessionId() as UUID 1575 if (!sessionId) return 1576 await getProject().appendEntry({ 1577 type: 'marble-origami-snapshot', 1578 sessionId, 1579 ...snapshot, 1580 }) 1581} 1582 1583export async function flushSessionStorage(): Promise<void> { 1584 await getProject().flush() 1585} 1586 1587export async function hydrateRemoteSession( 1588 sessionId: string, 1589 ingressUrl: string, 1590): Promise<boolean> { 1591 switchSession(asSessionId(sessionId)) 1592 1593 const project = getProject() 1594 1595 try { 1596 const remoteLogs = 1597 (await sessionIngress.getSessionLogs(sessionId, ingressUrl)) || [] 1598 1599 // Ensure the project directory and session file exist 1600 const projectDir = getProjectDir(getOriginalCwd()) 1601 await mkdir(projectDir, { recursive: true, mode: 0o700 }) 1602 1603 const sessionFile = getTranscriptPathForSession(sessionId) 1604 1605 // Replace local logs with remote logs. 
writeFile truncates, so no 1606 // unlink is needed; an empty remoteLogs array produces an empty file. 1607 const content = remoteLogs.map(e => jsonStringify(e) + '\n').join('') 1608 await writeFile(sessionFile, content, { encoding: 'utf8', mode: 0o600 }) 1609 1610 logForDebugging(`Hydrated ${remoteLogs.length} entries from remote`) 1611 return remoteLogs.length > 0 1612 } catch (error) { 1613 logForDebugging(`Error hydrating session from remote: ${error}`) 1614 logForDiagnosticsNoPII('error', 'hydrate_remote_session_fail') 1615 return false 1616 } finally { 1617 // Set remote ingress URL after hydrating the remote session 1618 // to ensure we've always synced with the remote session 1619 // prior to enabling persistence 1620 project.setRemoteIngressUrl(ingressUrl) 1621 } 1622} 1623 1624/** 1625 * Hydrate session state from CCR v2 internal events. 1626 * Fetches foreground and subagent events via the registered readers, 1627 * extracts transcript entries from payloads, and writes them to the 1628 * local transcript files (main + per-agent). 1629 * The server handles compaction filtering — it returns events starting 1630 * from the latest compaction boundary. 
1631 */ 1632export async function hydrateFromCCRv2InternalEvents( 1633 sessionId: string, 1634): Promise<boolean> { 1635 const startMs = Date.now() 1636 switchSession(asSessionId(sessionId)) 1637 1638 const project = getProject() 1639 const reader = project.getInternalEventReader() 1640 if (!reader) { 1641 logForDebugging('No internal event reader registered for CCR v2 resume') 1642 return false 1643 } 1644 1645 try { 1646 // Fetch foreground events 1647 const events = await reader() 1648 if (!events) { 1649 logForDebugging('Failed to read internal events for resume') 1650 logForDiagnosticsNoPII('error', 'hydrate_ccr_v2_read_fail') 1651 return false 1652 } 1653 1654 const projectDir = getProjectDir(getOriginalCwd()) 1655 await mkdir(projectDir, { recursive: true, mode: 0o700 }) 1656 1657 // Write foreground transcript 1658 const sessionFile = getTranscriptPathForSession(sessionId) 1659 const fgContent = events.map(e => jsonStringify(e.payload) + '\n').join('') 1660 await writeFile(sessionFile, fgContent, { encoding: 'utf8', mode: 0o600 }) 1661 1662 logForDebugging( 1663 `Hydrated ${events.length} foreground entries from CCR v2 internal events`, 1664 ) 1665 1666 // Fetch and write subagent events 1667 let subagentEventCount = 0 1668 const subagentReader = project.getInternalSubagentEventReader() 1669 if (subagentReader) { 1670 const subagentEvents = await subagentReader() 1671 if (subagentEvents && subagentEvents.length > 0) { 1672 subagentEventCount = subagentEvents.length 1673 // Group by agent_id 1674 const byAgent = new Map<string, Record<string, unknown>[]>() 1675 for (const e of subagentEvents) { 1676 const agentId = e.agent_id || '' 1677 if (!agentId) continue 1678 let list = byAgent.get(agentId) 1679 if (!list) { 1680 list = [] 1681 byAgent.set(agentId, list) 1682 } 1683 list.push(e.payload) 1684 } 1685 1686 // Write each agent's transcript to its own file 1687 for (const [agentId, entries] of byAgent) { 1688 const agentFile = 
getAgentTranscriptPath(asAgentId(agentId)) 1689 await mkdir(dirname(agentFile), { recursive: true, mode: 0o700 }) 1690 const agentContent = entries 1691 .map(p => jsonStringify(p) + '\n') 1692 .join('') 1693 await writeFile(agentFile, agentContent, { 1694 encoding: 'utf8', 1695 mode: 0o600, 1696 }) 1697 } 1698 1699 logForDebugging( 1700 `Hydrated ${subagentEvents.length} subagent entries across ${byAgent.size} agents`, 1701 ) 1702 } 1703 } 1704 1705 logForDiagnosticsNoPII('info', 'hydrate_ccr_v2_completed', { 1706 duration_ms: Date.now() - startMs, 1707 event_count: events.length, 1708 subagent_event_count: subagentEventCount, 1709 }) 1710 return events.length > 0 1711 } catch (error) { 1712 // Re-throw epoch mismatch so the worker doesn't race against gracefulShutdown 1713 if ( 1714 error instanceof Error && 1715 error.message === 'CCRClient: Epoch mismatch (409)' 1716 ) { 1717 throw error 1718 } 1719 logForDebugging(`Error hydrating session from CCR v2: ${error}`) 1720 logForDiagnosticsNoPII('error', 'hydrate_ccr_v2_fail') 1721 return false 1722 } 1723} 1724 1725function extractFirstPrompt(transcript: TranscriptMessage[]): string { 1726 const textContent = getFirstMeaningfulUserMessageTextContent(transcript) 1727 if (textContent) { 1728 let result = textContent.replace(/\n/g, ' ').trim() 1729 1730 // Store a reasonably long version for display-time truncation 1731 // The actual truncation will be applied at display time based on terminal width 1732 if (result.length > 200) { 1733 result = result.slice(0, 200).trim() + '…' 1734 } 1735 1736 return result 1737 } 1738 1739 return 'No prompt' 1740} 1741 1742/** 1743 * Gets the last user message that was processed (i.e., before any non-user message appears). 1744 * Used to determine if a session has valid user interaction. 
 */
export function getFirstMeaningfulUserMessageTextContent<T extends Message>(
  transcript: T[],
): string | undefined {
  for (const msg of transcript) {
    // Only real (non-meta) user messages can supply the first prompt.
    if (msg.type !== 'user' || msg.isMeta) continue
    // Skip compact summary messages - they should not be treated as the first prompt
    if ('isCompactSummary' in msg && msg.isCompactSummary) continue

    const content = msg.message?.content
    if (!content) continue

    // Collect all text values. For array content (common in VS Code where
    // IDE metadata tags come before the user's actual prompt), iterate all
    // text blocks so we don't miss the real prompt hidden behind
    // <ide_selection>/<ide_opened_file> blocks.
    const texts: string[] = []
    if (typeof content === 'string') {
      texts.push(content)
    } else if (Array.isArray(content)) {
      for (const block of content) {
        if (block.type === 'text' && block.text) {
          texts.push(block.text)
        }
      }
    }

    for (const textContent of texts) {
      if (!textContent) continue

      const commandNameTag = extractTag(textContent, COMMAND_NAME_TAG)
      if (commandNameTag) {
        // Normalize "/foo" → "foo" for the built-in lookup below.
        const commandName = commandNameTag.replace(/^\//, '')

        // If it's a built-in command, then it's unlikely to provide
        // meaningful context (e.g. `/model sonnet`)
        if (builtInCommandNames().has(commandName)) {
          continue
        } else {
          // Otherwise, for custom commands, then keep it only if it has
          // arguments (e.g. `/review reticulate splines`)
          const commandArgs = extractTag(textContent, 'command-args')?.trim()
          if (!commandArgs) {
            continue
          }
          // Return clean formatted command instead of raw XML
          return `${commandNameTag} ${commandArgs}`
        }
      }

      // Format bash input with ! prefix (as user typed it). Checked before
      // the generic XML skip so bash-mode sessions get a meaningful title.
      const bashInput = extractTag(textContent, 'bash-input')
      if (bashInput) {
        return `! ${bashInput}`
      }

      // Skip non-meaningful messages (local command output, hook output,
      // autonomous tick prompts, task notifications, pure IDE metadata tags)
      if (SKIP_FIRST_PROMPT_PATTERN.test(textContent)) {
        continue
      }

      return textContent
    }
  }
  // No user message with meaningful text content found.
  return undefined
}

/**
 * Strip persistence-only fields (isSidechain, parentUuid) from transcript
 * messages before they are handed out as serialized messages. The chain
 * topology encoded by parentUuid is already resolved by the time callers
 * receive this array, so the links are dropped rather than exposed.
 */
export function removeExtraFields(
  transcript: TranscriptMessage[],
): SerializedMessage[] {
  return transcript.map(m => {
    const { isSidechain, parentUuid, ...serializedMessage } = m
    return serializedMessage
  })
}

/**
 * Splice the preserved segment back into the chain after compaction.
 *
 * Preserved messages exist in the JSONL with their ORIGINAL pre-compact
 * parentUuids (recordTranscript dedup-skipped them — can't rewrite).
 * The internal chain (keep[i+1]→keep[i]) is intact; only endpoints need
 * patching: head→anchor, and anchor's other children→tail. Anchor is the
 * last summary for suffix-preserving, boundary itself for prefix-preserving.
 *
 * Only the LAST seg-boundary is relinked — earlier segs were summarized
 * into it. Everything physically before the absolute-last boundary (except
 * preservedUuids) is deleted, which handles all multi-boundary shapes
 * without special-casing.
 *
 * Mutates the Map in place.
 */
function applyPreservedSegmentRelinks(
  messages: Map<UUID, TranscriptMessage>,
): void {
  type Seg = NonNullable<
    SystemCompactBoundaryMessage['compactMetadata']['preservedSegment']
  >

  // Find the absolute-last boundary and the last seg-boundary (can differ:
  // manual /compact after reactive compact → seg is stale.
1848 let lastSeg: Seg | undefined 1849 let lastSegBoundaryIdx = -1 1850 let absoluteLastBoundaryIdx = -1 1851 const entryIndex = new Map<UUID, number>() 1852 let i = 0 1853 for (const entry of messages.values()) { 1854 entryIndex.set(entry.uuid, i) 1855 if (isCompactBoundaryMessage(entry)) { 1856 absoluteLastBoundaryIdx = i 1857 const seg = entry.compactMetadata?.preservedSegment 1858 if (seg) { 1859 lastSeg = seg 1860 lastSegBoundaryIdx = i 1861 } 1862 } 1863 i++ 1864 } 1865 // No seg anywhere → no-op. findUnresolvedToolUse etc. read the full map. 1866 if (!lastSeg) return 1867 1868 // Seg stale (no-seg boundary came after): skip relink, still prune at 1869 // absolute — otherwise the stale preserved chain becomes a phantom leaf. 1870 const segIsLive = lastSegBoundaryIdx === absoluteLastBoundaryIdx 1871 1872 // Validate tail→head BEFORE mutating so malformed metadata is a true 1873 // no-op (walk stops at headUuid, doesn't need the relink to run first). 1874 const preservedUuids = new Set<UUID>() 1875 if (segIsLive) { 1876 const walkSeen = new Set<UUID>() 1877 let cur = messages.get(lastSeg.tailUuid) 1878 let reachedHead = false 1879 while (cur && !walkSeen.has(cur.uuid)) { 1880 walkSeen.add(cur.uuid) 1881 preservedUuids.add(cur.uuid) 1882 if (cur.uuid === lastSeg.headUuid) { 1883 reachedHead = true 1884 break 1885 } 1886 cur = cur.parentUuid ? messages.get(cur.parentUuid) : undefined 1887 } 1888 if (!reachedHead) { 1889 // tail→head walk broke — a UUID in the preserved segment isn't in the 1890 // transcript. Returning here skips the prune below, so resume loads 1891 // the full pre-compact history. Known cause: mid-turn-yielded 1892 // attachment pushed to mutableMessages but never recordTranscript'd 1893 // (SDK subprocess restarted before next turn's qe:420 flush). 
1894 logEvent('tengu_relink_walk_broken', { 1895 tailInTranscript: messages.has(lastSeg.tailUuid), 1896 headInTranscript: messages.has(lastSeg.headUuid), 1897 anchorInTranscript: messages.has(lastSeg.anchorUuid), 1898 walkSteps: walkSeen.size, 1899 transcriptSize: messages.size, 1900 }) 1901 return 1902 } 1903 } 1904 1905 if (segIsLive) { 1906 const head = messages.get(lastSeg.headUuid) 1907 if (head) { 1908 messages.set(lastSeg.headUuid, { 1909 ...head, 1910 parentUuid: lastSeg.anchorUuid, 1911 }) 1912 } 1913 // Tail-splice: anchor's other children → tail. No-op if already pointing 1914 // at tail (the useLogMessages race case). 1915 for (const [uuid, msg] of messages) { 1916 if (msg.parentUuid === lastSeg.anchorUuid && uuid !== lastSeg.headUuid) { 1917 messages.set(uuid, { ...msg, parentUuid: lastSeg.tailUuid }) 1918 } 1919 } 1920 // Zero stale usage: on-disk input_tokens reflect pre-compact context 1921 // (~190K) — stripStaleUsage only patched in-memory copies that were 1922 // dedup-skipped. Without this, resume → immediate autocompact spiral. 1923 for (const uuid of preservedUuids) { 1924 const msg = messages.get(uuid) 1925 if (msg?.type !== 'assistant') continue 1926 messages.set(uuid, { 1927 ...msg, 1928 message: { 1929 ...msg.message, 1930 usage: { 1931 ...msg.message.usage, 1932 input_tokens: 0, 1933 output_tokens: 0, 1934 cache_creation_input_tokens: 0, 1935 cache_read_input_tokens: 0, 1936 }, 1937 }, 1938 }) 1939 } 1940 } 1941 1942 // Prune everything physically before the absolute-last boundary that 1943 // isn't preserved. preservedUuids empty when !segIsLive → full prune. 
1944 const toDelete: UUID[] = [] 1945 for (const [uuid] of messages) { 1946 const idx = entryIndex.get(uuid) 1947 if ( 1948 idx !== undefined && 1949 idx < absoluteLastBoundaryIdx && 1950 !preservedUuids.has(uuid) 1951 ) { 1952 toDelete.push(uuid) 1953 } 1954 } 1955 for (const uuid of toDelete) messages.delete(uuid) 1956} 1957 1958/** 1959 * Delete messages that Snip executions removed from the in-memory array, 1960 * and relink parentUuid across the gaps. 1961 * 1962 * Unlike compact_boundary which truncates a prefix, snip removes 1963 * middle ranges. The JSONL is append-only, so removed messages stay on disk 1964 * and the surviving messages' parentUuid chains walk through them. Without 1965 * this filter, buildConversationChain reconstructs the full unsnipped history 1966 * and resume immediately PTLs (adamr-20260320-165831: 397K displayed → 1.65M 1967 * actual). 1968 * 1969 * Deleting alone is not enough: the surviving message AFTER a removed range 1970 * has parentUuid pointing INTO the gap. buildConversationChain would hit 1971 * messages.get(undefined) and stop, orphaning everything before the gap. So 1972 * after delete we relink: for each survivor with a dangling parentUuid, walk 1973 * backward through the removed region's own parent links to the first 1974 * non-removed ancestor. 1975 * 1976 * The boundary records removedUuids at execution time so we can replay the 1977 * exact removal on load. Older boundaries without removedUuids are skipped — 1978 * resume loads their pre-snip history (the pre-fix behavior). 1979 * 1980 * Mutates the Map in place. 1981 */ 1982function applySnipRemovals(messages: Map<UUID, TranscriptMessage>): void { 1983 // Structural check — snipMetadata only exists on the boundary subtype. 1984 // Avoids the subtype literal which is in excluded-strings.txt 1985 // (HISTORY_SNIP is ant-only; the literal must not leak into external builds). 
1986 type WithSnipMeta = { snipMetadata?: { removedUuids?: UUID[] } } 1987 const toDelete = new Set<UUID>() 1988 for (const entry of messages.values()) { 1989 const removedUuids = (entry as WithSnipMeta).snipMetadata?.removedUuids 1990 if (!removedUuids) continue 1991 for (const uuid of removedUuids) toDelete.add(uuid) 1992 } 1993 if (toDelete.size === 0) return 1994 1995 // Capture each to-delete entry's own parentUuid BEFORE deleting so we can 1996 // walk backward through contiguous removed ranges. Entries not in the Map 1997 // (already absent, e.g. from a prior compact_boundary prune) contribute no 1998 // link; the relink walk will stop at the gap and pick up null (chain-root 1999 // behavior — same as if compact truncated there, which it did). 2000 const deletedParent = new Map<UUID, UUID | null>() 2001 let removedCount = 0 2002 for (const uuid of toDelete) { 2003 const entry = messages.get(uuid) 2004 if (!entry) continue 2005 deletedParent.set(uuid, entry.parentUuid) 2006 messages.delete(uuid) 2007 removedCount++ 2008 } 2009 2010 // Relink survivors with dangling parentUuid. Walk backward through 2011 // deletedParent until we hit a UUID not in toDelete (or null). Path 2012 // compression: after resolving, seed the map with the resolved link so 2013 // subsequent survivors sharing the same chain segment don't re-walk. 
2014 const resolve = (start: UUID): UUID | null => { 2015 const path: UUID[] = [] 2016 let cur: UUID | null | undefined = start 2017 while (cur && toDelete.has(cur)) { 2018 path.push(cur) 2019 cur = deletedParent.get(cur) 2020 if (cur === undefined) { 2021 cur = null 2022 break 2023 } 2024 } 2025 for (const p of path) deletedParent.set(p, cur) 2026 return cur 2027 } 2028 let relinkedCount = 0 2029 for (const [uuid, msg] of messages) { 2030 if (!msg.parentUuid || !toDelete.has(msg.parentUuid)) continue 2031 messages.set(uuid, { ...msg, parentUuid: resolve(msg.parentUuid) }) 2032 relinkedCount++ 2033 } 2034 2035 logEvent('tengu_snip_resume_filtered', { 2036 removed_count: removedCount, 2037 relinked_count: relinkedCount, 2038 }) 2039} 2040 2041/** 2042 * O(n) single-pass: find the message with the latest timestamp matching a predicate. 2043 * Replaces the `[...values].filter(pred).sort((a,b) => Date(b)-Date(a))[0]` pattern 2044 * which is O(n log n) + 2n Date allocations. 2045 */ 2046function findLatestMessage<T extends { timestamp: string }>( 2047 messages: Iterable<T>, 2048 predicate: (m: T) => boolean, 2049): T | undefined { 2050 let latest: T | undefined 2051 let maxTime = -Infinity 2052 for (const m of messages) { 2053 if (!predicate(m)) continue 2054 const t = Date.parse(m.timestamp) 2055 if (t > maxTime) { 2056 maxTime = t 2057 latest = m 2058 } 2059 } 2060 return latest 2061} 2062 2063/** 2064 * Builds a conversation chain from a leaf message to root 2065 * @param messages Map of all messages 2066 * @param leafMessage The leaf message to start from 2067 * @returns Array of messages from root to leaf 2068 */ 2069export function buildConversationChain( 2070 messages: Map<UUID, TranscriptMessage>, 2071 leafMessage: TranscriptMessage, 2072): TranscriptMessage[] { 2073 const transcript: TranscriptMessage[] = [] 2074 const seen = new Set<UUID>() 2075 let currentMsg: TranscriptMessage | undefined = leafMessage 2076 while (currentMsg) { 2077 if 
(seen.has(currentMsg.uuid)) { 2078 logError( 2079 new Error( 2080 `Cycle detected in parentUuid chain at message ${currentMsg.uuid}. Returning partial transcript.`, 2081 ), 2082 ) 2083 logEvent('tengu_chain_parent_cycle', {}) 2084 break 2085 } 2086 seen.add(currentMsg.uuid) 2087 transcript.push(currentMsg) 2088 currentMsg = currentMsg.parentUuid 2089 ? messages.get(currentMsg.parentUuid) 2090 : undefined 2091 } 2092 transcript.reverse() 2093 return recoverOrphanedParallelToolResults(messages, transcript, seen) 2094} 2095 2096/** 2097 * Post-pass for buildConversationChain: recover sibling assistant blocks and 2098 * tool_results that the single-parent walk orphaned. 2099 * 2100 * Streaming (claude.ts:~2024) emits one AssistantMessage per content_block_stop 2101 * — N parallel tool_uses → N messages, distinct uuid, same message.id. Each 2102 * tool_result's sourceToolAssistantUUID points to its own one-block assistant, 2103 * so insertMessageChain's override (line ~894) writes each TR's parentUuid to a 2104 * DIFFERENT assistant. The topology is a DAG; the walk above is a linked-list 2105 * traversal and keeps only one branch. 2106 * 2107 * Two loss modes observed in production (both fixed here): 2108 * 1. Sibling assistant orphaned: walk goes prev→asstA→TR_A→next, drops asstB 2109 * (same message.id, chained off asstA) and TR_B. 2110 * 2. Progress-fork (legacy, pre-#23537): each tool_use asst had a progress 2111 * child (continued the write chain) AND a TR child. Walk followed 2112 * progress; TRs were dropped. No longer written (progress removed from 2113 * transcript persistence), but old transcripts still have this shape. 2114 * 2115 * Read-side fix: the write topology is already on disk for old transcripts; 2116 * this recovery pass handles them. 
2117 */ 2118function recoverOrphanedParallelToolResults( 2119 messages: Map<UUID, TranscriptMessage>, 2120 chain: TranscriptMessage[], 2121 seen: Set<UUID>, 2122): TranscriptMessage[] { 2123 type ChainAssistant = Extract<TranscriptMessage, { type: 'assistant' }> 2124 const chainAssistants = chain.filter( 2125 (m): m is ChainAssistant => m.type === 'assistant', 2126 ) 2127 if (chainAssistants.length === 0) return chain 2128 2129 // Anchor = last on-chain member of each sibling group. chainAssistants is 2130 // already in chain order, so later iterations overwrite → last wins. 2131 const anchorByMsgId = new Map<string, ChainAssistant>() 2132 for (const a of chainAssistants) { 2133 if (a.message.id) anchorByMsgId.set(a.message.id, a) 2134 } 2135 2136 // O(n) precompute: sibling groups and TR index. 2137 // TRs indexed by parentUuid — insertMessageChain:~894 already wrote that 2138 // as the srcUUID, and --fork-session strips srcUUID but keeps parentUuid. 2139 const siblingsByMsgId = new Map<string, TranscriptMessage[]>() 2140 const toolResultsByAsst = new Map<UUID, TranscriptMessage[]>() 2141 for (const m of messages.values()) { 2142 if (m.type === 'assistant' && m.message.id) { 2143 const group = siblingsByMsgId.get(m.message.id) 2144 if (group) group.push(m) 2145 else siblingsByMsgId.set(m.message.id, [m]) 2146 } else if ( 2147 m.type === 'user' && 2148 m.parentUuid && 2149 Array.isArray(m.message.content) && 2150 m.message.content.some(b => b.type === 'tool_result') 2151 ) { 2152 const group = toolResultsByAsst.get(m.parentUuid) 2153 if (group) group.push(m) 2154 else toolResultsByAsst.set(m.parentUuid, [m]) 2155 } 2156 } 2157 2158 // For each message.id group touching the chain: collect off-chain siblings, 2159 // then off-chain TRs for ALL members. Splice right after the last on-chain 2160 // member so the group stays contiguous for normalizeMessagesForAPI's merge 2161 // and every TR lands after its tool_use. 
2162 const processedGroups = new Set<string>() 2163 const inserts = new Map<UUID, TranscriptMessage[]>() 2164 let recoveredCount = 0 2165 for (const asst of chainAssistants) { 2166 const msgId = asst.message.id 2167 if (!msgId || processedGroups.has(msgId)) continue 2168 processedGroups.add(msgId) 2169 2170 const group = siblingsByMsgId.get(msgId) ?? [asst] 2171 const orphanedSiblings = group.filter(s => !seen.has(s.uuid)) 2172 const orphanedTRs: TranscriptMessage[] = [] 2173 for (const member of group) { 2174 const trs = toolResultsByAsst.get(member.uuid) 2175 if (!trs) continue 2176 for (const tr of trs) { 2177 if (!seen.has(tr.uuid)) orphanedTRs.push(tr) 2178 } 2179 } 2180 if (orphanedSiblings.length === 0 && orphanedTRs.length === 0) continue 2181 2182 // Timestamp sort keeps content-block / completion order; stable-sort 2183 // preserves JSONL write order on ties. 2184 orphanedSiblings.sort((a, b) => a.timestamp.localeCompare(b.timestamp)) 2185 orphanedTRs.sort((a, b) => a.timestamp.localeCompare(b.timestamp)) 2186 2187 const anchor = anchorByMsgId.get(msgId)! 2188 const recovered = [...orphanedSiblings, ...orphanedTRs] 2189 for (const r of recovered) seen.add(r.uuid) 2190 recoveredCount += recovered.length 2191 inserts.set(anchor.uuid, recovered) 2192 } 2193 2194 if (recoveredCount === 0) return chain 2195 logEvent('tengu_chain_parallel_tr_recovered', { 2196 recovered_count: recoveredCount, 2197 }) 2198 2199 const result: TranscriptMessage[] = [] 2200 for (const m of chain) { 2201 result.push(m) 2202 const toInsert = inserts.get(m.uuid) 2203 if (toInsert) result.push(...toInsert) 2204 } 2205 return result 2206} 2207 2208/** 2209 * Find the latest turn_duration checkpoint in the reconstructed chain and 2210 * compare its recorded messageCount against the chain's position at that 2211 * point. 
Emits tengu_resume_consistency_delta for BigQuery monitoring of 2212 * write→load round-trip drift — the class of bugs where snip/compact/ 2213 * parallel-TR operations mutate in-memory but the parentUuid walk on disk 2214 * reconstructs a different set (adamr-20260320-165831: 397K displayed → 2215 * 1.65M actual on resume). 2216 * 2217 * delta > 0: resume loaded MORE than in-session (the usual failure mode) 2218 * delta < 0: resume loaded FEWER (chain truncation — #22453 class) 2219 * delta = 0: round-trip consistent 2220 * 2221 * Called from loadConversationForResume — fires once per resume, not on 2222 * /share or log-listing chain rebuilds. 2223 */ 2224export function checkResumeConsistency(chain: Message[]): void { 2225 for (let i = chain.length - 1; i >= 0; i--) { 2226 const m = chain[i]! 2227 if (m.type !== 'system' || m.subtype !== 'turn_duration') continue 2228 const expected = m.messageCount 2229 if (expected === undefined) return 2230 // `i` is the 0-based index of the checkpoint in the reconstructed chain. 2231 // The checkpoint was appended AFTER messageCount messages, so its own 2232 // position should be messageCount (i.e., i === expected). 
    // `i` is the checkpoint's position in the reconstructed chain; the
    // delta against the recorded messageCount measures round-trip drift.
    const actual = i
    logEvent('tengu_resume_consistency_delta', {
      expected,
      actual,
      delta: actual - expected,
      chain_length: chain.length,
      checkpoint_age_entries: chain.length - 1 - i,
    })
    return
  }
}

/**
 * Builds a file history snapshot chain from the conversation.
 *
 * Walks the reconstructed conversation in order and collects each message's
 * snapshot. Entries flagged `isSnapshotUpdate` replace the previously
 * collected snapshot for the same `messageId` in place (so updates do not
 * duplicate or reorder), while non-update entries always append.
 */
function buildFileHistorySnapshotChain(
  fileHistorySnapshots: Map<UUID, FileHistorySnapshotMessage>,
  conversation: TranscriptMessage[],
): FileHistorySnapshot[] {
  const snapshots: FileHistorySnapshot[] = []
  // messageId → last index in snapshots[] for O(1) update lookup
  const indexByMessageId = new Map<string, number>()
  for (const message of conversation) {
    const snapshotMessage = fileHistorySnapshots.get(message.uuid)
    if (!snapshotMessage) {
      continue
    }
    const { snapshot, isSnapshotUpdate } = snapshotMessage
    // Only updates look for an existing slot; fresh snapshots always append.
    const existingIndex = isSnapshotUpdate
      ? indexByMessageId.get(snapshot.messageId)
      : undefined
    if (existingIndex === undefined) {
      indexByMessageId.set(snapshot.messageId, snapshots.length)
      snapshots.push(snapshot)
    } else {
      snapshots[existingIndex] = snapshot
    }
  }
  return snapshots
}

/**
 * Builds an attribution snapshot chain from the conversation.
 * Unlike file history snapshots, attribution snapshots are returned in full
 * because they use generated UUIDs (not message UUIDs) and represent
 * cumulative state that should be restored on session resume.
2279 */ 2280function buildAttributionSnapshotChain( 2281 attributionSnapshots: Map<UUID, AttributionSnapshotMessage>, 2282 _conversation: TranscriptMessage[], 2283): AttributionSnapshotMessage[] { 2284 // Return all attribution snapshots - they will be merged during restore 2285 return Array.from(attributionSnapshots.values()) 2286} 2287 2288/** 2289 * Loads a transcript from a JSON or JSONL file and converts it to LogOption format 2290 * @param filePath Path to the transcript file (.json or .jsonl) 2291 * @returns LogOption containing the transcript messages 2292 * @throws Error if file doesn't exist or contains invalid data 2293 */ 2294export async function loadTranscriptFromFile( 2295 filePath: string, 2296): Promise<LogOption> { 2297 if (filePath.endsWith('.jsonl')) { 2298 const { 2299 messages, 2300 summaries, 2301 customTitles, 2302 tags, 2303 fileHistorySnapshots, 2304 attributionSnapshots, 2305 contextCollapseCommits, 2306 contextCollapseSnapshot, 2307 leafUuids, 2308 contentReplacements, 2309 worktreeStates, 2310 } = await loadTranscriptFile(filePath) 2311 2312 if (messages.size === 0) { 2313 throw new Error('No messages found in JSONL file') 2314 } 2315 2316 // Find the most recent leaf message using pre-computed leaf UUIDs 2317 const leafMessage = findLatestMessage(messages.values(), msg => 2318 leafUuids.has(msg.uuid), 2319 ) 2320 2321 if (!leafMessage) { 2322 throw new Error('No valid conversation chain found in JSONL file') 2323 } 2324 2325 // Build the conversation chain backwards from leaf to root 2326 const transcript = buildConversationChain(messages, leafMessage) 2327 2328 const summary = summaries.get(leafMessage.uuid) 2329 const customTitle = customTitles.get(leafMessage.sessionId as UUID) 2330 const tag = tags.get(leafMessage.sessionId as UUID) 2331 const sessionId = leafMessage.sessionId as UUID 2332 return { 2333 ...convertToLogOption( 2334 transcript, 2335 0, 2336 summary, 2337 customTitle, 2338 
buildFileHistorySnapshotChain(fileHistorySnapshots, transcript), 2339 tag, 2340 filePath, 2341 buildAttributionSnapshotChain(attributionSnapshots, transcript), 2342 undefined, 2343 contentReplacements.get(sessionId) ?? [], 2344 ), 2345 contextCollapseCommits: contextCollapseCommits.filter( 2346 e => e.sessionId === sessionId, 2347 ), 2348 contextCollapseSnapshot: 2349 contextCollapseSnapshot?.sessionId === sessionId 2350 ? contextCollapseSnapshot 2351 : undefined, 2352 worktreeSession: worktreeStates.has(sessionId) 2353 ? worktreeStates.get(sessionId) 2354 : undefined, 2355 } 2356 } 2357 2358 // json log files 2359 const content = await readFile(filePath, { encoding: 'utf-8' }) 2360 let parsed: unknown 2361 2362 try { 2363 parsed = jsonParse(content) 2364 } catch (error) { 2365 throw new Error(`Invalid JSON in transcript file: ${error}`) 2366 } 2367 2368 let messages: TranscriptMessage[] 2369 2370 if (Array.isArray(parsed)) { 2371 messages = parsed 2372 } else if (parsed && typeof parsed === 'object' && 'messages' in parsed) { 2373 if (!Array.isArray(parsed.messages)) { 2374 throw new Error('Transcript messages must be an array') 2375 } 2376 messages = parsed.messages 2377 } else { 2378 throw new Error( 2379 'Transcript must be an array of messages or an object with a messages array', 2380 ) 2381 } 2382 2383 return convertToLogOption( 2384 messages, 2385 0, 2386 undefined, 2387 undefined, 2388 undefined, 2389 undefined, 2390 filePath, 2391 ) 2392} 2393 2394/** 2395 * Checks if a user message has visible content (text or image, not just tool_result). 2396 * Tool results are displayed as part of collapsed groups, not as standalone messages. 2397 * Also excludes meta messages which are not shown to the user. 
2398 */ 2399function hasVisibleUserContent(message: TranscriptMessage): boolean { 2400 if (message.type !== 'user') return false 2401 2402 // Meta messages are not shown to the user 2403 if (message.isMeta) return false 2404 2405 const content = message.message?.content 2406 if (!content) return false 2407 2408 // String content is always visible 2409 if (typeof content === 'string') { 2410 return content.trim().length > 0 2411 } 2412 2413 // Array content: check for text or image blocks (not tool_result) 2414 if (Array.isArray(content)) { 2415 return content.some( 2416 block => 2417 block.type === 'text' || 2418 block.type === 'image' || 2419 block.type === 'document', 2420 ) 2421 } 2422 2423 return false 2424} 2425 2426/** 2427 * Checks if an assistant message has visible text content (not just tool_use blocks). 2428 * Tool uses are displayed as grouped/collapsed UI elements, not as standalone messages. 2429 */ 2430function hasVisibleAssistantContent(message: TranscriptMessage): boolean { 2431 if (message.type !== 'assistant') return false 2432 2433 const content = message.message?.content 2434 if (!content || !Array.isArray(content)) return false 2435 2436 // Check for text block (not just tool_use/thinking blocks) 2437 return content.some( 2438 block => 2439 block.type === 'text' && 2440 typeof block.text === 'string' && 2441 block.text.trim().length > 0, 2442 ) 2443} 2444 2445/** 2446 * Counts visible messages that would appear as conversation turns in the UI. 
 * Excludes:
 * - System, attachment, and progress messages
 * - User messages with isMeta flag (hidden from user)
 * - User messages that only contain tool_result blocks (displayed as collapsed groups)
 * - Assistant messages that only contain tool_use blocks (displayed as collapsed groups)
 */
function countVisibleMessages(transcript: TranscriptMessage[]): number {
  let count = 0
  for (const message of transcript) {
    switch (message.type) {
      case 'user':
        // Count user messages with visible content (text, image, not just tool_result or meta)
        if (hasVisibleUserContent(message)) {
          count++
        }
        break
      case 'assistant':
        // Count assistant messages with text content (not just tool_use)
        if (hasVisibleAssistantContent(message)) {
          count++
        }
        break
      case 'attachment':
      case 'system':
      case 'progress':
        // These message types are not counted as visible conversation turns
        break
    }
  }
  return count
}

/**
 * Convert a reconstructed transcript plus its side-channel metadata into a
 * LogOption for the session picker / resume flow.
 *
 * Precondition: `transcript` must be non-empty — the first and last messages
 * are read with non-null assertions to derive created/modified timestamps,
 * the leaf UUID, and the project path.
 *
 * @param transcript Messages in root→leaf order.
 * @param value Picker value/index (defaults to 0).
 * @param summary Optional compaction summary associated with the leaf.
 * @param customTitle Optional user-assigned session title.
 * @param fileHistorySnapshots Optional snapshot chain built for this transcript.
 * @param tag Optional session tag.
 * @param fullPath Optional path to the backing transcript file.
 * @param attributionSnapshots Optional attribution snapshots to restore.
 * @param agentSetting Optional agent setting string.
 * @param contentReplacements Optional content replacement records.
 */
function convertToLogOption(
  transcript: TranscriptMessage[],
  value: number = 0,
  summary?: string,
  customTitle?: string,
  fileHistorySnapshots?: FileHistorySnapshot[],
  tag?: string,
  fullPath?: string,
  attributionSnapshots?: AttributionSnapshotMessage[],
  agentSetting?: string,
  contentReplacements?: ContentReplacementRecord[],
): LogOption {
  const lastMessage = transcript.at(-1)!
  const firstMessage = transcript[0]!

  // Get the first user message for the prompt
  const firstPrompt = extractFirstPrompt(transcript)

  // Create timestamps from message timestamps
  const created = new Date(firstMessage.timestamp)
  const modified = new Date(lastMessage.timestamp)

  return {
    date: lastMessage.timestamp,
    messages: removeExtraFields(transcript),
    fullPath,
    value,
    created,
    modified,
    firstPrompt,
    messageCount: countVisibleMessages(transcript),
    isSidechain: firstMessage.isSidechain,
    teamName: firstMessage.teamName,
    agentName: firstMessage.agentName,
    agentSetting,
    leafUuid: lastMessage.uuid,
    summary,
    customTitle,
    tag,
    fileHistorySnapshots: fileHistorySnapshots,
    attributionSnapshots: attributionSnapshots,
    contentReplacements,
    gitBranch: lastMessage.gitBranch,
    projectPath: firstMessage.cwd,
  }
}

/**
 * Count forked branches across the fetched logs (multiple logs sharing one
 * sessionId) and emit an analytics event when any session has more than one
 * branch. No event is emitted when every sessionId is unique.
 */
async function trackSessionBranchingAnalytics(
  logs: LogOption[],
): Promise<void> {
  const sessionIdCounts = new Map<string, number>()
  let maxCount = 0
  for (const log of logs) {
    const sessionId = getSessionIdFromLog(log)
    if (sessionId) {
      const newCount = (sessionIdCounts.get(sessionId) || 0) + 1
      sessionIdCounts.set(sessionId, newCount)
      maxCount = Math.max(newCount, maxCount)
    }
  }

  // Early exit if no duplicates detected
  if (maxCount <= 1) {
    return
  }

  // Count sessions with branches and calculate stats using functional approach
  const branchCounts = Array.from(sessionIdCounts.values()).filter(c => c > 1)
  const sessionsWithBranches = branchCounts.length
  const totalBranches = branchCounts.reduce((sum, count) => sum + count, 0)

  logEvent('tengu_session_forked_branches_fetched', {
    total_sessions: sessionIdCounts.size,
    sessions_with_branches: sessionsWithBranches,
    max_branches_per_session: Math.max(...branchCounts),
avg_branches_per_session: Math.round(totalBranches / sessionsWithBranches), 2555 total_transcript_count: logs.length, 2556 }) 2557} 2558 2559export async function fetchLogs(limit?: number): Promise<LogOption[]> { 2560 const projectDir = getProjectDir(getOriginalCwd()) 2561 const logs = await getSessionFilesLite(projectDir, limit, getOriginalCwd()) 2562 2563 await trackSessionBranchingAnalytics(logs) 2564 2565 return logs 2566} 2567 2568/** 2569 * Append an entry to a session file. Creates the parent dir if missing. 2570 */ 2571/* eslint-disable custom-rules/no-sync-fs -- sync callers (exit cleanup, materialize) */ 2572function appendEntryToFile( 2573 fullPath: string, 2574 entry: Record<string, unknown>, 2575): void { 2576 const fs = getFsImplementation() 2577 const line = jsonStringify(entry) + '\n' 2578 try { 2579 fs.appendFileSync(fullPath, line, { mode: 0o600 }) 2580 } catch { 2581 fs.mkdirSync(dirname(fullPath), { mode: 0o700 }) 2582 fs.appendFileSync(fullPath, line, { mode: 0o600 }) 2583 } 2584} 2585 2586/** 2587 * Sync tail read for reAppendSessionMetadata's external-writer check. 2588 * fstat on the already-open fd (no extra path lookup); reads the same 2589 * LITE_READ_BUF_SIZE window that readLiteMetadata scans. Returns empty 2590 * string on any error so callers fall through to unconditional behavior. 
 */
function readFileTailSync(fullPath: string): string {
  let fd: number | undefined
  try {
    fd = openSync(fullPath, 'r')
    const st = fstatSync(fd)
    // Read at most the last LITE_READ_BUF_SIZE bytes of the file.
    const tailOffset = Math.max(0, st.size - LITE_READ_BUF_SIZE)
    // allocUnsafe is safe here: toString below is bounded to bytesRead,
    // so uninitialized buffer memory is never exposed.
    const buf = Buffer.allocUnsafe(
      Math.min(LITE_READ_BUF_SIZE, st.size - tailOffset),
    )
    const bytesRead = readSync(fd, buf, 0, buf.length, tailOffset)
    return buf.toString('utf8', 0, bytesRead)
  } catch {
    // Per contract: empty string on any error (missing file, perms, etc.).
    return ''
  } finally {
    if (fd !== undefined) {
      try {
        closeSync(fd)
      } catch {
        // closeSync can throw; swallow to preserve return '' contract
      }
    }
  }
}
/* eslint-enable custom-rules/no-sync-fs */

/**
 * Persist a session title as a `custom-title` JSONL entry and cache it for
 * the current session. Fires the `tengu_session_renamed` analytics event.
 *
 * @param sessionId Session to rename.
 * @param customTitle New title text.
 * @param fullPath Optional transcript path; computed from sessionId if omitted.
 * @param source Whether the rename came from the user or an automatic flow.
 */
export async function saveCustomTitle(
  sessionId: UUID,
  customTitle: string,
  fullPath?: string,
  source: 'user' | 'auto' = 'user',
) {
  // Fall back to computed path if fullPath is not provided
  const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId)
  appendEntryToFile(resolvedPath, {
    type: 'custom-title',
    customTitle,
    sessionId,
  })
  // Cache for current session only (for immediate visibility)
  if (sessionId === getSessionId()) {
    getProject().currentSessionTitle = customTitle
  }
  logEvent('tengu_session_renamed', {
    source:
      source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })
}

/**
 * Persist an AI-generated title to the JSONL as a distinct `ai-title` entry.
 *
 * Writing a separate entry type (vs. reusing `custom-title`) is load-bearing:
 * - Read preference: readers prefer `customTitle` field over `aiTitle`, so
 *   a user rename always wins regardless of append order.
2646 * - Resume safety: `loadTranscriptFile` only populates the `customTitles` 2647 * Map from `custom-title` entries, so `restoreSessionMetadata` never 2648 * caches an AI title and `reAppendSessionMetadata` never re-appends one 2649 * at EOF — avoiding the clobber-on-resume bug where a stale AI title 2650 * overwrites a mid-session user rename. 2651 * - CAS semantics: VS Code's `onlyIfNoCustomTitle` check scans for the 2652 * `customTitle` field only, so AI can overwrite its own previous AI 2653 * title but never a user title. 2654 * - Metrics: `tengu_session_renamed` is not fired for AI titles. 2655 * 2656 * Because the entry is never re-appended, it scrolls out of the 64KB tail 2657 * window once enough messages accumulate. Readers (`readLiteMetadata`, 2658 * `listSessionsImpl`, VS Code `fetchSessions`) fall back to scanning the 2659 * head buffer for `aiTitle` in that case. Both head and tail reads are 2660 * bounded (64KB each via `extractLastJsonStringField`), never a full scan. 2661 * 2662 * Callers with a stale-write guard (e.g., VS Code client) should prefer 2663 * passing `persist: false` to the SDK control request and persisting 2664 * through their own rename path after the guard passes, to avoid a race 2665 * where the AI title lands after a mid-flight user rename. 2666 */ 2667export function saveAiGeneratedTitle(sessionId: UUID, aiTitle: string): void { 2668 appendEntryToFile(getTranscriptPathForSession(sessionId), { 2669 type: 'ai-title', 2670 aiTitle, 2671 sessionId, 2672 }) 2673} 2674 2675/** 2676 * Append a periodic task summary for `claude ps`. Unlike ai-title this is 2677 * not re-appended by reAppendSessionMetadata — it's a rolling snapshot of 2678 * what the agent is doing *now*, so staleness is fine; ps reads the most 2679 * recent one from the tail. 
2680 */ 2681export function saveTaskSummary(sessionId: UUID, summary: string): void { 2682 appendEntryToFile(getTranscriptPathForSession(sessionId), { 2683 type: 'task-summary', 2684 summary, 2685 sessionId, 2686 timestamp: new Date().toISOString(), 2687 }) 2688} 2689 2690export async function saveTag(sessionId: UUID, tag: string, fullPath?: string) { 2691 // Fall back to computed path if fullPath is not provided 2692 const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId) 2693 appendEntryToFile(resolvedPath, { type: 'tag', tag, sessionId }) 2694 // Cache for current session only (for immediate visibility) 2695 if (sessionId === getSessionId()) { 2696 getProject().currentSessionTag = tag 2697 } 2698 logEvent('tengu_session_tagged', {}) 2699} 2700 2701/** 2702 * Link a session to a GitHub pull request. 2703 * This stores the PR number, URL, and repository for tracking and navigation. 2704 */ 2705export async function linkSessionToPR( 2706 sessionId: UUID, 2707 prNumber: number, 2708 prUrl: string, 2709 prRepository: string, 2710 fullPath?: string, 2711): Promise<void> { 2712 const resolvedPath = fullPath ?? 
getTranscriptPathForSession(sessionId) 2713 appendEntryToFile(resolvedPath, { 2714 type: 'pr-link', 2715 sessionId, 2716 prNumber, 2717 prUrl, 2718 prRepository, 2719 timestamp: new Date().toISOString(), 2720 }) 2721 // Cache for current session so reAppendSessionMetadata can re-write after compaction 2722 if (sessionId === getSessionId()) { 2723 const project = getProject() 2724 project.currentSessionPrNumber = prNumber 2725 project.currentSessionPrUrl = prUrl 2726 project.currentSessionPrRepository = prRepository 2727 } 2728 logEvent('tengu_session_linked_to_pr', { prNumber }) 2729} 2730 2731export function getCurrentSessionTag(sessionId: UUID): string | undefined { 2732 // Only returns tag for current session (the only one we cache) 2733 if (sessionId === getSessionId()) { 2734 return getProject().currentSessionTag 2735 } 2736 return undefined 2737} 2738 2739export function getCurrentSessionTitle( 2740 sessionId: SessionId, 2741): string | undefined { 2742 // Only returns title for current session (the only one we cache) 2743 if (sessionId === getSessionId()) { 2744 return getProject().currentSessionTitle 2745 } 2746 return undefined 2747} 2748 2749export function getCurrentSessionAgentColor(): string | undefined { 2750 return getProject().currentSessionAgentColor 2751} 2752 2753/** 2754 * Restore session metadata into in-memory cache on resume. 2755 * Populates the cache so metadata is available for display (e.g. the 2756 * agent banner) and re-appended on session exit via reAppendSessionMetadata. 
2757 */ 2758export function restoreSessionMetadata(meta: { 2759 customTitle?: string 2760 tag?: string 2761 agentName?: string 2762 agentColor?: string 2763 agentSetting?: string 2764 mode?: 'coordinator' | 'normal' 2765 worktreeSession?: PersistedWorktreeSession | null 2766 prNumber?: number 2767 prUrl?: string 2768 prRepository?: string 2769}): void { 2770 const project = getProject() 2771 // ??= so --name (cacheSessionTitle) wins over the resumed 2772 // session's title. REPL.tsx clears before calling, so /resume is unaffected. 2773 if (meta.customTitle) project.currentSessionTitle ??= meta.customTitle 2774 if (meta.tag !== undefined) project.currentSessionTag = meta.tag || undefined 2775 if (meta.agentName) project.currentSessionAgentName = meta.agentName 2776 if (meta.agentColor) project.currentSessionAgentColor = meta.agentColor 2777 if (meta.agentSetting) project.currentSessionAgentSetting = meta.agentSetting 2778 if (meta.mode) project.currentSessionMode = meta.mode 2779 if (meta.worktreeSession !== undefined) 2780 project.currentSessionWorktree = meta.worktreeSession 2781 if (meta.prNumber !== undefined) 2782 project.currentSessionPrNumber = meta.prNumber 2783 if (meta.prUrl) project.currentSessionPrUrl = meta.prUrl 2784 if (meta.prRepository) project.currentSessionPrRepository = meta.prRepository 2785} 2786 2787/** 2788 * Clear all cached session metadata (title, tag, agent name/color). 2789 * Called when /clear creates a new session so stale metadata 2790 * from the previous session does not leak into the new one. 
2791 */ 2792export function clearSessionMetadata(): void { 2793 const project = getProject() 2794 project.currentSessionTitle = undefined 2795 project.currentSessionTag = undefined 2796 project.currentSessionAgentName = undefined 2797 project.currentSessionAgentColor = undefined 2798 project.currentSessionLastPrompt = undefined 2799 project.currentSessionAgentSetting = undefined 2800 project.currentSessionMode = undefined 2801 project.currentSessionWorktree = undefined 2802 project.currentSessionPrNumber = undefined 2803 project.currentSessionPrUrl = undefined 2804 project.currentSessionPrRepository = undefined 2805} 2806 2807/** 2808 * Re-append cached session metadata (custom title, tag) to the end of the 2809 * transcript file. Call this after compaction so the metadata stays within 2810 * the 16KB tail window that readLiteMetadata reads during progressive loading. 2811 * Without this, enough post-compaction messages can push the metadata entry 2812 * out of the window, causing `--resume` to show the auto-generated firstPrompt 2813 * instead of the user-set session name. 2814 */ 2815export function reAppendSessionMetadata(): void { 2816 getProject().reAppendSessionMetadata() 2817} 2818 2819export async function saveAgentName( 2820 sessionId: UUID, 2821 agentName: string, 2822 fullPath?: string, 2823 source: 'user' | 'auto' = 'user', 2824) { 2825 const resolvedPath = fullPath ?? 
getTranscriptPathForSession(sessionId) 2826 appendEntryToFile(resolvedPath, { type: 'agent-name', agentName, sessionId }) 2827 // Cache for current session only (for immediate visibility) 2828 if (sessionId === getSessionId()) { 2829 getProject().currentSessionAgentName = agentName 2830 void updateSessionName(agentName) 2831 } 2832 logEvent('tengu_agent_name_set', { 2833 source: 2834 source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 2835 }) 2836} 2837 2838export async function saveAgentColor( 2839 sessionId: UUID, 2840 agentColor: string, 2841 fullPath?: string, 2842) { 2843 const resolvedPath = fullPath ?? getTranscriptPathForSession(sessionId) 2844 appendEntryToFile(resolvedPath, { 2845 type: 'agent-color', 2846 agentColor, 2847 sessionId, 2848 }) 2849 // Cache for current session only (for immediate visibility) 2850 if (sessionId === getSessionId()) { 2851 getProject().currentSessionAgentColor = agentColor 2852 } 2853 logEvent('tengu_agent_color_set', {}) 2854} 2855 2856/** 2857 * Cache the session agent setting. Written to disk by materializeSessionFile 2858 * on the first user message, and re-stamped by reAppendSessionMetadata on exit. 2859 * Cache-only here to avoid creating metadata-only session files at startup. 2860 */ 2861export function saveAgentSetting(agentSetting: string): void { 2862 getProject().currentSessionAgentSetting = agentSetting 2863} 2864 2865/** 2866 * Cache a session title set at startup (--name). Written to disk by 2867 * materializeSessionFile on the first user message. Cache-only here so no 2868 * orphan metadata-only file is created before the session ID is finalized. 2869 */ 2870export function cacheSessionTitle(customTitle: string): void { 2871 getProject().currentSessionTitle = customTitle 2872} 2873 2874/** 2875 * Cache the session mode. Written to disk by materializeSessionFile on the 2876 * first user message, and re-stamped by reAppendSessionMetadata on exit. 
2877 * Cache-only here to avoid creating metadata-only session files at startup. 2878 */ 2879export function saveMode(mode: 'coordinator' | 'normal'): void { 2880 getProject().currentSessionMode = mode 2881} 2882 2883/** 2884 * Record the session's worktree state for --resume. Written to disk by 2885 * materializeSessionFile on the first user message and re-stamped by 2886 * reAppendSessionMetadata on exit. Pass null when exiting a worktree 2887 * so --resume knows not to cd back into it. 2888 */ 2889export function saveWorktreeState( 2890 worktreeSession: PersistedWorktreeSession | null, 2891): void { 2892 // Strip ephemeral fields (creationDurationMs, usedSparsePaths) that callers 2893 // may pass via full WorktreeSession objects — TypeScript structural typing 2894 // allows this, but we don't want them serialized to the transcript. 2895 const stripped: PersistedWorktreeSession | null = worktreeSession 2896 ? { 2897 originalCwd: worktreeSession.originalCwd, 2898 worktreePath: worktreeSession.worktreePath, 2899 worktreeName: worktreeSession.worktreeName, 2900 worktreeBranch: worktreeSession.worktreeBranch, 2901 originalBranch: worktreeSession.originalBranch, 2902 originalHeadCommit: worktreeSession.originalHeadCommit, 2903 sessionId: worktreeSession.sessionId, 2904 tmuxSessionName: worktreeSession.tmuxSessionName, 2905 hookBased: worktreeSession.hookBased, 2906 } 2907 : null 2908 const project = getProject() 2909 project.currentSessionWorktree = stripped 2910 // Write eagerly when the file already exists (mid-session enter/exit). 2911 // For --worktree startup, sessionFile is null — materializeSessionFile 2912 // will write it on the first message via reAppendSessionMetadata. 2913 if (project.sessionFile) { 2914 appendEntryToFile(project.sessionFile, { 2915 type: 'worktree-state', 2916 worktreeSession: stripped, 2917 sessionId: getSessionId(), 2918 }) 2919 } 2920} 2921 2922/** 2923 * Extracts the session ID from a log. 
 * For lite logs, uses the sessionId field directly.
 * For full logs, extracts from the first message.
 */
export function getSessionIdFromLog(log: LogOption): UUID | undefined {
  // For lite logs, use the direct sessionId field
  if (log.sessionId) {
    return log.sessionId as UUID
  }
  // Fall back to extracting from first message (full logs)
  return log.messages[0]?.sessionId as UUID | undefined
}

/**
 * Checks if a log is a lite log that needs full loading.
 * Lite logs have messages: [] and sessionId set.
 */
export function isLiteLog(log: LogOption): boolean {
  return log.messages.length === 0 && log.sessionId !== undefined
}

/**
 * Loads full messages for a lite log by reading its JSONL file.
 * Returns a new LogOption with populated messages array.
 * If the log is already full or loading fails, returns the original log.
 */
export async function loadFullLog(log: LogOption): Promise<LogOption> {
  // If already full, return as-is
  if (!isLiteLog(log)) {
    return log
  }

  // Use the fullPath from the index entry directly
  const sessionFile = log.fullPath
  if (!sessionFile) {
    return log
  }

  try {
    const {
      messages,
      summaries,
      customTitles,
      tags,
      agentNames,
      agentColors,
      agentSettings,
      prNumbers,
      prUrls,
      prRepositories,
      modes,
      worktreeStates,
      fileHistorySnapshots,
      attributionSnapshots,
      contentReplacements,
      contextCollapseCommits,
      contextCollapseSnapshot,
      leafUuids,
    } = await loadTranscriptFile(sessionFile)

    if (messages.size === 0) {
      return log
    }

    // Find the most recent user/assistant leaf message from the transcript
    const mostRecentLeaf = findLatestMessage(
      messages.values(),
      msg =>
        leafUuids.has(msg.uuid) &&
        (msg.type === 'user' || msg.type === 'assistant'),
    )
    if (!mostRecentLeaf) {
      return log
    }

    // Build the conversation chain from this leaf
    const transcript = buildConversationChain(messages, mostRecentLeaf)
    // Leaf's sessionId — forked sessions copy chain[0] from the source, but
    // metadata entries (custom-title etc.) are keyed by the current session.
    const sessionId = mostRecentLeaf.sessionId as UUID | undefined
    // Merge loaded data over the lite index entry; each metadata field falls
    // back to the lite value when the transcript has nothing for this session.
    // NOTE(review): mostRecentLeaf is non-null past the guard above, so the
    // `mostRecentLeaf ?` ternary and `mostRecentLeaf?.` chains below always
    // take the truthy branch — kept as-is here.
    return {
      ...log,
      messages: removeExtraFields(transcript),
      firstPrompt: extractFirstPrompt(transcript),
      messageCount: countVisibleMessages(transcript),
      summary: mostRecentLeaf
        ? summaries.get(mostRecentLeaf.uuid)
        : log.summary,
      customTitle: sessionId ? customTitles.get(sessionId) : log.customTitle,
      tag: sessionId ? tags.get(sessionId) : log.tag,
      agentName: sessionId ? agentNames.get(sessionId) : log.agentName,
      agentColor: sessionId ? agentColors.get(sessionId) : log.agentColor,
      agentSetting: sessionId ? agentSettings.get(sessionId) : log.agentSetting,
      mode: sessionId ? (modes.get(sessionId) as LogOption['mode']) : log.mode,
      // has() check because the stored value may legitimately be null
      // ("exited the worktree") — a plain get() could not distinguish
      // "absent" from "null".
      worktreeSession:
        sessionId && worktreeStates.has(sessionId)
          ? worktreeStates.get(sessionId)
          : log.worktreeSession,
      prNumber: sessionId ? prNumbers.get(sessionId) : log.prNumber,
      prUrl: sessionId ? prUrls.get(sessionId) : log.prUrl,
      prRepository: sessionId
        ? prRepositories.get(sessionId)
        : log.prRepository,
      gitBranch: mostRecentLeaf?.gitBranch ?? log.gitBranch,
      isSidechain: transcript[0]?.isSidechain ?? log.isSidechain,
      teamName: transcript[0]?.teamName ?? log.teamName,
      leafUuid: mostRecentLeaf?.uuid ?? log.leafUuid,
      fileHistorySnapshots: buildFileHistorySnapshotChain(
        fileHistorySnapshots,
        transcript,
      ),
      attributionSnapshots: buildAttributionSnapshotChain(
        attributionSnapshots,
        transcript,
      ),
      contentReplacements: sessionId
        ? (contentReplacements.get(sessionId) ?? [])
        : log.contentReplacements,
      // Filter to the resumed session's entries. loadTranscriptFile reads
      // the file sequentially so the array is already in commit order;
      // filter preserves that.
      contextCollapseCommits: sessionId
        ? contextCollapseCommits.filter(e => e.sessionId === sessionId)
        : undefined,
      contextCollapseSnapshot:
        sessionId && contextCollapseSnapshot?.sessionId === sessionId
          ? contextCollapseSnapshot
          : undefined,
    }
  } catch {
    // If loading fails, return the original log
    return log
  }
}

/**
 * Searches for sessions by custom title match.
 * Returns matches sorted by recency (newest first).
 * Uses case-insensitive matching for better UX.
 * Deduplicates by sessionId (keeps most recent per session).
 * Searches across same-repo worktrees by default.
 */
export async function searchSessionsByCustomTitle(
  query: string,
  options?: { limit?: number; exact?: boolean },
): Promise<LogOption[]> {
  const { limit, exact } = options || {}
  // Use worktree-aware loading to search across same-repo sessions
  const worktreePaths = await getWorktreePaths(getOriginalCwd())
  const allStatLogs = await getStatOnlyLogsForWorktrees(worktreePaths)
  // Enrich all logs to access customTitle metadata
  const { logs } = await enrichLogs(allStatLogs, 0, allStatLogs.length)
  const normalizedQuery = query.toLowerCase().trim()

  const matchingLogs = logs.filter(log => {
    const title = log.customTitle?.toLowerCase().trim()
    if (!title) return false
    return exact ? title === normalizedQuery : title.includes(normalizedQuery)
  })

  // Deduplicate by sessionId - multiple logs can have the same sessionId
  // if they're different branches of the same conversation. Keep most recent.
  const sessionIdToLog = new Map<UUID, LogOption>()
  for (const log of matchingLogs) {
    const sessionId = getSessionIdFromLog(log)
    if (sessionId) {
      const existing = sessionIdToLog.get(sessionId)
      if (!existing || log.modified > existing.modified) {
        sessionIdToLog.set(sessionId, log)
      }
    }
  }
  const deduplicated = Array.from(sessionIdToLog.values())

  // Sort by recency
  deduplicated.sort((a, b) => b.modified.getTime() - a.modified.getTime())

  // Apply limit if specified
  // NOTE(review): a falsy check, so limit === 0 means "no limit" — confirm
  // that is intended rather than "return nothing".
  if (limit) {
    return deduplicated.slice(0, limit)
  }

  return deduplicated
}

/**
 * Metadata entry types that can appear before a compact boundary but must
 * still be loaded (they're session-scoped, not message-scoped).
 * Kept as raw JSON string markers for cheap line filtering during streaming.
 */
const METADATA_TYPE_MARKERS = [
  '"type":"summary"',
  '"type":"custom-title"',
  '"type":"tag"',
  '"type":"agent-name"',
  '"type":"agent-color"',
  '"type":"agent-setting"',
  '"type":"mode"',
  '"type":"worktree-state"',
  '"type":"pr-link"',
]
// Pre-built Buffer forms so scans compare bytes, never decode lines.
const METADATA_MARKER_BUFS = METADATA_TYPE_MARKERS.map(m => Buffer.from(m))
// Longest marker is 22 bytes; +1 for leading `{` = 23.
const METADATA_PREFIX_BOUND = 25

// null = carry spans whole chunk. Skips concat when carry provably isn't
// a metadata line (markers sit at byte 1 after `{`).
3130function resolveMetadataBuf( 3131 carry: Buffer | null, 3132 chunkBuf: Buffer, 3133): Buffer | null { 3134 if (carry === null || carry.length === 0) return chunkBuf 3135 if (carry.length < METADATA_PREFIX_BOUND) { 3136 return Buffer.concat([carry, chunkBuf]) 3137 } 3138 if (carry[0] === 0x7b /* { */) { 3139 for (const m of METADATA_MARKER_BUFS) { 3140 if (carry.compare(m, 0, m.length, 1, 1 + m.length) === 0) { 3141 return Buffer.concat([carry, chunkBuf]) 3142 } 3143 } 3144 } 3145 const firstNl = chunkBuf.indexOf(0x0a) 3146 return firstNl === -1 ? null : chunkBuf.subarray(firstNl + 1) 3147} 3148 3149/** 3150 * Lightweight forward scan of [0, endOffset) collecting only metadata-entry lines. 3151 * Uses raw Buffer chunks and byte-level marker matching — no readline, no per-line 3152 * string conversion for the ~99% of lines that are message content. 3153 * 3154 * Fast path: if a chunk contains zero markers (the common case — metadata entries 3155 * are <50 per session), the entire chunk is skipped without line splitting. 3156 */ 3157async function scanPreBoundaryMetadata( 3158 filePath: string, 3159 endOffset: number, 3160): Promise<string[]> { 3161 const { createReadStream } = await import('fs') 3162 const NEWLINE = 0x0a 3163 3164 const stream = createReadStream(filePath, { end: endOffset - 1 }) 3165 const metadataLines: string[] = [] 3166 let carry: Buffer | null = null 3167 3168 for await (const chunk of stream) { 3169 const chunkBuf = chunk as Buffer 3170 const buf = resolveMetadataBuf(carry, chunkBuf) 3171 if (buf === null) { 3172 carry = null 3173 continue 3174 } 3175 3176 // Fast path: most chunks contain zero metadata markers. Skip line splitting. 
3177 let hasAnyMarker = false 3178 for (const m of METADATA_MARKER_BUFS) { 3179 if (buf.includes(m)) { 3180 hasAnyMarker = true 3181 break 3182 } 3183 } 3184 3185 if (hasAnyMarker) { 3186 let lineStart = 0 3187 let nl = buf.indexOf(NEWLINE) 3188 while (nl !== -1) { 3189 // Bounded marker check: only look within this line's byte range 3190 for (const m of METADATA_MARKER_BUFS) { 3191 const mIdx = buf.indexOf(m, lineStart) 3192 if (mIdx !== -1 && mIdx < nl) { 3193 metadataLines.push(buf.toString('utf-8', lineStart, nl)) 3194 break 3195 } 3196 } 3197 lineStart = nl + 1 3198 nl = buf.indexOf(NEWLINE, lineStart) 3199 } 3200 carry = buf.subarray(lineStart) 3201 } else { 3202 // No markers in this chunk — just preserve the incomplete trailing line 3203 const lastNl = buf.lastIndexOf(NEWLINE) 3204 carry = lastNl >= 0 ? buf.subarray(lastNl + 1) : buf 3205 } 3206 3207 // Guard against quadratic carry growth for pathological huge lines 3208 // (e.g., a 10 MB tool-output line with no newline). Real metadata entries 3209 // are <1 KB, so if carry exceeds this we're mid-message-content — drop it. 3210 if (carry.length > 64 * 1024) carry = null 3211 } 3212 3213 // Final incomplete line (no trailing newline at endOffset) 3214 if (carry !== null && carry.length > 0) { 3215 for (const m of METADATA_MARKER_BUFS) { 3216 if (carry.includes(m)) { 3217 metadataLines.push(carry.toString('utf-8')) 3218 break 3219 } 3220 } 3221 } 3222 3223 return metadataLines 3224} 3225 3226/** 3227 * Byte-level pre-filter that excises dead fork branches before parseJSONL. 3228 * 3229 * Every rewind/ctrl-z leaves an orphaned chain branch in the append-only 3230 * JSONL forever. buildConversationChain walks parentUuid from the latest leaf 3231 * and discards everything else, but by then parseJSONL has already paid to 3232 * JSON.parse all of it. 
 Measured on fork-heavy sessions:
 *
 * 41 MB, 99% dead: parseJSONL 56.0 ms -> 3.9 ms (-93%)
 * 151 MB, 92% dead: 47.3 ms -> 9.4 ms (-80%)
 *
 * Sessions with few dead branches (5-7%) see a small win from the overhead of
 * the index pass roughly canceling the parse savings, so this is gated on
 * buffer size (same threshold as SKIP_PRECOMPACT_THRESHOLD).
 *
 * Relies on two invariants verified across 25k+ message lines in local
 * sessions (0 violations):
 *
 * 1. Transcript messages always serialize with parentUuid as the first key.
 *    JSON.stringify emits keys in insertion order and recordTranscript's
 *    object literal puts parentUuid first. So `{"parentUuid":` is a stable
 *    line prefix that distinguishes transcript messages from metadata.
 *
 * 2. Top-level uuid detection is handled by a suffix check + depth check
 *    (see inline comment in the scan loop). toolUseResult/mcpMeta serialize
 *    AFTER uuid with arbitrary server-controlled objects, and agent_progress
 *    entries serialize a nested Message in data BEFORE uuid — both can
 *    produce nested `"uuid":"<36>","timestamp":"` bytes, so suffix alone
 *    is insufficient. When multiple suffix matches exist, a brace-depth
 *    scan disambiguates.
 *
 * The append-only write discipline guarantees parents appear at earlier file
 * offsets than children, so walking backward from EOF always finds them.
 */

/**
 * Disambiguate multiple `"uuid":"<36>","timestamp":"` matches in one line by
 * finding the one at JSON nesting depth 1. String-aware brace counter:
 * `{`/`}` inside string values don't count; `\"` and `\\` inside strings are
 * handled. Candidates is sorted ascending (the scan loop produces them in
 * byte order). Returns the first depth-1 candidate, or the last candidate if
 * none are at depth 1 (shouldn't happen for well-formed JSONL — depth-1 is
 * where the top-level object's fields live).
 *
 * Only called when ≥2 suffix matches exist (agent_progress with a nested
 * Message, or mcpMeta with a coincidentally-suffixed object). Cost is
 * O(max(candidates) - lineStart) — one forward byte pass, stopping at the
 * first depth-1 hit.
 */
function pickDepthOneUuidCandidate(
  buf: Buffer,
  lineStart: number,
  candidates: number[],
): number {
  const QUOTE = 0x22
  const BACKSLASH = 0x5c
  const OPEN_BRACE = 0x7b
  const CLOSE_BRACE = 0x7d
  let depth = 0
  let inString = false
  let escapeNext = false
  // ci walks the sorted candidate list; the loop exits once all candidates
  // have been passed, so only bytes up to the last candidate are scanned.
  let ci = 0
  for (let i = lineStart; ci < candidates.length; i++) {
    if (i === candidates[ci]) {
      // Candidate byte reached: accept it only at top-level object depth,
      // outside any string value.
      if (depth === 1 && !inString) return candidates[ci]!
      ci++
    }
    const b = buf[i]!
    if (escapeNext) {
      escapeNext = false
    } else if (inString) {
      if (b === BACKSLASH) escapeNext = true
      else if (b === QUOTE) inString = false
    } else if (b === QUOTE) inString = true
    else if (b === OPEN_BRACE) depth++
    else if (b === CLOSE_BRACE) depth--
  }
  return candidates.at(-1)!
}

function walkChainBeforeParse(buf: Buffer): Buffer {
  const NEWLINE = 0x0a
  const OPEN_BRACE = 0x7b
  const QUOTE = 0x22
  const PARENT_PREFIX = Buffer.from('{"parentUuid":')
  const UUID_KEY = Buffer.from('"uuid":"')
  const SIDECHAIN_TRUE = Buffer.from('"isSidechain":true')
  const UUID_LEN = 36
  const TS_SUFFIX = Buffer.from('","timestamp":"')
  const TS_SUFFIX_LEN = TS_SUFFIX.length
  const PREFIX_LEN = PARENT_PREFIX.length
  const KEY_LEN = UUID_KEY.length

  // Stride-3 flat index of transcript messages: [lineStart, lineEnd, parentStart].
  // parentStart is the byte offset of the parent uuid's first char, or -1 for null.
  // Metadata lines (summary, mode, file-history-snapshot, etc.) go in metaRanges
  // unfiltered - they lack the parentUuid prefix and downstream needs all of them.
  const msgIdx: number[] = []
  const metaRanges: number[] = []
  const uuidToSlot = new Map<string, number>()

  let pos = 0
  const len = buf.length
  while (pos < len) {
    const nl = buf.indexOf(NEWLINE, pos)
    const lineEnd = nl === -1 ? len : nl + 1
    if (
      lineEnd - pos > PREFIX_LEN &&
      buf[pos] === OPEN_BRACE &&
      buf.compare(PARENT_PREFIX, 0, PREFIX_LEN, pos, pos + PREFIX_LEN) === 0
    ) {
      // `{"parentUuid":null,` or `{"parentUuid":"<36 chars>",`
      const parentStart =
        buf[pos + PREFIX_LEN] === QUOTE ? pos + PREFIX_LEN + 1 : -1
      // The top-level uuid is immediately followed by `","timestamp":"` in
      // user/assistant/attachment entries (the create* helpers put them
      // adjacent; both always defined). But the suffix is NOT unique:
      // - agent_progress entries carry a nested Message in data.message,
      //   serialized BEFORE top-level uuid — that inner Message has its
      //   own uuid,timestamp adjacent, so its bytes also satisfy the
      //   suffix check.
      // - mcpMeta/toolUseResult come AFTER top-level uuid and hold
      //   server-controlled Record<string,unknown> — a server returning
      //   {uuid:"<36>",timestamp:"..."} would also match.
      // Collect all suffix matches; a single one is unambiguous (common
      // case), multiple need a brace-depth check to pick the one at
      // JSON nesting depth 1. Entries with NO suffix match (some progress
      // variants put timestamp BEFORE uuid → `"uuid":"<36>"}` at EOL)
      // have only one `"uuid":"` and the first-match fallback is sound.
      let firstAny = -1
      let suffix0 = -1
      let suffixN: number[] | undefined
      let from = pos
      for (;;) {
        const next = buf.indexOf(UUID_KEY, from)
        if (next < 0 || next >= lineEnd) break
        if (firstAny < 0) firstAny = next
        const after = next + KEY_LEN + UUID_LEN
        if (
          after + TS_SUFFIX_LEN <= lineEnd &&
          buf.compare(
            TS_SUFFIX,
            0,
            TS_SUFFIX_LEN,
            after,
            after + TS_SUFFIX_LEN,
          ) === 0
        ) {
          // suffixN is only allocated on the second suffix match, seeded
          // with the first — keeps the common single-match case allocation-free.
          if (suffix0 < 0) suffix0 = next
          else (suffixN ??= [suffix0]).push(next)
        }
        from = next + KEY_LEN
      }
      const uk = suffixN
        ? pickDepthOneUuidCandidate(buf, pos, suffixN)
        : suffix0 >= 0
          ? suffix0
          : firstAny
      if (uk >= 0) {
        const uuidStart = uk + KEY_LEN
        // UUIDs are pure ASCII so latin1 avoids UTF-8 decode overhead.
        const uuid = buf.toString('latin1', uuidStart, uuidStart + UUID_LEN)
        uuidToSlot.set(uuid, msgIdx.length)
        msgIdx.push(pos, lineEnd, parentStart)
      } else {
        metaRanges.push(pos, lineEnd)
      }
    } else {
      metaRanges.push(pos, lineEnd)
    }
    pos = lineEnd
  }

  // Leaf = last non-sidechain entry. isSidechain is the 2nd or 3rd key
  // (after parentUuid, maybe logicalParentUuid) so indexOf from lineStart
  // finds it within a few dozen bytes when present; when absent it spills
  // into the next line, caught by the bounds check.
  let leafSlot = -1
  for (let i = msgIdx.length - 3; i >= 0; i -= 3) {
    const sc = buf.indexOf(SIDECHAIN_TRUE, msgIdx[i]!)
    if (sc === -1 || sc >= msgIdx[i + 1]!) {
      leafSlot = i
      break
    }
  }
  if (leafSlot < 0) return buf

  // Walk parentUuid to root. Collect kept-message line starts and sum their
  // byte lengths so we can decide whether the concat is worth it. A dangling
  // parent (uuid not in file) is the normal termination for forked sessions
  // and post-boundary chains -- same semantics as buildConversationChain.
  // Correctness against index poisoning rests on the timestamp suffix check
  // above: a nested `"uuid":"` match without the suffix never becomes uk.
  const seen = new Set<number>()
  const chain = new Set<number>()
  let chainBytes = 0
  let slot: number | undefined = leafSlot
  while (slot !== undefined) {
    // seen guards against a parentUuid cycle (corrupt file) looping forever.
    if (seen.has(slot)) break
    seen.add(slot)
    chain.add(msgIdx[slot]!)
    chainBytes += msgIdx[slot + 1]! - msgIdx[slot]!
    const parentStart = msgIdx[slot + 2]!
    if (parentStart < 0) break
    const parent = buf.toString('latin1', parentStart, parentStart + UUID_LEN)
    slot = uuidToSlot.get(parent)
  }

  // parseJSONL cost scales with bytes, not entry count. A session can have
  // thousands of dead entries by count but only single-digit-% of bytes if
  // the dead branches are short turns and the live chain holds the fat
  // assistant responses (measured: 107 MB session, 69% dead entries, 30%
  // dead bytes - index+concat overhead exceeded parse savings). Gate on
  // bytes: only stitch if we would drop at least half the buffer. Metadata
  // is tiny so len - chainBytes approximates dead bytes closely enough.
  // Near break-even the concat memcpy (copying chainBytes into a fresh
  // allocation) dominates, so a conservative 50% gate stays safely on the
  // winning side.
  if (len - chainBytes < len >> 1) return buf

  // Merge chain entries with metadata in original file order. Both msgIdx and
  // metaRanges are already sorted by offset; interleave them into subarray
  // views and concat once.
  const parts: Buffer[] = []
  let m = 0
  for (let i = 0; i < msgIdx.length; i += 3) {
    const start = msgIdx[i]!
    while (m < metaRanges.length && metaRanges[m]!
< start) {
      parts.push(buf.subarray(metaRanges[m]!, metaRanges[m + 1]!))
      m += 2
    }
    if (chain.has(start)) {
      parts.push(buf.subarray(start, msgIdx[i + 1]!))
    }
  }
  // Flush any metadata ranges that sit after the last message line.
  while (m < metaRanges.length) {
    parts.push(buf.subarray(metaRanges[m]!, metaRanges[m + 1]!))
    m += 2
  }
  return Buffer.concat(parts)
}

/**
 * Loads all messages, summaries, and file history snapshots from a transcript file.
 * Returns the messages, summaries, custom titles, tags, file history snapshots, and attribution snapshots.
 *
 * @param filePath Path to the .jsonl transcript file.
 * @param opts.keepAllLeaves When true, skips the pre-parse dead-branch pruning
 *   so every fork branch survives (e.g. /insights picks the branch with the
 *   most user messages, not the latest).
 * @returns Populated maps/sets; all empty when the file is missing or
 *   unreadable (read errors are swallowed by design — see the catch below).
 */
export async function loadTranscriptFile(
  filePath: string,
  opts?: { keepAllLeaves?: boolean },
): Promise<{
  messages: Map<UUID, TranscriptMessage>
  summaries: Map<UUID, string>
  customTitles: Map<UUID, string>
  tags: Map<UUID, string>
  agentNames: Map<UUID, string>
  agentColors: Map<UUID, string>
  agentSettings: Map<UUID, string>
  prNumbers: Map<UUID, number>
  prUrls: Map<UUID, string>
  prRepositories: Map<UUID, string>
  modes: Map<UUID, string>
  worktreeStates: Map<UUID, PersistedWorktreeSession | null>
  fileHistorySnapshots: Map<UUID, FileHistorySnapshotMessage>
  attributionSnapshots: Map<UUID, AttributionSnapshotMessage>
  contentReplacements: Map<UUID, ContentReplacementRecord[]>
  agentContentReplacements: Map<AgentId, ContentReplacementRecord[]>
  contextCollapseCommits: ContextCollapseCommitEntry[]
  contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined
  leafUuids: Set<UUID>
}> {
  const messages = new Map<UUID, TranscriptMessage>()
  const summaries = new Map<UUID, string>()
  const customTitles = new Map<UUID, string>()
  const tags = new Map<UUID, string>()
  const agentNames = new Map<UUID, string>()
  const agentColors = new Map<UUID, string>()
  const agentSettings = new Map<UUID, string>()
  const prNumbers = new Map<UUID, number>()
  const prUrls = new Map<UUID, string>()
  const prRepositories = new Map<UUID, string>()
  const modes = new Map<UUID, string>()
  const worktreeStates = new Map<UUID, PersistedWorktreeSession | null>()
  const fileHistorySnapshots = new Map<UUID, FileHistorySnapshotMessage>()
  const attributionSnapshots = new Map<UUID, AttributionSnapshotMessage>()
  const contentReplacements = new Map<UUID, ContentReplacementRecord[]>()
  const agentContentReplacements = new Map<
    AgentId,
    ContentReplacementRecord[]
  >()
  // Array, not Map — commit order matters (nested collapses).
  const contextCollapseCommits: ContextCollapseCommitEntry[] = []
  // Last-wins — later entries supersede.
  let contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined

  try {
    // For large transcripts, avoid materializing megabytes of stale content.
    // Single forward chunked read: attribution-snapshot lines are skipped at
    // the fd level (never buffered), compact boundaries truncate the
    // accumulator in-stream. Peak allocation is the OUTPUT size, not the
    // file size — a 151 MB session that is 84% stale attr-snaps allocates
    // ~32 MB instead of 159+64 MB. This matters because mimalloc does not
    // return those pages to the OS even after JS-level GC frees the backing
    // buffers (measured: arrayBuffers=0 after Bun.gc(true) but RSS stuck at
    // ~316 MB on the old scan+strip path vs ~155 MB here).
    //
    // Pre-boundary metadata (agent-setting, mode, pr-link, etc.) is recovered
    // via a cheap byte-level forward scan of [0, boundary).
    let buf: Buffer | null = null
    let metadataLines: string[] | null = null
    let hasPreservedSegment = false
    if (!isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP)) {
      const { size } = await stat(filePath)
      if (size > SKIP_PRECOMPACT_THRESHOLD) {
        const scan = await readTranscriptForLoad(filePath, size)
        buf = scan.postBoundaryBuf
        hasPreservedSegment = scan.hasPreservedSegment
        // >0 means we truncated pre-boundary bytes and must recover
        // session-scoped metadata from that range. A preservedSegment
        // boundary does not truncate (preserved messages are physically
        // pre-boundary), so offset stays 0 unless an EARLIER non-preserved
        // boundary already truncated — in which case the preserved messages
        // for the later boundary are post-that-earlier-boundary and were
        // kept, and we still want the metadata scan.
        if (scan.boundaryStartOffset > 0) {
          metadataLines = await scanPreBoundaryMetadata(
            filePath,
            scan.boundaryStartOffset,
          )
        }
      }
    }
    // Small files (or kill switch active): plain whole-file read.
    buf ??= await readFile(filePath)
    // For large buffers (which here means readTranscriptForLoad output with
    // attr-snaps already stripped at the fd level — the <5MB readFile path
    // falls through the size gate below), the dominant cost is parsing dead
    // fork branches that buildConversationChain would discard anyway. Skip
    // this pruning when the caller needs all leaves
    // (loadAllLogsFromSessionFile for /insights picks the branch with
    // most user messages, not the latest), when the boundary has a
    // preservedSegment (those messages keep their pre-compact parentUuid on
    // disk -- applyPreservedSegmentRelinks splices them in-memory AFTER
    // parse, so a pre-parse chain walk would drop them as orphans), and when
    // CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP is set (that kill switch means
    // "load everything, skip nothing"; this is another skip-before-parse
    // optimization and the scan it depends on for hasPreservedSegment did
    // not run).
    if (
      !opts?.keepAllLeaves &&
      !hasPreservedSegment &&
      !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP) &&
      buf.length > SKIP_PRECOMPACT_THRESHOLD
    ) {
      buf = walkChainBeforeParse(buf)
    }

    // First pass: process metadata-only lines collected during the boundary scan.
    // These populate the session-scoped maps (agentSettings, modes, prNumbers,
    // etc.) for entries written before the compact boundary. Any overlap with
    // the post-boundary buffer is harmless — later values overwrite earlier ones.
    if (metadataLines && metadataLines.length > 0) {
      const metaEntries = parseJSONL<Entry>(
        Buffer.from(metadataLines.join('\n')),
      )
      for (const entry of metaEntries) {
        if (entry.type === 'summary' && entry.leafUuid) {
          summaries.set(entry.leafUuid, entry.summary)
        } else if (entry.type === 'custom-title' && entry.sessionId) {
          customTitles.set(entry.sessionId, entry.customTitle)
        } else if (entry.type === 'tag' && entry.sessionId) {
          tags.set(entry.sessionId, entry.tag)
        } else if (entry.type === 'agent-name' && entry.sessionId) {
          agentNames.set(entry.sessionId, entry.agentName)
        } else if (entry.type === 'agent-color' && entry.sessionId) {
          agentColors.set(entry.sessionId, entry.agentColor)
        } else if (entry.type === 'agent-setting' && entry.sessionId) {
          agentSettings.set(entry.sessionId, entry.agentSetting)
        } else if (entry.type === 'mode' && entry.sessionId) {
          modes.set(entry.sessionId, entry.mode)
        } else if (entry.type === 'worktree-state' && entry.sessionId) {
          worktreeStates.set(entry.sessionId, entry.worktreeSession)
        } else if (entry.type === 'pr-link' && entry.sessionId) {
          prNumbers.set(entry.sessionId, entry.prNumber)
          prUrls.set(entry.sessionId, entry.prUrl)
          prRepositories.set(entry.sessionId, entry.prRepository)
        }
      }
    }

    const entries = parseJSONL<Entry>(buf)

    // Bridge map for legacy progress entries: progress_uuid → progress_parent_uuid.
    // PR #24099 removed progress from isTranscriptMessage, so old transcripts with
    // progress in the parentUuid chain would truncate at buildConversationChain
    // when messages.get(progressUuid) returns undefined. Since transcripts are
    // append-only (parents before children), we record each progress→parent link
    // as we see it, chain-resolving through consecutive progress entries, then
    // rewrite any subsequent message whose parentUuid lands in the bridge.
    const progressBridge = new Map<UUID, UUID | null>()

    for (const entry of entries) {
      // Legacy progress check runs before the Entry-typed else-if chain —
      // progress is not in the Entry union, so checking it after TypeScript
      // has narrowed `entry` intersects to `never`.
      if (isLegacyProgressEntry(entry)) {
        // Chain-resolve through consecutive progress entries so a later
        // message pointing at the tail of a progress run bridges to the
        // nearest non-progress ancestor in one lookup.
        const parent = entry.parentUuid
        progressBridge.set(
          entry.uuid,
          parent && progressBridge.has(parent)
            ? (progressBridge.get(parent) ?? null)
            : parent,
        )
        continue
      }
      if (isTranscriptMessage(entry)) {
        if (entry.parentUuid && progressBridge.has(entry.parentUuid)) {
          entry.parentUuid = progressBridge.get(entry.parentUuid) ?? null
        }
        messages.set(entry.uuid, entry)
        // Compact boundary: prior marble-origami-commit entries reference
        // messages that won't be in the post-boundary chain. The >5MB
        // backward-scan path discards them naturally by never reading the
        // pre-boundary bytes; the <5MB path reads everything, so discard
        // here. Without this, getStats().collapsedSpans in /context
        // overcounts (projectView silently skips the stale commits but
        // they're still in the log).
        if (isCompactBoundaryMessage(entry)) {
          contextCollapseCommits.length = 0
          contextCollapseSnapshot = undefined
        }
      } else if (entry.type === 'summary' && entry.leafUuid) {
        summaries.set(entry.leafUuid, entry.summary)
      } else if (entry.type === 'custom-title' && entry.sessionId) {
        customTitles.set(entry.sessionId, entry.customTitle)
      } else if (entry.type === 'tag' && entry.sessionId) {
        tags.set(entry.sessionId, entry.tag)
      } else if (entry.type === 'agent-name' && entry.sessionId) {
        agentNames.set(entry.sessionId, entry.agentName)
      } else if (entry.type === 'agent-color' && entry.sessionId) {
        agentColors.set(entry.sessionId, entry.agentColor)
      } else if (entry.type === 'agent-setting' && entry.sessionId) {
        agentSettings.set(entry.sessionId, entry.agentSetting)
      } else if (entry.type === 'mode' && entry.sessionId) {
        modes.set(entry.sessionId, entry.mode)
      } else if (entry.type === 'worktree-state' && entry.sessionId) {
        worktreeStates.set(entry.sessionId, entry.worktreeSession)
      } else if (entry.type === 'pr-link' && entry.sessionId) {
        prNumbers.set(entry.sessionId, entry.prNumber)
        prUrls.set(entry.sessionId, entry.prUrl)
        prRepositories.set(entry.sessionId, entry.prRepository)
      } else if (entry.type === 'file-history-snapshot') {
        fileHistorySnapshots.set(entry.messageId, entry)
      } else if (entry.type === 'attribution-snapshot') {
        attributionSnapshots.set(entry.messageId, entry)
      } else if (entry.type === 'content-replacement') {
        // Subagent decisions key by agentId (sidechain resume); main-thread
        // decisions key by sessionId (/resume).
        if (entry.agentId) {
          const existing = agentContentReplacements.get(entry.agentId) ?? []
          agentContentReplacements.set(entry.agentId, existing)
          existing.push(...entry.replacements)
        } else {
          const existing = contentReplacements.get(entry.sessionId) ?? []
          contentReplacements.set(entry.sessionId, existing)
          existing.push(...entry.replacements)
        }
      } else if (entry.type === 'marble-origami-commit') {
        contextCollapseCommits.push(entry)
      } else if (entry.type === 'marble-origami-snapshot') {
        contextCollapseSnapshot = entry
      }
    }
  } catch {
    // File doesn't exist or can't be read
  }

  applyPreservedSegmentRelinks(messages)
  applySnipRemovals(messages)

  // Compute leaf UUIDs once at load time
  // Only user/assistant messages should be considered as leaves for anchoring resume.
  // Other message types (system, attachment) are metadata or auxiliary and shouldn't
  // anchor a conversation chain.
  //
  // We use standard parent relationship for main chain detection, but also need to
  // handle cases where the last message is a system/metadata message.
  // For each conversation chain (identified by following parent links), the leaf
  // is the most recent user/assistant message.
  const allMessages = [...messages.values()]

  // Standard leaf computation using parent relationships
  const parentUuids = new Set(
    allMessages
      .map(msg => msg.parentUuid)
      .filter((uuid): uuid is UUID => uuid !== null),
  )

  // Find all terminal messages (messages with no children)
  const terminalMessages = allMessages.filter(msg => !parentUuids.has(msg.uuid))

  const leafUuids = new Set<UUID>()
  let hasCycle = false

  if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_pebble_leaf_prune', false)) {
    // Build a set of UUIDs that have user/assistant children
    // (these are mid-conversation nodes, not dead ends)
    const hasUserAssistantChild = new Set<UUID>()
    for (const msg of allMessages) {
      if (msg.parentUuid && (msg.type === 'user' || msg.type === 'assistant')) {
        hasUserAssistantChild.add(msg.parentUuid)
      }
    }

    // For each terminal message, walk back to find the nearest user/assistant ancestor.
    // Skip ancestors that already have user/assistant children - those are mid-conversation
    // nodes where the conversation continued (e.g., an assistant tool_use message whose
    // progress child is terminal, but whose tool_result child continues the conversation).
    for (const terminal of terminalMessages) {
      const seen = new Set<UUID>()
      let current: TranscriptMessage | undefined = terminal
      while (current) {
        // seen-set guards against corrupt transcripts with parentUuid cycles.
        if (seen.has(current.uuid)) {
          hasCycle = true
          break
        }
        seen.add(current.uuid)
        if (current.type === 'user' || current.type === 'assistant') {
          if (!hasUserAssistantChild.has(current.uuid)) {
            leafUuids.add(current.uuid)
          }
          break
        }
        current = current.parentUuid
          ? messages.get(current.parentUuid)
          : undefined
      }
    }
  } else {
    // Original leaf computation: walk back from terminal messages to find
    // the nearest user/assistant ancestor unconditionally
    for (const terminal of terminalMessages) {
      const seen = new Set<UUID>()
      let current: TranscriptMessage | undefined = terminal
      while (current) {
        if (seen.has(current.uuid)) {
          hasCycle = true
          break
        }
        seen.add(current.uuid)
        if (current.type === 'user' || current.type === 'assistant') {
          leafUuids.add(current.uuid)
          break
        }
        current = current.parentUuid
          ? messages.get(current.parentUuid)
          : undefined
      }
    }
  }

  if (hasCycle) {
    logEvent('tengu_transcript_parent_cycle', {})
  }

  return {
    messages,
    summaries,
    customTitles,
    tags,
    agentNames,
    agentColors,
    agentSettings,
    prNumbers,
    prUrls,
    prRepositories,
    modes,
    worktreeStates,
    fileHistorySnapshots,
    attributionSnapshots,
    contentReplacements,
    agentContentReplacements,
    contextCollapseCommits,
    contextCollapseSnapshot,
    leafUuids,
  }
}

/**
 * Loads all messages, summaries, file history snapshots, and attribution snapshots from a specific session file.
 */
async function loadSessionFile(sessionId: UUID): Promise<{
  messages: Map<UUID, TranscriptMessage>
  summaries: Map<UUID, string>
  customTitles: Map<UUID, string>
  tags: Map<UUID, string>
  agentSettings: Map<UUID, string>
  worktreeStates: Map<UUID, PersistedWorktreeSession | null>
  fileHistorySnapshots: Map<UUID, FileHistorySnapshotMessage>
  attributionSnapshots: Map<UUID, AttributionSnapshotMessage>
  contentReplacements: Map<UUID, ContentReplacementRecord[]>
  contextCollapseCommits: ContextCollapseCommitEntry[]
  contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined
}> {
  // Session transcripts live at <projectDir>/<sessionId>.jsonl.
  const sessionFile = join(
    getSessionProjectDir() ?? getProjectDir(getOriginalCwd()),
    `${sessionId}.jsonl`,
  )
  return loadTranscriptFile(sessionFile)
}

/**
 * Gets message UUIDs for a specific session without loading all sessions.
 * Memoized to avoid re-reading the same session file multiple times.
 */
const getSessionMessages = memoize(
  async (sessionId: UUID): Promise<Set<UUID>> => {
    const { messages } = await loadSessionFile(sessionId)
    return new Set(messages.keys())
  },
  (sessionId: UUID) => sessionId,
)

/**
 * Clear the memoized session messages cache.
 * Call after compaction when old message UUIDs are no longer valid.
 */
export function clearSessionMessagesCache(): void {
  getSessionMessages.cache.clear?.()
}

/**
 * Check if a message UUID exists in the session storage
 */
export async function doesMessageExistInSession(
  sessionId: UUID,
  messageUuid: UUID,
): Promise<boolean> {
  const messageSet = await getSessionMessages(sessionId)
  return messageSet.has(messageUuid)
}

/**
 * Loads the most recent conversation chain for a session as a LogOption.
 * Returns null when the session file has no messages (or no non-sidechain leaf).
 */
export async function getLastSessionLog(
  sessionId: UUID,
): Promise<LogOption | null> {
  // Single read: load all session data at once instead of reading the file twice
  const {
    messages,
    summaries,
    customTitles,
    tags,
    agentSettings,
    worktreeStates,
    fileHistorySnapshots,
    attributionSnapshots,
    contentReplacements,
    contextCollapseCommits,
    contextCollapseSnapshot,
  } = await loadSessionFile(sessionId)
  if (messages.size === 0) return null
  // Prime getSessionMessages cache so recordTranscript (called after REPL
  // mount on --resume) skips a second full file load. -170~227ms on large sessions.
  // Guard: only prime if cache is empty. Mid-session callers (e.g. IssueFeedback)
  // may call getLastSessionLog on the current session — overwriting a live cache
  // with a stale disk snapshot would lose unflushed UUIDs and break dedup.
  if (!getSessionMessages.cache.has(sessionId)) {
    getSessionMessages.cache.set(
      sessionId,
      Promise.resolve(new Set(messages.keys())),
    )
  }

  // Find the most recent non-sidechain message
  const lastMessage = findLatestMessage(messages.values(), m => !m.isSidechain)
  if (!lastMessage) return null

  // Build the transcript chain from the last message
  const transcript = buildConversationChain(messages, lastMessage)

  const summary = summaries.get(lastMessage.uuid)
  const customTitle = customTitles.get(lastMessage.sessionId as UUID)
  const tag = tags.get(lastMessage.sessionId as UUID)
  const agentSetting = agentSettings.get(sessionId)
  return {
    ...convertToLogOption(
      transcript,
      0,
      summary,
      customTitle,
      buildFileHistorySnapshotChain(fileHistorySnapshots, transcript),
      tag,
      getTranscriptPathForSession(sessionId),
      buildAttributionSnapshotChain(attributionSnapshots, transcript),
      agentSetting,
      contentReplacements.get(sessionId) ?? [],
    ),
    worktreeSession: worktreeStates.get(sessionId),
    // Scope collapse state to this session — the file may contain entries
    // for other session ids.
    contextCollapseCommits: contextCollapseCommits.filter(
      e => e.sessionId === sessionId,
    ),
    contextCollapseSnapshot:
      contextCollapseSnapshot?.sessionId === sessionId
        ? contextCollapseSnapshot
        : undefined,
  }
}

/**
 * Loads the list of message logs
 * @param limit Optional limit on number of session files to load
 * @returns List of message logs sorted by date
 */
export async function loadMessageLogs(limit?: number): Promise<LogOption[]> {
  const sessionLogs = await fetchLogs(limit)
  // fetchLogs returns lite (stat-only) logs — enrich them to get metadata.
  // enrichLogs already filters out sidechains, empty sessions, etc.
  const { logs: enriched } = await enrichLogs(
    sessionLogs,
    0,
    sessionLogs.length,
  )

  // enrichLogs returns fresh unshared objects — mutate in place to avoid
  // re-spreading every 30-field LogOption just to renumber the index.
  const sorted = sortLogs(enriched)
  sorted.forEach((log, i) => {
    log.value = i
  })
  return sorted
}

/**
 * Loads message logs from all project directories.
 * @param limit Optional limit on number of session files to load per project (used when no index exists)
 * @returns List of message logs sorted by date
 */
export async function loadAllProjectsMessageLogs(
  limit?: number,
  options?: { skipIndex?: boolean; initialEnrichCount?: number },
): Promise<LogOption[]> {
  if (options?.skipIndex) {
    // Load all sessions with full message data (e.g. for /insights analysis)
    return loadAllProjectsMessageLogsFull(limit)
  }
  const result = await loadAllProjectsMessageLogsProgressive(
    limit,
    options?.initialEnrichCount ?? INITIAL_ENRICH_COUNT,
  )
  return result.logs
}

/**
 * Full (non-progressive) variant: loads every session in every project dir
 * with complete message data. Used by /insights via skipIndex.
 */
async function loadAllProjectsMessageLogsFull(
  limit?: number,
): Promise<LogOption[]> {
  const projectsDir = getProjectsDir()

  let dirents: Dirent[]
  try {
    dirents = await readdir(projectsDir, { withFileTypes: true })
  } catch {
    // Projects dir missing/unreadable — nothing to load.
    return []
  }

  const projectDirs = dirents
    .filter(dirent => dirent.isDirectory())
    .map(dirent => join(projectsDir, dirent.name))

  const logsPerProject = await Promise.all(
    projectDirs.map(projectDir => getLogsWithoutIndex(projectDir, limit)),
  )
  const allLogs = logsPerProject.flat()

  // Deduplicate — same session+leaf can appear in multiple project dirs.
  // This path creates one LogOption per leaf, so use sessionId+leafUuid key.
  const deduped = new Map<string, LogOption>()
  for (const log of allLogs) {
    const key = `${log.sessionId ?? ''}:${log.leafUuid ?? ''}`
    const existing = deduped.get(key)
    if (!existing || log.modified.getTime() > existing.modified.getTime()) {
      deduped.set(key, log)
    }
  }

  // deduped values are fresh from getLogsWithoutIndex — safe to mutate
  const sorted = sortLogs([...deduped.values()])
  sorted.forEach((log, i) => {
    log.value = i
  })
  return sorted
}

/**
 * Progressive variant: stats every session file cheaply, then enriches only
 * the first `initialEnrichCount` entries. Callers continue enrichment from
 * `nextIndex` over `allStatLogs`.
 */
export async function loadAllProjectsMessageLogsProgressive(
  limit?: number,
  initialEnrichCount: number = INITIAL_ENRICH_COUNT,
): Promise<SessionLogResult> {
  const projectsDir = getProjectsDir()

  let dirents: Dirent[]
  try {
    dirents = await readdir(projectsDir, { withFileTypes: true })
  } catch {
    return { logs: [], allStatLogs: [], nextIndex: 0 }
  }

  const projectDirs = dirents
    .filter(dirent => dirent.isDirectory())
    .map(dirent => join(projectsDir, dirent.name))

  // NOTE(review): per-project reads are sequential here while
  // loadAllProjectsMessageLogsFull uses Promise.all — confirm whether that is
  // deliberate (fd pressure / ordering) before parallelizing.
  const rawLogs: LogOption[] = []
  for (const projectDir of projectDirs) {
    rawLogs.push(...(await getSessionFilesLite(projectDir, limit)))
  }
  // Deduplicate — same session can appear in multiple project dirs
  const sorted = deduplicateLogsBySessionId(rawLogs)

  const { logs, nextIndex } = await enrichLogs(sorted, 0, initialEnrichCount)

  // enrichLogs returns fresh unshared objects — safe to mutate in place
  logs.forEach((log, i) => {
    log.value = i
  })
  return { logs, allStatLogs: sorted, nextIndex }
}

/**
 * Loads message logs from all worktrees of the same git repository.
 * Falls back to loadMessageLogs if no worktrees provided.
 *
 * Uses pure filesystem metadata for fast loading.
 *
 * @param worktreePaths Array of worktree paths (from getWorktreePaths)
 * @param limit Optional limit on number of session files to load per project
 * @returns List of message logs sorted by date
 */
/**
 * Result of loading session logs with progressive enrichment support.
 */
export type SessionLogResult = {
  /** Enriched logs ready for display */
  logs: LogOption[]
  /** Full stat-only list for progressive loading (call enrichLogs to get more) */
  allStatLogs: LogOption[]
  /** Index into allStatLogs where progressive loading should continue from */
  nextIndex: number
}

export async function loadSameRepoMessageLogs(
  worktreePaths: string[],
  limit?: number,
  initialEnrichCount: number = INITIAL_ENRICH_COUNT,
): Promise<LogOption[]> {
  // Convenience wrapper: progressive variant, discarding the stat-only tail.
  const result = await loadSameRepoMessageLogsProgressive(
    worktreePaths,
    limit,
    initialEnrichCount,
  )
  return result.logs
}

export async function loadSameRepoMessageLogsProgressive(
  worktreePaths: string[],
  limit?: number,
  initialEnrichCount: number = INITIAL_ENRICH_COUNT,
): Promise<SessionLogResult> {
  logForDebugging(
    `/resume: loading sessions for cwd=${getOriginalCwd()}, worktrees=[${worktreePaths.join(', ')}]`,
  )
  const allStatLogs = await getStatOnlyLogsForWorktrees(worktreePaths, limit)
  logForDebugging(`/resume: found ${allStatLogs.length} session files on disk`)

  const { logs, nextIndex } = await enrichLogs(
    allStatLogs,
    0,
    initialEnrichCount,
  )

  // enrichLogs returns fresh unshared objects — safe to mutate in place
  logs.forEach((log, i) => {
    log.value = i
  })
  return { logs, allStatLogs, nextIndex }
}

/**
 * Gets stat-only logs for worktree paths (no file reads).
 */
async function getStatOnlyLogsForWorktrees(
  worktreePaths: string[],
  limit?: number,
): Promise<LogOption[]> {
  const projectsDir = getProjectsDir()

  if (worktreePaths.length <= 1) {
    const cwd = getOriginalCwd()
    const projectDir = getProjectDir(cwd)
    // NOTE(review): `limit` is passed as undefined here and in the worktree
    // loop below, but honored in the readdir-failure fallback — confirm
    // whether ignoring it on the main paths is intentional.
    return getSessionFilesLite(projectDir, undefined, cwd)
  }

  // On Windows, drive letter case can differ between git worktree list
  // output (e.g. C:/Users/...) and how paths were stored in project
  // directories (e.g. c:/Users/...). Use case-insensitive comparison.
  const caseInsensitive = process.platform === 'win32'

  // Sort worktree paths by sanitized prefix length (longest first) so
  // more specific matches take priority over shorter ones. Without this,
  // a short prefix like -code-myrepo could match -code-myrepo-worktree1
  // before the longer, more specific prefix gets a chance.
  const indexed = worktreePaths.map(wt => {
    const sanitized = sanitizePath(wt)
    return {
      path: wt,
      prefix: caseInsensitive ? sanitized.toLowerCase() : sanitized,
    }
  })
  indexed.sort((a, b) => b.prefix.length - a.prefix.length)

  const allLogs: LogOption[] = []
  const seenDirs = new Set<string>()

  let allDirents: Dirent[]
  try {
    allDirents = await readdir(projectsDir, { withFileTypes: true })
  } catch (e) {
    // Fall back to current project
    logForDebugging(
      `Failed to read projects dir ${projectsDir}, falling back to current project: ${e}`,
    )
    const projectDir = getProjectDir(getOriginalCwd())
    return getSessionFilesLite(projectDir, limit, getOriginalCwd())
  }

  for (const dirent of allDirents) {
    if (!dirent.isDirectory()) continue
    const dirName = caseInsensitive ? dirent.name.toLowerCase() : dirent.name
    if (seenDirs.has(dirName)) continue

    for (const { path: wtPath, prefix } of indexed) {
      if (dirName === prefix || dirName.startsWith(prefix + '-')) {
        seenDirs.add(dirName)
        allLogs.push(
          ...(await getSessionFilesLite(
            join(projectsDir, dirent.name),
            undefined,
            wtPath,
          )),
        )
        break
      }
    }
  }

  // Deduplicate by sessionId — the same session can appear in multiple
  // worktree project dirs. Keep the entry with the newest modified time.
  return deduplicateLogsBySessionId(allLogs)
}

/**
 * Retrieves the transcript for a specific agent by agentId.
 * Directly loads the agent-specific transcript file.
 * @param agentId The agent ID to search for
 * @returns The conversation chain and budget replacement records for the agent,
 *          or null if not found
 */
export async function getAgentTranscript(agentId: AgentId): Promise<{
  messages: Message[]
  contentReplacements: ContentReplacementRecord[]
} | null> {
  const agentFile = getAgentTranscriptPath(agentId)

  try {
    const { messages, agentContentReplacements } =
      await loadTranscriptFile(agentFile)

    // Find messages with matching agentId
    const agentMessages = Array.from(messages.values()).filter(
      msg => msg.agentId === agentId && msg.isSidechain,
    )

    if (agentMessages.length === 0) {
      return null
    }

    // Find the most recent leaf message with this agentId
    const parentUuids = new Set(agentMessages.map(msg => msg.parentUuid))
    const leafMessage = findLatestMessage(
      agentMessages,
      msg => !parentUuids.has(msg.uuid),
    )

    if (!leafMessage) {
      return null
    }

    // Build the conversation chain
    const transcript = buildConversationChain(messages, leafMessage)

    // Filter to only include messages with this agentId
    const agentTranscript = transcript.filter(msg => msg.agentId === agentId)

    return {
      // Convert TranscriptMessage[] to Message[]
      messages: agentTranscript.map(
        ({ isSidechain, parentUuid, ...msg }) => msg,
      ),
      contentReplacements: agentContentReplacements.get(agentId) ?? [],
    }
  } catch {
    // Missing/unreadable agent transcript — treat as not found.
    return null
  }
}

/**
 * Extract agent IDs from progress messages in the conversation.
 * Agent/skill progress messages have type 'progress' with data.type
 * 'agent_progress' or 'skill_progress' and data.agentId.
 * This captures sync agents that emit progress messages during execution.
 */
export function extractAgentIdsFromMessages(messages: Message[]): string[] {
  const agentIds: string[] = []

  for (const message of messages) {
    if (
      message.type === 'progress' &&
      message.data &&
      typeof message.data === 'object' &&
      'type' in message.data &&
      (message.data.type === 'agent_progress' ||
        message.data.type === 'skill_progress') &&
      'agentId' in message.data &&
      typeof message.data.agentId === 'string'
    ) {
      agentIds.push(message.data.agentId)
    }
  }

  // uniq preserves first-seen order while dropping duplicates.
  return uniq(agentIds)
}

/**
 * Extract teammate transcripts directly from AppState tasks.
 * In-process teammates store their messages in task.messages,
 * which is more reliable than loading from disk since each teammate turn
 * uses a random agentId for transcript storage.
4270 */ 4271export function extractTeammateTranscriptsFromTasks(tasks: { 4272 [taskId: string]: { 4273 type: string 4274 identity?: { agentId: string } 4275 messages?: Message[] 4276 } 4277}): { [agentId: string]: Message[] } { 4278 const transcripts: { [agentId: string]: Message[] } = {} 4279 4280 for (const task of Object.values(tasks)) { 4281 if ( 4282 task.type === 'in_process_teammate' && 4283 task.identity?.agentId && 4284 task.messages && 4285 task.messages.length > 0 4286 ) { 4287 transcripts[task.identity.agentId] = task.messages 4288 } 4289 } 4290 4291 return transcripts 4292} 4293 4294/** 4295 * Load subagent transcripts for the given agent IDs 4296 */ 4297export async function loadSubagentTranscripts( 4298 agentIds: string[], 4299): Promise<{ [agentId: string]: Message[] }> { 4300 const results = await Promise.all( 4301 agentIds.map(async agentId => { 4302 try { 4303 const result = await getAgentTranscript(asAgentId(agentId)) 4304 if (result && result.messages.length > 0) { 4305 return { agentId, transcript: result.messages } 4306 } 4307 return null 4308 } catch { 4309 // Skip if transcript can't be loaded 4310 return null 4311 } 4312 }), 4313 ) 4314 4315 const transcripts: { [agentId: string]: Message[] } = {} 4316 for (const result of results) { 4317 if (result) { 4318 transcripts[result.agentId] = result.transcript 4319 } 4320 } 4321 return transcripts 4322} 4323 4324// Globs the session's subagents dir directly — unlike AppState.tasks, this survives task eviction. 4325export async function loadAllSubagentTranscriptsFromDisk(): Promise<{ 4326 [agentId: string]: Message[] 4327}> { 4328 const subagentsDir = join( 4329 getSessionProjectDir() ?? getProjectDir(getOriginalCwd()), 4330 getSessionId(), 4331 'subagents', 4332 ) 4333 let entries: Dirent[] 4334 try { 4335 entries = await readdir(subagentsDir, { withFileTypes: true }) 4336 } catch { 4337 return {} 4338 } 4339 // Filename format is the inverse of getAgentTranscriptPath() — keep in sync. 
4340 const agentIds = entries 4341 .filter( 4342 d => 4343 d.isFile() && d.name.startsWith('agent-') && d.name.endsWith('.jsonl'), 4344 ) 4345 .map(d => d.name.slice('agent-'.length, -'.jsonl'.length)) 4346 return loadSubagentTranscripts(agentIds) 4347} 4348 4349// Exported so useLogMessages can sync-compute the last loggable uuid 4350// without awaiting recordTranscript's return value (race-free hint tracking). 4351export function isLoggableMessage(m: Message): boolean { 4352 if (m.type === 'progress') return false 4353 // IMPORTANT: We deliberately filter out most attachments for non-ants because 4354 // they have sensitive info for training that we don't want exposed to the public. 4355 // When enabled, we allow hook_additional_context through since it contains 4356 // user-configured hook output that is useful for session context on resume. 4357 if (m.type === 'attachment' && getUserType() !== 'ant') { 4358 if ( 4359 m.attachment.type === 'hook_additional_context' && 4360 isEnvTruthy(process.env.CLAUDE_CODE_SAVE_HOOK_ADDITIONAL_CONTEXT) 4361 ) { 4362 return true 4363 } 4364 return false 4365 } 4366 return true 4367} 4368 4369function collectReplIds(messages: readonly Message[]): Set<string> { 4370 const ids = new Set<string>() 4371 for (const m of messages) { 4372 if (m.type === 'assistant' && Array.isArray(m.message.content)) { 4373 for (const b of m.message.content) { 4374 if (b.type === 'tool_use' && b.name === REPL_TOOL_NAME) { 4375 ids.add(b.id) 4376 } 4377 } 4378 } 4379 } 4380 return ids 4381} 4382 4383/** 4384 * For external users, make REPL invisible in the persisted transcript: strip 4385 * REPL tool_use/tool_result pairs and promote isVirtual messages to real. On 4386 * --resume the model then sees a coherent native-tool-call history (assistant 4387 * called Bash, got result, called Read, got result) without the REPL wrapper. 4388 * Ant transcripts keep the wrapper so /share training data sees REPL usage. 
4389 * 4390 * replIds is pre-collected from the FULL session array, not the slice being 4391 * transformed — recordTranscript receives incremental slices where the REPL 4392 * tool_use (earlier render) and its tool_result (later render, after async 4393 * execution) land in separate calls. A fresh per-call Set would miss the id 4394 * and leave an orphaned tool_result on disk. 4395 */ 4396function transformMessagesForExternalTranscript( 4397 messages: Transcript, 4398 replIds: Set<string>, 4399): Transcript { 4400 return messages.flatMap(m => { 4401 if (m.type === 'assistant' && Array.isArray(m.message.content)) { 4402 const content = m.message.content 4403 const hasRepl = content.some( 4404 b => b.type === 'tool_use' && b.name === REPL_TOOL_NAME, 4405 ) 4406 const filtered = hasRepl 4407 ? content.filter( 4408 b => !(b.type === 'tool_use' && b.name === REPL_TOOL_NAME), 4409 ) 4410 : content 4411 if (filtered.length === 0) return [] 4412 if (m.isVirtual) { 4413 const { isVirtual: _omit, ...rest } = m 4414 return [{ ...rest, message: { ...m.message, content: filtered } }] 4415 } 4416 if (filtered !== content) { 4417 return [{ ...m, message: { ...m.message, content: filtered } }] 4418 } 4419 return [m] 4420 } 4421 if (m.type === 'user' && Array.isArray(m.message.content)) { 4422 const content = m.message.content 4423 const hasRepl = content.some( 4424 b => b.type === 'tool_result' && replIds.has(b.tool_use_id), 4425 ) 4426 const filtered = hasRepl 4427 ? 
content.filter( 4428 b => !(b.type === 'tool_result' && replIds.has(b.tool_use_id)), 4429 ) 4430 : content 4431 if (filtered.length === 0) return [] 4432 if (m.isVirtual) { 4433 const { isVirtual: _omit, ...rest } = m 4434 return [{ ...rest, message: { ...m.message, content: filtered } }] 4435 } 4436 if (filtered !== content) { 4437 return [{ ...m, message: { ...m.message, content: filtered } }] 4438 } 4439 return [m] 4440 } 4441 // string-content user, system, attachment 4442 if ('isVirtual' in m && m.isVirtual) { 4443 const { isVirtual: _omit, ...rest } = m 4444 return [rest] 4445 } 4446 return [m] 4447 }) as Transcript 4448} 4449 4450export function cleanMessagesForLogging( 4451 messages: Message[], 4452 allMessages: readonly Message[] = messages, 4453): Transcript { 4454 const filtered = messages.filter(isLoggableMessage) as Transcript 4455 return getUserType() !== 'ant' 4456 ? transformMessagesForExternalTranscript( 4457 filtered, 4458 collectReplIds(allMessages), 4459 ) 4460 : filtered 4461} 4462 4463/** 4464 * Gets a log by its index 4465 * @param index Index in the sorted list of logs (0-based) 4466 * @returns Log data or null if not found 4467 */ 4468export async function getLogByIndex(index: number): Promise<LogOption | null> { 4469 const logs = await loadMessageLogs() 4470 return logs[index] || null 4471} 4472 4473/** 4474 * Looks up unresolved tool uses in the transcript by tool_use_id. 4475 * Returns the assistant message containing the tool_use, or null if not found 4476 * or the tool call already has a tool_result. 
4477 */ 4478export async function findUnresolvedToolUse( 4479 toolUseId: string, 4480): Promise<AssistantMessage | null> { 4481 try { 4482 const transcriptPath = getTranscriptPath() 4483 const { messages } = await loadTranscriptFile(transcriptPath) 4484 4485 let toolUseMessage = null 4486 4487 // Find the tool use but make sure there's not also a result 4488 for (const message of messages.values()) { 4489 if (message.type === 'assistant') { 4490 const content = message.message.content 4491 if (Array.isArray(content)) { 4492 for (const block of content) { 4493 if (block.type === 'tool_use' && block.id === toolUseId) { 4494 toolUseMessage = message 4495 break 4496 } 4497 } 4498 } 4499 } else if (message.type === 'user') { 4500 const content = message.message.content 4501 if (Array.isArray(content)) { 4502 for (const block of content) { 4503 if ( 4504 block.type === 'tool_result' && 4505 block.tool_use_id === toolUseId 4506 ) { 4507 // Found tool result, bail out 4508 return null 4509 } 4510 } 4511 } 4512 } 4513 } 4514 4515 return toolUseMessage 4516 } catch { 4517 return null 4518 } 4519} 4520 4521/** 4522 * Gets all session JSONL files in a project directory with their stats. 4523 * Returns a map of sessionId → {path, mtime, ctime, size}. 4524 * Stats are batched via Promise.all to avoid serial syscalls in the hot loop. 
4525 */ 4526export async function getSessionFilesWithMtime( 4527 projectDir: string, 4528): Promise< 4529 Map<string, { path: string; mtime: number; ctime: number; size: number }> 4530> { 4531 const sessionFilesMap = new Map< 4532 string, 4533 { path: string; mtime: number; ctime: number; size: number } 4534 >() 4535 4536 let dirents: Dirent[] 4537 try { 4538 dirents = await readdir(projectDir, { withFileTypes: true }) 4539 } catch { 4540 // Directory doesn't exist - return empty map 4541 return sessionFilesMap 4542 } 4543 4544 const candidates: Array<{ sessionId: string; filePath: string }> = [] 4545 for (const dirent of dirents) { 4546 if (!dirent.isFile() || !dirent.name.endsWith('.jsonl')) continue 4547 const sessionId = validateUuid(basename(dirent.name, '.jsonl')) 4548 if (!sessionId) continue 4549 candidates.push({ sessionId, filePath: join(projectDir, dirent.name) }) 4550 } 4551 4552 await Promise.all( 4553 candidates.map(async ({ sessionId, filePath }) => { 4554 try { 4555 const st = await stat(filePath) 4556 sessionFilesMap.set(sessionId, { 4557 path: filePath, 4558 mtime: st.mtime.getTime(), 4559 ctime: st.birthtime.getTime(), 4560 size: st.size, 4561 }) 4562 } catch { 4563 logForDebugging(`Failed to stat session file: ${filePath}`) 4564 } 4565 }), 4566 ) 4567 4568 return sessionFilesMap 4569} 4570 4571/** 4572 * Number of sessions to enrich on the initial load of the resume picker. 4573 * Each enrichment reads up to 128 KB per file (head + tail), so 50 sessions 4574 * means ~6.4 MB of I/O — fast on any modern filesystem while giving users 4575 * a much better initial view than the previous default of 10. 
 */
const INITIAL_ENRICH_COUNT = 50

// Lightweight per-session metadata scraped by readLiteMetadata from the
// head/tail of a session JSONL file via string search (no full JSON parse).
type LiteMetadata = {
  firstPrompt: string
  gitBranch?: string
  isSidechain: boolean
  projectPath?: string
  teamName?: string
  customTitle?: string
  summary?: string
  tag?: string
  agentSetting?: string
  prNumber?: number
  prUrl?: string
  prRepository?: string
}

/**
 * Loads all logs from a single session file with full message data.
 * Builds a LogOption for each leaf message in the file.
 *
 * @param sessionFile Path to the session JSONL file.
 * @param projectPathOverride When set, used instead of the first message's cwd.
 * @returns One LogOption per conversation leaf (empty if the file has no messages).
 */
export async function loadAllLogsFromSessionFile(
  sessionFile: string,
  projectPathOverride?: string,
): Promise<LogOption[]> {
  const {
    messages,
    summaries,
    customTitles,
    tags,
    agentNames,
    agentColors,
    agentSettings,
    prNumbers,
    prUrls,
    prRepositories,
    modes,
    fileHistorySnapshots,
    attributionSnapshots,
    contentReplacements,
    leafUuids,
  } = await loadTranscriptFile(sessionFile, { keepAllLeaves: true })

  if (messages.size === 0) return []

  const leafMessages: TranscriptMessage[] = []
  // Build parentUuid → children index once (O(n)), so trailing-message lookup is O(1) per leaf
  const childrenByParent = new Map<UUID, TranscriptMessage[]>()
  for (const msg of messages.values()) {
    if (leafUuids.has(msg.uuid)) {
      leafMessages.push(msg)
    } else if (msg.parentUuid) {
      const siblings = childrenByParent.get(msg.parentUuid)
      if (siblings) {
        siblings.push(msg)
      } else {
        childrenByParent.set(msg.parentUuid, [msg])
      }
    }
  }

  const logs: LogOption[] = []

  for (const leafMessage of leafMessages) {
    const chain = buildConversationChain(messages, leafMessage)
    if (chain.length === 0) continue

    // Append trailing messages that are children of the leaf
    const trailingMessages = childrenByParent.get(leafMessage.uuid)
    if (trailingMessages) {
      // ISO-8601 UTC timestamps are lexically sortable
      trailingMessages.sort((a, b) =>
        a.timestamp < b.timestamp ? -1 : a.timestamp > b.timestamp ? 1 : 0,
      )
      chain.push(...trailingMessages)
    }

    // Non-null: the chain.length === 0 case was skipped above.
    const firstMessage = chain[0]!
    const sessionId = leafMessage.sessionId as UUID

    // Note the two keying schemes below: summaries are keyed by the leaf
    // message uuid, everything else by sessionId.
    logs.push({
      date: leafMessage.timestamp,
      messages: removeExtraFields(chain),
      fullPath: sessionFile,
      value: 0,
      created: new Date(firstMessage.timestamp),
      modified: new Date(leafMessage.timestamp),
      firstPrompt: extractFirstPrompt(chain),
      messageCount: countVisibleMessages(chain),
      isSidechain: firstMessage.isSidechain ?? false,
      sessionId,
      leafUuid: leafMessage.uuid,
      summary: summaries.get(leafMessage.uuid),
      customTitle: customTitles.get(sessionId),
      tag: tags.get(sessionId),
      agentName: agentNames.get(sessionId),
      agentColor: agentColors.get(sessionId),
      agentSetting: agentSettings.get(sessionId),
      mode: modes.get(sessionId) as LogOption['mode'],
      prNumber: prNumbers.get(sessionId),
      prUrl: prUrls.get(sessionId),
      prRepository: prRepositories.get(sessionId),
      gitBranch: leafMessage.gitBranch,
      projectPath: projectPathOverride ?? firstMessage.cwd,
      fileHistorySnapshots: buildFileHistorySnapshotChain(
        fileHistorySnapshots,
        chain,
      ),
      attributionSnapshots: buildAttributionSnapshotChain(
        attributionSnapshots,
        chain,
      ),
      contentReplacements: contentReplacements.get(sessionId) ?? [],
    })
  }

  return logs
}

/**
 * Gets logs by loading all session files fully, bypassing the session index.
 * Use this when you need full message data (e.g., for /insights analysis).
4699 4700 */ 4701async function getLogsWithoutIndex( 4702 projectDir: string, 4703 limit?: number, 4704): Promise<LogOption[]> { 4705 const sessionFilesMap = await getSessionFilesWithMtime(projectDir) 4706 if (sessionFilesMap.size === 0) return [] 4707 4708 // If limit specified, only load N most recent files by mtime 4709 let filesToProcess: Array<{ path: string; mtime: number }> 4710 if (limit && sessionFilesMap.size > limit) { 4711 filesToProcess = [...sessionFilesMap.values()] 4712 .sort((a, b) => b.mtime - a.mtime) 4713 .slice(0, limit) 4714 } else { 4715 filesToProcess = [...sessionFilesMap.values()] 4716 } 4717 4718 const logs: LogOption[] = [] 4719 for (const fileInfo of filesToProcess) { 4720 try { 4721 const fileLogOptions = await loadAllLogsFromSessionFile(fileInfo.path) 4722 logs.push(...fileLogOptions) 4723 } catch { 4724 logForDebugging(`Failed to load session file: ${fileInfo.path}`) 4725 } 4726 } 4727 4728 return logs 4729} 4730 4731/** 4732 * Reads the first and last ~64KB of a JSONL file and extracts lite metadata. 4733 * 4734 * Head (first 64KB): isSidechain, projectPath, teamName, firstPrompt. 4735 * Tail (last 64KB): customTitle, tag, PR link, latest gitBranch. 4736 * 4737 * Accepts a shared buffer to avoid per-file allocation overhead. 4738 */ 4739async function readLiteMetadata( 4740 filePath: string, 4741 fileSize: number, 4742 buf: Buffer, 4743): Promise<LiteMetadata> { 4744 const { head, tail } = await readHeadAndTail(filePath, fileSize, buf) 4745 if (!head) return { firstPrompt: '', isSidechain: false } 4746 4747 // Extract stable metadata from the first line via string search. 4748 // Works even when the first line is truncated (>64KB message). 
  const isSidechain =
    head.includes('"isSidechain":true') || head.includes('"isSidechain": true')
  const projectPath = extractJsonStringField(head, 'cwd')
  const teamName = extractJsonStringField(head, 'teamName')
  const agentSetting = extractJsonStringField(head, 'agentSetting')

  // Prefer the last-prompt tail entry — captured by extractFirstPrompt at
  // write time (filtered, authoritative) and shows what the user was most
  // recently doing. Head scan is the fallback for sessions written before
  // last-prompt entries existed. Raw string scrapes of head are last resort
  // and catch array-format content blocks (VS Code <ide_selection> metadata).
  const firstPrompt =
    extractLastJsonStringField(tail, 'lastPrompt') ||
    extractFirstPromptFromChunk(head) ||
    extractJsonStringFieldPrefix(head, 'content', 200) ||
    extractJsonStringFieldPrefix(head, 'text', 200) ||
    ''

  // Extract tail metadata via string search (last occurrence wins).
  // User titles (customTitle field, from custom-title entries) win over
  // AI titles (aiTitle field, from ai-title entries). The distinct field
  // names mean extractLastJsonStringField naturally disambiguates.
  const customTitle =
    extractLastJsonStringField(tail, 'customTitle') ??
    extractLastJsonStringField(head, 'customTitle') ??
    extractLastJsonStringField(tail, 'aiTitle') ??
    extractLastJsonStringField(head, 'aiTitle')
  const summary = extractLastJsonStringField(tail, 'summary')
  const tag = extractLastJsonStringField(tail, 'tag')
  const gitBranch =
    extractLastJsonStringField(tail, 'gitBranch') ??
    extractJsonStringField(head, 'gitBranch')

  // PR link fields — prNumber is a number not a string, so try both
  const prUrl = extractLastJsonStringField(tail, 'prUrl')
  const prRepository = extractLastJsonStringField(tail, 'prRepository')
  let prNumber: number | undefined
  const prNumStr = extractLastJsonStringField(tail, 'prNumber')
  if (prNumStr) {
    prNumber = parseInt(prNumStr, 10) || undefined
  }
  // Fallback for the numeric (unquoted) encoding: scan past the key and
  // parse whatever digits follow the colon. 11 = '"prNumber":'.length.
  if (!prNumber) {
    const prNumMatch = tail.lastIndexOf('"prNumber":')
    if (prNumMatch >= 0) {
      const afterColon = tail.slice(prNumMatch + 11, prNumMatch + 25)
      const num = parseInt(afterColon.trim(), 10)
      if (num > 0) prNumber = num
    }
  }

  return {
    firstPrompt,
    gitBranch,
    isSidechain,
    projectPath,
    teamName,
    customTitle,
    summary,
    tag,
    agentSetting,
    prNumber,
    prUrl,
    prRepository,
  }
}

/**
 * Scans a chunk of JSONL text for the first meaningful user prompt.
 *
 * Skips tool_result and isMeta lines, collects text from both string and
 * array-form message content, formats slash commands and bash input, and
 * falls back to a clean command name (or a synthetic proactive-session
 * title) when the chunk contains no real prompt.
 */
function extractFirstPromptFromChunk(chunk: string): string {
  let start = 0
  let hasTickMessages = false
  let firstCommandFallback = ''
  while (start < chunk.length) {
    // Manual line splitting avoids allocating an array for the whole chunk.
    const newlineIdx = chunk.indexOf('\n', start)
    const line =
      newlineIdx >= 0 ? chunk.slice(start, newlineIdx) : chunk.slice(start)
    start = newlineIdx >= 0 ? newlineIdx + 1 : chunk.length

    // Cheap string pre-filters before paying for a JSON parse.
    if (!line.includes('"type":"user"') && !line.includes('"type": "user"')) {
      continue
    }
    if (line.includes('"tool_result"')) continue
    if (line.includes('"isMeta":true') || line.includes('"isMeta": true'))
      continue

    try {
      const entry = jsonParse(line) as Record<string, unknown>
      if (entry.type !== 'user') continue

      const message = entry.message as Record<string, unknown> | undefined
      if (!message) continue

      const content = message.content
      // Collect all text values from the message content. For array content
      // (common in VS Code where IDE metadata tags come before the user's
      // actual prompt), iterate all text blocks so we don't miss the real
      // prompt hidden behind <ide_selection>/<ide_opened_file> blocks.
      const texts: string[] = []
      if (typeof content === 'string') {
        texts.push(content)
      } else if (Array.isArray(content)) {
        for (const block of content) {
          const b = block as Record<string, unknown>
          if (b.type === 'text' && typeof b.text === 'string') {
            texts.push(b.text as string)
          }
        }
      }

      for (const text of texts) {
        if (!text) continue

        let result = text.replace(/\n/g, ' ').trim()

        // Skip command messages (slash commands) but remember the first one
        // as a fallback title. Matches skip logic in
        // getFirstMeaningfulUserMessageTextContent, but instead of discarding
        // command messages entirely, we format them cleanly (e.g. "/clear")
        // so the session still appears in the resume picker.
        const commandNameTag = extractTag(result, COMMAND_NAME_TAG)
        if (commandNameTag) {
          const name = commandNameTag.replace(/^\//, '')
          const commandArgs = extractTag(result, 'command-args')?.trim() || ''
          if (builtInCommandNames().has(name) || !commandArgs) {
            if (!firstCommandFallback) {
              firstCommandFallback = commandNameTag
            }
            continue
          }
          // Custom command with meaningful args — use clean display.
          // (commandArgs is non-empty here, so the false arm is defensive.)
          return commandArgs
            ? `${commandNameTag} ${commandArgs}`
            : commandNameTag
        }

        // Format bash input with ! prefix before the generic XML skip
        const bashInput = extractTag(result, 'bash-input')
        if (bashInput) return `! ${bashInput}`

        if (SKIP_FIRST_PROMPT_PATTERN.test(result)) {
          if (
            (feature('PROACTIVE') || feature('KAIROS')) &&
            result.startsWith(`<${TICK_TAG}>`)
          )
            hasTickMessages = true
          continue
        }
        // Cap the display title at 200 characters.
        if (result.length > 200) {
          result = result.slice(0, 200).trim() + '…'
        }
        return result
      }
    } catch {
      continue
    }
  }
  // Session started with a slash command but had no subsequent real message —
  // use the clean command name so the session still appears in the resume picker
  if (firstCommandFallback) return firstCommandFallback
  // Proactive sessions have only tick messages — give them a synthetic prompt
  // so they're not filtered out by enrichLogs
  if ((feature('PROACTIVE') || feature('KAIROS')) && hasTickMessages)
    return 'Proactive session'
  return ''
}

/**
 * Like extractJsonStringField but returns the first `maxLen` characters of the
 * value even when the closing quote is missing (truncated buffer). Newline
 * escapes are replaced with spaces and the result is trimmed.
4920 */ 4921function extractJsonStringFieldPrefix( 4922 text: string, 4923 key: string, 4924 maxLen: number, 4925): string { 4926 const patterns = [`"${key}":"`, `"${key}": "`] 4927 for (const pattern of patterns) { 4928 const idx = text.indexOf(pattern) 4929 if (idx < 0) continue 4930 4931 const valueStart = idx + pattern.length 4932 // Grab up to maxLen characters from the value, stopping at closing quote 4933 let i = valueStart 4934 let collected = 0 4935 while (i < text.length && collected < maxLen) { 4936 if (text[i] === '\\') { 4937 i += 2 // skip escaped char 4938 collected++ 4939 continue 4940 } 4941 if (text[i] === '"') break 4942 i++ 4943 collected++ 4944 } 4945 const raw = text.slice(valueStart, i) 4946 return raw.replace(/\\n/g, ' ').replace(/\\t/g, ' ').trim() 4947 } 4948 return '' 4949} 4950 4951/** 4952 * Deduplicates logs by sessionId, keeping the entry with the newest 4953 * modified time. Returns sorted logs with sequential value indices. 4954 */ 4955function deduplicateLogsBySessionId(logs: LogOption[]): LogOption[] { 4956 const deduped = new Map<string, LogOption>() 4957 for (const log of logs) { 4958 if (!log.sessionId) continue 4959 const existing = deduped.get(log.sessionId) 4960 if (!existing || log.modified.getTime() > existing.modified.getTime()) { 4961 deduped.set(log.sessionId, log) 4962 } 4963 } 4964 return sortLogs([...deduped.values()]).map((log, i) => ({ 4965 ...log, 4966 value: i, 4967 })) 4968} 4969 4970/** 4971 * Returns lite LogOption[] from pure filesystem metadata (stat only). 4972 * No file reads — instant. Call `enrichLogs` to enrich 4973 * visible sessions with firstPrompt, gitBranch, customTitle, etc. 
4974 */ 4975export async function getSessionFilesLite( 4976 projectDir: string, 4977 limit?: number, 4978 projectPath?: string, 4979): Promise<LogOption[]> { 4980 const sessionFilesMap = await getSessionFilesWithMtime(projectDir) 4981 4982 // Sort by mtime descending and apply limit 4983 let entries = [...sessionFilesMap.entries()].sort( 4984 (a, b) => b[1].mtime - a[1].mtime, 4985 ) 4986 if (limit && entries.length > limit) { 4987 entries = entries.slice(0, limit) 4988 } 4989 4990 const logs: LogOption[] = [] 4991 4992 for (const [sessionId, fileInfo] of entries) { 4993 logs.push({ 4994 date: new Date(fileInfo.mtime).toISOString(), 4995 messages: [], 4996 isLite: true, 4997 fullPath: fileInfo.path, 4998 value: 0, 4999 created: new Date(fileInfo.ctime), 5000 modified: new Date(fileInfo.mtime), 5001 firstPrompt: '', 5002 messageCount: 0, 5003 fileSize: fileInfo.size, 5004 isSidechain: false, 5005 sessionId, 5006 projectPath, 5007 }) 5008 } 5009 5010 // logs are freshly pushed above — safe to mutate in place 5011 const sorted = sortLogs(logs) 5012 sorted.forEach((log, i) => { 5013 log.value = i 5014 }) 5015 return sorted 5016} 5017 5018/** 5019 * Enriches a lite log with metadata from its JSONL file. 5020 * Returns the enriched log, or null if the log has no meaningful content 5021 * (no firstPrompt, no customTitle — e.g., metadata-only session files). 5022 */ 5023async function enrichLog( 5024 log: LogOption, 5025 readBuf: Buffer, 5026): Promise<LogOption | null> { 5027 if (!log.isLite || !log.fullPath) return log 5028 5029 const meta = await readLiteMetadata(log.fullPath, log.fileSize ?? 
0, readBuf) 5030 5031 const enriched: LogOption = { 5032 ...log, 5033 isLite: false, 5034 firstPrompt: meta.firstPrompt, 5035 gitBranch: meta.gitBranch, 5036 isSidechain: meta.isSidechain, 5037 teamName: meta.teamName, 5038 customTitle: meta.customTitle, 5039 summary: meta.summary, 5040 tag: meta.tag, 5041 agentSetting: meta.agentSetting, 5042 prNumber: meta.prNumber, 5043 prUrl: meta.prUrl, 5044 prRepository: meta.prRepository, 5045 projectPath: meta.projectPath ?? log.projectPath, 5046 } 5047 5048 // Provide a fallback title for sessions where we couldn't extract the first 5049 // prompt (e.g., large first messages that exceed the 16KB read buffer). 5050 // Previously these sessions were silently dropped, making them inaccessible 5051 // via /resume after crashes or large-context sessions. 5052 if (!enriched.firstPrompt && !enriched.customTitle) { 5053 enriched.firstPrompt = '(session)' 5054 } 5055 // Filter: skip sidechains and agent sessions 5056 if (enriched.isSidechain) { 5057 logForDebugging( 5058 `Session ${log.sessionId} filtered from /resume: isSidechain=true`, 5059 ) 5060 return null 5061 } 5062 if (enriched.teamName) { 5063 logForDebugging( 5064 `Session ${log.sessionId} filtered from /resume: teamName=${enriched.teamName}`, 5065 ) 5066 return null 5067 } 5068 5069 return enriched 5070} 5071 5072/** 5073 * Enriches enough lite logs from `allLogs` (starting at `startIndex`) to 5074 * produce `count` valid results. Returns the valid enriched logs and the 5075 * index where scanning stopped (for progressive loading to continue from). 5076 */ 5077export async function enrichLogs( 5078 allLogs: LogOption[], 5079 startIndex: number, 5080 count: number, 5081): Promise<{ logs: LogOption[]; nextIndex: number }> { 5082 const result: LogOption[] = [] 5083 const readBuf = Buffer.alloc(LITE_READ_BUF_SIZE) 5084 let i = startIndex 5085 5086 while (i < allLogs.length && result.length < count) { 5087 const log = allLogs[i]! 
5088 i++ 5089 5090 const enriched = await enrichLog(log, readBuf) 5091 if (enriched) { 5092 result.push(enriched) 5093 } 5094 } 5095 5096 const scanned = i - startIndex 5097 const filtered = scanned - result.length 5098 if (filtered > 0) { 5099 logForDebugging( 5100 `/resume: enriched ${scanned} sessions, ${filtered} filtered out, ${result.length} visible (${allLogs.length - i} remaining on disk)`, 5101 ) 5102 } 5103 5104 return { logs: result, nextIndex: i } 5105}