source dump of claude code
at main 793 lines 25 kB view raw
1/** 2 * Portable session storage utilities. 3 * 4 * Pure Node.js — no internal dependencies on logging, experiments, or feature 5 * flags. Shared between the CLI (src/utils/sessionStorage.ts) and the VS Code 6 * extension (packages/claude-vscode/src/common-host/sessionStorage.ts). 7 */ 8 9import type { UUID } from 'crypto' 10import { open as fsOpen, readdir, realpath, stat } from 'fs/promises' 11import { join } from 'path' 12import { getClaudeConfigHomeDir } from './envUtils.js' 13import { getWorktreePathsPortable } from './getWorktreePathsPortable.js' 14import { djb2Hash } from './hash.js' 15 16/** Size of the head/tail buffer for lite metadata reads. */ 17export const LITE_READ_BUF_SIZE = 65536 18 19// --------------------------------------------------------------------------- 20// UUID validation 21// --------------------------------------------------------------------------- 22 23const uuidRegex = 24 /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i 25 26export function validateUuid(maybeUuid: unknown): UUID | null { 27 if (typeof maybeUuid !== 'string') return null 28 return uuidRegex.test(maybeUuid) ? (maybeUuid as UUID) : null 29} 30 31// --------------------------------------------------------------------------- 32// JSON string field extraction — no full parse, works on truncated lines 33// --------------------------------------------------------------------------- 34 35/** 36 * Unescape a JSON string value extracted as raw text. 37 * Only allocates a new string when escape sequences are present. 38 */ 39export function unescapeJsonString(raw: string): string { 40 if (!raw.includes('\\')) return raw 41 try { 42 return JSON.parse(`"${raw}"`) 43 } catch { 44 return raw 45 } 46} 47 48/** 49 * Extracts a simple JSON string field value from raw text without full parsing. 50 * Looks for `"key":"value"` or `"key": "value"` patterns. 51 * Returns the first match, or undefined if not found. 
52 */ 53export function extractJsonStringField( 54 text: string, 55 key: string, 56): string | undefined { 57 const patterns = [`"${key}":"`, `"${key}": "`] 58 for (const pattern of patterns) { 59 const idx = text.indexOf(pattern) 60 if (idx < 0) continue 61 62 const valueStart = idx + pattern.length 63 let i = valueStart 64 while (i < text.length) { 65 if (text[i] === '\\') { 66 i += 2 67 continue 68 } 69 if (text[i] === '"') { 70 return unescapeJsonString(text.slice(valueStart, i)) 71 } 72 i++ 73 } 74 } 75 return undefined 76} 77 78/** 79 * Like extractJsonStringField but finds the LAST occurrence. 80 * Useful for fields that are appended (customTitle, tag, etc.). 81 */ 82export function extractLastJsonStringField( 83 text: string, 84 key: string, 85): string | undefined { 86 const patterns = [`"${key}":"`, `"${key}": "`] 87 let lastValue: string | undefined 88 for (const pattern of patterns) { 89 let searchFrom = 0 90 while (true) { 91 const idx = text.indexOf(pattern, searchFrom) 92 if (idx < 0) break 93 94 const valueStart = idx + pattern.length 95 let i = valueStart 96 while (i < text.length) { 97 if (text[i] === '\\') { 98 i += 2 99 continue 100 } 101 if (text[i] === '"') { 102 lastValue = unescapeJsonString(text.slice(valueStart, i)) 103 break 104 } 105 i++ 106 } 107 searchFrom = i + 1 108 } 109 } 110 return lastValue 111} 112 113// --------------------------------------------------------------------------- 114// First prompt extraction from head chunk 115// --------------------------------------------------------------------------- 116 117/** 118 * Pattern matching auto-generated or system messages that should be skipped 119 * when looking for the first meaningful user prompt. Matches anything that 120 * starts with a lowercase XML-like tag (IDE context, hook output, task 121 * notifications, channel messages, etc.) or a synthetic interrupt marker. 
122 */ 123const SKIP_FIRST_PROMPT_PATTERN = 124 /^(?:\s*<[a-z][\w-]*[\s>]|\[Request interrupted by user[^\]]*\])/ 125 126const COMMAND_NAME_RE = /<command-name>(.*?)<\/command-name>/ 127 128/** 129 * Extracts the first meaningful user prompt from a JSONL head chunk. 130 * 131 * Skips tool_result messages, isMeta, isCompactSummary, command-name messages, 132 * and auto-generated patterns (session hooks, tick, IDE metadata, etc.). 133 * Truncates to 200 chars. 134 */ 135export function extractFirstPromptFromHead(head: string): string { 136 let start = 0 137 let commandFallback = '' 138 while (start < head.length) { 139 const newlineIdx = head.indexOf('\n', start) 140 const line = 141 newlineIdx >= 0 ? head.slice(start, newlineIdx) : head.slice(start) 142 start = newlineIdx >= 0 ? newlineIdx + 1 : head.length 143 144 if (!line.includes('"type":"user"') && !line.includes('"type": "user"')) 145 continue 146 if (line.includes('"tool_result"')) continue 147 if (line.includes('"isMeta":true') || line.includes('"isMeta": true')) 148 continue 149 if ( 150 line.includes('"isCompactSummary":true') || 151 line.includes('"isCompactSummary": true') 152 ) 153 continue 154 155 try { 156 const entry = JSON.parse(line) as Record<string, unknown> 157 if (entry.type !== 'user') continue 158 159 const message = entry.message as Record<string, unknown> | undefined 160 if (!message) continue 161 162 const content = message.content 163 const texts: string[] = [] 164 if (typeof content === 'string') { 165 texts.push(content) 166 } else if (Array.isArray(content)) { 167 for (const block of content as Record<string, unknown>[]) { 168 if (block.type === 'text' && typeof block.text === 'string') { 169 texts.push(block.text as string) 170 } 171 } 172 } 173 174 for (const raw of texts) { 175 let result = raw.replace(/\n/g, ' ').trim() 176 if (!result) continue 177 178 // Skip slash-command messages but remember first as fallback 179 const cmdMatch = COMMAND_NAME_RE.exec(result) 180 if (cmdMatch) 
{ 181 if (!commandFallback) commandFallback = cmdMatch[1]! 182 continue 183 } 184 185 // Format bash input with ! prefix before the generic XML skip 186 const bashMatch = /<bash-input>([\s\S]*?)<\/bash-input>/.exec(result) 187 if (bashMatch) return `! ${bashMatch[1]!.trim()}` 188 189 if (SKIP_FIRST_PROMPT_PATTERN.test(result)) continue 190 191 if (result.length > 200) { 192 result = result.slice(0, 200).trim() + '\u2026' 193 } 194 return result 195 } 196 } catch { 197 continue 198 } 199 } 200 if (commandFallback) return commandFallback 201 return '' 202} 203 204// --------------------------------------------------------------------------- 205// File I/O — read head and tail of a file 206// --------------------------------------------------------------------------- 207 208/** 209 * Reads the first and last LITE_READ_BUF_SIZE bytes of a file. 210 * 211 * For small files where head covers tail, `tail === head`. 212 * Accepts a shared Buffer to avoid per-file allocation overhead. 213 * Returns `{ head: '', tail: '' }` on any error. 
214 */ 215export async function readHeadAndTail( 216 filePath: string, 217 fileSize: number, 218 buf: Buffer, 219): Promise<{ head: string; tail: string }> { 220 try { 221 const fh = await fsOpen(filePath, 'r') 222 try { 223 const headResult = await fh.read(buf, 0, LITE_READ_BUF_SIZE, 0) 224 if (headResult.bytesRead === 0) return { head: '', tail: '' } 225 226 const head = buf.toString('utf8', 0, headResult.bytesRead) 227 228 const tailOffset = Math.max(0, fileSize - LITE_READ_BUF_SIZE) 229 let tail = head 230 if (tailOffset > 0) { 231 const tailResult = await fh.read(buf, 0, LITE_READ_BUF_SIZE, tailOffset) 232 tail = buf.toString('utf8', 0, tailResult.bytesRead) 233 } 234 235 return { head, tail } 236 } finally { 237 await fh.close() 238 } 239 } catch { 240 return { head: '', tail: '' } 241 } 242} 243 244export type LiteSessionFile = { 245 mtime: number 246 size: number 247 head: string 248 tail: string 249} 250 251/** 252 * Opens a single session file, stats it, and reads head + tail in one fd. 253 * Allocates its own buffer — safe for concurrent use with Promise.all. 254 * Returns null on any error. 
255 */ 256export async function readSessionLite( 257 filePath: string, 258): Promise<LiteSessionFile | null> { 259 try { 260 const fh = await fsOpen(filePath, 'r') 261 try { 262 const stat = await fh.stat() 263 const buf = Buffer.allocUnsafe(LITE_READ_BUF_SIZE) 264 const headResult = await fh.read(buf, 0, LITE_READ_BUF_SIZE, 0) 265 if (headResult.bytesRead === 0) return null 266 267 const head = buf.toString('utf8', 0, headResult.bytesRead) 268 const tailOffset = Math.max(0, stat.size - LITE_READ_BUF_SIZE) 269 let tail = head 270 if (tailOffset > 0) { 271 const tailResult = await fh.read(buf, 0, LITE_READ_BUF_SIZE, tailOffset) 272 tail = buf.toString('utf8', 0, tailResult.bytesRead) 273 } 274 275 return { mtime: stat.mtime.getTime(), size: stat.size, head, tail } 276 } finally { 277 await fh.close() 278 } 279 } catch { 280 return null 281 } 282} 283 284// --------------------------------------------------------------------------- 285// Path sanitization 286// --------------------------------------------------------------------------- 287 288/** 289 * Maximum length for a single filesystem path component (directory or file name). 290 * Most filesystems (ext4, APFS, NTFS) limit individual components to 255 bytes. 291 * We use 200 to leave room for the hash suffix and separator. 292 */ 293export const MAX_SANITIZED_LENGTH = 200 294 295function simpleHash(str: string): string { 296 return Math.abs(djb2Hash(str)).toString(36) 297} 298 299/** 300 * Makes a string safe for use as a directory or file name. 301 * Replaces all non-alphanumeric characters with hyphens. 302 * This ensures compatibility across all platforms, including Windows 303 * where characters like colons are reserved. 304 * 305 * For deeply nested paths that would exceed filesystem limits (255 bytes), 306 * truncates and appends a hash suffix for uniqueness. 
307 * 308 * @param name - The string to make safe (e.g., '/Users/foo/my-project' or 'plugin:name:server') 309 * @returns A safe name (e.g., '-Users-foo-my-project' or 'plugin-name-server') 310 */ 311export function sanitizePath(name: string): string { 312 const sanitized = name.replace(/[^a-zA-Z0-9]/g, '-') 313 if (sanitized.length <= MAX_SANITIZED_LENGTH) { 314 return sanitized 315 } 316 const hash = 317 typeof Bun !== 'undefined' ? Bun.hash(name).toString(36) : simpleHash(name) 318 return `${sanitized.slice(0, MAX_SANITIZED_LENGTH)}-${hash}` 319} 320 321// --------------------------------------------------------------------------- 322// Project directory discovery (shared by listSessions & getSessionMessages) 323// --------------------------------------------------------------------------- 324 325export function getProjectsDir(): string { 326 return join(getClaudeConfigHomeDir(), 'projects') 327} 328 329export function getProjectDir(projectDir: string): string { 330 return join(getProjectsDir(), sanitizePath(projectDir)) 331} 332 333/** 334 * Resolves a directory path to its canonical form using realpath + NFC 335 * normalization. Falls back to NFC-only if realpath fails (e.g., the 336 * directory doesn't exist yet). Ensures symlinked paths (e.g., 337 * /tmp → /private/tmp on macOS) resolve to the same project directory. 338 */ 339export async function canonicalizePath(dir: string): Promise<string> { 340 try { 341 return (await realpath(dir)).normalize('NFC') 342 } catch { 343 return dir.normalize('NFC') 344 } 345} 346 347/** 348 * Finds the project directory for a given path, tolerating hash mismatches 349 * for long paths (>200 chars). The CLI uses Bun.hash while the SDK under 350 * Node.js uses simpleHash — for paths that exceed MAX_SANITIZED_LENGTH, 351 * these produce different directory suffixes. This function falls back to 352 * prefix-based scanning when the exact match doesn't exist. 
353 */ 354export async function findProjectDir( 355 projectPath: string, 356): Promise<string | undefined> { 357 const exact = getProjectDir(projectPath) 358 try { 359 await readdir(exact) 360 return exact 361 } catch { 362 // Exact match failed — for short paths this means no sessions exist. 363 // For long paths, try prefix matching to handle hash mismatches. 364 const sanitized = sanitizePath(projectPath) 365 if (sanitized.length <= MAX_SANITIZED_LENGTH) { 366 return undefined 367 } 368 const prefix = sanitized.slice(0, MAX_SANITIZED_LENGTH) 369 const projectsDir = getProjectsDir() 370 try { 371 const dirents = await readdir(projectsDir, { withFileTypes: true }) 372 const match = dirents.find( 373 d => d.isDirectory() && d.name.startsWith(prefix + '-'), 374 ) 375 return match ? join(projectsDir, match.name) : undefined 376 } catch { 377 return undefined 378 } 379 } 380} 381 382/** 383 * Resolve a sessionId to its on-disk JSONL file path. 384 * 385 * When `dir` is provided: canonicalize it, look in that project's directory 386 * (with findProjectDir fallback for Bun/Node hash mismatches), then fall back 387 * to sibling git worktrees. `projectPath` in the result is the canonical 388 * user-facing directory the file was found under. 389 * 390 * When `dir` is omitted: scan all project directories under ~/.claude/projects/. 391 * `projectPath` is undefined in this case (no meaningful project path to report). 392 * 393 * Existence is checked by stat (operate-then-catch-ENOENT, no existsSync). 394 * Zero-byte files are treated as not-found so callers continue searching past 395 * a truncated copy to find a valid one in a sibling directory. 396 * 397 * `fileSize` is returned so callers (loadSessionBuffer) don't need to re-stat. 398 * 399 * Shared by getSessionInfoImpl and getSessionMessagesImpl — the caller 400 * invokes its own reader (readSessionLite / loadSessionBuffer) on the 401 * resolved path. 
402 */ 403export async function resolveSessionFilePath( 404 sessionId: string, 405 dir?: string, 406): Promise< 407 | { filePath: string; projectPath: string | undefined; fileSize: number } 408 | undefined 409> { 410 const fileName = `${sessionId}.jsonl` 411 412 if (dir) { 413 const canonical = await canonicalizePath(dir) 414 const projectDir = await findProjectDir(canonical) 415 if (projectDir) { 416 const filePath = join(projectDir, fileName) 417 try { 418 const s = await stat(filePath) 419 if (s.size > 0) 420 return { filePath, projectPath: canonical, fileSize: s.size } 421 } catch { 422 // ENOENT/EACCES — keep searching 423 } 424 } 425 // Worktree fallback — sessions may live under a different worktree root 426 let worktreePaths: string[] 427 try { 428 worktreePaths = await getWorktreePathsPortable(canonical) 429 } catch { 430 worktreePaths = [] 431 } 432 for (const wt of worktreePaths) { 433 if (wt === canonical) continue 434 const wtProjectDir = await findProjectDir(wt) 435 if (!wtProjectDir) continue 436 const filePath = join(wtProjectDir, fileName) 437 try { 438 const s = await stat(filePath) 439 if (s.size > 0) return { filePath, projectPath: wt, fileSize: s.size } 440 } catch { 441 // ENOENT/EACCES — keep searching 442 } 443 } 444 return undefined 445 } 446 447 // No dir — scan all project directories 448 const projectsDir = getProjectsDir() 449 let dirents: string[] 450 try { 451 dirents = await readdir(projectsDir) 452 } catch { 453 return undefined 454 } 455 for (const name of dirents) { 456 const filePath = join(projectsDir, name, fileName) 457 try { 458 const s = await stat(filePath) 459 if (s.size > 0) 460 return { filePath, projectPath: undefined, fileSize: s.size } 461 } catch { 462 // ENOENT/ENOTDIR — not in this project, keep scanning 463 } 464 } 465 return undefined 466} 467 468// --------------------------------------------------------------------------- 469// Compact-boundary chunked read (shared by loadTranscriptFile & SDK 
getSessionMessages) 470// --------------------------------------------------------------------------- 471 472/** Chunk size for the forward transcript reader. 1 MB balances I/O calls vs buffer growth. */ 473const TRANSCRIPT_READ_CHUNK_SIZE = 1024 * 1024 474 475/** 476 * File size below which precompact filtering is skipped. 477 * Large sessions (>5 MB) almost always have compact boundaries — they got big 478 * because of many turns triggering auto-compact. 479 */ 480export const SKIP_PRECOMPACT_THRESHOLD = 5 * 1024 * 1024 481 482/** Marker bytes searched for when locating the boundary. Lazy: allocated on 483 * first use, not at module load. Most sessions never resume. */ 484let _compactBoundaryMarker: Buffer | undefined 485function compactBoundaryMarker(): Buffer { 486 return (_compactBoundaryMarker ??= Buffer.from('"compact_boundary"')) 487} 488 489/** 490 * Confirm a byte-matched line is a real compact_boundary (marker can appear 491 * inside user content) and check for preservedSegment. 492 */ 493function parseBoundaryLine( 494 line: string, 495): { hasPreservedSegment: boolean } | null { 496 try { 497 const parsed = JSON.parse(line) as { 498 type?: string 499 subtype?: string 500 compactMetadata?: { preservedSegment?: unknown } 501 } 502 if (parsed.type !== 'system' || parsed.subtype !== 'compact_boundary') { 503 return null 504 } 505 return { 506 hasPreservedSegment: Boolean(parsed.compactMetadata?.preservedSegment), 507 } 508 } catch { 509 return null 510 } 511} 512 513/** 514 * Single forward chunked read for the --resume load path. Attr-snap lines 515 * are skipped at the fd level; compact boundaries truncate in-stream. Peak 516 * is the output size, not the file size. 517 * 518 * The surviving (last) attr-snap is appended at EOF instead of in-place; 519 * restoreAttributionStateFromSnapshots only reads [length-1] so position 520 * doesn't matter. 
521 */ 522 523type Sink = { buf: Buffer; len: number; cap: number } 524 525function sinkWrite(s: Sink, src: Buffer, start: number, end: number): void { 526 const n = end - start 527 if (n <= 0) return 528 if (s.len + n > s.buf.length) { 529 const grown = Buffer.allocUnsafe( 530 Math.min(Math.max(s.buf.length * 2, s.len + n), s.cap), 531 ) 532 s.buf.copy(grown, 0, 0, s.len) 533 s.buf = grown 534 } 535 src.copy(s.buf, s.len, start, end) 536 s.len += n 537} 538 539function hasPrefix( 540 src: Buffer, 541 prefix: Buffer, 542 at: number, 543 end: number, 544): boolean { 545 return ( 546 end - at >= prefix.length && 547 src.compare(prefix, 0, prefix.length, at, at + prefix.length) === 0 548 ) 549} 550 551const ATTR_SNAP_PREFIX = Buffer.from('{"type":"attribution-snapshot"') 552const SYSTEM_PREFIX = Buffer.from('{"type":"system"') 553const LF = 0x0a 554const LF_BYTE = Buffer.from([LF]) 555const BOUNDARY_SEARCH_BOUND = 256 // marker sits ~28 bytes in; 256 is slack 556 557type LoadState = { 558 out: Sink 559 boundaryStartOffset: number 560 hasPreservedSegment: boolean 561 lastSnapSrc: Buffer | null // most-recent attr-snap, appended at EOF 562 lastSnapLen: number 563 lastSnapBuf: Buffer | undefined 564 bufFileOff: number // file offset of buf[0] 565 carryLen: number 566 carryBuf: Buffer | undefined 567 straddleSnapCarryLen: number // per-chunk; reset by processStraddle 568 straddleSnapTailEnd: number 569} 570 571// Line spanning the chunk seam. 0 = fall through to concat. 572function processStraddle( 573 s: LoadState, 574 chunk: Buffer, 575 bytesRead: number, 576): number { 577 s.straddleSnapCarryLen = 0 578 s.straddleSnapTailEnd = 0 579 if (s.carryLen === 0) return 0 580 const cb = s.carryBuf! 
581 const firstNl = chunk.indexOf(LF) 582 if (firstNl === -1 || firstNl >= bytesRead) return 0 583 const tailEnd = firstNl + 1 584 if (hasPrefix(cb, ATTR_SNAP_PREFIX, 0, s.carryLen)) { 585 s.straddleSnapCarryLen = s.carryLen 586 s.straddleSnapTailEnd = tailEnd 587 s.lastSnapSrc = null 588 } else if (s.carryLen < ATTR_SNAP_PREFIX.length) { 589 return 0 // too short to rule out attr-snap 590 } else { 591 if (hasPrefix(cb, SYSTEM_PREFIX, 0, s.carryLen)) { 592 const hit = parseBoundaryLine( 593 cb.toString('utf-8', 0, s.carryLen) + 594 chunk.toString('utf-8', 0, firstNl), 595 ) 596 if (hit?.hasPreservedSegment) { 597 s.hasPreservedSegment = true 598 } else if (hit) { 599 s.out.len = 0 600 s.boundaryStartOffset = s.bufFileOff 601 s.hasPreservedSegment = false 602 s.lastSnapSrc = null 603 } 604 } 605 sinkWrite(s.out, cb, 0, s.carryLen) 606 sinkWrite(s.out, chunk, 0, tailEnd) 607 } 608 s.bufFileOff += s.carryLen + tailEnd 609 s.carryLen = 0 610 return tailEnd 611} 612 613// Strip attr-snaps, truncate on boundaries. Kept lines write as runs. 
// Strip attr-snaps, truncate on boundaries. Kept lines write as runs.
//
// Walks every complete line in `buf`, copying kept lines to the output sink
// in contiguous runs (one sinkWrite per run rather than per line). Attr-snap
// lines are excluded from the run and their offsets reported back so the
// caller can remember the most recent one. A confirmed compact_boundary line
// resets the sink and restarts output at that line. Bytes after the last
// newline are the trailing partial line; its start is returned as trailStart
// for the caller to carry into the next chunk.
function scanChunkLines(
  s: LoadState,
  buf: Buffer,
  boundaryMarker: Buffer,
): { lastSnapStart: number; lastSnapEnd: number; trailStart: number } {
  // Byte-level pre-scan: one cheap indexOf for the boundary marker; the hit
  // is only confirmed with a JSON parse when it lands near a line start.
  let boundaryAt = buf.indexOf(boundaryMarker)
  let runStart = 0
  let lineStart = 0
  let lastSnapStart = -1
  let lastSnapEnd = -1
  let nl = buf.indexOf(LF)
  while (nl !== -1) {
    const lineEnd = nl + 1
    // Stale hit from an already-consumed line — re-search from this line.
    if (boundaryAt !== -1 && boundaryAt < lineStart) {
      boundaryAt = buf.indexOf(boundaryMarker, lineStart)
    }
    if (hasPrefix(buf, ATTR_SNAP_PREFIX, lineStart, lineEnd)) {
      // Attr-snap: flush the kept run up to it, then skip the line itself.
      sinkWrite(s.out, buf, runStart, lineStart)
      lastSnapStart = lineStart
      lastSnapEnd = lineEnd
      runStart = lineEnd
    } else if (
      boundaryAt >= lineStart &&
      boundaryAt < Math.min(lineStart + BOUNDARY_SEARCH_BOUND, lineEnd)
    ) {
      // Marker near the head of this line — confirm it's a real boundary
      // (the byte pattern can also occur inside user content).
      const hit = parseBoundaryLine(buf.toString('utf-8', lineStart, nl))
      if (hit?.hasPreservedSegment) {
        s.hasPreservedSegment = true // don't truncate; preserved msgs already in output
      } else if (hit) {
        // Hard boundary: drop everything emitted so far and restart the
        // transcript at this line (the boundary line itself is kept).
        s.out.len = 0
        s.boundaryStartOffset = s.bufFileOff + lineStart
        s.hasPreservedSegment = false
        s.lastSnapSrc = null
        lastSnapStart = -1
        s.straddleSnapCarryLen = 0
        runStart = lineStart
      }
      // Advance past this occurrence regardless of confirmation outcome.
      boundaryAt = buf.indexOf(
        boundaryMarker,
        boundaryAt + boundaryMarker.length,
      )
    }
    lineStart = lineEnd
    nl = buf.indexOf(LF, lineStart)
  }
  // Flush the final run of complete lines; [lineStart, buf.length) is the
  // trailing partial line handled by captureCarry.
  sinkWrite(s.out, buf, runStart, lineStart)
  return { lastSnapStart, lastSnapEnd, trailStart: lineStart }
}
// In-buf snap wins over straddle (later in file). carryBuf still valid here.
//
// Copies the most recent attr-snap line (either found whole in `buf` by
// scanChunkLines, or split across the chunk seam by processStraddle) into a
// reusable side buffer so it survives buffer reuse and can be appended at EOF.
function captureSnap(
  s: LoadState,
  buf: Buffer,
  chunk: Buffer,
  lastSnapStart: number,
  lastSnapEnd: number,
): void {
  if (lastSnapStart !== -1) {
    // Snap found whole within this chunk's line scan.
    s.lastSnapLen = lastSnapEnd - lastSnapStart
    if (s.lastSnapBuf === undefined || s.lastSnapLen > s.lastSnapBuf.length) {
      s.lastSnapBuf = Buffer.allocUnsafe(s.lastSnapLen)
    }
    buf.copy(s.lastSnapBuf, 0, lastSnapStart, lastSnapEnd)
    s.lastSnapSrc = s.lastSnapBuf
  } else if (s.straddleSnapCarryLen > 0) {
    // Snap straddled the seam: reassemble carry head + chunk tail.
    s.lastSnapLen = s.straddleSnapCarryLen + s.straddleSnapTailEnd
    if (s.lastSnapBuf === undefined || s.lastSnapLen > s.lastSnapBuf.length) {
      s.lastSnapBuf = Buffer.allocUnsafe(s.lastSnapLen)
    }
    s.carryBuf!.copy(s.lastSnapBuf, 0, 0, s.straddleSnapCarryLen)
    chunk.copy(s.lastSnapBuf, s.straddleSnapCarryLen, 0, s.straddleSnapTailEnd)
    s.lastSnapSrc = s.lastSnapBuf
  }
}

// Save the trailing partial line [trailStart, buf.length) into the reusable
// carry buffer for the next chunk's processStraddle.
function captureCarry(s: LoadState, buf: Buffer, trailStart: number): void {
  s.carryLen = buf.length - trailStart
  if (s.carryLen > 0) {
    if (s.carryBuf === undefined || s.carryLen > s.carryBuf.length) {
      s.carryBuf = Buffer.allocUnsafe(s.carryLen)
    }
    buf.copy(s.carryBuf, 0, trailStart, buf.length)
  }
}

// At EOF: flush any unterminated final line (unless it is itself an
// attr-snap, which becomes the surviving snap), then append the surviving
// attr-snap on its own line.
function finalizeOutput(s: LoadState): void {
  if (s.carryLen > 0) {
    const cb = s.carryBuf!
    if (hasPrefix(cb, ATTR_SNAP_PREFIX, 0, s.carryLen)) {
      // Crash-truncated trailing attr-snap — treat as the last snap.
      s.lastSnapSrc = cb
      s.lastSnapLen = s.carryLen
    } else {
      sinkWrite(s.out, cb, 0, s.carryLen)
    }
  }
  if (s.lastSnapSrc) {
    // Ensure the appended snap starts on a fresh line.
    if (s.out.len > 0 && s.out.buf[s.out.len - 1] !== LF) {
      sinkWrite(s.out, LF_BYTE, 0, 1)
    }
    sinkWrite(s.out, s.lastSnapSrc, 0, s.lastSnapLen)
  }
}

/**
 * Forward chunked read of a session transcript for the load/--resume path.
 *
 * Streams the file in 1 MB chunks, stripping attribution-snapshot lines and
 * truncating at confirmed compact boundaries as it goes, so peak memory is
 * bounded by the surviving output rather than the whole file. The last
 * attr-snap seen is re-appended at EOF (see finalizeOutput).
 *
 * @param filePath - transcript JSONL file to read
 * @param fileSize - size in bytes (caller already stat'ed; also used as the
 *   sink's hard capacity bound, +1 for a possible inserted LF)
 * @returns the file offset of the last hard boundary (0 if none), the
 *   filtered post-boundary bytes, and whether a preservedSegment boundary
 *   was seen (in which case output was NOT truncated at it)
 */
export async function readTranscriptForLoad(
  filePath: string,
  fileSize: number,
): Promise<{
  boundaryStartOffset: number
  postBoundaryBuf: Buffer
  hasPreservedSegment: boolean
}> {
  const boundaryMarker = compactBoundaryMarker()
  const CHUNK_SIZE = TRANSCRIPT_READ_CHUNK_SIZE

  const s: LoadState = {
    out: {
      // Gated callers enter with fileSize > 5MB, so min(fileSize, 8MB) lands
      // in [5, 8]MB; large boundaryless sessions (24-31MB output) take 2
      // grows. Ungated callers (attribution.ts) pass small files too — the
      // min just right-sizes the initial buf, no grows.
      buf: Buffer.allocUnsafe(Math.min(fileSize, 8 * 1024 * 1024)),
      len: 0,
      // +1: finalizeOutput may insert one LF between a non-LF-terminated
      // carry and the reordered last attr-snap (crash-truncated file).
      cap: fileSize + 1,
    },
    boundaryStartOffset: 0,
    hasPreservedSegment: false,
    lastSnapSrc: null,
    lastSnapLen: 0,
    lastSnapBuf: undefined,
    bufFileOff: 0,
    carryLen: 0,
    carryBuf: undefined,
    straddleSnapCarryLen: 0,
    straddleSnapTailEnd: 0,
  }

  const chunk = Buffer.allocUnsafe(CHUNK_SIZE)
  const fd = await fsOpen(filePath, 'r')
  try {
    let filePos = 0
    while (filePos < fileSize) {
      const { bytesRead } = await fd.read(
        chunk,
        0,
        Math.min(CHUNK_SIZE, fileSize - filePos),
        filePos,
      )
      if (bytesRead === 0) break
      filePos += bytesRead

      // Resolve the line straddling the previous chunk's seam first.
      const chunkOff = processStraddle(s, chunk, bytesRead)

      // If a carry remains (processStraddle fell through), concatenate
      // carry + chunk so scanChunkLines sees whole lines; otherwise scan
      // the chunk in place with a zero-copy subarray.
      let buf: Buffer
      if (s.carryLen > 0) {
        const bufLen = s.carryLen + (bytesRead - chunkOff)
        buf = Buffer.allocUnsafe(bufLen)
        s.carryBuf!.copy(buf, 0, 0, s.carryLen)
        chunk.copy(buf, s.carryLen, chunkOff, bytesRead)
      } else {
        buf = chunk.subarray(chunkOff, bytesRead)
      }

      const r = scanChunkLines(s, buf, boundaryMarker)
      // Order matters: captureSnap reads carryBuf/chunk before captureCarry
      // overwrites the carry for the next iteration.
      captureSnap(s, buf, chunk, r.lastSnapStart, r.lastSnapEnd)
      captureCarry(s, buf, r.trailStart)
      s.bufFileOff += r.trailStart
    }
    finalizeOutput(s)
  } finally {
    await fd.close()
  }

  return {
    boundaryStartOffset: s.boundaryStartOffset,
    postBoundaryBuf: s.out.buf.subarray(0, s.out.len),
    hasPreservedSegment: s.hasPreservedSegment,
  }
}