source dump of claude code
at main 961 lines 30 kB view raw
1import { createHash, randomUUID, type UUID } from 'crypto' 2import { stat } from 'fs/promises' 3import { isAbsolute, join, relative, sep } from 'path' 4import { getOriginalCwd, getSessionId } from '../bootstrap/state.js' 5import type { 6 AttributionSnapshotMessage, 7 FileAttributionState, 8} from '../types/logs.js' 9import { getCwd } from './cwd.js' 10import { logForDebugging } from './debug.js' 11import { execFileNoThrowWithCwd } from './execFileNoThrow.js' 12import { getFsImplementation } from './fsOperations.js' 13import { isGeneratedFile } from './generatedFiles.js' 14import { getRemoteUrlForDir, resolveGitDir } from './git/gitFilesystem.js' 15import { findGitRoot, gitExe } from './git.js' 16import { logError } from './log.js' 17import { getCanonicalName, type ModelName } from './model/model.js' 18import { sequential } from './sequential.js' 19 20/** 21 * List of repos where internal model names are allowed in trailers. 22 * Includes both SSH and HTTPS URL formats. 23 * 24 * NOTE: This is intentionally a repo allowlist, not an org-wide check. 25 * The anthropics and anthropic-experimental orgs contain PUBLIC repos 26 * (e.g. anthropics/claude-code, anthropic-experimental/sandbox-runtime). 27 * Undercover mode must stay ON in those to prevent codename leaks. 28 * Only add repos here that are confirmed PRIVATE. 
/**
 * Remote-URL fragments of the repos where internal model names are allowed
 * in trailers. Every repo appears twice: once in SSH form
 * (`github.com:anthropics/<repo>`) and once in HTTPS form
 * (`github.com/anthropics/<repo>`), in that order.
 *
 * NOTE: This is intentionally a repo allowlist, not an org-wide check.
 * The anthropics and anthropic-experimental orgs contain PUBLIC repos
 * (e.g. anthropics/claude-code, anthropic-experimental/sandbox-runtime).
 * Undercover mode must stay ON in those to prevent codename leaks.
 * Only add repos here that are confirmed PRIVATE.
 */
const INTERNAL_MODEL_REPOS: string[] = [
  'claude-cli-internal',
  'anthropic',
  'apps',
  'casino',
  'dbt',
  'dotfiles',
  'terraform-config',
  'hex-export',
  'feedback-v2',
  'labs',
  'argo-rollouts',
  'starling-configs',
  'ts-tools',
  'ts-capsules',
  'feldspar-testing',
  'trellis',
  'claude-for-hiring',
  'forge-web',
  'infra-manifests',
  'mycro_manifests',
  'mycro_configs',
  'mobile-apps',
].flatMap(repoName => [
  // SSH form first, then HTTPS form
  `github.com:anthropics/${repoName}`,
  `github.com/anthropics/${repoName}`,
])
/**
 * Get the repo root for attribution operations.
 * Uses getCwd() which respects agent worktree overrides (AsyncLocalStorage),
 * then resolves to the git root to handle the `cd subdir` case.
 * Falls back to getOriginalCwd() if the git root can't be determined.
 */
export function getAttributionRepoRoot(): string {
  const gitRoot = findGitRoot(getCwd())
  return gitRoot ?? getOriginalCwd()
}

// Cache for repo classification result. Primed once per process.
// 'internal' = remote matches INTERNAL_MODEL_REPOS allowlist
// 'external' = has a remote, not on allowlist (public/open-source repo)
// 'none'     = no remote URL (not a git repo, or no remote configured)
// null       = the async check has not run yet
let repoClassCache: 'internal' | 'external' | 'none' | null = null

/**
 * Synchronously return the cached repo classification.
 * Returns null if the async check hasn't run yet.
 */
export function getRepoClassCached(): 'internal' | 'external' | 'none' | null {
  return repoClassCache
}

/**
 * Synchronously return the cached result of isInternalModelRepo().
 * Returns false if the check hasn't run yet (safe default: don't leak).
 */
export function isInternalModelRepoCached(): boolean {
  return repoClassCache === 'internal'
}

/**
 * Decide whether `remoteUrl` names exactly the repo described by
 * `repoFragment` (e.g. "github.com/anthropics/apps").
 *
 * A plain substring test is not enough: "github.com/anthropics/anthropic" is
 * a prefix of "github.com/anthropics/anthropic-sdk-python", so public repos
 * whose names merely START with an allowlisted name would be treated as
 * internal. The fragment must therefore be followed by nothing but an
 * optional ".git" suffix and an optional trailing slash.
 */
function remoteUrlMatchesRepo(remoteUrl: string, repoFragment: string): boolean {
  const allowedTail = /^(?:\.git)?\/?$/
  let searchFrom = 0
  for (;;) {
    const start = remoteUrl.indexOf(repoFragment, searchFrom)
    if (start === -1) {
      return false
    }
    if (allowedTail.test(remoteUrl.slice(start + repoFragment.length))) {
      return true
    }
    // Keep scanning in case the fragment occurs again later in the URL
    searchFrom = start + 1
  }
}

/**
 * Check if the current repo is in the allowlist for internal model names.
 * Memoized - only checks once per process (the result is stored in
 * repoClassCache, which the synchronous accessors above read).
 *
 * @returns true only when the remote URL of the attribution repo root points
 *   at one of the INTERNAL_MODEL_REPOS entries; false when there is no
 *   remote or the remote is not on the allowlist.
 */
export const isInternalModelRepo = sequential(async (): Promise<boolean> => {
  if (repoClassCache !== null) {
    return repoClassCache === 'internal'
  }

  const cwd = getAttributionRepoRoot()
  const remoteUrl = await getRemoteUrlForDir(cwd)

  if (!remoteUrl) {
    repoClassCache = 'none'
    return false
  }

  const isInternal = INTERNAL_MODEL_REPOS.some(repo =>
    remoteUrlMatchesRepo(remoteUrl, repo),
  )
  repoClassCache = isInternal ? 'internal' : 'external'
  return isInternal
})
/**
 * Sanitize a surface key so that it only carries a public model name.
 * The part after the last "/" is treated as the model and mapped through
 * sanitizeModelName(); keys without a "/" are returned unchanged.
 * Example: "cli/opus-4-5-fast" -> "cli/claude-opus-4-5".
 */
export function sanitizeSurfaceKey(surfaceKey: string): string {
  const lastSlash = surfaceKey.lastIndexOf('/')
  if (lastSlash < 0) {
    // No model component to rewrite
    return surfaceKey
  }

  const surfacePart = surfaceKey.substring(0, lastSlash)
  const modelPart = surfaceKey.substring(lastSlash + 1)
  return [surfacePart, sanitizeModelName(modelPart)].join('/')
}

// @[MODEL LAUNCH]: Add a mapping for the new model ID so git commit trailers show the public name.
/**
 * Sanitize a model name to its public equivalent.
 * The first family marker contained in `shortName` decides the result, so
 * more specific markers ("opus-4-6") must stay ahead of the shorter ones
 * they contain ("opus-4"). Names matching no marker become plain "claude".
 */
export function sanitizeModelName(shortName: string): string {
  const familyTable: ReadonlyArray<readonly [marker: string, publicName: string]> = [
    ['opus-4-6', 'claude-opus-4-6'],
    ['opus-4-5', 'claude-opus-4-5'],
    ['opus-4-1', 'claude-opus-4-1'],
    ['opus-4', 'claude-opus-4'],
    ['sonnet-4-6', 'claude-sonnet-4-6'],
    ['sonnet-4-5', 'claude-sonnet-4-5'],
    ['sonnet-4', 'claude-sonnet-4'],
    ['sonnet-3-7', 'claude-sonnet-3-7'],
    ['haiku-4-5', 'claude-haiku-4-5'],
    ['haiku-3-5', 'claude-haiku-3-5'],
  ]

  const hit = familyTable.find(([marker]) => shortName.includes(marker))
  // Unknown models get a generic name
  return hit ? hit[1] : 'claude'
}
/**
 * Attribution state for tracking Claude's contributions to files.
 */
export type AttributionState = {
  /** File states keyed by relative path (from cwd). */
  fileStates: Map<string, FileAttributionState>
  /** Session baseline states for net change calculation. */
  sessionBaselines: Map<string, { contentHash: string; mtime: number }>
  /** Surface from which edits were made. */
  surface: string
  /** HEAD SHA at session start (for detecting external commits). */
  startingHeadSha: string | null
  /** Total prompts in session (for steer count calculation). */
  promptCount: number
  /** Prompts at last commit (to calculate steers for current commit). */
  promptCountAtLastCommit: number
  /** Permission prompt tracking: running total. */
  permissionPromptCount: number
  /** Permission prompt tracking: value at the last commit. */
  permissionPromptCountAtLastCommit: number
  /** ESC press tracking (user cancelled permission prompt): running total. */
  escapeCount: number
  /** ESC press tracking: value at the last commit. */
  escapeCountAtLastCommit: number
}

/**
 * Summary of Claude's contribution for a commit.
 */
export type AttributionSummary = {
  /** Claude's share of all counted characters, as a rounded percentage. */
  claudePercent: number
  /** Characters attributed to Claude across the commit. */
  claudeChars: number
  /** Characters attributed to the human across the commit. */
  humanChars: number
  /** Surfaces that took part in the commit. */
  surfaces: string[]
}

/**
 * Per-file attribution details for git notes.
 */
export type FileAttribution = {
  claudeChars: number
  humanChars: number
  /** Claude's share of this file's counted characters (rounded percentage). */
  percent: number
  surface: string
}

/**
 * Full attribution data for git notes JSON.
 */
export type AttributionData = {
  /** Schema version of the note payload. */
  version: 1
  summary: AttributionSummary
  /** Per-file attribution, keyed by file path. */
  files: Record<string, FileAttribution>
  /** Claude's characters per surface and their share of the total. */
  surfaceBreakdown: Record<string, { claudeChars: number; percent: number }>
  /** Files left out of the numbers because they are generated. */
  excludedGenerated: string[]
  /** Session ids that contributed to this data. */
  sessions: string[]
}

/**
 * Get the current client surface from the environment.
 * Reads CLAUDE_CODE_ENTRYPOINT and defaults to 'cli' when it is not set.
 */
export function getClientSurface(): string {
  const entrypoint = process.env.CLAUDE_CODE_ENTRYPOINT
  return entrypoint ?? 'cli'
}
/**
 * Build a surface key that includes the model name.
 * Format: "surface/model" (e.g., "cli/claude-sonnet")
 *
 * @param surface - Client surface the edit came from
 * @param model - Model whose canonical name is appended
 */
export function buildSurfaceKey(surface: string, model: ModelName): string {
  const canonicalModel = getCanonicalName(model)
  return `${surface}/${canonicalModel}`
}

/**
 * Compute the SHA-256 hash of `content`, returned as a hex string.
 */
export function computeContentHash(content: string): string {
  const hasher = createHash('sha256')
  hasher.update(content)
  return hasher.digest('hex')
}

/**
 * Normalize a file path to a path relative to the attribution repo root so
 * that tracking keys are consistent.
 *
 * - Relative inputs are returned unchanged.
 * - Absolute inputs inside the repo root become root-relative paths with
 *   forward slashes (so keys match git diff output on Windows).
 * - Absolute inputs outside the repo root are returned unchanged.
 *
 * Symlinks are resolved on both sides before comparing (e.g. /tmp vs
 * /private/tmp on macOS); if that comparison fails, the unresolved paths are
 * compared as a fallback.
 */
export function normalizeFilePath(filePath: string): string {
  // Relative paths are returned verbatim, so decide that before paying for
  // the filesystem handle and the git-root lookup.
  if (!isAbsolute(filePath)) {
    return filePath
  }

  const fs = getFsImplementation()
  const cwd = getAttributionRepoRoot()

  // realpath of `target`, or `target` itself when it cannot be resolved
  // (e.g. the file does not exist yet).
  const resolveOrKeep = (target: string): string => {
    try {
      return fs.realpathSync(target)
    } catch {
      return target
    }
  }

  // Root-relative, forward-slash form of `target` when it is `root` or lies
  // beneath it; null otherwise.
  const repoRelative = (root: string, target: string): string | null =>
    target === root || target.startsWith(root + sep)
      ? relative(root, target).replaceAll(sep, '/')
      : null

  const resolvedPath = resolveOrKeep(filePath)
  const resolvedCwd = resolveOrKeep(cwd)

  return (
    repoRelative(resolvedCwd, resolvedPath) ??
    // Fallback: try the original, unresolved comparison
    repoRelative(cwd, filePath) ??
    filePath
  )
}

/**
 * Expand a relative path to an absolute path under the attribution repo
 * root. Absolute inputs are returned unchanged.
 */
export function expandFilePath(filePath: string): string {
  return isAbsolute(filePath)
    ? filePath
    : join(getAttributionRepoRoot(), filePath)
}
/**
 * Create an empty attribution state for a new session: no tracked files, no
 * baselines, all counters at zero, and the surface taken from
 * getClientSurface().
 */
export function createEmptyAttributionState(): AttributionState {
  const emptyState: AttributionState = {
    fileStates: new Map(),
    sessionBaselines: new Map(),
    surface: getClientSurface(),
    startingHeadSha: null,
    promptCount: 0,
    promptCountAtLastCommit: 0,
    permissionPromptCount: 0,
    permissionPromptCountAtLastCommit: 0,
    escapeCount: 0,
    escapeCountAtLastCommit: 0,
  }
  return emptyState
}

/**
 * Compute the character contribution for a file modification.
 *
 * The contribution of this edit is added to whatever contribution is already
 * recorded for the file in `existingFileStates`.
 *
 * @returns The FileAttributionState to store, or null if tracking failed
 *   (the error is logged).
 */
function computeFileModificationState(
  existingFileStates: Map<string, FileAttributionState>,
  filePath: string,
  oldContent: string,
  newContent: string,
  mtime: number,
): FileAttributionState | null {
  const key = normalizeFilePath(filePath)

  try {
    let editSize: number

    if (oldContent === '') {
      // New file: everything written counts
      editSize = newContent.length
    } else if (newContent === '') {
      // Full deletion: everything removed counts
      editSize = oldContent.length
    } else {
      // Locate the changed region by trimming the common prefix and suffix.
      // A plain length difference would report 0 for same-length
      // replacements (e.g., "Esc" → "esc").
      const shorter = Math.min(oldContent.length, newContent.length)

      let head = 0
      while (head < shorter && oldContent[head] === newContent[head]) {
        head++
      }

      // The suffix may not overlap the prefix already consumed
      const tailLimit = shorter - head
      let tail = 0
      while (
        tail < tailLimit &&
        oldContent[oldContent.length - 1 - tail] ===
          newContent[newContent.length - 1 - tail]
      ) {
        tail++
      }

      // Larger of the removed span and the inserted span
      editSize = Math.max(oldContent.length, newContent.length) - head - tail
    }

    const previousContribution =
      existingFileStates.get(key)?.claudeContribution ?? 0

    return {
      contentHash: computeContentHash(newContent),
      claudeContribution: previousContribution + editSize,
      mtime,
    }
  } catch (error) {
    logError(error as Error)
    return null
  }
}

/**
 * Get a file's modification time (mtimeMs), falling back to Date.now() if
 * stat() fails (e.g. the file doesn't exist). This is async so it can be
 * precomputed before entering a sync setAppState callback.
 */
export async function getFileMtime(filePath: string): Promise<number> {
  const absPath = expandFilePath(normalizeFilePath(filePath))
  try {
    const { mtimeMs } = await stat(absPath)
    return mtimeMs
  } catch {
    return Date.now()
  }
}
/**
 * Track a file modification by Claude.
 * Called after Edit/Write tool completes.
 *
 * The input state is not mutated: a new state with a copied `fileStates` map
 * is returned. If the contribution could not be computed, the original state
 * object is returned as-is.
 *
 * @param state - Current attribution state
 * @param filePath - Path of the edited file (absolute or repo-relative)
 * @param oldContent - Content before the edit
 * @param newContent - Content after the edit
 * @param _userModified - Accepted for call-site compatibility; unused here
 * @param mtime - Modification time to record (defaults to now)
 */
export function trackFileModification(
  state: AttributionState,
  filePath: string,
  oldContent: string,
  newContent: string,
  _userModified: boolean,
  mtime: number = Date.now(),
): AttributionState {
  const normalizedPath = normalizeFilePath(filePath)
  // Hand the already-normalized key down: normalizing a normalized path
  // yields the same key, and this avoids resolving symlinks a second time.
  const newFileState = computeFileModificationState(
    state.fileStates,
    normalizedPath,
    oldContent,
    newContent,
    mtime,
  )
  if (!newFileState) {
    return state
  }

  const newFileStates = new Map(state.fileStates)
  newFileStates.set(normalizedPath, newFileState)

  logForDebugging(
    `Attribution: Tracked ${newFileState.claudeContribution} chars for ${normalizedPath}`,
  )

  return {
    ...state,
    fileStates: newFileStates,
  }
}

/**
 * Track a file creation by Claude (e.g., via bash command).
 * Used when Claude creates a new file through a non-tracked mechanism.
 */
export function trackFileCreation(
  state: AttributionState,
  filePath: string,
  content: string,
  mtime: number = Date.now(),
): AttributionState {
  // A creation is simply a modification from empty to the new content
  return trackFileModification(state, filePath, '', content, false, mtime)
}

/**
 * Record a deletion in `fileStates` (mutating it): the deleted characters are
 * added to the file's existing contribution and the content hash is cleared.
 */
function recordDeletion(
  fileStates: Map<string, FileAttributionState>,
  normalizedPath: string,
  deletedChars: number,
  mtime: number,
): void {
  const existingContribution =
    fileStates.get(normalizedPath)?.claudeContribution ?? 0
  const totalContribution = existingContribution + deletedChars

  fileStates.set(normalizedPath, {
    contentHash: '', // Empty hash for deleted files
    claudeContribution: totalContribution,
    mtime,
  })

  logForDebugging(
    `Attribution: Tracked deletion of ${normalizedPath} (${deletedChars} chars removed, total contribution: ${totalContribution})`,
  )
}

/**
 * Track a file deletion by Claude (e.g., via bash rm command).
 * Used when Claude deletes a file through a non-tracked mechanism.
 * Returns a new state; the input state is not mutated.
 */
export function trackFileDeletion(
  state: AttributionState,
  filePath: string,
  oldContent: string,
): AttributionState {
  const newFileStates = new Map(state.fileStates)
  recordDeletion(
    newFileStates,
    normalizeFilePath(filePath),
    oldContent.length,
    Date.now(),
  )

  return {
    ...state,
    fileStates: newFileStates,
  }
}

// --

/**
 * Track multiple file changes in bulk, mutating a single Map copy.
 * This avoids the O(n²) cost of copying the Map per file when processing
 * large git diffs (e.g., jj operations that touch hundreds of thousands of files).
 *
 * Changes are applied in order, so several changes to the same path
 * accumulate. A change whose `mtime` is omitted is stamped with the current
 * time.
 */
export function trackBulkFileChanges(
  state: AttributionState,
  changes: ReadonlyArray<{
    path: string
    type: 'modified' | 'created' | 'deleted'
    oldContent: string
    newContent: string
    mtime?: number
  }>,
): AttributionState {
  // Create ONE copy of the Map, then mutate it for each file
  const newFileStates = new Map(state.fileStates)

  for (const change of changes) {
    const mtime = change.mtime ?? Date.now()
    // Normalize once per change and reuse the key everywhere below
    const normalizedPath = normalizeFilePath(change.path)

    if (change.type === 'deleted') {
      recordDeletion(
        newFileStates,
        normalizedPath,
        change.oldContent.length,
        mtime,
      )
      continue
    }

    const newFileState = computeFileModificationState(
      newFileStates,
      normalizedPath,
      change.oldContent,
      change.newContent,
      mtime,
    )
    if (newFileState) {
      newFileStates.set(normalizedPath, newFileState)

      logForDebugging(
        `Attribution: Tracked ${newFileState.claudeContribution} chars for ${normalizedPath}`,
      )
    }
  }

  return {
    ...state,
    fileStates: newFileStates,
  }
}

/**
 * View `value` as a Map. Attribution state may arrive as a plain object
 * instead of a Map (in case of serialization); missing values become an
 * empty Map.
 */
function toEntryMap<V>(
  value: Map<string, V> | Record<string, V> | null | undefined,
): Map<string, V> {
  if (value instanceof Map) {
    return value
  }
  return new Map(Object.entries<V>((value ?? {}) as Record<string, V>))
}

/** Outcome of attributing a single staged file (null = skipped). */
type StagedFileResult =
  | { type: 'generated'; file: string }
  | ({ type: 'file'; file: string } & FileAttribution)
  | null

/**
 * Attribute one staged file to Claude and/or the human.
 *
 * - Generated files are reported as such and carry no numbers.
 * - A file with tracked state is attributed entirely to Claude, using the
 *   tracked contribution.
 * - Otherwise the change is attributed to the human, sized by the staged
 *   diff (falling back to the file size, or to 100 for a deletion).
 * - Returns null when the file cannot be stat()ed.
 */
async function attributeStagedFile(
  file: string,
  cwd: string,
  fileState: FileAttributionState | undefined,
  baseline: { contentHash: string; mtime: number } | undefined,
  surface: string,
): Promise<StagedFileResult> {
  // Skip generated files
  if (isGeneratedFile(file)) {
    return { type: 'generated', file }
  }

  const absPath = join(cwd, file)
  let claudeChars = 0
  let humanChars = 0

  if (await isFileDeleted(file)) {
    if (fileState) {
      // Claude deleted this file (tracked deletion)
      claudeChars = fileState.claudeContribution
    } else {
      // Human deleted this file (untracked deletion):
      // use the diff size, with a minimum attribution for a deletion
      const diffSize = await getGitDiffSize(file)
      humanChars = diffSize > 0 ? diffSize : 100
    }
  } else {
    try {
      // Only need file size, not content - stat() avoids loading GB-scale
      // build artifacts into memory when they appear in the working tree.
      // stats.size (bytes) is an adequate proxy for char count here.
      const stats = await stat(absPath)

      if (fileState) {
        // We have tracked modifications for this file
        claudeChars = fileState.claudeContribution
      } else if (baseline) {
        // File was modified but not tracked - human modification
        const diffSize = await getGitDiffSize(file)
        humanChars = diffSize > 0 ? diffSize : stats.size
      } else {
        // New file not created by Claude
        humanChars = stats.size
      }
    } catch {
      // File doesn't exist or stat failed - skip it
      return null
    }
  }

  // Ensure non-negative values
  claudeChars = Math.max(0, claudeChars)
  humanChars = Math.max(0, humanChars)

  const total = claudeChars + humanChars
  const percent = total > 0 ? Math.round((claudeChars / total) * 100) : 0

  return { type: 'file', file, claudeChars, humanChars, percent, surface }
}

/**
 * Calculate final attribution for staged files.
 *
 * File states and baselines from all `states` are merged first (earliest
 * baseline wins; contributions to the same path are summed), then every
 * staged file is attributed in parallel and the results are aggregated into
 * totals, per-file entries and a per-surface breakdown.
 *
 * @param states - Attribution states of the contributing sessions. May be
 *   empty, in which case every file is attributed to the human and the
 *   current client surface is used for the per-file `surface` field.
 * @param stagedFiles - Repo-relative paths of the staged files
 * @returns Attribution data ready to be serialized into git notes
 */
export async function calculateCommitAttribution(
  states: AttributionState[],
  stagedFiles: string[],
): Promise<AttributionData> {
  const cwd = getAttributionRepoRoot()
  const sessionId = getSessionId()

  // Merge file states from all sessions
  const surfaces = new Set<string>()
  const mergedFileStates = new Map<string, FileAttributionState>()
  const mergedBaselines = new Map<
    string,
    { contentHash: string; mtime: number }
  >()

  for (const state of states) {
    surfaces.add(state.surface)

    // Merge baselines (earliest baseline wins)
    for (const [path, baseline] of toEntryMap(state.sessionBaselines)) {
      if (!mergedBaselines.has(path)) {
        mergedBaselines.set(path, baseline)
      }
    }

    // Merge file states (accumulate contributions)
    for (const [path, fileState] of toEntryMap(state.fileStates)) {
      const existing = mergedFileStates.get(path)
      mergedFileStates.set(
        path,
        existing
          ? {
              ...fileState,
              claudeContribution:
                existing.claudeContribution + fileState.claudeContribution,
            }
          : fileState,
      )
    }
  }

  // Every file is reported under the first state's surface. With no states
  // at all there is nothing to read it from, so fall back to the current
  // client surface instead of dereferencing `undefined`.
  const fileSurface = states[0]?.surface ?? getClientSurface()

  // Process files in parallel
  const fileResults = await Promise.all(
    stagedFiles.map(file =>
      attributeStagedFile(
        file,
        cwd,
        mergedFileStates.get(file),
        mergedBaselines.get(file),
        fileSurface,
      ),
    ),
  )

  // Aggregate results
  const files: Record<string, FileAttribution> = {}
  const excludedGenerated: string[] = []
  const surfaceCounts: Record<string, number> = {}
  let totalClaudeChars = 0
  let totalHumanChars = 0

  for (const result of fileResults) {
    if (!result) continue

    if (result.type === 'generated') {
      excludedGenerated.push(result.file)
      continue
    }

    files[result.file] = {
      claudeChars: result.claudeChars,
      humanChars: result.humanChars,
      percent: result.percent,
      surface: result.surface,
    }

    totalClaudeChars += result.claudeChars
    totalHumanChars += result.humanChars

    surfaceCounts[result.surface] =
      (surfaceCounts[result.surface] ?? 0) + result.claudeChars
  }

  const totalChars = totalClaudeChars + totalHumanChars
  const claudePercent =
    totalChars > 0 ? Math.round((totalClaudeChars / totalChars) * 100) : 0

  // Calculate surface breakdown (percentage of TOTAL content per surface)
  const surfaceBreakdown: Record<
    string,
    { claudeChars: number; percent: number }
  > = {}
  for (const [surface, chars] of Object.entries(surfaceCounts)) {
    const percent = totalChars > 0 ? Math.round((chars / totalChars) * 100) : 0
    surfaceBreakdown[surface] = { claudeChars: chars, percent }
  }

  return {
    version: 1,
    summary: {
      claudePercent,
      claudeChars: totalClaudeChars,
      humanChars: totalHumanChars,
      surfaces: Array.from(surfaces),
    },
    files,
    surfaceBreakdown,
    excludedGenerated,
    sessions: [sessionId],
  }
}
/**
 * Rough number of characters assumed per changed line when converting a
 * line-based diff into a character estimate.
 */
const APPROX_CHARS_PER_CHANGED_LINE = 40

/**
 * Sum the added and deleted line counts of `git diff --numstat` output.
 * Each line has the form "<added>\t<deleted>\t<path>"; binary files report
 * "-" for both counts and therefore contribute nothing.
 */
function sumNumstatLineCounts(numstatOutput: string): number {
  const wholeNumber = /^\d+$/
  let changedLines = 0

  for (const line of numstatOutput.split('\n')) {
    const [added = '', deleted = ''] = line.split('\t')
    for (const field of [added, deleted]) {
      if (wholeNumber.test(field)) {
        changedLines += Number.parseInt(field, 10)
      }
    }
  }

  return changedLines
}

/**
 * Estimate the size of the staged change to a file.
 *
 * This is an approximation, not a character diff: the number of added plus
 * deleted lines in the staged diff is multiplied by ~40 characters per line.
 * New and deleted files are therefore sized by their line count. Binary
 * files, files without staged changes, and any git failure yield 0.
 *
 * The counts come from `git diff --numstat`, whose output is meant for
 * machines; the human-oriented `--stat` summary line is subject to git's
 * message translation and would not parse in non-English locales.
 *
 * @param filePath - Path of the file, relative to the attribution repo root
 * @returns Estimated number of changed characters (0 when unknown)
 */
export async function getGitDiffSize(filePath: string): Promise<number> {
  const cwd = getAttributionRepoRoot()

  try {
    const result = await execFileNoThrowWithCwd(
      gitExe(),
      ['diff', '--cached', '--numstat', '--', filePath],
      { cwd, timeout: 5000 },
    )

    if (result.code !== 0 || !result.stdout) {
      return 0
    }

    return sumNumstatLineCounts(result.stdout) * APPROX_CHARS_PER_CHANGED_LINE
  } catch {
    return 0
  }
}

/**
 * Check if a file was deleted in the staged changes.
 * Any git failure is treated as "not deleted".
 */
export async function isFileDeleted(filePath: string): Promise<boolean> {
  const cwd = getAttributionRepoRoot()

  try {
    const { code, stdout } = await execFileNoThrowWithCwd(
      gitExe(),
      ['diff', '--cached', '--name-status', '--', filePath],
      { cwd, timeout: 5000 },
    )

    if (code !== 0 || !stdout) {
      return false
    }
    // Format: "D\tfilename" for deleted files
    return stdout.trim().startsWith('D\t')
  } catch {
    // Ignore errors
    return false
  }
}
/**
 * Get staged files from git (`git diff --cached --name-only`), one path per
 * entry. Returns an empty list when git fails or nothing is staged.
 */
export async function getStagedFiles(): Promise<string[]> {
  const repoRoot = getAttributionRepoRoot()

  try {
    const { code, stdout } = await execFileNoThrowWithCwd(
      gitExe(),
      ['diff', '--cached', '--name-only'],
      { cwd: repoRoot, timeout: 5000 },
    )

    if (code !== 0 || !stdout) {
      return []
    }
    // Drop the empty strings produced by trailing/blank lines
    return stdout.split('\n').filter(name => name.length > 0)
  } catch (error) {
    logError(error as Error)
    return []
  }
}

// formatAttributionTrailer moved to attributionTrailer.ts for tree-shaking
// (contains excluded strings that should not be in external builds)

/**
 * Check if we're in a transient git state (rebase, merge, cherry-pick or
 * bisect), detected by the presence of the corresponding marker entries in
 * the git directory. Returns false when the git directory cannot be resolved.
 */
export async function isGitTransientState(): Promise<boolean> {
  const gitDir = await resolveGitDir(getAttributionRepoRoot())
  if (!gitDir) {
    return false
  }

  const markers = [
    'rebase-merge',
    'rebase-apply',
    'MERGE_HEAD',
    'CHERRY_PICK_HEAD',
    'BISECT_LOG',
  ]

  // Probe all markers concurrently; a failed stat() means "not present"
  const present = await Promise.all(
    markers.map(marker =>
      stat(join(gitDir, marker)).then(
        () => true,
        () => false,
      ),
    ),
  )

  return present.includes(true)
}
/**
 * Convert attribution state to a snapshot message for persistence.
 * The `fileStates` Map is flattened into a plain object keyed by path; the
 * surface and all counters are copied over unchanged.
 */
export function stateToSnapshotMessage(
  state: AttributionState,
  messageId: UUID,
): AttributionSnapshotMessage {
  const fileStates: Record<string, FileAttributionState> = {}
  state.fileStates.forEach((fileState, path) => {
    fileStates[path] = fileState
  })

  const {
    surface,
    promptCount,
    promptCountAtLastCommit,
    permissionPromptCount,
    permissionPromptCountAtLastCommit,
    escapeCount,
    escapeCountAtLastCommit,
  } = state

  return {
    type: 'attribution-snapshot',
    messageId,
    surface,
    fileStates,
    promptCount,
    promptCountAtLastCommit,
    permissionPromptCount,
    permissionPromptCountAtLastCommit,
    escapeCount,
    escapeCountAtLastCommit,
  }
}

/**
 * Restore attribution state from snapshot messages.
 * Only the LAST snapshot is used; with no snapshots an empty state is
 * returned.
 */
export function restoreAttributionStateFromSnapshots(
  snapshots: AttributionSnapshotMessage[],
): AttributionState {
  const restored = createEmptyAttributionState()

  // Snapshots are full-state dumps (see stateToSnapshotMessage), not deltas.
  // The last snapshot has the most recent count for every path — fileStates
  // never shrinks. Iterating and SUMMING counts across snapshots causes
  // quadratic growth on restore (837 snapshots × 280 files → 1.15 quadrillion
  // "chars" tracked for a 5KB file over a 5-day session).
  const latest = snapshots[snapshots.length - 1]
  if (!latest) {
    return restored
  }

  restored.surface = latest.surface
  Object.entries(latest.fileStates).forEach(([path, fileState]) => {
    restored.fileStates.set(path, fileState)
  })

  // Counters come from the latest snapshot too; a missing counter is
  // restored as 0
  restored.promptCount = latest.promptCount ?? 0
  restored.promptCountAtLastCommit = latest.promptCountAtLastCommit ?? 0
  restored.permissionPromptCount = latest.permissionPromptCount ?? 0
  restored.permissionPromptCountAtLastCommit =
    latest.permissionPromptCountAtLastCommit ?? 0
  restored.escapeCount = latest.escapeCount ?? 0
  restored.escapeCountAtLastCommit = latest.escapeCountAtLastCommit ?? 0

  return restored
}

/**
 * Restore attribution state from log snapshots on session resume and hand
 * the result to `onUpdateState`.
 */
export function attributionRestoreStateFromLog(
  attributionSnapshots: AttributionSnapshotMessage[],
  onUpdateState: (newState: AttributionState) => void,
): void {
  onUpdateState(restoreAttributionStateFromSnapshots(attributionSnapshots))
}

/**
 * Increment promptCount and save an attribution snapshot.
 * Used to persist the prompt count across compaction.
 *
 * @param attribution - Current attribution state (not mutated)
 * @param saveSnapshot - Function to save the snapshot (allows async handling by caller)
 * @returns New attribution state with incremented promptCount
 */
export function incrementPromptCount(
  attribution: AttributionState,
  saveSnapshot: (snapshot: AttributionSnapshotMessage) => void,
): AttributionState {
  const bumped = {
    ...attribution,
    promptCount: attribution.promptCount + 1,
  }
  // Each snapshot gets a fresh message id
  saveSnapshot(stateToSnapshotMessage(bumped, randomUUID()))
  return bumped
}