utils/commitAttribution.ts at main · oppi.li/claude-code

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / utils / commitAttribution.ts
at main 961 lines 30 kB view raw
wrap content
oppi.li dump from zip 3d ago
63aada3f
  1import { createHash, randomUUID, type UUID } from 'crypto'
  2import { stat } from 'fs/promises'
  3import { isAbsolute, join, relative, sep } from 'path'
  4import { getOriginalCwd, getSessionId } from '../bootstrap/state.js'
  5import type {
  6  AttributionSnapshotMessage,
  7  FileAttributionState,
  8} from '../types/logs.js'
  9import { getCwd } from './cwd.js'
 10import { logForDebugging } from './debug.js'
 11import { execFileNoThrowWithCwd } from './execFileNoThrow.js'
 12import { getFsImplementation } from './fsOperations.js'
 13import { isGeneratedFile } from './generatedFiles.js'
 14import { getRemoteUrlForDir, resolveGitDir } from './git/gitFilesystem.js'
 15import { findGitRoot, gitExe } from './git.js'
 16import { logError } from './log.js'
 17import { getCanonicalName, type ModelName } from './model/model.js'
 18import { sequential } from './sequential.js'
 19
 20/**
 21 * List of repos where internal model names are allowed in trailers.
 22 * Includes both SSH and HTTPS URL formats.
 23 *
 24 * NOTE: This is intentionally a repo allowlist, not an org-wide check.
 25 * The anthropics and anthropic-experimental orgs contain PUBLIC repos
 26 * (e.g. anthropics/claude-code, anthropic-experimental/sandbox-runtime).
 27 * Undercover mode must stay ON in those to prevent codename leaks.
 28 * Only add repos here that are confirmed PRIVATE.
 29 */
 30const INTERNAL_MODEL_REPOS = [
 31  'github.com:anthropics/claude-cli-internal',
 32  'github.com/anthropics/claude-cli-internal',
 33  'github.com:anthropics/anthropic',
 34  'github.com/anthropics/anthropic',
 35  'github.com:anthropics/apps',
 36  'github.com/anthropics/apps',
 37  'github.com:anthropics/casino',
 38  'github.com/anthropics/casino',
 39  'github.com:anthropics/dbt',
 40  'github.com/anthropics/dbt',
 41  'github.com:anthropics/dotfiles',
 42  'github.com/anthropics/dotfiles',
 43  'github.com:anthropics/terraform-config',
 44  'github.com/anthropics/terraform-config',
 45  'github.com:anthropics/hex-export',
 46  'github.com/anthropics/hex-export',
 47  'github.com:anthropics/feedback-v2',
 48  'github.com/anthropics/feedback-v2',
 49  'github.com:anthropics/labs',
 50  'github.com/anthropics/labs',
 51  'github.com:anthropics/argo-rollouts',
 52  'github.com/anthropics/argo-rollouts',
 53  'github.com:anthropics/starling-configs',
 54  'github.com/anthropics/starling-configs',
 55  'github.com:anthropics/ts-tools',
 56  'github.com/anthropics/ts-tools',
 57  'github.com:anthropics/ts-capsules',
 58  'github.com/anthropics/ts-capsules',
 59  'github.com:anthropics/feldspar-testing',
 60  'github.com/anthropics/feldspar-testing',
 61  'github.com:anthropics/trellis',
 62  'github.com/anthropics/trellis',
 63  'github.com:anthropics/claude-for-hiring',
 64  'github.com/anthropics/claude-for-hiring',
 65  'github.com:anthropics/forge-web',
 66  'github.com/anthropics/forge-web',
 67  'github.com:anthropics/infra-manifests',
 68  'github.com/anthropics/infra-manifests',
 69  'github.com:anthropics/mycro_manifests',
 70  'github.com/anthropics/mycro_manifests',
 71  'github.com:anthropics/mycro_configs',
 72  'github.com/anthropics/mycro_configs',
 73  'github.com:anthropics/mobile-apps',
 74  'github.com/anthropics/mobile-apps',
 75]
 76
 77/**
 78 * Get the repo root for attribution operations.
 79 * Uses getCwd() which respects agent worktree overrides (AsyncLocalStorage),
 80 * then resolves to git root to handle `cd subdir` case.
 81 * Falls back to getOriginalCwd() if git root can't be determined.
 82 */
 83export function getAttributionRepoRoot(): string {
 84  const cwd = getCwd()
 85  return findGitRoot(cwd) ?? getOriginalCwd()
 86}
 87
 88// Cache for repo classification result. Primed once per process.
 89// 'internal' = remote matches INTERNAL_MODEL_REPOS allowlist
 90// 'external' = has a remote, not on allowlist (public/open-source repo)
 91// 'none'     = no remote URL (not a git repo, or no remote configured)
 92let repoClassCache: 'internal' | 'external' | 'none' | null = null
 93
 94/**
 95 * Synchronously return the cached repo classification.
 96 * Returns null if the async check hasn't run yet.
 97 */
 98export function getRepoClassCached(): 'internal' | 'external' | 'none' | null {
 99  return repoClassCache
100}
101
102/**
103 * Synchronously return the cached result of isInternalModelRepo().
104 * Returns false if the check hasn't run yet (safe default: don't leak).
105 */
106export function isInternalModelRepoCached(): boolean {
107  return repoClassCache === 'internal'
108}
109
/**
 * Check if the current repo is in the allowlist for internal model names.
 *
 * The result is stored in repoClassCache, so the remote URL is only looked up
 * until a classification has been recorded; later calls answer from the cache.
 * The function is wrapped in sequential() (presumably to serialize concurrent
 * callers so the lookup is not duplicated — confirm in ./sequential.js).
 *
 * Matching is anchored at the END of the repo name: after an allowlist entry
 * the remote URL may only contain nothing, `/`, `.git`, or `.git/`. A plain
 * substring test is not sufficient because an allowlisted private repo such as
 * `github.com/anthropics/anthropic` is a prefix of public repos like
 * `github.com/anthropics/anthropic-sdk-python`, which must NOT be classified
 * as internal.
 *
 * @returns true when the remote of the attribution repo root is allowlisted;
 *   false when it is not, or when there is no remote URL at all.
 */
export const isInternalModelRepo = sequential(async (): Promise<boolean> => {
  if (repoClassCache !== null) {
    return repoClassCache === 'internal'
  }

  const cwd = getAttributionRepoRoot()
  const remoteUrl = await getRemoteUrlForDir(cwd)

  if (!remoteUrl) {
    repoClassCache = 'none'
    return false
  }

  // Trim so that stray whitespace/newlines cannot defeat the end-anchored match.
  const normalizedRemote = remoteUrl.trim()
  const isInternal = INTERNAL_MODEL_REPOS.some(repo => {
    const start = normalizedRemote.indexOf(repo)
    if (start === -1) {
      return false
    }
    // Whatever follows the repo name must be a recognized URL terminator;
    // anything else (e.g. "-sdk-python") means this is a different repo.
    const rest = normalizedRemote.slice(start + repo.length)
    return rest === '' || rest === '/' || rest === '.git' || rest === '.git/'
  })
  repoClassCache = isInternal ? 'internal' : 'external'
  return isInternal
})
130
/**
 * Rewrite the model portion of a surface key to its public model name.
 *
 * A surface key has the shape "surface/model" (e.g. "cli/opus-4-5-fast").
 * Everything after the LAST slash is treated as the model and passed through
 * sanitizeModelName(); everything before it is kept verbatim.
 *
 * @param surfaceKey - Key to sanitize.
 * @returns The sanitized key, or the input unchanged when it has no slash.
 */
export function sanitizeSurfaceKey(surfaceKey: string): string {
  const separatorAt = surfaceKey.lastIndexOf('/')
  if (separatorAt === -1) {
    // No model part to sanitize
    return surfaceKey
  }

  const surfacePart = surfaceKey.slice(0, separatorAt)
  const publicModel = sanitizeModelName(surfaceKey.slice(separatorAt + 1))
  return `${surfacePart}/${publicModel}`
}
148
// @[MODEL LAUNCH]: Add a mapping for the new model ID so git commit trailers show the public name.
/**
 * Ordered [substring, public name] rules used by sanitizeModelName().
 * Order matters: more specific markers (e.g. 'opus-4-6') must come before
 * the shorter markers they contain (e.g. 'opus-4'), because the first match wins.
 */
const PUBLIC_MODEL_NAME_RULES: ReadonlyArray<readonly [string, string]> = [
  ['opus-4-6', 'claude-opus-4-6'],
  ['opus-4-5', 'claude-opus-4-5'],
  ['opus-4-1', 'claude-opus-4-1'],
  ['opus-4', 'claude-opus-4'],
  ['sonnet-4-6', 'claude-sonnet-4-6'],
  ['sonnet-4-5', 'claude-sonnet-4-5'],
  ['sonnet-4', 'claude-sonnet-4'],
  ['sonnet-3-7', 'claude-sonnet-3-7'],
  ['haiku-4-5', 'claude-haiku-4-5'],
  ['haiku-3-5', 'claude-haiku-3-5'],
]

/**
 * Map a (possibly internal) model name to its public equivalent.
 *
 * The name is matched by substring against known model families; the first
 * matching family determines the public name.
 *
 * @param shortName - Model name to sanitize.
 * @returns The public family name, or the generic 'claude' for unknown models.
 */
export function sanitizeModelName(shortName: string): string {
  for (const [marker, publicName] of PUBLIC_MODEL_NAME_RULES) {
    if (shortName.includes(marker)) {
      return publicName
    }
  }
  // Unknown models get a generic name
  return 'claude'
}
169
/**
 * Attribution state for tracking Claude's contributions to files.
 *
 * Instances are treated as immutable values by the track* helpers in this
 * file: each returns a new object (with a new fileStates Map) rather than
 * mutating its input.
 *
 * Note: stateToSnapshotMessage() persists fileStates, surface and the
 * counters, but not sessionBaselines or startingHeadSha.
 */
export type AttributionState = {
  // File states keyed by normalized path (see normalizeFilePath: relative to
  // the attribution repo root, forward slashes). claudeContribution in each
  // entry accumulates across edits to the same file.
  fileStates: Map<string, FileAttributionState>
  // Session baseline states for net change calculation. When several states
  // are merged for a commit, the first baseline seen for a path wins.
  sessionBaselines: Map<string, { contentHash: string; mtime: number }>
  // Surface from which edits were made (initialized from getClientSurface())
  surface: string
  // HEAD SHA at session start (for detecting external commits); null in a
  // freshly created state
  startingHeadSha: string | null
  // Total prompts in session (for steer count calculation)
  promptCount: number
  // Prompts at last commit (to calculate steers for current commit)
  promptCountAtLastCommit: number
  // Permission prompt tracking: running total and value at last commit
  permissionPromptCount: number
  permissionPromptCountAtLastCommit: number
  // ESC press tracking (user cancelled permission prompt): running total and
  // value at last commit
  escapeCount: number
  escapeCountAtLastCommit: number
}
193
/**
 * Summary of Claude's contribution for a commit.
 */
export type AttributionSummary = {
  // Claude's share of all counted chars, as a rounded integer percentage
  // (0 when nothing was counted)
  claudePercent: number
  // Sum of per-file Claude chars across all counted files
  claudeChars: number
  // Sum of per-file human chars across all counted files
  humanChars: number
  // Distinct surfaces of all attribution states that were merged
  surfaces: string[]
}
203
/**
 * Per-file attribution details for git notes.
 */
export type FileAttribution = {
  // Chars attributed to Claude for this file (never negative)
  claudeChars: number
  // Chars attributed to the human for this file (never negative)
  humanChars: number
  // claudeChars as a rounded integer percentage of claudeChars + humanChars
  // (0 when both are 0)
  percent: number
  // Surface the file's changes are attributed to
  surface: string
}
213
/**
 * Full attribution data for git notes JSON.
 */
export type AttributionData = {
  // Schema version of this payload
  version: 1
  // Commit-wide totals
  summary: AttributionSummary
  // Per-file details keyed by staged file path
  files: Record<string, FileAttribution>
  // Claude chars per surface; percent is relative to ALL counted chars
  // (Claude + human), not just Claude's
  surfaceBreakdown: Record<string, { claudeChars: number; percent: number }>
  // Staged files skipped because isGeneratedFile() flagged them
  excludedGenerated: string[]
  // Session IDs that contributed (calculateCommitAttribution records the
  // current session ID only)
  sessions: string[]
}
225
/**
 * Determine which client surface this process is running as.
 *
 * @returns The value of the CLAUDE_CODE_ENTRYPOINT environment variable, or
 *   'cli' when that variable is not set.
 */
export function getClientSurface(): string {
  const entrypoint = process.env.CLAUDE_CODE_ENTRYPOINT
  return entrypoint ?? 'cli'
}
232
/**
 * Combine a surface and a model into a single surface key.
 *
 * @param surface - Client surface (e.g. "cli").
 * @param model - Model whose canonical name (via getCanonicalName) is appended.
 * @returns A key of the form "surface/model" (e.g., "cli/claude-sonnet").
 */
export function buildSurfaceKey(surface: string, model: ModelName): string {
  const canonicalModel = getCanonicalName(model)
  return `${surface}/${canonicalModel}`
}
240
/**
 * Hash file content with SHA-256.
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hex string.
 */
export function computeContentHash(content: string): string {
  const hasher = createHash('sha256')
  hasher.update(content)
  return hasher.digest('hex')
}
247
/**
 * Turn a file path into the key used for attribution tracking: a path
 * relative to the attribution repo root, using forward slashes.
 *
 * - Relative inputs are returned unchanged.
 * - Absolute inputs are compared against the repo root twice: first with
 *   symlinks resolved on both sides (so /tmp vs /private/tmp on macOS still
 *   match), then as given. The first comparison that places the file inside
 *   the root (or equal to it) produces the relative key.
 * - Absolute inputs outside the repo root are returned unchanged.
 *
 * @param filePath - Absolute or relative path.
 * @returns The normalized tracking key.
 */
export function normalizeFilePath(filePath: string): string {
  const fs = getFsImplementation()
  const repoRoot = getAttributionRepoRoot()

  if (!isAbsolute(filePath)) {
    return filePath
  }

  // realpath that degrades to the input when resolution fails
  // (e.g. the file does not exist yet)
  const realpathOrSelf = (target: string): string => {
    try {
      return fs.realpathSync(target)
    } catch {
      return target
    }
  }

  const isInsideOrEqual = (candidate: string, root: string): boolean =>
    candidate === root || candidate.startsWith(root + sep)

  // Forward slashes so keys match git diff output on Windows
  const toGitStylePath = (root: string, target: string): string =>
    relative(root, target).replaceAll(sep, '/')

  const realFile = realpathOrSelf(filePath)
  const realRoot = realpathOrSelf(repoRoot)
  if (isInsideOrEqual(realFile, realRoot)) {
    return toGitStylePath(realRoot, realFile)
  }

  // Fallback: compare the paths exactly as they were given
  if (isInsideOrEqual(filePath, repoRoot)) {
    return toGitStylePath(repoRoot, filePath)
  }

  return filePath
}
292
/**
 * Make a path absolute.
 *
 * @param filePath - Absolute path, or a path relative to the attribution repo root.
 * @returns The input itself when already absolute; otherwise the input joined
 *   onto getAttributionRepoRoot().
 */
export function expandFilePath(filePath: string): string {
  return isAbsolute(filePath)
    ? filePath
    : join(getAttributionRepoRoot(), filePath)
}
302
/**
 * Build the initial attribution state for a new session: no tracked files,
 * no baselines, no known starting HEAD, all counters at zero, and the surface
 * taken from getClientSurface().
 *
 * @returns A fresh state object with its own (empty) Maps.
 */
export function createEmptyAttributionState(): AttributionState {
  const emptyState: AttributionState = {
    fileStates: new Map(),
    sessionBaselines: new Map(),
    surface: getClientSurface(),
    startingHeadSha: null,
    promptCount: 0,
    promptCountAtLastCommit: 0,
    permissionPromptCount: 0,
    permissionPromptCountAtLastCommit: 0,
    escapeCount: 0,
    escapeCountAtLastCommit: 0,
  }
  return emptyState
}
320
/**
 * Work out the attribution entry to store after Claude changes a file.
 *
 * The contribution of a single edit is measured in string characters:
 * - empty old content (new file): length of the new content
 * - empty new content (full deletion): length of the old content
 * - otherwise: the size of the changed region, found by stripping the common
 *   prefix and common suffix and taking the larger of the old/new remainder.
 *   This counts same-length replacements (e.g., "Esc" → "esc") that a plain
 *   length difference would report as 0.
 *
 * The result is added to any contribution already recorded for the file.
 *
 * @param existingFileStates - Current entries, keyed by normalized path.
 * @param filePath - File that was modified.
 * @param oldContent - Content before the edit.
 * @param newContent - Content after the edit.
 * @param mtime - Modification time to record.
 * @returns The entry to store, or null if an error was thrown (and logged).
 */
function computeFileModificationState(
  existingFileStates: Map<string, FileAttributionState>,
  filePath: string,
  oldContent: string,
  newContent: string,
  mtime: number,
): FileAttributionState | null {
  const normalizedPath = normalizeFilePath(filePath)

  try {
    let editSize: number

    if (oldContent === '') {
      // New file
      editSize = newContent.length
    } else if (newContent === '') {
      // Full deletion
      editSize = oldContent.length
    } else {
      const shorterLen = Math.min(oldContent.length, newContent.length)

      let commonPrefix = 0
      while (
        commonPrefix < shorterLen &&
        oldContent[commonPrefix] === newContent[commonPrefix]
      ) {
        commonPrefix++
      }

      // The suffix may only use characters the prefix has not already claimed
      const suffixLimit = shorterLen - commonPrefix
      let commonSuffix = 0
      for (
        let oldIdx = oldContent.length - 1, newIdx = newContent.length - 1;
        commonSuffix < suffixLimit && oldContent[oldIdx] === newContent[newIdx];
        oldIdx--, newIdx--
      ) {
        commonSuffix++
      }

      const longerLen = Math.max(oldContent.length, newContent.length)
      editSize = longerLen - commonPrefix - commonSuffix
    }

    const priorContribution =
      existingFileStates.get(normalizedPath)?.claudeContribution ?? 0

    return {
      contentHash: computeContentHash(newContent),
      claudeContribution: priorContribution + editSize,
      mtime,
    }
  } catch (error) {
    logError(error as Error)
    return null
  }
}
381
/**
 * Look up a file's modification time in milliseconds (mtimeMs).
 *
 * Async so callers can compute it ahead of a synchronous setAppState callback.
 *
 * @param filePath - Absolute path, or path relative to the attribution repo root.
 * @returns The file's mtimeMs, or Date.now() when stat fails (e.g. the file
 *   does not exist).
 */
export async function getFileMtime(filePath: string): Promise<number> {
  const absolutePath = expandFilePath(normalizeFilePath(filePath))
  try {
    const { mtimeMs } = await stat(absolutePath)
    return mtimeMs
  } catch {
    return Date.now()
  }
}
397
/**
 * Record an edit Claude made to a file. Called after Edit/Write tool completes.
 *
 * The input state is not mutated; a new state with a copied fileStates Map is
 * returned. If the contribution could not be computed, the input state is
 * returned as-is.
 *
 * @param state - Current attribution state.
 * @param filePath - File that was modified.
 * @param oldContent - Content before the edit.
 * @param newContent - Content after the edit.
 * @param _userModified - Unused by this function.
 * @param mtime - Modification time to record; defaults to Date.now().
 * @returns The updated attribution state.
 */
export function trackFileModification(
  state: AttributionState,
  filePath: string,
  oldContent: string,
  newContent: string,
  _userModified: boolean,
  mtime: number = Date.now(),
): AttributionState {
  const key = normalizeFilePath(filePath)
  const updatedEntry = computeFileModificationState(
    state.fileStates,
    filePath,
    oldContent,
    newContent,
    mtime,
  )
  if (updatedEntry === null) {
    return state
  }

  const fileStates = new Map(state.fileStates).set(key, updatedEntry)

  logForDebugging(
    `Attribution: Tracked ${updatedEntry.claudeContribution} chars for ${key}`,
  )

  return { ...state, fileStates }
}
434
/**
 * Record a file Claude created through a mechanism that is not tracked
 * directly (e.g., a bash command).
 *
 * Implemented as a modification whose previous content is the empty string,
 * so the whole new content counts as Claude's contribution.
 *
 * @param state - Current attribution state.
 * @param filePath - File that was created.
 * @param content - Content of the new file.
 * @param mtime - Modification time to record; defaults to Date.now().
 * @returns The updated attribution state.
 */
export function trackFileCreation(
  state: AttributionState,
  filePath: string,
  content: string,
  mtime: number = Date.now(),
): AttributionState {
  const previousContent = ''
  const userModified = false
  return trackFileModification(
    state,
    filePath,
    previousContent,
    content,
    userModified,
    mtime,
  )
}
448
/**
 * Record a file Claude deleted through a mechanism that is not tracked
 * directly (e.g., a bash rm command).
 *
 * The length of the deleted content is added to whatever contribution was
 * already recorded for the file; the stored content hash becomes the empty
 * string and mtime is set to Date.now(). The input state is not mutated.
 *
 * @param state - Current attribution state.
 * @param filePath - File that was deleted.
 * @param oldContent - Content the file had before deletion.
 * @returns The updated attribution state.
 */
export function trackFileDeletion(
  state: AttributionState,
  filePath: string,
  oldContent: string,
): AttributionState {
  const key = normalizeFilePath(filePath)
  const removedChars = oldContent.length
  const priorContribution = state.fileStates.get(key)?.claudeContribution ?? 0

  const deletionEntry: FileAttributionState = {
    contentHash: '', // Deleted files carry an empty hash
    claudeContribution: priorContribution + removedChars,
    mtime: Date.now(),
  }

  const fileStates = new Map(state.fileStates).set(key, deletionEntry)

  logForDebugging(
    `Attribution: Tracked deletion of ${key} (${removedChars} chars removed, total contribution: ${deletionEntry.claudeContribution})`,
  )

  return { ...state, fileStates }
}
481
482// --
483
/**
 * Apply many file changes to the attribution state in one pass.
 *
 * The fileStates Map is copied once and then updated in place for every
 * change, instead of being copied per file. That avoids the O(n²) copying
 * cost when processing large git diffs (e.g., jj operations that touch
 * hundreds of thousands of files). Later changes to the same path see the
 * contribution accumulated by earlier ones.
 *
 * @param state - Current attribution state (not mutated).
 * @param changes - Changes to record. `mtime` defaults to Date.now() per
 *   change. For 'deleted' entries only `oldContent` is used; other types go
 *   through the same contribution calculation as a single-file modification.
 * @returns The updated attribution state.
 */
export function trackBulkFileChanges(
  state: AttributionState,
  changes: ReadonlyArray<{
    path: string
    type: 'modified' | 'created' | 'deleted'
    oldContent: string
    newContent: string
    mtime?: number
  }>,
): AttributionState {
  // Single working copy shared by all changes
  const fileStates = new Map(state.fileStates)

  for (const change of changes) {
    const mtime = change.mtime ?? Date.now()

    if (change.type === 'deleted') {
      const key = normalizeFilePath(change.path)
      const removedChars = change.oldContent.length
      const totalContribution =
        (fileStates.get(key)?.claudeContribution ?? 0) + removedChars

      fileStates.set(key, {
        contentHash: '',
        claudeContribution: totalContribution,
        mtime,
      })

      logForDebugging(
        `Attribution: Tracked deletion of ${key} (${removedChars} chars removed, total contribution: ${totalContribution})`,
      )
      continue
    }

    const updatedEntry = computeFileModificationState(
      fileStates,
      change.path,
      change.oldContent,
      change.newContent,
      mtime,
    )
    if (!updatedEntry) {
      // Computation failed; leave any existing entry untouched
      continue
    }

    const key = normalizeFilePath(change.path)
    fileStates.set(key, updatedEntry)

    logForDebugging(
      `Attribution: Tracked ${updatedEntry.claudeContribution} chars for ${key}`,
    )
  }

  return { ...state, fileStates }
}
543
/**
 * Calculate final attribution for staged files.
 *
 * Steps:
 * 1. Merge the given session states: baselines are merged so that the first
 *    baseline seen for a path wins; per-file Claude contributions are summed.
 * 2. For every staged file (in parallel): skip generated files, detect staged
 *    deletions, and split the change into Claude chars vs human chars.
 *    - Tracked by Claude: all of the tracked contribution goes to Claude and
 *      human chars are 0.
 *    - Not tracked: the size comes from getGitDiffSize(), falling back to the
 *      file size (or 100 for a deletion) when the diff size is 0.
 *    - Files that are not deleted but cannot be stat'ed are left out.
 * 3. Aggregate totals, percentages and the per-surface breakdown.
 *
 * @param states - Attribution states to merge. fileStates/sessionBaselines
 *   may be Maps or plain objects (e.g. after serialization). May be empty, in
 *   which case files are attributed to the current client surface.
 * @param stagedFiles - Staged paths relative to the attribution repo root;
 *   they are used as keys into the tracked file states and as git pathspecs.
 * @returns Attribution data for git notes. `sessions` contains only the
 *   current session ID.
 */
export async function calculateCommitAttribution(
  states: AttributionState[],
  stagedFiles: string[],
): Promise<AttributionData> {
  const cwd = getAttributionRepoRoot()
  const sessionId = getSessionId()

  const files: Record<string, FileAttribution> = {}
  const excludedGenerated: string[] = []
  const surfaces = new Set<string>()
  const surfaceCounts: Record<string, number> = {}

  let totalClaudeChars = 0
  let totalHumanChars = 0

  // Every file is attributed to the first state's surface. Resolve it once,
  // and fall back to the current client surface so that an empty `states`
  // array does not throw when there are staged files to process.
  const fileSurface = states[0]?.surface ?? getClientSurface()

  // Merge file states from all sessions
  const mergedFileStates = new Map<string, FileAttributionState>()
  const mergedBaselines = new Map<
    string,
    { contentHash: string; mtime: number }
  >()

  for (const state of states) {
    surfaces.add(state.surface)

    // Merge baselines (earliest baseline wins)
    // Handle both Map and plain object (in case of serialization)
    const baselines =
      state.sessionBaselines instanceof Map
        ? state.sessionBaselines
        : new Map(
            Object.entries(
              (state.sessionBaselines ?? {}) as Record<
                string,
                { contentHash: string; mtime: number }
              >,
            ),
          )
    for (const [path, baseline] of baselines) {
      if (!mergedBaselines.has(path)) {
        mergedBaselines.set(path, baseline)
      }
    }

    // Merge file states (accumulate contributions)
    // Handle both Map and plain object (in case of serialization)
    const fileStates =
      state.fileStates instanceof Map
        ? state.fileStates
        : new Map(
            Object.entries(
              (state.fileStates ?? {}) as Record<string, FileAttributionState>,
            ),
          )
    for (const [path, fileState] of fileStates) {
      const existing = mergedFileStates.get(path)
      if (existing) {
        // Later state's hash/mtime win; contributions are summed
        mergedFileStates.set(path, {
          ...fileState,
          claudeContribution:
            existing.claudeContribution + fileState.claudeContribution,
        })
      } else {
        mergedFileStates.set(path, fileState)
      }
    }
  }

  // Process files in parallel
  const fileResults = await Promise.all(
    stagedFiles.map(async file => {
      // Skip generated files
      if (isGeneratedFile(file)) {
        return { type: 'generated' as const, file }
      }

      const absPath = join(cwd, file)
      const fileState = mergedFileStates.get(file)
      const baseline = mergedBaselines.get(file)

      let claudeChars = 0
      let humanChars = 0

      // Check if file was deleted
      const deleted = await isFileDeleted(file)

      if (deleted) {
        if (fileState) {
          // Claude deleted this file (tracked deletion)
          claudeChars = fileState.claudeContribution
          humanChars = 0
        } else {
          // Human deleted this file (untracked deletion)
          // Use diff size to get the actual change size
          const diffSize = await getGitDiffSize(file)
          humanChars = diffSize > 0 ? diffSize : 100 // Minimum attribution for a deletion
        }
      } else {
        try {
          // Only need file size, not content - stat() avoids loading GB-scale
          // build artifacts into memory when they appear in the working tree.
          // stats.size (bytes) is an adequate proxy for char count here.
          const stats = await stat(absPath)

          if (fileState) {
            // We have tracked modifications for this file
            claudeChars = fileState.claudeContribution
            humanChars = 0
          } else if (baseline) {
            // File was modified but not tracked - human modification
            const diffSize = await getGitDiffSize(file)
            humanChars = diffSize > 0 ? diffSize : stats.size
          } else {
            // New file not created by Claude
            humanChars = stats.size
          }
        } catch {
          // File doesn't exist or stat failed - skip it
          return null
        }
      }

      // Ensure non-negative values
      claudeChars = Math.max(0, claudeChars)
      humanChars = Math.max(0, humanChars)

      const total = claudeChars + humanChars
      const percent = total > 0 ? Math.round((claudeChars / total) * 100) : 0

      return {
        type: 'file' as const,
        file,
        claudeChars,
        humanChars,
        percent,
        surface: fileSurface,
      }
    }),
  )

  // Aggregate results
  for (const result of fileResults) {
    if (!result) continue

    if (result.type === 'generated') {
      excludedGenerated.push(result.file)
      continue
    }

    files[result.file] = {
      claudeChars: result.claudeChars,
      humanChars: result.humanChars,
      percent: result.percent,
      surface: result.surface,
    }

    totalClaudeChars += result.claudeChars
    totalHumanChars += result.humanChars

    surfaceCounts[result.surface] =
      (surfaceCounts[result.surface] ?? 0) + result.claudeChars
  }

  const totalChars = totalClaudeChars + totalHumanChars
  const claudePercent =
    totalChars > 0 ? Math.round((totalClaudeChars / totalChars) * 100) : 0

  // Calculate surface breakdown (percentage of total content per surface)
  const surfaceBreakdown: Record<
    string,
    { claudeChars: number; percent: number }
  > = {}
  for (const [surface, chars] of Object.entries(surfaceCounts)) {
    // Calculate what percentage of TOTAL content this surface contributed
    const percent = totalChars > 0 ? Math.round((chars / totalChars) * 100) : 0
    surfaceBreakdown[surface] = { claudeChars: chars, percent }
  }

  return {
    version: 1,
    summary: {
      claudePercent,
      claudeChars: totalClaudeChars,
      humanChars: totalHumanChars,
      surfaces: Array.from(surfaces),
    },
    files,
    surfaceBreakdown,
    excludedGenerated,
    sessions: [sessionId],
  }
}
744
/**
 * Estimate the size of the staged change for a file.
 *
 * The estimate is line-based, not an exact character count: the number of
 * added plus deleted lines reported by `git diff --cached --numstat` is
 * multiplied by an assumed average of 40 characters per line.
 *
 * `--numstat` is used instead of parsing the `--stat` summary line because
 * its output ("<added>\t<deleted>\t<path>") is machine-oriented: it does not
 * depend on git's UI language and cannot be confused by file names that
 * happen to contain words like "file changed". Binary files are reported by
 * git as "-\t-\t<path>" and contribute 0.
 *
 * @param filePath - Pathspec of the file, relative to the attribution repo root.
 * @returns The estimated number of changed characters, or 0 when git fails,
 *   prints nothing, or reports no line changes.
 */
export async function getGitDiffSize(filePath: string): Promise<number> {
  // Rough average used to convert changed lines into characters
  const approxCharsPerLine = 40
  const cwd = getAttributionRepoRoot()

  try {
    const result = await execFileNoThrowWithCwd(
      gitExe(),
      ['diff', '--cached', '--numstat', '--', filePath],
      { cwd, timeout: 5000 },
    )

    if (result.code !== 0 || !result.stdout) {
      return 0
    }

    let changedLines = 0
    for (const line of result.stdout.split('\n')) {
      if (!line) continue

      // Format: "<added>\t<deleted>\t<path>"; only the two counts are needed
      const [addedField, deletedField] = line.split('\t')
      const added = Number.parseInt(addedField ?? '', 10)
      const deleted = Number.parseInt(deletedField ?? '', 10)

      // Binary files ("-") and malformed lines have no usable line counts
      if (Number.isNaN(added) || Number.isNaN(deleted)) continue

      changedLines += added + deleted
    }

    return changedLines * approxCharsPerLine
  } catch {
    return 0
  }
}
789
/**
 * Tell whether the staged change for a file is a deletion.
 *
 * Runs `git diff --cached --name-status` for the file and looks for the
 * "D\tfilename" status at the start of the output.
 *
 * @param filePath - Pathspec of the file, relative to the attribution repo root.
 * @returns true for a staged deletion; false otherwise, including when git
 *   fails, prints nothing, or an error is thrown.
 */
export async function isFileDeleted(filePath: string): Promise<boolean> {
  const cwd = getAttributionRepoRoot()

  try {
    const result = await execFileNoThrowWithCwd(
      gitExe(),
      ['diff', '--cached', '--name-status', '--', filePath],
      { cwd, timeout: 5000 },
    )

    if (result.code !== 0 || !result.stdout) {
      return false
    }

    const statusOutput = result.stdout.trim()
    return statusOutput.startsWith('D\t')
  } catch {
    // Best effort: treat any failure as "not deleted"
    return false
  }
}
813
/**
 * Get staged files from git.
 *
 * Uses `git diff --cached --name-only -z` so that paths are NUL-separated and
 * emitted verbatim. Without `-z`, git C-quotes paths containing "unusual"
 * characters (by default this includes non-ASCII bytes, see core.quotePath),
 * e.g. `"caf\303\251.txt"`; such quoted names would neither exist on disk nor
 * match the keys under which file edits are tracked.
 *
 * @returns Staged paths relative to the attribution repo root, or an empty
 *   array when git fails, prints nothing, or an error is thrown (and logged).
 */
export async function getStagedFiles(): Promise<string[]> {
  const cwd = getAttributionRepoRoot()

  try {
    const result = await execFileNoThrowWithCwd(
      gitExe(),
      ['diff', '--cached', '--name-only', '-z'],
      { cwd, timeout: 5000 },
    )

    if (result.code === 0 && result.stdout) {
      // Each path is terminated by NUL; drop the empty trailing piece
      return result.stdout.split('\0').filter(Boolean)
    }
  } catch (error) {
    logError(error as Error)
  }

  return []
}
836
837// formatAttributionTrailer moved to attributionTrailer.ts for tree-shaking
838// (contains excluded strings that should not be in external builds)
839
/**
 * Detect whether the repository is in the middle of a multi-step git
 * operation (rebase, merge, cherry-pick, or bisect).
 *
 * Looks for the marker files/directories git leaves in the git dir while
 * such an operation is in progress.
 *
 * @returns true if any marker exists; false if none do or the git dir cannot
 *   be resolved.
 */
export async function isGitTransientState(): Promise<boolean> {
  const gitDir = await resolveGitDir(getAttributionRepoRoot())
  if (!gitDir) {
    return false
  }

  const transientMarkers = [
    'rebase-merge',
    'rebase-apply',
    'MERGE_HEAD',
    'CHERRY_PICK_HEAD',
    'BISECT_LOG',
  ]

  // stat() succeeding means the marker is present
  const markerExists = async (marker: string): Promise<boolean> => {
    try {
      await stat(join(gitDir, marker))
      return true
    } catch {
      return false
    }
  }

  const presence = await Promise.all(
    transientMarkers.map(marker => markerExists(marker)),
  )
  return presence.includes(true)
}
868
/**
 * Serialize an attribution state into a snapshot message for persistence.
 *
 * The fileStates Map is flattened into a plain record; surface and all
 * counters are copied over. sessionBaselines and startingHeadSha are not
 * part of the snapshot.
 *
 * @param state - State to snapshot.
 * @param messageId - ID to assign to the snapshot message.
 * @returns The snapshot message.
 */
export function stateToSnapshotMessage(
  state: AttributionState,
  messageId: UUID,
): AttributionSnapshotMessage {
  const fileStatesRecord: Record<string, FileAttributionState> = {}
  state.fileStates.forEach((entry, path) => {
    fileStatesRecord[path] = entry
  })

  const {
    surface,
    promptCount,
    promptCountAtLastCommit,
    permissionPromptCount,
    permissionPromptCountAtLastCommit,
    escapeCount,
    escapeCountAtLastCommit,
  } = state

  return {
    type: 'attribution-snapshot',
    messageId,
    surface,
    fileStates: fileStatesRecord,
    promptCount,
    promptCountAtLastCommit,
    permissionPromptCount,
    permissionPromptCountAtLastCommit,
    escapeCount,
    escapeCountAtLastCommit,
  }
}
895
/**
 * Rebuild an attribution state from persisted snapshot messages.
 *
 * Only the LAST snapshot is used. Snapshots are full-state dumps (see
 * stateToSnapshotMessage), not deltas, and fileStates never shrinks, so the
 * last one already holds the most recent count for every path. Summing
 * counts across snapshots would inflate them quadratically on restore
 * (837 snapshots × 280 files → 1.15 quadrillion "chars" tracked for a 5KB
 * file over a 5-day session).
 *
 * @param snapshots - Snapshots in chronological order.
 * @returns The restored state, or an empty state when there are no
 *   snapshots. Counters missing from the snapshot default to 0.
 */
export function restoreAttributionStateFromSnapshots(
  snapshots: AttributionSnapshotMessage[],
): AttributionState {
  const restored = createEmptyAttributionState()

  const latest = snapshots[snapshots.length - 1]
  if (!latest) {
    return restored
  }

  restored.surface = latest.surface
  Object.entries(latest.fileStates).forEach(([path, entry]) => {
    restored.fileStates.set(path, entry)
  })

  // Counters come from the same (most recent) snapshot
  restored.promptCount = latest.promptCount ?? 0
  restored.promptCountAtLastCommit = latest.promptCountAtLastCommit ?? 0
  restored.permissionPromptCount = latest.permissionPromptCount ?? 0
  restored.permissionPromptCountAtLastCommit =
    latest.permissionPromptCountAtLastCommit ?? 0
  restored.escapeCount = latest.escapeCount ?? 0
  restored.escapeCountAtLastCommit = latest.escapeCountAtLastCommit ?? 0

  return restored
}
930
/**
 * Restore attribution state from logged snapshots when a session resumes,
 * and hand the result to the caller's state updater.
 *
 * @param attributionSnapshots - Snapshots read from the log.
 * @param onUpdateState - Callback that receives the restored state.
 */
export function attributionRestoreStateFromLog(
  attributionSnapshots: AttributionSnapshotMessage[],
  onUpdateState: (newState: AttributionState) => void,
): void {
  onUpdateState(restoreAttributionStateFromSnapshots(attributionSnapshots))
}
941
/**
 * Bump promptCount by one and persist the result as a snapshot.
 * Used to keep the prompt count across compaction.
 *
 * The input state is not mutated. The snapshot gets a freshly generated
 * random UUID as its message ID.
 *
 * @param attribution - Current attribution state
 * @param saveSnapshot - Function to save the snapshot (allows async handling by caller)
 * @returns New attribution state with incremented promptCount
 */
export function incrementPromptCount(
  attribution: AttributionState,
  saveSnapshot: (snapshot: AttributionSnapshotMessage) => void,
): AttributionState {
  const nextPromptCount = attribution.promptCount + 1
  const updated = { ...attribution, promptCount: nextPromptCount }

  saveSnapshot(stateToSnapshotMessage(updated, randomUUID()))

  return updated
}