// Source dump of Claude Code.
// Branch: main · 532 lines · 16 kB (raw view).
import type { StructuredPatchHunk } from 'diff'
import { access, readFile } from 'fs/promises'
import { dirname, join, relative, sep } from 'path'
import { getCwd } from './cwd.js'
import { getCachedRepository } from './detectRepository.js'
import { execFileNoThrow, execFileNoThrowWithCwd } from './execFileNoThrow.js'
import { isFileWithinReadSizeLimit } from './file.js'
import {
  findGitRoot,
  getDefaultBranch,
  getGitDir,
  getIsGit,
  gitExe,
} from './git.js'

/** Aggregate totals for a diff. */
export type GitDiffStats = {
  filesCount: number
  linesAdded: number
  linesRemoved: number
}

/** Line counts for one file. Binary files report 0/0 with `isBinary` set. */
export type PerFileStats = {
  added: number
  removed: number
  isBinary: boolean
  isUntracked?: boolean
}

/** Result of {@link fetchGitDiff}: totals, per-file stats and (optionally) hunks. */
export type GitDiffResult = {
  stats: GitDiffStats
  perFileStats: Map<string, PerFileStats>
  hunks: Map<string, StructuredPatchHunk[]>
}

const GIT_TIMEOUT_MS = 5000
const MAX_FILES = 50
const MAX_DIFF_SIZE_BYTES = 1_000_000 // 1 MB - skip files larger than this
const MAX_LINES_PER_FILE = 400 // GitHub's auto-load limit
const MAX_FILES_FOR_DETAILS = 500 // Skip per-file details if more files than this

// First line of a per-file section once the leading "diff --git " has been
// split off: "a/path/to/file b/path/to/file".
const DIFF_FILE_HEADER_RE = /^a\/(.+?) b\/(.+)$/

// Hunk header: @@ -oldStart,oldLines +newStart,newLines @@
// The ",count" parts are optional; an omitted count means 1.
const HUNK_HEADER_RE = /^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/

// Marker files whose presence in the git dir signals an in-progress
// merge / rebase / cherry-pick / revert.
const TRANSIENT_STATE_FILES = [
  'MERGE_HEAD',
  'REBASE_HEAD',
  'CHERRY_PICK_HEAD',
  'REVERT_HEAD',
]

/**
 * Run a git subcommand with `--no-optional-locks`, the shared timeout, and
 * output discarded on error. Never throws; callers inspect `code`.
 */
function runGit(args: string[]) {
  return execFileNoThrow(gitExe(), ['--no-optional-locks', ...args], {
    timeout: GIT_TIMEOUT_MS,
    preserveOutputOnError: false,
  })
}

/**
 * Fetch git diff stats comparing the working tree to HEAD.
 *
 * Returns null if not in a git repo or if the numstat command fails.
 * Also returns null during merge/rebase/cherry-pick/revert operations, since
 * the working tree then contains incoming changes that weren't intentionally
 * made by the user.
 *
 * `hunks` is always an empty map here; use {@link fetchGitDiffHunks} to load
 * them on demand. Untracked files are appended to `perFileStats` only while
 * fewer than MAX_FILES entries are present, and only those appended entries
 * are added to `stats.filesCount`.
 */
export async function fetchGitDiff(): Promise<GitDiffResult | null> {
  if (!(await getIsGit())) return null

  // Skip diff calculation during transient git states since the
  // working tree contains incoming changes, not user-intentional edits
  if (await isInTransientGitState()) return null

  // Quick probe: --shortstat yields totals only, which lets us detect massive
  // diffs (e.g., jj workspaces) before committing to expensive operations.
  const shortstat = await runGit(['diff', 'HEAD', '--shortstat'])
  if (shortstat.code === 0) {
    const quickStats = parseShortstat(shortstat.stdout)
    if (quickStats && quickStats.filesCount > MAX_FILES_FOR_DETAILS) {
      // Too many files - return accurate totals but skip per-file details
      // to avoid loading hundreds of MB into memory
      return { stats: quickStats, perFileStats: new Map(), hunks: new Map() }
    }
  }

  // Get stats via --numstat (all uncommitted changes vs HEAD)
  const numstat = await runGit(['diff', 'HEAD', '--numstat'])
  if (numstat.code !== 0) return null

  const { stats, perFileStats } = parseGitNumstat(numstat.stdout)

  // Include untracked files (new files not yet staged).
  // Just filenames - no content reading for performance.
  const remainingSlots = MAX_FILES - perFileStats.size
  if (remainingSlots > 0) {
    const untrackedStats = await fetchUntrackedFiles(remainingSlots)
    if (untrackedStats) {
      stats.filesCount += untrackedStats.size
      for (const [path, fileStats] of untrackedStats) {
        perFileStats.set(path, fileStats)
      }
    }
  }

  // Return stats only - hunks are fetched on-demand via fetchGitDiffHunks()
  // to avoid an expensive `git diff HEAD` call on every poll
  return { stats, perFileStats, hunks: new Map() }
}

/**
 * Fetch git diff hunks on-demand (for DiffDialog).
 * Separated from fetchGitDiff() to avoid expensive calls during polling.
 *
 * @returns Map of file path to hunks; empty when not in a git repo, during a
 *   transient git state, or when `git diff HEAD` fails.
 */
export async function fetchGitDiffHunks(): Promise<
  Map<string, StructuredPatchHunk[]>
> {
  if (!(await getIsGit())) return new Map()
  if (await isInTransientGitState()) return new Map()

  const { stdout, code } = await runGit(['diff', 'HEAD'])
  if (code !== 0) return new Map()

  return parseGitDiff(stdout)
}

/** Result of {@link parseGitNumstat}. */
export type NumstatResult = {
  stats: GitDiffStats
  perFileStats: Map<string, PerFileStats>
}

/**
 * Parse `git diff --numstat` output into stats.
 * Format: <added>\t<removed>\t<filename>
 * Binary files show '-' for counts and contribute 0 to the totals.
 *
 * Totals cover every valid line, but only the first MAX_FILES entries are
 * stored in `perFileStats`.
 *
 * @param stdout Raw numstat output.
 */
export function parseGitNumstat(stdout: string): NumstatResult {
  const perFileStats = new Map<string, PerFileStats>()
  let linesAdded = 0
  let linesRemoved = 0
  let filesCount = 0

  for (const line of stdout.trim().split('\n')) {
    if (!line) continue

    // Valid numstat lines have at least 3 tab-separated parts:
    // added, removed, filename (the filename itself may contain tabs).
    const parts = line.split('\t')
    if (parts.length < 3) continue

    const [addStr, remStr, ...nameParts] = parts
    const filePath = nameParts.join('\t')
    const isBinary = addStr === '-' || remStr === '-'
    const added = isBinary ? 0 : parseInt(addStr ?? '0', 10) || 0
    const removed = isBinary ? 0 : parseInt(remStr ?? '0', 10) || 0

    filesCount++
    linesAdded += added
    linesRemoved += removed

    // Only store first MAX_FILES entries
    if (perFileStats.size < MAX_FILES) {
      perFileStats.set(filePath, { added, removed, isBinary })
    }
  }

  return {
    stats: { filesCount, linesAdded, linesRemoved },
    perFileStats,
  }
}

/**
 * Parse unified diff output into per-file hunks.
 * Splits by "diff --git" and parses each file's hunks.
 *
 * Applies limits:
 * - MAX_FILES: stop after this many files
 * - File sections longer than MAX_DIFF_SIZE_BYTES (measured in string
 *   length): skipped entirely (not in result map)
 * - Other files: parsed but limited to MAX_LINES_PER_FILE collected lines;
 *   hunks after the limit are still recorded, with no lines
 *
 * Only lines starting with '+', '-' or ' ' (or empty lines) inside a hunk are
 * kept; other lines such as "\ No newline at end of file" are dropped.
 * Files without any hunk (e.g. binary or mode-only changes) are omitted.
 *
 * @param stdout Raw `git diff` output.
 */
export function parseGitDiff(
  stdout: string,
): Map<string, StructuredPatchHunk[]> {
  const result = new Map<string, StructuredPatchHunk[]>()
  if (!stdout.trim()) return result

  // Split by file diffs
  const fileDiffs = stdout.split(/^diff --git /m).filter(Boolean)

  for (const fileDiff of fileDiffs) {
    // Stop after MAX_FILES
    if (result.size >= MAX_FILES) break

    // Skip oversized file sections
    if (fileDiff.length > MAX_DIFF_SIZE_BYTES) continue

    const lines = fileDiff.split('\n')
    // Each section ends with a newline, so split() leaves a trailing ''.
    // Drop it so it is not mistaken for an empty context line and appended
    // to the last hunk.
    if (lines.length > 0 && lines[lines.length - 1] === '') {
      lines.pop()
    }

    // Extract filename from first line: "a/path/to/file b/path/to/file"
    const headerMatch = lines[0]?.match(DIFF_FILE_HEADER_RE)
    if (!headerMatch) continue
    const filePath = headerMatch[2] ?? headerMatch[1] ?? ''

    const fileHunks: StructuredPatchHunk[] = []
    let currentHunk: StructuredPatchHunk | null = null
    let lineCount = 0

    for (let i = 1; i < lines.length; i++) {
      const line = lines[i] ?? ''

      const hunkMatch = line.match(HUNK_HEADER_RE)
      if (hunkMatch) {
        if (currentHunk) fileHunks.push(currentHunk)
        currentHunk = {
          oldStart: parseInt(hunkMatch[1] ?? '0', 10),
          oldLines: parseInt(hunkMatch[2] ?? '1', 10),
          newStart: parseInt(hunkMatch[3] ?? '0', 10),
          newLines: parseInt(hunkMatch[4] ?? '1', 10),
          lines: [],
        }
        continue
      }

      // Everything before the first hunk is file metadata ("index …",
      // "--- a/…", "+++ b/…", "new file mode …", "Binary files …", …).
      // Once a hunk is open, a line starting with "---" or "+++" is real
      // content (a removed "--…" line or an added "++…" line) and must be
      // kept, so metadata is recognised by position, not by prefix.
      if (!currentHunk) continue

      // Stop adding lines once we hit the limit
      if (lineCount >= MAX_LINES_PER_FILE) continue

      const marker = line[0]
      if (line === '' || marker === '+' || marker === '-' || marker === ' ') {
        // Force a flat string copy to break V8 sliced string references.
        // When split() creates lines, V8 creates "sliced strings" that reference
        // the parent. This keeps the entire parent string (~MBs) alive as long as
        // any line is retained. Using '' + line forces a new flat string allocation,
        // unlike slice(0) which V8 may optimize to return the same reference.
        currentHunk.lines.push('' + line)
        lineCount++
      }
    }

    // Don't forget the last hunk
    if (currentHunk) fileHunks.push(currentHunk)

    if (fileHunks.length > 0) {
      result.set(filePath, fileHunks)
    }
  }

  return result
}

/**
 * Check if we're in a transient git state (merge, rebase, cherry-pick, or revert).
 * During these operations, we skip diff calculation since the working
 * tree contains incoming changes that weren't intentionally made.
 *
 * Uses fs.access to check for the marker files, avoiding process spawns.
 * Returns false when the git dir cannot be determined.
 */
async function isInTransientGitState(): Promise<boolean> {
  const gitDir = await getGitDir(getCwd())
  if (!gitDir) return false

  const probes = TRANSIENT_STATE_FILES.map(async file => {
    try {
      await access(join(gitDir, file))
      return true
    } catch {
      // access() rejects when the file is missing or inaccessible
      return false
    }
  })
  return (await Promise.all(probes)).some(Boolean)
}

/**
 * Fetch untracked file names (no content reading).
 * Returns file paths only - they'll be displayed with a note to stage them.
 *
 * @param maxFiles Maximum number of untracked files to include
 * @returns Map of path to zeroed stats flagged `isUntracked`, or null when the
 *   command fails or lists nothing.
 */
async function fetchUntrackedFiles(
  maxFiles: number,
): Promise<Map<string, PerFileStats> | null> {
  // Get list of untracked files (excludes gitignored)
  const { stdout, code } = await runGit([
    'ls-files',
    '--others',
    '--exclude-standard',
  ])
  if (code !== 0 || !stdout.trim()) return null

  const untrackedPaths = stdout.trim().split('\n').filter(Boolean)
  if (untrackedPaths.length === 0) return null

  // Just record filenames, no content reading
  const perFileStats = new Map<string, PerFileStats>()
  for (const filePath of untrackedPaths.slice(0, maxFiles)) {
    perFileStats.set(filePath, {
      added: 0,
      removed: 0,
      isBinary: false,
      isUntracked: true,
    })
  }
  return perFileStats
}

/**
 * Parse `git diff --shortstat` output into stats.
 * Format: " 1648 files changed, 52341 insertions(+), 8123 deletions(-)"
 *
 * The insertions and deletions clauses are each optional and default to 0.
 * Used as a quick probe before expensive operations.
 *
 * @returns Parsed totals, or null when the text has no "N file(s) changed".
 */
export function parseShortstat(stdout: string): GitDiffStats | null {
  // Match: "N files changed" with optional ", N insertions(+)" and ", N deletions(-)"
  const match = stdout.match(
    /(\d+)\s+files?\s+changed(?:,\s+(\d+)\s+insertions?\(\+\))?(?:,\s+(\d+)\s+deletions?\(-\))?/,
  )
  if (!match) return null
  return {
    filesCount: parseInt(match[1] ?? '0', 10),
    linesAdded: parseInt(match[2] ?? '0', 10),
    linesRemoved: parseInt(match[3] ?? '0', 10),
  }
}

const SINGLE_FILE_DIFF_TIMEOUT_MS = 3000

/** Structured single-file diff, shaped like an entry in a PR file list. */
export type ToolUseDiff = {
  filename: string
  status: 'modified' | 'added'
  additions: number
  deletions: number
  changes: number
  patch: string
  /** GitHub "owner/repo" when available (null for non-github.com or unknown repos) */
  repository: string | null
}

/**
 * Fetch a structured diff for a single file against the merge base with the
 * default branch. This produces a PR-like diff showing all changes since
 * the branch diverged. Falls back to diffing against HEAD if the merge base
 * cannot be determined (e.g., on the default branch itself).
 * For untracked files, generates a synthetic diff showing all additions.
 *
 * @param absoluteFilePath Absolute path of the file to diff.
 * @returns The diff, or null if no git root is found, the git diff fails or
 *   is empty, or an untracked file cannot be read or is too large.
 */
export async function fetchSingleFileGitDiff(
  absoluteFilePath: string,
): Promise<ToolUseDiff | null> {
  const gitRoot = findGitRoot(dirname(absoluteFilePath))
  if (!gitRoot) return null

  // git expects forward slashes regardless of the platform separator
  const gitPath = relative(gitRoot, absoluteFilePath).split(sep).join('/')
  const repository = getCachedRepository()

  // Check if the file is tracked by git. The "--" keeps a path that starts
  // with "-" from being parsed as an option.
  const { code: lsFilesCode } = await execFileNoThrowWithCwd(
    gitExe(),
    ['--no-optional-locks', 'ls-files', '--error-unmatch', '--', gitPath],
    { cwd: gitRoot, timeout: SINGLE_FILE_DIFF_TIMEOUT_MS },
  )

  if (lsFilesCode !== 0) {
    // File is untracked - generate synthetic diff
    const syntheticDiff = await generateSyntheticDiff(gitPath, absoluteFilePath)
    return syntheticDiff ? { ...syntheticDiff, repository } : null
  }

  // File is tracked - diff against merge base for PR-like view
  const diffRef = await getDiffRef(gitRoot)
  const { stdout, code } = await execFileNoThrowWithCwd(
    gitExe(),
    ['--no-optional-locks', 'diff', diffRef, '--', gitPath],
    { cwd: gitRoot, timeout: SINGLE_FILE_DIFF_TIMEOUT_MS },
  )
  if (code !== 0 || !stdout) return null

  return {
    ...parseRawDiffToToolUseDiff(gitPath, stdout, 'modified'),
    repository,
  }
}

/**
 * Parse raw unified diff output into the structured ToolUseDiff format.
 * Extracts everything from the first hunk header (@@) onward as the patch,
 * and counts additions/deletions.
 *
 * Lines are counted by their leading '+' / '-' marker. File-header lines
 * ("--- a/…", "+++ b/…") are excluded by position: they only occur between a
 * "diff --git" line and the next "@@". Inside a hunk, a line such as "+++i"
 * or "--- note" is real content and is counted.
 */
function parseRawDiffToToolUseDiff(
  filename: string,
  rawDiff: string,
  status: 'modified' | 'added',
): Omit<ToolUseDiff, 'repository'> {
  const patchLines: string[] = []
  let inHunks = false
  // True between a "diff --git" line and the following "@@". This can occur
  // after the first hunk if git emits a second section for the same path.
  let inFileHeader = false
  let additions = 0
  let deletions = 0

  for (const line of rawDiff.split('\n')) {
    if (line.startsWith('@@')) {
      inHunks = true
      inFileHeader = false
    } else if (line.startsWith('diff --git ')) {
      inFileHeader = true
    }

    if (!inHunks) continue
    patchLines.push(line)

    if (inFileHeader) continue
    if (line.startsWith('+')) {
      additions++
    } else if (line.startsWith('-')) {
      deletions++
    }
  }

  return {
    filename,
    status,
    additions,
    deletions,
    changes: additions + deletions,
    patch: patchLines.join('\n'),
  }
}

/**
 * Determine the best ref to diff against for a PR-like diff.
 * Priority:
 * 1. CLAUDE_CODE_BASE_REF env var (set externally, e.g. by CCR managed containers)
 * 2. Merge base with the default branch (best guess)
 * 3. HEAD (fallback if merge-base fails)
 *
 * @param gitRoot Repository root used as the working directory for git.
 */
async function getDiffRef(gitRoot: string): Promise<string> {
  const baseBranch =
    process.env.CLAUDE_CODE_BASE_REF || (await getDefaultBranch())
  const { stdout, code } = await execFileNoThrowWithCwd(
    gitExe(),
    ['--no-optional-locks', 'merge-base', 'HEAD', baseBranch],
    { cwd: gitRoot, timeout: SINGLE_FILE_DIFF_TIMEOUT_MS },
  )
  const mergeBase = stdout.trim()
  return code === 0 && mergeBase ? mergeBase : 'HEAD'
}

/**
 * Build an "all lines added" diff for an untracked file by reading it from
 * disk.
 *
 * @param gitPath Repo-relative path reported as the diff's filename.
 * @param absoluteFilePath Path the content is read from.
 * @returns The synthetic diff, or null when the size check rejects the file
 *   or reading it throws. An empty file yields zero additions and an empty
 *   patch, since a hunk with no lines has no valid header.
 */
async function generateSyntheticDiff(
  gitPath: string,
  absoluteFilePath: string,
): Promise<Omit<ToolUseDiff, 'repository'> | null> {
  try {
    if (!isFileWithinReadSizeLimit(absoluteFilePath, MAX_DIFF_SIZE_BYTES)) {
      return null
    }
    const content = await readFile(absoluteFilePath, 'utf-8')
    const lines = content.split('\n')
    // Remove trailing empty line from split if file ends with newline
    if (lines.length > 0 && lines[lines.length - 1] === '') {
      lines.pop()
    }
    const lineCount = lines.length
    const patch =
      lineCount === 0
        ? ''
        : `@@ -0,0 +1,${lineCount} @@\n${lines.map(line => `+${line}`).join('\n')}`
    return {
      filename: gitPath,
      status: 'added',
      additions: lineCount,
      deletions: 0,
      changes: lineCount,
      patch,
    }
  } catch {
    // Best-effort: an unreadable file simply has no diff to show
    return null
  }
}