// Source dump of Claude Code (at main; 679 lines, 21 kB).
import type { ChildProcess, ExecFileException } from 'child_process'
import { execFile, spawn } from 'child_process'
import memoize from 'lodash-es/memoize.js'
import { homedir } from 'os'
import * as path from 'path'
import { logEvent } from 'src/services/analytics/index.js'
import { fileURLToPath } from 'url'
import { isInBundledMode } from './bundledMode.js'
import { logForDebugging } from './debug.js'
import { isEnvDefinedFalsy } from './envUtils.js'
import { execFileNoThrow } from './execFileNoThrow.js'
import { findExecutable } from './findExecutable.js'
import { logError } from './log.js'
import { getPlatform } from './platform.js'
import { countCharInString } from './stringUtils.js'

const __filename = fileURLToPath(import.meta.url)
// we use node:path.join instead of node:url.resolve because the former doesn't encode spaces
const __dirname = path.join(
  __filename,
  process.env.NODE_ENV === 'test' ? '../../../' : '../',
)

/**
 * How ripgrep is invoked: which executable to run, which arguments are always
 * prepended, and (for the embedded build) which argv[0] to present.
 */
type RipgrepConfig = {
  mode: 'system' | 'builtin' | 'embedded'
  command: string
  args: string[]
  argv0?: string
}

/**
 * Resolve the ripgrep invocation for this process. Memoized, so the decision
 * is made once and reused.
 *
 * Order of preference:
 *  1. system `rg`, when USE_BUILTIN_RIPGREP is defined and falsy and an `rg`
 *     executable was found;
 *  2. the ripgrep embedded in the bundled executable;
 *  3. the vendored binary under `vendor/ripgrep/<arch>-<platform>/`.
 */
const getRipgrepConfig = memoize((): RipgrepConfig => {
  const userWantsSystemRipgrep = isEnvDefinedFalsy(
    process.env.USE_BUILTIN_RIPGREP,
  )

  // Try system ripgrep if user wants it
  if (userWantsSystemRipgrep) {
    const { cmd: systemPath } = findExecutable('rg', [])
    if (systemPath !== 'rg') {
      // SECURITY: Use command name 'rg' instead of systemPath to prevent PATH hijacking
      // If we used systemPath, a malicious ./rg.exe in current directory could be executed
      // Using just 'rg' lets the OS resolve it safely with NoDefaultCurrentDirectoryInExePath protection
      return { mode: 'system', command: 'rg', args: [] }
    }
  }

  // In bundled (native) mode, ripgrep is statically compiled into bun-internal
  // and dispatches based on argv[0]. We spawn ourselves with argv0='rg'.
  if (isInBundledMode()) {
    return {
      mode: 'embedded',
      command: process.execPath,
      args: ['--no-config'],
      argv0: 'rg',
    }
  }

  const rgRoot = path.resolve(__dirname, 'vendor', 'ripgrep')
  const command =
    process.platform === 'win32'
      ? path.resolve(rgRoot, `${process.arch}-win32`, 'rg.exe')
      : path.resolve(rgRoot, `${process.arch}-${process.platform}`, 'rg')

  return { mode: 'builtin', command, args: [] }
})

/**
 * Public view of the resolved ripgrep invocation.
 *
 * @returns the executable path, the arguments that must precede any
 *   caller-supplied arguments, and the argv0 override (embedded mode only).
 */
export function ripgrepCommand(): {
  rgPath: string
  rgArgs: string[]
  argv0?: string
} {
  const config = getRipgrepConfig()
  return {
    rgPath: config.command,
    rgArgs: config.args,
    argv0: config.argv0,
  }
}

const MAX_BUFFER_SIZE = 20_000_000 // 20MB; large monorepos can have 200k+ files

/**
 * Check if an error is EAGAIN (resource temporarily unavailable).
 * This happens in resource-constrained environments (Docker, CI) when
 * ripgrep tries to spawn too many threads.
 */
function isEagainError(stderr: string): boolean {
  return (
    stderr.includes('os error 11') ||
    stderr.includes('Resource temporarily unavailable')
  )
}

/**
 * Custom error class for ripgrep timeouts.
 * This allows callers to distinguish between "no matches" and "timed out".
 */
export class RipgrepTimeoutError extends Error {
  constructor(
    message: string,
    public readonly partialResults: string[],
  ) {
    super(message)
    this.name = 'RipgrepTimeoutError'
  }
}

/**
 * Effective ripgrep timeout in milliseconds.
 *
 * Allows the timeout to be configured via CLAUDE_CODE_GLOB_TIMEOUT_SECONDS
 * (in seconds); otherwise uses platform defaults. WSL has a severe performance
 * penalty for file reads (3-5x slower on WSL2), so it gets a longer default.
 */
function getRipgrepTimeoutMs(): number {
  const defaultTimeout = getPlatform() === 'wsl' ? 60_000 : 20_000
  const parsedSeconds =
    parseInt(process.env.CLAUDE_CODE_GLOB_TIMEOUT_SECONDS || '', 10) || 0
  return parsedSeconds > 0 ? parsedSeconds * 1000 : defaultTimeout
}

/**
 * Split ripgrep stdout into non-empty lines, stripping a trailing CR from
 * each line (Windows line endings).
 */
function splitOutputLines(stdout: string): string[] {
  return stdout
    .trim()
    .split('\n')
    .map(line => line.replace(/\r$/, ''))
    .filter(Boolean)
}

/**
 * Run ripgrep once and deliver its buffered output through `callback`.
 *
 * @param args ripgrep arguments (placed after the configured base arguments)
 * @param target path appended as the last argument
 * @param abortSignal aborting it kills the child process
 * @param callback invoked exactly once with (error, stdout, stderr); in the
 *   embedded path, exit codes 0 and 1 are reported with a null error
 * @param singleThread when true, `-j 1` is added for this invocation only
 * @returns the spawned child process
 */
function ripGrepRaw(
  args: string[],
  target: string,
  abortSignal: AbortSignal,
  callback: (
    error: ExecFileException | null,
    stdout: string,
    stderr: string,
  ) => void,
  singleThread = false,
): ChildProcess {
  // NB: When running interactively, ripgrep does not require a path as its last
  // argument, but when run non-interactively, it will hang unless a path or file
  // pattern is provided

  const { rgPath, rgArgs, argv0 } = ripgrepCommand()

  // Use single-threaded mode only if explicitly requested for this call's retry
  const threadArgs = singleThread ? ['-j', '1'] : []
  const fullArgs = [...rgArgs, ...threadArgs, ...args, target]
  const timeout = getRipgrepTimeoutMs()

  // For embedded ripgrep, use spawn with argv0 (execFile doesn't support argv0 properly)
  if (argv0) {
    const child = spawn(rgPath, fullArgs, {
      argv0,
      signal: abortSignal,
      // Prevent visible console window on Windows (no-op on other platforms)
      windowsHide: true,
    })

    let stdout = ''
    let stderr = ''
    let stdoutTruncated = false
    let stderrTruncated = false

    // Decode at the stream level so a multi-byte UTF-8 character that is split
    // across two chunks is not turned into replacement characters.
    child.stdout?.setEncoding('utf8')
    child.stderr?.setEncoding('utf8')

    child.stdout?.on('data', (data: string) => {
      if (!stdoutTruncated) {
        stdout += data
        if (stdout.length > MAX_BUFFER_SIZE) {
          stdout = stdout.slice(0, MAX_BUFFER_SIZE)
          stdoutTruncated = true
        }
      }
    })

    child.stderr?.on('data', (data: string) => {
      if (!stderrTruncated) {
        stderr += data
        if (stderr.length > MAX_BUFFER_SIZE) {
          stderr = stderr.slice(0, MAX_BUFFER_SIZE)
          stderrTruncated = true
        }
      }
    })

    // Set up timeout with SIGKILL escalation.
    // SIGTERM alone may not kill ripgrep if it's blocked in uninterruptible I/O
    // (e.g., deep filesystem traversal). If SIGTERM doesn't work within 5 seconds,
    // escalate to SIGKILL which cannot be caught or ignored.
    // On Windows, child.kill('SIGTERM') throws; use default signal.
    let killTimeoutId: ReturnType<typeof setTimeout> | undefined
    const timeoutId = setTimeout(() => {
      if (process.platform === 'win32') {
        child.kill()
      } else {
        child.kill('SIGTERM')
        killTimeoutId = setTimeout(c => c.kill('SIGKILL'), 5_000, child)
      }
    }, timeout)

    // On Windows, both 'close' and 'error' can fire for the same process
    // (e.g. when AbortSignal kills the child). Guard against double-callback.
    let settled = false
    child.on('close', (code, signal) => {
      if (settled) return
      settled = true
      clearTimeout(timeoutId)
      clearTimeout(killTimeoutId)
      if (code === 0 || code === 1) {
        // 0 = matches found, 1 = no matches (both are success)
        callback(null, stdout, stderr)
      } else {
        const error: ExecFileException = new Error(
          `ripgrep exited with code ${code}`,
        )
        error.code = code ?? undefined
        error.signal = signal ?? undefined
        callback(error, stdout, stderr)
      }
    })

    child.on('error', (err: NodeJS.ErrnoException) => {
      if (settled) return
      settled = true
      clearTimeout(timeoutId)
      clearTimeout(killTimeoutId)
      const error: ExecFileException = err
      callback(error, stdout, stderr)
    })

    return child
  }

  // For non-embedded ripgrep, use execFile
  // Use SIGKILL as killSignal because SIGTERM may not terminate ripgrep
  // when it's blocked in uninterruptible filesystem I/O.
  // On Windows, SIGKILL throws; use default (undefined) which sends SIGTERM.
  return execFile(
    rgPath,
    fullArgs,
    {
      maxBuffer: MAX_BUFFER_SIZE,
      signal: abortSignal,
      timeout,
      killSignal: process.platform === 'win32' ? undefined : 'SIGKILL',
    },
    callback,
  )
}

/**
 * Stream-count lines from `rg --files` without buffering stdout.
 *
 * On large repos (e.g. 247k files, 16MB of paths), calling `ripGrep()` just
 * to read `.length` materializes the full stdout string plus a 247k-element
 * array. This counts newline bytes per chunk instead; peak memory is one
 * stream chunk (~64KB).
 *
 * Intentionally minimal: the only caller is telemetry (countFilesRoundedRg),
 * which swallows all errors. No EAGAIN retry, no stderr capture, no internal
 * timeout (callers pass AbortSignal.timeout; spawn's signal option kills rg).
 */
async function ripGrepFileCount(
  args: string[],
  target: string,
  abortSignal: AbortSignal,
): Promise<number> {
  await codesignRipgrepIfNecessary()
  const { rgPath, rgArgs, argv0 } = ripgrepCommand()

  return new Promise<number>((resolve, reject) => {
    const child = spawn(rgPath, [...rgArgs, ...args, target], {
      argv0,
      signal: abortSignal,
      windowsHide: true,
      stdio: ['ignore', 'pipe', 'ignore'],
    })

    let lines = 0
    child.stdout?.on('data', (chunk: Buffer) => {
      lines += countCharInString(chunk, '\n')
    })

    // On Windows, both 'close' and 'error' can fire for the same process.
    let settled = false
    child.on('close', code => {
      if (settled) return
      settled = true
      if (code === 0 || code === 1) resolve(lines)
      else reject(new Error(`rg --files exited ${code}`))
    })
    child.on('error', err => {
      if (settled) return
      settled = true
      reject(err)
    })
  })
}

/**
 * Stream lines from ripgrep as they arrive, calling `onLines` per stdout chunk.
 *
 * Unlike `ripGrep()` which buffers the entire stdout, this flushes complete
 * lines as soon as each chunk arrives — first results paint while rg is still
 * walking the tree (the fzf `change:reload` pattern). Partial trailing lines
 * are carried across chunk boundaries.
 *
 * Callers that want to stop early (e.g. after N matches) should abort the
 * signal — spawn's signal option kills rg. No EAGAIN retry, no internal
 * timeout, stderr is ignored; interactive callers own recovery.
 */
export async function ripGrepStream(
  args: string[],
  target: string,
  abortSignal: AbortSignal,
  onLines: (lines: string[]) => void,
): Promise<void> {
  await codesignRipgrepIfNecessary()
  const { rgPath, rgArgs, argv0 } = ripgrepCommand()

  return new Promise<void>((resolve, reject) => {
    const child = spawn(rgPath, [...rgArgs, ...args, target], {
      argv0,
      signal: abortSignal,
      windowsHide: true,
      stdio: ['ignore', 'pipe', 'ignore'],
    })

    const stripCR = (l: string) => (l.endsWith('\r') ? l.slice(0, -1) : l)
    let remainder = ''
    // Decode at the stream level: per-chunk Buffer.toString() would corrupt a
    // multi-byte UTF-8 character (e.g. in a non-ASCII path) that straddles two
    // chunks.
    child.stdout?.setEncoding('utf8')
    child.stdout?.on('data', (chunk: string) => {
      const data = remainder + chunk
      const lines = data.split('\n')
      remainder = lines.pop() ?? ''
      if (lines.length) onLines(lines.map(stripCR))
    })

    // On Windows, both 'close' and 'error' can fire for the same process.
    let settled = false
    child.on('close', code => {
      if (settled) return
      // Abort races close — don't flush a torn tail from a killed process.
      // Promise still settles: spawn's signal option fires 'error' with
      // AbortError → reject below.
      if (abortSignal.aborted) return
      settled = true
      if (code === 0 || code === 1) {
        if (remainder) onLines([stripCR(remainder)])
        resolve()
      } else {
        reject(new Error(`ripgrep exited with code ${code}`))
      }
    })
    child.on('error', err => {
      if (settled) return
      settled = true
      reject(err)
    })
  })
}

/**
 * Run ripgrep and resolve with its output lines.
 *
 * - Resolves with the non-empty output lines on success, and with `[]` when
 *   ripgrep exits with code 1 (no matches).
 * - Rejects with the underlying error for ENOENT / EACCES / EPERM.
 * - Retries once with `-j 1` when stderr indicates EAGAIN.
 * - For other failures resolves with whatever partial lines were captured
 *   (dropping the possibly-torn last line on timeout / buffer overflow),
 *   except that a timeout with no usable lines rejects with
 *   `RipgrepTimeoutError`.
 */
export async function ripGrep(
  args: string[],
  target: string,
  abortSignal: AbortSignal,
): Promise<string[]> {
  await codesignRipgrepIfNecessary()

  // Test ripgrep on first use and cache the result (fire and forget)
  void testRipgrepOnFirstUse().catch(error => {
    logError(error)
  })

  return new Promise((resolve, reject) => {
    const handleResult = (
      error: ExecFileException | null,
      stdout: string,
      stderr: string,
      isRetry: boolean,
    ): void => {
      // Success case
      if (!error) {
        resolve(splitOutputLines(stdout))
        return
      }

      // Exit code 1 is normal "no matches"
      if (error.code === 1) {
        resolve([])
        return
      }

      // Critical errors that indicate ripgrep is broken, not "no matches"
      // These should be surfaced to the user rather than silently returning empty results
      const CRITICAL_ERROR_CODES = ['ENOENT', 'EACCES', 'EPERM']
      if (CRITICAL_ERROR_CODES.includes(error.code as string)) {
        reject(error)
        return
      }

      // If we hit EAGAIN and haven't retried yet, retry with single-threaded mode
      // Note: We only use -j 1 for this specific retry, not for future calls.
      // Persisting single-threaded mode globally caused timeouts on large repos
      // where EAGAIN was just a transient startup error.
      if (!isRetry && isEagainError(stderr)) {
        logForDebugging(
          `rg EAGAIN error detected, retrying with single-threaded mode (-j 1)`,
        )
        logEvent('tengu_ripgrep_eagain_retry', {})
        ripGrepRaw(
          args,
          target,
          abortSignal,
          (retryError, retryStdout, retryStderr) => {
            handleResult(retryError, retryStdout, retryStderr, true)
          },
          true, // Force single-threaded mode for this retry only
        )
        return
      }

      // For all other errors, try to return partial results if available
      const hasOutput = stdout && stdout.trim().length > 0
      const isTimeout =
        error.signal === 'SIGTERM' ||
        error.signal === 'SIGKILL' ||
        error.code === 'ABORT_ERR'
      const isBufferOverflow =
        error.code === 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER'

      let lines: string[] = []
      if (hasOutput) {
        lines = splitOutputLines(stdout)
        // Drop last line for timeouts and buffer overflow - it may be incomplete
        if (lines.length > 0 && (isTimeout || isBufferOverflow)) {
          lines = lines.slice(0, -1)
        }
      }

      logForDebugging(
        `rg error (signal=${error.signal}, code=${error.code}, stderr: ${stderr}), ${lines.length} results`,
      )

      // code 2 = ripgrep usage error (already handled); ABORT_ERR = caller
      // explicitly aborted (not an error, just a cancellation — interactive
      // callers may abort on every keystroke-after-debounce).
      if (error.code !== 2 && error.code !== 'ABORT_ERR') {
        logError(error)
      }

      // If we timed out with no results, throw an error so Claude knows the search
      // didn't complete rather than thinking there were no matches
      if (isTimeout && lines.length === 0) {
        // Report the timeout that was actually in effect, including a
        // CLAUDE_CODE_GLOB_TIMEOUT_SECONDS override, not just the platform default.
        const timeoutSeconds = getRipgrepTimeoutMs() / 1000
        reject(
          new RipgrepTimeoutError(
            `Ripgrep search timed out after ${timeoutSeconds} seconds. The search may have matched files but did not complete in time. Try searching a more specific path or pattern.`,
            lines,
          ),
        )
        return
      }

      resolve(lines)
    }

    ripGrepRaw(args, target, abortSignal, (error, stdout, stderr) => {
      handleResult(error, stdout, stderr, false)
    })
  })
}

/**
 * Count files in a directory recursively using ripgrep and round the count to
 * one significant digit for privacy.
 *
 * This is much more efficient than using native Node.js methods for counting files
 * in large directories since it uses ripgrep's highly optimized file traversal.
 *
 * The result is memoized per (dirPath, ignorePatterns) key. The cached value is
 * the returned promise, so an `undefined` result from a failed or aborted count
 * is cached as well.
 *
 * @param dirPath Directory path to count files in
 * @param abortSignal AbortSignal to cancel the operation
 * @param ignorePatterns Optional additional patterns to ignore (beyond .gitignore)
 * @returns Approximate file count rounded to one significant digit, or
 *   `undefined` when dirPath is the home directory or the count failed
 */
export const countFilesRoundedRg = memoize(
  async (
    dirPath: string,
    abortSignal: AbortSignal,
    ignorePatterns: string[] = [],
  ): Promise<number | undefined> => {
    // Skip file counting if we're in the home directory to avoid triggering
    // macOS TCC permission dialogs for Desktop, Downloads, Documents, etc.
    if (path.resolve(dirPath) === path.resolve(homedir())) {
      return undefined
    }

    try {
      // Build ripgrep arguments:
      // --files: List files that would be searched (rather than searching them)
      // --hidden: Search hidden files and directories
      const args = ['--files', '--hidden']

      // Add ignore patterns if provided
      ignorePatterns.forEach(pattern => {
        args.push('--glob', `!${pattern}`)
      })

      const count = await ripGrepFileCount(args, dirPath, abortSignal)

      // Round for privacy
      if (count === 0) return 0

      const magnitude = Math.floor(Math.log10(count))
      const power = Math.pow(10, magnitude)

      // Round to one significant digit
      // e.g., 8 -> 8, 42 -> 40, 350 -> 400, 750 -> 800, 95 -> 100
      return Math.round(count / power) * power
    } catch (error) {
      // AbortSignal.timeout firing is expected on large/slow repos, not an error.
      if ((error as Error)?.name !== 'AbortError') logError(error)
    }
  },
  // lodash memoize's default resolver only uses the first argument.
  // ignorePatterns affect the result, so include them in the cache key.
  // abortSignal is intentionally excluded — it doesn't affect the count.
  (dirPath, _abortSignal, ignorePatterns = []) =>
    `${dirPath}|${ignorePatterns.join(',')}`,
)

// Singleton to store ripgrep availability status
let ripgrepStatus: {
  working: boolean
  lastTested: number
  config: RipgrepConfig
} | null = null

/**
 * Get ripgrep status and configuration info
 * Returns current configuration immediately, with working status if available
 */
export function getRipgrepStatus(): {
  mode: 'system' | 'builtin' | 'embedded'
  path: string
  working: boolean | null // null if not yet tested
} {
  const config = getRipgrepConfig()
  return {
    mode: config.mode,
    path: config.command,
    working: ripgrepStatus?.working ?? null,
  }
}

/**
 * Test ripgrep availability on first use and cache the result
 */
const testRipgrepOnFirstUse = memoize(async (): Promise<void> => {
  // Already tested
  if (ripgrepStatus !== null) {
    return
  }

  const config = getRipgrepConfig()

  try {
    let test: { code: number; stdout: string }

    // For embedded ripgrep, use Bun.spawn with argv0
    if (config.argv0) {
      // Only Bun embeds ripgrep.
      // eslint-disable-next-line custom-rules/require-bun-typeof-guard
      const proc = Bun.spawn([config.command, '--version'], {
        argv0: config.argv0,
        stderr: 'ignore',
        stdout: 'pipe',
      })

      // Bun's ReadableStream has .text() at runtime, but TS types don't reflect it
      const [stdout, code] = await Promise.all([
        (proc.stdout as unknown as Blob).text(),
        proc.exited,
      ])
      test = {
        code,
        stdout,
      }
    } else {
      test = await execFileNoThrow(
        config.command,
        [...config.args, '--version'],
        {
          timeout: 5000,
        },
      )
    }

    const working =
      test.code === 0 && !!test.stdout && test.stdout.startsWith('ripgrep ')

    ripgrepStatus = {
      working,
      lastTested: Date.now(),
      config,
    }

    logForDebugging(
      `Ripgrep first use test: ${working ? 'PASSED' : 'FAILED'} (mode=${config.mode}, path=${config.command})`,
    )

    // Log telemetry for actual ripgrep availability
    logEvent('tengu_ripgrep_availability', {
      working: working ? 1 : 0,
      using_system: config.mode === 'system' ? 1 : 0,
    })
  } catch (error) {
    ripgrepStatus = {
      working: false,
      lastTested: Date.now(),
      config,
    }
    logError(error)
  }
})

// In-flight (or completed) codesign check. Caching the promise rather than a
// boolean flag means concurrent first callers all wait for signing to finish
// instead of spawning rg while the binary is still being re-signed.
let codesignCheck: Promise<void> | undefined

/**
 * On macOS, make sure the vendored ripgrep binary is signed before first use.
 * The check runs at most once per process; every caller awaits the same run.
 * No-op on other platforms.
 */
function codesignRipgrepIfNecessary(): Promise<void> {
  if (process.platform !== 'darwin') {
    return Promise.resolve()
  }
  if (!codesignCheck) {
    codesignCheck = codesignBuiltinRipgrep()
  }
  return codesignCheck
}

/**
 * Ad-hoc sign the vendored rg binary and remove its quarantine attribute when
 * `codesign -vv -d` reports it as linker-signed. Best effort: failures are
 * logged, never thrown.
 */
async function codesignBuiltinRipgrep(): Promise<void> {
  try {
    // Only sign the standalone vendored rg binary (npm builds)
    const config = getRipgrepConfig()
    if (config.mode !== 'builtin') {
      return
    }
    const builtinPath = config.command

    // First, check to see if ripgrep is already signed
    const lines = (
      await execFileNoThrow('codesign', ['-vv', '-d', builtinPath], {
        preserveOutputOnError: false,
      })
    ).stdout.split('\n')

    const needsSigned = lines.some(line => line.includes('linker-signed'))
    if (!needsSigned) {
      return
    }

    const signResult = await execFileNoThrow('codesign', [
      '--sign',
      '-',
      '--force',
      '--preserve-metadata=entitlements,requirements,flags,runtime',
      builtinPath,
    ])

    if (signResult.code !== 0) {
      logError(
        new Error(
          `Failed to sign ripgrep: ${signResult.stdout} ${signResult.stderr}`,
        ),
      )
    }

    const quarantineResult = await execFileNoThrow('xattr', [
      '-d',
      'com.apple.quarantine',
      builtinPath,
    ])

    if (quarantineResult.code !== 0) {
      logError(
        new Error(
          `Failed to remove quarantine: ${quarantineResult.stdout} ${quarantineResult.stderr}`,
        ),
      )
    }
  } catch (e) {
    logError(e)
  }
}