// Source dump of the Claude Code Grep tool (main branch, 577 lines, 20 kB).
1import { z } from 'zod/v4' 2import type { ValidationResult } from '../../Tool.js' 3import { buildTool, type ToolDef } from '../../Tool.js' 4import { getCwd } from '../../utils/cwd.js' 5import { isENOENT } from '../../utils/errors.js' 6import { 7 FILE_NOT_FOUND_CWD_NOTE, 8 suggestPathUnderCwd, 9} from '../../utils/file.js' 10import { getFsImplementation } from '../../utils/fsOperations.js' 11import { lazySchema } from '../../utils/lazySchema.js' 12import { expandPath, toRelativePath } from '../../utils/path.js' 13import { 14 checkReadPermissionForTool, 15 getFileReadIgnorePatterns, 16 normalizePatternsToPath, 17} from '../../utils/permissions/filesystem.js' 18import type { PermissionDecision } from '../../utils/permissions/PermissionResult.js' 19import { matchWildcardPattern } from '../../utils/permissions/shellRuleMatching.js' 20import { getGlobExclusionsForPluginCache } from '../../utils/plugins/orphanedPluginFilter.js' 21import { ripGrep } from '../../utils/ripgrep.js' 22import { semanticBoolean } from '../../utils/semanticBoolean.js' 23import { semanticNumber } from '../../utils/semanticNumber.js' 24import { plural } from '../../utils/stringUtils.js' 25import { GREP_TOOL_NAME, getDescription } from './prompt.js' 26import { 27 getToolUseSummary, 28 renderToolResultMessage, 29 renderToolUseErrorMessage, 30 renderToolUseMessage, 31} from './UI.js' 32 33const inputSchema = lazySchema(() => 34 z.strictObject({ 35 pattern: z 36 .string() 37 .describe( 38 'The regular expression pattern to search for in file contents', 39 ), 40 path: z 41 .string() 42 .optional() 43 .describe( 44 'File or directory to search in (rg PATH). Defaults to current working directory.', 45 ), 46 glob: z 47 .string() 48 .optional() 49 .describe( 50 'Glob pattern to filter files (e.g. 
"*.js", "*.{ts,tsx}") - maps to rg --glob', 51 ), 52 output_mode: z 53 .enum(['content', 'files_with_matches', 'count']) 54 .optional() 55 .describe( 56 'Output mode: "content" shows matching lines (supports -A/-B/-C context, -n line numbers, head_limit), "files_with_matches" shows file paths (supports head_limit), "count" shows match counts (supports head_limit). Defaults to "files_with_matches".', 57 ), 58 '-B': semanticNumber(z.number().optional()).describe( 59 'Number of lines to show before each match (rg -B). Requires output_mode: "content", ignored otherwise.', 60 ), 61 '-A': semanticNumber(z.number().optional()).describe( 62 'Number of lines to show after each match (rg -A). Requires output_mode: "content", ignored otherwise.', 63 ), 64 '-C': semanticNumber(z.number().optional()).describe('Alias for context.'), 65 context: semanticNumber(z.number().optional()).describe( 66 'Number of lines to show before and after each match (rg -C). Requires output_mode: "content", ignored otherwise.', 67 ), 68 '-n': semanticBoolean(z.boolean().optional()).describe( 69 'Show line numbers in output (rg -n). Requires output_mode: "content", ignored otherwise. Defaults to true.', 70 ), 71 '-i': semanticBoolean(z.boolean().optional()).describe( 72 'Case insensitive search (rg -i)', 73 ), 74 type: z 75 .string() 76 .optional() 77 .describe( 78 'File type to search (rg --type). Common types: js, py, rust, go, java, etc. More efficient than include for standard file types.', 79 ), 80 head_limit: semanticNumber(z.number().optional()).describe( 81 'Limit output to first N lines/entries, equivalent to "| head -N". Works across all output modes: content (limits output lines), files_with_matches (limits file paths), count (limits count entries). Defaults to 250 when unspecified. 
Pass 0 for unlimited (use sparingly — large result sets waste context).', 82 ), 83 offset: semanticNumber(z.number().optional()).describe( 84 'Skip first N lines/entries before applying head_limit, equivalent to "| tail -n +N | head -N". Works across all output modes. Defaults to 0.', 85 ), 86 multiline: semanticBoolean(z.boolean().optional()).describe( 87 'Enable multiline mode where . matches newlines and patterns can span lines (rg -U --multiline-dotall). Default: false.', 88 ), 89 }), 90) 91type InputSchema = ReturnType<typeof inputSchema> 92 93// Version control system directories to exclude from searches 94// These are excluded automatically because they create noise in search results 95const VCS_DIRECTORIES_TO_EXCLUDE = [ 96 '.git', 97 '.svn', 98 '.hg', 99 '.bzr', 100 '.jj', 101 '.sl', 102] as const 103 104// Default cap on grep results when head_limit is unspecified. Unbounded content-mode 105// greps can fill up to the 20KB persist threshold (~6-24K tokens/grep-heavy session). 106// 250 is generous enough for exploratory searches while preventing context bloat. 107// Pass head_limit=0 explicitly for unlimited. 108const DEFAULT_HEAD_LIMIT = 250 109 110function applyHeadLimit<T>( 111 items: T[], 112 limit: number | undefined, 113 offset: number = 0, 114): { items: T[]; appliedLimit: number | undefined } { 115 // Explicit 0 = unlimited escape hatch 116 if (limit === 0) { 117 return { items: items.slice(offset), appliedLimit: undefined } 118 } 119 const effectiveLimit = limit ?? DEFAULT_HEAD_LIMIT 120 const sliced = items.slice(offset, offset + effectiveLimit) 121 // Only report appliedLimit when truncation actually occurred, so the model 122 // knows there may be more results and can paginate with offset. 123 const wasTruncated = items.length - offset > effectiveLimit 124 return { 125 items: sliced, 126 appliedLimit: wasTruncated ? effectiveLimit : undefined, 127 } 128} 129 130// Format limit/offset information for display in tool results. 
131// appliedLimit is only set when truncation actually occurred (see applyHeadLimit), 132// so it may be undefined even when appliedOffset is set — build parts conditionally 133// to avoid "limit: undefined" appearing in user-visible output. 134function formatLimitInfo( 135 appliedLimit: number | undefined, 136 appliedOffset: number | undefined, 137): string { 138 const parts: string[] = [] 139 if (appliedLimit !== undefined) parts.push(`limit: ${appliedLimit}`) 140 if (appliedOffset) parts.push(`offset: ${appliedOffset}`) 141 return parts.join(', ') 142} 143 144const outputSchema = lazySchema(() => 145 z.object({ 146 mode: z.enum(['content', 'files_with_matches', 'count']).optional(), 147 numFiles: z.number(), 148 filenames: z.array(z.string()), 149 content: z.string().optional(), 150 numLines: z.number().optional(), // For content mode 151 numMatches: z.number().optional(), // For count mode 152 appliedLimit: z.number().optional(), // The limit that was applied (if any) 153 appliedOffset: z.number().optional(), // The offset that was applied 154 }), 155) 156type OutputSchema = ReturnType<typeof outputSchema> 157 158type Output = z.infer<OutputSchema> 159 160export const GrepTool = buildTool({ 161 name: GREP_TOOL_NAME, 162 searchHint: 'search file contents with regex (ripgrep)', 163 // 20K chars - tool result persistence threshold 164 maxResultSizeChars: 20_000, 165 strict: true, 166 async description() { 167 return getDescription() 168 }, 169 userFacingName() { 170 return 'Search' 171 }, 172 getToolUseSummary, 173 getActivityDescription(input) { 174 const summary = getToolUseSummary(input) 175 return summary ? `Searching for ${summary}` : 'Searching' 176 }, 177 get inputSchema(): InputSchema { 178 return inputSchema() 179 }, 180 get outputSchema(): OutputSchema { 181 return outputSchema() 182 }, 183 isConcurrencySafe() { 184 return true 185 }, 186 isReadOnly() { 187 return true 188 }, 189 toAutoClassifierInput(input) { 190 return input.path ? 
`${input.pattern} in ${input.path}` : input.pattern 191 }, 192 isSearchOrReadCommand() { 193 return { isSearch: true, isRead: false } 194 }, 195 getPath({ path }): string { 196 return path || getCwd() 197 }, 198 async preparePermissionMatcher({ pattern }) { 199 return rulePattern => matchWildcardPattern(rulePattern, pattern) 200 }, 201 async validateInput({ path }): Promise<ValidationResult> { 202 // If path is provided, validate that it exists 203 if (path) { 204 const fs = getFsImplementation() 205 const absolutePath = expandPath(path) 206 207 // SECURITY: Skip filesystem operations for UNC paths to prevent NTLM credential leaks. 208 if (absolutePath.startsWith('\\\\') || absolutePath.startsWith('//')) { 209 return { result: true } 210 } 211 212 try { 213 await fs.stat(absolutePath) 214 } catch (e: unknown) { 215 if (isENOENT(e)) { 216 const cwdSuggestion = await suggestPathUnderCwd(absolutePath) 217 let message = `Path does not exist: ${path}. ${FILE_NOT_FOUND_CWD_NOTE} ${getCwd()}.` 218 if (cwdSuggestion) { 219 message += ` Did you mean ${cwdSuggestion}?` 220 } 221 return { 222 result: false, 223 message, 224 errorCode: 1, 225 } 226 } 227 throw e 228 } 229 } 230 231 return { result: true } 232 }, 233 async checkPermissions(input, context): Promise<PermissionDecision> { 234 const appState = context.getAppState() 235 return checkReadPermissionForTool( 236 GrepTool, 237 input, 238 appState.toolPermissionContext, 239 ) 240 }, 241 async prompt() { 242 return getDescription() 243 }, 244 renderToolUseMessage, 245 renderToolUseErrorMessage, 246 renderToolResultMessage, 247 // SearchResultSummary shows content (mode=content) or filenames.join. 248 // numFiles/numLines/numMatches are chrome ("Found 3 files") — fine to 249 // skip (under-count, not phantom). Glob reuses this via UI.tsx:65. 
250 extractSearchText({ mode, content, filenames }) { 251 if (mode === 'content' && content) return content 252 return filenames.join('\n') 253 }, 254 mapToolResultToToolResultBlockParam( 255 { 256 mode = 'files_with_matches', 257 numFiles, 258 filenames, 259 content, 260 numLines: _numLines, 261 numMatches, 262 appliedLimit, 263 appliedOffset, 264 }, 265 toolUseID, 266 ) { 267 if (mode === 'content') { 268 const limitInfo = formatLimitInfo(appliedLimit, appliedOffset) 269 const resultContent = content || 'No matches found' 270 const finalContent = limitInfo 271 ? `${resultContent}\n\n[Showing results with pagination = ${limitInfo}]` 272 : resultContent 273 return { 274 tool_use_id: toolUseID, 275 type: 'tool_result', 276 content: finalContent, 277 } 278 } 279 280 if (mode === 'count') { 281 const limitInfo = formatLimitInfo(appliedLimit, appliedOffset) 282 const rawContent = content || 'No matches found' 283 const matches = numMatches ?? 0 284 const files = numFiles ?? 0 285 const summary = `\n\nFound ${matches} total ${matches === 1 ? 'occurrence' : 'occurrences'} across ${files} ${files === 1 ? 'file' : 'files'}.${limitInfo ? ` with pagination = ${limitInfo}` : ''}` 286 return { 287 tool_use_id: toolUseID, 288 type: 'tool_result', 289 content: rawContent + summary, 290 } 291 } 292 293 // files_with_matches mode 294 const limitInfo = formatLimitInfo(appliedLimit, appliedOffset) 295 if (numFiles === 0) { 296 return { 297 tool_use_id: toolUseID, 298 type: 'tool_result', 299 content: 'No files found', 300 } 301 } 302 // head_limit has already been applied in call() method, so just show all filenames 303 const result = `Found ${numFiles} ${plural(numFiles, 'file')}${limitInfo ? 
` ${limitInfo}` : ''}\n${filenames.join('\n')}` 304 return { 305 tool_use_id: toolUseID, 306 type: 'tool_result', 307 content: result, 308 } 309 }, 310 async call( 311 { 312 pattern, 313 path, 314 glob, 315 type, 316 output_mode = 'files_with_matches', 317 '-B': context_before, 318 '-A': context_after, 319 '-C': context_c, 320 context, 321 '-n': show_line_numbers = true, 322 '-i': case_insensitive = false, 323 head_limit, 324 offset = 0, 325 multiline = false, 326 }, 327 { abortController, getAppState }, 328 ) { 329 const absolutePath = path ? expandPath(path) : getCwd() 330 const args = ['--hidden'] 331 332 // Exclude VCS directories to avoid noise from version control metadata 333 for (const dir of VCS_DIRECTORIES_TO_EXCLUDE) { 334 args.push('--glob', `!${dir}`) 335 } 336 337 // Limit line length to prevent base64/minified content from cluttering output 338 args.push('--max-columns', '500') 339 340 // Only apply multiline flags when explicitly requested 341 if (multiline) { 342 args.push('-U', '--multiline-dotall') 343 } 344 345 // Add optional flags 346 if (case_insensitive) { 347 args.push('-i') 348 } 349 350 // Add output mode flags 351 if (output_mode === 'files_with_matches') { 352 args.push('-l') 353 } else if (output_mode === 'count') { 354 args.push('-c') 355 } 356 357 // Add line numbers if requested 358 if (show_line_numbers && output_mode === 'content') { 359 args.push('-n') 360 } 361 362 // Add context flags (-C/context takes precedence over context_before/context_after) 363 if (output_mode === 'content') { 364 if (context !== undefined) { 365 args.push('-C', context.toString()) 366 } else if (context_c !== undefined) { 367 args.push('-C', context_c.toString()) 368 } else { 369 if (context_before !== undefined) { 370 args.push('-B', context_before.toString()) 371 } 372 if (context_after !== undefined) { 373 args.push('-A', context_after.toString()) 374 } 375 } 376 } 377 378 // If pattern starts with dash, use -e flag to specify it as a pattern 379 // 
This prevents ripgrep from interpreting it as a command-line option 380 if (pattern.startsWith('-')) { 381 args.push('-e', pattern) 382 } else { 383 args.push(pattern) 384 } 385 386 // Add type filter if specified 387 if (type) { 388 args.push('--type', type) 389 } 390 391 if (glob) { 392 // Split on commas and spaces, but preserve patterns with braces 393 const globPatterns: string[] = [] 394 const rawPatterns = glob.split(/\s+/) 395 396 for (const rawPattern of rawPatterns) { 397 // If pattern contains braces, don't split further 398 if (rawPattern.includes('{') && rawPattern.includes('}')) { 399 globPatterns.push(rawPattern) 400 } else { 401 // Split on commas for patterns without braces 402 globPatterns.push(...rawPattern.split(',').filter(Boolean)) 403 } 404 } 405 406 for (const globPattern of globPatterns.filter(Boolean)) { 407 args.push('--glob', globPattern) 408 } 409 } 410 411 // Add ignore patterns 412 const appState = getAppState() 413 const ignorePatterns = normalizePatternsToPath( 414 getFileReadIgnorePatterns(appState.toolPermissionContext), 415 getCwd(), 416 ) 417 for (const ignorePattern of ignorePatterns) { 418 // Note: ripgrep only applies gitignore patterns relative to the working directory 419 // So for non-absolute paths, we need to prefix them with '**' 420 // See: https://github.com/BurntSushi/ripgrep/discussions/2156#discussioncomment-2316335 421 // 422 // We also need to negate the pattern with `!` to exclude it 423 const rgIgnorePattern = ignorePattern.startsWith('/') 424 ? 
`!${ignorePattern}` 425 : `!**/${ignorePattern}` 426 args.push('--glob', rgIgnorePattern) 427 } 428 429 // Exclude orphaned plugin version directories 430 for (const exclusion of await getGlobExclusionsForPluginCache( 431 absolutePath, 432 )) { 433 args.push('--glob', exclusion) 434 } 435 436 // WSL has severe performance penalty for file reads (3-5x slower on WSL2) 437 // The timeout is handled by ripgrep itself via execFile timeout option 438 // We don't use AbortController for timeout to avoid interrupting the agent loop 439 // If ripgrep times out, it throws RipgrepTimeoutError which propagates up 440 // so Claude knows the search didn't complete (rather than thinking there were no matches) 441 const results = await ripGrep(args, absolutePath, abortController.signal) 442 443 if (output_mode === 'content') { 444 // For content mode, results are the actual content lines 445 // Convert absolute paths to relative paths to save tokens 446 447 // Apply head_limit first — relativize is per-line work, so 448 // avoid processing lines that will be discarded (broad patterns can 449 // return 10k+ lines with head_limit keeping only ~30-100). 
450 const { items: limitedResults, appliedLimit } = applyHeadLimit( 451 results, 452 head_limit, 453 offset, 454 ) 455 456 const finalLines = limitedResults.map(line => { 457 // Lines have format: /absolute/path:line_content or /absolute/path:num:content 458 const colonIndex = line.indexOf(':') 459 if (colonIndex > 0) { 460 const filePath = line.substring(0, colonIndex) 461 const rest = line.substring(colonIndex) 462 return toRelativePath(filePath) + rest 463 } 464 return line 465 }) 466 const output = { 467 mode: 'content' as const, 468 numFiles: 0, // Not applicable for content mode 469 filenames: [], 470 content: finalLines.join('\n'), 471 numLines: finalLines.length, 472 ...(appliedLimit !== undefined && { appliedLimit }), 473 ...(offset > 0 && { appliedOffset: offset }), 474 } 475 return { data: output } 476 } 477 478 if (output_mode === 'count') { 479 // For count mode, pass through raw ripgrep output (filename:count format) 480 // Apply head_limit first to avoid relativizing entries that will be discarded. 
481 const { items: limitedResults, appliedLimit } = applyHeadLimit( 482 results, 483 head_limit, 484 offset, 485 ) 486 487 // Convert absolute paths to relative paths to save tokens 488 const finalCountLines = limitedResults.map(line => { 489 // Lines have format: /absolute/path:count 490 const colonIndex = line.lastIndexOf(':') 491 if (colonIndex > 0) { 492 const filePath = line.substring(0, colonIndex) 493 const count = line.substring(colonIndex) 494 return toRelativePath(filePath) + count 495 } 496 return line 497 }) 498 499 // Parse count output to extract total matches and file count 500 let totalMatches = 0 501 let fileCount = 0 502 for (const line of finalCountLines) { 503 const colonIndex = line.lastIndexOf(':') 504 if (colonIndex > 0) { 505 const countStr = line.substring(colonIndex + 1) 506 const count = parseInt(countStr, 10) 507 if (!isNaN(count)) { 508 totalMatches += count 509 fileCount += 1 510 } 511 } 512 } 513 514 const output = { 515 mode: 'count' as const, 516 numFiles: fileCount, 517 filenames: [], 518 content: finalCountLines.join('\n'), 519 numMatches: totalMatches, 520 ...(appliedLimit !== undefined && { appliedLimit }), 521 ...(offset > 0 && { appliedOffset: offset }), 522 } 523 return { data: output } 524 } 525 526 // For files_with_matches mode (default) 527 // Use allSettled so a single ENOENT (file deleted between ripgrep's scan 528 // and this stat) does not reject the whole batch. Failed stats sort as mtime 0. 529 const stats = await Promise.allSettled( 530 results.map(_ => getFsImplementation().stat(_)), 531 ) 532 const sortedMatches = results 533 // Sort by modification time 534 .map((_, i) => { 535 const r = stats[i]! 536 return [ 537 _, 538 r.status === 'fulfilled' ? (r.value.mtimeMs ?? 
0) : 0, 539 ] as const 540 }) 541 .sort((a, b) => { 542 if (process.env.NODE_ENV === 'test') { 543 // In tests, we always want to sort by filename, so that results are deterministic 544 return a[0].localeCompare(b[0]) 545 } 546 const timeComparison = b[1] - a[1] 547 if (timeComparison === 0) { 548 // Sort by filename as a tiebreaker 549 return a[0].localeCompare(b[0]) 550 } 551 return timeComparison 552 }) 553 .map(_ => _[0]) 554 555 // Apply head_limit to sorted file list (like "| head -N") 556 const { items: finalMatches, appliedLimit } = applyHeadLimit( 557 sortedMatches, 558 head_limit, 559 offset, 560 ) 561 562 // Convert absolute paths to relative paths to save tokens 563 const relativeMatches = finalMatches.map(toRelativePath) 564 565 const output = { 566 mode: 'files_with_matches' as const, 567 filenames: relativeMatches, 568 numFiles: relativeMatches.length, 569 ...(appliedLimit !== undefined && { appliedLimit }), 570 ...(offset > 0 && { appliedOffset: offset }), 571 } 572 573 return { 574 data: output, 575 } 576 }, 577} satisfies ToolDef<InputSchema, Output>)