// Source dump of the Claude Code Grep tool (main branch, 577 lines, 20 kB).
1import { z } from 'zod/v4' 2import type { ValidationResult } from '../../Tool.js' 3import { buildTool, type ToolDef } from '../../Tool.js' 4import { getCwd } from '../../utils/cwd.js' 5import { isENOENT } from '../../utils/errors.js' 6import { 7 FILE_NOT_FOUND_CWD_NOTE, 8 suggestPathUnderCwd, 9} from '../../utils/file.js' 10import { getFsImplementation } from '../../utils/fsOperations.js' 11import { lazySchema } from '../../utils/lazySchema.js' 12import { expandPath, toRelativePath } from '../../utils/path.js' 13import { 14 checkReadPermissionForTool, 15 getFileReadIgnorePatterns, 16 normalizePatternsToPath, 17} from '../../utils/permissions/filesystem.js' 18import type { PermissionDecision } from '../../utils/permissions/PermissionResult.js' 19import { matchWildcardPattern } from '../../utils/permissions/shellRuleMatching.js' 20import { getGlobExclusionsForPluginCache } from '../../utils/plugins/orphanedPluginFilter.js' 21import { ripGrep } from '../../utils/ripgrep.js' 22import { semanticBoolean } from '../../utils/semanticBoolean.js' 23import { semanticNumber } from '../../utils/semanticNumber.js' 24import { plural } from '../../utils/stringUtils.js' 25import { GREP_TOOL_NAME, getDescription } from './prompt.js' 26import { 27 getToolUseSummary, 28 renderToolResultMessage, 29 renderToolUseErrorMessage, 30 renderToolUseMessage, 31} from './UI.js' 32 33const inputSchema = lazySchema(() => 34 z.strictObject({ 35 pattern: z 36 .string() 37 .describe( 38 'The regular expression pattern to search for in file contents', 39 ), 40 path: z 41 .string() 42 .optional() 43 .describe( 44 'File or directory to search in (rg PATH). Defaults to current working directory.', 45 ), 46 glob: z 47 .string() 48 .optional() 49 .describe( 50 'Glob pattern to filter files (e.g. 
"*.js", "*.{ts,tsx}") - maps to rg --glob', 51 ), 52 output_mode: z 53 .enum(['content', 'files_with_matches', 'count']) 54 .optional() 55 .describe( 56 'Output mode: "content" shows matching lines (supports -A/-B/-C context, -n line numbers, head_limit), "files_with_matches" shows file paths (supports head_limit), "count" shows match counts (supports head_limit). Defaults to "files_with_matches".', 57 ), 58 '-B': semanticNumber(z.number().optional()).describe( 59 'Number of lines to show before each match (rg -B). Requires output_mode: "content", ignored otherwise.', 60 ), 61 '-A': semanticNumber(z.number().optional()).describe( 62 'Number of lines to show after each match (rg -A). Requires output_mode: "content", ignored otherwise.', 63 ), 64 '-C': semanticNumber(z.number().optional()).describe('Alias for context.'), 65 context: semanticNumber(z.number().optional()).describe( 66 'Number of lines to show before and after each match (rg -C). Requires output_mode: "content", ignored otherwise.', 67 ), 68 '-n': semanticBoolean(z.boolean().optional()).describe( 69 'Show line numbers in output (rg -n). Requires output_mode: "content", ignored otherwise. Defaults to true.', 70 ), 71 '-i': semanticBoolean(z.boolean().optional()).describe( 72 'Case insensitive search (rg -i)', 73 ), 74 type: z 75 .string() 76 .optional() 77 .describe( 78 'File type to search (rg --type). Common types: js, py, rust, go, java, etc. More efficient than include for standard file types.', 79 ), 80 head_limit: semanticNumber(z.number().optional()).describe( 81 'Limit output to first N lines/entries, equivalent to "| head -N". Works across all output modes: content (limits output lines), files_with_matches (limits file paths), count (limits count entries). Defaults to 250 when unspecified. 
Pass 0 for unlimited (use sparingly — large result sets waste context).', 82 ), 83 offset: semanticNumber(z.number().optional()).describe( 84 'Skip first N lines/entries before applying head_limit, equivalent to "| tail -n +N | head -N". Works across all output modes. Defaults to 0.', 85 ), 86 multiline: semanticBoolean(z.boolean().optional()).describe( 87 'Enable multiline mode where . matches newlines and patterns can span lines (rg -U --multiline-dotall). Default: false.', 88 ), 89 }), 90) 91type InputSchema = ReturnType<typeof inputSchema> 92 93// Version control system directories to exclude from searches 94// These are excluded automatically because they create noise in search results 95const VCS_DIRECTORIES_TO_EXCLUDE = [ 96 '.git', 97 '.svn', 98 '.hg', 99 '.bzr', 100 '.jj', 101 '.sl', 102] as const 103 104// Default cap on grep results when head_limit is unspecified. Unbounded content-mode 105// greps can fill up to the 20KB persist threshold (~6-24K tokens/grep-heavy session). 106// 250 is generous enough for exploratory searches while preventing context bloat. 107// Pass head_limit=0 explicitly for unlimited. 108const DEFAULT_HEAD_LIMIT = 250 109 110function applyHeadLimit<T>( 111 items: T[], 112 limit: number | undefined, 113 offset: number = 0, 114): { items: T[]; appliedLimit: number | undefined } { 115 // Explicit 0 = unlimited escape hatch 116 if (limit === 0) { 117 return { items: items.slice(offset), appliedLimit: undefined } 118 } 119 const effectiveLimit = limit ?? DEFAULT_HEAD_LIMIT 120 const sliced = items.slice(offset, offset + effectiveLimit) 121 // Only report appliedLimit when truncation actually occurred, so the model 122 // knows there may be more results and can paginate with offset. 123 const wasTruncated = items.length - offset > effectiveLimit 124 return { 125 items: sliced, 126 appliedLimit: wasTruncated ? effectiveLimit : undefined, 127 } 128} 129 130// Format limit/offset information for display in tool results. 
131// appliedLimit is only set when truncation actually occurred (see applyHeadLimit), 132// so it may be undefined even when appliedOffset is set — build parts conditionally 133// to avoid "limit: undefined" appearing in user-visible output. 134function formatLimitInfo( 135 appliedLimit: number | undefined, 136 appliedOffset: number | undefined, 137): string { 138 const parts: string[] = [] 139 if (appliedLimit !== undefined) parts.push(`limit: ${appliedLimit}`) 140 if (appliedOffset) parts.push(`offset: ${appliedOffset}`) 141 return parts.join(', ') 142} 143 144const outputSchema = lazySchema(() => 145 z.object({ 146 mode: z.enum(['content', 'files_with_matches', 'count']).optional(), 147 numFiles: z.number(), 148 filenames: z.array(z.string()), 149 content: z.string().optional(), 150 numLines: z.number().optional(), // For content mode 151 numMatches: z.number().optional(), // For count mode 152 appliedLimit: z.number().optional(), // The limit that was applied (if any) 153 appliedOffset: z.number().optional(), // The offset that was applied 154 }), 155) 156type OutputSchema = ReturnType<typeof outputSchema> 157 158type Output = z.infer<OutputSchema> 159 160export const GrepTool = buildTool({ 161 name: GREP_TOOL_NAME, 162 searchHint: 'search file contents with regex (ripgrep)', 163 // 20K chars - tool result persistence threshold 164 maxResultSizeChars: 20_000, 165 strict: true, 166 async description() { 167 return getDescription() 168 }, 169 userFacingName() { 170 return 'Search' 171 }, 172 getToolUseSummary, 173 getActivityDescription(input) { 174 const summary = getToolUseSummary(input) 175 return summary ? `Searching for ${summary}` : 'Searching' 176 }, 177 get inputSchema(): InputSchema { 178 return inputSchema() 179 }, 180 get outputSchema(): OutputSchema { 181 return outputSchema() 182 }, 183 isConcurrencySafe() { 184 return true 185 }, 186 isReadOnly() { 187 return true 188 }, 189 toAutoClassifierInput(input) { 190 return input.path ? 
`${input.pattern} in ${input.path}` : input.pattern 191 }, 192 isSearchOrReadCommand() { 193 return { isSearch: true, isRead: false } 194 }, 195 getPath({ path }): string { 196 return path || getCwd() 197 }, 198 async preparePermissionMatcher({ pattern }) { 199 return rulePattern => matchWildcardPattern(rulePattern, pattern) 200 }, 201 async validateInput({ path }): Promise<ValidationResult> { 202 // If path is provided, validate that it exists 203 if (path) { 204 const fs = getFsImplementation() 205 const absolutePath = expandPath(path) 206 207 // SECURITY: Skip filesystem operations for UNC paths to prevent NTLM credential leaks. 208 if (absolutePath.startsWith('\\\\') || absolutePath.startsWith('//')) { 209 return { result: true } 210 } 211 212 try { 213 await fs.stat(absolutePath) 214 } catch (e: unknown) { 215 if (isENOENT(e)) { 216 const cwdSuggestion = await suggestPathUnderCwd(absolutePath) 217 let message = `Path does not exist: ${path}. ${FILE_NOT_FOUND_CWD_NOTE} ${getCwd()}.` 218 if (cwdSuggestion) { 219 message += ` Did you mean ${cwdSuggestion}?` 220 } 221 return { 222 result: false, 223 message, 224 errorCode: 1, 225 } 226 } 227 throw e 228 } 229 } 230 231 return { result: true } 232 }, 233 async checkPermissions(input, context): Promise<PermissionDecision> { 234 const appState = context.getAppState() 235 return checkReadPermissionForTool( 236 GrepTool, 237 input, 238 appState.toolPermissionContext, 239 ) 240 }, 241 async prompt() { 242 return getDescription() 243 }, 244 renderToolUseMessage, 245 renderToolUseErrorMessage, 246 renderToolResultMessage, 247 // SearchResultSummary shows content (mode=content) or filenames.join. 248 // numFiles/numLines/numMatches are chrome ("Found 3 files") — fine to 249 // skip (under-count, not phantom). Glob reuses this via UI.tsx:65. 
250 extractSearchText({ mode, content, filenames }) { 251 if (mode === 'content' && content) return content 252 return filenames.join('\n') 253 }, 254 mapToolResultToToolResultBlockParam( 255 { 256 mode = 'files_with_matches', 257 numFiles, 258 filenames, 259 content, 260 numLines: _numLines, 261 numMatches, 262 appliedLimit, 263 appliedOffset, 264 }, 265 toolUseID, 266 ) { 267 if (mode === 'content') { 268 const limitInfo = formatLimitInfo(appliedLimit, appliedOffset) 269 const resultContent = content || 'No matches found' 270 const finalContent = limitInfo 271 ? `${resultContent}\n\n[Showing results with pagination = ${limitInfo}]` 272 : resultContent 273 return { 274 tool_use_id: toolUseID, 275 type: 'tool_result', 276 content: finalContent, 277 } 278 } 279 280 if (mode === 'count') { 281 const limitInfo = formatLimitInfo(appliedLimit, appliedOffset) 282 const rawContent = content || 'No matches found' 283 const matches = numMatches ?? 0 284 const files = numFiles ?? 0 285 const summary = `\n\nFound ${matches} total ${matches === 1 ? 'occurrence' : 'occurrences'} across ${files} ${files === 1 ? 'file' : 'files'}.${limitInfo ? ` with pagination = ${limitInfo}` : ''}` 286 return { 287 tool_use_id: toolUseID, 288 type: 'tool_result', 289 content: rawContent + summary, 290 } 291 } 292 293 // files_with_matches mode 294 const limitInfo = formatLimitInfo(appliedLimit, appliedOffset) 295 if (numFiles === 0) { 296 return { 297 tool_use_id: toolUseID, 298 type: 'tool_result', 299 content: 'No files found', 300 } 301 } 302 // head_limit has already been applied in call() method, so just show all filenames 303 const result = `Found ${numFiles} ${plural(numFiles, 'file')}${limitInfo ? 
` ${limitInfo}` : ''}\n${filenames.join('\n')}` 304 return { 305 tool_use_id: toolUseID, 306 type: 'tool_result', 307 content: result, 308 } 309 }, 310 async call( 311 { 312 pattern, 313 path, 314 glob, 315 type, 316 output_mode = 'files_with_matches', 317 '-B': context_before, 318 '-A': context_after, 319 '-C': context_c, 320 context, 321 '-n': show_line_numbers = true, 322 '-i': case_insensitive = false, 323 head_limit, 324 offset = 0, 325 multiline = false, 326 }, 327 { abortController, getAppState }, 328 ) { 329 const absolutePath = path ? expandPath(path) : getCwd() 330 const args = ['--hidden'] 331 332 // Exclude VCS directories to avoid noise from version control metadata 333 for (const dir of VCS_DIRECTORIES_TO_EXCLUDE) { 334 args.push('--glob', `!${dir}`) 335 } 336 337 // Limit line length to prevent base64/minified content from cluttering output 338 args.push('--max-columns', '500') 339 340 // Only apply multiline flags when explicitly requested 341 if (multiline) { 342 args.push('-U', '--multiline-dotall') 343 } 344 345 // Add optional flags 346 if (case_insensitive) { 347 args.push('-i') 348 } 349 350 // Add output mode flags 351 if (output_mode === 'files_with_matches') { 352 args.push('-l') 353 } else if (output_mode === 'count') { 354 args.push('-c') 355 } 356 357 // Add line numbers if requested 358 if (show_line_numbers && output_mode === 'content') { 359 args.push('-n') 360 } 361 362 // Add context flags (-C/context takes precedence over context_before/context_after) 363 if (output_mode === 'content') { 364 if (context !== undefined) { 365 args.push('-C', context.toString()) 366 } else if (context_c !== undefined) { 367 args.push('-C', context_c.toString()) 368 } else { 369 if (context_before !== undefined) { 370 args.push('-B', context_before.toString()) 371 } 372 if (context_after !== undefined) { 373 args.push('-A', context_after.toString()) 374 } 375 } 376 } 377 378 // If pattern starts with dash, use -e flag to specify it as a pattern 379 // 
This prevents ripgrep from interpreting it as a command-line option 380 if (pattern.startsWith('-')) { 381 args.push('-e', pattern) 382 } else { 383 args.push(pattern) 384 } 385 386 // Add type filter if specified 387 if (type) { 388 args.push('--type', type) 389 } 390 391 if (glob) { 392 // Split on commas and spaces, but preserve patterns with braces 393 const globPatterns: string[] = [] 394 const rawPatterns = glob.split(/\s+/) 395 396 for (const rawPattern of rawPatterns) { 397 // If pattern contains braces, don't split further 398 if (rawPattern.includes('{') && rawPattern.includes('}')) { 399 globPatterns.push(rawPattern) 400 } else { 401 // Split on commas for patterns without braces 402 globPatterns.push(...rawPattern.split(',').filter(Boolean)) 403 } 404 } 405 406 for (const globPattern of globPatterns.filter(Boolean)) { 407 args.push('--glob', globPattern) 408 } 409 } 410 411 // Add ignore patterns 412 const appState = getAppState() 413 const ignorePatterns = normalizePatternsToPath( 414 getFileReadIgnorePatterns(appState.toolPermissionContext), 415 getCwd(), 416 ) 417 for (const ignorePattern of ignorePatterns) { 418 // Note: ripgrep only applies gitignore patterns relative to the working directory 419 // So for non-absolute paths, we need to prefix them with '**' 420 // See: https://github.com/BurntSushi/ripgrep/discussions/2156#discussioncomment-2316335 421 // 422 // We also need to negate the pattern with `!` to exclude it 423 const rgIgnorePattern = ignorePattern.startsWith('/') 424 ? 
`!${ignorePattern}` 425 : `!**/${ignorePattern}` 426 args.push('--glob', rgIgnorePattern) 427 } 428 429 // Exclude orphaned plugin version directories 430 for (const exclusion of await getGlobExclusionsForPluginCache( 431 absolutePath, 432 )) { 433 args.push('--glob', exclusion) 434 } 435 436 // WSL has severe performance penalty for file reads (3-5x slower on WSL2) 437 // The timeout is handled by ripgrep itself via execFile timeout option 438 // We don't use AbortController for timeout to avoid interrupting the agent loop 439 // If ripgrep times out, it throws RipgrepTimeoutError which propagates up 440 // so Claude knows the search didn't complete (rather than thinking there were no matches) 441 const results = await ripGrep(args, absolutePath, abortController.signal) 442 443 if (output_mode === 'content') { 444 // For content mode, results are the actual content lines 445 // Convert absolute paths to relative paths to save tokens 446 447 // Apply head_limit first — relativize is per-line work, so 448 // avoid processing lines that will be discarded (broad patterns can 449 // return 10k+ lines with head_limit keeping only ~30-100). 
450 const { items: limitedResults, appliedLimit } = applyHeadLimit( 451 results, 452 head_limit, 453 offset, 454 ) 455 456 const finalLines = limitedResults.map(line => { 457 // Lines have format: /absolute/path:line_content or /absolute/path:num:content 458 const colonIndex = line.indexOf(':') 459 if (colonIndex > 0) { 460 const filePath = line.substring(0, colonIndex) 461 const rest = line.substring(colonIndex) 462 return toRelativePath(filePath) + rest 463 } 464 return line 465 }) 466 const output = { 467 mode: 'content' as const, 468 numFiles: 0, // Not applicable for content mode 469 filenames: [], 470 content: finalLines.join('\n'), 471 numLines: finalLines.length, 472 ...(appliedLimit !== undefined && { appliedLimit }), 473 ...(offset > 0 && { appliedOffset: offset }), 474 } 475 return { data: output } 476 } 477 478 if (output_mode === 'count') { 479 // For count mode, pass through raw ripgrep output (filename:count format) 480 // Apply head_limit first to avoid relativizing entries that will be discarded. 
481 const { items: limitedResults, appliedLimit } = applyHeadLimit( 482 results, 483 head_limit, 484 offset, 485 ) 486 487 // Convert absolute paths to relative paths to save tokens 488 const finalCountLines = limitedResults.map(line => { 489 // Lines have format: /absolute/path:count 490 const colonIndex = line.lastIndexOf(':') 491 if (colonIndex > 0) { 492 const filePath = line.substring(0, colonIndex) 493 const count = line.substring(colonIndex) 494 return toRelativePath(filePath) + count 495 } 496 return line 497 }) 498 499 // Parse count output to extract total matches and file count 500 let totalMatches = 0 501 let fileCount = 0 502 for (const line of finalCountLines) { 503 const colonIndex = line.lastIndexOf(':') 504 if (colonIndex > 0) { 505 const countStr = line.substring(colonIndex + 1) 506 const count = parseInt(countStr, 10) 507 if (!isNaN(count)) { 508 totalMatches += count 509 fileCount += 1 510 } 511 } 512 } 513 514 const output = { 515 mode: 'count' as const, 516 numFiles: fileCount, 517 filenames: [], 518 content: finalCountLines.join('\n'), 519 numMatches: totalMatches, 520 ...(appliedLimit !== undefined && { appliedLimit }), 521 ...(offset > 0 && { appliedOffset: offset }), 522 } 523 return { data: output } 524 } 525 526 // For files_with_matches mode (default) 527 // Use allSettled so a single ENOENT (file deleted between ripgrep's scan 528 // and this stat) does not reject the whole batch. Failed stats sort as mtime 0. 529 const stats = await Promise.allSettled( 530 results.map(_ => getFsImplementation().stat(_)), 531 ) 532 const sortedMatches = results 533 // Sort by modification time 534 .map((_, i) => { 535 const r = stats[i]! 536 return [ 537 _, 538 r.status === 'fulfilled' ? (r.value.mtimeMs ?? 
0) : 0, 539 ] as const 540 }) 541 .sort((a, b) => { 542 if (process.env.NODE_ENV === 'test') { 543 // In tests, we always want to sort by filename, so that results are deterministic 544 return a[0].localeCompare(b[0]) 545 } 546 const timeComparison = b[1] - a[1] 547 if (timeComparison === 0) { 548 // Sort by filename as a tiebreaker 549 return a[0].localeCompare(b[0]) 550 } 551 return timeComparison 552 }) 553 .map(_ => _[0]) 554 555 // Apply head_limit to sorted file list (like "| head -N") 556 const { items: finalMatches, appliedLimit } = applyHeadLimit( 557 sortedMatches, 558 head_limit, 559 offset, 560 ) 561 562 // Convert absolute paths to relative paths to save tokens 563 const relativeMatches = finalMatches.map(toRelativePath) 564 565 const output = { 566 mode: 'files_with_matches' as const, 567 filenames: relativeMatches, 568 numFiles: relativeMatches.length, 569 ...(appliedLimit !== undefined && { appliedLimit }), 570 ...(offset > 0 && { appliedOffset: offset }), 571 } 572 573 return { 574 data: output, 575 } 576 }, 577} satisfies ToolDef<InputSchema, Output>)