tools/FileEditTool/utils.ts at main

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / tools / FileEditTool / utils.ts
at main 775 lines 22 kB view raw
wrap content
oppi.li dump from zip 9d ago
63aada3f
  1import { type StructuredPatchHunk, structuredPatch } from 'diff'
  2import { logError } from 'src/utils/log.js'
  3import { expandPath } from 'src/utils/path.js'
  4import { countCharInString } from 'src/utils/stringUtils.js'
  5import {
  6  DIFF_TIMEOUT_MS,
  7  getPatchForDisplay,
  8  getPatchFromContents,
  9} from '../../utils/diff.js'
 10import { errorMessage, isENOENT } from '../../utils/errors.js'
 11import {
 12  addLineNumbers,
 13  convertLeadingTabsToSpaces,
 14  readFileSyncCached,
 15} from '../../utils/file.js'
 16import type { EditInput, FileEdit } from './types.js'
 17
 18// Claude can't output curly quotes, so we define them as constants here for Claude to use
 19// in the code. We do this because we normalize curly quotes to straight quotes
 20// when applying edits.
 21export const LEFT_SINGLE_CURLY_QUOTE = '‘'
 22export const RIGHT_SINGLE_CURLY_QUOTE = '’'
 23export const LEFT_DOUBLE_CURLY_QUOTE = '“'
 24export const RIGHT_DOUBLE_CURLY_QUOTE = '”'
 25
 26/**
 27 * Normalizes quotes in a string by converting curly quotes to straight quotes
 28 * @param str The string to normalize
 29 * @returns The string with all curly quotes replaced by straight quotes
 30 */
 31export function normalizeQuotes(str: string): string {
 32  return str
 33    .replaceAll(LEFT_SINGLE_CURLY_QUOTE, "'")
 34    .replaceAll(RIGHT_SINGLE_CURLY_QUOTE, "'")
 35    .replaceAll(LEFT_DOUBLE_CURLY_QUOTE, '"')
 36    .replaceAll(RIGHT_DOUBLE_CURLY_QUOTE, '"')
 37}
 38
 39/**
 40 * Strips trailing whitespace from each line in a string while preserving line endings
 41 * @param str The string to process
 42 * @returns The string with trailing whitespace removed from each line
 43 */
 44export function stripTrailingWhitespace(str: string): string {
 45  // Handle different line endings: CRLF, LF, CR
 46  // Use a regex that matches line endings and captures them
 47  const lines = str.split(/(\r\n|\n|\r)/)
 48
 49  let result = ''
 50  for (let i = 0; i < lines.length; i++) {
 51    const part = lines[i]
 52    if (part !== undefined) {
 53      if (i % 2 === 0) {
 54        // Even indices are line content
 55        result += part.replace(/\s+$/, '')
 56      } else {
 57        // Odd indices are line endings
 58        result += part
 59      }
 60    }
 61  }
 62
 63  return result
 64}
 65
 66/**
 67 * Finds the actual string in the file content that matches the search string,
 68 * accounting for quote normalization
 69 * @param fileContent The file content to search in
 70 * @param searchString The string to search for
 71 * @returns The actual string found in the file, or null if not found
 72 */
 73export function findActualString(
 74  fileContent: string,
 75  searchString: string,
 76): string | null {
 77  // First try exact match
 78  if (fileContent.includes(searchString)) {
 79    return searchString
 80  }
 81
 82  // Try with normalized quotes
 83  const normalizedSearch = normalizeQuotes(searchString)
 84  const normalizedFile = normalizeQuotes(fileContent)
 85
 86  const searchIndex = normalizedFile.indexOf(normalizedSearch)
 87  if (searchIndex !== -1) {
 88    // Find the actual string in the file that matches
 89    return fileContent.substring(searchIndex, searchIndex + searchString.length)
 90  }
 91
 92  return null
 93}
 94
 95/**
 96 * When old_string matched via quote normalization (curly quotes in file,
 97 * straight quotes from model), apply the same curly quote style to new_string
 98 * so the edit preserves the file's typography.
 99 *
100 * Uses a simple open/close heuristic: a quote character preceded by whitespace,
101 * start of string, or opening punctuation is treated as an opening quote;
102 * otherwise it's a closing quote.
103 */
104export function preserveQuoteStyle(
105  oldString: string,
106  actualOldString: string,
107  newString: string,
108): string {
109  // If they're the same, no normalization happened
110  if (oldString === actualOldString) {
111    return newString
112  }
113
114  // Detect which curly quote types were in the file
115  const hasDoubleQuotes =
116    actualOldString.includes(LEFT_DOUBLE_CURLY_QUOTE) ||
117    actualOldString.includes(RIGHT_DOUBLE_CURLY_QUOTE)
118  const hasSingleQuotes =
119    actualOldString.includes(LEFT_SINGLE_CURLY_QUOTE) ||
120    actualOldString.includes(RIGHT_SINGLE_CURLY_QUOTE)
121
122  if (!hasDoubleQuotes && !hasSingleQuotes) {
123    return newString
124  }
125
126  let result = newString
127
128  if (hasDoubleQuotes) {
129    result = applyCurlyDoubleQuotes(result)
130  }
131  if (hasSingleQuotes) {
132    result = applyCurlySingleQuotes(result)
133  }
134
135  return result
136}
137
/**
 * Decides whether a quote at `index` should be rendered as an opening quote.
 * It opens when it is the first character, or when the character just before
 * it is a space, tab, newline, carriage return, opening bracket
 * (`(`, `[`, `{`), em dash or en dash.
 */
function isOpeningContext(chars: string[], index: number): boolean {
  if (index === 0) {
    return true
  }

  switch (chars[index - 1]) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '(':
    case '[':
    case '{':
    case '\u2014': // em dash
    case '\u2013': // en dash
      return true
    default:
      return false
  }
}
155
/**
 * Replaces every straight double quote in `str` with a left or right curly
 * double quote, chosen by whether the quote sits in an opening context.
 */
function applyCurlyDoubleQuotes(str: string): string {
  const chars = [...str]
  return chars
    .map((ch, position) => {
      if (ch !== '"') {
        return ch
      }
      return isOpeningContext(chars, position)
        ? LEFT_DOUBLE_CURLY_QUOTE
        : RIGHT_DOUBLE_CURLY_QUOTE
    })
    .join('')
}
172
/**
 * Replaces every straight single quote in `str` with a curly one.
 * An apostrophe with a letter on both sides (as in "don't" or "it's") always
 * becomes a right single curly quote; any other single quote becomes a left
 * or right curly quote depending on whether it sits in an opening context.
 */
function applyCurlySingleQuotes(str: string): string {
  const letterPattern = /\p{L}/u
  const isLetter = (ch: string | undefined): boolean =>
    ch !== undefined && letterPattern.test(ch)

  const chars = [...str]
  return chars
    .map((ch, position) => {
      if (ch !== "'") {
        return ch
      }
      // Letter on both sides: this is an apostrophe inside a word.
      if (isLetter(chars[position - 1]) && isLetter(chars[position + 1])) {
        return RIGHT_SINGLE_CURLY_QUOTE
      }
      return isOpeningContext(chars, position)
        ? LEFT_SINGLE_CURLY_QUOTE
        : RIGHT_SINGLE_CURLY_QUOTE
    })
    .join('')
}
200
/**
 * Replaces `oldString` with `newString` in `originalContent` and returns the
 * resulting text. The input string is not modified.
 *
 * When `newString` is empty (a deletion) and `oldString` does not itself end
 * in a newline, the newline that directly follows the removed text is removed
 * with it.
 *
 * @param originalContent The text to edit
 * @param oldString The text to look for
 * @param newString The text to put in its place; `''` deletes
 * @param replaceAll Replace every occurrence instead of only one
 * @returns The edited text; `originalContent` unchanged if there is no match
 */
export function applyEditToFile(
  originalContent: string,
  oldString: string,
  newString: string,
  replaceAll: boolean = false,
): string {
  if (newString !== '') {
    // A function replacer inserts newString verbatim; a plain string argument
    // would have `$&`, `$1`, `$$`, ... interpreted as substitution patterns.
    return replaceAll
      ? originalContent.replaceAll(oldString, () => newString)
      : originalContent.replace(oldString, () => newString)
  }

  // Deleting the empty string is a no-op. Without this guard the
  // newline handling below would end up deleting a bare '\n'.
  if (oldString === '') {
    return originalContent
  }

  const swallowNewline = !oldString.endsWith('\n')

  if (replaceAll) {
    // Remove every occurrence, each together with the newline that directly
    // follows it (if any). Matching on `oldString + '\n'` alone would leave
    // behind occurrences that are not followed by a newline.
    return originalContent
      .split(oldString)
      .map((piece, index) =>
        index > 0 && swallowNewline && piece.startsWith('\n')
          ? piece.slice(1)
          : piece,
      )
      .join('')
  }

  // Single deletion: prefer an occurrence that is followed by a newline.
  const withNewline = oldString + '\n'
  const target =
    swallowNewline && originalContent.includes(withNewline)
      ? withNewline
      : oldString
  return originalContent.replace(target, () => '')
}
229
/**
 * Single-edit convenience wrapper around `getPatchForEdits`: wraps the
 * old/new string pair in a one-element edit list and delegates.
 * Does not write the file to disk.
 *
 * @returns The patch hunks and the updated file content
 */
export function getPatchForEdit({
  filePath,
  fileContents,
  oldString,
  newString,
  replaceAll = false,
}: {
  filePath: string
  fileContents: string
  oldString: string
  newString: string
  replaceAll?: boolean
}): { patch: StructuredPatchHunk[]; updatedFile: string } {
  const soleEdit: FileEdit = {
    old_string: oldString,
    new_string: newString,
    replace_all: replaceAll,
  }

  return getPatchForEdits({ filePath, fileContents, edits: [soleEdit] })
}
255
/**
 * Runs a sequence of edits over `fileContents` in memory and diffs the result
 * against the starting text. Does not write the file to disk.
 *
 * Edits are applied in order, each one to the output of the previous one. An
 * edit whose `old_string` is empty replaces the working content wholesale
 * with its `new_string`.
 *
 * NOTE: The returned patch is to be used for display purposes only - it has
 * spaces instead of tabs.
 *
 * @throws Error when an edit's `old_string` (trailing newlines ignored) occurs
 *   inside the `new_string` of an earlier edit, when an edit leaves the
 *   content unchanged, or when the final content equals the original.
 */
export function getPatchForEdits({
  filePath,
  fileContents,
  edits,
}: {
  filePath: string
  fileContents: string
  edits: FileEdit[]
}): { patch: StructuredPatchHunk[]; updatedFile: string } {
  // Special case for empty files: one empty-to-empty edit on empty content.
  const [soleEdit] = edits
  if (
    !fileContents &&
    edits.length === 1 &&
    soleEdit &&
    soleEdit.old_string === '' &&
    soleEdit.new_string === ''
  ) {
    return {
      patch: getPatchForDisplay({
        filePath,
        fileContents,
        edits: [
          {
            old_string: fileContents,
            new_string: fileContents,
            replace_all: false,
          },
        ],
      }),
      updatedFile: '',
    }
  }

  let workingContent = fileContents
  const earlierNewStrings: string[] = []

  for (const { old_string, new_string, replace_all } of edits) {
    // Compare without trailing newlines when checking for overlap with
    // text that an earlier edit inserted.
    const needle = old_string.replace(/\n+$/, '')
    const overlapsEarlierEdit =
      needle !== '' &&
      earlierNewStrings.some(inserted => inserted.includes(needle))
    if (overlapsEarlierEdit) {
      throw new Error(
        'Cannot edit file: old_string is a substring of a new_string from a previous edit.',
      )
    }

    const nextContent =
      old_string === ''
        ? new_string
        : applyEditToFile(workingContent, old_string, new_string, replace_all)

    // An edit that leaves the content untouched did not match anything.
    if (nextContent === workingContent) {
      throw new Error('String not found in file. Failed to apply edit.')
    }

    workingContent = nextContent
    earlierNewStrings.push(new_string)
  }

  if (workingContent === fileContents) {
    throw new Error(
      'Original and edited file match exactly. Failed to apply edit.',
    )
  }

  // Before and after content are both in hand, so diff them directly with
  // getPatchFromContents. Routing through getPatchForDisplay with a single
  // whole-file edit would transform fileContents twice and run a no-op
  // full-content replace; skipping that saves ~20% on large files.
  const patch = getPatchFromContents({
    filePath,
    oldContent: convertLeadingTabsToSpaces(fileContents),
    newContent: convertLeadingTabsToSpaces(workingContent),
  })

  return { patch, updatedFile: workingContent }
}
351
// Cap on edited_text_file attachment snippets. Format-on-save of a large file
// previously injected the entire file per turn (observed max 16.1KB, ~14K
// tokens/session). 8KB preserves meaningful context while bounding worst case.
// The cap is compared against the snippet's string length.
const DIFF_SNIPPET_MAX_BYTES = 8192

/**
 * Used for attachments, to show snippets when files change.
 *
 * Diffs the two contents with 8 lines of context and renders, for each hunk,
 * the lines as they appear in file B (context and additions; deletions and
 * "\ ..." metadata lines are dropped), prefixed with line numbers. Hunks are
 * separated by a "..." line. Output longer than DIFF_SNIPPET_MAX_BYTES is cut
 * at a line boundary and suffixed with a truncation marker.
 *
 * TODO: Unify this with the other snippet logic.
 *
 * @param fileAContents Content before the change
 * @param fileBContents Content after the change
 * @returns The rendered snippet, or '' when no patch could be produced
 */
export function getSnippetForTwoFileDiff(
  fileAContents: string,
  fileBContents: string,
): string {
  const patch = structuredPatch(
    'file.txt',
    'file.txt',
    fileAContents,
    fileBContents,
    undefined,
    undefined,
    {
      context: 8,
      timeout: DIFF_TIMEOUT_MS,
    },
  )

  if (!patch) {
    return ''
  }

  const full = patch.hunks
    .map(hunk =>
      addLineNumbers({
        // The rendered lines all come from file B, so number them by their
        // position in file B. oldStart drifts away from that position for
        // every hunk that follows an insertion or deletion.
        startLine: hunk.newStart,
        content: hunk.lines
          // Filter out deleted lines AND diff metadata lines
          .filter(line => !line.startsWith('-') && !line.startsWith('\\'))
          .map(line => line.slice(1))
          .join('\n'),
      }),
    )
    .join('\n...\n')

  if (full.length <= DIFF_SNIPPET_MAX_BYTES) {
    return full
  }

  // Truncate at the last line boundary that fits within the cap.
  // Marker format matches BashTool/utils.ts.
  const lineBreak = full.lastIndexOf('\n', DIFF_SNIPPET_MAX_BYTES)
  const kept = full.slice(0, lineBreak > 0 ? lineBreak : DIFF_SNIPPET_MAX_BYTES)
  // NOTE(review): when the cut lands exactly on a newline, that newline is
  // presumably counted here as well, which would make the reported figure one
  // too high — confirm against countCharInString's start-index semantics.
  const remaining = countCharInString(full, '\n', kept.length) + 1
  return `${kept}\n\n... [${remaining} lines truncated] ...`
}
407
// Number of unchanged lines shown before the first and after the last hunk.
const CONTEXT_LINES = 4

/**
 * Gets a snippet from the patched file covering every hunk of a patch plus
 * CONTEXT_LINES of surrounding context, formatted with line numbers.
 * @param patch The diff hunks to use for determining snippet location
 * @param newFile The file content after applying the patch
 * @returns The snippet text with line numbers and the starting line number
 *   (1-based); an empty snippet starting at line 1 when the patch has no hunks
 */
export function getSnippetForPatch(
  patch: StructuredPatchHunk[],
  newFile: string,
): { formattedSnippet: string; startLine: number } {
  if (patch.length === 0) {
    // No changes, return empty snippet
    return { formattedSnippet: '', startLine: 1 }
  }

  // The snippet is cut out of the NEW file, so the window has to be expressed
  // in new-file line numbers. Using oldStart places hunks that follow an
  // insertion or deletion at the wrong lines, and can drop them from the
  // window altogether.
  let firstLine = Infinity
  let lastLine = -Infinity
  for (const hunk of patch) {
    firstLine = Math.min(firstLine, hunk.newStart)
    lastLine = Math.max(lastLine, hunk.newStart + (hunk.newLines || 0) - 1)
  }

  // Widen by the context margin; line numbers are 1-based.
  const startLine = Math.max(1, firstLine - CONTEXT_LINES)
  const endLine = lastLine + CONTEXT_LINES

  const snippet = newFile
    .split(/\r?\n/)
    .slice(startLine - 1, endLine)
    .join('\n')

  return {
    formattedSnippet: addLineNumbers({ content: snippet, startLine }),
    startLine,
  }
}
458
/**
 * Gets a snippet of the edited file around a single replacement, using the
 * original algorithm from FileEditTool.tsx.
 * @param originalFile The original file content
 * @param oldString The text to replace
 * @param newString The text to replace it with
 * @param contextLines The number of lines to show before and after the change
 * @returns The snippet (without line numbers) and its 1-based starting line
 */
export function getSnippet(
  originalFile: string,
  oldString: string,
  newString: string,
  contextLines: number = 4,
): { snippet: string; startLine: number } {
  const lineBreak = /\r?\n/

  // Everything before the first occurrence of oldString tells us which
  // (0-based) line the replacement starts on.
  const [textBeforeEdit = ''] = originalFile.split(oldString)
  const editLineIndex = textBeforeEdit.split(lineBreak).length - 1

  const editedLines = applyEditToFile(
    originalFile,
    oldString,
    newString,
  ).split(lineBreak)

  // Window: contextLines before the edit, the replacement's own lines, and
  // contextLines after it.
  const firstIndex = Math.max(0, editLineIndex - contextLines)
  const endIndex =
    editLineIndex + contextLines + newString.split(lineBreak).length

  return {
    snippet: editedLines.slice(firstIndex, endIndex).join('\n'),
    startLine: firstIndex + 1,
  }
}
494
/**
 * Converts each hunk of a structured patch into one edit.
 *
 * For every hunk, `old_string` is the hunk's context and deleted lines joined
 * with '\n', and `new_string` is its context and added lines joined with '\n'.
 * Lines with any other prefix (such as "\ No newline at end of file") are
 * ignored. `replace_all` is always false.
 *
 * @param patch The hunks to convert
 * @returns One edit per hunk, in hunk order
 */
export function getEditsForPatch(patch: StructuredPatchHunk[]): FileEdit[] {
  return patch.map(hunk => {
    const oldLines: string[] = []
    const newLines: string[] = []

    for (const line of hunk.lines) {
      // The first character is the diff marker; the rest is the line text.
      const text = line.slice(1)
      switch (line[0]) {
        case ' ':
          // Context line - appears in both versions
          oldLines.push(text)
          newLines.push(text)
          break
        case '-':
          // Deleted line - only in old version
          oldLines.push(text)
          break
        case '+':
          // Added line - only in new version
          newLines.push(text)
          break
        default:
          // Not a content line; nothing to record.
          break
      }
    }

    return {
      old_string: oldLines.join('\n'),
      new_string: newLines.join('\n'),
      replace_all: false,
    }
  })
}
525
/**
 * Maps each sanitized token to the original text it stands for.
 * Claude never sees the original strings (they are sanitized in the API), so
 * it writes the sanitized form in its edit response; this table is used to
 * restore the original form.
 */
const DESANITIZATIONS: Record<string, string> = {
  '<fnr>': '<function_results>',
  '<n>': '<name>',
  '</n>': '</name>',
  '<o>': '<output>',
  '</o>': '</output>',
  '<e>': '<error>',
  '</e>': '</error>',
  '<s>': '<system>',
  '</s>': '</system>',
  '<r>': '<result>',
  '</r>': '</result>',
  '< META_START >': '<META_START>',
  '< META_END >': '<META_END>',
  '< EOT >': '<EOT>',
  '< META >': '<META>',
  '< SOS >': '<SOS>',
  '\n\nH:': '\n\nHuman:',
  '\n\nA:': '\n\nAssistant:',
}

/**
 * Rewrites every sanitized token in `matchString` back to its original form,
 * walking the DESANITIZATIONS table in declaration order.
 * @returns The rewritten string, plus the table entries that actually changed
 *   it, in the order they were applied
 */
function desanitizeMatchString(matchString: string): {
  result: string
  appliedReplacements: Array<{ from: string; to: string }>
} {
  const appliedReplacements: Array<{ from: string; to: string }> = []
  let result = matchString

  for (const [from, to] of Object.entries(DESANITIZATIONS)) {
    // Skip entries whose token does not occur in the string.
    if (!result.includes(from)) {
      continue
    }
    result = result.split(from).join(to)
    appliedReplacements.push({ from, to })
  }

  return { result, appliedReplacements }
}
/**
 * Normalize the input for the FileEditTool.
 *
 * For every edit, trailing whitespace is stripped from `new_string` (except
 * for Markdown files). If `old_string` does not occur in the file as given
 * but its de-sanitized form does, the edit is rewritten to the de-sanitized
 * form and the same replacements are applied to `new_string`.
 *
 * @returns The normalized input, or the original input untouched when there
 *   are no edits or the file could not be read
 */
export function normalizeFileEditInput({
  file_path,
  edits,
}: {
  file_path: string
  edits: EditInput[]
}): {
  file_path: string
  edits: EditInput[]
} {
  if (edits.length === 0) {
    return { file_path, edits }
  }

  // Markdown uses two trailing spaces as a hard line break — stripping would
  // silently change semantics, so .md/.mdx files keep their trailing whitespace.
  const keepTrailingWhitespace = /\.(md|mdx)$/i.test(file_path)

  try {
    // Cached read, to avoid redundant I/O. A missing file surfaces as an
    // ENOENT throw handled by the catch below (no TOCTOU pre-check).
    const fileContent = readFileSyncCached(expandPath(file_path))

    const normalizeEdit = ({ old_string, new_string, replace_all }: EditInput) => {
      const cleanedNewString = keepTrailingWhitespace
        ? new_string
        : stripTrailingWhitespace(new_string)
      const withOriginalOldString = {
        old_string,
        new_string: cleanedNewString,
        replace_all,
      }

      // An exact match needs no further rewriting.
      if (fileContent.includes(old_string)) {
        return withOriginalOldString
      }

      // Otherwise see whether undoing API sanitization produces a match.
      const { result: desanitizedOldString, appliedReplacements } =
        desanitizeMatchString(old_string)
      if (!fileContent.includes(desanitizedOldString)) {
        return withOriginalOldString
      }

      // Mirror exactly the replacements that changed old_string.
      const desanitizedNewString = appliedReplacements.reduce(
        (text, { from, to }) => text.replaceAll(from, to),
        cleanedNewString,
      )

      return {
        old_string: desanitizedOldString,
        new_string: desanitizedNewString,
        replace_all,
      }
    }

    return {
      file_path,
      edits: edits.map(edit => normalizeEdit(edit)),
    }
  } catch (error) {
    // Any read failure falls through to returning the original input.
    // ENOENT is expected when the file doesn't exist yet (e.g., new file),
    // so only other errors are logged.
    if (!isENOENT(error)) {
      logError(error)
    }
  }

  return { file_path, edits }
}
658
/**
 * Decides whether two edit lists are equivalent for a given starting content.
 *
 * Lists that are field-for-field identical are equivalent outright. Otherwise
 * both lists are applied to `originalContent`: they are equivalent when both
 * succeed with the same resulting text, or when both fail with the same error
 * message. One succeeding while the other fails means they are not.
 */
export function areFileEditsEquivalent(
  edits1: FileEdit[],
  edits2: FileEdit[],
  originalContent: string,
): boolean {
  // Fast path: the two lists are literally identical.
  const literallyIdentical =
    edits1.length === edits2.length &&
    edits1.every((left, position) => {
      const right = edits2[position]
      return (
        right !== undefined &&
        left.old_string === right.old_string &&
        left.new_string === right.new_string &&
        left.replace_all === right.replace_all
      )
    })
  if (literallyIdentical) {
    return true
  }

  type Outcome =
    | { ok: true; updatedFile: string }
    | { ok: false; message: string }

  // Applies one edit list to the original content, capturing either the
  // resulting text or the failure message.
  const attempt = (edits: FileEdit[]): Outcome => {
    try {
      const { updatedFile } = getPatchForEdits({
        filePath: 'temp',
        fileContents: originalContent,
        edits,
      })
      return { ok: true, updatedFile }
    } catch (e) {
      return { ok: false, message: errorMessage(e) }
    }
  }

  const first = attempt(edits1)
  const second = attempt(edits2)

  // Both failed: equivalent only if they failed the same way.
  if (!first.ok && !second.ok) {
    return first.message === second.message
  }

  // Exactly one failed.
  if (!first.ok || !second.ok) {
    return false
  }

  // Both succeeded: compare the resulting content.
  return first.updatedFile === second.updatedFile
}
727
/**
 * Unified check for whether two FileEditTool inputs are equivalent.
 *
 * Inputs for different paths are never equivalent. Inputs whose edit lists
 * are field-for-field identical are equivalent without touching the disk.
 * Otherwise the file is read and the decision is delegated to
 * `areFileEditsEquivalent`.
 *
 * @throws Any file read error other than ENOENT
 */
export function areFileEditsInputsEquivalent(
  input1: {
    file_path: string
    edits: FileEdit[]
  },
  input2: {
    file_path: string
    edits: FileEdit[]
  },
): boolean {
  // Fast path: different files
  if (input1.file_path !== input2.file_path) {
    return false
  }

  const { edits: firstEdits } = input1
  const { edits: secondEdits } = input2

  // Fast path: literal equality
  const literallyIdentical =
    firstEdits.length === secondEdits.length &&
    firstEdits.every((left, position) => {
      const right = secondEdits[position]
      return (
        right !== undefined &&
        left.old_string === right.old_string &&
        left.new_string === right.new_string &&
        left.replace_all === right.replace_all
      )
    })
  if (literallyIdentical) {
    return true
  }

  // Semantic comparison needs the file content. A missing file is treated as
  // empty content (no TOCTOU pre-check); any other read error propagates.
  // NOTE(review): the path is read as given, without the expandPath() step
  // that normalizeFileEditInput applies — confirm callers pass an expanded path.
  let contentOnDisk: string
  try {
    contentOnDisk = readFileSyncCached(input1.file_path)
  } catch (error) {
    if (!isENOENT(error)) {
      throw error
    }
    contentOnDisk = ''
  }

  return areFileEditsEquivalent(firstEdits, secondEdits, contentOnDisk)
}