source dump of claude code
at main 322 lines 9.6 kB view raw
/**
 * Parser for sed edit commands (-i flag substitutions)
 * Extracts file paths and substitution patterns to enable file-edit-style rendering
 */

import { randomBytes } from 'crypto'
import { tryParseShellCommand } from '../../utils/bash/shellQuote.js'

/**
 * Characters whose escaping is inverted between POSIX BRE and JavaScript:
 * in BRE the backslash-escaped form is the operator and the bare form is a
 * literal, whereas JavaScript (like ERE) treats the bare form as the operator.
 */
const BRE_SWAPPED_METACHARS = '+?|(){}'

export type SedEditInfo = {
  /** The file path being edited */
  filePath: string
  /** The search pattern (regex) */
  pattern: string
  /** The replacement string */
  replacement: string
  /** Substitution flags (g, i, etc.) */
  flags: string
  /** Whether to use extended regex (-E or -r flag) */
  extendedRegex: boolean
}

/** The three sections of an `s/pattern/replacement/flags` expression. */
type SubstitutionParts = Pick<SedEditInfo, 'pattern' | 'replacement' | 'flags'>

/** One pre-parsed piece of a sed replacement string. */
type ReplacementPart =
  | { kind: 'text'; value: string }
  | { kind: 'match' }
  | { kind: 'group'; index: number }

/**
 * Check if a command is a sed in-place edit command
 * Returns true only for simple sed -i 's/pattern/replacement/flags' file commands
 *
 * @param command - Raw shell command line.
 * @returns `true` exactly when `parseSedEditCommand` can parse the command.
 */
export function isSedInPlaceEdit(command: string): boolean {
  return parseSedEditCommand(command) !== null
}

/**
 * Parse a sed edit command and extract the edit information
 *
 * Accepted shape: `sed` with an in-place flag (`-i`, `-i<suffix>`,
 * `--in-place`, `--in-place=<suffix>`), optionally `-E`/`-r`/
 * `--regexp-extended`, exactly one `s/pattern/replacement/flags` expression
 * (bare, or via `-e`/`--expression`), and exactly one file operand.
 *
 * @param command - Raw shell command line.
 * @returns The parsed edit, or `null` if the command is not a sed in-place
 *   edit this parser understands.
 */
export function parseSedEditCommand(command: string): SedEditInfo | null {
  const trimmed = command.trim()

  // Must start with sed
  const sedMatch = trimmed.match(/^\s*sed\s+/)
  if (!sedMatch) return null

  const parseResult = tryParseShellCommand(trimmed.slice(sedMatch[0].length))
  if (!parseResult.success) return null

  // Keep plain-word tokens. Comment tokens are inert and skipped. Any other
  // non-string token (glob, pipe, redirect, `&&`, ...) changes what the shell
  // runs, so the command is not a simple sed edit. Dropping such tokens
  // silently would, for example, turn `sed 's/a/b/' f > -i` into an apparent
  // in-place edit of `f`.
  const args: string[] = []
  for (const token of parseResult.tokens) {
    if (typeof token === 'string') {
      args.push(token)
      continue
    }
    const isComment =
      typeof token === 'object' && token !== null && 'comment' in token
    if (!isComment) return null
  }

  let hasInPlaceFlag = false
  let extendedRegex = false
  let expression: string | null = null
  let filePath: string | null = null

  for (let i = 0; i < args.length; i++) {
    const arg = args[i]!

    // -i / --in-place, optionally followed by a separate backup suffix.
    if (arg === '-i' || arg === '--in-place') {
      hasInPlaceFlag = true
      // On macOS, -i requires a suffix argument (even if empty string).
      // Only an empty string or a dot-prefixed word is taken as that suffix,
      // so flags and sed expressions are never consumed here.
      const next = args[i + 1]
      if (next !== undefined && (next === '' || next.startsWith('.'))) {
        i++ // Skip the backup suffix
      }
      continue
    }
    // -i.bak or --in-place=.bak (suffix attached to the flag)
    if (arg.startsWith('-i') || arg.startsWith('--in-place=')) {
      hasInPlaceFlag = true
      continue
    }

    // Extended regex flags
    if (arg === '-E' || arg === '-r' || arg === '--regexp-extended') {
      extendedRegex = true
      continue
    }

    // -e / --expression with the script in the following argument.
    // Only a single expression is supported.
    if (arg === '-e' || arg === '--expression') {
      const next = args[i + 1]
      if (next === undefined || expression !== null) return null
      expression = next
      i++
      continue
    }
    if (arg.startsWith('--expression=')) {
      if (expression !== null) return null
      expression = arg.slice('--expression='.length)
      continue
    }

    // Unknown flag - not safe to parse
    if (arg.startsWith('-')) return null

    // Non-flag arguments: first is the expression, second is the file path.
    if (expression === null) {
      expression = arg
    } else if (filePath === null) {
      filePath = arg
    } else {
      // More than one file - not supported for simple rendering
      return null
    }
  }

  // Must have -i flag, expression, and file path
  if (!hasInPlaceFlag || !expression || !filePath) return null

  const parts = parseSubstitution(expression)
  if (parts === null) return null

  return { filePath, ...parts, extendedRegex }
}

/**
 * Split an `s/pattern/replacement/flags` expression into its three sections.
 * Only `/` is supported as the delimiter. Backslash escapes are copied through
 * unchanged (backslash included), so an escaped `/` does not end a section.
 *
 * @returns The sections, or `null` when the expression is not a complete
 *   `s/.../.../` command, has an empty pattern, or carries unsupported flags.
 */
function parseSubstitution(expression: string): SubstitutionParts | null {
  if (!expression.startsWith('s/')) return null

  let pattern = ''
  let replacement = ''
  let flags = ''
  let section: 'pattern' | 'replacement' | 'flags' = 'pattern'

  for (let i = 2; i < expression.length; i++) {
    const ch = expression[i]!
    let piece = ch

    if (ch === '\\' && i + 1 < expression.length) {
      // Escaped character: keep the pair together and skip past it.
      piece = ch + expression[i + 1]!
      i++
    } else if (ch === '/') {
      if (section === 'pattern') {
        section = 'replacement'
      } else if (section === 'replacement') {
        section = 'flags'
      } else {
        // Extra delimiter in flags - unexpected
        return null
      }
      continue
    }

    if (section === 'pattern') {
      pattern += piece
    } else if (section === 'replacement') {
      replacement += piece
    } else {
      flags += piece
    }
  }

  // Both delimiters after the pattern must have been seen.
  if (section !== 'flags') return null

  // sed rejects an empty pattern when there is no previous regex to reuse,
  // which is always the case for a single-expression script.
  if (pattern === '') return null

  // Validate flags - only allow safe substitution flags
  const validFlags = /^[gpimIM1-9]*$/
  if (!validFlags.test(flags)) return null

  return { pattern, replacement, flags }
}

/**
 * Convert a sed pattern to JavaScript regex source in a single escape-aware
 * pass.
 *
 * - `\/` becomes `/` in both modes.
 * - In BRE mode (no -E flag) the characters in `BRE_SWAPPED_METACHARS` have
 *   their escaping inverted: `\+` becomes `+` and a bare `+` becomes `\+`.
 * - Every other character or escape pair is copied unchanged, so POSIX
 *   bracket classes such as `[[:alpha:]]` are not translated.
 */
function toJsPattern(pattern: string, extendedRegex: boolean): string {
  let out = ''
  for (let i = 0; i < pattern.length; i++) {
    const ch = pattern[i]!
    const next = pattern[i + 1]

    if (ch === '\\' && next !== undefined) {
      i++
      if (next === '/') {
        out += '/'
      } else if (!extendedRegex && BRE_SWAPPED_METACHARS.includes(next)) {
        out += next
      } else {
        // Includes `\\`, which stays a literal backslash.
        out += ch + next
      }
    } else if (!extendedRegex && BRE_SWAPPED_METACHARS.includes(ch)) {
      out += '\\' + ch
    } else {
      out += ch
    }
  }
  return out
}

/**
 * Pre-parse a sed replacement string so it can be expanded per match without
 * going through JavaScript's `$`-based replacement syntax (a literal `$` in
 * the sed replacement therefore stays literal).
 *
 * Recognised: `&` (whole match), `\1`-`\9` (capture group), `\&`, `\/`, `\\`
 * (the escaped character itself), `\n` or backslash-newline (newline) and `\t`
 * (tab). Any other escape pair is kept verbatim, backslash included.
 */
function parseReplacement(replacement: string): ReplacementPart[] {
  const parts: ReplacementPart[] = []
  let text = ''
  const flushText = (): void => {
    if (text !== '') {
      parts.push({ kind: 'text', value: text })
      text = ''
    }
  }

  for (let i = 0; i < replacement.length; i++) {
    const ch = replacement[i]!
    if (ch === '&') {
      flushText()
      parts.push({ kind: 'match' })
      continue
    }

    const next = replacement[i + 1]
    if (ch !== '\\' || next === undefined) {
      text += ch
      continue
    }

    i++ // consume the escaped character
    if (next >= '1' && next <= '9') {
      flushText()
      parts.push({ kind: 'group', index: Number(next) })
    } else if (next === 'n' || next === '\n') {
      text += '\n'
    } else if (next === 't') {
      text += '\t'
    } else if (next === '&' || next === '/' || next === '\\') {
      text += next
    } else {
      text += ch + next
    }
  }

  flushText()
  return parts
}

/**
 * Compile the converted pattern and count its capture groups.
 * The count comes from matching `pattern|` against the empty string: the
 * empty alternative always matches, and the result array has one slot per
 * capture group plus the whole match.
 *
 * @returns `null` when the pattern is not a valid JavaScript regex.
 */
function compilePattern(
  jsPattern: string,
  regexFlags: string,
): { regex: RegExp; groupCount: number } | null {
  try {
    const regex = new RegExp(jsPattern, regexFlags)
    const probe = new RegExp(`${jsPattern}|`).exec('')
    return { regex, groupCount: probe ? probe.length - 1 : 0 }
  } catch {
    return null
  }
}

/**
 * Apply a sed substitution to file content
 *
 * The substitution is applied to each line separately, as sed does:
 * - without `g`, the first match on every line is replaced; with `g`, all;
 * - a numeric flag N selects the Nth match on each line (N and later with `g`);
 * - `^` and `$` anchor to each line;
 * - with the `p` flag, a line on which a substitution happened is emitted
 *   twice (the parser rejects `-n`, so auto-print is always on);
 * - the `m`/`M` flags are ignored, because a line never contains a newline.
 *
 * @param content - Current file content.
 * @param sedInfo - Parsed substitution, normally from `parseSedEditCommand`.
 * @returns The new content. `content` is returned unchanged when it is empty,
 *   when the pattern is not a valid JavaScript regex, or when the replacement
 *   references a capture group the pattern does not define.
 */
export function applySedSubstitution(
  content: string,
  sedInfo: SedEditInfo,
): string {
  // An empty file has no lines, so there is nothing to substitute.
  if (content === '') return content

  const { flags } = sedInfo
  const replaceAll = flags.includes('g')
  const printOnChange = flags.includes('p')
  const occurrenceMatch = /\d+/.exec(flags)
  const occurrence = occurrenceMatch
    ? Math.max(1, Number.parseInt(occurrenceMatch[0], 10))
    : 1

  // Always scan globally so matches can be counted per line; the occurrence
  // logic below decides which of them are replaced. dotAll lets `.` match a
  // carriage return, which is an ordinary character inside a line.
  let regexFlags = 'gs'
  if (flags.includes('i') || flags.includes('I')) {
    regexFlags += 'i'
  }

  const compiled = compilePattern(
    toJsPattern(sedInfo.pattern, sedInfo.extendedRegex),
    regexFlags,
  )
  // If regex is invalid, return original content
  if (compiled === null) return content
  const { regex, groupCount } = compiled

  const parts = parseReplacement(sedInfo.replacement)
  // A back-reference to a group the pattern lacks makes the command invalid,
  // so nothing would be edited.
  const hasInvalidReference = parts.some(
    part => part.kind === 'group' && part.index > groupCount,
  )
  if (hasInvalidReference) return content

  const editLine = (line: string): string => {
    const state = { seen: 0, changed: false }
    const edited = line.replace(
      regex,
      (match: string, ...rest: unknown[]): string => {
        state.seen++
        const selected = replaceAll
          ? state.seen >= occurrence
          : state.seen === occurrence
        if (!selected) return match

        state.changed = true
        return parts
          .map(part => {
            if (part.kind === 'text') return part.value
            if (part.kind === 'match') return match
            // rest holds the capture groups first; a group that did not
            // participate in the match is undefined and expands to nothing.
            const group = rest[part.index - 1]
            return typeof group === 'string' ? group : ''
          })
          .join('')
      },
    )
    return state.changed && printOnChange ? `${edited}\n${edited}` : edited
  }

  // A trailing newline terminates the last line rather than starting an
  // extra empty one, so strip it before splitting and restore it afterwards.
  const hasTrailingNewline = content.endsWith('\n')
  const lines = (hasTrailingNewline ? content.slice(0, -1) : content).split(
    '\n',
  )
  const result = lines.map(editLine).join('\n')
  return hasTrailingNewline ? `${result}\n` : result
}