// source dump of claude code
// at main 318 lines 9.2 kB view raw
import memoize from 'lodash-es/memoize.js'
import {
  extractOutputRedirections,
  splitCommandWithOperators,
} from './commands.js'
import type { Node } from './parser.js'
import {
  analyzeCommand,
  type TreeSitterAnalysis,
} from './treeSitterAnalysis.js'

/** One output redirection found in a command string. */
export type OutputRedirection = {
  /** Text of the redirection target. */
  target: string
  /** Redirection operator as it appears in the command. */
  operator: '>' | '>>'
}

/**
 * Interface for parsed command implementations.
 * Both tree-sitter and regex fallback implementations conform to this.
 */
export interface IParsedCommand {
  /** The command string exactly as it was handed to the parser. */
  readonly originalCommand: string
  /** Returns the original command string. */
  toString(): string
  /** Returns the command split into its `|`-separated segments. */
  getPipeSegments(): string[]
  /** Returns the command text with output redirections removed. */
  withoutOutputRedirections(): string
  /** Returns the output redirections found in the command. */
  getOutputRedirections(): OutputRedirection[]
  /**
   * Returns tree-sitter analysis data if available.
   * Returns null for the regex fallback implementation.
   */
  getTreeSitterAnalysis(): TreeSitterAnalysis | null
}

/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Regex-based fallback implementation using shell-quote parser.
 * Used when tree-sitter is not available.
 * Exported for testing purposes.
 */
export class RegexParsedCommand_DEPRECATED implements IParsedCommand {
  readonly originalCommand: string

  constructor(command: string) {
    this.originalCommand = command
  }

  toString(): string {
    return this.originalCommand
  }

  /**
   * Splits the command on `|` tokens produced by splitCommandWithOperators.
   * The parts between two pipes are re-joined with a single space. Empty
   * groups are skipped. If no segment is produced, or the splitter throws,
   * the whole original command is returned as the only segment.
   */
  getPipeSegments(): string[] {
    try {
      const parts = splitCommandWithOperators(this.originalCommand)
      const segments: string[] = []
      let currentSegment: string[] = []

      for (const part of parts) {
        if (part === '|') {
          if (currentSegment.length > 0) {
            segments.push(currentSegment.join(' '))
            currentSegment = []
          }
        } else {
          currentSegment.push(part)
        }
      }

      // Flush whatever follows the last pipe.
      if (currentSegment.length > 0) {
        segments.push(currentSegment.join(' '))
      }

      return segments.length > 0 ? segments : [this.originalCommand]
    } catch {
      // Best-effort: treat an unsplittable command as a single segment.
      return [this.originalCommand]
    }
  }

  /**
   * Returns the command with output redirections stripped. The original
   * string is returned untouched when it contains no `>` character or when
   * extractOutputRedirections reports no redirections.
   */
  withoutOutputRedirections(): string {
    // Fast path: without a '>' there is nothing to extract.
    if (!this.originalCommand.includes('>')) {
      return this.originalCommand
    }
    const { commandWithoutRedirections, redirections } =
      extractOutputRedirections(this.originalCommand)
    return redirections.length > 0
      ? commandWithoutRedirections
      : this.originalCommand
  }

  /** Returns the redirections reported by extractOutputRedirections. */
  getOutputRedirections(): OutputRedirection[] {
    const { redirections } = extractOutputRedirections(this.originalCommand)
    return redirections
  }

  /** The regex fallback has no tree-sitter data; always returns null. */
  getTreeSitterAnalysis(): TreeSitterAnalysis | null {
    return null
  }
}

/**
 * An OutputRedirection together with the start/end offsets of the
 * `file_redirect` node it was read from.
 */
type RedirectionNode = OutputRedirection & {
  startIndex: number
  endIndex: number
}

/**
 * Calls `visitor` on `node` and then, recursively, on each of its children
 * in order (pre-order, depth-first).
 */
function visitNodes(node: Node, visitor: (node: Node) => void): void {
  visitor(node)
  for (const child of node.children) {
    visitNodes(child, visitor)
  }
}

/**
 * Collects the startIndex of every `|` child of every `pipeline` node in
 * the tree, returned in ascending order.
 */
function extractPipePositions(rootNode: Node): number[] {
  const pipePositions: number[] = []
  visitNodes(rootNode, node => {
    if (node.type === 'pipeline') {
      for (const child of node.children) {
        if (child.type === '|') {
          pipePositions.push(child.startIndex)
        }
      }
    }
  })
  // visitNodes is depth-first. For `a | b && c | d`, the outer `list` nests
  // the second pipeline as a sibling of the first, so the outer `|` is
  // visited before the inner one — positions arrive out of order.
  // getPipeSegments iterates them to slice left-to-right, so sort here.
  return pipePositions.sort((a, b) => a - b)
}

/**
 * Collects every `file_redirect` node that has both a `>`/`>>` child and a
 * `word` child. Redirect nodes missing either one are skipped.
 */
function extractRedirectionNodes(rootNode: Node): RedirectionNode[] {
  const redirections: RedirectionNode[] = []
  visitNodes(rootNode, node => {
    if (node.type === 'file_redirect') {
      const children = node.children
      const op = children.find(c => c.type === '>' || c.type === '>>')
      const target = children.find(c => c.type === 'word')
      if (op && target) {
        redirections.push({
          startIndex: node.startIndex,
          endIndex: node.endIndex,
          target: target.text,
          // Safe: the find() predicate above only admits these two types.
          operator: op.type as '>' | '>>',
        })
      }
    }
  })
  return redirections
}

/**
 * IParsedCommand implementation backed by positions extracted from a
 * tree-sitter AST.
 */
class TreeSitterParsedCommand implements IParsedCommand {
  readonly originalCommand: string
  // Tree-sitter's startIndex/endIndex are UTF-8 byte offsets, but JS
  // String.slice() uses UTF-16 code-unit indices. For ASCII they coincide;
  // for multi-byte code points (e.g. `—` U+2014: 3 UTF-8 bytes, 1 code unit)
  // they diverge and slicing the string directly lands mid-token. Slicing
  // the UTF-8 Buffer with tree-sitter's byte offsets and decoding back to
  // string is correct regardless of code-point width.
  private readonly commandBytes: Buffer
  private readonly pipePositions: number[]
  private readonly redirectionNodes: RedirectionNode[]
  private readonly treeSitterAnalysis: TreeSitterAnalysis

  constructor(
    command: string,
    pipePositions: number[],
    redirectionNodes: RedirectionNode[],
    treeSitterAnalysis: TreeSitterAnalysis,
  ) {
    this.originalCommand = command
    this.commandBytes = Buffer.from(command, 'utf8')
    this.pipePositions = pipePositions
    this.redirectionNodes = redirectionNodes
    this.treeSitterAnalysis = treeSitterAnalysis
  }

  toString(): string {
    return this.originalCommand
  }

  /**
   * Slices the command bytes at each recorded pipe position and returns the
   * trimmed, non-empty pieces. With no pipe positions the original command
   * is returned as the only segment.
   */
  getPipeSegments(): string[] {
    if (this.pipePositions.length === 0) {
      return [this.originalCommand]
    }

    const segments: string[] = []
    let currentStart = 0

    for (const pipePos of this.pipePositions) {
      const segment = this.commandBytes
        .subarray(currentStart, pipePos)
        .toString('utf8')
        .trim()
      if (segment) {
        segments.push(segment)
      }
      // Resume just past the one-byte `|`.
      currentStart = pipePos + 1
    }

    // Everything after the last pipe.
    const lastSegment = this.commandBytes
      .subarray(currentStart)
      .toString('utf8')
      .trim()
    if (lastSegment) {
      segments.push(lastSegment)
    }

    return segments
  }

  /**
   * Returns the command with the byte range of every recorded redirection
   * removed, then trimmed, with each run of whitespace collapsed to a single
   * space. The collapse applies to the whole result string, so whitespace
   * runs inside quoted arguments are collapsed as well.
   */
  withoutOutputRedirections(): string {
    if (this.redirectionNodes.length === 0) return this.originalCommand

    // Remove from the end backwards so earlier offsets stay valid.
    const sorted = [...this.redirectionNodes].sort(
      (a, b) => b.startIndex - a.startIndex,
    )

    let result = this.commandBytes
    for (const redir of sorted) {
      result = Buffer.concat([
        result.subarray(0, redir.startIndex),
        result.subarray(redir.endIndex),
      ])
    }
    return result.toString('utf8').trim().replace(/\s+/g, ' ')
  }

  /** Returns the recorded redirections without their offset fields. */
  getOutputRedirections(): OutputRedirection[] {
    return this.redirectionNodes.map(({ target, operator }) => ({
      target,
      operator,
    }))
  }

  getTreeSitterAnalysis(): TreeSitterAnalysis {
    return this.treeSitterAnalysis
  }
}

/**
 * Probes whether the tree-sitter parser is usable by importing it and
 * parsing `echo test`. Resolves to false if the import or the parse throws,
 * or if the parse returns null. Wrapped in memoize so repeated calls reuse
 * the first result.
 */
const getTreeSitterAvailable = memoize(async (): Promise<boolean> => {
  try {
    const { parseCommand } = await import('./parser.js')
    const testResult = await parseCommand('echo test')
    return testResult !== null
  } catch {
    return false
  }
})

/**
 * Build a TreeSitterParsedCommand from a pre-parsed AST root. Lets callers
 * that already have the tree skip the redundant native.parse that
 * ParsedCommand.parse would do.
 */
export function buildParsedCommandFromRoot(
  command: string,
  root: Node,
): IParsedCommand {
  const pipePositions = extractPipePositions(root)
  const redirectionNodes = extractRedirectionNodes(root)
  const analysis = analyzeCommand(root, command)
  return new TreeSitterParsedCommand(
    command,
    pipePositions,
    redirectionNodes,
    analysis,
  )
}

/**
 * Parses `command` with tree-sitter when it is available. Falls back to the
 * regex implementation when tree-sitter is unavailable, returns no data, or
 * throws. Resolves to null only for an empty command string.
 */
async function doParse(command: string): Promise<IParsedCommand | null> {
  if (!command) return null

  const treeSitterAvailable = await getTreeSitterAvailable()
  if (treeSitterAvailable) {
    try {
      const { parseCommand } = await import('./parser.js')
      const data = await parseCommand(command)
      if (data) {
        // Native NAPI parser returns plain JS objects (no WASM handles);
        // nothing to free — extract directly.
        return buildParsedCommandFromRoot(command, data.rootNode)
      }
    } catch {
      // Fall through to regex implementation
    }
  }

  // Fallback to regex implementation
  return new RegexParsedCommand_DEPRECATED(command)
}

// Single-entry cache: legacy callers (bashCommandIsSafeAsync,
// buildSegmentWithoutRedirections) may call ParsedCommand.parse repeatedly
// with the same command string. Each parse() is ~1 native.parse + ~6 tree
// walks, so caching the most recent command skips the redundant work.
// Size-1 bound avoids leaking TreeSitterParsedCommand instances.
// Command string of the most recent parse() call; undefined before the first call.
let lastCmd: string | undefined
// Promise produced by doParse for lastCmd; undefined before the first call.
let lastResult: Promise<IParsedCommand | null> | undefined

/**
 * ParsedCommand provides methods for working with shell commands.
 * Uses tree-sitter when available for quote-aware parsing,
 * falls back to regex-based parsing otherwise.
 */
export const ParsedCommand = {
  /**
   * Parse a command string and return a ParsedCommand instance.
   * Calling it again with the same string as the previous call returns the
   * same promise instead of parsing a second time.
   * Resolves to null when doParse yields no result (an empty command).
   */
  parse(command: string): Promise<IParsedCommand | null> {
    // Miss: nothing cached yet, or the cached entry is for another command.
    if (lastResult === undefined || lastCmd !== command) {
      const pending = doParse(command)
      lastCmd = command
      lastResult = pending
      return pending
    }
    return lastResult
  },
}