// Source dump of Claude Code
// at main · 230 lines · 6.7 kB (view raw)
import { feature } from 'bun:bundle'
import { logEvent } from '../../services/analytics/index.js'
import { logForDebugging } from '../debug.js'
import {
  ensureParserInitialized,
  getParserModule,
  type TsNode,
} from './bashParser.js'

/** Public alias for the parser's syntax-tree node type. */
export type Node = TsNode

/** Result of a successful {@link parseCommand} call. */
export interface ParsedCommandData {
  /** Root of the tree returned by the parser module. */
  rootNode: Node
  /** Text of each `variable_assignment` that precedes the command name. */
  envVars: string[]
  /** First `command` / `declaration_command` node found, or null if none. */
  commandNode: Node | null
  /** The command string exactly as passed in. */
  originalCommand: string
}

/** Commands longer than this are rejected (null) without being parsed. */
const MAX_COMMAND_LENGTH = 10000
/** Leading keywords reported for `declaration_command` nodes. */
const DECLARATION_COMMANDS = new Set([
  'export',
  'declare',
  'typeset',
  'readonly',
  'local',
  'unset',
  'unsetenv',
])
/** Child node types collected as plain arguments. */
const ARGUMENT_TYPES = new Set(['word', 'string', 'raw_string', 'number'])
/** Child node types that stop argument collection. */
const SUBSTITUTION_TYPES = new Set([
  'command_substitution',
  'process_substitution',
])
/** Node types treated as "a command" by findCommandNode. */
const COMMAND_TYPES = new Set(['command', 'declaration_command'])

let logged = false
/**
 * Reports the parser-module load outcome to the debug log and analytics.
 * Only the first call has any effect; later calls return immediately.
 */
function logLoadOnce(success: boolean): void {
  if (logged) return
  logged = true
  logForDebugging(
    success ? 'tree-sitter: native module loaded' : 'tree-sitter: unavailable',
  )
  logEvent('tengu_tree_sitter_load', { success })
}

/**
 * Awaits WASM init (Parser.init + Language.load). Must be called before
 * parseCommand/parseCommandRaw for the parser to be available. Idempotent.
 */
export async function ensureInitialized(): Promise<void> {
  if (feature('TREE_SITTER_BASH') || feature('TREE_SITTER_BASH_SHADOW')) {
    await ensureParserInitialized()
  }
}

/**
 * Parses `command` and locates its first command node and leading env vars.
 *
 * @returns the parsed data, or null when the command is empty or longer than
 *   MAX_COMMAND_LENGTH, the TREE_SITTER_BASH feature is off, the parser
 *   module is unavailable, the parser returns no root node, or parsing throws.
 */
export async function parseCommand(
  command: string,
): Promise<ParsedCommandData | null> {
  if (!command || command.length > MAX_COMMAND_LENGTH) return null

  // Gate: ant-only until pentest. External builds fall back to legacy
  // regex/shell-quote path. Guarding the whole body inside the positive
  // branch lets Bun DCE the NAPI import AND keeps telemetry honest — we
  // only fire tengu_tree_sitter_load when a load was genuinely attempted.
  if (feature('TREE_SITTER_BASH')) {
    await ensureParserInitialized()
    const mod = getParserModule()
    logLoadOnce(mod !== null)
    if (!mod) return null

    try {
      const rootNode = mod.parse(command)
      if (!rootNode) return null

      const commandNode = findCommandNode(rootNode, null)
      const envVars = extractEnvVars(commandNode)

      return { rootNode, envVars, commandNode, originalCommand: command }
    } catch {
      // Any parser failure is reported to the caller as "no result".
      return null
    }
  }
  return null
}

/**
 * SECURITY: Sentinel for "parser was loaded and attempted, but aborted"
 * (timeout / node budget / Rust panic). Distinct from `null` (module not
 * loaded). Adversarial input can trigger abort under MAX_COMMAND_LENGTH:
 * `(( a[0][0]... ))` with ~2800 subscripts hits PARSE_TIMEOUT_MICROS.
 * Callers MUST treat this as fail-closed (too-complex), NOT route to legacy.
 */
export const PARSE_ABORTED = Symbol('parse-aborted')

/**
 * Raw parse — skips findCommandNode/extractEnvVars which the security
 * walker in ast.ts doesn't use. Saves one tree walk per bash command.
 *
 * Returns:
 *   - Node: parse succeeded
 *   - null: module not loaded / feature off / empty / over-length
 *   - PARSE_ABORTED: module loaded but parse failed (timeout/panic)
 */
export async function parseCommandRaw(
  command: string,
): Promise<Node | null | typeof PARSE_ABORTED> {
  if (!command || command.length > MAX_COMMAND_LENGTH) return null
  if (feature('TREE_SITTER_BASH') || feature('TREE_SITTER_BASH_SHADOW')) {
    await ensureParserInitialized()
    const mod = getParserModule()
    logLoadOnce(mod !== null)
    if (!mod) return null
    try {
      const result = mod.parse(command)
      // SECURITY: Module loaded; null here = timeout/node-budget abort in
      // bashParser.ts (PARSE_TIMEOUT_MS=50, MAX_NODES=50_000).
      // Previously collapsed into `return null` → parse-unavailable → legacy
      // path, which lacks EVAL_LIKE_BUILTINS — `trap`, `enable`, `hash` leaked.
      if (result === null) {
        logEvent('tengu_tree_sitter_parse_abort', {
          cmdLength: command.length,
          panic: false,
        })
        return PARSE_ABORTED
      }
      return result
    } catch {
      // A throw from the parser is reported as an abort with panic: true.
      logEvent('tengu_tree_sitter_parse_abort', {
        cmdLength: command.length,
        panic: true,
      })
      return PARSE_ABORTED
    }
  }
  return null
}

/**
 * Depth-first search for the first node whose type is in COMMAND_TYPES.
 *
 * @param node - subtree to search
 * @param parent - parent of `node`, or null at the root; only consulted when
 *   `node` is a `variable_assignment`
 * @returns the first matching node, or null when the subtree has none
 */
function findCommandNode(node: Node, parent: Node | null): Node | null {
  const { type, children } = node

  if (COMMAND_TYPES.has(type)) return node

  // Variable assignment followed by command: look among the parent's children
  // for a command that starts after this assignment.
  if (type === 'variable_assignment' && parent) {
    return (
      parent.children.find(
        c => COMMAND_TYPES.has(c.type) && c.startIndex > node.startIndex,
      ) ?? null
    )
  }

  // Pipeline: search each child in order (a child may be a
  // redirected_statement) and return the first command found.
  if (type === 'pipeline') {
    for (const child of children) {
      const result = findCommandNode(child, node)
      if (result) return result
    }
    return null
  }

  // Redirected statement: find the command inside (direct children only)
  if (type === 'redirected_statement') {
    return children.find(c => COMMAND_TYPES.has(c.type)) ?? null
  }

  // Recursive search
  for (const child of children) {
    const result = findCommandNode(child, node)
    if (result) return result
  }

  return null
}

/**
 * Collects the text of the `variable_assignment` children that appear before
 * the command name.
 *
 * @returns an empty array when `commandNode` is null or is not a `command`
 *   node; otherwise the assignment texts in source order.
 */
function extractEnvVars(commandNode: Node | null): string[] {
  if (!commandNode || commandNode.type !== 'command') return []

  const envVars: string[] = []
  for (const child of commandNode.children) {
    if (child.type === 'variable_assignment') {
      envVars.push(child.text)
    } else if (child.type === 'command_name' || child.type === 'word') {
      // Reached the command itself; anything after is not a leading env var.
      break
    }
  }
  return envVars
}

/**
 * Returns the command name followed by its simple arguments.
 *
 * For a `declaration_command`, the result is just the leading keyword when it
 * is one of DECLARATION_COMMANDS, otherwise an empty array. For other nodes,
 * `variable_assignment` children are skipped, the command name is pushed
 * verbatim, children whose type is in ARGUMENT_TYPES are pushed with one pair
 * of surrounding quotes removed, and collection stops at the first command or
 * process substitution. Children of any other type are ignored.
 */
export function extractCommandArguments(commandNode: Node): string[] {
  // Declaration commands
  if (commandNode.type === 'declaration_command') {
    const firstChild = commandNode.children[0]
    return firstChild && DECLARATION_COMMANDS.has(firstChild.text)
      ? [firstChild.text]
      : []
  }

  const args: string[] = []
  let foundCommandName = false

  for (const child of commandNode.children) {
    if (child.type === 'variable_assignment') continue

    // Command name: an explicit command_name node, or the first bare word
    // seen before any command name was recorded.
    if (
      child.type === 'command_name' ||
      (!foundCommandName && child.type === 'word')
    ) {
      foundCommandName = true
      args.push(child.text)
      continue
    }

    // Arguments
    if (ARGUMENT_TYPES.has(child.type)) {
      args.push(stripQuotes(child.text))
    } else if (SUBSTITUTION_TYPES.has(child.type)) {
      break
    }
  }
  return args
}

/**
 * Removes one pair of matching surrounding quotes (both `"` or both `'`).
 * Text shorter than two characters, or without a matching pair, is returned
 * unchanged.
 */
function stripQuotes(text: string): string {
  return text.length >= 2 &&
    ((text[0] === '"' && text.at(-1) === '"') ||
      (text[0] === "'" && text.at(-1) === "'"))
    ? text.slice(1, -1)
    : text
}