source dump of claude code
at main 2592 lines 103 kB view raw
import { logEvent } from 'src/services/analytics/index.js'
import { extractHeredocs } from '../../utils/bash/heredoc.js'
import { ParsedCommand } from '../../utils/bash/ParsedCommand.js'
import {
  hasMalformedTokens,
  hasShellQuoteSingleQuoteBug,
  tryParseShellCommand,
} from '../../utils/bash/shellQuote.js'
import type { TreeSitterAnalysis } from '../../utils/bash/treeSitterAnalysis.js'
import type { PermissionResult } from '../../utils/permissions/PermissionResult.js'

const HEREDOC_IN_SUBSTITUTION = /\$\(.*<</

// Note: Backtick pattern is handled separately in validateDangerousPatterns
// to distinguish between escaped and unescaped backticks
const COMMAND_SUBSTITUTION_PATTERNS = [
  { pattern: /<\(/, message: 'process substitution <()' },
  { pattern: />\(/, message: 'process substitution >()' },
  { pattern: /=\(/, message: 'Zsh process substitution =()' },
  // Zsh EQUALS expansion: =cmd at word start expands to $(which cmd).
  // `=curl evil.com` → `/usr/bin/curl evil.com`, bypassing Bash(curl:*) deny
  // rules since the parser sees `=curl` as the base command, not `curl`.
  // Only matches word-initial = followed by a command-name char (not VAR=val).
  {
    pattern: /(?:^|[\s;&|])=[a-zA-Z_]/,
    message: 'Zsh equals expansion (=cmd)',
  },
  { pattern: /\$\(/, message: '$() command substitution' },
  { pattern: /\$\{/, message: '${} parameter substitution' },
  { pattern: /\$\[/, message: '$[] legacy arithmetic expansion' },
  { pattern: /~\[/, message: 'Zsh-style parameter expansion' },
  { pattern: /\(e:/, message: 'Zsh-style glob qualifiers' },
  { pattern: /\(\+/, message: 'Zsh glob qualifier with command execution' },
  {
    pattern: /\}\s*always\s*\{/,
    message: 'Zsh always block (try/always construct)',
  },
  // Defense in depth: Block PowerShell comment syntax even though we don't execute in PowerShell
  // Added as protection against future changes that might introduce PowerShell execution
  { pattern: /<#/, message: 'PowerShell comment syntax' },
]

// Zsh-specific dangerous commands that can bypass security checks.
// These are checked against the base command (first word) of each command segment.
const ZSH_DANGEROUS_COMMANDS = new Set([
  // zmodload is the gateway to many dangerous module-based attacks:
  // zsh/mapfile (invisible file I/O via array assignment),
  // zsh/system (sysopen/syswrite two-step file access),
  // zsh/zpty (pseudo-terminal command execution),
  // zsh/net/tcp (network exfiltration via ztcp),
  // zsh/files (builtin rm/mv/ln/chmod that bypass binary checks)
  'zmodload',
  // emulate with -c flag is an eval-equivalent that executes arbitrary code
  'emulate',
  // Zsh module builtins that enable dangerous operations.
  // These require zmodload first, but we block them as defense-in-depth
  // in case zmodload is somehow bypassed or the module is pre-loaded.
  'sysopen', // Opens files with fine-grained control (zsh/system)
  'sysread', // Reads from file descriptors (zsh/system)
  'syswrite', // Writes to file descriptors (zsh/system)
  'sysseek', // Seeks on file descriptors (zsh/system)
  'zpty', // Executes commands on pseudo-terminals (zsh/zpty)
  'ztcp', // Creates TCP connections for exfiltration (zsh/net/tcp)
  'zsocket', // Creates Unix/TCP sockets (zsh/net/socket)
  'mapfile', // Not actually a command, but the associative array is set via zmodload
  'zf_rm', // Builtin rm from zsh/files
  'zf_mv', // Builtin mv from zsh/files
  'zf_ln', // Builtin ln from zsh/files
  'zf_chmod', // Builtin chmod from zsh/files
  'zf_chown', // Builtin chown from zsh/files
  'zf_mkdir', // Builtin mkdir from zsh/files
  'zf_rmdir', // Builtin rmdir from zsh/files
  'zf_chgrp', // Builtin chgrp from zsh/files
])

// Numeric identifiers for bash security checks (to avoid logging strings)
const BASH_SECURITY_CHECK_IDS = {
  INCOMPLETE_COMMANDS: 1,
  JQ_SYSTEM_FUNCTION: 2,
  JQ_FILE_ARGUMENTS: 3,
  OBFUSCATED_FLAGS: 4,
  SHELL_METACHARACTERS: 5,
  DANGEROUS_VARIABLES: 6,
  NEWLINES: 7,
  DANGEROUS_PATTERNS_COMMAND_SUBSTITUTION: 8,
  DANGEROUS_PATTERNS_INPUT_REDIRECTION: 9,
  DANGEROUS_PATTERNS_OUTPUT_REDIRECTION: 10,
  IFS_INJECTION: 11,
  GIT_COMMIT_SUBSTITUTION: 12,
  PROC_ENVIRON_ACCESS: 13,
  MALFORMED_TOKEN_INJECTION: 14,
  BACKSLASH_ESCAPED_WHITESPACE: 15,
  BRACE_EXPANSION: 16,
  CONTROL_CHARACTERS: 17,
  UNICODE_WHITESPACE: 18,
  MID_WORD_HASH: 19,
  ZSH_DANGEROUS_COMMANDS: 20,
  BACKSLASH_ESCAPED_OPERATORS: 21,
  COMMENT_QUOTE_DESYNC: 22,
  QUOTED_NEWLINE: 23,
} as const

type ValidationContext = {
  originalCommand: string
  baseCommand: string
  unquotedContent: string
  fullyUnquotedContent: string
  /** fullyUnquoted before stripSafeRedirections — used by validateBraceExpansion
   * to avoid false negatives from redirection stripping creating backslash adjacencies */
  fullyUnquotedPreStrip: string
  /** Like fullyUnquotedPreStrip but preserves quote characters ('/"): e.g.,
   * echo 'x'# → echo ''# (the quote chars remain, revealing adjacency to #) */
  unquotedKeepQuoteChars: string
  /** Tree-sitter analysis data, if available. Validators can use this for
   * more accurate analysis when present, falling back to regex otherwise. */
  treeSitter?: TreeSitterAnalysis | null
}

type QuoteExtraction = {
  withDoubleQuotes: string
  fullyUnquoted: string
  /** Like fullyUnquoted but preserves quote characters ('/"): strips quoted
   * content while keeping the delimiters. Used by validateMidWordHash to detect
   * quote-adjacent # (e.g., 'x'# where quote stripping would hide adjacency). */
  unquotedKeepQuoteChars: string
}

/**
 * Walks `command` once and builds three quote-stripped views of it.
 *
 * - `withDoubleQuotes`: every character that is not inside single quotes.
 * - `fullyUnquoted`: every character that is outside both quote styles.
 * - `unquotedKeepQuoteChars`: like `fullyUnquoted`, but each quote delimiter
 *   that toggles quote state is also appended.
 *
 * A backslash outside single quotes marks the next character as escaped: both
 * the backslash and that character are appended under the same rules as
 * ordinary characters, and the escaped character never toggles quote state.
 *
 * @param command - Raw command text to scan
 * @param isJq - When true, a `"` is not skipped after toggling double-quote
 *   state, so it also flows through the regular append step at the end of the
 *   loop body
 * @returns The three extracted views
 */
function extractQuotedContent(command: string, isJq = false): QuoteExtraction {
  let withDoubleQuotes = ''
  let fullyUnquoted = ''
  let unquotedKeepQuoteChars = ''
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false

  for (let i = 0; i < command.length; i++) {
    const char = command[i]

    if (escaped) {
      escaped = false
      if (!inSingleQuote) withDoubleQuotes += char
      if (!inSingleQuote && !inDoubleQuote) fullyUnquoted += char
      if (!inSingleQuote && !inDoubleQuote) unquotedKeepQuoteChars += char
      continue
    }

    if (char === '\\' && !inSingleQuote) {
      escaped = true
      if (!inSingleQuote) withDoubleQuotes += char
      if (!inSingleQuote && !inDoubleQuote) fullyUnquoted += char
      if (!inSingleQuote && !inDoubleQuote) unquotedKeepQuoteChars += char
      continue
    }

    if (char === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      unquotedKeepQuoteChars += char
      continue
    }

    if (char === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      unquotedKeepQuoteChars += char
      // For jq, include quotes in extraction to ensure content is properly analyzed
      if (!isJq) continue
    }

    if (!inSingleQuote) withDoubleQuotes += char
    if (!inSingleQuote && !inDoubleQuote) fullyUnquoted += char
    if (!inSingleQuote && !inDoubleQuote) unquotedKeepQuoteChars += char
  }

  return { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars }
}

/**
 * Removes three redirection forms from `content`: `2>&1`, an optional
 * `0`/`1`/`2` followed by `> /dev/null`, and `< /dev/null`.
 *
 * @param content - Text to strip the redirections from
 * @returns `content` with every matching redirection removed
 */
function stripSafeRedirections(content: string): string {
  // SECURITY: All three patterns MUST have a trailing boundary (?=\s|$).
  // Without it, `> /dev/nullo` matches `/dev/null` as a PREFIX, strips
  // `> /dev/null` leaving `o`, so `echo hi > /dev/nullo` becomes `echo hi o`.
  // validateRedirections then sees no `>` and passes. The file write to
  // /dev/nullo is auto-allowed via the read-only path (checkReadOnlyConstraints).
  // Main bashPermissions flow is protected (checkPathConstraints validates the
  // original command), but speculation.ts uses checkReadOnlyConstraints alone.
  return content
    .replace(/\s+2\s*>&\s*1(?=\s|$)/g, '')
    .replace(/[012]?\s*>\s*\/dev\/null(?=\s|$)/g, '')
    .replace(/\s*<\s*\/dev\/null(?=\s|$)/g, '')
}

/**
 * Checks if content contains an unescaped occurrence of a single character.
 * Handles bash escape sequences correctly where a backslash escapes the following character.
 *
 * IMPORTANT: This function only handles single characters, not strings. If you need to extend
 * this to handle multi-character strings, be EXTREMELY CAREFUL about shell ANSI-C quoting
 * (e.g., $'\n', $'\x41', $'\u0041') which can encode arbitrary characters and strings in ways
 * that are very difficult to parse correctly. Incorrect handling could introduce security
 * vulnerabilities by allowing attackers to bypass security checks.
199 * 200 * @param content - The string to search (typically from extractQuotedContent) 201 * @param char - Single character to search for (e.g., '`') 202 * @returns true if unescaped occurrence found, false otherwise 203 * 204 * Examples: 205 * hasUnescapedChar("test \`safe\`", '`') → false (escaped backticks) 206 * hasUnescapedChar("test `dangerous`", '`') → true (unescaped backticks) 207 * hasUnescapedChar("test\\`date`", '`') → true (escaped backslash + unescaped backtick) 208 */ 209function hasUnescapedChar(content: string, char: string): boolean { 210 if (char.length !== 1) { 211 throw new Error('hasUnescapedChar only works with single characters') 212 } 213 214 let i = 0 215 while (i < content.length) { 216 // If we see a backslash, skip it and the next character (they form an escape sequence) 217 if (content[i] === '\\' && i + 1 < content.length) { 218 i += 2 // Skip backslash and escaped character 219 continue 220 } 221 222 // Check if current character matches 223 if (content[i] === char) { 224 return true // Found unescaped occurrence 225 } 226 227 i++ 228 } 229 230 return false // No unescaped occurrences found 231} 232 233function validateEmpty(context: ValidationContext): PermissionResult { 234 if (!context.originalCommand.trim()) { 235 return { 236 behavior: 'allow', 237 updatedInput: { command: context.originalCommand }, 238 decisionReason: { type: 'other', reason: 'Empty command is safe' }, 239 } 240 } 241 return { behavior: 'passthrough', message: 'Command is not empty' } 242} 243 244function validateIncompleteCommands( 245 context: ValidationContext, 246): PermissionResult { 247 const { originalCommand } = context 248 const trimmed = originalCommand.trim() 249 250 if (/^\s*\t/.test(originalCommand)) { 251 logEvent('tengu_bash_security_check_triggered', { 252 checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS, 253 subId: 1, 254 }) 255 return { 256 behavior: 'ask', 257 message: 'Command appears to be an incomplete fragment (starts with tab)', 258 
} 259 } 260 261 if (trimmed.startsWith('-')) { 262 logEvent('tengu_bash_security_check_triggered', { 263 checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS, 264 subId: 2, 265 }) 266 return { 267 behavior: 'ask', 268 message: 269 'Command appears to be an incomplete fragment (starts with flags)', 270 } 271 } 272 273 if (/^\s*(&&|\|\||;|>>?|<)/.test(originalCommand)) { 274 logEvent('tengu_bash_security_check_triggered', { 275 checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS, 276 subId: 3, 277 }) 278 return { 279 behavior: 'ask', 280 message: 281 'Command appears to be a continuation line (starts with operator)', 282 } 283 } 284 285 return { behavior: 'passthrough', message: 'Command appears complete' } 286} 287 288/** 289 * Checks if a command is a "safe" heredoc-in-substitution pattern that can 290 * bypass the generic $() validator. 291 * 292 * This is an EARLY-ALLOW path: returning `true` causes bashCommandIsSafe to 293 * return `passthrough`, bypassing ALL subsequent validators. Given this 294 * authority, the check must be PROVABLY safe, not "probably safe". 295 * 296 * The only pattern we allow is: 297 * [prefix] $(cat <<'DELIM'\n 298 * [body lines]\n 299 * DELIM\n 300 * ) [suffix] 301 * 302 * Where: 303 * - The delimiter must be single-quoted ('DELIM') or escaped (\DELIM) so the 304 * body is literal text with no expansion 305 * - The closing delimiter must be on a line BY ITSELF (or with only trailing 306 * whitespace + `)` for the $(cat <<'EOF'\n...\nEOF)` inline form) 307 * - The closing delimiter must be the FIRST such line — matching bash's 308 * behavior exactly (no skipping past early delimiters to find EOF)) 309 * - There must be non-whitespace text BEFORE the $( (i.e., the substitution 310 * is used in argument position, not as a command name). Otherwise the 311 * heredoc body becomes an arbitrary command name with [suffix] as args. 
312 * - The remaining text (with the heredoc stripped) must pass all validators 313 * 314 * This implementation uses LINE-BASED matching, not regex [\s\S]*?, to 315 * precisely replicate bash's heredoc-closing behavior. 316 */ 317function isSafeHeredoc(command: string): boolean { 318 if (!HEREDOC_IN_SUBSTITUTION.test(command)) return false 319 320 // SECURITY: Use [ \t] (not \s) between << and the delimiter. \s matches 321 // newlines, but bash requires the delimiter word on the same line as <<. 322 // Matching across newlines could accept malformed syntax that bash rejects. 323 // Handle quote variations: 'EOF', ''EOF'' (splitCommand may mangle quotes). 324 const heredocPattern = 325 /\$\(cat[ \t]*<<(-?)[ \t]*(?:'+([A-Za-z_]\w*)'+|\\([A-Za-z_]\w*))/g 326 let match 327 type HeredocMatch = { 328 start: number 329 operatorEnd: number 330 delimiter: string 331 isDash: boolean 332 } 333 const safeHeredocs: HeredocMatch[] = [] 334 335 while ((match = heredocPattern.exec(command)) !== null) { 336 const delimiter = match[2] || match[3] 337 if (delimiter) { 338 safeHeredocs.push({ 339 start: match.index, 340 operatorEnd: match.index + match[0].length, 341 delimiter, 342 isDash: match[1] === '-', 343 }) 344 } 345 } 346 347 // If no safe heredoc patterns found, it's not safe 348 if (safeHeredocs.length === 0) return false 349 350 // SECURITY: For each heredoc, find the closing delimiter using LINE-BASED 351 // matching that exactly replicates bash's behavior. Bash closes a heredoc 352 // at the FIRST line that exactly matches the delimiter. Any subsequent 353 // occurrence of the delimiter is just content (or a new command). Regex 354 // [\s\S]*? can skip past the first delimiter to find a later `DELIM)` 355 // pattern, hiding injected commands between the two delimiters. 
356 type VerifiedHeredoc = { start: number; end: number } 357 const verified: VerifiedHeredoc[] = [] 358 359 for (const { start, operatorEnd, delimiter, isDash } of safeHeredocs) { 360 // The opening line must end immediately after the delimiter (only 361 // horizontal whitespace allowed before the newline). If there's other 362 // content (like `; rm -rf /`), this is not a simple safe heredoc. 363 const afterOperator = command.slice(operatorEnd) 364 const openLineEnd = afterOperator.indexOf('\n') 365 if (openLineEnd === -1) return false // No content at all 366 const openLineTail = afterOperator.slice(0, openLineEnd) 367 if (!/^[ \t]*$/.test(openLineTail)) return false // Extra content on open line 368 369 // Body starts after the newline 370 const bodyStart = operatorEnd + openLineEnd + 1 371 const body = command.slice(bodyStart) 372 const bodyLines = body.split('\n') 373 374 // Find the FIRST line that closes the heredoc. There are two valid forms: 375 // 1. `DELIM` alone on a line (bash-standard), followed by `)` on the 376 // next line (with only whitespace before it) 377 // 2. `DELIM)` on a line (the inline $(cat <<'EOF'\n...\nEOF) form, 378 // where bash's PST_EOFTOKEN closes both heredoc and substitution) 379 // For <<-, leading tabs are stripped before matching. 380 let closingLineIdx = -1 381 let closeParenLineIdx = -1 // Line index where `)` appears 382 let closeParenColIdx = -1 // Column index of `)` on that line 383 384 for (let i = 0; i < bodyLines.length; i++) { 385 const rawLine = bodyLines[i]! 386 const line = isDash ? 
rawLine.replace(/^\t*/, '') : rawLine 387 388 // Form 1: delimiter alone on a line 389 if (line === delimiter) { 390 closingLineIdx = i 391 // The `)` must be on the NEXT line with only whitespace before it 392 const nextLine = bodyLines[i + 1] 393 if (nextLine === undefined) return false // No closing `)` 394 const parenMatch = nextLine.match(/^([ \t]*)\)/) 395 if (!parenMatch) return false // `)` not at start of next line 396 closeParenLineIdx = i + 1 397 closeParenColIdx = parenMatch[1]!.length // Position of `)` 398 break 399 } 400 401 // Form 2: delimiter immediately followed by `)` (PST_EOFTOKEN form) 402 // Only whitespace allowed between delimiter and `)`. 403 if (line.startsWith(delimiter)) { 404 const afterDelim = line.slice(delimiter.length) 405 const parenMatch = afterDelim.match(/^([ \t]*)\)/) 406 if (parenMatch) { 407 closingLineIdx = i 408 closeParenLineIdx = i 409 // Column is in rawLine (pre-tab-strip), so recompute 410 const tabPrefix = isDash ? (rawLine.match(/^\t*/)?.[0] ?? '') : '' 411 closeParenColIdx = 412 tabPrefix.length + delimiter.length + parenMatch[1]!.length 413 break 414 } 415 // Line starts with delimiter but has other trailing content — 416 // this is NOT the closing line (bash requires exact match or EOF`)`). 417 // But it's also a red flag: if this were inside $(), bash might 418 // close early via PST_EOFTOKEN with other shell metacharacters. 419 // We already handle that case in extractHeredocs — here we just 420 // reject it as not matching our safe pattern. 
421 if (/^[)}`|&;(<>]/.test(afterDelim)) { 422 return false // Ambiguous early-closure pattern 423 } 424 } 425 } 426 427 if (closingLineIdx === -1) return false // No closing delimiter found 428 429 // Compute the absolute end position (one past the `)` character) 430 let endPos = bodyStart 431 for (let i = 0; i < closeParenLineIdx; i++) { 432 endPos += bodyLines[i]!.length + 1 // +1 for newline 433 } 434 endPos += closeParenColIdx + 1 // +1 to include the `)` itself 435 436 verified.push({ start, end: endPos }) 437 } 438 439 // SECURITY: Reject nested matches. The regex finds $(cat <<'X' patterns 440 // in RAW TEXT without understanding quoted-heredoc semantics. When the 441 // outer heredoc has a quoted delimiter (<<'A'), its body is LITERAL text 442 // in bash — any inner $(cat <<'B' is just characters, not a real heredoc. 443 // But our regex matches both, producing NESTED ranges. Stripping nested 444 // ranges corrupts indices: after stripping the inner range, the outer 445 // range's `end` is stale (points past the shrunken string), causing 446 // `remaining.slice(end)` to return '' and silently drop any suffix 447 // (e.g., `; rm -rf /`). Since all our matched heredocs have quoted/escaped 448 // delimiters, a nested match inside the body is ALWAYS literal text — 449 // no legitimate user writes this pattern. Bail to safe fallback. 450 for (const outer of verified) { 451 for (const inner of verified) { 452 if (inner === outer) continue 453 if (inner.start > outer.start && inner.start < outer.end) { 454 return false 455 } 456 } 457 } 458 459 // Strip all verified heredocs from the command, building `remaining`. 460 // Process in reverse order so earlier indices stay valid. 
461 const sortedVerified = [...verified].sort((a, b) => b.start - a.start) 462 let remaining = command 463 for (const { start, end } of sortedVerified) { 464 remaining = remaining.slice(0, start) + remaining.slice(end) 465 } 466 467 // SECURITY: The remaining text must NOT start with only whitespace before 468 // the (now-stripped) heredoc position IF there's non-whitespace after it. 469 // If the $() is in COMMAND-NAME position (no prefix), its output becomes 470 // the command to execute, with any suffix text as arguments: 471 // $(cat <<'EOF'\nchmod\nEOF\n) 777 /etc/shadow 472 // → runs `chmod 777 /etc/shadow` 473 // We only allow the substitution in ARGUMENT position: there must be a 474 // command word before the $(. 475 // After stripping, `remaining` should look like `cmd args... [more args]`. 476 // If remaining starts with only whitespace (or is empty), the $() WAS the 477 // command — that's only safe if there are no trailing arguments. 478 const trimmedRemaining = remaining.trim() 479 if (trimmedRemaining.length > 0) { 480 // There's a prefix command — good. But verify the original command 481 // also had a non-whitespace prefix before the FIRST $( (the heredoc 482 // could be one of several; we need the first one's prefix). 483 const firstHeredocStart = Math.min(...verified.map(v => v.start)) 484 const prefix = command.slice(0, firstHeredocStart) 485 if (prefix.trim().length === 0) { 486 // $() is in command-name position but there's trailing text — UNSAFE. 487 // The heredoc body becomes the command name, trailing text becomes args. 488 return false 489 } 490 } 491 492 // Check that remaining text contains only safe characters. 493 // After stripping safe heredocs, the remaining text should only be command 494 // names, arguments, quotes, and whitespace. Reject ANY shell metacharacter 495 // to prevent operators (|, &, &&, ||, ;) or expansions ($, `, {, <, >) from 496 // being used to chain dangerous commands after a safe heredoc. 
497 // SECURITY: Use explicit ASCII space/tab only — \s matches unicode whitespace 498 // like \u00A0 which can be used to hide content. Newlines are also blocked 499 // (they would indicate multi-line commands outside the heredoc body). 500 if (!/^[a-zA-Z0-9 \t"'.\-/_@=,:+~]*$/.test(remaining)) return false 501 502 // SECURITY: The remaining text (command with heredocs stripped) must also 503 // pass all security validators. Without this, appending a safe heredoc to a 504 // dangerous command (e.g., `zmodload zsh/system $(cat <<'EOF'\nx\nEOF\n)`) 505 // causes this early-allow path to return passthrough, bypassing 506 // validateZshDangerousCommands, validateProcEnvironAccess, and any other 507 // main validator that checks allowlist-safe character patterns. 508 // No recursion risk: `remaining` has no `$(... <<` pattern, so the recursive 509 // call's validateSafeCommandSubstitution returns passthrough immediately. 510 if (bashCommandIsSafe_DEPRECATED(remaining).behavior !== 'passthrough') 511 return false 512 513 return true 514} 515 516/** 517 * Detects well-formed $(cat <<'DELIM'...DELIM) heredoc substitution patterns. 518 * Returns the command with matched heredocs stripped, or null if none found. 519 * Used by the pre-split gate to strip safe heredocs and re-check the remainder. 
520 */ 521export function stripSafeHeredocSubstitutions(command: string): string | null { 522 if (!HEREDOC_IN_SUBSTITUTION.test(command)) return null 523 524 const heredocPattern = 525 /\$\(cat[ \t]*<<(-?)[ \t]*(?:'+([A-Za-z_]\w*)'+|\\([A-Za-z_]\w*))/g 526 let result = command 527 let found = false 528 let match 529 const ranges: Array<{ start: number; end: number }> = [] 530 while ((match = heredocPattern.exec(command)) !== null) { 531 if (match.index > 0 && command[match.index - 1] === '\\') continue 532 const delimiter = match[2] || match[3] 533 if (!delimiter) continue 534 const isDash = match[1] === '-' 535 const operatorEnd = match.index + match[0].length 536 537 const afterOperator = command.slice(operatorEnd) 538 const openLineEnd = afterOperator.indexOf('\n') 539 if (openLineEnd === -1) continue 540 if (!/^[ \t]*$/.test(afterOperator.slice(0, openLineEnd))) continue 541 542 const bodyStart = operatorEnd + openLineEnd + 1 543 const bodyLines = command.slice(bodyStart).split('\n') 544 for (let i = 0; i < bodyLines.length; i++) { 545 const rawLine = bodyLines[i]! 546 const line = isDash ? rawLine.replace(/^\t*/, '') : rawLine 547 if (line.startsWith(delimiter)) { 548 const after = line.slice(delimiter.length) 549 let closePos = -1 550 if (/^[ \t]*\)/.test(after)) { 551 const lineStart = 552 bodyStart + 553 bodyLines.slice(0, i).join('\n').length + 554 (i > 0 ? 1 : 0) 555 closePos = command.indexOf(')', lineStart) 556 } else if (after === '') { 557 const nextLine = bodyLines[i + 1] 558 if (nextLine !== undefined && /^[ \t]*\)/.test(nextLine)) { 559 const nextLineStart = 560 bodyStart + bodyLines.slice(0, i + 1).join('\n').length + 1 561 closePos = command.indexOf(')', nextLineStart) 562 } 563 } 564 if (closePos !== -1) { 565 ranges.push({ start: match.index, end: closePos + 1 }) 566 found = true 567 } 568 break 569 } 570 } 571 } 572 if (!found) return null 573 for (let i = ranges.length - 1; i >= 0; i--) { 574 const r = ranges[i]! 
575 result = result.slice(0, r.start) + result.slice(r.end) 576 } 577 return result 578} 579 580/** Detection-only check: does the command contain a safe heredoc substitution? */ 581export function hasSafeHeredocSubstitution(command: string): boolean { 582 return stripSafeHeredocSubstitutions(command) !== null 583} 584 585function validateSafeCommandSubstitution( 586 context: ValidationContext, 587): PermissionResult { 588 const { originalCommand } = context 589 590 if (!HEREDOC_IN_SUBSTITUTION.test(originalCommand)) { 591 return { behavior: 'passthrough', message: 'No heredoc in substitution' } 592 } 593 594 if (isSafeHeredoc(originalCommand)) { 595 return { 596 behavior: 'allow', 597 updatedInput: { command: originalCommand }, 598 decisionReason: { 599 type: 'other', 600 reason: 601 'Safe command substitution: cat with quoted/escaped heredoc delimiter', 602 }, 603 } 604 } 605 606 return { 607 behavior: 'passthrough', 608 message: 'Command substitution needs validation', 609 } 610} 611 612function validateGitCommit(context: ValidationContext): PermissionResult { 613 const { originalCommand, baseCommand } = context 614 615 if (baseCommand !== 'git' || !/^git\s+commit\s+/.test(originalCommand)) { 616 return { behavior: 'passthrough', message: 'Not a git commit' } 617 } 618 619 // SECURITY: Backslashes can cause our regex to mis-identify quote boundaries 620 // (e.g., `git commit -m "test\"msg" && evil`). Legitimate commit messages 621 // virtually never contain backslashes, so bail to the full validator chain. 622 if (originalCommand.includes('\\')) { 623 return { 624 behavior: 'passthrough', 625 message: 'Git commit contains backslash, needs full validation', 626 } 627 } 628 629 // SECURITY: The `.*?` before `-m` must NOT match shell operators. Previously 630 // `.*?` matched anything except `\n`, including `;`, `&`, `|`, `` ` ``, `$(`. 
631 // For `git commit ; curl evil.com -m 'x'`, `.*?` swallowed `; curl evil.com ` 632 // leaving remainder=`` (falsy → remainder check skipped) → returned `allow` 633 // for a compound command. Early-allow skips ALL main validators (line ~1908), 634 // nullifying validateQuotedNewline, validateBackslashEscapedOperators, etc. 635 // While splitCommand currently catches this downstream, early-allow is a 636 // POSITIVE ASSERTION that the FULL command is safe — which it is NOT. 637 // 638 // Also: `\s+` between `git` and `commit` must NOT match `\n`/`\r` (command 639 // separators in bash). Use `[ \t]+` for horizontal-only whitespace. 640 // 641 // The `[^;&|`$<>()\n\r]*?` class excludes shell metacharacters. We also 642 // exclude `<` and `>` here (redirects) — they're allowed in the REMAINDER 643 // for `--author="Name <email>"` but must not appear BEFORE `-m`. 644 const messageMatch = originalCommand.match( 645 /^git[ \t]+commit[ \t]+[^;&|`$<>()\n\r]*?-m[ \t]+(["'])([\s\S]*?)\1(.*)$/, 646 ) 647 648 if (messageMatch) { 649 const [, quote, messageContent, remainder] = messageMatch 650 651 if (quote === '"' && messageContent && /\$\(|`|\$\{/.test(messageContent)) { 652 logEvent('tengu_bash_security_check_triggered', { 653 checkId: BASH_SECURITY_CHECK_IDS.GIT_COMMIT_SUBSTITUTION, 654 subId: 1, 655 }) 656 return { 657 behavior: 'ask', 658 message: 'Git commit message contains command substitution patterns', 659 } 660 } 661 662 // SECURITY: Check remainder for shell operators that could chain commands 663 // or redirect output. The `.*` before `-m` in the regex can swallow flags 664 // like `--amend`, leaving `&& evil` or `> ~/.bashrc` in the remainder. 665 // Previously we only checked for $() / `` / ${} here, missing operators 666 // like ; | & && || < >. 667 // 668 // `<` and `>` can legitimately appear INSIDE quotes in --author values 669 // like `--author="Name <email>"`. An UNQUOTED `>` is a shell redirect 670 // operator. 
Because validateGitCommit is an EARLY validator, returning 671 // `allow` here short-circuits bashCommandIsSafe and SKIPS 672 // validateRedirections. So we must bail to passthrough on unquoted `<>` 673 // to let the main validators handle it. 674 // 675 // Attack: `git commit --allow-empty -m 'payload' > ~/.bashrc` 676 // validateGitCommit returns allow → bashCommandIsSafe short-circuits → 677 // validateRedirections NEVER runs → ~/.bashrc overwritten with git 678 // stdout containing `payload` → RCE on next shell login. 679 if (remainder && /[;|&()`]|\$\(|\$\{/.test(remainder)) { 680 return { 681 behavior: 'passthrough', 682 message: 'Git commit remainder contains shell metacharacters', 683 } 684 } 685 if (remainder) { 686 // Strip quoted content, then check for `<` or `>`. Quoted `<>` (email 687 // brackets in --author) are safe; unquoted `<>` are shell redirects. 688 // NOTE: This simple quote tracker has NO backslash handling. `\'`/`\"` 689 // outside quotes would desync it (bash: \' = literal ', tracker: toggles 690 // SQ). BUT line 584 already bailed on ANY backslash in originalCommand, 691 // so we never reach here with backslashes. For backslash-free input, 692 // simple quote toggling is correct (no way to escape quotes without \\). 
let unquoted = ''
      let inSQ = false
      let inDQ = false
      for (let i = 0; i < remainder.length; i++) {
        const c = remainder[i]
        if (c === "'" && !inDQ) {
          inSQ = !inSQ
          continue
        }
        if (c === '"' && !inSQ) {
          inDQ = !inDQ
          continue
        }
        if (!inSQ && !inDQ) unquoted += c
      }
      if (/[<>]/.test(unquoted)) {
        return {
          behavior: 'passthrough',
          message: 'Git commit remainder contains unquoted redirect operator',
        }
      }
    }

    // Security hardening: block messages starting with dash
    // This catches potential obfuscation patterns like git commit -m "---"
    if (messageContent && messageContent.startsWith('-')) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
        subId: 5,
      })
      return {
        behavior: 'ask',
        message: 'Command contains quoted characters in flag names',
      }
    }

    return {
      behavior: 'allow',
      updatedInput: { command: originalCommand },
      decisionReason: {
        type: 'other',
        reason: 'Git commit with simple quoted message is allowed',
      },
    }
  }

  return { behavior: 'passthrough', message: 'Git commit needs validation' }
}

/**
 * Flags jq invocations that can execute commands or load arbitrary files.
 *
 * Passes through when the base command is not `jq`. Asks when the command
 * text contains `system(` or one of the file-loading options
 * (-f/--from-file, --rawfile, --slurpfile, -L/--library-path). Everything
 * else is passed through to later validators.
 */
function validateJqCommand(context: ValidationContext): PermissionResult {
  const { originalCommand, baseCommand } = context

  if (baseCommand !== 'jq') {
    return { behavior: 'passthrough', message: 'Not jq' }
  }

  if (/\bsystem\s*\(/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.JQ_SYSTEM_FUNCTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'jq command contains system() function which executes arbitrary commands',
    }
  }

  // File arguments are now allowed - they will be validated by path validation in readOnlyValidation.ts
  // Only block dangerous flags that could read files into jq variables
  //
  // SECURITY: the short options are matched anywhere inside a single-dash
  // cluster, not only as a standalone `-f` / `-L`. jq accepts stacked short
  // options (`-nf prog.jq` acts like `-n -f prog.jq`) and an attached library
  // path (`-Ldir`). The earlier `-f\b` / `-L\b` form let both through because
  // the flag letter was not followed by a word boundary. The broader pattern
  // can also match a dash-word containing `f`/`L` inside the filter text; that
  // only produces an extra prompt, which is the safe direction.
  //
  // NOTE(review): substring(3) assumes originalCommand starts with `jq` and no
  // leading whitespace or wrapper — confirm against the caller.
  const afterJq = originalCommand.substring(3).trim()
  if (
    /(?:^|\s)(?:-[a-zA-Z]*[fL]|--from-file|--rawfile|--slurpfile|--library-path)/.test(
      afterJq,
    )
  ) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.JQ_FILE_ARGUMENTS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'jq command contains dangerous flags that could execute code or read arbitrary files',
    }
  }

  return { behavior: 'passthrough', message: 'jq command is safe' }
}

/**
 * Flags quoted arguments in `unquotedContent` that contain shell
 * metacharacters.
 *
 * Three checks, each with its own telemetry subId:
 *   1. a standalone quoted word containing `;` or `&`
 *   2. a quoted -name / -path / -iname pattern containing `;`, `|` or `&`
 *   3. a quoted -regex pattern containing `;` or `&`
 */
function validateShellMetacharacters(
  context: ValidationContext,
): PermissionResult {
  const { unquotedContent } = context
  const message =
    'Command contains shell metacharacters (;, |, or &) in arguments'

  if (/(?:^|\s)["'][^"']*[;&][^"']*["'](?:\s|$)/.test(unquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId: 1,
    })
    return { behavior: 'ask', message }
  }

  const globPatterns = [
    /-name\s+["'][^"']*[;|&][^"']*["']/,
    /-path\s+["'][^"']*[;|&][^"']*["']/,
    /-iname\s+["'][^"']*[;|&][^"']*["']/,
  ]

  if (globPatterns.some(p => p.test(unquotedContent))) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId: 2,
    })
    return { behavior: 'ask', message }
  }

  if (/-regex\s+["'][^"']*[;&][^"']*["']/.test(unquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId: 3,
    })
    return { behavior: 'ask', message }
  }

  return { behavior: 'passthrough', message: 'No metacharacters' }
}

/**
 * Asks when a `$NAME` variable reference sits directly next to a redirection
 * or pipe operator (`<`, `>`, `|`) in `fullyUnquotedContent`, on either side
 * of the operator (optional whitespace in between).
 */
function validateDangerousVariables(
  context: ValidationContext,
): PermissionResult {
  const { fullyUnquotedContent } = context

  if (
    /[<>|]\s*\$[A-Za-z_]/.test(fullyUnquotedContent) ||
    /\$[A-Za-z_][A-Za-z0-9_]*\s*[|<>]/.test(fullyUnquotedContent)
  ) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_VARIABLES,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains variables in dangerous contexts (redirections or pipes)',
    }
  }

  return { behavior: 'passthrough', message: 'No dangerous variables' }
}

/**
 * Asks when `unquotedContent` contains an unescaped backtick or matches any
 * entry of COMMAND_SUBSTITUTION_PATTERNS; the first matching pattern's
 * message is used in the result.
 */
function validateDangerousPatterns(
  context: ValidationContext,
): PermissionResult {
  const { unquotedContent } = context

  // Special handling for backticks - check for UNESCAPED backticks only
  // Escaped backticks (e.g., \`) are safe and commonly used in SQL commands
  // NOTE(review): unlike every other `ask` branch in this group, this one
  // emits no telemetry event — confirm that is intentional.
  if (hasUnescapedChar(unquotedContent, '`')) {
    return {
      behavior: 'ask',
      message: 'Command contains backticks (`) for command substitution',
    }
  }

  // Other command substitution checks (include double-quoted content)
  for (const { pattern, message } of COMMAND_SUBSTITUTION_PATTERNS) {
    if (pattern.test(unquotedContent)) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId:
          BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_COMMAND_SUBSTITUTION,
        subId: 1,
      })
      return { behavior: 'ask', message: `Command contains ${message}` }
    }
  }

  return { behavior: 'passthrough', message: 'No dangerous patterns' }
}

/**
 * Asks when `fullyUnquotedContent` contains `<` (input redirection) or `>`
 * (output redirection). Input redirection is checked first, so a command with
 * both reports the input case.
 */
function validateRedirections(context: ValidationContext): PermissionResult {
  const { fullyUnquotedContent } = context

  if (/</.test(fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_INPUT_REDIRECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains input redirection (<) which could read sensitive files',
    }
  }

  if (/>/.test(fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_OUTPUT_REDIRECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains output redirection (>) which could write to arbitrary files',
    }
  }

  return { behavior: 'passthrough', message: 'No redirections' }
}

/**
 * Asks when an unquoted newline or carriage return is followed by
 * non-whitespace content, except for backslash-newline continuations that
 * follow whitespace. Passes through when there is no newline/CR at all, or
 * when every newline is only followed by whitespace or end of input.
 */
function validateNewlines(context: ValidationContext): PermissionResult {
  // Use fullyUnquotedPreStrip (before stripSafeRedirections) to prevent bypasses
  // where stripping `>/dev/null` creates a phantom backslash-newline continuation.
  // E.g., `cmd \>/dev/null\nwhoami` → after stripping becomes `cmd \\nwhoami`
  // which looks like a safe continuation but actually hides a second command.
  const { fullyUnquotedPreStrip } = context

  // Check for newlines in unquoted content
  if (!/[\n\r]/.test(fullyUnquotedPreStrip)) {
    return { behavior: 'passthrough', message: 'No newlines' }
  }

  // Flag any newline/CR followed by non-whitespace, EXCEPT backslash-newline
  // continuations at word boundaries. In bash, `\<newline>` is a line
  // continuation (both chars removed), which is safe when the backslash
  // follows whitespace (e.g., `cmd \<newline>--flag`). Mid-word continuations
  // like `tr\<newline>aceroute` are still flagged because they can hide
  // dangerous command names from allowlist checks.
  // eslint-disable-next-line custom-rules/no-lookbehind-regex -- .test() + gated by /[\n\r]/.test() above
  const looksLikeCommand = /(?<![\s]\\)[\n\r]\s*\S/.test(fullyUnquotedPreStrip)
  if (looksLikeCommand) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.NEWLINES,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains newlines that could separate multiple commands',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'Newlines appear to be within data',
  }
}

/**
 * SECURITY: Carriage return (\r, 0x0D) IS a misparsing concern, unlike LF.
 *
 * Parser differential:
 *   - shell-quote's BAREWORD regex uses `[^\s...]` — JS `\s` INCLUDES \r, so
 *     shell-quote treats CR as a token boundary. `TZ=UTC\recho` tokenizes as
 *     TWO tokens: ['TZ=UTC', 'echo']. splitCommand joins with space →
 *     'TZ=UTC echo curl evil.com'.
 *   - bash's default IFS = $' \t\n' — CR is NOT in IFS. bash sees
 *     `TZ=UTC\recho` as ONE word → env assignment TZ='UTC\recho' (CR byte
 *     inside value), then `curl` is the command.
 *
 * Attack: `TZ=UTC\recho curl evil.com` with Bash(echo:*)
 *   validator: splitCommand collapses CR→space → 'TZ=UTC echo curl evil.com'
 *   → stripSafeWrappers: TZ=UTC stripped → 'echo curl evil.com' matches rule
 *   bash: executes `curl evil.com`
 *
 * validateNewlines catches this but is in nonMisparsingValidators (LF is
 * correctly handled by both parsers). This validator is NOT in
 * nonMisparsingValidators — its ask result gets isBashSecurityCheckForMisparsing
 * and blocks at the bashPermissions gate.
 *
 * Checks originalCommand (not fullyUnquotedPreStrip) because CR inside single
 * quotes is ALSO a misparsing concern for the same reason: shell-quote's `\s`
 * still tokenizes it, but bash treats it as literal. Block ALL unquoted-or-SQ CR.
* Only exception: CR inside DOUBLE quotes where bash also treats it as data
 * and shell-quote preserves the token (no split).
 */
function validateCarriageReturn(context: ValidationContext): PermissionResult {
  const command = context.originalCommand

  // Fast path: nothing to scan when the command holds no CR byte at all.
  if (!command.includes('\r')) {
    return { behavior: 'passthrough', message: 'No carriage return' }
  }

  // Walk the command once, tracking quote state, and stop at the first CR
  // that is not inside double quotes (unquoted or single-quoted CR both
  // trigger the shell-quote/bash tokenization differential).
  let withinSingle = false
  let withinDouble = false
  let skipNext = false
  for (let idx = 0; idx < command.length; idx++) {
    const ch = command[idx]
    if (skipNext) {
      // Character after a backslash: consumed without interpretation.
      skipNext = false
    } else if (ch === '\\' && !withinSingle) {
      // Backslash is literal inside single quotes, an escape everywhere else.
      skipNext = true
    } else if (ch === "'" && !withinDouble) {
      withinSingle = !withinSingle
    } else if (ch === '"' && !withinSingle) {
      withinDouble = !withinDouble
    } else if (ch === '\r' && !withinDouble) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.NEWLINES,
        subId: 2,
      })
      return {
        behavior: 'ask',
        message:
          'Command contains carriage return (\\r) which shell-quote and bash tokenize differently',
      }
    }
  }

  return { behavior: 'passthrough', message: 'CR only inside double quotes' }
}

/**
 * Asks when the raw command references the IFS variable, either as `$IFS` or
 * inside a `${...}` expansion (e.g. `${IFS:0:1}`, `${#IFS}`), since IFS can be
 * used to bypass regex-based validation.
 */
function validateIFSInjection(context: ValidationContext): PermissionResult {
  // `\$\{[^}]*IFS` covers every parameter-expansion form that mentions IFS
  // before the closing brace.
  const referencesIFS = /\$IFS|\$\{[^}]*IFS/.test(context.originalCommand)

  if (!referencesIFS) {
    return { behavior: 'passthrough', message: 'No IFS injection detected' }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.IFS_INJECTION,
    subId: 1,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains IFS variable usage which could bypass security validation',
  }
}

// Defense-in-depth against reading environment variables through /proc.
// Path validation typically blocks /proc access already; this check adds a
// second layer because /proc environ files can expose API keys and secrets.
function validateProcEnvironAccess(
  context: ValidationContext,
): PermissionResult {
  // Matches /proc/<anything>/environ, for example:
  //   - /proc/self/environ
  //   - /proc/1/environ
  //   - /proc/*/environ (with any PID)
  const touchesProcEnviron = /\/proc\/.*\/environ/.test(
    context.originalCommand,
  )

  if (!touchesProcEnviron) {
    return {
      behavior: 'passthrough',
      message: 'No /proc/environ access detected',
    }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.PROC_ENVIRON_ACCESS,
    subId: 1,
  })
  return {
    behavior: 'ask',
    message:
      'Command accesses /proc/*/environ which could expose sensitive environment variables',
  }
}

/**
 * Detects commands with malformed tokens (unbalanced delimiters) combined with
 * command separators. This catches potential injection patterns where ambiguous
 * shell syntax could be exploited.
 *
 * Security: This check catches the eval bypass discovered in HackerOne review.
* When shell-quote parses ambiguous patterns like `echo {"hi":"hi;evil"}`,
 * it may produce unbalanced tokens (e.g., `{hi:"hi`). Combined with command
 * separators, this can lead to unintended command execution via eval re-parsing.
 *
 * By forcing user approval for these patterns, we ensure the user sees exactly
 * what will be executed before approving.
 */
function validateMalformedTokenInjection(
  context: ValidationContext,
): PermissionResult {
  const { originalCommand } = context

  const parseResult = tryParseShellCommand(originalCommand)
  if (!parseResult.success) {
    // Parse failed - this is handled elsewhere (bashToolHasPermission checks this)
    return {
      behavior: 'passthrough',
      message: 'Parse failed, handled elsewhere',
    }
  }

  const parsed = parseResult.tokens

  // Check for command separators (;, &&, ||)
  // Operator tokens are the object entries carrying an `op` field; string
  // entries are plain words.
  const hasCommandSeparator = parsed.some(
    entry =>
      typeof entry === 'object' &&
      entry !== null &&
      'op' in entry &&
      (entry.op === ';' || entry.op === '&&' || entry.op === '||'),
  )

  // Without a separator there is no second command to smuggle, so malformed
  // tokens alone are left to other validators.
  if (!hasCommandSeparator) {
    return { behavior: 'passthrough', message: 'No command separators' }
  }

  // Check for malformed tokens (unbalanced delimiters)
  if (hasMalformedTokens(originalCommand, parsed)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.MALFORMED_TOKEN_INJECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains ambiguous syntax with command separators that could be misinterpreted',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No malformed token injection detected',
  }
}

/**
 * Detects shell-quoting tricks that hide a flag name from the regex-based
 * flag blocklists (e.g. `"-"exec`, `$'-exec'`, `"""-f"`).
 *
 * Order of checks:
 *   - early passthrough for a simple `echo` with no `|`, `&` or `;`
 *   - whole-command regex checks (1 to 4c) on originalCommand
 *   - a single quote-aware scan of originalCommand that inspects
 *     (a) whitespace followed by a quoted segment and
 *     (b) whitespace followed by a dash (flag collection)
 *   - two final regex checks on fullyUnquotedContent
 *
 * Returns `ask` on the first hit, otherwise `passthrough`.
 */
function validateObfuscatedFlags(context: ValidationContext): PermissionResult {
  // Block shell quoting bypass patterns used to circumvent negative lookaheads
  // we use in our regexes to block known dangerous flags

  const { originalCommand, baseCommand } = context

  // Echo is safe for obfuscated flags, BUT only for simple echo commands.
  // For compound commands (with |, &, ;), we need to check the whole command
  // because the dangerous ANSI-C quoting might be after the operator.
  const hasShellOperators = /[|&;]/.test(originalCommand)
  if (baseCommand === 'echo' && !hasShellOperators) {
    return {
      behavior: 'passthrough',
      message: 'echo command is safe and has no dangerous flags',
    }
  }

  // COMPREHENSIVE OBFUSCATION DETECTION
  // These checks catch various ways to hide flags using shell quoting

  // 1. Block ANSI-C quoting ($'...') - can encode any character via escape sequences
  // Simple pattern that matches $'...' anywhere. This correctly handles:
  // - grep '$' file => no match ($ is regex anchor inside quotes, no $'...' structure)
  // - 'test'$'-exec' => match (quote concatenation with ANSI-C)
  // - Zero-width space and other invisible chars => match
  // The pattern requires $' followed by content (can be empty) followed by closing '
  // NOTE(review): subId 5 is also emitted by the git-commit leading-dash check
  // in this file — confirm the shared id is intended.
  if (/\$'[^']*'/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 5,
    })
    return {
      behavior: 'ask',
      message: 'Command contains ANSI-C quoting which can hide characters',
    }
  }

  // 2. Block locale quoting ($"...") - can also use escape sequences
  // Same simple pattern as ANSI-C quoting above
  if (/\$"[^"]*"/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 6,
    })
    return {
      behavior: 'ask',
      message: 'Command contains locale quoting which can hide characters',
    }
  }

  // 3. Block empty ANSI-C or locale quotes followed by dash
  // $''-exec or $""-exec
  if (/\$['"]{2}\s*-/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 9,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains empty special quotes before dash (potential bypass)',
    }
  }

  // 4. Block ANY sequence of empty quotes followed by dash
  // This catches: ''- ""- ''""- ""''- ''""''- etc.
  // The pattern looks for one or more empty quote pairs followed by optional whitespace and dash
  if (/(?:^|\s)(?:''|"")+\s*-/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 7,
    })
    return {
      behavior: 'ask',
      message: 'Command contains empty quotes before dash (potential bypass)',
    }
  }

  // 4b. SECURITY: Block homogeneous empty quote pair(s) immediately adjacent
  // to a quoted dash. Patterns like `"""-f"` (empty `""` + quoted `"-f"`)
  // concatenate in bash to `-f` but slip past all the above checks:
  //   - Regex (4) above: `(?:''|"")+\s*-` matches `""` pair, then expects
  //     optional space and dash — but finds a third `"` instead. No match.
  //   - Quote-content scanner (below): Sees the first `""` pair with empty
  //     content (doesn't start with dash). The third `"` opens a new quoted
  //     region handled by the main quote-state tracker.
  //   - Quote-state tracker: `""` toggles inDoubleQuote on/off; third `"`
  //     opens it again. The `-` inside `"-f"` is INSIDE quotes → skipped.
  //   - Flag scanner: Looks for `\s` before `-`. The `-` is preceded by `"`.
  //   - fullyUnquotedContent: Both `""` and `"-f"` get stripped.
  //
  // In bash, `"""-f"` = empty string + string "-f" = `-f`. This bypass works
  // for ANY dangerous-flag check (jq -f, find -exec, fc -e) with a matching
  // prefix permission (Bash(jq:*), Bash(find:*)).
  //
  // The regex `(?:""|'')+['"]-` matches:
  //   - One or more HOMOGENEOUS empty pairs (`""` or `''`) — the concatenation
  //     point where bash joins the empty string to the flag.
  //   - Immediately followed by ANY quote char — opens the flag-quoted region.
  //   - Immediately followed by `-` — the obfuscated flag.
  //
  // POSITION-AGNOSTIC: We do NOT require word-start (`(?:^|\s)`) because
  // prefixes like `$x"""-f"` (unset/empty variable) concatenate the same way.
  // The homogeneous-empty-pair requirement filters out the `'"'"'` idiom
  // (no homogeneous empty pair — it's close, double-quoted-content, open).
  //
  // FALSE POSITIVE: Matches `echo '"""-f" text'` (pattern inside single-quoted
  // string). Extremely rare (requires echoing the literal attack). Acceptable.
  if (/(?:""|'')+['"]-/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 10,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains empty quote pair adjacent to quoted dash (potential flag obfuscation)',
    }
  }

  // 4c. SECURITY: Also block 3+ consecutive quotes at word start even without
  // an immediate dash. Broader safety net for multi-quote obfuscation patterns
  // not enumerated above (e.g., `"""x"-f` where content between quotes shifts
  // the dash position). Legitimate commands never need `"""x"` when `"x"` works.
  if (/(?:^|\s)['"]{3,}/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 11,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains consecutive quote characters at word start (potential obfuscation)',
    }
  }

  // Track quote state to avoid false positives for flags inside quoted strings
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false

  // The loop stops one short of the end because every check below looks at
  // the pair (currentChar, nextChar).
  for (let i = 0; i < originalCommand.length - 1; i++) {
    const currentChar = originalCommand[i]
    const nextChar = originalCommand[i + 1]

    // Update quote state
    if (escaped) {
      escaped = false
      continue
    }

    // SECURITY: Only treat backslash as escape OUTSIDE single quotes. In bash,
    // `\` inside `'...'` is LITERAL. Without this guard, `'\'` desyncs the
    // quote tracker: `\` sets escaped=true, closing `'` is consumed by the
    // escaped-skip above instead of toggling inSingleQuote. Parser stays in
    // single-quote mode, and the `if (inSingleQuote || inDoubleQuote) continue`
    // a few statements below skips ALL subsequent flag detection for the rest
    // of the command. Example: `jq '\' "-f" evil` — bash gets `-f` arg, but
    // desynced parser thinks ` "-f" evil` is inside quotes → flag detection
    // bypassed.
    // Defense-in-depth: hasShellQuoteSingleQuoteBug is described as catching
    // `'\'` patterns before this runs (call site not visible in this section).
    // But we fix the tracker for consistency with the CORRECT implementations
    // elsewhere in this file (hasBackslashEscaped*, extractQuotedContent)
    // which all guard with `!inSingleQuote`.
    if (currentChar === '\\' && !inSingleQuote) {
      escaped = true
      continue
    }

    if (currentChar === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }

    if (currentChar === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }

    // Only look for flags when not inside quoted strings
    // This prevents false positives like: make test TEST="file.py -v"
    if (inSingleQuote || inDoubleQuote) {
      continue
    }

    // Look for whitespace followed by quote that contains a dash (potential flag obfuscation)
    // SECURITY: Block ANY quoted content starting with dash - err on side of safety
    // Catches: "-"exec, "-file", "--flag", '-'output, etc.
    // Users can approve manually if legitimate (e.g., find . -name "-file")
    if (
      currentChar &&
      nextChar &&
      /\s/.test(currentChar) &&
      /['"`]/.test(nextChar)
    ) {
      const quoteChar = nextChar
      let j = i + 2 // Start after the opening quote
      let insideQuote = ''

      // Collect content inside the quote
      // (a plain scan to the next identical quote char; escapes are not
      // interpreted here)
      while (j < originalCommand.length && originalCommand[j] !== quoteChar) {
        insideQuote += originalCommand[j]!
        j++
      }

      // If we found a closing quote and the content looks like an obfuscated flag, block it.
      // Three attack patterns to catch:
      //   1. Flag name inside quotes: "--flag", "-exec", "-X" (dashes + letters inside)
      //   2. Split-quote flag: "-"exec, "--"output (dashes inside, letters continue after quote)
      //   3. Chained quotes: "-""exec" (dashes in first quote, second quote contains letters)
      // Pure-dash strings like "---" or "--" followed by whitespace/separator are separators,
      // not flags, and should not trigger this check.
      const charAfterQuote = originalCommand[j + 1]
      // Inside double quotes, $VAR and `cmd` expand at runtime, so "-$VAR" can
      // become -exec. Blocking $ and ` here over-blocks single-quoted literals
      // like grep '-$' (where $ is literal), but main's startsWith('-') already
      // blocked those — this restores status quo, not a new false positive.
      // Brace expansion ({) does NOT happen inside quotes, so { is not needed here.
      const hasFlagCharsInside = /^-+[a-zA-Z0-9$`]/.test(insideQuote)
      // Characters that can continue a flag after a closing quote. This catches:
      //   a-zA-Z0-9: "-"exec → -exec (direct concatenation)
      //   \\:        "-"\exec → -exec (backslash escape is stripped)
      //   -:         "-"-output → --output (extra dashes)
      //   {:         "-"{exec,delete} → -exec -delete (brace expansion)
      //   $:         "-"$VAR → -exec when VAR=exec (variable expansion)
      //   `:         "-"`echo exec` → -exec (command substitution)
      // Note: glob chars (*?[) are omitted — they require attacker-controlled
      // filenames in CWD to exploit, and blocking them would break patterns
      // like `ls -- "-"*` for listing files that start with dash.
      const FLAG_CONTINUATION_CHARS = /[a-zA-Z0-9\\${`-]/
      const hasFlagCharsContinuing =
        /^-+$/.test(insideQuote) &&
        charAfterQuote !== undefined &&
        FLAG_CONTINUATION_CHARS.test(charAfterQuote)
      // Handle adjacent quote chaining: "-""exec" or "-""-"exec or """-"exec concatenates
      // to -exec in shell. Follow the chain of adjacent quoted segments until
      // we find one containing an alphanumeric char or hit a non-quote boundary.
      // Also handles empty prefix quotes: """-"exec where "" is followed by "-"exec
      // The combined segments form a flag if they contain dash(es) followed by alphanumerics.
      const hasFlagCharsInNextQuote =
        // Trigger when: first segment is only dashes OR empty (could be prefix for flag)
        (insideQuote === '' || /^-+$/.test(insideQuote)) &&
        charAfterQuote !== undefined &&
        /['"`]/.test(charAfterQuote) &&
        (() => {
          let pos = j + 1 // Start at charAfterQuote (an opening quote)
          let combinedContent = insideQuote // Track what the shell will see
          while (
            pos < originalCommand.length &&
            /['"`]/.test(originalCommand[pos]!)
          ) {
            const segQuote = originalCommand[pos]!
            let end = pos + 1
            while (
              end < originalCommand.length &&
              originalCommand[end] !== segQuote
            ) {
              end++
            }
            const segment = originalCommand.slice(pos + 1, end)
            combinedContent += segment

            // Check if combined content so far forms a flag pattern.
            // Include $ and ` for in-quote expansion: "-""$VAR" → -exec
            if (/^-+[a-zA-Z0-9$`]/.test(combinedContent)) return true

            // If this segment has alphanumeric/expansion and we already have dashes,
            // it's a flag. Catches "-""$*" where segment='$*' has no alnum but
            // expands to positional params at runtime.
            // Guard against segment.length === 0: slice(0, -0) → slice(0, 0) → ''.
            const priorContent =
              segment.length > 0
                ? combinedContent.slice(0, -segment.length)
                : combinedContent
            if (/^-+$/.test(priorContent)) {
              if (/[a-zA-Z0-9$`]/.test(segment)) return true
            }

            if (end >= originalCommand.length) break // Unclosed quote
            pos = end + 1 // Move past closing quote to check next segment
          }
          // Also check the unquoted char at the end of the chain
          if (
            pos < originalCommand.length &&
            FLAG_CONTINUATION_CHARS.test(originalCommand[pos]!)
          ) {
            // If we have dashes in combined content, the trailing char completes a flag
            if (/^-+$/.test(combinedContent) || combinedContent === '') {
              // Check if we're about to form a flag with the following content
              // (this `nextChar` shadows the loop-level `nextChar` inside the
              // closure only)
              const nextChar = originalCommand[pos]!
              if (nextChar === '-') {
                // More dashes, could still form a flag
                return true
              }
              if (/[a-zA-Z0-9\\${`]/.test(nextChar) && combinedContent !== '') {
                // We have dashes and now alphanumeric/expansion follows
                return true
              }
            }
            // Original check for dashes followed by alphanumeric
            if (/^-/.test(combinedContent)) {
              return true
            }
          }
          return false
        })()
      // `originalCommand[j] === quoteChar` confirms the first quoted segment
      // was actually closed; an unterminated quote never triggers this branch.
      if (
        j < originalCommand.length &&
        originalCommand[j] === quoteChar &&
        (hasFlagCharsInside ||
          hasFlagCharsContinuing ||
          hasFlagCharsInNextQuote)
      ) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
          subId: 4,
        })
        return {
          behavior: 'ask',
          message: 'Command contains quoted characters in flag names',
        }
      }
    }

    // Look for whitespace followed by dash - this starts a flag
    if (currentChar && nextChar && /\s/.test(currentChar) && nextChar === '-') {
      let j = i + 1 // Start at the dash
      let flagContent = ''

      // Collect flag content
      while (j < originalCommand.length) {
        const flagChar = originalCommand[j]
        if (!flagChar) break

        // End flag content once we hit whitespace or an equals sign
        if (/[\s=]/.test(flagChar)) {
          break
        }
        // End flag collection if we hit quote followed by non-flag character.
        // This is needed to handle cases like -d"," which should be parsed as just -d
        if (/['"`]/.test(flagChar)) {
          // Special case for cut -d flag: the delimiter value can be quoted
          // Example: cut -d'"' should parse as flag name: -d, value: '"'
          // Note: We only apply this exception to cut -d specifically to avoid bypasses.
          // Without this restriction, a command like `find -e"xec"` could be parsed as
          // flag name: -e, bypassing our blocklist for -exec. By restricting to cut -d,
          // we allow the legitimate use case while preventing obfuscation attacks on other
          // commands where quoted flag values could hide dangerous flag names.
          if (
            baseCommand === 'cut' &&
            flagContent === '-d' &&
            /['"`]/.test(flagChar)
          ) {
            // This is cut -d followed by a quoted delimiter - flagContent is already '-d'
            break
          }

          // Look ahead to see what follows the quote
          if (j + 1 < originalCommand.length) {
            const nextFlagChar = originalCommand[j + 1]
            if (nextFlagChar && !/[a-zA-Z0-9_'"-]/.test(nextFlagChar)) {
              // Quote followed by something that is clearly not part of a flag, end the parsing
              break
            }
          }
        }
        flagContent += flagChar
        j++
      }

      // Only single and double quotes are tested here; a backtick collected
      // into flagContent does not trigger this branch by itself.
      if (flagContent.includes('"') || flagContent.includes("'")) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
          subId: 1,
        })
        return {
          behavior: 'ask',
          message: 'Command contains quoted characters in flag names',
        }
      }
    }
  }

  // Also handle flags that start with quotes: "--"output, '-'-output, etc.
  // Use fullyUnquotedContent to avoid false positives from legitimate quoted content like echo "---"
  if (/\s['"`]-/.test(context.fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 2,
    })
    return {
      behavior: 'ask',
      message: 'Command contains quoted characters in flag names',
    }
  }

  // Also handles cases like ""--output
  // Use fullyUnquotedContent to avoid false positives from legitimate quoted content
  if (/['"`]{2}-/.test(context.fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 3,
    })
    return {
      behavior: 'ask',
      message: 'Command contains quoted characters in flag names',
    }
  }

  return { behavior: 'passthrough', message: 'No obfuscated flags detected' }
}

/**
 * Detects backslash-escaped whitespace characters (space, tab) outside of quotes.
 *
 * In bash, `echo\ test` is a single token (command named "echo test"), but
 * shell-quote decodes the escape and produces `echo test` (two separate tokens).
 * This discrepancy allows path traversal attacks like:
 *   echo\ test/../../../usr/bin/touch /tmp/file
 * which the parser sees as `echo test/.../touch /tmp/file` (an echo command)
 * but bash resolves as `/usr/bin/touch /tmp/file` (via directory "echo test").
*/
function hasBackslashEscapedWhitespace(command: string): boolean {
  let singleOpen = false
  let doubleOpen = false
  let cursor = 0

  while (cursor < command.length) {
    const ch = command[cursor]

    // Backslash is an escape everywhere except inside single quotes.
    if (ch === '\\' && !singleOpen) {
      const escapedChar = command[cursor + 1]
      // Only an escape OUTSIDE double quotes can glue two words together.
      if (!doubleOpen && (escapedChar === ' ' || escapedChar === '\t')) {
        return true
      }
      // Step over the backslash and the character it escapes (both outside
      // quotes and inside double quotes, where \\, \", \$, \` are valid
      // escape sequences).
      cursor += 2
      continue
    }

    if (ch === '"' && !singleOpen) {
      doubleOpen = !doubleOpen
    } else if (ch === "'" && !doubleOpen) {
      singleOpen = !singleOpen
    }
    cursor += 1
  }

  return false
}

/**
 * Asks when the raw command contains a backslash-escaped space or tab outside
 * of quotes; otherwise passes through.
 */
function validateBackslashEscapedWhitespace(
  context: ValidationContext,
): PermissionResult {
  if (!hasBackslashEscapedWhitespace(context.originalCommand)) {
    return {
      behavior: 'passthrough',
      message: 'No backslash-escaped whitespace',
    }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.BACKSLASH_ESCAPED_WHITESPACE,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains backslash-escaped whitespace that could alter command parsing',
  }
}

/**
 * Detects a backslash immediately preceding a shell operator outside of quotes.
 *
 * SECURITY: splitCommand normalizes `\;` to a bare `;` in its output string.
 * When downstream code (checkReadOnlyConstraints, checkPathConstraints, etc.)
 * re-parses that normalized string, the bare `;` is seen as an operator and
 * causes a false split. This enables arbitrary file read bypassing path checks:
 *
 *   cat safe.txt \; echo ~/.ssh/id_rsa
 *
 * In bash: ONE cat command reading safe.txt, ;, echo, ~/.ssh/id_rsa as files.
 * After splitCommand normalizes: "cat safe.txt ; echo ~/.ssh/id_rsa"
 * Nested re-parse: ["cat safe.txt", "echo ~/.ssh/id_rsa"] — both segments
 * pass isCommandReadOnly, sensitive path hidden in echo segment is never
 * validated by path constraints. Auto-allowed. Private key leaked.
 *
 * Backslash parity: the scanner below consumes backslashes in pairs, so it
 * flags a backslash that DIRECTLY escapes an operator (odd-length runs such as
 * `\;` or `\\\;`), which are harmless in bash but trigger the double-parse bug
 * above. Even-length runs (`\\;`) are NOT flagged: there bash sees a literal
 * `\` followed by a real `;` separator.
 * NOTE(review): an earlier revision of this comment stated that both parities
 * must be flagged; the implementation does not do that — confirm which
 * behavior is intended.
 *
 * Known false positive: `find . -exec cmd {} \;` — users will be prompted once.
 *
 * Note: `(` and `)` are NOT in this set — splitCommand preserves `\(` and `\)`
 * in its output (round-trip safe), so they don't trigger the double-parse bug.
 * This allows `find . \( -name x -o -name y \)` to pass without false positives.
 */
const SHELL_OPERATORS = new Set([';', '|', '&', '<', '>'])

/**
 * Scans `command` left to right, tracking single/double quote state, and
 * reports whether a backslash outside of any quotes is immediately followed by
 * one of SHELL_OPERATORS.
 *
 * @param command Raw command string to scan.
 * @returns true on the first unquoted `\<operator>`; false if none is found.
 */
function hasBackslashEscapedOperator(command: string): boolean {
  let inSingleQuote = false
  let inDoubleQuote = false

  for (let i = 0; i < command.length; i++) {
    const char = command[i]

    // SECURITY: Handle backslash FIRST, before quote toggles. In bash, inside
    // double quotes, `\"` is an escape sequence producing a literal `"` — it
    // does NOT close the quote. If we process quote toggles first, `\"` inside
    // `"..."` desyncs the tracker:
    //   - `\` is ignored (gated by !inDoubleQuote)
    //   - `"` toggles inDoubleQuote to FALSE (wrong — bash says still inside)
    //   - next `"` (the real closing quote) toggles BACK to TRUE — locked desync
    //   - subsequent `\;` is missed because !inDoubleQuote is false
    // Exploit: `tac "x\"y" \; echo ~/.ssh/id_rsa` — bash runs ONE tac reading
    // all args as files (leaking id_rsa), but desynced tracker misses `\;` and
    // splitCommand's double-parse normalization "sees" two safe commands.
    //
    // Fix structure matches hasBackslashEscapedWhitespace (which was correctly
    // fixed for this in commit prior to d000dfe84e): backslash check first,
    // gated only by !inSingleQuote (since backslash IS literal inside '...'),
    // unconditional i++ to skip the escaped char even inside double quotes.
    if (char === '\\' && !inSingleQuote) {
      // Only flag \<operator> when OUTSIDE double quotes (inside double quotes,
      // operators like ;|&<> are already not special, so \; is harmless there).
      if (!inDoubleQuote) {
        const nextChar = command[i + 1]
        // `nextChar &&` guards the end-of-string case, where command[i + 1]
        // is undefined (a trailing backslash).
        if (nextChar && SHELL_OPERATORS.has(nextChar)) {
          return true
        }
      }
      // Skip the escaped character unconditionally. Inside double quotes, this
      // correctly consumes backslash pairs: `"x\\"` → pos 6 (`\`) skips pos 7
      // (`\`), then pos 8 (`"`) toggles inDoubleQuote off correctly. Without
      // unconditional skip, pos 7 would see `\`, see pos 8 (`"`) as nextChar,
      // skip it, and the closing quote would NEVER toggle inDoubleQuote —
      // permanently desyncing and missing subsequent `\;` outside quotes.
      // Exploit: `cat "x\\" \; echo /etc/passwd` — bash reads /etc/passwd.
      //
      // This correctly handles backslash parity: odd-count `\;` (1, 3, 5...)
      // is flagged (the unpaired `\` before `;` is detected). Even-count `\\;`
      // (2, 4...) is NOT flagged, which is CORRECT — bash treats `\\` as
      // literal `\` and `;` as a separator, so splitCommand handles it
      // normally (no double-parse bug). This matches the pair-skipping in
      // hasBackslashEscapedWhitespace.
      i++
      continue
    }

    // Quote toggles come AFTER backslash handling (backslash already skipped
    // any escaped quote char, so these toggles only fire on unescaped quotes).
    if (char === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }
    if (char === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }
  }

  return false
}

/**
 * Validator wrapper around hasBackslashEscapedOperator.
 *
 * Returns 'passthrough' immediately when tree-sitter analysis is present and
 * reports no operator nodes; otherwise scans context.originalCommand and, on a
 * hit, logs the security-check event and returns 'ask'.
 */
function validateBackslashEscapedOperators(
  context: ValidationContext,
): PermissionResult {
  // Tree-sitter path: if tree-sitter confirms no actual operator nodes exist
  // in the AST, then any \; is just an escaped character in a word argument
  // (e.g., `find . -exec cmd {} \;`). Skip the character scan below.
  if (context.treeSitter && !context.treeSitter.hasActualOperatorNodes) {
    return { behavior: 'passthrough', message: 'No operator nodes in AST' }
  }

  if (hasBackslashEscapedOperator(context.originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.BACKSLASH_ESCAPED_OPERATORS,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains a backslash before a shell operator (;, |, &, <, >) which can hide command structure',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No backslash-escaped operators',
  }
}

/**
 * Checks if a character at position `pos` in `content` is escaped by counting
 * consecutive backslashes before it. An odd number means it's escaped.
 *
 * @param content String being inspected.
 * @param pos Index of the character whose escape status is wanted.
 * @returns true when an odd number of backslashes directly precedes `pos`.
 */
function isEscapedAtPosition(content: string, pos: number): boolean {
  let backslashCount = 0
  let i = pos - 1
  // Walk backwards over the contiguous run of backslashes ending at pos - 1.
  while (i >= 0 && content[i] === '\\') {
    backslashCount++
    i--
  }
  return backslashCount % 2 === 1
}

/**
 * Detects unquoted brace expansion syntax that Bash expands but shell-quote/tree-sitter
 * treat as literal strings.
 * This parsing discrepancy allows permission bypass:
 *   git ls-remote {--upload-pack="touch /tmp/test",test}
 * Parser sees one literal arg, but Bash expands to: --upload-pack="touch /tmp/test" test
 *
 * Brace expansion has two forms:
 *   1. Comma-separated: {a,b,c} → a b c
 *   2. Sequence: {1..5} → 1 2 3 4 5
 *
 * Both single and double quotes suppress brace expansion in Bash, so we analyse
 * context.fullyUnquotedPreStrip, the fully-unquoted content (both quote types
 * stripped) taken BEFORE stripSafeRedirections runs.
 * Backslash-escaped braces (\{, \}) also suppress expansion.
 *
 * Three checks run in order, each logging with a distinct subId before
 * returning 'ask':
 *   subId 2 — more unescaped `}` than `{` in the unquoted content;
 *   subId 3 — a quoted single brace ('{', '}', "{", "}") in the original
 *             command while an unescaped `{` exists in the unquoted content;
 *   subId 1 — a `,` or `..` at the outermost level of a matched `{...}` pair.
 *
 * @param context Validation context; reads fullyUnquotedPreStrip and
 *   originalCommand.
 * @returns 'ask' when any check fires, otherwise 'passthrough'.
 */
function validateBraceExpansion(context: ValidationContext): PermissionResult {
  // Use pre-strip content to avoid false negatives from stripSafeRedirections
  // creating backslash adjacencies (e.g., `\>/dev/null{a,b}` → `\{a,b}` after
  // stripping, making isEscapedAtPosition think the brace is escaped).
  const content = context.fullyUnquotedPreStrip

  // SECURITY: Check for MISMATCHED brace counts in fullyUnquoted content.
  // A mismatch indicates that quoted braces (e.g., `'{'` or `"{"`) were
  // stripped by extractQuotedContent, leaving unbalanced braces in the content
  // we analyze. Our depth-matching algorithm below assumes balanced braces —
  // with a mismatch, it closes at the WRONG position, missing commas that
  // bash's algorithm WOULD find.
  //
  // Exploit: `git diff {@'{'0},--output=/tmp/pwned}`
  //   - Original: 2 `{`, 2 `}` (quoted `'{'` counts as content, not operator)
  //   - fullyUnquoted: `git diff {@0},--output=/tmp/pwned}` — 1 `{`, 2 `}`!
  //   - Our depth-matcher: closes at first `}` (after `0`), inner=`@0`, no `,`
  //   - Bash (on original): quoted `{` is content; first unquoted `}` has no
  //     `,` yet → bash treats as literal content, keeps scanning → finds `,`
  //     → final `}` closes → expands to `@{0} --output=/tmp/pwned`
  //   - git writes diff to /tmp/pwned. ARBITRARY FILE WRITE, ZERO PERMISSIONS.
  //
  // We count ONLY unescaped braces (backslash-escaped braces are literal in
  // bash). If counts mismatch AND at least one unescaped `{` exists, block —
  // our depth-matching cannot be trusted on this content.
  let unescapedOpenBraces = 0
  let unescapedCloseBraces = 0
  for (let i = 0; i < content.length; i++) {
    if (content[i] === '{' && !isEscapedAtPosition(content, i)) {
      unescapedOpenBraces++
    } else if (content[i] === '}' && !isEscapedAtPosition(content, i)) {
      unescapedCloseBraces++
    }
  }
  // Only block when CLOSE count EXCEEDS open count — this is the specific
  // attack signature. More `}` than `{` means a quoted `{` was stripped
  // (bash saw it as content, we see extra `}` unaccounted for). The inverse
  // (more `{` than `}`) is usually legitimate unclosed/escaped braces like
  // `{foo` or `{a,b\}` where bash doesn't expand anyway.
  if (unescapedOpenBraces > 0 && unescapedCloseBraces > unescapedOpenBraces) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,
      subId: 2,
    })
    return {
      behavior: 'ask',
      message:
        'Command has excess closing braces after quote stripping, indicating possible brace expansion obfuscation',
    }
  }

  // SECURITY: Additionally, check the ORIGINAL command (before quote stripping)
  // for `'{'` or `"{"` INSIDE an unquoted brace context — this is the specific
  // attack primitive. A quoted brace inside an outer unquoted `{...}` is
  // essentially always an obfuscation attempt; legitimate commands don't nest
  // quoted braces inside brace expansion (awk/find patterns are fully quoted,
  // like `awk '{print $1}'` where the OUTER brace is inside quotes too).
  //
  // This catches the attack even if an attacker crafts a payload with balanced
  // stripped braces (defense-in-depth). We use a simple heuristic: if the
  // original command has `'{'` or `'}'` or `"{"` or `"}"` (quoted single brace)
  // AND also has an unquoted `{`, that's suspicious.
  if (unescapedOpenBraces > 0) {
    const orig = context.originalCommand
    // Look for quoted single-brace patterns: '{', '}', "{", "}"
    // These are the attack primitive — a brace char wrapped in quotes.
    // Note: the character classes do not require the two quote characters to
    // be the same kind, so mixed pairs such as '{" also match.
    if (/['"][{}]['"]/.test(orig)) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,
        subId: 3,
      })
      return {
        behavior: 'ask',
        message:
          'Command contains quoted brace character inside brace context (potential brace expansion obfuscation)',
      }
    }
  }

  // Scan for unescaped `{` characters, then check if they form brace expansion.
  // We use a manual scan rather than a simple regex lookbehind because
  // lookbehinds can't handle double-escaped backslashes (\\{ is unescaped `{`).
  for (let i = 0; i < content.length; i++) {
    if (content[i] !== '{') continue
    if (isEscapedAtPosition(content, i)) continue

    // Find matching unescaped `}` by tracking nesting depth.
    // Previous approach broke on nested `{`, missing commas between the outer
    // `{` and the nested one (e.g., `{--upload-pack="evil",{test}}`).
    let depth = 1
    let matchingClose = -1
    for (let j = i + 1; j < content.length; j++) {
      const ch = content[j]
      if (ch === '{' && !isEscapedAtPosition(content, j)) {
        depth++
      } else if (ch === '}' && !isEscapedAtPosition(content, j)) {
        depth--
        if (depth === 0) {
          matchingClose = j
          break
        }
      }
    }

    // Unclosed `{`: nothing to inspect for this opener, move to the next one.
    if (matchingClose === -1) continue

    // Check for `,` or `..` at the outermost nesting level between this
    // `{` and its matching `}`. Only depth-0 triggers matter — bash splits
    // brace expansion at outer-level commas/sequences.
    let innerDepth = 0
    for (let k = i + 1; k < matchingClose; k++) {
      const ch = content[k]
      if (ch === '{' && !isEscapedAtPosition(content, k)) {
        innerDepth++
      } else if (ch === '}' && !isEscapedAtPosition(content, k)) {
        innerDepth--
      } else if (innerDepth === 0) {
        if (
          ch === ',' ||
          (ch === '.' && k + 1 < matchingClose && content[k + 1] === '.')
        ) {
          logEvent('tengu_bash_security_check_triggered', {
            checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,
            subId: 1,
          })
          return {
            behavior: 'ask',
            message:
              'Command contains brace expansion that could alter command parsing',
          }
        }
      }
    }
    // No expansion at this level — don't skip past; inner pairs will be
    // caught by subsequent iterations of the outer loop.
  }

  return {
    behavior: 'passthrough',
    message: 'No brace expansion detected',
  }
}

// Matches Unicode whitespace characters that shell-quote treats as word
// separators but bash treats as literal word content. While this differential
// is defense-favorable (shell-quote over-splits), blocking these proactively
// prevents future edge cases.
// eslint-disable-next-line no-misleading-character-class
const UNICODE_WS_RE =
  /[\u00A0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]/

/**
 * Asks for confirmation when context.originalCommand contains any character
 * matched by UNICODE_WS_RE; otherwise passes through.
 */
function validateUnicodeWhitespace(
  context: ValidationContext,
): PermissionResult {
  const containsUnicodeWhitespace = UNICODE_WS_RE.test(context.originalCommand)

  if (!containsUnicodeWhitespace) {
    return { behavior: 'passthrough', message: 'No Unicode whitespace' }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.UNICODE_WHITESPACE,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains Unicode whitespace characters that could cause parsing inconsistencies',
  }
}

/**
 * Asks for confirmation when a `#` appears directly after a non-whitespace
 * character (a "mid-word" hash), either in context.unquotedKeepQuoteChars or
 * in that same text with backslash-newline line continuations joined.
 *
 * shell-quote treats mid-word # as comment-start but bash treats it as a
 * literal character, creating a parser differential.
 */
function validateMidWordHash(context: ValidationContext): PermissionResult {
  // unquotedKeepQuoteChars preserves quote delimiters but strips quoted
  // content, so quote-adjacent # like 'x'# is still seen as mid-word.
  // fullyUnquotedPreStrip would strip both quotes and content, turning 'x'#
  // into a word-initial #.
  const preJoin = context.unquotedKeepQuoteChars

  // SECURITY: also examine the CONTINUATION-JOINED text. The context is built
  // from the original command (pre-continuation-join). For `foo\<NL>#bar`,
  // pre-join the `#` follows `\n` (whitespace, so no match), but post-join it
  // follows `o` (non-whitespace, so it matches). shell-quote operates on the
  // post-join text (line continuations are joined in splitCommand), so the
  // parser differential manifests there. While not directly exploitable (the
  // `#...` fragment still prompts as its own subcommand), this closes a
  // defense-in-depth gap — shell-quote would drop post-`#` content from path
  // extraction.
  //
  // An odd-length backslash run before the newline is a continuation: drop the
  // final backslash and the newline. An even-length run is all escaped
  // backslashes followed by a real newline: leave it untouched.
  const postJoin = preJoin.replace(/\\+\n/g, run => {
    const slashes = run.length - 1
    if (slashes % 2 === 0) return run
    return run.slice(0, slashes - 1)
  })

  // `${#` is excluded: it is bash string-length syntax (e.g., ${#var}). The
  // lookbehind sits immediately before # (not before \S) so that it checks the
  // correct 2-char window.
  // eslint-disable-next-line custom-rules/no-lookbehind-regex -- .test() with atom search: fast when # absent
  const midWordHash = /\S(?<!\$\{)#/
  const flagged = [preJoin, postJoin].some(text => midWordHash.test(text))

  if (!flagged) {
    return { behavior: 'passthrough', message: 'No mid-word hash' }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.MID_WORD_HASH,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains mid-word # which is parsed differently by shell-quote vs bash',
  }
}

/**
 * Detects when a `#` comment contains quote characters that would desync
 * downstream quote trackers (like extractQuotedContent).
 *
 * In bash, everything after an unquoted `#` on a line is a comment — quote
 * characters inside the comment are literal text, not quote toggles. But our
 * quote-tracking functions don't handle comments, so a `'` or `"` after `#`
 * toggles their quote state. Attackers can craft `# ' "` sequences that
 * precisely desync the tracker, causing subsequent content (on following
 * lines) to appear "inside quotes" when it's actually unquoted in bash.
 *
 * Example attack:
 *   echo "it's" # ' " <<'MARKER'\n
 *   rm -rf /\n
 *   MARKER
 * In bash: `#` starts a comment, `rm -rf /` executes on line 2.
 * In extractQuotedContent: the `'` at position 14 (after #) opens a single
 * quote, and the `'` before MARKER closes it. But the `'` after MARKER opens
 * ANOTHER single quote, swallowing the newline and `rm -rf /`, so
 * validateNewlines sees no unquoted newlines.
 *
 * Defense: If we see an unquoted `#` followed by any quote character on the
 * same line, treat it as a misparsing concern. Legitimate commands rarely
 * have quote characters in their comments (and if they do, the user can
 * approve manually).
 *
 * @param context Validation context; reads treeSitter and originalCommand.
 * @returns 'passthrough' when tree-sitter analysis is present or no such
 *   comment is found; 'ask' when an unquoted `#` is followed by `'` or `"`
 *   before the next newline.
 */
function validateCommentQuoteDesync(
  context: ValidationContext,
): PermissionResult {
  // Tree-sitter path: tree-sitter correctly identifies comment nodes and
  // quoted content. The desync concern is about regex quote tracking being
  // confused by quote characters inside comments. When tree-sitter provides
  // the quote context, this desync cannot happen — the AST is authoritative
  // regardless of whether the command contains a comment.
  if (context.treeSitter) {
    return {
      behavior: 'passthrough',
      message: 'Tree-sitter quote context is authoritative',
    }
  }

  const { originalCommand } = context

  // Track quote state character-by-character using the same (correct) logic
  // as extractQuotedContent: single quotes don't toggle inside double quotes.
  // When we encounter an unquoted `#`, check if the rest of the line (until
  // newline) contains any quote characters.
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false

  for (let i = 0; i < originalCommand.length; i++) {
    const char = originalCommand[i]

    // The previous character was an active backslash: this character is
    // consumed as its operand, whatever it is.
    if (escaped) {
      escaped = false
      continue
    }

    // Checked BEFORE the backslash test below, so a backslash inside '...'
    // is plain content and never sets `escaped`.
    if (inSingleQuote) {
      if (char === "'") inSingleQuote = false
      continue
    }

    // Reached both outside quotes and inside double quotes.
    if (char === '\\') {
      escaped = true
      continue
    }

    if (inDoubleQuote) {
      if (char === '"') inDoubleQuote = false
      // Single quotes inside double quotes are literal — no toggle
      continue
    }

    if (char === "'") {
      inSingleQuote = true
      continue
    }

    if (char === '"') {
      inDoubleQuote = true
      continue
    }

    // Unquoted `#` — in bash, this starts a comment. Check if the rest of
    // the line contains quote characters that would desync other trackers.
    // Note: there is no word-boundary test here, so ANY unquoted, unescaped
    // `#` (including a mid-word one) is treated as a comment start.
    if (char === '#') {
      const lineEnd = originalCommand.indexOf('\n', i)
      const commentText = originalCommand.slice(
        i + 1,
        lineEnd === -1 ? originalCommand.length : lineEnd,
      )
      if (/['"]/.test(commentText)) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.COMMENT_QUOTE_DESYNC,
        })
        return {
          behavior: 'ask',
          message:
            'Command contains quote characters inside a # comment which can desync quote tracking',
        }
      }
      // Skip to end of line (rest is comment)
      if (lineEnd === -1) break
      i = lineEnd // Loop increment will move past newline
    }
  }

  return { behavior: 'passthrough', message: 'No comment quote desync' }
}

/**
 * Detects a newline inside a quoted string where the NEXT line would be
 * stripped by stripCommentLines (trimmed line starts with `#`).
 *
 * In bash, `\n` inside quotes is a literal character and part of the argument.
2081 * But stripCommentLines (called by stripSafeWrappers in bashPermissions before 2082 * path validation and rule matching) processes commands LINE-BY-LINE via 2083 * `command.split('\n')` without tracking quote state. A quoted newline lets an 2084 * attacker position the next line to start with `#` (after trim), causing 2085 * stripCommentLines to drop that line entirely — hiding sensitive paths or 2086 * arguments from path validation and permission rule matching. 2087 * 2088 * Example attack (auto-allowed in acceptEdits mode without any Bash rules): 2089 * mv ./decoy '<\n>#' ~/.ssh/id_rsa ./exfil_dir 2090 * Bash: moves ./decoy AND ~/.ssh/id_rsa into ./exfil_dir/ (errors on `\n#`). 2091 * stripSafeWrappers: line 2 starts with `#` → stripped → "mv ./decoy '". 2092 * shell-quote: drops unbalanced trailing quote → ["mv", "./decoy"]. 2093 * checkPathConstraints: only sees ./decoy (in cwd) → passthrough. 2094 * acceptEdits mode: mv with all-cwd paths → ALLOW. Zero clicks, no warning. 2095 * 2096 * Also works with cp (exfil), rm/rm -rf (delete arbitrary files/dirs). 2097 * 2098 * Defense: block ONLY the specific stripCommentLines trigger — a newline inside 2099 * quotes where the next line starts with `#` after trim. This is the minimal 2100 * check that catches the parser differential while preserving legitimate 2101 * multi-line quoted arguments (echo 'line1\nline2', grep patterns, etc.). 2102 * Safe heredocs ($(cat <<'EOF'...)) and git commit -m "..." are handled by 2103 * early validators and never reach this check. 2104 * 2105 * This validator is NOT in nonMisparsingValidators — its ask result gets 2106 * isBashSecurityCheckForMisparsing: true, causing an early block in the 2107 * permission flow at bashPermissions.ts before any line-based processing runs. 
2108 */ 2109function validateQuotedNewline(context: ValidationContext): PermissionResult { 2110 const { originalCommand } = context 2111 2112 // Fast path: must have both a newline byte AND a # character somewhere. 2113 // stripCommentLines only strips lines where trim().startsWith('#'), so 2114 // no # means no possible trigger. 2115 if (!originalCommand.includes('\n') || !originalCommand.includes('#')) { 2116 return { behavior: 'passthrough', message: 'No newline or no hash' } 2117 } 2118 2119 // Track quote state. Mirrors extractQuotedContent / validateCommentQuoteDesync: 2120 // - single quotes don't toggle inside double quotes 2121 // - backslash escapes the next char (but not inside single quotes) 2122 // stripCommentLines splits on '\n' (not \r), so we only treat \n as a line 2123 // separator. \r inside a line is removed by trim() and doesn't change the 2124 // trimmed-starts-with-# check. 2125 let inSingleQuote = false 2126 let inDoubleQuote = false 2127 let escaped = false 2128 2129 for (let i = 0; i < originalCommand.length; i++) { 2130 const char = originalCommand[i] 2131 2132 if (escaped) { 2133 escaped = false 2134 continue 2135 } 2136 2137 if (char === '\\' && !inSingleQuote) { 2138 escaped = true 2139 continue 2140 } 2141 2142 if (char === "'" && !inDoubleQuote) { 2143 inSingleQuote = !inSingleQuote 2144 continue 2145 } 2146 2147 if (char === '"' && !inSingleQuote) { 2148 inDoubleQuote = !inDoubleQuote 2149 continue 2150 } 2151 2152 // A newline inside quotes: the NEXT line (from bash's perspective) starts 2153 // inside a quoted string. Check if that line would be stripped by 2154 // stripCommentLines — i.e., after trim(), does it start with `#`? 2155 // This exactly mirrors: lines.filter(l => !l.trim().startsWith('#')) 2156 if (char === '\n' && (inSingleQuote || inDoubleQuote)) { 2157 const lineStart = i + 1 2158 const nextNewline = originalCommand.indexOf('\n', lineStart) 2159 const lineEnd = nextNewline === -1 ? 
originalCommand.length : nextNewline 2160 const nextLine = originalCommand.slice(lineStart, lineEnd) 2161 if (nextLine.trim().startsWith('#')) { 2162 logEvent('tengu_bash_security_check_triggered', { 2163 checkId: BASH_SECURITY_CHECK_IDS.QUOTED_NEWLINE, 2164 }) 2165 return { 2166 behavior: 'ask', 2167 message: 2168 'Command contains a quoted newline followed by a #-prefixed line, which can hide arguments from line-based permission checks', 2169 } 2170 } 2171 } 2172 } 2173 2174 return { behavior: 'passthrough', message: 'No quoted newline-hash pattern' } 2175} 2176 2177/** 2178 * Validates that the command doesn't use Zsh-specific dangerous commands that 2179 * can bypass security checks. These commands provide capabilities like loading 2180 * kernel modules, raw file I/O, network access, and pseudo-terminal execution 2181 * that circumvent normal permission checks. 2182 * 2183 * Also catches `fc -e` which can execute arbitrary editors on command history, 2184 * and `emulate` which with `-c` is an eval-equivalent. 2185 */ 2186function validateZshDangerousCommands( 2187 context: ValidationContext, 2188): PermissionResult { 2189 const { originalCommand } = context 2190 2191 // Extract the base command from the original command, stripping leading 2192 // whitespace, env var assignments, and Zsh precommand modifiers. 
2193 // e.g., "FOO=bar command builtin zmodload" -> "zmodload" 2194 const ZSH_PRECOMMAND_MODIFIERS = new Set([ 2195 'command', 2196 'builtin', 2197 'noglob', 2198 'nocorrect', 2199 ]) 2200 const trimmed = originalCommand.trim() 2201 const tokens = trimmed.split(/\s+/) 2202 let baseCmd = '' 2203 for (const token of tokens) { 2204 // Skip env var assignments (VAR=value) 2205 if (/^[A-Za-z_]\w*=/.test(token)) continue 2206 // Skip Zsh precommand modifiers (they don't change what command runs) 2207 if (ZSH_PRECOMMAND_MODIFIERS.has(token)) continue 2208 baseCmd = token 2209 break 2210 } 2211 2212 if (ZSH_DANGEROUS_COMMANDS.has(baseCmd)) { 2213 logEvent('tengu_bash_security_check_triggered', { 2214 checkId: BASH_SECURITY_CHECK_IDS.ZSH_DANGEROUS_COMMANDS, 2215 subId: 1, 2216 }) 2217 return { 2218 behavior: 'ask', 2219 message: `Command uses Zsh-specific '${baseCmd}' which can bypass security checks`, 2220 } 2221 } 2222 2223 // Check for `fc -e` which allows executing arbitrary commands via editor 2224 // fc without -e is safe (just lists history), but -e specifies an editor 2225 // to run on the command, effectively an eval 2226 if (baseCmd === 'fc' && /\s-\S*e/.test(trimmed)) { 2227 logEvent('tengu_bash_security_check_triggered', { 2228 checkId: BASH_SECURITY_CHECK_IDS.ZSH_DANGEROUS_COMMANDS, 2229 subId: 2, 2230 }) 2231 return { 2232 behavior: 'ask', 2233 message: 2234 "Command uses 'fc -e' which can execute arbitrary commands via editor", 2235 } 2236 } 2237 2238 return { 2239 behavior: 'passthrough', 2240 message: 'No Zsh dangerous commands', 2241 } 2242} 2243 2244// Matches non-printable control characters that have no legitimate use in shell 2245// commands: 0x00-0x08, 0x0B-0x0C, 0x0E-0x1F, 0x7F. Excludes tab (0x09), 2246// newline (0x0A), and carriage return (0x0D) which are handled by other 2247// validators. 
// Bash silently drops null bytes and ignores most control chars,
// so an attacker can use them to slip metacharacters past our checks while
// bash still executes them (e.g., "echo safe\x00; rm -rf /").
// eslint-disable-next-line no-control-regex
const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/

/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Runs the synchronous security validators over `command`:
 *   1. Pre-checks on the raw string (control characters, shell-quote
 *      single-quote bug) — either one returns 'ask' immediately.
 *   2. Builds a ValidationContext from the command with quoted heredoc bodies
 *      stripped and from the quote-stripped views of that text.
 *   3. Early validators — the first non-passthrough result ends the run (an
 *      'allow' is reported to the caller as 'passthrough').
 *   4. Main validators — the first 'ask' from a misparsing validator is
 *      returned with isBashSecurityCheckForMisparsing set; an 'ask' from a
 *      non-misparsing validator is deferred and returned only if no
 *      misparsing validator fires.
 *
 * @param command Raw shell command string.
 * @returns 'ask' (optionally flagged as a misparsing concern), whatever other
 *   non-passthrough result an early validator produced, or 'passthrough' when
 *   every check passes.
 */
export function bashCommandIsSafe_DEPRECATED(
  command: string,
): PermissionResult {
  // SECURITY: Block control characters before any other processing. Null bytes
  // and other non-printable chars are silently dropped by bash but confuse our
  // validators, allowing metacharacters adjacent to them to slip through.
  if (CONTROL_CHAR_RE.test(command)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.CONTROL_CHARACTERS,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains non-printable control characters that could be used to bypass security checks',
      isBashSecurityCheckForMisparsing: true,
    }
  }

  // SECURITY: Detect '\' patterns that exploit shell-quote's incorrect handling
  // of backslashes inside single quotes. Must run before shell-quote parsing.
  // NOTE(review): unlike the control-character branch above, no analytics
  // event is logged here — confirm that is intentional.
  if (hasShellQuoteSingleQuoteBug(command)) {
    return {
      behavior: 'ask',
      message:
        'Command contains single-quoted backslash pattern that could bypass security checks',
      isBashSecurityCheckForMisparsing: true,
    }
  }

  // SECURITY: Strip heredoc bodies before running security validators.
  // Only strip bodies for quoted/escaped delimiters (<<'EOF', <<\EOF) where
  // the body is literal text — $(), backticks, and ${} are NOT expanded.
  // Unquoted heredocs (<<EOF) undergo full shell expansion, so their bodies
  // may contain executable command substitutions that validators must see.
  // When extractHeredocs bails out (can't parse safely), the raw command
  // goes through all validators — which is the safe direction.
  const { processedCommand } = extractHeredocs(command, { quotedOnly: true })

  // NOTE(review): this splits on a single literal space only, so a command
  // with leading whitespace yields '' and a tab-separated command (e.g.
  // "jq\t-f x") yields the whole first space-delimited chunk rather than
  // "jq". Validators keyed on baseCommand would then not recognise the
  // command — verify whether callers always pass trimmed, space-separated
  // input before relying on this.
  const baseCommand = command.split(' ')[0] || ''
  const { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars } =
    extractQuotedContent(processedCommand, baseCommand === 'jq')

  // originalCommand is the raw input (heredoc bodies included); the unquoted
  // views are derived from processedCommand (quoted heredoc bodies stripped).
  const context: ValidationContext = {
    originalCommand: command,
    baseCommand,
    unquotedContent: withDoubleQuotes,
    fullyUnquotedContent: stripSafeRedirections(fullyUnquoted),
    fullyUnquotedPreStrip: fullyUnquoted,
    unquotedKeepQuoteChars,
  }

  const earlyValidators = [
    validateEmpty,
    validateIncompleteCommands,
    validateSafeCommandSubstitution,
    validateGitCommit,
  ]

  for (const validator of earlyValidators) {
    const result = validator(context)
    // An early 'allow' stops validation but is surfaced to the caller as
    // 'passthrough' (carrying the validator's reason text when it has one).
    if (result.behavior === 'allow') {
      return {
        behavior: 'passthrough',
        message:
          result.decisionReason?.type === 'other' ||
          result.decisionReason?.type === 'safetyCheck'
            ? result.decisionReason.reason
            : 'Command allowed',
      }
    }
    // Any other non-passthrough result ends the run; an 'ask' is additionally
    // flagged as a misparsing concern.
    if (result.behavior !== 'passthrough') {
      return result.behavior === 'ask'
        ? { ...result, isBashSecurityCheckForMisparsing: true as const }
        : result
    }
  }

  // Validators that don't set isBashSecurityCheckForMisparsing — their ask
  // results go through the standard permission flow rather than being blocked
  // early. LF newlines and redirections are normal patterns that splitCommand
  // handles correctly, not misparsing concerns.
  //
  // NOTE: validateCarriageReturn is NOT here — CR IS a misparsing concern.
  // shell-quote's `[^\s]` treats CR as a word separator (JS `\s` ⊃ \r), but
  // bash IFS does NOT include CR. splitCommand collapses CR→space, which IS
  // misparsing. See validateCarriageReturn for the full attack trace.
  const nonMisparsingValidators = new Set([
    validateNewlines,
    validateRedirections,
  ])

  const validators = [
    validateJqCommand,
    validateObfuscatedFlags,
    validateShellMetacharacters,
    validateDangerousVariables,
    // Run comment-quote-desync BEFORE validateNewlines: it detects cases where
    // the quote tracker would miss newlines due to # comment desync.
    validateCommentQuoteDesync,
    // Run quoted-newline BEFORE validateNewlines: it detects the INVERSE case
    // (newlines INSIDE quotes, which validateNewlines ignores by design). Quoted
    // newlines let attackers split commands across lines so that line-based
    // processing (stripCommentLines) drops sensitive content.
    validateQuotedNewline,
    // CR check runs BEFORE validateNewlines — CR is a MISPARSING concern
    // (shell-quote/bash tokenization differential), LF is not.
    validateCarriageReturn,
    validateNewlines,
    validateIFSInjection,
    validateProcEnvironAccess,
    validateDangerousPatterns,
    validateRedirections,
    validateBackslashEscapedWhitespace,
    validateBackslashEscapedOperators,
    validateUnicodeWhitespace,
    validateMidWordHash,
    validateBraceExpansion,
    validateZshDangerousCommands,
    // Run malformed token check last - other validators should catch specific patterns first
    // (e.g., $() substitution, backticks, etc.) since they have more precise error messages
    validateMalformedTokenInjection,
  ]

  // SECURITY: We must NOT short-circuit when a non-misparsing validator
  // returns 'ask' if there are still misparsing validators later in the list.
  // Non-misparsing ask results are discarded at bashPermissions.ts:~1301-1303
  // (the gate only blocks when isBashSecurityCheckForMisparsing is set). If
  // validateRedirections (non-misparsing, earlier in the list) fires first on
  // `>`, it returns ask-without-flag — but validateBackslashEscapedOperators
  // (misparsing, later in the list) would have caught `\;` WITH the flag.
  // Short-circuiting lets a payload like
  // `cat safe.txt \; echo /etc/passwd > ./out` slip through.
  //
  // Fix: defer non-misparsing ask results. Continue running validators; if any
  // misparsing validator fires, return THAT (with the flag). Only if we reach
  // the end without a misparsing ask, return the deferred non-misparsing ask.
  let deferredNonMisparsingResult: PermissionResult | null = null
  for (const validator of validators) {
    const result = validator(context)
    if (result.behavior === 'ask') {
      if (nonMisparsingValidators.has(validator)) {
        // Keep only the FIRST deferred result; later non-misparsing asks are
        // dropped.
        if (deferredNonMisparsingResult === null) {
          deferredNonMisparsingResult = result
        }
        continue
      }
      return { ...result, isBashSecurityCheckForMisparsing: true as const }
    }
  }
  if (deferredNonMisparsingResult !== null) {
    return deferredNonMisparsingResult
  }

  return {
    behavior: 'passthrough',
    message: 'Command passed all security checks',
  }
}

/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Async version of bashCommandIsSafe that uses tree-sitter when available
 * for more accurate parsing. Falls back to the sync regex version when
 * tree-sitter is not available.
 *
 * This should be used by async callers (bashPermissions.ts, bashCommandHelpers.ts).
 * Sync callers (readOnlyValidation.ts) should continue using bashCommandIsSafe().
/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Async security gate for a bash command. When ParsedCommand yields a
 * tree-sitter analysis, its quote context feeds the validator chain; when it
 * does not, the call is delegated to the sync bashCommandIsSafe_DEPRECATED.
 *
 * Intended for async callers (bashPermissions.ts, bashCommandHelpers.ts).
 * Sync callers (readOnlyValidation.ts) should keep using the sync variant.
 *
 * @param command - Raw shell command text to vet.
 * @param onDivergence - Optional hook called (instead of emitting a logEvent)
 *   when the tree-sitter and regex quote extractions disagree, so a caller
 *   fanning out over many subcommands can batch the signal into one event.
 * @returns 'passthrough' when an early validator allows the command or no
 *   validator objects; otherwise the first objecting validator's result.
 *   'ask' results carry isBashSecurityCheckForMisparsing, except those that
 *   come from validateNewlines / validateRedirections.
 */
export async function bashCommandIsSafeAsync_DEPRECATED(
  command: string,
  onDivergence?: () => void,
): Promise<PermissionResult> {
  const parsed = await ParsedCommand.parse(command)
  const tsAnalysis = parsed?.getTreeSitterAnalysis()

  // Without a tree-sitter analysis there is nothing to enrich: use the sync path.
  if (!tsAnalysis) {
    return bashCommandIsSafe_DEPRECATED(command)
  }

  // Builds the flagged 'ask' result shared by the two raw-text pre-checks.
  const misparsingAsk = (message: string): PermissionResult => ({
    behavior: 'ask',
    message,
    isBashSecurityCheckForMisparsing: true,
  })

  // Raw-text pre-checks: these gain nothing from tree-sitter, so they run on
  // the unparsed command string.
  if (CONTROL_CHAR_RE.test(command)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.CONTROL_CHARACTERS,
    })
    return misparsingAsk(
      'Command contains non-printable control characters that could be used to bypass security checks',
    )
  }
  if (hasShellQuoteSingleQuoteBug(command)) {
    return misparsingAsk(
      'Command contains single-quoted backslash pattern that could bypass security checks',
    )
  }

  const { processedCommand } = extractHeredocs(command, { quotedOnly: true })
  const [baseCommand = ''] = command.split(' ')

  // Tree-sitter's quote context drives validation; the regex extraction is
  // computed only as a reference for the divergence signal below.
  const treeSitterQuotes = tsAnalysis.quoteContext
  const regexQuotes = extractQuotedContent(
    processedCommand,
    baseCommand === 'jq',
  )
  const { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars } =
    treeSitterQuotes

  const context: ValidationContext = {
    originalCommand: command,
    baseCommand,
    unquotedContent: withDoubleQuotes,
    fullyUnquotedContent: stripSafeRedirections(fullyUnquoted),
    fullyUnquotedPreStrip: fullyUnquoted,
    unquotedKeepQuoteChars,
    treeSitter: tsAnalysis,
  }

  // Report tree-sitter vs. regex disagreement, except for heredoc commands:
  // tree-sitter strips (quoted) heredoc bodies while the regex path sees the
  // placeholders left by extractHeredocs, so those two outputs never match
  // and would drown the signal.
  //
  // When a caller supplies onDivergence (e.g. a Promise.all fanout over
  // subcommands) it batches divergences into a single logEvent; issuing one
  // logEvent per subcommand was observed to starve the event loop (CC-643).
  const quoteContextsDiverge =
    !tsAnalysis.dangerousPatterns.hasHeredoc &&
    (fullyUnquoted !== regexQuotes.fullyUnquoted ||
      withDoubleQuotes !== regexQuotes.withDoubleQuotes)
  if (quoteContextsDiverge) {
    if (onDivergence) {
      onDivergence()
    } else {
      logEvent('tengu_tree_sitter_security_divergence', {
        quoteContextDivergence: true,
      })
    }
  }

  // Phase 1: early validators. The first non-passthrough outcome ends the
  // whole check.
  const earlyChecks = [
    validateEmpty,
    validateIncompleteCommands,
    validateSafeCommandSubstitution,
    validateGitCommit,
  ]
  for (const check of earlyChecks) {
    const outcome = check(context)
    switch (outcome.behavior) {
      case 'passthrough':
        continue
      case 'allow': {
        // An early 'allow' is surfaced to the caller as 'passthrough',
        // carrying the validator's reason text when it provides one.
        const reason = outcome.decisionReason
        return {
          behavior: 'passthrough',
          message:
            reason?.type === 'other' || reason?.type === 'safetyCheck'
              ? reason.reason
              : 'Command allowed',
        }
      }
      case 'ask':
        return { ...outcome, isBashSecurityCheckForMisparsing: true as const }
      default:
        return outcome
    }
  }

  // Phase 2: main validator chain, run in this exact order.
  const mainChecks = [
    validateJqCommand,
    validateObfuscatedFlags,
    validateShellMetacharacters,
    validateDangerousVariables,
    validateCommentQuoteDesync,
    validateQuotedNewline,
    validateCarriageReturn,
    validateNewlines,
    validateIFSInjection,
    validateProcEnvironAccess,
    validateDangerousPatterns,
    validateRedirections,
    validateBackslashEscapedWhitespace,
    validateBackslashEscapedOperators,
    validateUnicodeWhitespace,
    validateMidWordHash,
    validateBraceExpansion,
    validateZshDangerousCommands,
    validateMalformedTokenInjection,
  ]

  // 'ask' results from these validators are returned WITHOUT the misparsing
  // flag, and only if no other validator in the chain objects.
  const deferrableChecks = new Set([validateNewlines, validateRedirections])

  // Never short-circuit on a deferrable 'ask': a later validator may still
  // produce a flagged (misparsing) 'ask', and that one must win. Only the
  // first deferrable 'ask' is remembered.
  let deferredAsk: PermissionResult | null = null
  for (const check of mainChecks) {
    const verdict = check(context)
    if (verdict.behavior !== 'ask') {
      continue
    }
    if (!deferrableChecks.has(check)) {
      return { ...verdict, isBashSecurityCheckForMisparsing: true as const }
    }
    if (deferredAsk === null) {
      deferredAsk = verdict
    }
  }

  if (deferredAsk !== null) {
    return deferredAsk
  }
  return {
    behavior: 'passthrough',
    message: 'Command passed all security checks',
  }
}