source dump of claude code
at main 4436 lines 131 kB view raw
1/** 2 * Pure-TypeScript bash parser producing tree-sitter-bash-compatible ASTs. 3 * 4 * Downstream code in parser.ts, ast.ts, prefix.ts, ParsedCommand.ts walks this 5 * by field name. startIndex/endIndex are UTF-8 BYTE offsets (not JS string 6 * indices). 7 * 8 * Grammar reference: tree-sitter-bash. Validated against a 3449-input golden 9 * corpus generated from the WASM parser. 10 */ 11 12export type TsNode = { 13 type: string 14 text: string 15 startIndex: number 16 endIndex: number 17 children: TsNode[] 18} 19 20type ParserModule = { 21 parse: (source: string, timeoutMs?: number) => TsNode | null 22} 23 24/** 25 * 50ms wall-clock cap — bails out on pathological/adversarial input. 26 * Pass `Infinity` via `parse(src, Infinity)` to disable (e.g. correctness 27 * tests, where CI jitter would otherwise cause spurious null returns). 28 */ 29const PARSE_TIMEOUT_MS = 50 30 31/** Node budget cap — bails out before OOM on deeply nested input. */ 32const MAX_NODES = 50_000 33 34const MODULE: ParserModule = { parse: parseSource } 35 36const READY = Promise.resolve() 37 38/** No-op: pure-TS parser needs no async init. Kept for API compatibility. */ 39export function ensureParserInitialized(): Promise<void> { 40 return READY 41} 42 43/** Always succeeds — pure-TS needs no init. 
*/ 44export function getParserModule(): ParserModule | null { 45 return MODULE 46} 47 48// ───────────────────────────── Tokenizer ───────────────────────────── 49 50type TokenType = 51 | 'WORD' 52 | 'NUMBER' 53 | 'OP' 54 | 'NEWLINE' 55 | 'COMMENT' 56 | 'DQUOTE' 57 | 'SQUOTE' 58 | 'ANSI_C' 59 | 'DOLLAR' 60 | 'DOLLAR_PAREN' 61 | 'DOLLAR_BRACE' 62 | 'DOLLAR_DPAREN' 63 | 'BACKTICK' 64 | 'LT_PAREN' 65 | 'GT_PAREN' 66 | 'EOF' 67 68type Token = { 69 type: TokenType 70 value: string 71 /** UTF-8 byte offset of first char */ 72 start: number 73 /** UTF-8 byte offset one past last char */ 74 end: number 75} 76 77const SPECIAL_VARS = new Set(['?', '$', '@', '*', '#', '-', '!', '_']) 78 79const DECL_KEYWORDS = new Set([ 80 'export', 81 'declare', 82 'typeset', 83 'readonly', 84 'local', 85]) 86 87export const SHELL_KEYWORDS = new Set([ 88 'if', 89 'then', 90 'elif', 91 'else', 92 'fi', 93 'while', 94 'until', 95 'for', 96 'in', 97 'do', 98 'done', 99 'case', 100 'esac', 101 'function', 102 'select', 103]) 104 105/** 106 * Lexer state. Tracks both JS-string index (for charAt) and UTF-8 byte offset 107 * (for TsNode positions). ASCII fast path: byte == char index. Non-ASCII 108 * advances byte count per-codepoint. 
109 */ 110type Lexer = { 111 src: string 112 len: number 113 /** JS string index */ 114 i: number 115 /** UTF-8 byte offset */ 116 b: number 117 /** Pending heredoc delimiters awaiting body scan at next newline */ 118 heredocs: HeredocPending[] 119 /** Precomputed byte offset for each char index (lazy for non-ASCII) */ 120 byteTable: Uint32Array | null 121} 122 123type HeredocPending = { 124 delim: string 125 stripTabs: boolean 126 quoted: boolean 127 /** Filled after body scan */ 128 bodyStart: number 129 bodyEnd: number 130 endStart: number 131 endEnd: number 132} 133 134function makeLexer(src: string): Lexer { 135 return { 136 src, 137 len: src.length, 138 i: 0, 139 b: 0, 140 heredocs: [], 141 byteTable: null, 142 } 143} 144 145/** Advance one JS char, updating byte offset for UTF-8. */ 146function advance(L: Lexer): void { 147 const c = L.src.charCodeAt(L.i) 148 L.i++ 149 if (c < 0x80) { 150 L.b++ 151 } else if (c < 0x800) { 152 L.b += 2 153 } else if (c >= 0xd800 && c <= 0xdbff) { 154 // High surrogate — next char completes the pair, total 4 UTF-8 bytes 155 L.b += 4 156 L.i++ 157 } else { 158 L.b += 3 159 } 160} 161 162function peek(L: Lexer, off = 0): string { 163 return L.i + off < L.len ? L.src[L.i + off]! : '' 164} 165 166function byteAt(L: Lexer, charIdx: number): number { 167 // Fast path: ASCII-only prefix means char idx == byte idx 168 if (L.byteTable) return L.byteTable[charIdx]! 169 // Build table on first non-trivial lookup 170 const t = new Uint32Array(L.len + 1) 171 let b = 0 172 let i = 0 173 while (i < L.len) { 174 t[i] = b 175 const c = L.src.charCodeAt(i) 176 if (c < 0x80) { 177 b++ 178 i++ 179 } else if (c < 0x800) { 180 b += 2 181 i++ 182 } else if (c >= 0xd800 && c <= 0xdbff) { 183 t[i + 1] = b + 2 184 b += 4 185 i += 2 186 } else { 187 b += 3 188 i++ 189 } 190 } 191 t[L.len] = b 192 L.byteTable = t 193 return t[charIdx]! 
194} 195 196function isWordChar(c: string): boolean { 197 // Bash word chars: alphanumeric + various punctuation that doesn't start operators 198 return ( 199 (c >= 'a' && c <= 'z') || 200 (c >= 'A' && c <= 'Z') || 201 (c >= '0' && c <= '9') || 202 c === '_' || 203 c === '/' || 204 c === '.' || 205 c === '-' || 206 c === '+' || 207 c === ':' || 208 c === '@' || 209 c === '%' || 210 c === ',' || 211 c === '~' || 212 c === '^' || 213 c === '?' || 214 c === '*' || 215 c === '!' || 216 c === '=' || 217 c === '[' || 218 c === ']' 219 ) 220} 221 222function isWordStart(c: string): boolean { 223 return isWordChar(c) || c === '\\' 224} 225 226function isIdentStart(c: string): boolean { 227 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c === '_' 228} 229 230function isIdentChar(c: string): boolean { 231 return isIdentStart(c) || (c >= '0' && c <= '9') 232} 233 234function isDigit(c: string): boolean { 235 return c >= '0' && c <= '9' 236} 237 238function isHexDigit(c: string): boolean { 239 return isDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') 240} 241 242function isBaseDigit(c: string): boolean { 243 // Bash BASE#DIGITS: digits, letters, @ and _ (up to base 64) 244 return isIdentChar(c) || c === '@' 245} 246 247/** 248 * Unquoted heredoc delimiter chars. Bash accepts most non-metacharacters — 249 * not just identifiers. Stop at whitespace, redirects, pipe/list operators, 250 * and structural tokens. Allows !, -, ., +, etc. (e.g. <<!HEREDOC!). 251 */ 252function isHeredocDelimChar(c: string): boolean { 253 return ( 254 c !== '' && 255 c !== ' ' && 256 c !== '\t' && 257 c !== '\n' && 258 c !== '<' && 259 c !== '>' && 260 c !== '|' && 261 c !== '&' && 262 c !== ';' && 263 c !== '(' && 264 c !== ')' && 265 c !== "'" && 266 c !== '"' && 267 c !== '`' && 268 c !== '\\' 269 ) 270} 271 272function skipBlanks(L: Lexer): void { 273 while (L.i < L.len) { 274 const c = L.src[L.i]! 
275 if (c === ' ' || c === '\t' || c === '\r') { 276 // \r is whitespace per tree-sitter-bash extras /\s/ — handles CRLF inputs 277 advance(L) 278 } else if (c === '\\') { 279 const nx = L.src[L.i + 1] 280 if (nx === '\n' || (nx === '\r' && L.src[L.i + 2] === '\n')) { 281 // Line continuation — tree-sitter extras: /\\\r?\n/ 282 advance(L) 283 advance(L) 284 if (nx === '\r') advance(L) 285 } else if (nx === ' ' || nx === '\t') { 286 // \<space> or \<tab> — tree-sitter's _whitespace is /\\?[ \t\v]+/ 287 advance(L) 288 advance(L) 289 } else { 290 break 291 } 292 } else { 293 break 294 } 295 } 296} 297 298/** 299 * Scan next token. Context-sensitive: `cmd` mode treats [ as operator (test 300 * command start), `arg` mode treats [ as word char (glob/subscript). 301 */ 302function nextToken(L: Lexer, ctx: 'cmd' | 'arg' = 'arg'): Token { 303 skipBlanks(L) 304 const start = L.b 305 if (L.i >= L.len) return { type: 'EOF', value: '', start, end: start } 306 307 const c = L.src[L.i]! 308 const c1 = peek(L, 1) 309 const c2 = peek(L, 2) 310 311 if (c === '\n') { 312 advance(L) 313 return { type: 'NEWLINE', value: '\n', start, end: L.b } 314 } 315 316 if (c === '#') { 317 const si = L.i 318 while (L.i < L.len && L.src[L.i] !== '\n') advance(L) 319 return { 320 type: 'COMMENT', 321 value: L.src.slice(si, L.i), 322 start, 323 end: L.b, 324 } 325 } 326 327 // Multi-char operators (longest match first) 328 if (c === '&' && c1 === '&') { 329 advance(L) 330 advance(L) 331 return { type: 'OP', value: '&&', start, end: L.b } 332 } 333 if (c === '|' && c1 === '|') { 334 advance(L) 335 advance(L) 336 return { type: 'OP', value: '||', start, end: L.b } 337 } 338 if (c === '|' && c1 === '&') { 339 advance(L) 340 advance(L) 341 return { type: 'OP', value: '|&', start, end: L.b } 342 } 343 if (c === ';' && c1 === ';' && c2 === '&') { 344 advance(L) 345 advance(L) 346 advance(L) 347 return { type: 'OP', value: ';;&', start, end: L.b } 348 } 349 if (c === ';' && c1 === ';') { 350 advance(L) 351 
advance(L) 352 return { type: 'OP', value: ';;', start, end: L.b } 353 } 354 if (c === ';' && c1 === '&') { 355 advance(L) 356 advance(L) 357 return { type: 'OP', value: ';&', start, end: L.b } 358 } 359 if (c === '>' && c1 === '>') { 360 advance(L) 361 advance(L) 362 return { type: 'OP', value: '>>', start, end: L.b } 363 } 364 if (c === '>' && c1 === '&' && c2 === '-') { 365 advance(L) 366 advance(L) 367 advance(L) 368 return { type: 'OP', value: '>&-', start, end: L.b } 369 } 370 if (c === '>' && c1 === '&') { 371 advance(L) 372 advance(L) 373 return { type: 'OP', value: '>&', start, end: L.b } 374 } 375 if (c === '>' && c1 === '|') { 376 advance(L) 377 advance(L) 378 return { type: 'OP', value: '>|', start, end: L.b } 379 } 380 if (c === '&' && c1 === '>' && c2 === '>') { 381 advance(L) 382 advance(L) 383 advance(L) 384 return { type: 'OP', value: '&>>', start, end: L.b } 385 } 386 if (c === '&' && c1 === '>') { 387 advance(L) 388 advance(L) 389 return { type: 'OP', value: '&>', start, end: L.b } 390 } 391 if (c === '<' && c1 === '<' && c2 === '<') { 392 advance(L) 393 advance(L) 394 advance(L) 395 return { type: 'OP', value: '<<<', start, end: L.b } 396 } 397 if (c === '<' && c1 === '<' && c2 === '-') { 398 advance(L) 399 advance(L) 400 advance(L) 401 return { type: 'OP', value: '<<-', start, end: L.b } 402 } 403 if (c === '<' && c1 === '<') { 404 advance(L) 405 advance(L) 406 return { type: 'OP', value: '<<', start, end: L.b } 407 } 408 if (c === '<' && c1 === '&' && c2 === '-') { 409 advance(L) 410 advance(L) 411 advance(L) 412 return { type: 'OP', value: '<&-', start, end: L.b } 413 } 414 if (c === '<' && c1 === '&') { 415 advance(L) 416 advance(L) 417 return { type: 'OP', value: '<&', start, end: L.b } 418 } 419 if (c === '<' && c1 === '(') { 420 advance(L) 421 advance(L) 422 return { type: 'LT_PAREN', value: '<(', start, end: L.b } 423 } 424 if (c === '>' && c1 === '(') { 425 advance(L) 426 advance(L) 427 return { type: 'GT_PAREN', value: '>(', start, 
end: L.b } 428 } 429 if (c === '(' && c1 === '(') { 430 advance(L) 431 advance(L) 432 return { type: 'OP', value: '((', start, end: L.b } 433 } 434 if (c === ')' && c1 === ')') { 435 advance(L) 436 advance(L) 437 return { type: 'OP', value: '))', start, end: L.b } 438 } 439 440 if (c === '|' || c === '&' || c === ';' || c === '>' || c === '<') { 441 advance(L) 442 return { type: 'OP', value: c, start, end: L.b } 443 } 444 if (c === '(' || c === ')') { 445 advance(L) 446 return { type: 'OP', value: c, start, end: L.b } 447 } 448 449 // In cmd position, [ [[ { start test/group; in arg position they're word chars 450 if (ctx === 'cmd') { 451 if (c === '[' && c1 === '[') { 452 advance(L) 453 advance(L) 454 return { type: 'OP', value: '[[', start, end: L.b } 455 } 456 if (c === '[') { 457 advance(L) 458 return { type: 'OP', value: '[', start, end: L.b } 459 } 460 if (c === '{' && (c1 === ' ' || c1 === '\t' || c1 === '\n')) { 461 advance(L) 462 return { type: 'OP', value: '{', start, end: L.b } 463 } 464 if (c === '}') { 465 advance(L) 466 return { type: 'OP', value: '}', start, end: L.b } 467 } 468 if (c === '!' 
&& (c1 === ' ' || c1 === '\t')) { 469 advance(L) 470 return { type: 'OP', value: '!', start, end: L.b } 471 } 472 } 473 474 if (c === '"') { 475 advance(L) 476 return { type: 'DQUOTE', value: '"', start, end: L.b } 477 } 478 if (c === "'") { 479 const si = L.i 480 advance(L) 481 while (L.i < L.len && L.src[L.i] !== "'") advance(L) 482 if (L.i < L.len) advance(L) 483 return { 484 type: 'SQUOTE', 485 value: L.src.slice(si, L.i), 486 start, 487 end: L.b, 488 } 489 } 490 491 if (c === '$') { 492 if (c1 === '(' && c2 === '(') { 493 advance(L) 494 advance(L) 495 advance(L) 496 return { type: 'DOLLAR_DPAREN', value: '$((', start, end: L.b } 497 } 498 if (c1 === '(') { 499 advance(L) 500 advance(L) 501 return { type: 'DOLLAR_PAREN', value: '$(', start, end: L.b } 502 } 503 if (c1 === '{') { 504 advance(L) 505 advance(L) 506 return { type: 'DOLLAR_BRACE', value: '${', start, end: L.b } 507 } 508 if (c1 === "'") { 509 // ANSI-C string $'...' 510 const si = L.i 511 advance(L) 512 advance(L) 513 while (L.i < L.len && L.src[L.i] !== "'") { 514 if (L.src[L.i] === '\\' && L.i + 1 < L.len) advance(L) 515 advance(L) 516 } 517 if (L.i < L.len) advance(L) 518 return { 519 type: 'ANSI_C', 520 value: L.src.slice(si, L.i), 521 start, 522 end: L.b, 523 } 524 } 525 advance(L) 526 return { type: 'DOLLAR', value: '$', start, end: L.b } 527 } 528 529 if (c === '`') { 530 advance(L) 531 return { type: 'BACKTICK', value: '`', start, end: L.b } 532 } 533 534 // File descriptor before redirect: digit+ immediately followed by > or < 535 if (isDigit(c)) { 536 let j = L.i 537 while (j < L.len && isDigit(L.src[j]!)) j++ 538 const after = j < L.len ? L.src[j]! 
: '' 539 if (after === '>' || after === '<') { 540 const si = L.i 541 while (L.i < j) advance(L) 542 return { 543 type: 'WORD', 544 value: L.src.slice(si, L.i), 545 start, 546 end: L.b, 547 } 548 } 549 } 550 551 // Word / number 552 if (isWordStart(c) || c === '{' || c === '}') { 553 const si = L.i 554 while (L.i < L.len) { 555 const ch = L.src[L.i]! 556 if (ch === '\\') { 557 if (L.i + 1 >= L.len) { 558 // Trailing `\` at EOF — tree-sitter excludes it from the word and 559 // emits a sibling ERROR. Stop here so the word ends before `\`. 560 break 561 } 562 // Escape next char (including \n for line continuation mid-word) 563 if (L.src[L.i + 1] === '\n') { 564 advance(L) 565 advance(L) 566 continue 567 } 568 advance(L) 569 advance(L) 570 continue 571 } 572 if (!isWordChar(ch) && ch !== '{' && ch !== '}') { 573 break 574 } 575 advance(L) 576 } 577 if (L.i > si) { 578 const v = L.src.slice(si, L.i) 579 // Number: optional sign then digits only 580 if (/^-?\d+$/.test(v)) { 581 return { type: 'NUMBER', value: v, start, end: L.b } 582 } 583 return { type: 'WORD', value: v, start, end: L.b } 584 } 585 // Empty word (lone `\` at EOF) — fall through to single-char consumer 586 } 587 588 // Unknown char — consume as single-char word 589 advance(L) 590 return { type: 'WORD', value: c, start, end: L.b } 591} 592 593// ───────────────────────────── Parser ───────────────────────────── 594 595type ParseState = { 596 L: Lexer 597 src: string 598 srcBytes: number 599 /** True when byte offsets == char indices (no multi-byte UTF-8) */ 600 isAscii: boolean 601 nodeCount: number 602 deadline: number 603 aborted: boolean 604 /** Depth of backtick nesting — inside `...`, ` terminates words */ 605 inBacktick: number 606 /** When set, parseSimpleCommand stops at this token (for `[` backtrack) */ 607 stopToken: string | null 608} 609 610function parseSource(source: string, timeoutMs?: number): TsNode | null { 611 const L = makeLexer(source) 612 const srcBytes = byteLengthUtf8(source) 613 
const P: ParseState = { 614 L, 615 src: source, 616 srcBytes, 617 isAscii: srcBytes === source.length, 618 nodeCount: 0, 619 deadline: performance.now() + (timeoutMs ?? PARSE_TIMEOUT_MS), 620 aborted: false, 621 inBacktick: 0, 622 stopToken: null, 623 } 624 try { 625 const program = parseProgram(P) 626 if (P.aborted) return null 627 return program 628 } catch { 629 return null 630 } 631} 632 633function byteLengthUtf8(s: string): number { 634 let b = 0 635 for (let i = 0; i < s.length; i++) { 636 const c = s.charCodeAt(i) 637 if (c < 0x80) b++ 638 else if (c < 0x800) b += 2 639 else if (c >= 0xd800 && c <= 0xdbff) { 640 b += 4 641 i++ 642 } else b += 3 643 } 644 return b 645} 646 647function checkBudget(P: ParseState): void { 648 P.nodeCount++ 649 if (P.nodeCount > MAX_NODES) { 650 P.aborted = true 651 throw new Error('budget') 652 } 653 if ((P.nodeCount & 0x7f) === 0 && performance.now() > P.deadline) { 654 P.aborted = true 655 throw new Error('timeout') 656 } 657} 658 659/** Build a node. Slices text from source by byte range via char-index lookup. */ 660function mk( 661 P: ParseState, 662 type: string, 663 start: number, 664 end: number, 665 children: TsNode[], 666): TsNode { 667 checkBudget(P) 668 return { 669 type, 670 text: sliceBytes(P, start, end), 671 startIndex: start, 672 endIndex: end, 673 children, 674 } 675} 676 677function sliceBytes(P: ParseState, startByte: number, endByte: number): string { 678 if (P.isAscii) return P.src.slice(startByte, endByte) 679 // Find char indices for byte offsets. Build byte table if needed. 680 const L = P.L 681 if (!L.byteTable) byteAt(L, 0) 682 const t = L.byteTable! 683 // Binary search for char index where byte offset matches 684 let lo = 0 685 let hi = P.src.length 686 while (lo < hi) { 687 const m = (lo + hi) >>> 1 688 if (t[m]! < startByte) lo = m + 1 689 else hi = m 690 } 691 const sc = lo 692 lo = sc 693 hi = P.src.length 694 while (lo < hi) { 695 const m = (lo + hi) >>> 1 696 if (t[m]! 
< endByte) lo = m + 1 697 else hi = m 698 } 699 return P.src.slice(sc, lo) 700} 701 702function leaf(P: ParseState, type: string, tok: Token): TsNode { 703 return mk(P, type, tok.start, tok.end, []) 704} 705 706function parseProgram(P: ParseState): TsNode { 707 const children: TsNode[] = [] 708 // Skip leading whitespace & newlines — program start is first content byte 709 skipBlanks(P.L) 710 while (true) { 711 const save = saveLex(P.L) 712 const t = nextToken(P.L, 'cmd') 713 if (t.type === 'NEWLINE') { 714 skipBlanks(P.L) 715 continue 716 } 717 restoreLex(P.L, save) 718 break 719 } 720 const progStart = P.L.b 721 while (P.L.i < P.L.len) { 722 const save = saveLex(P.L) 723 const t = nextToken(P.L, 'cmd') 724 if (t.type === 'EOF') break 725 if (t.type === 'NEWLINE') continue 726 if (t.type === 'COMMENT') { 727 children.push(leaf(P, 'comment', t)) 728 continue 729 } 730 restoreLex(P.L, save) 731 const stmts = parseStatements(P, null) 732 for (const s of stmts) children.push(s) 733 if (stmts.length === 0) { 734 // Couldn't parse — emit ERROR and skip one token 735 const errTok = nextToken(P.L, 'cmd') 736 if (errTok.type === 'EOF') break 737 // Stray `;;` at program level (e.g., `var=;;` outside case) — tree-sitter 738 // silently elides. Keep leading `;` as ERROR (security: paste artifact). 739 if ( 740 errTok.type === 'OP' && 741 errTok.value === ';;' && 742 children.length > 0 743 ) { 744 continue 745 } 746 children.push(mk(P, 'ERROR', errTok.start, errTok.end, [])) 747 } 748 } 749 // tree-sitter includes trailing whitespace in program extent 750 const progEnd = children.length > 0 ? P.srcBytes : progStart 751 return mk(P, 'program', progStart, progEnd, children) 752} 753 754/** Packed as (b << 16) | i — avoids heap alloc on every backtrack. 
*/ 755type LexSave = number 756function saveLex(L: Lexer): LexSave { 757 return L.b * 0x10000 + L.i 758} 759function restoreLex(L: Lexer, s: LexSave): void { 760 L.i = s & 0xffff 761 L.b = s >>> 16 762} 763 764/** 765 * Parse a sequence of statements separated by ; & newline. Returns a flat list 766 * where ; and & are sibling leaves (NOT wrapped in 'list' — only && || get 767 * that). Stops at terminator or EOF. 768 */ 769function parseStatements(P: ParseState, terminator: string | null): TsNode[] { 770 const out: TsNode[] = [] 771 while (true) { 772 skipBlanks(P.L) 773 const save = saveLex(P.L) 774 const t = nextToken(P.L, 'cmd') 775 if (t.type === 'EOF') { 776 restoreLex(P.L, save) 777 break 778 } 779 if (t.type === 'NEWLINE') { 780 // Process pending heredocs 781 if (P.L.heredocs.length > 0) { 782 scanHeredocBodies(P) 783 } 784 continue 785 } 786 if (t.type === 'COMMENT') { 787 out.push(leaf(P, 'comment', t)) 788 continue 789 } 790 if (terminator && t.type === 'OP' && t.value === terminator) { 791 restoreLex(P.L, save) 792 break 793 } 794 if ( 795 t.type === 'OP' && 796 (t.value === ')' || 797 t.value === '}' || 798 t.value === ';;' || 799 t.value === ';&' || 800 t.value === ';;&' || 801 t.value === '))' || 802 t.value === ']]' || 803 t.value === ']') 804 ) { 805 restoreLex(P.L, save) 806 break 807 } 808 if (t.type === 'BACKTICK' && P.inBacktick > 0) { 809 restoreLex(P.L, save) 810 break 811 } 812 if ( 813 t.type === 'WORD' && 814 (t.value === 'then' || 815 t.value === 'elif' || 816 t.value === 'else' || 817 t.value === 'fi' || 818 t.value === 'do' || 819 t.value === 'done' || 820 t.value === 'esac') 821 ) { 822 restoreLex(P.L, save) 823 break 824 } 825 restoreLex(P.L, save) 826 const stmt = parseAndOr(P) 827 if (!stmt) break 828 out.push(stmt) 829 // Look for separator 830 skipBlanks(P.L) 831 const save2 = saveLex(P.L) 832 const sep = nextToken(P.L, 'cmd') 833 if (sep.type === 'OP' && (sep.value === ';' || sep.value === '&')) { 834 // Check if terminator 
follows — if so, emit separator but stop 835 const save3 = saveLex(P.L) 836 const after = nextToken(P.L, 'cmd') 837 restoreLex(P.L, save3) 838 out.push(leaf(P, sep.value, sep)) 839 if ( 840 after.type === 'EOF' || 841 (after.type === 'OP' && 842 (after.value === ')' || 843 after.value === '}' || 844 after.value === ';;' || 845 after.value === ';&' || 846 after.value === ';;&')) || 847 (after.type === 'WORD' && 848 (after.value === 'then' || 849 after.value === 'elif' || 850 after.value === 'else' || 851 after.value === 'fi' || 852 after.value === 'do' || 853 after.value === 'done' || 854 after.value === 'esac')) 855 ) { 856 // Trailing separator — don't include it at program level unless 857 // there's content after. But at inner levels we keep it. 858 continue 859 } 860 } else if (sep.type === 'NEWLINE') { 861 if (P.L.heredocs.length > 0) { 862 scanHeredocBodies(P) 863 } 864 continue 865 } else { 866 restoreLex(P.L, save2) 867 } 868 } 869 // Trim trailing separator if at program level 870 return out 871} 872 873/** 874 * Parse pipeline chains joined by && ||. Left-associative nesting. 875 * tree-sitter quirk: trailing redirect on the last pipeline wraps the ENTIRE 876 * list in a redirected_statement — `a > x && b > y` becomes 877 * redirected_statement(list(redirected_statement(a,>x), &&, b), >y). 878 */ 879function parseAndOr(P: ParseState): TsNode | null { 880 let left = parsePipeline(P) 881 if (!left) return null 882 while (true) { 883 const save = saveLex(P.L) 884 const t = nextToken(P.L, 'cmd') 885 if (t.type === 'OP' && (t.value === '&&' || t.value === '||')) { 886 const op = leaf(P, t.value, t) 887 skipNewlines(P) 888 const right = parsePipeline(P) 889 if (!right) { 890 left = mk(P, 'list', left.startIndex, op.endIndex, [left, op]) 891 break 892 } 893 // If right is a redirected_statement, hoist its redirects to wrap the list. 894 if (right.type === 'redirected_statement' && right.children.length >= 2) { 895 const inner = right.children[0]! 
896 const redirs = right.children.slice(1) 897 const listNode = mk(P, 'list', left.startIndex, inner.endIndex, [ 898 left, 899 op, 900 inner, 901 ]) 902 const lastR = redirs[redirs.length - 1]! 903 left = mk( 904 P, 905 'redirected_statement', 906 listNode.startIndex, 907 lastR.endIndex, 908 [listNode, ...redirs], 909 ) 910 } else { 911 left = mk(P, 'list', left.startIndex, right.endIndex, [left, op, right]) 912 } 913 } else { 914 restoreLex(P.L, save) 915 break 916 } 917 } 918 return left 919} 920 921function skipNewlines(P: ParseState): void { 922 while (true) { 923 const save = saveLex(P.L) 924 const t = nextToken(P.L, 'cmd') 925 if (t.type !== 'NEWLINE') { 926 restoreLex(P.L, save) 927 break 928 } 929 } 930} 931 932/** 933 * Parse commands joined by | or |&. Flat children with operator leaves. 934 * tree-sitter quirk: `a | b 2>nul | c` hoists the redirect on `b` to wrap 935 * the preceding pipeline fragment — pipeline(redirected_statement( 936 * pipeline(a,|,b), 2>nul), |, c). 937 */ 938function parsePipeline(P: ParseState): TsNode | null { 939 let first = parseCommand(P) 940 if (!first) return null 941 const parts: TsNode[] = [first] 942 while (true) { 943 const save = saveLex(P.L) 944 const t = nextToken(P.L, 'cmd') 945 if (t.type === 'OP' && (t.value === '|' || t.value === '|&')) { 946 const op = leaf(P, t.value, t) 947 skipNewlines(P) 948 const next = parseCommand(P) 949 if (!next) { 950 parts.push(op) 951 break 952 } 953 // Hoist trailing redirect on `next` to wrap current pipeline fragment 954 if ( 955 next.type === 'redirected_statement' && 956 next.children.length >= 2 && 957 parts.length >= 1 958 ) { 959 const inner = next.children[0]! 960 const redirs = next.children.slice(1) 961 // Wrap existing parts + op + inner as a pipeline 962 const pipeKids = [...parts, op, inner] 963 const pipeNode = mk( 964 P, 965 'pipeline', 966 pipeKids[0]!.startIndex, 967 inner.endIndex, 968 pipeKids, 969 ) 970 const lastR = redirs[redirs.length - 1]! 
971 const wrapped = mk( 972 P, 973 'redirected_statement', 974 pipeNode.startIndex, 975 lastR.endIndex, 976 [pipeNode, ...redirs], 977 ) 978 parts.length = 0 979 parts.push(wrapped) 980 first = wrapped 981 continue 982 } 983 parts.push(op, next) 984 } else { 985 restoreLex(P.L, save) 986 break 987 } 988 } 989 if (parts.length === 1) return parts[0]! 990 const last = parts[parts.length - 1]! 991 return mk(P, 'pipeline', parts[0]!.startIndex, last.endIndex, parts) 992} 993 994/** Parse a single command: simple, compound, or control structure. */ 995function parseCommand(P: ParseState): TsNode | null { 996 skipBlanks(P.L) 997 const save = saveLex(P.L) 998 const t = nextToken(P.L, 'cmd') 999 1000 if (t.type === 'EOF') { 1001 restoreLex(P.L, save) 1002 return null 1003 } 1004 1005 // Negation — tree-sitter wraps just the command, redirects go outside. 1006 // `! cmd > out` → redirected_statement(negated_command(!, cmd), >out) 1007 if (t.type === 'OP' && t.value === '!') { 1008 const bang = leaf(P, '!', t) 1009 const inner = parseCommand(P) 1010 if (!inner) { 1011 restoreLex(P.L, save) 1012 return null 1013 } 1014 // If inner is a redirected_statement, hoist redirects outside negation 1015 if (inner.type === 'redirected_statement' && inner.children.length >= 2) { 1016 const cmd = inner.children[0]! 1017 const redirs = inner.children.slice(1) 1018 const neg = mk(P, 'negated_command', bang.startIndex, cmd.endIndex, [ 1019 bang, 1020 cmd, 1021 ]) 1022 const lastR = redirs[redirs.length - 1]! 1023 return mk(P, 'redirected_statement', neg.startIndex, lastR.endIndex, [ 1024 neg, 1025 ...redirs, 1026 ]) 1027 } 1028 return mk(P, 'negated_command', bang.startIndex, inner.endIndex, [ 1029 bang, 1030 inner, 1031 ]) 1032 } 1033 1034 if (t.type === 'OP' && t.value === '(') { 1035 const open = leaf(P, '(', t) 1036 const body = parseStatements(P, ')') 1037 const closeTok = nextToken(P.L, 'cmd') 1038 const close = 1039 closeTok.type === 'OP' && closeTok.value === ')' 1040 ? 
leaf(P, ')', closeTok) 1041 : mk(P, ')', open.endIndex, open.endIndex, []) 1042 const node = mk(P, 'subshell', open.startIndex, close.endIndex, [ 1043 open, 1044 ...body, 1045 close, 1046 ]) 1047 return maybeRedirect(P, node) 1048 } 1049 1050 if (t.type === 'OP' && t.value === '((') { 1051 const open = leaf(P, '((', t) 1052 const exprs = parseArithCommaList(P, '))', 'var') 1053 const closeTok = nextToken(P.L, 'cmd') 1054 const close = 1055 closeTok.value === '))' 1056 ? leaf(P, '))', closeTok) 1057 : mk(P, '))', open.endIndex, open.endIndex, []) 1058 return mk(P, 'compound_statement', open.startIndex, close.endIndex, [ 1059 open, 1060 ...exprs, 1061 close, 1062 ]) 1063 } 1064 1065 if (t.type === 'OP' && t.value === '{') { 1066 const open = leaf(P, '{', t) 1067 const body = parseStatements(P, '}') 1068 const closeTok = nextToken(P.L, 'cmd') 1069 const close = 1070 closeTok.type === 'OP' && closeTok.value === '}' 1071 ? leaf(P, '}', closeTok) 1072 : mk(P, '}', open.endIndex, open.endIndex, []) 1073 const node = mk(P, 'compound_statement', open.startIndex, close.endIndex, [ 1074 open, 1075 ...body, 1076 close, 1077 ]) 1078 return maybeRedirect(P, node) 1079 } 1080 1081 if (t.type === 'OP' && (t.value === '[' || t.value === '[[')) { 1082 const open = leaf(P, t.value, t) 1083 const closer = t.value === '[' ? ']' : ']]' 1084 // Grammar: `[` can contain choice(_expression, redirected_statement). 1085 // Try _expression first; if we don't reach `]`, backtrack and parse as 1086 // redirected_statement (handles `[ ! cmd -v go &>/dev/null ]`). 1087 const exprSave = saveLex(P.L) 1088 let expr = parseTestExpr(P, closer) 1089 skipBlanks(P.L) 1090 if (t.value === '[' && peek(P.L) !== ']') { 1091 // Expression parse didn't reach `]` — try as redirected_statement. 1092 // Thread `]` stop-token so parseSimpleCommand doesn't eat it as arg. 
      restoreLex(P.L, exprSave)
      const prevStop = P.stopToken
      P.stopToken = ']'
      const rstmt = parseCommand(P)
      P.stopToken = prevStop
      if (rstmt && rstmt.type === 'redirected_statement') {
        expr = rstmt
      } else {
        // Neither worked — restore and keep the expression result
        restoreLex(P.L, exprSave)
        expr = parseTestExpr(P, closer)
      }
      skipBlanks(P.L)
    }
    const closeTok = nextToken(P.L, 'arg')
    let close: TsNode
    if (closeTok.value === closer) {
      close = leaf(P, closer, closeTok)
    } else {
      // Closer missing — synthesize a zero-width node at the opener's end so
      // startIndex/endIndex stay valid byte offsets
      close = mk(P, closer, open.endIndex, open.endIndex, [])
    }
    const kids = expr ? [open, expr, close] : [open, close]
    return mk(P, 'test_command', open.startIndex, close.endIndex, kids)
  }

  if (t.type === 'WORD') {
    // Compound-command keywords dispatch to their dedicated parsers; all of
    // them except `function` may carry trailing redirects (incl. herestrings).
    if (t.value === 'if') return maybeRedirect(P, parseIf(P, t), true)
    if (t.value === 'while' || t.value === 'until')
      return maybeRedirect(P, parseWhile(P, t), true)
    if (t.value === 'for') return maybeRedirect(P, parseFor(P, t), true)
    // `select` shares the for-loop surface syntax, so it reuses parseFor
    if (t.value === 'select') return maybeRedirect(P, parseFor(P, t), true)
    if (t.value === 'case') return maybeRedirect(P, parseCase(P, t), true)
    if (t.value === 'function') return parseFunction(P, t)
    if (DECL_KEYWORDS.has(t.value))
      return maybeRedirect(P, parseDeclaration(P, t))
    if (t.value === 'unset' || t.value === 'unsetenv') {
      return maybeRedirect(P, parseUnset(P, t))
    }
  }

  restoreLex(P.L, save)
  return parseSimpleCommand(P)
}

/**
 * Parse a simple command: [assignment]* word [arg|redirect]*
 * Returns variable_assignment if only one assignment and no command.
 */
function parseSimpleCommand(P: ParseState): TsNode | null {
  const start = P.L.b
  const assignments: TsNode[] = []
  const preRedirects: TsNode[] = []

  // Prefix loop: interleaved assignments (A=1) and pre-command redirects
  // (2>&1 cmd) in any order, as bash allows.
  while (true) {
    skipBlanks(P.L)
    const a = tryParseAssignment(P)
    if (a) {
      assignments.push(a)
      continue
    }
    const r = tryParseRedirect(P)
    if (r) {
      preRedirects.push(r)
      continue
    }
    break
  }

  skipBlanks(P.L)
  const save = saveLex(P.L)
  const nameTok = nextToken(P.L, 'cmd')
  // A command name must follow. Anything else (EOF, newline, comment, an
  // operator other than { [ [[, or a shell keyword other than `in`) means
  // the prefixes stand alone.
  if (
    nameTok.type === 'EOF' ||
    nameTok.type === 'NEWLINE' ||
    nameTok.type === 'COMMENT' ||
    (nameTok.type === 'OP' &&
      nameTok.value !== '{' &&
      nameTok.value !== '[' &&
      nameTok.value !== '[[') ||
    (nameTok.type === 'WORD' &&
      SHELL_KEYWORDS.has(nameTok.value) &&
      nameTok.value !== 'in')
  ) {
    restoreLex(P.L, save)
    // No command — standalone assignment(s) or redirect
    if (assignments.length === 1 && preRedirects.length === 0) {
      return assignments[0]!
    }
    if (preRedirects.length > 0 && assignments.length === 0) {
      // Bare redirect → redirected_statement with just file_redirect children
      const last = preRedirects[preRedirects.length - 1]!
      return mk(
        P,
        'redirected_statement',
        preRedirects[0]!.startIndex,
        last.endIndex,
        preRedirects,
      )
    }
    if (assignments.length > 1 && preRedirects.length === 0) {
      // `A=1 B=2` with no command → variable_assignments (plural)
      const last = assignments[assignments.length - 1]!
      return mk(
        P,
        'variable_assignments',
        assignments[0]!.startIndex,
        last.endIndex,
        assignments,
      )
    }
    if (assignments.length > 0 || preRedirects.length > 0) {
      // Mixed assignments + redirects with no command name → plain command
      const all = [...assignments, ...preRedirects]
      const last = all[all.length - 1]!
      return mk(P, 'command', start, last.endIndex, all)
    }
    return null
  }
  restoreLex(P.L, save)

  // Check for function definition: name() { ... }
  const fnSave = saveLex(P.L)
  const nm = parseWord(P, 'cmd')
  if (nm && nm.type === 'word') {
    skipBlanks(P.L)
    if (peek(P.L) === '(' && peek(P.L, 1) === ')') {
      const oTok = nextToken(P.L, 'cmd')
      const cTok = nextToken(P.L, 'cmd')
      const oParen = leaf(P, '(', oTok)
      const cParen = leaf(P, ')', cTok)
      skipBlanks(P.L)
      skipNewlines(P)
      const body = parseCommand(P)
      if (body) {
        // If body is redirected_statement(compound_statement, file_redirect...),
        // hoist redirects to function_definition level per tree-sitter grammar
        let bodyKids: TsNode[] = [body]
        if (
          body.type === 'redirected_statement' &&
          body.children.length >= 2 &&
          body.children[0]!.type === 'compound_statement'
        ) {
          bodyKids = body.children
        }
        const last = bodyKids[bodyKids.length - 1]!
        return mk(P, 'function_definition', nm.startIndex, last.endIndex, [
          nm,
          oParen,
          cParen,
          ...bodyKids,
        ])
      }
    }
  }
  restoreLex(P.L, fnSave)

  const nameArg = parseWord(P, 'cmd')
  if (!nameArg) {
    // parseWord failed where nextToken suggested a name — fall back to a lone
    // assignment if that's all we collected.
    if (assignments.length === 1) return assignments[0]!
    return null
  }

  const cmdName = mk(P, 'command_name', nameArg.startIndex, nameArg.endIndex, [
    nameArg,
  ])

  const args: TsNode[] = []
  const redirects: TsNode[] = []
  let heredocRedirect: TsNode | null = null

  while (true) {
    skipBlanks(P.L)
    // Post-command redirects are greedy (repeat1 $._literal) — once a redirect
    // appears after command_name, subsequent literals attach to it per grammar's
    // prec.left. `grep 2>/dev/null -q foo` → file_redirect eats `-q foo`.
    // Args parsed BEFORE the first redirect still go to command (cat a b > out).
    const r = tryParseRedirect(P, true)
    if (r) {
      // Classify: heredocs are resolved after the command (body follows the
      // newline); herestrings count as command children (args); file
      // redirects collect for the wrapping redirected_statement.
      if (r.type === 'heredoc_redirect') {
        heredocRedirect = r
      } else if (r.type === 'herestring_redirect') {
        args.push(r)
      } else {
        redirects.push(r)
      }
      continue
    }
    // Once a file_redirect has been seen, command args are done — grammar's
    // command rule doesn't allow file_redirect in its post-name choice, so
    // anything after belongs to redirected_statement's file_redirect children.
    if (redirects.length > 0) break
    // `[` test_command backtrack — stop at `]` so outer handler can consume it
    if (P.stopToken === ']' && peek(P.L) === ']') break
    const save2 = saveLex(P.L)
    const pk = nextToken(P.L, 'arg')
    // Stop the arg loop at any command-terminating operator.
    if (
      pk.type === 'EOF' ||
      pk.type === 'NEWLINE' ||
      pk.type === 'COMMENT' ||
      (pk.type === 'OP' &&
        (pk.value === '|' ||
          pk.value === '|&' ||
          pk.value === '&&' ||
          pk.value === '||' ||
          pk.value === ';' ||
          pk.value === ';;' ||
          pk.value === ';&' ||
          pk.value === ';;&' ||
          pk.value === '&' ||
          pk.value === ')' ||
          pk.value === '}' ||
          pk.value === '))'))
    ) {
      restoreLex(P.L, save2)
      break
    }
    restoreLex(P.L, save2)
    const arg = parseWord(P, 'arg')
    if (!arg) {
      // Lone `(` in arg position — tree-sitter parses this as subshell arg
      // e.g., `echo =(cmd)` → command has ERROR(=), subshell(cmd) as args
      if (peek(P.L) === '(') {
        const oTok = nextToken(P.L, 'cmd')
        const open = leaf(P, '(', oTok)
        const body = parseStatements(P, ')')
        const cTok = nextToken(P.L, 'cmd')
        const close =
          cTok.type === 'OP' && cTok.value === ')'
            ? leaf(P, ')', cTok)
            : mk(P, ')', open.endIndex, open.endIndex, [])
        args.push(
          mk(P, 'subshell', open.startIndex, close.endIndex, [
            open,
            ...body,
            close,
          ]),
        )
        continue
      }
      break
    }
    // Lone `=` in arg position is a parse error in bash — tree-sitter wraps
    // it in ERROR for recovery. Happens in `echo =(cmd)` (zsh process-sub).
    if (arg.type === 'word' && arg.text === '=') {
      args.push(mk(P, 'ERROR', arg.startIndex, arg.endIndex, [arg]))
      continue
    }
    // Word immediately followed by `(` (no whitespace) is a parse error —
    // bash doesn't allow glob-then-subshell adjacency. tree-sitter wraps the
    // word in ERROR. Catches zsh glob qualifiers like `*.(e:'cmd':)`.
    if (
      (arg.type === 'word' || arg.type === 'concatenation') &&
      peek(P.L) === '(' &&
      P.L.b === arg.endIndex
    ) {
      args.push(mk(P, 'ERROR', arg.startIndex, arg.endIndex, [arg]))
      continue
    }
    args.push(arg)
  }

  // preRedirects (e.g., `2>&1 cat`, `<<<str cmd`) go INSIDE the command node
  // before command_name per tree-sitter grammar, not in redirected_statement
  const cmdChildren = [...assignments, ...preRedirects, cmdName, ...args]
  const cmdEnd =
    cmdChildren.length > 0
      ? cmdChildren[cmdChildren.length - 1]!.endIndex
      : cmdName.endIndex
  const cmdStart = cmdChildren[0]!.startIndex
  const cmd = mk(P, 'command', cmdStart, cmdEnd, cmdChildren)

  if (heredocRedirect) {
    // Scan heredoc body now
    scanHeredocBodies(P)
    const hd = P.L.heredocs.shift()
    if (hd && heredocRedirect.children.length >= 2) {
      // Quoted delimiters (<<'EOF') suppress expansion → body is a leaf;
      // unquoted bodies get expansion children parsed out of the byte range.
      const bodyNode = mk(
        P,
        'heredoc_body',
        hd.bodyStart,
        hd.bodyEnd,
        hd.quoted ? [] : parseHeredocBodyContent(P, hd.bodyStart, hd.bodyEnd),
      )
      const endNode = mk(P, 'heredoc_end', hd.endStart, hd.endEnd, [])
      heredocRedirect.children.push(bodyNode, endNode)
      // Extend the redirect node to cover the scanned body text
      heredocRedirect.endIndex = hd.endEnd
      heredocRedirect.text = sliceBytes(
        P,
        heredocRedirect.startIndex,
        hd.endEnd,
      )
    }
    const allR = [...preRedirects, heredocRedirect, ...redirects]
    const rStart =
      preRedirects.length > 0
        ? Math.min(cmd.startIndex, preRedirects[0]!.startIndex)
        : cmd.startIndex
    return mk(P, 'redirected_statement', rStart, heredocRedirect.endIndex, [
      cmd,
      ...allR,
    ])
  }

  if (redirects.length > 0) {
    const last = redirects[redirects.length - 1]!
    return mk(P, 'redirected_statement', cmd.startIndex, last.endIndex, [
      cmd,
      ...redirects,
    ])
  }

  return cmd
}

/**
 * Wrap `node` in a redirected_statement if trailing redirects follow it.
 * Herestring redirects (<<<) are consumed only when `allowHerestring` is set
 * (compound commands pass true); otherwise the lexer is restored so the
 * herestring is left for the caller.
 */
function maybeRedirect(
  P: ParseState,
  node: TsNode,
  allowHerestring = false,
): TsNode {
  const redirects: TsNode[] = []
  while (true) {
    skipBlanks(P.L)
    const save = saveLex(P.L)
    const r = tryParseRedirect(P)
    if (!r) break
    if (r.type === 'herestring_redirect' && !allowHerestring) {
      restoreLex(P.L, save)
      break
    }
    redirects.push(r)
  }
  if (redirects.length === 0) return node
  const last = redirects[redirects.length - 1]!
  return mk(P, 'redirected_statement', node.startIndex, last.endIndex, [
    node,
    ...redirects,
  ])
}

/**
 * Try to parse a variable assignment: `name=value`, `name+=value`, or
 * `name[idx]=value`, where value may be absent, a word, or an array literal
 * `(...)`. Restores the lexer and returns null when the input is not an
 * assignment (e.g. `name==x` comparison, or no `=` at all).
 */
function tryParseAssignment(P: ParseState): TsNode | null {
  const save = saveLex(P.L)
  skipBlanks(P.L)
  const startB = P.L.b
  // Must start with identifier
  if (!isIdentStart(peek(P.L))) {
    restoreLex(P.L, save)
    return null
  }
  while (isIdentChar(peek(P.L))) advance(P.L)
  const nameEnd = P.L.b
  // Optional subscript
  let subEnd = nameEnd
  if (peek(P.L) === '[') {
    advance(P.L)
    let depth = 1
    // NOTE(review): bracket-depth scan only — a `]` inside quotes within the
    // subscript would close it early; confirm the corpus covers that shape.
    while (P.L.i < P.L.len && depth > 0) {
      const c = peek(P.L)
      if (c === '[') depth++
      else if (c === ']') depth--
      advance(P.L)
    }
    subEnd = P.L.b
  }
  const c = peek(P.L)
  const c1 = peek(P.L, 1)
  let op: string
  // `=` (but not `==`) or `+=`; anything else means "not an assignment"
  if (c === '=' && c1 !== '=') {
    op = '='
  } else if (c === '+' && c1 === '=') {
    op = '+='
  } else {
    restoreLex(P.L, save)
    return null
  }
  const nameNode = mk(P, 'variable_name', startB, nameEnd, [])
  // Subscript handling: wrap in subscript node if present
  let lhs: TsNode = nameNode
  if (subEnd > nameEnd) {
    const brOpen = mk(P, '[', nameEnd, nameEnd + 1, [])
    const idx = parseSubscriptIndex(P, nameEnd + 1, subEnd - 1)
    const brClose = mk(P, ']', subEnd - 1, subEnd, [])
    lhs = mk(P, 'subscript', startB, subEnd, [nameNode, brOpen, idx, brClose])
  }
  const opStart = P.L.b
  advance(P.L)
  if (op === '+=') advance(P.L)
  const opEnd = P.L.b
  const opNode = mk(P, op, opStart, opEnd, [])
  let val: TsNode | null = null
  if (peek(P.L) === '(') {
    // Array
    const aoTok = nextToken(P.L, 'cmd')
    const aOpen = leaf(P, '(', aoTok)
    const elems: TsNode[] = [aOpen]
    while (true) {
      skipBlanks(P.L)
      if (peek(P.L) === ')') break
      const e = parseWord(P, 'arg')
      if (!e) break
      elems.push(e)
    }
    const acTok = nextToken(P.L, 'cmd')
    // Missing `)` → zero-width closer at the opener so offsets stay valid
    const aClose =
      acTok.value === ')'
        ? leaf(P, ')', acTok)
        : mk(P, ')', aOpen.endIndex, aOpen.endIndex, [])
    elems.push(aClose)
    val = mk(P, 'array', aOpen.startIndex, aClose.endIndex, elems)
  } else {
    const c2 = peek(P.L)
    // Empty value (A=) when the next char terminates the word
    if (
      c2 &&
      c2 !== ' ' &&
      c2 !== '\t' &&
      c2 !== '\n' &&
      c2 !== ';' &&
      c2 !== '&' &&
      c2 !== '|' &&
      c2 !== ')' &&
      c2 !== '}'
    ) {
      val = parseWord(P, 'arg')
    }
  }
  const kids = val ? [lhs, opNode, val] : [lhs, opNode]
  const end = val ? val.endIndex : opEnd
  return mk(P, 'variable_assignment', startB, end, kids)
}

/**
 * Parse subscript index content. Parsed arithmetically per tree-sitter grammar:
 * `${a[1+2]}` → binary_expression; `${a[++i]}` → unary_expression(word);
 * `${a[(($n+1))]}` → compound_statement(binary_expression). Falls back to
 * simple patterns (@, *) as word.
 */
function parseSubscriptIndexInline(P: ParseState): TsNode | null {
  skipBlanks(P.L)
  const c = peek(P.L)
  // @ or * alone → word (associative array all-keys)
  if ((c === '@' || c === '*') && peek(P.L, 1) === ']') {
    const s = P.L.b
    advance(P.L)
    return mk(P, 'word', s, P.L.b, [])
  }
  // ((expr)) → compound_statement wrapping the inner arithmetic
  if (c === '(' && peek(P.L, 1) === '(') {
    const oStart = P.L.b
    advance(P.L)
    advance(P.L)
    const open = mk(P, '((', oStart, P.L.b, [])
    const inner = parseArithExpr(P, '))', 'var')
    skipBlanks(P.L)
    let close: TsNode
    if (peek(P.L) === ')' && peek(P.L, 1) === ')') {
      const cs = P.L.b
      advance(P.L)
      advance(P.L)
      close = mk(P, '))', cs, P.L.b, [])
    } else {
      // Unterminated `((` — zero-width closer at current position
      close = mk(P, '))', P.L.b, P.L.b, [])
    }
    const kids = inner ? [open, inner, close] : [open, close]
    return mk(P, 'compound_statement', open.startIndex, close.endIndex, kids)
  }
  // Arithmetic — but bare identifiers in subscript use 'word' mode per
  // tree-sitter (${words[++counter]} → unary_expression(word)).
  return parseArithExpr(P, ']', 'word')
}

/** Legacy byte-range subscript index parser — kept for callers that pre-scan. */
function parseSubscriptIndex(
  P: ParseState,
  startB: number,
  endB: number,
): TsNode {
  const text = sliceBytes(P, startB, endB)
  // Pure digits → number leaf
  if (/^\d+$/.test(text)) return mk(P, 'number', startB, endB, [])
  // $name → simple_expansion($, variable_name)
  const m = /^\$([a-zA-Z_]\w*)$/.exec(text)
  if (m) {
    const dollar = mk(P, '$', startB, startB + 1, [])
    const vn = mk(P, 'variable_name', startB + 1, endB, [])
    return mk(P, 'simple_expansion', startB, endB, [dollar, vn])
  }
  // $? $# etc → simple_expansion($, special_variable_name)
  if (text.length === 2 && text[0] === '$' && SPECIAL_VARS.has(text[1]!)) {
    const dollar = mk(P, '$', startB, startB + 1, [])
    const vn = mk(P, 'special_variable_name', startB + 1, endB, [])
    return mk(P, 'simple_expansion', startB, endB, [dollar, vn])
  }
  // Anything else (expressions, strings) → opaque word
  return mk(P, 'word', startB, endB, [])
}

/**
 * Can the current position start a redirect destination literal?
 * Returns false at redirect ops, terminators, or file-descriptor-prefixed ops
 * so file_redirect's repeat1($._literal) stops at the right boundary.
 */
function isRedirectLiteralStart(P: ParseState): boolean {
  const c = peek(P.L)
  if (c === '' || c === '\n') return false
  // Shell terminators and operators
  if (c === '|' || c === '&' || c === ';' || c === '(' || c === ')')
    return false
  // Redirect operators (< > with any suffix; <( >( handled by caller)
  if (c === '<' || c === '>') {
    // <( >( are process substitutions — those ARE literals
    return peek(P.L, 1) === '('
  }
  // N< N> file descriptor prefix — starts a new redirect, not a literal
  if (isDigit(c)) {
    let j = P.L.i
    while (j < P.L.len && isDigit(P.L.src[j]!)) j++
    const after = j < P.L.len ? P.L.src[j]! : ''
    if (after === '>' || after === '<') return false
  }
  // `}` only terminates if we're in a context where it's a closer — but
  // file_redirect sees `}` as word char (e.g., `>$HOME}` is valid path char).
  // Actually `}` at top level terminates compound_statement — need to stop.
  if (c === '}') return false
  // Test command closer — when parseSimpleCommand is called from `[` context,
  // `]` must terminate so parseCommand can return and `[` handler consume it.
  if (P.stopToken === ']' && c === ']') return false
  return true
}

/**
 * Parse a redirect operator + destination(s).
 * Handles herestrings (<<<), heredocs (<< <<-), close-fd forms (<&- >&-),
 * and file redirects (> >> >& >| &> &>> < <&), each with an optional leading
 * file-descriptor number. Restores the lexer and returns null when no
 * redirect operator is found.
 * @param greedy When true, file_redirect consumes repeat1($._literal) per
 * grammar's prec.left — `cmd >f a b c` attaches `a b c` to the redirect.
 * When false (preRedirect context), takes only 1 destination because
 * command's dynamic precedence beats redirected_statement's prec(-1).
 */
function tryParseRedirect(P: ParseState, greedy = false): TsNode | null {
  const save = saveLex(P.L)
  skipBlanks(P.L)
  // File descriptor prefix?
  let fd: TsNode | null = null
  if (isDigit(peek(P.L))) {
    const startB = P.L.b
    let j = P.L.i
    while (j < P.L.len && isDigit(P.L.src[j]!)) j++
    const after = j < P.L.len ? P.L.src[j]! : ''
    // Digits are a file_descriptor only when directly followed by < or >
    if (after === '>' || after === '<') {
      while (P.L.i < j) advance(P.L)
      fd = mk(P, 'file_descriptor', startB, P.L.b, [])
    }
  }
  const t = nextToken(P.L, 'arg')
  if (t.type !== 'OP') {
    restoreLex(P.L, save)
    return null
  }
  const v = t.value
  if (v === '<<<') {
    const op = leaf(P, '<<<', t)
    skipBlanks(P.L)
    const target = parseWord(P, 'arg')
    const end = target ? target.endIndex : op.endIndex
    const kids = target ? [op, target] : [op]
    return mk(
      P,
      'herestring_redirect',
      fd ? fd.startIndex : op.startIndex,
      end,
      fd ? [fd, ...kids] : kids,
    )
  }
  if (v === '<<' || v === '<<-') {
    const op = leaf(P, v, t)
    // Heredoc start — delimiter word (may be quoted)
    skipBlanks(P.L)
    const dStart = P.L.b
    let quoted = false
    let delim = ''
    const dc = peek(P.L)
    if (dc === "'" || dc === '"') {
      // Quoted delimiter → body is literal (no expansions)
      quoted = true
      advance(P.L)
      while (P.L.i < P.L.len && peek(P.L) !== dc) {
        delim += peek(P.L)
        advance(P.L)
      }
      if (P.L.i < P.L.len) advance(P.L)
    } else if (dc === '\\') {
      // Backslash-escaped delimiter: \X — exactly one escaped char, body is
      // quoted (literal). Covers <<\EOF <<\' <<\\ etc.
      quoted = true
      advance(P.L)
      if (P.L.i < P.L.len && peek(P.L) !== '\n') {
        delim += peek(P.L)
        advance(P.L)
      }
      // May be followed by more ident chars (e.g. <<\EOF → delim "EOF")
      while (P.L.i < P.L.len && isIdentChar(peek(P.L))) {
        delim += peek(P.L)
        advance(P.L)
      }
    } else {
      // Unquoted delimiter: bash accepts most non-metacharacters (not just
      // identifiers). Allow !, -, ., etc. — stop at shell metachars.
      while (P.L.i < P.L.len && isHeredocDelimChar(peek(P.L))) {
        delim += peek(P.L)
        advance(P.L)
      }
    }
    const dEnd = P.L.b
    const startNode = mk(P, 'heredoc_start', dStart, dEnd, [])
    // Register pending heredoc — body scanned at next newline
    P.L.heredocs.push({
      delim,
      stripTabs: v === '<<-',
      quoted,
      bodyStart: 0,
      bodyEnd: 0,
      endStart: 0,
      endEnd: 0,
    })
    const kids = fd ? [fd, op, startNode] : [op, startNode]
    const startIdx = fd ? fd.startIndex : op.startIndex
    // SECURITY: tree-sitter nests any pipeline/list/file_redirect appearing
    // between heredoc_start and the newline as a CHILD of heredoc_redirect.
    // `ls <<'EOF' | rm -rf /tmp/evil` must not silently drop the rm. Parse
    // trailing words and file_redirects properly (ast.ts walkHeredocRedirect
    // fails closed on any unrecognized child via tooComplex). Pipeline / list
    // operators (| && || ;) are structurally complex — emit ERROR so the same
    // fail-closed path rejects them.
    while (true) {
      skipBlanks(P.L)
      const tc = peek(P.L)
      if (tc === '\n' || tc === '' || P.L.i >= P.L.len) break
      // File redirect after delimiter: cat <<EOF > out.txt
      if (tc === '>' || tc === '<' || isDigit(tc)) {
        const rSave = saveLex(P.L)
        const r = tryParseRedirect(P)
        if (r && r.type === 'file_redirect') {
          kids.push(r)
          continue
        }
        restoreLex(P.L, rSave)
      }
      // Pipeline after heredoc_start: `one <<EOF | grep two` — tree-sitter
      // nests the pipeline as a child of heredoc_redirect. ast.ts
      // walkHeredocRedirect fails closed on pipeline/command via tooComplex.
      if (tc === '|' && peek(P.L, 1) !== '|') {
        advance(P.L)
        skipBlanks(P.L)
        const pipeCmds: TsNode[] = []
        while (true) {
          const cmd = parseCommand(P)
          if (!cmd) break
          pipeCmds.push(cmd)
          skipBlanks(P.L)
          if (peek(P.L) === '|' && peek(P.L, 1) !== '|') {
            const ps = P.L.b
            advance(P.L)
            pipeCmds.push(mk(P, '|', ps, P.L.b, []))
            skipBlanks(P.L)
            continue
          }
          break
        }
        if (pipeCmds.length > 0) {
          const pl = pipeCmds[pipeCmds.length - 1]!
          // tree-sitter always wraps in pipeline after `|`, even single command
          kids.push(
            mk(P, 'pipeline', pipeCmds[0]!.startIndex, pl.endIndex, pipeCmds),
          )
        }
        continue
      }
      // && / || after heredoc_start: `cat <<-EOF || die "..."` — tree-sitter
      // nests just the RHS command (not a list) as a child of heredoc_redirect.
      if (
        (tc === '&' && peek(P.L, 1) === '&') ||
        (tc === '|' && peek(P.L, 1) === '|')
      ) {
        advance(P.L)
        advance(P.L)
        skipBlanks(P.L)
        const rhs = parseCommand(P)
        if (rhs) kids.push(rhs)
        continue
      }
      // Terminator / unhandled metachar — consume rest of line as ERROR so
      // ast.ts rejects it. Covers ; & ( )
      if (tc === '&' || tc === ';' || tc === '(' || tc === ')') {
        const eStart = P.L.b
        while (P.L.i < P.L.len && peek(P.L) !== '\n') advance(P.L)
        kids.push(mk(P, 'ERROR', eStart, P.L.b, []))
        break
      }
      // Trailing word argument: newins <<-EOF - org.freedesktop.service
      const w = parseWord(P, 'arg')
      if (w) {
        kids.push(w)
        continue
      }
      // Unrecognized — consume rest of line as ERROR
      const eStart = P.L.b
      while (P.L.i < P.L.len && peek(P.L) !== '\n') advance(P.L)
      if (P.L.b > eStart) kids.push(mk(P, 'ERROR', eStart, P.L.b, []))
      break
    }
    return mk(P, 'heredoc_redirect', startIdx, P.L.b, kids)
  }
  // Close-fd variants: `<&-` `>&-` have OPTIONAL destination (0 or 1)
  if (v === '<&-' || v === '>&-') {
    const op = leaf(P, v, t)
    const kids: TsNode[] = []
    if (fd) kids.push(fd)
    kids.push(op)
    // Optional single destination — only consume if next is a literal
    skipBlanks(P.L)
    const dSave = saveLex(P.L)
    const dest = isRedirectLiteralStart(P) ? parseWord(P, 'arg') : null
    if (dest) {
      kids.push(dest)
    } else {
      restoreLex(P.L, dSave)
    }
    const startIdx = fd ? fd.startIndex : op.startIndex
    const end = dest ? dest.endIndex : op.endIndex
    return mk(P, 'file_redirect', startIdx, end, kids)
  }
  if (
    v === '>' ||
    v === '>>' ||
    v === '>&' ||
    v === '>|' ||
    v === '&>' ||
    v === '&>>' ||
    v === '<' ||
    v === '<&'
  ) {
    const op = leaf(P, v, t)
    const kids: TsNode[] = []
    if (fd) kids.push(fd)
    kids.push(op)
    // Grammar: destination is repeat1($._literal) — greedily consume literals
    // until a non-literal (redirect op, terminator, etc). tree-sitter's
    // prec.left makes `cmd >f a b c` attach `a b c` to the file_redirect,
    // NOT to the command. Structural quirk but required for corpus parity.
    // In preRedirect context (greedy=false), take only 1 literal because
    // command's dynamic precedence beats redirected_statement's prec(-1).
    let end = op.endIndex
    let taken = 0
    while (true) {
      skipBlanks(P.L)
      if (!isRedirectLiteralStart(P)) break
      if (!greedy && taken >= 1) break
      const tc = peek(P.L)
      const tc1 = peek(P.L, 1)
      let target: TsNode | null = null
      // <( and >( destinations are process substitutions, not words
      if ((tc === '<' || tc === '>') && tc1 === '(') {
        target = parseProcessSub(P)
      } else {
        target = parseWord(P, 'arg')
      }
      if (!target) break
      kids.push(target)
      end = target.endIndex
      taken++
    }
    const startIdx = fd ? fd.startIndex : op.startIndex
    return mk(P, 'file_redirect', startIdx, end, kids)
  }
  restoreLex(P.L, save)
  return null
}

/**
 * Parse a process substitution `<(...)` or `>(...)` at the current position.
 * Returns null if the cursor is not at `<(` / `>(`. A missing `)` yields a
 * zero-width close node so byte offsets stay consistent.
 */
function parseProcessSub(P: ParseState): TsNode | null {
  const c = peek(P.L)
  if ((c !== '<' && c !== '>') || peek(P.L, 1) !== '(') return null
  const start = P.L.b
  advance(P.L)
  advance(P.L)
  const open = mk(P, c + '(', start, P.L.b, [])
  const body = parseStatements(P, ')')
  skipBlanks(P.L)
  let close: TsNode
  if (peek(P.L) === ')') {
    const cs = P.L.b
    advance(P.L)
    close = mk(P, ')', cs, P.L.b, [])
  } else {
    close = mk(P, ')', P.L.b, P.L.b, [])
  }
  return mk(P, 'process_substitution', start, close.endIndex, [
    open,
    ...body,
    close,
  ])
}

/**
 * Scan forward from the current line for the bodies of pending heredocs
 * registered in P.L.heredocs, filling in each record's bodyStart/bodyEnd and
 * endStart/endEnd byte offsets. <<- strips leading tabs before matching the
 * delimiter line. Unterminated heredocs get an empty zero-width range at EOF.
 *
 * NOTE(review): the `return` on the first delimiter match exits the whole
 * function, so with several pending heredocs (`cat <<A <<B`) only the first
 * is scanned per call — confirm callers re-invoke for the remainder.
 */
function scanHeredocBodies(P: ParseState): void {
  // Skip to newline if not already there
  while (P.L.i < P.L.len && P.L.src[P.L.i] !== '\n') advance(P.L)
  if (P.L.i < P.L.len) advance(P.L)
  for (const hd of P.L.heredocs) {
    hd.bodyStart = P.L.b
    const delimLen = hd.delim.length
    while (P.L.i < P.L.len) {
      const lineStart = P.L.i
      const lineStartB = P.L.b
      // Skip leading tabs if <<-
      let checkI = lineStart
      if (hd.stripTabs) {
        while (checkI < P.L.len && P.L.src[checkI] === '\t') checkI++
      }
      // Check if this line is the delimiter
      if (
        P.L.src.startsWith(hd.delim, checkI) &&
        (checkI + delimLen >= P.L.len ||
          P.L.src[checkI + delimLen] === '\n' ||
          P.L.src[checkI + delimLen] === '\r')
      ) {
        hd.bodyEnd = lineStartB
        // Advance past tabs
        while (P.L.i < checkI) advance(P.L)
        hd.endStart = P.L.b
        // Advance past delimiter
        for (let k = 0; k < delimLen; k++) advance(P.L)
        hd.endEnd = P.L.b
        // Skip trailing newline
        if (P.L.i < P.L.len && P.L.src[P.L.i] === '\n') advance(P.L)
        return
      }
      // Consume line
      while (P.L.i < P.L.len && P.L.src[P.L.i] !== '\n') advance(P.L)
      if (P.L.i < P.L.len) advance(P.L)
    }
    // Unterminated
    hd.bodyEnd = P.L.b
    hd.endStart = P.L.b
    hd.endEnd = P.L.b
  }
}

/**
 * Parse expansions ($var, ${...}, $(...), backticks) inside an unquoted
 * heredoc body byte range, returning heredoc_content/expansion children.
 * The lexer is repositioned for the scan and restored afterwards.
 */
function parseHeredocBodyContent(
  P: ParseState,
  start: number,
  end: number,
): TsNode[] {
  // Parse expansions inside an unquoted heredoc body.
  const saved = saveLex(P.L)
  // Position lexer at body start
  restoreLexToByte(P, start)
  const out: TsNode[] = []
  let contentStart = P.L.b
  // tree-sitter-bash's heredoc_body rule hides the initial text segment
  // (_heredoc_body_beginning) — only content AFTER the first expansion is
  // emitted as heredoc_content. Track whether we've seen an expansion yet.
  let sawExpansion = false
  while (P.L.b < end) {
    const c = peek(P.L)
    // Backslash escapes suppress expansion: \$ \` stay literal in heredoc.
    if (c === '\\') {
      const nxt = peek(P.L, 1)
      if (nxt === '$' || nxt === '`' || nxt === '\\') {
        advance(P.L)
        advance(P.L)
        continue
      }
      advance(P.L)
      continue
    }
    if (c === '$' || c === '`') {
      const preB = P.L.b
      const exp = parseDollarLike(P)
      // Bare `$` followed by non-name (e.g. `$'` in a regex) returns a lone
      // '$' leaf, not an expansion — treat as literal content, don't split.
      if (
        exp &&
        (exp.type === 'simple_expansion' ||
          exp.type === 'expansion' ||
          exp.type === 'command_substitution' ||
          exp.type === 'arithmetic_expansion')
      ) {
        if (sawExpansion && preB > contentStart) {
          out.push(mk(P, 'heredoc_content', contentStart, preB, []))
        }
        out.push(exp)
        contentStart = P.L.b
        sawExpansion = true
      }
      continue
    }
    advance(P.L)
  }
  // Only emit heredoc_content children if there were expansions — otherwise
  // the heredoc_body is a leaf node (tree-sitter convention).
  if (sawExpansion) {
    out.push(mk(P, 'heredoc_content', contentStart, end, []))
  }
  restoreLex(P.L, saved)
  return out
}

/**
 * Reposition the lexer at an arbitrary UTF-8 byte offset by binary-searching
 * the lazily-built char-index→byte-offset table (byteAt populates it).
 * Sets both the JS-string index (i) and the byte offset (b).
 */
function restoreLexToByte(P: ParseState, targetByte: number): void {
  if (!P.L.byteTable) byteAt(P.L, 0)
  const t = P.L.byteTable!
  let lo = 0
  let hi = P.src.length
  while (lo < hi) {
    const m = (lo + hi) >>> 1
    if (t[m]! < targetByte) lo = m + 1
    else hi = m
  }
  P.L.i = lo
  P.L.b = targetByte
}

/**
 * Parse a word-position element: bare word, string, expansion, or concatenation
 * thereof. Returns a single node; if multiple adjacent fragments, wraps in
 * concatenation.
 */
function parseWord(P: ParseState, _ctx: 'cmd' | 'arg'): TsNode | null {
  skipBlanks(P.L)
  const parts: TsNode[] = []
  while (P.L.i < P.L.len) {
    const c = peek(P.L)
    // Word terminators: whitespace, EOF, and shell control operators
    if (
      c === ' ' ||
      c === '\t' ||
      c === '\n' ||
      c === '\r' ||
      c === '' ||
      c === '|' ||
      c === '&' ||
      c === ';' ||
      c === '(' ||
      c === ')'
    ) {
      break
    }
    // < > are redirect operators unless <( >( (process substitution)
    if (c === '<' || c === '>') {
      if (peek(P.L, 1) === '(') {
        const ps = parseProcessSub(P)
        if (ps) parts.push(ps)
        continue
      }
      break
    }
    if (c === '"') {
      parts.push(parseDoubleQuoted(P))
      continue
    }
    if (c === "'") {
      const tok = nextToken(P.L, 'arg')
      parts.push(leaf(P, 'raw_string', tok))
      continue
    }
    if (c === '$') {
      const c1 = peek(P.L, 1)
      if (c1 === "'") {
        // $'...' ANSI-C quoting
        const tok = nextToken(P.L, 'arg')
        parts.push(leaf(P, 'ansi_c_string', tok))
        continue
      }
      if (c1 === '"') {
        // Translated string: emit $ leaf + string node
        const dTok: Token = {
          type: 'DOLLAR',
          value: '$',
          start: P.L.b,
          end: P.L.b + 1,
        }
        advance(P.L)
        parts.push(leaf(P, '$', dTok))
        parts.push(parseDoubleQuoted(P))
        continue
      }
      if (c1 === '`') {
        // `$` followed by backtick — tree-sitter elides the $ entirely
        // and emits just (command_substitution). Consume $ and let next
        // iteration handle the backtick.
        advance(P.L)
        continue
      }
      const exp = parseDollarLike(P)
      if (exp) parts.push(exp)
      continue
    }
    if (c === '`') {
      // Inside a backtick substitution, a backtick closes it — stop the word
      if (P.inBacktick > 0) break
      const bt = parseBacktick(P)
      if (bt) parts.push(bt)
      continue
    }
    // Brace expression {1..5} or {a,b,c} — only if looks like one
    if (c === '{') {
      const be = tryParseBraceExpr(P)
      if (be) {
        parts.push(be)
        continue
      }
      // SECURITY: if `{` is immediately followed by a command terminator
      // (; | & newline or EOF), it's a standalone word — don't slurp the
      // rest of the line via tryParseBraceLikeCat. `echo {;touch /tmp/evil`
      // must split on `;` so the security walker sees `touch`.
      const nc = peek(P.L, 1)
      if (
        nc === ';' ||
        nc === '|' ||
        nc === '&' ||
        nc === '\n' ||
        nc === '' ||
        nc === ')' ||
        nc === ' ' ||
        nc === '\t'
      ) {
        const bStart = P.L.b
        advance(P.L)
        parts.push(mk(P, 'word', bStart, P.L.b, []))
        continue
      }
      // Otherwise treat { and } as word fragments
      const cat = tryParseBraceLikeCat(P)
      if (cat) {
        for (const p of cat) parts.push(p)
        continue
      }
    }
    // Standalone `}` in arg position is a word (e.g., `echo }foo`).
    // parseBareWord breaks on `}` so handle it here.
    if (c === '}') {
      const bStart = P.L.b
      advance(P.L)
      parts.push(mk(P, 'word', bStart, P.L.b, []))
      continue
    }
    // `[` and `]` are single-char word fragments (tree-sitter splits at
    // brackets: `[:lower:]` → `[` `:lower:` `]`, `{o[k]}` → 6 words).
    if (c === '[' || c === ']') {
      const bStart = P.L.b
      advance(P.L)
      parts.push(mk(P, 'word', bStart, P.L.b, []))
      continue
    }
    // Bare word fragment
    const frag = parseBareWord(P)
    if (!frag) break
    // `NN#${...}` or `NN#$(...)` → (number (expansion|command_substitution)).
    // Grammar: number can be seq(/-?(0x)?[0-9]+#/, choice(expansion, cmd_sub)).
    // `10#${cmd}` must NOT be concatenation — it's a single number node with
    // the expansion as child. Detect here: frag ends with `#`, next is $ {/(.
    if (
      frag.type === 'word' &&
      /^-?(0x)?[0-9]+#$/.test(frag.text) &&
      peek(P.L) === '$' &&
      (peek(P.L, 1) === '{' || peek(P.L, 1) === '(')
    ) {
      const exp = parseDollarLike(P)
      if (exp) {
        // Prefix `NN#` is an anonymous pattern in grammar — only the
        // expansion/cmd_sub is a named child.
        parts.push(mk(P, 'number', frag.startIndex, exp.endIndex, [exp]))
        continue
      }
    }
    parts.push(frag)
  }
  if (parts.length === 0) return null
  if (parts.length === 1) return parts[0]!
  // Concatenation
  const first = parts[0]!
  const last = parts[parts.length - 1]!
  return mk(P, 'concatenation', first.startIndex, last.endIndex, parts)
}

/**
 * Consume a maximal run of plain word characters, returning a `word` leaf
 * (or `number` when the text is an optionally-negative integer), or null if
 * nothing was consumed. Backslash pairs the following char into the word;
 * a line continuation (\ before \r?\n) ends the word.
 */
function parseBareWord(P: ParseState): TsNode | null {
  const start = P.L.b
  const startI = P.L.i
  while (P.L.i < P.L.len) {
    const c = peek(P.L)
    if (c === '\\') {
      if (P.L.i + 1 >= P.L.len) {
        // Trailing unpaired `\` at true EOF — tree-sitter emits word WITHOUT
        // the `\` plus a sibling ERROR node. Stop here; caller emits ERROR.
        break
      }
      const nx = P.L.src[P.L.i + 1]
      if (nx === '\n' || (nx === '\r' && P.L.src[P.L.i + 2] === '\n')) {
        // Line continuation BREAKS the word (tree-sitter quirk) — handles \r?\n
        break
      }
      // Escaped char: consume backslash + following char as word content
      advance(P.L)
      advance(P.L)
      continue
    }
    // Any metachar / quote / expansion / bracket char ends the bare fragment
    if (
      c === ' ' ||
      c === '\t' ||
      c === '\n' ||
      c === '\r' ||
      c === '' ||
      c === '|' ||
      c === '&' ||
      c === ';' ||
      c === '(' ||
      c === ')' ||
      c === '<' ||
      c === '>' ||
      c === '"' ||
      c === "'" ||
      c === '$' ||
      c === '`' ||
      c === '{' ||
      c === '}' ||
      c === '[' ||
      c === ']'
    ) {
      break
    }
    advance(P.L)
  }
  if (P.L.b === start) return null
  const text = P.src.slice(startI, P.L.i)
  const type = /^-?\d+$/.test(text) ? 'number' : 'word'
  return mk(P, type, start, P.L.b, [])
}

/**
 * Try to parse a range brace expression `{N..M}` where both endpoints are
 * numbers or both are single identifier chars. Restores the lexer and
 * returns null on any other shape (comma lists fall through to
 * tryParseBraceLikeCat).
 */
function tryParseBraceExpr(P: ParseState): TsNode | null {
  // {N..M} where N, M are numbers or single chars
  const save = saveLex(P.L)
  if (peek(P.L) !== '{') return null
  const oStart = P.L.b
  advance(P.L)
  const oEnd = P.L.b
  // First part
  const p1Start = P.L.b
  while (isDigit(peek(P.L)) || isIdentStart(peek(P.L))) advance(P.L)
  const p1End = P.L.b
  if (p1End === p1Start || peek(P.L) !== '.' || peek(P.L, 1) !== '.') {
    restoreLex(P.L, save)
    return null
  }
  const dotStart = P.L.b
  advance(P.L)
  advance(P.L)
  const dotEnd = P.L.b
  const p2Start = P.L.b
  while (isDigit(peek(P.L)) || isIdentStart(peek(P.L))) advance(P.L)
  const p2End = P.L.b
  if (p2End === p2Start || peek(P.L) !== '}') {
    restoreLex(P.L, save)
    return null
  }
  const cStart = P.L.b
  advance(P.L)
  const cEnd = P.L.b
  const p1Text = sliceBytes(P, p1Start, p1End)
  const p2Text = sliceBytes(P, p2Start, p2End)
  const p1IsNum = /^\d+$/.test(p1Text)
  const p2IsNum = /^\d+$/.test(p2Text)
  // Valid brace expression: both numbers OR both single chars. Mixed = reject.
  if (p1IsNum !== p2IsNum) {
    restoreLex(P.L, save)
    return null
  }
  if (!p1IsNum && (p1Text.length !== 1 || p2Text.length !== 1)) {
    restoreLex(P.L, save)
    return null
  }
  const p1Type = p1IsNum ? 'number' : 'word'
  const p2Type = p2IsNum ? 'number' : 'word'
  return mk(P, 'brace_expression', oStart, cEnd, [
    mk(P, '{', oStart, oEnd, []),
    mk(P, p1Type, p1Start, p1End, []),
    mk(P, '..', dotStart, dotEnd, []),
    mk(P, p2Type, p2Start, p2End, []),
    mk(P, '}', cStart, cEnd, []),
  ])
}

/**
 * Split a `{...}` that is NOT a range expression into single-char and text
 * word fragments, mirroring tree-sitter's tokenization. Stops at command
 * terminators so the surrounding line still splits correctly.
 */
function tryParseBraceLikeCat(P: ParseState): TsNode[] | null {
  // {a,b,c} or {} → split into word fragments like tree-sitter does
  if (peek(P.L) !== '{') return null
  const oStart = P.L.b
  advance(P.L)
  const oEnd = P.L.b
  const inner: TsNode[] = [mk(P, 'word', oStart, oEnd, [])]
  while (P.L.i < P.L.len) {
    const bc = peek(P.L)
    // SECURITY: stop at command terminators so `{foo;rm x` splits correctly.
    if (
      bc === '}' ||
      bc === '\n' ||
      bc === ';' ||
      bc === '|' ||
      bc === '&' ||
      bc === ' ' ||
      bc === '\t' ||
      bc === '<' ||
      bc === '>' ||
      bc === '(' ||
      bc === ')'
    ) {
      break
    }
    // `[` and `]` are single-char words: {o[k]} → { o [ k ] }
    if (bc === '[' || bc === ']') {
      const bStart = P.L.b
      advance(P.L)
      inner.push(mk(P, 'word', bStart, P.L.b, []))
      continue
    }
    // Run of ordinary chars up to the next splitter
    const midStart = P.L.b
    while (P.L.i < P.L.len) {
      const mc = peek(P.L)
      if (
        mc === '}' ||
        mc === '\n' ||
        mc === ';' ||
        mc === '|' ||
        mc === '&' ||
        mc === ' ' ||
        mc === '\t' ||
        mc === '<' ||
        mc === '>' ||
        mc === '(' ||
        mc === ')' ||
        mc === '[' ||
        mc === ']'
      ) {
        break
      }
      advance(P.L)
    }
    const midEnd = P.L.b
    if (midEnd > midStart) {
      const midText = sliceBytes(P, midStart, midEnd)
      const midType = /^-?\d+$/.test(midText) ? 'number' : 'word'
      inner.push(mk(P, midType, midStart, midEnd, []))
    } else {
      break
    }
  }
  // Trailing `}` (if present) becomes its own single-char word
  if (peek(P.L) === '}') {
    const cStart = P.L.b
    advance(P.L)
    inner.push(mk(P, 'word', cStart, P.L.b, []))
  }
  return inner
}

/**
 * Parse a double-quoted string starting at the current `"` into a string
 * node with string_content / expansion children.
 */
function parseDoubleQuoted(P: ParseState): TsNode {
  const qStart = P.L.b
  advance(P.L)
  const qEnd = P.L.b
  const openQ = mk(P, '"', qStart, qEnd, [])
  const parts: TsNode[] = [openQ]
  let contentStart = P.L.b
  let contentStartI = P.L.i
  const flushContent = (): void => {
    if (P.L.b > contentStart) {
      // Tree-sitter's extras rule /\s/ has higher precedence than
      // string_content (prec -1), so whitespace-only segments are elided.
      // `" ${x} "` → (string (expansion)) not (string (string_content)(expansion)(string_content)).
2352 // Note: this intentionally diverges from preserving all content — cc 2353 // tests relying on whitespace-only string_content need updating 2354 // (CCReconcile). 2355 const txt = P.src.slice(contentStartI, P.L.i) 2356 if (!/^[ \t]+$/.test(txt)) { 2357 parts.push(mk(P, 'string_content', contentStart, P.L.b, [])) 2358 } 2359 } 2360 } 2361 while (P.L.i < P.L.len) { 2362 const c = peek(P.L) 2363 if (c === '"') break 2364 if (c === '\\' && P.L.i + 1 < P.L.len) { 2365 advance(P.L) 2366 advance(P.L) 2367 continue 2368 } 2369 if (c === '\n') { 2370 // Split string_content at newline 2371 flushContent() 2372 advance(P.L) 2373 contentStart = P.L.b 2374 contentStartI = P.L.i 2375 continue 2376 } 2377 if (c === '$') { 2378 const c1 = peek(P.L, 1) 2379 if ( 2380 c1 === '(' || 2381 c1 === '{' || 2382 isIdentStart(c1) || 2383 SPECIAL_VARS.has(c1) || 2384 isDigit(c1) 2385 ) { 2386 flushContent() 2387 const exp = parseDollarLike(P) 2388 if (exp) parts.push(exp) 2389 contentStart = P.L.b 2390 contentStartI = P.L.i 2391 continue 2392 } 2393 // Bare $ not at end-of-string: tree-sitter emits it as an anonymous 2394 // '$' token, which splits string_content. $ immediately before the 2395 // closing " is absorbed into the preceding string_content. 
2396 if (c1 !== '"' && c1 !== '') { 2397 flushContent() 2398 const dS = P.L.b 2399 advance(P.L) 2400 parts.push(mk(P, '$', dS, P.L.b, [])) 2401 contentStart = P.L.b 2402 contentStartI = P.L.i 2403 continue 2404 } 2405 } 2406 if (c === '`') { 2407 flushContent() 2408 const bt = parseBacktick(P) 2409 if (bt) parts.push(bt) 2410 contentStart = P.L.b 2411 contentStartI = P.L.i 2412 continue 2413 } 2414 advance(P.L) 2415 } 2416 flushContent() 2417 let close: TsNode 2418 if (peek(P.L) === '"') { 2419 const cStart = P.L.b 2420 advance(P.L) 2421 close = mk(P, '"', cStart, P.L.b, []) 2422 } else { 2423 close = mk(P, '"', P.L.b, P.L.b, []) 2424 } 2425 parts.push(close) 2426 return mk(P, 'string', qStart, close.endIndex, parts) 2427} 2428 2429function parseDollarLike(P: ParseState): TsNode | null { 2430 const c1 = peek(P.L, 1) 2431 const dStart = P.L.b 2432 if (c1 === '(' && peek(P.L, 2) === '(') { 2433 // $(( arithmetic )) 2434 advance(P.L) 2435 advance(P.L) 2436 advance(P.L) 2437 const open = mk(P, '$((', dStart, P.L.b, []) 2438 const exprs = parseArithCommaList(P, '))', 'var') 2439 skipBlanks(P.L) 2440 let close: TsNode 2441 if (peek(P.L) === ')' && peek(P.L, 1) === ')') { 2442 const cStart = P.L.b 2443 advance(P.L) 2444 advance(P.L) 2445 close = mk(P, '))', cStart, P.L.b, []) 2446 } else { 2447 close = mk(P, '))', P.L.b, P.L.b, []) 2448 } 2449 return mk(P, 'arithmetic_expansion', dStart, close.endIndex, [ 2450 open, 2451 ...exprs, 2452 close, 2453 ]) 2454 } 2455 if (c1 === '[') { 2456 // $[ arithmetic ] — legacy bash syntax, same as $((...)) 2457 advance(P.L) 2458 advance(P.L) 2459 const open = mk(P, '$[', dStart, P.L.b, []) 2460 const exprs = parseArithCommaList(P, ']', 'var') 2461 skipBlanks(P.L) 2462 let close: TsNode 2463 if (peek(P.L) === ']') { 2464 const cStart = P.L.b 2465 advance(P.L) 2466 close = mk(P, ']', cStart, P.L.b, []) 2467 } else { 2468 close = mk(P, ']', P.L.b, P.L.b, []) 2469 } 2470 return mk(P, 'arithmetic_expansion', dStart, close.endIndex, [ 2471 
open, 2472 ...exprs, 2473 close, 2474 ]) 2475 } 2476 if (c1 === '(') { 2477 advance(P.L) 2478 advance(P.L) 2479 const open = mk(P, '$(', dStart, P.L.b, []) 2480 let body = parseStatements(P, ')') 2481 skipBlanks(P.L) 2482 let close: TsNode 2483 if (peek(P.L) === ')') { 2484 const cStart = P.L.b 2485 advance(P.L) 2486 close = mk(P, ')', cStart, P.L.b, []) 2487 } else { 2488 close = mk(P, ')', P.L.b, P.L.b, []) 2489 } 2490 // $(< file) shorthand: unwrap redirected_statement → bare file_redirect 2491 // tree-sitter emits (command_substitution (file_redirect (word))) directly 2492 if ( 2493 body.length === 1 && 2494 body[0]!.type === 'redirected_statement' && 2495 body[0]!.children.length === 1 && 2496 body[0]!.children[0]!.type === 'file_redirect' 2497 ) { 2498 body = body[0]!.children 2499 } 2500 return mk(P, 'command_substitution', dStart, close.endIndex, [ 2501 open, 2502 ...body, 2503 close, 2504 ]) 2505 } 2506 if (c1 === '{') { 2507 advance(P.L) 2508 advance(P.L) 2509 const open = mk(P, '${', dStart, P.L.b, []) 2510 const inner = parseExpansionBody(P) 2511 let close: TsNode 2512 if (peek(P.L) === '}') { 2513 const cStart = P.L.b 2514 advance(P.L) 2515 close = mk(P, '}', cStart, P.L.b, []) 2516 } else { 2517 close = mk(P, '}', P.L.b, P.L.b, []) 2518 } 2519 return mk(P, 'expansion', dStart, close.endIndex, [open, ...inner, close]) 2520 } 2521 // Simple expansion $VAR or $? 
$$ $@ etc 2522 advance(P.L) 2523 const dEnd = P.L.b 2524 const dollar = mk(P, '$', dStart, dEnd, []) 2525 const nc = peek(P.L) 2526 // $_ is special_variable_name only when not followed by more ident chars 2527 if (nc === '_' && !isIdentChar(peek(P.L, 1))) { 2528 const vStart = P.L.b 2529 advance(P.L) 2530 const vn = mk(P, 'special_variable_name', vStart, P.L.b, []) 2531 return mk(P, 'simple_expansion', dStart, P.L.b, [dollar, vn]) 2532 } 2533 if (isIdentStart(nc)) { 2534 const vStart = P.L.b 2535 while (isIdentChar(peek(P.L))) advance(P.L) 2536 const vn = mk(P, 'variable_name', vStart, P.L.b, []) 2537 return mk(P, 'simple_expansion', dStart, P.L.b, [dollar, vn]) 2538 } 2539 if (isDigit(nc)) { 2540 const vStart = P.L.b 2541 advance(P.L) 2542 const vn = mk(P, 'variable_name', vStart, P.L.b, []) 2543 return mk(P, 'simple_expansion', dStart, P.L.b, [dollar, vn]) 2544 } 2545 if (SPECIAL_VARS.has(nc)) { 2546 const vStart = P.L.b 2547 advance(P.L) 2548 const vn = mk(P, 'special_variable_name', vStart, P.L.b, []) 2549 return mk(P, 'simple_expansion', dStart, P.L.b, [dollar, vn]) 2550 } 2551 // Bare $ — just a $ leaf (tree-sitter treats trailing $ as literal) 2552 return dollar 2553} 2554 2555function parseExpansionBody(P: ParseState): TsNode[] { 2556 const out: TsNode[] = [] 2557 skipBlanks(P.L) 2558 // Bizarre cases: ${#!} ${!#} ${!##} ${!# } ${!## } all emit empty (expansion) 2559 // — both # and ! become anonymous nodes when only combined with each other 2560 // and optional trailing space before }. Note ${!##/} does NOT match (has 2561 // content after), so it parses normally as (special_variable_name)(regex). 2562 { 2563 const c0 = peek(P.L) 2564 const c1 = peek(P.L, 1) 2565 if (c0 === '#' && c1 === '!' && peek(P.L, 2) === '}') { 2566 advance(P.L) 2567 advance(P.L) 2568 return out 2569 } 2570 if (c0 === '!' 
&& c1 === '#') { 2571 // ${!#} ${!##} with optional trailing space then } 2572 let j = 2 2573 if (peek(P.L, j) === '#') j++ 2574 if (peek(P.L, j) === ' ') j++ 2575 if (peek(P.L, j) === '}') { 2576 while (j-- > 0) advance(P.L) 2577 return out 2578 } 2579 } 2580 } 2581 // Optional # prefix for length 2582 if (peek(P.L) === '#') { 2583 const s = P.L.b 2584 advance(P.L) 2585 out.push(mk(P, '#', s, P.L.b, [])) 2586 } 2587 // Optional ! prefix for indirect expansion: ${!varname} ${!prefix*} ${!prefix@} 2588 // Only when followed by an identifier — ${!} alone is special var $! 2589 // Also = ~ prefixes (zsh-style ${=var} ${~var}) 2590 const pc = peek(P.L) 2591 if ( 2592 (pc === '!' || pc === '=' || pc === '~') && 2593 (isIdentStart(peek(P.L, 1)) || isDigit(peek(P.L, 1))) 2594 ) { 2595 const s = P.L.b 2596 advance(P.L) 2597 out.push(mk(P, pc, s, P.L.b, [])) 2598 } 2599 skipBlanks(P.L) 2600 // Variable name 2601 if (isIdentStart(peek(P.L))) { 2602 const s = P.L.b 2603 while (isIdentChar(peek(P.L))) advance(P.L) 2604 out.push(mk(P, 'variable_name', s, P.L.b, [])) 2605 } else if (isDigit(peek(P.L))) { 2606 const s = P.L.b 2607 while (isDigit(peek(P.L))) advance(P.L) 2608 out.push(mk(P, 'variable_name', s, P.L.b, [])) 2609 } else if (SPECIAL_VARS.has(peek(P.L))) { 2610 const s = P.L.b 2611 advance(P.L) 2612 out.push(mk(P, 'special_variable_name', s, P.L.b, [])) 2613 } 2614 // Optional subscript [idx] — parsed arithmetically 2615 if (peek(P.L) === '[') { 2616 const varNode = out[out.length - 1] 2617 const brOpen = P.L.b 2618 advance(P.L) 2619 const brOpenNode = mk(P, '[', brOpen, P.L.b, []) 2620 const idx = parseSubscriptIndexInline(P) 2621 skipBlanks(P.L) 2622 const brClose = P.L.b 2623 if (peek(P.L) === ']') advance(P.L) 2624 const brCloseNode = mk(P, ']', brClose, P.L.b, []) 2625 if (varNode) { 2626 const kids = idx 2627 ? 
[varNode, brOpenNode, idx, brCloseNode] 2628 : [varNode, brOpenNode, brCloseNode] 2629 out[out.length - 1] = mk(P, 'subscript', varNode.startIndex, P.L.b, kids) 2630 } 2631 } 2632 skipBlanks(P.L) 2633 // Trailing * or @ for indirect expansion (${!prefix*} ${!prefix@}) or 2634 // @operator for parameter transformation (${var@U} ${var@Q}) — anonymous 2635 const tc = peek(P.L) 2636 if ((tc === '*' || tc === '@') && peek(P.L, 1) === '}') { 2637 const s = P.L.b 2638 advance(P.L) 2639 out.push(mk(P, tc, s, P.L.b, [])) 2640 return out 2641 } 2642 if (tc === '@' && isIdentStart(peek(P.L, 1))) { 2643 // ${var@U} transformation — @ is anonymous, consume op char(s) 2644 const s = P.L.b 2645 advance(P.L) 2646 out.push(mk(P, '@', s, P.L.b, [])) 2647 while (isIdentChar(peek(P.L))) advance(P.L) 2648 return out 2649 } 2650 // Operator :- := :? :+ - = ? + # ## % %% / // ^ ^^ , ,, etc. 2651 const c = peek(P.L) 2652 // Bare `:` substring operator ${var:off:len} — offset and length parsed 2653 // arithmetically. Must come BEFORE the generic operator handling so `(` after 2654 // `:` goes to parenthesized_expression not the array path. `:-` `:=` `:?` 2655 // `:+` (no space) remain default-value operators; `: -1` (with space before 2656 // -1) is substring with negative offset. 2657 if (c === ':') { 2658 const c1 = peek(P.L, 1) 2659 // `:\n` or `:}` — empty substring expansion, emits nothing (variable_name only) 2660 if (c1 === '\n' || c1 === '}') { 2661 advance(P.L) 2662 while (peek(P.L) === '\n') advance(P.L) 2663 return out 2664 } 2665 if (c1 !== '-' && c1 !== '=' && c1 !== '?' && c1 !== '+') { 2666 advance(P.L) 2667 skipBlanks(P.L) 2668 // Offset — arithmetic. `-N` at top level is a single number node per 2669 // tree-sitter; inside parens it's unary_expression(number). 
2670 const offC = peek(P.L) 2671 let off: TsNode | null 2672 if (offC === '-' && isDigit(peek(P.L, 1))) { 2673 const ns = P.L.b 2674 advance(P.L) 2675 while (isDigit(peek(P.L))) advance(P.L) 2676 off = mk(P, 'number', ns, P.L.b, []) 2677 } else { 2678 off = parseArithExpr(P, ':}', 'var') 2679 } 2680 if (off) out.push(off) 2681 skipBlanks(P.L) 2682 if (peek(P.L) === ':') { 2683 advance(P.L) 2684 skipBlanks(P.L) 2685 const lenC = peek(P.L) 2686 let len: TsNode | null 2687 if (lenC === '-' && isDigit(peek(P.L, 1))) { 2688 const ns = P.L.b 2689 advance(P.L) 2690 while (isDigit(peek(P.L))) advance(P.L) 2691 len = mk(P, 'number', ns, P.L.b, []) 2692 } else { 2693 len = parseArithExpr(P, '}', 'var') 2694 } 2695 if (len) out.push(len) 2696 } 2697 return out 2698 } 2699 } 2700 if ( 2701 c === ':' || 2702 c === '#' || 2703 c === '%' || 2704 c === '/' || 2705 c === '^' || 2706 c === ',' || 2707 c === '-' || 2708 c === '=' || 2709 c === '?' || 2710 c === '+' 2711 ) { 2712 const s = P.L.b 2713 const c1 = peek(P.L, 1) 2714 let op = c 2715 if (c === ':' && (c1 === '-' || c1 === '=' || c1 === '?' || c1 === '+')) { 2716 advance(P.L) 2717 advance(P.L) 2718 op = c + c1 2719 } else if ( 2720 (c === '#' || c === '%' || c === '/' || c === '^' || c === ',') && 2721 c1 === c 2722 ) { 2723 // Doubled operators: ## %% // ^^ ,, 2724 advance(P.L) 2725 advance(P.L) 2726 op = c + c 2727 } else { 2728 advance(P.L) 2729 } 2730 out.push(mk(P, op, s, P.L.b, [])) 2731 // Rest is the default/replacement — parse as word or regex until } 2732 // Pattern-matching operators (# ## % %% / // ^ ^^ , ,,) emit regex; 2733 // value-substitution operators (:- := :? :+ - = ? + :) emit word. 2734 // `/` and `//` split at next `/` into (regex)+(word) for pat/repl. 
2735 const isPattern = 2736 op === '#' || 2737 op === '##' || 2738 op === '%' || 2739 op === '%%' || 2740 op === '/' || 2741 op === '//' || 2742 op === '^' || 2743 op === '^^' || 2744 op === ',' || 2745 op === ',,' 2746 if (op === '/' || op === '//') { 2747 // Optional /# or /% anchor prefix — anonymous node 2748 const ac = peek(P.L) 2749 if (ac === '#' || ac === '%') { 2750 const aStart = P.L.b 2751 advance(P.L) 2752 out.push(mk(P, ac, aStart, P.L.b, [])) 2753 } 2754 // Pattern: per grammar _expansion_regex_replacement, pattern is 2755 // choice(regex, string, cmd_sub, seq(string, regex)). If it STARTS 2756 // with ", emit (string) and any trailing chars become (regex). 2757 // `${v//"${old}"/}` → (string(expansion)); `${v//"${c}"\//}` → 2758 // (string)(regex). 2759 if (peek(P.L) === '"') { 2760 out.push(parseDoubleQuoted(P)) 2761 const tail = parseExpansionRest(P, 'regex', true) 2762 if (tail) out.push(tail) 2763 } else { 2764 const regex = parseExpansionRest(P, 'regex', true) 2765 if (regex) out.push(regex) 2766 } 2767 if (peek(P.L) === '/') { 2768 const sepStart = P.L.b 2769 advance(P.L) 2770 out.push(mk(P, '/', sepStart, P.L.b, [])) 2771 // Replacement: per grammar, choice includes `seq(cmd_sub, word)` 2772 // which emits TWO siblings (not concatenation). Also `(` at start 2773 // of replacement is a regular word char, NOT array — unlike `:-` 2774 // default-value context. `${v/(/(Gentoo ${x}, }` replacement 2775 // `(Gentoo ${x}, ` is (concatenation (word)(expansion)(word)). 2776 const repl = parseExpansionRest(P, 'replword', false) 2777 if (repl) { 2778 // seq(cmd_sub, word) special case → siblings. Detected when 2779 // replacement is a concatenation of exactly 2 parts with first 2780 // being command_substitution. 2781 if ( 2782 repl.type === 'concatenation' && 2783 repl.children.length === 2 && 2784 repl.children[0]!.type === 'command_substitution' 2785 ) { 2786 out.push(repl.children[0]!) 2787 out.push(repl.children[1]!) 
2788 } else { 2789 out.push(repl) 2790 } 2791 } 2792 } 2793 } else if (op === '#' || op === '##' || op === '%' || op === '%%') { 2794 // Pattern-removal: per grammar _expansion_regex, pattern is 2795 // repeat(choice(regex, string, raw_string, ')')). Each quote/string 2796 // is a SIBLING, not absorbed into one regex. `${f%'str'*}` → 2797 // (raw_string)(regex); `${f/'str'*}` (slash) stays single regex. 2798 for (const p of parseExpansionRegexSegmented(P)) out.push(p) 2799 } else { 2800 const rest = parseExpansionRest(P, isPattern ? 'regex' : 'word', false) 2801 if (rest) out.push(rest) 2802 } 2803 } 2804 return out 2805} 2806 2807function parseExpansionRest( 2808 P: ParseState, 2809 nodeType: string, 2810 stopAtSlash: boolean, 2811): TsNode | null { 2812 // Don't skipBlanks — `${var:- }` space IS the word. Stop at } or newline 2813 // (`${var:\n}` emits no word). stopAtSlash=true stops at `/` for pat/repl 2814 // split in ${var/pat/repl}. nodeType 'replword' is word-mode for the 2815 // replacement in `/` `//` — same as 'word' but `(` is NOT array. 2816 const start = P.L.b 2817 // Value-substitution RHS starting with `(` parses as array: ${var:-(x)} → 2818 // (expansion (variable_name) (array (word))). Only for 'word' context (not 2819 // pattern-matching operators which emit regex, and not 'replword' where `(` 2820 // is a regular char per grammar `_expansion_regex_replacement`). 
2821 if (nodeType === 'word' && peek(P.L) === '(') { 2822 advance(P.L) 2823 const open = mk(P, '(', start, P.L.b, []) 2824 const elems: TsNode[] = [open] 2825 while (P.L.i < P.L.len) { 2826 skipBlanks(P.L) 2827 const c = peek(P.L) 2828 if (c === ')' || c === '}' || c === '\n' || c === '') break 2829 const wStart = P.L.b 2830 while (P.L.i < P.L.len) { 2831 const wc = peek(P.L) 2832 if ( 2833 wc === ')' || 2834 wc === '}' || 2835 wc === ' ' || 2836 wc === '\t' || 2837 wc === '\n' || 2838 wc === '' 2839 ) { 2840 break 2841 } 2842 advance(P.L) 2843 } 2844 if (P.L.b > wStart) elems.push(mk(P, 'word', wStart, P.L.b, [])) 2845 else break 2846 } 2847 if (peek(P.L) === ')') { 2848 const cStart = P.L.b 2849 advance(P.L) 2850 elems.push(mk(P, ')', cStart, P.L.b, [])) 2851 } 2852 while (peek(P.L) === '\n') advance(P.L) 2853 return mk(P, 'array', start, P.L.b, elems) 2854 } 2855 // REGEX mode: flat single-span scan. Quotes are opaque (skipped past so 2856 // `/` inside them doesn't break stopAtSlash), but NOT emitted as separate 2857 // nodes — the entire range becomes one regex node. 2858 if (nodeType === 'regex') { 2859 let braceDepth = 0 2860 while (P.L.i < P.L.len) { 2861 const c = peek(P.L) 2862 if (c === '\n') break 2863 if (braceDepth === 0) { 2864 if (c === '}') break 2865 if (stopAtSlash && c === '/') break 2866 } 2867 if (c === '\\' && P.L.i + 1 < P.L.len) { 2868 advance(P.L) 2869 advance(P.L) 2870 continue 2871 } 2872 if (c === '"' || c === "'") { 2873 advance(P.L) 2874 while (P.L.i < P.L.len && peek(P.L) !== c) { 2875 if (peek(P.L) === '\\' && P.L.i + 1 < P.L.len) advance(P.L) 2876 advance(P.L) 2877 } 2878 if (peek(P.L) === c) advance(P.L) 2879 continue 2880 } 2881 // Skip past nested ${...} $(...) $[...] 
so their } / don't terminate us 2882 if (c === '$') { 2883 const c1 = peek(P.L, 1) 2884 if (c1 === '{') { 2885 let d = 0 2886 advance(P.L) 2887 advance(P.L) 2888 d++ 2889 while (P.L.i < P.L.len && d > 0) { 2890 const nc = peek(P.L) 2891 if (nc === '{') d++ 2892 else if (nc === '}') d-- 2893 advance(P.L) 2894 } 2895 continue 2896 } 2897 if (c1 === '(') { 2898 let d = 0 2899 advance(P.L) 2900 advance(P.L) 2901 d++ 2902 while (P.L.i < P.L.len && d > 0) { 2903 const nc = peek(P.L) 2904 if (nc === '(') d++ 2905 else if (nc === ')') d-- 2906 advance(P.L) 2907 } 2908 continue 2909 } 2910 } 2911 if (c === '{') braceDepth++ 2912 else if (c === '}' && braceDepth > 0) braceDepth-- 2913 advance(P.L) 2914 } 2915 const end = P.L.b 2916 while (peek(P.L) === '\n') advance(P.L) 2917 if (end === start) return null 2918 return mk(P, 'regex', start, end, []) 2919 } 2920 // WORD mode: segmenting parser — recognize nested ${...}, $(...), $'...', 2921 // "...", '...', $ident, <(...)/>(...); bare chars accumulate into word 2922 // segments. Multiple parts → wrapped in concatenation. 2923 const parts: TsNode[] = [] 2924 let segStart = P.L.b 2925 let braceDepth = 0 2926 const flushSeg = (): void => { 2927 if (P.L.b > segStart) { 2928 parts.push(mk(P, 'word', segStart, P.L.b, [])) 2929 } 2930 } 2931 while (P.L.i < P.L.len) { 2932 const c = peek(P.L) 2933 if (c === '\n') break 2934 if (braceDepth === 0) { 2935 if (c === '}') break 2936 if (stopAtSlash && c === '/') break 2937 } 2938 if (c === '\\' && P.L.i + 1 < P.L.len) { 2939 advance(P.L) 2940 advance(P.L) 2941 continue 2942 } 2943 const c1 = peek(P.L, 1) 2944 if (c === '$') { 2945 if (c1 === '{' || c1 === '(' || c1 === '[') { 2946 flushSeg() 2947 const exp = parseDollarLike(P) 2948 if (exp) parts.push(exp) 2949 segStart = P.L.b 2950 continue 2951 } 2952 if (c1 === "'") { 2953 // $'...' 
ANSI-C string 2954 flushSeg() 2955 const aStart = P.L.b 2956 advance(P.L) 2957 advance(P.L) 2958 while (P.L.i < P.L.len && peek(P.L) !== "'") { 2959 if (peek(P.L) === '\\' && P.L.i + 1 < P.L.len) advance(P.L) 2960 advance(P.L) 2961 } 2962 if (peek(P.L) === "'") advance(P.L) 2963 parts.push(mk(P, 'ansi_c_string', aStart, P.L.b, [])) 2964 segStart = P.L.b 2965 continue 2966 } 2967 if (isIdentStart(c1) || isDigit(c1) || SPECIAL_VARS.has(c1)) { 2968 flushSeg() 2969 const exp = parseDollarLike(P) 2970 if (exp) parts.push(exp) 2971 segStart = P.L.b 2972 continue 2973 } 2974 } 2975 if (c === '"') { 2976 flushSeg() 2977 parts.push(parseDoubleQuoted(P)) 2978 segStart = P.L.b 2979 continue 2980 } 2981 if (c === "'") { 2982 flushSeg() 2983 const rStart = P.L.b 2984 advance(P.L) 2985 while (P.L.i < P.L.len && peek(P.L) !== "'") advance(P.L) 2986 if (peek(P.L) === "'") advance(P.L) 2987 parts.push(mk(P, 'raw_string', rStart, P.L.b, [])) 2988 segStart = P.L.b 2989 continue 2990 } 2991 if ((c === '<' || c === '>') && c1 === '(') { 2992 flushSeg() 2993 const ps = parseProcessSub(P) 2994 if (ps) parts.push(ps) 2995 segStart = P.L.b 2996 continue 2997 } 2998 if (c === '`') { 2999 flushSeg() 3000 const bt = parseBacktick(P) 3001 if (bt) parts.push(bt) 3002 segStart = P.L.b 3003 continue 3004 } 3005 // Brace tracking so nested {a,b} brace-expansion chars don't prematurely 3006 // terminate (rare, but the `?` in `${cond}? (` should be treated as word). 3007 if (c === '{') braceDepth++ 3008 else if (c === '}' && braceDepth > 0) braceDepth-- 3009 advance(P.L) 3010 } 3011 flushSeg() 3012 // Consume trailing newlines before } so caller sees } 3013 while (peek(P.L) === '\n') advance(P.L) 3014 // Tree-sitter skips leading whitespace (extras) in expansion RHS when 3015 // there's content after: `${2+ ${2}}` → just (expansion). But `${v:- }` 3016 // (space-only RHS) keeps the space as (word). So drop leading whitespace- 3017 // only word segment if it's NOT the only part. 
3018 if ( 3019 parts.length > 1 && 3020 parts[0]!.type === 'word' && 3021 /^[ \t]+$/.test(parts[0]!.text) 3022 ) { 3023 parts.shift() 3024 } 3025 if (parts.length === 0) return null 3026 if (parts.length === 1) return parts[0]! 3027 // Multiple parts: wrap in concatenation (word mode keeps concat wrapping; 3028 // regex mode also concats per tree-sitter for mixed quote+glob patterns). 3029 const last = parts[parts.length - 1]! 3030 return mk(P, 'concatenation', parts[0]!.startIndex, last.endIndex, parts) 3031} 3032 3033// Pattern for # ## % %% operators — per grammar _expansion_regex: 3034// repeat(choice(regex, string, raw_string, ')', /\s+/→regex)). Each quote 3035// becomes a SIBLING node, not absorbed. `${f%'str'*}` → (raw_string)(regex). 3036function parseExpansionRegexSegmented(P: ParseState): TsNode[] { 3037 const out: TsNode[] = [] 3038 let segStart = P.L.b 3039 const flushRegex = (): void => { 3040 if (P.L.b > segStart) out.push(mk(P, 'regex', segStart, P.L.b, [])) 3041 } 3042 while (P.L.i < P.L.len) { 3043 const c = peek(P.L) 3044 if (c === '}' || c === '\n') break 3045 if (c === '\\' && P.L.i + 1 < P.L.len) { 3046 advance(P.L) 3047 advance(P.L) 3048 continue 3049 } 3050 if (c === '"') { 3051 flushRegex() 3052 out.push(parseDoubleQuoted(P)) 3053 segStart = P.L.b 3054 continue 3055 } 3056 if (c === "'") { 3057 flushRegex() 3058 const rStart = P.L.b 3059 advance(P.L) 3060 while (P.L.i < P.L.len && peek(P.L) !== "'") advance(P.L) 3061 if (peek(P.L) === "'") advance(P.L) 3062 out.push(mk(P, 'raw_string', rStart, P.L.b, [])) 3063 segStart = P.L.b 3064 continue 3065 } 3066 // Nested ${...} $(...) 
— opaque scan so their } doesn't terminate us 3067 if (c === '$') { 3068 const c1 = peek(P.L, 1) 3069 if (c1 === '{') { 3070 let d = 1 3071 advance(P.L) 3072 advance(P.L) 3073 while (P.L.i < P.L.len && d > 0) { 3074 const nc = peek(P.L) 3075 if (nc === '{') d++ 3076 else if (nc === '}') d-- 3077 advance(P.L) 3078 } 3079 continue 3080 } 3081 if (c1 === '(') { 3082 let d = 1 3083 advance(P.L) 3084 advance(P.L) 3085 while (P.L.i < P.L.len && d > 0) { 3086 const nc = peek(P.L) 3087 if (nc === '(') d++ 3088 else if (nc === ')') d-- 3089 advance(P.L) 3090 } 3091 continue 3092 } 3093 } 3094 advance(P.L) 3095 } 3096 flushRegex() 3097 while (peek(P.L) === '\n') advance(P.L) 3098 return out 3099} 3100 3101function parseBacktick(P: ParseState): TsNode | null { 3102 const start = P.L.b 3103 advance(P.L) 3104 const open = mk(P, '`', start, P.L.b, []) 3105 P.inBacktick++ 3106 // Parse statements inline — stop at closing backtick 3107 const body: TsNode[] = [] 3108 while (true) { 3109 skipBlanks(P.L) 3110 if (peek(P.L) === '`' || peek(P.L) === '') break 3111 const save = saveLex(P.L) 3112 const t = nextToken(P.L, 'cmd') 3113 if (t.type === 'EOF' || t.type === 'BACKTICK') { 3114 restoreLex(P.L, save) 3115 break 3116 } 3117 if (t.type === 'NEWLINE') continue 3118 restoreLex(P.L, save) 3119 const stmt = parseAndOr(P) 3120 if (!stmt) break 3121 body.push(stmt) 3122 skipBlanks(P.L) 3123 if (peek(P.L) === '`') break 3124 const save2 = saveLex(P.L) 3125 const sep = nextToken(P.L, 'cmd') 3126 if (sep.type === 'OP' && (sep.value === ';' || sep.value === '&')) { 3127 body.push(leaf(P, sep.value, sep)) 3128 } else if (sep.type !== 'NEWLINE') { 3129 restoreLex(P.L, save2) 3130 } 3131 } 3132 P.inBacktick-- 3133 let close: TsNode 3134 if (peek(P.L) === '`') { 3135 const cStart = P.L.b 3136 advance(P.L) 3137 close = mk(P, '`', cStart, P.L.b, []) 3138 } else { 3139 close = mk(P, '`', P.L.b, P.L.b, []) 3140 } 3141 // Empty backticks (whitespace/newline only) are elided entirely by 3142 // 
tree-sitter — used as a line-continuation hack: "foo"`<newline>`"bar" 3143 // → (concatenation (string) (string)) with no command_substitution. 3144 if (body.length === 0) return null 3145 return mk(P, 'command_substitution', start, close.endIndex, [ 3146 open, 3147 ...body, 3148 close, 3149 ]) 3150} 3151 3152function parseIf(P: ParseState, ifTok: Token): TsNode { 3153 const ifKw = leaf(P, 'if', ifTok) 3154 const kids: TsNode[] = [ifKw] 3155 const cond = parseStatements(P, null) 3156 kids.push(...cond) 3157 consumeKeyword(P, 'then', kids) 3158 const body = parseStatements(P, null) 3159 kids.push(...body) 3160 while (true) { 3161 const save = saveLex(P.L) 3162 const t = nextToken(P.L, 'cmd') 3163 if (t.type === 'WORD' && t.value === 'elif') { 3164 const eKw = leaf(P, 'elif', t) 3165 const eCond = parseStatements(P, null) 3166 const eKids: TsNode[] = [eKw, ...eCond] 3167 consumeKeyword(P, 'then', eKids) 3168 const eBody = parseStatements(P, null) 3169 eKids.push(...eBody) 3170 const last = eKids[eKids.length - 1]! 3171 kids.push(mk(P, 'elif_clause', eKw.startIndex, last.endIndex, eKids)) 3172 } else if (t.type === 'WORD' && t.value === 'else') { 3173 const elKw = leaf(P, 'else', t) 3174 const elBody = parseStatements(P, null) 3175 const last = elBody.length > 0 ? elBody[elBody.length - 1]! : elKw 3176 kids.push( 3177 mk(P, 'else_clause', elKw.startIndex, last.endIndex, [elKw, ...elBody]), 3178 ) 3179 } else { 3180 restoreLex(P.L, save) 3181 break 3182 } 3183 } 3184 consumeKeyword(P, 'fi', kids) 3185 const last = kids[kids.length - 1]! 3186 return mk(P, 'if_statement', ifKw.startIndex, last.endIndex, kids) 3187} 3188 3189function parseWhile(P: ParseState, kwTok: Token): TsNode { 3190 const kw = leaf(P, kwTok.value, kwTok) 3191 const kids: TsNode[] = [kw] 3192 const cond = parseStatements(P, null) 3193 kids.push(...cond) 3194 const dg = parseDoGroup(P) 3195 if (dg) kids.push(dg) 3196 const last = kids[kids.length - 1]! 
  return mk(P, 'while_statement', kw.startIndex, last.endIndex, kids)
}

/**
 * Parse a `for` (or `select`) statement after its keyword token has been
 * consumed. Two shapes:
 *  - C-style `for (( init; cond; update ))` → c_style_for_statement, with a
 *    `do … done` group or `{ … }` compound body. Gated on the keyword text
 *    being exactly 'for'.
 *  - `for VAR [in WORDS]; do … done` → for_statement.
 */
function parseFor(P: ParseState, forTok: Token): TsNode {
  const forKw = leaf(P, forTok.value, forTok)
  skipBlanks(P.L)
  // C-style for (( ; ; )) — only for `for`, not `select`
  if (forTok.value === 'for' && peek(P.L) === '(' && peek(P.L, 1) === '(') {
    const oStart = P.L.b
    advance(P.L)
    advance(P.L)
    const open = mk(P, '((', oStart, P.L.b, [])
    const kids: TsNode[] = [forKw, open]
    // init; cond; update — all three use 'assign' mode so `c = expr` emits
    // variable_assignment, while bare idents (c in `c<=5`) → word. Each
    // clause may be a comma-separated list.
    for (let k = 0; k < 3; k++) {
      skipBlanks(P.L)
      // First two clauses stop at ';', the update clause stops at '))'.
      const es = parseArithCommaList(P, k < 2 ? ';' : '))', 'assign')
      kids.push(...es)
      if (k < 2) {
        if (peek(P.L) === ';') {
          const s = P.L.b
          advance(P.L)
          kids.push(mk(P, ';', s, P.L.b, []))
        }
      }
    }
    skipBlanks(P.L)
    if (peek(P.L) === ')' && peek(P.L, 1) === ')') {
      const cStart = P.L.b
      advance(P.L)
      advance(P.L)
      kids.push(mk(P, '))', cStart, P.L.b, []))
    }
    // Optional ; or newline
    const save = saveLex(P.L)
    const sep = nextToken(P.L, 'cmd')
    if (sep.type === 'OP' && sep.value === ';') {
      kids.push(leaf(P, ';', sep))
    } else if (sep.type !== 'NEWLINE') {
      restoreLex(P.L, save)
    }
    const dg = parseDoGroup(P)
    if (dg) {
      kids.push(dg)
    } else {
      // C-style for can also use `{ ... }` body instead of `do ... done`
      skipNewlines(P)
      skipBlanks(P.L)
      if (peek(P.L) === '{') {
        const bOpen = P.L.b
        advance(P.L)
        const brace = mk(P, '{', bOpen, P.L.b, [])
        const body = parseStatements(P, '}')
        let bClose: TsNode
        if (peek(P.L) === '}') {
          const cs = P.L.b
          advance(P.L)
          bClose = mk(P, '}', cs, P.L.b, [])
        } else {
          // Unterminated body: synthesize a zero-width '}' so spans stay valid.
          bClose = mk(P, '}', P.L.b, P.L.b, [])
        }
        kids.push(
          mk(P, 'compound_statement', brace.startIndex, bClose.endIndex, [
            brace,
            ...body,
            bClose,
          ]),
        )
      }
    }
    const last = kids[kids.length - 1]!
    return mk(P, 'c_style_for_statement', forKw.startIndex, last.endIndex, kids)
  }
  // Regular for VAR in words; do ... done
  const kids: TsNode[] = [forKw]
  const varTok = nextToken(P.L, 'arg')
  kids.push(mk(P, 'variable_name', varTok.start, varTok.end, []))
  skipBlanks(P.L)
  const save = saveLex(P.L)
  const inTok = nextToken(P.L, 'arg')
  if (inTok.type === 'WORD' && inTok.value === 'in') {
    kids.push(leaf(P, 'in', inTok))
    // Word list runs until ';', newline, or EOF.
    while (true) {
      skipBlanks(P.L)
      const c = peek(P.L)
      if (c === ';' || c === '\n' || c === '') break
      const w = parseWord(P, 'arg')
      if (!w) break
      kids.push(w)
    }
  } else {
    // No `in` clause (`for x; do …` iterates "$@") — rewind the lookahead.
    restoreLex(P.L, save)
  }
  // Separator
  const save2 = saveLex(P.L)
  const sep = nextToken(P.L, 'cmd')
  if (sep.type === 'OP' && sep.value === ';') {
    kids.push(leaf(P, ';', sep))
  } else if (sep.type !== 'NEWLINE') {
    restoreLex(P.L, save2)
  }
  const dg = parseDoGroup(P)
  if (dg) kids.push(dg)
  const last = kids[kids.length - 1]!
  return mk(P, 'for_statement', forKw.startIndex, last.endIndex, kids)
}

/**
 * Parse a `do … done` group. Returns null (with lexer state restored) when
 * the next command-position token is not the `do` keyword.
 */
function parseDoGroup(P: ParseState): TsNode | null {
  skipNewlines(P)
  const save = saveLex(P.L)
  const doTok = nextToken(P.L, 'cmd')
  if (doTok.type !== 'WORD' || doTok.value !== 'do') {
    restoreLex(P.L, save)
    return null
  }
  const doKw = leaf(P, 'do', doTok)
  const body = parseStatements(P, null)
  const kids: TsNode[] = [doKw, ...body]
  consumeKeyword(P, 'done', kids)
  const last = kids[kids.length - 1]!
  return mk(P, 'do_group', doKw.startIndex, last.endIndex, kids)
}

/**
 * Parse `case WORD in … esac` after the `case` keyword token. Accumulates
 * case_item children until `esac` or EOF.
 */
function parseCase(P: ParseState, caseTok: Token): TsNode {
  const caseKw = leaf(P, 'case', caseTok)
  const kids: TsNode[] = [caseKw]
  skipBlanks(P.L)
  const word = parseWord(P, 'arg')
  if (word) kids.push(word)
  skipBlanks(P.L)
  consumeKeyword(P, 'in', kids)
  skipNewlines(P)
  while (true) {
    skipBlanks(P.L)
    skipNewlines(P)
    const save = saveLex(P.L)
    const t = nextToken(P.L, 'arg')
    if (t.type === 'WORD' && t.value === 'esac') {
      kids.push(leaf(P, 'esac', t))
      break
    }
    if (t.type === 'EOF') break
    restoreLex(P.L, save)
    const item = parseCaseItem(P)
    if (!item) break
    kids.push(item)
  }
  const last = kids[kids.length - 1]!
  return mk(P, 'case_statement', caseKw.startIndex, last.endIndex, kids)
}

/**
 * Parse one `pattern[|pattern…]) body ;;` arm of a case statement.
 * Returns null when nothing at all could be consumed. Terminators `;;`,
 * `;&`, `;;&` are attached as leaf children when present.
 */
function parseCaseItem(P: ParseState): TsNode | null {
  skipBlanks(P.L)
  const start = P.L.b
  const kids: TsNode[] = []
  // Optional leading '(' before pattern — bash allows (pattern) syntax
  if (peek(P.L) === '(') {
    const s = P.L.b
    advance(P.L)
    kids.push(mk(P, '(', s, P.L.b, []))
  }
  // Pattern(s)
  let isFirstAlt = true
  while (true) {
    skipBlanks(P.L)
    const c = peek(P.L)
    if (c === ')' || c === '') break
    const pats = parseCasePattern(P)
    if (pats.length === 0) break
    // tree-sitter quirk: first alternative with quotes is inlined as flat
    // siblings; subsequent alternatives are wrapped in (concatenation) with
    // `word` instead of `extglob_pattern` for bare segments.
    if (!isFirstAlt && pats.length > 1) {
      const rewritten = pats.map(p =>
        p.type === 'extglob_pattern'
          ? mk(P, 'word', p.startIndex, p.endIndex, [])
          : p,
      )
      const first = rewritten[0]!
      const last = rewritten[rewritten.length - 1]!
      kids.push(
        mk(P, 'concatenation', first.startIndex, last.endIndex, rewritten),
      )
    } else {
      kids.push(...pats)
    }
    isFirstAlt = false
    skipBlanks(P.L)
    // \<newline> line continuation between alternatives
    if (peek(P.L) === '\\' && peek(P.L, 1) === '\n') {
      advance(P.L)
      advance(P.L)
      skipBlanks(P.L)
    }
    if (peek(P.L) === '|') {
      const s = P.L.b
      advance(P.L)
      kids.push(mk(P, '|', s, P.L.b, []))
      // \<newline> after | is also a line continuation
      if (peek(P.L) === '\\' && peek(P.L, 1) === '\n') {
        advance(P.L)
        advance(P.L)
      }
    } else {
      break
    }
  }
  if (peek(P.L) === ')') {
    const s = P.L.b
    advance(P.L)
    kids.push(mk(P, ')', s, P.L.b, []))
  }
  const body = parseStatements(P, null)
  kids.push(...body)
  const save = saveLex(P.L)
  const term = nextToken(P.L, 'cmd')
  if (
    term.type === 'OP' &&
    (term.value === ';;' || term.value === ';&' || term.value === ';;&')
  ) {
    kids.push(leaf(P, term.value, term))
  } else {
    restoreLex(P.L, save)
  }
  if (kids.length === 0) return null
  // tree-sitter quirk: case_item with EMPTY body and a single pattern matching
  // extglob-operator-char-prefix (no actual glob metachars) downgrades to word.
  // `-o) owner=$2 ;;` (has body) → extglob_pattern; `-g) ;;` (empty) → word.
  if (body.length === 0) {
    for (let i = 0; i < kids.length; i++) {
      const k = kids[i]!
      if (k.type !== 'extglob_pattern') continue
      const text = sliceBytes(P, k.startIndex, k.endIndex)
      if (/^[-+?*@!][a-zA-Z]/.test(text) && !/[*?(]/.test(text)) {
        kids[i] = mk(P, 'word', k.startIndex, k.endIndex, [])
      }
    }
  }
  const last = kids[kids.length - 1]!
  return mk(P, 'case_item', start, last.endIndex, kids)
}

/**
 * Scan one case-pattern alternative and classify it into tree-sitter node
 * shapes (extglob_pattern / word / concatenation-via-parseWord / segmented
 * quote split). Returns [] when nothing was consumed. Backtracks via saveLex
 * when a re-scan strategy (segmented or parseWord) is chosen.
 */
function parseCasePattern(P: ParseState): TsNode[] {
  skipBlanks(P.L)
  const save = saveLex(P.L)
  const start = P.L.b
  const startI = P.L.i
  let parenDepth = 0
  let hasDollar = false
  let hasBracketOutsideParen = false
  let hasQuote = false
  while (P.L.i < P.L.len) {
    const c = peek(P.L)
    if (c === '\\' && P.L.i + 1 < P.L.len) {
      // Escaped char — consume both (handles `bar\ baz` as single pattern)
      // \<newline> is a line continuation; eat it but stay in pattern.
      advance(P.L)
      advance(P.L)
      continue
    }
    if (c === '"' || c === "'") {
      hasQuote = true
      // Skip past the quoted segment so its content (spaces, |, etc.) doesn't
      // break the peek-ahead scan.
      advance(P.L)
      while (P.L.i < P.L.len && peek(P.L) !== c) {
        if (peek(P.L) === '\\' && P.L.i + 1 < P.L.len) advance(P.L)
        advance(P.L)
      }
      if (peek(P.L) === c) advance(P.L)
      continue
    }
    // Paren counting: any ( inside pattern opens a scope; don't break at ) or |
    // until balanced. Handles extglob *(a|b) and nested shapes *([0-9])([0-9]).
    if (c === '(') {
      parenDepth++
      advance(P.L)
      continue
    }
    if (parenDepth > 0) {
      if (c === ')') {
        parenDepth--
        advance(P.L)
        continue
      }
      if (c === '\n') break
      advance(P.L)
      continue
    }
    if (c === ')' || c === '|' || c === ' ' || c === '\t' || c === '\n') break
    if (c === '$') hasDollar = true
    if (c === '[') hasBracketOutsideParen = true
    advance(P.L)
  }
  if (P.L.b === start) return []
  const text = P.src.slice(startI, P.L.i)
  const hasExtglobParen = /[*?+@!]\(/.test(text)
  // Quoted segments in pattern: tree-sitter splits at quote boundaries into
  // multiple sibling nodes. `*"foo"*` → (extglob_pattern)(string)(extglob_pattern).
  // Re-scan with a segmenting pass.
  if (hasQuote && !hasExtglobParen) {
    restoreLex(P.L, save)
    return parseCasePatternSegmented(P)
  }
  // tree-sitter splits patterns with [ or $ into concatenation via word parsing
  // UNLESS pattern has extglob parens (those override and emit extglob_pattern).
  // `*.[1357]` → concat(word word number word); `${PN}.pot` → concat(expansion word);
  // but `*([0-9])` → extglob_pattern (has extglob paren).
  if (!hasExtglobParen && (hasDollar || hasBracketOutsideParen)) {
    restoreLex(P.L, save)
    const w = parseWord(P, 'arg')
    return w ? [w] : []
  }
  // Patterns starting with extglob operator chars (+ - ? * @ !) followed by
  // identifier chars are extglob_pattern per tree-sitter, even without parens
  // or glob metachars. `-o)` → extglob_pattern; plain `foo)` → word.
  const type =
    hasExtglobParen || /[*?]/.test(text) || /^[-+?*@!][a-zA-Z]/.test(text)
      ? 'extglob_pattern'
      : 'word'
  return [mk(P, type, start, P.L.b, [])]
}

// Segmented scan for case patterns containing quotes: `*"foo"*` →
// [extglob_pattern, string, extglob_pattern]. Bare segments → extglob_pattern
// if they have */?, else word. Stops at ) | space tab newline outside quotes.
function parseCasePatternSegmented(P: ParseState): TsNode[] {
  const parts: TsNode[] = []
  let segStart = P.L.b
  let segStartI = P.L.i
  // Emit the pending bare-text run (if any) as a node.
  const flushSeg = (): void => {
    if (P.L.i > segStartI) {
      const t = P.src.slice(segStartI, P.L.i)
      const type = /[*?]/.test(t) ? 'extglob_pattern' : 'word'
      parts.push(mk(P, type, segStart, P.L.b, []))
    }
  }
  while (P.L.i < P.L.len) {
    const c = peek(P.L)
    if (c === '\\' && P.L.i + 1 < P.L.len) {
      advance(P.L)
      advance(P.L)
      continue
    }
    if (c === '"') {
      flushSeg()
      parts.push(parseDoubleQuoted(P))
      segStart = P.L.b
      segStartI = P.L.i
      continue
    }
    if (c === "'") {
      flushSeg()
      const tok = nextToken(P.L, 'arg')
      parts.push(leaf(P, 'raw_string', tok))
      segStart = P.L.b
      segStartI = P.L.i
      continue
    }
    if (c === ')' || c === '|' || c === ' ' || c === '\t' || c === '\n') break
    advance(P.L)
  }
  flushSeg()
  return parts
}

/**
 * Parse `function NAME [()] BODY` after the `function` keyword token.
 * The body is any single command; redirects attached to a compound body are
 * hoisted to function_definition level to match the tree-sitter grammar.
 */
function parseFunction(P: ParseState, fnTok: Token): TsNode {
  const fnKw = leaf(P, 'function', fnTok)
  skipBlanks(P.L)
  const nameTok = nextToken(P.L, 'arg')
  const name = mk(P, 'word', nameTok.start, nameTok.end, [])
  const kids: TsNode[] = [fnKw, name]
  skipBlanks(P.L)
  if (peek(P.L) === '(' && peek(P.L, 1) === ')') {
    const o = nextToken(P.L, 'cmd')
    const c = nextToken(P.L, 'cmd')
    kids.push(leaf(P, '(', o))
    kids.push(leaf(P, ')', c))
  }
  skipBlanks(P.L)
  skipNewlines(P)
  const body = parseCommand(P)
  if (body) {
    // Hoist redirects from redirected_statement(compound_statement, ...) to
    // function_definition level per tree-sitter grammar
    if (
      body.type === 'redirected_statement' &&
      body.children.length >= 2 &&
      body.children[0]!.type === 'compound_statement'
    ) {
      kids.push(...body.children)
    } else {
      kids.push(body)
    }
  }
  const last = kids[kids.length - 1]!
  return mk(P, 'function_definition', fnKw.startIndex, last.endIndex, kids)
}

/**
 * Parse a declaration builtin (export/declare/typeset/readonly/local — see
 * DECL_KEYWORDS) and its arguments into declaration_command. Arguments may be
 * assignments, quoted words/expansions, flags (-a), or bare variable names.
 */
function parseDeclaration(P: ParseState, kwTok: Token): TsNode {
  const kw = leaf(P, kwTok.value, kwTok)
  const kids: TsNode[] = [kw]
  while (true) {
    skipBlanks(P.L)
    const c = peek(P.L)
    // Stop at command terminators and redirection operators.
    if (
      c === '' ||
      c === '\n' ||
      c === ';' ||
      c === '&' ||
      c === '|' ||
      c === ')' ||
      c === '<' ||
      c === '>'
    ) {
      break
    }
    const a = tryParseAssignment(P)
    if (a) {
      kids.push(a)
      continue
    }
    // Quoted string or concatenation: `export "FOO=bar"`, `export 'X'`
    if (c === '"' || c === "'" || c === '$') {
      const w = parseWord(P, 'arg')
      if (w) {
        kids.push(w)
        continue
      }
      break
    }
    // Flag like -a or bare variable name
    const save = saveLex(P.L)
    const tok = nextToken(P.L, 'arg')
    if (tok.type === 'WORD' || tok.type === 'NUMBER') {
      if (tok.value.startsWith('-')) {
        kids.push(leaf(P, 'word', tok))
      } else if (isIdentStart(tok.value[0] ?? '')) {
        kids.push(mk(P, 'variable_name', tok.start, tok.end, []))
      } else {
        kids.push(leaf(P, 'word', tok))
      }
    } else {
      restoreLex(P.L, save)
      break
    }
  }
  const last = kids[kids.length - 1]!
  return mk(P, 'declaration_command', kw.startIndex, last.endIndex, kids)
}

/**
 * Parse `unset [-flags] NAME…` into unset_command. Non-flag plain words
 * become variable_name; quoted/expansion arguments keep their node type so
 * downstream security checks can inspect them.
 */
function parseUnset(P: ParseState, kwTok: Token): TsNode {
  const kw = leaf(P, 'unset', kwTok)
  const kids: TsNode[] = [kw]
  while (true) {
    skipBlanks(P.L)
    const c = peek(P.L)
    // Stop at command terminators and redirection operators.
    if (
      c === '' ||
      c === '\n' ||
      c === ';' ||
      c === '&' ||
      c === '|' ||
      c === ')' ||
      c === '<' ||
      c === '>'
    ) {
      break
    }
    // SECURITY: use parseWord (not raw nextToken) so quoted strings like
    // `unset 'a[$(id)]'` emit a raw_string child that ast.ts can reject.
    // Previously `break` silently dropped non-WORD args — hiding the
    // arithmetic-subscript code-exec vector from the security walker.
    const arg = parseWord(P, 'arg')
    if (!arg) break
    if (arg.type === 'word') {
      if (arg.text.startsWith('-')) {
        kids.push(arg)
      } else {
        kids.push(mk(P, 'variable_name', arg.startIndex, arg.endIndex, []))
      }
    } else {
      kids.push(arg)
    }
  }
  const last = kids[kids.length - 1]!
  return mk(P, 'unset_command', kw.startIndex, last.endIndex, kids)
}

/**
 * Consume the keyword `name` at command position and push it onto `kids` as
 * a leaf; restores lexer state (no-op overall) when the keyword is absent.
 */
function consumeKeyword(P: ParseState, name: string, kids: TsNode[]): void {
  skipNewlines(P)
  const save = saveLex(P.L)
  const t = nextToken(P.L, 'cmd')
  if (t.type === 'WORD' && t.value === name) {
    kids.push(leaf(P, name, t))
  } else {
    restoreLex(P.L, save)
  }
}

// ───────────────────── Test & Arithmetic Expressions ─────────────────────

/** Entry point for [ … ] / [[ … ]] expression bodies; `closer` is ']' or ']]'. */
function parseTestExpr(P: ParseState, closer: string): TsNode | null {
  return parseTestOr(P, closer)
}

/** `||` level (lowest precedence) of the test-expression grammar. */
function parseTestOr(P: ParseState, closer: string): TsNode | null {
  let left = parseTestAnd(P, closer)
  if (!left) return null
  while (true) {
    skipBlanks(P.L)
    const save = saveLex(P.L)
    if (peek(P.L) === '|' && peek(P.L, 1) === '|') {
      const s = P.L.b
      advance(P.L)
      advance(P.L)
      const op = mk(P, '||', s, P.L.b, [])
      const right = parseTestAnd(P, closer)
      if (!right) {
        // Dangling `||` with no RHS: roll back so the operator isn't eaten.
        restoreLex(P.L, save)
        break
      }
      left = mk(P, 'binary_expression', left.startIndex, right.endIndex, [
        left,
        op,
        right,
      ])
    } else {
      break
    }
  }
  return left
}

/** `&&` level of the test-expression grammar (binds tighter than `||`). */
function parseTestAnd(P: ParseState, closer: string): TsNode | null {
  let left = parseTestUnary(P, closer)
  if (!left) return null
  while (true) {
    skipBlanks(P.L)
    if (peek(P.L) === '&' && peek(P.L, 1) === '&') {
      const s = P.L.b
      advance(P.L)
      advance(P.L)
      const op = mk(P, '&&', s, P.L.b, [])
      const right = parseTestUnary(P, closer)
      // NOTE(review): unlike parseTestOr, a consumed '&&' with a null RHS is
      // not rolled back (no saveLex here) — confirm this asymmetry is intended
      // for malformed input.
      if (!right) break
      left = mk(P, 'binary_expression', left.startIndex, right.endIndex, [
        left,
        op,
        right,
      ])
    } else {
      break
    }
  }
  return left
}

/** Parenthesized group or a single (possibly binary) comparison. */
function parseTestUnary(P: ParseState, closer: string): TsNode | null {
  skipBlanks(P.L)
  const c = peek(P.L)
  if (c === '(') {
    const s = P.L.b
    advance(P.L)
    const open = mk(P, '(', s, P.L.b, [])
    const inner = parseTestOr(P, closer)
    skipBlanks(P.L)
    let close: TsNode
    if (peek(P.L) === ')') {
      const cs = P.L.b
      advance(P.L)
      close = mk(P, ')', cs, P.L.b, [])
    } else {
      // Missing ')': synthesize a zero-width closer.
      close = mk(P, ')', P.L.b, P.L.b, [])
    }
    const kids = inner ? [open, inner, close] : [open, close]
    return mk(
      P,
      'parenthesized_expression',
      open.startIndex,
      close.endIndex,
      kids,
    )
  }
  return parseTestBinary(P, closer)
}

/**
 * Parse `!`-negated or test-operator (`-f`) or parenthesized primary — but NOT
 * a binary comparison. Used as LHS of binary_expression so `! x =~ y` binds
 * `!` to `x` only, not the whole `x =~ y`.
 */
function parseTestNegatablePrimary(
  P: ParseState,
  closer: string,
): TsNode | null {
  skipBlanks(P.L)
  const c = peek(P.L)
  if (c === '!') {
    const s = P.L.b
    advance(P.L)
    const bang = mk(P, '!', s, P.L.b, [])
    const inner = parseTestNegatablePrimary(P, closer)
    // Trailing lone `!` with nothing after it: return the bang leaf itself.
    if (!inner) return bang
    return mk(P, 'unary_expression', bang.startIndex, inner.endIndex, [
      bang,
      inner,
    ])
  }
  if (c === '-' && isIdentStart(peek(P.L, 1))) {
    // Unary test operator like -f / -n / -z followed by its operand.
    const s = P.L.b
    advance(P.L)
    while (isIdentChar(peek(P.L))) advance(P.L)
    const op = mk(P, 'test_operator', s, P.L.b, [])
    skipBlanks(P.L)
    const arg = parseTestPrimary(P, closer)
    if (!arg) return op
    return mk(P, 'unary_expression', op.startIndex, arg.endIndex, [op, arg])
  }
  return parseTestPrimary(P, closer)
}

/**
 * Parse one comparison: LHS primary, optional binary operator (== != =~ = < >
 * or a `-xx` operator like -eq/-lt), then an operator-specific RHS. Inside
 * `[[ ]]` the RHS of =, ==, != and =~ gets special regex/extglob handling to
 * match tree-sitter node shapes; otherwise the RHS is a plain primary.
 */
function parseTestBinary(P: ParseState, closer: string): TsNode | null {
  skipBlanks(P.L)
  // `!` in test context binds tighter than =~/==.
  // `[[ ! "x" =~ y ]]` → (binary_expression (unary_expression (string)) (regex))
  // `[[ ! -f x ]]` → (unary_expression ! (unary_expression (test_operator) (word)))
  const left = parseTestNegatablePrimary(P, closer)
  if (!left) return null
  skipBlanks(P.L)
  // Binary comparison: == != =~ -eq -lt etc.
  const c = peek(P.L)
  const c1 = peek(P.L, 1)
  let op: TsNode | null = null
  const os = P.L.b
  if (c === '=' && c1 === '=') {
    advance(P.L)
    advance(P.L)
    op = mk(P, '==', os, P.L.b, [])
  } else if (c === '!' && c1 === '=') {
    advance(P.L)
    advance(P.L)
    op = mk(P, '!=', os, P.L.b, [])
  } else if (c === '=' && c1 === '~') {
    advance(P.L)
    advance(P.L)
    op = mk(P, '=~', os, P.L.b, [])
  } else if (c === '=' && c1 !== '=') {
    advance(P.L)
    op = mk(P, '=', os, P.L.b, [])
  } else if (c === '<' && c1 !== '<') {
    // `<<` would be a heredoc/shift, not a string comparison — skip it here.
    advance(P.L)
    op = mk(P, '<', os, P.L.b, [])
  } else if (c === '>' && c1 !== '>') {
    advance(P.L)
    op = mk(P, '>', os, P.L.b, [])
  } else if (c === '-' && isIdentStart(c1)) {
    advance(P.L)
    while (isIdentChar(peek(P.L))) advance(P.L)
    op = mk(P, 'test_operator', os, P.L.b, [])
  }
  if (!op) return left
  skipBlanks(P.L)
  // In [[ ]], RHS of ==/!=/=/=~ gets special pattern parsing: paren counting
  // so @(a|b|c) doesn't break on |, and segments become extglob_pattern/regex.
  if (closer === ']]') {
    const opText = op.type
    if (opText === '=~') {
      skipBlanks(P.L)
      // If the ENTIRE RHS is a quoted string, emit string/raw_string not
      // regex: `[[ "$x" =~ "$y" ]]` → (binary_expression (string) (string)).
      // If there's content after the quote (`' boop '(.*)$`), the whole RHS
      // stays a single (regex). Peek past the quote to check.
      const rc = peek(P.L)
      let rhs: TsNode | null = null
      if (rc === '"' || rc === "'") {
        const save = saveLex(P.L)
        const quoted =
          rc === '"'
            ? parseDoubleQuoted(P)
            : leaf(P, 'raw_string', nextToken(P.L, 'arg'))
        // Check if RHS ends here: only whitespace then ]] or &&/|| or newline
        let j = P.L.i
        while (j < P.L.len && (P.src[j] === ' ' || P.src[j] === '\t')) j++
        const nc = P.src[j] ?? ''
        const nc1 = P.src[j + 1] ?? ''
        if (
          (nc === ']' && nc1 === ']') ||
          (nc === '&' && nc1 === '&') ||
          (nc === '|' && nc1 === '|') ||
          nc === '\n' ||
          nc === ''
        ) {
          rhs = quoted
        } else {
          restoreLex(P.L, save)
        }
      }
      if (!rhs) rhs = parseTestRegexRhs(P)
      if (!rhs) return left
      return mk(P, 'binary_expression', left.startIndex, rhs.endIndex, [
        left,
        op,
        rhs,
      ])
    }
    // Single `=` emits (regex) per tree-sitter; `==` and `!=` emit extglob_pattern
    if (opText === '=') {
      const rhs = parseTestRegexRhs(P)
      if (!rhs) return left
      return mk(P, 'binary_expression', left.startIndex, rhs.endIndex, [
        left,
        op,
        rhs,
      ])
    }
    if (opText === '==' || opText === '!=') {
      const parts = parseTestExtglobRhs(P)
      if (parts.length === 0) return left
      const last = parts[parts.length - 1]!
      // Multiple RHS parts are flat children of binary_expression.
      return mk(P, 'binary_expression', left.startIndex, last.endIndex, [
        left,
        op,
        ...parts,
      ])
    }
  }
  const right = parseTestPrimary(P, closer)
  if (!right) return left
  return mk(P, 'binary_expression', left.startIndex, right.endIndex, [
    left,
    op,
    right,
  ])
}

// RHS of =~ in [[ ]] — scan as single (regex) node with paren/bracket counting
// so | ( ) inside the regex don't break parsing. Stop at ]] or ws+&&/||.
3937function parseTestRegexRhs(P: ParseState): TsNode | null { 3938 skipBlanks(P.L) 3939 const start = P.L.b 3940 let parenDepth = 0 3941 let bracketDepth = 0 3942 while (P.L.i < P.L.len) { 3943 const c = peek(P.L) 3944 if (c === '\\' && P.L.i + 1 < P.L.len) { 3945 advance(P.L) 3946 advance(P.L) 3947 continue 3948 } 3949 if (c === '\n') break 3950 if (parenDepth === 0 && bracketDepth === 0) { 3951 if (c === ']' && peek(P.L, 1) === ']') break 3952 if (c === ' ' || c === '\t') { 3953 // Peek past blanks for ]] or &&/|| 3954 let j = P.L.i 3955 while (j < P.L.len && (P.L.src[j] === ' ' || P.L.src[j] === '\t')) j++ 3956 const nc = P.L.src[j] ?? '' 3957 const nc1 = P.L.src[j + 1] ?? '' 3958 if ( 3959 (nc === ']' && nc1 === ']') || 3960 (nc === '&' && nc1 === '&') || 3961 (nc === '|' && nc1 === '|') 3962 ) { 3963 break 3964 } 3965 advance(P.L) 3966 continue 3967 } 3968 } 3969 if (c === '(') parenDepth++ 3970 else if (c === ')' && parenDepth > 0) parenDepth-- 3971 else if (c === '[') bracketDepth++ 3972 else if (c === ']' && bracketDepth > 0) bracketDepth-- 3973 advance(P.L) 3974 } 3975 if (P.L.b === start) return null 3976 return mk(P, 'regex', start, P.L.b, []) 3977} 3978 3979// RHS of ==/!=/= in [[ ]] — returns array of parts. Bare text → extglob_pattern 3980// (with paren counting for @(a|b)); $(...)/${}/quoted → proper node types. 3981// Multiple parts become flat children of binary_expression per tree-sitter. 3982function parseTestExtglobRhs(P: ParseState): TsNode[] { 3983 skipBlanks(P.L) 3984 const parts: TsNode[] = [] 3985 let segStart = P.L.b 3986 let segStartI = P.L.i 3987 let parenDepth = 0 3988 const flushSeg = () => { 3989 if (P.L.i > segStartI) { 3990 const text = P.src.slice(segStartI, P.L.i) 3991 // Pure number stays number; everything else is extglob_pattern 3992 const type = /^\d+$/.test(text) ? 
'number' : 'extglob_pattern' 3993 parts.push(mk(P, type, segStart, P.L.b, [])) 3994 } 3995 } 3996 while (P.L.i < P.L.len) { 3997 const c = peek(P.L) 3998 if (c === '\\' && P.L.i + 1 < P.L.len) { 3999 advance(P.L) 4000 advance(P.L) 4001 continue 4002 } 4003 if (c === '\n') break 4004 if (parenDepth === 0) { 4005 if (c === ']' && peek(P.L, 1) === ']') break 4006 if (c === ' ' || c === '\t') { 4007 let j = P.L.i 4008 while (j < P.L.len && (P.L.src[j] === ' ' || P.L.src[j] === '\t')) j++ 4009 const nc = P.L.src[j] ?? '' 4010 const nc1 = P.L.src[j + 1] ?? '' 4011 if ( 4012 (nc === ']' && nc1 === ']') || 4013 (nc === '&' && nc1 === '&') || 4014 (nc === '|' && nc1 === '|') 4015 ) { 4016 break 4017 } 4018 advance(P.L) 4019 continue 4020 } 4021 } 4022 // $ " ' must be parsed even inside @( ) extglob parens — parseDollarLike 4023 // consumes matching ) so parenDepth stays consistent. 4024 if (c === '$') { 4025 const c1 = peek(P.L, 1) 4026 if ( 4027 c1 === '(' || 4028 c1 === '{' || 4029 isIdentStart(c1) || 4030 SPECIAL_VARS.has(c1) 4031 ) { 4032 flushSeg() 4033 const exp = parseDollarLike(P) 4034 if (exp) parts.push(exp) 4035 segStart = P.L.b 4036 segStartI = P.L.i 4037 continue 4038 } 4039 } 4040 if (c === '"') { 4041 flushSeg() 4042 parts.push(parseDoubleQuoted(P)) 4043 segStart = P.L.b 4044 segStartI = P.L.i 4045 continue 4046 } 4047 if (c === "'") { 4048 flushSeg() 4049 const tok = nextToken(P.L, 'arg') 4050 parts.push(leaf(P, 'raw_string', tok)) 4051 segStart = P.L.b 4052 segStartI = P.L.i 4053 continue 4054 } 4055 if (c === '(') parenDepth++ 4056 else if (c === ')' && parenDepth > 0) parenDepth-- 4057 advance(P.L) 4058 } 4059 flushSeg() 4060 return parts 4061} 4062 4063function parseTestPrimary(P: ParseState, closer: string): TsNode | null { 4064 skipBlanks(P.L) 4065 // Stop at closer 4066 if (closer === ']' && peek(P.L) === ']') return null 4067 if (closer === ']]' && peek(P.L) === ']' && peek(P.L, 1) === ']') return null 4068 return parseWord(P, 'arg') 4069} 4070 
4071/** 4072 * Arithmetic context modes: 4073 * - 'var': bare identifiers → variable_name (default, used in $((..)), ((..))) 4074 * - 'word': bare identifiers → word (c-style for head condition/update clauses) 4075 * - 'assign': identifiers with = → variable_assignment (c-style for init clause) 4076 */ 4077type ArithMode = 'var' | 'word' | 'assign' 4078 4079/** Operator precedence table (higher = tighter binding). */ 4080const ARITH_PREC: Record<string, number> = { 4081 '=': 2, 4082 '+=': 2, 4083 '-=': 2, 4084 '*=': 2, 4085 '/=': 2, 4086 '%=': 2, 4087 '<<=': 2, 4088 '>>=': 2, 4089 '&=': 2, 4090 '^=': 2, 4091 '|=': 2, 4092 '||': 4, 4093 '&&': 5, 4094 '|': 6, 4095 '^': 7, 4096 '&': 8, 4097 '==': 9, 4098 '!=': 9, 4099 '<': 10, 4100 '>': 10, 4101 '<=': 10, 4102 '>=': 10, 4103 '<<': 11, 4104 '>>': 11, 4105 '+': 12, 4106 '-': 12, 4107 '*': 13, 4108 '/': 13, 4109 '%': 13, 4110 '**': 14, 4111} 4112 4113/** Right-associative operators (assignment and exponent). */ 4114const ARITH_RIGHT_ASSOC = new Set([ 4115 '=', 4116 '+=', 4117 '-=', 4118 '*=', 4119 '/=', 4120 '%=', 4121 '<<=', 4122 '>>=', 4123 '&=', 4124 '^=', 4125 '|=', 4126 '**', 4127]) 4128 4129function parseArithExpr( 4130 P: ParseState, 4131 stop: string, 4132 mode: ArithMode = 'var', 4133): TsNode | null { 4134 return parseArithTernary(P, stop, mode) 4135} 4136 4137/** Top-level: comma-separated list. arithmetic_expansion emits multiple children. 
*/ 4138function parseArithCommaList( 4139 P: ParseState, 4140 stop: string, 4141 mode: ArithMode = 'var', 4142): TsNode[] { 4143 const out: TsNode[] = [] 4144 while (true) { 4145 const e = parseArithTernary(P, stop, mode) 4146 if (e) out.push(e) 4147 skipBlanks(P.L) 4148 if (peek(P.L) === ',' && !isArithStop(P, stop)) { 4149 advance(P.L) 4150 continue 4151 } 4152 break 4153 } 4154 return out 4155} 4156 4157function parseArithTernary( 4158 P: ParseState, 4159 stop: string, 4160 mode: ArithMode, 4161): TsNode | null { 4162 const cond = parseArithBinary(P, stop, 0, mode) 4163 if (!cond) return null 4164 skipBlanks(P.L) 4165 if (peek(P.L) === '?') { 4166 const qs = P.L.b 4167 advance(P.L) 4168 const q = mk(P, '?', qs, P.L.b, []) 4169 const t = parseArithBinary(P, ':', 0, mode) 4170 skipBlanks(P.L) 4171 let colon: TsNode 4172 if (peek(P.L) === ':') { 4173 const cs = P.L.b 4174 advance(P.L) 4175 colon = mk(P, ':', cs, P.L.b, []) 4176 } else { 4177 colon = mk(P, ':', P.L.b, P.L.b, []) 4178 } 4179 const f = parseArithTernary(P, stop, mode) 4180 const last = f ?? colon 4181 const kids: TsNode[] = [cond, q] 4182 if (t) kids.push(t) 4183 kids.push(colon) 4184 if (f) kids.push(f) 4185 return mk(P, 'ternary_expression', cond.startIndex, last.endIndex, kids) 4186 } 4187 return cond 4188} 4189 4190/** Scan next arithmetic binary operator; returns [text, length] or null. */ 4191function scanArithOp(P: ParseState): [string, number] | null { 4192 const c = peek(P.L) 4193 const c1 = peek(P.L, 1) 4194 const c2 = peek(P.L, 2) 4195 // 3-char: <<= >>= 4196 if (c === '<' && c1 === '<' && c2 === '=') return ['<<=', 3] 4197 if (c === '>' && c1 === '>' && c2 === '=') return ['>>=', 3] 4198 // 2-char 4199 if (c === '*' && c1 === '*') return ['**', 2] 4200 if (c === '<' && c1 === '<') return ['<<', 2] 4201 if (c === '>' && c1 === '>') return ['>>', 2] 4202 if (c === '=' && c1 === '=') return ['==', 2] 4203 if (c === '!' 
&& c1 === '=') return ['!=', 2] 4204 if (c === '<' && c1 === '=') return ['<=', 2] 4205 if (c === '>' && c1 === '=') return ['>=', 2] 4206 if (c === '&' && c1 === '&') return ['&&', 2] 4207 if (c === '|' && c1 === '|') return ['||', 2] 4208 if (c === '+' && c1 === '=') return ['+=', 2] 4209 if (c === '-' && c1 === '=') return ['-=', 2] 4210 if (c === '*' && c1 === '=') return ['*=', 2] 4211 if (c === '/' && c1 === '=') return ['/=', 2] 4212 if (c === '%' && c1 === '=') return ['%=', 2] 4213 if (c === '&' && c1 === '=') return ['&=', 2] 4214 if (c === '^' && c1 === '=') return ['^=', 2] 4215 if (c === '|' && c1 === '=') return ['|=', 2] 4216 // 1-char — but NOT ++ -- (those are pre/postfix) 4217 if (c === '+' && c1 !== '+') return ['+', 1] 4218 if (c === '-' && c1 !== '-') return ['-', 1] 4219 if (c === '*') return ['*', 1] 4220 if (c === '/') return ['/', 1] 4221 if (c === '%') return ['%', 1] 4222 if (c === '<') return ['<', 1] 4223 if (c === '>') return ['>', 1] 4224 if (c === '&') return ['&', 1] 4225 if (c === '|') return ['|', 1] 4226 if (c === '^') return ['^', 1] 4227 if (c === '=') return ['=', 1] 4228 return null 4229} 4230 4231/** Precedence-climbing binary expression parser. */ 4232function parseArithBinary( 4233 P: ParseState, 4234 stop: string, 4235 minPrec: number, 4236 mode: ArithMode, 4237): TsNode | null { 4238 let left = parseArithUnary(P, stop, mode) 4239 if (!left) return null 4240 while (true) { 4241 skipBlanks(P.L) 4242 if (isArithStop(P, stop)) break 4243 if (peek(P.L) === ',') break 4244 const opInfo = scanArithOp(P) 4245 if (!opInfo) break 4246 const [opText, opLen] = opInfo 4247 const prec = ARITH_PREC[opText] 4248 if (prec === undefined || prec < minPrec) break 4249 const os = P.L.b 4250 for (let k = 0; k < opLen; k++) advance(P.L) 4251 const op = mk(P, opText, os, P.L.b, []) 4252 const nextMin = ARITH_RIGHT_ASSOC.has(opText) ? 
prec : prec + 1 4253 const right = parseArithBinary(P, stop, nextMin, mode) 4254 if (!right) break 4255 left = mk(P, 'binary_expression', left.startIndex, right.endIndex, [ 4256 left, 4257 op, 4258 right, 4259 ]) 4260 } 4261 return left 4262} 4263 4264function parseArithUnary( 4265 P: ParseState, 4266 stop: string, 4267 mode: ArithMode, 4268): TsNode | null { 4269 skipBlanks(P.L) 4270 if (isArithStop(P, stop)) return null 4271 const c = peek(P.L) 4272 const c1 = peek(P.L, 1) 4273 // Prefix ++ -- 4274 if ((c === '+' && c1 === '+') || (c === '-' && c1 === '-')) { 4275 const s = P.L.b 4276 advance(P.L) 4277 advance(P.L) 4278 const op = mk(P, c + c1, s, P.L.b, []) 4279 const inner = parseArithUnary(P, stop, mode) 4280 if (!inner) return op 4281 return mk(P, 'unary_expression', op.startIndex, inner.endIndex, [op, inner]) 4282 } 4283 if (c === '-' || c === '+' || c === '!' || c === '~') { 4284 // In 'word'/'assign' mode (c-style for head), `-N` is a single number 4285 // literal per tree-sitter, not unary_expression. 'var' mode uses unary. 
4286 if (mode !== 'var' && c === '-' && isDigit(c1)) { 4287 const s = P.L.b 4288 advance(P.L) 4289 while (isDigit(peek(P.L))) advance(P.L) 4290 return mk(P, 'number', s, P.L.b, []) 4291 } 4292 const s = P.L.b 4293 advance(P.L) 4294 const op = mk(P, c, s, P.L.b, []) 4295 const inner = parseArithUnary(P, stop, mode) 4296 if (!inner) return op 4297 return mk(P, 'unary_expression', op.startIndex, inner.endIndex, [op, inner]) 4298 } 4299 return parseArithPostfix(P, stop, mode) 4300} 4301 4302function parseArithPostfix( 4303 P: ParseState, 4304 stop: string, 4305 mode: ArithMode, 4306): TsNode | null { 4307 const prim = parseArithPrimary(P, stop, mode) 4308 if (!prim) return null 4309 const c = peek(P.L) 4310 const c1 = peek(P.L, 1) 4311 if ((c === '+' && c1 === '+') || (c === '-' && c1 === '-')) { 4312 const s = P.L.b 4313 advance(P.L) 4314 advance(P.L) 4315 const op = mk(P, c + c1, s, P.L.b, []) 4316 return mk(P, 'postfix_expression', prim.startIndex, op.endIndex, [prim, op]) 4317 } 4318 return prim 4319} 4320 4321function parseArithPrimary( 4322 P: ParseState, 4323 stop: string, 4324 mode: ArithMode, 4325): TsNode | null { 4326 skipBlanks(P.L) 4327 if (isArithStop(P, stop)) return null 4328 const c = peek(P.L) 4329 if (c === '(') { 4330 const s = P.L.b 4331 advance(P.L) 4332 const open = mk(P, '(', s, P.L.b, []) 4333 // Parenthesized expression may contain comma-separated exprs 4334 const inners = parseArithCommaList(P, ')', mode) 4335 skipBlanks(P.L) 4336 let close: TsNode 4337 if (peek(P.L) === ')') { 4338 const cs = P.L.b 4339 advance(P.L) 4340 close = mk(P, ')', cs, P.L.b, []) 4341 } else { 4342 close = mk(P, ')', P.L.b, P.L.b, []) 4343 } 4344 return mk(P, 'parenthesized_expression', open.startIndex, close.endIndex, [ 4345 open, 4346 ...inners, 4347 close, 4348 ]) 4349 } 4350 if (c === '"') { 4351 return parseDoubleQuoted(P) 4352 } 4353 if (c === '$') { 4354 return parseDollarLike(P) 4355 } 4356 if (isDigit(c)) { 4357 const s = P.L.b 4358 while 
(isDigit(peek(P.L))) advance(P.L) 4359 // Hex: 0x1f 4360 if ( 4361 P.L.b - s === 1 && 4362 c === '0' && 4363 (peek(P.L) === 'x' || peek(P.L) === 'X') 4364 ) { 4365 advance(P.L) 4366 while (isHexDigit(peek(P.L))) advance(P.L) 4367 } 4368 // Base notation: BASE#DIGITS e.g. 2#1010, 16#ff 4369 else if (peek(P.L) === '#') { 4370 advance(P.L) 4371 while (isBaseDigit(peek(P.L))) advance(P.L) 4372 } 4373 return mk(P, 'number', s, P.L.b, []) 4374 } 4375 if (isIdentStart(c)) { 4376 const s = P.L.b 4377 while (isIdentChar(peek(P.L))) advance(P.L) 4378 const nc = peek(P.L) 4379 // Assignment in 'assign' mode (c-style for init): emit variable_assignment 4380 // so chained `a = b = c = 1` nests correctly. Other modes treat `=` as a 4381 // binary_expression operator via the precedence table. 4382 if (mode === 'assign') { 4383 skipBlanks(P.L) 4384 const ac = peek(P.L) 4385 const ac1 = peek(P.L, 1) 4386 if (ac === '=' && ac1 !== '=') { 4387 const vn = mk(P, 'variable_name', s, P.L.b, []) 4388 const es = P.L.b 4389 advance(P.L) 4390 const eq = mk(P, '=', es, P.L.b, []) 4391 // RHS may itself be another assignment (chained) 4392 const val = parseArithTernary(P, stop, mode) 4393 const end = val ? val.endIndex : eq.endIndex 4394 const kids = val ? [vn, eq, val] : [vn, eq] 4395 return mk(P, 'variable_assignment', s, end, kids) 4396 } 4397 } 4398 // Subscript 4399 if (nc === '[') { 4400 const vn = mk(P, 'variable_name', s, P.L.b, []) 4401 const brS = P.L.b 4402 advance(P.L) 4403 const brOpen = mk(P, '[', brS, P.L.b, []) 4404 const idx = parseArithTernary(P, ']', 'var') ?? parseDollarLike(P) 4405 skipBlanks(P.L) 4406 let brClose: TsNode 4407 if (peek(P.L) === ']') { 4408 const cs = P.L.b 4409 advance(P.L) 4410 brClose = mk(P, ']', cs, P.L.b, []) 4411 } else { 4412 brClose = mk(P, ']', P.L.b, P.L.b, []) 4413 } 4414 const kids = idx ? 
[vn, brOpen, idx, brClose] : [vn, brOpen, brClose] 4415 return mk(P, 'subscript', s, brClose.endIndex, kids) 4416 } 4417 // Bare identifier: variable_name in 'var' mode, word in 'word'/'assign' mode. 4418 // 'assign' mode falls through to word when no `=` follows (c-style for 4419 // cond/update clauses: `c<=5` → binary_expression(word, number)). 4420 const identType = mode === 'var' ? 'variable_name' : 'word' 4421 return mk(P, identType, s, P.L.b, []) 4422 } 4423 return null 4424} 4425 4426function isArithStop(P: ParseState, stop: string): boolean { 4427 const c = peek(P.L) 4428 if (stop === '))') return c === ')' && peek(P.L, 1) === ')' 4429 if (stop === ')') return c === ')' 4430 if (stop === ';') return c === ';' 4431 if (stop === ':') return c === ':' 4432 if (stop === ']') return c === ']' 4433 if (stop === '}') return c === '}' 4434 if (stop === ':}') return c === ':' || c === '}' 4435 return c === '' || c === '\n' 4436}