source dump of claude code
at main 4436 lines 131 kB view raw
1/** 2 * Pure-TypeScript bash parser producing tree-sitter-bash-compatible ASTs. 3 * 4 * Downstream code in parser.ts, ast.ts, prefix.ts, ParsedCommand.ts walks this 5 * by field name. startIndex/endIndex are UTF-8 BYTE offsets (not JS string 6 * indices). 7 * 8 * Grammar reference: tree-sitter-bash. Validated against a 3449-input golden 9 * corpus generated from the WASM parser. 10 */ 11 12export type TsNode = { 13 type: string 14 text: string 15 startIndex: number 16 endIndex: number 17 children: TsNode[] 18} 19 20type ParserModule = { 21 parse: (source: string, timeoutMs?: number) => TsNode | null 22} 23 24/** 25 * 50ms wall-clock cap — bails out on pathological/adversarial input. 26 * Pass `Infinity` via `parse(src, Infinity)` to disable (e.g. correctness 27 * tests, where CI jitter would otherwise cause spurious null returns). 28 */ 29const PARSE_TIMEOUT_MS = 50 30 31/** Node budget cap — bails out before OOM on deeply nested input. */ 32const MAX_NODES = 50_000 33 34const MODULE: ParserModule = { parse: parseSource } 35 36const READY = Promise.resolve() 37 38/** No-op: pure-TS parser needs no async init. Kept for API compatibility. */ 39export function ensureParserInitialized(): Promise<void> { 40 return READY 41} 42 43/** Always succeeds — pure-TS needs no init. 
*/ 44export function getParserModule(): ParserModule | null { 45 return MODULE 46} 47 48// ───────────────────────────── Tokenizer ───────────────────────────── 49 50type TokenType = 51 | 'WORD' 52 | 'NUMBER' 53 | 'OP' 54 | 'NEWLINE' 55 | 'COMMENT' 56 | 'DQUOTE' 57 | 'SQUOTE' 58 | 'ANSI_C' 59 | 'DOLLAR' 60 | 'DOLLAR_PAREN' 61 | 'DOLLAR_BRACE' 62 | 'DOLLAR_DPAREN' 63 | 'BACKTICK' 64 | 'LT_PAREN' 65 | 'GT_PAREN' 66 | 'EOF' 67 68type Token = { 69 type: TokenType 70 value: string 71 /** UTF-8 byte offset of first char */ 72 start: number 73 /** UTF-8 byte offset one past last char */ 74 end: number 75} 76 77const SPECIAL_VARS = new Set(['?', '$', '@', '*', '#', '-', '!', '_']) 78 79const DECL_KEYWORDS = new Set([ 80 'export', 81 'declare', 82 'typeset', 83 'readonly', 84 'local', 85]) 86 87export const SHELL_KEYWORDS = new Set([ 88 'if', 89 'then', 90 'elif', 91 'else', 92 'fi', 93 'while', 94 'until', 95 'for', 96 'in', 97 'do', 98 'done', 99 'case', 100 'esac', 101 'function', 102 'select', 103]) 104 105/** 106 * Lexer state. Tracks both JS-string index (for charAt) and UTF-8 byte offset 107 * (for TsNode positions). ASCII fast path: byte == char index. Non-ASCII 108 * advances byte count per-codepoint. 
109 */ 110type Lexer = { 111 src: string 112 len: number 113 /** JS string index */ 114 i: number 115 /** UTF-8 byte offset */ 116 b: number 117 /** Pending heredoc delimiters awaiting body scan at next newline */ 118 heredocs: HeredocPending[] 119 /** Precomputed byte offset for each char index (lazy for non-ASCII) */ 120 byteTable: Uint32Array | null 121} 122 123type HeredocPending = { 124 delim: string 125 stripTabs: boolean 126 quoted: boolean 127 /** Filled after body scan */ 128 bodyStart: number 129 bodyEnd: number 130 endStart: number 131 endEnd: number 132} 133 134function makeLexer(src: string): Lexer { 135 return { 136 src, 137 len: src.length, 138 i: 0, 139 b: 0, 140 heredocs: [], 141 byteTable: null, 142 } 143} 144 145/** Advance one JS char, updating byte offset for UTF-8. */ 146function advance(L: Lexer): void { 147 const c = L.src.charCodeAt(L.i) 148 L.i++ 149 if (c < 0x80) { 150 L.b++ 151 } else if (c < 0x800) { 152 L.b += 2 153 } else if (c >= 0xd800 && c <= 0xdbff) { 154 // High surrogate — next char completes the pair, total 4 UTF-8 bytes 155 L.b += 4 156 L.i++ 157 } else { 158 L.b += 3 159 } 160} 161 162function peek(L: Lexer, off = 0): string { 163 return L.i + off < L.len ? L.src[L.i + off]! : '' 164} 165 166function byteAt(L: Lexer, charIdx: number): number { 167 // Fast path: ASCII-only prefix means char idx == byte idx 168 if (L.byteTable) return L.byteTable[charIdx]! 169 // Build table on first non-trivial lookup 170 const t = new Uint32Array(L.len + 1) 171 let b = 0 172 let i = 0 173 while (i < L.len) { 174 t[i] = b 175 const c = L.src.charCodeAt(i) 176 if (c < 0x80) { 177 b++ 178 i++ 179 } else if (c < 0x800) { 180 b += 2 181 i++ 182 } else if (c >= 0xd800 && c <= 0xdbff) { 183 t[i + 1] = b + 2 184 b += 4 185 i += 2 186 } else { 187 b += 3 188 i++ 189 } 190 } 191 t[L.len] = b 192 L.byteTable = t 193 return t[charIdx]! 
194} 195 196function isWordChar(c: string): boolean { 197 // Bash word chars: alphanumeric + various punctuation that doesn't start operators 198 return ( 199 (c >= 'a' && c <= 'z') || 200 (c >= 'A' && c <= 'Z') || 201 (c >= '0' && c <= '9') || 202 c === '_' || 203 c === '/' || 204 c === '.' || 205 c === '-' || 206 c === '+' || 207 c === ':' || 208 c === '@' || 209 c === '%' || 210 c === ',' || 211 c === '~' || 212 c === '^' || 213 c === '?' || 214 c === '*' || 215 c === '!' || 216 c === '=' || 217 c === '[' || 218 c === ']' 219 ) 220} 221 222function isWordStart(c: string): boolean { 223 return isWordChar(c) || c === '\\' 224} 225 226function isIdentStart(c: string): boolean { 227 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c === '_' 228} 229 230function isIdentChar(c: string): boolean { 231 return isIdentStart(c) || (c >= '0' && c <= '9') 232} 233 234function isDigit(c: string): boolean { 235 return c >= '0' && c <= '9' 236} 237 238function isHexDigit(c: string): boolean { 239 return isDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') 240} 241 242function isBaseDigit(c: string): boolean { 243 // Bash BASE#DIGITS: digits, letters, @ and _ (up to base 64) 244 return isIdentChar(c) || c === '@' 245} 246 247/** 248 * Unquoted heredoc delimiter chars. Bash accepts most non-metacharacters — 249 * not just identifiers. Stop at whitespace, redirects, pipe/list operators, 250 * and structural tokens. Allows !, -, ., +, etc. (e.g. <<!HEREDOC!). 251 */ 252function isHeredocDelimChar(c: string): boolean { 253 return ( 254 c !== '' && 255 c !== ' ' && 256 c !== '\t' && 257 c !== '\n' && 258 c !== '<' && 259 c !== '>' && 260 c !== '|' && 261 c !== '&' && 262 c !== ';' && 263 c !== '(' && 264 c !== ')' && 265 c !== "'" && 266 c !== '"' && 267 c !== '`' && 268 c !== '\\' 269 ) 270} 271 272function skipBlanks(L: Lexer): void { 273 while (L.i < L.len) { 274 const c = L.src[L.i]! 
275 if (c === ' ' || c === '\t' || c === '\r') { 276 // \r is whitespace per tree-sitter-bash extras /\s/ — handles CRLF inputs 277 advance(L) 278 } else if (c === '\\') { 279 const nx = L.src[L.i + 1] 280 if (nx === '\n' || (nx === '\r' && L.src[L.i + 2] === '\n')) { 281 // Line continuation — tree-sitter extras: /\\\r?\n/ 282 advance(L) 283 advance(L) 284 if (nx === '\r') advance(L) 285 } else if (nx === ' ' || nx === '\t') { 286 // \<space> or \<tab> — tree-sitter's _whitespace is /\\?[ \t\v]+/ 287 advance(L) 288 advance(L) 289 } else { 290 break 291 } 292 } else { 293 break 294 } 295 } 296} 297 298/** 299 * Scan next token. Context-sensitive: `cmd` mode treats [ as operator (test 300 * command start), `arg` mode treats [ as word char (glob/subscript). 301 */ 302function nextToken(L: Lexer, ctx: 'cmd' | 'arg' = 'arg'): Token { 303 skipBlanks(L) 304 const start = L.b 305 if (L.i >= L.len) return { type: 'EOF', value: '', start, end: start } 306 307 const c = L.src[L.i]! 308 const c1 = peek(L, 1) 309 const c2 = peek(L, 2) 310 311 if (c === '\n') { 312 advance(L) 313 return { type: 'NEWLINE', value: '\n', start, end: L.b } 314 } 315 316 if (c === '#') { 317 const si = L.i 318 while (L.i < L.len && L.src[L.i] !== '\n') advance(L) 319 return { 320 type: 'COMMENT', 321 value: L.src.slice(si, L.i), 322 start, 323 end: L.b, 324 } 325 } 326 327 // Multi-char operators (longest match first) 328 if (c === '&' && c1 === '&') { 329 advance(L) 330 advance(L) 331 return { type: 'OP', value: '&&', start, end: L.b } 332 } 333 if (c === '|' && c1 === '|') { 334 advance(L) 335 advance(L) 336 return { type: 'OP', value: '||', start, end: L.b } 337 } 338 if (c === '|' && c1 === '&') { 339 advance(L) 340 advance(L) 341 return { type: 'OP', value: '|&', start, end: L.b } 342 } 343 if (c === ';' && c1 === ';' && c2 === '&') { 344 advance(L) 345 advance(L) 346 advance(L) 347 return { type: 'OP', value: ';;&', start, end: L.b } 348 } 349 if (c === ';' && c1 === ';') { 350 advance(L) 351 
advance(L) 352 return { type: 'OP', value: ';;', start, end: L.b } 353 } 354 if (c === ';' && c1 === '&') { 355 advance(L) 356 advance(L) 357 return { type: 'OP', value: ';&', start, end: L.b } 358 } 359 if (c === '>' && c1 === '>') { 360 advance(L) 361 advance(L) 362 return { type: 'OP', value: '>>', start, end: L.b } 363 } 364 if (c === '>' && c1 === '&' && c2 === '-') { 365 advance(L) 366 advance(L) 367 advance(L) 368 return { type: 'OP', value: '>&-', start, end: L.b } 369 } 370 if (c === '>' && c1 === '&') { 371 advance(L) 372 advance(L) 373 return { type: 'OP', value: '>&', start, end: L.b } 374 } 375 if (c === '>' && c1 === '|') { 376 advance(L) 377 advance(L) 378 return { type: 'OP', value: '>|', start, end: L.b } 379 } 380 if (c === '&' && c1 === '>' && c2 === '>') { 381 advance(L) 382 advance(L) 383 advance(L) 384 return { type: 'OP', value: '&>>', start, end: L.b } 385 } 386 if (c === '&' && c1 === '>') { 387 advance(L) 388 advance(L) 389 return { type: 'OP', value: '&>', start, end: L.b } 390 } 391 if (c === '<' && c1 === '<' && c2 === '<') { 392 advance(L) 393 advance(L) 394 advance(L) 395 return { type: 'OP', value: '<<<', start, end: L.b } 396 } 397 if (c === '<' && c1 === '<' && c2 === '-') { 398 advance(L) 399 advance(L) 400 advance(L) 401 return { type: 'OP', value: '<<-', start, end: L.b } 402 } 403 if (c === '<' && c1 === '<') { 404 advance(L) 405 advance(L) 406 return { type: 'OP', value: '<<', start, end: L.b } 407 } 408 if (c === '<' && c1 === '&' && c2 === '-') { 409 advance(L) 410 advance(L) 411 advance(L) 412 return { type: 'OP', value: '<&-', start, end: L.b } 413 } 414 if (c === '<' && c1 === '&') { 415 advance(L) 416 advance(L) 417 return { type: 'OP', value: '<&', start, end: L.b } 418 } 419 if (c === '<' && c1 === '(') { 420 advance(L) 421 advance(L) 422 return { type: 'LT_PAREN', value: '<(', start, end: L.b } 423 } 424 if (c === '>' && c1 === '(') { 425 advance(L) 426 advance(L) 427 return { type: 'GT_PAREN', value: '>(', start, 
end: L.b } 428 } 429 if (c === '(' && c1 === '(') { 430 advance(L) 431 advance(L) 432 return { type: 'OP', value: '((', start, end: L.b } 433 } 434 if (c === ')' && c1 === ')') { 435 advance(L) 436 advance(L) 437 return { type: 'OP', value: '))', start, end: L.b } 438 } 439 440 if (c === '|' || c === '&' || c === ';' || c === '>' || c === '<') { 441 advance(L) 442 return { type: 'OP', value: c, start, end: L.b } 443 } 444 if (c === '(' || c === ')') { 445 advance(L) 446 return { type: 'OP', value: c, start, end: L.b } 447 } 448 449 // In cmd position, [ [[ { start test/group; in arg position they're word chars 450 if (ctx === 'cmd') { 451 if (c === '[' && c1 === '[') { 452 advance(L) 453 advance(L) 454 return { type: 'OP', value: '[[', start, end: L.b } 455 } 456 if (c === '[') { 457 advance(L) 458 return { type: 'OP', value: '[', start, end: L.b } 459 } 460 if (c === '{' && (c1 === ' ' || c1 === '\t' || c1 === '\n')) { 461 advance(L) 462 return { type: 'OP', value: '{', start, end: L.b } 463 } 464 if (c === '}') { 465 advance(L) 466 return { type: 'OP', value: '}', start, end: L.b } 467 } 468 if (c === '!' 
&& (c1 === ' ' || c1 === '\t')) { 469 advance(L) 470 return { type: 'OP', value: '!', start, end: L.b } 471 } 472 } 473 474 if (c === '"') { 475 advance(L) 476 return { type: 'DQUOTE', value: '"', start, end: L.b } 477 } 478 if (c === "'") { 479 const si = L.i 480 advance(L) 481 while (L.i < L.len && L.src[L.i] !== "'") advance(L) 482 if (L.i < L.len) advance(L) 483 return { 484 type: 'SQUOTE', 485 value: L.src.slice(si, L.i), 486 start, 487 end: L.b, 488 } 489 } 490 491 if (c === '$') { 492 if (c1 === '(' && c2 === '(') { 493 advance(L) 494 advance(L) 495 advance(L) 496 return { type: 'DOLLAR_DPAREN', value: '$((', start, end: L.b } 497 } 498 if (c1 === '(') { 499 advance(L) 500 advance(L) 501 return { type: 'DOLLAR_PAREN', value: '$(', start, end: L.b } 502 } 503 if (c1 === '{') { 504 advance(L) 505 advance(L) 506 return { type: 'DOLLAR_BRACE', value: '${', start, end: L.b } 507 } 508 if (c1 === "'") { 509 // ANSI-C string $'...' 510 const si = L.i 511 advance(L) 512 advance(L) 513 while (L.i < L.len && L.src[L.i] !== "'") { 514 if (L.src[L.i] === '\\' && L.i + 1 < L.len) advance(L) 515 advance(L) 516 } 517 if (L.i < L.len) advance(L) 518 return { 519 type: 'ANSI_C', 520 value: L.src.slice(si, L.i), 521 start, 522 end: L.b, 523 } 524 } 525 advance(L) 526 return { type: 'DOLLAR', value: '$', start, end: L.b } 527 } 528 529 if (c === '`') { 530 advance(L) 531 return { type: 'BACKTICK', value: '`', start, end: L.b } 532 } 533 534 // File descriptor before redirect: digit+ immediately followed by > or < 535 if (isDigit(c)) { 536 let j = L.i 537 while (j < L.len && isDigit(L.src[j]!)) j++ 538 const after = j < L.len ? L.src[j]! 
: '' 539 if (after === '>' || after === '<') { 540 const si = L.i 541 while (L.i < j) advance(L) 542 return { 543 type: 'WORD', 544 value: L.src.slice(si, L.i), 545 start, 546 end: L.b, 547 } 548 } 549 } 550 551 // Word / number 552 if (isWordStart(c) || c === '{' || c === '}') { 553 const si = L.i 554 while (L.i < L.len) { 555 const ch = L.src[L.i]! 556 if (ch === '\\') { 557 if (L.i + 1 >= L.len) { 558 // Trailing `\` at EOF — tree-sitter excludes it from the word and 559 // emits a sibling ERROR. Stop here so the word ends before `\`. 560 break 561 } 562 // Escape next char (including \n for line continuation mid-word) 563 if (L.src[L.i + 1] === '\n') { 564 advance(L) 565 advance(L) 566 continue 567 } 568 advance(L) 569 advance(L) 570 continue 571 } 572 if (!isWordChar(ch) && ch !== '{' && ch !== '}') { 573 break 574 } 575 advance(L) 576 } 577 if (L.i > si) { 578 const v = L.src.slice(si, L.i) 579 // Number: optional sign then digits only 580 if (/^-?\d+$/.test(v)) { 581 return { type: 'NUMBER', value: v, start, end: L.b } 582 } 583 return { type: 'WORD', value: v, start, end: L.b } 584 } 585 // Empty word (lone `\` at EOF) — fall through to single-char consumer 586 } 587 588 // Unknown char — consume as single-char word 589 advance(L) 590 return { type: 'WORD', value: c, start, end: L.b } 591} 592 593// ───────────────────────────── Parser ───────────────────────────── 594 595type ParseState = { 596 L: Lexer 597 src: string 598 srcBytes: number 599 /** True when byte offsets == char indices (no multi-byte UTF-8) */ 600 isAscii: boolean 601 nodeCount: number 602 deadline: number 603 aborted: boolean 604 /** Depth of backtick nesting — inside `...`, ` terminates words */ 605 inBacktick: number 606 /** When set, parseSimpleCommand stops at this token (for `[` backtrack) */ 607 stopToken: string | null 608} 609 610function parseSource(source: string, timeoutMs?: number): TsNode | null { 611 const L = makeLexer(source) 612 const srcBytes = byteLengthUtf8(source) 613 
const P: ParseState = { 614 L, 615 src: source, 616 srcBytes, 617 isAscii: srcBytes === source.length, 618 nodeCount: 0, 619 deadline: performance.now() + (timeoutMs ?? PARSE_TIMEOUT_MS), 620 aborted: false, 621 inBacktick: 0, 622 stopToken: null, 623 } 624 try { 625 const program = parseProgram(P) 626 if (P.aborted) return null 627 return program 628 } catch { 629 return null 630 } 631} 632 633function byteLengthUtf8(s: string): number { 634 let b = 0 635 for (let i = 0; i < s.length; i++) { 636 const c = s.charCodeAt(i) 637 if (c < 0x80) b++ 638 else if (c < 0x800) b += 2 639 else if (c >= 0xd800 && c <= 0xdbff) { 640 b += 4 641 i++ 642 } else b += 3 643 } 644 return b 645} 646 647function checkBudget(P: ParseState): void { 648 P.nodeCount++ 649 if (P.nodeCount > MAX_NODES) { 650 P.aborted = true 651 throw new Error('budget') 652 } 653 if ((P.nodeCount & 0x7f) === 0 && performance.now() > P.deadline) { 654 P.aborted = true 655 throw new Error('timeout') 656 } 657} 658 659/** Build a node. Slices text from source by byte range via char-index lookup. */ 660function mk( 661 P: ParseState, 662 type: string, 663 start: number, 664 end: number, 665 children: TsNode[], 666): TsNode { 667 checkBudget(P) 668 return { 669 type, 670 text: sliceBytes(P, start, end), 671 startIndex: start, 672 endIndex: end, 673 children, 674 } 675} 676 677function sliceBytes(P: ParseState, startByte: number, endByte: number): string { 678 if (P.isAscii) return P.src.slice(startByte, endByte) 679 // Find char indices for byte offsets. Build byte table if needed. 680 const L = P.L 681 if (!L.byteTable) byteAt(L, 0) 682 const t = L.byteTable! 683 // Binary search for char index where byte offset matches 684 let lo = 0 685 let hi = P.src.length 686 while (lo < hi) { 687 const m = (lo + hi) >>> 1 688 if (t[m]! < startByte) lo = m + 1 689 else hi = m 690 } 691 const sc = lo 692 lo = sc 693 hi = P.src.length 694 while (lo < hi) { 695 const m = (lo + hi) >>> 1 696 if (t[m]! 
< endByte) lo = m + 1 697 else hi = m 698 } 699 return P.src.slice(sc, lo) 700} 701 702function leaf(P: ParseState, type: string, tok: Token): TsNode { 703 return mk(P, type, tok.start, tok.end, []) 704} 705 706function parseProgram(P: ParseState): TsNode { 707 const children: TsNode[] = [] 708 // Skip leading whitespace & newlines — program start is first content byte 709 skipBlanks(P.L) 710 while (true) { 711 const save = saveLex(P.L) 712 const t = nextToken(P.L, 'cmd') 713 if (t.type === 'NEWLINE') { 714 skipBlanks(P.L) 715 continue 716 } 717 restoreLex(P.L, save) 718 break 719 } 720 const progStart = P.L.b 721 while (P.L.i < P.L.len) { 722 const save = saveLex(P.L) 723 const t = nextToken(P.L, 'cmd') 724 if (t.type === 'EOF') break 725 if (t.type === 'NEWLINE') continue 726 if (t.type === 'COMMENT') { 727 children.push(leaf(P, 'comment', t)) 728 continue 729 } 730 restoreLex(P.L, save) 731 const stmts = parseStatements(P, null) 732 for (const s of stmts) children.push(s) 733 if (stmts.length === 0) { 734 // Couldn't parse — emit ERROR and skip one token 735 const errTok = nextToken(P.L, 'cmd') 736 if (errTok.type === 'EOF') break 737 // Stray `;;` at program level (e.g., `var=;;` outside case) — tree-sitter 738 // silently elides. Keep leading `;` as ERROR (security: paste artifact). 739 if ( 740 errTok.type === 'OP' && 741 errTok.value === ';;' && 742 children.length > 0 743 ) { 744 continue 745 } 746 children.push(mk(P, 'ERROR', errTok.start, errTok.end, [])) 747 } 748 } 749 // tree-sitter includes trailing whitespace in program extent 750 const progEnd = children.length > 0 ? P.srcBytes : progStart 751 return mk(P, 'program', progStart, progEnd, children) 752} 753 754/** Packed as (b << 16) | i — avoids heap alloc on every backtrack. 
*/ 755type LexSave = number 756function saveLex(L: Lexer): LexSave { 757 return L.b * 0x10000 + L.i 758} 759function restoreLex(L: Lexer, s: LexSave): void { 760 L.i = s & 0xffff 761 L.b = s >>> 16 762} 763 764/** 765 * Parse a sequence of statements separated by ; & newline. Returns a flat list 766 * where ; and & are sibling leaves (NOT wrapped in 'list' — only && || get 767 * that). Stops at terminator or EOF. 768 */ 769function parseStatements(P: ParseState, terminator: string | null): TsNode[] { 770 const out: TsNode[] = [] 771 while (true) { 772 skipBlanks(P.L) 773 const save = saveLex(P.L) 774 const t = nextToken(P.L, 'cmd') 775 if (t.type === 'EOF') { 776 restoreLex(P.L, save) 777 break 778 } 779 if (t.type === 'NEWLINE') { 780 // Process pending heredocs 781 if (P.L.heredocs.length > 0) { 782 scanHeredocBodies(P) 783 } 784 continue 785 } 786 if (t.type === 'COMMENT') { 787 out.push(leaf(P, 'comment', t)) 788 continue 789 } 790 if (terminator && t.type === 'OP' && t.value === terminator) { 791 restoreLex(P.L, save) 792 break 793 } 794 if ( 795 t.type === 'OP' && 796 (t.value === ')' || 797 t.value === '}' || 798 t.value === ';;' || 799 t.value === ';&' || 800 t.value === ';;&' || 801 t.value === '))' || 802 t.value === ']]' || 803 t.value === ']') 804 ) { 805 restoreLex(P.L, save) 806 break 807 } 808 if (t.type === 'BACKTICK' && P.inBacktick > 0) { 809 restoreLex(P.L, save) 810 break 811 } 812 if ( 813 t.type === 'WORD' && 814 (t.value === 'then' || 815 t.value === 'elif' || 816 t.value === 'else' || 817 t.value === 'fi' || 818 t.value === 'do' || 819 t.value === 'done' || 820 t.value === 'esac') 821 ) { 822 restoreLex(P.L, save) 823 break 824 } 825 restoreLex(P.L, save) 826 const stmt = parseAndOr(P) 827 if (!stmt) break 828 out.push(stmt) 829 // Look for separator 830 skipBlanks(P.L) 831 const save2 = saveLex(P.L) 832 const sep = nextToken(P.L, 'cmd') 833 if (sep.type === 'OP' && (sep.value === ';' || sep.value === '&')) { 834 // Check if terminator 
follows — if so, emit separator but stop 835 const save3 = saveLex(P.L) 836 const after = nextToken(P.L, 'cmd') 837 restoreLex(P.L, save3) 838 out.push(leaf(P, sep.value, sep)) 839 if ( 840 after.type === 'EOF' || 841 (after.type === 'OP' && 842 (after.value === ')' || 843 after.value === '}' || 844 after.value === ';;' || 845 after.value === ';&' || 846 after.value === ';;&')) || 847 (after.type === 'WORD' && 848 (after.value === 'then' || 849 after.value === 'elif' || 850 after.value === 'else' || 851 after.value === 'fi' || 852 after.value === 'do' || 853 after.value === 'done' || 854 after.value === 'esac')) 855 ) { 856 // Trailing separator — don't include it at program level unless 857 // there's content after. But at inner levels we keep it. 858 continue 859 } 860 } else if (sep.type === 'NEWLINE') { 861 if (P.L.heredocs.length > 0) { 862 scanHeredocBodies(P) 863 } 864 continue 865 } else { 866 restoreLex(P.L, save2) 867 } 868 } 869 // Trim trailing separator if at program level 870 return out 871} 872 873/** 874 * Parse pipeline chains joined by && ||. Left-associative nesting. 875 * tree-sitter quirk: trailing redirect on the last pipeline wraps the ENTIRE 876 * list in a redirected_statement — `a > x && b > y` becomes 877 * redirected_statement(list(redirected_statement(a,>x), &&, b), >y). 878 */ 879function parseAndOr(P: ParseState): TsNode | null { 880 let left = parsePipeline(P) 881 if (!left) return null 882 while (true) { 883 const save = saveLex(P.L) 884 const t = nextToken(P.L, 'cmd') 885 if (t.type === 'OP' && (t.value === '&&' || t.value === '||')) { 886 const op = leaf(P, t.value, t) 887 skipNewlines(P) 888 const right = parsePipeline(P) 889 if (!right) { 890 left = mk(P, 'list', left.startIndex, op.endIndex, [left, op]) 891 break 892 } 893 // If right is a redirected_statement, hoist its redirects to wrap the list. 894 if (right.type === 'redirected_statement' && right.children.length >= 2) { 895 const inner = right.children[0]! 
896 const redirs = right.children.slice(1) 897 const listNode = mk(P, 'list', left.startIndex, inner.endIndex, [ 898 left, 899 op, 900 inner, 901 ]) 902 const lastR = redirs[redirs.length - 1]! 903 left = mk( 904 P, 905 'redirected_statement', 906 listNode.startIndex, 907 lastR.endIndex, 908 [listNode, ...redirs], 909 ) 910 } else { 911 left = mk(P, 'list', left.startIndex, right.endIndex, [left, op, right]) 912 } 913 } else { 914 restoreLex(P.L, save) 915 break 916 } 917 } 918 return left 919} 920 921function skipNewlines(P: ParseState): void { 922 while (true) { 923 const save = saveLex(P.L) 924 const t = nextToken(P.L, 'cmd') 925 if (t.type !== 'NEWLINE') { 926 restoreLex(P.L, save) 927 break 928 } 929 } 930} 931 932/** 933 * Parse commands joined by | or |&. Flat children with operator leaves. 934 * tree-sitter quirk: `a | b 2>nul | c` hoists the redirect on `b` to wrap 935 * the preceding pipeline fragment — pipeline(redirected_statement( 936 * pipeline(a,|,b), 2>nul), |, c). 937 */ 938function parsePipeline(P: ParseState): TsNode | null { 939 let first = parseCommand(P) 940 if (!first) return null 941 const parts: TsNode[] = [first] 942 while (true) { 943 const save = saveLex(P.L) 944 const t = nextToken(P.L, 'cmd') 945 if (t.type === 'OP' && (t.value === '|' || t.value === '|&')) { 946 const op = leaf(P, t.value, t) 947 skipNewlines(P) 948 const next = parseCommand(P) 949 if (!next) { 950 parts.push(op) 951 break 952 } 953 // Hoist trailing redirect on `next` to wrap current pipeline fragment 954 if ( 955 next.type === 'redirected_statement' && 956 next.children.length >= 2 && 957 parts.length >= 1 958 ) { 959 const inner = next.children[0]! 960 const redirs = next.children.slice(1) 961 // Wrap existing parts + op + inner as a pipeline 962 const pipeKids = [...parts, op, inner] 963 const pipeNode = mk( 964 P, 965 'pipeline', 966 pipeKids[0]!.startIndex, 967 inner.endIndex, 968 pipeKids, 969 ) 970 const lastR = redirs[redirs.length - 1]! 
971 const wrapped = mk( 972 P, 973 'redirected_statement', 974 pipeNode.startIndex, 975 lastR.endIndex, 976 [pipeNode, ...redirs], 977 ) 978 parts.length = 0 979 parts.push(wrapped) 980 first = wrapped 981 continue 982 } 983 parts.push(op, next) 984 } else { 985 restoreLex(P.L, save) 986 break 987 } 988 } 989 if (parts.length === 1) return parts[0]! 990 const last = parts[parts.length - 1]! 991 return mk(P, 'pipeline', parts[0]!.startIndex, last.endIndex, parts) 992} 993 994/** Parse a single command: simple, compound, or control structure. */ 995function parseCommand(P: ParseState): TsNode | null { 996 skipBlanks(P.L) 997 const save = saveLex(P.L) 998 const t = nextToken(P.L, 'cmd') 999 1000 if (t.type === 'EOF') { 1001 restoreLex(P.L, save) 1002 return null 1003 } 1004 1005 // Negation — tree-sitter wraps just the command, redirects go outside. 1006 // `! cmd > out` → redirected_statement(negated_command(!, cmd), >out) 1007 if (t.type === 'OP' && t.value === '!') { 1008 const bang = leaf(P, '!', t) 1009 const inner = parseCommand(P) 1010 if (!inner) { 1011 restoreLex(P.L, save) 1012 return null 1013 } 1014 // If inner is a redirected_statement, hoist redirects outside negation 1015 if (inner.type === 'redirected_statement' && inner.children.length >= 2) { 1016 const cmd = inner.children[0]! 1017 const redirs = inner.children.slice(1) 1018 const neg = mk(P, 'negated_command', bang.startIndex, cmd.endIndex, [ 1019 bang, 1020 cmd, 1021 ]) 1022 const lastR = redirs[redirs.length - 1]! 1023 return mk(P, 'redirected_statement', neg.startIndex, lastR.endIndex, [ 1024 neg, 1025 ...redirs, 1026 ]) 1027 } 1028 return mk(P, 'negated_command', bang.startIndex, inner.endIndex, [ 1029 bang, 1030 inner, 1031 ]) 1032 } 1033 1034 if (t.type === 'OP' && t.value === '(') { 1035 const open = leaf(P, '(', t) 1036 const body = parseStatements(P, ')') 1037 const closeTok = nextToken(P.L, 'cmd') 1038 const close = 1039 closeTok.type === 'OP' && closeTok.value === ')' 1040 ? 
leaf(P, ')', closeTok) 1041 : mk(P, ')', open.endIndex, open.endIndex, []) 1042 const node = mk(P, 'subshell', open.startIndex, close.endIndex, [ 1043 open, 1044 ...body, 1045 close, 1046 ]) 1047 return maybeRedirect(P, node) 1048 } 1049 1050 if (t.type === 'OP' && t.value === '((') { 1051 const open = leaf(P, '((', t) 1052 const exprs = parseArithCommaList(P, '))', 'var') 1053 const closeTok = nextToken(P.L, 'cmd') 1054 const close = 1055 closeTok.value === '))' 1056 ? leaf(P, '))', closeTok) 1057 : mk(P, '))', open.endIndex, open.endIndex, []) 1058 return mk(P, 'compound_statement', open.startIndex, close.endIndex, [ 1059 open, 1060 ...exprs, 1061 close, 1062 ]) 1063 } 1064 1065 if (t.type === 'OP' && t.value === '{') { 1066 const open = leaf(P, '{', t) 1067 const body = parseStatements(P, '}') 1068 const closeTok = nextToken(P.L, 'cmd') 1069 const close = 1070 closeTok.type === 'OP' && closeTok.value === '}' 1071 ? leaf(P, '}', closeTok) 1072 : mk(P, '}', open.endIndex, open.endIndex, []) 1073 const node = mk(P, 'compound_statement', open.startIndex, close.endIndex, [ 1074 open, 1075 ...body, 1076 close, 1077 ]) 1078 return maybeRedirect(P, node) 1079 } 1080 1081 if (t.type === 'OP' && (t.value === '[' || t.value === '[[')) { 1082 const open = leaf(P, t.value, t) 1083 const closer = t.value === '[' ? ']' : ']]' 1084 // Grammar: `[` can contain choice(_expression, redirected_statement). 1085 // Try _expression first; if we don't reach `]`, backtrack and parse as 1086 // redirected_statement (handles `[ ! cmd -v go &>/dev/null ]`). 1087 const exprSave = saveLex(P.L) 1088 let expr = parseTestExpr(P, closer) 1089 skipBlanks(P.L) 1090 if (t.value === '[' && peek(P.L) !== ']') { 1091 // Expression parse didn't reach `]` — try as redirected_statement. 1092 // Thread `]` stop-token so parseSimpleCommand doesn't eat it as arg. 
      restoreLex(P.L, exprSave)
      const prevStop = P.stopToken
      P.stopToken = ']'
      const rstmt = parseCommand(P)
      P.stopToken = prevStop
      if (rstmt && rstmt.type === 'redirected_statement') {
        expr = rstmt
      } else {
        // Neither worked — restore and keep the expression result
        restoreLex(P.L, exprSave)
        expr = parseTestExpr(P, closer)
      }
      skipBlanks(P.L)
    }
    const closeTok = nextToken(P.L, 'arg')
    let close: TsNode
    if (closeTok.value === closer) {
      close = leaf(P, closer, closeTok)
    } else {
      // Closer missing — synthesize a zero-width node at the opener's end so
      // startIndex/endIndex stay valid byte offsets
      close = mk(P, closer, open.endIndex, open.endIndex, [])
    }
    const kids = expr ? [open, expr, close] : [open, close]
    return mk(P, 'test_command', open.startIndex, close.endIndex, kids)
  }

  if (t.type === 'WORD') {
    // Compound-command keywords dispatch to their dedicated parsers; all of
    // them except `function` may carry trailing redirects (incl. herestrings).
    if (t.value === 'if') return maybeRedirect(P, parseIf(P, t), true)
    if (t.value === 'while' || t.value === 'until')
      return maybeRedirect(P, parseWhile(P, t), true)
    if (t.value === 'for') return maybeRedirect(P, parseFor(P, t), true)
    // `select` shares the for-loop surface syntax, so it reuses parseFor
    if (t.value === 'select') return maybeRedirect(P, parseFor(P, t), true)
    if (t.value === 'case') return maybeRedirect(P, parseCase(P, t), true)
    if (t.value === 'function') return parseFunction(P, t)
    if (DECL_KEYWORDS.has(t.value))
      return maybeRedirect(P, parseDeclaration(P, t))
    if (t.value === 'unset' || t.value === 'unsetenv') {
      return maybeRedirect(P, parseUnset(P, t))
    }
  }

  restoreLex(P.L, save)
  return parseSimpleCommand(P)
}

/**
 * Parse a simple command: [assignment]* word [arg|redirect]*
 * Returns variable_assignment if only one assignment and no command.
 */
function parseSimpleCommand(P: ParseState): TsNode | null {
  const start = P.L.b
  const assignments: TsNode[] = []
  const preRedirects: TsNode[] = []

  // Prefix loop: interleaved assignments (A=1) and pre-command redirects
  // (2>&1 cmd) in any order, as bash allows.
  while (true) {
    skipBlanks(P.L)
    const a = tryParseAssignment(P)
    if (a) {
      assignments.push(a)
      continue
    }
    const r = tryParseRedirect(P)
    if (r) {
      preRedirects.push(r)
      continue
    }
    break
  }

  skipBlanks(P.L)
  const save = saveLex(P.L)
  const nameTok = nextToken(P.L, 'cmd')
  // A command name must follow. Anything else (EOF, newline, comment, an
  // operator other than { [ [[, or a shell keyword other than `in`) means
  // the prefixes stand alone.
  if (
    nameTok.type === 'EOF' ||
    nameTok.type === 'NEWLINE' ||
    nameTok.type === 'COMMENT' ||
    (nameTok.type === 'OP' &&
      nameTok.value !== '{' &&
      nameTok.value !== '[' &&
      nameTok.value !== '[[') ||
    (nameTok.type === 'WORD' &&
      SHELL_KEYWORDS.has(nameTok.value) &&
      nameTok.value !== 'in')
  ) {
    restoreLex(P.L, save)
    // No command — standalone assignment(s) or redirect
    if (assignments.length === 1 && preRedirects.length === 0) {
      return assignments[0]!
    }
    if (preRedirects.length > 0 && assignments.length === 0) {
      // Bare redirect → redirected_statement with just file_redirect children
      const last = preRedirects[preRedirects.length - 1]!
      return mk(
        P,
        'redirected_statement',
        preRedirects[0]!.startIndex,
        last.endIndex,
        preRedirects,
      )
    }
    if (assignments.length > 1 && preRedirects.length === 0) {
      // `A=1 B=2` with no command → variable_assignments (plural)
      const last = assignments[assignments.length - 1]!
      return mk(
        P,
        'variable_assignments',
        assignments[0]!.startIndex,
        last.endIndex,
        assignments,
      )
    }
    if (assignments.length > 0 || preRedirects.length > 0) {
      // Mixed assignments + redirects with no command name → plain command
      const all = [...assignments, ...preRedirects]
      const last = all[all.length - 1]!
      return mk(P, 'command', start, last.endIndex, all)
    }
    return null
  }
  restoreLex(P.L, save)

  // Check for function definition: name() { ... }
  const fnSave = saveLex(P.L)
  const nm = parseWord(P, 'cmd')
  if (nm && nm.type === 'word') {
    skipBlanks(P.L)
    if (peek(P.L) === '(' && peek(P.L, 1) === ')') {
      const oTok = nextToken(P.L, 'cmd')
      const cTok = nextToken(P.L, 'cmd')
      const oParen = leaf(P, '(', oTok)
      const cParen = leaf(P, ')', cTok)
      skipBlanks(P.L)
      skipNewlines(P)
      const body = parseCommand(P)
      if (body) {
        // If body is redirected_statement(compound_statement, file_redirect...),
        // hoist redirects to function_definition level per tree-sitter grammar
        let bodyKids: TsNode[] = [body]
        if (
          body.type === 'redirected_statement' &&
          body.children.length >= 2 &&
          body.children[0]!.type === 'compound_statement'
        ) {
          bodyKids = body.children
        }
        const last = bodyKids[bodyKids.length - 1]!
        return mk(P, 'function_definition', nm.startIndex, last.endIndex, [
          nm,
          oParen,
          cParen,
          ...bodyKids,
        ])
      }
    }
  }
  restoreLex(P.L, fnSave)

  const nameArg = parseWord(P, 'cmd')
  if (!nameArg) {
    // parseWord failed where nextToken suggested a name — fall back to a lone
    // assignment if that's all we collected.
    if (assignments.length === 1) return assignments[0]!
    return null
  }

  const cmdName = mk(P, 'command_name', nameArg.startIndex, nameArg.endIndex, [
    nameArg,
  ])

  const args: TsNode[] = []
  const redirects: TsNode[] = []
  let heredocRedirect: TsNode | null = null

  while (true) {
    skipBlanks(P.L)
    // Post-command redirects are greedy (repeat1 $._literal) — once a redirect
    // appears after command_name, subsequent literals attach to it per grammar's
    // prec.left. `grep 2>/dev/null -q foo` → file_redirect eats `-q foo`.
    // Args parsed BEFORE the first redirect still go to command (cat a b > out).
    const r = tryParseRedirect(P, true)
    if (r) {
      // Classify: heredocs are resolved after the command (body follows the
      // newline); herestrings count as command children (args); file
      // redirects collect for the wrapping redirected_statement.
      if (r.type === 'heredoc_redirect') {
        heredocRedirect = r
      } else if (r.type === 'herestring_redirect') {
        args.push(r)
      } else {
        redirects.push(r)
      }
      continue
    }
    // Once a file_redirect has been seen, command args are done — grammar's
    // command rule doesn't allow file_redirect in its post-name choice, so
    // anything after belongs to redirected_statement's file_redirect children.
    if (redirects.length > 0) break
    // `[` test_command backtrack — stop at `]` so outer handler can consume it
    if (P.stopToken === ']' && peek(P.L) === ']') break
    const save2 = saveLex(P.L)
    const pk = nextToken(P.L, 'arg')
    // Stop the arg loop at any command-terminating operator.
    if (
      pk.type === 'EOF' ||
      pk.type === 'NEWLINE' ||
      pk.type === 'COMMENT' ||
      (pk.type === 'OP' &&
        (pk.value === '|' ||
          pk.value === '|&' ||
          pk.value === '&&' ||
          pk.value === '||' ||
          pk.value === ';' ||
          pk.value === ';;' ||
          pk.value === ';&' ||
          pk.value === ';;&' ||
          pk.value === '&' ||
          pk.value === ')' ||
          pk.value === '}' ||
          pk.value === '))'))
    ) {
      restoreLex(P.L, save2)
      break
    }
    restoreLex(P.L, save2)
    const arg = parseWord(P, 'arg')
    if (!arg) {
      // Lone `(` in arg position — tree-sitter parses this as subshell arg
      // e.g., `echo =(cmd)` → command has ERROR(=), subshell(cmd) as args
      if (peek(P.L) === '(') {
        const oTok = nextToken(P.L, 'cmd')
        const open = leaf(P, '(', oTok)
        const body = parseStatements(P, ')')
        const cTok = nextToken(P.L, 'cmd')
        const close =
          cTok.type === 'OP' && cTok.value === ')'
            ? leaf(P, ')', cTok)
            : mk(P, ')', open.endIndex, open.endIndex, [])
        args.push(
          mk(P, 'subshell', open.startIndex, close.endIndex, [
            open,
            ...body,
            close,
          ]),
        )
        continue
      }
      break
    }
    // Lone `=` in arg position is a parse error in bash — tree-sitter wraps
    // it in ERROR for recovery. Happens in `echo =(cmd)` (zsh process-sub).
    if (arg.type === 'word' && arg.text === '=') {
      args.push(mk(P, 'ERROR', arg.startIndex, arg.endIndex, [arg]))
      continue
    }
    // Word immediately followed by `(` (no whitespace) is a parse error —
    // bash doesn't allow glob-then-subshell adjacency. tree-sitter wraps the
    // word in ERROR. Catches zsh glob qualifiers like `*.(e:'cmd':)`.
    if (
      (arg.type === 'word' || arg.type === 'concatenation') &&
      peek(P.L) === '(' &&
      P.L.b === arg.endIndex
    ) {
      args.push(mk(P, 'ERROR', arg.startIndex, arg.endIndex, [arg]))
      continue
    }
    args.push(arg)
  }

  // preRedirects (e.g., `2>&1 cat`, `<<<str cmd`) go INSIDE the command node
  // before command_name per tree-sitter grammar, not in redirected_statement
  const cmdChildren = [...assignments, ...preRedirects, cmdName, ...args]
  const cmdEnd =
    cmdChildren.length > 0
      ? cmdChildren[cmdChildren.length - 1]!.endIndex
      : cmdName.endIndex
  const cmdStart = cmdChildren[0]!.startIndex
  const cmd = mk(P, 'command', cmdStart, cmdEnd, cmdChildren)

  if (heredocRedirect) {
    // Scan heredoc body now
    scanHeredocBodies(P)
    const hd = P.L.heredocs.shift()
    if (hd && heredocRedirect.children.length >= 2) {
      // Quoted delimiters (<<'EOF') suppress expansion → body is a leaf;
      // unquoted bodies get expansion children parsed out of the byte range.
      const bodyNode = mk(
        P,
        'heredoc_body',
        hd.bodyStart,
        hd.bodyEnd,
        hd.quoted ? [] : parseHeredocBodyContent(P, hd.bodyStart, hd.bodyEnd),
      )
      const endNode = mk(P, 'heredoc_end', hd.endStart, hd.endEnd, [])
      heredocRedirect.children.push(bodyNode, endNode)
      // Extend the redirect node to cover the scanned body text
      heredocRedirect.endIndex = hd.endEnd
      heredocRedirect.text = sliceBytes(
        P,
        heredocRedirect.startIndex,
        hd.endEnd,
      )
    }
    const allR = [...preRedirects, heredocRedirect, ...redirects]
    const rStart =
      preRedirects.length > 0
        ? Math.min(cmd.startIndex, preRedirects[0]!.startIndex)
        : cmd.startIndex
    return mk(P, 'redirected_statement', rStart, heredocRedirect.endIndex, [
      cmd,
      ...allR,
    ])
  }

  if (redirects.length > 0) {
    const last = redirects[redirects.length - 1]!
    return mk(P, 'redirected_statement', cmd.startIndex, last.endIndex, [
      cmd,
      ...redirects,
    ])
  }

  return cmd
}

/**
 * Wrap `node` in a redirected_statement if trailing redirects follow it.
 * Herestring redirects (<<<) are consumed only when `allowHerestring` is set
 * (compound commands pass true); otherwise the lexer is restored so the
 * herestring is left for the caller.
 */
function maybeRedirect(
  P: ParseState,
  node: TsNode,
  allowHerestring = false,
): TsNode {
  const redirects: TsNode[] = []
  while (true) {
    skipBlanks(P.L)
    const save = saveLex(P.L)
    const r = tryParseRedirect(P)
    if (!r) break
    if (r.type === 'herestring_redirect' && !allowHerestring) {
      restoreLex(P.L, save)
      break
    }
    redirects.push(r)
  }
  if (redirects.length === 0) return node
  const last = redirects[redirects.length - 1]!
  return mk(P, 'redirected_statement', node.startIndex, last.endIndex, [
    node,
    ...redirects,
  ])
}

/**
 * Try to parse a variable assignment: `name=value`, `name+=value`, or
 * `name[idx]=value`, where value may be absent, a word, or an array literal
 * `(...)`. Restores the lexer and returns null when the input is not an
 * assignment (e.g. `name==x` comparison, or no `=` at all).
 */
function tryParseAssignment(P: ParseState): TsNode | null {
  const save = saveLex(P.L)
  skipBlanks(P.L)
  const startB = P.L.b
  // Must start with identifier
  if (!isIdentStart(peek(P.L))) {
    restoreLex(P.L, save)
    return null
  }
  while (isIdentChar(peek(P.L))) advance(P.L)
  const nameEnd = P.L.b
  // Optional subscript
  let subEnd = nameEnd
  if (peek(P.L) === '[') {
    advance(P.L)
    let depth = 1
    // NOTE(review): bracket-depth scan only — a `]` inside quotes within the
    // subscript would close it early; confirm the corpus covers that shape.
    while (P.L.i < P.L.len && depth > 0) {
      const c = peek(P.L)
      if (c === '[') depth++
      else if (c === ']') depth--
      advance(P.L)
    }
    subEnd = P.L.b
  }
  const c = peek(P.L)
  const c1 = peek(P.L, 1)
  let op: string
  // `=` (but not `==`) or `+=`; anything else means "not an assignment"
  if (c === '=' && c1 !== '=') {
    op = '='
  } else if (c === '+' && c1 === '=') {
    op = '+='
  } else {
    restoreLex(P.L, save)
    return null
  }
  const nameNode = mk(P, 'variable_name', startB, nameEnd, [])
  // Subscript handling: wrap in subscript node if present
  let lhs: TsNode = nameNode
  if (subEnd > nameEnd) {
    const brOpen = mk(P, '[', nameEnd, nameEnd + 1, [])
    const idx = parseSubscriptIndex(P, nameEnd + 1, subEnd - 1)
    const brClose = mk(P, ']', subEnd - 1, subEnd, [])
    lhs = mk(P, 'subscript', startB, subEnd, [nameNode, brOpen, idx, brClose])
  }
  const opStart = P.L.b
  advance(P.L)
  if (op === '+=') advance(P.L)
  const opEnd = P.L.b
  const opNode = mk(P, op, opStart, opEnd, [])
  let val: TsNode | null = null
  if (peek(P.L) === '(') {
    // Array
    const aoTok = nextToken(P.L, 'cmd')
    const aOpen = leaf(P, '(', aoTok)
    const elems: TsNode[] = [aOpen]
    while (true) {
      skipBlanks(P.L)
      if (peek(P.L) === ')') break
      const e = parseWord(P, 'arg')
      if (!e) break
      elems.push(e)
    }
    const acTok = nextToken(P.L, 'cmd')
    // Missing `)` → zero-width closer at the opener so offsets stay valid
    const aClose =
      acTok.value === ')'
        ? leaf(P, ')', acTok)
        : mk(P, ')', aOpen.endIndex, aOpen.endIndex, [])
    elems.push(aClose)
    val = mk(P, 'array', aOpen.startIndex, aClose.endIndex, elems)
  } else {
    const c2 = peek(P.L)
    // Empty value (A=) when the next char terminates the word
    if (
      c2 &&
      c2 !== ' ' &&
      c2 !== '\t' &&
      c2 !== '\n' &&
      c2 !== ';' &&
      c2 !== '&' &&
      c2 !== '|' &&
      c2 !== ')' &&
      c2 !== '}'
    ) {
      val = parseWord(P, 'arg')
    }
  }
  const kids = val ? [lhs, opNode, val] : [lhs, opNode]
  const end = val ? val.endIndex : opEnd
  return mk(P, 'variable_assignment', startB, end, kids)
}

/**
 * Parse subscript index content. Parsed arithmetically per tree-sitter grammar:
 * `${a[1+2]}` → binary_expression; `${a[++i]}` → unary_expression(word);
 * `${a[(($n+1))]}` → compound_statement(binary_expression). Falls back to
 * simple patterns (@, *) as word.
 */
function parseSubscriptIndexInline(P: ParseState): TsNode | null {
  skipBlanks(P.L)
  const c = peek(P.L)
  // @ or * alone → word (associative array all-keys)
  if ((c === '@' || c === '*') && peek(P.L, 1) === ']') {
    const s = P.L.b
    advance(P.L)
    return mk(P, 'word', s, P.L.b, [])
  }
  // ((expr)) → compound_statement wrapping the inner arithmetic
  if (c === '(' && peek(P.L, 1) === '(') {
    const oStart = P.L.b
    advance(P.L)
    advance(P.L)
    const open = mk(P, '((', oStart, P.L.b, [])
    const inner = parseArithExpr(P, '))', 'var')
    skipBlanks(P.L)
    let close: TsNode
    if (peek(P.L) === ')' && peek(P.L, 1) === ')') {
      const cs = P.L.b
      advance(P.L)
      advance(P.L)
      close = mk(P, '))', cs, P.L.b, [])
    } else {
      // Unterminated `((` — zero-width closer at current position
      close = mk(P, '))', P.L.b, P.L.b, [])
    }
    const kids = inner ? [open, inner, close] : [open, close]
    return mk(P, 'compound_statement', open.startIndex, close.endIndex, kids)
  }
  // Arithmetic — but bare identifiers in subscript use 'word' mode per
  // tree-sitter (${words[++counter]} → unary_expression(word)).
  return parseArithExpr(P, ']', 'word')
}

/** Legacy byte-range subscript index parser — kept for callers that pre-scan. */
function parseSubscriptIndex(
  P: ParseState,
  startB: number,
  endB: number,
): TsNode {
  const text = sliceBytes(P, startB, endB)
  // Pure digits → number leaf
  if (/^\d+$/.test(text)) return mk(P, 'number', startB, endB, [])
  // $name → simple_expansion($, variable_name)
  const m = /^\$([a-zA-Z_]\w*)$/.exec(text)
  if (m) {
    const dollar = mk(P, '$', startB, startB + 1, [])
    const vn = mk(P, 'variable_name', startB + 1, endB, [])
    return mk(P, 'simple_expansion', startB, endB, [dollar, vn])
  }
  // $? $# etc → simple_expansion($, special_variable_name)
  if (text.length === 2 && text[0] === '$' && SPECIAL_VARS.has(text[1]!)) {
    const dollar = mk(P, '$', startB, startB + 1, [])
    const vn = mk(P, 'special_variable_name', startB + 1, endB, [])
    return mk(P, 'simple_expansion', startB, endB, [dollar, vn])
  }
  // Anything else (expressions, strings) → opaque word
  return mk(P, 'word', startB, endB, [])
}

/**
 * Can the current position start a redirect destination literal?
 * Returns false at redirect ops, terminators, or file-descriptor-prefixed ops
 * so file_redirect's repeat1($._literal) stops at the right boundary.
 */
function isRedirectLiteralStart(P: ParseState): boolean {
  const c = peek(P.L)
  if (c === '' || c === '\n') return false
  // Shell terminators and operators
  if (c === '|' || c === '&' || c === ';' || c === '(' || c === ')')
    return false
  // Redirect operators (< > with any suffix; <( >( handled by caller)
  if (c === '<' || c === '>') {
    // <( >( are process substitutions — those ARE literals
    return peek(P.L, 1) === '('
  }
  // N< N> file descriptor prefix — starts a new redirect, not a literal
  if (isDigit(c)) {
    let j = P.L.i
    while (j < P.L.len && isDigit(P.L.src[j]!)) j++
    const after = j < P.L.len ? P.L.src[j]! : ''
    if (after === '>' || after === '<') return false
  }
  // `}` only terminates if we're in a context where it's a closer — but
  // file_redirect sees `}` as word char (e.g., `>$HOME}` is valid path char).
  // Actually `}` at top level terminates compound_statement — need to stop.
  if (c === '}') return false
  // Test command closer — when parseSimpleCommand is called from `[` context,
  // `]` must terminate so parseCommand can return and `[` handler consume it.
  if (P.stopToken === ']' && c === ']') return false
  return true
}

/**
 * Parse a redirect operator + destination(s).
 * Handles herestrings (<<<), heredocs (<< <<-), close-fd forms (<&- >&-),
 * and file redirects (> >> >& >| &> &>> < <&), each with an optional leading
 * file-descriptor number. Restores the lexer and returns null when no
 * redirect operator is found.
 * @param greedy When true, file_redirect consumes repeat1($._literal) per
 * grammar's prec.left — `cmd >f a b c` attaches `a b c` to the redirect.
 * When false (preRedirect context), takes only 1 destination because
 * command's dynamic precedence beats redirected_statement's prec(-1).
 */
function tryParseRedirect(P: ParseState, greedy = false): TsNode | null {
  const save = saveLex(P.L)
  skipBlanks(P.L)
  // File descriptor prefix?
  let fd: TsNode | null = null
  if (isDigit(peek(P.L))) {
    const startB = P.L.b
    let j = P.L.i
    while (j < P.L.len && isDigit(P.L.src[j]!)) j++
    const after = j < P.L.len ? P.L.src[j]! : ''
    // Digits are a file_descriptor only when directly followed by < or >
    if (after === '>' || after === '<') {
      while (P.L.i < j) advance(P.L)
      fd = mk(P, 'file_descriptor', startB, P.L.b, [])
    }
  }
  const t = nextToken(P.L, 'arg')
  if (t.type !== 'OP') {
    restoreLex(P.L, save)
    return null
  }
  const v = t.value
  if (v === '<<<') {
    const op = leaf(P, '<<<', t)
    skipBlanks(P.L)
    const target = parseWord(P, 'arg')
    const end = target ? target.endIndex : op.endIndex
    const kids = target ? [op, target] : [op]
    return mk(
      P,
      'herestring_redirect',
      fd ? fd.startIndex : op.startIndex,
      end,
      fd ? [fd, ...kids] : kids,
    )
  }
  if (v === '<<' || v === '<<-') {
    const op = leaf(P, v, t)
    // Heredoc start — delimiter word (may be quoted)
    skipBlanks(P.L)
    const dStart = P.L.b
    let quoted = false
    let delim = ''
    const dc = peek(P.L)
    if (dc === "'" || dc === '"') {
      // Quoted delimiter → body is literal (no expansions)
      quoted = true
      advance(P.L)
      while (P.L.i < P.L.len && peek(P.L) !== dc) {
        delim += peek(P.L)
        advance(P.L)
      }
      if (P.L.i < P.L.len) advance(P.L)
    } else if (dc === '\\') {
      // Backslash-escaped delimiter: \X — exactly one escaped char, body is
      // quoted (literal). Covers <<\EOF <<\' <<\\ etc.
      quoted = true
      advance(P.L)
      if (P.L.i < P.L.len && peek(P.L) !== '\n') {
        delim += peek(P.L)
        advance(P.L)
      }
      // May be followed by more ident chars (e.g. <<\EOF → delim "EOF")
      while (P.L.i < P.L.len && isIdentChar(peek(P.L))) {
        delim += peek(P.L)
        advance(P.L)
      }
    } else {
      // Unquoted delimiter: bash accepts most non-metacharacters (not just
      // identifiers). Allow !, -, ., etc. — stop at shell metachars.
      while (P.L.i < P.L.len && isHeredocDelimChar(peek(P.L))) {
        delim += peek(P.L)
        advance(P.L)
      }
    }
    const dEnd = P.L.b
    const startNode = mk(P, 'heredoc_start', dStart, dEnd, [])
    // Register pending heredoc — body scanned at next newline
    P.L.heredocs.push({
      delim,
      stripTabs: v === '<<-',
      quoted,
      bodyStart: 0,
      bodyEnd: 0,
      endStart: 0,
      endEnd: 0,
    })
    const kids = fd ? [fd, op, startNode] : [op, startNode]
    const startIdx = fd ? fd.startIndex : op.startIndex
    // SECURITY: tree-sitter nests any pipeline/list/file_redirect appearing
    // between heredoc_start and the newline as a CHILD of heredoc_redirect.
    // `ls <<'EOF' | rm -rf /tmp/evil` must not silently drop the rm. Parse
    // trailing words and file_redirects properly (ast.ts walkHeredocRedirect
    // fails closed on any unrecognized child via tooComplex). Pipeline / list
    // operators (| && || ;) are structurally complex — emit ERROR so the same
    // fail-closed path rejects them.
    while (true) {
      skipBlanks(P.L)
      const tc = peek(P.L)
      if (tc === '\n' || tc === '' || P.L.i >= P.L.len) break
      // File redirect after delimiter: cat <<EOF > out.txt
      if (tc === '>' || tc === '<' || isDigit(tc)) {
        const rSave = saveLex(P.L)
        const r = tryParseRedirect(P)
        if (r && r.type === 'file_redirect') {
          kids.push(r)
          continue
        }
        restoreLex(P.L, rSave)
      }
      // Pipeline after heredoc_start: `one <<EOF | grep two` — tree-sitter
      // nests the pipeline as a child of heredoc_redirect. ast.ts
      // walkHeredocRedirect fails closed on pipeline/command via tooComplex.
      if (tc === '|' && peek(P.L, 1) !== '|') {
        advance(P.L)
        skipBlanks(P.L)
        const pipeCmds: TsNode[] = []
        while (true) {
          const cmd = parseCommand(P)
          if (!cmd) break
          pipeCmds.push(cmd)
          skipBlanks(P.L)
          if (peek(P.L) === '|' && peek(P.L, 1) !== '|') {
            const ps = P.L.b
            advance(P.L)
            pipeCmds.push(mk(P, '|', ps, P.L.b, []))
            skipBlanks(P.L)
            continue
          }
          break
        }
        if (pipeCmds.length > 0) {
          const pl = pipeCmds[pipeCmds.length - 1]!
          // tree-sitter always wraps in pipeline after `|`, even single command
          kids.push(
            mk(P, 'pipeline', pipeCmds[0]!.startIndex, pl.endIndex, pipeCmds),
          )
        }
        continue
      }
      // && / || after heredoc_start: `cat <<-EOF || die "..."` — tree-sitter
      // nests just the RHS command (not a list) as a child of heredoc_redirect.
      if (
        (tc === '&' && peek(P.L, 1) === '&') ||
        (tc === '|' && peek(P.L, 1) === '|')
      ) {
        advance(P.L)
        advance(P.L)
        skipBlanks(P.L)
        const rhs = parseCommand(P)
        if (rhs) kids.push(rhs)
        continue
      }
      // Terminator / unhandled metachar — consume rest of line as ERROR so
      // ast.ts rejects it. Covers ; & ( )
      if (tc === '&' || tc === ';' || tc === '(' || tc === ')') {
        const eStart = P.L.b
        while (P.L.i < P.L.len && peek(P.L) !== '\n') advance(P.L)
        kids.push(mk(P, 'ERROR', eStart, P.L.b, []))
        break
      }
      // Trailing word argument: newins <<-EOF - org.freedesktop.service
      const w = parseWord(P, 'arg')
      if (w) {
        kids.push(w)
        continue
      }
      // Unrecognized — consume rest of line as ERROR
      const eStart = P.L.b
      while (P.L.i < P.L.len && peek(P.L) !== '\n') advance(P.L)
      if (P.L.b > eStart) kids.push(mk(P, 'ERROR', eStart, P.L.b, []))
      break
    }
    return mk(P, 'heredoc_redirect', startIdx, P.L.b, kids)
  }
  // Close-fd variants: `<&-` `>&-` have OPTIONAL destination (0 or 1)
  if (v === '<&-' || v === '>&-') {
    const op = leaf(P, v, t)
    const kids: TsNode[] = []
    if (fd) kids.push(fd)
    kids.push(op)
    // Optional single destination — only consume if next is a literal
    skipBlanks(P.L)
    const dSave = saveLex(P.L)
    const dest = isRedirectLiteralStart(P) ? parseWord(P, 'arg') : null
    if (dest) {
      kids.push(dest)
    } else {
      restoreLex(P.L, dSave)
    }
    const startIdx = fd ? fd.startIndex : op.startIndex
    const end = dest ? dest.endIndex : op.endIndex
    return mk(P, 'file_redirect', startIdx, end, kids)
  }
  if (
    v === '>' ||
    v === '>>' ||
    v === '>&' ||
    v === '>|' ||
    v === '&>' ||
    v === '&>>' ||
    v === '<' ||
    v === '<&'
  ) {
    const op = leaf(P, v, t)
    const kids: TsNode[] = []
    if (fd) kids.push(fd)
    kids.push(op)
    // Grammar: destination is repeat1($._literal) — greedily consume literals
    // until a non-literal (redirect op, terminator, etc). tree-sitter's
    // prec.left makes `cmd >f a b c` attach `a b c` to the file_redirect,
    // NOT to the command. Structural quirk but required for corpus parity.
    // In preRedirect context (greedy=false), take only 1 literal because
    // command's dynamic precedence beats redirected_statement's prec(-1).
    let end = op.endIndex
    let taken = 0
    while (true) {
      skipBlanks(P.L)
      if (!isRedirectLiteralStart(P)) break
      if (!greedy && taken >= 1) break
      const tc = peek(P.L)
      const tc1 = peek(P.L, 1)
      let target: TsNode | null = null
      // <( and >( destinations are process substitutions, not words
      if ((tc === '<' || tc === '>') && tc1 === '(') {
        target = parseProcessSub(P)
      } else {
        target = parseWord(P, 'arg')
      }
      if (!target) break
      kids.push(target)
      end = target.endIndex
      taken++
    }
    const startIdx = fd ? fd.startIndex : op.startIndex
    return mk(P, 'file_redirect', startIdx, end, kids)
  }
  restoreLex(P.L, save)
  return null
}

/**
 * Parse a process substitution `<(...)` or `>(...)` at the current position.
 * Returns null if the cursor is not at `<(` / `>(`. A missing `)` yields a
 * zero-width close node so byte offsets stay consistent.
 */
function parseProcessSub(P: ParseState): TsNode | null {
  const c = peek(P.L)
  if ((c !== '<' && c !== '>') || peek(P.L, 1) !== '(') return null
  const start = P.L.b
  advance(P.L)
  advance(P.L)
  const open = mk(P, c + '(', start, P.L.b, [])
  const body = parseStatements(P, ')')
  skipBlanks(P.L)
  let close: TsNode
  if (peek(P.L) === ')') {
    const cs = P.L.b
    advance(P.L)
    close = mk(P, ')', cs, P.L.b, [])
  } else {
    close = mk(P, ')', P.L.b, P.L.b, [])
  }
  return mk(P, 'process_substitution', start, close.endIndex, [
    open,
    ...body,
    close,
  ])
}

/**
 * Scan forward from the current line for the bodies of pending heredocs
 * registered in P.L.heredocs, filling in each record's bodyStart/bodyEnd and
 * endStart/endEnd byte offsets. <<- strips leading tabs before matching the
 * delimiter line. Unterminated heredocs get an empty zero-width range at EOF.
 *
 * NOTE(review): the `return` on the first delimiter match exits the whole
 * function, so with several pending heredocs (`cat <<A <<B`) only the first
 * is scanned per call — confirm callers re-invoke for the remainder.
 */
function scanHeredocBodies(P: ParseState): void {
  // Skip to newline if not already there
  while (P.L.i < P.L.len && P.L.src[P.L.i] !== '\n') advance(P.L)
  if (P.L.i < P.L.len) advance(P.L)
  for (const hd of P.L.heredocs) {
    hd.bodyStart = P.L.b
    const delimLen = hd.delim.length
    while (P.L.i < P.L.len) {
      const lineStart = P.L.i
      const lineStartB = P.L.b
      // Skip leading tabs if <<-
      let checkI = lineStart
      if (hd.stripTabs) {
        while (checkI < P.L.len && P.L.src[checkI] === '\t') checkI++
      }
      // Check if this line is the delimiter
      if (
        P.L.src.startsWith(hd.delim, checkI) &&
        (checkI + delimLen >= P.L.len ||
          P.L.src[checkI + delimLen] === '\n' ||
          P.L.src[checkI + delimLen] === '\r')
      ) {
        hd.bodyEnd = lineStartB
        // Advance past tabs
        while (P.L.i < checkI) advance(P.L)
        hd.endStart = P.L.b
        // Advance past delimiter
        for (let k = 0; k < delimLen; k++) advance(P.L)
        hd.endEnd = P.L.b
        // Skip trailing newline
        if (P.L.i < P.L.len && P.L.src[P.L.i] === '\n') advance(P.L)
        return
      }
      // Consume line
      while (P.L.i < P.L.len && P.L.src[P.L.i] !== '\n') advance(P.L)
      if (P.L.i < P.L.len) advance(P.L)
    }
    // Unterminated
    hd.bodyEnd = P.L.b
    hd.endStart = P.L.b
    hd.endEnd = P.L.b
  }
}

/**
 * Parse expansions ($var, ${...}, $(...), backticks) inside an unquoted
 * heredoc body byte range, returning heredoc_content/expansion children.
 * The lexer is repositioned for the scan and restored afterwards.
 */
function parseHeredocBodyContent(
  P: ParseState,
  start: number,
  end: number,
): TsNode[] {
  // Parse expansions inside an unquoted heredoc body.
  const saved = saveLex(P.L)
  // Position lexer at body start
  restoreLexToByte(P, start)
  const out: TsNode[] = []
  let contentStart = P.L.b
  // tree-sitter-bash's heredoc_body rule hides the initial text segment
  // (_heredoc_body_beginning) — only content AFTER the first expansion is
  // emitted as heredoc_content. Track whether we've seen an expansion yet.
  let sawExpansion = false
  while (P.L.b < end) {
    const c = peek(P.L)
    // Backslash escapes suppress expansion: \$ \` stay literal in heredoc.
    if (c === '\\') {
      const nxt = peek(P.L, 1)
      if (nxt === '$' || nxt === '`' || nxt === '\\') {
        advance(P.L)
        advance(P.L)
        continue
      }
      advance(P.L)
      continue
    }
    if (c === '$' || c === '`') {
      const preB = P.L.b
      const exp = parseDollarLike(P)
      // Bare `$` followed by non-name (e.g. `$'` in a regex) returns a lone
      // '$' leaf, not an expansion — treat as literal content, don't split.
      if (
        exp &&
        (exp.type === 'simple_expansion' ||
          exp.type === 'expansion' ||
          exp.type === 'command_substitution' ||
          exp.type === 'arithmetic_expansion')
      ) {
        if (sawExpansion && preB > contentStart) {
          out.push(mk(P, 'heredoc_content', contentStart, preB, []))
        }
        out.push(exp)
        contentStart = P.L.b
        sawExpansion = true
      }
      continue
    }
    advance(P.L)
  }
  // Only emit heredoc_content children if there were expansions — otherwise
  // the heredoc_body is a leaf node (tree-sitter convention).
  if (sawExpansion) {
    out.push(mk(P, 'heredoc_content', contentStart, end, []))
  }
  restoreLex(P.L, saved)
  return out
}

/**
 * Reposition the lexer at an arbitrary UTF-8 byte offset by binary-searching
 * the lazily-built char-index→byte-offset table (byteAt populates it).
 * Sets both the JS-string index (i) and the byte offset (b).
 */
function restoreLexToByte(P: ParseState, targetByte: number): void {
  if (!P.L.byteTable) byteAt(P.L, 0)
  const t = P.L.byteTable!
  let lo = 0
  let hi = P.src.length
  while (lo < hi) {
    const m = (lo + hi) >>> 1
    if (t[m]! < targetByte) lo = m + 1
    else hi = m
  }
  P.L.i = lo
  P.L.b = targetByte
}

/**
 * Parse a word-position element: bare word, string, expansion, or concatenation
 * thereof. Returns a single node; if multiple adjacent fragments, wraps in
 * concatenation.
 */
function parseWord(P: ParseState, _ctx: 'cmd' | 'arg'): TsNode | null {
  skipBlanks(P.L)
  const parts: TsNode[] = []
  while (P.L.i < P.L.len) {
    const c = peek(P.L)
    // Word terminators: whitespace, EOF, and shell control operators
    if (
      c === ' ' ||
      c === '\t' ||
      c === '\n' ||
      c === '\r' ||
      c === '' ||
      c === '|' ||
      c === '&' ||
      c === ';' ||
      c === '(' ||
      c === ')'
    ) {
      break
    }
    // < > are redirect operators unless <( >( (process substitution)
    if (c === '<' || c === '>') {
      if (peek(P.L, 1) === '(') {
        const ps = parseProcessSub(P)
        if (ps) parts.push(ps)
        continue
      }
      break
    }
    if (c === '"') {
      parts.push(parseDoubleQuoted(P))
      continue
    }
    if (c === "'") {
      const tok = nextToken(P.L, 'arg')
      parts.push(leaf(P, 'raw_string', tok))
      continue
    }
    if (c === '$') {
      const c1 = peek(P.L, 1)
      if (c1 === "'") {
        // $'...' ANSI-C quoting
        const tok = nextToken(P.L, 'arg')
        parts.push(leaf(P, 'ansi_c_string', tok))
        continue
      }
      if (c1 === '"') {
        // Translated string: emit $ leaf + string node
        const dTok: Token = {
          type: 'DOLLAR',
          value: '$',
          start: P.L.b,
          end: P.L.b + 1,
        }
        advance(P.L)
        parts.push(leaf(P, '$', dTok))
        parts.push(parseDoubleQuoted(P))
        continue
      }
      if (c1 === '`') {
        // `$` followed by backtick — tree-sitter elides the $ entirely
        // and emits just (command_substitution). Consume $ and let next
        // iteration handle the backtick.
        advance(P.L)
        continue
      }
      const exp = parseDollarLike(P)
      if (exp) parts.push(exp)
      continue
    }
    if (c === '`') {
      // Inside a backtick substitution, a backtick closes it — stop the word
      if (P.inBacktick > 0) break
      const bt = parseBacktick(P)
      if (bt) parts.push(bt)
      continue
    }
    // Brace expression {1..5} or {a,b,c} — only if looks like one
    if (c === '{') {
      const be = tryParseBraceExpr(P)
      if (be) {
        parts.push(be)
        continue
      }
      // SECURITY: if `{` is immediately followed by a command terminator
      // (; | & newline or EOF), it's a standalone word — don't slurp the
      // rest of the line via tryParseBraceLikeCat. `echo {;touch /tmp/evil`
      // must split on `;` so the security walker sees `touch`.
      const nc = peek(P.L, 1)
      if (
        nc === ';' ||
        nc === '|' ||
        nc === '&' ||
        nc === '\n' ||
        nc === '' ||
        nc === ')' ||
        nc === ' ' ||
        nc === '\t'
      ) {
        const bStart = P.L.b
        advance(P.L)
        parts.push(mk(P, 'word', bStart, P.L.b, []))
        continue
      }
      // Otherwise treat { and } as word fragments
      const cat = tryParseBraceLikeCat(P)
      if (cat) {
        for (const p of cat) parts.push(p)
        continue
      }
    }
    // Standalone `}` in arg position is a word (e.g., `echo }foo`).
    // parseBareWord breaks on `}` so handle it here.
    if (c === '}') {
      const bStart = P.L.b
      advance(P.L)
      parts.push(mk(P, 'word', bStart, P.L.b, []))
      continue
    }
    // `[` and `]` are single-char word fragments (tree-sitter splits at
    // brackets: `[:lower:]` → `[` `:lower:` `]`, `{o[k]}` → 6 words).
    if (c === '[' || c === ']') {
      const bStart = P.L.b
      advance(P.L)
      parts.push(mk(P, 'word', bStart, P.L.b, []))
      continue
    }
    // Bare word fragment
    const frag = parseBareWord(P)
    if (!frag) break
    // `NN#${...}` or `NN#$(...)` → (number (expansion|command_substitution)).
    // Grammar: number can be seq(/-?(0x)?[0-9]+#/, choice(expansion, cmd_sub)).
    // `10#${cmd}` must NOT be concatenation — it's a single number node with
    // the expansion as child. Detect here: frag ends with `#`, next is $ {/(.
    if (
      frag.type === 'word' &&
      /^-?(0x)?[0-9]+#$/.test(frag.text) &&
      peek(P.L) === '$' &&
      (peek(P.L, 1) === '{' || peek(P.L, 1) === '(')
    ) {
      const exp = parseDollarLike(P)
      if (exp) {
        // Prefix `NN#` is an anonymous pattern in grammar — only the
        // expansion/cmd_sub is a named child.
        parts.push(mk(P, 'number', frag.startIndex, exp.endIndex, [exp]))
        continue
      }
    }
    parts.push(frag)
  }
  if (parts.length === 0) return null
  if (parts.length === 1) return parts[0]!
  // Concatenation
  const first = parts[0]!
  const last = parts[parts.length - 1]!
  return mk(P, 'concatenation', first.startIndex, last.endIndex, parts)
}

/**
 * Consume a maximal run of plain word characters, returning a `word` leaf
 * (or `number` when the text is an optionally-negative integer), or null if
 * nothing was consumed. Backslash pairs the following char into the word;
 * a line continuation (\ before \r?\n) ends the word.
 */
function parseBareWord(P: ParseState): TsNode | null {
  const start = P.L.b
  const startI = P.L.i
  while (P.L.i < P.L.len) {
    const c = peek(P.L)
    if (c === '\\') {
      if (P.L.i + 1 >= P.L.len) {
        // Trailing unpaired `\` at true EOF — tree-sitter emits word WITHOUT
        // the `\` plus a sibling ERROR node. Stop here; caller emits ERROR.
        break
      }
      const nx = P.L.src[P.L.i + 1]
      if (nx === '\n' || (nx === '\r' && P.L.src[P.L.i + 2] === '\n')) {
        // Line continuation BREAKS the word (tree-sitter quirk) — handles \r?\n
        break
      }
      // Escaped char: consume backslash + following char as word content
      advance(P.L)
      advance(P.L)
      continue
    }
    // Any metachar / quote / expansion / bracket char ends the bare fragment
    if (
      c === ' ' ||
      c === '\t' ||
      c === '\n' ||
      c === '\r' ||
      c === '' ||
      c === '|' ||
      c === '&' ||
      c === ';' ||
      c === '(' ||
      c === ')' ||
      c === '<' ||
      c === '>' ||
      c === '"' ||
      c === "'" ||
      c === '$' ||
      c === '`' ||
      c === '{' ||
      c === '}' ||
      c === '[' ||
      c === ']'
    ) {
      break
    }
    advance(P.L)
  }
  if (P.L.b === start) return null
  const text = P.src.slice(startI, P.L.i)
  const type = /^-?\d+$/.test(text) ? 'number' : 'word'
  return mk(P, type, start, P.L.b, [])
}

/**
 * Try to parse a range brace expression `{N..M}` where both endpoints are
 * numbers or both are single identifier chars. Restores the lexer and
 * returns null on any other shape (comma lists fall through to
 * tryParseBraceLikeCat).
 */
function tryParseBraceExpr(P: ParseState): TsNode | null {
  // {N..M} where N, M are numbers or single chars
  const save = saveLex(P.L)
  if (peek(P.L) !== '{') return null
  const oStart = P.L.b
  advance(P.L)
  const oEnd = P.L.b
  // First part
  const p1Start = P.L.b
  while (isDigit(peek(P.L)) || isIdentStart(peek(P.L))) advance(P.L)
  const p1End = P.L.b
  if (p1End === p1Start || peek(P.L) !== '.' || peek(P.L, 1) !== '.') {
    restoreLex(P.L, save)
    return null
  }
  const dotStart = P.L.b
  advance(P.L)
  advance(P.L)
  const dotEnd = P.L.b
  const p2Start = P.L.b
  while (isDigit(peek(P.L)) || isIdentStart(peek(P.L))) advance(P.L)
  const p2End = P.L.b
  if (p2End === p2Start || peek(P.L) !== '}') {
    restoreLex(P.L, save)
    return null
  }
  const cStart = P.L.b
  advance(P.L)
  const cEnd = P.L.b
  const p1Text = sliceBytes(P, p1Start, p1End)
  const p2Text = sliceBytes(P, p2Start, p2End)
  const p1IsNum = /^\d+$/.test(p1Text)
  const p2IsNum = /^\d+$/.test(p2Text)
  // Valid brace expression: both numbers OR both single chars. Mixed = reject.
  if (p1IsNum !== p2IsNum) {
    restoreLex(P.L, save)
    return null
  }
  if (!p1IsNum && (p1Text.length !== 1 || p2Text.length !== 1)) {
    restoreLex(P.L, save)
    return null
  }
  const p1Type = p1IsNum ? 'number' : 'word'
  const p2Type = p2IsNum ? 'number' : 'word'
  return mk(P, 'brace_expression', oStart, cEnd, [
    mk(P, '{', oStart, oEnd, []),
    mk(P, p1Type, p1Start, p1End, []),
    mk(P, '..', dotStart, dotEnd, []),
    mk(P, p2Type, p2Start, p2End, []),
    mk(P, '}', cStart, cEnd, []),
  ])
}

/**
 * Split a `{...}` that is NOT a range expression into single-char and text
 * word fragments, mirroring tree-sitter's tokenization. Stops at command
 * terminators so the surrounding line still splits correctly.
 */
function tryParseBraceLikeCat(P: ParseState): TsNode[] | null {
  // {a,b,c} or {} → split into word fragments like tree-sitter does
  if (peek(P.L) !== '{') return null
  const oStart = P.L.b
  advance(P.L)
  const oEnd = P.L.b
  const inner: TsNode[] = [mk(P, 'word', oStart, oEnd, [])]
  while (P.L.i < P.L.len) {
    const bc = peek(P.L)
    // SECURITY: stop at command terminators so `{foo;rm x` splits correctly.
    if (
      bc === '}' ||
      bc === '\n' ||
      bc === ';' ||
      bc === '|' ||
      bc === '&' ||
      bc === ' ' ||
      bc === '\t' ||
      bc === '<' ||
      bc === '>' ||
      bc === '(' ||
      bc === ')'
    ) {
      break
    }
    // `[` and `]` are single-char words: {o[k]} → { o [ k ] }
    if (bc === '[' || bc === ']') {
      const bStart = P.L.b
      advance(P.L)
      inner.push(mk(P, 'word', bStart, P.L.b, []))
      continue
    }
    // Run of ordinary chars up to the next splitter
    const midStart = P.L.b
    while (P.L.i < P.L.len) {
      const mc = peek(P.L)
      if (
        mc === '}' ||
        mc === '\n' ||
        mc === ';' ||
        mc === '|' ||
        mc === '&' ||
        mc === ' ' ||
        mc === '\t' ||
        mc === '<' ||
        mc === '>' ||
        mc === '(' ||
        mc === ')' ||
        mc === '[' ||
        mc === ']'
      ) {
        break
      }
      advance(P.L)
    }
    const midEnd = P.L.b
    if (midEnd > midStart) {
      const midText = sliceBytes(P, midStart, midEnd)
      const midType = /^-?\d+$/.test(midText) ? 'number' : 'word'
      inner.push(mk(P, midType, midStart, midEnd, []))
    } else {
      break
    }
  }
  // Trailing `}` (if present) becomes its own single-char word
  if (peek(P.L) === '}') {
    const cStart = P.L.b
    advance(P.L)
    inner.push(mk(P, 'word', cStart, P.L.b, []))
  }
  return inner
}

/**
 * Parse a double-quoted string starting at the current `"` into a string
 * node with string_content / expansion children.
 */
function parseDoubleQuoted(P: ParseState): TsNode {
  const qStart = P.L.b
  advance(P.L)
  const qEnd = P.L.b
  const openQ = mk(P, '"', qStart, qEnd, [])
  const parts: TsNode[] = [openQ]
  let contentStart = P.L.b
  let contentStartI = P.L.i
  const flushContent = (): void => {
    if (P.L.b > contentStart) {
      // Tree-sitter's extras rule /\s/ has higher precedence than
      // string_content (prec -1), so whitespace-only segments are elided.
      // `" ${x} "` → (string (expansion)) not (string (string_content)(expansion)(string_content)).
2352 // Note: this intentionally diverges from preserving all content — cc 2353 // tests relying on whitespace-only string_content need updating 2354 // (CCReconcile). 2355 const txt = P.src.slice(contentStartI, P.L.i) 2356 if (!/^[ \t]+$/.test(txt)) { 2357 parts.push(mk(P, 'string_content', contentStart, P.L.b, [])) 2358 } 2359 } 2360 } 2361 while (P.L.i < P.L.len) { 2362 const c = peek(P.L) 2363 if (c === '"') break 2364 if (c === '\\' && P.L.i + 1 < P.L.len) { 2365 advance(P.L) 2366 advance(P.L) 2367 continue 2368 } 2369 if (c === '\n') { 2370 // Split string_content at newline 2371 flushContent() 2372 advance(P.L) 2373 contentStart = P.L.b 2374 contentStartI = P.L.i 2375 continue 2376 } 2377 if (c === '$') { 2378 const c1 = peek(P.L, 1) 2379 if ( 2380 c1 === '(' || 2381 c1 === '{' || 2382 isIdentStart(c1) || 2383 SPECIAL_VARS.has(c1) || 2384 isDigit(c1) 2385 ) { 2386 flushContent() 2387 const exp = parseDollarLike(P) 2388 if (exp) parts.push(exp) 2389 contentStart = P.L.b 2390 contentStartI = P.L.i 2391 continue 2392 } 2393 // Bare $ not at end-of-string: tree-sitter emits it as an anonymous 2394 // '$' token, which splits string_content. $ immediately before the 2395 // closing " is absorbed into the preceding string_content. 
2396 if (c1 !== '"' && c1 !== '') { 2397 flushContent() 2398 const dS = P.L.b 2399 advance(P.L) 2400 parts.push(mk(P, '$', dS, P.L.b, [])) 2401 contentStart = P.L.b 2402 contentStartI = P.L.i 2403 continue 2404 } 2405 } 2406 if (c === '`') { 2407 flushContent() 2408 const bt = parseBacktick(P) 2409 if (bt) parts.push(bt) 2410 contentStart = P.L.b 2411 contentStartI = P.L.i 2412 continue 2413 } 2414 advance(P.L) 2415 } 2416 flushContent() 2417 let close: TsNode 2418 if (peek(P.L) === '"') { 2419 const cStart = P.L.b 2420 advance(P.L) 2421 close = mk(P, '"', cStart, P.L.b, []) 2422 } else { 2423 close = mk(P, '"', P.L.b, P.L.b, []) 2424 } 2425 parts.push(close) 2426 return mk(P, 'string', qStart, close.endIndex, parts) 2427} 2428 2429function parseDollarLike(P: ParseState): TsNode | null { 2430 const c1 = peek(P.L, 1) 2431 const dStart = P.L.b 2432 if (c1 === '(' && peek(P.L, 2) === '(') { 2433 // $(( arithmetic )) 2434 advance(P.L) 2435 advance(P.L) 2436 advance(P.L) 2437 const open = mk(P, '$((', dStart, P.L.b, []) 2438 const exprs = parseArithCommaList(P, '))', 'var') 2439 skipBlanks(P.L) 2440 let close: TsNode 2441 if (peek(P.L) === ')' && peek(P.L, 1) === ')') { 2442 const cStart = P.L.b 2443 advance(P.L) 2444 advance(P.L) 2445 close = mk(P, '))', cStart, P.L.b, []) 2446 } else { 2447 close = mk(P, '))', P.L.b, P.L.b, []) 2448 } 2449 return mk(P, 'arithmetic_expansion', dStart, close.endIndex, [ 2450 open, 2451 ...exprs, 2452 close, 2453 ]) 2454 } 2455 if (c1 === '[') { 2456 // $[ arithmetic ] — legacy bash syntax, same as $((...)) 2457 advance(P.L) 2458 advance(P.L) 2459 const open = mk(P, '$[', dStart, P.L.b, []) 2460 const exprs = parseArithCommaList(P, ']', 'var') 2461 skipBlanks(P.L) 2462 let close: TsNode 2463 if (peek(P.L) === ']') { 2464 const cStart = P.L.b 2465 advance(P.L) 2466 close = mk(P, ']', cStart, P.L.b, []) 2467 } else { 2468 close = mk(P, ']', P.L.b, P.L.b, []) 2469 } 2470 return mk(P, 'arithmetic_expansion', dStart, close.endIndex, [ 2471 
open, 2472 ...exprs, 2473 close, 2474 ]) 2475 } 2476 if (c1 === '(') { 2477 advance(P.L) 2478 advance(P.L) 2479 const open = mk(P, '$(', dStart, P.L.b, []) 2480 let body = parseStatements(P, ')') 2481 skipBlanks(P.L) 2482 let close: TsNode 2483 if (peek(P.L) === ')') { 2484 const cStart = P.L.b 2485 advance(P.L) 2486 close = mk(P, ')', cStart, P.L.b, []) 2487 } else { 2488 close = mk(P, ')', P.L.b, P.L.b, []) 2489 } 2490 // $(< file) shorthand: unwrap redirected_statement → bare file_redirect 2491 // tree-sitter emits (command_substitution (file_redirect (word))) directly 2492 if ( 2493 body.length === 1 && 2494 body[0]!.type === 'redirected_statement' && 2495 body[0]!.children.length === 1 && 2496 body[0]!.children[0]!.type === 'file_redirect' 2497 ) { 2498 body = body[0]!.children 2499 } 2500 return mk(P, 'command_substitution', dStart, close.endIndex, [ 2501 open, 2502 ...body, 2503 close, 2504 ]) 2505 } 2506 if (c1 === '{') { 2507 advance(P.L) 2508 advance(P.L) 2509 const open = mk(P, '${', dStart, P.L.b, []) 2510 const inner = parseExpansionBody(P) 2511 let close: TsNode 2512 if (peek(P.L) === '}') { 2513 const cStart = P.L.b 2514 advance(P.L) 2515 close = mk(P, '}', cStart, P.L.b, []) 2516 } else { 2517 close = mk(P, '}', P.L.b, P.L.b, []) 2518 } 2519 return mk(P, 'expansion', dStart, close.endIndex, [open, ...inner, close]) 2520 } 2521 // Simple expansion $VAR or $? 
$$ $@ etc 2522 advance(P.L) 2523 const dEnd = P.L.b 2524 const dollar = mk(P, '$', dStart, dEnd, []) 2525 const nc = peek(P.L) 2526 // $_ is special_variable_name only when not followed by more ident chars 2527 if (nc === '_' && !isIdentChar(peek(P.L, 1))) { 2528 const vStart = P.L.b 2529 advance(P.L) 2530 const vn = mk(P, 'special_variable_name', vStart, P.L.b, []) 2531 return mk(P, 'simple_expansion', dStart, P.L.b, [dollar, vn]) 2532 } 2533 if (isIdentStart(nc)) { 2534 const vStart = P.L.b 2535 while (isIdentChar(peek(P.L))) advance(P.L) 2536 const vn = mk(P, 'variable_name', vStart, P.L.b, []) 2537 return mk(P, 'simple_expansion', dStart, P.L.b, [dollar, vn]) 2538 } 2539 if (isDigit(nc)) { 2540 const vStart = P.L.b 2541 advance(P.L) 2542 const vn = mk(P, 'variable_name', vStart, P.L.b, []) 2543 return mk(P, 'simple_expansion', dStart, P.L.b, [dollar, vn]) 2544 } 2545 if (SPECIAL_VARS.has(nc)) { 2546 const vStart = P.L.b 2547 advance(P.L) 2548 const vn = mk(P, 'special_variable_name', vStart, P.L.b, []) 2549 return mk(P, 'simple_expansion', dStart, P.L.b, [dollar, vn]) 2550 } 2551 // Bare $ — just a $ leaf (tree-sitter treats trailing $ as literal) 2552 return dollar 2553} 2554 2555function parseExpansionBody(P: ParseState): TsNode[] { 2556 const out: TsNode[] = [] 2557 skipBlanks(P.L) 2558 // Bizarre cases: ${#!} ${!#} ${!##} ${!# } ${!## } all emit empty (expansion) 2559 // — both # and ! become anonymous nodes when only combined with each other 2560 // and optional trailing space before }. Note ${!##/} does NOT match (has 2561 // content after), so it parses normally as (special_variable_name)(regex). 2562 { 2563 const c0 = peek(P.L) 2564 const c1 = peek(P.L, 1) 2565 if (c0 === '#' && c1 === '!' && peek(P.L, 2) === '}') { 2566 advance(P.L) 2567 advance(P.L) 2568 return out 2569 } 2570 if (c0 === '!' 
&& c1 === '#') { 2571 // ${!#} ${!##} with optional trailing space then } 2572 let j = 2 2573 if (peek(P.L, j) === '#') j++ 2574 if (peek(P.L, j) === ' ') j++ 2575 if (peek(P.L, j) === '}') { 2576 while (j-- > 0) advance(P.L) 2577 return out 2578 } 2579 } 2580 } 2581 // Optional # prefix for length 2582 if (peek(P.L) === '#') { 2583 const s = P.L.b 2584 advance(P.L) 2585 out.push(mk(P, '#', s, P.L.b, [])) 2586 } 2587 // Optional ! prefix for indirect expansion: ${!varname} ${!prefix*} ${!prefix@} 2588 // Only when followed by an identifier — ${!} alone is special var $! 2589 // Also = ~ prefixes (zsh-style ${=var} ${~var}) 2590 const pc = peek(P.L) 2591 if ( 2592 (pc === '!' || pc === '=' || pc === '~') && 2593 (isIdentStart(peek(P.L, 1)) || isDigit(peek(P.L, 1))) 2594 ) { 2595 const s = P.L.b 2596 advance(P.L) 2597 out.push(mk(P, pc, s, P.L.b, [])) 2598 } 2599 skipBlanks(P.L) 2600 // Variable name 2601 if (isIdentStart(peek(P.L))) { 2602 const s = P.L.b 2603 while (isIdentChar(peek(P.L))) advance(P.L) 2604 out.push(mk(P, 'variable_name', s, P.L.b, [])) 2605 } else if (isDigit(peek(P.L))) { 2606 const s = P.L.b 2607 while (isDigit(peek(P.L))) advance(P.L) 2608 out.push(mk(P, 'variable_name', s, P.L.b, [])) 2609 } else if (SPECIAL_VARS.has(peek(P.L))) { 2610 const s = P.L.b 2611 advance(P.L) 2612 out.push(mk(P, 'special_variable_name', s, P.L.b, [])) 2613 } 2614 // Optional subscript [idx] — parsed arithmetically 2615 if (peek(P.L) === '[') { 2616 const varNode = out[out.length - 1] 2617 const brOpen = P.L.b 2618 advance(P.L) 2619 const brOpenNode = mk(P, '[', brOpen, P.L.b, []) 2620 const idx = parseSubscriptIndexInline(P) 2621 skipBlanks(P.L) 2622 const brClose = P.L.b 2623 if (peek(P.L) === ']') advance(P.L) 2624 const brCloseNode = mk(P, ']', brClose, P.L.b, []) 2625 if (varNode) { 2626 const kids = idx 2627 ? 
[varNode, brOpenNode, idx, brCloseNode] 2628 : [varNode, brOpenNode, brCloseNode] 2629 out[out.length - 1] = mk(P, 'subscript', varNode.startIndex, P.L.b, kids) 2630 } 2631 } 2632 skipBlanks(P.L) 2633 // Trailing * or @ for indirect expansion (${!prefix*} ${!prefix@}) or 2634 // @operator for parameter transformation (${var@U} ${var@Q}) — anonymous 2635 const tc = peek(P.L) 2636 if ((tc === '*' || tc === '@') && peek(P.L, 1) === '}') { 2637 const s = P.L.b 2638 advance(P.L) 2639 out.push(mk(P, tc, s, P.L.b, [])) 2640 return out 2641 } 2642 if (tc === '@' && isIdentStart(peek(P.L, 1))) { 2643 // ${var@U} transformation — @ is anonymous, consume op char(s) 2644 const s = P.L.b 2645 advance(P.L) 2646 out.push(mk(P, '@', s, P.L.b, [])) 2647 while (isIdentChar(peek(P.L))) advance(P.L) 2648 return out 2649 } 2650 // Operator :- := :? :+ - = ? + # ## % %% / // ^ ^^ , ,, etc. 2651 const c = peek(P.L) 2652 // Bare `:` substring operator ${var:off:len} — offset and length parsed 2653 // arithmetically. Must come BEFORE the generic operator handling so `(` after 2654 // `:` goes to parenthesized_expression not the array path. `:-` `:=` `:?` 2655 // `:+` (no space) remain default-value operators; `: -1` (with space before 2656 // -1) is substring with negative offset. 2657 if (c === ':') { 2658 const c1 = peek(P.L, 1) 2659 // `:\n` or `:}` — empty substring expansion, emits nothing (variable_name only) 2660 if (c1 === '\n' || c1 === '}') { 2661 advance(P.L) 2662 while (peek(P.L) === '\n') advance(P.L) 2663 return out 2664 } 2665 if (c1 !== '-' && c1 !== '=' && c1 !== '?' && c1 !== '+') { 2666 advance(P.L) 2667 skipBlanks(P.L) 2668 // Offset — arithmetic. `-N` at top level is a single number node per 2669 // tree-sitter; inside parens it's unary_expression(number). 
2670 const offC = peek(P.L) 2671 let off: TsNode | null 2672 if (offC === '-' && isDigit(peek(P.L, 1))) { 2673 const ns = P.L.b 2674 advance(P.L) 2675 while (isDigit(peek(P.L))) advance(P.L) 2676 off = mk(P, 'number', ns, P.L.b, []) 2677 } else { 2678 off = parseArithExpr(P, ':}', 'var') 2679 } 2680 if (off) out.push(off) 2681 skipBlanks(P.L) 2682 if (peek(P.L) === ':') { 2683 advance(P.L) 2684 skipBlanks(P.L) 2685 const lenC = peek(P.L) 2686 let len: TsNode | null 2687 if (lenC === '-' && isDigit(peek(P.L, 1))) { 2688 const ns = P.L.b 2689 advance(P.L) 2690 while (isDigit(peek(P.L))) advance(P.L) 2691 len = mk(P, 'number', ns, P.L.b, []) 2692 } else { 2693 len = parseArithExpr(P, '}', 'var') 2694 } 2695 if (len) out.push(len) 2696 } 2697 return out 2698 } 2699 } 2700 if ( 2701 c === ':' || 2702 c === '#' || 2703 c === '%' || 2704 c === '/' || 2705 c === '^' || 2706 c === ',' || 2707 c === '-' || 2708 c === '=' || 2709 c === '?' || 2710 c === '+' 2711 ) { 2712 const s = P.L.b 2713 const c1 = peek(P.L, 1) 2714 let op = c 2715 if (c === ':' && (c1 === '-' || c1 === '=' || c1 === '?' || c1 === '+')) { 2716 advance(P.L) 2717 advance(P.L) 2718 op = c + c1 2719 } else if ( 2720 (c === '#' || c === '%' || c === '/' || c === '^' || c === ',') && 2721 c1 === c 2722 ) { 2723 // Doubled operators: ## %% // ^^ ,, 2724 advance(P.L) 2725 advance(P.L) 2726 op = c + c 2727 } else { 2728 advance(P.L) 2729 } 2730 out.push(mk(P, op, s, P.L.b, [])) 2731 // Rest is the default/replacement — parse as word or regex until } 2732 // Pattern-matching operators (# ## % %% / // ^ ^^ , ,,) emit regex; 2733 // value-substitution operators (:- := :? :+ - = ? + :) emit word. 2734 // `/` and `//` split at next `/` into (regex)+(word) for pat/repl. 
2735 const isPattern = 2736 op === '#' || 2737 op === '##' || 2738 op === '%' || 2739 op === '%%' || 2740 op === '/' || 2741 op === '//' || 2742 op === '^' || 2743 op === '^^' || 2744 op === ',' || 2745 op === ',,' 2746 if (op === '/' || op === '//') { 2747 // Optional /# or /% anchor prefix — anonymous node 2748 const ac = peek(P.L) 2749 if (ac === '#' || ac === '%') { 2750 const aStart = P.L.b 2751 advance(P.L) 2752 out.push(mk(P, ac, aStart, P.L.b, [])) 2753 } 2754 // Pattern: per grammar _expansion_regex_replacement, pattern is 2755 // choice(regex, string, cmd_sub, seq(string, regex)). If it STARTS 2756 // with ", emit (string) and any trailing chars become (regex). 2757 // `${v//"${old}"/}` → (string(expansion)); `${v//"${c}"\//}` → 2758 // (string)(regex). 2759 if (peek(P.L) === '"') { 2760 out.push(parseDoubleQuoted(P)) 2761 const tail = parseExpansionRest(P, 'regex', true) 2762 if (tail) out.push(tail) 2763 } else { 2764 const regex = parseExpansionRest(P, 'regex', true) 2765 if (regex) out.push(regex) 2766 } 2767 if (peek(P.L) === '/') { 2768 const sepStart = P.L.b 2769 advance(P.L) 2770 out.push(mk(P, '/', sepStart, P.L.b, [])) 2771 // Replacement: per grammar, choice includes `seq(cmd_sub, word)` 2772 // which emits TWO siblings (not concatenation). Also `(` at start 2773 // of replacement is a regular word char, NOT array — unlike `:-` 2774 // default-value context. `${v/(/(Gentoo ${x}, }` replacement 2775 // `(Gentoo ${x}, ` is (concatenation (word)(expansion)(word)). 2776 const repl = parseExpansionRest(P, 'replword', false) 2777 if (repl) { 2778 // seq(cmd_sub, word) special case → siblings. Detected when 2779 // replacement is a concatenation of exactly 2 parts with first 2780 // being command_substitution. 2781 if ( 2782 repl.type === 'concatenation' && 2783 repl.children.length === 2 && 2784 repl.children[0]!.type === 'command_substitution' 2785 ) { 2786 out.push(repl.children[0]!) 2787 out.push(repl.children[1]!) 
2788 } else { 2789 out.push(repl) 2790 } 2791 } 2792 } 2793 } else if (op === '#' || op === '##' || op === '%' || op === '%%') { 2794 // Pattern-removal: per grammar _expansion_regex, pattern is 2795 // repeat(choice(regex, string, raw_string, ')')). Each quote/string 2796 // is a SIBLING, not absorbed into one regex. `${f%'str'*}` → 2797 // (raw_string)(regex); `${f/'str'*}` (slash) stays single regex. 2798 for (const p of parseExpansionRegexSegmented(P)) out.push(p) 2799 } else { 2800 const rest = parseExpansionRest(P, isPattern ? 'regex' : 'word', false) 2801 if (rest) out.push(rest) 2802 } 2803 } 2804 return out 2805} 2806 2807function parseExpansionRest( 2808 P: ParseState, 2809 nodeType: string, 2810 stopAtSlash: boolean, 2811): TsNode | null { 2812 // Don't skipBlanks — `${var:- }` space IS the word. Stop at } or newline 2813 // (`${var:\n}` emits no word). stopAtSlash=true stops at `/` for pat/repl 2814 // split in ${var/pat/repl}. nodeType 'replword' is word-mode for the 2815 // replacement in `/` `//` — same as 'word' but `(` is NOT array. 2816 const start = P.L.b 2817 // Value-substitution RHS starting with `(` parses as array: ${var:-(x)} → 2818 // (expansion (variable_name) (array (word))). Only for 'word' context (not 2819 // pattern-matching operators which emit regex, and not 'replword' where `(` 2820 // is a regular char per grammar `_expansion_regex_replacement`). 
2821 if (nodeType === 'word' && peek(P.L) === '(') { 2822 advance(P.L) 2823 const open = mk(P, '(', start, P.L.b, []) 2824 const elems: TsNode[] = [open] 2825 while (P.L.i < P.L.len) { 2826 skipBlanks(P.L) 2827 const c = peek(P.L) 2828 if (c === ')' || c === '}' || c === '\n' || c === '') break 2829 const wStart = P.L.b 2830 while (P.L.i < P.L.len) { 2831 const wc = peek(P.L) 2832 if ( 2833 wc === ')' || 2834 wc === '}' || 2835 wc === ' ' || 2836 wc === '\t' || 2837 wc === '\n' || 2838 wc === '' 2839 ) { 2840 break 2841 } 2842 advance(P.L) 2843 } 2844 if (P.L.b > wStart) elems.push(mk(P, 'word', wStart, P.L.b, [])) 2845 else break 2846 } 2847 if (peek(P.L) === ')') { 2848 const cStart = P.L.b 2849 advance(P.L) 2850 elems.push(mk(P, ')', cStart, P.L.b, [])) 2851 } 2852 while (peek(P.L) === '\n') advance(P.L) 2853 return mk(P, 'array', start, P.L.b, elems) 2854 } 2855 // REGEX mode: flat single-span scan. Quotes are opaque (skipped past so 2856 // `/` inside them doesn't break stopAtSlash), but NOT emitted as separate 2857 // nodes — the entire range becomes one regex node. 2858 if (nodeType === 'regex') { 2859 let braceDepth = 0 2860 while (P.L.i < P.L.len) { 2861 const c = peek(P.L) 2862 if (c === '\n') break 2863 if (braceDepth === 0) { 2864 if (c === '}') break 2865 if (stopAtSlash && c === '/') break 2866 } 2867 if (c === '\\' && P.L.i + 1 < P.L.len) { 2868 advance(P.L) 2869 advance(P.L) 2870 continue 2871 } 2872 if (c === '"' || c === "'") { 2873 advance(P.L) 2874 while (P.L.i < P.L.len && peek(P.L) !== c) { 2875 if (peek(P.L) === '\\' && P.L.i + 1 < P.L.len) advance(P.L) 2876 advance(P.L) 2877 } 2878 if (peek(P.L) === c) advance(P.L) 2879 continue 2880 } 2881 // Skip past nested ${...} $(...) $[...] 
so their } / don't terminate us 2882 if (c === '$') { 2883 const c1 = peek(P.L, 1) 2884 if (c1 === '{') { 2885 let d = 0 2886 advance(P.L) 2887 advance(P.L) 2888 d++ 2889 while (P.L.i < P.L.len && d > 0) { 2890 const nc = peek(P.L) 2891 if (nc === '{') d++ 2892 else if (nc === '}') d-- 2893 advance(P.L) 2894 } 2895 continue 2896 } 2897 if (c1 === '(') { 2898 let d = 0 2899 advance(P.L) 2900 advance(P.L) 2901 d++ 2902 while (P.L.i < P.L.len && d > 0) { 2903 const nc = peek(P.L) 2904 if (nc === '(') d++ 2905 else if (nc === ')') d-- 2906 advance(P.L) 2907 } 2908 continue 2909 } 2910 } 2911 if (c === '{') braceDepth++ 2912 else if (c === '}' && braceDepth > 0) braceDepth-- 2913 advance(P.L) 2914 } 2915 const end = P.L.b 2916 while (peek(P.L) === '\n') advance(P.L) 2917 if (end === start) return null 2918 return mk(P, 'regex', start, end, []) 2919 } 2920 // WORD mode: segmenting parser — recognize nested ${...}, $(...), $'...', 2921 // "...", '...', $ident, <(...)/>(...); bare chars accumulate into word 2922 // segments. Multiple parts → wrapped in concatenation. 2923 const parts: TsNode[] = [] 2924 let segStart = P.L.b 2925 let braceDepth = 0 2926 const flushSeg = (): void => { 2927 if (P.L.b > segStart) { 2928 parts.push(mk(P, 'word', segStart, P.L.b, [])) 2929 } 2930 } 2931 while (P.L.i < P.L.len) { 2932 const c = peek(P.L) 2933 if (c === '\n') break 2934 if (braceDepth === 0) { 2935 if (c === '}') break 2936 if (stopAtSlash && c === '/') break 2937 } 2938 if (c === '\\' && P.L.i + 1 < P.L.len) { 2939 advance(P.L) 2940 advance(P.L) 2941 continue 2942 } 2943 const c1 = peek(P.L, 1) 2944 if (c === '$') { 2945 if (c1 === '{' || c1 === '(' || c1 === '[') { 2946 flushSeg() 2947 const exp = parseDollarLike(P) 2948 if (exp) parts.push(exp) 2949 segStart = P.L.b 2950 continue 2951 } 2952 if (c1 === "'") { 2953 // $'...' 
ANSI-C string 2954 flushSeg() 2955 const aStart = P.L.b 2956 advance(P.L) 2957 advance(P.L) 2958 while (P.L.i < P.L.len && peek(P.L) !== "'") { 2959 if (peek(P.L) === '\\' && P.L.i + 1 < P.L.len) advance(P.L) 2960 advance(P.L) 2961 } 2962 if (peek(P.L) === "'") advance(P.L) 2963 parts.push(mk(P, 'ansi_c_string', aStart, P.L.b, [])) 2964 segStart = P.L.b 2965 continue 2966 } 2967 if (isIdentStart(c1) || isDigit(c1) || SPECIAL_VARS.has(c1)) { 2968 flushSeg() 2969 const exp = parseDollarLike(P) 2970 if (exp) parts.push(exp) 2971 segStart = P.L.b 2972 continue 2973 } 2974 } 2975 if (c === '"') { 2976 flushSeg() 2977 parts.push(parseDoubleQuoted(P)) 2978 segStart = P.L.b 2979 continue 2980 } 2981 if (c === "'") { 2982 flushSeg() 2983 const rStart = P.L.b 2984 advance(P.L) 2985 while (P.L.i < P.L.len && peek(P.L) !== "'") advance(P.L) 2986 if (peek(P.L) === "'") advance(P.L) 2987 parts.push(mk(P, 'raw_string', rStart, P.L.b, [])) 2988 segStart = P.L.b 2989 continue 2990 } 2991 if ((c === '<' || c === '>') && c1 === '(') { 2992 flushSeg() 2993 const ps = parseProcessSub(P) 2994 if (ps) parts.push(ps) 2995 segStart = P.L.b 2996 continue 2997 } 2998 if (c === '`') { 2999 flushSeg() 3000 const bt = parseBacktick(P) 3001 if (bt) parts.push(bt) 3002 segStart = P.L.b 3003 continue 3004 } 3005 // Brace tracking so nested {a,b} brace-expansion chars don't prematurely 3006 // terminate (rare, but the `?` in `${cond}? (` should be treated as word). 3007 if (c === '{') braceDepth++ 3008 else if (c === '}' && braceDepth > 0) braceDepth-- 3009 advance(P.L) 3010 } 3011 flushSeg() 3012 // Consume trailing newlines before } so caller sees } 3013 while (peek(P.L) === '\n') advance(P.L) 3014 // Tree-sitter skips leading whitespace (extras) in expansion RHS when 3015 // there's content after: `${2+ ${2}}` → just (expansion). But `${v:- }` 3016 // (space-only RHS) keeps the space as (word). So drop leading whitespace- 3017 // only word segment if it's NOT the only part. 
3018 if ( 3019 parts.length > 1 && 3020 parts[0]!.type === 'word' && 3021 /^[ \t]+$/.test(parts[0]!.text) 3022 ) { 3023 parts.shift() 3024 } 3025 if (parts.length === 0) return null 3026 if (parts.length === 1) return parts[0]! 3027 // Multiple parts: wrap in concatenation (word mode keeps concat wrapping; 3028 // regex mode also concats per tree-sitter for mixed quote+glob patterns). 3029 const last = parts[parts.length - 1]! 3030 return mk(P, 'concatenation', parts[0]!.startIndex, last.endIndex, parts) 3031} 3032 3033// Pattern for # ## % %% operators — per grammar _expansion_regex: 3034// repeat(choice(regex, string, raw_string, ')', /\s+/→regex)). Each quote 3035// becomes a SIBLING node, not absorbed. `${f%'str'*}` → (raw_string)(regex). 3036function parseExpansionRegexSegmented(P: ParseState): TsNode[] { 3037 const out: TsNode[] = [] 3038 let segStart = P.L.b 3039 const flushRegex = (): void => { 3040 if (P.L.b > segStart) out.push(mk(P, 'regex', segStart, P.L.b, [])) 3041 } 3042 while (P.L.i < P.L.len) { 3043 const c = peek(P.L) 3044 if (c === '}' || c === '\n') break 3045 if (c === '\\' && P.L.i + 1 < P.L.len) { 3046 advance(P.L) 3047 advance(P.L) 3048 continue 3049 } 3050 if (c === '"') { 3051 flushRegex() 3052 out.push(parseDoubleQuoted(P)) 3053 segStart = P.L.b 3054 continue 3055 } 3056 if (c === "'") { 3057 flushRegex() 3058 const rStart = P.L.b 3059 advance(P.L) 3060 while (P.L.i < P.L.len && peek(P.L) !== "'") advance(P.L) 3061 if (peek(P.L) === "'") advance(P.L) 3062 out.push(mk(P, 'raw_string', rStart, P.L.b, [])) 3063 segStart = P.L.b 3064 continue 3065 } 3066 // Nested ${...} $(...) 
— opaque scan so their } doesn't terminate us 3067 if (c === '$') { 3068 const c1 = peek(P.L, 1) 3069 if (c1 === '{') { 3070 let d = 1 3071 advance(P.L) 3072 advance(P.L) 3073 while (P.L.i < P.L.len && d > 0) { 3074 const nc = peek(P.L) 3075 if (nc === '{') d++ 3076 else if (nc === '}') d-- 3077 advance(P.L) 3078 } 3079 continue 3080 } 3081 if (c1 === '(') { 3082 let d = 1 3083 advance(P.L) 3084 advance(P.L) 3085 while (P.L.i < P.L.len && d > 0) { 3086 const nc = peek(P.L) 3087 if (nc === '(') d++ 3088 else if (nc === ')') d-- 3089 advance(P.L) 3090 } 3091 continue 3092 } 3093 } 3094 advance(P.L) 3095 } 3096 flushRegex() 3097 while (peek(P.L) === '\n') advance(P.L) 3098 return out 3099} 3100 3101function parseBacktick(P: ParseState): TsNode | null { 3102 const start = P.L.b 3103 advance(P.L) 3104 const open = mk(P, '`', start, P.L.b, []) 3105 P.inBacktick++ 3106 // Parse statements inline — stop at closing backtick 3107 const body: TsNode[] = [] 3108 while (true) { 3109 skipBlanks(P.L) 3110 if (peek(P.L) === '`' || peek(P.L) === '') break 3111 const save = saveLex(P.L) 3112 const t = nextToken(P.L, 'cmd') 3113 if (t.type === 'EOF' || t.type === 'BACKTICK') { 3114 restoreLex(P.L, save) 3115 break 3116 } 3117 if (t.type === 'NEWLINE') continue 3118 restoreLex(P.L, save) 3119 const stmt = parseAndOr(P) 3120 if (!stmt) break 3121 body.push(stmt) 3122 skipBlanks(P.L) 3123 if (peek(P.L) === '`') break 3124 const save2 = saveLex(P.L) 3125 const sep = nextToken(P.L, 'cmd') 3126 if (sep.type === 'OP' && (sep.value === ';' || sep.value === '&')) { 3127 body.push(leaf(P, sep.value, sep)) 3128 } else if (sep.type !== 'NEWLINE') { 3129 restoreLex(P.L, save2) 3130 } 3131 } 3132 P.inBacktick-- 3133 let close: TsNode 3134 if (peek(P.L) === '`') { 3135 const cStart = P.L.b 3136 advance(P.L) 3137 close = mk(P, '`', cStart, P.L.b, []) 3138 } else { 3139 close = mk(P, '`', P.L.b, P.L.b, []) 3140 } 3141 // Empty backticks (whitespace/newline only) are elided entirely by 3142 // 
tree-sitter — used as a line-continuation hack: "foo"`<newline>`"bar" 3143 // → (concatenation (string) (string)) with no command_substitution. 3144 if (body.length === 0) return null 3145 return mk(P, 'command_substitution', start, close.endIndex, [ 3146 open, 3147 ...body, 3148 close, 3149 ]) 3150} 3151 3152function parseIf(P: ParseState, ifTok: Token): TsNode { 3153 const ifKw = leaf(P, 'if', ifTok) 3154 const kids: TsNode[] = [ifKw] 3155 const cond = parseStatements(P, null) 3156 kids.push(...cond) 3157 consumeKeyword(P, 'then', kids) 3158 const body = parseStatements(P, null) 3159 kids.push(...body) 3160 while (true) { 3161 const save = saveLex(P.L) 3162 const t = nextToken(P.L, 'cmd') 3163 if (t.type === 'WORD' && t.value === 'elif') { 3164 const eKw = leaf(P, 'elif', t) 3165 const eCond = parseStatements(P, null) 3166 const eKids: TsNode[] = [eKw, ...eCond] 3167 consumeKeyword(P, 'then', eKids) 3168 const eBody = parseStatements(P, null) 3169 eKids.push(...eBody) 3170 const last = eKids[eKids.length - 1]! 3171 kids.push(mk(P, 'elif_clause', eKw.startIndex, last.endIndex, eKids)) 3172 } else if (t.type === 'WORD' && t.value === 'else') { 3173 const elKw = leaf(P, 'else', t) 3174 const elBody = parseStatements(P, null) 3175 const last = elBody.length > 0 ? elBody[elBody.length - 1]! : elKw 3176 kids.push( 3177 mk(P, 'else_clause', elKw.startIndex, last.endIndex, [elKw, ...elBody]), 3178 ) 3179 } else { 3180 restoreLex(P.L, save) 3181 break 3182 } 3183 } 3184 consumeKeyword(P, 'fi', kids) 3185 const last = kids[kids.length - 1]! 3186 return mk(P, 'if_statement', ifKw.startIndex, last.endIndex, kids) 3187} 3188 3189function parseWhile(P: ParseState, kwTok: Token): TsNode { 3190 const kw = leaf(P, kwTok.value, kwTok) 3191 const kids: TsNode[] = [kw] 3192 const cond = parseStatements(P, null) 3193 kids.push(...cond) 3194 const dg = parseDoGroup(P) 3195 if (dg) kids.push(dg) 3196 const last = kids[kids.length - 1]! 
  return mk(P, 'while_statement', kw.startIndex, last.endIndex, kids)
}

/**
 * Parse a `for` (or `select`) statement after its keyword token has been
 * consumed. Two shapes:
 *  - C-style `for (( init; cond; update ))` → c_style_for_statement, with a
 *    `do … done` group or `{ … }` compound body. Gated on the keyword text
 *    being exactly 'for'.
 *  - `for VAR [in WORDS]; do … done` → for_statement.
 */
function parseFor(P: ParseState, forTok: Token): TsNode {
  const forKw = leaf(P, forTok.value, forTok)
  skipBlanks(P.L)
  // C-style for (( ; ; )) — only for `for`, not `select`
  if (forTok.value === 'for' && peek(P.L) === '(' && peek(P.L, 1) === '(') {
    const oStart = P.L.b
    advance(P.L)
    advance(P.L)
    const open = mk(P, '((', oStart, P.L.b, [])
    const kids: TsNode[] = [forKw, open]
    // init; cond; update — all three use 'assign' mode so `c = expr` emits
    // variable_assignment, while bare idents (c in `c<=5`) → word. Each
    // clause may be a comma-separated list.
    for (let k = 0; k < 3; k++) {
      skipBlanks(P.L)
      // First two clauses stop at ';', the update clause stops at '))'.
      const es = parseArithCommaList(P, k < 2 ? ';' : '))', 'assign')
      kids.push(...es)
      if (k < 2) {
        if (peek(P.L) === ';') {
          const s = P.L.b
          advance(P.L)
          kids.push(mk(P, ';', s, P.L.b, []))
        }
      }
    }
    skipBlanks(P.L)
    if (peek(P.L) === ')' && peek(P.L, 1) === ')') {
      const cStart = P.L.b
      advance(P.L)
      advance(P.L)
      kids.push(mk(P, '))', cStart, P.L.b, []))
    }
    // Optional ; or newline
    const save = saveLex(P.L)
    const sep = nextToken(P.L, 'cmd')
    if (sep.type === 'OP' && sep.value === ';') {
      kids.push(leaf(P, ';', sep))
    } else if (sep.type !== 'NEWLINE') {
      restoreLex(P.L, save)
    }
    const dg = parseDoGroup(P)
    if (dg) {
      kids.push(dg)
    } else {
      // C-style for can also use `{ ... }` body instead of `do ... done`
      skipNewlines(P)
      skipBlanks(P.L)
      if (peek(P.L) === '{') {
        const bOpen = P.L.b
        advance(P.L)
        const brace = mk(P, '{', bOpen, P.L.b, [])
        const body = parseStatements(P, '}')
        let bClose: TsNode
        if (peek(P.L) === '}') {
          const cs = P.L.b
          advance(P.L)
          bClose = mk(P, '}', cs, P.L.b, [])
        } else {
          // Unterminated body: synthesize a zero-width '}' so spans stay valid.
          bClose = mk(P, '}', P.L.b, P.L.b, [])
        }
        kids.push(
          mk(P, 'compound_statement', brace.startIndex, bClose.endIndex, [
            brace,
            ...body,
            bClose,
          ]),
        )
      }
    }
    const last = kids[kids.length - 1]!
    return mk(P, 'c_style_for_statement', forKw.startIndex, last.endIndex, kids)
  }
  // Regular for VAR in words; do ... done
  const kids: TsNode[] = [forKw]
  const varTok = nextToken(P.L, 'arg')
  kids.push(mk(P, 'variable_name', varTok.start, varTok.end, []))
  skipBlanks(P.L)
  const save = saveLex(P.L)
  const inTok = nextToken(P.L, 'arg')
  if (inTok.type === 'WORD' && inTok.value === 'in') {
    kids.push(leaf(P, 'in', inTok))
    // Word list runs until ';', newline, or EOF.
    while (true) {
      skipBlanks(P.L)
      const c = peek(P.L)
      if (c === ';' || c === '\n' || c === '') break
      const w = parseWord(P, 'arg')
      if (!w) break
      kids.push(w)
    }
  } else {
    // No `in` clause (`for x; do …` iterates "$@") — rewind the lookahead.
    restoreLex(P.L, save)
  }
  // Separator
  const save2 = saveLex(P.L)
  const sep = nextToken(P.L, 'cmd')
  if (sep.type === 'OP' && sep.value === ';') {
    kids.push(leaf(P, ';', sep))
  } else if (sep.type !== 'NEWLINE') {
    restoreLex(P.L, save2)
  }
  const dg = parseDoGroup(P)
  if (dg) kids.push(dg)
  const last = kids[kids.length - 1]!
  return mk(P, 'for_statement', forKw.startIndex, last.endIndex, kids)
}

/**
 * Parse a `do … done` group. Returns null (with lexer state restored) when
 * the next command-position token is not the `do` keyword.
 */
function parseDoGroup(P: ParseState): TsNode | null {
  skipNewlines(P)
  const save = saveLex(P.L)
  const doTok = nextToken(P.L, 'cmd')
  if (doTok.type !== 'WORD' || doTok.value !== 'do') {
    restoreLex(P.L, save)
    return null
  }
  const doKw = leaf(P, 'do', doTok)
  const body = parseStatements(P, null)
  const kids: TsNode[] = [doKw, ...body]
  consumeKeyword(P, 'done', kids)
  const last = kids[kids.length - 1]!
  return mk(P, 'do_group', doKw.startIndex, last.endIndex, kids)
}

/**
 * Parse `case WORD in … esac` after the `case` keyword token. Accumulates
 * case_item children until `esac` or EOF.
 */
function parseCase(P: ParseState, caseTok: Token): TsNode {
  const caseKw = leaf(P, 'case', caseTok)
  const kids: TsNode[] = [caseKw]
  skipBlanks(P.L)
  const word = parseWord(P, 'arg')
  if (word) kids.push(word)
  skipBlanks(P.L)
  consumeKeyword(P, 'in', kids)
  skipNewlines(P)
  while (true) {
    skipBlanks(P.L)
    skipNewlines(P)
    const save = saveLex(P.L)
    const t = nextToken(P.L, 'arg')
    if (t.type === 'WORD' && t.value === 'esac') {
      kids.push(leaf(P, 'esac', t))
      break
    }
    if (t.type === 'EOF') break
    restoreLex(P.L, save)
    const item = parseCaseItem(P)
    if (!item) break
    kids.push(item)
  }
  const last = kids[kids.length - 1]!
  return mk(P, 'case_statement', caseKw.startIndex, last.endIndex, kids)
}

/**
 * Parse one `pattern[|pattern…]) body ;;` arm of a case statement.
 * Returns null when nothing at all could be consumed. Terminators `;;`,
 * `;&`, `;;&` are attached as leaf children when present.
 */
function parseCaseItem(P: ParseState): TsNode | null {
  skipBlanks(P.L)
  const start = P.L.b
  const kids: TsNode[] = []
  // Optional leading '(' before pattern — bash allows (pattern) syntax
  if (peek(P.L) === '(') {
    const s = P.L.b
    advance(P.L)
    kids.push(mk(P, '(', s, P.L.b, []))
  }
  // Pattern(s)
  let isFirstAlt = true
  while (true) {
    skipBlanks(P.L)
    const c = peek(P.L)
    if (c === ')' || c === '') break
    const pats = parseCasePattern(P)
    if (pats.length === 0) break
    // tree-sitter quirk: first alternative with quotes is inlined as flat
    // siblings; subsequent alternatives are wrapped in (concatenation) with
    // `word` instead of `extglob_pattern` for bare segments.
    if (!isFirstAlt && pats.length > 1) {
      const rewritten = pats.map(p =>
        p.type === 'extglob_pattern'
          ? mk(P, 'word', p.startIndex, p.endIndex, [])
          : p,
      )
      const first = rewritten[0]!
      const last = rewritten[rewritten.length - 1]!
      kids.push(
        mk(P, 'concatenation', first.startIndex, last.endIndex, rewritten),
      )
    } else {
      kids.push(...pats)
    }
    isFirstAlt = false
    skipBlanks(P.L)
    // \<newline> line continuation between alternatives
    if (peek(P.L) === '\\' && peek(P.L, 1) === '\n') {
      advance(P.L)
      advance(P.L)
      skipBlanks(P.L)
    }
    if (peek(P.L) === '|') {
      const s = P.L.b
      advance(P.L)
      kids.push(mk(P, '|', s, P.L.b, []))
      // \<newline> after | is also a line continuation
      if (peek(P.L) === '\\' && peek(P.L, 1) === '\n') {
        advance(P.L)
        advance(P.L)
      }
    } else {
      break
    }
  }
  if (peek(P.L) === ')') {
    const s = P.L.b
    advance(P.L)
    kids.push(mk(P, ')', s, P.L.b, []))
  }
  const body = parseStatements(P, null)
  kids.push(...body)
  const save = saveLex(P.L)
  const term = nextToken(P.L, 'cmd')
  if (
    term.type === 'OP' &&
    (term.value === ';;' || term.value === ';&' || term.value === ';;&')
  ) {
    kids.push(leaf(P, term.value, term))
  } else {
    restoreLex(P.L, save)
  }
  if (kids.length === 0) return null
  // tree-sitter quirk: case_item with EMPTY body and a single pattern matching
  // extglob-operator-char-prefix (no actual glob metachars) downgrades to word.
  // `-o) owner=$2 ;;` (has body) → extglob_pattern; `-g) ;;` (empty) → word.
  if (body.length === 0) {
    for (let i = 0; i < kids.length; i++) {
      const k = kids[i]!
      if (k.type !== 'extglob_pattern') continue
      const text = sliceBytes(P, k.startIndex, k.endIndex)
      if (/^[-+?*@!][a-zA-Z]/.test(text) && !/[*?(]/.test(text)) {
        kids[i] = mk(P, 'word', k.startIndex, k.endIndex, [])
      }
    }
  }
  const last = kids[kids.length - 1]!
  return mk(P, 'case_item', start, last.endIndex, kids)
}

/**
 * Scan one case-pattern alternative and classify it into tree-sitter node
 * shapes (extglob_pattern / word / concatenation-via-parseWord / segmented
 * quote split). Returns [] when nothing was consumed. Backtracks via saveLex
 * when a re-scan strategy (segmented or parseWord) is chosen.
 */
function parseCasePattern(P: ParseState): TsNode[] {
  skipBlanks(P.L)
  const save = saveLex(P.L)
  const start = P.L.b
  const startI = P.L.i
  let parenDepth = 0
  let hasDollar = false
  let hasBracketOutsideParen = false
  let hasQuote = false
  while (P.L.i < P.L.len) {
    const c = peek(P.L)
    if (c === '\\' && P.L.i + 1 < P.L.len) {
      // Escaped char — consume both (handles `bar\ baz` as single pattern)
      // \<newline> is a line continuation; eat it but stay in pattern.
      advance(P.L)
      advance(P.L)
      continue
    }
    if (c === '"' || c === "'") {
      hasQuote = true
      // Skip past the quoted segment so its content (spaces, |, etc.) doesn't
      // break the peek-ahead scan.
      advance(P.L)
      while (P.L.i < P.L.len && peek(P.L) !== c) {
        if (peek(P.L) === '\\' && P.L.i + 1 < P.L.len) advance(P.L)
        advance(P.L)
      }
      if (peek(P.L) === c) advance(P.L)
      continue
    }
    // Paren counting: any ( inside pattern opens a scope; don't break at ) or |
    // until balanced. Handles extglob *(a|b) and nested shapes *([0-9])([0-9]).
    if (c === '(') {
      parenDepth++
      advance(P.L)
      continue
    }
    if (parenDepth > 0) {
      if (c === ')') {
        parenDepth--
        advance(P.L)
        continue
      }
      if (c === '\n') break
      advance(P.L)
      continue
    }
    if (c === ')' || c === '|' || c === ' ' || c === '\t' || c === '\n') break
    if (c === '$') hasDollar = true
    if (c === '[') hasBracketOutsideParen = true
    advance(P.L)
  }
  if (P.L.b === start) return []
  const text = P.src.slice(startI, P.L.i)
  const hasExtglobParen = /[*?+@!]\(/.test(text)
  // Quoted segments in pattern: tree-sitter splits at quote boundaries into
  // multiple sibling nodes. `*"foo"*` → (extglob_pattern)(string)(extglob_pattern).
  // Re-scan with a segmenting pass.
  if (hasQuote && !hasExtglobParen) {
    restoreLex(P.L, save)
    return parseCasePatternSegmented(P)
  }
  // tree-sitter splits patterns with [ or $ into concatenation via word parsing
  // UNLESS pattern has extglob parens (those override and emit extglob_pattern).
  // `*.[1357]` → concat(word word number word); `${PN}.pot` → concat(expansion word);
  // but `*([0-9])` → extglob_pattern (has extglob paren).
  if (!hasExtglobParen && (hasDollar || hasBracketOutsideParen)) {
    restoreLex(P.L, save)
    const w = parseWord(P, 'arg')
    return w ? [w] : []
  }
  // Patterns starting with extglob operator chars (+ - ? * @ !) followed by
  // identifier chars are extglob_pattern per tree-sitter, even without parens
  // or glob metachars. `-o)` → extglob_pattern; plain `foo)` → word.
  const type =
    hasExtglobParen || /[*?]/.test(text) || /^[-+?*@!][a-zA-Z]/.test(text)
      ? 'extglob_pattern'
      : 'word'
  return [mk(P, type, start, P.L.b, [])]
}

// Segmented scan for case patterns containing quotes: `*"foo"*` →
// [extglob_pattern, string, extglob_pattern]. Bare segments → extglob_pattern
// if they have */?, else word. Stops at ) | space tab newline outside quotes.
function parseCasePatternSegmented(P: ParseState): TsNode[] {
  const parts: TsNode[] = []
  let segStart = P.L.b
  let segStartI = P.L.i
  // Emit the pending bare-text run (if any) as a node.
  const flushSeg = (): void => {
    if (P.L.i > segStartI) {
      const t = P.src.slice(segStartI, P.L.i)
      const type = /[*?]/.test(t) ? 'extglob_pattern' : 'word'
      parts.push(mk(P, type, segStart, P.L.b, []))
    }
  }
  while (P.L.i < P.L.len) {
    const c = peek(P.L)
    if (c === '\\' && P.L.i + 1 < P.L.len) {
      advance(P.L)
      advance(P.L)
      continue
    }
    if (c === '"') {
      flushSeg()
      parts.push(parseDoubleQuoted(P))
      segStart = P.L.b
      segStartI = P.L.i
      continue
    }
    if (c === "'") {
      flushSeg()
      const tok = nextToken(P.L, 'arg')
      parts.push(leaf(P, 'raw_string', tok))
      segStart = P.L.b
      segStartI = P.L.i
      continue
    }
    if (c === ')' || c === '|' || c === ' ' || c === '\t' || c === '\n') break
    advance(P.L)
  }
  flushSeg()
  return parts
}

/**
 * Parse `function NAME [()] BODY` after the `function` keyword token.
 * The body is any single command; redirects attached to a compound body are
 * hoisted to function_definition level to match the tree-sitter grammar.
 */
function parseFunction(P: ParseState, fnTok: Token): TsNode {
  const fnKw = leaf(P, 'function', fnTok)
  skipBlanks(P.L)
  const nameTok = nextToken(P.L, 'arg')
  const name = mk(P, 'word', nameTok.start, nameTok.end, [])
  const kids: TsNode[] = [fnKw, name]
  skipBlanks(P.L)
  if (peek(P.L) === '(' && peek(P.L, 1) === ')') {
    const o = nextToken(P.L, 'cmd')
    const c = nextToken(P.L, 'cmd')
    kids.push(leaf(P, '(', o))
    kids.push(leaf(P, ')', c))
  }
  skipBlanks(P.L)
  skipNewlines(P)
  const body = parseCommand(P)
  if (body) {
    // Hoist redirects from redirected_statement(compound_statement, ...) to
    // function_definition level per tree-sitter grammar
    if (
      body.type === 'redirected_statement' &&
      body.children.length >= 2 &&
      body.children[0]!.type === 'compound_statement'
    ) {
      kids.push(...body.children)
    } else {
      kids.push(body)
    }
  }
  const last = kids[kids.length - 1]!
  return mk(P, 'function_definition', fnKw.startIndex, last.endIndex, kids)
}

/**
 * Parse a declaration builtin (export/declare/typeset/readonly/local — see
 * DECL_KEYWORDS) and its arguments into declaration_command. Arguments may be
 * assignments, quoted words/expansions, flags (-a), or bare variable names.
 */
function parseDeclaration(P: ParseState, kwTok: Token): TsNode {
  const kw = leaf(P, kwTok.value, kwTok)
  const kids: TsNode[] = [kw]
  while (true) {
    skipBlanks(P.L)
    const c = peek(P.L)
    // Stop at command terminators and redirection operators.
    if (
      c === '' ||
      c === '\n' ||
      c === ';' ||
      c === '&' ||
      c === '|' ||
      c === ')' ||
      c === '<' ||
      c === '>'
    ) {
      break
    }
    const a = tryParseAssignment(P)
    if (a) {
      kids.push(a)
      continue
    }
    // Quoted string or concatenation: `export "FOO=bar"`, `export 'X'`
    if (c === '"' || c === "'" || c === '$') {
      const w = parseWord(P, 'arg')
      if (w) {
        kids.push(w)
        continue
      }
      break
    }
    // Flag like -a or bare variable name
    const save = saveLex(P.L)
    const tok = nextToken(P.L, 'arg')
    if (tok.type === 'WORD' || tok.type === 'NUMBER') {
      if (tok.value.startsWith('-')) {
        kids.push(leaf(P, 'word', tok))
      } else if (isIdentStart(tok.value[0] ?? '')) {
        kids.push(mk(P, 'variable_name', tok.start, tok.end, []))
      } else {
        kids.push(leaf(P, 'word', tok))
      }
    } else {
      restoreLex(P.L, save)
      break
    }
  }
  const last = kids[kids.length - 1]!
  return mk(P, 'declaration_command', kw.startIndex, last.endIndex, kids)
}

/**
 * Parse `unset [-flags] NAME…` into unset_command. Non-flag plain words
 * become variable_name; quoted/expansion arguments keep their node type so
 * downstream security checks can inspect them.
 */
function parseUnset(P: ParseState, kwTok: Token): TsNode {
  const kw = leaf(P, 'unset', kwTok)
  const kids: TsNode[] = [kw]
  while (true) {
    skipBlanks(P.L)
    const c = peek(P.L)
    // Stop at command terminators and redirection operators.
    if (
      c === '' ||
      c === '\n' ||
      c === ';' ||
      c === '&' ||
      c === '|' ||
      c === ')' ||
      c === '<' ||
      c === '>'
    ) {
      break
    }
    // SECURITY: use parseWord (not raw nextToken) so quoted strings like
    // `unset 'a[$(id)]'` emit a raw_string child that ast.ts can reject.
    // Previously `break` silently dropped non-WORD args — hiding the
    // arithmetic-subscript code-exec vector from the security walker.
    const arg = parseWord(P, 'arg')
    if (!arg) break
    if (arg.type === 'word') {
      if (arg.text.startsWith('-')) {
        kids.push(arg)
      } else {
        kids.push(mk(P, 'variable_name', arg.startIndex, arg.endIndex, []))
      }
    } else {
      kids.push(arg)
    }
  }
  const last = kids[kids.length - 1]!
  return mk(P, 'unset_command', kw.startIndex, last.endIndex, kids)
}

/**
 * Consume the keyword `name` at command position and push it onto `kids` as
 * a leaf; restores lexer state (no-op overall) when the keyword is absent.
 */
function consumeKeyword(P: ParseState, name: string, kids: TsNode[]): void {
  skipNewlines(P)
  const save = saveLex(P.L)
  const t = nextToken(P.L, 'cmd')
  if (t.type === 'WORD' && t.value === name) {
    kids.push(leaf(P, name, t))
  } else {
    restoreLex(P.L, save)
  }
}

// ───────────────────── Test & Arithmetic Expressions ─────────────────────

/** Entry point for [ … ] / [[ … ]] expression bodies; `closer` is ']' or ']]'. */
function parseTestExpr(P: ParseState, closer: string): TsNode | null {
  return parseTestOr(P, closer)
}

/** `||` level (lowest precedence) of the test-expression grammar. */
function parseTestOr(P: ParseState, closer: string): TsNode | null {
  let left = parseTestAnd(P, closer)
  if (!left) return null
  while (true) {
    skipBlanks(P.L)
    const save = saveLex(P.L)
    if (peek(P.L) === '|' && peek(P.L, 1) === '|') {
      const s = P.L.b
      advance(P.L)
      advance(P.L)
      const op = mk(P, '||', s, P.L.b, [])
      const right = parseTestAnd(P, closer)
      if (!right) {
        // Dangling `||` with no RHS: roll back so the operator isn't eaten.
        restoreLex(P.L, save)
        break
      }
      left = mk(P, 'binary_expression', left.startIndex, right.endIndex, [
        left,
        op,
        right,
      ])
    } else {
      break
    }
  }
  return left
}

/** `&&` level of the test-expression grammar (binds tighter than `||`). */
function parseTestAnd(P: ParseState, closer: string): TsNode | null {
  let left = parseTestUnary(P, closer)
  if (!left) return null
  while (true) {
    skipBlanks(P.L)
    if (peek(P.L) === '&' && peek(P.L, 1) === '&') {
      const s = P.L.b
      advance(P.L)
      advance(P.L)
      const op = mk(P, '&&', s, P.L.b, [])
      const right = parseTestUnary(P, closer)
      // NOTE(review): unlike parseTestOr, a consumed '&&' with a null RHS is
      // not rolled back (no saveLex here) — confirm this asymmetry is intended
      // for malformed input.
      if (!right) break
      left = mk(P, 'binary_expression', left.startIndex, right.endIndex, [
        left,
        op,
        right,
      ])
    } else {
      break
    }
  }
  return left
}

/** Parenthesized group or a single (possibly binary) comparison. */
function parseTestUnary(P: ParseState, closer: string): TsNode | null {
  skipBlanks(P.L)
  const c = peek(P.L)
  if (c === '(') {
    const s = P.L.b
    advance(P.L)
    const open = mk(P, '(', s, P.L.b, [])
    const inner = parseTestOr(P, closer)
    skipBlanks(P.L)
    let close: TsNode
    if (peek(P.L) === ')') {
      const cs = P.L.b
      advance(P.L)
      close = mk(P, ')', cs, P.L.b, [])
    } else {
      // Missing ')': synthesize a zero-width closer.
      close = mk(P, ')', P.L.b, P.L.b, [])
    }
    const kids = inner ? [open, inner, close] : [open, close]
    return mk(
      P,
      'parenthesized_expression',
      open.startIndex,
      close.endIndex,
      kids,
    )
  }
  return parseTestBinary(P, closer)
}

/**
 * Parse `!`-negated or test-operator (`-f`) or parenthesized primary — but NOT
 * a binary comparison. Used as LHS of binary_expression so `! x =~ y` binds
 * `!` to `x` only, not the whole `x =~ y`.
 */
function parseTestNegatablePrimary(
  P: ParseState,
  closer: string,
): TsNode | null {
  skipBlanks(P.L)
  const c = peek(P.L)
  if (c === '!') {
    const s = P.L.b
    advance(P.L)
    const bang = mk(P, '!', s, P.L.b, [])
    const inner = parseTestNegatablePrimary(P, closer)
    // Trailing lone `!` with nothing after it: return the bang leaf itself.
    if (!inner) return bang
    return mk(P, 'unary_expression', bang.startIndex, inner.endIndex, [
      bang,
      inner,
    ])
  }
  if (c === '-' && isIdentStart(peek(P.L, 1))) {
    // Unary test operator like -f / -n / -z followed by its operand.
    const s = P.L.b
    advance(P.L)
    while (isIdentChar(peek(P.L))) advance(P.L)
    const op = mk(P, 'test_operator', s, P.L.b, [])
    skipBlanks(P.L)
    const arg = parseTestPrimary(P, closer)
    if (!arg) return op
    return mk(P, 'unary_expression', op.startIndex, arg.endIndex, [op, arg])
  }
  return parseTestPrimary(P, closer)
}

/**
 * Parse one comparison: LHS primary, optional binary operator (== != =~ = < >
 * or a `-xx` operator like -eq/-lt), then an operator-specific RHS. Inside
 * `[[ ]]` the RHS of =, ==, != and =~ gets special regex/extglob handling to
 * match tree-sitter node shapes; otherwise the RHS is a plain primary.
 */
function parseTestBinary(P: ParseState, closer: string): TsNode | null {
  skipBlanks(P.L)
  // `!` in test context binds tighter than =~/==.
  // `[[ ! "x" =~ y ]]` → (binary_expression (unary_expression (string)) (regex))
  // `[[ ! -f x ]]` → (unary_expression ! (unary_expression (test_operator) (word)))
  const left = parseTestNegatablePrimary(P, closer)
  if (!left) return null
  skipBlanks(P.L)
  // Binary comparison: == != =~ -eq -lt etc.
  const c = peek(P.L)
  const c1 = peek(P.L, 1)
  let op: TsNode | null = null
  const os = P.L.b
  if (c === '=' && c1 === '=') {
    advance(P.L)
    advance(P.L)
    op = mk(P, '==', os, P.L.b, [])
  } else if (c === '!' && c1 === '=') {
    advance(P.L)
    advance(P.L)
    op = mk(P, '!=', os, P.L.b, [])
  } else if (c === '=' && c1 === '~') {
    advance(P.L)
    advance(P.L)
    op = mk(P, '=~', os, P.L.b, [])
  } else if (c === '=' && c1 !== '=') {
    advance(P.L)
    op = mk(P, '=', os, P.L.b, [])
  } else if (c === '<' && c1 !== '<') {
    // `<<` would be a heredoc/shift, not a string comparison — skip it here.
    advance(P.L)
    op = mk(P, '<', os, P.L.b, [])
  } else if (c === '>' && c1 !== '>') {
    advance(P.L)
    op = mk(P, '>', os, P.L.b, [])
  } else if (c === '-' && isIdentStart(c1)) {
    advance(P.L)
    while (isIdentChar(peek(P.L))) advance(P.L)
    op = mk(P, 'test_operator', os, P.L.b, [])
  }
  if (!op) return left
  skipBlanks(P.L)
  // In [[ ]], RHS of ==/!=/=/=~ gets special pattern parsing: paren counting
  // so @(a|b|c) doesn't break on |, and segments become extglob_pattern/regex.
  if (closer === ']]') {
    const opText = op.type
    if (opText === '=~') {
      skipBlanks(P.L)
      // If the ENTIRE RHS is a quoted string, emit string/raw_string not
      // regex: `[[ "$x" =~ "$y" ]]` → (binary_expression (string) (string)).
      // If there's content after the quote (`' boop '(.*)$`), the whole RHS
      // stays a single (regex). Peek past the quote to check.
      const rc = peek(P.L)
      let rhs: TsNode | null = null
      if (rc === '"' || rc === "'") {
        const save = saveLex(P.L)
        const quoted =
          rc === '"'
            ? parseDoubleQuoted(P)
            : leaf(P, 'raw_string', nextToken(P.L, 'arg'))
        // Check if RHS ends here: only whitespace then ]] or &&/|| or newline
        let j = P.L.i
        while (j < P.L.len && (P.src[j] === ' ' || P.src[j] === '\t')) j++
        const nc = P.src[j] ?? ''
        const nc1 = P.src[j + 1] ?? ''
        if (
          (nc === ']' && nc1 === ']') ||
          (nc === '&' && nc1 === '&') ||
          (nc === '|' && nc1 === '|') ||
          nc === '\n' ||
          nc === ''
        ) {
          rhs = quoted
        } else {
          restoreLex(P.L, save)
        }
      }
      if (!rhs) rhs = parseTestRegexRhs(P)
      if (!rhs) return left
      return mk(P, 'binary_expression', left.startIndex, rhs.endIndex, [
        left,
        op,
        rhs,
      ])
    }
    // Single `=` emits (regex) per tree-sitter; `==` and `!=` emit extglob_pattern
    if (opText === '=') {
      const rhs = parseTestRegexRhs(P)
      if (!rhs) return left
      return mk(P, 'binary_expression', left.startIndex, rhs.endIndex, [
        left,
        op,
        rhs,
      ])
    }
    if (opText === '==' || opText === '!=') {
      const parts = parseTestExtglobRhs(P)
      if (parts.length === 0) return left
      const last = parts[parts.length - 1]!
      // Multiple RHS parts are flat children of binary_expression.
      return mk(P, 'binary_expression', left.startIndex, last.endIndex, [
        left,
        op,
        ...parts,
      ])
    }
  }
  const right = parseTestPrimary(P, closer)
  if (!right) return left
  return mk(P, 'binary_expression', left.startIndex, right.endIndex, [
    left,
    op,
    right,
  ])
}

// RHS of =~ in [[ ]] — scan as single (regex) node with paren/bracket counting
// so | ( ) inside the regex don't break parsing. Stop at ]] or ws+&&/||.
3937function parseTestRegexRhs(P: ParseState): TsNode | null { 3938 skipBlanks(P.L) 3939 const start = P.L.b 3940 let parenDepth = 0 3941 let bracketDepth = 0 3942 while (P.L.i < P.L.len) { 3943 const c = peek(P.L) 3944 if (c === '\\' && P.L.i + 1 < P.L.len) { 3945 advance(P.L) 3946 advance(P.L) 3947 continue 3948 } 3949 if (c === '\n') break 3950 if (parenDepth === 0 && bracketDepth === 0) { 3951 if (c === ']' && peek(P.L, 1) === ']') break 3952 if (c === ' ' || c === '\t') { 3953 // Peek past blanks for ]] or &&/|| 3954 let j = P.L.i 3955 while (j < P.L.len && (P.L.src[j] === ' ' || P.L.src[j] === '\t')) j++ 3956 const nc = P.L.src[j] ?? '' 3957 const nc1 = P.L.src[j + 1] ?? '' 3958 if ( 3959 (nc === ']' && nc1 === ']') || 3960 (nc === '&' && nc1 === '&') || 3961 (nc === '|' && nc1 === '|') 3962 ) { 3963 break 3964 } 3965 advance(P.L) 3966 continue 3967 } 3968 } 3969 if (c === '(') parenDepth++ 3970 else if (c === ')' && parenDepth > 0) parenDepth-- 3971 else if (c === '[') bracketDepth++ 3972 else if (c === ']' && bracketDepth > 0) bracketDepth-- 3973 advance(P.L) 3974 } 3975 if (P.L.b === start) return null 3976 return mk(P, 'regex', start, P.L.b, []) 3977} 3978 3979// RHS of ==/!=/= in [[ ]] — returns array of parts. Bare text → extglob_pattern 3980// (with paren counting for @(a|b)); $(...)/${}/quoted → proper node types. 3981// Multiple parts become flat children of binary_expression per tree-sitter. 3982function parseTestExtglobRhs(P: ParseState): TsNode[] { 3983 skipBlanks(P.L) 3984 const parts: TsNode[] = [] 3985 let segStart = P.L.b 3986 let segStartI = P.L.i 3987 let parenDepth = 0 3988 const flushSeg = () => { 3989 if (P.L.i > segStartI) { 3990 const text = P.src.slice(segStartI, P.L.i) 3991 // Pure number stays number; everything else is extglob_pattern 3992 const type = /^\d+$/.test(text) ? 
'number' : 'extglob_pattern' 3993 parts.push(mk(P, type, segStart, P.L.b, [])) 3994 } 3995 } 3996 while (P.L.i < P.L.len) { 3997 const c = peek(P.L) 3998 if (c === '\\' && P.L.i + 1 < P.L.len) { 3999 advance(P.L) 4000 advance(P.L) 4001 continue 4002 } 4003 if (c === '\n') break 4004 if (parenDepth === 0) { 4005 if (c === ']' && peek(P.L, 1) === ']') break 4006 if (c === ' ' || c === '\t') { 4007 let j = P.L.i 4008 while (j < P.L.len && (P.L.src[j] === ' ' || P.L.src[j] === '\t')) j++ 4009 const nc = P.L.src[j] ?? '' 4010 const nc1 = P.L.src[j + 1] ?? '' 4011 if ( 4012 (nc === ']' && nc1 === ']') || 4013 (nc === '&' && nc1 === '&') || 4014 (nc === '|' && nc1 === '|') 4015 ) { 4016 break 4017 } 4018 advance(P.L) 4019 continue 4020 } 4021 } 4022 // $ " ' must be parsed even inside @( ) extglob parens — parseDollarLike 4023 // consumes matching ) so parenDepth stays consistent. 4024 if (c === '$') { 4025 const c1 = peek(P.L, 1) 4026 if ( 4027 c1 === '(' || 4028 c1 === '{' || 4029 isIdentStart(c1) || 4030 SPECIAL_VARS.has(c1) 4031 ) { 4032 flushSeg() 4033 const exp = parseDollarLike(P) 4034 if (exp) parts.push(exp) 4035 segStart = P.L.b 4036 segStartI = P.L.i 4037 continue 4038 } 4039 } 4040 if (c === '"') { 4041 flushSeg() 4042 parts.push(parseDoubleQuoted(P)) 4043 segStart = P.L.b 4044 segStartI = P.L.i 4045 continue 4046 } 4047 if (c === "'") { 4048 flushSeg() 4049 const tok = nextToken(P.L, 'arg') 4050 parts.push(leaf(P, 'raw_string', tok)) 4051 segStart = P.L.b 4052 segStartI = P.L.i 4053 continue 4054 } 4055 if (c === '(') parenDepth++ 4056 else if (c === ')' && parenDepth > 0) parenDepth-- 4057 advance(P.L) 4058 } 4059 flushSeg() 4060 return parts 4061} 4062 4063function parseTestPrimary(P: ParseState, closer: string): TsNode | null { 4064 skipBlanks(P.L) 4065 // Stop at closer 4066 if (closer === ']' && peek(P.L) === ']') return null 4067 if (closer === ']]' && peek(P.L) === ']' && peek(P.L, 1) === ']') return null 4068 return parseWord(P, 'arg') 4069} 4070 
4071/** 4072 * Arithmetic context modes: 4073 * - 'var': bare identifiers → variable_name (default, used in $((..)), ((..))) 4074 * - 'word': bare identifiers → word (c-style for head condition/update clauses) 4075 * - 'assign': identifiers with = → variable_assignment (c-style for init clause) 4076 */ 4077type ArithMode = 'var' | 'word' | 'assign' 4078 4079/** Operator precedence table (higher = tighter binding). */ 4080const ARITH_PREC: Record<string, number> = { 4081 '=': 2, 4082 '+=': 2, 4083 '-=': 2, 4084 '*=': 2, 4085 '/=': 2, 4086 '%=': 2, 4087 '<<=': 2, 4088 '>>=': 2, 4089 '&=': 2, 4090 '^=': 2, 4091 '|=': 2, 4092 '||': 4, 4093 '&&': 5, 4094 '|': 6, 4095 '^': 7, 4096 '&': 8, 4097 '==': 9, 4098 '!=': 9, 4099 '<': 10, 4100 '>': 10, 4101 '<=': 10, 4102 '>=': 10, 4103 '<<': 11, 4104 '>>': 11, 4105 '+': 12, 4106 '-': 12, 4107 '*': 13, 4108 '/': 13, 4109 '%': 13, 4110 '**': 14, 4111} 4112 4113/** Right-associative operators (assignment and exponent). */ 4114const ARITH_RIGHT_ASSOC = new Set([ 4115 '=', 4116 '+=', 4117 '-=', 4118 '*=', 4119 '/=', 4120 '%=', 4121 '<<=', 4122 '>>=', 4123 '&=', 4124 '^=', 4125 '|=', 4126 '**', 4127]) 4128 4129function parseArithExpr( 4130 P: ParseState, 4131 stop: string, 4132 mode: ArithMode = 'var', 4133): TsNode | null { 4134 return parseArithTernary(P, stop, mode) 4135} 4136 4137/** Top-level: comma-separated list. arithmetic_expansion emits multiple children. 
*/ 4138function parseArithCommaList( 4139 P: ParseState, 4140 stop: string, 4141 mode: ArithMode = 'var', 4142): TsNode[] { 4143 const out: TsNode[] = [] 4144 while (true) { 4145 const e = parseArithTernary(P, stop, mode) 4146 if (e) out.push(e) 4147 skipBlanks(P.L) 4148 if (peek(P.L) === ',' && !isArithStop(P, stop)) { 4149 advance(P.L) 4150 continue 4151 } 4152 break 4153 } 4154 return out 4155} 4156 4157function parseArithTernary( 4158 P: ParseState, 4159 stop: string, 4160 mode: ArithMode, 4161): TsNode | null { 4162 const cond = parseArithBinary(P, stop, 0, mode) 4163 if (!cond) return null 4164 skipBlanks(P.L) 4165 if (peek(P.L) === '?') { 4166 const qs = P.L.b 4167 advance(P.L) 4168 const q = mk(P, '?', qs, P.L.b, []) 4169 const t = parseArithBinary(P, ':', 0, mode) 4170 skipBlanks(P.L) 4171 let colon: TsNode 4172 if (peek(P.L) === ':') { 4173 const cs = P.L.b 4174 advance(P.L) 4175 colon = mk(P, ':', cs, P.L.b, []) 4176 } else { 4177 colon = mk(P, ':', P.L.b, P.L.b, []) 4178 } 4179 const f = parseArithTernary(P, stop, mode) 4180 const last = f ?? colon 4181 const kids: TsNode[] = [cond, q] 4182 if (t) kids.push(t) 4183 kids.push(colon) 4184 if (f) kids.push(f) 4185 return mk(P, 'ternary_expression', cond.startIndex, last.endIndex, kids) 4186 } 4187 return cond 4188} 4189 4190/** Scan next arithmetic binary operator; returns [text, length] or null. */ 4191function scanArithOp(P: ParseState): [string, number] | null { 4192 const c = peek(P.L) 4193 const c1 = peek(P.L, 1) 4194 const c2 = peek(P.L, 2) 4195 // 3-char: <<= >>= 4196 if (c === '<' && c1 === '<' && c2 === '=') return ['<<=', 3] 4197 if (c === '>' && c1 === '>' && c2 === '=') return ['>>=', 3] 4198 // 2-char 4199 if (c === '*' && c1 === '*') return ['**', 2] 4200 if (c === '<' && c1 === '<') return ['<<', 2] 4201 if (c === '>' && c1 === '>') return ['>>', 2] 4202 if (c === '=' && c1 === '=') return ['==', 2] 4203 if (c === '!' 
&& c1 === '=') return ['!=', 2] 4204 if (c === '<' && c1 === '=') return ['<=', 2] 4205 if (c === '>' && c1 === '=') return ['>=', 2] 4206 if (c === '&' && c1 === '&') return ['&&', 2] 4207 if (c === '|' && c1 === '|') return ['||', 2] 4208 if (c === '+' && c1 === '=') return ['+=', 2] 4209 if (c === '-' && c1 === '=') return ['-=', 2] 4210 if (c === '*' && c1 === '=') return ['*=', 2] 4211 if (c === '/' && c1 === '=') return ['/=', 2] 4212 if (c === '%' && c1 === '=') return ['%=', 2] 4213 if (c === '&' && c1 === '=') return ['&=', 2] 4214 if (c === '^' && c1 === '=') return ['^=', 2] 4215 if (c === '|' && c1 === '=') return ['|=', 2] 4216 // 1-char — but NOT ++ -- (those are pre/postfix) 4217 if (c === '+' && c1 !== '+') return ['+', 1] 4218 if (c === '-' && c1 !== '-') return ['-', 1] 4219 if (c === '*') return ['*', 1] 4220 if (c === '/') return ['/', 1] 4221 if (c === '%') return ['%', 1] 4222 if (c === '<') return ['<', 1] 4223 if (c === '>') return ['>', 1] 4224 if (c === '&') return ['&', 1] 4225 if (c === '|') return ['|', 1] 4226 if (c === '^') return ['^', 1] 4227 if (c === '=') return ['=', 1] 4228 return null 4229} 4230 4231/** Precedence-climbing binary expression parser. */ 4232function parseArithBinary( 4233 P: ParseState, 4234 stop: string, 4235 minPrec: number, 4236 mode: ArithMode, 4237): TsNode | null { 4238 let left = parseArithUnary(P, stop, mode) 4239 if (!left) return null 4240 while (true) { 4241 skipBlanks(P.L) 4242 if (isArithStop(P, stop)) break 4243 if (peek(P.L) === ',') break 4244 const opInfo = scanArithOp(P) 4245 if (!opInfo) break 4246 const [opText, opLen] = opInfo 4247 const prec = ARITH_PREC[opText] 4248 if (prec === undefined || prec < minPrec) break 4249 const os = P.L.b 4250 for (let k = 0; k < opLen; k++) advance(P.L) 4251 const op = mk(P, opText, os, P.L.b, []) 4252 const nextMin = ARITH_RIGHT_ASSOC.has(opText) ? 
prec : prec + 1 4253 const right = parseArithBinary(P, stop, nextMin, mode) 4254 if (!right) break 4255 left = mk(P, 'binary_expression', left.startIndex, right.endIndex, [ 4256 left, 4257 op, 4258 right, 4259 ]) 4260 } 4261 return left 4262} 4263 4264function parseArithUnary( 4265 P: ParseState, 4266 stop: string, 4267 mode: ArithMode, 4268): TsNode | null { 4269 skipBlanks(P.L) 4270 if (isArithStop(P, stop)) return null 4271 const c = peek(P.L) 4272 const c1 = peek(P.L, 1) 4273 // Prefix ++ -- 4274 if ((c === '+' && c1 === '+') || (c === '-' && c1 === '-')) { 4275 const s = P.L.b 4276 advance(P.L) 4277 advance(P.L) 4278 const op = mk(P, c + c1, s, P.L.b, []) 4279 const inner = parseArithUnary(P, stop, mode) 4280 if (!inner) return op 4281 return mk(P, 'unary_expression', op.startIndex, inner.endIndex, [op, inner]) 4282 } 4283 if (c === '-' || c === '+' || c === '!' || c === '~') { 4284 // In 'word'/'assign' mode (c-style for head), `-N` is a single number 4285 // literal per tree-sitter, not unary_expression. 'var' mode uses unary. 
4286 if (mode !== 'var' && c === '-' && isDigit(c1)) { 4287 const s = P.L.b 4288 advance(P.L) 4289 while (isDigit(peek(P.L))) advance(P.L) 4290 return mk(P, 'number', s, P.L.b, []) 4291 } 4292 const s = P.L.b 4293 advance(P.L) 4294 const op = mk(P, c, s, P.L.b, []) 4295 const inner = parseArithUnary(P, stop, mode) 4296 if (!inner) return op 4297 return mk(P, 'unary_expression', op.startIndex, inner.endIndex, [op, inner]) 4298 } 4299 return parseArithPostfix(P, stop, mode) 4300} 4301 4302function parseArithPostfix( 4303 P: ParseState, 4304 stop: string, 4305 mode: ArithMode, 4306): TsNode | null { 4307 const prim = parseArithPrimary(P, stop, mode) 4308 if (!prim) return null 4309 const c = peek(P.L) 4310 const c1 = peek(P.L, 1) 4311 if ((c === '+' && c1 === '+') || (c === '-' && c1 === '-')) { 4312 const s = P.L.b 4313 advance(P.L) 4314 advance(P.L) 4315 const op = mk(P, c + c1, s, P.L.b, []) 4316 return mk(P, 'postfix_expression', prim.startIndex, op.endIndex, [prim, op]) 4317 } 4318 return prim 4319} 4320 4321function parseArithPrimary( 4322 P: ParseState, 4323 stop: string, 4324 mode: ArithMode, 4325): TsNode | null { 4326 skipBlanks(P.L) 4327 if (isArithStop(P, stop)) return null 4328 const c = peek(P.L) 4329 if (c === '(') { 4330 const s = P.L.b 4331 advance(P.L) 4332 const open = mk(P, '(', s, P.L.b, []) 4333 // Parenthesized expression may contain comma-separated exprs 4334 const inners = parseArithCommaList(P, ')', mode) 4335 skipBlanks(P.L) 4336 let close: TsNode 4337 if (peek(P.L) === ')') { 4338 const cs = P.L.b 4339 advance(P.L) 4340 close = mk(P, ')', cs, P.L.b, []) 4341 } else { 4342 close = mk(P, ')', P.L.b, P.L.b, []) 4343 } 4344 return mk(P, 'parenthesized_expression', open.startIndex, close.endIndex, [ 4345 open, 4346 ...inners, 4347 close, 4348 ]) 4349 } 4350 if (c === '"') { 4351 return parseDoubleQuoted(P) 4352 } 4353 if (c === '$') { 4354 return parseDollarLike(P) 4355 } 4356 if (isDigit(c)) { 4357 const s = P.L.b 4358 while 
(isDigit(peek(P.L))) advance(P.L) 4359 // Hex: 0x1f 4360 if ( 4361 P.L.b - s === 1 && 4362 c === '0' && 4363 (peek(P.L) === 'x' || peek(P.L) === 'X') 4364 ) { 4365 advance(P.L) 4366 while (isHexDigit(peek(P.L))) advance(P.L) 4367 } 4368 // Base notation: BASE#DIGITS e.g. 2#1010, 16#ff 4369 else if (peek(P.L) === '#') { 4370 advance(P.L) 4371 while (isBaseDigit(peek(P.L))) advance(P.L) 4372 } 4373 return mk(P, 'number', s, P.L.b, []) 4374 } 4375 if (isIdentStart(c)) { 4376 const s = P.L.b 4377 while (isIdentChar(peek(P.L))) advance(P.L) 4378 const nc = peek(P.L) 4379 // Assignment in 'assign' mode (c-style for init): emit variable_assignment 4380 // so chained `a = b = c = 1` nests correctly. Other modes treat `=` as a 4381 // binary_expression operator via the precedence table. 4382 if (mode === 'assign') { 4383 skipBlanks(P.L) 4384 const ac = peek(P.L) 4385 const ac1 = peek(P.L, 1) 4386 if (ac === '=' && ac1 !== '=') { 4387 const vn = mk(P, 'variable_name', s, P.L.b, []) 4388 const es = P.L.b 4389 advance(P.L) 4390 const eq = mk(P, '=', es, P.L.b, []) 4391 // RHS may itself be another assignment (chained) 4392 const val = parseArithTernary(P, stop, mode) 4393 const end = val ? val.endIndex : eq.endIndex 4394 const kids = val ? [vn, eq, val] : [vn, eq] 4395 return mk(P, 'variable_assignment', s, end, kids) 4396 } 4397 } 4398 // Subscript 4399 if (nc === '[') { 4400 const vn = mk(P, 'variable_name', s, P.L.b, []) 4401 const brS = P.L.b 4402 advance(P.L) 4403 const brOpen = mk(P, '[', brS, P.L.b, []) 4404 const idx = parseArithTernary(P, ']', 'var') ?? parseDollarLike(P) 4405 skipBlanks(P.L) 4406 let brClose: TsNode 4407 if (peek(P.L) === ']') { 4408 const cs = P.L.b 4409 advance(P.L) 4410 brClose = mk(P, ']', cs, P.L.b, []) 4411 } else { 4412 brClose = mk(P, ']', P.L.b, P.L.b, []) 4413 } 4414 const kids = idx ? 
[vn, brOpen, idx, brClose] : [vn, brOpen, brClose] 4415 return mk(P, 'subscript', s, brClose.endIndex, kids) 4416 } 4417 // Bare identifier: variable_name in 'var' mode, word in 'word'/'assign' mode. 4418 // 'assign' mode falls through to word when no `=` follows (c-style for 4419 // cond/update clauses: `c<=5` → binary_expression(word, number)). 4420 const identType = mode === 'var' ? 'variable_name' : 'word' 4421 return mk(P, identType, s, P.L.b, []) 4422 } 4423 return null 4424} 4425 4426function isArithStop(P: ParseState, stop: string): boolean { 4427 const c = peek(P.L) 4428 if (stop === '))') return c === ')' && peek(P.L, 1) === ')' 4429 if (stop === ')') return c === ')' 4430 if (stop === ';') return c === ';' 4431 if (stop === ':') return c === ':' 4432 if (stop === ']') return c === ']' 4433 if (stop === '}') return c === '}' 4434 if (stop === ':}') return c === ':' || c === '}' 4435 return c === '' || c === '\n' 4436}