Mirror: The magical sticky regex-based parser generator 🧙
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge pull request #13 from kitten/feat/lookahead-shorthand

Add support for lookahead/non-capturing shorthand syntax

authored by kitten.sh and committed by

GitHub f743a482 d54242f3

+128 -23
+2 -1
src/codegen.js
··· 26 26 `; 27 27 28 28 const astExpression = (ast, depth, opts) => { 29 + const capture = !!opts.capture && !ast.capture; 29 30 const restoreLength = 30 31 (opts.length && opts.abort && js`${_node}.length = ln${opts.length};`) || 31 32 ''; 32 33 const expression = `${ast.expression.id}(${_state})`; 33 34 return js` 34 35 if ((${_match} = ${ast.expression.id}(${_state})) != null) { 35 - ${opts.capture ? js`${_node}.push(${_match})` : ''} 36 + ${capture ? js`${_node}.push(${_match})` : ''} 36 37 } else { 37 38 ${opts.onAbort} 38 39 ${restoreIndex(opts.index)}
+50
src/core.test.js
··· 358 358 ); 359 359 }); 360 360 361 + describe('non-capturing shorthand', () => { 362 + const node = match('node')`${/1/} :${/2/}+`; 363 + it.each` 364 + input | result | lastIndex 365 + ${'12'} | ${['1']} | ${2} 366 + ${'122'} | ${['1']} | ${3} 367 + ${'13'} | ${undefined} | ${0} 368 + ${'1'} | ${undefined} | ${0} 369 + ${'_'} | ${undefined} | ${0} 370 + `( 371 + 'should return $result when $input is passed', 372 + ({ input, result, lastIndex }) => { 373 + expectToParse(node, input, result, lastIndex); 374 + } 375 + ); 376 + }); 377 + 361 378 describe('non-capturing group with plus matcher, then required matcher', () => { 362 379 const node = match('node')`(?: ${/1/}+) ${/2/}`; 363 380 it.each` ··· 445 462 ); 446 463 }); 447 464 465 + describe('positive lookahead shorthand', () => { 466 + const node = match('node')`=${/1/} ${/\d/}`; 467 + it.each` 468 + input | result | lastIndex 469 + ${'1'} | ${['1']} | ${1} 470 + ${'13'} | ${['1']} | ${1} 471 + ${'2'} | ${undefined} | ${0} 472 + ${'_'} | ${undefined} | ${0} 473 + `( 474 + 'should return $result when $input is passed', 475 + ({ input, result, lastIndex }) => { 476 + expectToParse(node, input, result, lastIndex); 477 + } 478 + ); 479 + }); 480 + 448 481 describe('positive lookahead group with plus matcher', () => { 449 482 const node = match('node')`(?= ${/1/}+) ${/\d/}`; 450 483 it.each` ··· 484 517 485 518 describe('negative lookahead group', () => { 486 519 const node = match('node')`(?! ${/1/}) ${/\d/}`; 520 + it.each` 521 + input | result | lastIndex 522 + ${'2'} | ${['2']} | ${1} 523 + ${'23'} | ${['2']} | ${1} 524 + ${'1'} | ${undefined} | ${0} 525 + ${'1'} | ${undefined} | ${0} 526 + ${'_'} | ${undefined} | ${0} 527 + `( 528 + 'should return $result when $input is passed', 529 + ({ input, result, lastIndex }) => { 530 + expectToParse(node, input, result, lastIndex); 531 + } 532 + ); 533 + }); 534 + 535 + describe('negative lookahead shorthand', () => { 536 + const node = match('node')`!${/1/} ${/\d/}`; 487 537 it.each` 488 538 input | result | lastIndex 489 539 ${'2'} | ${['2']} | ${1}
+14 -7
src/parser.js
··· 12 12 let currentGroup = null; 13 13 let lastMatch; 14 14 let currentSequence = rootSequence; 15 + let capture; 15 16 16 17 for ( 17 18 let quasiIndex = 0, stackIndex = 0; ··· 19 20 stackIndex++ 20 21 ) { 21 22 if (stackIndex % 2 !== 0) { 22 - currentSequence.push({ 23 - expression: expressions[stackIndex++ >> 1], 24 - }); 23 + const expression = expressions[stackIndex++ >> 1]; 24 + currentSequence.push({ expression, capture }); 25 + capture = undefined; 25 26 } 26 27 27 28 const quasi = quasis[stackIndex >> 1]; ··· 36 37 if (!currentSequence) syntaxError(char); 37 38 } else if (char === '(') { 38 39 sequenceStack.push(currentSequence); 39 - currentSequence.push((currentGroup = { sequence: [] })); 40 + currentSequence.push((currentGroup = { sequence: [], capture })); 40 41 currentSequence = currentGroup.sequence; 42 + capture = undefined; 43 + } else if (char === ':' || char === '=' || char === '!') { 44 + capture = char; 45 + const nextChar = quasi[quasiIndex]; 46 + if (quasi[quasiIndex] && quasi[quasiIndex] !== '(') syntaxError(char); 41 47 } else if (char === '?' && !currentSequence.length && currentGroup) { 42 - const nextChar = quasi[quasiIndex++]; 43 - if (nextChar === ':' || nextChar === '=' || nextChar === '!') { 44 - currentGroup.capture = nextChar; 48 + capture = quasi[quasiIndex++]; 49 + if (capture === ':' || capture === '=' || capture === '!') { 50 + currentGroup.capture = capture; 51 + capture = undefined; 45 52 } else { 46 53 syntaxError(char); 47 54 }
+62 -15
src/parser.test.js
··· 42 42 expect(ast).toHaveProperty('0.sequence.0.quantifier', undefined); 43 43 }); 44 44 45 - it('supports non-capturing groups', () => { 46 - const ast = parseTag`(?: ${1})`; 47 - expect(ast).toHaveProperty('length', 1); 48 - expect(ast).toHaveProperty('0.capture', ':'); 49 - expect(ast).toHaveProperty('0.sequence.length', 1); 45 + describe('non-capturing syntax', () => { 46 + it('supports regex-like syntax', () => { 47 + const ast = parseTag`(?: ${1})`; 48 + expect(ast).toHaveProperty('length', 1); 49 + expect(ast).toHaveProperty('0.capture', ':'); 50 + expect(ast).toHaveProperty('0.sequence.length', 1); 51 + }); 52 + 53 + it('supports shorthand', () => { 54 + let ast = parseTag`:${1}`; 55 + expect(ast).toHaveProperty('length', 1); 56 + expect(ast).toHaveProperty('0.capture', ':'); 57 + expect(ast).toHaveProperty('0.expression', 1); 58 + ast = parseTag`:(${1})`; 59 + expect(ast).toHaveProperty('length', 1); 60 + expect(ast).toHaveProperty('0.capture', ':'); 61 + expect(ast).toHaveProperty('0.sequence.length', 1); 62 + }); 63 + 64 + it('fails on invalid usage', () => { 65 + expect(() => parseTag`${1} : ${2}`).toThrow(); 66 + expect(() => parseTag`${1} :|${2}`).toThrow(); 67 + }); 50 68 }); 51 69 52 - it('supports positive lookahead groups', () => { 53 - const ast = parseTag`(?= ${1})`; 54 - expect(ast).toHaveProperty('length', 1); 55 - expect(ast).toHaveProperty('0.capture', '='); 56 - expect(ast).toHaveProperty('0.sequence.length', 1); 70 + describe('positive lookaheads syntax', () => { 71 + it('supports regex-like syntax', () => { 72 + const ast = parseTag`(?= ${1})`; 73 + expect(ast).toHaveProperty('length', 1); 74 + expect(ast).toHaveProperty('0.capture', '='); 75 + expect(ast).toHaveProperty('0.sequence.length', 1); 76 + }); 77 + 78 + it('supports shorthand', () => { 79 + let ast = parseTag`=${1}`; 80 + expect(ast).toHaveProperty('length', 1); 81 + expect(ast).toHaveProperty('0.capture', '='); 82 + expect(ast).toHaveProperty('0.expression', 1); 83 + ast = parseTag`=(${1})`; 84 + expect(ast).toHaveProperty('length', 1); 85 + expect(ast).toHaveProperty('0.capture', '='); 86 + expect(ast).toHaveProperty('0.sequence.length', 1); 87 + }); 57 88 }); 58 89 59 - it('supports negative lookahead groups', () => { 60 - const ast = parseTag`(?! ${1})`; 61 - expect(ast).toHaveProperty('length', 1); 62 - expect(ast).toHaveProperty('0.capture', '!'); 63 - expect(ast).toHaveProperty('0.sequence.length', 1); 90 + describe('negative lookaheads syntax', () => { 91 + it('supports regex-like syntax', () => { 92 + const ast = parseTag`(?! ${1})`; 93 + expect(ast).toHaveProperty('length', 1); 94 + expect(ast).toHaveProperty('0.capture', '!'); 95 + expect(ast).toHaveProperty('0.sequence.length', 1); 96 + }); 97 + 98 + it('supports shorthand', () => { 99 + let ast = parseTag`!${1}`; 100 + expect(ast).toHaveProperty('length', 1); 101 + expect(ast).toHaveProperty('0.capture', '!'); 102 + expect(ast).toHaveProperty('0.expression', 1); 103 + ast = parseTag`!(${1})`; 104 + expect(ast).toHaveProperty('length', 1); 105 + expect(ast).toHaveProperty('0.capture', '!'); 106 + expect(ast).toHaveProperty('0.sequence.length', 1); 107 + }); 64 108 }); 65 109 66 110 it('supports groups with alternates', () => { 67 111 expect(parseTag`(${1} | ${2}) ${3}`).toMatchInlineSnapshot(` 68 112 Array [ 69 113 Object { 114 + "capture": undefined, 70 115 "sequence": Array [ 71 116 Object { 117 + "capture": undefined, 72 118 "expression": 1, 73 119 }, 74 120 ], 75 121 }, 76 122 Object { 123 + "capture": undefined, 77 124 "expression": 3, 78 125 }, 79 126 ]