Mirror: The magical sticky regex-based parser generator 🧙

Merge branch 'feat/tagged-template-parsing' into master

+1 -1
babel.js
··· 1 - module.exports = require('./dist/reghex-babel.js').default; 1 + module.exports = require('./dist/reghex-babel.js');
-2
package.json
··· 22 22 "require": "./dist/reghex-core.js" 23 23 }, 24 24 "./babel": { 25 - "import": "./dist/reghex-babel.mjs", 26 25 "require": "./dist/reghex-babel.js" 27 26 }, 28 27 "./macro": { 29 - "import": "./dist/reghex-macro.mjs", 30 28 "require": "./dist/reghex-macro.js" 31 29 }, 32 30 "./package.json": "./package.json"
+4
rollup.config.js
··· 66 66 }, 67 67 { 68 68 ...base, 69 + output: { 70 + ...output('cjs', '.js'), 71 + exports: 'default', 72 + }, 69 73 input: { 70 74 babel: './src/babel/plugin.js', 71 75 macro: './src/babel/macro.js',
+93 -60
src/babel/__snapshots__/plugin.test.js.snap
··· 5 5 6 6 var _reghex = require(\\"reghex\\"); 7 7 8 - var _node_expression = (0, _reghex._pattern)(1), 9 - _node_expression2 = (0, _reghex._pattern)(2); 8 + var _node_expression = _reghex.__private.pattern(1), 9 + _node_expression2 = _reghex.__private.pattern(2); 10 10 11 11 const node = function (state) { 12 - var idx1 = state.index; 12 + var y1 = state.y, 13 + x1 = state.x; 13 14 var node = []; 14 15 var x; 15 16 16 - if (x = (0, _reghex._exec)(state, _node_expression)) { 17 + if (x = _reghex.__private.exec(state, _node_expression)) { 17 18 node.push(x); 18 19 } else { 19 - state.index = idx1; 20 + state.y = y1; 21 + state.x = x1; 20 22 return; 21 23 } 22 24 23 - if (x = (0, _reghex._exec)(state, _node_expression2)) { 25 + if (x = _reghex.__private.exec(state, _node_expression2)) { 24 26 node.push(x); 25 27 } else { 26 - state.index = idx1; 28 + state.y = y1; 29 + state.x = x1; 27 30 return; 28 31 } 29 32 ··· 38 41 `; 39 42 40 43 exports[`works with local recursion 1`] = ` 41 - "import { match as m, tag, _exec, _pattern } from 'reghex'; 44 + "import { match as m, tag, __private } from 'reghex'; 42 45 43 - var _inner_expression = _pattern(/inner/); 46 + var _inner_expression = __private.pattern(/inner/); 44 47 45 48 const inner = function (state) { 46 - var idx1 = state.index; 49 + var y1 = state.y, 50 + x1 = state.x; 47 51 var node = []; 48 52 var x; 49 53 50 - if (x = _exec(state, _inner_expression)) { 54 + if (x = __private.exec(state, _inner_expression)) { 51 55 node.push(x); 52 56 } else { 53 - state.index = idx1; 57 + state.y = y1; 58 + state.x = x1; 54 59 return; 55 60 } 56 61 ··· 59 64 }; 60 65 61 66 const node = function (state) { 62 - var idx1 = state.index; 67 + var y1 = state.y, 68 + x1 = state.x; 63 69 var node = []; 64 70 var x; 65 71 66 72 if (x = inner(state)) { 67 73 node.push(x); 68 74 } else { 69 - state.index = idx1; 75 + state.y = y1; 76 + state.x = x1; 70 77 return; 71 78 } 72 79 ··· 76 83 `; 77 84 78 85 exports[`works with non-capturing groups 1`] = ` 79 - "import { match, _exec, _pattern, tag as _tag } from 'reghex'; 86 + "import { match, __private } from 'reghex'; 80 87 81 - var _node_expression = _pattern(1), 82 - _node_expression2 = _pattern(2), 83 - _node_expression3 = _pattern(3); 88 + var _node_expression = __private.pattern(1), 89 + _node_expression2 = __private.pattern(2), 90 + _node_expression3 = __private.pattern(3); 84 91 85 92 const node = function (state) { 86 - var idx1 = state.index; 93 + var y1 = state.y, 94 + x1 = state.x; 87 95 var node = []; 88 96 var x; 89 97 90 - if (x = _exec(state, _node_expression)) { 98 + if (x = __private.exec(state, _node_expression)) { 91 99 node.push(x); 92 100 } else { 93 - state.index = idx1; 101 + state.y = y1; 102 + state.x = x1; 94 103 return; 95 104 } 96 105 ··· 98 107 99 108 alt_3: { 100 109 block_3: { 101 - var idx3 = state.index; 110 + var y3 = state.y, 111 + x3 = state.x; 102 112 103 - if (x = _exec(state, _node_expression2)) { 113 + if (x = __private.exec(state, _node_expression2)) { 104 114 node.push(x); 105 115 } else { 106 - state.index = idx3; 116 + state.y = y3; 117 + state.x = x3; 107 118 node.length = ln2; 108 119 break block_3; 109 120 } ··· 112 123 } 113 124 114 125 loop_3: for (var j3 = 0; 1; j3++) { 115 - var idx3 = state.index; 126 + var y3 = state.y, 127 + x3 = state.x; 116 128 117 - if (!_exec(state, _node_expression3)) { 129 + if (x = __private.exec(state, _node_expression3)) {} else { 118 130 if (j3) { 119 - state.index = idx3; 131 + state.y = y3; 132 + state.x = x3; 120 133 break loop_3; 121 134 } else {} 122 135 123 - state.index = idx1; 136 + state.y = y1; 137 + state.x = x1; 124 138 node.length = ln2; 125 139 return; 126 140 } ··· 133 147 `; 134 148 135 149 exports[`works with self-referential thunks 1`] = ` 136 - "import { match, tag, _exec, _pattern } from 'reghex'; 150 + "import { match, tag, __private } from 'reghex'; 137 151 138 152 const inner = function (state) { 139 - var idx1 = state.index; 153 + var y1 = state.y, 154 + x1 = state.x; 140 155 var node = []; 141 156 var x; 142 157 143 158 if (x = node(state)) { 144 159 node.push(x); 145 160 } else { 146 - state.index = idx1; 161 + state.y = y1; 162 + state.x = x1; 147 163 return; 148 164 } 149 165 ··· 152 168 }; 153 169 154 170 const node = function (state) { 155 - var idx1 = state.index; 171 + var y1 = state.y, 172 + x1 = state.x; 156 173 var node = []; 157 174 var x; 158 175 159 176 if (x = inner(state)) { 160 177 node.push(x); 161 178 } else { 162 - state.index = idx1; 179 + state.y = y1; 180 + state.x = x1; 163 181 return; 164 182 } 165 183 ··· 169 187 `; 170 188 171 189 exports[`works with standard features 1`] = ` 172 - "import { match, _exec, _pattern, tag as _tag } from \\"reghex\\"; 190 + "import { match, __private } from \\"reghex\\"; 173 191 174 - var _node_expression = _pattern(1), 175 - _node_expression2 = _pattern(2), 176 - _node_expression3 = _pattern(3), 177 - _node_expression4 = _pattern(4), 178 - _node_expression5 = _pattern(5); 192 + var _node_expression = __private.pattern(1), 193 + _node_expression2 = __private.pattern(2), 194 + _node_expression3 = __private.pattern(3), 195 + _node_expression4 = __private.pattern(4), 196 + _node_expression5 = __private.pattern(5); 179 197 180 198 const node = function (state) { 181 - var idx1 = state.index; 199 + var y1 = state.y, 200 + x1 = state.x; 182 201 var node = []; 183 202 var x; 184 203 185 204 alt_2: { 186 205 block_2: { 187 - var idx2 = state.index; 206 + var y2 = state.y, 207 + x2 = state.x; 188 208 189 209 loop_2: for (var j2 = 0; 1; j2++) { 190 - var idx2 = state.index; 210 + var y2 = state.y, 211 + x2 = state.x; 191 212 192 - if (x = _exec(state, _node_expression)) { 213 + if (x = __private.exec(state, _node_expression)) { 193 214 node.push(x); 194 215 } else { 195 216 if (j2) { 196 - state.index = idx2; 217 + state.y = y2; 218 + state.x = x2; 197 219 break loop_2; 198 220 } else {} 199 221 200 - state.index = idx2; 222 + state.y = y2; 223 + state.x = x2; 201 224 break block_2; 202 225 } 203 226 } ··· 206 229 } 207 230 208 231 loop_2: for (var j2 = 0; 1; j2++) { 209 - var idx2 = state.index; 232 + var y2 = state.y, 233 + x2 = state.x; 210 234 211 - if (x = _exec(state, _node_expression2)) { 235 + if (x = __private.exec(state, _node_expression2)) { 212 236 node.push(x); 213 237 } else { 214 238 if (j2) { 215 - state.index = idx2; 239 + state.y = y2; 240 + state.x = x2; 216 241 break loop_2; 217 242 } else {} 218 243 219 - state.index = idx1; 244 + state.y = y1; 245 + state.x = x1; 220 246 return; 221 247 } 222 248 } 223 249 224 250 loop_2: for (;;) { 225 - var idx2 = state.index; 251 + var y2 = state.y, 252 + x2 = state.x; 226 253 var ln2 = node.length; 227 254 228 - if (x = _exec(state, _node_expression3)) { 255 + if (x = __private.exec(state, _node_expression3)) { 229 256 node.push(x); 230 257 } else { 231 - state.index = idx2; 258 + state.y = y2; 259 + state.x = x2; 232 260 node.length = ln2; 233 261 break loop_2; 234 262 } 235 263 236 - var idx4 = state.index; 264 + var y4 = state.y, 265 + x4 = state.x; 237 266 238 - if (x = _exec(state, _node_expression4)) { 267 + if (x = __private.exec(state, _node_expression4)) { 239 268 node.push(x); 240 269 } else { 241 - state.index = idx4; 270 + state.y = y4; 271 + state.x = x4; 242 272 } 243 273 244 - if (x = _exec(state, _node_expression5)) { 274 + if (x = __private.exec(state, _node_expression5)) { 245 275 node.push(x); 246 276 } else { 247 - state.index = idx2; 277 + state.y = y2; 278 + state.x = x2; 248 279 node.length = ln2; 249 280 break loop_2; 250 281 } ··· 257 288 `; 258 289 259 290 exports[`works with transform functions 1`] = ` 260 - "import { match, _exec, _pattern, tag as _tag } from 'reghex'; 291 + "import { match, __private } from 'reghex'; 261 292 262 293 var _inner_transform = x => x; 263 294 264 295 const first = function (state) { 265 - var idx1 = state.index; 296 + var y1 = state.y, 297 + x1 = state.x; 266 298 var node = []; 267 299 var x; 268 300 node.tag = 'inner'; ··· 272 304 const transform = x => x; 273 305 274 306 const second = function (state) { 275 - var idx1 = state.index; 307 + var y1 = state.y, 308 + x1 = state.x; 276 309 var node = []; 277 310 var x; 278 311 node.tag = 'node';
-20
src/babel/sharedIds.js
··· 1 - export class SharedIds { 2 - constructor(t) { 3 - this.t = t; 4 - this.execId = t.identifier('_exec'); 5 - this.patternId = t.identifier('_pattern'); 6 - this.tagId = t.identifier('tag'); 7 - } 8 - 9 - get exec() { 10 - return this.t.identifier(this.execId.name); 11 - } 12 - 13 - get pattern() { 14 - return this.t.identifier(this.patternId.name); 15 - } 16 - 17 - get tag() { 18 - return this.t.identifier(this.tagId.name); 19 - } 20 - }
+12 -18
src/babel/transform.js
··· 1 + import { astRoot, _private } from '../codegen'; 1 2 import { parse } from '../parser'; 2 - import { astRoot } from '../codegen'; 3 - import { SharedIds } from './sharedIds'; 4 3 5 4 export function makeHelpers({ types: t, template }) { 6 5 const regexPatternsRe = /^[()\[\]|.+?*]|[^\\][()\[\]|.+?*$^]|\\[wdsWDS]/; 7 6 const importSourceRe = /reghex$|^reghex\/macro/; 8 7 const importName = 'reghex'; 9 - const ids = new SharedIds(t); 10 8 11 9 let _hasUpdatedImport = false; 10 + let _matchId = t.identifier('match'); 11 + let _privateId = t.identifier(_private); 12 + 13 + const privateMethod = (name) => 14 + t.memberExpression(t.identifier(_privateId.name), t.identifier(name)); 12 15 13 16 return { 14 17 /** Adds the reghex import declaration to the Program scope */ ··· 21 24 path.node.source = t.stringLiteral(importName); 22 25 } 23 26 24 - path.node.specifiers.push( 25 - t.importSpecifier( 26 - (ids.execId = path.scope.generateUidIdentifier('exec')), 27 - t.identifier('_exec') 28 - ), 29 - t.importSpecifier( 30 - (ids.patternId = path.scope.generateUidIdentifier('pattern')), 31 - t.identifier('_pattern') 32 - ) 33 - ); 27 + path.node.specifiers.push(t.importSpecifier(_privateId, _privateId)); 34 28 35 29 const tagImport = path.node.specifiers.find((node) => { 36 - return t.isImportSpecifier(node) && node.imported.name === 'tag'; 30 + return t.isImportSpecifier(node) && node.imported.name === 'match'; 37 31 }); 38 32 39 33 if (!tagImport) { 40 34 path.node.specifiers.push( 41 35 t.importSpecifier( 42 - (ids.tagId = path.scope.generateUidIdentifier('tag')), 43 - t.identifier('tag') 36 + (_matchId = path.scope.generateUidIdentifier('match')), 37 + t.identifier('match') 44 38 ) 45 39 ); 46 40 } else { 47 - ids.tagId = tagImport.imported; 41 + _matchId = tagImport.imported; 48 42 } 49 43 }, 50 44 ··· 140 134 variableDeclarators.push( 141 135 t.variableDeclarator( 142 136 id, 143 - t.callExpression(ids.pattern, [expression]) 137 + t.callExpression(privateMethod('pattern'), [expression]) 144 138 ) 145 139 ); 146 140
+33 -49
src/codegen.js
··· 1 - export const _exec = '_exec'; 1 + export const _private = '__private'; 2 + 2 3 const _state = 'state'; 3 4 const _node = 'node'; 4 5 const _match = 'x'; ··· 16 17 return next; 17 18 }; 18 19 19 - const assignIndex = (depth) => 20 - depth ? js`var idx${depth} = ${_state}.index;` : ''; 21 - 22 - const restoreIndex = (depth) => 23 - depth ? js`${_state}.index = idx${depth};` : ''; 20 + const assignIndex = (depth) => js` 21 + var y${depth} = ${_state}.y, 22 + x${depth} = ${_state}.x; 23 + `; 24 24 25 - const abortOnCondition = (condition, hooks) => js` 26 - if (${condition}) { 27 - ${restoreIndex(opts.index)} 28 - ${opts.abort} 29 - } else { 30 - ${opts.onAbort} 31 - } 25 + const restoreIndex = (depth) => js` 26 + ${_state}.y = y${depth}; 27 + ${_state}.x = x${depth}; 32 28 `; 33 29 34 30 const astExpression = (ast, depth, opts) => { 35 31 const restoreLength = 36 32 (opts.length && opts.abort && js`${_node}.length = ln${opts.length};`) || 37 33 ''; 38 - 39 - const abort = js` 40 - ${opts.onAbort} 41 - ${restoreIndex(opts.index)} 42 - ${restoreLength} 43 - ${opts.abort} 44 - `; 45 - 46 34 const expression = ast.expression.fn 47 35 ? `${ast.expression.id}(${_state})` 48 - : `${_exec}(${_state}, ${ast.expression.id})`; 49 - 50 - if (!opts.capture) { 51 - return js` 52 - if (!${expression}) { 53 - ${abort} 54 - } 55 - `; 56 - } 36 + : `${_private}.exec(${_state}, ${ast.expression.id})`; 57 37 58 38 return js` 59 39 if (${_match} = ${expression}) { 60 - ${_node}.push(${_match}); 40 + ${opts.capture ? js`${_node}.push(${_match})` : ''} 61 41 } else { 62 - ${abort} 42 + ${opts.onAbort} 43 + ${restoreIndex(opts.index)} 44 + ${restoreLength} 45 + ${opts.abort} 63 46 } 64 47 `; 65 48 }; ··· 70 53 opts = copy(opts); 71 54 opts.capture = capture; 72 55 73 - let group = ''; 74 56 if (!opts.length && capture) { 75 57 opts.length = depth; 76 58 return js` ··· 200 182 `; 201 183 }; 202 184 203 - const astRoot = (ast, name, transform) => js` 204 - (function (${_state}) { 205 - ${assignIndex(1)} 206 - var ${_node} = []; 207 - var ${_match}; 185 + const astRoot = (ast, name, transform) => { 186 + return js` 187 + (function (${_state}) { 188 + ${assignIndex(1)} 189 + var ${_node} = []; 190 + var ${_match}; 208 191 209 - ${astSequence(ast, 2, { 210 - index: 1, 211 - length: 0, 212 - onAbort: '', 213 - abort: js`return;`, 214 - capture: true, 215 - })} 192 + ${astSequence(ast, 2, { 193 + index: 1, 194 + length: 0, 195 + onAbort: '', 196 + abort: js`return;`, 197 + capture: true, 198 + })} 216 199 217 - ${_node}.tag = ${name}; 218 - return ${transform ? js`(${transform})(${_node})` : _node}; 219 - }) 220 - `; 200 + ${_node}.tag = ${name}; 201 + return ${transform ? js`(${transform})(${_node})` : _node}; 202 + }) 203 + `; 204 + }; 221 205 222 206 export { astRoot };
+53 -26
src/core.js
··· 1 - import { astRoot, _exec as execId } from './codegen'; 1 + import { astRoot, _private as privateId } from './codegen'; 2 2 import { parse as parseDSL } from './parser'; 3 3 4 4 const isStickySupported = typeof /./g.sticky === 'boolean'; 5 5 6 - export const _pattern = (input) => { 7 - if (typeof input === 'function') return input; 8 - const source = typeof input !== 'string' ? input.source : input; 9 - return isStickySupported 10 - ? new RegExp(source, 'y') 11 - : new RegExp(source + '|()', 'g'); 6 + export const __private = { 7 + pattern(input) { 8 + if (typeof input === 'function') return input; 9 + const source = typeof input !== 'string' ? input.source : input; 10 + return isStickySupported 11 + ? new RegExp(source, 'y') 12 + : new RegExp(source + '|()', 'g'); 13 + }, 14 + 15 + exec(state, pattern) { 16 + let match; 17 + 18 + if (typeof pattern === 'function') { 19 + if (!pattern.length) pattern = pattern(); 20 + return pattern(state); 21 + } 22 + 23 + const input = state.quasis[state.x]; 24 + if (input && (pattern.lastIndex = state.y) < input.length) { 25 + if (isStickySupported) { 26 + if (pattern.test(input)) 27 + match = input.slice(state.y, pattern.lastIndex); 28 + } else { 29 + match = pattern.exec(input)[0] || match; 30 + } 31 + 32 + state.y = pattern.lastIndex; 33 + } 34 + 35 + return match; 36 + }, 12 37 }; 13 38 14 - export const _exec = (state, pattern) => { 39 + export const interpolation = (predicate) => (state) => { 15 40 let match; 16 41 17 - if (typeof pattern === 'function') { 18 - if (!pattern.length) pattern = pattern(); 19 - return pattern(state); 20 - } 21 - 22 - pattern.lastIndex = state.index; 23 - 24 - if (isStickySupported) { 25 - if (pattern.test(state.input)) 26 - match = state.input.slice(state.index, pattern.lastIndex); 27 - } else { 28 - match = pattern.exec(state.input)[0] || match; 42 + if ( 43 + state.y >= state.quasis[state.x].length && 44 + state.x < state.expressions.length 45 + ) { 46 + state.y = 0; 47 + match = state.expressions[state.x++]; 48 + if (predicate && match) match = predicate(match); 29 49 } 30 50 31 - state.index = pattern.lastIndex; 32 51 return match; 33 52 }; 34 53 35 - export const parse = (pattern) => (input) => { 36 - const state = { input, index: 0 }; 37 - return pattern(state); 54 + export const parse = (matcher) => (quasis, ...expressions) => { 55 + if (typeof quasis === 'string') quasis = [quasis]; 56 + const state = { quasis, expressions, x: 0, y: 0 }; 57 + return matcher(state); 38 58 }; 39 59 40 60 export const match = (name, transform) => (quasis, ...expressions) => { ··· 47 67 ); 48 68 49 69 const makeMatcher = new Function( 50 - execId + ',_n,_t,' + expressions.map((_expression, i) => `_${i}`).join(','), 70 + privateId + 71 + ',_n,_t,' + 72 + expressions.map((_expression, i) => `_${i}`).join(','), 51 73 'return ' + astRoot(ast, '_n', transform ? '_t' : null) 52 74 ); 53 75 54 - return makeMatcher(_exec, name, transform, ...expressions.map(_pattern)); 76 + return makeMatcher( 77 + __private, 78 + name, 79 + transform, 80 + ...expressions.map(__private.pattern) 81 + ); 55 82 };
+25 -4
src/core.test.js
··· 1 - import { match } from './core'; 1 + import { parse, match, interpolation } from './core'; 2 2 3 3 const expectToParse = (node, input, result, lastIndex = 0) => { 4 - const state = { input, index: 0 }; 4 + const state = { quasis: [input], expressions: [], x: 0, y: 0 }; 5 5 if (result) result.tag = 'node'; 6 6 expect(node(state)).toEqual(result); 7 7 8 8 // NOTE: After parsing we expect the current index to exactly match the 9 9 // sum amount of matched characters 10 10 if (result === undefined) { 11 - expect(state.index).toBe(0); 11 + expect(state.y).toBe(0); 12 12 } else { 13 13 const index = lastIndex || result.reduce((acc, x) => acc + x.length, 0); 14 - expect(state.index).toBe(index); 14 + expect(state.y).toBe(index); 15 15 } 16 16 }; 17 17 ··· 552 552 } 553 553 ); 554 554 }); 555 + 556 + describe('interpolation parsing', () => { 557 + const node = match('node')` 558 + ${/1/} 559 + ${interpolation((x) => x > 1 && x)} 560 + ${/3/} 561 + `; 562 + 563 + it('matches interpolations', () => { 564 + const expected = ['1', 2, '3']; 565 + expected.tag = 'node'; 566 + expect(parse(node)`1${2}3`).toEqual(expected); 567 + }); 568 + 569 + it('does not match invalid inputs', () => { 570 + expect(parse(node)`13`).toBe(undefined); 571 + expect(parse(node)`13${2}`).toBe(undefined); 572 + expect(parse(node)`${2}13`).toBe(undefined); 573 + expect(parse(node)`1${1}3`).toBe(undefined); 574 + }); 575 + });