OR-1 dataflow CPU sketch
// Node shapes of a grammar rule tree. Every shape carries a literal `type`
// tag, so the `Rule` union at the end of this section can be narrowed by
// switching on `rule.type`.

/**
 * Node returned by `alias()`: `content` is the aliased rule and `value` the
 * name it is given. `named` presumably records whether the alias target was a
 * symbol (named node) or a plain string (anonymous node) — confirm.
 */
type AliasRule = {
  type: 'ALIAS';
  named: boolean;
  content: Rule;
  value: string;
};

/** Node returned by `blank()`; it has no payload. */
type BlankRule = { type: 'BLANK' };

/** Node returned by `choice()` and `optional()`: a list of alternatives. */
type ChoiceRule = {
  type: 'CHOICE';
  members: Rule[];
};

/** Node returned by `field()`: `content` labelled with the field `name`. */
type FieldRule = {
  type: 'FIELD';
  name: string;
  content: Rule;
};

/** Node returned by `token.immediate()`. */
type ImmediateTokenRule = {
  type: 'IMMEDIATE_TOKEN';
  content: Rule;
};

/**
 * Pattern node whose `value` is a string.
 * NOTE(review): presumably the normalized form of a regex literal — confirm.
 */
type PatternRule = {
  type: 'PATTERN';
  value: string;
};

/** Node returned by `prec.dynamic()`. */
type PrecDynamicRule = {
  type: 'PREC_DYNAMIC';
  content: Rule;
  value: number;
};

/** Node returned by `prec.left()`. */
type PrecLeftRule = {
  type: 'PREC_LEFT';
  content: Rule;
  value: number;
};

/** Node returned by `prec.right()`. */
type PrecRightRule = {
  type: 'PREC_RIGHT';
  content: Rule;
  value: number;
};

/**
 * Node returned by `prec()`.
 *
 * NOTE(review): `prec`, `prec.left` and `prec.right` accept `string | number`
 * for their precedence argument, yet `value` is typed `number` on the
 * precedence nodes — confirm whether named precedences should be
 * representable here.
 */
type PrecRule = {
  type: 'PREC';
  content: Rule;
  value: number;
};

/** Node returned by `repeat1()`. */
type Repeat1Rule = {
  type: 'REPEAT1';
  content: Rule;
};

/** Node returned by `repeat()`. */
type RepeatRule = {
  type: 'REPEAT';
  content: Rule;
};

/** Node returned by `reserved()`: `content` tied to a reserved word set name. */
type ReservedRule = {
  type: 'RESERVED';
  content: Rule;
  context_name: string;
};

/** Node returned by `seq()`: an ordered list of member rules. */
type SeqRule = {
  type: 'SEQ';
  members: Rule[];
};

/**
 * String node whose `value` is a string.
 * NOTE(review): presumably the normalized form of a string literal — confirm.
 */
type StringRule = {
  type: 'STRING';
  value: string;
};

/** Node returned by `sym()`: a by-name reference to another rule. */
type SymbolRule<Name extends string> = {
  type: 'SYMBOL';
  name: Name;
};

/** Node returned by `token()`. */
type TokenRule = {
  type: 'TOKEN';
  content: Rule;
};

/**
 * Any node of a rule tree.
 *
 * `ReservedRule` is a member so that the result of `reserved()` can be passed
 * to the other DSL functions, all of which take a `RuleOrLiteral`.
 */
type Rule =
  | AliasRule
  | BlankRule
  | ChoiceRule
  | FieldRule
  | ImmediateTokenRule
  | PatternRule
  | PrecDynamicRule
  | PrecLeftRule
  | PrecRightRule
  | PrecRule
  | Repeat1Rule
  | RepeatRule
  | ReservedRule
  | SeqRule
  | StringRule
  | SymbolRule<string>
  | TokenRule;
36
/**
 * Ambient class whose instances are accepted wherever a rule literal is
 * expected (it is one of the members of `RuleOrLiteral`).
 *
 * NOTE(review): presumably wraps a pattern written in Rust regex syntax, as
 * opposed to a JavaScript `RegExp` — confirm against the DSL runtime.
 */
declare class RustRegex {
  /**
   * @param pattern source text of the pattern
   */
  constructor(pattern: string);

  /** String payload of the instance; presumably the pattern text — confirm. */
  value: string;
}
42
/**
 * Anything the DSL functions accept as a rule argument: an already-built
 * `Rule` node, a JavaScript `RegExp`, a `RustRegex`, or a plain string.
 */
type RuleOrLiteral =
  | Rule
  | RegExp
  | RustRegex
  | string;
44
/**
 * Type of the `$` object handed to the grammar callbacks.
 *
 * Each known rule name maps to a `SymbolRule` carrying that exact name; any
 * other string key is also permitted and is typed as `SymbolRule<string>`.
 */
type GrammarSymbols<RuleName extends string> = {
  [Key in RuleName]: SymbolRule<Key>;
} & Record<string, SymbolRule<string>>;
49
/**
 * Callback that produces the definition of a single rule.
 *
 * It receives the symbol table `$` and, optionally, a `previous` rule
 * (presumably the base grammar's definition of the same rule when a grammar
 * is extended — confirm), and returns a rule or literal.
 */
type RuleBuilder<RuleName extends string> = (
  $: GrammarSymbols<RuleName>,
  previous?: Rule,
) => RuleOrLiteral;
54
/**
 * One `RuleBuilder` per rule name declared by a grammar. Each builder's `$`
 * exposes both the grammar's own rule names and those of its base grammar.
 */
type RuleBuilders<
  RuleName extends string,
  BaseGrammarRuleName extends string
> = {
  [Key in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
};
61
/**
 * Options object accepted by `grammar()`.
 *
 * `RuleName` is the union of rule names declared by this grammar and
 * `BaseGrammarRuleName` the union inherited from a base grammar (`never` when
 * there is none). Apart from `externals`, every callback receives a `$`
 * symbol table covering both unions.
 */
interface Grammar<
  RuleName extends string,
  BaseGrammarRuleName extends string = never,
  Rules extends RuleBuilders<RuleName, BaseGrammarRuleName> = RuleBuilders<
    RuleName,
    BaseGrammarRuleName
  >
> {
  /** The grammar language's name. */
  name: string;

  /** Rule builder functions, keyed by rule name. */
  rules: Rules;

  /**
   * Lists of precedence names or rules. Every inner list is ordered from
   * highest to lowest precedence: an entry outranks all entries that follow
   * it in the same list.
   *
   * Listing a rule instead of a name is only a convenience; it is shorthand
   * for using a name and then calling prec() with that name.
   *
   * @param $ grammar rules
   * @param previous presumably the base grammar's precedences when extending
   *   (by analogy with `externals`) — confirm
   */
  precedences?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
    previous: Rule[][],
  ) => RuleOrLiteral[][];

  /**
   * Lists of rule names, where each inner list names the rules taking part in
   * an _LR(1) conflict_ that is _intended to exist_ in the grammar. When such
   * a conflict is hit at runtime, Tree-sitter uses the GLR algorithm to
   * explore every possible interpretation. Should _multiple_ parses succeed,
   * Tree-sitter keeps the subtree whose rule has the highest total
   * _dynamic precedence_.
   *
   * @param $ grammar rules
   * @param previous presumably the base grammar's conflicts when extending
   *   (by analogy with `externals`) — confirm
   */
  conflicts?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
    previous: Rule[][],
  ) => RuleOrLiteral[][];

  /**
   * Token names that an _external scanner_ may return. An external scanner is
   * custom C code run during lexing to handle lexical rules that regular
   * expressions cannot describe (e.g. Python's indentation tokens).
   *
   * Unlike the other callbacks, `$` here is an untyped symbol table: any
   * string key is accepted.
   *
   * @param $ grammar rules
   * @param previous array of externals from the base schema, if any
   *
   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners
   */
  externals?: (
    $: Record<string, SymbolRule<string>>,
    previous: Rule[],
  ) => RuleOrLiteral[];

  /**
   * Tokens that may appear anywhere in the language, typically whitespace and
   * comments. By default extras accept whitespace; to control whitespace
   * explicitly, specify extras: `$ => []` in your grammar.
   *
   * @param $ grammar rules
   */
  extras?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
  ) => RuleOrLiteral[];

  /**
   * Rules to remove from the grammar automatically by replacing each of their
   * usages with a copy of their definition. Useful for rules used in several
   * places for which no syntax tree node should be created at runtime.
   *
   * @param $ grammar rules
   * @param previous presumably the base grammar's inline rules when extending
   *   (by analogy with `externals`) — confirm
   */
  inline?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
    previous: Rule[],
  ) => RuleOrLiteral[];

  /**
   * Hidden rule names to treat as supertypes in the generated node types
   * file.
   *
   * @param $ grammar rules
   * @param previous presumably the base grammar's supertypes when extending
   *   (by analogy with `externals`) — confirm
   *
   * @see https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types
   */
  supertypes?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
    previous: Rule[],
  ) => RuleOrLiteral[];

  /**
   * The token that matches keywords, used by the keyword extraction
   * optimization.
   *
   * @param $ grammar rules
   *
   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers/3-writing-the-grammar#keyword-extraction
   */
  word?: ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral;

  /**
   * Reserved word sets, keyed by name. The first set is the global word set:
   * it applies to every rule in every parse state. The remaining sets are
   * meant to be selected with the `reserved` function.
   */
  reserved?: Record<
    string,
    ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral[]
  >;
}
181
/**
 * Shape returned by `grammar()`: identical to `Grammar<RuleName>` key for
 * key, except that `rules` holds built `Rule` nodes (one per rule name)
 * instead of builder callbacks.
 */
type GrammarSchema<RuleName extends string> = {
  [Key in keyof Grammar<RuleName>]: Key extends 'rules'
    ? Record<RuleName, Rule>
    : Grammar<RuleName>[Key];
};
187
/**
 * Makes the given rule show up under a different name in the syntax tree.
 * With a string target, e.g. `alias($.foo, 'bar')`, the aliased rule appears
 * as an anonymous node, as if the rule had been written as the simple string.
 *
 * @param rule rule that will be aliased
 * @param name target name for the alias
 */
declare function alias(rule: RuleOrLiteral, name: string): AliasRule;

/**
 * Makes the given rule show up as a different named node. With a symbol
 * target, e.g. `alias($.foo, $.bar)`, the aliased rule `foo` appears as a
 * named node called `bar`.
 *
 * @param rule rule that will be aliased
 * @param symbol target symbol for the alias
 */
declare function alias(
  rule: RuleOrLiteral,
  symbol: SymbolRule<string>,
): AliasRule;
210
/**
 * Builds a blank rule, i.e. one that matches nothing.
 */
declare function blank(): BlankRule;
215
/**
 * Attaches a field name to the child node(s) that the given rule matches, so
 * that specific children can later be looked up by that name in the
 * resulting syntax tree.
 *
 * @param name name of the field
 * @param rule rule the field should match
 */
declare function field(
  name: string,
  rule: RuleOrLiteral,
): FieldRule;
225
/**
 * Builds a rule that matches any one of the given alternatives; argument
 * order is irrelevant. Equivalent to the `|` (pipe) operator of EBNF
 * notation.
 *
 * @param options possible rule choices
 */
declare function choice(
  ...options: RuleOrLiteral[]
): ChoiceRule;
234
/**
 * Creates a rule that matches zero or one occurrence of a given rule.
 * It is analogous to the `[x]` (square bracket) syntax in EBNF notation.
 * The result is typed as a `ChoiceRule`.
 *
 * @param rule rule to be made optional
 */
declare function optional(rule: RuleOrLiteral): ChoiceRule;
242
/**
 * Marks the given rule with a precedence which will be used to resolve LR(1)
 * conflicts at parser-generation time. When two rules overlap in a way that
 * represents either a true ambiguity or a _local_ ambiguity given one token
 * of lookahead, Tree-sitter will try to resolve the conflict by matching the
 * rule with the higher precedence.
 *
 * Precedence values can either be strings or numbers. When comparing rules
 * with numerical precedence, higher numbers indicate higher precedences. To
 * compare rules with string precedence, Tree-sitter uses the grammar's
 * `precedences` field.
 *
 * The default precedence for all rules is zero. This works similarly to the
 * precedence directives in Yacc grammars.
 *
 * @param value precedence weight
 * @param rule rule being weighted
 *
 * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables
 * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
 */
declare const prec: {
  (value: string | number, rule: RuleOrLiteral): PrecRule;

  /**
   * Marks the given rule as left-associative (and optionally applies a
   * numerical precedence). When an LR(1) conflict arises in which all of the
   * rules have the same numerical precedence, Tree-sitter will consult the
   * rules' associativity. If there is a left-associative rule, Tree-sitter
   * will prefer matching a rule that ends _earlier_. This works similarly to
   * associativity directives in Yacc grammars.
   *
   * @param value (optional) precedence weight
   * @param rule rule to mark as left-associative
   *
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  left(rule: RuleOrLiteral): PrecLeftRule;
  left(value: string | number, rule: RuleOrLiteral): PrecLeftRule;

  /**
   * Marks the given rule as right-associative (and optionally applies a
   * numerical precedence). When an LR(1) conflict arises in which all of the
   * rules have the same numerical precedence, Tree-sitter will consult the
   * rules' associativity. If there is a right-associative rule, Tree-sitter
   * will prefer matching a rule that ends _later_. This works similarly to
   * associativity directives in Yacc grammars.
   *
   * @param value (optional) precedence weight
   * @param rule rule to mark as right-associative
   *
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  right(rule: RuleOrLiteral): PrecRightRule;
  right(value: string | number, rule: RuleOrLiteral): PrecRightRule;

  /**
   * Marks the given rule with a numerical precedence which will be used to
   * resolve LR(1) conflicts at _runtime_ instead of parser-generation time.
   * This is only necessary when handling a conflict dynamically using the
   * `conflicts` field in the grammar, and when there is a genuine _ambiguity_:
   * multiple rules correctly match a given piece of code. In that event,
   * Tree-sitter compares the total dynamic precedence associated with each
   * rule, and selects the one with the highest total. This is similar to
   * dynamic precedence directives in Bison grammars.
   *
   * NOTE(review): the signature accepts `string | number`, while this
   * description and `PrecDynamicRule.value` speak of a number only — confirm
   * whether string values are meaningful here.
   *
   * @param value precedence weight
   * @param rule rule being weighted
   *
   * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
   */
  dynamic(value: string | number, rule: RuleOrLiteral): PrecDynamicRule;
};
315
/**
 * Builds a rule matching _zero-or-more_ occurrences of the given rule, like
 * the `{x}` (curly brace) syntax of EBNF notation. It is implemented in
 * terms of `repeat1`, and provided because it is so commonly needed.
 *
 * @param rule rule to repeat, zero or more times
 */
declare function repeat(
  rule: RuleOrLiteral,
): RepeatRule;
325
/**
 * Builds a rule matching one-or-more occurrences of the given rule.
 *
 * @param rule rule to repeat, one or more times
 */
declare function repeat1(
  rule: RuleOrLiteral,
): Repeat1Rule;
332
/**
 * Replaces the global reserved word set with another one for the given rule.
 * The named word set should be defined in the grammar's `reserved` field.
 *
 * @param wordset name of the reserved word set
 * @param rule rule that will use the reserved word set
 */
declare function reserved(
  wordset: string,
  rule: RuleOrLiteral,
): ReservedRule;
341
/**
 * Builds a rule matching any number of other rules in order, one after
 * another. Equivalent to simply writing several symbols next to each other
 * in EBNF notation.
 *
 * @param rules ordered rules that comprise the sequence
 */
declare function seq(
  ...rules: RuleOrLiteral[]
): SeqRule;
350
/**
 * Builds a symbol rule, i.e. a by-name reference to another rule of the
 * grammar. The literal type of `name` is preserved in the result.
 *
 * @param name name of the target rule
 */
declare function sym<Name extends string>(
  name: Name,
): SymbolRule<Name>;
357
/**
 * Makes the given rule produce exactly one token. By default Tree-sitter
 * treats every string or RegExp literal in the grammar as its own token:
 * each is matched separately by the lexer and returned as a separate leaf
 * node in the tree. With `token`, a complex rule can be written using the
 * DSL functions (rather than as a single regular expression) while
 * Tree-sitter still treats it as a single token.
 *
 * @param rule rule to represent as a single token
 */
declare const token: {
  (rule: RuleOrLiteral): TokenRule;

  /**
   * Makes the given rule produce an immediate token, which lets the parser
   * produce a different token depending on whether `extras` precede the
   * token's main content. When there are _no_ leading `extras`, an immediate
   * token is preferred over a normal token which would otherwise match.
   *
   * @param rule rule to represent as an immediate token
   */
  immediate(rule: RuleOrLiteral): ImmediateTokenRule;
};
383
/**
 * Builds a new language grammar from the provided schema options.
 *
 * @param options grammar options
 */
declare function grammar<RuleName extends string>(
  options: Grammar<RuleName>,
): GrammarSchema<RuleName>;

/**
 * Builds a new language by extending an existing grammar with the provided
 * options. The resulting schema covers the rule names of both the base
 * grammar and the extension.
 *
 * @param baseGrammar base grammar schema to extend from
 * @param options grammar options for the new extended language
 */
declare function grammar<
  BaseGrammarRuleName extends string,
  RuleName extends string
>(
  baseGrammar: GrammarSchema<BaseGrammarRuleName>,
  options: Grammar<RuleName, BaseGrammarRuleName>,
): GrammarSchema<RuleName | BaseGrammarRuleName>;