OR-1 dataflow CPU sketch
at main 406 lines 14 kB view raw
/*
 * Rule node shapes. Every node carries a literal `type` tag, which lets the
 * `Rule` union below be discriminated on that field.
 */
type AliasRule = { type: 'ALIAS'; named: boolean; content: Rule; value: string };
type BlankRule = { type: 'BLANK' };
type ChoiceRule = { type: 'CHOICE'; members: Rule[] };
type FieldRule = { type: 'FIELD'; name: string; content: Rule };
type ImmediateTokenRule = { type: 'IMMEDIATE_TOKEN'; content: Rule };
type PatternRule = { type: 'PATTERN'; value: string };
// NOTE(review): `prec`, `prec.left`, `prec.right` and `prec.dynamic` accept
// `string | number`, while the four precedence nodes below declare
// `value: number` — confirm whether `value` should also admit strings.
type PrecDynamicRule = { type: 'PREC_DYNAMIC'; content: Rule; value: number };
type PrecLeftRule = { type: 'PREC_LEFT'; content: Rule; value: number };
type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number };
type PrecRule = { type: 'PREC'; content: Rule; value: number };
type Repeat1Rule = { type: 'REPEAT1'; content: Rule };
type RepeatRule = { type: 'REPEAT'; content: Rule };
type ReservedRule = { type: 'RESERVED'; content: Rule; context_name: string };
type SeqRule = { type: 'SEQ'; members: Rule[] };
type StringRule = { type: 'STRING'; value: string };
type SymbolRule<Name extends string> = { type: 'SYMBOL'; name: Name };
type TokenRule = { type: 'TOKEN'; content: Rule };

/**
 * Union of every rule node shape declared above.
 *
 * `ReservedRule` is a member so that the result of `reserved()` can be
 * returned from a rule builder or nested inside `seq()`, `choice()`, etc.
 */
type Rule =
  | AliasRule
  | BlankRule
  | ChoiceRule
  | FieldRule
  | ImmediateTokenRule
  | PatternRule
  | PrecDynamicRule
  | PrecLeftRule
  | PrecRightRule
  | PrecRule
  | Repeat1Rule
  | RepeatRule
  | ReservedRule
  | SeqRule
  | StringRule
  | SymbolRule<string>
  | TokenRule;

/**
 * Wrapper around a pattern string; accepted wherever a rule literal is
 * (see `RuleOrLiteral`).
 */
declare class RustRegex {
  value: string;

  constructor(pattern: string);
}

/** Anything the DSL functions accept in a rule position. */
type RuleOrLiteral = Rule | RegExp | RustRegex | string;

/**
 * The `$` object handed to grammar callbacks: each known rule name maps to a
 * `SymbolRule` carrying that exact name, and any other string key maps to a
 * `SymbolRule<string>`.
 */
type GrammarSymbols<RuleName extends string> = {
  [name in RuleName]: SymbolRule<name>;
} & Record<string, SymbolRule<string>>;

/**
 * Function that produces the definition of a single rule.
 *
 * @param $ grammar rules
 * @param previous optional existing rule — presumably the base grammar's
 *   definition when extending a grammar; confirm against the implementation
 */
type RuleBuilder<RuleName extends string> = (
  $: GrammarSymbols<RuleName>,
  previous?: Rule,
) => RuleOrLiteral;

/**
 * One `RuleBuilder` per rule name; each builder can reference both the
 * grammar's own rule names and those of the base grammar.
 */
type RuleBuilders<
  RuleName extends string,
  BaseGrammarRuleName extends string,
> = {
  [name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
};

/** Options object accepted by `grammar()`. */
interface Grammar<
  RuleName extends string,
  BaseGrammarRuleName extends string = never,
  Rules extends RuleBuilders<RuleName, BaseGrammarRuleName> = RuleBuilders<
    RuleName,
    BaseGrammarRuleName
  >,
> {
  /**
   * Name of the grammar language.
   */
  name: string;

  /** Mapping of grammar rule names to rule builder functions. */
  rules: Rules;

  /**
   * An array of arrays of precedence names or rules. Each inner array represents
   * a *descending* ordering. Names/rules listed earlier in one of these arrays
   * have higher precedence than any names/rules listed later in the same array.
   *
   * Using rules is just a shorthand way for using a name then calling prec()
   * with that name. It is just a convenience.
   */
  precedences?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
    previous: Rule[][],
  ) => RuleOrLiteral[][];

  /**
   * An array of arrays of rule names. Each inner array represents a set of
   * rules that's involved in an _LR(1) conflict_ that is _intended to exist_
   * in the grammar. When these conflicts occur at runtime, Tree-sitter will
   * use the GLR algorithm to explore all of the possible interpretations. If
   * _multiple_ parses end up succeeding, Tree-sitter will pick the subtree
   * whose corresponding rule has the highest total _dynamic precedence_.
   *
   * @param $ grammar rules
   */
  conflicts?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
    previous: Rule[][],
  ) => RuleOrLiteral[][];

  /**
   * An array of token names which can be returned by an _external scanner_.
   * External scanners allow you to write custom C code which runs during the
   * lexing process in order to handle lexical rules (e.g. Python's indentation
   * tokens) that cannot be described by regular expressions.
   *
   * @param $ grammar rules
   * @param previous array of externals from the base schema, if any
   *
   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners
   */
  externals?: (
    $: Record<string, SymbolRule<string>>,
    previous: Rule[],
  ) => RuleOrLiteral[];

  /**
   * An array of tokens that may appear anywhere in the language. This
   * is often used for whitespace and comments. The default value of
   * extras is to accept whitespace. To control whitespace explicitly,
   * specify extras: `$ => []` in your grammar.
   *
   * @param $ grammar rules
   */
  extras?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
  ) => RuleOrLiteral[];

  /**
   * An array of rules that should be automatically removed from the
   * grammar by replacing all of their usages with a copy of their definition.
   * This is useful for rules that are used in multiple places but for which
   * you don't want to create syntax tree nodes at runtime.
   *
   * @param $ grammar rules
   */
  inline?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
    previous: Rule[],
  ) => RuleOrLiteral[];

  /**
   * A list of hidden rule names that should be considered supertypes in the
   * generated node types file.
   *
   * @param $ grammar rules
   *
   * @see https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types
   */
  supertypes?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
    previous: Rule[],
  ) => RuleOrLiteral[];

  /**
   * The name of a token that will match keywords for the purpose of the
   * keyword extraction optimization.
   *
   * @param $ grammar rules
   *
   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers/3-writing-the-grammar#keyword-extraction
   */
  word?: ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral;

  /**
   * Mapping of names to reserved word sets. The first reserved word set is the
   * global word set, meaning it applies to every rule in every parse state.
   * The other word sets can be used with the `reserved` function.
   */
  reserved?: Record<
    string,
    ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral[]
  >;
}

/**
 * Result type of `grammar()`: the same keys as `Grammar<RuleName>`, except
 * that `rules` maps each rule name to a `Rule` instead of a builder function.
 */
type GrammarSchema<RuleName extends string> = {
  [K in keyof Grammar<RuleName>]: K extends 'rules'
    ? Record<RuleName, Rule>
    : Grammar<RuleName>[K];
};

/**
 * Causes the given rule to appear with an alternative name in the syntax tree.
 * For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an
 * anonymous node, as if the rule had been written as the simple string.
 *
 * @param rule rule that will be aliased
 * @param name target name for the alias
 */
declare function alias(rule: RuleOrLiteral, name: string): AliasRule;

/**
 * Causes the given rule to appear as an alternative named node, for instance
 * with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named
 * node called `bar`.
 *
 * @param rule rule that will be aliased
 * @param symbol target symbol for the alias
 */
declare function alias(
  rule: RuleOrLiteral,
  symbol: SymbolRule<string>,
): AliasRule;

/**
 * Creates a blank rule, matching nothing.
 */
declare function blank(): BlankRule;

/**
 * Assigns a field name to the child node(s) matched by the given rule.
 * In the resulting syntax tree, you can then use that field name to
 * access specific children.
 *
 * @param name name of the field
 * @param rule rule the field should match
 */
declare function field(name: string, rule: RuleOrLiteral): FieldRule;

/**
 * Creates a rule that matches one of a set of possible rules. The order
 * of the arguments does not matter. This is analogous to the `|` (pipe)
 * operator in EBNF notation.
 *
 * @param options possible rule choices
 */
declare function choice(...options: RuleOrLiteral[]): ChoiceRule;

/**
 * Creates a rule that matches zero or one occurrence of a given rule.
 * It is analogous to the `[x]` (square bracket) syntax in EBNF notation.
 *
 * @param rule rule to be made optional
 */
declare function optional(rule: RuleOrLiteral): ChoiceRule;

/**
 * Marks the given rule with a precedence which will be used to resolve LR(1)
 * conflicts at parser-generation time. When two rules overlap in a way that
 * represents either a true ambiguity or a _local_ ambiguity given one token
 * of lookahead, Tree-sitter will try to resolve the conflict by matching the
 * rule with the higher precedence.
 *
 * Precedence values can either be strings or numbers. When comparing rules
 * with numerical precedence, higher numbers indicate higher precedences. To
 * compare rules with string precedence, Tree-sitter uses the grammar's `precedences`
 * field.
 *
 * The default precedence for all rules is zero. This works similarly to the
 * precedence directives in Yacc grammars.
 *
 * @param value precedence weight
 * @param rule rule being weighted
 *
 * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables
 * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
 */
declare const prec: {
  (value: string | number, rule: RuleOrLiteral): PrecRule;

  /**
   * Marks the given rule as left-associative (and optionally applies a
   * numerical precedence). When an LR(1) conflict arises in which all of the
   * rules have the same numerical precedence, Tree-sitter will consult the
   * rules' associativity. If there is a left-associative rule, Tree-sitter
   * will prefer matching a rule that ends _earlier_. This works similarly to
   * associativity directives in Yacc grammars.
   *
   * @param value (optional) precedence weight
   * @param rule rule to mark as left-associative
   *
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  left(rule: RuleOrLiteral): PrecLeftRule;
  left(value: string | number, rule: RuleOrLiteral): PrecLeftRule;

  /**
   * Marks the given rule as right-associative (and optionally applies a
   * numerical precedence). When an LR(1) conflict arises in which all of the
   * rules have the same numerical precedence, Tree-sitter will consult the
   * rules' associativity. If there is a right-associative rule, Tree-sitter
   * will prefer matching a rule that ends _later_. This works similarly to
   * associativity directives in Yacc grammars.
   *
   * @param value (optional) precedence weight
   * @param rule rule to mark as right-associative
   *
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  right(rule: RuleOrLiteral): PrecRightRule;
  right(value: string | number, rule: RuleOrLiteral): PrecRightRule;

  /**
   * Marks the given rule with a numerical precedence which will be used to
   * resolve LR(1) conflicts at _runtime_ instead of parser-generation time.
   * This is only necessary when handling a conflict dynamically using the
   * `conflicts` field in the grammar, and when there is a genuine _ambiguity_:
   * multiple rules correctly match a given piece of code. In that event,
   * Tree-sitter compares the total dynamic precedence associated with each
   * rule, and selects the one with the highest total. This is similar to
   * dynamic precedence directives in Bison grammars.
   *
   * NOTE(review): the description above speaks of a numerical precedence but
   * the signature also accepts a string — confirm which is intended.
   *
   * @param value precedence weight
   * @param rule rule being weighted
   *
   * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
   */
  dynamic(value: string | number, rule: RuleOrLiteral): PrecDynamicRule;
};

/**
 * Creates a rule that matches _zero-or-more_ occurrences of a given rule.
 * It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This
 * rule is implemented in terms of `repeat1` but is included because it
 * is very commonly used.
 *
 * @param rule rule to repeat, zero or more times
 */
declare function repeat(rule: RuleOrLiteral): RepeatRule;

/**
 * Creates a rule that matches one-or-more occurrences of a given rule.
 *
 * @param rule rule to repeat, one or more times
 */
declare function repeat1(rule: RuleOrLiteral): Repeat1Rule;

/**
 * Overrides the global reserved word set for a given rule. The word set name
 * should be defined in the `reserved` field in the grammar.
 *
 * @param wordset name of the reserved word set
 * @param rule rule that will use the reserved word set
 */
declare function reserved(wordset: string, rule: RuleOrLiteral): ReservedRule;

/**
 * Creates a rule that matches any number of other rules, one after another.
 * It is analogous to simply writing multiple symbols next to each other
 * in EBNF notation.
 *
 * @param rules ordered rules that comprise the sequence
 */
declare function seq(...rules: RuleOrLiteral[]): SeqRule;

/**
 * Creates a symbol rule, representing another rule in the grammar by name.
 *
 * @param name name of the target rule
 */
declare function sym<Name extends string>(name: Name): SymbolRule<Name>;

/**
 * Marks the given rule as producing only a single token. Tree-sitter's
 * default is to treat each string or RegExp literal in the grammar as a
 * separate token. Each token is matched separately by the lexer and
 * returned as its own leaf node in the tree. The token function allows
 * you to express a complex rule using the DSL functions (rather
 * than as a single regular expression) but still have Tree-sitter treat
 * it as a single token.
 *
 * @param rule rule to represent as a single token
 */
declare const token: {
  (rule: RuleOrLiteral): TokenRule;

  /**
   * Marks the given rule as producing an immediate token. This allows
   * the parser to produce a different token based on whether or not
   * there are `extras` preceding the token's main content. When there
   * are _no_ leading `extras`, an immediate token is preferred over a
   * normal token which would otherwise match.
   *
   * @param rule rule to represent as an immediate token
   */
  immediate(rule: RuleOrLiteral): ImmediateTokenRule;
};

/**
 * Creates a new language grammar with the provided schema.
 *
 * @param options grammar options
 */
declare function grammar<RuleName extends string>(
  options: Grammar<RuleName>,
): GrammarSchema<RuleName>;

/**
 * Extends an existing language grammar with the provided options,
 * creating a new language.
 *
 * @param baseGrammar base grammar schema to extend from
 * @param options grammar options for the new extended language
 */
declare function grammar<
  BaseGrammarRuleName extends string,
  RuleName extends string,
>(
  baseGrammar: GrammarSchema<BaseGrammarRuleName>,
  options: Grammar<RuleName, BaseGrammarRuleName>,
): GrammarSchema<RuleName | BaseGrammarRuleName>;