Diffdown is a real-time collaborative Markdown editor/previewer built on the AT Protocol (diffdown.com).
at main 303 lines 9.6 kB view raw
1import { Tree, NodePropSource, ParseWrapper, Parser, NodeSet, Input, TreeFragment, PartialParse, NodeType } from '@lezer/common'; 2 3/** 4A parse stack. These are used internally by the parser to track 5parsing progress. They also provide some properties and methods 6that external code such as a tokenizer can use to get information 7about the parse state. 8*/ 9declare class Stack { 10 /** 11 The input position up to which this stack has parsed. 12 */ 13 pos: number; 14 /** 15 The stack's current [context](#lr.ContextTracker) value, if 16 any. Its type will depend on the context tracker's type 17 parameter, or it will be `null` if there is no context 18 tracker. 19 */ 20 get context(): any; 21 /** 22 Check if the given term would be able to be shifted (optionally 23 after some reductions) on this stack. This can be useful for 24 external tokenizers that want to make sure they only provide a 25 given token when it applies. 26 */ 27 canShift(term: number): boolean; 28 /** 29 Get the parser used by this stack. 30 */ 31 get parser(): LRParser; 32 /** 33 Test whether a given dialect (by numeric ID, as exported from 34 the terms file) is enabled. 35 */ 36 dialectEnabled(dialectID: number): boolean; 37 private shiftContext; 38 private reduceContext; 39 private updateContext; 40} 41 42/** 43[Tokenizers](#lr.ExternalTokenizer) interact with the input 44through this interface. It presents the input as a stream of 45characters, tracking lookahead and hiding the complexity of 46[ranges](#common.Parser.parse^ranges) from tokenizer code. 47*/ 48declare class InputStream { 49 /** 50 Backup chunk 51 */ 52 private chunk2; 53 private chunk2Pos; 54 /** 55 The character code of the next code unit in the input, or -1 56 when the stream is at the end of the input. 57 */ 58 next: number; 59 /** 60 The current position of the stream. 
Note that, due to parses 61 being able to cover non-contiguous 62 [ranges](#common.Parser.startParse), advancing the stream does 63 not always mean its position moves a single unit. 64 */ 65 pos: number; 66 private rangeIndex; 67 private range; 68 /** 69 Look at a code unit near the stream position. `.peek(0)` equals 70 `.next`, `.peek(-1)` gives you the previous character, and so 71 on. 72 73 Note that looking around during tokenizing creates dependencies 74 on potentially far-away content, which may reduce the 75 effectiveness incremental parsing—when looking forward—or even 76 cause invalid reparses when looking backward more than 25 code 77 units, since the library does not track lookbehind. 78 */ 79 peek(offset: number): number; 80 /** 81 Accept a token. By default, the end of the token is set to the 82 current stream position, but you can pass an offset (relative to 83 the stream position) to change that. 84 */ 85 acceptToken(token: number, endOffset?: number): void; 86 /** 87 Accept a token ending at a specific given position. 88 */ 89 acceptTokenTo(token: number, endPos: number): void; 90 private getChunk; 91 private readNext; 92 /** 93 Move the stream forward N (defaults to 1) code units. Returns 94 the new value of [`next`](#lr.InputStream.next). 
95 */ 96 advance(n?: number): number; 97 private setDone; 98} 99interface Tokenizer { 100} 101/** 102@hide 103*/ 104declare class LocalTokenGroup implements Tokenizer { 105 readonly precTable: number; 106 readonly elseToken?: number | undefined; 107 contextual: boolean; 108 fallback: boolean; 109 extend: boolean; 110 readonly data: Readonly<Uint16Array>; 111 constructor(data: Readonly<Uint16Array> | string, precTable: number, elseToken?: number | undefined); 112 token(input: InputStream, stack: Stack): void; 113} 114interface ExternalOptions { 115 /** 116 When set to true, mark this tokenizer as depending on the 117 current parse stack, which prevents its result from being cached 118 between parser actions at the same positions. 119 */ 120 contextual?: boolean; 121 /** 122 By defaults, when a tokenizer returns a token, that prevents 123 tokenizers with lower precedence from even running. When 124 `fallback` is true, the tokenizer is allowed to run when a 125 previous tokenizer returned a token that didn't match any of the 126 current state's actions. 127 */ 128 fallback?: boolean; 129 /** 130 When set to true, tokenizing will not stop after this tokenizer 131 has produced a token. (But it will still fail to reach this one 132 if a higher-precedence tokenizer produced a token.) 133 */ 134 extend?: boolean; 135} 136/** 137`@external tokens` declarations in the grammar should resolve to 138an instance of this class. 139*/ 140declare class ExternalTokenizer { 141 /** 142 Create a tokenizer. The first argument is the function that, 143 given an input stream, scans for the types of tokens it 144 recognizes at the stream's position, and calls 145 [`acceptToken`](#lr.InputStream.acceptToken) when it finds 146 one. 
147 */ 148 constructor( 149 /** 150 @internal 151 */ 152 token: (input: InputStream, stack: Stack) => void, options?: ExternalOptions); 153} 154 155/** 156Context trackers are used to track stateful context (such as 157indentation in the Python grammar, or parent elements in the XML 158grammar) needed by external tokenizers. You declare them in a 159grammar file as `@context exportName from "module"`. 160 161Context values should be immutable, and can be updated (replaced) 162on shift or reduce actions. 163 164The export used in a `@context` declaration should be of this 165type. 166*/ 167declare class ContextTracker<T> { 168 /** 169 Define a context tracker. 170 */ 171 constructor(spec: { 172 /** 173 The initial value of the context at the start of the parse. 174 */ 175 start: T; 176 /** 177 Update the context when the parser executes a 178 [shift](https://en.wikipedia.org/wiki/LR_parser#Shift_and_reduce_actions) 179 action. 180 */ 181 shift?(context: T, term: number, stack: Stack, input: InputStream): T; 182 /** 183 Update the context when the parser executes a reduce action. 184 */ 185 reduce?(context: T, term: number, stack: Stack, input: InputStream): T; 186 /** 187 Update the context when the parser reuses a node from a tree 188 fragment. 189 */ 190 reuse?(context: T, node: Tree, stack: Stack, input: InputStream): T; 191 /** 192 Reduce a context value to a number (for cheap storage and 193 comparison). Only needed for strict contexts. 194 */ 195 hash?(context: T): number; 196 /** 197 By default, nodes can only be reused during incremental 198 parsing if they were created in the same context as the one in 199 which they are reused. Set this to false to disable that 200 check (and the overhead of storing the hashes). 201 */ 202 strict?: boolean; 203 }); 204} 205/** 206Configuration options when 207[reconfiguring](#lr.LRParser.configure) a parser. 208*/ 209interface ParserConfig { 210 /** 211 Node prop values to add to the parser's node set. 
212 */ 213 props?: readonly NodePropSource[]; 214 /** 215 The name of the `@top` declaration to parse from. If not 216 specified, the first top rule declaration in the grammar is 217 used. 218 */ 219 top?: string; 220 /** 221 A space-separated string of dialects to enable. 222 */ 223 dialect?: string; 224 /** 225 Replace the given external tokenizers with new ones. 226 */ 227 tokenizers?: { 228 from: ExternalTokenizer; 229 to: ExternalTokenizer; 230 }[]; 231 /** 232 Replace external specializers with new ones. 233 */ 234 specializers?: { 235 from: (value: string, stack: Stack) => number; 236 to: (value: string, stack: Stack) => number; 237 }[]; 238 /** 239 Replace the context tracker with a new one. 240 */ 241 contextTracker?: ContextTracker<any>; 242 /** 243 When true, the parser will raise an exception, rather than run 244 its error-recovery strategies, when the input doesn't match the 245 grammar. 246 */ 247 strict?: boolean; 248 /** 249 Add a wrapper, which can extend parses created by this parser 250 with additional logic (usually used to add 251 [mixed-language](#common.parseMixed) parsing). 252 */ 253 wrap?: ParseWrapper; 254 /** 255 The maximum length of the TreeBuffers generated in the output 256 tree. Defaults to 1024. 257 */ 258 bufferLength?: number; 259} 260/** 261Holds the parse tables for a given grammar, as generated by 262`lezer-generator`, and provides [methods](#common.Parser) to parse 263content with. 264*/ 265declare class LRParser extends Parser { 266 /** 267 The nodes used in the trees emitted by this parser. 268 */ 269 readonly nodeSet: NodeSet; 270 createParse(input: Input, fragments: readonly TreeFragment[], ranges: readonly { 271 from: number; 272 to: number; 273 }[]): PartialParse; 274 /** 275 Configure the parser. Returns a new parser instance that has the 276 given settings modified. Settings not provided in `config` are 277 kept from the original parser. 
278 */ 279 configure(config: ParserConfig): LRParser; 280 /** 281 Tells you whether any [parse wrappers](#lr.ParserConfig.wrap) 282 are registered for this parser. 283 */ 284 hasWrappers(): boolean; 285 /** 286 Returns the name associated with a given term. This will only 287 work for all terms when the parser was generated with the 288 `--names` option. By default, only the names of tagged terms are 289 stored. 290 */ 291 getName(term: number): string; 292 /** 293 The type of top node produced by the parser. 294 */ 295 get topNode(): NodeType; 296 /** 297 Used by the output of the parser generator. Not available to 298 user code. @hide 299 */ 300 static deserialize(spec: any): LRParser; 301} 302 303export { ContextTracker, ExternalTokenizer, InputStream, LRParser, LocalTokenGroup, type ParserConfig, Stack };