👁️
1/**
2 * Recursive descent parser for Scryfall search syntax
3 *
4 * Grammar:
5 * query = or_expr
6 * or_expr = and_expr ("OR" and_expr)*
7 * and_expr = unary_expr+
8 * unary_expr = "-" unary_expr | primary
9 * primary = "(" or_expr ")" | field_expr | name_expr
10 * field_expr = WORD operator value
11 * name_expr = EXACT_NAME | WORD | QUOTED | REGEX
12 */
13
14import { getRegexPattern, tokenize } from "./lexer";
15import {
16 type AndNode,
17 type ComparisonOp,
18 type ExactNameNode,
19 err,
20 FIELD_ALIASES,
21 type FieldName,
22 type FieldNode,
23 type FieldValue,
24 type NameNode,
25 type NotNode,
26 type OrNode,
27 ok,
28 type ParseError,
29 type Result,
30 type SearchNode,
31 type Span,
32 type Token,
33 type TokenType,
34} from "./types";
35
36/**
37 * Parser class encapsulates parsing state
38 */
39class Parser {
40 private tokens: Token[];
41 private pos: number = 0;
42 private input: string;
43
44 constructor(tokens: Token[], input: string) {
45 this.tokens = tokens;
46 this.input = input;
47 }
48
49 private peek(): Token {
50 return this.tokens[this.pos] ?? this.tokens[this.tokens.length - 1];
51 }
52
53 private previous(): Token {
54 return this.tokens[this.pos - 1];
55 }
56
57 private isAtEnd(): boolean {
58 return this.peek().type === "EOF";
59 }
60
61 private check(type: TokenType): boolean {
62 return this.peek().type === type;
63 }
64
65 private advance(): Token {
66 if (!this.isAtEnd()) {
67 this.pos++;
68 }
69 return this.previous();
70 }
71
72 /**
73 * Match token type and advance if found.
74 * WARNING: This consumes the token. For speculative parsing,
75 * save pos before and restore on failure.
76 */
77 private match(...types: TokenType[]): boolean {
78 for (const type of types) {
79 if (this.check(type)) {
80 this.advance();
81 return true;
82 }
83 }
84 return false;
85 }
86
87 private makeError(message: string, span?: Span): ParseError {
88 return {
89 message,
90 span: span ?? this.peek().span,
91 input: this.input,
92 };
93 }
94
95 /**
96 * Parse the token stream into an AST
97 */
98 parse(): Result<SearchNode> {
99 if (this.isAtEnd()) {
100 return err(this.makeError("Empty query"));
101 }
102
103 const result = this.parseOrExpr();
104 if (!result.ok) return result;
105
106 if (!this.isAtEnd()) {
107 return err(this.makeError(`Unexpected token: ${this.peek().value}`));
108 }
109
110 return result;
111 }
112
113 /**
114 * or_expr = and_expr ("OR" and_expr)*
115 */
116 private parseOrExpr(): Result<SearchNode> {
117 const firstResult = this.parseAndExpr();
118 if (!firstResult.ok) return firstResult;
119
120 const children: SearchNode[] = [firstResult.value];
121 const startSpan = firstResult.value.span;
122
123 while (this.match("OR")) {
124 const nextResult = this.parseAndExpr();
125 if (!nextResult.ok) return nextResult;
126 children.push(nextResult.value);
127 }
128
129 if (children.length === 1) {
130 return ok(children[0]);
131 }
132
133 const endSpan = children[children.length - 1].span;
134 const node: OrNode = {
135 type: "OR",
136 children,
137 span: { start: startSpan.start, end: endSpan.end },
138 };
139 return ok(node);
140 }
141
142 /**
143 * and_expr = unary_expr+
144 */
145 private parseAndExpr(): Result<SearchNode> {
146 const children: SearchNode[] = [];
147
148 while (!this.isAtEnd() && !this.check("OR") && !this.check("RPAREN")) {
149 const result = this.parseUnaryExpr();
150 if (!result.ok) return result;
151 children.push(result.value);
152 }
153
154 if (children.length === 0) {
155 return err(this.makeError("Expected expression"));
156 }
157
158 if (children.length === 1) {
159 return ok(children[0]);
160 }
161
162 const node: AndNode = {
163 type: "AND",
164 children,
165 span: {
166 start: children[0].span.start,
167 end: children[children.length - 1].span.end,
168 },
169 };
170 return ok(node);
171 }
172
173 /**
174 * unary_expr = "-" unary_expr | primary
175 */
176 private parseUnaryExpr(): Result<SearchNode> {
177 if (this.match("NOT")) {
178 const notToken = this.previous();
179 const result = this.parseUnaryExpr();
180 if (!result.ok) return result;
181
182 const node: NotNode = {
183 type: "NOT",
184 child: result.value,
185 span: { start: notToken.span.start, end: result.value.span.end },
186 };
187 return ok(node);
188 }
189
190 return this.parsePrimary();
191 }
192
193 /**
194 * primary = "(" or_expr ")" | field_expr | name_expr
195 */
196 private parsePrimary(): Result<SearchNode> {
197 // Grouped expression
198 if (this.match("LPAREN")) {
199 const openParen = this.previous();
200 const result = this.parseOrExpr();
201 if (!result.ok) return result;
202
203 if (!this.match("RPAREN")) {
204 return err(
205 this.makeError("Expected closing parenthesis", openParen.span),
206 );
207 }
208
209 // Update span to include parens
210 result.value.span = {
211 start: openParen.span.start,
212 end: this.previous().span.end,
213 };
214 return ok(result.value);
215 }
216
217 // Try field expression first (with backtracking)
218 const savepoint = this.pos;
219 const fieldResult = this.tryParseFieldExpr();
220 if (fieldResult) {
221 return fieldResult;
222 }
223 this.pos = savepoint; // Restore on failure
224
225 // Name expression
226 return this.parseNameExpr();
227 }
228
229 /**
230 * Check if token type is a comparison operator
231 */
232 private isOperator(type: TokenType): boolean {
233 return (
234 type === "COLON" ||
235 type === "EQUALS" ||
236 type === "NOT_EQUALS" ||
237 type === "LT" ||
238 type === "GT" ||
239 type === "LTE" ||
240 type === "GTE"
241 );
242 }
243
244 /**
245 * Try to parse a field expression, returning null if not a field expr.
246 * Caller must save/restore pos on null return.
247 */
248 private tryParseFieldExpr(): Result<FieldNode> | null {
249 if (!this.check("WORD")) {
250 return null;
251 }
252
253 const fieldToken = this.peek();
254 const fieldName = FIELD_ALIASES[fieldToken.value.toLowerCase()];
255
256 if (!fieldName) {
257 return null;
258 }
259
260 // Check if followed by operator (lookahead)
261 const nextToken = this.tokens[this.pos + 1];
262 if (!nextToken || !this.isOperator(nextToken.type)) {
263 return null;
264 }
265
266 // Commit to parsing field expression
267 this.advance(); // consume field name
268 const opToken = this.advance(); // consume operator
269 const operator = this.tokenToOperator(opToken.type);
270
271 const valueResult = this.parseFieldValue(fieldName);
272 if (!valueResult.ok) {
273 // Return the error - caller will restore pos
274 return valueResult as Result<FieldNode>;
275 }
276
277 const node: FieldNode = {
278 type: "FIELD",
279 field: fieldName,
280 operator,
281 value: valueResult.value,
282 span: {
283 start: fieldToken.span.start,
284 end: this.previous().span.end,
285 },
286 };
287 return ok(node);
288 }
289
290 /**
291 * Convert token type to comparison operator
292 */
293 private tokenToOperator(type: TokenType): ComparisonOp {
294 switch (type) {
295 case "COLON":
296 return ":";
297 case "EQUALS":
298 return "=";
299 case "NOT_EQUALS":
300 return "!=";
301 case "LT":
302 return "<";
303 case "GT":
304 return ">";
305 case "LTE":
306 return "<=";
307 case "GTE":
308 return ">=";
309 default:
310 return ":";
311 }
312 }
313
314 /**
315 * Parse field value based on field type
316 */
317 private parseFieldValue(field: FieldName): Result<FieldValue> {
318 // Regex value
319 if (this.match("REGEX")) {
320 const pattern = getRegexPattern(this.previous());
321 if (!pattern) {
322 return err(
323 this.makeError("Invalid regex pattern", this.previous().span),
324 );
325 }
326 return ok({
327 kind: "regex",
328 pattern,
329 source: this.previous().value,
330 });
331 }
332
333 // Quoted string
334 if (this.match("QUOTED")) {
335 return ok({ kind: "string", value: this.previous().value });
336 }
337
338 // Negative number: NOT followed by WORD that looks numeric
339 if (
340 this.check("NOT") &&
341 this.isNumericField(field) &&
342 this.pos + 1 < this.tokens.length &&
343 this.tokens[this.pos + 1].type === "WORD"
344 ) {
345 const nextValue = this.tokens[this.pos + 1].value;
346 const num = parseFloat(nextValue);
347 if (!Number.isNaN(num)) {
348 this.advance(); // consume NOT
349 this.advance(); // consume WORD
350 return ok({ kind: "number", value: -num });
351 }
352 }
353
354 // Word value
355 if (this.match("WORD")) {
356 const value = this.previous().value;
357
358 // For color fields, parse as colors - but identity can also take numeric values
359 // for counting colors (id>1 = "more than 1 color in identity")
360 if (field === "color" || field === "identity") {
361 // Check if value is purely numeric (for identity count queries like id>1)
362 if (field === "identity" && /^\d+$/.test(value)) {
363 const num = parseInt(value, 10);
364 return ok({ kind: "number", value: num });
365 }
366 return ok({
367 kind: "colors",
368 colors: this.parseColors(value),
369 });
370 }
371
372 // For numeric fields, try to parse as number
373 if (this.isNumericField(field)) {
374 const num = parseFloat(value);
375 if (!Number.isNaN(num)) {
376 return ok({ kind: "number", value: num });
377 }
378 // Could be * for power/toughness
379 if (value === "*") {
380 return ok({ kind: "string", value: "*" });
381 }
382 }
383
384 return ok({ kind: "string", value });
385 }
386
387 return err(this.makeError("Expected field value"));
388 }
389
390 /**
391 * Check if field expects numeric values
392 */
393 private isNumericField(field: FieldName): boolean {
394 return (
395 field === "manavalue" ||
396 field === "power" ||
397 field === "toughness" ||
398 field === "loyalty" ||
399 field === "defense" ||
400 field === "year"
401 );
402 }
403
404 /**
405 * Parse color string into color set
406 */
407 private parseColors(input: string): Set<string> {
408 const colors = new Set<string>();
409 const upper = input.toUpperCase();
410
411 for (const char of upper) {
412 if ("WUBRGC".includes(char)) {
413 colors.add(char);
414 }
415 }
416
417 // Handle full names and aliases
418 const lower = input.toLowerCase();
419 if (lower.includes("white")) colors.add("W");
420 if (lower.includes("blue")) colors.add("U");
421 if (lower.includes("black")) colors.add("B");
422 if (lower.includes("red")) colors.add("R");
423 if (lower.includes("green")) colors.add("G");
424 if (lower.includes("colorless")) colors.add("C");
425
426 return colors;
427 }
428
429 /**
430 * name_expr = EXACT_NAME | WORD | QUOTED | REGEX
431 */
432 private parseNameExpr(): Result<SearchNode> {
433 const token = this.peek();
434
435 // Exact name match
436 if (this.match("EXACT_NAME")) {
437 const node: ExactNameNode = {
438 type: "EXACT_NAME",
439 value: this.previous().value,
440 span: this.previous().span,
441 };
442 return ok(node);
443 }
444
445 // Regex match
446 if (this.match("REGEX")) {
447 const prevToken = this.previous();
448 const pattern = getRegexPattern(prevToken);
449 const node: NameNode = {
450 type: "NAME",
451 value: prevToken.value,
452 pattern: pattern ?? null,
453 span: prevToken.span,
454 };
455 return ok(node);
456 }
457
458 // Quoted string match
459 if (this.match("QUOTED")) {
460 const node: NameNode = {
461 type: "NAME",
462 value: this.previous().value,
463 pattern: null,
464 span: this.previous().span,
465 };
466 return ok(node);
467 }
468
469 // Word match
470 if (this.match("WORD")) {
471 const node: NameNode = {
472 type: "NAME",
473 value: this.previous().value,
474 pattern: null,
475 span: this.previous().span,
476 };
477 return ok(node);
478 }
479
480 return err(this.makeError(`Unexpected token: ${token.value}`));
481 }
482}
483
484/**
485 * Parse a search query string into an AST
486 */
487export function parse(input: string): Result<SearchNode> {
488 const tokenResult = tokenize(input);
489 if (!tokenResult.ok) return tokenResult;
490
491 const parser = new Parser(tokenResult.value, input);
492 return parser.parse();
493}