at dev 493 lines 11 kB view raw
1/** 2 * Recursive descent parser for Scryfall search syntax 3 * 4 * Grammar: 5 * query = or_expr 6 * or_expr = and_expr ("OR" and_expr)* 7 * and_expr = unary_expr+ 8 * unary_expr = "-" unary_expr | primary 9 * primary = "(" or_expr ")" | field_expr | name_expr 10 * field_expr = WORD operator value 11 * name_expr = EXACT_NAME | WORD | QUOTED | REGEX 12 */ 13 14import { getRegexPattern, tokenize } from "./lexer"; 15import { 16 type AndNode, 17 type ComparisonOp, 18 type ExactNameNode, 19 err, 20 FIELD_ALIASES, 21 type FieldName, 22 type FieldNode, 23 type FieldValue, 24 type NameNode, 25 type NotNode, 26 type OrNode, 27 ok, 28 type ParseError, 29 type Result, 30 type SearchNode, 31 type Span, 32 type Token, 33 type TokenType, 34} from "./types"; 35 36/** 37 * Parser class encapsulates parsing state 38 */ 39class Parser { 40 private tokens: Token[]; 41 private pos: number = 0; 42 private input: string; 43 44 constructor(tokens: Token[], input: string) { 45 this.tokens = tokens; 46 this.input = input; 47 } 48 49 private peek(): Token { 50 return this.tokens[this.pos] ?? this.tokens[this.tokens.length - 1]; 51 } 52 53 private previous(): Token { 54 return this.tokens[this.pos - 1]; 55 } 56 57 private isAtEnd(): boolean { 58 return this.peek().type === "EOF"; 59 } 60 61 private check(type: TokenType): boolean { 62 return this.peek().type === type; 63 } 64 65 private advance(): Token { 66 if (!this.isAtEnd()) { 67 this.pos++; 68 } 69 return this.previous(); 70 } 71 72 /** 73 * Match token type and advance if found. 74 * WARNING: This consumes the token. For speculative parsing, 75 * save pos before and restore on failure. 76 */ 77 private match(...types: TokenType[]): boolean { 78 for (const type of types) { 79 if (this.check(type)) { 80 this.advance(); 81 return true; 82 } 83 } 84 return false; 85 } 86 87 private makeError(message: string, span?: Span): ParseError { 88 return { 89 message, 90 span: span ?? this.peek().span, 91 input: this.input, 92 }; 93 } 94 95 /** 96 * Parse the token stream into an AST 97 */ 98 parse(): Result<SearchNode> { 99 if (this.isAtEnd()) { 100 return err(this.makeError("Empty query")); 101 } 102 103 const result = this.parseOrExpr(); 104 if (!result.ok) return result; 105 106 if (!this.isAtEnd()) { 107 return err(this.makeError(`Unexpected token: ${this.peek().value}`)); 108 } 109 110 return result; 111 } 112 113 /** 114 * or_expr = and_expr ("OR" and_expr)* 115 */ 116 private parseOrExpr(): Result<SearchNode> { 117 const firstResult = this.parseAndExpr(); 118 if (!firstResult.ok) return firstResult; 119 120 const children: SearchNode[] = [firstResult.value]; 121 const startSpan = firstResult.value.span; 122 123 while (this.match("OR")) { 124 const nextResult = this.parseAndExpr(); 125 if (!nextResult.ok) return nextResult; 126 children.push(nextResult.value); 127 } 128 129 if (children.length === 1) { 130 return ok(children[0]); 131 } 132 133 const endSpan = children[children.length - 1].span; 134 const node: OrNode = { 135 type: "OR", 136 children, 137 span: { start: startSpan.start, end: endSpan.end }, 138 }; 139 return ok(node); 140 } 141 142 /** 143 * and_expr = unary_expr+ 144 */ 145 private parseAndExpr(): Result<SearchNode> { 146 const children: SearchNode[] = []; 147 148 while (!this.isAtEnd() && !this.check("OR") && !this.check("RPAREN")) { 149 const result = this.parseUnaryExpr(); 150 if (!result.ok) return result; 151 children.push(result.value); 152 } 153 154 if (children.length === 0) { 155 return err(this.makeError("Expected expression")); 156 } 157 158 if (children.length === 1) { 159 return ok(children[0]); 160 } 161 162 const node: AndNode = { 163 type: "AND", 164 children, 165 span: { 166 start: children[0].span.start, 167 end: children[children.length - 1].span.end, 168 }, 169 }; 170 return ok(node); 171 } 172 173 /** 174 * unary_expr = "-" unary_expr | primary 175 */ 176 private parseUnaryExpr(): Result<SearchNode> { 177 if (this.match("NOT")) { 178 const notToken = this.previous(); 179 const result = this.parseUnaryExpr(); 180 if (!result.ok) return result; 181 182 const node: NotNode = { 183 type: "NOT", 184 child: result.value, 185 span: { start: notToken.span.start, end: result.value.span.end }, 186 }; 187 return ok(node); 188 } 189 190 return this.parsePrimary(); 191 } 192 193 /** 194 * primary = "(" or_expr ")" | field_expr | name_expr 195 */ 196 private parsePrimary(): Result<SearchNode> { 197 // Grouped expression 198 if (this.match("LPAREN")) { 199 const openParen = this.previous(); 200 const result = this.parseOrExpr(); 201 if (!result.ok) return result; 202 203 if (!this.match("RPAREN")) { 204 return err( 205 this.makeError("Expected closing parenthesis", openParen.span), 206 ); 207 } 208 209 // Update span to include parens 210 result.value.span = { 211 start: openParen.span.start, 212 end: this.previous().span.end, 213 }; 214 return ok(result.value); 215 } 216 217 // Try field expression first (with backtracking) 218 const savepoint = this.pos; 219 const fieldResult = this.tryParseFieldExpr(); 220 if (fieldResult) { 221 return fieldResult; 222 } 223 this.pos = savepoint; // Restore on failure 224 225 // Name expression 226 return this.parseNameExpr(); 227 } 228 229 /** 230 * Check if token type is a comparison operator 231 */ 232 private isOperator(type: TokenType): boolean { 233 return ( 234 type === "COLON" || 235 type === "EQUALS" || 236 type === "NOT_EQUALS" || 237 type === "LT" || 238 type === "GT" || 239 type === "LTE" || 240 type === "GTE" 241 ); 242 } 243 244 /** 245 * Try to parse a field expression, returning null if not a field expr. 246 * Caller must save/restore pos on null return. 247 */ 248 private tryParseFieldExpr(): Result<FieldNode> | null { 249 if (!this.check("WORD")) { 250 return null; 251 } 252 253 const fieldToken = this.peek(); 254 const fieldName = FIELD_ALIASES[fieldToken.value.toLowerCase()]; 255 256 if (!fieldName) { 257 return null; 258 } 259 260 // Check if followed by operator (lookahead) 261 const nextToken = this.tokens[this.pos + 1]; 262 if (!nextToken || !this.isOperator(nextToken.type)) { 263 return null; 264 } 265 266 // Commit to parsing field expression 267 this.advance(); // consume field name 268 const opToken = this.advance(); // consume operator 269 const operator = this.tokenToOperator(opToken.type); 270 271 const valueResult = this.parseFieldValue(fieldName); 272 if (!valueResult.ok) { 273 // Return the error - caller will restore pos 274 return valueResult as Result<FieldNode>; 275 } 276 277 const node: FieldNode = { 278 type: "FIELD", 279 field: fieldName, 280 operator, 281 value: valueResult.value, 282 span: { 283 start: fieldToken.span.start, 284 end: this.previous().span.end, 285 }, 286 }; 287 return ok(node); 288 } 289 290 /** 291 * Convert token type to comparison operator 292 */ 293 private tokenToOperator(type: TokenType): ComparisonOp { 294 switch (type) { 295 case "COLON": 296 return ":"; 297 case "EQUALS": 298 return "="; 299 case "NOT_EQUALS": 300 return "!="; 301 case "LT": 302 return "<"; 303 case "GT": 304 return ">"; 305 case "LTE": 306 return "<="; 307 case "GTE": 308 return ">="; 309 default: 310 return ":"; 311 } 312 } 313 314 /** 315 * Parse field value based on field type 316 */ 317 private parseFieldValue(field: FieldName): Result<FieldValue> { 318 // Regex value 319 if (this.match("REGEX")) { 320 const pattern = getRegexPattern(this.previous()); 321 if (!pattern) { 322 return err( 323 this.makeError("Invalid regex pattern", this.previous().span), 324 ); 325 } 326 return ok({ 327 kind: "regex", 328 pattern, 329 source: this.previous().value, 330 }); 331 } 332 333 // Quoted string 334 if (this.match("QUOTED")) { 335 return ok({ kind: "string", value: this.previous().value }); 336 } 337 338 // Negative number: NOT followed by WORD that looks numeric 339 if ( 340 this.check("NOT") && 341 this.isNumericField(field) && 342 this.pos + 1 < this.tokens.length && 343 this.tokens[this.pos + 1].type === "WORD" 344 ) { 345 const nextValue = this.tokens[this.pos + 1].value; 346 const num = parseFloat(nextValue); 347 if (!Number.isNaN(num)) { 348 this.advance(); // consume NOT 349 this.advance(); // consume WORD 350 return ok({ kind: "number", value: -num }); 351 } 352 } 353 354 // Word value 355 if (this.match("WORD")) { 356 const value = this.previous().value; 357 358 // For color fields, parse as colors - but identity can also take numeric values 359 // for counting colors (id>1 = "more than 1 color in identity") 360 if (field === "color" || field === "identity") { 361 // Check if value is purely numeric (for identity count queries like id>1) 362 if (field === "identity" && /^\d+$/.test(value)) { 363 const num = parseInt(value, 10); 364 return ok({ kind: "number", value: num }); 365 } 366 return ok({ 367 kind: "colors", 368 colors: this.parseColors(value), 369 }); 370 } 371 372 // For numeric fields, try to parse as number 373 if (this.isNumericField(field)) { 374 const num = parseFloat(value); 375 if (!Number.isNaN(num)) { 376 return ok({ kind: "number", value: num }); 377 } 378 // Could be * for power/toughness 379 if (value === "*") { 380 return ok({ kind: "string", value: "*" }); 381 } 382 } 383 384 return ok({ kind: "string", value }); 385 } 386 387 return err(this.makeError("Expected field value")); 388 } 389 390 /** 391 * Check if field expects numeric values 392 */ 393 private isNumericField(field: FieldName): boolean { 394 return ( 395 field === "manavalue" || 396 field === "power" || 397 field === "toughness" || 398 field === "loyalty" || 399 field === "defense" || 400 field === "year" 401 ); 402 } 403 404 /** 405 * Parse color string into color set 406 */ 407 private parseColors(input: string): Set<string> { 408 const colors = new Set<string>(); 409 const upper = input.toUpperCase(); 410 411 for (const char of upper) { 412 if ("WUBRGC".includes(char)) { 413 colors.add(char); 414 } 415 } 416 417 // Handle full names and aliases 418 const lower = input.toLowerCase(); 419 if (lower.includes("white")) colors.add("W"); 420 if (lower.includes("blue")) colors.add("U"); 421 if (lower.includes("black")) colors.add("B"); 422 if (lower.includes("red")) colors.add("R"); 423 if (lower.includes("green")) colors.add("G"); 424 if (lower.includes("colorless")) colors.add("C"); 425 426 return colors; 427 } 428 429 /** 430 * name_expr = EXACT_NAME | WORD | QUOTED | REGEX 431 */ 432 private parseNameExpr(): Result<SearchNode> { 433 const token = this.peek(); 434 435 // Exact name match 436 if (this.match("EXACT_NAME")) { 437 const node: ExactNameNode = { 438 type: "EXACT_NAME", 439 value: this.previous().value, 440 span: this.previous().span, 441 }; 442 return ok(node); 443 } 444 445 // Regex match 446 if (this.match("REGEX")) { 447 const prevToken = this.previous(); 448 const pattern = getRegexPattern(prevToken); 449 const node: NameNode = { 450 type: "NAME", 451 value: prevToken.value, 452 pattern: pattern ?? null, 453 span: prevToken.span, 454 }; 455 return ok(node); 456 } 457 458 // Quoted string match 459 if (this.match("QUOTED")) { 460 const node: NameNode = { 461 type: "NAME", 462 value: this.previous().value, 463 pattern: null, 464 span: this.previous().span, 465 }; 466 return ok(node); 467 } 468 469 // Word match 470 if (this.match("WORD")) { 471 const node: NameNode = { 472 type: "NAME", 473 value: this.previous().value, 474 pattern: null, 475 span: this.previous().span, 476 }; 477 return ok(node); 478 } 479 480 return err(this.makeError(`Unexpected token: ${token.value}`)); 481 } 482} 483 484/** 485 * Parse a search query string into an AST 486 */ 487export function parse(input: string): Result<SearchNode> { 488 const tokenResult = tokenize(input); 489 if (!tokenResult.ok) return tokenResult; 490 491 const parser = new Parser(tokenResult.value, input); 492 return parser.parse(); 493}