import { describe, it, expect } from "vitest";
import { parsePlaces, Place } from "../src/parser";
import fc from "fast-check";

// ─── Contract 1: Top-level bullets start new Place blocks ──────────────
// Only `*` and `-` at column 0 open a new Place; other list markers and
// indented bullets with no parent produce nothing.

describe("Contract 1: Top-level bullets", () => {
  it("parses a single asterisk bullet", () => {
    const result = parsePlaces("* Sagrada Familia");
    expect(result).toHaveLength(1);
    expect(result[0].name).toBe("Sagrada Familia");
  });

  it("parses a single dash bullet", () => {
    const result = parsePlaces("- Sagrada Familia");
    expect(result).toHaveLength(1);
    expect(result[0].name).toBe("Sagrada Familia");
  });

  it("parses multiple top-level bullets", () => {
    const result = parsePlaces("* Place A\n* Place B\n* Place C");
    expect(result).toHaveLength(3);
    expect(result[0].name).toBe("Place A");
    expect(result[1].name).toBe("Place B");
    expect(result[2].name).toBe("Place C");
  });

  it("supports mixed * and - bullet styles", () => {
    const result = parsePlaces("* Place A\n- Place B");
    expect(result).toHaveLength(2);
    expect(result[0].name).toBe("Place A");
    expect(result[1].name).toBe("Place B");
  });

  it("does not treat + as a bullet marker", () => {
    const result = parsePlaces("+ Not a place");
    expect(result).toHaveLength(0);
  });

  it("does not treat ordered lists as bullets", () => {
    const result = parsePlaces("1. Not a place\n2. Also not");
    expect(result).toHaveLength(0);
  });

  it("requires bullet at column 0", () => {
    const result = parsePlaces(" * Indented bullet");
    // An indented bullet at the start (no preceding top-level bullet) is ignored
    expect(result).toHaveLength(0);
  });
});

// ─── Contract 2: Sub-bullets belong to preceding top-level bullet ──────
// Indented bullets attach to the most recent top-level bullet, at any depth.

describe("Contract 2: Sub-bullets", () => {
  it("assigns tab-indented sub-bullets to the preceding top-level", () => {
    const result = parsePlaces("* Place A\n\t* Sub note");
    expect(result).toHaveLength(1);
    expect(result[0].notes).toContain("Sub note");
  });

  it("assigns space-indented sub-bullets to the preceding top-level", () => {
    // Two spaces: the edge-case suite treats a single leading space as a
    // dead zone, so one space would contradict that test.
    const result = parsePlaces("* Place A\n  * Sub note");
    expect(result).toHaveLength(1);
    expect(result[0].notes).toContain("Sub note");
  });

  it("assigns 4-space-indented sub-bullets", () => {
    // Exactly four spaces, as the test title states.
    const result = parsePlaces("* Place A\n    * Sub note");
    expect(result).toHaveLength(1);
    expect(result[0].notes).toContain("Sub note");
  });

  it("treats deeply nested bullets as sub-bullets of current block", () => {
    const result = parsePlaces("* Place A\n\t* Level 1\n\t\t* Level 2");
    expect(result).toHaveLength(1);
    expect(result[0].notes).toContain("Level 1");
    expect(result[0].notes).toContain("Level 2");
  });

  it("supports dash sub-bullets under asterisk top-level", () => {
    const result = parsePlaces("* Place A\n\t- Sub note");
    expect(result).toHaveLength(1);
    expect(result[0].notes).toContain("Sub note");
  });

  it("ignores sub-bullets with no preceding top-level bullet", () => {
    const result = parsePlaces("\t* Orphan sub-bullet\n* Place A");
    expect(result).toHaveLength(1);
    expect(result[0].name).toBe("Place A");
    expect(result[0].notes).toHaveLength(0);
  });
});

// ─── Contract 3: Structured field parsing ──────────────────────────────
// A sub-bullet shaped `key: value` (single-word key, space after the colon)
// becomes a field; anything else falls through to notes.

describe("Contract 3: Structured fields", () => {
  it("parses key: value sub-bullets into fields", () => {
    const result = parsePlaces("* Place A\n\t* category: Architecture");
    expect(result[0].fields).toEqual({ category: "Architecture" });
  });

  it("stores keys as lowercase-trimmed", () => {
    const result = parsePlaces("* Place A\n\t* Category: Art");
    expect(result[0].fields).toEqual({ category: "Art" });
  });

  it("trims values", () => {
    const result = parsePlaces("* Place A\n\t* category: Art ");
    expect(result[0].fields).toEqual({ category: "Art" });
  });

  it("parses multiple fields", () => {
    const result = parsePlaces(
      "* Place A\n\t* category: Art\n\t* rating: 5"
    );
    expect(result[0].fields).toEqual({ category: "Art", rating: "5" });
  });

  it("last field wins on duplicate keys", () => {
    const result = parsePlaces(
      "* Place A\n\t* category: Art\n\t* category: Architecture"
    );
    expect(result[0].fields).toEqual({ category: "Architecture" });
  });

  it("does not treat key with spaces as a field", () => {
    const result = parsePlaces("* Place A\n\t* some key: value");
    expect(result[0].fields).toEqual({});
    expect(result[0].notes).toContain("some key: value");
  });

  it("does not treat key:value (no space after colon) as a field", () => {
    const result = parsePlaces("* Place A\n\t* category:Art");
    expect(result[0].fields).toEqual({});
    expect(result[0].notes).toContain("category:Art");
  });

  it("does not treat : value (no key) as a field", () => {
    const result = parsePlaces("* Place A\n\t* : value");
    expect(result[0].fields).toEqual({});
    expect(result[0].notes).toContain(": value");
  });
});

// ─── Contract 4: Geo field parsing ─────────────────────────────────────
// A `geo` field always lands in `fields`; lat/lng are only set when the
// value is a well-formed, in-range "lat,lng" pair.

describe("Contract 4: Geo field", () => {
  it("sets lat/lng for valid geo coordinates", () => {
    const result = parsePlaces("* Place A\n\t* geo: 41.403600,2.174400");
    expect(result[0].lat).toBe(41.4036);
    expect(result[0].lng).toBe(2.1744);
    expect(result[0].fields.geo).toBe("41.403600,2.174400");
  });

  it("handles negative coordinates (southern/western hemispheres)", () => {
    const result = parsePlaces("* Place A\n\t* geo: -33.8688,151.2093");
    expect(result[0].lat).toBe(-33.8688);
    expect(result[0].lng).toBe(151.2093);
  });

  it("handles space after comma in geo", () => {
    const result = parsePlaces("* Place A\n\t* geo: 41.4036, 2.1744");
    expect(result[0].lat).toBe(41.4036);
    expect(result[0].lng).toBe(2.1744);
  });

  it("handles integer coordinates", () => {
    const result = parsePlaces("* Place A\n\t* geo: 41,2");
    expect(result[0].lat).toBe(41);
    expect(result[0].lng).toBe(2);
  });

  it("stores malformed geo in fields but lat/lng remain undefined", () => {
    const result = parsePlaces("* Place A\n\t* geo: abc,def");
    expect(result[0].fields.geo).toBe("abc,def");
    expect(result[0].lat).toBeUndefined();
    expect(result[0].lng).toBeUndefined();
  });

  it("rejects out-of-range lat (>90)", () => {
    const result = parsePlaces("* Place A\n\t* geo: 999,999");
    expect(result[0].fields.geo).toBe("999,999");
    expect(result[0].lat).toBeUndefined();
    expect(result[0].lng).toBeUndefined();
  });

  it("rejects out-of-range lat (<-90)", () => {
    const result = parsePlaces("* Place A\n\t* geo: -91,0");
    expect(result[0].lat).toBeUndefined();
    expect(result[0].lng).toBeUndefined();
  });

  it("rejects out-of-range lng (>180)", () => {
    const result = parsePlaces("* Place A\n\t* geo: 0,181");
    expect(result[0].lat).toBeUndefined();
    expect(result[0].lng).toBeUndefined();
  });

  it("rejects out-of-range lng (<-180)", () => {
    const result = parsePlaces("* Place A\n\t* geo: 0,-181");
    expect(result[0].lat).toBeUndefined();
    expect(result[0].lng).toBeUndefined();
  });

  it("rejects trailing dot (e.g., 41.,2.)", () => {
    const result = parsePlaces("* Place A\n\t* geo: 41.,2.");
    expect(result[0].lat).toBeUndefined();
    expect(result[0].lng).toBeUndefined();
  });

  it("accepts boundary values (90, 180)", () => {
    const result = parsePlaces("* Place A\n\t* geo: 90,180");
    expect(result[0].lat).toBe(90);
    expect(result[0].lng).toBe(180);
  });

  it("accepts boundary values (-90, -180)", () => {
    const result = parsePlaces("* Place A\n\t* geo: -90,-180");
    expect(result[0].lat).toBe(-90);
    expect(result[0].lng).toBe(-180);
  });

  it("last geo field wins when multiple geo sub-bullets", () => {
    const result = parsePlaces(
      "* Place A\n\t* geo: 41.4036,2.1744\n\t* geo: 48.8606,2.3376"
    );
    expect(result[0].lat).toBe(48.8606);
    expect(result[0].lng).toBe(2.3376);
  });
});

// ─── Contract 5: Freeform notes ────────────────────────────────────────
// Sub-bullets that are not fields are collected, trimmed and in order.

describe("Contract 5: Freeform notes", () => {
  it("stores non-field sub-bullets as notes", () => {
    const result = parsePlaces("* Place A\n\t* Amazing architecture");
    expect(result[0].notes).toEqual(["Amazing architecture"]);
  });

  it("strips bullet prefix from notes", () => {
    const result = parsePlaces("* Place A\n\t* A note\n\t- Another note");
    expect(result[0].notes).toEqual(["A note", "Another note"]);
  });

  it("trims note text", () => {
    const result = parsePlaces("* Place A\n\t* A note ");
    expect(result[0].notes).toEqual(["A note"]);
  });

  it("preserves order of notes", () => {
    const result = parsePlaces("* Place A\n\t* Note 1\n\t* Note 2\n\t* Note 3");
    expect(result[0].notes).toEqual(["Note 1", "Note 2", "Note 3"]);
  });

  it("keeps notes separate from fields", () => {
    const result = parsePlaces(
      "* Place A\n\t* A note\n\t* category: Art\n\t* Another note"
    );
    expect(result[0].notes).toEqual(["A note", "Another note"]);
    expect(result[0].fields).toEqual({ category: "Art" });
  });
});

// ─── Contract 6: Markdown links ────────────────────────────────────────
// A bullet that is exactly one markdown link yields name + url; anything
// the link pattern cannot match is kept as a plain-text name.

describe("Contract 6: Markdown links", () => {
  it("extracts name and url from markdown link", () => {
    const result = parsePlaces(
      "* [The Louvre](https://en.wikipedia.org/wiki/Louvre)"
    );
    expect(result[0].name).toBe("The Louvre");
    expect(result[0].url).toBe("https://en.wikipedia.org/wiki/Louvre");
  });

  it("ignores title attribute in markdown link", () => {
    const result = parsePlaces(
      '* [The Louvre](https://example.com "A museum")'
    );
    expect(result[0].name).toBe("The Louvre");
    expect(result[0].url).toBe("https://example.com");
  });

  it("excludes empty text markdown links", () => {
    const result = parsePlaces("* [](https://example.com)");
    expect(result).toHaveLength(0);
  });

  it("excludes whitespace-only text markdown links", () => {
    const result = parsePlaces("* [ ](https://example.com)");
    expect(result).toHaveLength(0);
  });

  it("treats markdown link with trailing text as plain text", () => {
    const result = parsePlaces("* [Name](https://example.com) extra text");
    expect(result[0].name).toBe("[Name](https://example.com) extra text");
    expect(result[0].url).toBeUndefined();
  });

  it("sets url to undefined for empty URL", () => {
    const result = parsePlaces("* [Some Place]()");
    expect(result[0].name).toBe("Some Place");
    expect(result[0].url).toBeUndefined();
  });

  it("falls back to plain text for URLs containing literal quotes (known limitation)", () => {
    // URLs containing literal double-quote characters break MD_LINK_RE
    // and fall through to plain text parsing.
    const result = parsePlaces(
      '* [Place](https://example.com/q="test")'
    );
    expect(result).toHaveLength(1);
    // The regex can't distinguish URL-with-quotes from title syntax,
    // so it falls back to plain text
    expect(result[0].url).toBeUndefined();
  });

  it("falls back to plain text for URLs with parentheses (known limitation)", () => {
    // URLs with parentheses (e.g., Wikipedia disambiguation) break MD_LINK_RE
    // and fall through to plain text parsing. This is a known limitation.
    const result = parsePlaces(
      "* [Place](https://en.wikipedia.org/wiki/Place_(disambiguation))"
    );
    expect(result).toHaveLength(1);
    // Falls back to plain text since the regex can't handle parens in URL
    expect(result[0].name).toBe(
      "[Place](https://en.wikipedia.org/wiki/Place_(disambiguation))"
    );
    expect(result[0].url).toBeUndefined();
  });
});

// ─── Contract 7: Wiki-links ───────────────────────────────────────────
// `[[Page]]` and `[[Target|Display]]` supply a name only, never a url.

describe("Contract 7: Wiki-links", () => {
  it("extracts name from wiki-link", () => {
    const result = parsePlaces("* [[Page Name]]");
    expect(result[0].name).toBe("Page Name");
    expect(result[0].url).toBeUndefined();
  });

  it("uses display name from piped wiki-link", () => {
    const result = parsePlaces("* [[Target|Display Name]]");
    expect(result[0].name).toBe("Display Name");
    expect(result[0].url).toBeUndefined();
  });

  it("excludes empty wiki-links", () => {
    const result = parsePlaces("* [[]]");
    expect(result).toHaveLength(0);
  });

  it("excludes wiki-link with pipe but empty display name", () => {
    const result = parsePlaces("* [[Target|]]");
    expect(result).toHaveLength(0);
  });

  it("handles multiple pipes in wiki-link (everything after first pipe is display)", () => {
    const result = parsePlaces("* [[Target|Display|Extra]]");
    expect(result[0].name).toBe("Display|Extra");
  });

  it("treats wiki-link with trailing text as plain text", () => {
    const result = parsePlaces("* [[Page Name]] extra text");
    expect(result[0].name).toBe("[[Page Name]] extra text");
    expect(result[0].url).toBeUndefined();
  });
});

// ─── Contract 8: Plain text names ──────────────────────────────────────
// Non-link bullet text is the name, trimmed but otherwise untouched.

describe("Contract 8: Plain text names", () => {
  it("sets name to trimmed bullet content", () => {
    const result = parsePlaces("* Blue Bottle Coffee, Tokyo ");
    expect(result[0].name).toBe("Blue Bottle Coffee, Tokyo");
  });

  it("preserves inline markdown in names", () => {
    const result = parsePlaces("* **Bold Place**");
    expect(result[0].name).toBe("**Bold Place**");
  });

  it("preserves strikethrough in names", () => {
    const result = parsePlaces("* ~~Closed Restaurant~~");
    expect(result[0].name).toBe("~~Closed Restaurant~~");
  });
});

// ─── Contract 9: Line numbers ──────────────────────────────────────────
// startLine/endLine are 0-based and cover the bullet plus its sub-bullets.

describe("Contract 9: Line numbers (0-based)", () => {
  it("sets startLine and endLine for a single bullet", () => {
    const result = parsePlaces("* Place A");
    expect(result[0].startLine).toBe(0);
    expect(result[0].endLine).toBe(0);
  });

  it("sets endLine to last sub-bullet line", () => {
    const result = parsePlaces("* Place A\n\t* Note 1\n\t* Note 2");
    expect(result[0].startLine).toBe(0);
    expect(result[0].endLine).toBe(2);
  });

  it("handles multiple places with correct line ranges", () => {
    const content =
      "* Place A\n\t* Note A\n* Place B\n\t* Note B1\n\t* Note B2";
    const result = parsePlaces(content);
    expect(result[0].startLine).toBe(0);
    expect(result[0].endLine).toBe(1);
    expect(result[1].startLine).toBe(2);
    expect(result[1].endLine).toBe(4);
  });

  it("handles blank lines between places (dead zones)", () => {
    const content = "* Place A\n\t* Note A\n\n* Place B";
    const result = parsePlaces(content);
    expect(result[0].startLine).toBe(0);
    expect(result[0].endLine).toBe(1); // dead zone line 2 not included
    expect(result[1].startLine).toBe(3);
    expect(result[1].endLine).toBe(3);
  });

  it("endLine includes deeply nested descendants", () => {
    const content =
      "* Place A\n\t* Level 1\n\t\t* Level 2\n\t\t\t* Level 3";
    const result = parsePlaces(content);
    expect(result[0].endLine).toBe(3);
  });
});

// ─── Contract 10: Empty/whitespace names excluded ──────────────────────
// A bullet whose text is empty after trimming produces no Place.

describe("Contract 10: Empty names excluded", () => {
  it("excludes bullet with no text", () => {
    const result = parsePlaces("* ");
    expect(result).toHaveLength(0);
  });

  it("excludes bullet with only whitespace", () => {
    // Several spaces after the marker, so this input differs from the
    // "no text" case above.
    const result = parsePlaces("*    ");
    expect(result).toHaveLength(0);
  });

  it("excludes dash bullet with no text", () => {
    const result = parsePlaces("- ");
    expect(result).toHaveLength(0);
  });
});
// ─── Contract 11: Non-bullet lines ignored ─────────────────────────────
// Headings, paragraphs and blank lines produce no Place and do not extend
// the line range of the Place before them.

describe("Contract 11: Non-bullet lines ignored", () => {
  it("ignores headings", () => {
    const result = parsePlaces("# Heading\n* Place A");
    expect(result).toHaveLength(1);
    expect(result[0].name).toBe("Place A");
  });

  it("ignores paragraphs", () => {
    const result = parsePlaces("Some text\n* Place A");
    expect(result).toHaveLength(1);
  });

  it("ignores blank lines", () => {
    const result = parsePlaces("\n\n* Place A\n\n");
    expect(result).toHaveLength(1);
  });

  it("dead zone lines don't affect endLine of preceding place", () => {
    const content = "* Place A\n\t* Note\nSome paragraph\n\n* Place B";
    const result = parsePlaces(content);
    expect(result[0].endLine).toBe(1); // Not 2 or 3
  });
});

// ─── Contract 13: Duplicate field keys ─────────────────────────────────
// NOTE(review): no "Contract 12" block is visible in this file — confirm
// whether the gap in numbering is intentional.

describe("Contract 13: Duplicate field keys — last wins", () => {
  it("last value wins for duplicate keys", () => {
    const result = parsePlaces(
      "* Place A\n\t* rating: 3\n\t* rating: 5"
    );
    expect(result[0].fields.rating).toBe("5");
  });
});

// ─── Edge Cases ────────────────────────────────────────────────────────
// Inputs at the boundaries of the contracts: empty input, alternate line
// endings, indentation thresholds, unusual geo values and field keys.

describe("Edge cases", () => {
  it("returns empty array for empty string", () => {
    expect(parsePlaces("")).toEqual([]);
  });

  it("returns empty array for string with no bullets", () => {
    expect(parsePlaces("Just some text\nAnother line")).toEqual([]);
  });

  it("handles Windows line endings (\\r\\n)", () => {
    const result = parsePlaces("* Place A\r\n\t* Note\r\n* Place B");
    expect(result).toHaveLength(2);
    expect(result[0].name).toBe("Place A");
    expect(result[0].notes).toEqual(["Note"]);
    expect(result[1].name).toBe("Place B");
  });

  it("handles tab-indented sub-bullets", () => {
    const result = parsePlaces("* Place A\n\t* category: Art");
    expect(result[0].fields.category).toBe("Art");
  });

  it("handles space-indented sub-bullets", () => {
    // Two spaces: a single leading space is a dead zone (see the
    // single-space test below), so it cannot be used as the indent here.
    const result = parsePlaces("* Place A\n  * category: Art");
    expect(result[0].fields.category).toBe("Art");
  });

  it("handles the full example from the spec", () => {
    const content = [
      "* Sagrada Familia",
      "\t* Amazing architecture, book tickets in advance",
      "\t* category: Architecture",
      "\t* geo: 41.403600,2.174400",
      "* [The Louvre](https://en.wikipedia.org/wiki/Louvre)",
      "\t* Must see the Mona Lisa",
      "\t* category: Art",
      "\t* geo: 48.860600,2.337600",
      "* Blue Bottle Coffee, Tokyo",
    ].join("\n");

    const result = parsePlaces(content);
    expect(result).toHaveLength(3);

    // Sagrada Familia
    expect(result[0].name).toBe("Sagrada Familia");
    expect(result[0].url).toBeUndefined();
    expect(result[0].notes).toEqual([
      "Amazing architecture, book tickets in advance",
    ]);
    expect(result[0].fields.category).toBe("Architecture");
    expect(result[0].lat).toBe(41.4036);
    expect(result[0].lng).toBe(2.1744);
    expect(result[0].startLine).toBe(0);
    expect(result[0].endLine).toBe(3);

    // The Louvre
    expect(result[1].name).toBe("The Louvre");
    expect(result[1].url).toBe("https://en.wikipedia.org/wiki/Louvre");
    expect(result[1].notes).toEqual(["Must see the Mona Lisa"]);
    expect(result[1].fields.category).toBe("Art");
    expect(result[1].lat).toBe(48.8606);
    expect(result[1].lng).toBe(2.3376);
    expect(result[1].startLine).toBe(4);
    expect(result[1].endLine).toBe(7);

    // Blue Bottle Coffee
    expect(result[2].name).toBe("Blue Bottle Coffee, Tokyo");
    expect(result[2].url).toBeUndefined();
    expect(result[2].fields).toEqual({});
    expect(result[2].notes).toEqual([]);
    expect(result[2].lat).toBeUndefined();
    expect(result[2].lng).toBeUndefined();
    expect(result[2].startLine).toBe(8);
    expect(result[2].endLine).toBe(8);
  });

  it("accepts geo: 0,0 (Null Island)", () => {
    const result = parsePlaces("* Place A\n\t* geo: 0,0");
    expect(result[0].lat).toBe(0);
    expect(result[0].lng).toBe(0);
  });

  it("ignores single-space-indented bullet (dead zone)", () => {
    const result = parsePlaces("* Place A\n * Not a sub-bullet");
    expect(result).toHaveLength(1);
    expect(result[0].notes).toHaveLength(0);
    expect(result[0].endLine).toBe(0);
  });

  it("rejects geo with space before comma", () => {
    const result = parsePlaces("* Place A\n\t* geo: 41.4036 ,2.1744");
    expect(result[0].lat).toBeUndefined();
    expect(result[0].lng).toBeUndefined();
  });

  it("rejects geo with leading dot and no digit (.5,.5)", () => {
    const result = parsePlaces("* Place A\n\t* geo: .5,.5");
    expect(result[0].lat).toBeUndefined();
    expect(result[0].lng).toBeUndefined();
  });

  it("does not add empty strings to notes from blank sub-bullets", () => {
    const result = parsePlaces("* Place A\n\t* ");
    expect(result[0].notes).toEqual([]);
  });

  it("accepts digit-only field keys (spec: word characters include digits)", () => {
    const result = parsePlaces("* Place A\n\t* 2024: A great year");
    expect(result[0].fields["2024"]).toBe("A great year");
    expect(result[0].notes).toHaveLength(0);
  });

  it("handles bare CR line endings (old Mac)", () => {
    const result = parsePlaces("* Place A\r\t* Note\r* Place B");
    expect(result).toHaveLength(2);
    expect(result[0].notes).toEqual(["Note"]);
    expect(result[1].name).toBe("Place B");
  });

  it("field key 'constructor' doesn't collide with Object prototype", () => {
    const result = parsePlaces("* Place A\n\t* constructor: value");
    expect(result[0].fields.constructor).toBe("value");
    expect(typeof result[0].fields.constructor).toBe("string");
  });

  it("field key 'toString' doesn't collide with Object prototype", () => {
    // Feed the mixed-case key named in the title; Contract 3 lowercases
    // keys, so the stored key is `tostring`.
    const result = parsePlaces("* Place A\n\t* toString: value");
    expect(result[0].fields.tostring).toBe("value");
    expect(typeof result[0].fields.tostring).toBe("string");
  });

  it("accepts underscore in field keys", () => {
    const result = parsePlaces("* Place A\n\t* my_field: value");
    expect(result[0].fields.my_field).toBe("value");
  });

  it("default Place has empty fields and notes", () => {
    const result = parsePlaces("* Simple Place");
    expect(result[0].fields).toEqual({});
    expect(result[0].notes).toEqual([]);
    expect(result[0].lat).toBeUndefined();
    expect(result[0].lng).toBeUndefined();
    expect(result[0].url).toBeUndefined();
  });
});
// ─── Property-based tests (fast-check) ────────────────────────────────
// Invariants checked against generated input rather than fixed examples.

describe("Property-based tests", () => {
  // One character of a generated place name. Line breaks and square brackets
  // are filtered out so a name stays on one line and cannot form link-like
  // syntax that parses to an empty name (e.g., [[]], [](url)).
  const nameCharArb = fc.oneof(
    fc.char().filter((ch) => !["\n", "\r", "[", "]"].includes(ch)),
    fc.constant(" ")
  );

  // Place names: 1-50 such characters, with at least one non-whitespace one.
  const placeNameArb = fc
    .stringOf(nameCharArb, { minLength: 1, maxLength: 50 })
    .filter((candidate) => candidate.trim().length > 0);

  it("number of top-level bullets equals number of places", () => {
    const namesArb = fc.array(placeNameArb, { minLength: 1, maxLength: 20 });

    fc.assert(
      fc.property(namesArb, (names) => {
        const bulletLines = names.map((n) => `* ${n}`);
        const places = parsePlaces(bulletLines.join("\n"));
        // Every generated name is non-empty after trimming and free of
        // link syntax, so each bullet must yield exactly one place.
        expect(places.length).toBe(names.length);
      })
    );
  });

  it("startLine is always <= endLine", () => {
    const namesArb = fc.array(placeNameArb, { minLength: 1, maxLength: 10 });

    fc.assert(
      fc.property(namesArb, (names) => {
        const blocks = names.map((n) => `* ${n}\n\t* A note`);
        const places = parsePlaces(blocks.join("\n"));
        places.forEach((place) => {
          expect(place.startLine).toBeLessThanOrEqual(place.endLine);
        });
      })
    );
  });

  it("line ranges never overlap between places", () => {
    const namesArb = fc.array(placeNameArb, { minLength: 2, maxLength: 10 });

    fc.assert(
      fc.property(namesArb, (names) => {
        const blocks = names.map((n) => `* ${n}\n\t* note`);
        const places = parsePlaces(blocks.join("\n"));
        // Compare each place with the one immediately before it.
        places.slice(1).forEach((place, prevIndex) => {
          expect(place.startLine).toBeGreaterThan(places[prevIndex].endLine);
        });
      })
    );
  });

  it("valid geo coordinates always produce defined lat/lng within bounds", () => {
    const latArb = fc.double({
      min: -90,
      max: 90,
      noNaN: true,
      noDefaultInfinity: true,
    });
    const lngArb = fc.double({
      min: -180,
      max: 180,
      noNaN: true,
      noDefaultInfinity: true,
    });

    fc.assert(
      fc.property(latArb, lngArb, (lat, lng) => {
        // Fixed-decimal formatting avoids trailing-dot output
        const geoValue = `${lat.toFixed(6)},${lng.toFixed(6)}`;
        const places = parsePlaces(`* Place\n\t* geo: ${geoValue}`);

        expect(places).toHaveLength(1);
        const [place] = places;
        expect(place.lat).toBeDefined();
        expect(place.lng).toBeDefined();
        expect(place.lat!).toBeGreaterThanOrEqual(-90);
        expect(place.lat!).toBeLessThanOrEqual(90);
        expect(place.lng!).toBeGreaterThanOrEqual(-180);
        expect(place.lng!).toBeLessThanOrEqual(180);
      })
    );
  });

  it("fields record keys are always lowercase", () => {
    // Keys built only from word characters, 1-10 long.
    const keyCharArb = fc.char().filter((c) => /\w/.test(c) && c !== ":");
    const keyArb = fc
      .stringOf(keyCharArb, { minLength: 1, maxLength: 10 })
      .filter((s) => /^\w+$/.test(s));
    const valueArb = fc.string({ minLength: 1, maxLength: 20 });

    fc.assert(
      fc.property(keyArb, valueArb, (key, value) => {
        // Flatten line breaks so the value stays on the sub-bullet line.
        const safeValue = value.replace(/\n/g, " ").replace(/\r/g, " ");
        const places = parsePlaces(`* Place\n\t* ${key}: ${safeValue}`);
        if (places.length === 0) {
          return;
        }
        Object.keys(places[0].fields).forEach((fieldKey) => {
          expect(fieldKey).toBe(fieldKey.toLowerCase());
        });
      })
    );
  });

  it("parsePlaces never throws on arbitrary string input", () => {
    const inputArb = fc.string({ maxLength: 500 });

    fc.assert(
      fc.property(inputArb, (input) => {
        expect(() => parsePlaces(input)).not.toThrow();
      })
    );
  });
});