A CLI for publishing standard.site documents to ATProto sequoia.pub
standard site lexicon cli publishing

Use dedicated libs to parse TOML, YAML #26

merged opened by heaths.dev targeting main from heaths.dev/sequoia: issue15

Fixes #15 by parsing YAML and TOML frontmatter with dedicated libraries that handle multiline strings correctly.

Also adds tests, which were run both before and after the changes. As expected, the multiline string tests failed before the changes.

Labels

None yet.

assignee

None yet.

Participants 1
AT URI
at://did:plc:tg3tb5wukiml4xmxml6qm637/sh.tangled.repo.pull/3mffip2ndxi22
+402 -82
Diff #0
+11
bun.lock
··· 34 34 "@clack/prompts": "^1.0.0", 35 35 "cmd-ts": "^0.14.3", 36 36 "glob": "^13.0.0", 37 + "js-yaml": "^4.1.1", 37 38 "mime-types": "^2.1.35", 38 39 "minimatch": "^10.1.1", 39 40 "open": "^11.0.0", 41 + "smol-toml": "^1.6.0", 40 42 }, 41 43 "devDependencies": { 42 44 "@biomejs/biome": "^2.3.13", 45 + "@types/js-yaml": "^4.0.9", 43 46 "@types/mime-types": "^3.0.1", 44 47 "@types/node": "^20", 45 48 }, ··· 581 584 582 585 "@types/hast": ["@types/hast@3.0.4", "", { "dependencies": { "@types/unist": "*" } }, "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ=="], 583 586 587 + "@types/js-yaml": ["@types/js-yaml@4.0.9", "", {}, "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="], 588 + 584 589 "@types/mdast": ["@types/mdast@4.0.4", "", { "dependencies": { "@types/unist": "*" } }, "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA=="], 585 590 586 591 "@types/mdx": ["@types/mdx@2.0.13", "", {}, "sha512-+OWZQfAYyio6YkJb3HLxDrvnx6SWWDbC0zVPfBRzUk0/nqoDyf6dNxQi3eArPe8rJ473nobTMQ/8Zk+LxJ+Yuw=="], ··· 623 628 624 629 "ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="], 625 630 631 + "argparse": ["argparse@2.0.1", "", {}, "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="], 632 + 626 633 "aria-hidden": ["aria-hidden@1.2.6", "", { "dependencies": { "tslib": "^2.0.0" } }, "sha512-ik3ZgC9dY/lYVVM++OISsaYDeg1tb0VtP5uL3ouh1koGOaUMDPpbFIei4JkFimWUFPn90sbMNMXQAIVOlnYKJA=="], 627 634 628 635 "astring": ["astring@1.9.0", "", { "bin": { "astring": "bin/astring" } }, "sha512-LElXdjswlqjWrPpJFg1Fx4wpkOCxj1TDHlSV4PlaRxHGWko024xICaa97ZkMfs6DRKlCguiAI+rbXv5GWwXIkg=="], ··· 1003 1010 1004 1011 "js-tokens": ["js-tokens@4.0.0", "", {}, 
"sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ=="], 1005 1012 1013 + "js-yaml": ["js-yaml@4.1.1", "", { "dependencies": { "argparse": "^2.0.1" }, "bin": { "js-yaml": "bin/js-yaml.js" } }, "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA=="], 1014 + 1006 1015 "jsesc": ["jsesc@3.1.0", "", { "bin": { "jsesc": "bin/jsesc" } }, "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA=="], 1007 1016 1008 1017 "json5": ["json5@2.2.3", "", { "bin": { "json5": "lib/cli.js" } }, "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg=="], ··· 1375 1384 1376 1385 "sisteransi": ["sisteransi@1.0.5", "", {}, "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg=="], 1377 1386 1387 + "smol-toml": ["smol-toml@1.6.0", "", {}, "sha512-4zemZi0HvTnYwLfrpk/CF9LOd9Lt87kAt50GnqhMpyF9U3poDAP2+iukq2bZsO/ufegbYehBkqINbsWxj4l4cw=="], 1388 + 1378 1389 "source-map": ["source-map@0.7.6", "", {}, "sha512-i5uvt8C3ikiWeNZSVZNWcfZPItFQOsYTUAOkcUPGd8DqDy1uOUikjt5dG+uRlwyvR108Fb9DOd4GvXfT0N2/uQ=="], 1379 1390 1380 1391 "source-map-js": ["source-map-js@1.2.1", "", {}, "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA=="],
+5 -1
packages/cli/package.json
··· 17 17 "lint": "biome lint --write", 18 18 "format": "biome format --write", 19 19 "build": "bun build src/index.ts --target node --outdir dist && mkdir -p dist/components && cp src/components/*.js dist/components/", 20 + "test": "bun test", 20 21 "dev": "bun run build && bun link", 21 22 "deploy": "bun run build && bun publish" 22 23 }, 23 24 "devDependencies": { 24 25 "@biomejs/biome": "^2.3.13", 26 + "@types/js-yaml": "^4.0.9", 25 27 "@types/mime-types": "^3.0.1", 26 28 "@types/node": "^20" 27 29 }, ··· 34 36 "@clack/prompts": "^1.0.0", 35 37 "cmd-ts": "^0.14.3", 36 38 "glob": "^13.0.0", 39 + "js-yaml": "^4.1.1", 37 40 "mime-types": "^2.1.35", 38 41 "minimatch": "^10.1.1", 39 - "open": "^11.0.0" 42 + "open": "^11.0.0", 43 + "smol-toml": "^1.6.0" 40 44 } 41 45 }
+9 -81
packages/cli/src/lib/markdown.ts
··· 1 1 import * as fs from "node:fs/promises"; 2 2 import * as path from "node:path"; 3 3 import { glob } from "glob"; 4 + import yaml from "js-yaml"; 4 5 import { minimatch } from "minimatch"; 6 + import * as toml from "smol-toml"; 5 7 import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types"; 6 8 7 9 export function parseFrontmatter( ··· 31 33 // +++ uses TOML (key = value) 32 34 // --- and *** use YAML (key: value) 33 35 const isToml = delimiter === "+++"; 34 - const separator = isToml ? "=" : ":"; 35 - 36 - // Parse frontmatter manually 37 - const raw: Record<string, unknown> = {}; 38 - const lines = frontmatterStr.split("\n"); 39 - 40 - let i = 0; 41 - while (i < lines.length) { 42 - const line = lines[i]; 43 - if (line === undefined) { 44 - i++; 45 - continue; 46 - } 47 - const sepIndex = line.indexOf(separator); 48 - if (sepIndex === -1) { 49 - i++; 50 - continue; 51 - } 52 - 53 - const key = line.slice(0, sepIndex).trim(); 54 - let value = line.slice(sepIndex + 1).trim(); 55 36 56 - // Handle quoted strings 57 - if ( 58 - (value.startsWith('"') && value.endsWith('"')) || 59 - (value.startsWith("'") && value.endsWith("'")) 60 - ) { 61 - value = value.slice(1, -1); 62 - } 63 - 64 - // Handle inline arrays (simple case for tags) 65 - if (value.startsWith("[") && value.endsWith("]")) { 66 - const arrayContent = value.slice(1, -1); 67 - raw[key] = arrayContent 68 - .split(",") 69 - .map((item) => item.trim().replace(/^["']|["']$/g, "")); 70 - } else if (value === "" && !isToml) { 71 - // Check for YAML-style multiline array (key with no value followed by - items) 72 - const arrayItems: string[] = []; 73 - let j = i + 1; 74 - while (j < lines.length) { 75 - const nextLine = lines[j]; 76 - if (nextLine === undefined) { 77 - j++; 78 - continue; 79 - } 80 - // Check if line is a list item (starts with whitespace and -) 81 - const listMatch = nextLine.match(/^\s+-\s*(.*)$/); 82 - if (listMatch && listMatch[1] !== undefined) { 83 - let itemValue = 
listMatch[1].trim(); 84 - // Remove quotes if present 85 - if ( 86 - (itemValue.startsWith('"') && itemValue.endsWith('"')) || 87 - (itemValue.startsWith("'") && itemValue.endsWith("'")) 88 - ) { 89 - itemValue = itemValue.slice(1, -1); 90 - } 91 - arrayItems.push(itemValue); 92 - j++; 93 - } else if (nextLine.trim() === "") { 94 - // Skip empty lines within the array 95 - j++; 96 - } else { 97 - // Hit a new key or non-list content 98 - break; 99 - } 100 - } 101 - if (arrayItems.length > 0) { 102 - raw[key] = arrayItems; 103 - i = j; 104 - continue; 105 - } else { 106 - raw[key] = value; 107 - } 108 - } else if (value === "true") { 109 - raw[key] = true; 110 - } else if (value === "false") { 111 - raw[key] = false; 112 - } else { 113 - raw[key] = value; 114 - } 115 - i++; 37 + // Parse frontmatter using the appropriate library 38 + let raw: Record<string, unknown>; 39 + if (isToml) { 40 + raw = toml.parse(frontmatterStr) as Record<string, unknown>; 41 + } else { 42 + // Use CORE_SCHEMA to keep dates as strings rather than Date objects 43 + raw = (yaml.load(frontmatterStr, { schema: yaml.CORE_SCHEMA }) as Record<string, unknown>) ?? {}; 116 44 } 117 45 118 46 // Apply field mappings to normalize to standard PostFrontmatter fields
+377
packages/cli/test/markdown.test.ts
··· 1 + import { describe, expect, it } from "bun:test"; 2 + import { parseFrontmatter } from "../src/lib/markdown"; 3 + 4 + describe("parseFrontmatter", () => { 5 + describe("delimiters", () => { 6 + it("parses YAML frontmatter (--- delimiter)", () => { 7 + const content = `--- 8 + title: Hello World 9 + --- 10 + Body content here.`; 11 + const { frontmatter, body } = parseFrontmatter(content); 12 + expect(frontmatter.title).toBe("Hello World"); 13 + expect(body).toBe("Body content here."); 14 + }); 15 + 16 + it("parses TOML frontmatter (+++ delimiter)", () => { 17 + const content = `+++ 18 + title = "Hugo Post" 19 + +++ 20 + Body content here.`; 21 + const { frontmatter, body } = parseFrontmatter(content); 22 + expect(frontmatter.title).toBe("Hugo Post"); 23 + expect(body).toBe("Body content here."); 24 + }); 25 + 26 + it("parses alternative frontmatter (*** delimiter)", () => { 27 + const content = `*** 28 + title: Alt Post 29 + *** 30 + Body content here.`; 31 + const { frontmatter, body } = parseFrontmatter(content); 32 + expect(frontmatter.title).toBe("Alt Post"); 33 + expect(body).toBe("Body content here."); 34 + }); 35 + 36 + it("throws when no frontmatter is present", () => { 37 + const content = "Just plain content with no frontmatter."; 38 + expect(() => parseFrontmatter(content)).toThrow( 39 + "Could not parse frontmatter", 40 + ); 41 + }); 42 + }); 43 + 44 + describe("scalar values", () => { 45 + it("parses a string value", () => { 46 + const content = `--- 47 + title: My Post 48 + description: A short description 49 + --- 50 + `; 51 + const { frontmatter } = parseFrontmatter(content); 52 + expect(frontmatter.title).toBe("My Post"); 53 + expect(frontmatter.description).toBe("A short description"); 54 + }); 55 + 56 + it("strips double quotes from values", () => { 57 + const content = `--- 58 + title: "Quoted Title" 59 + --- 60 + `; 61 + const { frontmatter } = parseFrontmatter(content); 62 + expect(frontmatter.title).toBe("Quoted Title"); 63 + }); 64 + 
65 + it("strips single quotes from values", () => { 66 + const content = `--- 67 + title: 'Single Quoted' 68 + --- 69 + `; 70 + const { frontmatter } = parseFrontmatter(content); 71 + expect(frontmatter.title).toBe("Single Quoted"); 72 + }); 73 + 74 + it("parses YAML folded multiline string", () => { 75 + const content = `--- 76 + excerpt: > 77 + This is a folded 78 + multiline string 79 + --- 80 + `; 81 + const { rawFrontmatter } = parseFrontmatter(content); 82 + expect(rawFrontmatter.excerpt).toBe("This is a folded multiline string\n"); 83 + }); 84 + 85 + it("parses YAML stripped folded multiline string", () => { 86 + const content = `--- 87 + excerpt: >- 88 + This is a stripped folded 89 + multiline string 90 + --- 91 + `; 92 + const { rawFrontmatter } = parseFrontmatter(content); 93 + expect(rawFrontmatter.excerpt).toBe("This is a stripped folded multiline string"); 94 + }); 95 + 96 + it("parses YAML literal multiline string", () => { 97 + const content = `--- 98 + excerpt: | 99 + This is a literal 100 + multiline string 101 + --- 102 + `; 103 + const { rawFrontmatter } = parseFrontmatter(content); 104 + expect(rawFrontmatter.excerpt).toBe("This is a literal\nmultiline string\n"); 105 + }); 106 + 107 + it("parses YAML kept literal multiline string", () => { 108 + const content = `--- 109 + excerpt: |+ 110 + This is a kept literal 111 + multiline string 112 + 113 + end: true 114 + --- 115 + `; 116 + const { rawFrontmatter } = parseFrontmatter(content); 117 + expect(rawFrontmatter.excerpt).toBe("This is a kept literal\nmultiline string\n\n"); 118 + }); 119 + 120 + it("parses boolean true", () => { 121 + const content = `--- 122 + draft: true 123 + --- 124 + `; 125 + const { frontmatter } = parseFrontmatter(content); 126 + expect(frontmatter.draft).toBe(true); 127 + }); 128 + 129 + it("parses boolean false", () => { 130 + const content = `--- 131 + draft: false 132 + --- 133 + `; 134 + const { frontmatter } = parseFrontmatter(content); 135 + 
expect(frontmatter.draft).toBe(false); 136 + }); 137 + 138 + it('parses string "true" in draft field as boolean true', () => { 139 + const content = `--- 140 + draft: true 141 + --- 142 + `; 143 + const { rawFrontmatter } = parseFrontmatter(content); 144 + expect(rawFrontmatter.draft).toBe(true); 145 + }); 146 + }); 147 + 148 + describe("arrays", () => { 149 + it("parses inline YAML arrays", () => { 150 + const content = `--- 151 + tags: [typescript, bun, testing] 152 + --- 153 + `; 154 + const { frontmatter } = parseFrontmatter(content); 155 + expect(frontmatter.tags).toEqual(["typescript", "bun", "testing"]); 156 + }); 157 + 158 + it("parses inline YAML arrays with quoted items", () => { 159 + const content = `--- 160 + tags: ["typescript", "bun", "testing"] 161 + --- 162 + `; 163 + const { frontmatter } = parseFrontmatter(content); 164 + expect(frontmatter.tags).toEqual(["typescript", "bun", "testing"]); 165 + }); 166 + 167 + it("parses YAML block arrays", () => { 168 + const content = `--- 169 + tags: 170 + - typescript 171 + - bun 172 + - testing 173 + --- 174 + `; 175 + const { frontmatter } = parseFrontmatter(content); 176 + expect(frontmatter.tags).toEqual(["typescript", "bun", "testing"]); 177 + }); 178 + 179 + it("parses YAML block arrays with quoted items", () => { 180 + const content = `--- 181 + tags: 182 + - "typescript" 183 + - 'bun' 184 + --- 185 + `; 186 + const { frontmatter } = parseFrontmatter(content); 187 + expect(frontmatter.tags).toEqual(["typescript", "bun"]); 188 + }); 189 + 190 + it("parses inline TOML arrays", () => { 191 + const content = `+++ 192 + tags = ["typescript", "bun"] 193 + +++ 194 + `; 195 + const { frontmatter } = parseFrontmatter(content); 196 + expect(frontmatter.tags).toEqual(["typescript", "bun"]); 197 + }); 198 + }); 199 + 200 + describe("publish date fallbacks", () => { 201 + it("uses publishDate field directly", () => { 202 + const content = `--- 203 + publishDate: 2024-01-15 204 + --- 205 + `; 206 + const { 
frontmatter } = parseFrontmatter(content); 207 + expect(frontmatter.publishDate).toBe("2024-01-15"); 208 + }); 209 + 210 + it("falls back to pubDate", () => { 211 + const content = `--- 212 + pubDate: 2024-02-01 213 + --- 214 + `; 215 + const { frontmatter } = parseFrontmatter(content); 216 + expect(frontmatter.publishDate).toBe("2024-02-01"); 217 + }); 218 + 219 + it("falls back to date", () => { 220 + const content = `--- 221 + date: 2024-03-10 222 + --- 223 + `; 224 + const { frontmatter } = parseFrontmatter(content); 225 + expect(frontmatter.publishDate).toBe("2024-03-10"); 226 + }); 227 + 228 + it("falls back to createdAt", () => { 229 + const content = `--- 230 + createdAt: 2024-04-20 231 + --- 232 + `; 233 + const { frontmatter } = parseFrontmatter(content); 234 + expect(frontmatter.publishDate).toBe("2024-04-20"); 235 + }); 236 + 237 + it("falls back to created_at", () => { 238 + const content = `--- 239 + created_at: 2024-05-30 240 + --- 241 + `; 242 + const { frontmatter } = parseFrontmatter(content); 243 + expect(frontmatter.publishDate).toBe("2024-05-30"); 244 + }); 245 + 246 + it("prefers publishDate over other fallbacks", () => { 247 + const content = `--- 248 + publishDate: 2024-01-01 249 + date: 2023-01-01 250 + --- 251 + `; 252 + const { frontmatter } = parseFrontmatter(content); 253 + expect(frontmatter.publishDate).toBe("2024-01-01"); 254 + }); 255 + }); 256 + 257 + describe("rawFrontmatter", () => { 258 + it("returns all raw fields", () => { 259 + const content = `--- 260 + title: Raw Test 261 + custom: value 262 + --- 263 + `; 264 + const { rawFrontmatter } = parseFrontmatter(content); 265 + expect(rawFrontmatter.title).toBe("Raw Test"); 266 + expect(rawFrontmatter.custom).toBe("value"); 267 + }); 268 + 269 + it("preserves atUri in both frontmatter and rawFrontmatter", () => { 270 + const content = `--- 271 + title: Post 272 + atUri: at://did:plc:abc123/app.bsky.feed.post/xyz 273 + --- 274 + `; 275 + const { frontmatter, rawFrontmatter } = 
parseFrontmatter(content); 276 + expect(frontmatter.atUri).toBe( 277 + "at://did:plc:abc123/app.bsky.feed.post/xyz", 278 + ); 279 + expect(rawFrontmatter.atUri).toBe( 280 + "at://did:plc:abc123/app.bsky.feed.post/xyz", 281 + ); 282 + }); 283 + }); 284 + 285 + describe("FrontmatterMapping", () => { 286 + it("maps a custom title field", () => { 287 + const content = `--- 288 + name: My Mapped Title 289 + --- 290 + `; 291 + const { frontmatter } = parseFrontmatter(content, { title: "name" }); 292 + expect(frontmatter.title).toBe("My Mapped Title"); 293 + }); 294 + 295 + it("maps a custom description field", () => { 296 + const content = `--- 297 + summary: Custom description 298 + --- 299 + `; 300 + const { frontmatter } = parseFrontmatter(content, { 301 + description: "summary", 302 + }); 303 + expect(frontmatter.description).toBe("Custom description"); 304 + }); 305 + 306 + it("maps a custom publishDate field", () => { 307 + const content = `--- 308 + publishedOn: 2024-06-15 309 + --- 310 + `; 311 + const { frontmatter } = parseFrontmatter(content, { 312 + publishDate: "publishedOn", 313 + }); 314 + expect(frontmatter.publishDate).toBe("2024-06-15"); 315 + }); 316 + 317 + it("maps a custom coverImage field", () => { 318 + const content = `--- 319 + heroImage: /images/cover.jpg 320 + --- 321 + `; 322 + const { frontmatter } = parseFrontmatter(content, { 323 + coverImage: "heroImage", 324 + }); 325 + expect(frontmatter.ogImage).toBe("/images/cover.jpg"); 326 + }); 327 + 328 + it("maps a custom tags field", () => { 329 + const content = `--- 330 + categories: [news, updates] 331 + --- 332 + `; 333 + const { frontmatter } = parseFrontmatter(content, { tags: "categories" }); 334 + expect(frontmatter.tags).toEqual(["news", "updates"]); 335 + }); 336 + 337 + it("maps a custom draft field", () => { 338 + const content = `--- 339 + unpublished: true 340 + --- 341 + `; 342 + const { frontmatter } = parseFrontmatter(content, { draft: "unpublished" }); 343 + 
expect(frontmatter.draft).toBe(true); 344 + }); 345 + 346 + it("falls back to standard field name when mapped field is absent", () => { 347 + const content = `--- 348 + title: Standard Title 349 + --- 350 + `; 351 + const { frontmatter } = parseFrontmatter(content, { title: "heading" }); 352 + expect(frontmatter.title).toBe("Standard Title"); 353 + }); 354 + }); 355 + 356 + describe("body", () => { 357 + it("returns the body content after the closing delimiter", () => { 358 + const content = `--- 359 + title: Post 360 + --- 361 + # Heading 362 + 363 + Some paragraph text.`; 364 + const { body } = parseFrontmatter(content); 365 + expect(body).toBe("# Heading\n\nSome paragraph text."); 366 + }); 367 + 368 + it("returns an empty body when there is no content after frontmatter", () => { 369 + const content = `--- 370 + title: Post 371 + --- 372 + `; 373 + const { body } = parseFrontmatter(content); 374 + expect(body).toBe(""); 375 + }); 376 + }); 377 + });

History

1 round 0 comments
sign up or login to add to the discussion
heaths.dev submitted #0
1 commit
expand
Use dedicated libs to parse TOML, YAML
expand 0 comments
pull request successfully merged