A CLI for publishing standard.site documents to ATProto
at main 418 lines 11 kB view raw
import { webcrypto as crypto } from "node:crypto";
import * as fs from "node:fs/promises";
import * as path from "node:path";
import { glob } from "glob";
import { minimatch } from "minimatch";
import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";

/**
 * Matches a frontmatter block at the very start of a document.
 *
 * Group 1 is the delimiter (`---`, `+++` or `***`), group 2 the frontmatter
 * text, group 3 the body. Both LF and CRLF line endings are accepted, and the
 * closing delimiter may be the last thing in the file (group 3 is then
 * undefined).
 */
const FRONTMATTER_PATTERN =
  /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n([\s\S]*))?$/;

/** Keys whose presence marks an argument as `ScanOptions` rather than a `FrontmatterMapping`. */
const SCAN_OPTION_KEYS = [
  "frontmatterMapping",
  "ignorePatterns",
  "slugField",
  "removeIndexFromSlug",
  "stripDatePrefix",
] as const;

/** Removes one pair of matching surrounding quotes (`"…"` or `'…'`), if present. */
function stripQuotes(value: string): string {
  if (value.length < 2) {
    // A lone quote character is not a quoted string.
    return value;
  }
  const first = value[0];
  const last = value[value.length - 1];
  if ((first === '"' || first === "'") && first === last) {
    return value.slice(1, -1);
  }
  return value;
}

/**
 * Splits a document into frontmatter and body and normalizes the frontmatter.
 *
 * Supported delimiters: `---` and `***` (parsed as `key: value`) and `+++`
 * (parsed as `key = value`). Only a flat subset is understood: scalar values,
 * `true`/`false`, inline arrays (`[a, b]`) and, for the `:` formats, block
 * lists (`- item` lines following a key with no value).
 *
 * When the document has no frontmatter block, the title is taken from the
 * first `# ` heading (empty string if there is none) and the publish date is
 * today's date as produced by `Date.prototype.toISOString()`.
 *
 * @param content - Full text of the document.
 * @param mapping - Optional names of the frontmatter keys to read the
 *   standard fields from.
 * @returns The normalized frontmatter, the body text, and the raw key/value
 *   pairs exactly as parsed.
 */
export function parseFrontmatter(
  content: string,
  mapping?: FrontmatterMapping,
): {
  frontmatter: PostFrontmatter;
  body: string;
  rawFrontmatter: Record<string, unknown>;
} {
  const match = content.match(FRONTMATTER_PATTERN);

  if (!match) {
    const [, titleMatch] = content.trim().match(/^# (.+)$/m) || [];
    const title = titleMatch ?? "";
    const publishDate = new Date().toISOString().split("T")[0] ?? "";

    return {
      frontmatter: {
        title,
        publishDate,
      },
      body: content,
      // Mirror the synthesized values under their own keys.
      rawFrontmatter: {
        title,
        publishDate,
      },
    };
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // +++ uses `key = value`; --- and *** use `key: value`.
  const isToml = delimiter === "+++";
  const separator = isToml ? "=" : ":";

  const raw: Record<string, unknown> = {};
  const lines = frontmatterStr.split(/\r?\n/);

  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    if (line === undefined) {
      i++;
      continue;
    }
    const sepIndex = line.indexOf(separator);
    if (sepIndex === -1) {
      // Not a `key<sep>value` line; ignore it.
      i++;
      continue;
    }

    const key = line.slice(0, sepIndex).trim();
    const value = stripQuotes(line.slice(sepIndex + 1).trim());

    if (value.startsWith("[") && value.endsWith("]")) {
      // Inline array, e.g. `tags: [a, "b"]`.
      const arrayContent = value.slice(1, -1);
      raw[key] =
        arrayContent.trim() === ""
          ? [] // `[]` is an empty list, not a list holding one empty string
          : arrayContent
              .split(",")
              .map((item) => item.trim().replace(/^["']|["']$/g, ""));
    } else if (value === "" && !isToml) {
      // A key with no value may be followed by a block list of `- item` lines.
      const arrayItems: string[] = [];
      let j = i + 1;
      while (j < lines.length) {
        const nextLine = lines[j];
        if (nextLine === undefined) {
          j++;
          continue;
        }
        // List items may be indented or flush with the key.
        const listMatch = nextLine.match(/^\s*-\s*(.*)$/);
        if (listMatch && listMatch[1] !== undefined) {
          arrayItems.push(stripQuotes(listMatch[1].trim()));
          j++;
        } else if (nextLine.trim() === "") {
          // Blank lines inside the list are skipped.
          j++;
        } else {
          // Next key or other non-list content ends the list.
          break;
        }
      }
      if (arrayItems.length > 0) {
        raw[key] = arrayItems;
        // Resume at the first line that was not part of the list.
        i = j;
        continue;
      }
      raw[key] = value;
    } else if (value === "true") {
      raw[key] = true;
    } else if (value === "false") {
      raw[key] = false;
    } else {
      raw[key] = value;
    }
    i++;
  }

  // Apply field mappings to normalize to the standard PostFrontmatter fields.
  const frontmatter: Record<string, unknown> = {};

  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date: mapped field first, then `publishDate`, then common aliases.
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
    for (const field of dateFields) {
      if (raw[field]) {
        frontmatter.publishDate = raw[field];
        break;
      }
    }
  }

  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // `draft` is only set when the source mentions it; both the boolean and the
  // string "true" count as true.
  const draftField = mapping?.draft || "draft";
  const draftValue = raw[draftField] ?? raw.draft;
  if (draftValue !== undefined) {
    frontmatter.draft = draftValue === true || draftValue === "true";
  }

  // Always preserve atUri (internal field).
  frontmatter.atUri = raw.atUri;

  return {
    frontmatter: frontmatter as unknown as PostFrontmatter,
    body,
    rawFrontmatter: raw,
  };
}

/**
 * Derives a slug from a file name or relative path: drops a trailing `.md` /
 * `.mdx` extension, lowercases, and replaces each whitespace run with `-`.
 *
 * @param filename - File name or relative path.
 * @returns The slug.
 */
export function getSlugFromFilename(filename: string): string {
  return filename
    .replace(/\.mdx?$/, "")
    .toLowerCase()
    .replace(/\s+/g, "-");
}

/** Options controlling how `getSlugFromOptions` builds a slug. */
export interface SlugOptions {
  /** Frontmatter key to read the slug from; the file path is used when it is unset or not a non-empty string. */
  slugField?: string;
  /** Remove a trailing `/index` or `/_index` segment. */
  removeIndexFromSlug?: boolean;
  /** Remove `YYYY-MM-DD-` prefixes at the start of the slug or of a path segment. */
  stripDatePrefix?: boolean;
}

/**
 * Builds a slug from a frontmatter field when configured, otherwise from the
 * file's relative path, then applies the optional clean-ups.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Raw frontmatter key/value pairs of the file.
 * @param options - Slug options; all default to off / unset.
 * @returns The slug.
 */
export function getSlugFromOptions(
  relativePath: string,
  rawFrontmatter: Record<string, unknown>,
  options: SlugOptions = {},
): string {
  const {
    slugField,
    removeIndexFromSlug = false,
    stripDatePrefix = false,
  } = options;

  const frontmatterValue = slugField ? rawFrontmatter[slugField] : undefined;

  let slug: string;
  if (frontmatterValue && typeof frontmatterValue === "string") {
    // Frontmatter slugs may carry a leading slash; the extension is left alone.
    slug = frontmatterValue
      .replace(/^\//, "")
      .toLowerCase()
      .replace(/\s+/g, "-");
  } else {
    // No slug field configured, or it is missing / not a string: use the path.
    slug = getSlugFromFilename(relativePath);
  }

  if (removeIndexFromSlug) {
    slug = slug.replace(/\/_?index$/, "");
  }

  if (stripDatePrefix) {
    slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
  }

  return slug;
}

/**
 * Computes the SHA-256 digest of the UTF-8 encoded content.
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
export async function getContentHash(content: string): Promise<string> {
  const data = new TextEncoder().encode(content);
  const hashBuffer = await crypto.subtle.digest("SHA-256", data);
  return Array.from(new Uint8Array(hashBuffer))
    .map((b) => b.toString(16).padStart(2, "0"))
    .join("");
}

/** Returns true when the path matches at least one of the minimatch patterns. */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((pattern) => minimatch(relativePath, pattern));
}

/** Options accepted by `scanContentDirectory`. */
export interface ScanOptions {
  /** Names of the frontmatter keys to read the standard fields from. */
  frontmatterMapping?: FrontmatterMapping;
  /** Minimatch patterns, tested against paths relative to the content directory. */
  ignorePatterns?: string[];
  /** See `SlugOptions.slugField`. */
  slugField?: string;
  /** See `SlugOptions.removeIndexFromSlug`. */
  removeIndexFromSlug?: boolean;
  /** See `SlugOptions.stripDatePrefix`. */
  stripDatePrefix?: boolean;
}

/** Tells a `ScanOptions` object apart from a bare `FrontmatterMapping` by its keys. */
function isScanOptions(
  value: FrontmatterMapping | ScanOptions,
): value is ScanOptions {
  return SCAN_OPTION_KEYS.some((key) => key in value);
}

/** Publish time in ms since the epoch; -Infinity when the date is missing or unparseable. */
function getPublishTime(post: BlogPost): number {
  const time = new Date(post.frontmatter.publishDate).getTime();
  return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
}

/**
 * Finds every `.md` and `.mdx` file under `contentDir` and parses it into a
 * post.
 *
 * Two call shapes are supported:
 * `scanContentDirectory(dir, options)` and the older
 * `scanContentDirectory(dir, frontmatterMapping?, ignorePatterns?)`.
 * When an options object is passed, the third argument is not used.
 *
 * Errors thrown while parsing a file are logged with `console.error` and the
 * file is skipped; errors while reading a file are not caught here.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - Scan options, or a frontmatter mapping
 *   (older signature).
 * @param ignorePatterns - Ignore patterns (older signature only).
 * @returns The posts, newest publish date first; posts without a valid
 *   publish date come last.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
  ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
  let options: ScanOptions;
  if (frontmatterMappingOrOptions && isScanOptions(frontmatterMappingOrOptions)) {
    options = frontmatterMappingOrOptions;
  } else {
    // Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
    options = {
      frontmatterMapping: frontmatterMappingOrOptions as
        | FrontmatterMapping
        | undefined,
      ignorePatterns,
    };
  }

  const {
    frontmatterMapping,
    ignorePatterns: ignore = [],
    slugField,
    removeIndexFromSlug,
    stripDatePrefix,
  } = options;

  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
    });

    for (const relativePath of files) {
      if (shouldIgnore(relativePath, ignore)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
          rawContent,
          frontmatterMapping,
        );
        const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
          slugField,
          removeIndexFromSlug,
          stripDatePrefix,
        });

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
          rawFrontmatter,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Newest first. Compare explicitly instead of subtracting so that missing or
  // invalid dates (-Infinity) never produce a NaN comparator result.
  posts.sort((a, b) => {
    const timeA = getPublishTime(a);
    const timeB = getPublishTime(b);
    if (timeA === timeB) {
      return 0;
    }
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}

/**
 * Returns the index at which the closing frontmatter delimiter line starts,
 * or -1 when no line after the first consists of exactly the delimiter.
 */
function findClosingDelimiterIndex(content: string, delimiter: string): number {
  let lineStart = content.indexOf("\n") + 1;
  if (lineStart === 0) {
    // Single-line document: there is no second line to close the block.
    return -1;
  }
  while (lineStart <= content.length) {
    const newlineIndex = content.indexOf("\n", lineStart);
    const lineEnd = newlineIndex === -1 ? content.length : newlineIndex;
    const line = content.slice(lineStart, lineEnd).replace(/\r$/, "");
    if (line === delimiter) {
      return lineStart;
    }
    if (newlineIndex === -1) {
      break;
    }
    lineStart = newlineIndex + 1;
  }
  return -1;
}

/**
 * Writes `atUri` into a document's frontmatter.
 *
 * - No leading delimiter: a new `---` frontmatter block holding only `atUri`
 *   is prepended.
 * - Frontmatter already has an `atUri` line: that line is replaced.
 * - Otherwise: the entry is inserted on its own line just before the closing
 *   delimiter.
 *
 * The entry is written as `atUri = "…"` for `+++` frontmatter and
 * `atUri: "…"` for the other delimiters.
 *
 * @param rawContent - Full text of the document.
 * @param atUri - Value to store.
 * @returns The updated document text.
 * @throws Error when the document starts with a delimiter, contains no
 *   `atUri` line, and has no closing delimiter line.
 */
export function updateFrontmatterWithAtUri(
  rawContent: string,
  atUri: string,
): string {
  // Detect which delimiter is used (---, +++, or ***).
  const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
  const delimiter = delimiterMatch?.[1] ?? "---";
  const isToml = delimiter === "+++";

  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // No frontmatter: create one with atUri.
  if (!delimiterMatch) {
    return `---\n${atUriEntry}\n---\n\n${rawContent}`;
  }

  const closingIndex = findClosingDelimiterIndex(rawContent, delimiter);

  // Look for an existing entry inside the frontmatter only, so that a body
  // mentioning "atUri:" is never edited. With no closing delimiter the whole
  // document is searched.
  const head =
    closingIndex === -1 ? rawContent : rawContent.slice(0, closingIndex);
  const tail = closingIndex === -1 ? "" : rawContent.slice(closingIndex);

  // `.` does not match line terminators, so the match stays on one line and
  // cannot swallow the closing delimiter.
  const existingEntry = /^[ \t]*atUri[ \t]*[=:].*$/m;
  if (existingEntry.test(head)) {
    // Function replacer: `$` sequences in the URI must not be expanded.
    return head.replace(existingEntry, () => atUriEntry) + tail;
  }

  if (closingIndex === -1) {
    throw new Error("Could not find frontmatter end");
  }

  // Insert before the closing delimiter, matching the file's line ending.
  const eol = head.endsWith("\r\n") ? "\r\n" : "\n";
  return `${head}${atUriEntry}${eol}${tail}`;
}

/**
 * Reduces markdown to plain text using a fixed sequence of regex
 * substitutions: fenced code blocks and images are removed; links, bold,
 * italic and inline code keep their text; heading markers at the start of a
 * line are dropped; runs of three or more newlines collapse to two.
 *
 * @param markdown - Markdown source.
 * @returns The trimmed plain text.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks first so their contents are not rewritten
    .replace(/!\[.*?\]\(.*?\)/g, "") // Remove images (before links, which share the `[..](..)` shape)
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
    .replace(/^[ \t]*#{1,6}[ \t]+/gm, "") // Remove header markers at line start only
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}

/**
 * Returns the plain-text content of a post.
 *
 * @param post - The post body and, optionally, its raw frontmatter.
 * @param textContentField - Frontmatter key holding ready-made text. When set
 *   and the value is truthy, it is returned via `String(...)`.
 * @returns The frontmatter text if available, otherwise the post body passed
 *   through `stripMarkdownForText`.
 */
export function getTextContent(
  post: { content: string; rawFrontmatter?: Record<string, unknown> },
  textContentField?: string,
): string {
  if (textContentField && post.rawFrontmatter?.[textContentField]) {
    return String(post.rawFrontmatter[textContentField]);
  }
  return stripMarkdownForText(post.content);
}