A CLI for publishing standard.site documents to ATProto
at main 436 lines 12 kB view raw
import * as fs from "node:fs/promises";
import * as path from "node:path";
import { glob } from "glob";
import { minimatch } from "minimatch";
import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";

/**
 * Matches a leading frontmatter block and the body that follows it.
 *
 * Supported delimiters:
 *   --- (YAML) - Jekyll, Astro, most SSGs
 *   +++ (TOML) - Hugo
 *   *** - Alternative format
 *
 * Both LF and CRLF line endings are accepted, and the closing delimiter may
 * be the very last line of the file (no trailing newline, empty body).
 */
const FRONTMATTER_REGEX =
  /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n([\s\S]*))?$/;

/** Fallback frontmatter keys consulted, in order, when no publish date is mapped. */
const FALLBACK_DATE_FIELDS = ["pubDate", "date", "createdAt", "created_at"];

/** Removes one pair of matching surrounding quotes (single or double), if present. */
function stripMatchingQuotes(value: string): string {
  const doubleQuoted = value.startsWith('"') && value.endsWith('"');
  const singleQuoted = value.startsWith("'") && value.endsWith("'");
  return doubleQuoted || singleQuoted ? value.slice(1, -1) : value;
}

/** Splits the inside of an inline array (`a, "b", 'c'`) into unquoted items. */
function parseInlineArray(arrayContent: string): string[] {
  // `[]` must yield an empty list, not a list holding one empty string.
  if (arrayContent.trim() === "") {
    return [];
  }
  return arrayContent
    .split(",")
    .map((item) => item.trim().replace(/^["']|["']$/g, ""));
}

/**
 * Collects the items of a YAML block sequence starting at `startIndex`.
 * Blank lines inside the sequence are skipped; the first other non-item line
 * ends it. Returns the items and the index of the first unconsumed line.
 */
function collectYamlListItems(
  lines: string[],
  startIndex: number,
): { items: string[]; nextIndex: number } {
  const items: string[] = [];
  let index = startIndex;
  while (index < lines.length) {
    const candidate = lines[index];
    if (candidate === undefined) {
      index++;
      continue;
    }
    // Leading indentation is optional: YAML allows "- item" directly under the key.
    const listMatch = candidate.match(/^\s*-\s*(.*)$/);
    if (listMatch && listMatch[1] !== undefined) {
      items.push(stripMatchingQuotes(listMatch[1].trim()));
      index++;
    } else if (candidate.trim() === "") {
      index++;
    } else {
      // Hit a new key or non-list content
      break;
    }
  }
  return { items, nextIndex: index };
}

/**
 * Parses the flat `key: value` (YAML) or `key = value` (TOML) lines of a
 * frontmatter block. This is a deliberately small hand-rolled parser: it
 * understands quoted strings, booleans, inline arrays and (YAML only)
 * block sequences, and nothing else.
 */
function parseFrontmatterLines(
  lines: string[],
  isToml: boolean,
): Record<string, unknown> {
  const separator = isToml ? "=" : ":";
  const raw: Record<string, unknown> = {};

  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    i++;
    // `#` starts a comment in both YAML and TOML; never treat it as a key.
    if (line === undefined || line.trimStart().startsWith("#")) {
      continue;
    }
    const sepIndex = line.indexOf(separator);
    if (sepIndex === -1) {
      continue;
    }

    const key = line.slice(0, sepIndex).trim();
    const value = stripMatchingQuotes(line.slice(sepIndex + 1).trim());

    if (value.startsWith("[") && value.endsWith("]")) {
      // Inline array (simple case for tags)
      raw[key] = parseInlineArray(value.slice(1, -1));
    } else if (value === "" && !isToml) {
      // A YAML key with no value may be followed by "- item" lines.
      const { items, nextIndex } = collectYamlListItems(lines, i);
      if (items.length > 0) {
        raw[key] = items;
        i = nextIndex;
      } else {
        raw[key] = value;
      }
    } else if (value === "true") {
      raw[key] = true;
    } else if (value === "false") {
      raw[key] = false;
    } else {
      raw[key] = value;
    }
  }

  return raw;
}

/**
 * Splits a document into frontmatter and body, and normalizes the frontmatter
 * to the standard `PostFrontmatter` field names.
 *
 * @param content - Full file contents, starting with a frontmatter delimiter.
 * @param mapping - Optional custom frontmatter key names for the standard fields.
 * @returns The normalized frontmatter, the body after the closing delimiter,
 *   and every parsed key/value pair as `rawFrontmatter`.
 * @throws Error("Could not parse frontmatter") when no delimited block is found.
 */
export function parseFrontmatter(
  content: string,
  mapping?: FrontmatterMapping,
): {
  frontmatter: PostFrontmatter;
  body: string;
  rawFrontmatter: Record<string, unknown>;
} {
  const match = content.match(FRONTMATTER_REGEX);

  if (!match) {
    throw new Error("Could not parse frontmatter");
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // +++ uses TOML (key = value); --- and *** use YAML (key: value)
  const isToml = delimiter === "+++";
  const raw = parseFrontmatterLines(frontmatterStr.split(/\r?\n/), isToml);

  // Apply field mappings to normalize to standard PostFrontmatter fields
  const frontmatter: Record<string, unknown> = {};

  // Title mapping
  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  // Description mapping
  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date mapping - check custom field first, then fallbacks
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    const fallbackField = FALLBACK_DATE_FIELDS.find((field) => raw[field]);
    if (fallbackField !== undefined) {
      frontmatter.publishDate = raw[fallbackField];
    }
  }

  // Cover image mapping
  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  // Tags mapping
  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Draft mapping: only set when present so "absent" stays distinguishable from false
  const draftField = mapping?.draft || "draft";
  const draftValue = raw[draftField] ?? raw.draft;
  if (draftValue !== undefined) {
    frontmatter.draft = draftValue === true || draftValue === "true";
  }

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  return {
    // The hand-built record is trusted to match PostFrontmatter; it is not validated here.
    frontmatter: frontmatter as unknown as PostFrontmatter,
    body,
    rawFrontmatter: raw,
  };
}

/**
 * Derives a slug from a file name or relative path: drops a trailing
 * `.md`/`.mdx` extension, lowercases, and turns whitespace runs into `-`.
 */
export function getSlugFromFilename(filename: string): string {
  return filename
    .replace(/\.mdx?$/, "")
    .toLowerCase()
    .replace(/\s+/g, "-");
}

/** Options controlling how a post slug is derived. */
export interface SlugOptions {
  /** Frontmatter key whose string value, when present, overrides the path-based slug. */
  slugField?: string;
  /** Strip a trailing `/index` or `/_index` segment from the slug. */
  removeIndexFromSlug?: boolean;
  /** Strip a Jekyll-style `YYYY-MM-DD-` prefix from each path segment. */
  stripDatePrefix?: boolean;
}

/**
 * Computes the slug for a post.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Parsed frontmatter key/value pairs.
 * @param options - See {@link SlugOptions}.
 * @returns The frontmatter-provided slug when `slugField` names a non-empty
 *   string value, otherwise a slug derived from `relativePath`.
 */
export function getSlugFromOptions(
  relativePath: string,
  rawFrontmatter: Record<string, unknown>,
  options: SlugOptions = {},
): string {
  const {
    slugField,
    removeIndexFromSlug = false,
    stripDatePrefix = false,
  } = options;

  const frontmatterValue = slugField ? rawFrontmatter[slugField] : undefined;

  let slug: string;
  if (frontmatterValue && typeof frontmatterValue === "string") {
    // Remove leading slash if present
    slug = frontmatterValue
      .replace(/^\//, "")
      .toLowerCase()
      .replace(/\s+/g, "-");
  } else {
    // Use "/" regardless of platform so the "/"-based rules below also apply
    // to native Windows paths. This is a no-op where path.sep is already "/".
    slug = getSlugFromFilename(relativePath.split(path.sep).join("/"));
  }

  // Remove /index or /_index suffix if configured
  if (removeIndexFromSlug) {
    slug = slug.replace(/\/_?index$/, "");
  }

  // Strip Jekyll-style date prefix (YYYY-MM-DD-) from filename
  if (stripDatePrefix) {
    slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
  }

  return slug;
}

/**
 * Expands `{token}` placeholders in a path template.
 *
 * Built-in tokens are `{slug}`, `{year}`, `{month}`, `{day}` and `{title}`
 * (slugified title). Any other `{field}` is replaced by the string value of
 * that raw frontmatter key, or by an empty string when it is missing or not
 * a string. The result always starts with `/`.
 *
 * Substitution is a single pass with a replacer function, so substituted
 * values are inserted literally: `$` sequences are not interpreted and
 * braces inside a value are not expanded again.
 */
export function resolvePathTemplate(template: string, post: BlogPost): string {
  // NOTE(review): date parts use local-time getters. Date-only ISO strings are
  // parsed as UTC, so the day can shift in timezones behind UTC. Confirm intent.
  const publishDate = new Date(post.frontmatter.publishDate);

  const slugifiedTitle = (post.frontmatter.title || "")
    .toLowerCase()
    .replace(/\s+/g, "-")
    .replace(/[^\w-]/g, "");

  const builtInTokens = new Map<string, string>([
    ["slug", post.slug],
    ["year", String(publishDate.getFullYear())],
    ["month", String(publishDate.getMonth() + 1).padStart(2, "0")],
    ["day", String(publishDate.getDate()).padStart(2, "0")],
    ["title", slugifiedTitle],
  ]);

  const result = template.replace(/\{(\w+)\}/g, (_match, field: string) => {
    const builtIn = builtInTokens.get(field);
    if (builtIn !== undefined) {
      return builtIn;
    }
    const value = post.rawFrontmatter[field];
    return typeof value === "string" ? value : "";
  });

  // Ensure leading slash
  return result.startsWith("/") ? result : `/${result}`;
}

/**
 * Resolves the public path of a post.
 *
 * @param post - The post to resolve.
 * @param pathPrefix - Prefix used when no template is given; defaults to `/posts`.
 * @param pathTemplate - Optional template; when set it takes precedence over the prefix.
 */
export function resolvePostPath(
  post: BlogPost,
  pathPrefix?: string,
  pathTemplate?: string,
): string {
  if (pathTemplate) {
    return resolvePathTemplate(pathTemplate, post);
  }
  // Trailing slashes on the prefix would otherwise produce "//slug".
  const prefix = (pathPrefix || "/posts").replace(/\/+$/, "");
  return `${prefix}/${post.slug}`;
}

/** Returns the lowercase hex SHA-256 digest of `content` encoded as UTF-8. */
export async function getContentHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = await crypto.subtle.digest("SHA-256", bytes);
  return Array.from(new Uint8Array(digest), (byte) =>
    byte.toString(16).padStart(2, "0"),
  ).join("");
}

/** True when `relativePath` matches at least one of the minimatch patterns. */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((pattern) => minimatch(relativePath, pattern));
}

/** Options accepted by {@link scanContentDirectory}. */
export interface ScanOptions {
  frontmatterMapping?: FrontmatterMapping;
  ignorePatterns?: string[];
  slugField?: string;
  removeIndexFromSlug?: boolean;
  stripDatePrefix?: boolean;
}

/** Keys whose presence marks the second argument as a `ScanOptions` object. */
const SCAN_OPTION_KEYS: readonly (keyof ScanOptions)[] = [
  "frontmatterMapping",
  "ignorePatterns",
  "slugField",
  "removeIndexFromSlug",
  "stripDatePrefix",
];

/** Publish time in ms; posts with a missing or invalid date sort as oldest. */
function getPublishTime(post: BlogPost): number {
  const time = new Date(post.frontmatter.publishDate).getTime();
  return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
}

/**
 * Finds every `.md`/`.mdx` file under `contentDir`, parses it, and returns
 * the posts sorted by publish date, newest first.
 *
 * Two call shapes are supported:
 *   - `scanContentDirectory(dir, options)` (current)
 *   - `scanContentDirectory(dir, frontmatterMapping?, ignorePatterns?)` (legacy)
 *
 * Files whose frontmatter cannot be parsed are logged with `console.error`
 * and skipped. A failure to read a file rejects the returned promise.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
  ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
  // Any ScanOptions key selects the new signature. Checking only some of the
  // keys would misread e.g. `{ stripDatePrefix: true }` as a field mapping.
  const usesOptionsObject =
    frontmatterMappingOrOptions !== undefined &&
    SCAN_OPTION_KEYS.some((key) => key in frontmatterMappingOrOptions);

  const options: ScanOptions = usesOptionsObject
    ? (frontmatterMappingOrOptions as ScanOptions)
    : {
        // Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
        frontmatterMapping: frontmatterMappingOrOptions as
          | FrontmatterMapping
          | undefined,
        ignorePatterns,
      };

  const {
    frontmatterMapping,
    // Fall back to the positional argument so it is never silently dropped.
    ignorePatterns: ignore = ignorePatterns,
    slugField,
    removeIndexFromSlug,
    stripDatePrefix,
  } = options;

  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignore)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
          rawContent,
          frontmatterMapping,
        );
        const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
          slugField,
          removeIndexFromSlug,
          stripDatePrefix,
        });

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
          rawFrontmatter,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Sort by publish date (newest first). Compare explicitly rather than
  // subtracting, so invalid dates never make the comparator return NaN.
  posts.sort((a, b) => {
    const timeA = getPublishTime(a);
    const timeB = getPublishTime(b);
    if (timeA === timeB) {
      return 0;
    }
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}

/**
 * Returns the index where the closing frontmatter delimiter line starts, or
 * -1 when there is none. Only a line consisting of exactly the delimiter
 * counts, so `---` inside a value is not mistaken for the end of the block.
 */
function findClosingDelimiterIndex(content: string, delimiter: string): number {
  let searchFrom = delimiter.length;
  for (;;) {
    const newlineIndex = content.indexOf(`\n${delimiter}`, searchFrom);
    if (newlineIndex === -1) {
      return -1;
    }
    const lineStart = newlineIndex + 1;
    const charAfter = content[lineStart + delimiter.length];
    if (charAfter === undefined || charAfter === "\n" || charAfter === "\r") {
      return lineStart;
    }
    searchFrom = lineStart;
  }
}

/**
 * Writes `atUri` into a document's frontmatter, replacing an existing entry
 * or inserting a new one just before the closing delimiter.
 *
 * Only the frontmatter block is touched; an `atUri:` mention in the body is
 * left alone. The entry is written as `atUri = "..."` for TOML (`+++`) and
 * `atUri: "..."` otherwise.
 *
 * @throws Error("Could not find frontmatter end") when there is no existing
 *   entry to replace and no closing delimiter to insert before.
 */
export function updateFrontmatterWithAtUri(
  rawContent: string,
  atUri: string,
): string {
  // Detect which delimiter is used (---, +++, or ***)
  const delimiter = rawContent.match(/^(---|\+\+\+|\*\*\*)/)?.[1] ?? "---";
  const isToml = delimiter === "+++";

  // Format the atUri entry based on frontmatter type
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  const closingIndex = findClosingDelimiterIndex(rawContent, delimiter);
  const hasClosing = closingIndex !== -1;
  // Without a closing delimiter the whole text is searched for an existing entry.
  const head = hasClosing ? rawContent.slice(0, closingIndex) : rawContent;
  const tail = hasClosing ? rawContent.slice(closingIndex) : "";

  // Anchored to one line and never crossing a newline, so an empty `atUri:`
  // cannot swallow the following key.
  const existingEntry = /^([ \t]*)atUri[ \t]*[=:][^\r\n]*/m;
  if (existingEntry.test(head)) {
    const updatedHead = head.replace(
      existingEntry,
      (_match, indent: string) => `${indent}${atUriEntry}`,
    );
    return `${updatedHead}${tail}`;
  }

  if (!hasClosing) {
    throw new Error("Could not find frontmatter end");
  }

  // Keep the file's own line-ending style for the inserted line.
  const eol = head.endsWith("\r\n") ? "\r\n" : "\n";
  return `${head}${atUriEntry}${eol}${tail}`;
}

/**
 * Reduces markdown to approximate plain text.
 *
 * Order matters: fenced code goes first so its contents are not touched by
 * later rules, and images go before links because `![alt](src)` contains
 * link syntax and would otherwise be left behind as `!alt`.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
    .replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
    .replace(/\[([^\]]*)\]\([^)]*\)/g, "$1") // Remove links, keep text
    .replace(/^ {0,3}#{1,6}[ \t]+/gm, "") // Remove header markers at line start only
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}