A CLI for publishing standard.site documents to ATProto
at main 391 lines 11 kB view raw
import * as fs from "node:fs/promises";
import * as path from "node:path";
import { glob } from "glob";
import yaml from "js-yaml";
import { minimatch } from "minimatch";
import * as toml from "smol-toml";
import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";

/**
 * Matches a frontmatter block at the very start of a document.
 *
 * Supported delimiters:
 *   --- (YAML) - Jekyll, Astro, most SSGs
 *   +++ (TOML) - Hugo
 *   ***        - Alternative format (parsed as YAML)
 *
 * Both LF and CRLF line endings are accepted, and the closing delimiter may
 * be the last thing in the file (no trailing newline, empty body).
 */
const FRONTMATTER_PATTERN =
  /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n|$)([\s\S]*)$/;

/** Date keys tried, in order, when resolving the publish date. */
const PUBLISH_DATE_FIELDS = [
  "publishDate",
  "pubDate",
  "date",
  "createdAt",
  "created_at",
] as const;

/** Date keys tried, in order, when resolving the last-updated date. */
const UPDATED_AT_FIELDS = [
  "updatedAt",
  "updated_at",
  "modifiedAt",
  "modified_at",
] as const;

/** Keys whose presence marks an argument as `ScanOptions` (new signature). */
const SCAN_OPTION_KEYS = [
  "frontmatterMapping",
  "ignorePatterns",
  "slugField",
  "removeIndexFromSlug",
  "stripDatePrefix",
] as const;

/** Line-anchored matcher for the closing line of each delimiter style. */
const CLOSING_DELIMITER_PATTERNS: Record<string, RegExp> = {
  "---": /^---[ \t]*\r?$/m,
  "+++": /^\+\+\+[ \t]*\r?$/m,
  "***": /^\*\*\*[ \t]*\r?$/m,
};

/** True when `value` is a plain key/value object (not null, not an array). */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Returns the first truthy value stored under any of `keys`, if any. */
function firstTruthy(
  raw: Record<string, unknown>,
  keys: readonly string[],
): unknown {
  for (const key of keys) {
    const value = raw[key];
    if (value) {
      return value;
    }
  }
  return undefined;
}

/**
 * Splits a document into frontmatter and body, and normalizes the
 * frontmatter to the standard `PostFrontmatter` field names.
 *
 * @param content - Full file contents, starting with a frontmatter block.
 * @param mapping - Optional custom field names; each mapped field falls back
 *   to its standard name when the custom one is missing or falsy.
 * @returns The normalized frontmatter, the body after the closing delimiter,
 *   and the untouched parsed frontmatter object.
 * @throws Error("Could not parse frontmatter") when no frontmatter block is
 *   found at the start of `content`. Parser errors from the YAML/TOML
 *   libraries propagate unchanged.
 */
export function parseFrontmatter(
  content: string,
  mapping?: FrontmatterMapping,
): {
  frontmatter: PostFrontmatter;
  body: string;
  rawFrontmatter: Record<string, unknown>;
} {
  const match = FRONTMATTER_PATTERN.exec(content);
  if (!match) {
    throw new Error("Could not parse frontmatter");
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // +++ uses TOML (key = value); --- and *** use YAML (key: value).
  // CORE_SCHEMA keeps YAML dates as strings rather than Date objects.
  // NOTE(review): TOML date values are passed through as the TOML library
  // returns them, which may not be strings — confirm callers accept that.
  const parsed: unknown =
    delimiter === "+++"
      ? toml.parse(frontmatterStr)
      : yaml.load(frontmatterStr, { schema: yaml.CORE_SCHEMA });

  // An empty, scalar or list-valued document has no fields to map; treat it
  // as an empty record instead of pretending it is one.
  const raw: Record<string, unknown> = isRecord(parsed) ? parsed : {};

  const frontmatter: Record<string, unknown> = {};

  frontmatter.title = raw[mapping?.title || "title"] || raw.title;
  frontmatter.description =
    raw[mapping?.description || "description"] || raw.description;

  // Dates: custom field first, then the standard name, then common aliases.
  const publishDate = firstTruthy(raw, [
    ...(mapping?.publishDate ? [mapping.publishDate] : []),
    ...PUBLISH_DATE_FIELDS,
  ]);
  if (publishDate) {
    frontmatter.publishDate = publishDate;
  }

  const updatedAt = firstTruthy(raw, [
    ...(mapping?.updatedAt ? [mapping.updatedAt] : []),
    ...UPDATED_AT_FIELDS,
  ]);
  if (updatedAt) {
    frontmatter.updatedAt = updatedAt;
  }

  frontmatter.ogImage = raw[mapping?.coverImage || "ogImage"] || raw.ogImage;
  frontmatter.tags = raw[mapping?.tags || "tags"] || raw.tags;

  // Draft uses ?? so an explicit `false` in the mapped field is respected.
  const draftValue = raw[mapping?.draft || "draft"] ?? raw.draft;
  if (draftValue !== undefined) {
    frontmatter.draft = draftValue === true || draftValue === "true";
  }

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  return {
    frontmatter: frontmatter as unknown as PostFrontmatter,
    body,
    rawFrontmatter: raw,
  };
}

/**
 * Derives a slug from a file name or relative path: drops a trailing
 * `.md`/`.mdx` extension, lowercases, and turns whitespace runs into `-`.
 */
export function getSlugFromFilename(filename: string): string {
  return filename
    .replace(/\.mdx?$/, "")
    .toLowerCase()
    .replace(/\s+/g, "-");
}

export interface SlugOptions {
  /** Raw frontmatter key holding an explicit slug. */
  slugField?: string;
  /** Drop a trailing `/index` or `/_index` segment. */
  removeIndexFromSlug?: boolean;
  /** Drop Jekyll-style `YYYY-MM-DD-` prefixes from path segments. */
  stripDatePrefix?: boolean;
}

/**
 * Computes a post's slug from its frontmatter or, failing that, its path.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Parsed, unmapped frontmatter of the file.
 * @param options - Slug source and clean-up switches.
 * @returns The slug, without a leading slash.
 */
export function getSlugFromOptions(
  relativePath: string,
  rawFrontmatter: Record<string, unknown>,
  options: SlugOptions = {},
): string {
  const {
    slugField,
    removeIndexFromSlug = false,
    stripDatePrefix = false,
  } = options;

  const explicitSlug = slugField ? rawFrontmatter[slugField] : undefined;

  let slug: string;
  if (typeof explicitSlug === "string" && explicitSlug) {
    // Remove leading slash if present
    slug = explicitSlug
      .replace(/^\//, "")
      .toLowerCase()
      .replace(/\s+/g, "-");
  } else {
    // Slugs are URL paths: convert the platform separator (a backslash on
    // Windows) to "/" so the suffix/prefix rules below can match. This is a
    // no-op wherever the separator already is "/".
    slug = getSlugFromFilename(relativePath.split(path.sep).join("/"));
  }

  // Remove /index or /_index suffix if configured
  if (removeIndexFromSlug) {
    slug = slug.replace(/\/_?index$/, "");
  }

  // Strip Jekyll-style date prefix (YYYY-MM-DD-) from each path segment
  if (stripDatePrefix) {
    slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
  }

  return slug;
}

/**
 * Expands `{token}` placeholders in a path template for one post.
 *
 * Built-in tokens: `{slug}`, `{title}` (slugified), and the publish date as
 * `{year}`, `{month}`, `{day}` (local time) or `{yearUTC}`, `{monthUTC}`,
 * `{dayUTC}`. Any other `{field}` is looked up in the raw frontmatter and
 * used only when it is a string; otherwise it expands to nothing.
 *
 * Expansion is a single pass with a callback, so substituted values are
 * inserted literally: `$` sequences in a slug are not interpreted and braces
 * inside a value are not expanded again.
 *
 * @returns The resolved path, always starting with `/`.
 */
export function resolvePathTemplate(template: string, post: BlogPost): string {
  const publishDate = new Date(post.frontmatter.publishDate);
  const twoDigits = (value: number): string => String(value).padStart(2, "0");

  const slugifiedTitle = (post.frontmatter.title || "")
    .toLowerCase()
    .replace(/\s+/g, "-")
    .replace(/[^\w-]/g, "");

  const builtIns = new Map<string, string>([
    ["slug", post.slug],
    ["year", String(publishDate.getFullYear())],
    ["yearUTC", String(publishDate.getUTCFullYear())],
    ["month", twoDigits(publishDate.getMonth() + 1)],
    ["monthUTC", twoDigits(publishDate.getUTCMonth() + 1)],
    ["day", twoDigits(publishDate.getDate())],
    ["dayUTC", twoDigits(publishDate.getUTCDate())],
    ["title", slugifiedTitle],
  ]);

  const expanded = template.replace(/\{(\w+)\}/g, (_match, token: string) => {
    const builtIn = builtIns.get(token);
    if (builtIn !== undefined) {
      return builtIn;
    }
    const value = post.rawFrontmatter[token];
    return typeof value === "string" ? value : "";
  });

  // Ensure leading slash
  return expanded.startsWith("/") ? expanded : `/${expanded}`;
}

/**
 * Resolves the public path of a post.
 *
 * @param post - The post to resolve.
 * @param pathPrefix - Prefix placed before the slug; defaults to `/posts`.
 *   Trailing slashes are ignored, so `"/"` yields `/<slug>`.
 * @param pathTemplate - When set, takes precedence over `pathPrefix` and is
 *   expanded with `resolvePathTemplate`.
 */
export function resolvePostPath(
  post: BlogPost,
  pathPrefix?: string,
  pathTemplate?: string,
): string {
  if (pathTemplate) {
    return resolvePathTemplate(pathTemplate, post);
  }
  const prefix = (pathPrefix || "/posts").replace(/\/+$/, "");
  return `${prefix}/${post.slug}`;
}

/**
 * Computes the SHA-256 digest of `content` (encoded as UTF-8).
 *
 * @returns The digest as a lowercase hexadecimal string.
 */
export async function getContentHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = await crypto.subtle.digest("SHA-256", bytes);
  let hex = "";
  for (const byte of new Uint8Array(digest)) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}

/** True when `relativePath` matches at least one of the glob patterns. */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((pattern) => minimatch(relativePath, pattern));
}

export interface ScanOptions {
  frontmatterMapping?: FrontmatterMapping;
  ignorePatterns?: string[];
  slugField?: string;
  removeIndexFromSlug?: boolean;
  stripDatePrefix?: boolean;
}

/**
 * Publish time of a post in milliseconds, or -Infinity when the date is
 * missing or unparseable, so such posts sort after every dated post.
 */
function publishTime(post: BlogPost): number {
  const time = new Date(post.frontmatter.publishDate).getTime();
  return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
}

/**
 * Finds and parses every `.md` / `.mdx` file below `contentDir`.
 *
 * Accepts both call shapes:
 *   scanContentDirectory(dir, options)
 *   scanContentDirectory(dir, frontmatterMapping?, ignorePatterns?)  (legacy)
 * The second argument is treated as `ScanOptions` when it carries any
 * `ScanOptions` key; in that case the third argument is not used.
 *
 * Files whose frontmatter cannot be parsed are reported with `console.error`
 * and skipped. A file that cannot be read rejects the returned promise.
 *
 * @returns Parsed posts, newest publish date first; posts without a usable
 *   publish date come last.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
  ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
  const candidate = frontmatterMappingOrOptions;
  const usesOptionsObject =
    candidate != null && SCAN_OPTION_KEYS.some((key) => key in candidate);

  const options: ScanOptions = usesOptionsObject
    ? (candidate as ScanOptions)
    : {
        frontmatterMapping: candidate as FrontmatterMapping | undefined,
        ignorePatterns,
      };

  const {
    frontmatterMapping,
    ignorePatterns: ignore = [],
    slugField,
    removeIndexFromSlug,
    stripDatePrefix,
  } = options;

  const posts: BlogPost[] = [];

  for (const pattern of ["**/*.md", "**/*.mdx"]) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignore)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
          rawContent,
          frontmatterMapping,
        );
        const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
          slugField,
          removeIndexFromSlug,
          stripDatePrefix,
        });

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
          rawFrontmatter,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Newest first. Explicit comparisons keep the comparator well-defined even
  // when both posts lack a date (subtracting infinities would yield NaN).
  posts.sort((a, b) => {
    const timeA = publishTime(a);
    const timeB = publishTime(b);
    if (timeA === timeB) {
      return 0;
    }
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}

/**
 * Writes `atUri` into a document's frontmatter, replacing an existing
 * top-level `atUri` entry or appending one before the closing delimiter.
 *
 * Only the frontmatter block is inspected and modified; the body is returned
 * untouched even when it mentions `atUri:`. The entry is written as
 * `atUri = "..."` for TOML (`+++`) and `atUri: "..."` otherwise, and the
 * file's existing line-ending style is kept.
 *
 * @throws Error("Could not find frontmatter end") when the content does not
 *   start with a frontmatter delimiter or has no closing delimiter line.
 */
export function updateFrontmatterWithAtUri(
  rawContent: string,
  atUri: string,
): string {
  // Detect which delimiter is used (---, +++, or ***)
  const delimiter = rawContent.match(/^(---|\+\+\+|\*\*\*)/)?.[1];
  const closingPattern =
    delimiter === undefined ? undefined : CLOSING_DELIMITER_PATTERNS[delimiter];
  const openingLineEnd = rawContent.indexOf("\n");
  if (closingPattern === undefined || openingLineEnd === -1) {
    throw new Error("Could not find frontmatter end");
  }

  // The closing delimiter must sit on a line of its own, after the opening
  // line; a "---" inside a value can never be mistaken for it.
  const fieldsStart = openingLineEnd + 1;
  const closing = closingPattern.exec(rawContent.slice(fieldsStart));
  if (!closing) {
    throw new Error("Could not find frontmatter end");
  }
  const closingIndex = fieldsStart + closing.index;

  const head = rawContent.slice(0, closingIndex); // opening line + fields
  const tail = rawContent.slice(closingIndex); // closing delimiter + body

  const atUriEntry =
    delimiter === "+++" ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // Existing top-level entry, YAML or TOML style, with or without spaces
  // around the separator. The line terminator is left in place.
  const existingEntry = /^atUri[ \t]*[=:][^\r\n]*/m;
  if (existingEntry.test(head)) {
    // Callback form so "$" in the URI is never read as a replacement pattern.
    return head.replace(existingEntry, () => atUriEntry) + tail;
  }

  const eol = head.endsWith("\r\n") ? "\r\n" : "\n";
  return `${head}${atUriEntry}${eol}${tail}`;
}

/**
 * Reduces Markdown to approximate plain text.
 *
 * Rule order matters: fenced code is dropped before any inline rule can
 * touch its contents, and images are dropped before links so `![alt](src)`
 * is not mistaken for a link and left behind as `!alt`.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
    .replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
    .replace(/\[([^\]]*)\]\([^)]+\)/g, "$1") // Remove links, keep text
    .replace(/^[ \t]{0,3}#{1,6}[ \t]+/gm, "") // Remove header markers (line start only)
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}