this repo has no description
at main 395 lines 11 kB view raw
import { webcrypto as crypto } from "node:crypto";
import * as fs from "node:fs/promises";
import * as path from "node:path";
import { glob } from "glob";
import yaml from "js-yaml";
import { minimatch } from "minimatch";
import * as toml from "smol-toml";
import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";

/**
 * Matches a leading frontmatter block plus the remaining body.
 *
 * Group 1 is the delimiter, group 2 the frontmatter text and group 3 the body.
 * Both LF and CRLF line endings are accepted, and the closing delimiter may be
 * the very last thing in the file (no trailing newline required).
 */
const FRONTMATTER_WITH_BODY =
  /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n|$)([\s\S]*)$/;

/** Matches a single `atUri` entry line in either YAML (`:`) or TOML (`=`) form. */
const AT_URI_LINE = /^([ \t]*)atUri[ \t]*[=:][^\r\n]*/m;

/** Returns true when `value` is a non-null, non-array object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Splits a document into frontmatter and body and normalizes the frontmatter
 * into the standard post fields.
 *
 * Supported delimiters:
 * - `---` (YAML) - Jekyll, Astro, most SSGs
 * - `+++` (TOML) - Hugo
 * - `***` (YAML) - alternative format
 *
 * When no frontmatter block is present, the title is taken from the first
 * `# heading` line (empty string if there is none) and the publish date
 * defaults to today's date (UTC, `YYYY-MM-DD`).
 *
 * @param content - Full text of the markdown file.
 * @param mapping - Optional custom frontmatter field names to read from.
 * @returns The normalized frontmatter, the body, and the raw parsed frontmatter.
 * @throws Whatever the YAML/TOML parser throws for malformed frontmatter.
 */
export function parseFrontmatter(
  content: string,
  mapping?: FrontmatterMapping,
): {
  frontmatter: PostFrontmatter;
  body: string;
  rawFrontmatter: Record<string, unknown>;
} {
  const match = content.match(FRONTMATTER_WITH_BODY);

  if (!match) {
    const [, titleMatch] = content.trim().match(/^# (.+)$/m) || [];
    // `.` also matches "\r", so trim to drop it for CRLF files.
    const title = (titleMatch ?? "").trim();
    const [publishDate] = new Date().toISOString().split("T");

    return {
      frontmatter: {
        title,
        publishDate: publishDate ?? "",
      },
      body: content,
      // The raw record mirrors the derived title (previously the publish date
      // was stored under `title` by mistake).
      rawFrontmatter: {
        title,
      },
    };
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // Determine format based on delimiter:
  // +++ uses TOML (key = value)
  // --- and *** use YAML (key: value)
  const isToml = delimiter === "+++";

  let raw: Record<string, unknown>;
  if (isToml) {
    raw = toml.parse(frontmatterStr) as Record<string, unknown>;
  } else {
    // Use CORE_SCHEMA to keep dates as strings rather than Date objects.
    const loaded = yaml.load(frontmatterStr, { schema: yaml.CORE_SCHEMA });
    // A YAML document may be empty, a scalar or a list; only a mapping is
    // usable as frontmatter.
    raw = isRecord(loaded) ? loaded : {};
  }

  // Apply field mappings to normalize to standard PostFrontmatter fields.
  const frontmatter: Record<string, unknown> = {};

  // Title mapping
  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  // Description mapping
  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date mapping - custom field first, then `publishDate`, then
  // common date field names.
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    const fallbackFields = ["pubDate", "date", "createdAt", "created_at"];
    const fallbackField = fallbackFields.find((field) => raw[field]);
    if (fallbackField !== undefined) {
      frontmatter.publishDate = raw[fallbackField];
    }
  }

  // Cover image mapping
  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  // Theme, font family and font size
  frontmatter.theme = raw.theme;
  frontmatter.fontFamily = raw.fontFamily;
  frontmatter.fontSize = raw.fontSize;

  // Tags mapping
  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Draft mapping: accepts the boolean `true` or the string "true".
  const draftField = mapping?.draft || "draft";
  const draftValue = raw[draftField] ?? raw.draft;
  if (draftValue !== undefined) {
    frontmatter.draft = draftValue === true || draftValue === "true";
  }

  // Discoverable defaults to true unless explicitly false / "false".
  const discoverableValue = raw.discoverable;
  frontmatter.discoverable = !(
    discoverableValue === false || discoverableValue === "false"
  );

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  return {
    frontmatter: frontmatter as unknown as PostFrontmatter,
    body,
    rawFrontmatter: raw,
  };
}

/**
 * Derives a slug from a file name or relative path: drops a trailing `.md` /
 * `.mdx` extension, lowercases, and replaces whitespace runs with `-`.
 */
export function getSlugFromFilename(filename: string): string {
  return filename
    .replace(/\.mdx?$/, "")
    .toLowerCase()
    .replace(/\s+/g, "-");
}

/** Options controlling how a post slug is derived. */
export interface SlugOptions {
  /** Frontmatter field to read the slug from; falls back to the file path. */
  slugField?: string;
  /** Remove a trailing `/index` or `/_index` segment. */
  removeIndexFromSlug?: boolean;
  /** Strip Jekyll-style `YYYY-MM-DD-` prefixes from path segments. */
  stripDatePrefix?: boolean;
}

/**
 * Computes the slug for a post.
 *
 * The slug comes from `rawFrontmatter[slugField]` when that is a non-empty
 * string (a single leading slash is removed); otherwise it is derived from
 * `relativePath`. The optional index-suffix and date-prefix clean-ups are then
 * applied in that order.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Raw parsed frontmatter of the file.
 * @param options - Slug derivation options.
 */
export function getSlugFromOptions(
  relativePath: string,
  rawFrontmatter: Record<string, unknown>,
  options: SlugOptions = {},
): string {
  const {
    slugField,
    removeIndexFromSlug = false,
    stripDatePrefix = false,
  } = options;

  const frontmatterValue = slugField ? rawFrontmatter[slugField] : undefined;

  let slug =
    typeof frontmatterValue === "string" && frontmatterValue !== ""
      ? frontmatterValue
          .replace(/^\//, "")
          .toLowerCase()
          .replace(/\s+/g, "-")
      : getSlugFromFilename(relativePath);

  // Remove /index or /_index suffix if configured
  if (removeIndexFromSlug) {
    slug = slug.replace(/\/_?index$/, "");
  }

  // Strip Jekyll-style date prefix (YYYY-MM-DD-) from every path segment
  if (stripDatePrefix) {
    slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
  }

  return slug;
}

/**
 * Turns a title into a slug: lowercases, replaces whitespace runs with `-`,
 * and drops every character that is not a word character or `-`.
 * A missing/empty title yields an empty string.
 */
export function slugifyTitle(title: string): string {
  return (title || "")
    .toLowerCase()
    .replace(/\s+/g, "-")
    .replace(/[^\w-]/g, "");
}

/** Returns the SHA-256 digest of `content` (UTF-8 encoded) as lowercase hex. */
export async function getContentHash(content: string): Promise<string> {
  const data = new TextEncoder().encode(content);
  const hashBuffer = await crypto.subtle.digest("SHA-256", data);
  return Array.from(new Uint8Array(hashBuffer), (byte) =>
    byte.toString(16).padStart(2, "0"),
  ).join("");
}

/** Returns true when `relativePath` matches at least one ignore glob. */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((pattern) => minimatch(relativePath, pattern));
}

/** Options accepted by {@link scanContentDirectory}. */
export interface ScanOptions {
  frontmatterMapping?: FrontmatterMapping;
  ignorePatterns?: string[];
  slugField?: string;
  removeIndexFromSlug?: boolean;
  stripDatePrefix?: boolean;
}

/** Every key that identifies an argument as `ScanOptions`. */
const SCAN_OPTION_KEYS: readonly (keyof ScanOptions)[] = [
  "frontmatterMapping",
  "ignorePatterns",
  "slugField",
  "removeIndexFromSlug",
  "stripDatePrefix",
];

/** Returns true when `value` carries at least one `ScanOptions` key. */
function looksLikeScanOptions(value: object): boolean {
  return SCAN_OPTION_KEYS.some((key) => key in value);
}

/**
 * Publish time of a post in epoch milliseconds. Missing or unparseable dates
 * map to negative infinity so they sort after every dated post.
 */
function publishTimestamp(post: BlogPost): number {
  const time = new Date(post.frontmatter.publishDate).getTime();
  return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
}

/**
 * Finds all `.md` / `.mdx` files under `contentDir`, parses them, and returns
 * the posts sorted by publish date (newest first; undated posts last).
 *
 * Two call signatures are supported:
 * - new: `(contentDir, options)`
 * - old: `(contentDir, frontmatterMapping?, ignorePatterns?)`
 *
 * Files whose frontmatter fails to parse are logged with `console.error` and
 * skipped. File read errors are not caught and reject the returned promise.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - Scan options, or a legacy frontmatter mapping.
 * @param ignorePatterns - Legacy ignore globs; only used with the old signature.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
  ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
  // Handle both old signature (frontmatterMapping, ignorePatterns) and new
  // signature (options). Any ScanOptions key marks the new signature, so
  // options such as `{ stripDatePrefix: true }` are no longer mistaken for a
  // frontmatter mapping.
  let options: ScanOptions;
  if (
    frontmatterMappingOrOptions &&
    looksLikeScanOptions(frontmatterMappingOrOptions)
  ) {
    options = frontmatterMappingOrOptions as ScanOptions;
  } else {
    options = {
      frontmatterMapping: frontmatterMappingOrOptions as
        | FrontmatterMapping
        | undefined,
      ignorePatterns,
    };
  }

  const {
    frontmatterMapping,
    ignorePatterns: ignore = [],
    slugField,
    removeIndexFromSlug,
    stripDatePrefix,
  } = options;

  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignore)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
          rawContent,
          frontmatterMapping,
        );
        const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
          slugField,
          removeIndexFromSlug,
          stripDatePrefix,
        });

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
          rawFrontmatter,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Sort by publish date (newest first). Compare explicitly instead of
  // subtracting so invalid dates never produce a NaN comparator result.
  posts.sort((a, b) => {
    const timeA = publishTimestamp(a);
    const timeB = publishTimestamp(b);
    if (timeA === timeB) return 0;
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}

/**
 * Returns the index at which the closing frontmatter delimiter line starts,
 * or -1 when no line after the first consists solely of `delimiter`
 * (trailing whitespace / "\r" is tolerated).
 */
function findClosingDelimiter(content: string, delimiter: string): number {
  let lineStart = content.indexOf("\n") + 1;
  while (lineStart > 0 && lineStart < content.length) {
    const newline = content.indexOf("\n", lineStart);
    const lineEnd = newline === -1 ? content.length : newline;
    if (content.slice(lineStart, lineEnd).trimEnd() === delimiter) {
      return lineStart;
    }
    // Becomes 0 when there is no further newline, which ends the loop.
    lineStart = newline + 1;
  }
  return -1;
}

/**
 * Writes `atUri` into the document's frontmatter and returns the new text.
 *
 * - No frontmatter: a new YAML block containing only `atUri` is prepended.
 * - Existing `atUri` entry in the frontmatter: that line is replaced.
 * - Otherwise: the entry is inserted just before the closing delimiter.
 *
 * Only the frontmatter block is searched for an existing entry, so text such
 * as `atUri:` in the post body is never touched.
 *
 * @param rawContent - Full text of the markdown file.
 * @param atUri - The AT URI to store.
 * @throws Error if the file starts with a delimiter, has no `atUri` entry, and
 *   no closing delimiter line can be found.
 */
export function updateFrontmatterWithAtUri(
  rawContent: string,
  atUri: string,
): string {
  // Detect which delimiter is used (---, +++, or ***)
  const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
  const delimiter = delimiterMatch?.[1] ?? "---";
  const isToml = delimiter === "+++";

  // Format the atUri entry based on frontmatter type
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // No frontmatter: create one with atUri
  if (!delimiterMatch) {
    return `---\n${atUriEntry}\n---\n\n${rawContent}`;
  }

  const closingIndex = findClosingDelimiter(rawContent, delimiter);
  // With an unterminated block, fall back to searching the whole document so
  // an existing entry can still be replaced.
  const frontmatterEnd = closingIndex === -1 ? rawContent.length : closingIndex;
  const head = rawContent.slice(0, frontmatterEnd);
  const tail = rawContent.slice(frontmatterEnd);

  if (AT_URI_LINE.test(head)) {
    // A replacer function keeps any `$` in the URI from being interpreted as
    // a replacement pattern.
    const updatedHead = head.replace(
      AT_URI_LINE,
      (_line: string, indent: string) => `${indent}${atUriEntry}`,
    );
    return `${updatedHead}${tail}`;
  }

  if (closingIndex === -1) {
    throw new Error("Could not find frontmatter end");
  }

  // Insert before the closing delimiter, matching the file's line endings.
  const eol = head.includes("\r\n") ? "\r\n" : "\n";
  return `${head}${atUriEntry}${eol}${tail}`;
}

/**
 * Removes the `atUri` entry from the document's frontmatter.
 *
 * If the frontmatter has no non-blank lines left afterwards, the whole block
 * is removed along with any blank lines that followed it. Documents without a
 * frontmatter block are returned unchanged.
 */
export function removeFrontmatterAtUri(rawContent: string): string {
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)(\r?\n)([\s\S]*?)\r?\n\1(?:\r?\n|$)/;
  const match = rawContent.match(frontmatterRegex);
  if (!match) return rawContent;

  const delimiter = match[1] ?? "---";
  const eol = match[2] ?? "\n";
  const frontmatterStr = match[3] ?? "";

  // Remove the atUri line
  const lines = frontmatterStr
    .split(/\r?\n/)
    .filter((line) => !/^\s*atUri\s*[=:]\s*/.test(line));

  // Check if remaining frontmatter has any non-empty lines
  const hasContent = lines.some((line) => line.trim() !== "");

  const afterFrontmatter = rawContent.slice(match[0].length);

  if (!hasContent) {
    // Remove entire frontmatter block, trim leading newlines
    return afterFrontmatter.replace(/^(?:\r?\n)+/, "");
  }

  return `${delimiter}${eol}${lines.join(eol)}${eol}${delimiter}${eol}${afterFrontmatter}`;
}

/**
 * Reduces markdown to approximate plain text.
 *
 * Fenced code blocks and images are dropped; links, emphasis and inline code
 * keep their text; heading markers at the start of a line are removed; runs
 * of three or more newlines collapse to two.
 */
export function stripMarkdownForText(markdown: string): string {
  return (
    markdown
      .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
      // Images must go before links, otherwise `![alt](url)` is rewritten to
      // `!alt` by the link rule and never removed.
      .replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
      .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
      // Only strip heading markers at the start of a line (optionally inside
      // a blockquote) so text like "C# is" stays intact.
      .replace(/^([ \t>]*)#{1,6}\s/gm, "$1") // Remove headers
      .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
      .replace(/\*([^*]+)\*/g, "$1") // Remove italic
      .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
      .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
      .trim()
  );
}

/**
 * Returns the plain-text content for a post.
 *
 * When `textContentField` names a truthy value in the post's raw frontmatter,
 * that value is returned as a string; otherwise the markdown body is stripped
 * with {@link stripMarkdownForText}.
 */
export function getTextContent(
  post: { content: string; rawFrontmatter?: Record<string, unknown> },
  textContentField?: string,
): string {
  if (textContentField && post.rawFrontmatter?.[textContentField]) {
    return String(post.rawFrontmatter[textContentField]);
  }
  return stripMarkdownForText(post.content);
}