import { webcrypto as crypto } from "node:crypto";
import * as fs from "node:fs/promises";
import * as path from "node:path";
import { glob } from "glob";
import yaml from "js-yaml";
import { minimatch } from "minimatch";
import * as toml from "smol-toml";
import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";

/** Returns true when `value` is a non-null, non-array object. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Splits a markdown document into its frontmatter and body.
 *
 * Supported delimiters:
 * - `---` (YAML) - Jekyll, Astro, most SSGs
 * - `+++` (TOML) - Hugo
 * - `***` (YAML) - alternative format
 *
 * When no frontmatter block is found, the title is taken from the first
 * `# Heading` line (empty string if there is none) and the publish date is
 * set to today's date (UTC, `YYYY-MM-DD`).
 *
 * @param content - Full file contents.
 * @param mapping - Optional custom field names to read the standard fields from.
 * @returns The normalized frontmatter, the body after the frontmatter block,
 *   and the frontmatter exactly as parsed (`rawFrontmatter`).
 * @throws Whatever the YAML/TOML parser throws on malformed frontmatter.
 */
export function parseFrontmatter(
  content: string,
  mapping?: FrontmatterMapping,
): {
  frontmatter: PostFrontmatter;
  body: string;
  rawFrontmatter: Record<string, unknown>;
} {
  const frontmatterRegex = /^(---|\+\+\+|\*\*\*)\n([\s\S]*?)\n\1\n([\s\S]*)$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    const [, titleMatch] = content.trim().match(/^# (.+)$/m) || [];
    const title = titleMatch ?? "";
    const [today] = new Date().toISOString().split("T");
    const publishDate = today ?? "";

    return {
      frontmatter: { title, publishDate },
      body: content,
      // Mirror the synthesized fields under their own keys (the title used to
      // be overwritten with the publish date here).
      rawFrontmatter: { title, publishDate },
    };
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // +++ uses TOML (key = value); --- and *** use YAML (key: value).
  const isToml = delimiter === "+++";

  let raw: Record<string, unknown>;
  if (isToml) {
    raw = toml.parse(frontmatterStr) as Record<string, unknown>;
  } else {
    // CORE_SCHEMA keeps dates as strings rather than Date objects.
    const loaded = yaml.load(frontmatterStr, { schema: yaml.CORE_SCHEMA });
    // A YAML document may be a scalar or a list; only a mapping is usable.
    raw = isPlainRecord(loaded) ? loaded : {};
  }

  // Apply field mappings to normalize to the standard PostFrontmatter fields.
  const frontmatter: Record<string, unknown> = {};

  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date: custom field first, then `publishDate`, then common aliases.
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
    const fallbackField = dateFields.find((field) => raw[field]);
    if (fallbackField !== undefined) {
      frontmatter.publishDate = raw[fallbackField];
    }
  }

  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  frontmatter.theme = raw.theme;
  frontmatter.fontFamily = raw.fontFamily;
  frontmatter.fontSize = raw.fontSize;

  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Draft is only set when the source specifies it.
  const draftField = mapping?.draft || "draft";
  const draftValue = raw[draftField] ?? raw.draft;
  if (draftValue !== undefined) {
    frontmatter.draft = draftValue === true || draftValue === "true";
  }

  // Discoverable defaults to true unless explicitly false.
  const discoverableValue = raw.discoverable;
  frontmatter.discoverable = !(
    discoverableValue === false || discoverableValue === "false"
  );

  // Always preserve atUri (internal field).
  frontmatter.atUri = raw.atUri;

  return {
    frontmatter: frontmatter as unknown as PostFrontmatter,
    body,
    rawFrontmatter: raw,
  };
}

/**
 * Derives a slug from a file name or relative path: drops a trailing
 * `.md`/`.mdx` extension, lowercases, and replaces whitespace runs with `-`.
 */
export function getSlugFromFilename(filename: string): string {
  return filename
    .replace(/\.mdx?$/, "")
    .toLowerCase()
    .replace(/\s+/g, "-");
}

export interface SlugOptions {
  /** Frontmatter field to read the slug from; falls back to the file path. */
  slugField?: string;
  /** Remove a trailing `/index` or `/_index` segment. */
  removeIndexFromSlug?: boolean;
  /** Strip Jekyll-style `YYYY-MM-DD-` prefixes from path segments. */
  stripDatePrefix?: boolean;
}

/**
 * Computes a post's slug from its frontmatter or, failing that, its path.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Frontmatter as parsed from the file.
 * @param options - Slug derivation options; all default to off.
 */
export function getSlugFromOptions(
  relativePath: string,
  rawFrontmatter: Record<string, unknown>,
  options: SlugOptions = {},
): string {
  const {
    slugField,
    removeIndexFromSlug = false,
    stripDatePrefix = false,
  } = options;

  const frontmatterValue = slugField ? rawFrontmatter[slugField] : undefined;

  let slug: string;
  if (frontmatterValue && typeof frontmatterValue === "string") {
    // Frontmatter slug: drop one leading slash, then normalize.
    slug = frontmatterValue
      .replace(/^\//, "")
      .toLowerCase()
      .replace(/\s+/g, "-");
  } else {
    // No usable frontmatter value: derive the slug from the file path.
    slug = getSlugFromFilename(relativePath);
  }

  if (removeIndexFromSlug) {
    slug = slug.replace(/\/_?index$/, "");
  }

  if (stripDatePrefix) {
    slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
  }

  return slug;
}

/**
 * Turns a title into a slug: lowercases, replaces whitespace runs with `-`,
 * and removes every character that is not a word character or `-`.
 */
export function slugifyTitle(title: string): string {
  return (title || "")
    .toLowerCase()
    .replace(/\s+/g, "-")
    .replace(/[^\w-]/g, "");
}

/** Returns the lowercase hex SHA-256 digest of the UTF-8 encoded `content`. */
export async function getContentHash(content: string): Promise<string> {
  const data = new TextEncoder().encode(content);
  const hashBuffer = await crypto.subtle.digest("SHA-256", data);
  return Array.from(new Uint8Array(hashBuffer), (byte) =>
    byte.toString(16).padStart(2, "0"),
  ).join("");
}

/** Returns true when `relativePath` matches any of the glob `ignorePatterns`. */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((pattern) => minimatch(relativePath, pattern));
}

export interface ScanOptions {
  frontmatterMapping?: FrontmatterMapping;
  ignorePatterns?: string[];
  slugField?: string;
  removeIndexFromSlug?: boolean;
  stripDatePrefix?: boolean;
}

/** Keys whose presence identifies the second argument as `ScanOptions`. */
const SCAN_OPTION_KEYS = [
  "frontmatterMapping",
  "ignorePatterns",
  "slugField",
  "removeIndexFromSlug",
  "stripDatePrefix",
] as const;

/** Distinguishes the new options-object signature from a bare mapping. */
function isScanOptions(
  value: FrontmatterMapping | ScanOptions,
): value is ScanOptions {
  return SCAN_OPTION_KEYS.some((key) => key in value);
}

/** Publish time in ms; posts with a missing/invalid date compare as oldest. */
function getPublishTime(post: BlogPost): number {
  const time = new Date(post.frontmatter.publishDate).getTime();
  return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
}

/**
 * Recursively collects every `.md` / `.mdx` file under `contentDir`, parses
 * it, and returns the posts sorted by publish date (newest first; posts
 * without a valid date come last).
 *
 * Files whose frontmatter fails to parse are logged with `console.error` and
 * skipped. A file that cannot be read rejects the returned promise.
 *
 * Supports both the old signature `(contentDir, frontmatterMapping?,
 * ignorePatterns?)` and the new one `(contentDir, options)`.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - A frontmatter mapping (old signature)
 *   or a `ScanOptions` object (new signature).
 * @param ignorePatterns - Glob patterns to skip; only used with the old signature.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
  ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
  let options: ScanOptions;
  if (frontmatterMappingOrOptions && isScanOptions(frontmatterMappingOrOptions)) {
    options = frontmatterMappingOrOptions;
  } else {
    // Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
    options = {
      frontmatterMapping: frontmatterMappingOrOptions as
        | FrontmatterMapping
        | undefined,
      ignorePatterns,
    };
  }

  const {
    frontmatterMapping,
    ignorePatterns: ignore = [],
    slugField,
    removeIndexFromSlug,
    stripDatePrefix,
  } = options;

  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
      // A directory named e.g. `notes.md` must not be read as a post.
      nodir: true,
    });

    for (const relativePath of files) {
      // glob returns platform separators; slugs and ignore patterns use "/".
      const posixPath =
        path.sep === "/"
          ? relativePath
          : relativePath.split(path.sep).join("/");

      if (shouldIgnore(posixPath, ignore)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
          rawContent,
          frontmatterMapping,
        );
        const slug = getSlugFromOptions(posixPath, rawFrontmatter, {
          slugField,
          removeIndexFromSlug,
          stripDatePrefix,
        });

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
          rawFrontmatter,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Newest first. Explicit comparison keeps the comparator consistent even
  // when both sides are undated (subtracting would yield NaN).
  posts.sort((a, b) => {
    const timeA = getPublishTime(a);
    const timeB = getPublishTime(b);
    if (timeA === timeB) return 0;
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}

/** Matches a whole `atUri: …` (YAML) or `atUri = …` (TOML) line; group 1 is the indent. */
const AT_URI_LINE = /^([ \t]*)atUri[ \t]*[=:].*$/m;

/**
 * Returns the index at which the closing frontmatter delimiter line starts,
 * or -1 when no line after the first consists of the delimiter alone.
 */
function findClosingDelimiter(rawContent: string, delimiter: string): number {
  const needle = `\n${delimiter}`;
  let index = rawContent.indexOf(needle);
  while (index !== -1) {
    const next = rawContent[index + needle.length];
    if (next === undefined || next === "\n" || next === "\r") {
      return index + 1;
    }
    index = rawContent.indexOf(needle, index + 1);
  }
  return -1;
}

/**
 * Writes `atUri` into a document's frontmatter.
 *
 * - No frontmatter: a new YAML block containing only `atUri` is prepended.
 * - Existing `atUri` line in the frontmatter: that line is replaced.
 * - Otherwise: the entry is inserted just before the closing delimiter.
 *
 * The entry is written as `atUri = "…"` for TOML (`+++`) and `atUri: "…"`
 * for YAML (`---`, `***`). Only the frontmatter block is touched; `atUri`
 * mentions in the body are left alone.
 *
 * @throws Error if the document opens a frontmatter block that is never
 *   closed and contains no `atUri` line to replace.
 */
export function updateFrontmatterWithAtUri(
  rawContent: string,
  atUri: string,
): string {
  const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
  const delimiter = delimiterMatch?.[1] ?? "---";
  const isToml = delimiter === "+++";

  // JSON string syntax is a valid double-quoted string in both YAML and TOML,
  // so quotes/backslashes in the value cannot break the frontmatter.
  const quotedUri = JSON.stringify(atUri);
  const atUriEntry = isToml ? `atUri = ${quotedUri}` : `atUri: ${quotedUri}`;

  if (!delimiterMatch) {
    return `---\n${atUriEntry}\n---\n\n${rawContent}`;
  }

  const frontmatterEndIndex = findClosingDelimiter(rawContent, delimiter);
  const hasEnd = frontmatterEndIndex !== -1;
  // With an unterminated block there is no boundary, so search everything.
  const head = hasEnd ? rawContent.slice(0, frontmatterEndIndex) : rawContent;
  const tail = hasEnd ? rawContent.slice(frontmatterEndIndex) : "";

  if (AT_URI_LINE.test(head)) {
    // Replacer function: `$` sequences in the URI must not be interpreted.
    const updatedHead = head.replace(
      AT_URI_LINE,
      (_line: string, indent: string) => `${indent}${atUriEntry}`,
    );
    return `${updatedHead}${tail}`;
  }

  if (!hasEnd) {
    throw new Error("Could not find frontmatter end");
  }

  return `${head}${atUriEntry}\n${tail}`;
}

/**
 * Removes the `atUri` line from a document's frontmatter.
 *
 * If the frontmatter has no other non-blank lines afterwards, the whole
 * frontmatter block (and any blank lines following it) is removed. Documents
 * without a recognizable frontmatter block are returned unchanged.
 */
export function removeFrontmatterAtUri(rawContent: string): string {
  const frontmatterRegex = /^(---|\+\+\+|\*\*\*)\n([\s\S]*?)\n\1\n/;
  const match = rawContent.match(frontmatterRegex);
  if (!match) return rawContent;

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";

  const keptLines = frontmatterStr
    .split("\n")
    .filter((line) => !/^\s*atUri\s*[=:]\s*/.test(line));

  const afterFrontmatter = rawContent.slice(match[0].length);

  const hasContent = keptLines.some((line) => line.trim() !== "");
  if (!hasContent) {
    // Nothing left: drop the entire block and leading blank lines.
    return afterFrontmatter.replace(/^\n+/, "");
  }

  return `${delimiter}\n${keptLines.join("\n")}\n${delimiter}\n${afterFrontmatter}`;
}

/**
 * Reduces markdown to approximate plain text.
 *
 * Order matters: fenced code is removed first so its contents cannot pair
 * with emphasis markers outside it, and images are removed before links so
 * `![alt](src)` is not mistaken for a link (which would leave `!alt`).
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
    .replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
    // Remove header markers; requiring line start or whitespace before the
    // hashes keeps words such as "C#" intact.
    .replace(/(^|\s)#{1,6}\s/gm, "$1")
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}

/**
 * Returns the plain-text content for a post: the value of the frontmatter
 * field `textContentField` when it is set and truthy, otherwise the post's
 * markdown body with formatting stripped.
 */
export function getTextContent(
  post: { content: string; rawFrontmatter?: Record<string, unknown> },
  textContentField?: string,
): string {
  const override = textContentField
    ? post.rawFrontmatter?.[textContentField]
    : undefined;
  if (override) {
    return String(override);
  }
  return stripMarkdownForText(post.content);
}

/** Maximum number of body characters that contribute to a note hash. */
const NOTE_CONTENT_MAX = 30000;

/**
 * Hashes the parts of a post that define its note: the trimmed body (capped
 * at `NOTE_CONTENT_MAX` characters), theme, font size, font family, and the
 * discoverable flag (treated as `true` when unset).
 */
export async function computeNoteHash(post: BlogPost): Promise<string> {
  const key = [
    post.content.trim().slice(0, NOTE_CONTENT_MAX),
    post.frontmatter.theme ?? "",
    String(post.frontmatter.fontSize ?? ""),
    post.frontmatter.fontFamily ?? "",
    String(post.frontmatter.discoverable ?? true),
  ].join("\0");
  return getContentHash(key);
}