this repo has no description
1import { webcrypto as crypto } from "node:crypto";
2import * as fs from "node:fs/promises";
3import * as path from "node:path";
4import { glob } from "glob";
5import yaml from "js-yaml";
6import { minimatch } from "minimatch";
7import * as toml from "smol-toml";
8import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";
9
/**
 * Split a Markdown document into frontmatter and body, and normalize the
 * frontmatter into the standard `PostFrontmatter` fields.
 *
 * Supported fences:
 * - `---` (YAML) - Jekyll, Astro, most SSGs
 * - `+++` (TOML) - Hugo
 * - `***` (parsed as YAML) - alternative format
 *
 * Both LF and CRLF line endings are accepted, and the closing fence may be the
 * last line of the file (no trailing newline required).
 *
 * When the document has no frontmatter block, the title is taken from the
 * first `# ` heading (or is empty), the publish date is today's date
 * (`YYYY-MM-DD`, UTC), and the whole input is returned as the body.
 *
 * @param content - Full text of the Markdown file.
 * @param mapping - Optional custom frontmatter key names. A mapped key is read
 *   first; the standard key is used as a fallback.
 * @returns The normalized frontmatter, the body after the closing fence, and
 *   the parsed frontmatter exactly as written in the file.
 * @throws Whatever the YAML / TOML parser throws on malformed frontmatter.
 */
export function parseFrontmatter(
  content: string,
  mapping?: FrontmatterMapping,
): {
  frontmatter: PostFrontmatter;
  body: string;
  rawFrontmatter: Record<string, unknown>;
} {
  // Group 1: fence, group 2: frontmatter text, group 3: body (absent when the
  // closing fence is the very end of the file).
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n([\s\S]*))?$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    const [, titleMatch] = content.trim().match(/^# (.+)$/m) || [];
    const title = titleMatch ?? "";
    const [datePart] = new Date().toISOString().split("T");
    const publishDate = datePart ?? "";

    return {
      frontmatter: {
        title,
        publishDate,
      },
      body: content,
      // Mirror the derived values; the raw title must be the title, not the date.
      rawFrontmatter: {
        title,
        publishDate,
      },
    };
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // Determine format based on delimiter:
  // +++ uses TOML (key = value)
  // --- and *** use YAML (key: value)
  const isToml = delimiter === "+++";

  let raw: Record<string, unknown>;
  if (isToml) {
    raw = toml.parse(frontmatterStr) as Record<string, unknown>;
  } else {
    // Use CORE_SCHEMA to keep dates as strings rather than Date objects.
    const loaded: unknown = yaml.load(frontmatterStr, {
      schema: yaml.CORE_SCHEMA,
    });
    // A YAML document can also be a scalar, a list or empty; only a mapping is
    // usable as frontmatter, anything else is treated as "no fields".
    raw =
      typeof loaded === "object" && loaded !== null && !Array.isArray(loaded)
        ? (loaded as Record<string, unknown>)
        : {};
  }

  // Apply field mappings to normalize to standard PostFrontmatter fields.
  const frontmatter: Record<string, unknown> = {};

  // Title mapping
  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  // Description mapping
  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date mapping - check custom field first, then fallbacks
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    // Fallback to common date field names; first non-empty one wins.
    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
    for (const field of dateFields) {
      if (raw[field]) {
        frontmatter.publishDate = raw[field];
        break;
      }
    }
  }

  // Cover image mapping
  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  // Theme, font family and font size are copied through unmapped.
  frontmatter.theme = raw.theme;
  frontmatter.fontFamily = raw.fontFamily;
  frontmatter.fontSize = raw.fontSize;

  // Tags mapping
  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Draft mapping: only set when the file says something about it; accepts
  // the boolean `true` or the string "true".
  const draftField = mapping?.draft || "draft";
  const draftValue = raw[draftField] ?? raw.draft;
  if (draftValue !== undefined) {
    frontmatter.draft = draftValue === true || draftValue === "true";
  }

  // Discoverable defaults to true; only an explicit false / "false" disables it.
  const discoverableValue = raw.discoverable;
  frontmatter.discoverable = !(
    discoverableValue === false || discoverableValue === "false"
  );

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  return {
    // The record is assembled dynamically from untyped data, so it cannot be
    // checked structurally against PostFrontmatter here.
    frontmatter: frontmatter as unknown as PostFrontmatter,
    body,
    rawFrontmatter: raw,
  };
}
129
/**
 * Derive a slug from a file name: drop a trailing `.md` / `.mdx` extension,
 * lowercase the rest and turn every whitespace run into a single hyphen.
 *
 * @param filename - File name (or relative path) of the post.
 * @returns The slug; other characters, including path separators, are kept.
 */
export function getSlugFromFilename(filename: string): string {
  // The extension check runs before lowercasing, so it is case-sensitive.
  const withoutExtension = filename.replace(/\.mdx?$/, "");
  const lowered = withoutExtension.toLowerCase();
  return lowered.replace(/\s+/g, "-");
}
136
/** Options that control how a post's slug is derived. */
export interface SlugOptions {
  /** Strip Jekyll-style `YYYY-MM-DD-` prefixes from path segments. */
  stripDatePrefix?: boolean;
  /** Drop a trailing `/index` or `/_index` segment from the slug. */
  removeIndexFromSlug?: boolean;
  /**
   * Frontmatter key whose string value is used as the slug; the file path is
   * used when the key is unset, missing or not a non-empty string.
   */
  slugField?: string;
}
142
/**
 * Compute a post's slug from its frontmatter or, failing that, its path.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Frontmatter exactly as parsed from the file.
 * @param options - See {@link SlugOptions}; every flag defaults to off.
 * @returns The lowercased slug with whitespace runs replaced by hyphens.
 */
export function getSlugFromOptions(
  relativePath: string,
  rawFrontmatter: Record<string, unknown>,
  options: SlugOptions = {},
): string {
  const { slugField, removeIndexFromSlug, stripDatePrefix } = options;

  // Prefer the configured frontmatter field when it holds a non-empty string
  // (minus one leading slash); otherwise use the path without .md / .mdx.
  const candidate = slugField ? rawFrontmatter[slugField] : undefined;
  const base =
    typeof candidate === "string" && candidate !== ""
      ? candidate.replace(/^\//, "")
      : relativePath.replace(/\.mdx?$/, "");

  const normalized = base.toLowerCase().replace(/\s+/g, "-");

  // Optionally drop a trailing /index or /_index segment.
  const withoutIndex = removeIndexFromSlug
    ? normalized.replace(/\/_?index$/, "")
    : normalized;

  // Optionally strip YYYY-MM-DD- from the start of every path segment.
  return stripDatePrefix
    ? withoutIndex.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1")
    : withoutIndex;
}
192
/**
 * Turn a post title into a URL-friendly slug.
 *
 * The title is lowercased, whitespace runs become single hyphens, and every
 * character other than `[A-Za-z0-9_]` and `-` is removed.
 *
 * @param title - Title to slugify; a falsy value yields an empty string.
 * @returns The slug.
 */
export function slugifyTitle(title: string): string {
  if (!title) {
    return "";
  }

  const lowered = title.toLowerCase();
  const hyphenated = lowered.replace(/\s+/g, "-");
  return hyphenated.replace(/[^\w-]/g, "");
}
199
/**
 * Compute the SHA-256 digest of a string.
 *
 * @param content - Text to hash; it is encoded as UTF-8 first.
 * @returns The digest as a lowercase hexadecimal string (64 characters).
 */
export async function getContentHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

  let hex = "";
  for (const byte of digest) {
    // Two hex digits per byte, zero-padded.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
207
/**
 * Tell whether a path matches at least one of the given ignore patterns.
 *
 * @param relativePath - Path to test.
 * @param ignorePatterns - Patterns handed to `minimatch` one by one.
 * @returns `true` on the first match, `false` when nothing matches (including
 *   when the pattern list is empty).
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((pattern) => minimatch(relativePath, pattern));
}
216
/** Options accepted by `scanContentDirectory`. */
export interface ScanOptions {
  /** Patterns for files to skip, matched against each path relative to the content directory. */
  ignorePatterns?: string[];
  /** Custom frontmatter key names, forwarded to `parseFrontmatter`. */
  frontmatterMapping?: FrontmatterMapping;
  /** Frontmatter key to read the slug from, forwarded to `getSlugFromOptions`. */
  slugField?: string;
  /** Forwarded to `getSlugFromOptions`: drop a trailing `/index` or `/_index`. */
  removeIndexFromSlug?: boolean;
  /** Forwarded to `getSlugFromOptions`: strip `YYYY-MM-DD-` prefixes. */
  stripDatePrefix?: boolean;
}
224
/**
 * Find every `.md` / `.mdx` file under a directory and parse it into a post.
 *
 * Two call shapes are supported:
 * - new: `scanContentDirectory(dir, options)` with a {@link ScanOptions} object;
 * - old: `scanContentDirectory(dir, frontmatterMapping?, ignorePatterns?)`.
 *
 * The second argument is treated as `ScanOptions` when it carries at least one
 * `ScanOptions` key; otherwise it is taken to be a legacy frontmatter mapping.
 *
 * Files whose frontmatter fails to parse are logged with `console.error` and
 * skipped. Read errors are not caught and reject the returned promise.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - Scan options, or a legacy mapping.
 * @param ignorePatterns - Legacy ignore patterns; only used with the old call
 *   shape.
 * @returns Posts sorted by publish date, newest first. Posts whose publish
 *   date is missing or unparseable come last, in discovery order.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
  ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
  // Every ScanOptions key counts as evidence of the new call shape, so an
  // object such as `{ stripDatePrefix: true }` is not mistaken for a mapping.
  const scanOptionKeys: ReadonlyArray<keyof ScanOptions> = [
    "frontmatterMapping",
    "ignorePatterns",
    "slugField",
    "removeIndexFromSlug",
    "stripDatePrefix",
  ];

  let options: ScanOptions;
  if (
    frontmatterMappingOrOptions &&
    scanOptionKeys.some((key) => key in frontmatterMappingOrOptions)
  ) {
    options = frontmatterMappingOrOptions as ScanOptions;
  } else {
    // Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
    options = {
      frontmatterMapping: frontmatterMappingOrOptions as
        | FrontmatterMapping
        | undefined,
      ignorePatterns,
    };
  }

  const {
    frontmatterMapping,
    ignorePatterns: ignore = [],
    slugField,
    removeIndexFromSlug,
    stripDatePrefix,
  } = options;

  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
      // A directory named e.g. "notes.md" must not reach readFile.
      nodir: true,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignore)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
          rawContent,
          frontmatterMapping,
        );
        const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
          slugField,
          removeIndexFromSlug,
          stripDatePrefix,
        });

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
          rawFrontmatter,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Missing or unparseable dates would yield NaN and make the comparator
  // inconsistent; map them to -Infinity so those posts sort last.
  const publishTime = (post: BlogPost): number => {
    const time = new Date(post.frontmatter.publishDate).getTime();
    return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
  };

  // Sort by publish date (newest first). Compare instead of subtracting so
  // that two -Infinity values compare as equal rather than producing NaN.
  posts.sort((a, b) => {
    const timeA = publishTime(a);
    const timeB = publishTime(b);
    if (timeA === timeB) {
      return 0;
    }
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}
309
/**
 * Write an `atUri` entry into a document's frontmatter.
 *
 * - No frontmatter block: a new YAML block containing only `atUri` is
 *   prepended, followed by a blank line and the original content.
 * - Frontmatter already has an `atUri` line: that line is replaced.
 * - Otherwise: the entry is inserted as the last line before the closing fence.
 *
 * The entry is written as `atUri = "..."` inside `+++` (TOML) blocks and as
 * `atUri: "..."` inside `---` / `***` blocks. Only the frontmatter block is
 * ever modified; text in the body that happens to mention `atUri:` is left
 * alone. A frontmatter block is recognized only when the first line is exactly
 * the fence, and the closing fence must be a line of its own.
 *
 * @param rawContent - Full text of the Markdown file.
 * @param atUri - Value to store; it is written verbatim between double quotes.
 * @returns The updated file text.
 * @throws Error("Could not find frontmatter end") when the file opens a
 *   frontmatter block that is never closed.
 */
export function updateFrontmatterWithAtUri(
  rawContent: string,
  atUri: string,
): string {
  // Group 1: fence, group 2: the line ending used by the file.
  const opening = /^(---|\+\+\+|\*\*\*)(\r?\n)/.exec(rawContent);

  // No frontmatter: create one with atUri
  if (!opening) {
    return `---\natUri: "${atUri}"\n---\n\n${rawContent}`;
  }

  const delimiter = opening[1] ?? "---";
  const eol = opening[2] ?? "\n";
  const isToml = delimiter === "+++";
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // Look for the closing fence as a whole line, so a "---" inside a value
  // (e.g. `title: a --- b`) is not mistaken for the end of the block.
  const afterOpening = rawContent.slice(opening[0].length);
  const escapedDelimiter = delimiter.replace(/[+*]/g, "\\$&");
  const closing = new RegExp(`^${escapedDelimiter}\\r?$`, "m").exec(
    afterOpening,
  );
  if (!closing) {
    throw new Error("Could not find frontmatter end");
  }

  const frontmatter = afterOpening.slice(0, closing.index);
  const fromClosingFence = afterOpening.slice(closing.index);

  // Replace an existing atUri line (YAML or TOML form, value may be empty).
  const existingEntry = /^[ \t]*atUri[ \t]*[=:][^\r\n]*/m;
  if (existingEntry.test(frontmatter)) {
    // Replacer function: the URI is inserted literally, "$" is not special.
    const updated = frontmatter.replace(existingEntry, () => atUriEntry);
    return `${opening[0]}${updated}${fromClosingFence}`;
  }

  // Insert atUri before the closing delimiter. `frontmatter` is either empty
  // or ends with a line break, so the entry starts on its own line.
  return `${opening[0]}${frontmatter}${atUriEntry}${eol}${fromClosingFence}`;
}
347
/**
 * Remove the `atUri` entry from a document's frontmatter.
 *
 * @param rawContent - Full text of the Markdown file.
 * @returns The text without any `atUri` line in its frontmatter. When nothing
 *   but blank lines would remain, the whole frontmatter block is removed along
 *   with the newlines that followed it. Content without a frontmatter block is
 *   returned unchanged.
 */
export function removeFrontmatterAtUri(rawContent: string): string {
  const block = /^(---|\+\+\+|\*\*\*)\n([\s\S]*?)\n\1\n/.exec(rawContent);
  if (block === null) {
    return rawContent;
  }

  const fence = block[1];
  const inner = block[2] ?? "";
  const rest = rawContent.slice(block[0].length);

  // Keep every frontmatter line that is not an atUri entry (YAML or TOML form).
  const kept: string[] = [];
  for (const line of inner.split("\n")) {
    if (!/^\s*atUri\s*[=:]\s*/.test(line)) {
      kept.push(line);
    }
  }

  // Nothing meaningful left: drop the block and the leading newlines after it.
  if (kept.every((line) => line.trim() === "")) {
    return rest.replace(/^\n+/, "");
  }

  return `${fence}\n${kept.join("\n")}\n${fence}\n${rest}`;
}
373
/**
 * Reduce Markdown to plain text.
 *
 * Fenced code blocks and images are removed entirely. Links keep their text.
 * Heading markers, bold / italic asterisks and inline-code backticks are
 * stripped while their content is kept. Runs of three or more newlines
 * collapse to a single blank line, and the result is trimmed.
 *
 * Order matters: code fences go first so their contents are never rewritten,
 * and images go before links because `![alt](src)` contains a link-shaped
 * `[alt](src)` that would otherwise survive as `!alt`.
 *
 * @param markdown - Markdown source.
 * @returns Plain-text approximation of the content.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
    .replace(/!\[.*?\]\(.*?\)/g, "") // Remove images (before links)
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
    // Heading markers only at the start of a line, so "C# is" stays intact
    .replace(/^[ \t]*#{1,6}[ \t]+/gm, "")
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}
386
/**
 * Get the plain-text content of a post.
 *
 * @param post - Post body plus, optionally, its raw frontmatter.
 * @param textContentField - Frontmatter key that may hold ready-made text.
 * @returns The value of that frontmatter key converted with `String()` when
 *   the key is given and its value is truthy; otherwise the post body run
 *   through `stripMarkdownForText`.
 */
export function getTextContent(
  post: { content: string; rawFrontmatter?: Record<string, unknown> },
  textContentField?: string,
): string {
  const override = textContentField
    ? post.rawFrontmatter?.[textContentField]
    : undefined;

  if (override) {
    return String(override);
  }

  return stripMarkdownForText(post.content);
}