forked from
stevedylan.dev/sequoia
A CLI for publishing standard.site documents to ATProto
1import { webcrypto as crypto } from "node:crypto";
2import * as fs from "node:fs/promises";
3import * as path from "node:path";
4import { glob } from "glob";
5import { minimatch } from "minimatch";
6import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";
7
/**
 * Removes one pair of matching surrounding quotes (single or double) from a
 * scalar value; returns the value unchanged when it is not quoted.
 */
function stripSurroundingQuotes(value: string): string {
  const doubleQuoted = value.startsWith('"') && value.endsWith('"');
  const singleQuoted = value.startsWith("'") && value.endsWith("'");
  return doubleQuoted || singleQuoted ? value.slice(1, -1) : value;
}

/**
 * Splits a document into its frontmatter and body and normalizes the
 * frontmatter to the standard post fields.
 *
 * Supported frontmatter delimiters:
 *   - `---` (YAML, `key: value`)
 *   - `+++` (TOML, `key = value`)
 *   - `***` (alternative, parsed like YAML)
 *
 * The parser is deliberately minimal: it understands flat `key`/`value`
 * lines, quoted strings, `true`/`false`, inline arrays (`[a, b]`) and, for
 * YAML, multi-line `- item` lists. Both LF and CRLF line endings are
 * accepted, and the closing delimiter may be the last line of the file.
 *
 * When the document has no frontmatter, the title is taken from the first
 * `# ` heading (empty string if there is none) and the publish date defaults
 * to today's date (UTC, `YYYY-MM-DD`).
 *
 * @param content - Full text of the document.
 * @param mapping - Optional custom frontmatter field names to read the
 *   standard fields from.
 * @returns The normalized frontmatter, the document body, and the raw parsed
 *   key/value pairs.
 */
export function parseFrontmatter(
  content: string,
  mapping?: FrontmatterMapping,
): {
  frontmatter: PostFrontmatter;
  body: string;
  rawFrontmatter: Record<string, unknown>;
} {
  // Opening delimiter, frontmatter text, the same delimiter on its own line,
  // then (optionally) the body. `\r?` tolerates CRLF files.
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n([\s\S]*))?$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    const [, titleMatch] = content.trim().match(/^# (.+)$/m) || [];
    const title = titleMatch ?? "";
    const [today] = new Date().toISOString().split("T");
    const publishDate = today ?? "";

    return {
      frontmatter: { title, publishDate },
      body: content,
      // Mirror the synthesized fields under their own keys.
      rawFrontmatter: { title, publishDate },
    };
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // +++ uses TOML (key = value); --- and *** use YAML (key: value).
  const isToml = delimiter === "+++";
  const separator = isToml ? "=" : ":";

  const raw: Record<string, unknown> = {};
  const lines = frontmatterStr.split(/\r?\n/);

  let i = 0;
  while (i < lines.length) {
    const line = lines[i] ?? "";
    const sepIndex = line.indexOf(separator);
    if (sepIndex === -1) {
      // Not a key/value line (blank line, stray list item, ...).
      i++;
      continue;
    }

    const key = line.slice(0, sepIndex).trim();
    const value = stripSurroundingQuotes(line.slice(sepIndex + 1).trim());

    if (value.startsWith("[") && value.endsWith("]")) {
      // Inline array (simple case for tags). Empty entries are dropped so
      // that `[]` yields an empty array instead of [""].
      raw[key] = value
        .slice(1, -1)
        .split(",")
        .map((item) => item.trim().replace(/^["']|["']$/g, ""))
        .filter((item) => item !== "");
    } else if (value === "" && !isToml) {
      // YAML key with no value: collect any following `- item` lines.
      const arrayItems: string[] = [];
      let j = i + 1;
      while (j < lines.length) {
        const nextLine = lines[j] ?? "";
        const listMatch = nextLine.match(/^\s+-\s*(.*)$/);
        if (listMatch && listMatch[1] !== undefined) {
          arrayItems.push(stripSurroundingQuotes(listMatch[1].trim()));
          j++;
        } else if (nextLine.trim() === "") {
          // Blank lines inside the list are skipped.
          j++;
        } else {
          // Hit a new key or other non-list content.
          break;
        }
      }
      if (arrayItems.length > 0) {
        raw[key] = arrayItems;
        // Resume after the consumed list lines.
        i = j;
        continue;
      }
      raw[key] = value;
    } else if (value === "true") {
      raw[key] = true;
    } else if (value === "false") {
      raw[key] = false;
    } else {
      raw[key] = value;
    }
    i++;
  }

  // Apply field mappings to normalize to the standard PostFrontmatter fields.
  const frontmatter: Record<string, unknown> = {};

  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date: custom field first, then `publishDate`, then common names.
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
    for (const field of dateFields) {
      if (raw[field]) {
        frontmatter.publishDate = raw[field];
        break;
      }
    }
  }

  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Draft is only set when the source declares it; accepts boolean or "true".
  const draftField = mapping?.draft || "draft";
  const draftValue = raw[draftField] ?? raw.draft;
  if (draftValue !== undefined) {
    frontmatter.draft = draftValue === true || draftValue === "true";
  }

  // Always preserve atUri (internal field).
  frontmatter.atUri = raw.atUri;

  return {
    frontmatter: frontmatter as unknown as PostFrontmatter,
    body,
    rawFrontmatter: raw,
  };
}
185
/**
 * Derives a slug from a file name: drops a trailing `.md`/`.mdx` extension,
 * lowercases the result and replaces each run of whitespace with a hyphen.
 *
 * @param filename - File name to convert.
 * @returns The slug form of the file name.
 */
export function getSlugFromFilename(filename: string): string {
  const withoutExtension = filename.replace(/\.mdx?$/, "");
  const lowered = withoutExtension.toLowerCase();
  return lowered.split(/\s+/).join("-");
}
192
/** Options controlling how `getSlugFromOptions` derives a post's slug. */
export interface SlugOptions {
  /**
   * Raw frontmatter key to read the slug from. When unset, or when the key
   * holds no non-empty string, the slug is derived from the file path.
   */
  slugField?: string;
  /** Drop a trailing `/index` or `/_index` segment from the slug. */
  removeIndexFromSlug?: boolean;
  /** Drop a Jekyll-style `YYYY-MM-DD-` prefix from path segments. */
  stripDatePrefix?: boolean;
}
198
/**
 * Computes the slug for a post.
 *
 * The slug comes from the frontmatter field named by `options.slugField`
 * when that field holds a non-empty string (a single leading `/` is
 * removed). Otherwise it is derived from `relativePath` with the
 * `.md`/`.mdx` extension removed. Either way the value is lowercased and
 * runs of whitespace become hyphens.
 *
 * Backslash separators in `relativePath` are converted to `/` first, so
 * Windows-style paths yield the same slug as POSIX ones and the index/date
 * rules below (which match on `/`) still apply.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Raw parsed frontmatter key/value pairs.
 * @param options - Slug options; all default to off/unset.
 * @returns The computed slug.
 */
export function getSlugFromOptions(
  relativePath: string,
  rawFrontmatter: Record<string, unknown>,
  options: SlugOptions = {},
): string {
  const {
    slugField,
    removeIndexFromSlug = false,
    stripDatePrefix = false,
  } = options;

  const frontmatterValue = slugField ? rawFrontmatter[slugField] : undefined;

  // Prefer the frontmatter slug; fall back to the file path.
  const base =
    typeof frontmatterValue === "string" && frontmatterValue !== ""
      ? frontmatterValue.replace(/^\//, "")
      : relativePath.replace(/\\/g, "/").replace(/\.mdx?$/, "");

  let slug = base.toLowerCase().replace(/\s+/g, "-");

  // Remove /index or /_index suffix if configured.
  if (removeIndexFromSlug) {
    slug = slug.replace(/\/_?index$/, "");
  }

  // Strip Jekyll-style date prefix (YYYY-MM-DD-) from each path segment.
  if (stripDatePrefix) {
    slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
  }

  return slug;
}
248
/**
 * Computes the SHA-256 digest of a string's UTF-8 encoding.
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
export async function getContentHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

  let hex = "";
  for (const byte of digest) {
    // Two hex digits per byte, zero-padded.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
256
/**
 * Reports whether a path matches at least one of the given minimatch
 * patterns.
 *
 * @param relativePath - Path to test.
 * @param ignorePatterns - Glob patterns to test against.
 * @returns `true` if any pattern matches, otherwise `false`.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((pattern) => minimatch(relativePath, pattern));
}
265
/** Options accepted by `scanContentDirectory`. */
export interface ScanOptions {
  /** Custom frontmatter field names, forwarded to `parseFrontmatter`. */
  frontmatterMapping?: FrontmatterMapping;
  /** Glob patterns; files whose relative path matches any are skipped. */
  ignorePatterns?: string[];
  /** Raw frontmatter key to read the slug from, forwarded to `getSlugFromOptions`. */
  slugField?: string;
  /** Forwarded to `getSlugFromOptions`: drop a trailing `/index` or `/_index`. */
  removeIndexFromSlug?: boolean;
  /** Forwarded to `getSlugFromOptions`: drop a `YYYY-MM-DD-` prefix. */
  stripDatePrefix?: boolean;
}
273
/**
 * Finds every `.md`/`.mdx` file under a directory, parses it, and returns
 * the resulting posts sorted by publish date, newest first.
 *
 * Two call shapes are supported:
 *   - new: `scanContentDirectory(dir, options)`
 *   - old: `scanContentDirectory(dir, frontmatterMapping?, ignorePatterns?)`
 *
 * The second argument is treated as `ScanOptions` when it contains any
 * `ScanOptions` key; otherwise it is treated as a frontmatter mapping.
 *
 * Files that fail to parse are logged with `console.error` and skipped.
 * Posts whose publish date is missing or unparseable are sorted after all
 * dated posts.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - Scan options, or (old signature) a
 *   frontmatter mapping.
 * @param ignorePatterns - Old signature only: glob patterns to skip.
 * @returns The parsed posts, newest first.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
  ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
  // Every ScanOptions key counts, so an options object that only sets e.g.
  // `stripDatePrefix` is not mistaken for a frontmatter mapping.
  const scanOptionKeys = [
    "frontmatterMapping",
    "ignorePatterns",
    "slugField",
    "removeIndexFromSlug",
    "stripDatePrefix",
  ];
  const candidate = frontmatterMappingOrOptions;
  const isScanOptions =
    candidate != null && scanOptionKeys.some((key) => key in candidate);

  const options: ScanOptions = isScanOptions
    ? (candidate as ScanOptions)
    : {
        // Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
        frontmatterMapping: candidate as FrontmatterMapping | undefined,
        ignorePatterns,
      };

  const {
    frontmatterMapping,
    ignorePatterns: ignore = [],
    slugField,
    removeIndexFromSlug,
    stripDatePrefix,
  } = options;

  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
      // A directory named like `notes.md` must not reach readFile.
      nodir: true,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns.
      if (shouldIgnore(relativePath, ignore)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
          rawContent,
          frontmatterMapping,
        );
        const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
          slugField,
          removeIndexFromSlug,
          stripDatePrefix,
        });

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
          rawFrontmatter,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Invalid or missing dates map to -Infinity so the comparator never
  // returns NaN and such posts end up last.
  const publishTime = (post: BlogPost): number => {
    const time = new Date(post.frontmatter.publishDate).getTime();
    return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
  };

  // Sort by publish date (newest first).
  posts.sort((a, b) => {
    const timeA = publishTime(a);
    const timeB = publishTime(b);
    if (timeA === timeB) {
      return 0;
    }
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}
358
/**
 * Returns a copy of a document whose frontmatter contains the given `atUri`.
 *
 *   - No frontmatter: a new `---` block holding only `atUri` is prepended.
 *   - Frontmatter already has an `atUri` line: that line is replaced.
 *   - Otherwise: an `atUri` line is inserted just before the closing
 *     delimiter.
 *
 * The entry is written as `atUri = "..."` for `+++` (TOML) frontmatter and
 * as `atUri: "..."` for `---` / `***`.
 *
 * Only the frontmatter block is searched for an existing entry, so body text
 * that mentions `atUri:` is never modified. The closing delimiter must start
 * a line, so a delimiter sequence inside a value is not mistaken for it.
 *
 * @param rawContent - Full text of the document.
 * @param atUri - AT URI to store.
 * @returns The updated document text.
 * @throws Error if the document opens with a delimiter, has no existing
 *   `atUri` line, and no closing delimiter line can be found.
 */
export function updateFrontmatterWithAtUri(
  rawContent: string,
  atUri: string,
): string {
  // Detect which delimiter is used (---, +++, or ***).
  const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
  const delimiter = delimiterMatch?.[1] ?? "---";
  const isToml = delimiter === "+++";

  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // No frontmatter: create one with atUri.
  if (!delimiterMatch) {
    return `---\n${atUriEntry}\n---\n\n${rawContent}`;
  }

  // The closing delimiter is the first one that begins a line after the
  // opening delimiter.
  const newlineBeforeClosing = rawContent.indexOf(
    `\n${delimiter}`,
    delimiter.length,
  );
  const hasClosing = newlineBeforeClosing !== -1;
  const closingIndex = newlineBeforeClosing + 1;

  // Without a closing delimiter the whole text is treated as frontmatter for
  // the replace path only.
  const frontmatterBlock = hasClosing
    ? rawContent.slice(0, closingIndex)
    : rawContent;
  const remainder = hasClosing ? rawContent.slice(closingIndex) : "";

  // Existing entry in either YAML or TOML form, with any (or no) value.
  const existingEntry = /^([ \t]*)atUri[ \t]*[=:].*$/m;
  if (existingEntry.test(frontmatterBlock)) {
    // A replacer function keeps `$` in the URI from being read as a
    // replacement pattern.
    const updated = frontmatterBlock.replace(
      existingEntry,
      (_line: string, indent: string) => `${indent}${atUriEntry}`,
    );
    return `${updated}${remainder}`;
  }

  if (!hasClosing) {
    throw new Error("Could not find frontmatter end");
  }

  // Insert atUri before the closing delimiter.
  return `${frontmatterBlock}${atUriEntry}\n${remainder}`;
}
396
/**
 * Reduces Markdown to approximate plain text.
 *
 * Fenced code blocks and images are removed entirely; links keep only their
 * text; heading markers, bold/italic markers and inline-code backticks are
 * dropped; runs of three or more newlines collapse to two; the result is
 * trimmed.
 *
 * Order matters: fenced code goes first so later rules never touch code,
 * and images go before links so `![alt](src)` is not half-consumed by the
 * link rule.
 *
 * @param markdown - Markdown source.
 * @returns The plain-text approximation.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove fenced code blocks
    .replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
    .replace(/\[([^\]]*)\]\([^)]+\)/g, "$1") // Remove links, keep text
    .replace(/^([ \t]*)#{1,6}[ \t]+/gm, "$1") // Remove heading markers at line start only
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}
409
/**
 * Returns the plain-text content for a post.
 *
 * When `textContentField` is given and the post's raw frontmatter holds a
 * truthy value under that key, the value is returned as a string.
 * Otherwise the post's Markdown content is passed through
 * `stripMarkdownForText`.
 *
 * @param post - Post content plus optional raw frontmatter.
 * @param textContentField - Optional raw frontmatter key to prefer.
 * @returns The text content.
 */
export function getTextContent(
  post: { content: string; rawFrontmatter?: Record<string, unknown> },
  textContentField?: string,
): string {
  const override = textContentField
    ? post.rawFrontmatter?.[textContentField]
    : undefined;

  if (!override) {
    return stripMarkdownForText(post.content);
  }
  return String(override);
}