forked from
stevedylan.dev/sequoia
A CLI for publishing standard.site documents to ATProto
1import * as fs from "node:fs/promises";
2import * as path from "node:path";
3import { glob } from "glob";
4import { minimatch } from "minimatch";
5import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";
6
/** Strips one pair of matching single or double quotes wrapping `text`, if present. */
function stripWrappingQuotes(text: string): string {
  const isDoubleQuoted = text.startsWith('"') && text.endsWith('"');
  const isSingleQuoted = text.startsWith("'") && text.endsWith("'");
  return isDoubleQuoted || isSingleQuoted ? text.slice(1, -1) : text;
}

/**
 * Collects consecutive YAML block-list items (`- item`) starting at `start`.
 * Blank lines inside the list are skipped; scanning stops at the first line
 * that is neither blank nor a list item. `next` is the index of that line.
 */
function collectYamlListItems(
  lines: string[],
  start: number,
): { items: string[]; next: number } {
  const items: string[] = [];
  let index = start;
  while (index < lines.length) {
    const candidate = lines[index];
    if (candidate === undefined) {
      index++;
      continue;
    }
    // Indented items keep the historical lenient form ("  -item" is accepted);
    // unindented items must have whitespace after the dash.
    const listMatch = candidate.match(/^(?:\s+-\s*|-\s+)(.*)$/);
    if (listMatch && listMatch[1] !== undefined) {
      items.push(stripWrappingQuotes(listMatch[1].trim()));
    } else if (candidate.trim() !== "") {
      // Hit a new key or other non-list content.
      break;
    }
    index++;
  }
  return { items, next: index };
}

/**
 * Splits a document into frontmatter and body, and normalizes the frontmatter
 * into the standard post fields.
 *
 * Supported delimiters: `---` and `***` (parsed as `key: value`) and `+++`
 * (parsed as `key = value`). This is a minimal line-based parser, not a full
 * YAML/TOML implementation: it understands flat scalar keys, inline arrays
 * (`[a, b]`), YAML block lists, and the literals `true` / `false`. Wrapping
 * quotes are removed before type detection, so a quoted `"true"` also becomes
 * a boolean. Both LF and CRLF line endings are accepted, and the closing
 * delimiter may be the last line of the file without a trailing newline.
 *
 * @param content - Full file contents, starting with the opening delimiter.
 * @param mapping - Optional custom frontmatter field names for the standard fields.
 * @returns The normalized frontmatter, the text after the closing delimiter
 *   (returned unmodified), and every raw key/value pair that was parsed.
 * @throws Error if `content` does not start with a delimited frontmatter block.
 */
export function parseFrontmatter(
  content: string,
  mapping?: FrontmatterMapping,
): {
  frontmatter: PostFrontmatter;
  body: string;
  rawFrontmatter: Record<string, unknown>;
} {
  // Group 1: delimiter, group 2: frontmatter text, group 3: body (absent when
  // the closing delimiter is the final line of the file).
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n([\s\S]*))?$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    throw new Error("Could not parse frontmatter");
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // +++ uses TOML-style `key = value`; --- and *** use YAML-style `key: value`.
  const isToml = delimiter === "+++";
  const separator = isToml ? "=" : ":";

  const raw: Record<string, unknown> = {};
  const lines = frontmatterStr.split(/\r?\n/);

  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    // Skip comment lines so "# note: text" is not mistaken for a key.
    if (line === undefined || line.trimStart().startsWith("#")) {
      i++;
      continue;
    }
    const sepIndex = line.indexOf(separator);
    if (sepIndex === -1) {
      i++;
      continue;
    }

    const key = line.slice(0, sepIndex).trim();
    const value = stripWrappingQuotes(line.slice(sepIndex + 1).trim());

    if (value.startsWith("[") && value.endsWith("]")) {
      // Inline array (simple case for tags). An empty `[]` is an empty list,
      // not a list containing one empty string.
      const arrayContent = value.slice(1, -1).trim();
      raw[key] =
        arrayContent === ""
          ? []
          : arrayContent
              .split(",")
              .map((item) => item.trim().replace(/^["']|["']$/g, ""));
    } else if (value === "" && !isToml) {
      // A YAML key with no value may be followed by a block list.
      const { items, next } = collectYamlListItems(lines, i + 1);
      if (items.length > 0) {
        raw[key] = items;
        i = next;
        continue;
      }
      raw[key] = value;
    } else if (value === "true") {
      raw[key] = true;
    } else if (value === "false") {
      raw[key] = false;
    } else {
      raw[key] = value;
    }
    i++;
  }

  // Apply field mappings to normalize to the standard PostFrontmatter fields.
  const frontmatter: Record<string, unknown> = {};

  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date: custom field first, then `publishDate`, then common aliases.
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
    for (const field of dateFields) {
      if (raw[field]) {
        frontmatter.publishDate = raw[field];
        break;
      }
    }
  }

  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Draft is only set when some draft value was present in the frontmatter.
  const draftField = mapping?.draft || "draft";
  const draftValue = raw[draftField] ?? raw.draft;
  if (draftValue !== undefined) {
    frontmatter.draft = draftValue === true || draftValue === "true";
  }

  // Always preserve atUri (internal field).
  frontmatter.atUri = raw.atUri;

  return {
    frontmatter: frontmatter as unknown as PostFrontmatter,
    body,
    rawFrontmatter: raw,
  };
}
170
/**
 * Derives a slug from a file name: drops a trailing `.md` / `.mdx` extension,
 * lowercases the rest, and replaces each run of whitespace with a single hyphen.
 *
 * @param filename - File name (or path) to convert.
 * @returns The slugified name.
 */
export function getSlugFromFilename(filename: string): string {
  const withoutExtension = filename.replace(/\.mdx?$/, "");
  const lowered = withoutExtension.toLowerCase();
  return lowered.replace(/\s+/g, "-");
}
177
/** Settings that control how `getSlugFromOptions` derives a post slug. */
export interface SlugOptions {
  /** Raw frontmatter key whose non-empty string value supplies the slug; the file path is used otherwise. */
  slugField?: string;
  /** When true, a trailing `/index` or `/_index` segment is removed from the slug. */
  removeIndexFromSlug?: boolean;
  /** When true, a `YYYY-MM-DD-` prefix is removed from the start of each path segment. */
  stripDatePrefix?: boolean;
}
183
/**
 * Computes a post slug from its frontmatter or, failing that, its relative path.
 *
 * When `options.slugField` names a raw frontmatter key holding a non-empty
 * string, that value (minus one leading slash) is the slug source; in every
 * other case the relative path without its `.md` / `.mdx` extension is used.
 * The source is lowercased and whitespace runs become hyphens. Index-suffix
 * removal and date-prefix stripping are then applied, in that order, if enabled.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Unmapped frontmatter key/value pairs of the file.
 * @param options - Slug derivation settings; all default to off.
 * @returns The resulting slug.
 */
export function getSlugFromOptions(
  relativePath: string,
  rawFrontmatter: Record<string, unknown>,
  options: SlugOptions = {},
): string {
  const normalize = (source: string): string =>
    source.toLowerCase().replace(/\s+/g, "-");

  const candidate = options.slugField
    ? rawFrontmatter[options.slugField]
    : undefined;

  let slug =
    typeof candidate === "string" && candidate !== ""
      ? normalize(candidate.replace(/^\//, ""))
      : normalize(relativePath.replace(/\.mdx?$/, ""));

  if (options.removeIndexFromSlug) {
    // Drop a trailing /index or /_index segment.
    slug = slug.replace(/\/_?index$/, "");
  }

  if (options.stripDatePrefix) {
    // Drop a Jekyll-style YYYY-MM-DD- prefix at the start of any path segment.
    slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
  }

  return slug;
}
233
/**
 * Expands `{token}` placeholders in a path template for a post.
 *
 * Built-in tokens: `{slug}`, `{year}`, `{month}`, `{day}` (from the publish
 * date, month/day zero-padded) and `{title}` (lowercased title with whitespace
 * turned into hyphens and all other non-word characters removed). Any other
 * `{field}` is replaced with the raw frontmatter value of that name when it is
 * a string, and with an empty string otherwise.
 *
 * All tokens are expanded in a single pass with a replacer function, so
 * substituted values are inserted literally: `$`-patterns in a slug are not
 * interpreted and braces inside a value are not expanded again.
 *
 * @param template - Path template such as `/blog/{year}/{slug}`.
 * @param post - Post supplying the slug, frontmatter and raw frontmatter.
 * @returns The expanded path, always starting with `/`.
 */
export function resolvePathTemplate(template: string, post: BlogPost): string {
  const rawDate = post.frontmatter.publishDate;
  const publishDate = new Date(rawDate);

  // Date-only ISO strings (YYYY-MM-DD) are parsed as UTC midnight, so they must
  // be read back with UTC getters; local getters would yield the previous day
  // in time zones behind UTC. Every other form keeps local-time semantics.
  const isDateOnly =
    typeof rawDate === "string" && /^\d{4}-\d{2}-\d{2}$/.test(rawDate);
  const year = String(
    isDateOnly ? publishDate.getUTCFullYear() : publishDate.getFullYear(),
  );
  const month = String(
    (isDateOnly ? publishDate.getUTCMonth() : publishDate.getMonth()) + 1,
  ).padStart(2, "0");
  const day = String(
    isDateOnly ? publishDate.getUTCDate() : publishDate.getDate(),
  ).padStart(2, "0");

  const slugifiedTitle = (post.frontmatter.title || "")
    .toLowerCase()
    .replace(/\s+/g, "-")
    .replace(/[^\w-]/g, "");

  const knownTokens = new Map<string, string>([
    ["slug", post.slug],
    ["year", year],
    ["month", month],
    ["day", day],
    ["title", slugifiedTitle],
  ]);

  const result = template.replace(/\{(\w+)\}/g, (_match, field: string) => {
    const known = knownTokens.get(field);
    if (known !== undefined) {
      return known;
    }
    // Unknown tokens fall back to raw frontmatter; non-string values expand to "".
    const value = post.rawFrontmatter[field];
    return typeof value === "string" ? value : "";
  });

  // Ensure leading slash
  return result.startsWith("/") ? result : `/${result}`;
}
269
/**
 * Builds the site path for a post.
 *
 * A non-empty `pathTemplate` takes precedence and is expanded with
 * `resolvePathTemplate`. Otherwise the path is `<prefix>/<slug>`, where the
 * prefix is `pathPrefix` or `/posts` when `pathPrefix` is empty or omitted.
 *
 * @param post - Post whose slug (and template fields) are used.
 * @param pathPrefix - Prefix for the default `<prefix>/<slug>` form.
 * @param pathTemplate - Optional template overriding the default form.
 * @returns The resolved path.
 */
export function resolvePostPath(
  post: BlogPost,
  pathPrefix?: string,
  pathTemplate?: string,
): string {
  if (!pathTemplate) {
    const base = pathPrefix ? pathPrefix : "/posts";
    return base + "/" + post.slug;
  }
  return resolvePathTemplate(pathTemplate, post);
}
281
/**
 * Computes the SHA-256 digest of a string's UTF-8 encoding.
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hexadecimal string (64 characters).
 */
export async function getContentHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

  let hex = "";
  for (const byte of digest) {
    // Two hex digits per byte, zero-padded.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
289
/**
 * Reports whether a path matches at least one ignore pattern.
 *
 * @param relativePath - Path to test, relative to the content directory.
 * @param ignorePatterns - Patterns passed to `minimatch`, tested in order.
 * @returns `true` on the first matching pattern, `false` if none match.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((pattern) => minimatch(relativePath, pattern));
}
298
/** Options accepted by `scanContentDirectory` in its options-object form. */
export interface ScanOptions {
  /** Custom frontmatter field names, forwarded to `parseFrontmatter`. */
  frontmatterMapping?: FrontmatterMapping;
  /** Patterns for relative paths to skip; matching files are not read. */
  ignorePatterns?: string[];
  /** Raw frontmatter key to take the slug from, forwarded to `getSlugFromOptions`. */
  slugField?: string;
  /** Forwarded to `getSlugFromOptions`: remove a trailing `/index` or `/_index`. */
  removeIndexFromSlug?: boolean;
  /** Forwarded to `getSlugFromOptions`: remove `YYYY-MM-DD-` segment prefixes. */
  stripDatePrefix?: boolean;
}
306
/**
 * Finds every `.md` / `.mdx` file under a directory and parses it into a post.
 *
 * Two call forms are supported:
 * - legacy: `(contentDir, frontmatterMapping?, ignorePatterns?)`
 * - options: `(contentDir, options)`, where the second argument has at least
 *   one `ScanOptions` key. Any of the five keys selects this form, so an
 *   object carrying only `removeIndexFromSlug` or `stripDatePrefix` is honoured
 *   rather than being mistaken for a frontmatter mapping. In this form the
 *   positional `ignorePatterns` argument is not used.
 *
 * Files matching an ignore pattern are skipped before being read. Files whose
 * frontmatter cannot be parsed are reported with `console.error` and omitted;
 * file read errors are not caught and reject the returned promise.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - A frontmatter mapping (legacy form) or a `ScanOptions` object.
 * @param ignorePatterns - Ignore patterns for the legacy form.
 * @returns Parsed posts sorted by publish date, newest first.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
  ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
  // Keys whose presence identifies the second argument as ScanOptions.
  const scanOptionKeys = [
    "frontmatterMapping",
    "ignorePatterns",
    "slugField",
    "removeIndexFromSlug",
    "stripDatePrefix",
  ] as const;

  const candidate = frontmatterMappingOrOptions;
  let options: ScanOptions;
  if (candidate && scanOptionKeys.some((key) => key in candidate)) {
    options = candidate as ScanOptions;
  } else {
    // Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
    options = {
      frontmatterMapping: candidate as FrontmatterMapping | undefined,
      ignorePatterns,
    };
  }

  const {
    frontmatterMapping,
    ignorePatterns: ignore = [],
    slugField,
    removeIndexFromSlug,
    stripDatePrefix,
  } = options;

  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignore)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
          rawContent,
          frontmatterMapping,
        );
        const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
          slugField,
          removeIndexFromSlug,
          stripDatePrefix,
        });

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
          rawFrontmatter,
        });
      } catch (error) {
        // Best effort: one unparseable file must not abort the whole scan.
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Sort by publish date (newest first)
  posts.sort((a, b) => {
    const dateA = new Date(a.frontmatter.publishDate);
    const dateB = new Date(b.frontmatter.publishDate);
    return dateB.getTime() - dateA.getTime();
  });

  return posts;
}
391
/**
 * Returns `rawContent` with an `atUri` entry set in its frontmatter.
 *
 * The entry is written as `atUri = "<uri>"` for `+++` frontmatter and as
 * `atUri: "<uri>"` for `---` / `***`. An existing `atUri` line inside the
 * frontmatter is replaced in place; otherwise the entry is inserted as the
 * last line before the closing delimiter, using the file's own line ending.
 *
 * Only the frontmatter block is inspected and modified, so `atUri:` text in
 * the body is left alone. The closing delimiter must sit on its own line, so
 * a delimiter sequence inside a value (e.g. `title: a --- b`) is not mistaken
 * for the end of the block.
 *
 * @param rawContent - Full file contents, starting with the opening delimiter.
 * @param atUri - AT URI to record.
 * @returns The updated file contents.
 * @throws Error if the frontmatter block (opening and closing delimiter) cannot be located.
 */
export function updateFrontmatterWithAtUri(
  rawContent: string,
  atUri: string,
): string {
  // Opening delimiter line: ---, +++ or *** followed by a line break.
  const opening = /^(---|\+\+\+|\*\*\*)[ \t]*(\r?\n)/.exec(rawContent);
  const delimiter = opening?.[1];
  const openingEol = opening?.[2];
  if (!opening || delimiter === undefined || openingEol === undefined) {
    throw new Error("Could not find frontmatter end");
  }

  const isToml = delimiter === "+++";
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // Closing delimiter: the same delimiter alone on a later line.
  const escapedDelimiter = delimiter.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const closingPattern = new RegExp(
    `(\\r?\\n)${escapedDelimiter}[ \\t]*(?:\\r?\\n|$)`,
    "g",
  );
  // Start at the opening line's own line break so an empty block still matches.
  closingPattern.lastIndex = opening[0].length - openingEol.length;
  const closing = closingPattern.exec(rawContent);
  const closingEol = closing?.[1];
  if (!closing || closingEol === undefined) {
    throw new Error("Could not find frontmatter end");
  }

  const frontmatterStart = opening[0].length;
  const frontmatterEnd = closing.index + closingEol.length;
  const frontmatter = rawContent.slice(frontmatterStart, frontmatterEnd);

  // Replace an existing atUri line (YAML or TOML form), keeping its indentation.
  const existingEntry = /^([ \t]*)atUri[ \t]*[=:].*$/m;
  if (existingEntry.test(frontmatter)) {
    // A replacer function inserts the entry literally, without `$` expansion.
    const updated = frontmatter.replace(
      existingEntry,
      (_line, indent: string) => `${indent}${atUriEntry}`,
    );
    return (
      rawContent.slice(0, frontmatterStart) +
      updated +
      rawContent.slice(frontmatterEnd)
    );
  }

  // Insert atUri before the closing delimiter
  return (
    rawContent.slice(0, frontmatterEnd) +
    atUriEntry +
    closingEol +
    rawContent.slice(frontmatterEnd)
  );
}
424
/**
 * Reduces Markdown to approximate plain text using regular expressions.
 *
 * Fenced code blocks and images are removed entirely; links keep only their
 * text; header markers, bold/italic markers and inline-code backticks are
 * removed while their content is kept. Runs of three or more newlines are
 * collapsed to two and the result is trimmed.
 *
 * The order of the rules matters:
 * - fenced code goes first so its contents are never treated as markup;
 * - images go before links, otherwise the link rule consumes the
 *   `[alt](src)` part of `![alt](src)` and leaves `!alt` behind;
 * - header markers are only stripped at the start of a line, so text such as
 *   "C# is" keeps its `#`.
 *
 * @param markdown - Markdown source.
 * @returns The plain-text approximation.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
    .replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
    // Keep link text; empty text also matches so a linked image leaves nothing behind.
    .replace(/\[([^\]]*)\]\([^)]+\)/g, "$1")
    .replace(/^([ \t]*)#{1,6}\s/gm, "$1") // Remove header markers at line start
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}