forked from
stevedylan.dev/sequoia
A CLI for publishing standard.site documents to ATProto
1import * as fs from "node:fs/promises";
2import * as path from "node:path";
3import { glob } from "glob";
4import yaml from "js-yaml";
5import { minimatch } from "minimatch";
6import * as toml from "smol-toml";
7import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";
8
/**
 * Returns the first truthy value found in `raw` under the given candidate keys.
 * Undefined/empty candidate keys are skipped. Returns `undefined` when nothing matches.
 */
function pickFirstTruthy(
  raw: Record<string, unknown>,
  candidates: readonly (string | undefined)[],
): unknown {
  for (const key of candidates) {
    if (key && raw[key]) {
      return raw[key];
    }
  }
  return undefined;
}

/**
 * Splits a document into its frontmatter block and body, parses the
 * frontmatter, and normalizes it to the standard `PostFrontmatter` fields.
 *
 * Supported delimiters (the closing delimiter must equal the opening one):
 *   - `---` (YAML) - Jekyll, Astro, most SSGs
 *   - `+++` (TOML) - Hugo
 *   - `***` (parsed as YAML) - alternative format
 *
 * Both LF and CRLF line endings are accepted, and the closing delimiter may be
 * the very last thing in the file (no trailing newline required).
 *
 * @param content - Full file contents, starting with the opening delimiter.
 * @param mapping - Optional custom frontmatter field names; each falls back to
 *   the standard field name when the custom field is missing or falsy.
 * @returns The normalized frontmatter, the text after the closing delimiter,
 *   and the parsed frontmatter exactly as the YAML/TOML parser returned it.
 * @throws Error("Could not parse frontmatter") when no delimited block is
 *   found at the start of `content`. Errors raised by the YAML/TOML parser are
 *   not caught here and propagate to the caller.
 */
export function parseFrontmatter(
  content: string,
  mapping?: FrontmatterMapping,
): {
  frontmatter: PostFrontmatter;
  body: string;
  rawFrontmatter: Record<string, unknown>;
} {
  // `\r?\n` tolerates Windows line endings; `(?:\r?\n|$)` lets the closing
  // delimiter sit at end-of-file without a trailing newline.
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n|$)([\s\S]*)$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    throw new Error("Could not parse frontmatter");
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // +++ uses TOML (key = value); --- and *** use YAML (key: value)
  const isToml = delimiter === "+++";

  let raw: Record<string, unknown>;
  if (isToml) {
    raw = toml.parse(frontmatterStr) as Record<string, unknown>;
  } else {
    // Use CORE_SCHEMA to keep dates as strings rather than Date objects.
    // An empty YAML document loads as null/undefined, hence the `?? {}`.
    raw =
      (yaml.load(frontmatterStr, { schema: yaml.CORE_SCHEMA }) as Record<
        string,
        unknown
      >) ?? {};
  }

  // Apply field mappings to normalize to standard PostFrontmatter fields
  const frontmatter: Record<string, unknown> = {};

  // Title / description: mapped field first, then the standard field
  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date: custom field, then `publishDate`, then common aliases.
  // The property is left unset when no candidate holds a truthy value.
  const publishDate = pickFirstTruthy(raw, [
    mapping?.publishDate,
    "publishDate",
    "pubDate",
    "date",
    "createdAt",
    "created_at",
  ]);
  if (publishDate !== undefined) {
    frontmatter.publishDate = publishDate;
  }

  // Updated date: custom field, then `updatedAt`, then common aliases
  const updatedAt = pickFirstTruthy(raw, [
    mapping?.updatedAt,
    "updatedAt",
    "updated_at",
    "modifiedAt",
    "modified_at",
  ]);
  if (updatedAt !== undefined) {
    frontmatter.updatedAt = updatedAt;
  }

  // Cover image is exposed as `ogImage`
  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  // Tags mapping
  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Draft mapping: uses `??` so an explicit `false` in the mapped field is
  // honoured; only boolean true or the string "true" count as draft.
  const draftField = mapping?.draft || "draft";
  const draftValue = raw[draftField] ?? raw.draft;
  if (draftValue !== undefined) {
    frontmatter.draft = draftValue === true || draftValue === "true";
  }

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  return {
    frontmatter: frontmatter as unknown as PostFrontmatter,
    body,
    rawFrontmatter: raw,
  };
}
119
/**
 * Builds a slug from a markdown filename.
 *
 * A trailing `.md` / `.mdx` extension is removed first (the match is
 * case-sensitive and happens before lowercasing), then the name is lowercased
 * and every run of whitespace is collapsed into a single hyphen.
 *
 * @param filename - File name, e.g. `"My Post.md"`.
 * @returns The slug, e.g. `"my-post"`.
 */
export function getSlugFromFilename(filename: string): string {
  const withoutExtension = filename.replace(/\.mdx?$/, "");
  const lowered = withoutExtension.toLowerCase();
  return lowered.replace(/\s+/g, "-");
}
126
/** Options controlling how `getSlugFromOptions` derives a post slug. */
export interface SlugOptions {
  /**
   * Name of the raw frontmatter field to read the slug from. When unset, or
   * when that field is not a non-empty string, the file's relative path is used.
   */
  slugField?: string;
  /** Remove a trailing `/index` or `/_index` from the slug. Treated as `false` when omitted. */
  removeIndexFromSlug?: boolean;
  /** Remove Jekyll-style `YYYY-MM-DD-` prefixes from the slug. Treated as `false` when omitted. */
  stripDatePrefix?: boolean;
}
132
/**
 * Computes a post slug from either a frontmatter field or the file's relative path.
 *
 * Source selection:
 *   - if `options.slugField` is set and that frontmatter field holds a
 *     non-empty string, that value is used with one leading `/` removed;
 *   - otherwise the relative path is used with its `.md` / `.mdx` extension removed.
 *
 * The chosen value is lowercased and whitespace runs become `-`. Afterwards a
 * trailing `/index` or `/_index` is removed (when `removeIndexFromSlug`) and
 * `YYYY-MM-DD-` prefixes at the start of any path segment are removed (when
 * `stripDatePrefix`).
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Unmapped frontmatter as parsed from the file.
 * @param options - Slug derivation options; all default to off.
 * @returns The resulting slug.
 */
export function getSlugFromOptions(
  relativePath: string,
  rawFrontmatter: Record<string, unknown>,
  options: SlugOptions = {},
): string {
  const {
    slugField,
    removeIndexFromSlug = false,
    stripDatePrefix = false,
  } = options;

  // Only a non-empty string frontmatter value overrides the file path.
  const candidate = slugField ? rawFrontmatter[slugField] : undefined;
  const useFrontmatter = typeof candidate === "string" && candidate !== "";

  const base = useFrontmatter
    ? (candidate as string).replace(/^\//, "") // drop leading slash if present
    : relativePath.replace(/\.mdx?$/, ""); // drop markdown extension

  let slug = base.toLowerCase().replace(/\s+/g, "-");

  if (removeIndexFromSlug) {
    // Remove /index or /_index suffix
    slug = slug.replace(/\/_?index$/, "");
  }

  if (stripDatePrefix) {
    // Strip Jekyll-style date prefix (YYYY-MM-DD-) from each path segment
    slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
  }

  return slug;
}
182
/**
 * Expands `{token}` placeholders in a path template for the given post.
 *
 * Built-in tokens:
 *   - `{slug}` - the post's slug
 *   - `{year}`, `{month}`, `{day}` - publish date in the local timezone
 *   - `{yearUTC}`, `{monthUTC}`, `{dayUTC}` - publish date in UTC
 *   - `{title}` - title lowercased, whitespace turned into `-`, and every
 *     character other than word characters and `-` removed
 *
 * Any other `{field}` token is replaced by the raw frontmatter value of that
 * name when it is a string, and by an empty string otherwise.
 *
 * Substitution happens in a single pass with a replacer function, so
 * substituted values are inserted literally: `$&` / `$1` sequences in a slug
 * are not treated as replacement patterns, and braces inside a substituted
 * value are not expanded a second time.
 *
 * @param template - Path template, e.g. `"/blog/{year}/{slug}"`.
 * @param post - Post supplying the slug, frontmatter and raw frontmatter.
 * @returns The expanded path, always starting with `/`.
 */
export function resolvePathTemplate(template: string, post: BlogPost): string {
  const publishDate = new Date(post.frontmatter.publishDate);
  const twoDigits = (value: number): string => String(value).padStart(2, "0");

  const slugifiedTitle = (post.frontmatter.title || "")
    .toLowerCase()
    .replace(/\s+/g, "-")
    .replace(/[^\w-]/g, "");

  // A Map (rather than an object literal) so tokens such as {constructor}
  // never resolve to inherited prototype members.
  const knownTokens = new Map<string, string>([
    ["slug", post.slug],
    ["year", String(publishDate.getFullYear())],
    ["yearUTC", String(publishDate.getUTCFullYear())],
    ["month", twoDigits(publishDate.getMonth() + 1)],
    ["monthUTC", twoDigits(publishDate.getUTCMonth() + 1)],
    ["day", twoDigits(publishDate.getDate())],
    ["dayUTC", twoDigits(publishDate.getUTCDate())],
    ["title", slugifiedTitle],
  ]);

  const resolved = template.replace(/\{(\w+)\}/g, (_match, field: string) => {
    if (knownTokens.has(field)) {
      return String(knownTokens.get(field));
    }
    // Remaining {field} tokens come from raw frontmatter; non-strings vanish.
    const value = post.rawFrontmatter[field];
    return typeof value === "string" ? value : "";
  });

  // Ensure leading slash
  return resolved.startsWith("/") ? resolved : `/${resolved}`;
}
224
/**
 * Resolves the URL path for a post.
 *
 * A non-empty `pathTemplate` takes precedence and is expanded via
 * `resolvePathTemplate`. Otherwise the result is `<prefix>/<slug>`, where the
 * prefix is `pathPrefix` or `"/posts"` when `pathPrefix` is missing or empty.
 *
 * @param post - Post whose slug (and template fields) are used.
 * @param pathPrefix - Optional prefix placed before the slug.
 * @param pathTemplate - Optional template overriding the prefix form.
 * @returns The resolved path.
 */
export function resolvePostPath(
  post: BlogPost,
  pathPrefix?: string,
  pathTemplate?: string,
): string {
  return pathTemplate
    ? resolvePathTemplate(pathTemplate, post)
    : `${pathPrefix || "/posts"}/${post.slug}`;
}
236
/**
 * Computes the SHA-256 digest of a string via the Web Crypto API.
 *
 * @param content - Text to hash; it is encoded with `TextEncoder` before hashing.
 * @returns The digest as a lowercase hexadecimal string (two characters per byte).
 */
export async function getContentHash(content: string): Promise<string> {
  const digest = await crypto.subtle.digest(
    "SHA-256",
    new TextEncoder().encode(content),
  );

  let hex = "";
  for (const byte of new Uint8Array(digest)) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
244
/**
 * Reports whether a path matches at least one ignore pattern.
 *
 * @param relativePath - Path to test, as passed to `minimatch`.
 * @param ignorePatterns - Glob patterns; an empty list never matches.
 * @returns `true` if `minimatch` accepts the path for any pattern.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((pattern) => minimatch(relativePath, pattern));
}
253
/** Options object accepted by `scanContentDirectory`. */
export interface ScanOptions {
  /** Custom frontmatter field names, forwarded to `parseFrontmatter`. */
  frontmatterMapping?: FrontmatterMapping;
  /** Glob patterns; files whose relative path matches any of them are skipped. */
  ignorePatterns?: string[];
  /** Frontmatter field to read the slug from, forwarded to `getSlugFromOptions`. */
  slugField?: string;
  /** Remove a trailing `/index` or `/_index` from slugs, forwarded to `getSlugFromOptions`. */
  removeIndexFromSlug?: boolean;
  /** Remove `YYYY-MM-DD-` prefixes from slugs, forwarded to `getSlugFromOptions`. */
  stripDatePrefix?: boolean;
}
261
/**
 * Finds every `.md` / `.mdx` file under `contentDir`, parses its frontmatter
 * and returns the posts sorted by publish date, newest first.
 *
 * Two call shapes are supported:
 *   - new: `scanContentDirectory(dir, options)` with a `ScanOptions` object;
 *   - old: `scanContentDirectory(dir, frontmatterMapping?, ignorePatterns?)`.
 * The second argument is treated as `ScanOptions` when it carries at least one
 * key that exists only on `ScanOptions`; in that case the third positional
 * argument is not used.
 *
 * Files whose frontmatter (or slug) cannot be processed are reported with
 * `console.error` and skipped; they do not abort the scan. File read errors
 * are not caught and reject the returned promise.
 *
 * @param contentDir - Directory to scan (glob `cwd`).
 * @param frontmatterMappingOrOptions - Either a frontmatter mapping (old
 *   signature) or a `ScanOptions` object (new signature).
 * @param ignorePatterns - Ignore globs for the old signature.
 * @returns Parsed posts, newest first; posts without a parseable publish date
 *   are placed last.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
  ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
  // Keys that exist on ScanOptions only. Every one of them must be checked,
  // otherwise e.g. `{ stripDatePrefix: true }` would be mistaken for a
  // FrontmatterMapping and silently ignored.
  const scanOptionKeys = [
    "frontmatterMapping",
    "ignorePatterns",
    "slugField",
    "removeIndexFromSlug",
    "stripDatePrefix",
  ];
  const isOptionsObject =
    !!frontmatterMappingOrOptions &&
    scanOptionKeys.some((key) => key in frontmatterMappingOrOptions);

  // Handle both old signature (frontmatterMapping, ignorePatterns) and new signature (options)
  const options: ScanOptions = isOptionsObject
    ? (frontmatterMappingOrOptions as ScanOptions)
    : {
        frontmatterMapping: frontmatterMappingOrOptions as
          | FrontmatterMapping
          | undefined,
        ignorePatterns,
      };

  const {
    frontmatterMapping,
    ignorePatterns: ignore = [],
    slugField,
    removeIndexFromSlug,
    stripDatePrefix,
  } = options;

  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignore)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
          rawContent,
          frontmatterMapping,
        );
        const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
          slugField,
          removeIndexFromSlug,
          stripDatePrefix,
        });

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
          rawFrontmatter,
        });
      } catch (error) {
        // Best effort: one unparseable file must not abort the whole scan.
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Sort by publish date (newest first). Missing/invalid dates yield NaN from
  // getTime(), which would make a subtraction comparator inconsistent, so
  // they are mapped to -Infinity and therefore sort last.
  const publishTime = (post: BlogPost): number => {
    const time = new Date(post.frontmatter.publishDate).getTime();
    return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
  };
  posts.sort((a, b) => {
    const timeA = publishTime(a);
    const timeB = publishTime(b);
    if (timeA === timeB) {
      return 0;
    }
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}
346
/**
 * Returns the index at which the closing frontmatter delimiter starts, or -1.
 * The closing delimiter must occupy a whole line (followed by LF, CRLF or end
 * of input) after the opening delimiter line.
 */
function findClosingDelimiterIndex(content: string, delimiter: string): number {
  let searchFrom = delimiter.length;
  for (;;) {
    const newlineIndex = content.indexOf(`\n${delimiter}`, searchFrom);
    if (newlineIndex === -1) {
      return -1;
    }
    const afterDelimiter = newlineIndex + 1 + delimiter.length;
    const atEnd = afterDelimiter === content.length;
    const followedByNewline =
      content[afterDelimiter] === "\n" ||
      content.startsWith("\r\n", afterDelimiter);
    if (atEnd || followedByNewline) {
      return newlineIndex + 1;
    }
    searchFrom = newlineIndex + 1;
  }
}

/**
 * Writes the `atUri` entry into a document's frontmatter and returns the
 * updated document text.
 *
 * The entry is formatted as TOML (`atUri = "..."`) when the document opens
 * with `+++`, and as YAML (`atUri: "..."`) for `---` and `***`. An existing
 * `atUri` line inside the frontmatter is replaced; otherwise the entry is
 * inserted on its own line directly before the closing delimiter. Only the
 * frontmatter block is ever modified - text in the body that happens to
 * contain `atUri:` or the delimiter characters is left untouched.
 *
 * @param rawContent - Full file contents, starting with the opening delimiter.
 * @param atUri - AT URI to store.
 * @returns The document with the `atUri` entry set.
 * @throws Error("Could not find frontmatter end") when the document does not
 *   start with a supported delimiter or has no closing delimiter line.
 */
export function updateFrontmatterWithAtUri(
  rawContent: string,
  atUri: string,
): string {
  // Detect which delimiter is used (---, +++, or ***)
  const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
  const delimiter = delimiterMatch?.[1];
  const closingIndex =
    delimiter === undefined
      ? -1
      : findClosingDelimiterIndex(rawContent, delimiter);
  if (closingIndex === -1) {
    throw new Error("Could not find frontmatter end");
  }

  // Format the atUri entry based on frontmatter type
  const isToml = delimiter === "+++";
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // Everything up to (not including) the closing delimiter line.
  const frontmatterSection = rawContent.slice(0, closingIndex);
  const remainder = rawContent.slice(closingIndex);

  // Replace an existing atUri line (YAML or TOML form). Anchored to the start
  // of a line so keys that merely end in "atUri" are not touched; the replacer
  // is a function so `$` in the URI is inserted literally.
  const existingEntry = /^atUri[ \t]*[=:][^\r\n]*/m;
  if (existingEntry.test(frontmatterSection)) {
    return frontmatterSection.replace(existingEntry, () => atUriEntry) + remainder;
  }

  // Insert before the closing delimiter, matching the file's line endings.
  const eol = frontmatterSection.endsWith("\r\n") ? "\r\n" : "\n";
  return `${frontmatterSection}${atUriEntry}${eol}${remainder}`;
}
379
/**
 * Produces a rough plain-text rendering of markdown by removing common
 * formatting with regular expressions (this is not a full markdown parser).
 *
 * Removed entirely: fenced code blocks and images. Unwrapped to their text:
 * bold, italic, links and inline code. Heading markers (`#` to `######`
 * followed by whitespace) are removed only at the start of a line, optionally
 * after indentation or blockquote `>` markers, so text such as `C# and` is
 * kept intact. Runs of three or more newlines collapse to two and the result
 * is trimmed.
 *
 * @param markdown - Markdown source.
 * @returns The text with formatting stripped.
 */
export function stripMarkdownForText(markdown: string): string {
  return (
    markdown
      // Code blocks go first so their contents are never read as markdown.
      .replace(/`{3}[\s\S]*?`{3}/g, "")
      // Images must be removed before links: the link rule also matches the
      // `[alt](src)` part of `![alt](src)` and would leave `!alt` behind.
      .replace(/!\[.*?\]\(.*?\)/g, "")
      .replace(/^([ \t>]*)#{1,6}\s/gm, "$1") // Remove header markers
      .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
      .replace(/\*([^*]+)\*/g, "$1") // Remove italic
      .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
      .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
      .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
      .trim()
  );
}