forked from
stevedylan.dev/sequoia
A CLI for publishing standard.site documents to ATProto
1import * as fs from "node:fs/promises";
2import * as path from "node:path";
3import { glob } from "glob";
4import yaml from "js-yaml";
5import { minimatch } from "minimatch";
6import * as toml from "smol-toml";
7import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";
8
/**
 * Split a Markdown document into frontmatter and body, parse the frontmatter,
 * and normalize it to the standard post fields.
 *
 * The document must start with a delimiter line (`---`, `+++` or `***`) and
 * contain a matching closing delimiter line. `+++` blocks are parsed as TOML;
 * `---` and `***` blocks are parsed as YAML. LF and CRLF line endings are both
 * accepted, the closing delimiter may be the last line of the file (no
 * trailing newline), and the frontmatter block may be empty.
 *
 * @param content - Full text of the Markdown file.
 * @param mapping - Optional custom frontmatter field names to read the
 *   standard fields from. Each mapped field falls back to the standard name
 *   when the mapped one is missing or falsy.
 * @returns The normalized frontmatter, the document body after the closing
 *   delimiter, and the raw parsed frontmatter object.
 * @throws Error with message "Could not parse frontmatter" when the document
 *   has no delimited frontmatter block. Errors thrown by the YAML/TOML parser
 *   are not caught here.
 */
export function parseFrontmatter(
	content: string,
	mapping?: FrontmatterMapping,
): {
	frontmatter: PostFrontmatter;
	body: string;
	rawFrontmatter: Record<string, unknown>;
} {
	// Support multiple frontmatter delimiters:
	// --- (YAML) - Jekyll, Astro, most SSGs
	// +++ (TOML) - Hugo
	// *** - Alternative format
	// `\r?\n` accepts Windows line endings; the frontmatter text and the body
	// are both optional so `---\n---` and a delimiter at end-of-file match.
	const frontmatterRegex =
		/^(---|\+\+\+|\*\*\*)\r?\n(?:([\s\S]*?)\r?\n)?\1(?:\r?\n([\s\S]*))?$/;
	const match = frontmatterRegex.exec(content);

	if (!match) {
		throw new Error("Could not parse frontmatter");
	}

	const delimiter = match[1];
	const frontmatterStr = match[2] ?? "";
	const body = match[3] ?? "";

	// Determine format based on delimiter:
	// +++ uses TOML (key = value)
	// --- and *** use YAML (key: value)
	const isToml = delimiter === "+++";

	// Parse frontmatter using the appropriate library.
	// CORE_SCHEMA keeps dates as strings rather than Date objects.
	const parsed: unknown = isToml
		? toml.parse(frontmatterStr)
		: yaml.load(frontmatterStr, { schema: yaml.CORE_SCHEMA });

	// A YAML document may legitimately be a scalar, a list or empty; only a
	// mapping is usable as frontmatter, anything else is treated as "no fields".
	const raw: Record<string, unknown> =
		parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)
			? (parsed as Record<string, unknown>)
			: {};

	// Apply field mappings to normalize to standard PostFrontmatter fields
	const frontmatter: Record<string, unknown> = {};

	// Title mapping
	const titleField = mapping?.title || "title";
	frontmatter.title = raw[titleField] || raw.title;

	// Description mapping
	const descField = mapping?.description || "description";
	frontmatter.description = raw[descField] || raw.description;

	// Publish date mapping - check custom field first, then fallbacks
	const dateField = mapping?.publishDate;
	if (dateField && raw[dateField]) {
		frontmatter.publishDate = raw[dateField];
	} else if (raw.publishDate) {
		frontmatter.publishDate = raw.publishDate;
	} else {
		// Fallback to common date field names; the first truthy one wins
		const dateFields = ["pubDate", "date", "createdAt", "created_at"];
		for (const field of dateFields) {
			if (raw[field]) {
				frontmatter.publishDate = raw[field];
				break;
			}
		}
	}

	// Cover image mapping
	const coverField = mapping?.coverImage || "ogImage";
	frontmatter.ogImage = raw[coverField] || raw.ogImage;

	// Tags mapping
	const tagsField = mapping?.tags || "tags";
	frontmatter.tags = raw[tagsField] || raw.tags;

	// Draft mapping: `??` (not `||`) so an explicit `false` in the mapped field
	// is respected; only boolean true or the string "true" count as draft.
	const draftField = mapping?.draft || "draft";
	const draftValue = raw[draftField] ?? raw.draft;
	if (draftValue !== undefined) {
		frontmatter.draft = draftValue === true || draftValue === "true";
	}

	// Always preserve atUri (internal field)
	frontmatter.atUri = raw.atUri;

	return {
		frontmatter: frontmatter as unknown as PostFrontmatter,
		body,
		rawFrontmatter: raw,
	};
}
102
/**
 * Derive a slug from a Markdown filename.
 *
 * A trailing `.md` / `.mdx` extension is dropped (case-sensitive match), the
 * remainder is lowercased, and every run of whitespace becomes a single `-`.
 *
 * @param filename - File name (or path) to convert.
 * @returns The slugified name.
 */
export function getSlugFromFilename(filename: string): string {
	const withoutExtension = filename.replace(/\.mdx?$/, "");
	const lowered = withoutExtension.toLowerCase();
	return lowered.replace(/\s+/g, "-");
}
109
/** Options accepted by `getSlugFromOptions` to control slug derivation. */
export interface SlugOptions {
	/**
	 * Name of the raw frontmatter field to read the slug from. When unset, or
	 * when that field is not a non-empty string, the file path is used instead.
	 */
	slugField?: string;
	/** Drop a trailing `/index` or `/_index` segment from the slug. */
	removeIndexFromSlug?: boolean;
	/** Strip a `YYYY-MM-DD-` prefix from the start of each path segment. */
	stripDatePrefix?: boolean;
}
115
/**
 * Compute a post slug from its frontmatter or, failing that, its file path.
 *
 * When `options.slugField` names a frontmatter field holding a non-empty
 * string, that value is used (minus one leading `/`). Otherwise the relative
 * path without its `.md` / `.mdx` extension is used. Either way the result is
 * lowercased and whitespace runs are replaced by `-`.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Parsed frontmatter object of the file.
 * @param options - Slug derivation options; all default to "off".
 * @returns The slug.
 */
export function getSlugFromOptions(
	relativePath: string,
	rawFrontmatter: Record<string, unknown>,
	options: SlugOptions = {},
): string {
	const {
		slugField,
		removeIndexFromSlug = false,
		stripDatePrefix = false,
	} = options;

	// Shared normalization: lowercase, then whitespace runs -> "-".
	const hyphenate = (text: string): string =>
		text.toLowerCase().replace(/\s+/g, "-");

	// Only consult frontmatter when a slug field has been configured.
	const candidate = slugField ? rawFrontmatter[slugField] : undefined;

	let slug =
		typeof candidate === "string" && candidate !== ""
			? // Frontmatter slug: remove leading slash if present
				hyphenate(candidate.replace(/^\//, ""))
			: // Default / fallback: file path without the Markdown extension
				hyphenate(relativePath.replace(/\.mdx?$/, ""));

	// Remove /index or /_index suffix if configured
	if (removeIndexFromSlug) {
		slug = slug.replace(/\/_?index$/, "");
	}

	// Strip Jekyll-style date prefix (YYYY-MM-DD-) from every path segment
	if (stripDatePrefix) {
		slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
	}

	return slug;
}
165
/**
 * Expand a path template such as `/blog/{year}/{slug}` for a post.
 *
 * Built-in tokens:
 * - `{slug}` - the post slug
 * - `{year}`, `{month}`, `{day}` - publish date in the local time zone
 * - `{yearUTC}`, `{monthUTC}`, `{dayUTC}` - publish date in UTC
 * - `{title}` - the title lowercased, whitespace -> `-`, other non-word
 *   characters removed
 *
 * Any other `{field}` token is replaced by the raw frontmatter value of that
 * name when it is a string, and by the empty string otherwise. Month and day
 * are zero-padded to two digits. An unparsable publish date yields `NaN` for
 * the date tokens.
 *
 * All tokens are expanded in a single pass with a replacer function, so
 * substituted values are inserted literally: `$` sequences in a slug are not
 * treated as replacement patterns, and braces inside a substituted value are
 * not expanded again.
 *
 * @param template - Template containing `{token}` placeholders.
 * @param post - Post whose slug, frontmatter and raw frontmatter feed the tokens.
 * @returns The expanded path, always starting with `/`.
 */
export function resolvePathTemplate(template: string, post: BlogPost): string {
	const publishDate = new Date(post.frontmatter.publishDate);
	const twoDigits = (value: number): string => String(value).padStart(2, "0");

	const slugifiedTitle = (post.frontmatter.title || "")
		.toLowerCase()
		.replace(/\s+/g, "-")
		.replace(/[^\w-]/g, "");

	// Built-in tokens take precedence over raw frontmatter fields of the same name.
	const builtIns = new Map<string, string>([
		["slug", post.slug],
		["year", String(publishDate.getFullYear())],
		["yearUTC", String(publishDate.getUTCFullYear())],
		["month", twoDigits(publishDate.getMonth() + 1)],
		["monthUTC", twoDigits(publishDate.getUTCMonth() + 1)],
		["day", twoDigits(publishDate.getDate())],
		["dayUTC", twoDigits(publishDate.getUTCDate())],
		["title", slugifiedTitle],
	]);

	const resolved = template.replace(/\{(\w+)\}/g, (_match, field: string) => {
		const builtIn = builtIns.get(field);
		if (builtIn !== undefined) {
			return builtIn;
		}
		// Remaining tokens come from raw frontmatter; only strings are used.
		const value = post.rawFrontmatter[field];
		return typeof value === "string" ? value : "";
	});

	// Ensure leading slash
	return resolved.startsWith("/") ? resolved : `/${resolved}`;
}
207
/**
 * Resolve the public path of a post.
 *
 * A truthy `pathTemplate` wins and is expanded with `resolvePathTemplate`.
 * Otherwise the path is `<prefix>/<slug>`, where the prefix is `pathPrefix`
 * or `/posts` when `pathPrefix` is missing or empty.
 *
 * @param post - Post to resolve the path for.
 * @param pathPrefix - Prefix used when no template is given.
 * @param pathTemplate - Optional template with `{token}` placeholders.
 * @returns The resolved path.
 */
export function resolvePostPath(
	post: BlogPost,
	pathPrefix?: string,
	pathTemplate?: string,
): string {
	if (!pathTemplate) {
		const base = pathPrefix ? pathPrefix : "/posts";
		return `${base}/${post.slug}`;
	}
	return resolvePathTemplate(pathTemplate, post);
}
219
/**
 * Compute the SHA-256 digest of a string.
 *
 * The text is UTF-8 encoded and hashed with the Web Crypto API
 * (`crypto.subtle`).
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hexadecimal string (64 characters).
 */
export async function getContentHash(content: string): Promise<string> {
	const bytes = new TextEncoder().encode(content);
	const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

	let hex = "";
	for (const byte of digest) {
		// Two hex digits per byte, zero-padded.
		hex += byte.toString(16).padStart(2, "0");
	}
	return hex;
}
227
/**
 * Tell whether a path matches at least one ignore pattern.
 *
 * @param relativePath - Path to test, relative to the content directory.
 * @param ignorePatterns - Glob patterns, each checked with `minimatch`.
 * @returns `true` as soon as one pattern matches; `false` when none do
 *   (including when the list is empty).
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
	return ignorePatterns.some((pattern) => minimatch(relativePath, pattern));
}
236
/** Options object accepted by `scanContentDirectory`. */
export interface ScanOptions {
	/** Custom frontmatter field names, forwarded to `parseFrontmatter`. */
	frontmatterMapping?: FrontmatterMapping;
	/** Glob patterns; files whose relative path matches one are skipped. */
	ignorePatterns?: string[];
	/** Raw frontmatter field to take the slug from (see `SlugOptions`). */
	slugField?: string;
	/** Drop a trailing `/index` or `/_index` segment from slugs. */
	removeIndexFromSlug?: boolean;
	/** Strip a `YYYY-MM-DD-` prefix from slug path segments. */
	stripDatePrefix?: boolean;
}
244
/**
 * Tell whether the second argument of `scanContentDirectory` is a
 * `ScanOptions` object rather than a bare frontmatter mapping, by checking
 * for any key that only `ScanOptions` declares.
 */
function isScanOptions(value: FrontmatterMapping | ScanOptions): boolean {
	const scanOptionKeys = [
		"frontmatterMapping",
		"ignorePatterns",
		"slugField",
		"removeIndexFromSlug",
		"stripDatePrefix",
	];
	return scanOptionKeys.some((key) => key in value);
}

/**
 * Find and parse every `.md` / `.mdx` file under a content directory.
 *
 * Two call shapes are supported:
 * - `scanContentDirectory(dir, options)` with a `ScanOptions` object
 *   (recognized by the presence of any `ScanOptions` key), and
 * - the older `scanContentDirectory(dir, frontmatterMapping?, ignorePatterns?)`.
 *
 * Files matching an ignore pattern are skipped. Files are read one at a time;
 * a file whose frontmatter or slug cannot be derived is reported with
 * `console.error` and left out of the result, while a read failure rejects
 * the returned promise.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - Either a `ScanOptions` object or a
 *   frontmatter mapping (old signature).
 * @param ignorePatterns - Ignore globs; only used with the old signature.
 * @returns The parsed posts sorted by publish date, newest first.
 */
export async function scanContentDirectory(
	contentDir: string,
	frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
	ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
	// Handle both old signature (frontmatterMapping, ignorePatterns) and new signature (options)
	let options: ScanOptions;
	if (
		frontmatterMappingOrOptions &&
		isScanOptions(frontmatterMappingOrOptions)
	) {
		options = frontmatterMappingOrOptions as ScanOptions;
	} else {
		// Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
		options = {
			frontmatterMapping: frontmatterMappingOrOptions as
				| FrontmatterMapping
				| undefined,
			ignorePatterns,
		};
	}

	const {
		frontmatterMapping,
		ignorePatterns: ignore = [],
		slugField,
		removeIndexFromSlug,
		stripDatePrefix,
	} = options;

	const patterns = ["**/*.md", "**/*.mdx"];
	const posts: BlogPost[] = [];

	for (const pattern of patterns) {
		const files = await glob(pattern, {
			cwd: contentDir,
			absolute: false,
		});

		for (const relativePath of files) {
			// Skip files matching ignore patterns
			if (shouldIgnore(relativePath, ignore)) {
				continue;
			}

			const filePath = path.join(contentDir, relativePath);
			const rawContent = await fs.readFile(filePath, "utf-8");

			try {
				const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
					rawContent,
					frontmatterMapping,
				);
				const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
					slugField,
					removeIndexFromSlug,
					stripDatePrefix,
				});

				posts.push({
					filePath,
					slug,
					frontmatter,
					content: body,
					rawContent,
					rawFrontmatter,
				});
			} catch (error) {
				// Best effort: one unparsable file must not abort the whole scan.
				console.error(`Error parsing ${relativePath}:`, error);
			}
		}
	}

	// Sort by publish date (newest first)
	posts.sort((a, b) => {
		const dateA = new Date(a.frontmatter.publishDate);
		const dateB = new Date(b.frontmatter.publishDate);
		return dateB.getTime() - dateA.getTime();
	});

	return posts;
}
329
/**
 * Write an `atUri` entry into a document's frontmatter and return the new text.
 *
 * The delimiter at the start of the document (`---`, `+++` or `***`, default
 * `---`) selects the syntax: `atUri = "..."` for `+++` (TOML), `atUri: "..."`
 * otherwise. An existing `atUri` line inside the frontmatter is replaced in
 * place, keeping its indentation; otherwise the entry is inserted on its own
 * line directly before the closing delimiter.
 *
 * Only the frontmatter is touched: the closing delimiter must stand alone on
 * its line, so delimiter text inside a value and `atUri:` text in the body
 * are left alone. If no closing delimiter line exists, an `atUri` line
 * anywhere in the text is still replaced (best effort).
 *
 * `atUri` is interpolated into the quoted value as-is, without escaping.
 *
 * @param rawContent - Full text of the Markdown file.
 * @param atUri - URI to store.
 * @returns The updated file text.
 * @throws Error with message "Could not find frontmatter end" when there is
 *   no existing `atUri` line and no closing delimiter line to insert before.
 */
export function updateFrontmatterWithAtUri(
	rawContent: string,
	atUri: string,
): string {
	// Detect which delimiter is used (---, +++, or ***)
	const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
	const delimiter = delimiterMatch?.[1] ?? "---";
	const isToml = delimiter === "+++";

	// Format the atUri entry based on frontmatter type
	const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

	// Find the closing delimiter: the first line after the opening one that
	// consists solely of the delimiter (followed by LF, CRLF or end of text).
	let frontmatterEndIndex = -1;
	let searchFrom = delimiter.length;
	while (frontmatterEndIndex === -1) {
		const newlineIndex = rawContent.indexOf(`\n${delimiter}`, searchFrom);
		if (newlineIndex === -1) {
			break;
		}
		const lineEnd = newlineIndex + 1 + delimiter.length;
		const rest = rawContent.slice(lineEnd, lineEnd + 2);
		if (rest === "" || rest.startsWith("\n") || rest === "\r\n") {
			frontmatterEndIndex = newlineIndex + 1;
		} else {
			// Delimiter text followed by more characters: not a closing line.
			searchFrom = newlineIndex + 1;
		}
	}

	const hasFrontmatterEnd = frontmatterEndIndex !== -1;
	// `head` is everything before the closing delimiter; `tail` starts at it.
	const head = hasFrontmatterEnd
		? rawContent.slice(0, frontmatterEndIndex)
		: rawContent;
	const tail = hasFrontmatterEnd ? rawContent.slice(frontmatterEndIndex) : "";

	// Replace an existing entry (YAML or TOML form). The pattern is anchored
	// to a single line so it can never swallow a neighbouring line, and the
	// replacer function keeps "$" in the URI literal.
	const existingEntry = /^([ \t]*)atUri[ \t]*[=:][^\r\n]*/m;
	if (existingEntry.test(head)) {
		const updatedHead = head.replace(
			existingEntry,
			(_line, indent: string) => `${indent}${atUriEntry}`,
		);
		return `${updatedHead}${tail}`;
	}

	if (!hasFrontmatterEnd) {
		throw new Error("Could not find frontmatter end");
	}

	// Insert atUri before the closing delimiter, matching the file's line ending
	const eol = head.endsWith("\r\n") ? "\r\n" : "\n";
	return `${head}${atUriEntry}${eol}${tail}`;
}
362
/**
 * Reduce Markdown to approximate plain text.
 *
 * Fenced code blocks and images are removed entirely; links keep their text;
 * heading markers, bold/italic asterisks and inline-code backticks are
 * dropped; runs of three or more newlines collapse to two; the result is
 * trimmed. This is regex-based, not a full Markdown parser.
 *
 * Order matters: code blocks go first so their contents are never processed,
 * and images go before links because `![alt](src)` contains a link-shaped
 * `[alt](src)` that would otherwise leave `!alt` behind.
 *
 * @param markdown - Markdown source.
 * @returns The text with the formatting above stripped.
 */
export function stripMarkdownForText(markdown: string): string {
	return markdown
		.replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
		.replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
		.replace(/\[([^\]]*)\]\([^)]+\)/g, "$1") // Remove links, keep text (may be empty once an image was removed)
		.replace(/^ {0,3}#{1,6}[ \t]+/gm, "") // Remove header markers at line start only
		.replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
		.replace(/\*([^*]+)\*/g, "$1") // Remove italic
		.replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
		.replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
		.trim();
}