forked from
stevedylan.dev/sequoia
A CLI for publishing standard.site documents to ATProto
1import * as path from "path";
2import { Glob } from "bun";
3import type { PostFrontmatter, BlogPost, FrontmatterMapping } from "./types";
4
/**
 * Converts the raw text to the right of a frontmatter separator into a value.
 *
 * - A value wrapped in matching quotes is returned as a string with the quotes
 *   removed and is never coerced further (so `"true"` stays a string).
 * - An inline array (`[a, "b"]`) becomes a string array; empty items are
 *   dropped, so `[]` yields an empty array.
 * - Bare `true` / `false` become booleans.
 * - Anything else is returned as the trimmed string.
 */
function parseFrontmatterValue(rawValue: string): unknown {
  const value = rawValue.trim();

  const quote = value[0];
  const isQuoted =
    value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote);
  if (isQuoted) {
    return value.slice(1, -1);
  }

  // Handle arrays (simple single-line case, e.g. tags)
  if (value.startsWith("[") && value.endsWith("]")) {
    return value
      .slice(1, -1)
      .split(",")
      .map((item) => item.trim())
      .filter((item) => item !== "")
      .map((item) => item.replace(/^["']|["']$/g, ""));
  }

  if (value === "true") return true;
  if (value === "false") return false;
  return value;
}

/**
 * Splits a document into its frontmatter and body, and normalizes the
 * frontmatter keys to the standard `PostFrontmatter` field names.
 *
 * Supported delimiters:
 * - `---` (YAML-style `key: value`)
 * - `+++` (TOML-style `key = value`)
 * - `***` (alternative, parsed as `key: value`)
 *
 * Both LF and CRLF line endings are accepted, and the closing delimiter may be
 * the last line of the file. Only flat, single-line `key <sep> value` entries
 * are understood; lines without a separator and `#` comment lines are skipped.
 *
 * @param content Full text of the document, starting with the opening delimiter.
 * @param mapping Optional custom frontmatter field names to read from.
 * @returns The normalized frontmatter and the text following the closing delimiter.
 * @throws Error if the content does not start with a delimited frontmatter block.
 */
export function parseFrontmatter(content: string, mapping?: FrontmatterMapping): {
  frontmatter: PostFrontmatter;
  body: string;
} {
  // The closing delimiter must sit on its own line and match the opening one (\1).
  // The body group is optional so a file that ends right at the closing
  // delimiter still parses.
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n([\s\S]*))?$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    throw new Error("Could not parse frontmatter");
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // Determine format based on delimiter:
  // +++ uses TOML (key = value)
  // --- and *** use YAML (key: value)
  const isToml = delimiter === "+++";
  const separator = isToml ? "=" : ":";

  // Parse frontmatter manually, one `key <sep> value` entry per line
  const raw: Record<string, unknown> = {};

  for (const line of frontmatterStr.split(/\r?\n/)) {
    // Comment lines may contain the separator; never treat them as entries
    if (line.trim().startsWith("#")) continue;

    // Split on the FIRST separator so values may contain it (URLs, timestamps)
    const sepIndex = line.indexOf(separator);
    if (sepIndex === -1) continue;

    const key = line.slice(0, sepIndex).trim();
    if (key === "") continue;

    raw[key] = parseFrontmatterValue(line.slice(sepIndex + 1));
  }

  // Apply field mappings to normalize to standard PostFrontmatter fields.
  // `||` is deliberate: an empty or missing mapped value falls back to the
  // standard field name.
  const frontmatter: Record<string, unknown> = {};

  // Title mapping
  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  // Description mapping
  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date mapping - check custom field first, then fallbacks
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    // Fallback to common date field names; first non-empty one wins
    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
    for (const field of dateFields) {
      if (raw[field]) {
        frontmatter.publishDate = raw[field];
        break;
      }
    }
  }

  // Cover image mapping
  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  // Tags mapping
  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  // No runtime validation happens here: fields absent from the source stay undefined.
  return { frontmatter: frontmatter as unknown as PostFrontmatter, body };
}
105
/**
 * Derives a URL slug from a markdown filename.
 *
 * A trailing lowercase `.md` / `.mdx` extension is removed first, then the
 * name is lowercased and every run of whitespace becomes a single hyphen.
 *
 * @param filename File name (not a full path), e.g. `My Post.md`.
 * @returns The slug, e.g. `my-post`.
 */
export function getSlugFromFilename(filename: string): string {
  const withoutExtension = filename.replace(/\.mdx?$/, "");
  const lowered = withoutExtension.toLowerCase();
  // Splitting on whitespace runs and re-joining collapses each run into one "-"
  return lowered.split(/\s+/).join("-");
}
112
/**
 * Computes the SHA-256 digest of a string's UTF-8 encoding.
 *
 * @param content Text to hash.
 * @returns The digest as a 64-character lowercase hexadecimal string.
 */
export async function getContentHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

  let hex = "";
  for (const byte of digest) {
    // Two hex digits per byte, zero-padded so the output length is fixed
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
120
/**
 * Reports whether a path matches at least one of the given glob patterns.
 *
 * @param relativePath Path to test against each pattern.
 * @param ignorePatterns Glob patterns; an empty list never matches.
 * @returns `true` as soon as any pattern matches, otherwise `false`.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((ignorePattern) =>
    new Glob(ignorePattern).match(relativePath)
  );
}
130
/**
 * Returns a post's publish date as epoch milliseconds, or `-Infinity` when the
 * date is missing or cannot be parsed, so such posts sort after dated ones.
 */
function getPublishTimestamp(post: BlogPost): number {
  const timestamp = new Date(post.frontmatter.publishDate).getTime();
  return Number.isNaN(timestamp) ? Number.NEGATIVE_INFINITY : timestamp;
}

/**
 * Recursively collects every `.md` / `.mdx` file under a directory and parses
 * each one into a `BlogPost`.
 *
 * Files whose relative path matches an ignore pattern are skipped. A file whose
 * frontmatter cannot be parsed is logged with `console.error` and left out of
 * the result rather than aborting the scan.
 *
 * @param contentDir Directory to scan.
 * @param frontmatterMapping Optional custom frontmatter field names, forwarded
 *   to `parseFrontmatter`.
 * @param ignorePatterns Glob patterns (relative to `contentDir`) to exclude.
 * @returns Posts sorted by publish date, newest first; posts without a
 *   parseable publish date come last.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMapping?: FrontmatterMapping,
  ignorePatterns: string[] = []
): Promise<BlogPost[]> {
  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const glob = new Glob(pattern);

    for await (const relativePath of glob.scan({
      cwd: contentDir,
      absolute: false,
    })) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignorePatterns)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const file = Bun.file(filePath);
      const rawContent = await file.text();

      try {
        const { frontmatter, body } = parseFrontmatter(rawContent, frontmatterMapping);
        // The slug comes from the file name only; parent directories are not part of it
        const filename = path.basename(relativePath);
        const slug = getSlugFromFilename(filename);

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Sort by publish date (newest first). Compare instead of subtracting: a
  // missing/invalid date would otherwise produce NaN and an inconsistent
  // comparator, leaving the resulting order implementation-defined.
  posts.sort((a, b) => {
    const timeA = getPublishTimestamp(a);
    const timeB = getPublishTimestamp(b);
    if (timeA === timeB) return 0;
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}
182
/**
 * Returns a copy of the document with its frontmatter `atUri` entry set.
 *
 * The entry is written as `atUri = "<uri>"` for `+++` (TOML) frontmatter and
 * as `atUri: "<uri>"` for `---` / `***` frontmatter. An existing `atUri` line
 * inside the frontmatter is replaced in place; otherwise the entry is inserted
 * immediately before the closing delimiter. Only the frontmatter block is
 * touched, so the body is returned unchanged even if it mentions `atUri:`. The
 * document's line-ending style (LF or CRLF) is preserved.
 *
 * @param rawContent Full document text, starting with the opening delimiter.
 * @param atUri AT URI to store.
 * @returns The updated document text.
 * @throws Error if the document has no opening delimiter or no closing
 *   delimiter on a line of its own.
 */
export function updateFrontmatterWithAtUri(rawContent: string, atUri: string): string {
  // Detect which delimiter is used (---, +++, or ***)
  const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
  const delimiter = delimiterMatch?.[1] ?? "---";
  const isToml = delimiter === "+++";

  // Format the atUri entry based on frontmatter type
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // The closing delimiter must occupy a whole line; a plain substring search
  // would also hit delimiter characters inside a value (e.g. `title: a --- b`).
  const escapedDelimiter = delimiter.replace(/[+*]/g, "\\$&");
  const closingLine = new RegExp(`(\\r?\\n)${escapedDelimiter}(?=\\r?\\n|$)`);
  const closing = delimiterMatch ? closingLine.exec(rawContent) : null;
  if (!closing) {
    throw new Error("Could not find frontmatter end");
  }

  // Split right before the closing delimiter: `head` is the opening line plus
  // every frontmatter line (newline-terminated), `tail` is everything else.
  const eol = closing[1] ?? "\n";
  const closeStart = closing.index + eol.length;
  const head = rawContent.slice(0, closeStart);
  const tail = rawContent.slice(closeStart);

  // Existing entry in either format, with any (or an empty) value
  const existingEntry = /^([ \t]*)atUri[ \t]*[=:].*$/m;
  if (existingEntry.test(head)) {
    // Function replacer: a `$` in the URI must not be read as a replacement pattern
    const updatedHead = head.replace(
      existingEntry,
      (_line: string, indent: string) => `${indent}${atUriEntry}`
    );
    return `${updatedHead}${tail}`;
  }

  // Insert atUri before the closing delimiter
  return `${head}${atUriEntry}${eol}${tail}`;
}
209
/**
 * Reduces markdown to approximate plain text.
 *
 * Rule order matters:
 * - fenced code blocks are dropped first so their contents are never touched
 *   by the other rules;
 * - images are dropped before links, because an image is a link prefixed with
 *   `!` and would otherwise be left behind as `!alt`;
 * - header markers are only removed at the start of a line, so a `#` in
 *   running text (e.g. `C# is`) is kept.
 *
 * @param markdown Markdown source text.
 * @returns The text with the handled markup removed, runs of blank lines
 *   collapsed to one, and surrounding whitespace trimmed.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove fenced code blocks
    .replace(/!\[[^\]]*\]\([^)]*\)/g, "") // Remove images
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
    .replace(/^[ \t]{0,3}#{1,6}[ \t]+/gm, "") // Remove header markers
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}
221}