A CLI for publishing standard.site documents to ATProto
sequoia.pub
standard
site
lexicon
cli
publishing
1import * as fs from "fs/promises";
2import * as path from "path";
3import { glob } from "glob";
4import { minimatch } from "minimatch";
5import type { PostFrontmatter, BlogPost, FrontmatterMapping } from "./types";
6
/**
 * Splits a Markdown document into its frontmatter fields and its body.
 *
 * The frontmatter block must start on the first line and be fenced by one of
 * `---` (parsed as `key: value`), `+++` (parsed as `key = value`) or `***`
 * (parsed as `key: value`). Both LF and CRLF line endings are accepted, and
 * the closing delimiter may be the last line of the file.
 *
 * Only flat `key <sep> value` lines are understood: quoted strings, inline
 * `[a, b]` arrays and bare `true` / `false`. Lines without a separator and
 * `#` comment lines are skipped.
 *
 * @param content - Full text of the document, including the frontmatter.
 * @param mapping - Optional custom field names to read title, description,
 *   publish date, cover image, tags and draft flag from.
 * @returns The normalized frontmatter and the text after the closing delimiter.
 * @throws Error if no delimited frontmatter block is found at the start.
 */
export function parseFrontmatter(content: string, mapping?: FrontmatterMapping): {
  frontmatter: PostFrontmatter;
  body: string;
} {
  // Support multiple frontmatter delimiters:
  //   --- (YAML) - Jekyll, Astro, most SSGs
  //   +++ (TOML) - Hugo
  //   *** - Alternative format
  // `\r?\n` tolerates CRLF files; the body group is optional so a document
  // that ends right after the closing delimiter still parses.
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n([\s\S]*))?$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    throw new Error("Could not parse frontmatter");
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // +++ uses TOML-style `key = value`; --- and *** use YAML-style `key: value`.
  const isToml = delimiter === "+++";
  const separator = isToml ? "=" : ":";

  // Parse frontmatter manually, one `key <sep> value` pair per line.
  const raw: Record<string, unknown> = {};

  for (const line of frontmatterStr.split(/\r?\n/)) {
    // `#` starts a comment in both YAML and TOML.
    if (line.trimStart().startsWith("#")) continue;

    // Split on the FIRST separator so values may contain it (URLs, times).
    const sepIndex = line.indexOf(separator);
    if (sepIndex === -1) continue;

    const key = line.slice(0, sepIndex).trim();
    if (key === "") continue;

    const value = line.slice(sepIndex + 1).trim();

    const isQuoted =
      value.length >= 2 &&
      ((value.startsWith('"') && value.endsWith('"')) ||
        (value.startsWith("'") && value.endsWith("'")));

    if (isQuoted) {
      // An explicitly quoted scalar is always a string, even "true" or "[x]".
      raw[key] = value.slice(1, -1);
    } else if (value.startsWith("[") && value.endsWith("]")) {
      // Inline array (simple case for tags); `[]` and trailing commas must
      // not produce empty-string entries.
      raw[key] = value
        .slice(1, -1)
        .split(",")
        .map((item) => item.trim().replace(/^["']|["']$/g, ""))
        .filter((item) => item !== "");
    } else if (value === "true") {
      raw[key] = true;
    } else if (value === "false") {
      raw[key] = false;
    } else {
      raw[key] = value;
    }
  }

  // Apply field mappings to normalize to standard PostFrontmatter fields.
  const frontmatter: Record<string, unknown> = {};

  // Title mapping
  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  // Description mapping
  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date mapping - check custom field first, then fallbacks
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    // Fallback to common date field names; the first one present wins.
    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
    for (const field of dateFields) {
      if (raw[field]) {
        frontmatter.publishDate = raw[field];
        break;
      }
    }
  }

  // Cover image mapping
  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  // Tags mapping
  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Draft mapping: only set when the document declares it, and accept both
  // the boolean and the (quoted) string spelling of true.
  const draftField = mapping?.draft || "draft";
  const draftValue = raw[draftField] ?? raw.draft;
  if (draftValue !== undefined) {
    frontmatter.draft = draftValue === true || draftValue === "true";
  }

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  return { frontmatter: frontmatter as unknown as PostFrontmatter, body };
}
114
/**
 * Derives a URL slug from a Markdown filename.
 *
 * Strips a trailing `.md` / `.mdx` extension, lowercases the rest and
 * replaces each run of whitespace with a single hyphen.
 *
 * @param filename - File name (not a full path), e.g. `My Post.md`.
 * @returns The slug, e.g. `my-post`.
 */
export function getSlugFromFilename(filename: string): string {
  return filename
    // Case-insensitive so `Post.MD` does not end up as the slug `post.md`.
    .replace(/\.mdx?$/i, "")
    .toLowerCase()
    .replace(/\s+/g, "-");
}
121
/**
 * Computes the SHA-256 digest of a string's UTF-8 encoding.
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
export async function getContentHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

  // Render each byte as exactly two hex characters.
  let hex = "";
  for (const byte of digest) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
129
/**
 * Tells whether a path matches at least one of the given ignore globs.
 *
 * @param relativePath - Path to test against the patterns.
 * @param ignorePatterns - Glob patterns passed to `minimatch`.
 * @returns `true` on the first matching pattern, `false` if none match.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((candidate) => minimatch(relativePath, candidate));
}
138
/**
 * Recursively collects every `.md` / `.mdx` file under a directory and
 * parses it into a post.
 *
 * Files matching any ignore pattern are skipped. A file whose frontmatter
 * cannot be parsed is logged with `console.error` and left out of the result;
 * a failure to read a file is not caught here and rejects the promise.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMapping - Optional custom frontmatter field names,
 *   forwarded to `parseFrontmatter`.
 * @param ignorePatterns - Globs, relative to `contentDir`, to exclude.
 * @returns Parsed posts sorted by publish date, newest first; posts with a
 *   missing or unparseable publish date are placed last.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMapping?: FrontmatterMapping,
  ignorePatterns: string[] = []
): Promise<BlogPost[]> {
  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignorePatterns)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body } = parseFrontmatter(rawContent, frontmatterMapping);
        const filename = path.basename(relativePath);
        const slug = getSlugFromFilename(filename);

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
        });
      } catch (error) {
        // Best effort: one malformed file must not abort the whole scan.
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // A missing or invalid date yields NaN, and a comparator that returns NaN
  // makes the sort order inconsistent. Map those to -Infinity so they sort
  // after every dated post.
  const publishTime = (post: BlogPost): number => {
    const time = new Date(post.frontmatter.publishDate).getTime();
    return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
  };

  // Sort by publish date (newest first). Compare instead of subtracting:
  // -Infinity - -Infinity would be NaN again.
  posts.sort((a, b) => {
    const timeA = publishTime(a);
    const timeB = publishTime(b);
    if (timeA === timeB) return 0;
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}
189
/**
 * Finds the start index of the first line, after the first one, that consists
 * solely of `delimiter` (a trailing `\r` is tolerated). Returns -1 if none.
 */
function findClosingDelimiterIndex(rawContent: string, delimiter: string): number {
  let lineStart = rawContent.indexOf("\n") + 1;
  // No newline at all: there is no second line that could close the block.
  if (lineStart === 0) return -1;

  while (lineStart <= rawContent.length) {
    const newlineIndex = rawContent.indexOf("\n", lineStart);
    const lineEnd = newlineIndex === -1 ? rawContent.length : newlineIndex;
    const line = rawContent.slice(lineStart, lineEnd).replace(/\r$/, "");
    if (line === delimiter) return lineStart;
    if (newlineIndex === -1) break;
    lineStart = newlineIndex + 1;
  }
  return -1;
}

/**
 * Returns a copy of a document with its frontmatter `atUri` field set.
 *
 * The entry is written as `atUri = "..."` when the document opens with `+++`
 * and as `atUri: "..."` otherwise. An existing `atUri` line inside the
 * frontmatter is replaced in place; if there is none, the entry is inserted
 * just before the closing delimiter. Text after the closing delimiter is
 * never modified, and the document's LF / CRLF line endings are preserved.
 *
 * @param rawContent - Full document text, including frontmatter.
 * @param atUri - Value to store in the `atUri` field.
 * @returns The updated document text.
 * @throws Error if no closing frontmatter delimiter line is found.
 */
export function updateFrontmatterWithAtUri(rawContent: string, atUri: string): string {
  // Detect which delimiter is used (---, +++, or ***)
  const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
  const delimiter = delimiterMatch?.[1] ?? "---";
  const isToml = delimiter === "+++";

  // Format the atUri entry based on frontmatter type
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // Match the closing delimiter as a whole line so a delimiter sequence
  // inside a value (e.g. `title: a --- b`) is not mistaken for it.
  const frontmatterEndIndex = findClosingDelimiterIndex(rawContent, delimiter);
  if (frontmatterEndIndex === -1) {
    throw new Error("Could not find frontmatter end");
  }

  const beforeEnd = rawContent.slice(0, frontmatterEndIndex);
  const afterEnd = rawContent.slice(frontmatterEndIndex);

  // Look for an existing entry (either format) in the frontmatter only, so a
  // mention of "atUri:" in the body is neither matched nor rewritten.
  const existingEntry = /^([ \t]*)atUri[ \t]*[=:][^\r\n]*/m;
  if (existingEntry.test(beforeEnd)) {
    // A replacer function keeps `$` in the URI from being read as a pattern.
    const updated = beforeEnd.replace(
      existingEntry,
      (_match, indent: string) => `${indent}${atUriEntry}`
    );
    return `${updated}${afterEnd}`;
  }

  // Insert atUri before the closing delimiter, reusing the file's line ending.
  const eol = beforeEnd.endsWith("\r\n") ? "\r\n" : "\n";
  return `${beforeEnd}${atUriEntry}${eol}${afterEnd}`;
}
216
/**
 * Reduces Markdown to approximate plain text.
 *
 * Fenced code blocks and images are dropped entirely; links, bold, italic
 * and inline code keep their inner text; heading markers at the start of a
 * line are removed; runs of three or more newlines collapse to a blank line.
 * This is a regex-based approximation, not a Markdown parser.
 *
 * @param markdown - Markdown source text.
 * @returns The stripped text, trimmed of leading and trailing whitespace.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    // Code blocks go first so their contents are never rewritten by later rules.
    .replace(/`{3}[\s\S]*?`{3}/g, "")
    // Images must go before links: `![alt](src)` contains a link-shaped
    // `[alt](src)`, which would otherwise leave a stray `!alt` behind.
    .replace(/!\[[^\]]*\]\([^)]*\)/g, "")
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
    // Anchored to line start so text like "C# rocks" is left intact.
    .replace(/^[ \t]{0,3}#{1,6}[ \t]+/gm, "")
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}