packages/cli/src/lib/markdown.ts at v0.2.0 · stevedylan.dev/sequoia

stevedylan.dev / sequoia
A CLI for publishing standard.site documents to ATProto sequoia.pub
standard site lexicon cli publishing
sequoia / packages / cli / src / lib / markdown.ts
at v0.2.0 228 lines 7.3 kB view raw
  1import * as fs from "fs/promises";
  2import * as path from "path";
  3import { glob } from "glob";
  4import { minimatch } from "minimatch";
  5import type { PostFrontmatter, BlogPost, FrontmatterMapping } from "./types";
  6
  7export function parseFrontmatter(content: string, mapping?: FrontmatterMapping): {
  8  frontmatter: PostFrontmatter;
  9  body: string;
 10} {
 11  // Support multiple frontmatter delimiters:
 12  // --- (YAML) - Jekyll, Astro, most SSGs
 13  // +++ (TOML) - Hugo
 14  // *** - Alternative format
 15  const frontmatterRegex = /^(---|\+\+\+|\*\*\*)\n([\s\S]*?)\n\1\n([\s\S]*)$/;
 16  const match = content.match(frontmatterRegex);
 17
 18  if (!match) {
 19    throw new Error("Could not parse frontmatter");
 20  }
 21
 22  const delimiter = match[1];
 23  const frontmatterStr = match[2] ?? "";
 24  const body = match[3] ?? "";
 25
 26  // Determine format based on delimiter:
 27  // +++ uses TOML (key = value)
 28  // --- and *** use YAML (key: value)
 29  const isToml = delimiter === "+++";
 30  const separator = isToml ? "=" : ":";
 31
 32  // Parse frontmatter manually
 33  const raw: Record<string, unknown> = {};
 34  const lines = frontmatterStr.split("\n");
 35
 36  for (const line of lines) {
 37    const sepIndex = line.indexOf(separator);
 38    if (sepIndex === -1) continue;
 39
 40    const key = line.slice(0, sepIndex).trim();
 41    let value = line.slice(sepIndex + 1).trim();
 42
 43    // Handle quoted strings
 44    if (
 45      (value.startsWith('"') && value.endsWith('"')) ||
 46      (value.startsWith("'") && value.endsWith("'"))
 47    ) {
 48      value = value.slice(1, -1);
 49    }
 50
 51    // Handle arrays (simple case for tags)
 52    if (value.startsWith("[") && value.endsWith("]")) {
 53      const arrayContent = value.slice(1, -1);
 54      raw[key] = arrayContent
 55        .split(",")
 56        .map((item) => item.trim().replace(/^["']|["']$/g, ""));
 57    } else if (value === "true") {
 58      raw[key] = true;
 59    } else if (value === "false") {
 60      raw[key] = false;
 61    } else {
 62      raw[key] = value;
 63    }
 64  }
 65
 66  // Apply field mappings to normalize to standard PostFrontmatter fields
 67  const frontmatter: Record<string, unknown> = {};
 68
 69  // Title mapping
 70  const titleField = mapping?.title || "title";
 71  frontmatter.title = raw[titleField] || raw.title;
 72
 73  // Description mapping
 74  const descField = mapping?.description || "description";
 75  frontmatter.description = raw[descField] || raw.description;
 76
 77  // Publish date mapping - check custom field first, then fallbacks
 78  const dateField = mapping?.publishDate;
 79  if (dateField && raw[dateField]) {
 80    frontmatter.publishDate = raw[dateField];
 81  } else if (raw.publishDate) {
 82    frontmatter.publishDate = raw.publishDate;
 83  } else {
 84    // Fallback to common date field names
 85    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
 86    for (const field of dateFields) {
 87      if (raw[field]) {
 88        frontmatter.publishDate = raw[field];
 89        break;
 90      }
 91    }
 92  }
 93
 94  // Cover image mapping
 95  const coverField = mapping?.coverImage || "ogImage";
 96  frontmatter.ogImage = raw[coverField] || raw.ogImage;
 97
 98  // Tags mapping
 99  const tagsField = mapping?.tags || "tags";
100  frontmatter.tags = raw[tagsField] || raw.tags;
101
102  // Draft mapping
103  const draftField = mapping?.draft || "draft";
104  const draftValue = raw[draftField] ?? raw.draft;
105  if (draftValue !== undefined) {
106    frontmatter.draft = draftValue === true || draftValue === "true";
107  }
108
109  // Always preserve atUri (internal field)
110  frontmatter.atUri = raw.atUri;
111
112  return { frontmatter: frontmatter as unknown as PostFrontmatter, body };
113}
114
/**
 * Derives a URL slug from a markdown filename.
 *
 * A trailing `.md` or `.mdx` extension (lowercase only) is removed, the rest
 * is lowercased, and every run of whitespace becomes a single hyphen.
 *
 * @param filename - File name such as `My Post.md`.
 * @returns The slug, e.g. `my-post`.
 */
export function getSlugFromFilename(filename: string): string {
  const withoutExtension = filename.replace(/\.mdx?$/, "");
  const lowerCased = withoutExtension.toLowerCase();
  return lowerCased.replace(/\s+/g, "-");
}
121
/**
 * Computes the SHA-256 digest of a string.
 *
 * The text is encoded with `TextEncoder` and hashed through the Web Crypto
 * API (`crypto.subtle`).
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hexadecimal string, two characters per byte.
 */
export async function getContentHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

  let hex = "";
  for (const byte of digest) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
129
/**
 * Tells whether a path matches at least one of the given ignore patterns.
 *
 * @param relativePath - Path to test against each pattern with `minimatch`.
 * @param ignorePatterns - Patterns to check; an empty list never matches.
 * @returns `true` as soon as one pattern matches, otherwise `false`.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((candidate) => minimatch(relativePath, candidate));
}
138
/**
 * Returns a post's publish date as epoch milliseconds, or negative infinity
 * when the date is missing or cannot be parsed.
 */
function getPublishTimestamp(post: BlogPost): number {
  const timestamp = new Date(post.frontmatter.publishDate).getTime();
  return Number.isNaN(timestamp) ? Number.NEGATIVE_INFINITY : timestamp;
}

/**
 * Collects every `.md` and `.mdx` file under a directory as a parsed post.
 *
 * Files whose relative path matches an ignore pattern are skipped. Files whose
 * frontmatter cannot be parsed are reported with `console.error` and skipped;
 * errors from globbing or reading a file are not caught here.
 *
 * @param contentDir - Directory to scan recursively.
 * @param frontmatterMapping - Optional custom field names passed through to
 *   `parseFrontmatter`.
 * @param ignorePatterns - Patterns matched against each path relative to
 *   `contentDir`.
 * @returns The posts sorted by publish date, newest first. Posts without a
 *   valid publish date are placed last.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMapping?: FrontmatterMapping,
  ignorePatterns: string[] = []
): Promise<BlogPost[]> {
  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignorePatterns)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body } = parseFrontmatter(rawContent, frontmatterMapping);
        const filename = path.basename(relativePath);
        const slug = getSlugFromFilename(filename);

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
        });
      } catch (error) {
        // One unparseable file must not abort the whole scan.
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Sort by publish date (newest first). Timestamps are compared explicitly
  // rather than subtracted so that a missing or invalid date can never make
  // the comparator return NaN, which would leave the order inconsistent.
  posts.sort((a, b) => {
    const timeA = getPublishTimestamp(a);
    const timeB = getPublishTimestamp(b);
    if (timeA === timeB) return 0;
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}
189
/**
 * Finds the index of the closing frontmatter delimiter: the first occurrence
 * after the opening one that starts a line and is followed by a line break or
 * the end of the content. Returns -1 when there is none.
 */
function findClosingDelimiterIndex(rawContent: string, delimiter: string): number {
  let searchFrom = delimiter.length;

  while (true) {
    const lineBreakIndex = rawContent.indexOf(`\n${delimiter}`, searchFrom);
    if (lineBreakIndex === -1) return -1;

    const delimiterIndex = lineBreakIndex + 1;
    const afterDelimiter = delimiterIndex + delimiter.length;
    const following = rawContent.slice(afterDelimiter, afterDelimiter + 2);
    if (following === "" || following.startsWith("\n") || following === "\r\n") {
      return delimiterIndex;
    }

    // The delimiter characters were only the start of a longer line; keep looking.
    searchFrom = delimiterIndex;
  }
}

/**
 * Writes an `atUri` entry into a document's frontmatter.
 *
 * The entry is formatted as `atUri = "..."` when the document opens with
 * `+++` and as `atUri: "..."` otherwise. An existing `atUri` line inside the
 * frontmatter is replaced in place; if there is none, the entry is inserted
 * as the last line before the closing delimiter. Only the frontmatter is
 * inspected, so text in the body that mentions `atUri:` is left untouched.
 * The line ending used before the closing delimiter is reused for the new line.
 *
 * @param rawContent - Full file contents including frontmatter.
 * @param atUri - The AT URI to store.
 * @returns The updated file contents.
 * @throws Error "Could not find frontmatter end" when no closing delimiter
 *   line exists.
 */
export function updateFrontmatterWithAtUri(rawContent: string, atUri: string): string {
  // Detect which delimiter is used (---, +++, or ***)
  const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
  const delimiter = delimiterMatch?.[1] ?? "---";
  const isToml = delimiter === "+++";

  // Format the atUri entry based on frontmatter type
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  const frontmatterEndIndex = findClosingDelimiterIndex(rawContent, delimiter);
  if (frontmatterEndIndex === -1) {
    throw new Error("Could not find frontmatter end");
  }

  const beforeEnd = rawContent.slice(0, frontmatterEndIndex);
  const afterEnd = rawContent.slice(frontmatterEndIndex);

  // Replace an existing atUri line (YAML or TOML form), keeping its indentation
  // and line ending. A replacer function is used so that `$` sequences in the
  // URI are inserted literally.
  const existingEntry = /^([ \t]*)atUri[ \t]*[=:][^\r\n]*/m;
  if (existingEntry.test(beforeEnd)) {
    const updated = beforeEnd.replace(
      existingEntry,
      (_line: string, indent: string) => `${indent}${atUriEntry}`
    );
    return `${updated}${afterEnd}`;
  }

  // Insert atUri before the closing delimiter
  const lineBreak = beforeEnd.endsWith("\r\n") ? "\r\n" : "\n";
  return `${beforeEnd}${atUriEntry}${lineBreak}${afterEnd}`;
}
216
/**
 * Reduces markdown to approximate plain text using regular expressions.
 *
 * Fenced code blocks and images are removed entirely; links keep only their
 * text; header markers, bold/italic asterisks and inline-code backticks are
 * dropped; runs of three or more newlines collapse to two; the result is
 * trimmed.
 *
 * The order of the passes matters: code blocks go first so their contents are
 * never treated as markup, and images go before links because an image
 * contains a link-shaped `[alt](src)` that would otherwise be unwrapped to
 * `!alt`.
 *
 * @param markdown - Markdown source text.
 * @returns The text with the markup described above stripped.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
    .replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
    .replace(/\[([^\]]*)\]\([^)]+\)/g, "$1") // Remove links, keep text
    .replace(/^[ \t]{0,3}#{1,6}[ \t]+/gm, "") // Remove header markers at line start only
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}