this repo has no description
1#!/usr/bin/env bun
2/**
3 * Import static site content to ATProto PDS as standard.site documents
4 *
5 * Usage:
6 * bun run scripts/import-content.ts --content-dir <path> --publication <at-uri> --identifier <handle-or-did> --password <app-password>
7 *
8 * Environment variables (alternative to CLI args):
9 * ATPROTO_IDENTIFIER - Your handle or DID
10 * ATPROTO_PASSWORD - App password (create at https://bsky.app/settings/app-passwords)
11 * PDS_URL - PDS endpoint (defaults to https://bsky.social)
12 */
13
14import { readdir, readFile, stat } from "node:fs/promises";
15import { basename, extname, join, relative } from "node:path";
16import { AtpAgent } from "@atproto/api";
17
// Collection name for imported documents. Used both as the record's `$type`
// and as the `collection` argument when the record is written with putRecord.
const DOCUMENT_COLLECTION = "site.standard.document";
19
/**
 * Frontmatter fields recognized by `parseFrontmatter`.
 * Every field is optional; keys not listed here are ignored by the parser.
 */
interface Frontmatter {
  /** Document title; when absent the record title falls back to the file name. */
  title?: string;
  /** Raw date string as written; only used when `new Date()` can parse it. */
  date?: string;
  /** Tags, from either a block list (`- a`) or an inline array (`["a", "b"]`). */
  tags?: string[];
  /** Nested `name` / `uri` keys. Parsed, but not copied into the record by this script. */
  author?: { name?: string; uri?: string };
  /** Short description, copied to the record when non-empty. */
  description?: string;
  /** Parsed, but not copied into the record by this script. */
  view?: string;
}
28
/**
 * A content file after it has been read and its frontmatter extracted.
 */
interface ParsedDocument {
  /** Fields extracted from the leading frontmatter block (empty object when there is none). */
  frontmatter: Frontmatter;
  /** File text with the frontmatter block removed. */
  content: string;
  /** Path of the file as it was discovered on disk. */
  filePath: string;
  /** `filePath` relative to the content directory; input for the rkey and URL path. */
  relativePath: string;
}
35
/**
 * Remove one pair of matching surrounding quotes from a scalar value.
 * Unpaired quotes (e.g. `Rock 'n'`) are part of the value and are left alone.
 */
function stripMatchingQuotes(value: string): string {
  if (value.length < 2) return value;
  const first = value.charAt(0);
  const quoted = (first === '"' || first === "'") && value.endsWith(first);
  return quoted ? value.slice(1, -1) : value;
}

/**
 * Parse YAML-like frontmatter from markdown content.
 *
 * Only the subset of YAML this script needs is understood: scalar
 * `title` / `date` / `description` / `view`, `tags` as a block list or an
 * inline `[a, b]` array, and an `author:` block with indented `name` / `uri`
 * keys. Any other key is ignored.
 *
 * @param content Full file text, optionally starting with a `---` delimited block.
 * @returns The recognized fields plus the remaining body. When no frontmatter
 *   block is found, `frontmatter` is empty and `body` is the untouched input.
 */
function parseFrontmatter(content: string): {
  frontmatter: Frontmatter;
  body: string;
} {
  // The closing delimiter may be the very last line of the file, so the
  // newline + body after it is optional.
  const frontmatterMatch = content.match(
    /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n([\s\S]*))?$/,
  );

  if (!frontmatterMatch) {
    return { frontmatter: {}, body: content };
  }

  const [, yamlContent = "", body = ""] = frontmatterMatch;
  const frontmatter: Frontmatter = {};
  const keyValuePattern = /^(\w+):\s*(.*)$/;

  // Parser state: the list currently being filled (only `tags:` opens one)
  // and whether we are inside an `author:` block.
  let currentArray: string[] | null = null;
  let inAuthor = false;
  let authorObj: { name?: string; uri?: string } = {};

  for (const line of yamlContent.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    // List item: collected only while a `tags:` block list is open.
    if (trimmed.startsWith("- ")) {
      currentArray?.push(stripMatchingQuotes(trimmed.slice(2).trim()));
      continue;
    }

    // Indented key inside the `author:` block.
    if (inAuthor && line.startsWith(" ")) {
      const nested = trimmed.match(keyValuePattern);
      if (nested) {
        const [, nestedKey, nestedValue = ""] = nested;
        const cleanValue = stripMatchingQuotes(nestedValue);
        if (nestedKey === "name") authorObj.name = cleanValue;
        if (nestedKey === "uri") authorObj.uri = cleanValue;
      }
      continue;
    }

    const kvMatch = trimmed.match(keyValuePattern);
    if (!kvMatch) continue;

    const [, key, rawValue = ""] = kvMatch;
    if (!key) continue;
    const value = rawValue.trim();

    // A new top-level key closes whatever block was open before it.
    if (inAuthor) {
      frontmatter.author = authorObj;
      authorObj = {};
    }
    inAuthor = false;
    currentArray = null;

    if (!value) {
      // A key without a value opens a block list or a nested object.
      if (key === "tags") {
        currentArray = [];
        frontmatter.tags = currentArray;
      } else if (key === "author") {
        inAuthor = true;
      }
      continue;
    }

    switch (key) {
      case "title":
        frontmatter.title = stripMatchingQuotes(value);
        break;
      case "date":
        frontmatter.date = stripMatchingQuotes(value);
        break;
      case "description":
        frontmatter.description = stripMatchingQuotes(value);
        break;
      case "view":
        frontmatter.view = stripMatchingQuotes(value);
        break;
      case "tags":
        if (value.startsWith("[")) {
          // Inline array like tags: ["a", "b"]. Empty entries are dropped so
          // that `tags: []` yields [] rather than [""].
          const inner = value.endsWith("]")
            ? value.slice(1, -1)
            : value.slice(1);
          frontmatter.tags = inner
            .split(",")
            .map((item) => stripMatchingQuotes(item.trim()))
            .filter((item) => item !== "");
        }
        break;
      default:
        break;
    }
  }

  // The author block may run to the end of the frontmatter.
  if (inAuthor) {
    frontmatter.author = authorObj;
  }

  return { frontmatter, body };
}
140
/**
 * Derive a record key from a content file's relative path.
 *
 * The `.md` / `.html` extension is dropped, path separators and underscores
 * become dashes, every other character outside `[a-zA-Z0-9-]` is removed, and
 * the result is lower-cased and capped at 512 characters.
 *
 * e.g., "notes/2025-03-16_grounding-questions.md" -> "notes-2025-03-16-grounding-questions"
 */
function generateRkey(relativePath: string): string {
  const maxLength = 512; // ATProto rkey max length
  const stem = relativePath.replace(/\.(md|html)$/, "");
  const separatorsDashed = stem.replace(/[/\\]/g, "-");
  const underscoresDashed = separatorsDashed.replace(/_/g, "-");
  const sanitized = underscoresDashed.replace(/[^a-zA-Z0-9-]/g, "");
  return sanitized.toLowerCase().slice(0, maxLength);
}
155
/**
 * Convert a content file's relative path to its URL path.
 *
 * - The `.md` / `.html` extension is dropped.
 * - Both `/` and `\` are accepted as separators (matching `generateRkey`), so
 *   paths produced on Windows map to the same URL as on POSIX systems.
 * - An `index` file maps to its directory ("/" for the top-level index).
 * - A leading `YYYY-MM-DD_` date prefix is removed from the file name.
 *
 * e.g., "notes/2025-03-16_grounding-questions.md" -> "/notes/grounding-questions"
 *
 * @param relativePath Path of the file relative to the content directory.
 * @returns The URL path, always starting with "/".
 */
function generatePath(relativePath: string): string {
  const segments = relativePath.replace(/\.(md|html)$/, "").split(/[/\\]/);
  const filename = segments.pop() ?? "";

  // Index files stand for their containing directory.
  if (filename === "index") {
    const dir = segments.join("/");
    return dir ? `/${dir}` : "/";
  }

  // Remove date prefix (YYYY-MM-DD_) from filename
  segments.push(filename.replace(/^\d{4}-\d{2}-\d{2}_/, ""));
  return `/${segments.join("/")}`;
}
174
/**
 * Walk `dir` depth-first and collect every `.md` / `.html` file beneath it.
 *
 * Sub-directories are descended into as they are encountered, so a
 * directory's files appear in the result at the position of that directory
 * in the listing. `baseDir` is only threaded through the recursion.
 */
async function findContentFiles(
  dir: string,
  baseDir: string = dir,
): Promise<string[]> {
  const collected: string[] = [];

  for (const dirent of await readdir(dir, { withFileTypes: true })) {
    const entryPath = join(dir, dirent.name);

    if (dirent.isDirectory()) {
      const nested = await findContentFiles(entryPath, baseDir);
      for (const nestedPath of nested) {
        collected.push(nestedPath);
      }
      continue;
    }

    if (!dirent.isFile()) continue;
    if (/\.(md|html)$/.test(dirent.name)) {
      collected.push(entryPath);
    }
  }

  return collected;
}
197
/**
 * Read one content file as UTF-8 and split it into frontmatter and body.
 * The returned `relativePath` is `filePath` expressed relative to `baseDir`.
 */
async function parseContentFile(
  filePath: string,
  baseDir: string,
): Promise<ParsedDocument> {
  const raw = await readFile(filePath, "utf-8");
  const parsed = parseFrontmatter(raw);

  const document: ParsedDocument = {
    frontmatter: parsed.frontmatter,
    content: parsed.body,
    filePath,
    relativePath: relative(baseDir, filePath),
  };
  return document;
}
216
/**
 * Build the standard.site document record for a parsed content file.
 *
 * The record always carries `$type`, `site`, `title`, `path`, `content` and
 * `createdAt` (the time of this call). `description`, `tags` and
 * `publishedAt` are added only when the frontmatter supplies a usable value.
 * The title falls back to the file name without its extension.
 */
function createDocumentRecord(
  doc: ParsedDocument,
  publicationUri: string,
): {
  rkey: string;
  record: Record<string, unknown>;
} {
  const { frontmatter, relativePath } = doc;
  const fileStem = basename(relativePath, extname(relativePath));

  const record: Record<string, unknown> = {
    $type: DOCUMENT_COLLECTION,
    site: publicationUri,
    title: frontmatter.title || fileStem,
    path: generatePath(relativePath),
    // TODO: add textContent with markdown or any other formatting stripped
    content: {
      $type: "markdown",
      markdown: doc.content,
    },
    createdAt: new Date().toISOString(),
  };

  // Optional fields, added in a fixed order after the required ones.
  const { description, tags, date } = frontmatter;

  if (description) {
    record.description = description;
  }

  if (tags?.length) {
    record.tags = tags;
  }

  if (date) {
    // Dates that cannot be parsed are skipped rather than stored.
    const published = new Date(date);
    if (!Number.isNaN(published.getTime())) {
      record.publishedAt = published.toISOString();
    }
  }

  return { rkey: generateRkey(relativePath), record };
}
264
/**
 * CLI entry point: scan a content directory, turn every `.md` / `.html` file
 * into a document record, and write the records to the PDS with putRecord.
 *
 * Exits with status 1 on missing arguments, on an unusable content directory,
 * and (via `process.exitCode`) when at least one record could not be written,
 * so callers such as CI can detect a partial import. With `--dry-run` nothing
 * is sent and no login is performed.
 *
 * NOTE(review): `--password` on the command line is visible in the process
 * list and shell history; prefer the ATPROTO_PASSWORD environment variable.
 */
async function main(): Promise<void> {
  // Parse arguments
  const args = process.argv.slice(2);
  const getArg = (name: string): string | undefined => {
    const idx = args.indexOf(`--${name}`);
    return idx !== -1 ? args[idx + 1] : undefined;
  };

  const contentDir = getArg("content-dir");
  const publicationUri = getArg("publication");
  // `||` is deliberate: an empty value falls through to the next source.
  const identifier = getArg("identifier") || process.env.ATPROTO_IDENTIFIER;
  const password = getArg("password") || process.env.ATPROTO_PASSWORD;
  const pdsUrl = getArg("pds") || process.env.PDS_URL || "https://bsky.social";
  const dryRun = args.includes("--dry-run");

  if (!contentDir || !publicationUri || !identifier || !password) {
    console.error(`Usage: bun run scripts/import-content.ts \\
  --content-dir <path> \\
  --publication <at-uri> \\
  --identifier <handle-or-did> \\
  --password <app-password> \\
  [--pds <pds-url>] \\
  [--dry-run]

Environment variables:
  ATPROTO_IDENTIFIER - Your handle or DID
  ATPROTO_PASSWORD - App password
  PDS_URL - PDS endpoint (default: https://bsky.social)
`);
    process.exit(1);
  }

  console.log(`Content directory: ${contentDir}`);
  console.log(`Publication: ${publicationUri}`);
  console.log(`PDS: ${pdsUrl}`);
  console.log(`Dry run: ${dryRun}`);
  console.log();

  // Verify content directory exists
  try {
    const stats = await stat(contentDir);
    if (!stats.isDirectory()) {
      console.error(`Error: ${contentDir} is not a directory`);
      process.exit(1);
    }
  } catch {
    console.error(`Error: ${contentDir} does not exist`);
    process.exit(1);
  }

  // Find and parse all content files
  console.log("Scanning for content files...");
  const files = await findContentFiles(contentDir);
  console.log(`Found ${files.length} files\n`);

  const documents: ParsedDocument[] = [];
  for (const file of files) {
    documents.push(await parseContentFile(file, contentDir));
  }

  // Create ATProto agent and authenticate
  const agent = new AtpAgent({ service: pdsUrl });

  // DID of the repo that records are written to; only resolved for real runs.
  let repoDid = "";
  if (!dryRun) {
    console.log(`Authenticating as ${identifier}...`);
    await agent.login({ identifier, password });
    const sessionDid = agent.session?.did;
    if (!sessionDid) {
      // Checked once here instead of asserting non-null on every write.
      throw new Error("Login succeeded but the session has no DID");
    }
    repoDid = sessionDid;
    console.log(`Authenticated as ${repoDid}\n`);
  }

  // Process each document
  let created = 0;
  // Nothing is skipped at present; the counter is kept so the summary
  // format stays stable.
  const skipped = 0;
  let failed = 0;

  for (const doc of documents) {
    const { rkey, record } = createDocumentRecord(doc, publicationUri);

    console.log(`Processing: ${doc.relativePath}`);
    console.log(` Title: ${record.title}`);
    console.log(` Path: ${record.path}`);
    console.log(` Rkey: ${rkey}`);
    if (record.tags)
      console.log(` Tags: ${(record.tags as string[]).join(", ")}`);
    if (record.publishedAt) console.log(` Published: ${record.publishedAt}`);

    if (dryRun) {
      console.log(` [DRY RUN] Would create record\n`);
      created++;
      continue;
    }

    try {
      await agent.api.com.atproto.repo.putRecord({
        repo: repoDid,
        collection: DOCUMENT_COLLECTION,
        rkey,
        record,
      });
      console.log(` ✓ Created\n`);
      created++;
    } catch (error) {
      // One failed record must not abort the rest of the import.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ✗ Failed: ${message}\n`);
      failed++;
    }
  }

  console.log("---");
  console.log(`Summary:`);
  console.log(` Created: ${created}`);
  console.log(` Skipped: ${skipped}`);
  console.log(` Failed: ${failed}`);

  // Report partial failure through the exit status without cutting off
  // pending output.
  if (failed > 0) {
    process.exitCode = 1;
  }
}
379
/** Log an error that escaped `main` and terminate with a failure status. */
function reportFatalError(reason: unknown): never {
  console.error("Fatal error:", reason);
  process.exit(1);
}

// Script entry point.
main().catch(reportFatalError);