this repo has no description
at main 383 lines 11 kB view raw
#!/usr/bin/env bun
/**
 * Import static site content to ATProto PDS as standard.site documents
 *
 * Usage:
 *   bun run scripts/import-content.ts --content-dir <path> --publication <at-uri> --identifier <handle-or-did> --password <app-password>
 *
 * Environment variables (alternative to CLI args):
 *   ATPROTO_IDENTIFIER - Your handle or DID
 *   ATPROTO_PASSWORD   - App password (create at https://bsky.app/settings/app-passwords)
 *   PDS_URL            - PDS endpoint (defaults to https://bsky.social)
 */

import { readdir, readFile, stat } from "node:fs/promises";
import { basename, extname, join, relative } from "node:path";
import { AtpAgent } from "@atproto/api";

const DOCUMENT_COLLECTION = "site.standard.document";

interface Frontmatter {
  title?: string;
  date?: string;
  tags?: string[];
  author?: { name?: string; uri?: string };
  description?: string;
  view?: string;
}

interface ParsedDocument {
  frontmatter: Frontmatter;
  content: string;
  filePath: string;
  relativePath: string;
}

/**
 * Strip one leading and one trailing quote character (single or double)
 * from a scalar frontmatter value.
 */
function stripQuotes(value: string): string {
  return value.replace(/^["']|["']$/g, "");
}

/**
 * Parse YAML-like frontmatter from markdown content.
 *
 * Only the subset of YAML this script needs is understood: scalar
 * `title` / `date` / `description` / `view`, `tags` as either a block list
 * (`- item`) or an inline list (`[a, b]`), and a nested `author` object with
 * `name` and `uri`.
 *
 * @param content - Full text of the content file.
 * @returns The parsed frontmatter and the remaining body. When no
 *   frontmatter fence is found, the frontmatter is empty and the body is the
 *   whole input.
 */
function parseFrontmatter(content: string): {
  frontmatter: Frontmatter;
  body: string;
} {
  // The trailing newline + body are optional so that a file consisting of
  // frontmatter only (ending exactly at the closing `---`) is still parsed.
  const frontmatterMatch = content.match(
    /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n([\s\S]*))?$/,
  );

  if (!frontmatterMatch) {
    return { frontmatter: {}, body: content };
  }

  const [, yamlContent, body] = frontmatterMatch;
  const frontmatter: Frontmatter = {};

  // Simple YAML parser for our needs
  const lines = (yamlContent ?? "").split(/\r?\n/);
  let currentKey: string | null = null;
  let currentArray: string[] | null = null;
  let inAuthor = false;
  let authorObj: { name?: string; uri?: string } = {};

  for (const line of lines) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    // Array item: only collected while a block list (`tags:`) is open.
    if (trimmed.startsWith("- ") && currentKey) {
      if (currentArray) {
        currentArray.push(stripQuotes(trimmed.slice(2).trim()));
      }
      continue;
    }

    // Nested key (for author): any indented line while inside `author:`.
    if (inAuthor && /^\s/.test(line)) {
      const match = trimmed.match(/^(\w+):\s*(.*)$/);
      if (match) {
        const [, key, value] = match;
        const cleanValue = stripQuotes(value ?? "");
        if (key === "name") authorObj.name = cleanValue;
        if (key === "uri") authorObj.uri = cleanValue;
      }
      continue;
    }

    // Key-value pair
    const kvMatch = trimmed.match(/^(\w+):\s*(.*)$/);
    if (!kvMatch) continue;

    const [, key, rawValue] = kvMatch;
    if (!key) continue;
    const value = rawValue?.trim();

    // A new top-level key closes any open author object or block list.
    if (inAuthor && currentKey === "author") {
      frontmatter.author = authorObj;
      authorObj = {};
    }
    inAuthor = false;
    currentArray = null;

    if (!value) {
      // Could be array or nested object
      if (key === "tags") {
        currentKey = key;
        currentArray = [];
        frontmatter.tags = currentArray;
      } else if (key === "author") {
        currentKey = key;
        inAuthor = true;
      }
      continue;
    }

    // Direct value
    const cleanValue = stripQuotes(value);
    if (key === "title") frontmatter.title = cleanValue;
    if (key === "date") frontmatter.date = cleanValue;
    if (key === "description") frontmatter.description = cleanValue;
    if (key === "view") frontmatter.view = cleanValue;
    if (key === "tags" && value.startsWith("[")) {
      // Inline array like tags: ["a", "b"]. Empty entries are dropped so
      // that `tags: []` yields no tags instead of a single empty string.
      frontmatter.tags = value
        .slice(1, -1)
        .split(",")
        .map((tag) => stripQuotes(tag.trim()))
        .filter((tag) => tag.length > 0);
    }
    currentKey = key;
  }

  // Save final author if the frontmatter ended inside the author object.
  if (inAuthor) {
    frontmatter.author = authorObj;
  }

  return { frontmatter, body: body || "" };
}

/**
 * Generate a record key from a file path
 * e.g., "notes/2025-03-16_grounding-questions.md" -> "notes-2025-03-16-grounding-questions"
 *
 * Path separators and underscores become dashes, every character outside
 * `[a-zA-Z0-9-]` is removed, and the result is lower-cased. Distinct paths
 * can therefore map to the same key, and a path made only of removed
 * characters maps to an empty string; callers must handle both.
 */
function generateRkey(relativePath: string): string {
  const withoutExt = relativePath.replace(/\.(md|html)$/, "");
  // Replace path separators and underscores with dashes, remove invalid chars
  return withoutExt
    .replace(/[/\\]/g, "-")
    .replace(/_/g, "-")
    .replace(/[^a-zA-Z0-9-]/g, "")
    .toLowerCase()
    .slice(0, 512); // ATProto rkey max length
}

/**
 * Convert file path to URL path, removing date prefix from filename
 * e.g., "notes/2025-03-16_grounding-questions.md" -> "/notes/grounding-questions"
 *
 * `index` files map to their directory ("notes/index.md" -> "/notes",
 * "index.md" -> "/").
 */
function generatePath(relativePath: string): string {
  // Treat backslashes as separators too, matching generateRkey, so paths
  // produced by `relative()` on Windows still yield forward-slash URLs.
  const withoutExt = relativePath
    .replace(/\\/g, "/")
    .replace(/\.(md|html)$/, "");

  const parts = withoutExt.split("/");
  const filename = parts.pop() ?? "";

  // Handle index files
  if (filename === "index") {
    const dir = parts.join("/");
    return dir ? `/${dir}` : "/";
  }

  // Remove date prefix (YYYY-MM-DD_) from filename
  parts.push(filename.replace(/^\d{4}-\d{2}-\d{2}_/, ""));
  return `/${parts.join("/")}`;
}

/**
 * Recursively find all content files (`.md` and `.html`) under a directory.
 *
 * @param dir - Directory to scan.
 * @param baseDir - Root of the scan; passed through to recursive calls.
 * @returns Full paths (joined onto `dir`) of every matching file.
 */
async function findContentFiles(
  dir: string,
  baseDir: string = dir,
): Promise<string[]> {
  const files: string[] = [];
  const entries = await readdir(dir, { withFileTypes: true });

  for (const entry of entries) {
    const fullPath = join(dir, entry.name);
    if (entry.isDirectory()) {
      const subFiles = await findContentFiles(fullPath, baseDir);
      files.push(...subFiles);
    } else if (entry.isFile() && /\.(md|html)$/.test(entry.name)) {
      files.push(fullPath);
    }
  }

  return files;
}

/**
 * Parse a content file into a document
 *
 * @param filePath - Path of the file to read (as UTF-8).
 * @param baseDir - Directory that `relativePath` is computed against.
 */
async function parseContentFile(
  filePath: string,
  baseDir: string,
): Promise<ParsedDocument> {
  const content = await readFile(filePath, "utf-8");
  const relativePath = relative(baseDir, filePath);
  const { frontmatter, body } = parseFrontmatter(content);

  return {
    frontmatter,
    content: body,
    filePath,
    relativePath,
  };
}

/**
 * Create a standard.site document record
 *
 * @param doc - Parsed content file.
 * @param publicationUri - Value stored in the record's `site` field.
 * @returns The record key derived from the file path and the record body.
 *   `description`, `tags` and `publishedAt` are only set when the
 *   frontmatter provides a usable value.
 */
function createDocumentRecord(
  doc: ParsedDocument,
  publicationUri: string,
): {
  rkey: string;
  record: Record<string, unknown>;
} {
  const rkey = generateRkey(doc.relativePath);
  const path = generatePath(doc.relativePath);

  const record: Record<string, unknown> = {
    $type: DOCUMENT_COLLECTION,
    site: publicationUri,
    // An empty title falls back to the file name without its extension.
    title:
      doc.frontmatter.title ||
      basename(doc.relativePath, extname(doc.relativePath)),
    path,
    // TODO: add textContent with markdown or any other formatting stripped
    content: {
      $type: "markdown",
      markdown: doc.content,
    },
    createdAt: new Date().toISOString(),
  };

  // Add optional fields
  if (doc.frontmatter.description) {
    record.description = doc.frontmatter.description;
  }

  if (doc.frontmatter.tags && doc.frontmatter.tags.length > 0) {
    record.tags = doc.frontmatter.tags;
  }

  if (doc.frontmatter.date) {
    // Parse date and convert to ISO string; unparseable dates are ignored.
    const date = new Date(doc.frontmatter.date);
    if (!Number.isNaN(date.getTime())) {
      record.publishedAt = date.toISOString();
    }
  }

  return { rkey, record };
}

/**
 * CLI entry point: scan the content directory, build one document record per
 * file, and write each record to the PDS with `putRecord` (or only print
 * what would be written when `--dry-run` is given).
 *
 * Files whose record key is empty or collides with an earlier file are
 * skipped rather than uploaded. The process exit code is set to 1 when any
 * upload failed.
 */
async function main(): Promise<void> {
  // Parse arguments
  const args = process.argv.slice(2);
  const getArg = (name: string): string | undefined => {
    const idx = args.indexOf(`--${name}`);
    return idx !== -1 ? args[idx + 1] : undefined;
  };

  const contentDir = getArg("content-dir");
  const publicationUri = getArg("publication");
  const identifier = getArg("identifier") || process.env.ATPROTO_IDENTIFIER;
  const password = getArg("password") || process.env.ATPROTO_PASSWORD;
  const pdsUrl = getArg("pds") || process.env.PDS_URL || "https://bsky.social";
  const dryRun = args.includes("--dry-run");

  if (!contentDir || !publicationUri || !identifier || !password) {
    console.error(`Usage: bun run scripts/import-content.ts \\
  --content-dir <path> \\
  --publication <at-uri> \\
  --identifier <handle-or-did> \\
  --password <app-password> \\
  [--pds <pds-url>] \\
  [--dry-run]

Environment variables:
  ATPROTO_IDENTIFIER - Your handle or DID
  ATPROTO_PASSWORD   - App password
  PDS_URL            - PDS endpoint (default: https://bsky.social)
`);
    process.exit(1);
  }

  console.log(`Content directory: ${contentDir}`);
  console.log(`Publication: ${publicationUri}`);
  console.log(`PDS: ${pdsUrl}`);
  console.log(`Dry run: ${dryRun}`);
  console.log();

  // Verify content directory exists
  try {
    const stats = await stat(contentDir);
    if (!stats.isDirectory()) {
      console.error(`Error: ${contentDir} is not a directory`);
      process.exit(1);
    }
  } catch {
    console.error(`Error: ${contentDir} does not exist`);
    process.exit(1);
  }

  // Find and parse all content files
  console.log("Scanning for content files...");
  const files = await findContentFiles(contentDir);
  console.log(`Found ${files.length} files\n`);

  const documents: ParsedDocument[] = [];
  for (const file of files) {
    const doc = await parseContentFile(file, contentDir);
    documents.push(doc);
  }

  // Create ATProto agent and authenticate
  const agent = new AtpAgent({ service: pdsUrl });
  let repoDid = "";

  if (!dryRun) {
    console.log(`Authenticating as ${identifier}...`);
    await agent.login({ identifier, password });
    const did = agent.session?.did;
    if (!did) {
      // Fail once here instead of asserting non-null on every upload.
      throw new Error("Login completed but the agent has no session DID");
    }
    repoDid = did;
    console.log(`Authenticated as ${did}\n`);
  }

  // Process each document
  let created = 0;
  let skipped = 0;
  let failed = 0;

  // rkey -> relative path of the first file that produced it. putRecord
  // overwrites by key, so a second file with the same key would silently
  // replace the first one.
  const seenRkeys = new Map<string, string>();

  for (const doc of documents) {
    const { rkey, record } = createDocumentRecord(doc, publicationUri);

    console.log(`Processing: ${doc.relativePath}`);
    console.log(`  Title: ${record.title}`);
    console.log(`  Path: ${record.path}`);
    console.log(`  Rkey: ${rkey}`);
    if (record.tags)
      console.log(`  Tags: ${(record.tags as string[]).join(", ")}`);
    if (record.publishedAt) console.log(`  Published: ${record.publishedAt}`);

    if (!rkey) {
      console.log(`  - Skipped: file path produces an empty record key\n`);
      skipped++;
      continue;
    }

    const firstPath = seenRkeys.get(rkey);
    if (firstPath !== undefined) {
      console.log(`  - Skipped: record key already used by ${firstPath}\n`);
      skipped++;
      continue;
    }
    seenRkeys.set(rkey, doc.relativePath);

    if (dryRun) {
      console.log(`  [DRY RUN] Would create record\n`);
      created++;
      continue;
    }

    try {
      await agent.api.com.atproto.repo.putRecord({
        repo: repoDid,
        collection: DOCUMENT_COLLECTION,
        rkey,
        record,
      });
      console.log(`  ✓ Created\n`);
      created++;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.log(`  ✗ Failed: ${message}\n`);
      failed++;
    }
  }

  console.log("---");
  console.log(`Summary:`);
  console.log(`  Created: ${created}`);
  console.log(`  Skipped: ${skipped}`);
  console.log(`  Failed: ${failed}`);

  // Let shells and CI notice a partially failed import.
  if (failed > 0) {
    process.exitCode = 1;
  }
}

main().catch((error) => {
  console.error("Fatal error:", error);
  process.exit(1);
});