#!/usr/bin/env bun
/**
 * Import static site content to ATProto PDS as standard.site documents
 *
 * Usage:
 *   bun run scripts/import-content.ts --content-dir <dir> --publication <publication-uri> --identifier <handle-or-did> --password <app-password>
 *
 * Environment variables (alternative to CLI args):
 *   ATPROTO_IDENTIFIER - Your handle or DID
 *   ATPROTO_PASSWORD   - App password (create at https://bsky.app/settings/app-passwords)
 *   PDS_URL            - PDS endpoint (defaults to https://bsky.social)
 */

import { readdir, readFile, stat } from "node:fs/promises";
import { basename, extname, join, relative } from "node:path";
import { AtpAgent } from "@atproto/api";

const DOCUMENT_COLLECTION = "site.standard.document";

/** Subset of frontmatter keys this importer understands. */
interface Frontmatter {
  title?: string;
  date?: string;
  tags?: string[];
  author?: { name?: string; uri?: string };
  description?: string;
  view?: string;
}

/** A content file after its frontmatter has been split from its body. */
interface ParsedDocument {
  frontmatter: Frontmatter;
  content: string;
  filePath: string;
  relativePath: string;
}

/** Remove one leading and/or one trailing quote character (`"` or `'`). */
function stripQuotes(value: string): string {
  return value.replace(/^["']|["']$/g, "");
}

/**
 * Parse YAML-like frontmatter from markdown content.
 *
 * This is a deliberately small parser, not a YAML implementation. It handles
 * scalar `title`/`date`/`description`/`view` values, `tags` as either a block
 * list (`- item`) or an inline list (`[a, b]`), and a nested `author` object
 * with `name`/`uri` keys. Everything else is ignored.
 *
 * @param content - Full file content, optionally starting with a `---` fence.
 * @returns The parsed frontmatter and the remaining body. When no frontmatter
 *   fence is found the frontmatter is empty and the body is the whole input.
 */
function parseFrontmatter(content: string): {
  frontmatter: Frontmatter;
  body: string;
} {
  const frontmatterMatch = content.match(
    /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/,
  );

  if (!frontmatterMatch) {
    return { frontmatter: {}, body: content };
  }

  const [, yamlContent = "", body = ""] = frontmatterMatch;
  const frontmatter: Frontmatter = {};

  // Parser state: the key most recently opened, the array that "- item"
  // lines append to (only set for a block-style `tags:`), and whether we are
  // currently inside the nested `author:` object.
  let currentKey: string | null = null;
  let currentArray: string[] | null = null;
  let inAuthor = false;
  let authorObj: { name?: string; uri?: string } = {};

  for (const line of yamlContent.split("\n")) {
    // trim() also drops the trailing "\r" left behind by CRLF files
    const trimmed = line.trim();
    if (!trimmed) continue;

    // Array item
    if (trimmed.startsWith("- ") && currentKey) {
      currentArray?.push(stripQuotes(trimmed.slice(2).trim()));
      continue;
    }

    // Nested key (for author)
    if (line.startsWith(" ") && inAuthor) {
      const nestedMatch = trimmed.match(/^(\w+):\s*(.*)$/);
      if (nestedMatch) {
        const [, key, value] = nestedMatch;
        const cleanValue = stripQuotes(value ?? "");
        if (key === "name") authorObj.name = cleanValue;
        if (key === "uri") authorObj.uri = cleanValue;
      }
      continue;
    }

    // Key-value pair
    const kvMatch = trimmed.match(/^(\w+):\s*(.*)$/);
    if (!kvMatch) continue;

    const [, key, rawValue] = kvMatch;
    if (!key) continue;
    const value = rawValue?.trim();

    // A new top-level key closes any author object that was being collected
    if (inAuthor && currentKey === "author") {
      frontmatter.author = authorObj;
      authorObj = {};
    }
    inAuthor = false;
    currentArray = null;

    if (!value) {
      // Could be array or nested object
      if (key === "tags") {
        currentKey = key;
        currentArray = [];
        frontmatter.tags = currentArray;
      } else if (key === "author") {
        currentKey = key;
        inAuthor = true;
      }
      continue;
    }

    // Direct value
    const cleanValue = stripQuotes(value);
    if (key === "title") frontmatter.title = cleanValue;
    if (key === "date") frontmatter.date = cleanValue;
    if (key === "description") frontmatter.description = cleanValue;
    if (key === "view") frontmatter.view = cleanValue;
    if (key === "tags" && value.startsWith("[")) {
      // Inline array like tags: ["a", "b"]. Empty entries are dropped so
      // that `tags: []` yields no tags instead of a single empty-string tag.
      frontmatter.tags = value
        .slice(1, -1)
        .split(",")
        .map((t) => stripQuotes(t.trim()))
        .filter((t) => t.length > 0);
    }
    currentKey = key;
  }

  // Save final author if needed
  if (inAuthor) {
    frontmatter.author = authorObj;
  }

  return { frontmatter, body };
}

/**
 * Generate a record key from a file path
 * e.g., "notes/2025-03-16_grounding-questions.md" -> "notes-2025-03-16-grounding-questions"
 *
 * Path separators (`/` and `\`) and underscores become dashes, every other
 * character outside `[a-zA-Z0-9-]` is removed, and the result is lower-cased.
 * Note that the result can be empty (a path made only of removed characters)
 * and that distinct paths can map to the same key.
 */
function generateRkey(relativePath: string): string {
  const withoutExt = relativePath.replace(/\.(md|html)$/, "");
  return withoutExt
    .replace(/[/\\]/g, "-")
    .replace(/_/g, "-")
    .replace(/[^a-zA-Z0-9-]/g, "")
    .toLowerCase()
    .slice(0, 512); // ATProto rkey max length
}

/**
 * Convert file path to URL path, removing date prefix from filename
 * e.g., "notes/2025-03-16_grounding-questions.md" -> "/notes/grounding-questions"
 *
 * `index` files map to their directory ("notes/index.md" -> "/notes",
 * "index.md" -> "/"). Backslash separators are accepted as well as forward
 * slashes, matching what generateRkey accepts, so the resulting URL path
 * always uses "/".
 */
function generatePath(relativePath: string): string {
  const segments = relativePath
    .replace(/\.(md|html)$/, "")
    .replace(/\\/g, "/")
    .split("/");
  const filename = segments[segments.length - 1] ?? "";

  // Handle index files
  if (filename === "index") {
    segments.pop();
    const dir = segments.join("/");
    return dir ? `/${dir}` : "/";
  }

  // Remove date prefix (YYYY-MM-DD_) from filename
  segments[segments.length - 1] = filename.replace(/^\d{4}-\d{2}-\d{2}_/, "");
  return `/${segments.join("/")}`;
}

/**
 * Recursively find all content files (`.md` and `.html`) below a directory.
 *
 * @param dir - Directory to scan.
 * @param baseDir - Root of the scan; only passed through to recursive calls.
 * @returns Paths of matching files, each built by joining onto `dir`.
 *   Entries are visited in name order so the result is deterministic.
 */
async function findContentFiles(
  dir: string,
  baseDir: string = dir,
): Promise<string[]> {
  const files: string[] = [];
  const entries = await readdir(dir, { withFileTypes: true });
  // readdir gives no ordering guarantee; sort so repeated runs process files
  // in the same order.
  entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

  for (const entry of entries) {
    const fullPath = join(dir, entry.name);
    if (entry.isDirectory()) {
      files.push(...(await findContentFiles(fullPath, baseDir)));
    } else if (entry.isFile() && /\.(md|html)$/.test(entry.name)) {
      files.push(fullPath);
    }
  }

  return files;
}

/**
 * Parse a content file into a document
 *
 * @param filePath - Path of the file to read (as UTF-8).
 * @param baseDir - Directory that `relativePath` is computed against.
 */
async function parseContentFile(
  filePath: string,
  baseDir: string,
): Promise<ParsedDocument> {
  const content = await readFile(filePath, "utf-8");
  const relativePath = relative(baseDir, filePath);
  const { frontmatter, body } = parseFrontmatter(content);

  return {
    frontmatter,
    content: body,
    filePath,
    relativePath,
  };
}

/**
 * Create a standard.site document record
 *
 * @param doc - Parsed content file.
 * @param publicationUri - Value stored in the record's `site` field.
 * @returns The record key derived from the file path and the record body.
 *   `description`, `tags` and `publishedAt` are only present when the
 *   frontmatter supplies a usable value; the title falls back to the file
 *   name without its extension.
 */
function createDocumentRecord(
  doc: ParsedDocument,
  publicationUri: string,
): {
  rkey: string;
  record: Record<string, unknown>;
} {
  const rkey = generateRkey(doc.relativePath);
  const path = generatePath(doc.relativePath);

  const record: Record<string, unknown> = {
    $type: DOCUMENT_COLLECTION,
    site: publicationUri,
    // `||` on purpose: an empty title should also fall back to the file name
    title:
      doc.frontmatter.title ||
      basename(doc.relativePath, extname(doc.relativePath)),
    path,
    // TODO: add textContent with markdown or any other formatting stripped
    content: {
      $type: "markdown",
      markdown: doc.content,
    },
    createdAt: new Date().toISOString(),
  };

  // Add optional fields
  if (doc.frontmatter.description) {
    record.description = doc.frontmatter.description;
  }

  if (doc.frontmatter.tags && doc.frontmatter.tags.length > 0) {
    record.tags = doc.frontmatter.tags;
  }

  if (doc.frontmatter.date) {
    // Parse date and convert to ISO string; unparseable dates are ignored
    const date = new Date(doc.frontmatter.date);
    if (!isNaN(date.getTime())) {
      record.publishedAt = date.toISOString();
    }
  }

  return { rkey, record };
}

/**
 * CLI entry point: scan the content directory, build one record per file and
 * write each to the repo with putRecord (or just print it with --dry-run).
 *
 * Exits with status 1 on bad arguments or a missing content directory, and
 * sets a non-zero exit code when at least one record failed to upload.
 */
async function main() {
  // Parse arguments
  const args = process.argv.slice(2);
  const getArg = (name: string): string | undefined => {
    const idx = args.indexOf(`--${name}`);
    if (idx === -1) return undefined;
    const value = args[idx + 1];
    // If the next token is another flag, the value was omitted
    return value !== undefined && !value.startsWith("--") ? value : undefined;
  };

  const contentDir = getArg("content-dir");
  const publicationUri = getArg("publication");
  const identifier = getArg("identifier") || process.env.ATPROTO_IDENTIFIER;
  const password = getArg("password") || process.env.ATPROTO_PASSWORD;
  const pdsUrl = getArg("pds") || process.env.PDS_URL || "https://bsky.social";
  const dryRun = args.includes("--dry-run");

  if (!contentDir || !publicationUri || !identifier || !password) {
    console.error(`Usage: bun run scripts/import-content.ts \\
  --content-dir <dir> \\
  --publication <publication-uri> \\
  --identifier <handle-or-did> \\
  --password <app-password> \\
  [--pds <url>] \\
  [--dry-run]

Environment variables:
  ATPROTO_IDENTIFIER - Your handle or DID
  ATPROTO_PASSWORD - App password
  PDS_URL - PDS endpoint (default: https://bsky.social)
`);
    process.exit(1);
  }

  console.log(`Content directory: ${contentDir}`);
  console.log(`Publication: ${publicationUri}`);
  console.log(`PDS: ${pdsUrl}`);
  console.log(`Dry run: ${dryRun}`);
  console.log();

  // Verify content directory exists
  try {
    const stats = await stat(contentDir);
    if (!stats.isDirectory()) {
      console.error(`Error: ${contentDir} is not a directory`);
      process.exit(1);
    }
  } catch {
    console.error(`Error: ${contentDir} does not exist`);
    process.exit(1);
  }

  // Find and parse all content files
  console.log("Scanning for content files...");
  const files = await findContentFiles(contentDir);
  console.log(`Found ${files.length} files\n`);

  const documents: ParsedDocument[] = [];
  for (const file of files) {
    documents.push(await parseContentFile(file, contentDir));
  }

  // Create ATProto agent and authenticate
  const agent = new AtpAgent({ service: pdsUrl });
  let repoDid = "";

  if (!dryRun) {
    console.log(`Authenticating as ${identifier}...`);
    await agent.login({ identifier, password });
    const did = agent.session?.did;
    if (!did) {
      // Checked once here instead of asserting non-null on every upload
      throw new Error("Login completed but the session has no DID");
    }
    repoDid = did;
    console.log(`Authenticated as ${repoDid}\n`);
  }

  // Process each document
  let created = 0;
  let skipped = 0;
  let failed = 0;
  // rkey -> relative path of the first file that claimed it
  const claimedRkeys = new Map<string, string>();

  for (const doc of documents) {
    const { rkey, record } = createDocumentRecord(doc, publicationUri);

    console.log(`Processing: ${doc.relativePath}`);
    console.log(` Title: ${record.title}`);
    console.log(` Path: ${record.path}`);
    console.log(` Rkey: ${rkey}`);
    if (Array.isArray(record.tags)) {
      console.log(` Tags: ${record.tags.join(", ")}`);
    }
    if (record.publishedAt) console.log(` Published: ${record.publishedAt}`);

    if (!rkey) {
      // Every character of the path was stripped by generateRkey
      console.log(` - Skipped: file path produces an empty record key\n`);
      skipped++;
      continue;
    }

    const firstOwner = claimedRkeys.get(rkey);
    if (firstOwner !== undefined) {
      // putRecord would silently overwrite the record written for firstOwner
      console.log(` - Skipped: record key already used by ${firstOwner}\n`);
      skipped++;
      continue;
    }
    claimedRkeys.set(rkey, doc.relativePath);

    if (dryRun) {
      console.log(` [DRY RUN] Would create record\n`);
      created++;
      continue;
    }

    try {
      await agent.api.com.atproto.repo.putRecord({
        repo: repoDid,
        collection: DOCUMENT_COLLECTION,
        rkey,
        record,
      });
      console.log(` ✓ Created\n`);
      created++;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ✗ Failed: ${message}\n`);
      failed++;
    }
  }

  console.log("---");
  console.log(`Summary:`);
  console.log(` Created: ${created}`);
  console.log(` Skipped: ${skipped}`);
  console.log(` Failed: ${failed}`);

  // Let callers (CI, shell scripts) detect partial failure
  if (failed > 0) {
    process.exitCode = 1;
  }
}

main().catch((error) => {
  console.error("Fatal error:", error);
  process.exit(1);
});