[READ-ONLY] a fast, modern browser for the npm registry
at main 133 lines 4.5 kB view raw
import process from 'node:process'
import { createHash } from 'node:crypto'
import { defineNuxtModule, useNuxt, createResolver } from 'nuxt/kit'
import { safeParse } from 'valibot'
import * as site from '../shared/types/lexicons/site'
import { BlogPostSchema } from '../shared/schemas/blog'
import { NPMX_SITE } from '../shared/utils/constants'
import { read } from 'gray-matter'
import { TID } from '@atproto/common'
import { Client } from '@atproto/lex'

// Hash of the last successfully synced frontmatter, keyed by document path.
// In-memory only: it lets repeated watch events skip files whose frontmatter did not change.
const syncedDocuments = new Map<string, string>()
// Fixed clock id handed to TID.fromTime so the same timestamp always produces the same TID
const CLOCK_ID_THREE = 3
// Factor applied to Date#getTime() (milliseconds) to obtain the microsecond value given to TID.fromTime
const DATE_TO_MICROSECONDS = 1000

/**
 * Serialises a value to JSON with the keys of every plain object sorted, at every depth.
 *
 * An array replacer (`JSON.stringify(value, keys)`) is not suitable for this: it acts as an
 * allow-list for nested objects too, silently dropping nested properties whose names are not
 * in the list.
 */
const stableStringify = (value: unknown): string =>
  JSON.stringify(value, (_key: string, current: unknown) => {
    if (current === null || typeof current !== 'object' || Array.isArray(current)) {
      return current
    }
    const source = current as Record<string, unknown>
    return Object.fromEntries(
      Object.keys(source)
        .sort()
        .map(key => [key, source[key]]),
    )
  })

// TODO: Currently logging quite a lot, can remove some later if we want
/**
 * Nuxt module that pushes blog post frontmatter to a PDS as `site.standard.document` records.
 *
 * - `build:before`: syncs every markdown file under the blog content directory.
 * - `builder:watch`: re-syncs a single blog markdown file when it is added or changed.
 *
 * The module is a no-op during `nuxt prepare` and when `NPMX_PDS_URL` is not set.
 */
export default defineNuxtModule({
  meta: { name: 'standard-site-sync' },
  async setup() {
    const nuxt = useNuxt()

    // Nothing is synced while only preparing, so bail out before touching env or creating a client
    if (nuxt.options._prepare) return

    const { resolve } = createResolver(import.meta.url)
    const contentDir = resolve('../app/pages/blog')

    // The PDS endpoint comes from the environment; without it there is nothing to sync to
    const pdsUrl = process.env.NPMX_PDS_URL
    if (!pdsUrl) {
      console.warn('[standard-site-sync] NPMX_PDS_URL not set, skipping sync')
      return
    }
    // Instantiate a single new client instance that is reused for every file
    // NOTE(review): only the URL is passed here, no credentials - confirm writes are authorised
    const client = new Client(pdsUrl)

    nuxt.hook('build:before', async () => {
      const { glob } = await import('tinyglobby')
      const files: string[] = await glob(`${contentDir}/**/*.md`)

      // INFO: Arbitrarily chosen concurrency limit, can be changed if needed
      const concurrencyLimit = 5
      for (let i = 0; i < files.length; i += concurrencyLimit) {
        const batch = files.slice(i, i + concurrencyLimit)
        // Process files in parallel; a failing file is logged and does not abort the batch
        await Promise.all(
          batch.map(file =>
            syncFile(file, NPMX_SITE, client).catch(error =>
              // Pass the error as its own argument so the stack trace is preserved
              console.error(`[standard-site-sync] Error in ${file}:`, error),
            ),
          ),
        )
      }
    })

    nuxt.hook('builder:watch', async (event, path) => {
      if (!path.endsWith('.md')) return

      const filePath = resolve(nuxt.options.rootDir, path)
      // Only blog posts are synced, mirroring the glob used in build:before
      if (!filePath.startsWith(`${contentDir}/`)) return

      // Ignore deleted files
      if (event === 'unlink') {
        console.log(`[standard-site-sync] File deleted: ${path}`)
        return
      }

      // Process add/change events only
      await syncFile(filePath, NPMX_SITE, client).catch(err =>
        console.error(`[standard-site-sync] Failed ${path}:`, err),
      )
    })
  },
})

/**
 * Reads one markdown file, validates its frontmatter and writes it to the PDS as a
 * `site.standard.document` record.
 *
 * The record key is a TID derived from the post date with a fixed clock id, so syncing the
 * same post again targets the same record key instead of creating a new one (the original
 * author's note describes `client.put` as an upsert). Files that fail validation, drafts,
 * and files whose frontmatter hash matches the last successful sync are skipped.
 *
 * WARN: DOES NOT CATCH ERRORS, THIS MUST BE HANDLED BY THE CALLER
 *
 * @param filePath - Path of the markdown file to read.
 * @param siteUrl - Value stored in the document's `site` field.
 * @param client - Client used to write the record.
 * @throws Error when the frontmatter date cannot be parsed; read and network errors propagate.
 */
const syncFile = async (filePath: string, siteUrl: string, client: Client): Promise<void> => {
  const { data: frontmatter } = read(filePath)

  // Schema expects 'path' & frontmatter provides 'slug'
  const normalizedFrontmatter = {
    ...frontmatter,
    path: typeof frontmatter.slug === 'string' ? `/blog/${frontmatter.slug}` : frontmatter.path,
  }

  const result = safeParse(BlogPostSchema, normalizedFrontmatter)
  if (!result.success) {
    console.warn(`[standard-site-sync] Validation failed for ${filePath}`, result.issues)
    return
  }

  const data = result.output

  // filter drafts
  if (data.draft) {
    if (process.env.DEBUG === 'true') {
      console.debug(`[standard-site-sync] Skipping draft: ${data.path}`)
    }
    return
  }

  // Keys are sorted at every depth so the hash is stable and also reflects nested fields
  const hash = createHash('sha256').update(stableStringify(data)).digest('hex')

  if (syncedDocuments.get(data.path) === hash) {
    return
  }

  // Parse the date once; it feeds both publishedAt and the record key
  const publishedDate = new Date(data.date)
  const publishedMs = publishedDate.getTime()
  if (Number.isNaN(publishedMs)) {
    throw new Error(`Invalid date in frontmatter of ${filePath}: ${String(data.date)}`)
  }

  const document = site.standard.document.$build({
    site: siteUrl as `${string}:${string}`,
    path: data.path,
    title: data.title,
    description: data.description ?? data.excerpt,
    tags: data.tags,
    // This can be extended to update the site.standard.document .updatedAt if it is changed and use the posts date here
    publishedAt: publishedDate.toISOString(),
  })

  const dateInMicroSeconds = publishedMs * DATE_TO_MICROSECONDS

  // Clock id(3) needs to be the same every time to get the same TID from a timestamp
  const tid = TID.fromTime(dateInMicroSeconds, CLOCK_ID_THREE)

  // client.put is async and needs to be awaited
  await client.put(site.standard.document, document, {
    rkey: tid.str,
  })

  // Remember the hash only after a successful write so failed syncs are retried
  syncedDocuments.set(data.path, hash)
}