forked from
npmx.dev/npmx.dev
[READ-ONLY]
a fast, modern browser for the npm registry
1import process from 'node:process'
2import { createHash } from 'node:crypto'
3import { defineNuxtModule, useNuxt, createResolver } from 'nuxt/kit'
4import { safeParse } from 'valibot'
5import * as site from '../shared/types/lexicons/site'
6import { BlogPostSchema } from '../shared/schemas/blog'
7import { NPMX_SITE } from '../shared/utils/constants'
8import { read } from 'gray-matter'
9import { TID } from '@atproto/common'
10import { Client } from '@atproto/lex'
11
// Process-lifetime cache of path -> sha256(frontmatter) for documents already
// pushed to the PDS; syncFile consults it to skip unchanged files on rebuilds.
const syncedDocuments = new Map<string, string>()
// Fixed TID clock id so the same publish timestamp always produces the same
// record key (see syncFile: deterministic rkey via TID.fromTime).
const CLOCK_ID_THREE = 3
// Date.getTime() yields milliseconds; TID.fromTime expects microseconds.
const DATE_TO_MICROSECONDS = 1000
15
16// TODO: Currently logging quite a lot, can remove some later if we want
17export default defineNuxtModule({
18 meta: { name: 'standard-site-sync' },
19 async setup() {
20 const nuxt = useNuxt()
21 const { resolve } = createResolver(import.meta.url)
22 const contentDir = resolve('../app/pages/blog')
23
24 // Authentication with PDS using an app password
25 const pdsUrl = process.env.NPMX_PDS_URL
26 if (!pdsUrl) {
27 console.warn('[standard-site-sync] NPMX_PDS_URL not set, skipping sync')
28 return
29 }
30 // Instantiate a single new client instance that is reused for every file
31 const client = new Client(pdsUrl)
32
33 if (nuxt.options._prepare) return
34
35 nuxt.hook('build:before', async () => {
36 const { glob } = await import('tinyglobby')
37 const files: string[] = await glob(`${contentDir}/**/*.md`)
38
39 // INFO: Arbitrarily chosen concurrency limit, can be changed if needed
40 const concurrencyLimit = 5
41 for (let i = 0; i < files.length; i += concurrencyLimit) {
42 const batch = files.slice(i, i + concurrencyLimit)
43 // Process files in parallel
44 await Promise.all(
45 batch.map(file =>
46 syncFile(file, NPMX_SITE, client).catch(error =>
47 console.error(`[standard-site-sync] Error in ${file}:` + error),
48 ),
49 ),
50 )
51 }
52 })
53
54 nuxt.hook('builder:watch', async (event, path) => {
55 if (!path.endsWith('.md')) return
56
57 // Ignore deleted files
58 if (event === 'unlink') {
59 console.log(`[standard-site-sync] File deleted: ${path}`)
60 return
61 }
62
63 // Process add/change events only
64 await syncFile(resolve(nuxt.options.rootDir, path), NPMX_SITE, client).catch(err =>
65 console.error(`[standard-site-sync] Failed ${path}:`, err),
66 )
67 })
68 },
69})
70
71/*
72 * INFO: Loads record to atproto and ensures uniqueness by checking the date the article is published
73 * publishedAt is an id that does not change
74 * Atomicity is enforced with upsert using publishedAt so we always update existing records instead of creating new ones
75 * Clock id(3) provides a deterministic ID
76 * WARN: DOES NOT CATCH ERRORS, THIS MUST BE HANDLED
77 */
78const syncFile = async (filePath: string, siteUrl: string, client: Client) => {
79 const { data: frontmatter } = read(filePath)
80
81 // Schema expects 'path' & frontmatter provides 'slug'
82 const normalizedFrontmatter = {
83 ...frontmatter,
84 path: typeof frontmatter.slug === 'string' ? `/blog/${frontmatter.slug}` : frontmatter.path,
85 }
86
87 const result = safeParse(BlogPostSchema, normalizedFrontmatter)
88 if (!result.success) {
89 console.warn(`[standard-site-sync] Validation failed for ${filePath}`, result.issues)
90 return
91 }
92
93 const data = result.output
94
95 // filter drafts
96 if (data.draft) {
97 if (process.env.DEBUG === 'true') {
98 console.debug(`[standard-site-sync] Skipping draft: ${data.path}`)
99 }
100 return
101 }
102
103 // Keys are sorted to provide a more stable hash
104 const hash = createHash('sha256')
105 .update(JSON.stringify(data, Object.keys(data).sort()))
106 .digest('hex')
107
108 if (syncedDocuments.get(data.path) === hash) {
109 return
110 }
111
112 const document = site.standard.document.$build({
113 site: siteUrl as `${string}:${string}`,
114 path: data.path,
115 title: data.title,
116 description: data.description ?? data.excerpt,
117 tags: data.tags,
118 // This can be extended to update the site.standard.document .updatedAt if it is changed and use the posts date here
119 publishedAt: new Date(data.date).toISOString(),
120 })
121
122 const dateInMicroSeconds = new Date(result.output.date).getTime() * DATE_TO_MICROSECONDS
123
124 // Clock id(3) needs to be the same everytime to get the same TID from a timestamp
125 const tid = TID.fromTime(dateInMicroSeconds, CLOCK_ID_THREE)
126
127 // client.put is async and needs to be awaited
128 await client.put(site.standard.document, document, {
129 rkey: tid.str,
130 })
131
132 syncedDocuments.set(data.path, hash)
133}