forked from
npmx.dev/npmx.dev
[READ-ONLY]
a fast, modern browser for the npm registry
1import { marked, type Tokens } from 'marked'
2import sanitizeHtml from 'sanitize-html'
3import { hasProtocol } from 'ufo'
4import type { ReadmeResponse, TocItem } from '#shared/types/readme'
5import { convertBlobOrFileToRawUrl, type RepositoryInfo } from '#shared/utils/git-providers'
6import { highlightCodeSync } from './shiki'
7import { convertToEmoji } from '#shared/utils/emoji'
8
/**
 * Shape of one entry in the playground/demo provider table.
 */
interface PlaygroundProvider {
  /** Provider identifier */
  id: string
  /** Human-readable provider name */
  name: string
  /** Associated domains */
  domains: string[]
  /** Optional pathname prefix for the provider */
  path?: string
  /** Provider icon name */
  icon?: string
}
19
/**
 * Known playground/demo providers.
 *
 * One row per provider; `domains` lists every hostname belonging to it and the
 * optional `path` narrows the provider to a pathname prefix on those hosts.
 */
const PLAYGROUND_PROVIDERS: PlaygroundProvider[] = [
  // General-purpose online editors and sandboxes
  { id: 'stackblitz', name: 'StackBlitz', domains: ['stackblitz.com', 'stackblitz.io'], icon: 'stackblitz' },
  { id: 'codesandbox', name: 'CodeSandbox', domains: ['codesandbox.io', 'githubbox.com', 'csb.app'], icon: 'codesandbox' },
  { id: 'codepen', name: 'CodePen', domains: ['codepen.io'], icon: 'codepen' },
  { id: 'jsfiddle', name: 'JSFiddle', domains: ['jsfiddle.net'], icon: 'jsfiddle' },
  { id: 'replit', name: 'Replit', domains: ['repl.it', 'replit.com'], icon: 'replit' },
  { id: 'gitpod', name: 'Gitpod', domains: ['gitpod.io'], icon: 'gitpod' },

  // Framework / tool specific playgrounds and starters
  { id: 'vue-playground', name: 'Vue Playground', domains: ['play.vuejs.org', 'sfc.vuejs.org'], icon: 'vue' },
  { id: 'nuxt-new', name: 'Nuxt Starter', domains: ['nuxt.new'], icon: 'nuxt' },
  { id: 'vite-new', name: 'Vite Starter', domains: ['vite.new'], icon: 'vite' },

  // Only the /play section of typescriptlang.org is the playground
  { id: 'typescript-playground', name: 'TypeScript Playground', domains: ['typescriptlang.org'], path: '/play', icon: 'typescript' },
]
86
/**
 * Look up the playground provider a URL belongs to.
 *
 * A provider matches when the URL's hostname equals one of its domains (or is a
 * subdomain of one) and, if the provider declares a `path`, the URL's pathname
 * starts with it. The first matching provider in table order wins.
 *
 * @param url - Absolute URL to test
 * @returns The matching provider, or `null` when nothing matches or the URL cannot be parsed
 */
function matchPlaygroundProvider(url: string): PlaygroundProvider | null {
  let target: URL
  try {
    target = new URL(url)
  } catch {
    // Invalid URL
    return null
  }

  const host = target.hostname.toLowerCase()
  const pathname = target.pathname

  const match = PLAYGROUND_PROVIDERS.find(candidate => {
    if (candidate.path && !pathname.startsWith(candidate.path)) {
      return false
    }
    return candidate.domains.some(domain => host === domain || host.endsWith(`.${domain}`))
  })

  return match ?? null
}
110
// Tags that survive sanitization.
// h1-h6 are all allowed here, but h1-h2 get replaced later because README headings
// are shifted down by 2 levels (page h1 = package name, h2 = "Readme" section,
// so README h1 → h3).
const ALLOWED_TAGS = [
  // Headings
  'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  // Block-level text
  'p', 'br', 'hr',
  // Lists
  'ul', 'ol', 'li',
  // Quotes and code
  'blockquote', 'pre', 'code',
  // Inline text
  'a', 'strong', 'em', 'del', 's',
  // Tables
  'table', 'thead', 'tbody', 'tr', 'th', 'td',
  // Media
  'img', 'picture', 'source',
  // Disclosure widgets
  'details', 'summary',
  // Generic containers
  'div', 'span',
  // Misc inline
  'sup', 'sub', 'kbd', 'mark',
  // Copy button injected around code blocks
  'button',
]
153
// Per-tag attribute allow list used during sanitization; anything not listed is dropped.
const ALLOWED_ATTR: Record<string, string[]> = {
  // Allow id on all tags
  '*': ['id'],

  // Links and media
  'a': ['href', 'title', 'target', 'rel'],
  'img': ['src', 'alt', 'title', 'width', 'height', 'align'],
  'source': ['src', 'srcset', 'type', 'media'],

  // Copy button
  'button': ['class', 'title', 'type', 'aria-label', 'data-copy'],

  // Table cells
  'th': ['colspan', 'rowspan', 'align', 'valign', 'width'],
  'td': ['colspan', 'rowspan', 'align', 'valign', 'width'],

  // Headings (data-level carries the original README depth)
  'h3': ['data-level', 'align'],
  'h4': ['data-level', 'align'],
  'h5': ['data-level', 'align'],
  'h6': ['data-level', 'align'],

  // Callouts and disclosure widgets
  'blockquote': ['data-callout'],
  'details': ['open'],

  // Code and generic containers
  'code': ['class'],
  'pre': ['class', 'style'],
  'span': ['class', 'style'],
  'div': ['class', 'style', 'align'],
  'p': ['align'],
}
174
175// GitHub-style callout types
176// Format: > [!NOTE], > [!TIP], > [!IMPORTANT], > [!WARNING], > [!CAUTION]
177
/**
 * Generate a GitHub-style slug from (inline-rendered) heading HTML.
 *
 * Steps, in order:
 * - Remove HTML tags
 * - Remove HTML entities (`&amp;`, `&#39;`, `&#x27;`, …) so that escaped punctuation
 *   does not leak entity names into the slug ("Q&amp;A" → "qa", not "qampa")
 * - Convert to lowercase and trim
 * - Replace whitespace runs with hyphens
 * - Remove special characters, keeping ASCII word characters (including `_`),
 *   hyphens, and letters/marks/numbers of any script (e.g. "Instalação",
 *   "Установка", CJK)
 * - Collapse multiple hyphens and trim leading/trailing hyphens
 *
 * @param text - Heading content; may contain HTML tags and entities
 * @returns The slug, or an empty string when nothing sluggable remains
 */
function slugify(text: string): string {
  return text
    .replace(/<[^>]*>/g, '') // Strip HTML tags
    .replace(/&(?:#\d+|#x[\da-f]+|[a-z][\da-z]*);/gi, '') // Strip HTML entities
    .toLowerCase()
    .trim()
    .replace(/\s+/g, '-') // Spaces to hyphens
    // Keep word chars, any-script letters/marks/numbers, the kana/CJK blocks, and hyphens.
    // The explicit ranges are retained so every character kept previously is still kept.
    .replace(/[^\w\p{L}\p{M}\p{N}\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff-]/gu, '')
    .replace(/-+/g, '-') // Collapse multiple hyphens
    .replace(/^-|-$/g, '') // Trim leading/trailing hyphens
}
196
/**
 * First path segments on npmjs.com that don't belong to packages or search,
 * so links to them must not be replaced with npmx.dev urls.
 */
const reservedPathsNpmJs = [
  // Product / account pages
  'products', 'login', 'signup',
  // Content pages
  'advisories', 'blog', 'about', 'press', 'policies',
]
208
/**
 * Decide whether a parsed URL points at an npmjs.com page that can be served
 * from this site instead.
 *
 * Returns `false` for other hosts, for the npmjs.com root page, and for any
 * pathname starting with one of the reserved npmjs.com paths.
 */
const isNpmJsUrlThatCanBeRedirected = (url: URL) => {
  const onNpmJs = url.host === 'www.npmjs.com' || url.host === 'npmjs.com'
  if (!onNpmJs || url.pathname === '/') {
    return false
  }

  for (const reserved of reservedPathsNpmJs) {
    if (url.pathname.startsWith(`/${reserved}`)) {
      return false
    }
  }

  return true
}
223
/**
 * Resolve a URL found in a README to one that works on this site.
 *
 * - `#anchor` links are prefixed with `user-content-` to match heading IDs.
 * - Absolute and protocol-relative http(s) URLs are returned unchanged, except
 *   redirectable npmjs.com URLs, which are reduced to their path + query + hash.
 * - Non-HTTP protocols (javascript:, data:, etc.) are deliberately treated as
 *   relative paths rather than returned.
 * - Relative URLs are resolved against the repository when repository info is
 *   available: markdown files (.md) use the blob base URL so they render on the
 *   provider's site, everything else uses the raw base URL.
 *   - `./x` and `x` are relative to the package directory (`repoInfo.directory`).
 *   - `../x` walks up from the package directory and is clamped at the repository root.
 *   - `/x` (root-relative) is resolved from the repository root, ignoring the
 *     package directory.
 * - Without repository info, markdown links are left unchanged (like npm does)
 *   and everything else falls back to the jsdelivr CDN.
 *
 * @param url - URL as written in the README
 * @param packageName - Package name, used for the jsdelivr fallback
 * @param repoInfo - Repository information, when known
 * @returns The resolved URL
 */
function resolveUrl(url: string, packageName: string, repoInfo?: RepositoryInfo): string {
  if (!url) return url
  if (url.startsWith('#')) {
    // Prefix anchor links to match heading IDs (avoids collision with page IDs)
    return `#user-content-${url.slice(1)}`
  }
  if (hasProtocol(url, { acceptRelative: true })) {
    try {
      const parsed = new URL(url, 'https://example.com')
      if (parsed.protocol === 'http:' || parsed.protocol === 'https:') {
        // Redirect npmjs urls to ourself
        if (isNpmJsUrlThatCanBeRedirected(parsed)) {
          return parsed.pathname + parsed.search + parsed.hash
        }
        return url
      }
    } catch {
      // Invalid URL, fall through to resolve as relative
    }
    // return protocol-relative URLs (//example.com) as-is
    if (url.startsWith('//')) {
      return url
    }
    // for non-HTTP protocols (javascript:, data:, etc.), don't return, treat as relative
  }

  // Check if this is a markdown file link (ignoring any query string / hash)
  const isMarkdownFile = /\.md$/i.test(url.split('?')[0]?.split('#')[0] ?? '')

  // Root-relative paths (/docs/x.md) address the repository/package root. Strip the
  // leading slash so joining with a base URL doesn't produce `base//docs/x.md`.
  // Otherwise just drop a leading `./`.
  const isRootRelative = url.startsWith('/')
  const normalizedPath = isRootRelative ? url.replace(/^\/+/, '') : url.replace(/^\.\//, '')

  // Use provider's URL base when repository info is available
  // This handles assets that exist in the repo but not in the npm tarball
  if (repoInfo?.rawBaseUrl) {
    let relativePath = normalizedPath

    // If package is in a subdirectory, resolve relative paths from there
    // e.g., for packages/ai with ./assets/hero.gif → packages/ai/assets/hero.gif
    // Root-relative paths skip the subdirectory entirely.
    const dirParts =
      !isRootRelative && repoInfo.directory ? repoInfo.directory.split('/').filter(Boolean) : []

    // Handle ../ navigation, e.g. ../../.github/assets/banner.jpg.
    // Popping an empty list is a no-op, so this clamps at the repository root
    // instead of emitting `..` segments after the base URL.
    while (relativePath.startsWith('../')) {
      relativePath = relativePath.slice(3)
      dirParts.pop()
    }

    // Reconstruct the path
    if (dirParts.length > 0) {
      relativePath = `${dirParts.join('/')}/${relativePath}`
    }

    // For markdown files, use blob URL so they render on the provider's site
    // For other files, use raw URL for direct access
    const baseUrl = isMarkdownFile ? repoInfo.blobBaseUrl : repoInfo.rawBaseUrl
    return `${baseUrl}/${relativePath}`
  }

  // For markdown files without repo info, leave unchanged (like npm does)
  // This avoids 404s from jsdelivr which doesn't render markdown
  if (isMarkdownFile) {
    return url
  }

  // Fallback: relative URLs → jsdelivr CDN (may 404 if asset not in npm tarball)
  return `https://cdn.jsdelivr.net/npm/${packageName}/${normalizedPath}`
}
299
/**
 * Resolve an image URL and, when the repository provider is known, convert
 * blob/src URLs to raw URLs so the image can be loaded directly.
 *
 * e.g. https://github.com/nuxt/nuxt/blob/main/.github/assets/banner.svg
 *    → https://github.com/nuxt/nuxt/raw/main/.github/assets/banner.svg
 */
function resolveImageUrl(url: string, packageName: string, repoInfo?: RepositoryInfo): string {
  const absoluteUrl = resolveUrl(url, packageName, repoInfo)
  const provider = repoInfo?.provider
  return provider ? convertBlobOrFileToRawUrl(absoluteUrl, provider) : absoluteUrl
}
310
/**
 * Tag transformer that namespaces an element's `id` with 'user-content-'.
 * Elements without an id, or whose id already carries the prefix, are left as-is.
 * The `attribs` object is updated in place and returned alongside the tag name.
 */
function prefixId(tagName: string, attribs: sanitizeHtml.Attributes) {
  const currentId = attribs.id
  const needsPrefix = Boolean(currentId) && !currentId.startsWith('user-content-')
  if (needsPrefix) {
    attribs.id = `user-content-${currentId}`
  }
  return { tagName, attribs }
}
318
/**
 * Map a README heading depth to the heading level actually rendered.
 *
 * - A README h1 always becomes h3.
 * - Deeper headings are shifted down by two levels, but may go at most one level
 *   deeper than the previously rendered heading, and never past h6.
 */
function calculateSemanticDepth(depth: number, lastSemanticLevel: number) {
  if (depth === 1) {
    return 3
  }
  // Smallest of: shifted depth, one below the previous heading, and the h6 ceiling
  return Math.min(depth + 2, lastSemanticLevel + 1, 6)
}
327
/**
 * Render README markdown to sanitized HTML.
 *
 * Along the way this:
 * - shifts headings so the README starts at h3 without skipping levels, gives each
 *   heading a unique `user-content-` prefixed id and collects a table of contents
 * - highlights code blocks and wraps them with a copy button
 * - resolves image and link URLs (relative paths, blob → raw, npmjs.com → this site)
 * - renders GitHub-style callouts (`> [!NOTE]`, …)
 * - collects links to known playground providers
 * - sanitizes the result against the tag/attribute allow lists and converts emoji
 *
 * @param content - Raw README markdown
 * @param packageName - Package name, used when resolving relative URLs
 * @param repoInfo - Repository information used to resolve relative URLs, when known
 * @returns The rendered HTML together with the collected playground links and TOC;
 *   empty values when `content` is empty
 */
export async function renderReadmeHtml(
  content: string,
  packageName: string,
  repoInfo?: RepositoryInfo,
): Promise<ReadmeResponse> {
  if (!content) return { html: '', playgroundLinks: [], toc: [] }

  const shiki = await getShikiHighlighter()
  const renderer = new marked.Renderer()

  // Collect playground links during parsing
  const collectedLinks: PlaygroundLink[] = []
  const seenUrls = new Set<string>()

  // Collect table of contents items during parsing
  const toc: TocItem[] = []

  // Track used heading slugs to handle duplicates (GitHub-style: foo, foo-1, foo-2)
  const usedSlugs = new Map<string, number>()

  // Track heading hierarchy to ensure sequential order for accessibility
  // Page h1 = package name, h2 = "Readme" section heading
  // So README starts at h3, and we ensure no levels are skipped
  // Visual styling preserved via data-level attribute (original depth)
  let lastSemanticLevel = 2 // Start after h2 (the "Readme" section heading)
  renderer.heading = function ({ tokens, depth }: Tokens.Heading) {
    // Calculate the target semantic level based on document structure
    // Start at h3 (since page h1 + section h2 already exist)
    // But ensure we never skip levels - can only go down by 1 or stay same/go up
    const semanticLevel = calculateSemanticDepth(depth, lastSemanticLevel)
    lastSemanticLevel = semanticLevel
    const text = this.parser.parseInline(tokens)

    // Generate GitHub-style slug for anchor links
    let slug = slugify(text)
    if (!slug) slug = 'heading' // Fallback for empty headings

    // Handle duplicate slugs (GitHub-style: foo, foo-1, foo-2)
    const count = usedSlugs.get(slug) ?? 0
    usedSlugs.set(slug, count + 1)
    const uniqueSlug = count === 0 ? slug : `${slug}-${count}`

    // Prefix with 'user-content-' to avoid collisions with page IDs
    // (e.g., #install, #dependencies, #versions are used by the package page)
    const id = `user-content-${uniqueSlug}`

    // Collect TOC item with plain text (HTML stripped)
    const plainText = text.replace(/<[^>]*>/g, '').trim()
    if (plainText) {
      toc.push({ text: plainText, id, depth })
    }

    /** The link href uses the unique slug WITHOUT the 'user-content-' prefix, because that will later be added for all links. */
    return `<h${semanticLevel} id="${id}" data-level="${depth}"><a href="#${uniqueSlug}">${plainText}</a></h${semanticLevel}>\n`
  }

  // Syntax highlighting for code blocks (uses shared highlighter)
  renderer.code = ({ text, lang }: Tokens.Code) => {
    const html = highlightCodeSync(shiki, text, lang || 'text')
    // Add copy button
    return `<div class="readme-code-block" >
<button type="button" class="readme-copy-button" aria-label="Copy code" check-icon="i-lucide:check" copy-icon="i-lucide:copy" data-copy>
<span class="i-lucide:copy" aria-hidden="true"></span>
<span class="sr-only">Copy code</span>
</button>
${html}
</div>`
  }

  // Resolve image URLs (with GitHub blob → raw conversion)
  renderer.image = ({ href, title, text }: Tokens.Image) => {
    const resolvedHref = resolveImageUrl(href, packageName, repoInfo)
    const titleAttr = title ? ` title="${title}"` : ''
    const altAttr = text ? ` alt="${text}"` : ''
    return `<img src="${resolvedHref}"${altAttr}${titleAttr}>`
  }

  // Render links with an intermediate data attribute carrying their label.
  // URL resolution, security attributes and playground-link collection happen
  // later in the sanitizer's `a` transform, which sees both markdown and raw HTML links.
  renderer.link = function ({ href, title, tokens }: Tokens.Link) {
    const text = this.parser.parseInline(tokens)
    const titleAttr = title ? ` title="${title}"` : ''
    let plainText = text.replace(/<[^>]*>/g, '').trim()

    // If plain text is empty, check if we have an image with alt text
    if (!plainText && tokens.length === 1 && tokens[0]?.type === 'image') {
      plainText = tokens[0].text
    }

    const intermediateTitleAttr =
      plainText || title ? ` data-title-intermediate="${plainText || title}"` : ''

    return `<a href="${href}"${titleAttr}${intermediateTitleAttr}>${text}</a>`
  }

  // GitHub-style callouts: > [!NOTE], > [!TIP], etc.
  renderer.blockquote = function ({ tokens }: Tokens.Blockquote) {
    const body = this.parser.parse(tokens)

    const calloutMatch = body.match(/^<p>\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION)\](?:<br>)?\s*/i)

    if (calloutMatch?.[1]) {
      const calloutType = calloutMatch[1].toLowerCase()
      // Drop the `[!TYPE]` marker but keep the opening <p> it was part of
      const cleanedBody = body.replace(calloutMatch[0], '<p>')
      return `<blockquote data-callout="${calloutType}">${cleanedBody}</blockquote>\n`
    }

    return `<blockquote>${body}</blockquote>\n`
  }

  // Pass the renderer as a per-call option instead of `marked.setOptions`, which
  // would install this call's renderer (and the state it closes over) on the
  // shared global `marked` instance and leak it into any other use of `marked`.
  const rawHtml = marked.parse(content, { renderer }) as string

  const sanitized = sanitizeHtml(rawHtml, {
    allowedTags: ALLOWED_TAGS,
    allowedAttributes: ALLOWED_ATTR,
    allowedSchemes: ['http', 'https', 'mailto'],
    transformTags: {
      // Headings written as raw HTML carry no data-level attribute and are shifted
      // down here; headings produced by the markdown renderer above already carry
      // data-level and are passed through at their computed level.
      h1: (_, attribs) => {
        return { tagName: 'h3', attribs: { ...attribs, 'data-level': '1' } }
      },
      h2: (_, attribs) => {
        return { tagName: 'h4', attribs: { ...attribs, 'data-level': '2' } }
      },
      h3: (_, attribs) => {
        if (attribs['data-level']) return { tagName: 'h3', attribs: attribs }
        return { tagName: 'h5', attribs: { ...attribs, 'data-level': '3' } }
      },
      h4: (_, attribs) => {
        if (attribs['data-level']) return { tagName: 'h4', attribs: attribs }
        return { tagName: 'h6', attribs: { ...attribs, 'data-level': '4' } }
      },
      h5: (_, attribs) => {
        if (attribs['data-level']) return { tagName: 'h5', attribs: attribs }
        return { tagName: 'h6', attribs: { ...attribs, 'data-level': '5' } }
      },
      h6: (_, attribs) => {
        if (attribs['data-level']) return { tagName: 'h6', attribs: attribs }
        return { tagName: 'h6', attribs: { ...attribs, 'data-level': '6' } }
      },
      // Transform img src URLs (GitHub blob → raw, relative → GitHub raw)
      img: (tagName, attribs) => {
        if (attribs.src) {
          attribs.src = resolveImageUrl(attribs.src, packageName, repoInfo)
        }
        return { tagName, attribs }
      },
      source: (tagName, attribs) => {
        if (attribs.src) {
          attribs.src = resolveImageUrl(attribs.src, packageName, repoInfo)
        }
        if (attribs.srcset) {
          // srcset is a comma-separated list of "<url> [descriptor]" entries;
          // resolve each URL and keep its descriptor
          attribs.srcset = attribs.srcset
            .split(',')
            .map(entry => {
              const parts = entry.trim().split(/\s+/)
              const url = parts[0]
              if (!url) return entry.trim()
              const descriptor = parts[1]
              const resolvedUrl = resolveImageUrl(url, packageName, repoInfo)
              return descriptor ? `${resolvedUrl} ${descriptor}` : resolvedUrl
            })
            .join(', ')
        }
        return { tagName, attribs }
      },
      a: (tagName, attribs) => {
        if (!attribs.href) {
          return { tagName, attribs }
        }

        const resolvedHref = resolveUrl(attribs.href, packageName, repoInfo)

        const provider = matchPlaygroundProvider(resolvedHref)
        if (provider && !seenUrls.has(resolvedHref)) {
          seenUrls.add(resolvedHref)

          collectedLinks.push({
            url: resolvedHref,
            provider: provider.id,
            providerName: provider.name,
            /**
             * We need to set some data attribute before hand because `transformTags` doesn't
             * provide the text of the element. This will automatically be removed, because there
             * is an allow list for link attributes.
             * */
            label: attribs['data-title-intermediate'] || provider.name,
          })
        }

        // Add security attributes for external links
        if (resolvedHref && hasProtocol(resolvedHref, { acceptRelative: true })) {
          attribs.rel = 'nofollow noreferrer noopener'
          attribs.target = '_blank'
        }
        attribs.href = resolvedHref
        return { tagName, attribs }
      },
      div: prefixId,
      p: prefixId,
      span: prefixId,
      section: prefixId,
      article: prefixId,
    },
  })

  return {
    html: convertToEmoji(sanitized),
    mdExists: Boolean(content),
    playgroundLinks: collectedLinks,
    toc,
  }
}