[READ-ONLY] a fast, modern browser for the npm registry
at main 540 lines 17 kB view raw
import { marked, type Tokens } from 'marked'
import sanitizeHtml from 'sanitize-html'
import { hasProtocol } from 'ufo'
import type { ReadmeResponse, TocItem } from '#shared/types/readme'
import { convertBlobOrFileToRawUrl, type RepositoryInfo } from '#shared/utils/git-providers'
import { highlightCodeSync } from './shiki'
import { convertToEmoji } from '#shared/utils/emoji'

/**
 * Static description of one online playground/demo service that README links
 * are matched against.
 */
interface PlaygroundProvider {
  /** Stable identifier of the provider */
  id: string
  /** Human-readable provider name */
  name: string
  /** Hostnames owned by the provider; sub-domains of these match as well */
  domains: string[]
  /** Optional prefix the URL pathname has to start with for a match */
  path?: string
  /** Optional icon name for the provider */
  icon?: string
}

/**
 * Playground/demo providers recognised in README links.
 * Order matters: the first entry that matches a URL wins.
 */
const PLAYGROUND_PROVIDERS: PlaygroundProvider[] = [
  {
    id: 'stackblitz',
    name: 'StackBlitz',
    domains: ['stackblitz.com', 'stackblitz.io'],
    icon: 'stackblitz',
  },
  {
    id: 'codesandbox',
    name: 'CodeSandbox',
    domains: ['codesandbox.io', 'githubbox.com', 'csb.app'],
    icon: 'codesandbox',
  },
  {
    id: 'codepen',
    name: 'CodePen',
    domains: ['codepen.io'],
    icon: 'codepen',
  },
  {
    id: 'jsfiddle',
    name: 'JSFiddle',
    domains: ['jsfiddle.net'],
    icon: 'jsfiddle',
  },
  {
    id: 'replit',
    name: 'Replit',
    domains: ['repl.it', 'replit.com'],
    icon: 'replit',
  },
  {
    id: 'gitpod',
    name: 'Gitpod',
    domains: ['gitpod.io'],
    icon: 'gitpod',
  },
  {
    id: 'vue-playground',
    name: 'Vue Playground',
    domains: ['play.vuejs.org', 'sfc.vuejs.org'],
    icon: 'vue',
  },
  {
    id: 'nuxt-new',
    name: 'Nuxt Starter',
    domains: ['nuxt.new'],
    icon: 'nuxt',
  },
  {
    id: 'vite-new',
    name: 'Vite Starter',
    domains: ['vite.new'],
    icon: 'vite',
  },
  {
    id: 'typescript-playground',
    name: 'TypeScript Playground',
    domains: ['typescriptlang.org'],
    path: '/play',
    icon: 'typescript',
  },
]

/**
 * Look up the playground provider a URL belongs to.
 *
 * A provider matches when the URL's hostname equals one of its domains (or is
 * a sub-domain of one) and, if the provider declares a `path`, the URL's
 * pathname starts with it.
 *
 * @param url - Absolute URL to inspect
 * @returns The first matching provider, or `null` when nothing matches or the
 *   string cannot be parsed as a URL
 */
function matchPlaygroundProvider(url: string): PlaygroundProvider | null {
  let host: string
  let pathname: string
  try {
    const target = new URL(url)
    host = target.hostname.toLowerCase()
    pathname = target.pathname
  } catch {
    // Not a parseable absolute URL, so it cannot be a playground link
    return null
  }

  const ownsHost = (domain: string) => host === domain || host.endsWith(`.${domain}`)

  const match = PLAYGROUND_PROVIDERS.find(candidate => {
    const pathOk = !candidate.path || pathname.startsWith(candidate.path)
    return pathOk && candidate.domains.some(ownsHost)
  })
  return match ?? null
}

// Tags that survive sanitization.
// h1 and h2 are accepted as input but rewritten afterwards: README headings are
// shifted down two levels because the page already owns h1 (package name) and
// h2 (the "Readme" section), so a README h1 is emitted as h3.
const ALLOWED_TAGS = [
  // headings
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6',
  // text blocks, separators and lists
  'p',
  'br',
  'hr',
  'ul',
  'ol',
  'li',
  'blockquote',
  // code
  'pre',
  'code',
  // links and inline emphasis
  'a',
  'strong',
  'em',
  'del',
  's',
  // tables
  'table',
  'thead',
  'tbody',
  'tr',
  'th',
  'td',
  // media
  'img',
  'picture',
  'source',
  // disclosure widgets and generic containers
  'details',
  'summary',
  'div',
  'span',
  // remaining inline elements
  'sup',
  'sub',
  'kbd',
  'mark',
  // copy button injected around code blocks
  'button',
]

// Attributes that survive sanitization, keyed by tag name ('*' = every tag)
const ALLOWED_ATTR: Record<string, string[]> = {
  '*': ['id'], // id is permitted everywhere
  'a': ['href', 'title', 'target', 'rel'],
  'img': ['src', 'alt', 'title', 'width', 'height', 'align'],
  'source': ['src', 'srcset', 'type', 'media'],
  'button': ['class', 'title', 'type', 'aria-label', 'data-copy'],
  'th': ['colspan', 'rowspan', 'align', 'valign', 'width'],
  'td': ['colspan', 'rowspan', 'align', 'valign', 'width'],
  'h3': ['data-level', 'align'],
  'h4': ['data-level', 'align'],
  'h5': ['data-level', 'align'],
  'h6': ['data-level', 'align'],
  'blockquote': ['data-callout'],
  'details': ['open'],
  'code': ['class'],
  'pre': ['class', 'style'],
  'span': ['class', 'style'],
  'div': ['class', 'style', 'align'],
  'p': ['align'],
}

// GitHub-style callout types
// Format: > [!NOTE], >
// [!TIP], > [!IMPORTANT], > [!WARNING], > [!CAUTION]

/**
 * Generate a GitHub-style slug from heading text.
 * - Remove HTML tags
 * - Convert to lowercase and trim
 * - Replace runs of whitespace with a hyphen
 * - Remove special characters (keeps letters, combining marks and digits of
 *   any script, plus hyphens and underscores)
 * - Collapse multiple hyphens and trim hyphens at both ends
 *
 * @param text - Heading text, may contain inline HTML
 * @returns The slug; an empty string when nothing sluggable is left
 */
function slugify(text: string): string {
  return (
    text
      .replace(/<[^>]*>/g, '') // Strip HTML tags
      .toLowerCase()
      .trim()
      .replace(/\s+/g, '-') // Spaces to hyphens
      // Keep letters/marks/digits of every script (not only ASCII and CJK), so that
      // headings such as "Instalación" or "Установка" keep their GitHub-style anchors.
      // The explicit kana blocks are kept so nothing that used to survive is dropped.
      .replace(/[^\p{L}\p{M}\p{N}_\u3040-\u309f\u30a0-\u30ff-]/gu, '')
      .replace(/-+/g, '-') // Collapse multiple hyphens
      .replace(/^-|-$/g, '') // Trim leading/trailing hyphens
  )
}

/** These paths on npmjs.com don't belong to packages or search, so we shouldn't try to replace them with npmx.dev urls */
const reservedPathsNpmJs = [
  'products',
  'login',
  'signup',
  'advisories',
  'blog',
  'about',
  'press',
  'policies',
]

/**
 * Decide whether an npmjs.com URL can be turned into a link on our own site.
 *
 * @param url - Parsed URL to inspect
 * @returns `false` for other hosts, for the npmjs.com home page and for any
 *   pathname that starts with one of the reserved paths; `true` otherwise
 */
const isNpmJsUrlThatCanBeRedirected = (url: URL) => {
  if (url.host !== 'www.npmjs.com' && url.host !== 'npmjs.com') {
    return false
  }

  if (
    url.pathname === '/' ||
    reservedPathsNpmJs.some(path => url.pathname.startsWith(`/${path}`))
  ) {
    return false
  }

  return true
}

/**
 * Resolve a URL found in a README to something that works on our page.
 *
 * - `#anchor` links get the `user-content-` prefix so they match heading IDs.
 * - http(s) URLs are returned as-is, except redirectable npmjs.com URLs which
 *   become site-relative paths (pathname + search + hash).
 * - `mailto:` links are returned as-is.
 * - Any other scheme (javascript:, data:, etc.) is deliberately treated as a
 *   relative path, which neutralises it.
 * - Relative paths resolve against the repository when repository info is
 *   available: raw file URLs, or blob URLs for markdown files (.md) so they
 *   render on the provider's site. A leading `/` means "from the repository
 *   root"; otherwise the package's sub-directory is taken into account.
 * - Without repository info, markdown links are left unchanged and everything
 *   else falls back to the jsdelivr CDN.
 *
 * @param url - URL or path as written in the README
 * @param packageName - Package name, used for the jsdelivr fallback
 * @param repoInfo - Repository information, if known
 * @returns The resolved URL
 */
function resolveUrl(url: string, packageName: string, repoInfo?: RepositoryInfo): string {
  if (!url) return url
  if (url.startsWith('#')) {
    // Prefix anchor links to match heading IDs (avoids collision with page IDs)
    return `#user-content-${url.slice(1)}`
  }
  if (hasProtocol(url, { acceptRelative: true })) {
    try {
      const parsed = new URL(url, 'https://example.com')
      if (parsed.protocol === 'http:' || parsed.protocol === 'https:') {
        // Redirect npmjs urls to ourself
        if (isNpmJsUrlThatCanBeRedirected(parsed)) {
          return parsed.pathname + parsed.search + parsed.hash
        }
        return url
      }
      // mailto: is an allowed link scheme; rewriting it into a file URL would break the link
      if (parsed.protocol === 'mailto:') {
        return url
      }
    } catch {
      // Invalid URL, fall through to resolve as relative
    }
    // return protocol-relative URLs (//example.com) as-is
    if (url.startsWith('//')) {
      return url
    }
    // for non-HTTP protocols (javascript:, data:, etc.), don't return, treat as relative
  }

  // Check if this is a markdown file link (query string and hash are ignored)
  const isMarkdownFile = /\.md$/i.test(url.split('?')[0]?.split('#')[0] ?? '')

  // A leading slash addresses the repository/package root rather than the README's folder
  const isRootRelative = url.startsWith('/')

  // Use provider's URL base when repository info is available
  // This handles assets that exist in the repo but not in the npm tarball
  if (repoInfo?.rawBaseUrl) {
    // Normalize the path: drop the leading slash(es) or the leading ./
    let relativePath = isRootRelative ? url.replace(/^\/+/, '') : url.replace(/^\.\//, '')

    // If package is in a subdirectory, resolve relative paths from there
    // e.g., for packages/ai with ./assets/hero.gif → packages/ai/assets/hero.gif
    // but for ../../.github/assets/banner.jpg → resolve relative to subdirectory
    // Root-relative paths skip this step: they already start at the repository root.
    if (repoInfo.directory && !isRootRelative) {
      // Split directory into parts for relative path resolution
      const dirParts = repoInfo.directory.split('/').filter(Boolean)

      // Handle ../ navigation
      while (relativePath.startsWith('../')) {
        relativePath = relativePath.slice(3)
        dirParts.pop()
      }

      // Reconstruct the path
      if (dirParts.length > 0) {
        relativePath = `${dirParts.join('/')}/${relativePath}`
      }
    }

    // For markdown files, use blob URL so they render on the provider's site
    // For other files, use raw URL for direct access
    const baseUrl = isMarkdownFile ? repoInfo.blobBaseUrl : repoInfo.rawBaseUrl
    return `${baseUrl}/${relativePath}`
  }

  // For markdown files without repo info, leave unchanged (like npm does)
  // This avoids 404s from jsdelivr which doesn't render markdown
  if (isMarkdownFile) {
    return url
  }

  // Fallback: relative URLs → jsdelivr CDN (may 404 if asset not in npm tarball)
  return `https://cdn.jsdelivr.net/npm/${packageName}/${url.replace(/^(?:\.\/|\/+)/, '')}`
}

// Convert blob/src URLs to raw URLs for images across all providers
// e.g.
// https://github.com/nuxt/nuxt/blob/main/.github/assets/banner.svg
// → https://github.com/nuxt/nuxt/raw/main/.github/assets/banner.svg
/**
 * Resolve an image URL like any other URL and, when the repository provider is
 * known, pass the result through the provider's blob/file → raw conversion.
 *
 * @param url - Image URL or path as written in the README
 * @param packageName - Package name, forwarded to the URL resolution
 * @param repoInfo - Repository information, if known
 * @returns The resolved image URL
 */
function resolveImageUrl(url: string, packageName: string, repoInfo?: RepositoryInfo): string {
  const resolved = resolveUrl(url, packageName, repoInfo)
  if (repoInfo?.provider) {
    return convertBlobOrFileToRawUrl(resolved, repoInfo.provider)
  }
  return resolved
}

/**
 * Tag transformer that prefixes an element's `id` with 'user-content-'.
 * IDs that already carry the prefix, and elements without an id, are left
 * untouched, so applying it more than once is harmless.
 *
 * @param tagName - Name of the tag being transformed (returned unchanged)
 * @param attribs - Attributes of the tag; `id` is rewritten in place
 * @returns The tag name together with the (possibly updated) attributes
 */
function prefixId(tagName: string, attribs: sanitizeHtml.Attributes) {
  if (attribs.id && !attribs.id.startsWith('user-content-')) {
    attribs.id = `user-content-${attribs.id}`
  }
  return { tagName, attribs }
}

/**
 * Map a markdown heading depth to the heading level that is actually rendered.
 *
 * - README h1 always becomes h3
 * - Deeper levels are shifted down by two, capped at h6
 * - A heading never sits more than one level below the previous one
 *
 * @param depth - Heading depth in the markdown source (1-6)
 * @param lastSemanticLevel - Level rendered for the previous heading
 * @returns The level to render for this heading
 */
function calculateSemanticDepth(depth: number, lastSemanticLevel: number) {
  if (depth === 1) return 3
  const maxAllowed = Math.min(lastSemanticLevel + 1, 6)
  return Math.min(depth + 2, maxAllowed)
}

/**
 * Render README markdown to sanitized HTML.
 *
 * Along the way it builds a table of contents from the markdown headings and
 * collects links that point to known playground providers.
 *
 * @param content - README markdown source
 * @param packageName - Package name, used when resolving relative URLs
 * @param repoInfo - Repository information, if known, used when resolving URLs
 * @returns The sanitized HTML, the collected playground links and the table of
 *   contents. Empty `content` yields an empty result without rendering.
 */
export async function renderReadmeHtml(
  content: string,
  packageName: string,
  repoInfo?: RepositoryInfo,
): Promise<ReadmeResponse> {
  if (!content) return { html: '', playgroundLinks: [], toc: [] }

  // NOTE(review): getShikiHighlighter is not imported in this file, presumably
  // it is provided by the framework's auto-imports. Confirm.
  const shiki = await getShikiHighlighter()
  const renderer = new marked.Renderer()

  // Playground links are collected while sanitizing (see the `a` transform below)
  const collectedLinks: PlaygroundLink[] = []
  const seenUrls = new Set<string>()

  // Collect table of contents items during parsing
  const toc: TocItem[] = []

  // Track used heading slugs to handle duplicates (GitHub-style: foo, foo-1, foo-2)
  const usedSlugs = new Map<string, number>()

  // Track heading hierarchy to ensure sequential order for accessibility
  // Page h1 = package name, h2 = "Readme" section heading
  // So README starts at h3, and we ensure no levels are skipped
  // Visual styling preserved via data-level attribute (original depth)
  let lastSemanticLevel = 2 // Start after h2 (the "Readme" section heading)
  renderer.heading = function ({ tokens, depth }: Tokens.Heading) {
    // Calculate the target semantic level based on document structure
    // Start at h3 (since page h1 + section h2 already exist)
    // But ensure we never skip levels - can only go down by 1 or stay same/go up
    const semanticLevel = calculateSemanticDepth(depth, lastSemanticLevel)
    lastSemanticLevel = semanticLevel
    const text = this.parser.parseInline(tokens)

    // Generate GitHub-style slug for anchor links
    let slug = slugify(text)
    if (!slug) slug = 'heading' // Fallback for empty headings

    // Handle duplicate slugs (GitHub-style: foo, foo-1, foo-2)
    const count = usedSlugs.get(slug) ?? 0
    usedSlugs.set(slug, count + 1)
    const uniqueSlug = count === 0 ? slug : `${slug}-${count}`

    // Prefix with 'user-content-' to avoid collisions with page IDs
    // (e.g., #install, #dependencies, #versions are used by the package page)
    const id = `user-content-${uniqueSlug}`

    // Collect TOC item with plain text (HTML stripped)
    const plainText = text.replace(/<[^>]*>/g, '').trim()
    if (plainText) {
      toc.push({ text: plainText, id, depth })
    }

    /** The link href uses the unique slug WITHOUT the 'user-content-' prefix, because that will later be added for all links. */
    return `<h${semanticLevel} id="${id}" data-level="${depth}"><a href="#${uniqueSlug}">${plainText}</a></h${semanticLevel}>\n`
  }

  // Syntax highlighting for code blocks (uses shared highlighter)
  renderer.code = ({ text, lang }: Tokens.Code) => {
    const html = highlightCodeSync(shiki, text, lang || 'text')
    // Add copy button
    return `<div class="readme-code-block" >
<button type="button" class="readme-copy-button" aria-label="Copy code" check-icon="i-lucide:check" copy-icon="i-lucide:copy" data-copy>
<span class="i-lucide:copy" aria-hidden="true"></span>
<span class="sr-only">Copy code</span>
</button>
${html}
</div>`
  }

  // Resolve image URLs (with GitHub blob → raw conversion)
  renderer.image = ({ href, title, text }: Tokens.Image) => {
    const resolvedHref = resolveImageUrl(href, packageName, repoInfo)
    const titleAttr = title ? ` title="${title}"` : ''
    const altAttr = text ? ` alt="${text}"` : ''
    return `<img src="${resolvedHref}"${altAttr}${titleAttr}>`
  }

  // Emit links with a temporary data attribute that carries the link text.
  // URL resolution, security attributes and playground detection happen later,
  // in the `a` transform of the sanitizer.
  renderer.link = function ({ href, title, tokens }: Tokens.Link) {
    const text = this.parser.parseInline(tokens)
    const titleAttr = title ? ` title="${title}"` : ''
    let plainText = text.replace(/<[^>]*>/g, '').trim()

    // If plain text is empty, check if we have an image with alt text
    if (!plainText && tokens.length === 1 && tokens[0]?.type === 'image') {
      plainText = tokens[0].text
    }

    const intermediateTitleAttr =
      plainText || title ? ` data-title-intermediate="${plainText || title}"` : ''

    return `<a href="${href}"${titleAttr}${intermediateTitleAttr}>${text}</a>`
  }

  // GitHub-style callouts: > [!NOTE], > [!TIP], etc.
  renderer.blockquote = function ({ tokens }: Tokens.Blockquote) {
    const body = this.parser.parse(tokens)

    const calloutMatch = body.match(/^<p>\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION)\](?:<br>)?\s*/i)

    if (calloutMatch?.[1]) {
      const calloutType = calloutMatch[1].toLowerCase()
      const cleanedBody = body.replace(calloutMatch[0], '<p>')
      return `<blockquote data-callout="${calloutType}">${cleanedBody}</blockquote>\n`
    }

    return `<blockquote>${body}</blockquote>\n`
  }

  // Hand the renderer to this parse call only. It closes over per-call state
  // (toc, slugs, heading level), so it must not be installed on the shared
  // `marked` instance via `marked.setOptions`.
  const rawHtml = marked.parse(content, { renderer }) as string

  const sanitized = sanitizeHtml(rawHtml, {
    allowedTags: ALLOWED_TAGS,
    allowedAttributes: ALLOWED_ATTR,
    allowedSchemes: ['http', 'https', 'mailto'],
    // Shift raw-HTML headings, resolve image/link URLs and namespace ids
    transformTags: {
      // Headings that carry data-level were produced by the heading renderer
      // above and already sit at their final level; only raw HTML headings
      // from the README are shifted here.
      h1: (_, attribs) => {
        return { tagName: 'h3', attribs: { ...attribs, 'data-level': '1' } }
      },
      h2: (_, attribs) => {
        return { tagName: 'h4', attribs: { ...attribs, 'data-level': '2' } }
      },
      h3: (_, attribs) => {
        if (attribs['data-level']) return { tagName: 'h3', attribs: attribs }
        return { tagName: 'h5', attribs: { ...attribs, 'data-level': '3' } }
      },
      h4: (_, attribs) => {
        if (attribs['data-level']) return { tagName: 'h4', attribs: attribs }
        return { tagName: 'h6', attribs: { ...attribs, 'data-level': '4' } }
      },
      h5: (_, attribs) => {
        if (attribs['data-level']) return { tagName: 'h5', attribs: attribs }
        return { tagName: 'h6', attribs: { ...attribs, 'data-level': '5' } }
      },
      h6: (_, attribs) => {
        if (attribs['data-level']) return { tagName: 'h6', attribs: attribs }
        return { tagName: 'h6', attribs: { ...attribs, 'data-level': '6' } }
      },
      img: (tagName, attribs) => {
        if (attribs.src) {
          attribs.src = resolveImageUrl(attribs.src, packageName, repoInfo)
        }
        return { tagName, attribs }
      },
      source: (tagName, attribs) => {
        if (attribs.src) {
          attribs.src = resolveImageUrl(attribs.src, packageName, repoInfo)
        }
        if (attribs.srcset) {
          // Each srcset entry is "<url> [descriptor]"; only the URL part is resolved
          attribs.srcset = attribs.srcset
            .split(',')
            .map(entry => {
              const parts = entry.trim().split(/\s+/)
              const url = parts[0]
              if (!url) return entry.trim()
              const descriptor = parts[1]
              const resolvedUrl = resolveImageUrl(url, packageName, repoInfo)
              return descriptor ? `${resolvedUrl} ${descriptor}` : resolvedUrl
            })
            .join(', ')
        }
        return { tagName, attribs }
      },
      a: (tagName, attribs) => {
        if (!attribs.href) {
          return { tagName, attribs }
        }

        const resolvedHref = resolveUrl(attribs.href, packageName, repoInfo)

        const provider = matchPlaygroundProvider(resolvedHref)
        if (provider && !seenUrls.has(resolvedHref)) {
          seenUrls.add(resolvedHref)

          collectedLinks.push({
            url: resolvedHref,
            provider: provider.id,
            providerName: provider.name,
            /**
             * We need to set some data attribute before hand because `transformTags` doesn't
             * provide the text of the element. This will automatically be removed, because there
             * is an allow list for link attributes.
             * */
            label: attribs['data-title-intermediate'] || provider.name,
          })
        }

        // Add security attributes for external links
        if (resolvedHref && hasProtocol(resolvedHref, { acceptRelative: true })) {
          attribs.rel = 'nofollow noreferrer noopener'
          attribs.target = '_blank'
        }
        attribs.href = resolvedHref
        return { tagName, attribs }
      },
      // Namespace ids on every tag, not just a few containers: hash links are
      // always rewritten to '#user-content-…', so an unprefixed id on e.g.
      // <a id="…"> or a raw HTML heading would break the anchor and could
      // collide with ids used by the page itself. The wildcard transform runs
      // after the tag-specific ones above.
      '*': prefixId,
    },
  })

  return {
    html: convertToEmoji(sanitized),
    mdExists: Boolean(content),
    playgroundLinks: collectedLinks,
    toc,
  }
}