// Forked from npmx.dev/npmx.dev [READ-ONLY]
// a fast, modern browser for the npm registry
1import { decodeHtmlEntities } from '~/utils/formatters'
2
/** Options accepted by `useMarkdown` and `parseMarkdown`. */
interface UseMarkdownOptions {
  /** Source text to render; may contain inline markdown and HTML */
  text: string
  /** When true, renders link text without the anchor tag (useful when inside another link) */
  plain?: boolean
  /** Package name to strip from the beginning of the description (if present) */
  packageName?: string
}
10
/**
 * Reactive wrapper around `parseMarkdown`.
 *
 * @param options - Markdown options, supplied as a plain value, a ref, or a getter;
 *   unwrapped with `toValue` each time the computed value is evaluated.
 * @returns A computed value holding the string returned by `parseMarkdown`
 *   for the current options.
 * @public
 */
export function useMarkdown(options: MaybeRefOrGetter<UseMarkdownOptions>) {
  return computed(() => parseMarkdown(toValue(options)))
}
15
/**
 * Strip markdown image badges from text.
 *
 * @param text - Markdown source to clean.
 * @returns The text with image syntax and empty links removed, trimmed of
 *   leading and trailing whitespace.
 */
function stripMarkdownImages(text: string): string {
  // Bounded quantifiers ({0,500} / {0,2000}) are used instead of * to prevent
  // ReDoS on pathological inputs.
  return (
    text
      // Remove linked images: [![alt](image-url)](link-url)
      // The final ")" is optional so incomplete (truncated) URLs are handled too
      .replace(/\[!\[[^\]]{0,500}\]\([^)]{0,2000}\)\]\([^)]{0,2000}\)?/g, '')
      // Remove standalone images: ![alt](url)
      .replace(/!\[[^\]]{0,500}\]\([^)]{0,2000}\)/g, '')
      // Remove any leftover empty links or broken markdown link syntax: [](url)
      .replace(/\[\]\([^)]{0,2000}\)?/g, '')
      .trim()
  )
}
27
/**
 * Strip HTML tags and escape remaining HTML to prevent XSS.
 *
 * Steps, in order: decode entities, drop markdown image badges, drop HTML tags
 * (keeping their text content), drop HTML comments, optionally remove a leading
 * package name, and finally entity-escape `&`, `<`, `>`, `"` and `'`.
 *
 * @param text - Raw input text.
 * @param packageName - When provided, whitespace is collapsed and a
 *   case-insensitive match of this name at the start of the text is removed.
 * @returns The cleaned text with HTML-significant characters escaped.
 */
function stripAndEscapeHtml(text: string, packageName?: string): string {
  // First decode any HTML entities in the input
  let stripped = decodeHtmlEntities(text)

  // Then strip markdown image badges
  stripped = stripMarkdownImages(stripped)

  // Then strip actual HTML tags (keep their text content)
  // Only match tags that start with a letter or / (to avoid matching things like "a < b > c")
  stripped = stripped.replace(/<\/?[a-z][^>]*>/gi, '')

  // Strip HTML comments: <!-- ... --> (including unclosed comments from truncation)
  stripped = stripped.replace(/<!--[\s\S]*?(-->|$)/g, '')

  if (packageName) {
    // Trim first to handle leading/trailing whitespace from stripped HTML
    stripped = stripped.trim()
    // Collapse multiple whitespace into single space
    stripped = stripped.replace(/\s+/g, ' ')
    // Escape special regex characters in package name
    const escapedName = packageName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    // Match package name at the start, optionally followed by a hyphen, colon,
    // or em dash, with optional whitespace on either side of the separator
    const namePattern = new RegExp(`^${escapedName}\\s*[-:—]?\\s*`, 'i')
    stripped = stripped.replace(namePattern, '').trim()
  }

  // Then escape any remaining HTML-significant characters.
  // "&" must be replaced first so the entities produced below are not re-escaped.
  return stripped
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;')
}
63
/**
 * Parse simple inline markdown to HTML.
 *
 * The input is first passed through `stripAndEscapeHtml`, so the markdown
 * replacements below run on already-escaped text. Supported syntax: bold,
 * italic, inline code, strikethrough and links.
 *
 * @param options - `text` to render, optional `packageName` forwarded to
 *   `stripAndEscapeHtml`, and `plain` to render links as bare text.
 * @returns An HTML string, or `''` when `text` is empty.
 */
function parseMarkdown({ text, packageName, plain }: UseMarkdownOptions): string {
  if (!text) return ''

  // First strip HTML tags and escape remaining HTML
  let html = stripAndEscapeHtml(text, packageName)

  // Bold: **text** or __text__
  html = html.replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>')
  html = html.replace(/__(.+?)__/g, '<strong>$1</strong>')

  // Italic: *text* or _text_
  html = html.replace(/(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)/g, '<em>$1</em>')
  html = html.replace(/\b_(.+?)_\b/g, '<em>$1</em>')

  // Inline code: `code`
  html = html.replace(/`([^`]+)`/g, '<code>$1</code>')

  // Strikethrough: ~~text~~
  html = html.replace(/~~(.+?)~~/g, '<del>$1</del>')

  // Links: [text](url) - only allow https, mailto
  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, linkText: string, url: string) => {
    // In plain mode, just render the link text without the anchor
    if (plain) {
      return linkText
    }
    // The URL was entity-escaped along with the rest of the text; undo the
    // ampersand escaping so query strings parse correctly
    const decodedUrl = url.replace(/&amp;/g, '&')
    try {
      const { protocol, href } = new URL(decodedUrl)
      if (['https:', 'mailto:'].includes(protocol)) {
        // Escape double quotes so the URL cannot break out of the href attribute
        const safeUrl = href.replace(/"/g, '&quot;')
        return `<a href="${safeUrl}" rel="nofollow noreferrer noopener" target="_blank">${linkText}</a>`
      }
    } catch {
      // Not a parseable absolute URL: fall through and render it as plain text
    }
    // Disallowed protocol or invalid URL: show the text and the URL without a link
    return `${linkText} (${url})`
  })

  return html
}