[READ-ONLY] a fast, modern browser for the npm registry
at main 103 lines 3.9 kB view raw
import { decodeHtmlEntities } from '~/utils/formatters'

interface UseMarkdownOptions {
  text: string
  /** When true, renders link text without the anchor tag (useful when inside another link) */
  plain?: boolean
  /** Package name to strip from the beginning of the description (if present) */
  packageName?: string
}

/**
 * Computed HTML string produced by running `parseMarkdown` over the given options.
 * Re-evaluates whenever the (possibly reactive) options change.
 *
 * @param options - Text to render plus rendering flags; may be a plain value, a ref or a getter.
 * @returns A computed ref holding the rendered HTML (empty string when `text` is empty).
 * @public
 */
export function useMarkdown(options: MaybeRefOrGetter<UseMarkdownOptions>) {
  return computed(() => parseMarkdown(toValue(options)))
}

/**
 * Strip markdown image badges from text.
 *
 * @param text - Raw markdown text.
 * @returns The text without image syntax, trimmed of surrounding whitespace.
 */
function stripMarkdownImages(text: string): string {
  // Remove linked images: [![alt](image-url)](link-url) - handles incomplete URLs too
  // Using {0,500} instead of * to prevent ReDoS on pathological inputs
  text = text.replace(/\[!\[[^\]]{0,500}\]\([^)]{0,2000}\)\]\([^)]{0,2000}\)?/g, '')
  // Remove standalone images: ![alt](url)
  text = text.replace(/!\[[^\]]{0,500}\]\([^)]{0,2000}\)/g, '')
  // Remove any leftover empty links or broken markdown link syntax
  text = text.replace(/\[\]\([^)]{0,2000}\)?/g, '')
  return text.trim()
}

/**
 * Strip HTML tags and escape remaining HTML to prevent XSS.
 *
 * After this function returns, the result contains no raw `&`-unescaped `<`, `>`,
 * `"` or `'` characters: every one of them has been replaced by an entity.
 *
 * @param text - Raw (possibly entity-encoded) description text.
 * @param packageName - When given, a leading occurrence of this name (case-insensitive,
 *   optionally followed by a hyphen, colon or em dash) is removed and whitespace is collapsed.
 * @returns Escaped text that is safe to embed in HTML.
 */
function stripAndEscapeHtml(text: string, packageName?: string): string {
  // First decode any HTML entities in the input
  let stripped = decodeHtmlEntities(text)

  // Then strip markdown image badges
  stripped = stripMarkdownImages(stripped)

  // Then strip actual HTML tags (keep their text content)
  // Only match tags that start with a letter or / (to avoid matching things like "a < b > c")
  stripped = stripped.replace(/<\/?[a-z][^>]*>/gi, '')

  // Strip HTML comments: <!-- ... --> (including unclosed comments from truncation)
  stripped = stripped.replace(/<!--[\s\S]*?(-->|$)/g, '')

  if (packageName) {
    // Trim first to handle leading/trailing whitespace from stripped HTML
    stripped = stripped.trim()
    // Collapse multiple whitespace into single space
    stripped = stripped.replace(/\s+/g, ' ')
    // Escape special regex characters in package name
    const escapedName = packageName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    // Match package name at the start, optionally followed by a hyphen, colon or em dash
    // (with optional whitespace on either side of the separator)
    const namePattern = new RegExp(`^${escapedName}\\s*[-:—]?\\s*`, 'i')
    stripped = stripped.replace(namePattern, '').trim()
  }

  // Then escape any remaining HTML entities
  return stripped
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#039;')
}

/**
 * Apply the non-link inline markdown rules (bold, italic, inline code, strikethrough)
 * to already-escaped text.
 *
 * @param html - Escaped text, possibly containing link placeholder tokens.
 * @returns The text with emphasis markers converted to HTML tags.
 */
function applyInlineFormatting(html: string): string {
  // Bold: **text** or __text__
  html = html.replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>')
  html = html.replace(/__(.+?)__/g, '<strong>$1</strong>')

  // Italic: *text* or _text_
  html = html.replace(/(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)/g, '<em>$1</em>')
  html = html.replace(/\b_(.+?)_\b/g, '<em>$1</em>')

  // Inline code: `code`
  html = html.replace(/`([^`]+)`/g, '<code>$1</code>')

  // Strikethrough: ~~text~~
  html = html.replace(/~~(.+?)~~/g, '<del>$1</del>')

  return html
}

/**
 * Render one markdown link.
 *
 * @param text - Link label, already escaped and formatted.
 * @param url - Link destination exactly as it appears in the escaped text.
 * @param plain - When true, only the label is returned (no anchor tag).
 * @returns An anchor for https/mailto destinations, otherwise `label (url)` as plain text.
 */
function renderLink(text: string, url: string, plain?: boolean): string {
  // In plain mode, just render the link text without the anchor
  if (plain) {
    return text
  }
  // The escaping pass turned every "&" into "&amp;"; undo that before parsing the URL
  const decodedUrl = url.replace(/&amp;/g, '&')
  try {
    const { protocol, href } = new URL(decodedUrl)
    // Only allow https and mailto destinations
    if (['https:', 'mailto:'].includes(protocol)) {
      const safeUrl = href.replace(/"/g, '&quot;')
      return `<a href="${safeUrl}" rel="nofollow noreferrer noopener" target="_blank">${text}</a>`
    }
  } catch {
    // `new URL` throws on anything that is not an absolute URL; fall through to plain text
  }
  return `${text} (${url})`
}

/**
 * Parse simple inline markdown to HTML.
 *
 * Links are rendered before any emphasis rule runs, so markers that happen to
 * appear inside a link destination (e.g. `__init__.py`, `/_private_/`, `~~`) are
 * left alone instead of being rewritten into tags that would corrupt the URL.
 *
 * @returns Sanitised HTML, or an empty string when `text` is empty.
 */
function parseMarkdown({ text, packageName, plain }: UseMarkdownOptions): string {
  if (!text) return ''

  // First strip HTML tags and escape remaining HTML
  const escaped = stripAndEscapeHtml(text, packageName)

  // Links: [text](url). Each rendered link is parked in `renderedLinks` and replaced by
  // a `<index>` token. The escaped text cannot contain a raw "<" or ">", so a token can
  // never collide with user input, and the emphasis rules never insert digit-only tags.
  const renderedLinks: string[] = []
  const withLinkTokens = escaped.replace(
    /\[([^\]]+)\]\(([^)]+)\)/g,
    (_match: string, linkText: string, url: string) => {
      // The label still gets inline formatting; the destination does not
      renderedLinks.push(renderLink(applyInlineFormatting(linkText), url, plain))
      return `<${renderedLinks.length - 1}>`
    },
  )

  // Format everything outside the links, then put the rendered links back
  return applyInlineFormatting(withLinkTokens).replace(
    /<(\d+)>/g,
    (token: string, index: string) => renderedLinks[Number(index)] ?? token,
  )
}