app/composables/useMarkdown.ts at main · davidwindham.com/npmx.dev

davidwindham.com / npmx.dev
forked from npmx.dev/npmx.dev
fork atom
[READ-ONLY] a fast, modern browser for the npm registry
fork atom
npmx.dev / app / composables / useMarkdown.ts
at main 103 lines 3.9 kB view raw
wrap content
Alexander Lichter fix: remove html comments from package description and deprecation notices (#1397) 6w ago
a81faa9a
  1import { decodeHtmlEntities } from '~/utils/formatters'
  2
  /** Options accepted by `useMarkdown` / `parseMarkdown`. */
  interface UseMarkdownOptions {
    /** Raw text (inline markdown, possibly containing HTML) to render */
    text: string
    /** When true, renders link text without the anchor tag (useful when inside another link) */
    plain?: boolean
    /** Package name to strip from the beginning of the description (if present) */
    packageName?: string
  }
 10
 /**
  * Reactive wrapper around `parseMarkdown`.
  *
  * The options are unwrapped with `toValue` inside a `computed`, so the
  * returned HTML string is recomputed whenever the options change.
  *
  * @param options - Markdown options, given as a plain object, a ref or a getter.
  * @returns A computed ref holding the rendered HTML string.
  * @public
  */
 export function useMarkdown(options: MaybeRefOrGetter<UseMarkdownOptions>) {
   return computed(() => parseMarkdown(toValue(options)))
 }
 15
 /**
  * Strip markdown image badges from text.
  *
  * All quantifiers are bounded ({0,500} / {0,2000} instead of *) to prevent
  * ReDoS on pathological inputs.
  *
  * @param text - Text that may contain markdown image syntax.
  * @returns The text without images or empty links, trimmed at both ends.
  */
 function stripMarkdownImages(text: string): string {
   return (
     text
       // Linked images: [![alt](image-url)](link-url). The final ")" is optional
       // so a link URL cut off by truncation is removed as well.
       .replace(/\[!\[[^\]]{0,500}\]\([^)]{0,2000}\)\]\([^)]{0,2000}\)?/g, '')
       // Standalone images: ![alt](url)
       .replace(/!\[[^\]]{0,500}\]\([^)]{0,2000}\)/g, '')
       // Leftover empty links or broken markdown link syntax: [](url)
       .replace(/\[\]\([^)]{0,2000}\)?/g, '')
       .trim()
   )
 }
 27
 /**
  * Strip HTML tags/comments and escape remaining HTML to prevent XSS.
  *
  * @param text - Raw description text; may contain HTML entities, tags,
  *   comments and markdown image badges.
  * @param packageName - When given, whitespace is collapsed and a leading
  *   occurrence of the name (case-insensitive, plus an optional `-`, `:` or `—`
  *   separator) is removed.
  * @returns Text in which `&`, `<`, `>`, `"` and `'` are HTML-escaped.
  */
 function stripAndEscapeHtml(text: string, packageName?: string): string {
   // First decode any HTML entities in the input
   let stripped = decodeHtmlEntities(text)

   // Then strip markdown image badges
   stripped = stripMarkdownImages(stripped)

   // Strip HTML comments: <!-- ... --> (including unclosed comments from truncation).
   // This has to run before tag stripping: a tag-like fragment inside a comment
   // (e.g. "<!-- <div -->") would otherwise consume the "-->" terminator, and the
   // unclosed-comment rule would then delete everything up to the end of the text.
   stripped = stripped.replace(/<!--[\s\S]*?(-->|$)/g, '')

   // Then strip actual HTML tags (keep their text content)
   // Only match tags that start with a letter or / (to avoid matching things like "a < b > c")
   stripped = stripped.replace(/<\/?[a-z][^>]*>/gi, '')

   if (packageName) {
     // Trim first to handle leading/trailing whitespace from stripped HTML,
     // then collapse runs of whitespace into a single space
     stripped = stripped.trim().replace(/\s+/g, ' ')
     // Escape special regex characters in package name
     const escapedName = packageName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
     // Match the package name at the start, optionally followed by a dash, colon
     // or em dash separator. The lookahead requires the name to end there, so a
     // longer word ("vue" in "Vuex ...") or a hyphenated name ("react" in
     // "react-dom ...") is left intact.
     const namePattern = new RegExp(
       `^${escapedName}(?=$|\\s|[:—]|-(?!\\w))\\s*[-:—]?\\s*`,
       'i',
     )
     stripped = stripped.replace(namePattern, '').trim()
   }

   // Then escape any remaining HTML special characters ("&" first so the
   // entities produced by the later replacements are not escaped twice)
   return stripped
     .replace(/&/g, '&amp;')
     .replace(/</g, '&lt;')
     .replace(/>/g, '&gt;')
     .replace(/"/g, '&quot;')
     .replace(/'/g, '&#039;')
 }
 63
 /**
  * Parse simple inline markdown to HTML.
  *
  * Supports bold, italic, inline code, strikethrough and links. Link targets
  * and inline code are swapped for opaque markers before the emphasis passes
  * run and restored afterwards, so markdown characters inside a URL or a code
  * span (e.g. `__init__`, `/_api_/`) are kept literally.
  *
  * @param options - Text to render, plus optional `packageName` (stripped from
  *   the start of the text) and `plain` (render link text without an anchor).
  * @returns An HTML string; empty when `text` is empty.
  */
 function parseMarkdown({ text, packageName, plain }: UseMarkdownOptions): string {
   if (!text) return ''

   // First strip HTML tags and escape remaining HTML
   let html = stripAndEscapeHtml(text, packageName)

   // Protected fragments are replaced by "<index>" markers. The escaped text
   // cannot contain a raw "<", so a marker can never collide with user content.
   const slots: string[] = []
   const protect = (fragment: string): string => `<${slots.push(fragment) - 1}>`
   // A slot may itself contain markers of earlier slots (a link inside a code
   // span), hence the recursive restore; it terminates because a slot can only
   // reference slots created before it.
   const restore = (value: string): string =>
     value.replace(/<(\d+)>/g, (_marker, index: string) => restore(slots[Number(index)] ?? ''))
   // Empty marker standing in for the removed "[" / "]" so that the word-boundary
   // checks of the emphasis patterns see the same kind of neighbour as before.
   const edge = protect('')

   // Links: [text](url) - only allow https, mailto
   html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, label: string, url: string) => {
     // In plain mode, just render the link text without the anchor
     if (plain) {
       return `${edge}${label}${edge}`
     }
     const decodedUrl = url.replace(/&amp;/g, '&')
     try {
       const { protocol, href } = new URL(decodedUrl)
       if (['https:', 'mailto:'].includes(protocol)) {
         const safeUrl = href.replace(/"/g, '&quot;')
         const openTag = `<a href="${safeUrl}" rel="nofollow noreferrer noopener" target="_blank">`
         return `${protect(openTag)}${label}</a>`
       }
     } catch {
       // Not a parseable absolute URL: fall through to the textual fallback
     }
     return `${edge}${label} (${protect(url)})`
   })

   // Inline code: `code` - the content is protected from the emphasis passes
   html = html.replace(/`([^`]+)`/g, (_match, code: string) => protect(`<code>${code}</code>`))

   // Bold: **text** or __text__
   html = html.replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>')
   html = html.replace(/__(.+?)__/g, '<strong>$1</strong>')

   // Italic: *text* or _text_
   html = html.replace(/(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)/g, '<em>$1</em>')
   html = html.replace(/\b_(.+?)_\b/g, '<em>$1</em>')

   // Strikethrough: ~~text~~
   html = html.replace(/~~(.+?)~~/g, '<del>$1</del>')

   return restore(html)
 }