npmx.dev/app/composables/useMarkdown.ts at 4454fa942db6cd96446725f5e014992f83de8ccc · npmx-dev/npmx.dev · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import { decodeHtmlEntities } from '~/utils/formatters'

/** Input accepted by the inline-markdown renderer in this file. */
interface UseMarkdownOptions {
  /** Source text to render; an empty string renders as an empty result. */
  text: string
  /**
   * Package name to remove from the start of the text, when the text
   * begins with it.
   */
  packageName?: string
  /**
   * When true, a markdown link is rendered as its label only, with no anchor
   * tag (useful when the output is itself placed inside another link).
   */
  plain?: boolean
}

/**
 * Reactive entry point: wraps `parseMarkdown` in `computed`, resolving
 * `options` through `toValue` each time the computed value is evaluated.
 *
 * @param options - Options object, or a ref/getter that yields one.
 * @returns The value produced by `computed`, holding the rendered HTML string.
 * @public
 */
export function useMarkdown(options: MaybeRefOrGetter<UseMarkdownOptions>) {
  const rendered = computed(() => {
    const resolved = toValue(options)
    return parseMarkdown(resolved)
  })
  return rendered
}

// Remove markdown image syntax (badges and the like) and trim the result
function stripMarkdownImages(text: string): string {
  // Applied in order. Quantifiers are bounded ({0,500} / {0,2000}) rather than
  // open-ended so pathological inputs cannot trigger ReDoS.
  const removals: RegExp[] = [
    // Image wrapped in a link: [![alt](image-url)](link-url); the final ")" is
    // optional so a truncated link URL is still removed
    /\[!\[[^\]]{0,500}\]\([^)]{0,2000}\)\]\([^)]{0,2000}\)?/g,
    // Bare image: ![alt](url)
    /!\[[^\]]{0,500}\]\([^)]{0,2000}\)/g,
    // Leftover empty-label link: [](url), closing ")" optional
    /\[\]\([^)]{0,2000}\)?/g,
  ]

  const cleaned = removals.reduce((current, pattern) => current.replace(pattern, ''), text)
  return cleaned.trim()
}

/**
 * Strip HTML tags and escape remaining HTML to prevent XSS.
 *
 * Steps, in order: decode entities, remove markdown image badges, remove HTML
 * tags (keeping their text content), remove HTML comments, optionally remove a
 * leading package name, then escape `&`, `<`, `>`, `"` and `'`.
 *
 * @param text - Raw text that may contain HTML and markdown image syntax.
 * @param packageName - When given, a case-insensitive occurrence of this name
 *   at the very start of the text is removed, along with one optional `-`, `:`
 *   or `—` separator. The name must be followed by whitespace, a separator or
 *   the end of the text, so a longer word that merely starts with the name is
 *   left intact.
 * @returns The cleaned text with the five characters above replaced by entities.
 */
function stripAndEscapeHtml(text: string, packageName?: string): string {
  // First decode any HTML entities in the input
  let stripped = decodeHtmlEntities(text)

  // Then strip markdown image badges
  stripped = stripMarkdownImages(stripped)

  // Then strip actual HTML tags (keep their text content)
  // Only match tags that start with a letter or / (to avoid matching things like "a < b > c")
  stripped = stripped.replace(/<\/?[a-z][^>]*>/gi, '')

  // Strip HTML comments: <!-- ... --> (including unclosed comments from truncation)
  stripped = stripped.replace(/<!--[\s\S]*?(-->|$)/g, '')

  if (packageName) {
    // Trim first to handle leading/trailing whitespace from stripped HTML
    stripped = stripped.trim()
    // Collapse multiple whitespace into single space
    stripped = stripped.replace(/\s+/g, ' ')
    // Match the package name at the start, optionally followed by a dash, colon
    // or em dash separator. The lookahead requires the name to end at
    // whitespace, a separator or end of text; without it, package "react" would
    // turn "Reactive streams" into "ive streams".
    const namePattern = new RegExp(
      `^${RegExp.escape(packageName)}(?=[\\s:—-]|$)\\s*[-:—]?\\s*`,
      'i',
    )
    stripped = stripped.replace(namePattern, '').trim()
  }

  // Then escape any remaining HTML-significant characters
  return stripped
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#039;')
}

/**
 * Parse simple inline markdown to HTML.
 *
 * Supported syntax: bold (`**x**`, `__x__`), italic (`*x*`, `_x_`), inline
 * code, strikethrough (`~~x~~`) and links (`[label](url)`).
 *
 * Code spans and link destinations are parked behind placeholder tokens before
 * the emphasis rules run, so `*`, `_` and `~` inside them are kept literally
 * instead of being turned into tags.
 *
 * @param options.text - Source text; a falsy value yields an empty string.
 * @param options.packageName - Forwarded to `stripAndEscapeHtml`.
 * @param options.plain - When true, links render as their label only.
 * @returns The rendered HTML string.
 */
function parseMarkdown({ text, packageName, plain }: UseMarkdownOptions): string {
  if (!text) return ''

  // First strip HTML tags and escape remaining HTML
  let html = stripAndEscapeHtml(text, packageName)

  // Placeholder tokens are delimited by two private-use code points. Drop any
  // that arrive in the input so the text cannot forge a token.
  html = html.replace(/[\uE000\uE001]/g, '')

  const stash: string[] = []
  // Park a fragment and return the token that stands in for it
  const hold = (fragment: string): string => `\uE000${stash.push(fragment) - 1}\uE001`
  // Swap tokens back for their fragments; a fragment may itself contain
  // earlier tokens (e.g. a code span used as a link destination)
  const restore = (value: string): string =>
    value.replace(/\uE000(\d+)\uE001/g, (_token, index: string) =>
      restore(stash[Number(index)] ?? ''),
    )

  // Inline code: `code` - parked first so its content stays literal
  html = html.replace(/`([^`]+)`/g, (_match, code: string) => hold(`<code>${code}</code>`))

  // Park link destinations; the label stays in place so it can still be styled
  html = html.replace(
    /\[([^\]]+)\]\(([^)]+)\)/g,
    (_match, label: string, url: string) => `[${label}](${hold(url)})`,
  )

  // Bold: **text** or __text__
  html = html.replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>')
  html = html.replace(/__(.+?)__/g, '<strong>$1</strong>')

  // Italic: *text* or _text_
  html = html.replace(/(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)/g, '<em>$1</em>')
  html = html.replace(/\b_(.+?)_\b/g, '<em>$1</em>')

  // Strikethrough: ~~text~~
  html = html.replace(/~~(.+?)~~/g, '<del>$1</del>')

  // Links: [text](url) - only allow https, mailto
  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, label: string, target: string) => {
    // In plain mode, just render the link text without the anchor
    if (plain) {
      return label
    }
    const url = restore(target)
    // The text was HTML-escaped earlier; undo that for "&" before URL parsing
    const decodedUrl = url.replace(/&amp;/g, '&')
    try {
      const { protocol, href } = new URL(decodedUrl)
      if (['https:', 'mailto:'].includes(protocol)) {
        const safeUrl = href.replace(/"/g, '&quot;')
        return `<a href="${safeUrl}" rel="nofollow noreferrer noopener" target="_blank">${label}</a>`
      }
    } catch {
      // Not parseable as an absolute URL: fall through to the text rendering
    }
    return `${label} (${url})`
  })

  // Put parked code spans (and any destinations of unformed links) back
  return restore(html)
}