Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions app/components/Package/Playgrounds.vue
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
<script setup lang="ts">
import type { PlaygroundLink } from '#shared/types'
import { decodeHtmlEntities } from '~/utils/formatters'

const props = defineProps<{
links: PlaygroundLink[]
Expand Down Expand Up @@ -130,7 +129,7 @@ function focusMenuItem(index: number) {
:class="[getIcon(firstLink.provider), getColor(firstLink.provider), 'w-4 h-4 shrink-0']"
aria-hidden="true"
/>
<span class="truncate text-fg-muted">{{ decodeHtmlEntities(firstLink.label) }}</span>
<span class="truncate text-fg-muted">{{ firstLink.label }}</span>
</a>
</TooltipApp>

Expand Down Expand Up @@ -186,7 +185,7 @@ function focusMenuItem(index: number) {
:class="[getIcon(link.provider), getColor(link.provider), 'w-4 h-4 shrink-0']"
aria-hidden="true"
/>
<span class="truncate">{{ decodeHtmlEntities(link.label) }}</span>
<span class="truncate">{{ link.label }}</span>
</a>
</TooltipApp>
</div>
Expand Down
7 changes: 3 additions & 4 deletions app/components/ReadmeTocDropdown.vue
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
<script setup lang="ts">
import type { TocItem } from '#shared/types/readme'
import { onClickOutside, useEventListener } from '@vueuse/core'
import { decodeHtmlEntities } from '~/utils/formatters'

const props = defineProps<{
toc: TocItem[]
Expand Down Expand Up @@ -202,7 +201,7 @@ function handleKeydown(event: KeyboardEvent) {
@click="select()"
@mouseenter="highlightedIndex = getIndex(node.id)"
>
<span class="truncate">{{ decodeHtmlEntities(node.text) }}</span>
<span class="truncate">{{ node.text }}</span>
</NuxtLink>

<template v-for="child in node.children" :key="child.id">
Expand All @@ -220,7 +219,7 @@ function handleKeydown(event: KeyboardEvent) {
@click="select()"
@mouseenter="highlightedIndex = getIndex(child.id)"
>
<span class="truncate">{{ decodeHtmlEntities(child.text) }}</span>
<span class="truncate">{{ child.text }}</span>
</NuxtLink>

<NuxtLink
Expand All @@ -241,7 +240,7 @@ function handleKeydown(event: KeyboardEvent) {
@click="select()"
@mouseenter="highlightedIndex = getIndex(grandchild.id)"
>
<span class="truncate">{{ decodeHtmlEntities(grandchild.text) }}</span>
<span class="truncate">{{ grandchild.text }}</span>
</NuxtLink>
</template>
</template>
Expand Down
2 changes: 1 addition & 1 deletion app/composables/useMarkdown.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { decodeHtmlEntities } from '~/utils/formatters'
import { decodeHtmlEntities } from '#shared/utils/html'

interface UseMarkdownOptions {
text: string
Expand Down
14 changes: 0 additions & 14 deletions app/utils/formatters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,3 @@ export function toIsoDateString(date: Date): string {
const day = String(date.getUTCDate()).padStart(2, '0')
return `${year}-${month}-${day}`
}

/** Lookup table: each supported HTML entity mapped to the character it decodes to. */
const htmlEntities: Record<string, string> = {
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&#39;': "'",
  '&apos;': "'",
  '&nbsp;': ' ',
}

/**
 * Decode the HTML entities listed in `htmlEntities` in a single pass.
 *
 * @param text - String that may contain encoded entities.
 * @returns The string with every recognised entity replaced; anything else,
 * including entities that are not in the table, is returned untouched.
 */
export function decodeHtmlEntities(text: string): string {
  // Fall back to the matched text itself if the table has no entry for it.
  function lookup(entity: string): string {
    return htmlEntities[entity] || entity
  }

  return text.replace(/&(?:amp|lt|gt|quot|apos|nbsp|#39);/g, lookup)
}
2 changes: 1 addition & 1 deletion server/api/registry/readme/[...pkg].get.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export default defineCachedEventHandler(
swr: true,
getKey: event => {
const pkg = getRouterParam(event, 'pkg') ?? ''
return `readme:v8:${pkg.replace(/\/+$/, '').trim()}`
return `readme:v9:${pkg.replace(/\/+$/, '').trim()}`
},
},
)
34 changes: 24 additions & 10 deletions server/utils/readme.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ import sanitizeHtml from 'sanitize-html'
import { hasProtocol } from 'ufo'
import type { ReadmeResponse, TocItem } from '#shared/types/readme'
import { convertBlobOrFileToRawUrl, type RepositoryInfo } from '#shared/utils/git-providers'
import { highlightCodeSync } from './shiki'
import { decodeHtmlEntities } from '#shared/utils/html'
import { convertToEmoji } from '#shared/utils/emoji'

import { highlightCodeSync } from './shiki'

/**
* Playground provider configuration
*/
Expand Down Expand Up @@ -172,8 +174,21 @@ const ALLOWED_ATTR: Record<string, string[]> = {
'p': ['align'],
}

// GitHub-style callout types
// Format: > [!NOTE], > [!TIP], > [!IMPORTANT], > [!WARNING], > [!CAUTION]
/**
 * Strip all HTML tags from a string, re-running the replacement until the
 * output stops changing so that nested/interleaved tags cannot leave a
 * tag behind (e.g. `<scr<script>ipt>` → `ipt>`: each match runs from a
 * `<` to the first `>` that follows it).
 */
Comment on lines +178 to +181
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Comment example does not match the actual regex behaviour.

The doc comment illustrates `<scr<script>ipt>` → `<script>` after one pass, which is the failure mode of the narrower regex `/<[^<>]*>/g` (which excludes both `<` and `>`). The actual regex used here is `/<[^>]*>/g`, whose greedy `[^>]*` also matches `<`, so the first pass strips `<scr<script>` (from the opening `<` to the first `>`), leaving `ipt>` — not `<script>`.

📝 Proposed fix for the comment
 /**
  * Strip all HTML tags from a string, looping until stable to prevent
  * incomplete sanitization from nested/interleaved tags
- * (e.g. `<scr<script>ipt>` → `<script>` after one pass).
+ * (e.g. `<scr<script>ipt>` → `ipt>` after one pass, which is already stable).
  */

function stripHtmlTags(text: string): string {
  let current = text
  // Keep removing `<...>` spans until a pass leaves the string unchanged.
  for (;;) {
    const stripped = current.replace(/<[^>]*>/g, '')
    if (stripped === current) {
      return stripped
    }
    current = stripped
  }
}

/**
* Generate a GitHub-style slug from heading text.
Expand All @@ -184,8 +199,7 @@ const ALLOWED_ATTR: Record<string, string[]> = {
* - Collapse multiple hyphens
*/
function slugify(text: string): string {
return text
.replace(/<[^>]*>/g, '') // Strip HTML tags
return stripHtmlTags(text)
.toLowerCase()
.trim()
.replace(/\s+/g, '-') // Spaces to hyphens
Expand Down Expand Up @@ -371,8 +385,8 @@ export async function renderReadmeHtml(
// (e.g., #install, #dependencies, #versions are used by the package page)
const id = `user-content-${uniqueSlug}`

// Collect TOC item with plain text (HTML stripped)
const plainText = text.replace(/<[^>]*>/g, '').trim()
// Collect TOC item with plain text (HTML stripped, entities decoded)
const plainText = decodeHtmlEntities(stripHtmlTags(text).trim())
if (plainText) {
toc.push({ text: plainText, id, depth })
}
Expand Down Expand Up @@ -402,11 +416,11 @@ ${html}
return `<img src="${resolvedHref}"${altAttr}${titleAttr}>`
}

// // Resolve link URLs, add security attributes, and collect playground links
// Resolve link URLs, add security attributes, and collect playground links
renderer.link = function ({ href, title, tokens }: Tokens.Link) {
const text = this.parser.parseInline(tokens)
const titleAttr = title ? ` title="${title}"` : ''
let plainText = text.replace(/<[^>]*>/g, '').trim()
let plainText = stripHtmlTags(text).trim()

// If plain text is empty, check if we have an image with alt text
if (!plainText && tokens.length === 1 && tokens[0]?.type === 'image') {
Expand Down Expand Up @@ -511,7 +525,7 @@ ${html}
* provide the text of the element. This will automatically be removed, because there
* is an allow list for link attributes.
* */
label: attribs['data-title-intermediate'] || provider.name,
label: decodeHtmlEntities(attribs['data-title-intermediate'] || provider.name),
})
}

Expand Down
13 changes: 13 additions & 0 deletions shared/utils/html.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/** Supported HTML entities and the literal character each one stands for. */
const htmlEntities: Record<string, string> = {
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&#39;': "'",
  '&apos;': "'",
  '&nbsp;': ' ',
}

/**
 * Replace the entities known to `htmlEntities` with their literal characters.
 *
 * The text is scanned once, so the output of a replacement is never decoded
 * again. Entities that are not in the table are left exactly as written.
 *
 * @param text - Input that may contain HTML entities.
 * @returns The input with recognised entities decoded.
 */
export function decodeHtmlEntities(text: string): string {
  const entityPattern = /&(?:amp|lt|gt|quot|apos|nbsp|#39);/g
  return text.replace(entityPattern, (entity) => {
    const decoded = htmlEntities[entity]
    // Keep the original text if the table has no usable entry for it.
    return decoded ? decoded : entity
  })
}
28 changes: 1 addition & 27 deletions test/unit/app/utils/formatters.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, expect, it } from 'vitest'
import { decodeHtmlEntities, toIsoDateString } from '../../../../app/utils/formatters'
import { toIsoDateString } from '../../../../app/utils/formatters'

describe('toIsoDateString', () => {
it('formats a date as YYYY-MM-DD', () => {
Expand All @@ -10,29 +10,3 @@ describe('toIsoDateString', () => {
expect(toIsoDateString(new Date('2024-01-05T00:00:00Z'))).toBe('2024-01-05')
})
})

describe('decodeHtmlEntities', () => {
  // Each row pairs an encoded entity with the character it must decode to.
  const entityCases = [
    ['&amp;', '&'],
    ['&lt;', '<'],
    ['&gt;', '>'],
    ['&quot;', '"'],
    ['&#39;', "'"],
    ['&apos;', "'"],
    ['&nbsp;', ' '],
  ] as const

  it.each(entityCases)('%s → %s', (encoded, decoded) => {
    const actual = decodeHtmlEntities(encoded)
    expect(actual).toBe(decoded)
  })

  it('decodes multiple entities in one string', () => {
    const mixed = 'a &amp; b &lt; c'
    expect(decodeHtmlEntities(mixed)).toBe('a & b < c')
  })

  it('leaves plain text unchanged', () => {
    const plain = 'say no to bloat'
    expect(decodeHtmlEntities(plain)).toBe('say no to bloat')
  })

  it('leaves unknown entities unchanged', () => {
    const unknownEntity = '&unknown;'
    expect(decodeHtmlEntities(unknownEntity)).toBe('&unknown;')
  })
})
28 changes: 28 additions & 0 deletions test/unit/shared/utils/html.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import { describe, expect, it } from 'vitest'
import { decodeHtmlEntities } from '../../../../shared/utils/html'

// Unit tests for decodeHtmlEntities as imported from shared/utils/html.
describe('decodeHtmlEntities', () => {
// Table-driven case: every listed entity must decode to the paired character.
it.each([
['&amp;', '&'],
['&lt;', '<'],
['&gt;', '>'],
['&quot;', '"'],
['&#39;', "'"],
['&apos;', "'"],
['&nbsp;', ' '],
] as const)('%s → %s', (input, expected) => {
expect(decodeHtmlEntities(input)).toBe(expected)
})
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// Several entities inside surrounding text are all decoded in one call.
it('decodes multiple entities in one string', () => {
expect(decodeHtmlEntities('a &amp; b &lt; c')).toBe('a & b < c')
})

// Input without any entity must come back unchanged.
it('leaves plain text unchanged', () => {
expect(decodeHtmlEntities('say no to bloat')).toBe('say no to bloat')
})

// An entity that is not in the expected set above is passed through verbatim.
it('leaves unknown entities unchanged', () => {
expect(decodeHtmlEntities('&unknown;')).toBe('&unknown;')
})
})
Loading