Skip to content

Commit 54aa86a

Browse files
authored
perf: move html entity decoding to server side (#1561)
1 parent 0f52b01 commit 54aa86a

File tree

9 files changed

+73
-60
lines changed

9 files changed

+73
-60
lines changed

app/components/Package/Playgrounds.vue

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
<script setup lang="ts">
22
import type { PlaygroundLink } from '#shared/types'
3-
import { decodeHtmlEntities } from '~/utils/formatters'
43
54
const props = defineProps<{
65
links: PlaygroundLink[]
@@ -130,7 +129,7 @@ function focusMenuItem(index: number) {
130129
:class="[getIcon(firstLink.provider), getColor(firstLink.provider), 'w-4 h-4 shrink-0']"
131130
aria-hidden="true"
132131
/>
133-
<span class="truncate text-fg-muted">{{ decodeHtmlEntities(firstLink.label) }}</span>
132+
<span class="truncate text-fg-muted">{{ firstLink.label }}</span>
134133
</a>
135134
</TooltipApp>
136135

@@ -186,7 +185,7 @@ function focusMenuItem(index: number) {
186185
:class="[getIcon(link.provider), getColor(link.provider), 'w-4 h-4 shrink-0']"
187186
aria-hidden="true"
188187
/>
189-
<span class="truncate">{{ decodeHtmlEntities(link.label) }}</span>
188+
<span class="truncate">{{ link.label }}</span>
190189
</a>
191190
</TooltipApp>
192191
</div>

app/components/ReadmeTocDropdown.vue

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
<script setup lang="ts">
22
import type { TocItem } from '#shared/types/readme'
33
import { onClickOutside, useEventListener } from '@vueuse/core'
4-
import { decodeHtmlEntities } from '~/utils/formatters'
54
65
const props = defineProps<{
76
toc: TocItem[]
@@ -202,7 +201,7 @@ function handleKeydown(event: KeyboardEvent) {
202201
@click="select()"
203202
@mouseenter="highlightedIndex = getIndex(node.id)"
204203
>
205-
<span class="truncate">{{ decodeHtmlEntities(node.text) }}</span>
204+
<span class="truncate">{{ node.text }}</span>
206205
</NuxtLink>
207206

208207
<template v-for="child in node.children" :key="child.id">
@@ -220,7 +219,7 @@ function handleKeydown(event: KeyboardEvent) {
220219
@click="select()"
221220
@mouseenter="highlightedIndex = getIndex(child.id)"
222221
>
223-
<span class="truncate">{{ decodeHtmlEntities(child.text) }}</span>
222+
<span class="truncate">{{ child.text }}</span>
224223
</NuxtLink>
225224

226225
<NuxtLink
@@ -241,7 +240,7 @@ function handleKeydown(event: KeyboardEvent) {
241240
@click="select()"
242241
@mouseenter="highlightedIndex = getIndex(grandchild.id)"
243242
>
244-
<span class="truncate">{{ decodeHtmlEntities(grandchild.text) }}</span>
243+
<span class="truncate">{{ grandchild.text }}</span>
245244
</NuxtLink>
246245
</template>
247246
</template>

app/composables/useMarkdown.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { decodeHtmlEntities } from '~/utils/formatters'
1+
import { decodeHtmlEntities } from '#shared/utils/html'
22

33
interface UseMarkdownOptions {
44
text: string

app/utils/formatters.ts

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,3 @@ export function toIsoDateString(date: Date): string {
44
const day = String(date.getUTCDate()).padStart(2, '0')
55
return `${year}-${month}-${day}`
66
}
7-
8-
const htmlEntities: Record<string, string> = {
9-
'&amp;': '&',
10-
'&lt;': '<',
11-
'&gt;': '>',
12-
'&quot;': '"',
13-
'&#39;': "'",
14-
'&apos;': "'",
15-
'&nbsp;': ' ',
16-
}
17-
18-
export function decodeHtmlEntities(text: string): string {
19-
return text.replace(/&(?:amp|lt|gt|quot|apos|nbsp|#39);/g, match => htmlEntities[match] || match)
20-
}

server/api/registry/readme/[...pkg].get.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ export default defineCachedEventHandler(
3333
swr: true,
3434
getKey: event => {
3535
const pkg = getRouterParam(event, 'pkg') ?? ''
36-
return `readme:v8:${pkg.replace(/\/+$/, '').trim()}`
36+
return `readme:v9:${pkg.replace(/\/+$/, '').trim()}`
3737
},
3838
},
3939
)

server/utils/readme.ts

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@ import sanitizeHtml from 'sanitize-html'
33
import { hasProtocol } from 'ufo'
44
import type { ReadmeResponse, TocItem } from '#shared/types/readme'
55
import { convertBlobOrFileToRawUrl, type RepositoryInfo } from '#shared/utils/git-providers'
6-
import { highlightCodeSync } from './shiki'
6+
import { decodeHtmlEntities } from '#shared/utils/html'
77
import { convertToEmoji } from '#shared/utils/emoji'
88

9+
import { highlightCodeSync } from './shiki'
10+
911
/**
1012
* Playground provider configuration
1113
*/
@@ -172,8 +174,21 @@ const ALLOWED_ATTR: Record<string, string[]> = {
172174
'p': ['align'],
173175
}
174176

175-
// GitHub-style callout types
176-
// Format: > [!NOTE], > [!TIP], > [!IMPORTANT], > [!WARNING], > [!CAUTION]
177+
/**
178+
* Strip all HTML tags from a string, looping until stable to prevent
179+
* incomplete sanitization from nested/interleaved tags
180+
* (e.g. `<scr<script>ipt>` → `<script>` after one pass).
181+
*/
182+
function stripHtmlTags(text: string): string {
183+
const tagPattern = /<[^>]*>/g
184+
let result = text
185+
let previous: string
186+
do {
187+
previous = result
188+
result = result.replace(tagPattern, '')
189+
} while (result !== previous)
190+
return result
191+
}
177192

178193
/**
179194
* Generate a GitHub-style slug from heading text.
@@ -184,8 +199,7 @@ const ALLOWED_ATTR: Record<string, string[]> = {
184199
* - Collapse multiple hyphens
185200
*/
186201
function slugify(text: string): string {
187-
return text
188-
.replace(/<[^>]*>/g, '') // Strip HTML tags
202+
return stripHtmlTags(text)
189203
.toLowerCase()
190204
.trim()
191205
.replace(/\s+/g, '-') // Spaces to hyphens
@@ -371,8 +385,8 @@ export async function renderReadmeHtml(
371385
// (e.g., #install, #dependencies, #versions are used by the package page)
372386
const id = `user-content-${uniqueSlug}`
373387

374-
// Collect TOC item with plain text (HTML stripped)
375-
const plainText = text.replace(/<[^>]*>/g, '').trim()
388+
// Collect TOC item with plain text (HTML stripped, entities decoded)
389+
const plainText = decodeHtmlEntities(stripHtmlTags(text).trim())
376390
if (plainText) {
377391
toc.push({ text: plainText, id, depth })
378392
}
@@ -402,11 +416,11 @@ ${html}
402416
return `<img src="${resolvedHref}"${altAttr}${titleAttr}>`
403417
}
404418

405-
// // Resolve link URLs, add security attributes, and collect playground links
419+
// Resolve link URLs, add security attributes, and collect playground links
406420
renderer.link = function ({ href, title, tokens }: Tokens.Link) {
407421
const text = this.parser.parseInline(tokens)
408422
const titleAttr = title ? ` title="${title}"` : ''
409-
let plainText = text.replace(/<[^>]*>/g, '').trim()
423+
let plainText = stripHtmlTags(text).trim()
410424

411425
// If plain text is empty, check if we have an image with alt text
412426
if (!plainText && tokens.length === 1 && tokens[0]?.type === 'image') {
@@ -511,7 +525,7 @@ ${html}
511525
* provide the text of the element. This will automatically be removed, because there
512526
* is an allow list for link attributes.
513527
* */
514-
label: attribs['data-title-intermediate'] || provider.name,
528+
label: decodeHtmlEntities(attribs['data-title-intermediate'] || provider.name),
515529
})
516530
}
517531

shared/utils/html.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
const htmlEntities: Record<string, string> = {
2+
'&amp;': '&',
3+
'&lt;': '<',
4+
'&gt;': '>',
5+
'&quot;': '"',
6+
'&#39;': "'",
7+
'&apos;': "'",
8+
'&nbsp;': '\u00A0',
9+
}
10+
11+
export function decodeHtmlEntities(text: string): string {
12+
return text.replace(/&(?:amp|lt|gt|quot|apos|nbsp|#39);/g, match => htmlEntities[match] || match)
13+
}

[file path missing from this capture — the vitest spec that imports `app/utils/formatters`]

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { describe, expect, it } from 'vitest'
2-
import { decodeHtmlEntities, toIsoDateString } from '../../../../app/utils/formatters'
2+
import { toIsoDateString } from '../../../../app/utils/formatters'
33

44
describe('toIsoDateString', () => {
55
it('formats a date as YYYY-MM-DD', () => {
@@ -10,29 +10,3 @@ describe('toIsoDateString', () => {
1010
expect(toIsoDateString(new Date('2024-01-05T00:00:00Z'))).toBe('2024-01-05')
1111
})
1212
})
13-
14-
describe('decodeHtmlEntities', () => {
15-
it.each([
16-
['&amp;', '&'],
17-
['&lt;', '<'],
18-
['&gt;', '>'],
19-
['&quot;', '"'],
20-
['&#39;', "'"],
21-
['&apos;', "'"],
22-
['&nbsp;', ' '],
23-
] as const)('%s → %s', (input, expected) => {
24-
expect(decodeHtmlEntities(input)).toBe(expected)
25-
})
26-
27-
it('decodes multiple entities in one string', () => {
28-
expect(decodeHtmlEntities('a &amp; b &lt; c')).toBe('a & b < c')
29-
})
30-
31-
it('leaves plain text unchanged', () => {
32-
expect(decodeHtmlEntities('say no to bloat')).toBe('say no to bloat')
33-
})
34-
35-
it('leaves unknown entities unchanged', () => {
36-
expect(decodeHtmlEntities('&unknown;')).toBe('&unknown;')
37-
})
38-
})

[file path missing from this capture — the new vitest spec that imports `shared/utils/html`]

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import { describe, expect, it } from 'vitest'
2+
import { decodeHtmlEntities } from '../../../../shared/utils/html'
3+
4+
describe('decodeHtmlEntities', () => {
5+
it.each([
6+
['&amp;', '&'],
7+
['&lt;', '<'],
8+
['&gt;', '>'],
9+
['&quot;', '"'],
10+
['&#39;', "'"],
11+
['&apos;', "'"],
12+
['&nbsp;', '\u00A0'],
13+
] as const)('%s → %s', (input, expected) => {
14+
expect(decodeHtmlEntities(input)).toBe(expected)
15+
})
16+
17+
it('decodes multiple entities in one string', () => {
18+
expect(decodeHtmlEntities('a &amp; b &lt; c')).toBe('a & b < c')
19+
})
20+
21+
it('leaves plain text unchanged', () => {
22+
expect(decodeHtmlEntities('say no to bloat')).toBe('say no to bloat')
23+
})
24+
25+
it('leaves unknown entities unchanged', () => {
26+
expect(decodeHtmlEntities('&unknown;')).toBe('&unknown;')
27+
})
28+
})

0 commit comments

Comments
 (0)