Skip to content

Commit 5e3b034

Browse files
serhalp and danielroe authored
fix: strip HTML tags from plaintext search result descriptions (#1702)
Co-authored-by: Daniel Roe <daniel@roe.dev>
1 parent 4dcc19c commit 5e3b034

File tree

5 files changed

+43
-20
lines changed

5 files changed

+43
-20
lines changed

app/components/Compare/PackageSelector.vue

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ onClickOutside(containerRef, () => {
301301
v-if="result.description"
302302
class="text-xs text-fg-muted truncate mt-0.5 w-full block"
303303
>
304-
{{ decodeHtmlEntities(result.description) }}
304+
{{ stripHtmlTags(decodeHtmlEntities(result.description)) }}
305305
</span>
306306
</ButtonBase>
307307
</div>

app/components/Package/TableRow.vue

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ const allMaintainersText = computed(() => {
6969
v-if="isColumnVisible('description')"
7070
class="py-2 px-3 text-sm text-fg-muted max-w-xs truncate"
7171
>
72-
{{ decodeHtmlEntities(pkg.description || '-') }}
72+
{{ stripHtmlTags(decodeHtmlEntities(pkg.description || '-')) }}
7373
</td>
7474

7575
<!-- Downloads -->

server/utils/readme.ts

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import sanitizeHtml from 'sanitize-html'
33
import { hasProtocol } from 'ufo'
44
import type { ReadmeResponse, TocItem } from '#shared/types/readme'
55
import { convertBlobOrFileToRawUrl, type RepositoryInfo } from '#shared/utils/git-providers'
6-
import { decodeHtmlEntities } from '#shared/utils/html'
6+
import { decodeHtmlEntities, stripHtmlTags } from '#shared/utils/html'
77
import { convertToEmoji } from '#shared/utils/emoji'
88
import { toProxiedImageUrl } from '#server/utils/image-proxy'
99

@@ -194,22 +194,6 @@ const ALLOWED_ATTR: Record<string, string[]> = {
194194
'p': ['align'],
195195
}
196196

197-
/**
198-
* Strip all HTML tags from a string, looping until stable to prevent
199-
* incomplete sanitization from nested/interleaved tags
200-
* (e.g. `<scr<script>ipt>` → `<script>` after one pass).
201-
*
* NOTE(review): with the pattern used below, `[^>]*` also consumes `<`, so the
* example above is reduced to `ipt>` by the first pass (the match is
* `<scr<script>`); the second pass then finds nothing and the loop exits.
*
* @param text - String that may contain HTML markup.
* @returns `text` with every span from a `<` to the next `>` removed.
*/
202-
function stripHtmlTags(text: string): string {
203-
// A "tag" is a `<`, any run of characters other than `>`, then a `>`.
const tagPattern = /<[^>]*>/g
204-
let result = text
205-
let previous: string
206-
// Re-apply the replacement until a pass leaves the string unchanged.
do {
207-
previous = result
208-
result = result.replace(tagPattern, '')
209-
} while (result !== previous)
210-
return result
211-
}
212-
213197
/**
214198
* Generate a GitHub-style slug from heading text.
215199
* - Convert to lowercase

shared/utils/html.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,19 @@ const htmlEntities: Record<string, string> = {
1111
/**
 * Decode the small set of HTML entities this module recognises
 * (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&nbsp;`, `&#39;`).
 *
 * Each matched entity is looked up in `htmlEntities` by its full text
 * (including the leading `&` and trailing `;`). When the lookup yields no
 * truthy replacement, the entity is left exactly as written.
 *
 * @param text - String that may contain HTML entities.
 * @returns `text` with every recognised entity replaced.
 */
export function decodeHtmlEntities(text: string): string {
  const entityPattern = /&(?:amp|lt|gt|quot|apos|nbsp|#39);/g
  return text.replace(entityPattern, (entity) => {
    const decoded = htmlEntities[entity]
    return decoded || entity
  })
}
14+
15+
/**
 * Strip all HTML tags from a string.
 *
 * A "tag" is any span that starts at a `<` and runs to the next `>`;
 * everything in between (including further `<` characters and newlines) is
 * dropped, so `<scr<script>ipt>` becomes `ipt>`. A `<` that is never followed
 * by a `>` is left in place.
 *
 * The scan is a single left-to-right pass built on `indexOf`, so it stays
 * linear even on hostile input such as a long run of `<` characters, where a
 * `/<[^>]*>/g` replacement would re-scan the rest of the string from every
 * `<`. One pass is sufficient: after it, no `<` left in the result has a `>`
 * after it, so repeating the strip could never remove anything more.
 *
 * @param text - String that may contain HTML markup.
 * @returns `text` with every `<…>` span removed.
 */
export function stripHtmlTags(text: string): string {
  let stripped = ''
  let cursor = 0
  for (;;) {
    const open = text.indexOf('<', cursor)
    if (open === -1) break
    const close = text.indexOf('>', open + 1)
    // No `>` anywhere ahead means nothing after `cursor` can form a tag.
    if (close === -1) break
    stripped += text.slice(cursor, open)
    cursor = close + 1
  }
  // Keep whatever follows the last removed tag (or the whole input if none).
  return stripped + text.slice(cursor)
}

test/unit/shared/utils/html.spec.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { describe, expect, it } from 'vitest'
2-
import { decodeHtmlEntities } from '../../../../shared/utils/html'
2+
import { decodeHtmlEntities, stripHtmlTags } from '../../../../shared/utils/html'
33

44
describe('decodeHtmlEntities', () => {
55
it.each([
@@ -26,3 +26,26 @@ describe('decodeHtmlEntities', () => {
2626
expect(decodeHtmlEntities('&unknown;')).toBe('&unknown;')
2727
})
2828
})
29+
30+
// Behavioural spec for stripHtmlTags: ordinary markup, plain text, the
// interleaved-tag input its docstring mentions, and bracket edge cases.
describe('stripHtmlTags', () => {
  it('removes simple HTML tags', () => {
    expect(stripHtmlTags('<b>bold</b>')).toBe('bold')
  })

  it('removes anchor tags keeping text content', () => {
    expect(stripHtmlTags('<a href="https://example.com">link</a>')).toBe('link')
  })

  it('removes self-closing tags', () => {
    expect(stripHtmlTags('before<br/>after')).toBe('beforeafter')
  })

  it('leaves plain text unchanged', () => {
    expect(stripHtmlTags('no tags here')).toBe('no tags here')
  })

  it('works with decodeHtmlEntities to clean descriptions', () => {
    const raw = '&lt;a href=&quot;url&quot;&gt;link&lt;/a&gt; and text'
    expect(stripHtmlTags(decodeHtmlEntities(raw))).toBe('link and text')
  })

  // A tag runs from a `<` to the next `>`, so the inner `<` is swallowed too.
  it('never leaves a complete tag behind for interleaved markup', () => {
    expect(stripHtmlTags('<scr<script>ipt>')).toBe('ipt>')
  })

  it('leaves unmatched angle brackets untouched', () => {
    expect(stripHtmlTags('a < b')).toBe('a < b')
    expect(stripHtmlTags('x > y < z')).toBe('x > y < z')
  })

  it('returns an empty string for empty input', () => {
    expect(stripHtmlTags('')).toBe('')
  })
})

0 commit comments

Comments
 (0)