Skip to content

Commit 90313e5

Browse files
committed
fix: improve html tag stripping
1 parent 6a99a32 commit 90313e5

File tree

1 file changed

+19
-7
lines changed

1 file changed

+19
-7
lines changed

server/utils/readme.ts

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,21 @@ const ALLOWED_ATTR: Record<string, string[]> = {
174174
'p': ['align'],
175175
}
176176

177-
// GitHub-style callout types
178-
// Format: > [!NOTE], > [!TIP], > [!IMPORTANT], > [!WARNING], > [!CAUTION]
177+
/**
 * Remove every `<...>` span from a string.
 *
 * The replacement is repeated until the output stops changing, so the
 * returned string never contains a complete `<...>` sequence, even for
 * nested/interleaved input. Because the pattern's `[^>]*` also consumes any
 * inner `<`, `<scr<script>ipt>` becomes `ipt>`. A `<` with no later `>` and
 * a stray `>` are left untouched (e.g. `a < b` is returned unchanged).
 *
 * @param text - Raw string that may contain HTML markup.
 * @returns The input with all `<...>` spans removed.
 */
function stripHtmlTags(text: string): string {
  const anyTag = /<[^>]*>/g
  let current = text
  let stripped = current.replace(anyTag, '')
  // Keep stripping until a pass removes nothing (fixed point reached).
  while (stripped !== current) {
    current = stripped
    stripped = current.replace(anyTag, '')
  }
  return stripped
}
179192

180193
/**
181194
* Generate a GitHub-style slug from heading text.
@@ -186,8 +199,7 @@ const ALLOWED_ATTR: Record<string, string[]> = {
186199
* - Collapse multiple hyphens
187200
*/
188201
function slugify(text: string): string {
189-
return text
190-
.replace(/<[^>]*>/g, '') // Strip HTML tags
202+
return stripHtmlTags(text)
191203
.toLowerCase()
192204
.trim()
193205
.replace(/\s+/g, '-') // Spaces to hyphens
@@ -374,7 +386,7 @@ export async function renderReadmeHtml(
374386
const id = `user-content-${uniqueSlug}`
375387

376388
// Collect TOC item with plain text (HTML stripped, entities decoded)
377-
const plainText = decodeHtmlEntities(text.replace(/<[^>]*>/g, '').trim())
389+
const plainText = decodeHtmlEntities(stripHtmlTags(text).trim())
378390
if (plainText) {
379391
toc.push({ text: plainText, id, depth })
380392
}
@@ -404,11 +416,11 @@ ${html}
404416
return `<img src="${resolvedHref}"${altAttr}${titleAttr}>`
405417
}
406418

407-
// // Resolve link URLs, add security attributes, and collect playground links
419+
// Resolve link URLs, add security attributes, and collect playground links
408420
renderer.link = function ({ href, title, tokens }: Tokens.Link) {
409421
const text = this.parser.parseInline(tokens)
410422
const titleAttr = title ? ` title="${title}"` : ''
411-
let plainText = text.replace(/<[^>]*>/g, '').trim()
423+
let plainText = stripHtmlTags(text).trim()
412424

413425
// If plain text is empty, check if we have an image with alt text
414426
if (!plainText && tokens.length === 1 && tokens[0]?.type === 'image') {

0 commit comments

Comments
 (0)