@@ -3,9 +3,11 @@ import sanitizeHtml from 'sanitize-html'
33import { hasProtocol } from 'ufo'
44import type { ReadmeResponse , TocItem } from '#shared/types/readme'
55import { convertBlobOrFileToRawUrl , type RepositoryInfo } from '#shared/utils/git-providers'
6- import { highlightCodeSync } from './shiki '
6+ import { decodeHtmlEntities } from '#shared/utils/html '
77import { convertToEmoji } from '#shared/utils/emoji'
88
9+ import { highlightCodeSync } from './shiki'
10+
911/**
1012 * Playground provider configuration
1113 */
@@ -172,8 +174,21 @@ const ALLOWED_ATTR: Record<string, string[]> = {
172174 'p' : [ 'align' ] ,
173175}
174176
175- // GitHub-style callout types
176- // Format: > [!NOTE], > [!TIP], > [!IMPORTANT], > [!WARNING], > [!CAUTION]
/**
 * Remove every HTML tag from a string.
 *
 * A "tag" is any run matching `<[^>]*>`: a `<`, then any characters other
 * than `>`, then the first following `>`. The replacement is repeated until
 * the string stops changing, so the returned value never contains a complete
 * `<…>` sequence, even for nested or interleaved input.
 *
 * Note that `[^>]*` can itself span `<` characters, so interleaved input
 * such as `<scr<script>ipt>` is reduced to `ipt>` (the whole `<scr<script>`
 * run is one match). The loop is a defensive guard rather than something
 * this particular pattern strictly needs.
 *
 * This is a plain-text extractor, not an HTML sanitizer: a `<` with no later
 * `>` and a `>` with no earlier `<` are left in place, and nothing is
 * escaped or entity-decoded.
 *
 * @param text - String that may contain HTML markup.
 * @returns `text` with every `<…>` sequence removed.
 */
function stripHtmlTags(text: string): string {
  const tagPattern = /<[^>]*>/g
  let result = text
  let previous: string
  do {
    previous = result
    // String#replace with a global regex starts from index 0 on every call,
    // so reusing the same RegExp object across iterations is safe.
    result = result.replace(tagPattern, '')
  } while (result !== previous)
  return result
}
177192
178193/**
179194 * Generate a GitHub-style slug from heading text.
@@ -184,8 +199,7 @@ const ALLOWED_ATTR: Record<string, string[]> = {
184199 * - Collapse multiple hyphens
185200 */
186201function slugify ( text : string ) : string {
187- return text
188- . replace ( / < [ ^ > ] * > / g, '' ) // Strip HTML tags
202+ return stripHtmlTags ( text )
189203 . toLowerCase ( )
190204 . trim ( )
191205 . replace ( / \s + / g, '-' ) // Spaces to hyphens
@@ -371,8 +385,8 @@ export async function renderReadmeHtml(
371385 // (e.g., #install, #dependencies, #versions are used by the package page)
372386 const id = `user-content-${ uniqueSlug } `
373387
374- // Collect TOC item with plain text (HTML stripped)
375- const plainText = text . replace ( / < [ ^ > ] * > / g , '' ) . trim ( )
388+ // Collect TOC item with plain text (HTML stripped, entities decoded )
389+ const plainText = decodeHtmlEntities ( stripHtmlTags ( text ) . trim ( ) )
376390 if ( plainText ) {
377391 toc . push ( { text : plainText , id, depth } )
378392 }
@@ -402,11 +416,11 @@ ${html}
402416 return `<img src="${ resolvedHref } "${ altAttr } ${ titleAttr } >`
403417 }
404418
405- // // Resolve link URLs, add security attributes, and collect playground links
419+ // Resolve link URLs, add security attributes, and collect playground links
406420 renderer . link = function ( { href, title, tokens } : Tokens . Link ) {
407421 const text = this . parser . parseInline ( tokens )
408422 const titleAttr = title ? ` title="${ title } "` : ''
409- let plainText = text . replace ( / < [ ^ > ] * > / g , '' ) . trim ( )
423+ let plainText = stripHtmlTags ( text ) . trim ( )
410424
411425 // If plain text is empty, check if we have an image with alt text
412426 if ( ! plainText && tokens . length === 1 && tokens [ 0 ] ?. type === 'image' ) {
@@ -511,7 +525,7 @@ ${html}
511525 * provide the text of the element. This will automatically be removed, because there
512526 * is an allow list for link attributes.
513527 * */
514- label : attribs [ 'data-title-intermediate' ] || provider . name ,
528+ label : decodeHtmlEntities ( attribs [ 'data-title-intermediate' ] || provider . name ) ,
515529 } )
516530 }
517531
0 commit comments