@@ -210,7 +210,7 @@ const ALLOWED_ATTR: Record<string, string[]> = {
210210 * - Collapse multiple hyphens
211211 */
212212function slugify ( text : string ) : string {
213- return stripHtmlTags ( text )
213+ return decodeHtmlEntities ( stripHtmlTags ( text ) )
214214 . toLowerCase ( )
215215 . trim ( )
216216 . replace ( / \s + / g, '-' ) // Spaces to hyphens
@@ -219,6 +219,14 @@ function slugify(text: string): string {
219219 . replace ( / ^ - | - $ / g, '' ) // Trim leading/trailing hyphens
220220}
221221
222+ function getHeadingPlainText ( text : string ) : string {
223+ return decodeHtmlEntities ( stripHtmlTags ( text ) . trim ( ) )
224+ }
225+
226+ function getHeadingSlugSource ( text : string ) : string {
227+ return stripHtmlTags ( text ) . trim ( )
228+ }
229+
222230/**
223231 * Lazy ATX heading extension for marked: allows headings without a space after `#`.
224232 *
@@ -273,6 +281,30 @@ const reservedPathsNpmJs = [
273281
274282const npmJsHosts = new Set ( [ 'www.npmjs.com' , 'npmjs.com' , 'www.npmjs.org' , 'npmjs.org' ] )
275283
284+ const USER_CONTENT_PREFIX = 'user-content-'
285+
286+ function withUserContentPrefix ( value : string ) : string {
287+ return value . startsWith ( USER_CONTENT_PREFIX ) ? value : `${ USER_CONTENT_PREFIX } ${ value } `
288+ }
289+
290+ function toUserContentId ( value : string ) : string {
291+ return `${ USER_CONTENT_PREFIX } ${ value } `
292+ }
293+
294+ function toUserContentHash ( value : string ) : string {
295+ return `#${ withUserContentPrefix ( value ) } `
296+ }
297+
298+ function normalizePreservedAnchorAttrs ( attrs : string ) : string {
299+ const cleanedAttrs = attrs
300+ . replace ( / \s + h r e f \s * = \s * ( " [ ^ " ] * " | ' [ ^ ' ] * ' | [ ^ \s > ] + ) / gi, '' )
301+ . replace ( / \s + r e l \s * = \s * ( " [ ^ " ] * " | ' [ ^ ' ] * ' | [ ^ \s > ] + ) / gi, '' )
302+ . replace ( / \s + t a r g e t \s * = \s * ( " [ ^ " ] * " | ' [ ^ ' ] * ' | [ ^ \s > ] + ) / gi, '' )
303+ . trim ( )
304+
305+ return cleanedAttrs ? ` ${ cleanedAttrs } ` : ''
306+ }
307+
276308const isNpmJsUrlThatCanBeRedirected = ( url : URL ) => {
277309 if ( ! npmJsHosts . has ( url . host ) ) {
278310 return false
@@ -298,8 +330,11 @@ function resolveUrl(url: string, packageName: string, repoInfo?: RepositoryInfo)
298330 if ( ! url ) return url
299331 if ( url . startsWith ( '#' ) ) {
300332 // Prefix anchor links to match heading IDs (avoids collision with page IDs)
301- return `#user-content-${ url . slice ( 1 ) } `
333+ // Idempotent: don't double-prefix if already prefixed
334+ return toUserContentHash ( url . slice ( 1 ) )
302335 }
336+ // Absolute paths (e.g. /package/foo from a previous npmjs redirect) are already resolved
337+ if ( url . startsWith ( '/' ) ) return url
303338 if ( hasProtocol ( url , { acceptRelative : true } ) ) {
304339 try {
305340 const parsed = new URL ( url , 'https://example.com' )
@@ -388,8 +423,8 @@ function resolveImageUrl(url: string, packageName: string, repoInfo?: Repository
388423
389424// Helper to prefix id attributes with 'user-content-'
390425function prefixId ( tagName : string , attribs : sanitizeHtml . Attributes ) {
391- if ( attribs . id && ! attribs . id . startsWith ( 'user-content-' ) ) {
392- attribs . id = `user-content- ${ attribs . id } `
426+ if ( attribs . id ) {
427+ attribs . id = withUserContentPrefix ( attribs . id )
393428 }
394429 return { tagName, attribs }
395430}
@@ -428,35 +463,71 @@ export async function renderReadmeHtml(
428463 // So README starts at h3, and we ensure no levels are skipped
429464 // Visual styling preserved via data-level attribute (original depth)
430465 let lastSemanticLevel = 2 // Start after h2 (the "Readme" section heading)
431- renderer . heading = function ( { tokens, depth } : Tokens . Heading ) {
432- // Calculate the target semantic level based on document structure
433- // Start at h3 (since page h1 + section h2 already exist)
434- // But ensure we never skip levels - can only go down by 1 or stay same/go up
466+
467+ // Shared heading processing for both markdown and HTML headings
468+ function processHeading (
469+ depth : number ,
470+ displayHtml : string ,
471+ plainText : string ,
472+ slugSource : string ,
473+ preservedAttrs = '' ,
474+ ) {
435475 const semanticLevel = calculateSemanticDepth ( depth , lastSemanticLevel )
436476 lastSemanticLevel = semanticLevel
437- const text = this . parser . parseInline ( tokens )
438477
439- // Generate GitHub-style slug for anchor links
440- let slug = slugify ( text )
441- if ( ! slug ) slug = 'heading' // Fallback for empty headings
478+ let slug = slugify ( slugSource )
479+ if ( ! slug ) slug = 'heading'
442480
443- // Handle duplicate slugs (GitHub-style: foo, foo-1, foo-2)
444481 const count = usedSlugs . get ( slug ) ?? 0
445482 usedSlugs . set ( slug , count + 1 )
446483 const uniqueSlug = count === 0 ? slug : `${ slug } -${ count } `
484+ const id = toUserContentId ( uniqueSlug )
447485
448- // Prefix with 'user-content-' to avoid collisions with page IDs
449- // (e.g., #install, #dependencies, #versions are used by the package page)
450- const id = `user-content-${ uniqueSlug } `
451-
452- // Collect TOC item with plain text (HTML stripped, entities decoded)
453- const plainText = decodeHtmlEntities ( stripHtmlTags ( text ) . trim ( ) )
454486 if ( plainText ) {
455487 toc . push ( { text : plainText , id, depth } )
456488 }
457489
458- /** The link href uses the unique slug WITHOUT the 'user-content-' prefix, because that will later be added for all links. */
459- return `<h${ semanticLevel } id="${ id } " data-level="${ depth } "><a href="#${ uniqueSlug } ">${ plainText } </a></h${ semanticLevel } >\n`
490+ return `<h${ semanticLevel } id="${ id } " data-level="${ depth } "${ preservedAttrs } ><a href="#${ id } ">${ displayHtml } </a></h${ semanticLevel } >\n`
491+ }
492+
493+ renderer . heading = function ( { tokens, depth } : Tokens . Heading ) {
494+ const displayHtml = this . parser . parseInline ( tokens )
495+ const plainText = getHeadingPlainText ( displayHtml )
496+ const slugSource = getHeadingSlugSource ( displayHtml )
497+ return processHeading ( depth , displayHtml , plainText , slugSource )
498+ }
499+
500+ // Extract and preserve allowed attributes from HTML heading tags
501+ function extractHeadingAttrs ( attrsString : string ) : string {
502+ if ( ! attrsString ) return ''
503+ const preserved : string [ ] = [ ]
504+ const alignMatch = / \b a l i g n = ( [ " ' ] ? ) ( [ ^ " ' \s > ] + ) \1/ i. exec ( attrsString )
505+ if ( alignMatch ?. [ 2 ] ) {
506+ preserved . push ( `align="${ alignMatch [ 2 ] } "` )
507+ }
508+ return preserved . length > 0 ? ` ${ preserved . join ( ' ' ) } ` : ''
509+ }
510+
511+ // Intercept HTML headings so they get id, TOC entry, and correct semantic level.
512+ // Also intercept raw HTML <a> tags so playground links are collected in the same pass.
513+ const htmlHeadingRe = / < h ( [ 1 - 6 ] ) ( \s [ ^ > ] * ) ? > ( [ \s \S ] * ?) < \/ h \1> / gi
514+ const htmlAnchorRe = / < a ( \s [ ^ > ] * ?) h r e f = ( [ " ' ] ) ( [ ^ " ' ] * ) \2( [ ^ > ] * ) > ( [ \s \S ] * ?) < \/ a > / gi
515+ renderer . html = function ( { text } : Tokens . HTML ) {
516+ let result = text . replace ( htmlHeadingRe , ( _ , level , attrs = '' , inner ) => {
517+ const depth = parseInt ( level )
518+ const plainText = getHeadingPlainText ( inner )
519+ const slugSource = getHeadingSlugSource ( inner )
520+ const preservedAttrs = extractHeadingAttrs ( attrs )
521+ return processHeading ( depth , inner , plainText , slugSource , preservedAttrs ) . trimEnd ( )
522+ } )
523+ // Process raw HTML <a> tags for playground link collection and URL resolution
524+ result = result . replace ( htmlAnchorRe , ( _full , beforeHref , _quote , href , afterHref , inner ) => {
525+ const label = decodeHtmlEntities ( stripHtmlTags ( inner ) . trim ( ) )
526+ const { resolvedHref, extraAttrs } = processLink ( href , label )
527+ const preservedAttrs = normalizePreservedAnchorAttrs ( `${ beforeHref ?? '' } ${ afterHref ?? '' } ` )
528+ return `<a${ preservedAttrs } href="${ resolvedHref } "${ extraAttrs } >${ inner } </a>`
529+ } )
530+ return result
460531 }
461532
462533 // Syntax highlighting for code blocks (uses shared highlighter)
@@ -480,7 +551,35 @@ ${html}
480551 return `<img src="${ resolvedHref } "${ altAttr } ${ titleAttr } >`
481552 }
482553
554+ // Helper: resolve a link href, collect playground links, and build <a> attributes.
555+ // Used by both the markdown renderer.link and the HTML <a> interceptor so that
556+ // all link processing happens in a single pass during marked rendering.
557+ function processLink ( href : string , label : string ) : { resolvedHref : string ; extraAttrs : string } {
558+ const resolvedHref = resolveUrl ( href , packageName , repoInfo )
559+
560+ // Collect playground links
561+ const provider = matchPlaygroundProvider ( resolvedHref )
562+ if ( provider && ! seenUrls . has ( resolvedHref ) ) {
563+ seenUrls . add ( resolvedHref )
564+ collectedLinks . push ( {
565+ url : resolvedHref ,
566+ provider : provider . id ,
567+ providerName : provider . name ,
568+ label : decodeHtmlEntities ( label || provider . name ) ,
569+ } )
570+ }
571+
572+ // Security attributes for external links
573+ let extraAttrs =
574+ resolvedHref && hasProtocol ( resolvedHref , { acceptRelative : true } )
575+ ? ' rel="nofollow noreferrer noopener" target="_blank"'
576+ : ''
577+
578+ return { resolvedHref, extraAttrs }
579+ }
580+
483581 // Resolve link URLs, add security attributes, and collect playground links
582+ // — all in a single pass during marked rendering (no deferred processing)
484583 renderer . link = function ( { href, title, tokens } : Tokens . Link ) {
485584 const text = this . parser . parseInline ( tokens )
486585 const titleAttr = title ? ` title="${ title } "` : ''
@@ -491,10 +590,9 @@ ${html}
491590 plainText = tokens [ 0 ] . text
492591 }
493592
494- const intermediateTitleAttr =
495- plainText || title ? ` data-title-intermediate="${ plainText || title } "` : ''
593+ const { resolvedHref, extraAttrs } = processLink ( href , plainText || title || '' )
496594
497- return `<a href="${ href } "${ titleAttr } ${ intermediateTitleAttr } >${ text } </a>`
595+ return `<a href="${ resolvedHref } "${ titleAttr } ${ extraAttrs } >${ text } </a>`
498596 }
499597
500598 // GitHub-style callouts: > [!NOTE], > [!TIP], etc.
@@ -514,34 +612,44 @@ ${html}
514612
515613 marked . setOptions ( { renderer } )
516614
517- const rawHtml = marked . parse ( content ) as string
615+ // Strip trailing whitespace (tabs/spaces) from code block closing fences.
616+ // While marky-markdown handles these gracefully, marked fails to recognize
617+ // the end of a code block if the closing fences are followed by unexpected whitespaces.
618+ const normalizedContent = content . replace ( / ^ ( { 0 , 3 } (?: ` { 3 , } | ~ { 3 , } ) ) \s * $ / gm, '$1' )
619+ const rawHtml = marked . parse ( normalizedContent ) as string
518620
519621 const sanitized = sanitizeHtml ( rawHtml , {
520622 allowedTags : ALLOWED_TAGS ,
521623 allowedAttributes : ALLOWED_ATTR ,
522624 allowedSchemes : [ 'http' , 'https' , 'mailto' ] ,
523625 // Transform img src URLs (GitHub blob → raw, relative → GitHub raw)
524626 transformTags : {
627+ // Headings are already processed to correct semantic levels by processHeading()
628+ // during the marked rendering pass. The sanitizer just needs to preserve them.
629+ // For any stray headings that didn't go through processHeading (shouldn't happen),
630+ // we still apply a safe fallback shift.
525631 h1 : ( _ , attribs ) => {
632+ if ( attribs [ 'data-level' ] ) return { tagName : 'h1' , attribs }
526633 return { tagName : 'h3' , attribs : { ...attribs , 'data-level' : '1' } }
527634 } ,
528635 h2 : ( _ , attribs ) => {
636+ if ( attribs [ 'data-level' ] ) return { tagName : 'h2' , attribs }
529637 return { tagName : 'h4' , attribs : { ...attribs , 'data-level' : '2' } }
530638 } ,
531639 h3 : ( _ , attribs ) => {
532- if ( attribs [ 'data-level' ] ) return { tagName : 'h3' , attribs : attribs }
640+ if ( attribs [ 'data-level' ] ) return { tagName : 'h3' , attribs }
533641 return { tagName : 'h5' , attribs : { ...attribs , 'data-level' : '3' } }
534642 } ,
535643 h4 : ( _ , attribs ) => {
536- if ( attribs [ 'data-level' ] ) return { tagName : 'h4' , attribs : attribs }
644+ if ( attribs [ 'data-level' ] ) return { tagName : 'h4' , attribs }
537645 return { tagName : 'h6' , attribs : { ...attribs , 'data-level' : '4' } }
538646 } ,
539647 h5 : ( _ , attribs ) => {
540- if ( attribs [ 'data-level' ] ) return { tagName : 'h5' , attribs : attribs }
648+ if ( attribs [ 'data-level' ] ) return { tagName : 'h5' , attribs }
541649 return { tagName : 'h6' , attribs : { ...attribs , 'data-level' : '5' } }
542650 } ,
543651 h6 : ( _ , attribs ) => {
544- if ( attribs [ 'data-level' ] ) return { tagName : 'h6' , attribs : attribs }
652+ if ( attribs [ 'data-level' ] ) return { tagName : 'h6' , attribs }
545653 return { tagName : 'h6' , attribs : { ...attribs , 'data-level' : '6' } }
546654 } ,
547655 img : ( tagName , attribs ) => {
@@ -569,31 +677,34 @@ ${html}
569677 }
570678 return { tagName, attribs }
571679 } ,
680+ // Markdown links are fully processed in renderer.link (single-pass).
681+ // However, inline HTML <a> tags inside paragraphs are NOT seen by
682+ // renderer.html (marked parses them as paragraph tokens, not html tokens).
683+ // So we still need to collect playground links here for those cases.
684+ // The seenUrls set ensures no duplicates across both paths.
572685 a : ( tagName , attribs ) => {
573686 if ( ! attribs . href ) {
574687 return { tagName, attribs }
575688 }
576689
577690 const resolvedHref = resolveUrl ( attribs . href , packageName , repoInfo )
578691
692+ // Collect playground links from inline HTML <a> tags that weren't
693+ // caught by renderer.link or renderer.html
579694 const provider = matchPlaygroundProvider ( resolvedHref )
580695 if ( provider && ! seenUrls . has ( resolvedHref ) ) {
581696 seenUrls . add ( resolvedHref )
582-
583697 collectedLinks . push ( {
584698 url : resolvedHref ,
585699 provider : provider . id ,
586700 providerName : provider . name ,
587- /**
588- * We need to set some data attribute before hand because `transformTags` doesn't
589- * provide the text of the element. This will automatically be removed, because there
590- * is an allow list for link attributes.
591- * */
592- label : decodeHtmlEntities ( attribs [ 'data-title-intermediate' ] || provider . name ) ,
701+ // sanitize-html transformTags doesn't provide element text content,
702+ // so we fall back to the provider name for the label
703+ label : provider . name ,
593704 } )
594705 }
595706
596- // Add security attributes for external links
707+ // Add security attributes for external links (idempotent)
597708 if ( resolvedHref && hasProtocol ( resolvedHref , { acceptRelative : true } ) ) {
598709 attribs . rel = 'nofollow noreferrer noopener'
599710 attribs . target = '_blank'
0 commit comments