Skip to content

Commit 8d87c8c

Browse files
committed
fix(readme): parse headings without space after # to match npm
Many READMEs in the npm registry use `#Heading` instead of `# Heading`. CommonMark (and marked) requires the space, so these render as plain text instead of headings on npmx.dev. npm's own renderer, marky-markdown (https://npmx.dev/package/marky-markdown), handles this via markdown-it-lazy-headers (https://npmx.dev/package/markdown-it-lazy-headers), a markdown-it plugin that relaxes the space requirement. This commit reimplements that behavior as a marked tokenizer extension, since we use marked rather than markdown-it. The extension only handles the no-space case and falls through to marked's default tokenizer for standard headings. Closes #1697
1 parent b6eb04f commit 8d87c8c

File tree

2 files changed

+126
-0
lines changed

2 files changed

+126
-0
lines changed

server/utils/readme.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,46 @@ function slugify(text: string): string {
228228
.replace(/^-|-$/g, '') // Trim leading/trailing hyphens
229229
}
230230

231+
/**
 * Lazy ATX heading extension for marked: allows headings without a space after `#`.
 *
 * Reimplements the behavior of markdown-it-lazy-headers
 * (https://npmx.dev/package/markdown-it-lazy-headers), which is used by npm's own markdown renderer
 * marky-markdown (https://npmx.dev/package/marky-markdown).
 *
 * CommonMark requires a space after # for ATX headings, but many READMEs in the npm registry omit
 * this space. This extension allows marked to parse these headings the same way npm does.
 *
 * Only the no-space form is handled here. Anything else returns `false`, which hands the input
 * back to marked's built-in heading tokenizer.
 */
marked.use({
  tokenizer: {
    /**
     * @param src - Markdown source starting at the position the block lexer is currently examining.
     * @returns A heading token when `src` starts with a lazy ATX heading, otherwise `false`.
     */
    heading(src: string) {
      // Up to 3 spaces of indentation, 1-6 `#`, then content whose first character is neither
      // whitespace nor `#`. Requiring a non-`#` first character also rejects runs of 7+ `#`.
      // Normal headings (with a space) do not match and fall through to the default tokenizer.
      const match = /^ {0,3}(#{1,6})([^\s#][^\n]*)(?:\n+|$)/.exec(src)
      if (!match) return false

      let text = match[2]!.trim()

      // Strip an optional closing sequence: a trailing run of `#` that is preceded by a space
      // or a tab (CommonMark behavior).
      // e.g., "#heading ##" → "heading", but "#heading#" stays as "heading#".
      // Scanning backwards from the end keeps this linear in the length of the line.
      let end = text.length
      while (end > 0 && text.charAt(end - 1) === '#') end--
      if (end < text.length) {
        const beforeClosing = text.charAt(end - 1)
        if (beforeClosing === ' ' || beforeClosing === '\t') {
          text = text.slice(0, end).trim()
        }
      }

      return {
        type: 'heading' as const,
        raw: match[0]!,
        depth: match[1]!.length,
        text,
        tokens: this.lexer.inline(text),
      }
    },
  },
})
270+
231271
/** These paths on npmjs.com don't belong to packages or search, so we shouldn't try to replace them with npmx.dev urls */
232272
const reservedPathsNpmJs = [
233273
'products',

test/unit/server/utils/readme.spec.ts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,92 @@ describe('ReadmeResponse shape (HTML route contract)', () => {
465465
})
466466
})
467467

468+
// Tests for the lazy ATX heading extension, matching the behavior of
// markdown-it-lazy-headers (https://npmx.dev/package/markdown-it-lazy-headers).
describe('Lazy ATX headings (no space after #)', () => {
  it('parses #foo through ######foo as headings', async () => {
    const markdown = '#foo\n\n##foo\n\n###foo\n\n####foo\n\n#####foo\n\n######foo'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(6)
    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 1 })
    expect(result.toc[1]).toMatchObject({ text: 'foo', depth: 2 })
    expect(result.toc[2]).toMatchObject({ text: 'foo', depth: 3 })
    expect(result.toc[3]).toMatchObject({ text: 'foo', depth: 4 })
    expect(result.toc[4]).toMatchObject({ text: 'foo', depth: 5 })
    expect(result.toc[5]).toMatchObject({ text: 'foo', depth: 6 })
  })

  it('rejects 7+ # characters as not a heading', async () => {
    const markdown = '#######foo'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(0)
    expect(result.html).toContain('#######foo')
  })

  it('does not affect headings that already have spaces', async () => {
    const markdown = '# Title\n\n## Subtitle'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(2)
    expect(result.toc[0]).toMatchObject({ text: 'Title', depth: 1 })
    expect(result.toc[1]).toMatchObject({ text: 'Subtitle', depth: 2 })
  })

  it('strips optional trailing # sequence preceded by space', async () => {
    const markdown = '##foo ##'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(1)
    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 2 })
  })

  it('keeps trailing # not preceded by space as part of content', async () => {
    const markdown = '#foo#'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(1)
    expect(result.toc[0]).toMatchObject({ text: 'foo#', depth: 1 })
  })

  it('does not modify lines inside fenced code blocks', async () => {
    const markdown = '```\n#not-a-heading\n```'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(0)
    expect(result.html).toContain('#not-a-heading')
  })

  it('handles mixed headings with and without spaces', async () => {
    const markdown = '#Title\n\nSome text\n\n## Subtitle\n\n###Another'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(3)
    expect(result.toc[0]).toMatchObject({ text: 'Title', depth: 1 })
    expect(result.toc[1]).toMatchObject({ text: 'Subtitle', depth: 2 })
    expect(result.toc[2]).toMatchObject({ text: 'Another', depth: 3 })
  })

  it('allows 1-3 spaces indentation', async () => {
    // One, two, and three leading spaces respectively, so every allowed indent width is covered.
    const markdown = ' ###foo\n\n  ##foo\n\n   #foo'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(3)
    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 3 })
    expect(result.toc[1]).toMatchObject({ text: 'foo', depth: 2 })
    expect(result.toc[2]).toMatchObject({ text: 'foo', depth: 1 })
  })

  it('works after paragraphs separated by blank lines', async () => {
    const markdown = 'Foo bar\n\n#baz\n\nBar foo'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(1)
    expect(result.toc[0]).toMatchObject({ text: 'baz', depth: 1 })
  })
})
553+
468554
describe('HTML output', () => {
469555
it('returns sanitized html', async () => {
470556
const markdown = `# Title\n\nSome **bold** text and a [link](https://example.com).`

0 commit comments

Comments
 (0)