Skip to content

Commit 433494c

Browse files
authored
fix: parse markdown headings without space after # to match npmjs (#1717)
1 parent 78a64c7 commit 433494c

File tree

2 files changed

+126
-0
lines changed

2 files changed

+126
-0
lines changed

server/utils/readme.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,46 @@ function slugify(text: string): string {
212212
.replace(/^-|-$/g, '') // Trim leading/trailing hyphens
213213
}
214214

215+
/**
 * Lazy ATX heading extension for marked: allows headings without a space after `#`.
 *
 * Reimplements the behavior of markdown-it-lazy-headers
 * (https://npmx.dev/package/markdown-it-lazy-headers), which is used by npm's own markdown renderer
 * marky-markdown (https://npmx.dev/package/marky-markdown).
 *
 * CommonMark requires a space after # for ATX headings, but many READMEs in the npm registry omit
 * this space. This extension allows marked to parse these headings the same way npm does.
 *
 * The tokenizer only claims lines of the form `#text` (up to three leading spaces, one to six `#`,
 * then a character that is neither whitespace nor `#`). For everything else it returns `false`,
 * so regular `# text` headings are still handled by marked's default heading tokenizer.
 */
marked.use({
  tokenizer: {
    heading(src: string) {
      // Only match headings where `#` is immediately followed by non-whitespace, non-`#` content.
      // Normal headings (with space) return false to fall through to marked's default tokenizer.
      // Because the first content character may not be `#`, a run of 7+ `#` never matches either.
      const match = /^ {0,3}(#{1,6})([^\s#][^\n]*)(?:\n+|$)/.exec(src)
      if (!match) return false

      let text = match[2]!.trim()

      // Strip the optional closing `#` sequence only if it is preceded by a space or a tab
      // (CommonMark behavior, and what markdown-it-lazy-headers does).
      // e.g., "#heading ##" → "heading", but "#heading#" stays as "heading#"
      if (text.endsWith('#')) {
        const stripped = text.replace(/#+$/, '')
        // `stripped` cannot be empty here: `text` starts with a character that is not `#`,
        // so only the separator before the closing sequence needs to be checked.
        if (/[ \t]$/.test(stripped)) {
          text = stripped.trim()
        }
      }

      return {
        type: 'heading' as const,
        // `raw` includes the trailing newline(s) consumed by the match.
        raw: match[0]!,
        // Heading level is the number of leading `#` characters (1-6).
        depth: match[1]!.length,
        text,
        tokens: this.lexer.inline(text),
      }
    },
  },
})
254+
215255
/** These paths on npmjs.com don't belong to packages or search, so we shouldn't try to replace them with npmx.dev URLs */
216256
const reservedPathsNpmJs = [
217257
'products',

test/unit/server/utils/readme.spec.ts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,92 @@ describe('ReadmeResponse shape (HTML route contract)', () => {
465465
})
466466
})
467467

468+
// Tests for the lazy ATX heading extension, matching the behavior of
// markdown-it-lazy-headers (https://npmx.dev/package/markdown-it-lazy-headers).
// Each case renders a small markdown snippet and inspects the resulting table of contents
// (one entry per parsed heading) and, where relevant, the rendered HTML.
describe('Lazy ATX headings (no space after #)', () => {
  it('parses #foo through ######foo as headings', async () => {
    const markdown = '#foo\n\n##foo\n\n###foo\n\n####foo\n\n#####foo\n\n######foo'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    // One heading per level, in document order.
    expect(result.toc).toHaveLength(6)
    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 1 })
    expect(result.toc[1]).toMatchObject({ text: 'foo', depth: 2 })
    expect(result.toc[2]).toMatchObject({ text: 'foo', depth: 3 })
    expect(result.toc[3]).toMatchObject({ text: 'foo', depth: 4 })
    expect(result.toc[4]).toMatchObject({ text: 'foo', depth: 5 })
    expect(result.toc[5]).toMatchObject({ text: 'foo', depth: 6 })
  })

  it('rejects 7+ # characters as not a heading', async () => {
    const markdown = '#######foo'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    // The line must survive as literal text instead of becoming a heading.
    expect(result.toc).toHaveLength(0)
    expect(result.html).toContain('#######foo')
  })

  it('does not affect headings that already have spaces', async () => {
    const markdown = '# Title\n\n## Subtitle'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(2)
    expect(result.toc[0]).toMatchObject({ text: 'Title', depth: 1 })
    expect(result.toc[1]).toMatchObject({ text: 'Subtitle', depth: 2 })
  })

  it('strips optional trailing # sequence preceded by space', async () => {
    const markdown = '##foo ##'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(1)
    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 2 })
  })

  it('keeps trailing # not preceded by space as part of content', async () => {
    const markdown = '#foo#'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(1)
    expect(result.toc[0]).toMatchObject({ text: 'foo#', depth: 1 })
  })

  it('does not modify lines inside fenced code blocks', async () => {
    const markdown = '```\n#not-a-heading\n```'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(0)
    expect(result.html).toContain('#not-a-heading')
  })

  it('handles mixed headings with and without spaces', async () => {
    const markdown = '#Title\n\nSome text\n\n## Subtitle\n\n###Another'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(3)
    expect(result.toc[0]).toMatchObject({ text: 'Title', depth: 1 })
    expect(result.toc[1]).toMatchObject({ text: 'Subtitle', depth: 2 })
    expect(result.toc[2]).toMatchObject({ text: 'Another', depth: 3 })
  })

  it('allows 1-3 spaces indentation', async () => {
    // Exercise every permitted indent width: 1, 2 and 3 leading spaces.
    const markdown = ' ###foo\n\n  ##foo\n\n   #foo'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(3)
    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 3 })
    expect(result.toc[1]).toMatchObject({ text: 'foo', depth: 2 })
    expect(result.toc[2]).toMatchObject({ text: 'foo', depth: 1 })
  })

  it('works after paragraphs separated by blank lines', async () => {
    const markdown = 'Foo bar\n\n#baz\n\nBar foo'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(1)
    expect(result.toc[0]).toMatchObject({ text: 'baz', depth: 1 })
  })
})
553+
468554
describe('HTML output', () => {
469555
it('returns sanitized html', async () => {
470556
const markdown = `# Title\n\nSome **bold** text and a [link](https://example.com).`

0 commit comments

Comments
 (0)