fix(readme): parse headings without space after # to match npm

serhalp · serhalp · commit 8d87c8cc2e7e · 2026-02-27T15:34:21.000-05:00
Many READMEs in the npm registry use `#Heading` instead of `# Heading`. CommonMark (and marked) requires the space, so these render as plain text instead of headings on npmx.dev. npm's own renderer (https://npmx.dev/package/marky-markdown) handles this via https://npmx.dev/package/markdown-it-lazy-headers, a markdown-it plugin that relaxes the space requirement. This commit reimplements that behavior as a marked tokenizer extension, since we use marked rather than markdown-it. The extension only handles the no-space case and falls through to marked's default tokenizer for standard headings. Closes #1697
diff --git a/server/utils/readme.ts b/server/utils/readme.ts
@@ -228,6 +228,46 @@ function slugify(text: string): string {
     .replace(/^-|-$/g, '') // Trim leading/trailing hyphens
 }
 
+/**
+ * Lazy ATX heading extension for marked: accepts headings with no space after the `#` run,
+ * e.g. `##Install`. Depth is the number of `#` (1-6); up to 3 leading spaces are allowed.
+ * Input that does not match returns `false`, so marked's default heading tokenizer handles it.
+ *
+ * Reimplements the behavior of markdown-it-lazy-headers
+ * (https://npmx.dev/package/markdown-it-lazy-headers), which is used by npm's own markdown renderer
+ * marky-markdown (https://npmx.dev/package/marky-markdown). CommonMark requires a space after `#`,
+ * but many READMEs in the npm registry omit it; this lets marked parse them the same way npm does.
+ */
+marked.use({
+  tokenizer: {
+    heading(src: string) {
+      // Match only when the `#` run (1-6) is immediately followed by non-whitespace, non-`#` content,
+      // so 7+ `#` never match. Normal headings (with space) return false to use marked's default tokenizer.
+      const match = /^ {0,3}(#{1,6})([^\s#][^\n]*)(?:\n+|$)/.exec(src)
+      if (!match) return false
+
+      let text = match[2]!.trim() // heading content with surrounding whitespace removed
+
+      // Strip a trailing run of # characters only if it is preceded by a space (CommonMark behavior).
+      // e.g., "#heading ##" → "heading", but "#heading#" stays as "heading#"
+      if (text.endsWith('#')) {
+        const stripped = text.replace(/#+$/, '') // text without the trailing # run
+        if (!stripped || stripped.endsWith(' ')) {
+          text = stripped.trim()
+        }
+      }
+
+      return {
+        type: 'heading' as const,
+        raw: match[0]!, // full matched source, including any trailing newlines
+        depth: match[1]!.length as number, // heading level = number of leading # characters
+        text,
+        tokens: this.lexer.inline(text), // inline tokens for the heading content
+      }
+    },
+  },
+})
+
 /** These path on npmjs.com don't belong to packages or search, so we shouldn't try to replace them with npmx.dev urls */
 const reservedPathsNpmJs = [
   'products',
diff --git a/test/unit/server/utils/readme.spec.ts b/test/unit/server/utils/readme.spec.ts
@@ -465,6 +465,92 @@ describe('ReadmeResponse shape (HTML route contract)', () => {
   })
 })
 
+// Tests for the lazy ATX heading extension (headings written without a space after `#`),
+// matching the behavior of markdown-it-lazy-headers (https://npmx.dev/package/markdown-it-lazy-headers).
+describe('Lazy ATX headings (no space after #)', () => {
+  it('parses #foo through ######foo as headings', async () => {
+    const markdown = '#foo\n\n##foo\n\n###foo\n\n####foo\n\n#####foo\n\n######foo' // one lazy heading per depth 1-6
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(6)
+    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 1 })
+    expect(result.toc[1]).toMatchObject({ text: 'foo', depth: 2 })
+    expect(result.toc[2]).toMatchObject({ text: 'foo', depth: 3 })
+    expect(result.toc[3]).toMatchObject({ text: 'foo', depth: 4 })
+    expect(result.toc[4]).toMatchObject({ text: 'foo', depth: 5 })
+    expect(result.toc[5]).toMatchObject({ text: 'foo', depth: 6 })
+  })
+
+  it('rejects 7+ # characters as not a heading', async () => {
+    const markdown = '#######foo' // seven # characters: one more than the deepest heading level
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(0)
+    expect(result.html).toContain('#######foo') // the line is kept as literal text
+  })
+
+  it('does not affect headings that already have spaces', async () => {
+    const markdown = '# Title\n\n## Subtitle' // standard headings, with a space after the # run
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(2)
+    expect(result.toc[0]).toMatchObject({ text: 'Title', depth: 1 })
+    expect(result.toc[1]).toMatchObject({ text: 'Subtitle', depth: 2 })
+  })
+
+  it('strips optional trailing # sequence preceded by space', async () => {
+    const markdown = '##foo ##' // closing ## is separated from the content by a space
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(1)
+    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 2 })
+  })
+
+  it('keeps trailing # not preceded by space as part of content', async () => {
+    const markdown = '#foo#' // no space before the final #, so it belongs to the heading text
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(1)
+    expect(result.toc[0]).toMatchObject({ text: 'foo#', depth: 1 })
+  })
+
+  it('does not modify lines inside fenced code blocks', async () => {
+    const markdown = '```\n#not-a-heading\n```' // lazy-heading-looking line inside a code fence
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(0)
+    expect(result.html).toContain('#not-a-heading')
+  })
+
+  it('handles mixed headings with and without spaces', async () => {
+    const markdown = '#Title\n\nSome text\n\n## Subtitle\n\n###Another' // lazy, standard, lazy
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(3)
+    expect(result.toc[0]).toMatchObject({ text: 'Title', depth: 1 })
+    expect(result.toc[1]).toMatchObject({ text: 'Subtitle', depth: 2 })
+    expect(result.toc[2]).toMatchObject({ text: 'Another', depth: 3 })
+  })
+
+  it('allows 1-3 spaces indentation', async () => {
+    const markdown = ' ###foo\n\n  ##foo\n\n   #foo' // indented by 1, 2 and 3 spaces respectively
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(3)
+    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 3 })
+    expect(result.toc[1]).toMatchObject({ text: 'foo', depth: 2 })
+    expect(result.toc[2]).toMatchObject({ text: 'foo', depth: 1 })
+  })
+
+  it('works after paragraphs separated by blank lines', async () => {
+    const markdown = 'Foo bar\n\n#baz\n\nBar foo' // lazy heading between two paragraphs
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(1)
+    expect(result.toc[0]).toMatchObject({ text: 'baz', depth: 1 })
+  })
+})
+
 describe('HTML output', () => {
   it('returns sanitized html', async () => {
     const markdown = `# Title\n\nSome **bold** text and a [link](https://example.com).`