Skip to content

Commit cbba528

Browse files
committed
fix(readme): fetch from jsDelivr when packument readme is likely truncated
The npm registry truncates the packument readme field at 65,536 characters. When the packument readme is 64,000 characters or longer, it is treated as likely truncated and the full file is fetched from the jsDelivr CDN instead. Closes #1458
1 parent f628414 commit cbba528

3 files changed

Lines changed: 105 additions & 112 deletions

File tree

server/utils/readme-loaders.ts

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import * as v from 'valibot'
22
import { PackageRouteParamsSchema } from '#shared/schemas/package'
3-
import { CACHE_MAX_AGE_ONE_HOUR, NPM_MISSING_README_SENTINEL } from '#shared/utils/constants'
3+
import {
4+
CACHE_MAX_AGE_ONE_HOUR,
5+
NPM_MISSING_README_SENTINEL,
6+
NPM_README_TRUNCATION_THRESHOLD,
7+
} from '#shared/utils/constants'
48

59
/** Standard README filenames to try when fetching from jsdelivr (case-sensitive CDN) */
610
const standardReadmeFilenames = [
@@ -58,34 +62,39 @@ export const resolvePackageReadmeSource = defineCachedFunction(
5862
})
5963

6064
const packageData = await fetchNpmPackage(packageName)
61-
const resolvedVersion = version ?? packageData['dist-tags']?.latest
6265

63-
// Prefer jsDelivr (actual file from npm tarball) because the npm registry
64-
// truncates the packument readme field at 65,536 characters.
65-
let readmeContent = await fetchReadmeFromJsdelivr(
66-
packageName,
67-
standardReadmeFilenames,
68-
resolvedVersion,
69-
)
70-
71-
// Fall back to packument readme if jsDelivr didn't have a standard README.
72-
// This covers packages with non-standard readme filenames (e.g. README.zh-TW.md)
73-
// or packages that don't include a README in the tarball.
74-
if (!readmeContent) {
75-
let packumentReadme: string | undefined
76-
77-
if (version) {
78-
packumentReadme = packageData.versions?.[version]?.readme
79-
} else {
80-
packumentReadme = packageData.readme
66+
let readmeContent: string | undefined
67+
let readmeFilename: string | undefined
68+
69+
if (version) {
70+
const versionData = packageData.versions[version]
71+
if (versionData) {
72+
readmeContent = versionData.readme
73+
readmeFilename = versionData.readmeFilename
8174
}
75+
} else {
76+
readmeContent = packageData.readme
77+
readmeFilename = packageData.readmeFilename
78+
}
8279

83-
if (packumentReadme && packumentReadme !== NPM_MISSING_README_SENTINEL) {
84-
readmeContent = packumentReadme
80+
const hasValidNpmReadme = readmeContent && readmeContent !== NPM_MISSING_README_SENTINEL
81+
82+
const isLikelyTruncated =
83+
hasValidNpmReadme && readmeContent!.length >= NPM_README_TRUNCATION_THRESHOLD
84+
85+
if (!hasValidNpmReadme || !isStandardReadme(readmeFilename) || isLikelyTruncated) {
86+
const resolvedVersion = version ?? packageData['dist-tags']?.latest
87+
const jsdelivrReadme = await fetchReadmeFromJsdelivr(
88+
packageName,
89+
standardReadmeFilenames,
90+
resolvedVersion,
91+
)
92+
if (jsdelivrReadme) {
93+
readmeContent = jsdelivrReadme
8594
}
8695
}
8796

88-
if (!readmeContent) {
97+
if (!readmeContent || readmeContent === NPM_MISSING_README_SENTINEL) {
8998
return {
9099
packageName,
91100
version,

shared/utils/constants.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ export const ERROR_PACKAGE_REQUIREMENTS_FAILED =
2121
export const ERROR_FILE_LIST_FETCH_FAILED = 'Failed to fetch file list.'
2222
export const ERROR_CALC_INSTALL_SIZE_FAILED = 'Failed to calculate install size.'
2323
export const NPM_MISSING_README_SENTINEL = 'ERROR: No README data found!'
24+
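/** Packument readme length (in characters) at or above which the readme is treated as likely truncated; set just below the 65,536-character (2^16) limit at which the npm registry truncates the readme field */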
25+
export const NPM_README_TRUNCATION_THRESHOLD = 64_000
2426
export const ERROR_JSR_FETCH_FAILED = 'Failed to fetch package from JSR registry.'
2527
export const ERROR_NPM_FETCH_FAILED = 'Failed to fetch package from npm registry.'
2628
export const ERROR_PROVENANCE_FETCH_FAILED = 'Failed to fetch provenance.'

test/unit/server/utils/readme-loaders.spec.ts

Lines changed: 71 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,13 @@ describe('resolvePackageReadmeSource', () => {
8181
parseRepositoryInfoMock.mockReset()
8282
})
8383

84-
it('prefers jsDelivr readme over packument readme (latest)', async () => {
85-
const jsdelivrContent = '# Full README from CDN'
84+
it('returns markdown and repoInfo when package has valid npm readme (latest)', async () => {
85+
const markdown = '# Hello'
8686
fetchNpmPackageMock.mockResolvedValue({
87-
'readme': '# Truncated',
88-
'readmeFilename': 'README.md',
89-
'repository': { url: 'https://github.com/u/r' },
90-
'versions': {},
91-
'dist-tags': { latest: '2.0.0' },
87+
readme: markdown,
88+
readmeFilename: 'README.md',
89+
repository: { url: 'https://github.com/u/r' },
90+
versions: {},
9291
})
9392
parseRepositoryInfoMock.mockReturnValue({
9493
provider: 'github',
@@ -97,53 +96,46 @@ describe('resolvePackageReadmeSource', () => {
9796
rawBaseUrl: 'https://raw.githubusercontent.com/u/r/HEAD',
9897
blobBaseUrl: 'https://github.com/u/r/blob/HEAD',
9998
})
100-
const fetchMock = vi.fn().mockResolvedValue({
101-
ok: true,
102-
text: async () => jsdelivrContent,
103-
})
104-
vi.stubGlobal('fetch', fetchMock)
10599

106100
const result = await resolvePackageReadmeSource('some-pkg')
107101

108102
expect(result).toMatchObject({
109103
packageName: 'some-pkg',
110104
version: undefined,
111-
markdown: jsdelivrContent,
105+
markdown,
112106
repoInfo: { provider: 'github', owner: 'u', repo: 'r' },
113107
})
114108
expect(fetchNpmPackageMock).toHaveBeenCalledWith('some-pkg')
115109
})
116110

117-
it('uses resolved latest version for jsDelivr when no version specified', async () => {
111+
it('returns markdown from version when packagePath includes version', async () => {
112+
const markdown = '# Version readme'
118113
fetchNpmPackageMock.mockResolvedValue({
119-
'readme': '# Packument',
120-
'readmeFilename': 'README.md',
121-
'repository': undefined,
122-
'versions': {},
123-
'dist-tags': { latest: '3.1.0' },
114+
readme: 'latest readme',
115+
readmeFilename: 'README.md',
116+
repository: undefined,
117+
versions: {
118+
'1.0.0': { readme: markdown, readmeFilename: 'README.md' },
119+
},
124120
})
125121
parseRepositoryInfoMock.mockReturnValue(undefined)
126-
const fetchMock = vi.fn().mockResolvedValue({
127-
ok: true,
128-
text: async () => '# CDN',
129-
})
130-
vi.stubGlobal('fetch', fetchMock)
131122

132-
await resolvePackageReadmeSource('pkg')
123+
const result = await resolvePackageReadmeSource('some-pkg/v/1.0.0')
133124

134-
expect(fetchMock).toHaveBeenCalledWith(expect.stringContaining('pkg@3.1.0'))
125+
expect(result).toMatchObject({
126+
packageName: 'some-pkg',
127+
version: '1.0.0',
128+
markdown,
129+
})
135130
})
136131

137-
it('returns markdown from specific version jsDelivr when packagePath includes version', async () => {
138-
const jsdelivrContent = '# Version readme from CDN'
132+
it('falls back to jsdelivr when npm readme is missing sentinel', async () => {
133+
const jsdelivrContent = '# From CDN'
139134
fetchNpmPackageMock.mockResolvedValue({
140-
'readme': 'latest readme',
141-
'readmeFilename': 'README.md',
142-
'repository': undefined,
143-
'versions': {
144-
'1.0.0': { readme: 'version readme from packument', readmeFilename: 'README.md' },
145-
},
146-
'dist-tags': { latest: '2.0.0' },
135+
readme: NPM_MISSING_README_SENTINEL,
136+
readmeFilename: 'README.md',
137+
repository: undefined,
138+
versions: {},
147139
})
148140
parseRepositoryInfoMock.mockReturnValue(undefined)
149141
const fetchMock = vi.fn().mockResolvedValue({
@@ -152,69 +144,64 @@ describe('resolvePackageReadmeSource', () => {
152144
})
153145
vi.stubGlobal('fetch', fetchMock)
154146

155-
const result = await resolvePackageReadmeSource('some-pkg/v/1.0.0')
147+
const result = await resolvePackageReadmeSource('pkg')
156148

157149
expect(result).toMatchObject({
158-
packageName: 'some-pkg',
159-
version: '1.0.0',
150+
packageName: 'pkg',
160151
markdown: jsdelivrContent,
152+
repoInfo: undefined,
161153
})
162-
expect(fetchMock).toHaveBeenCalledWith(expect.stringContaining('some-pkg@1.0.0'))
154+
expect(fetchMock).toHaveBeenCalled()
163155
})
164156

165-
it('falls back to packument readme when jsDelivr fails', async () => {
166-
const packumentReadme = '# From packument'
157+
it('falls back to jsdelivr when readmeFilename is not standard', async () => {
158+
const jsdelivrContent = '# From CDN'
167159
fetchNpmPackageMock.mockResolvedValue({
168-
'readme': packumentReadme,
169-
'readmeFilename': 'README.md',
170-
'repository': undefined,
171-
'versions': {},
172-
'dist-tags': { latest: '1.0.0' },
160+
readme: 'content',
161+
readmeFilename: 'DOCS.md',
162+
repository: undefined,
163+
versions: {},
173164
})
174165
parseRepositoryInfoMock.mockReturnValue(undefined)
175-
const fetchMock = vi.fn().mockResolvedValue({ ok: false })
166+
const fetchMock = vi.fn().mockResolvedValue({
167+
ok: true,
168+
text: async () => jsdelivrContent,
169+
})
176170
vi.stubGlobal('fetch', fetchMock)
177171

178172
const result = await resolvePackageReadmeSource('pkg')
179173

180-
expect(result).toMatchObject({
181-
packageName: 'pkg',
182-
markdown: packumentReadme,
183-
})
174+
expect(result).toMatchObject({ markdown: jsdelivrContent })
184175
})
185176

186-
it('falls back to version packument readme when jsDelivr fails', async () => {
187-
const versionReadme = '# Version readme'
177+
it('returns undefined markdown when no content and jsdelivr fails', async () => {
188178
fetchNpmPackageMock.mockResolvedValue({
189-
'readme': 'latest readme',
190-
'repository': undefined,
191-
'versions': {
192-
'1.0.0': { readme: versionReadme },
193-
},
194-
'dist-tags': { latest: '1.0.0' },
179+
readme: undefined,
180+
readmeFilename: undefined,
181+
repository: undefined,
182+
versions: {},
195183
})
196184
parseRepositoryInfoMock.mockReturnValue(undefined)
197185
const fetchMock = vi.fn().mockResolvedValue({ ok: false })
198186
vi.stubGlobal('fetch', fetchMock)
199187

200-
const result = await resolvePackageReadmeSource('pkg/v/1.0.0')
188+
const result = await resolvePackageReadmeSource('pkg')
201189

202190
expect(result).toMatchObject({
203191
packageName: 'pkg',
204-
version: '1.0.0',
205-
markdown: versionReadme,
192+
version: undefined,
193+
markdown: undefined,
194+
repoInfo: undefined,
206195
})
207196
})
208197

209-
it('skips packument readme with missing sentinel in fallback', async () => {
198+
it('returns undefined markdown when content is NPM_MISSING_README_SENTINEL and jsdelivr fails', async () => {
210199
fetchNpmPackageMock.mockResolvedValue({
211-
'readme': NPM_MISSING_README_SENTINEL,
212-
'readmeFilename': 'README.md',
213-
'repository': undefined,
214-
'versions': {},
215-
'dist-tags': { latest: '1.0.0' },
200+
readme: NPM_MISSING_README_SENTINEL,
201+
readmeFilename: 'README.md',
202+
repository: undefined,
203+
versions: {},
216204
})
217-
parseRepositoryInfoMock.mockReturnValue(undefined)
218205
const fetchMock = vi.fn().mockResolvedValue({ ok: false })
219206
vi.stubGlobal('fetch', fetchMock)
220207

@@ -227,35 +214,35 @@ describe('resolvePackageReadmeSource', () => {
227214
})
228215
})
229216

230-
it('returns undefined markdown when no content anywhere', async () => {
217+
it('fetches from jsdelivr when packument readme exceeds truncation threshold', async () => {
218+
const truncatedReadme = 'x'.repeat(64_000)
219+
const fullReadme = 'x'.repeat(80_000)
231220
fetchNpmPackageMock.mockResolvedValue({
232-
'readme': undefined,
233-
'readmeFilename': undefined,
221+
'readme': truncatedReadme,
222+
'readmeFilename': 'README.md',
234223
'repository': undefined,
235224
'versions': {},
236225
'dist-tags': { latest: '1.0.0' },
237226
})
238227
parseRepositoryInfoMock.mockReturnValue(undefined)
239-
const fetchMock = vi.fn().mockResolvedValue({ ok: false })
228+
const fetchMock = vi.fn().mockResolvedValue({
229+
ok: true,
230+
text: async () => fullReadme,
231+
})
240232
vi.stubGlobal('fetch', fetchMock)
241233

242234
const result = await resolvePackageReadmeSource('pkg')
243235

244-
expect(result).toMatchObject({
245-
packageName: 'pkg',
246-
version: undefined,
247-
markdown: undefined,
248-
repoInfo: undefined,
249-
})
236+
expect(result).toMatchObject({ markdown: fullReadme })
237+
expect(fetchMock).toHaveBeenCalled()
250238
})
251239

252240
it('uses package repository for repoInfo when markdown is present', async () => {
253241
fetchNpmPackageMock.mockResolvedValue({
254-
'readme': '# Hi',
255-
'readmeFilename': 'README.md',
256-
'repository': { url: 'https://github.com/a/b' },
257-
'versions': {},
258-
'dist-tags': { latest: '1.0.0' },
242+
readme: '# Hi',
243+
readmeFilename: 'README.md',
244+
repository: { url: 'https://github.com/a/b' },
245+
versions: {},
259246
})
260247
const repoInfo = {
261248
provider: 'github' as const,
@@ -265,11 +252,6 @@ describe('resolvePackageReadmeSource', () => {
265252
blobBaseUrl: 'https://github.com/a/b/blob/HEAD',
266253
}
267254
parseRepositoryInfoMock.mockReturnValue(repoInfo)
268-
const fetchMock = vi.fn().mockResolvedValue({
269-
ok: true,
270-
text: async () => '# CDN',
271-
})
272-
vi.stubGlobal('fetch', fetchMock)
273255

274256
const result = await resolvePackageReadmeSource('pkg')
275257

0 commit comments

Comments
 (0)