Skip to content

Commit b7b4a1d

Browse files
committed
feat(llms-txt): add core utility functions
Add discoverAgentFiles, fetchAgentFiles, generateLlmsTxt, and handleLlmsTxt orchestrator for llms.txt generation from npm packages.
1 parent 4ba35ca commit b7b4a1d

File tree

1 file changed

+246
-0
lines changed

1 file changed

+246
-0
lines changed

server/utils/llms-txt.ts

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
import type { Packument } from '@npm/types'
2+
import type { JsDelivrFileNode, AgentFile, LlmsTxtResult } from '#shared/types'
3+
import { NPM_MISSING_README_SENTINEL } from '#shared/utils/constants'
4+
5+
/**
 * Well-known agent instruction files that live at the package root.
 *
 * Keys are exact file names; values are the human-readable labels used as the
 * file's display name in the generated llms.txt.
 */
const ROOT_AGENT_FILES: Record<string, string> = {
  'CLAUDE.md': 'Claude Code',
  'AGENTS.md': 'Agent Instructions',
  'AGENT.md': 'Agent Instructions',
  '.cursorrules': 'Cursor Rules',
  '.windsurfrules': 'Windsurf Rules',
  '.clinerules': 'Cline Rules',
}
14+
15+
/**
 * Well-known agent instruction files that live inside a specific directory.
 *
 * Keys are full paths relative to the package root; values are the
 * human-readable labels used as the file's display name.
 */
const DIRECTORY_AGENT_FILES: Record<string, string> = {
  '.github/copilot-instructions.md': 'GitHub Copilot',
}
19+
20+
/**
 * Directories whose `*.md` children are treated as rule files.
 *
 * Keys must have exactly the shape `<topDir>/<subDir>`: discovery splits the
 * key on `/` and only looks at the first two segments. Values are the label
 * prefix shown in front of each rule file's name.
 */
const RULE_DIRECTORIES: Record<string, string> = {
  '.cursor/rules': 'Cursor Rules',
  '.windsurf/rules': 'Windsurf Rules',
}
25+
26+
/**
 * Discover agent instruction file paths from a jsDelivr file tree.
 *
 * Only the top-level nodes of `files` are walked. For each node, in order:
 * - a file whose name is a key of `ROOT_AGENT_FILES` is reported by name;
 * - a `.github` directory contributes children listed in `DIRECTORY_AGENT_FILES`;
 * - a directory matching the first segment of a `RULE_DIRECTORIES` key
 *   contributes every `*.md` file directly inside the matching sub-directory.
 *
 * @param files Top-level nodes of the package file tree.
 * @returns Paths relative to the package root, in traversal order.
 */
export function discoverAgentFiles(files: JsDelivrFileNode[]): string[] {
  // Own-property test on purpose: `key in table` also matches inherited keys,
  // so a package file literally named `constructor` or `toString` would be
  // reported as an agent file.
  const isListed = (table: Record<string, string>, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(table, key)

  const discovered: string[] = []

  for (const node of files) {
    // Root-level well-known files
    if (node.type === 'file') {
      if (isListed(ROOT_AGENT_FILES, node.name)) discovered.push(node.name)
      continue
    }

    // Everything below needs a directory that actually lists its children
    const children = node.type === 'directory' ? node.files : undefined
    if (!children) continue

    // .github/copilot-instructions.md
    if (node.name === '.github') {
      for (const child of children) {
        const fullPath = `.github/${child.name}`
        if (child.type === 'file' && isListed(DIRECTORY_AGENT_FILES, fullPath)) {
          discovered.push(fullPath)
        }
      }
    }

    // .cursor/rules/*.md and .windsurf/rules/*.md
    for (const dirPath of Object.keys(RULE_DIRECTORIES)) {
      const [topDir, subDir] = dirPath.split('/')
      if (node.name !== topDir) continue

      const rulesDir = children.find(f => f.type === 'directory' && f.name === subDir)
      for (const ruleFile of rulesDir?.files ?? []) {
        if (ruleFile.type === 'file' && ruleFile.name.endsWith('.md')) {
          discovered.push(`${dirPath}/${ruleFile.name}`)
        }
      }
    }
  }

  return discovered
}
70+
71+
/**
 * Get the display name for an agent file path.
 *
 * Resolution order:
 * 1. exact match in `ROOT_AGENT_FILES`, then `DIRECTORY_AGENT_FILES`;
 * 2. a path under a `RULE_DIRECTORIES` key yields `"<label>: <file name>"`;
 * 3. otherwise the path itself is returned unchanged.
 *
 * @param filePath Path relative to the package root.
 * @returns A human-readable label for the file.
 */
function getDisplayName(filePath: string): string {
  // Own-property lookups only: `in` / plain indexing would also hit inherited
  // keys such as `constructor` and hand back a non-string value.
  for (const table of [ROOT_AGENT_FILES, DIRECTORY_AGENT_FILES]) {
    if (Object.prototype.hasOwnProperty.call(table, filePath)) {
      const label = table[filePath]
      if (label !== undefined) return label
    }
  }

  const fileName = filePath.slice(filePath.lastIndexOf('/') + 1)
  for (const [dirPath, label] of Object.entries(RULE_DIRECTORIES)) {
    if (filePath.startsWith(`${dirPath}/`)) return `${label}: ${fileName}`
  }

  return filePath
}
84+
85+
/**
 * Fetch agent instruction files from jsDelivr CDN.
 *
 * All files are requested in parallel. A file is silently skipped when the
 * response is not OK or when fetching/reading it throws, so one missing file
 * never fails the whole batch.
 *
 * @param packageName npm package name (inserted into the URL as-is).
 * @param version Package version (inserted into the URL as-is).
 * @param filePaths Paths relative to the package root.
 * @returns Successfully fetched files, in the same order as `filePaths`.
 */
export async function fetchAgentFiles(
  packageName: string,
  version: string,
  filePaths: string[],
): Promise<AgentFile[]> {
  const baseUrl = `https://cdn.jsdelivr.net/npm/${packageName}@${version}`

  const fetchOne = async (path: string): Promise<AgentFile | null> => {
    try {
      // Encode each segment separately so `/` keeps acting as the separator
      // while `#`, `?` or `%` inside a file name stay part of the URL path
      // instead of being parsed as a fragment, query or escape sequence.
      const encodedPath = path.split('/').map(segment => encodeURIComponent(segment)).join('/')
      const response = await fetch(`${baseUrl}/${encodedPath}`)
      if (!response.ok) return null
      const content = await response.text()
      return { path, content, displayName: getDisplayName(path) }
    } catch {
      // Best effort: a network or decoding failure only drops this one file
      return null
    }
  }

  const results = await Promise.all(filePaths.map(path => fetchOne(path)))
  return results.filter((r): r is AgentFile => r !== null)
}
110+
111+
/**
 * Generate llms.txt markdown content per the llmstxt.org spec.
 *
 * Structure:
 * - H1 title with package name and version
 * - Blockquote description (if available; every line is quoted)
 * - Metadata list (homepage, repository, npm)
 * - README section (if available)
 * - Agent Instructions section (one sub-heading per file)
 *
 * @param result Collected package data to render.
 * @returns The document, with trailing whitespace collapsed to one newline.
 */
export function generateLlmsTxt(result: LlmsTxtResult): string {
  const { packageName, version } = result

  // Title
  const lines: string[] = [`# ${packageName}@${version}`, '']

  // Description blockquote. Each line gets its own marker so a multi-line
  // description (including blank lines) stays inside a single blockquote.
  if (result.description) {
    for (const line of result.description.split(/\r?\n/)) {
      lines.push(line === '' ? '>' : `> ${line}`)
    }
    lines.push('')
  }

  // Metadata (the npm link is always present)
  if (result.homepage) lines.push(`- Homepage: ${result.homepage}`)
  if (result.repositoryUrl) lines.push(`- Repository: ${result.repositoryUrl}`)
  lines.push(`- npm: https://www.npmjs.com/package/${packageName}/v/${version}`, '')

  // README
  if (result.readme) {
    lines.push('## README', '', result.readme, '')
  }

  // Agent instructions
  if (result.agentFiles.length > 0) {
    lines.push('## Agent Instructions', '')
    for (const file of result.agentFiles) {
      lines.push(`### ${file.displayName} (\`${file.path}\`)`, '', file.content, '')
    }
  }

  return lines.join('\n').trimEnd() + '\n'
}
165+
166+
/** Standard README filenames to try from jsDelivr CDN, in lookup order */
const README_FILENAMES = ['README.md', 'readme.md', 'Readme.md']
168+
169+
/**
 * Fetch README from jsDelivr CDN as fallback.
 *
 * Tries each name in `README_FILENAMES` one after another and stops at the
 * first OK response. Candidates are requested sequentially so later names are
 * only fetched when the earlier ones were not found or failed.
 *
 * @param packageName npm package name.
 * @param version Package version to read the file from.
 * @returns The README text, or `null` when no candidate could be fetched.
 */
async function fetchReadmeFromCdn(packageName: string, version: string): Promise<string | null> {
  const baseUrl = `https://cdn.jsdelivr.net/npm/${packageName}@${version}`

  for (const filename of README_FILENAMES) {
    try {
      const response = await fetch(`${baseUrl}/${filename}`)
      if (response.ok) return await response.text()
    } catch {
      // Request or body read failed: fall through to the next candidate
    }
  }

  return null
}
182+
183+
/**
 * Extract README from packument data.
 *
 * With `requestedVersion`, only that version's own `readme` is considered;
 * without it, the packument's top-level `readme` is used.
 *
 * @param packageData Packument of the package.
 * @param requestedVersion Explicitly requested version, if any.
 * @returns The README text, or `null` when it is missing, empty, or equal to
 *   `NPM_MISSING_README_SENTINEL`.
 */
function getReadmeFromPackument(packageData: Packument, requestedVersion?: string): string | null {
  const readme = requestedVersion
    ? packageData.versions[requestedVersion]?.readme
    : packageData.readme

  if (!readme || readme === NPM_MISSING_README_SENTINEL) return null
  return readme
}
194+
195+
/**
 * Extract a clean repository URL from packument repository field.
 *
 * Strips the `git+` prefix and the `.git` suffix, then rewrites the common
 * non-browsable forms to `https://`:
 * - `ssh://git@host/owner/repo`
 * - `git://host/owner/repo`
 * - scp-like `git@host:owner/repo`
 *
 * URLs carrying an explicit port are left untouched, because the port of the
 * original protocol says nothing about where the host serves HTTPS.
 *
 * @param repository The `repository` field, either a URL string or an object
 *   with a `url` property.
 * @returns The cleaned URL, or `undefined` when no URL is present.
 */
function parseRepoUrl(
  repository?: { type?: string; url?: string; directory?: string } | string,
): string | undefined {
  if (!repository) return undefined
  const rawUrl = typeof repository === 'string' ? repository : repository.url
  if (!rawUrl) return undefined

  const url = rawUrl.replace(/^git\+/, '').replace(/\.git$/, '')

  // scp-like syntax has no scheme: git@github.com:owner/repo
  const scpLike = /^git@([^:/]+):(.+)$/.exec(url)
  if (scpLike) return `https://${scpLike[1]}/${scpLike[2]}`

  // The lookahead requires "host/" right after the prefix, i.e. no ":port"
  return url
    .replace(/^ssh:\/\/git@(?=[^/:]+\/)/, 'https://')
    .replace(/^git:\/\/(?=[^/:]+\/)/, 'https://')
}
204+
205+
/**
 * Orchestrates fetching all data and generating llms.txt for a package.
 * Shared by both versioned and unversioned route handlers.
 *
 * Steps:
 * 1. Load the packument and resolve the version (`requestedVersion`, else the
 *    `latest` dist-tag).
 * 2. Take the README from the packument when available.
 * 3. Fetch the file tree and, only if step 2 found nothing, the README from
 *    the CDN. Both requests run in parallel.
 * 4. Discover and fetch agent instruction files, then render the document.
 *
 * NOTE(review): `fetchNpmPackage`, `fetchFileTree` and `createError` are not
 * imported in this file; presumably they are auto-imported by the server
 * framework. Confirm.
 *
 * @param packageName npm package name.
 * @param requestedVersion Explicit version; omitted for the unversioned route.
 * @returns The rendered llms.txt content.
 * @throws A 404 error (via `createError`) when no version can be resolved.
 */
export async function handleLlmsTxt(
  packageName: string,
  requestedVersion?: string,
): Promise<string> {
  const packageData = await fetchNpmPackage(packageName)

  const resolvedVersion = requestedVersion ?? packageData['dist-tags']?.latest
  if (!resolvedVersion) {
    throw createError({ statusCode: 404, message: 'Could not resolve package version.' })
  }

  // Synchronous lookup; decides whether the CDN fallback is needed at all
  const packumentReadme = getReadmeFromPackument(packageData, requestedVersion)

  const [fileTree, cdnReadme] = await Promise.all([
    fetchFileTree(packageName, resolvedVersion),
    packumentReadme ? null : fetchReadmeFromCdn(packageName, resolvedVersion),
  ])

  // Agent files can only be fetched once the tree is known
  const agentFilePaths = discoverAgentFiles(fileTree.files)
  const agentFiles = await fetchAgentFiles(packageName, resolvedVersion, agentFilePaths)

  return generateLlmsTxt({
    packageName,
    version: resolvedVersion,
    description: packageData.description,
    homepage: packageData.homepage,
    repositoryUrl: parseRepoUrl(packageData.repository),
    readme: packumentReadme ?? cdnReadme ?? undefined,
    agentFiles,
  })
}

0 commit comments

Comments
 (0)