diff --git a/__tests__/index-reindex-totals.test.ts b/__tests__/index-reindex-totals.test.ts
new file mode 100644
index 000000000..28383355e
--- /dev/null
+++ b/__tests__/index-reindex-totals.test.ts
@@ -0,0 +1,83 @@
+/**
+ * Regression test for #874 — `codegraph index` reported "0 nodes, 0 edges"
+ * on a re-index of unchanged files.
+ *
+ * `indexAll()` reports `nodesCreated`/`edgesCreated` as the *net delta* of the
+ * run. When `init` has already populated the index and `index` is re-run with
+ * no file changes, every file's content hash matches, so no nodes are
+ * re-inserted and the delta is 0 — even though the index is fully populated.
+ *
+ * The fix adds absolute `totalNodes`/`totalEdges` to the result so the CLI can
+ * show the real index size instead of a misleading 0. These tests pin that
+ * `indexAll` keeps the index populated across a re-index and surfaces the
+ * absolute totals.
+ */
+
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import CodeGraph from '../src/index';
+
+describe('#874 — re-index of unchanged files reports absolute totals', () => {
+  let testDir: string;
+  let cg: CodeGraph;
+
+  beforeEach(() => {
+    testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-reindex-totals-'));
+    fs.writeFileSync(
+      path.join(testDir, 'app.ts'),
+      `export function hello(name: string) { return 'hi ' + name; }\n` +
+        `export class Greeter {\n` +
+        ` constructor(private prefix: string) {}\n` +
+        ` greet(name: string) { return this.prefix + hello(name); }\n` +
+        `}\n`
+    );
+
+    cg = CodeGraph.initSync(testDir, {
+      config: { include: ['**/*.ts'], exclude: [] },
+    });
+  });
+
+  afterEach(() => {
+    if (cg) cg.destroy();
+    if (fs.existsSync(testDir)) {
+      fs.rmSync(testDir, { recursive: true, force: true });
+    }
+  });
+
+  it('populates totals on the first full index', async () => {
+    const first = await cg.indexAll();
+
+    expect(first.success).toBe(true);
+    expect(first.filesIndexed).toBeGreaterThan(0);
+    // First index into an empty DB: delta == absolute total.
+    expect(first.nodesCreated).toBeGreaterThan(0);
+    expect(first.totalNodes).toBe(first.nodesCreated);
+    expect(first.totalEdges).toBe(first.edgesCreated);
+  });
+
+  it('re-index of unchanged files: delta is 0 but totals stay populated (#874)', async () => {
+    const first = await cg.indexAll();
+    const firstTotalNodes = first.totalNodes;
+    const firstTotalEdges = first.totalEdges;
+
+    // Re-run with no file changes — every content hash matches, so nothing is
+    // re-inserted and the net delta is 0.
+    const second = await cg.indexAll();
+
+    expect(second.success).toBe(true);
+    expect(second.filesIndexed).toBeGreaterThan(0);
+    // The misleading part the user saw: net delta is 0...
+    expect(second.nodesCreated).toBe(0);
+    expect(second.edgesCreated).toBe(0);
+    // ...but the index is NOT empty — absolute totals match the first index.
+    expect(second.totalNodes).toBe(firstTotalNodes);
+    expect(second.totalEdges).toBe(firstTotalEdges);
+    expect(second.totalNodes).toBeGreaterThan(0);
+
+    // And the data really is still queryable.
+    const results = cg.searchNodes('Greeter');
+    expect(results.length).toBeGreaterThan(0);
+  });
+});
diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts
index 12facd5aa..ad61ff9ad 100644
--- a/src/bin/codegraph.ts
+++ b/src/bin/codegraph.ts
@@ -318,6 +318,8 @@ type IndexResult = {
   filesErrored: number;
   nodesCreated: number;
   edgesCreated: number;
+  totalNodes?: number;
+  totalEdges?: number;
   errors: Array<{ message: string; filePath?: string; severity: string; code?: string }>;
   durationMs: number;
 };
@@ -351,7 +353,24 @@ function printIndexResult(clack: typeof import('@clack/prompts'), result: IndexR
     } else {
       clack.log.success(`Indexed ${formatNumber(result.filesIndexed)} files`);
     }
-    clack.log.info(`${formatNumber(result.nodesCreated)} nodes, ${formatNumber(result.edgesCreated)} edges in ${formatDuration(result.durationMs)}`);
+    // `nodesCreated`/`edgesCreated` are this run's *net delta*. On a re-index of
+    // unchanged files the delta is 0 even though the index is fully populated,
+    // which read as a misleading "0 nodes, 0 edges" (#874). Prefer the absolute
+    // totals when available and the delta is 0, so the summary reflects the real
+    // index size; otherwise fall back to the delta.
+    const { totalNodes, totalEdges } = result;
+    const deltaIsZero = result.nodesCreated === 0 && result.edgesCreated === 0;
+    if (
+      deltaIsZero &&
+      totalNodes !== undefined &&
+      totalEdges !== undefined &&
+      (totalNodes > 0 || totalEdges > 0)
+    ) {
+      // The locals are narrowed to `number` by the checks above, so no `!` is needed.
+      clack.log.info(`${formatNumber(totalNodes)} nodes, ${formatNumber(totalEdges)} edges in ${formatDuration(result.durationMs)} (index already up to date)`);
+    } else {
+      clack.log.info(`${formatNumber(result.nodesCreated)} nodes, ${formatNumber(result.edgesCreated)} edges in ${formatDuration(result.durationMs)}`);
+    }
   } else if (hasErrors) {
     clack.log.error(`Indexing failed ${getGlyphs().dash} all ${formatNumber(result.filesErrored)} files had errors`);
   } else {
diff --git a/src/extraction/index.ts b/src/extraction/index.ts
index 643634d66..cfd0ac48d 100644
--- a/src/extraction/index.ts
+++ b/src/extraction/index.ts
@@ -69,6 +69,17 @@ export interface IndexResult {
   filesErrored: number;
   nodesCreated: number;
   edgesCreated: number;
+  /**
+   * Absolute node/edge totals in the index after this operation. Unlike
+   * `nodesCreated`/`edgesCreated` (which are the *net delta* of this run),
+   * these reflect the full graph size. On a re-index of unchanged files the
+   * delta is 0 even though the index is fully populated, so the CLI prefers
+   * these totals to avoid reporting a misleading "0 nodes, 0 edges" (#874).
+   * Optional/back-compat: only set by `CodeGraph.indexAll`, not the raw
+   * orchestrator path.
+   */
+  totalNodes?: number;
+  totalEdges?: number;
   errors: ExtractionError[];
   durationMs: number;
 }
diff --git a/src/index.ts b/src/index.ts
index 35855e8b1..473013684 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -400,6 +400,11 @@ export class CodeGraph {
         const after = this.queries.getNodeAndEdgeCount();
         result.nodesCreated = after.nodes - before.nodes;
         result.edgesCreated = after.edges - before.edges;
+        // Absolute totals so the CLI can report the real index size even
+        // when this run's net delta is 0 (re-index of unchanged files). See
+        // IndexResult.totalNodes / #874.
+        result.totalNodes = after.nodes;
+        result.totalEdges = after.edges;
       }
 
       // Stamp the index with the engine that built it, so `codegraph status`