From e21e33f412198fee1394f1192b90ace5cdd97713 Mon Sep 17 00:00:00 2001 From: Dashsoap <42135402+Dashsoap@users.noreply.github.com> Date: Sun, 14 Jun 2026 21:05:01 +0800 Subject: [PATCH] fix(cli): report absolute index totals on re-index of unchanged files (#874) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `codegraph index` printed "0 nodes, 0 edges" when run after `init` on an unchanged tree, making it look like the index had been wiped — even though the data was fully intact (`status`/`query` still worked, and re-running `init` was unaffected). Root cause: indexAll() reports nodesCreated/edgesCreated as the *net delta* of the run (after - before). On a re-index with no file changes, every file's content hash matches in storeExtractionResult(), so nothing is re-inserted and the delta is legitimately 0 — but the index is still fully populated. Fix: add optional absolute totalNodes/totalEdges to IndexResult (populated from the same getNodeAndEdgeCount() the delta already reads). The CLI now prefers the absolute totals when the delta is 0 and the index is non-empty, printing e.g. "8 nodes, 11 edges in {duration} (index already up to date)" instead of "0 nodes, 0 edges in {duration}". Behavior preserved: - first full index (empty DB): delta == totals, unchanged output - --force re-index (clears first): real rebuild counts, unchanged output - changed files: real net delta shown (no 'up to date' suffix) Adds regression tests pinning that a re-index of unchanged files keeps totalNodes/totalEdges populated while the delta is 0, and that data stays queryable. 
--- __tests__/index-reindex-totals.test.ts | 83 ++++++++++++++++++++++++++ src/bin/codegraph.ts | 19 +++++- src/extraction/index.ts | 11 ++++ src/index.ts | 5 ++ 4 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 __tests__/index-reindex-totals.test.ts diff --git a/__tests__/index-reindex-totals.test.ts b/__tests__/index-reindex-totals.test.ts new file mode 100644 index 000000000..28383355e --- /dev/null +++ b/__tests__/index-reindex-totals.test.ts @@ -0,0 +1,83 @@ +/** + * Regression test for #874 — `codegraph index` reported "0 nodes, 0 edges" + * on a re-index of unchanged files. + * + * `indexAll()` reports `nodesCreated`/`edgesCreated` as the *net delta* of the + * run. When `init` has already populated the index and `index` is re-run with + * no file changes, every file's content hash matches, so no nodes are + * re-inserted and the delta is 0 — even though the index is fully populated. + * + * The fix adds absolute `totalNodes`/`totalEdges` to the result so the CLI can + * show the real index size instead of a misleading 0. These tests pin that + * `indexAll` keeps the index populated across a re-index and surfaces the + * absolute totals. 
+ */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import CodeGraph from '../src/index'; + +describe('#874 — re-index of unchanged files reports absolute totals', () => { + let testDir: string; + let cg: CodeGraph; + + beforeEach(() => { + testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-reindex-totals-')); + fs.writeFileSync( + path.join(testDir, 'app.ts'), + `export function hello(name: string) { return 'hi ' + name; }\n` + + `export class Greeter {\n` + + ` constructor(private prefix: string) {}\n` + + ` greet(name: string) { return this.prefix + hello(name); }\n` + + `}\n` + ); + + cg = CodeGraph.initSync(testDir, { + config: { include: ['**/*.ts'], exclude: [] }, + }); + }); + + afterEach(() => { + if (cg) cg.destroy(); + if (fs.existsSync(testDir)) { + fs.rmSync(testDir, { recursive: true, force: true }); + } + }); + + it('populates totals on the first full index', async () => { + const first = await cg.indexAll(); + + expect(first.success).toBe(true); + expect(first.filesIndexed).toBeGreaterThan(0); + // First index into an empty DB: delta == absolute total. + expect(first.nodesCreated).toBeGreaterThan(0); + expect(first.totalNodes).toBe(first.nodesCreated); + expect(first.totalEdges).toBe(first.edgesCreated); + }); + + it('re-index of unchanged files: delta is 0 but totals stay populated (#874)', async () => { + const first = await cg.indexAll(); + const firstTotalNodes = first.totalNodes; + const firstTotalEdges = first.totalEdges; + + // Re-run with no file changes — every content hash matches, so nothing is + // re-inserted and the net delta is 0. + const second = await cg.indexAll(); + + expect(second.success).toBe(true); + expect(second.filesIndexed).toBeGreaterThan(0); + // The misleading part the user saw: net delta is 0... 
+ expect(second.nodesCreated).toBe(0); + expect(second.edgesCreated).toBe(0); + // ...but the index is NOT empty — absolute totals match the first index. + expect(second.totalNodes).toBe(firstTotalNodes); + expect(second.totalEdges).toBe(firstTotalEdges); + expect(second.totalNodes).toBeGreaterThan(0); + + // And the data really is still queryable. + const results = cg.searchNodes('Greeter'); + expect(results.length).toBeGreaterThan(0); + }); +}); diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts index 12facd5aa..ad61ff9ad 100644 --- a/src/bin/codegraph.ts +++ b/src/bin/codegraph.ts @@ -318,6 +318,8 @@ type IndexResult = { filesErrored: number; nodesCreated: number; edgesCreated: number; + totalNodes?: number; + totalEdges?: number; errors: Array<{ message: string; filePath?: string; severity: string; code?: string }>; durationMs: number; }; @@ -351,7 +353,22 @@ function printIndexResult(clack: typeof import('@clack/prompts'), result: IndexR } else { clack.log.success(`Indexed ${formatNumber(result.filesIndexed)} files`); } - clack.log.info(`${formatNumber(result.nodesCreated)} nodes, ${formatNumber(result.edgesCreated)} edges in ${formatDuration(result.durationMs)}`); + // `nodesCreated`/`edgesCreated` are this run's *net delta*. On a re-index of + // unchanged files the delta is 0 even though the index is fully populated, + // which read as a misleading "0 nodes, 0 edges" (#874). Prefer the absolute + // totals when available and the delta is 0, so the summary reflects the real + // index size; otherwise fall back to the delta. 
+ const showTotals = + result.nodesCreated === 0 && + result.edgesCreated === 0 && + result.totalNodes !== undefined && + result.totalEdges !== undefined && + (result.totalNodes > 0 || result.totalEdges > 0); + if (showTotals) { + clack.log.info(`${formatNumber(result.totalNodes!)} nodes, ${formatNumber(result.totalEdges!)} edges in ${formatDuration(result.durationMs)} (index already up to date)`); + } else { + clack.log.info(`${formatNumber(result.nodesCreated)} nodes, ${formatNumber(result.edgesCreated)} edges in ${formatDuration(result.durationMs)}`); + } } else if (hasErrors) { clack.log.error(`Indexing failed ${getGlyphs().dash} all ${formatNumber(result.filesErrored)} files had errors`); } else { diff --git a/src/extraction/index.ts b/src/extraction/index.ts index 643634d66..cfd0ac48d 100644 --- a/src/extraction/index.ts +++ b/src/extraction/index.ts @@ -69,6 +69,17 @@ export interface IndexResult { filesErrored: number; nodesCreated: number; edgesCreated: number; + /** + * Absolute node/edge totals in the index after this operation. Unlike + * `nodesCreated`/`edgesCreated` (which are the *net delta* of this run), + * these reflect the full graph size. On a re-index of unchanged files the + * delta is 0 even though the index is fully populated, so the CLI prefers + * these totals to avoid reporting a misleading "0 nodes, 0 edges" (#874). + * Optional/back-compat: only set by `CodeGraph.indexAll`, not the raw + * orchestrator path. 
+ */ + totalNodes?: number; + totalEdges?: number; errors: ExtractionError[]; durationMs: number; } diff --git a/src/index.ts b/src/index.ts index 35855e8b1..473013684 100644 --- a/src/index.ts +++ b/src/index.ts @@ -400,6 +400,11 @@ export class CodeGraph { const after = this.queries.getNodeAndEdgeCount(); result.nodesCreated = after.nodes - before.nodes; result.edgesCreated = after.edges - before.edges; + // Absolute totals so the CLI can report the real index size even + // when this run's net delta is 0 (re-index of unchanged files). See + // IndexResult.totalNodes / #874. + result.totalNodes = after.nodes; + result.totalEdges = after.edges; } // Stamp the index with the engine that built it, so `codegraph status`