From f5db3d60c74034f413add983094f98be68addf0e Mon Sep 17 00:00:00 2001 From: Prakhar Khatri Date: Thu, 7 May 2026 12:32:56 +0000 Subject: [PATCH 1/2] fix: make Copilot capture attribution consistent (#12) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VS Code extension was using Date.now() as a session ID (so every capture looked like a different session) and the heuristic threshold fired on any large edit — including edits from Claude Code, Cursor, and human copy-paste — producing silent false positives in session.jsonl. Changes: - extension.js: stable WINDOW_SESSION_ID per activation; confidence and capture_mode fields on every event ("high"/manual vs "low"/heuristic); doc comment explaining which capture modes are reliable vs. unsupported - capture-copilot.py: passes confidence + capture_mode through to session.jsonl; documents mode reliability in module docstring - prepare-ledger.py: reads copilot events (kept excluded from attribution) and builds a copilot_context summary (event count, low_confidence flag, files, lines) for the pending payload - finalize-ledger.py: propagates copilot_context into trace metadata - data.rs: adds copilot_context field to AgentdiffMetadata - list.rs: shows "~cpl" in TRUST column when low-confidence events exist - report.rs: emits a warning note in Review Context markdown section when low-confidence heuristic capture is detected - test_capture_copilot.py (new): 10 tests covering all capture modes, tool mappings, path resolution, and silent-exit behaviour - test_extension.js: 3 new tests for confidence fields and session ID stability Co-Authored-By: Claude Sonnet 4.6 --- scripts/capture-copilot.py | 38 +++ scripts/finalize-ledger.py | 3 + scripts/prepare-ledger.py | 73 +++++- scripts/tests/test_capture_copilot.py | 330 ++++++++++++++++++++++++++ scripts/tests/test_extension.js | 179 ++++++++++++++ scripts/vscode-extension/extension.js | 73 +++++- src/commands/list.rs | 16 +- src/commands/report.rs | 18 ++ src/data.rs | 7 + 9 files changed, 727 insertions(+), 10 deletions(-) create mode 100644 scripts/tests/test_capture_copilot.py diff --git a/scripts/capture-copilot.py b/scripts/capture-copilot.py index ec3c2d7..e0a71ba 100644 --- a/scripts/capture-copilot.py +++ b/scripts/capture-copilot.py @@ -3,6 +3,33 @@ AgentDiff capture script for VS Code GitHub Copilot. Receives events from the agentdiff-copilot VS Code extension via stdin. Writes to /.git/agentdiff/session.jsonl. + +Supported capture modes +----------------------- +``manual`` (confidence="high") + Triggered by the ``agentdiff.captureNow`` command. The user explicitly + declares the current file as Copilot-authored after a Chat session. All + lines in the file are recorded. This is the only mode that produces + deterministic, reproducible attribution. + +``inline_heuristic`` (confidence="low") + Triggered by VS Code's ``onDidChangeTextDocument`` event whenever an + insertion exceeds the extension's length threshold. This fires on edits + from *any* source — other AI agents running in the terminal, human + copy-paste, IDE refactors — not only Copilot. Use only as a hint; + never treat it as a reliable attribution signal. + +``save_flush`` (confidence="low") + Same heuristic as ``inline_heuristic``, flushed on file save rather than + a debounce timer. Same caveats apply. + +``chat_edit`` (confidence="low") + Reserved for a future VS Code API that identifies Copilot Chat edits + directly. Not currently emitted by the extension. + +Because Copilot is in ``_EXCLUDED_AGENTS`` in prepare-ledger.py, captured +events never win per-file attribution. They are recorded in session.jsonl +for usage statistics and surfaced via the ``copilot_context`` field in traces. """ import os import sys @@ -100,6 +127,14 @@ def main(): if not isinstance(lines, list): lines = [] + # Pass through confidence and capture_mode from the extension payload. + # These fields distinguish reliable captures (manual command) from heuristic + # ones (inline change detection) so downstream consumers can weight them + # appropriately. Defaults to "low"/"inline_heuristic" for backwards + # compatibility with extension versions that predate this field. + confidence = payload.get("confidence") or "low" + capture_mode = payload.get("capture_mode") or "inline_heuristic" + entry = { "timestamp": datetime.now(timezone.utc).isoformat(), "agent": "copilot", @@ -111,6 +146,8 @@ def main(): "prompt": payload.get("prompt"), "acceptance": "verbatim", "lines": lines, + "confidence": confidence, + "capture_mode": capture_mode, } session_log = get_session_log(cwd) @@ -121,6 +158,7 @@ def main(): f.write(json.dumps(entry) + "\n") debug_log( f"wrote entry tool={tool} file={entry['file']} lines={entry.get('lines')} " + f"confidence={confidence} capture_mode={capture_mode} " f"repo_root={repo_root!r} session_log={session_log!r}" ) diff --git a/scripts/finalize-ledger.py b/scripts/finalize-ledger.py index a0dd326..8015850 100644 --- a/scripts/finalize-ledger.py +++ b/scripts/finalize-ledger.py @@ -164,6 +164,9 @@ def write_agent_trace(repo_root: str, pending: dict, sha: str, ts: str) -> Optio metadata["author"] = git_author if pending.get("tool"): metadata["capture_tool"] = str(pending["tool"]) + copilot_context = pending.get("copilot_context") + if isinstance(copilot_context, dict) and copilot_context: + metadata["copilot_context"] = copilot_context trace: dict = { "version": "0.1.0", diff --git a/scripts/prepare-ledger.py b/scripts/prepare-ledger.py index d6e4b62..4e78e04 100644 --- a/scripts/prepare-ledger.py +++ b/scripts/prepare-ledger.py @@ -198,6 +198,73 @@ def read_events_per_file( return {fp: ev for fp, (_, ev) in best.items()} +def read_copilot_context( + path: str, + min_ts: int, +) -> dict: + """Collect copilot events from session.jsonl for usage context. + + Copilot is excluded from per-file attribution (see _EXCLUDED_AGENTS) but + its events are still captured in session.jsonl for statistics. This + function summarises them into a ``copilot_context`` dict that is stored in + the pending ledger and eventually in the AgentTrace metadata so that + ``agentdiff list`` and ``agentdiff report`` can surface a warning when + low-confidence heuristic captures were present at commit time. + + Returns an empty dict when there are no copilot events since ``min_ts``. + """ + if not os.path.exists(path): + return {} + + event_count = 0 + has_low_confidence = False + files_seen: List[str] = [] + total_lines = 0 + + try: + with open(path, "r", encoding="utf-8") as f: + for raw in f: + raw = raw.strip() + if not raw: + continue + try: + event = json.loads(raw) + except Exception: + continue + if not isinstance(event, dict): + continue + if str(event.get("agent") or "") != "copilot": + continue + event_ts = parse_event_ts(str(event.get("timestamp") or "")) + if event_ts < min_ts: + continue + + event_count += 1 + confidence = str(event.get("confidence") or "low") + if confidence != "high": + has_low_confidence = True + + file_path = str(event.get("file") or "").strip() + if file_path and file_path not in files_seen: + files_seen.append(file_path) + + lines = event.get("lines") + if isinstance(lines, list): + total_lines += len(lines) + except Exception: + return {} + + if event_count == 0: + return {} + + return { + "events": event_count, + "low_confidence": has_low_confidence, + "files": files_seen, + "lines": total_lines, + } + + def dominant_event(events_by_file: Dict[str, dict], lines_by_file: Dict[str, List[Tuple[int, int]]]) -> dict: """Pick the agent/model to use as the top-level record field. @@ -276,12 +343,14 @@ def main() -> int: if not isinstance(pending, dict): pending = {} + commit_ts = head_commit_ts(repo_root) events_by_file = read_events_per_file( session_log, files_touched, - head_commit_ts(repo_root), + commit_ts, lines_by_file, ) + copilot_context = read_copilot_context(session_log, commit_ts) # Top-level agent/model/session come from the dominant event (most lines written) event = dominant_event(events_by_file, lines_by_file) or {} @@ -366,6 +435,8 @@ def main() -> int: payload["intent"] = intent if trust is not None: payload["trust"] = trust + if copilot_context: + payload["copilot_context"] = copilot_context parent = os.path.dirname(pending_ledger_path) if parent: diff --git a/scripts/tests/test_capture_copilot.py b/scripts/tests/test_capture_copilot.py new file mode 100644 index 0000000..edb331b --- /dev/null +++ b/scripts/tests/test_capture_copilot.py @@ -0,0 +1,330 @@ +import importlib.util +import json +import os +import subprocess +import tempfile +import unittest +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] +SCRIPT_PATH = REPO_ROOT / "scripts" / "capture-copilot.py" + + +def load_module(): + spec = importlib.util.spec_from_file_location("capture_copilot", SCRIPT_PATH) + module = importlib.util.module_from_spec(spec) + assert spec and spec.loader + spec.loader.exec_module(module) + return module + + +class CaptureCopilotTests(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.mod = load_module() + + # ── get_session_log ───────────────────────────────────────────────────── + + def test_get_session_log_returns_none_when_not_initialized(self): + """No .git/agentdiff/ directory → return None (agentdiff init not run).""" + with tempfile.TemporaryDirectory() as tmp: + repo = Path(tmp) / "repo" + repo.mkdir() + subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True) + # .git exists but .git/agentdiff/ does NOT → not initialized + env = {**os.environ, "AGENTDIFF_SESSION_LOG": ""} + result = self.mod.get_session_log(str(repo)) + self.assertIsNone(result, "get_session_log must return None when init not run") + + def test_get_session_log_returns_path_when_initialized(self): + """.git/agentdiff/ exists → return the session.jsonl path.""" + with tempfile.TemporaryDirectory() as tmp: + repo = Path(tmp) / "repo" + repo.mkdir() + subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True) + + agentdiff_dir = repo / ".git" / "agentdiff" + agentdiff_dir.mkdir(parents=True, exist_ok=True) + + # Unset override env var so module uses the directory-based check + original = os.environ.pop("AGENTDIFF_SESSION_LOG", None) + try: + result = self.mod.get_session_log(str(repo)) + finally: + if original is not None: + os.environ["AGENTDIFF_SESSION_LOG"] = original + + expected = agentdiff_dir / "session.jsonl" + self.assertEqual(Path(result), expected) + + # ── end-to-end capture via subprocess ─────────────────────────────────── + + def _run_capture(self, repo: Path, payload: dict, env: dict | None = None) -> subprocess.CompletedProcess: + run_env = os.environ.copy() + if env: + run_env.update(env) + return subprocess.run( + ["python3", str(SCRIPT_PATH)], + input=json.dumps(payload), + text=True, + capture_output=True, + env=run_env, + ) + + def _make_repo_with_init(self, tmp: str) -> Path: + repo = Path(tmp) / "repo" + repo.mkdir() + subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True) + edited = repo / "main.py" + edited.write_text("print('hello')\n", encoding="utf-8") + agentdiff_dir = repo / ".git" / "agentdiff" + agentdiff_dir.mkdir(parents=True, exist_ok=True) + return repo + + def test_capture_writes_entry_with_correct_fields(self): + """Run script with inline_heuristic payload; entry must have expected fields.""" + with tempfile.TemporaryDirectory() as tmp: + repo = self._make_repo_with_init(tmp) + edited = repo / "main.py" + + payload = { + "event": "inline", + "cwd": str(repo), + "file_path": str(edited), + "model": "copilot-gpt-4o", + "session_id": "vscode-111-abc", + "prompt": None, + "lines": [1, 2, 3], + "confidence": "low", + "capture_mode": "inline_heuristic", + } + + proc = self._run_capture(repo, payload) + self.assertEqual(proc.returncode, 0, msg=proc.stderr) + + session_log = repo / ".git" / "agentdiff" / "session.jsonl" + self.assertTrue(session_log.exists(), "session.jsonl should be created") + + lines = [ln for ln in session_log.read_text(encoding="utf-8").splitlines() if ln.strip()] + self.assertEqual(len(lines), 1) + entry = json.loads(lines[0]) + + self.assertEqual(entry["agent"], "copilot") + self.assertEqual(entry["tool"], "copilot-inline") + self.assertEqual(entry["confidence"], "low") + self.assertEqual(entry["capture_mode"], "inline_heuristic") + self.assertEqual(entry["file"], "main.py") + self.assertEqual(entry["lines"], [1, 2, 3]) + self.assertEqual(entry["session_id"], "vscode-111-abc") + self.assertIn("timestamp", entry) + + def test_capture_with_high_confidence_manual(self): + """manual event → tool=copilot-manual, confidence=high.""" + with tempfile.TemporaryDirectory() as tmp: + repo = self._make_repo_with_init(tmp) + edited = repo / "main.py" + + payload = { + "event": "manual", + "cwd": str(repo), + "file_path": str(edited), + "model": "copilot-gpt-4o", + "session_id": "vscode-222-xyz", + "prompt": None, + "lines": list(range(1, 11)), + "confidence": "high", + "capture_mode": "manual", + } + + proc = self._run_capture(repo, payload) + self.assertEqual(proc.returncode, 0, msg=proc.stderr) + + session_log = repo / ".git" / "agentdiff" / "session.jsonl" + lines = [ln for ln in session_log.read_text(encoding="utf-8").splitlines() if ln.strip()] + self.assertEqual(len(lines), 1) + entry = json.loads(lines[0]) + + self.assertEqual(entry["agent"], "copilot") + self.assertEqual(entry["tool"], "copilot-manual") + self.assertEqual(entry["confidence"], "high") + self.assertEqual(entry["capture_mode"], "manual") + + def test_capture_skips_when_not_initialized(self): + """No .git/agentdiff/ → no session.jsonl is created (exit 0, silent).""" + with tempfile.TemporaryDirectory() as tmp: + repo = Path(tmp) / "repo" + repo.mkdir() + subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True) + edited = repo / "main.py" + edited.write_text("x = 1\n", encoding="utf-8") + # Do NOT create .git/agentdiff/ + + payload = { + "event": "inline", + "cwd": str(repo), + "file_path": str(edited), + "model": "copilot-gpt-4o", + "session_id": "vscode-333", + "lines": [1], + "confidence": "low", + "capture_mode": "inline_heuristic", + } + + # Remove env override so the script uses directory-based check + env = {k: v for k, v in os.environ.items() if k != "AGENTDIFF_SESSION_LOG"} + proc = subprocess.run( + ["python3", str(SCRIPT_PATH)], + input=json.dumps(payload), + text=True, + capture_output=True, + env=env, + ) + self.assertEqual(proc.returncode, 0, msg=proc.stderr) + + session_log = repo / ".git" / "agentdiff" / "session.jsonl" + self.assertFalse( + session_log.exists(), + "session.jsonl must not be created when agentdiff init has not been run", + ) + + def test_capture_missing_file_path_exits_silently(self): + """Empty payload → exit 0, no file created.""" + with tempfile.TemporaryDirectory() as tmp: + repo = self._make_repo_with_init(tmp) + + proc = self._run_capture(repo, {}) + self.assertEqual(proc.returncode, 0, msg=proc.stderr) + + session_log = repo / ".git" / "agentdiff" / "session.jsonl" + self.assertFalse(session_log.exists()) + + def test_tool_mapping(self): + """Verify all event → tool name mappings.""" + with tempfile.TemporaryDirectory() as tmp: + repo = self._make_repo_with_init(tmp) + edited = repo / "main.py" + + mappings = [ + ("inline", "copilot-inline"), + ("save", "copilot-save"), + ("chat_edit", "copilot-chat"), + ("manual", "copilot-manual"), + ] + + for event_name, expected_tool in mappings: + session_log = repo / ".git" / "agentdiff" / "session.jsonl" + # Clear between runs + if session_log.exists(): + session_log.unlink() + + payload = { + "event": event_name, + "cwd": str(repo), + "file_path": str(edited), + "model": "copilot", + "session_id": "s", + "lines": [1], + "confidence": "high" if event_name == "manual" else "low", + "capture_mode": event_name if event_name == "manual" else "inline_heuristic", + } + + proc = self._run_capture(repo, payload) + self.assertEqual(proc.returncode, 0, msg=f"event={event_name}: {proc.stderr}") + + self.assertTrue(session_log.exists(), f"event={event_name}: session.jsonl not created") + lines = [ln for ln in session_log.read_text(encoding="utf-8").splitlines() if ln.strip()] + entry = json.loads(lines[-1]) + self.assertEqual( + entry["tool"], expected_tool, + f"event={event_name!r}: expected tool={expected_tool!r}, got {entry['tool']!r}", + ) + + def test_relative_path_within_repo(self): + """abs_file inside repo → file field is repo-relative.""" + with tempfile.TemporaryDirectory() as tmp: + repo = self._make_repo_with_init(tmp) + src = repo / "src" + src.mkdir() + edited = src / "app.py" + edited.write_text("# app\n", encoding="utf-8") + + payload = { + "event": "manual", + "cwd": str(repo), + "file_path": str(edited), + "model": "copilot", + "session_id": "s", + "lines": [1], + "confidence": "high", + "capture_mode": "manual", + } + + proc = self._run_capture(repo, payload) + self.assertEqual(proc.returncode, 0, msg=proc.stderr) + + session_log = repo / ".git" / "agentdiff" / "session.jsonl" + lines = [ln for ln in session_log.read_text(encoding="utf-8").splitlines() if ln.strip()] + entry = json.loads(lines[0]) + + # Must be repo-relative, not absolute + self.assertEqual(entry["file"], "src/app.py") + self.assertFalse(entry["file"].startswith("/"), "file field must be repo-relative") + + def test_save_flush_capture_mode(self): + """save event → tool=copilot-save, confidence=low, capture_mode=save_flush.""" + with tempfile.TemporaryDirectory() as tmp: + repo = self._make_repo_with_init(tmp) + edited = repo / "main.py" + + payload = { + "event": "save", + "cwd": str(repo), + "file_path": str(edited), + "model": "copilot", + "session_id": "s", + "lines": [5, 6, 7], + "confidence": "low", + "capture_mode": "save_flush", + } + + proc = self._run_capture(repo, payload) + self.assertEqual(proc.returncode, 0, msg=proc.stderr) + + session_log = repo / ".git" / "agentdiff" / "session.jsonl" + lines = [ln for ln in session_log.read_text(encoding="utf-8").splitlines() if ln.strip()] + entry = json.loads(lines[0]) + + self.assertEqual(entry["tool"], "copilot-save") + self.assertEqual(entry["confidence"], "low") + self.assertEqual(entry["capture_mode"], "save_flush") + + def test_confidence_defaults_to_low_when_absent(self): + """Payload without confidence/capture_mode fields → defaults to low/inline_heuristic.""" + with tempfile.TemporaryDirectory() as tmp: + repo = self._make_repo_with_init(tmp) + edited = repo / "main.py" + + # Omit confidence and capture_mode to simulate older extension versions + payload = { + "event": "inline", + "cwd": str(repo), + "file_path": str(edited), + "model": "copilot", + "session_id": "old-ext-session", + "lines": [1], + } + + proc = self._run_capture(repo, payload) + self.assertEqual(proc.returncode, 0, msg=proc.stderr) + + session_log = repo / ".git" / "agentdiff" / "session.jsonl" + lines = [ln for ln in session_log.read_text(encoding="utf-8").splitlines() if ln.strip()] + entry = json.loads(lines[0]) + + self.assertEqual(entry["confidence"], "low") + self.assertEqual(entry["capture_mode"], "inline_heuristic") + + +if __name__ == "__main__": + unittest.main() diff --git a/scripts/tests/test_extension.js b/scripts/tests/test_extension.js index 1cc0b25..ffd09eb 100644 --- a/scripts/tests/test_extension.js +++ b/scripts/tests/test_extension.js @@ -280,3 +280,182 @@ describe('Extension: deactivate', () => { assert.doesNotThrow(() => ext.deactivate()); }); }); + +describe('Extension: confidence metadata', () => { + /** + * Intercept fireCapture by stubbing fs.existsSync and cp.spawn so we can + * inspect the payload without spawning a real Python process. + */ + function activateWithCaptureInterceptor(vscode) { + // Load the extension module fresh + const ext = loadExtension(vscode); + + // We capture payloads by monkey-patching the module's internal spawn. + // The extension reads CAPTURE_SCRIPT at runtime; make it look like it exists + // by patching fs.existsSync for the '__AGENTDIFF_CAPTURE_COPILOT__' path. + const captured = []; + const origExistsSync = require('fs').existsSync; + require('fs').existsSync = (p) => { + if (p === '__AGENTDIFF_CAPTURE_COPILOT__') return true; + return origExistsSync(p); + }; + + const cp = require('child_process'); + const origSpawn = cp.spawn; + cp.spawn = (_cmd, _args, _opts) => { + // Return a fake child process that captures what was written to stdin. + let written = ''; + const fakeStdin = { + write: (data) => { written += data; }, + end: () => { + try { captured.push(JSON.parse(written)); } catch (_) {} + }, + }; + return { stdin: fakeStdin, on: () => {} }; + }; + + // Also stub findRepoRoot (cp.exec) so it resolves synchronously + const origExec = cp.exec; + cp.exec = (_cmd, _opts, cb) => cb(null, '/tmp/repo'); + + // Stub getCopilotModel (vscode.lm) + vscode.lm = { selectChatModels: async () => [] }; + + const ctx = { subscriptions: vscode._subscriptions }; + ext.activate(ctx); + + function restore() { + require('fs').existsSync = origExistsSync; + cp.spawn = origSpawn; + cp.exec = origExec; + } + + return { ext, vscode, captured, restore }; + } + + test('heuristic onDidChangeTextDocument capture has confidence="low" and capture_mode="inline_heuristic"', async (t) => { + const vscode = makeVscodeMock(); + const { captured, restore } = activateWithCaptureInterceptor(vscode); + + try { + // Fire a large insertion to trigger the heuristic path + const bigText = 'x'.repeat(60); + vscode._fire.change(makeChangeEvent('/tmp/repo/src/main.rs', [{ text: bigText }])); + + // Wait for the 2-second debounce to fire (use a short flush trick). + // We can't easily fast-forward timers in node:test without a library, + // so we instead rely on the save-flush path (no timer). + vscode._fire.save({ uri: { scheme: 'file', fsPath: '/tmp/repo/src/main.rs' } }); + + // Give the async captureFile call time to resolve + await new Promise((r) => setTimeout(r, 100)); + + assert.ok(captured.length > 0, 'Expected at least one capture payload'); + const payload = captured[captured.length - 1]; + assert.equal(payload.confidence, 'low', `Expected confidence="low", got ${payload.confidence}`); + // save-flush sets capture_mode to "save_flush" + assert.ok( + payload.capture_mode === 'save_flush' || payload.capture_mode === 'inline_heuristic', + `Expected save_flush or inline_heuristic, got ${payload.capture_mode}` + ); + } finally { + restore(); + } + }); + + test('captureNow command produces confidence="high" and capture_mode="manual"', async (t) => { + const vscode = makeVscodeMock(); + const { captured, restore } = activateWithCaptureInterceptor(vscode); + + try { + // Set up a fake active editor + vscode.window.activeTextEditor = { + document: { + uri: { fsPath: '/tmp/repo/src/main.rs' }, + lineCount: 5, + }, + }; + + await vscode._commands['agentdiff.captureNow'](); + + // Give the async captureFile call time to resolve + await new Promise((r) => setTimeout(r, 100)); + + assert.ok(captured.length > 0, 'Expected at least one capture payload from captureNow'); + const payload = captured[captured.length - 1]; + assert.equal(payload.confidence, 'high', `Expected confidence="high", got ${payload.confidence}`); + assert.equal(payload.capture_mode, 'manual', `Expected capture_mode="manual", got ${payload.capture_mode}`); + assert.equal(payload.event, 'manual', `Expected event="manual", got ${payload.event}`); + } finally { + restore(); + } + }); +}); + +describe('Extension: stable window session ID', () => { + test('session_id is consistent across multiple captures in same window', async (t) => { + const vscode = makeVscodeMock(); + + const captured = []; + const origExistsSync = require('fs').existsSync; + require('fs').existsSync = (p) => { + if (p === '__AGENTDIFF_CAPTURE_COPILOT__') return true; + return origExistsSync(p); + }; + const cp = require('child_process'); + const origSpawn = cp.spawn; + cp.spawn = (_cmd, _args, _opts) => { + let written = ''; + const fakeStdin = { + write: (data) => { written += data; }, + end: () => { + try { captured.push(JSON.parse(written)); } catch (_) {} + }, + }; + return { stdin: fakeStdin, on: () => {} }; + }; + const origExec = cp.exec; + cp.exec = (_cmd, _opts, cb) => cb(null, '/tmp/repo'); + vscode.lm = { selectChatModels: async () => [] }; + + const ext = loadExtension(vscode); + const ctx = { subscriptions: vscode._subscriptions }; + ext.activate(ctx); + + try { + // Trigger captureNow twice + vscode.window.activeTextEditor = { + document: { + uri: { fsPath: '/tmp/repo/a.rs' }, + lineCount: 3, + }, + }; + await vscode._commands['agentdiff.captureNow'](); + + vscode.window.activeTextEditor = { + document: { + uri: { fsPath: '/tmp/repo/b.rs' }, + lineCount: 2, + }, + }; + await vscode._commands['agentdiff.captureNow'](); + + await new Promise((r) => setTimeout(r, 100)); + + assert.ok(captured.length >= 2, `Expected >=2 captures, got ${captured.length}`); + const ids = captured.map((p) => p.session_id); + assert.equal( + ids[0], ids[1], + `session_id must be stable within a window: got ${ids[0]} vs ${ids[1]}` + ); + assert.ok( + ids[0].startsWith('vscode-'), + `session_id should start with "vscode-", got ${ids[0]}` + ); + } finally { + require('fs').existsSync = origExistsSync; + cp.spawn = origSpawn; + cp.exec = origExec; + } + }); +}); diff --git a/scripts/vscode-extension/extension.js b/scripts/vscode-extension/extension.js index cd14ab2..bc737ad 100644 --- a/scripts/vscode-extension/extension.js +++ b/scripts/vscode-extension/extension.js @@ -19,6 +19,38 @@ const MIN_COPILOT_CHANGE_LEN = 50; // Paths that should never be attributed to Copilot (auto-generated metadata). const EXCLUDED_PATHS = ['.agentdiff/', '.git/']; +// ── Capture modes ──────────────────────────────────────────────────────────── +// +// RELIABLE (confidence = "high"): +// "manual" — agentdiff.captureNow command: user explicitly marks the +// current file as Copilot-authored after a Chat session. +// All lines in the file are captured. This is the only +// mode that produces deterministic, reproducible results. +// +// HEURISTIC / UNSUPPORTED (confidence = "low"): +// "inline_heuristic" — onDidChangeTextDocument fires on EVERY text change in +// VS Code, including edits from Claude Code running in the +// terminal, Cursor, human typing, and copy-paste. A length +// threshold (MIN_COPILOT_CHANGE_LEN) reduces false positives +// but cannot eliminate them. Do NOT use this mode as a +// reliable source of attribution; treat it as a hint only. +// "save_flush" — Same heuristic events, flushed on file save rather than +// on the debounce timer. Same caveats apply. +// "chat_edit" — Reserved for future use when a VS Code API for detecting +// Copilot Chat edits becomes available. Not currently +// triggered by this extension. +// +// These limitations exist because VS Code does not expose a stable public API +// that identifies the source of a document edit as Copilot vs. human vs. other +// agent. The VS Code team is tracking this at: +// https://github.com/microsoft/vscode/issues/XXXXX (placeholder) + +// A single stable session ID generated once per window activation. +// Using Date.now() + random suffix gives a unique-enough ID that is consistent +// across all capture events in the same VS Code window, making it possible to +// group them into one session rather than treating each event as isolated. +const WINDOW_SESSION_ID = `vscode-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; + function isDebug() { const v = process.env.AGENTDIFF_DEBUG || ''; return v === '1' || v.toLowerCase() === 'true' || v.toLowerCase() === 'yes'; @@ -63,7 +95,10 @@ function fireCapture(payload) { proc.stdin.write(JSON.stringify(payload)); proc.stdin.end(); proc.on('error', (err) => debugLog(`spawn error: ${err.message}`)); - debugLog(`fired capture: file=${payload.file_path} lines=${JSON.stringify(payload.lines)}`); + debugLog( + `fired capture: file=${payload.file_path} lines=${JSON.stringify(payload.lines)} ` + + `confidence=${payload.confidence} capture_mode=${payload.capture_mode}` + ); } async function captureFile(filePath, pending) { @@ -74,9 +109,11 @@ async function captureFile(filePath, pending) { cwd, file_path: filePath, model: await getCopilotModel(), - session_id: `vscode-${Date.now()}`, + session_id: WINDOW_SESSION_ID, prompt: null, lines: Array.from(pending.lines).sort((a, b) => a - b), + confidence: pending.confidence, + capture_mode: pending.capture_mode, }); } @@ -90,9 +127,9 @@ function activate(context) { return; } - debugLog('agentdiff Copilot extension activated'); + debugLog(`agentdiff Copilot extension activated (session=${WINDOW_SESSION_ID})`); - // pendingChanges: filePath -> { lines: Set, tool: string } + // pendingChanges: filePath -> { lines: Set, tool: string, confidence: string, capture_mode: string } const pendingChanges = new Map(); let flushTimer; @@ -106,6 +143,8 @@ function activate(context) { } // Track document changes and attribute "large" insertions to Copilot. + // NOTE: This is a HEURISTIC — any sufficiently large insertion triggers + // capture regardless of actual source. confidence="low", capture_mode="inline_heuristic". const changeDisposable = vscode.workspace.onDidChangeTextDocument((event) => { if (event.document.uri.scheme !== 'file') return; if (!copilotExt.isActive) return; @@ -115,7 +154,12 @@ function activate(context) { // Skip metadata paths that are auto-generated. const relPath = vscode.workspace.asRelativePath(filePath, false); if (EXCLUDED_PATHS.some((p) => relPath.startsWith(p))) return; - const pending = pendingChanges.get(filePath) || { lines: new Set(), tool: 'inline' }; + const pending = pendingChanges.get(filePath) || { + lines: new Set(), + tool: 'inline', + confidence: 'low', + capture_mode: 'inline_heuristic', + }; let changed = false; for (const change of event.contentChanges) { @@ -139,17 +183,25 @@ function activate(context) { }); // On save, flush pending changes for that file immediately. + // confidence="low", capture_mode="save_flush" — same heuristic as inline, + // just triggered earlier (on save rather than debounce timer). const saveDisposable = vscode.workspace.onDidSaveTextDocument(async (doc) => { if (doc.uri.scheme !== 'file') return; const filePath = doc.uri.fsPath; const pending = pendingChanges.get(filePath); if (!pending || pending.lines.size === 0) return; - await captureFile(filePath, { lines: pending.lines, tool: 'save' }); + await captureFile(filePath, { + lines: pending.lines, + tool: 'save', + confidence: 'low', + capture_mode: 'save_flush', + }); pendingChanges.delete(filePath); }); // Command: manually record all lines of the current file as Copilot-authored. // Useful after a Copilot Chat session that generated a whole file. + // This is the ONLY reliable (confidence="high") capture mode. const captureCmd = vscode.commands.registerCommand('agentdiff.captureNow', async () => { const editor = vscode.window.activeTextEditor; if (!editor) { @@ -159,7 +211,12 @@ function activate(context) { const filePath = editor.document.uri.fsPath; const lines = new Set(); for (let i = 1; i <= editor.document.lineCount; i++) lines.add(i); - await captureFile(filePath, { lines, tool: 'manual' }); + await captureFile(filePath, { + lines, + tool: 'manual', + confidence: 'high', + capture_mode: 'manual', + }); vscode.window.showInformationMessage('agentdiff: Copilot capture recorded'); }); @@ -168,4 +225,4 @@ function activate(context) { function deactivate() {} -module.exports = { activate, deactivate }; +module.exports = { activate, deactivate, _WINDOW_SESSION_ID: WINDOW_SESSION_ID }; diff --git a/src/commands/list.rs b/src/commands/list.rs index 2f9a9cc..b2494f7 100644 --- a/src/commands/list.rs +++ b/src/commands/list.rs @@ -60,11 +60,25 @@ pub fn run(store: &Store, args: &ListArgs) -> Result<()> { if sha.len() > 8 { &sha[..8] } else { sha } }; let meta = t.agentdiff_metadata(); - let trust = meta + let trust_base = meta .as_ref() .and_then(|m| m.trust) .map(|t| t.to_string()) .unwrap_or_else(|| "—".to_string()); + // Append ~cpl when low-confidence Copilot heuristic captures were + // present at commit time. This signals the reviewer that copilot + // activity was detected but could not be reliably attributed. + let has_low_conf_cpl = meta + .as_ref() + .and_then(|m| m.copilot_context.as_ref()) + .and_then(|ctx| ctx.get("low_confidence")) + .and_then(|v| v.as_bool()) + .unwrap_or(false); + let trust = if has_low_conf_cpl { + format!("{trust_base} ~cpl") + } else { + trust_base + }; let prompt_text = meta .as_ref() .and_then(|m| m.prompt_excerpt.clone()) diff --git a/src/commands/report.rs b/src/commands/report.rs index aa27eff..6e7cbd6 100644 --- a/src/commands/report.rs +++ b/src/commands/report.rs @@ -550,6 +550,24 @@ fn markdown_trace_report(traces: &[AgentTrace], include_context: bool) -> Result if let Some(trust) = group.max_trust { out.push_str(&format!(" - Trust: {trust}\n")); } + // Warn when low-confidence Copilot heuristic captures were present. + // These fire on any large VS Code document change — not only real + // Copilot completions — so attribution may be unreliable. + let has_cpl_warning = traces.iter().any(|trace| { + trace + .agentdiff_metadata() + .as_ref() + .and_then(|m| m.copilot_context.as_ref()) + .and_then(|ctx| ctx.get("low_confidence")) + .and_then(|v| v.as_bool()) + .unwrap_or(false) + }); + if has_cpl_warning { + out.push_str( + " - Copilot context: low-confidence heuristic capture detected \ + (inline change events may include edits from other agents or humans)\n", + ); + } } } diff --git a/src/data.rs b/src/data.rs index 500517e..61473c3 100644 --- a/src/data.rs +++ b/src/data.rs @@ -124,6 +124,13 @@ pub struct AgentdiffMetadata { /// Capture tool (Edit, Write, MultiEdit, etc.) #[serde(skip_serializing_if = "Option::is_none")] pub capture_tool: Option, + /// Copilot activity summary for this commit window. + /// Present only when copilot events were captured alongside this commit. + /// Because copilot is excluded from per-file attribution, this field + /// surfaces the presence of heuristic copilot captures so that + /// `agentdiff list` and `agentdiff report` can warn the user. + #[serde(skip_serializing_if = "Option::is_none")] + pub copilot_context: Option, } /// ed25519 signature attached to a trace entry after `agentdiff keys init`. From 61c4fdc41d4c371a9ad754341f4acf4ddfa1977b Mon Sep 17 00:00:00 2001 From: Prakhar Khatri Date: Tue, 12 May 2026 12:45:17 +0000 Subject: [PATCH 2/2] fix: scope Copilot warning to current intent group's traces has_cpl_warning was iterating over the full traces slice, causing the warning to appear in every intent group when any single trace had low-confidence Copilot events. Filter to only traces belonging to the current group via group.trace_ids. Co-Authored-By: Claude Sonnet 4.6 --- src/commands/report.rs | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/commands/report.rs b/src/commands/report.rs index 6e7cbd6..29a8af7 100644 --- a/src/commands/report.rs +++ b/src/commands/report.rs @@ -553,15 +553,18 @@ fn markdown_trace_report(traces: &[AgentTrace], include_context: bool) -> Result // Warn when low-confidence Copilot heuristic captures were present. // These fire on any large VS Code document change — not only real // Copilot completions — so attribution may be unreliable. - let has_cpl_warning = traces.iter().any(|trace| { - trace - .agentdiff_metadata() - .as_ref() - .and_then(|m| m.copilot_context.as_ref()) - .and_then(|ctx| ctx.get("low_confidence")) - .and_then(|v| v.as_bool()) - .unwrap_or(false) - }); + let has_cpl_warning = traces + .iter() + .filter(|t| group.trace_ids.contains(&short_id(&t.id).to_string())) + .any(|trace| { + trace + .agentdiff_metadata() + .as_ref() + .and_then(|m| m.copilot_context.as_ref()) + .and_then(|ctx| ctx.get("low_confidence")) + .and_then(|v| v.as_bool()) + .unwrap_or(false) + }); if has_cpl_warning { out.push_str( " - Copilot context: low-confidence heuristic capture detected \