codeprakhar25 · codeprakhar25 · May 7, 2026 · May 12, 2026
diff --git a/scripts/capture-copilot.py b/scripts/capture-copilot.py
@@ -3,6 +3,33 @@
 AgentDiff capture script for VS Code GitHub Copilot.
 Receives events from the agentdiff-copilot VS Code extension via stdin.
 Writes to <repo>/.git/agentdiff/session.jsonl.
+
+Supported capture modes
+-----------------------
+``manual`` (confidence="high")
+    Triggered by the ``agentdiff.captureNow`` command.  The user explicitly
+    declares the current file as Copilot-authored after a Chat session.  All
+    lines in the file are recorded.  This is the only mode that produces
+    deterministic, reproducible attribution.
+
+``inline_heuristic`` (confidence="low")
+    Triggered by VS Code's ``onDidChangeTextDocument`` event whenever an
+    insertion exceeds the extension's length threshold.  This fires on edits
+    from *any* source — other AI agents running in the terminal, human
+    copy-paste, IDE refactors — not only Copilot.  Use only as a hint;
+    never treat it as a reliable attribution signal.
+
+``save_flush`` (confidence="low")
+    Same heuristic as ``inline_heuristic``, but flushed on file save rather
+    than on a debounce timer.  Same caveats apply.
+
+``chat_edit`` (confidence="low")
+    Reserved for a future VS Code API that identifies Copilot Chat edits
+    directly.  Not currently emitted by the extension.
+
+Because Copilot is in ``_EXCLUDED_AGENTS`` in prepare-ledger.py, captured
+events never win per-file attribution.  They are recorded in session.jsonl
+for usage statistics and surfaced via the ``copilot_context`` field in traces.
 """
 import os
 import sys
@@ -100,6 +127,14 @@ def main():
     if not isinstance(lines, list):
         lines = []
 
+    # Pass through confidence and capture_mode from the extension payload.
+    # These fields distinguish reliable captures (manual command) from heuristic
+    # ones (inline change detection) so downstream consumers can weight them
+    # appropriately.  Default to "low"/"inline_heuristic" for backwards
+    # compatibility with extension versions that predate these fields.
+    confidence = payload.get("confidence") or "low"
+    capture_mode = payload.get("capture_mode") or "inline_heuristic"
+
     entry = {
         "timestamp": datetime.now(timezone.utc).isoformat(),
         "agent": "copilot",
@@ -111,6 +146,8 @@ def main():
         "prompt": payload.get("prompt"),
         "acceptance": "verbatim",
         "lines": lines,
+        "confidence": confidence,
+        "capture_mode": capture_mode,
     }
 
     session_log = get_session_log(cwd)
@@ -121,6 +158,7 @@ def main():
         f.write(json.dumps(entry) + "\n")
     debug_log(
         f"wrote entry tool={tool} file={entry['file']} lines={entry.get('lines')} "
+        f"confidence={confidence} capture_mode={capture_mode} "
         f"repo_root={repo_root!r} session_log={session_log!r}"
     )
 

diff --git a/scripts/finalize-ledger.py b/scripts/finalize-ledger.py
@@ -164,6 +164,9 @@ def write_agent_trace(repo_root: str, pending: dict, sha: str, ts: str) -> Optio
         metadata["author"] = git_author
     if pending.get("tool"):
         metadata["capture_tool"] = str(pending["tool"])
+    copilot_context = pending.get("copilot_context")
+    if isinstance(copilot_context, dict) and copilot_context:
+        metadata["copilot_context"] = copilot_context
 
     trace: dict = {
         "version": "0.1.0",

diff --git a/scripts/prepare-ledger.py b/scripts/prepare-ledger.py
@@ -198,6 +198,73 @@ def read_events_per_file(
     return {fp: ev for fp, (_, ev) in best.items()}
 
 
+def read_copilot_context(
+    path: str,
+    min_ts: int,
+) -> dict:
+    """Collect copilot events from session.jsonl for usage context.
+
+    Copilot is excluded from per-file attribution (see _EXCLUDED_AGENTS) but
+    its events are still captured in session.jsonl for statistics.  This
+    function summarises them into a ``copilot_context`` dict that is stored in
+    the pending ledger and eventually in the AgentTrace metadata so that
+    ``agentdiff list`` and ``agentdiff report`` can surface a warning when
+    low-confidence heuristic captures were present at commit time.
+
+    Returns an empty dict when no copilot events since ``min_ts`` can be read.
+    """
+    if not os.path.exists(path):
+        return {}  # no session log at this path
+
+    event_count = 0  # copilot events at or after min_ts
+    has_low_confidence = False  # set once any counted event is not "high"
+    files_seen: List[str] = []  # distinct file paths, in first-seen order
+    total_lines = 0  # sum of len(event["lines"]) over counted events
+
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            for raw in f:
+                raw = raw.strip()
+                if not raw:
+                    continue
+                try:
+                    event = json.loads(raw)
+                except Exception:
+                    continue  # skip lines that are not valid JSON
+                if not isinstance(event, dict):
+                    continue
+                if str(event.get("agent") or "") != "copilot":
+                    continue
+                event_ts = parse_event_ts(str(event.get("timestamp") or ""))
+                if event_ts < min_ts:
+                    continue  # older than the cutoff passed by the caller
+
+                event_count += 1
+                confidence = str(event.get("confidence") or "low")  # absent/empty -> "low"
+                if confidence != "high":
+                    has_low_confidence = True
+
+                file_path = str(event.get("file") or "").strip()
+                if file_path and file_path not in files_seen:
+                    files_seen.append(file_path)
+
+                lines = event.get("lines")
+                if isinstance(lines, list):  # non-list "lines" values add nothing
+                    total_lines += len(lines)
+    except Exception:
+        return {}  # best-effort: any failure while reading discards the summary
+
+    if event_count == 0:
+        return {}
+
+    return {
+        "events": event_count,
+        "low_confidence": has_low_confidence,
+        "files": files_seen,
+        "lines": total_lines,
+    }
+
+
 def dominant_event(events_by_file: Dict[str, dict], lines_by_file: Dict[str, List[Tuple[int, int]]]) -> dict:
     """Pick the agent/model to use as the top-level record field.
 
@@ -276,12 +343,14 @@ def main() -> int:
     if not isinstance(pending, dict):
         pending = {}
 
+    commit_ts = head_commit_ts(repo_root)
     events_by_file = read_events_per_file(
         session_log,
         files_touched,
-        head_commit_ts(repo_root),
+        commit_ts,
         lines_by_file,
     )
+    copilot_context = read_copilot_context(session_log, commit_ts)
 
     # Top-level agent/model/session come from the dominant event (most lines written)
     event = dominant_event(events_by_file, lines_by_file) or {}
@@ -366,6 +435,8 @@ def main() -> int:
         payload["intent"] = intent
     if trust is not None:
         payload["trust"] = trust
+    if copilot_context:
+        payload["copilot_context"] = copilot_context
 
     parent = os.path.dirname(pending_ledger_path)
     if parent: