Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions scripts/capture-copilot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,33 @@
AgentDiff capture script for VS Code GitHub Copilot.
Receives events from the agentdiff-copilot VS Code extension via stdin.
Writes to <repo>/.git/agentdiff/session.jsonl.

Supported capture modes
-----------------------
``manual`` (confidence="high")
Triggered by the ``agentdiff.captureNow`` command. The user explicitly
declares the current file as Copilot-authored after a Chat session. All
lines in the file are recorded. This is the only mode that produces
deterministic, reproducible attribution.

``inline_heuristic`` (confidence="low")
Triggered by VS Code's ``onDidChangeTextDocument`` event whenever an
insertion exceeds the extension's length threshold. This fires on edits
from *any* source — other AI agents running in the terminal, human
copy-paste, IDE refactors — not only Copilot. Use only as a hint;
never treat it as a reliable attribution signal.

``save_flush`` (confidence="low")
Same heuristic as ``inline_heuristic``, flushed on file save rather than
a debounce timer. Same caveats apply.

``chat_edit`` (confidence="low")
Reserved for a future VS Code API that identifies Copilot Chat edits
directly. Not currently emitted by the extension.

Because Copilot is in ``_EXCLUDED_AGENTS`` in prepare-ledger.py, captured
events never win per-file attribution. They are recorded in session.jsonl
for usage statistics and surfaced via the ``copilot_context`` field in traces.
"""
import os
import sys
Expand Down Expand Up @@ -100,6 +127,14 @@ def main():
if not isinstance(lines, list):
lines = []

# Pass through confidence and capture_mode from the extension payload.
# These fields distinguish reliable captures (manual command) from heuristic
# ones (inline change detection) so downstream consumers can weight them
# appropriately. Defaults to "low"/"inline_heuristic" for backwards
# compatibility with extension versions that predate this field.
confidence = payload.get("confidence") or "low"
capture_mode = payload.get("capture_mode") or "inline_heuristic"

entry = {
"timestamp": datetime.now(timezone.utc).isoformat(),
"agent": "copilot",
Expand All @@ -111,6 +146,8 @@ def main():
"prompt": payload.get("prompt"),
"acceptance": "verbatim",
"lines": lines,
"confidence": confidence,
"capture_mode": capture_mode,
}

session_log = get_session_log(cwd)
Expand All @@ -121,6 +158,7 @@ def main():
f.write(json.dumps(entry) + "\n")
debug_log(
f"wrote entry tool={tool} file={entry['file']} lines={entry.get('lines')} "
f"confidence={confidence} capture_mode={capture_mode} "
f"repo_root={repo_root!r} session_log={session_log!r}"
)

Expand Down
3 changes: 3 additions & 0 deletions scripts/finalize-ledger.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ def write_agent_trace(repo_root: str, pending: dict, sha: str, ts: str) -> Optio
metadata["author"] = git_author
if pending.get("tool"):
metadata["capture_tool"] = str(pending["tool"])
copilot_context = pending.get("copilot_context")
if isinstance(copilot_context, dict) and copilot_context:
metadata["copilot_context"] = copilot_context

trace: dict = {
"version": "0.1.0",
Expand Down
73 changes: 72 additions & 1 deletion scripts/prepare-ledger.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,73 @@ def read_events_per_file(
return {fp: ev for fp, (_, ev) in best.items()}


def read_copilot_context(
    path: str,
    min_ts: int,
) -> dict:
    """Collect copilot events from session.jsonl for usage context.

    Copilot is excluded from per-file attribution (see _EXCLUDED_AGENTS) but
    its events are still captured in session.jsonl for statistics.  This
    function summarises them into a ``copilot_context`` dict that is stored in
    the pending ledger and eventually in the AgentTrace metadata so that
    ``agentdiff list`` and ``agentdiff report`` can surface a warning when
    low-confidence heuristic captures were present at commit time.

    Args:
        path: Location of the session.jsonl file (one JSON object per line).
        min_ts: Lower bound for event timestamps, in the unit returned by
            ``parse_event_ts``; events that parse to an earlier value are
            ignored.

    Returns:
        An empty dict when the file is missing or unreadable, or when it holds
        no copilot events at or after ``min_ts``.  Otherwise a dict with:

        * ``events`` -- number of matching copilot events.
        * ``low_confidence`` -- True if any matching event has a confidence
          other than ``"high"`` (a missing confidence counts as ``"low"``).
        * ``files`` -- distinct non-empty ``file`` values, in first-seen order.
        * ``lines`` -- sum of ``len(event["lines"])`` over events whose
          ``lines`` field is a list.
    """
    event_count = 0
    has_low_confidence = False
    # Dict keys preserve insertion order and give O(1) duplicate detection,
    # so the result keeps first-seen ordering without a linear list scan.
    files_seen: Dict[str, None] = {}
    total_lines = 0

    try:
        # No os.path.exists() pre-check: a missing file raises here and is
        # mapped to {} by the handler below, without a check-then-open race.
        with open(path, "r", encoding="utf-8") as f:
            for raw in f:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    event = json.loads(raw)
                except (ValueError, RecursionError):
                    # Malformed or pathologically nested line: skip it and
                    # keep reading the rest of the log.
                    continue
                if not isinstance(event, dict):
                    continue
                if str(event.get("agent") or "") != "copilot":
                    continue
                event_ts = parse_event_ts(str(event.get("timestamp") or ""))
                if event_ts < min_ts:
                    continue

                event_count += 1
                # Entries written before the confidence field existed are
                # treated as heuristic ("low") captures.
                confidence = str(event.get("confidence") or "low")
                if confidence != "high":
                    has_low_confidence = True

                file_path = str(event.get("file") or "").strip()
                if file_path:
                    files_seen.setdefault(file_path, None)

                lines = event.get("lines")
                if isinstance(lines, list):
                    total_lines += len(lines)
    except Exception:
        # Best effort by design: this summary is advisory, so any read or
        # parse failure yields "no context" rather than aborting the caller.
        return {}

    if event_count == 0:
        return {}

    return {
        "events": event_count,
        "low_confidence": has_low_confidence,
        "files": list(files_seen),
        "lines": total_lines,
    }


def dominant_event(events_by_file: Dict[str, dict], lines_by_file: Dict[str, List[Tuple[int, int]]]) -> dict:
"""Pick the agent/model to use as the top-level record field.

Expand Down Expand Up @@ -276,12 +343,14 @@ def main() -> int:
if not isinstance(pending, dict):
pending = {}

commit_ts = head_commit_ts(repo_root)
events_by_file = read_events_per_file(
session_log,
files_touched,
head_commit_ts(repo_root),
commit_ts,
lines_by_file,
)
copilot_context = read_copilot_context(session_log, commit_ts)

# Top-level agent/model/session come from the dominant event (most lines written)
event = dominant_event(events_by_file, lines_by_file) or {}
Expand Down Expand Up @@ -366,6 +435,8 @@ def main() -> int:
payload["intent"] = intent
if trust is not None:
payload["trust"] = trust
if copilot_context:
payload["copilot_context"] = copilot_context

parent = os.path.dirname(pending_ledger_path)
if parent:
Expand Down
Loading
Loading