Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "agentdiff"
version = "0.1.28"
version = "0.1.29"
edition = "2024"
rust-version = "1.85"
description = "Audit and trace autonomous AI code contributions in git repositories"
Expand Down
42 changes: 42 additions & 0 deletions scripts/finalize-ledger.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,43 @@ def remove_if_exists(path: str) -> None:
pass


def remove_consumed_intents(session_path: str) -> None:
    """Strip type=intent events from session.jsonl after they've been committed.

    Intent events are one-shot: set_intent is called just before a commit, so
    by the time finalize runs they've been consumed. Leaving them causes stale
    intent to bleed into the next commit when two commits share the same second.

    The file is rewritten (through a sibling ``.tmp`` file and ``os.replace``)
    only when at least one intent event was found; otherwise it is left
    byte-for-byte untouched. When a rewrite happens, blank lines are dropped
    and every kept line is newline-terminated. Lines that are not valid JSON,
    or that are not JSON objects, are kept verbatim.

    This is best-effort cleanup: a missing, unreadable, or non-UTF-8 file is
    left alone, and a failed write removes the temporary file and leaves the
    original in place. No I/O or decoding error is raised to the caller.

    NOTE(review): this removes every type=intent line, not just the intent
    that was attached to the commit being finalized. If an agent records the
    next commit's intent while the post-commit hook is still running, that new
    event is deleted before the next pre-commit can read it, so the next trace
    silently loses its intent. Pruning only the consumed intent needs extra
    information from the caller (for example the consumed event's timestamp).

    Args:
        session_path: Path to the session.jsonl file to prune.
    """
    kept = []
    removed_any = False
    try:
        with open(session_path, "r", encoding="utf-8") as f:
            for raw in f:
                line = raw.strip()
                if not line:
                    continue
                try:
                    event = json.loads(line)
                except (ValueError, RecursionError):
                    # Unparseable line: not ours to judge, keep it verbatim.
                    event = None
                if isinstance(event, dict) and event.get("type") == "intent":
                    removed_any = True
                    continue
                kept.append(raw if raw.endswith("\n") else raw + "\n")
    except (OSError, UnicodeDecodeError):
        # Covers a missing file as well; nothing to prune in any of these cases.
        return

    if not removed_any:
        # Avoid a pointless rewrite: replacing the file could clobber events
        # appended by another process since it was read.
        return

    tmp = session_path + ".tmp"
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            f.writelines(kept)
        os.replace(tmp, session_path)
    except OSError:
        try:
            os.remove(tmp)
        except OSError:
            pass


def sha_already_recorded(traces_path: str, sha: str) -> bool:
"""Skip finalize if this commit already has a trace recorded locally."""
if not os.path.exists(traces_path):
Expand Down Expand Up @@ -158,6 +195,8 @@ def write_agent_trace(repo_root: str, pending: dict, sha: str, ts: str) -> Optio
metadata["session_id"] = str(pending["session_id"])
if pending.get("intent"):
metadata["intent"] = str(pending["intent"])
if pending.get("intent_type"):
metadata["intent_type"] = str(pending["intent_type"])
if isinstance(pending.get("files_read"), list) and pending["files_read"]:
metadata["files_read"] = [str(p) for p in pending["files_read"]]
if git_author:
Expand Down Expand Up @@ -228,9 +267,12 @@ def main() -> int:
return 1
ts = ts_res.stdout.strip()

session_path = os.path.join(repo_root, ".git", "agentdiff", "session.jsonl")
result = write_agent_trace(repo_root, pending, sha, ts)
remove_if_exists(pending_ledger_path)
remove_if_exists(pending_context_path)
if result is not None:
remove_consumed_intents(session_path)
return 0 if result is not None else 1


Expand Down
71 changes: 67 additions & 4 deletions scripts/prepare-ledger.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,41 @@ def read_events_per_file(
return {fp: ev for fp, (_, ev) in best.items()}


def read_intent_events(
    path: str,
    min_ts: int,
) -> List[dict]:
    """Collect the type=intent events recorded in a session.jsonl file.

    Each non-blank line of ``path`` is parsed as JSON. Lines that fail to
    parse, that are not JSON objects, whose ``type`` is not ``"intent"``, or
    whose parsed ``timestamp`` is lower than ``min_ts`` are ignored. Any error
    raised while reading the file stops the scan and keeps what was collected
    so far.

    Args:
        path: Location of the session.jsonl file.
        min_ts: Lower bound for an event's parsed timestamp.

    Returns:
        The matching events ordered by parsed timestamp, most recent last.
        An empty list when ``path`` does not exist.
    """
    def _stamp(record):
        # A missing or empty timestamp is handed to parse_event_ts as "".
        return parse_event_ts(str(record.get("timestamp") or ""))

    if not os.path.exists(path):
        return []

    intents = []
    try:
        with open(path, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                text = raw_line.strip()
                if not text:
                    continue
                try:
                    record = json.loads(text)
                except Exception:
                    continue
                if not (isinstance(record, dict) and record.get("type") == "intent"):
                    continue
                if _stamp(record) < min_ts:
                    continue
                intents.append(record)
    except Exception:
        pass

    intents.sort(key=_stamp)
    return intents


def dominant_event(events_by_file: Dict[str, dict], lines_by_file: Dict[str, List[Tuple[int, int]]]) -> dict:
"""Pick the agent/model to use as the top-level record field.

Expand Down Expand Up @@ -276,13 +311,17 @@ def main() -> int:
if not isinstance(pending, dict):
pending = {}

min_ts = head_commit_ts(repo_root)
events_by_file = read_events_per_file(
session_log,
files_touched,
head_commit_ts(repo_root),
min_ts,
lines_by_file,
)

# Read agent-stated intent events from session.jsonl (written by set_intent MCP tool)
intent_events = read_intent_events(session_log, min_ts)

# Top-level agent/model/session come from the dominant event (most lines written)
event = dominant_event(events_by_file, lines_by_file) or {}

Expand All @@ -307,9 +346,31 @@ def main() -> int:
flags = []
flags = [str(f) for f in flags]

intent = pending.get("intent")
if intent is not None:
intent = str(intent)
# Intent priority: agent-stated intent event > pending context > fallback
intent = None
intent_type = None
if intent_events:
# Use the most recent intent event (prefer matching session, else latest)
best_intent = None
if session_id and session_id != "unknown":
for ie in reversed(intent_events):
if str(ie.get("session_id") or "") == session_id:
best_intent = ie
break
if not best_intent:
best_intent = intent_events[-1]
Comment on lines +360 to +361
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Avoid unrelated fallback. When the current commit has a known session_id but no intent event matches it, this falls back to the latest intent from any session. A commit attributed to sess-A can therefore receive an intent recorded by sess-B as long as that event is newer than HEAD, which is another cross-session bleed case.

Suggested change
if not best_intent:
best_intent = intent_events[-1]
if not best_intent and (not session_id or session_id == "unknown"):
best_intent = intent_events[-1]

intent = str(best_intent.get("description") or "").strip() or None
intent_type = str(best_intent.get("intent_type") or "").strip() or None

if not intent:
raw = pending.get("intent")
if raw is not None:
intent = str(raw).strip() or None

if not intent_type:
raw = pending.get("intent_type")
if raw is not None:
intent_type = str(raw).strip() or None

# Per-file attribution — each file maps to the agent/model that wrote it.
# Files with a session event that matches the dominant agent are omitted (finalize
Expand Down Expand Up @@ -364,6 +425,8 @@ def main() -> int:
payload["attribution"] = attribution
if intent:
payload["intent"] = intent
if intent_type:
payload["intent_type"] = intent_type
if trust is not None:
payload["trust"] = trust

Expand Down
4 changes: 4 additions & 0 deletions scripts/record-context.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ def main() -> int:
parser.add_argument("--prompt", default="")
parser.add_argument("--files-read", default="")
parser.add_argument("--intent", default="")
parser.add_argument("--intent-type", default="",
choices=["bugfix","feature","refactor","test","docs",
"security","performance","config","dependency",""])
parser.add_argument("--trust", type=int, default=None)
parser.add_argument("--flags", default="")
args = parser.parse_args()
Expand All @@ -90,6 +93,7 @@ def main() -> int:
"prompt": args.prompt or str(payload.get("prompt") or ""),
"files_read": parse_json_array(args.files_read) or payload.get("files_read") or [],
"intent": args.intent or str(payload.get("intent") or ""),
"intent_type": args.intent_type or str(payload.get("intent_type") or ""),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Validate merged payload. The argparse choices check only applies to the CLI flag. This line still accepts intent_type from stdin JSON without validating it, and that value can be persisted by prepare/finalize into the trace. A caller piping {"intent_type":"other"} through the documented stdin path bypasses the new allowlist.

"flags": parse_json_array(args.flags) or payload.get("flags") or [],
}

Expand Down
47 changes: 47 additions & 0 deletions scripts/tests/test_capture_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,53 @@ def test_write_agent_trace_persists_structured_context_metadata(self):
self.assertEqual(metadata["author"], "Prakhar")
self.assertEqual(metadata["capture_tool"], "afterFileEdit")

def test_write_agent_trace_persists_intent_type(self):
    """write_agent_trace copies both intent and intent_type into the trace metadata."""
    with tempfile.TemporaryDirectory() as tmp:
        # Build a throwaway git repository with a single commit.
        repo = Path(tmp) / "repo"
        repo.mkdir()
        subprocess.run(["git", "init", "-b", "main"], cwd=repo, check=True, capture_output=True)
        subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=repo, check=True)
        subprocess.run(["git", "config", "user.name", "Test User"], cwd=repo, check=True)
        (repo / "README.md").write_text("test\n", encoding="utf-8")
        subprocess.run(["git", "add", "README.md"], cwd=repo, check=True)
        subprocess.run(["git", "commit", "-m", "init"], cwd=repo, check=True, capture_output=True)

        pending = {
            "agent": "cursor",
            "git_author": "Prakhar",
            "model": "cursor-test",
            "session_id": "sess-2",
            "lines": {"src/app.py": [[1, 5]]},
            "prompt_excerpt": "extract auth middleware",
            "prompt_hash": "def456",
            "intent": "eliminate duplicate token validation across route handlers",
            "intent_type": "refactor",
            "files_read": [],
            "trust": 85,
            "flags": [],
            "tool": "afterFileEdit",
        }

        # NOTE(review): HOME is pointed at the temp dir, presumably so anything
        # write_agent_trace resolves under the home directory stays sandboxed.
        original = os.environ.get("HOME")
        try:
            os.environ["HOME"] = tmp
            traces_path = self.mod.write_agent_trace(
                str(repo), pending, "cafebabe", "2026-05-21T00:00:00Z"
            )
        finally:
            if original is not None:
                os.environ["HOME"] = original
            else:
                # HOME was unset before the test: remove it again so the
                # soon-to-be-deleted temp dir does not leak into later tests.
                os.environ.pop("HOME", None)

        # Read the trace while the temporary directory still exists.
        self.assertIsNotNone(traces_path)
        raw = Path(traces_path).read_text(encoding="utf-8").strip()
        trace = json.loads(raw)
        metadata = trace["metadata"]["agentdiff"]
        self.assertEqual(metadata["intent_type"], "refactor")
        self.assertEqual(
            metadata["intent"],
            "eliminate duplicate token validation across route handlers",
        )


if __name__ == "__main__":
unittest.main()
Loading
Loading