#!/usr/bin/env bash
set -e

# Continuous Learning — Observation Hook (SCC)
#
# Records tool-use events so the learning-engine agent can analyse them for
# patterns. Claude Code delivers the hook payload as JSON on stdin.
#
# Adapted from ECC continuous-learning-v2/hooks/observe.sh for SCC.
# Registered via hooks/hooks.json (PreToolUse + PostToolUse, standard+strict profiles).

# First positional argument selects the hook phase; it defaults to "post".
HOOK_PHASE="${1:-post}"

# ─────────────────────────────────────────────
# Read stdin (before any processing)
# ─────────────────────────────────────────────

INPUT_JSON=$(cat)

# An empty payload means there is no event to record.
[ -n "$INPUT_JSON" ] || exit 0

# ─────────────────────────────────────────────
# Find a Python interpreter
# ─────────────────────────────────────────────

# Prints the name of the first Python interpreter found on PATH, preferring
# python3 over python. Prints nothing and returns 1 when neither is available.
resolve_python_cmd() {
  local candidate
  for candidate in python3 python; do
    if command -v "$candidate" >/dev/null 2>&1; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}

# Resolve the interpreter once. Without Python the hook cannot parse its
# payload, so it exits successfully without doing anything.
PYTHON_CMD="$(resolve_python_cmd 2>/dev/null || true)"
[ -n "$PYTHON_CMD" ] || exit 0

# ─────────────────────────────────────────────
# Session guards — skip automated/subagent sessions
# ─────────────────────────────────────────────

# Reads a hook payload (JSON) on stdin and prints its "agent_id".
# Prints an empty line when the key is absent, null or empty, or when the
# payload is not a JSON object. Runs the interpreter named by $PYTHON_CMD;
# if that interpreter cannot be run, prints nothing. Always returns 0.
read_agent_id() {
  "$PYTHON_CMD" -c '
import json, sys
try:
    data = json.load(sys.stdin)
    value = data.get("agent_id") if isinstance(data, dict) else None
except ValueError:
    value = None
# A JSON null must not be printed as the string "None": that is non-empty and
# would make the caller treat the session as a subagent and skip it.
print(value if value else "")
' 2>/dev/null || true
}

# Only observe sessions whose entrypoint is "cli" or "sdk-ts"; an unset
# CLAUDE_CODE_ENTRYPOINT is treated as "cli".
case "${CLAUDE_CODE_ENTRYPOINT:-cli}" in
  cli|sdk-ts) ;;
  *) exit 0 ;;
esac

# Minimal profile suppresses non-essential hooks
[ "${SCC_HOOK_PROFILE:-standard}" = "minimal" ] && exit 0

# Cooperative skip for automated sessions
[ "${SCC_SKIP_OBSERVE:-0}" = "1" ] && exit 0

# Skip subagent sessions (payloads that carry a non-empty agent_id)
_AGENT_ID=$(printf '%s' "$INPUT_JSON" | read_agent_id)
[ -n "$_AGENT_ID" ] && exit 0

| 65 | +# ───────────────────────────────────────────── |
| 66 | +# Project detection |
| 67 | +# ───────────────────────────────────────────── |
| 68 | + |
| 69 | +STDIN_CWD=$(echo "$INPUT_JSON" | "$PYTHON_CMD" -c ' |
| 70 | +import json, sys |
| 71 | +try: |
| 72 | + data = json.load(sys.stdin) |
| 73 | + print(data.get("cwd", "")) |
| 74 | +except (KeyError, TypeError, ValueError): |
| 75 | + print("") |
| 76 | +' 2>/dev/null || echo "") |
| 77 | + |
| 78 | +# Determine project ID from git or cwd |
| 79 | +if [ -n "$STDIN_CWD" ] && [ -d "$STDIN_CWD" ]; then |
| 80 | + PROJECT_ROOT="$STDIN_CWD" |
| 81 | +else |
| 82 | + PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" |
| 83 | +fi |
| 84 | + |
| 85 | +PROJECT_ID=$(cd "$PROJECT_ROOT" 2>/dev/null && git rev-parse --show-toplevel 2>/dev/null | shasum -a 256 | cut -c1-16 || echo "global") |
| 86 | + |
# ─────────────────────────────────────────────
# Configuration
# ─────────────────────────────────────────────

CONFIG_DIR="${HOME}/.claude/homunculus"
PROJECT_DIR="${CONFIG_DIR}/projects/${PROJECT_ID}"
OBSERVATIONS_FILE="${PROJECT_DIR}/observations.jsonl"
MAX_FILE_SIZE_MB=10

# Skip if disabled. This is checked before anything is created on disk so
# that a disabled hook does not leave per-project directories behind.
if [ -f "${CONFIG_DIR}/disabled" ]; then
  exit 0
fi

# Observation is best-effort: if the storage directory cannot be created,
# leave quietly instead of failing the hook under `set -e`.
mkdir -p "$PROJECT_DIR" 2>/dev/null || exit 0

# Auto-purge files named observations-*.jsonl that are older than 30 days,
# at most once per day. find(1) measures age in whole 24-hour periods and
# "+N" means "more than N", so "-mtime +0" matches a marker that is at least
# one day old ("-mtime +1" would only fire after two full days).
PURGE_MARKER="${PROJECT_DIR}/.last-purge"
if [ ! -f "$PURGE_MARKER" ] || [ -n "$(find "$PURGE_MARKER" -mtime +0 2>/dev/null)" ]; then
  find "${PROJECT_DIR}" -name "observations-*.jsonl" -mtime +30 -delete 2>/dev/null || true
  touch "$PURGE_MARKER" 2>/dev/null || true
fi

# ─────────────────────────────────────────────
# Parse tool event and write observation
# ─────────────────────────────────────────────

# Reads a hook payload (JSON object) on stdin and prints a one-line JSON
# summary of the tool event on stdout.
#   $1 - hook phase: "pre" produces event "tool_start"; any other value
#        (default "post") produces "tool_complete".
# On success the summary has "parsed": true plus event, tool, input, output,
# session and tool_use_id. "input" is filled only for tool_start and "output"
# only for tool_complete (the other is null); each is cut to 5000 characters.
# If the payload cannot be processed the summary is
# {"parsed": false, "error": "<message>"}.
# Runs the interpreter named by $PYTHON_CMD.
parse_tool_event() {
  HOOK_PHASE="${1:-post}" "$PYTHON_CMD" -c '
import json, sys, os

def as_text(value, limit=5000):
    # JSON containers (objects and arrays) are stored as JSON text so the log
    # never contains Python repr; anything else goes through str().
    if isinstance(value, (dict, list)):
        text = json.dumps(value)
    else:
        text = str(value)
    return text[:limit]

try:
    data = json.load(sys.stdin)
    hook_phase = os.environ.get("HOOK_PHASE", "post")
    event = "tool_start" if hook_phase == "pre" else "tool_complete"

    # Several key spellings are accepted for each field.
    tool_name = data.get("tool_name", data.get("tool", "unknown"))
    tool_input = data.get("tool_input", data.get("input", {}))
    tool_output = data.get("tool_response", data.get("tool_output", data.get("output", "")))
    session_id = data.get("session_id", "unknown")
    tool_use_id = data.get("tool_use_id", "")

    print(json.dumps({
        "parsed": True,
        "event": event,
        "tool": tool_name,
        "input": as_text(tool_input) if event == "tool_start" else None,
        "output": as_text(tool_output) if event == "tool_complete" else None,
        "session": session_id,
        "tool_use_id": tool_use_id
    }))
except Exception as e:
    print(json.dumps({"parsed": False, "error": str(e)}))
'
}

# If the interpreter itself cannot be run, fall back to an empty summary
# rather than aborting the hook under `set -e`.
PARSED=$(printf '%s' "$INPUT_JSON" | parse_tool_event "$HOOK_PHASE") || PARSED=""

# Read the "parsed" flag back out of the summary; any failure to do so counts
# as "False".
PARSED_OK=$(echo "$PARSED" | "$PYTHON_CMD" -c "import json,sys; print(json.load(sys.stdin).get('parsed', False))" 2>/dev/null || echo "False")

# Without a successfully parsed event there is nothing to record.
[ "$PARSED_OK" = "True" ] || exit 0

# Archive if file too large: once du reports at least MAX_FILE_SIZE_MB for the
# live log, move it into observations.archive/ under a timestamp+PID name.
if [ -f "$OBSERVATIONS_FILE" ]; then
  file_size_mb=$(du -m "$OBSERVATIONS_FILE" 2>/dev/null | cut -f1)
  # An empty size (du failed) is treated as 0, i.e. no rotation.
  if [ "${file_size_mb:-0}" -ge "$MAX_FILE_SIZE_MB" ]; then
    archive_dir="${PROJECT_DIR}/observations.archive"
    archive_name="observations-$(date +%Y%m%d-%H%M%S)-$$.jsonl"
    mkdir -p "$archive_dir"
    mv "$OBSERVATIONS_FILE" "$archive_dir/$archive_name" 2>/dev/null || true
  fi
fi

# Write observation with secret scrubbing
timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

# The Python snippet below reads both values from its environment.
export PROJECT_ID_ENV="$PROJECT_ID"
export TIMESTAMP="$timestamp"

echo "$PARSED" | "$PYTHON_CMD" -c '
import json, sys, os, re

event = json.load(sys.stdin)

# Scrub secrets: a sensitive key name, a run of quote/space/colon/equals
# separators, an optional leading word, then a value of 8+ token characters.
_SECRET_RE = re.compile(
    r"(?i)(api[_-]?key|token|secret|password|authorization|credentials?|auth)"
    r"""([\"'"'"'"'"'"'\s:=]+)"""
    r"([A-Za-z]+\s+)?"
    r"([A-Za-z0-9_\-/.+=]{8,})"
)

def _mask(match):
    # Keep the key, the separators and the optional word; hide only the value.
    key, sep, word = match.group(1), match.group(2), match.group(3)
    return key + sep + (word or "") + "[REDACTED]"

def scrub(val):
    if val is None:
        return None
    return _SECRET_RE.sub(_mask, str(val))

record = {
    "timestamp": os.environ["TIMESTAMP"],
    "event": event["event"],
    "tool": event["tool"],
    "session": event["session"],
    "project_id": os.environ.get("PROJECT_ID_ENV", "global"),
}

# Input is recorded only when non-empty; output whenever it is not null.
if event["input"]:
    record["input"] = scrub(event["input"])
if event["output"] is not None:
    record["output"] = scrub(event["output"])

print(json.dumps(record))
' >> "$OBSERVATIONS_FILE"

exit 0
0 commit comments