Comfy-Org · mattmillerai · Jun 29, 2026 · Jun 29, 2026 · coderabbitai · Jun 29, 2026
diff --git a/.github/cursor-review/extract-findings.py b/.github/cursor-review/extract-findings.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python3
 """Parse raw cursor-agent output into a normalized findings record.
 
-Used by per-cell matrix steps. Each cell calls this to convert the model's
-raw stdout into a JSON file the consolidate step can ingest. The output is
-always structured — even on parse failures or empty output — so the
-consolidate step has a uniform input.
+Used by per-cell matrix steps AND by the judge/consolidate step. Each caller
+converts the model's raw stdout into a JSON file the next step can ingest. The
+output is always structured — even on parse failures or empty output — so the
+downstream step has a uniform input.
 
 Output shape:
     {
@@ -21,35 +21,110 @@
 import re
 
 
+def _try_load(snippet: str):
+    """json.loads `snippet`, returning the value only if it's a list or dict.
+
+    A bare number/string/bool is never a findings payload, so reject it — this
+    keeps the brace-scan below from "succeeding" on a stray scalar.
+    """
+    try:
+        value = json.loads(snippet)
+    except (json.JSONDecodeError, ValueError):
+        return None
+    return value if isinstance(value, (list, dict)) else None
+
+
+def _iter_json_candidates(text: str):
+    """Yield each top-level balanced {...} / [...] region embedded in `text`.
+
+    String- and escape-aware: braces or brackets inside JSON string literals
+    don't throw off the nesting count, so prose like `... the findings […] are`
+    surrounding a real array doesn't corrupt the match the way a naive
+    first-`[`/last-`]` slice does. Regions are yielded in document order; the
+    caller parses each and takes the first that loads.
+    """
+    openers = {"{", "["}
+    closers = {"}", "]"}
+    i, n = 0, len(text)
+    while i < n:
+        if text[i] not in openers:
+            i += 1
+            continue
+        depth = 0
+        in_str = False
+        escape = False
+        j = i
+        while j < n:
+            c = text[j]
+            if in_str:
+                if escape:
+                    escape = False
+                elif c == "\\":
+                    escape = True
+                elif c == '"':
+                    in_str = False
+            elif c == '"':
+                in_str = True
+            elif c in openers:
+                depth += 1
+            elif c in closers:
+                depth -= 1
+                if depth == 0:
+                    yield text[i : j + 1]
+                    break
+            j += 1
+        # Resume scanning after this region (or after an unbalanced opener).
+        i = j + 1
-        # Resume scanning after this region (or after an unbalanced opener).
-        i = j + 1
+        # Resume after a closed region; otherwise advance one char so later JSON
+        # candidates are still reachable.
+        if j < n and depth == 0:
+            i = j + 1
+        else:
+            i += 1
-        # Resume scanning after this region (or after an unbalanced opener).
-        i = j + 1
+        # Resume after a closed region; otherwise advance one char so later JSON
+        # candidates are still reachable.
+        if j < n and depth == 0:
+            i = j + 1
+        else:
+            i += 1
+
+
 def parse_json_findings(raw_text: str):
-    """Extract a JSON array from raw model output, tolerating surrounding prose.
+    """Extract a JSON value (array or object) from raw model output.
 
-    Returns the parsed value, or None if no JSON array could be located.
+    Tolerates surrounding prose and markdown fences. Returns the parsed value
+    (list or dict), or None if no JSON could be located. Layered most- to
+    least-strict so a clean response takes the fast path:
+
+    1. The whole output is JSON.
+    2. A fenced ```json (or bare ```) block holds the JSON.
+    3. A balanced {...}/[...] region is embedded in prose.
     """
     text = raw_text.strip()
-    try:
-        return json.loads(text)
-    except json.JSONDecodeError:
-        pass
-
-    fenced = re.search(r"```(?:json)?\s*\n(.*?)```", text, re.DOTALL)
-    if fenced:
-        try:
-            return json.loads(fenced.group(1).strip())
-        except json.JSONDecodeError:
-            pass
-
-    start = text.find("[")
-    end = text.rfind("]")
-    if start != -1 and end != -1 and end > start:
-        try:
-            return json.loads(text[start : end + 1])
-        except json.JSONDecodeError:
-            pass
+
+    parsed = _try_load(text)
+    if parsed is not None:
+        return parsed
+
+    for match in re.finditer(r"```(?:json)?\s*\n(.*?)```", text, re.DOTALL):
+        parsed = _try_load(match.group(1).strip())
+        if parsed is not None:
+            return parsed
+
+    for candidate in _iter_json_candidates(text):
+        parsed = _try_load(candidate)
+        if parsed is not None:
+            return parsed
 
     return None
 
 
+def coerce_findings_list(parsed):
+    """Reduce a parsed JSON value to the findings list, or None if it isn't one.
+
+    The panel cells and judge are all asked for a bare JSON array, but a model
+    intermittently wraps it as `{"findings": [...]}` (or a near-synonym key).
+    Unwrap those so a well-formed-but-wrapped response parses instead of being
+    discarded as a parse_error.
+    """
+    if isinstance(parsed, list):
+        return parsed
+    if isinstance(parsed, dict):
+        for key in ("findings", "results", "items", "reviews"):
+            value = parsed.get(key)
+            if isinstance(value, list):
+                return value
-    if isinstance(parsed, dict):
-        for key in ("findings", "results", "items", "reviews"):
-            value = parsed.get(key)
-            if isinstance(value, list):
-                return value
+def _is_findings_list(value):
+    required = {"file", "line", "side", "severity", "body"}
+    return isinstance(value, list) and all(
+        isinstance(item, dict) and required.issubset(item) for item in value
+    )
+
+    if _is_findings_list(parsed):
+        return parsed
+    if isinstance(parsed, dict):
+        for key in ("findings", "results", "items", "reviews"):
+            value = parsed.get(key)
+            if _is_findings_list(value):
+                return value
-    if isinstance(parsed, dict):
-        for key in ("findings", "results", "items", "reviews"):
-            value = parsed.get(key)
-            if isinstance(value, list):
-                return value
+def _is_findings_list(value):
+    required = {"file", "line", "side", "severity", "body"}
+    return isinstance(value, list) and all(
+        isinstance(item, dict) and required.issubset(item) for item in value
+    )
+
+    if _is_findings_list(parsed):
+        return parsed
+    if isinstance(parsed, dict):
+        for key in ("findings", "results", "items", "reviews"):
+            value = parsed.get(key)
+            if _is_findings_list(value):
+                return value
+    return None
+
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--raw", required=True, help="Path to raw cursor-agent output")
@@ -75,7 +150,7 @@ def main():
             json.dump(record, f)
         return
 
-    findings = parse_json_findings(raw)
+    findings = coerce_findings_list(parse_json_findings(raw))
 
     if findings is None:
         # Truncate raw so artifacts stay small even on chatty parse failures.
@@ -84,12 +159,6 @@ def main():
             error=f"Could not parse JSON findings from output. First 500 chars:\n{raw[:500]}",
             findings=[],
         )
-    elif not isinstance(findings, list):
-        record.update(
-            status="parse_error",
-            error=f"Output parsed but is not an array (got {type(findings).__name__}).",
-            findings=[],
-        )
     else:
         record.update(status="ok", findings=findings)
 

diff --git a/.github/cursor-review/post-review.py b/.github/cursor-review/post-review.py
@@ -262,10 +262,19 @@ def main():
     parser.add_argument("--commit-sha", required=True)
     parser.add_argument("--triggered-by", default=None)
     parser.add_argument("--error-message", default=None, help="If set, post an error review with this message")
+    parser.add_argument(
+        "--notice",
+        default=None,
+        help="Banner prepended to the review body (e.g. a judge-failed degradation note).",
+    )
     args = parser.parse_args()
 
     attribution = f"\n\n_Triggered by @{args.triggered_by}._" if args.triggered_by else ""
     header = f"## 🔍 Cursor Review — Consolidated panel{attribution}"
+    if args.notice:
+        # Surface a degradation banner (judge failed → raw panel findings) right
+        # under the title so every rendered body carries it.
+        header += f"\n\n{neutralize_mentions(args.notice)}"
 
     if args.error_message:
         post_error_review(args.repo, args.pr_number, args.commit_sha, header, args.error_message)

diff --git a/.github/cursor-review/prompt-judge.md b/.github/cursor-review/prompt-judge.md
@@ -1,3 +1,8 @@
+RESPOND WITH ONLY A JSON ARRAY. Your entire response must be a single JSON
+array of finding objects (or `[]` if none) — no prose, no preamble, no
+explanation, no markdown code fences before or after it. Do not narrate your
+reasoning. The exact element schema is specified at the end of this prompt.
+
 You are a senior software engineer adjudicating findings from a panel of AI
 code reviewers. The panel ran a 4-lab × 2-review-type matrix (8 cells total):
 - Labs: OpenAI, Anthropic, Google, Moonshot

diff --git a/.github/cursor-review/tests/test_extract_findings.py b/.github/cursor-review/tests/test_extract_findings.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+"""Regression tests for extract-findings.py JSON recovery.
+
+The judge/consolidate step intermittently returns valid findings JSON wrapped
+in a sentence or two of prose. Discarding the whole run on that (the BE-1916
+parse_error class — hit on ComfyUI #487 and Alex's PR) is the bug these tests
+guard against: prose-wrapped JSON must be recovered, not thrown away.
+
+Run: python3 .github/cursor-review/tests/test_extract_findings.py
+"""
+
+import importlib.util
+import json
+import os
+import tempfile
+import unittest
+
+# extract-findings.py has a hyphen, so import it by path rather than `import`.
+_MODULE_PATH = os.path.join(os.path.dirname(__file__), "..", "extract-findings.py")
+_spec = importlib.util.spec_from_file_location("extract_findings", _MODULE_PATH)
+ef = importlib.util.module_from_spec(_spec)
+_spec.loader.exec_module(ef)
+
+
+# A real-finding payload reused across cases.
+FINDINGS = [
+    {
+        "file": "internal/api/handler.go",
+        "line": 42,
+        "side": "RIGHT",
+        "severity": "high",
+        "body": "User-supplied filename reaches os.Open without traversal checks.",
+    },
+    {
+        "file": "internal/worker/upload.go",
+        "line": 118,
+        "side": "RIGHT",
+        "severity": "medium",
+        "body": "Context is cancelled before the upload completes [see hunk], losing data.",
+    },
+]
+
+
+def _findings(raw):
+    """Mirror main()'s parse → coerce pipeline."""
+    return ef.coerce_findings_list(ef.parse_json_findings(raw))
+
+
+class ParseJsonFindingsTest(unittest.TestCase):
+    def test_bare_array(self):
+        self.assertEqual(_findings(json.dumps(FINDINGS)), FINDINGS)
+
+    def test_empty_array(self):
+        self.assertEqual(_findings("[]"), [])
+
+    def test_prose_wrapped_array_487(self):
+        # The #487 failure shape: a prose verdict, THEN the JSON array. The
+        # prose even contains a bracket pair, which is what broke the old naive
+        # first-`[`/last-`]` slice.
+        raw = (
+            "Based on my review of the actual code, I can adjudicate the two "
+            "findings [both raised by multiple reviewers] as follows. Here is "
+            "the consolidated result:\n\n" + json.dumps(FINDINGS) + "\n\n"
+            "These are the only items that rise to the bar."
+        )
+        self.assertEqual(_findings(raw), FINDINGS)
+
+    def test_fenced_json_block(self):
+        raw = (
+            "Sure — here are the findings:\n\n```json\n"
+            + json.dumps(FINDINGS)
+            + "\n```\nLet me know if you need more."
+        )
+        self.assertEqual(_findings(raw), FINDINGS)
+
+    def test_fenced_block_no_lang(self):
+        raw = "Result:\n\n```\n" + json.dumps(FINDINGS) + "\n```\n"
+        self.assertEqual(_findings(raw), FINDINGS)
+
+    def test_object_wrapped_findings(self):
+        raw = "Here you go:\n" + json.dumps({"findings": FINDINGS})
+        self.assertEqual(_findings(raw), FINDINGS)
+
+    def test_brackets_inside_string_literals(self):
+        # Brackets inside a body string must not confuse the balanced scanner.
+        payload = [{"file": "a.py", "line": 1, "side": "RIGHT", "body": "uses arr[i] and m['k']"}]
+        raw = "Verdict below.\n" + json.dumps(payload)
+        self.assertEqual(_findings(raw), payload)
+
+    def test_genuinely_malformed_returns_none(self):
+        self.assertIsNone(_findings("I reviewed the code and found nothing actionable."))
+
+    def test_truncated_json_returns_none(self):
+        # An unterminated array can't be recovered — must fail, not half-parse.
+        self.assertIsNone(_findings('[{"file": "a.py", "line": 1, "body": "oops"'))
+
+    def test_scalar_is_not_findings(self):
+        # A bare number is valid JSON but never a findings payload.
+        self.assertIsNone(_findings("42"))
+
+    def test_object_without_findings_key(self):
+        self.assertIsNone(_findings('{"summary": "looks good", "count": 0}'))
+
+
+class MainEndToEndTest(unittest.TestCase):
+    """Drive main() the way the workflow does, asserting the status field."""
+
+    def _run(self, raw_text):
+        with tempfile.TemporaryDirectory() as d:
+            raw_path = os.path.join(d, "raw.txt")
+            out_path = os.path.join(d, "out.json")
+            with open(raw_path, "w", encoding="utf-8") as f:
+                f.write(raw_text)
+            import sys
+
+            argv = sys.argv
+            sys.argv = [
+                "extract-findings.py",
+                "--raw", raw_path,
+                "--out", out_path,
+                "--model", "judge-model",
+                "--review-type", "judge",
+            ]
+            try:
+                ef.main()
+            finally:
+                sys.argv = argv
+            with open(out_path, encoding="utf-8") as f:
+                return json.load(f)
+
+    def test_prose_wrapped_parses_ok(self):
+        raw = "After review, my verdict is:\n\n" + json.dumps(FINDINGS)
+        record = self._run(raw)
+        self.assertEqual(record["status"], "ok")
+        self.assertEqual(record["findings"], FINDINGS)
+
+    def test_malformed_is_parse_error(self):
+        record = self._run("I could not find anything worth flagging.")
+        self.assertEqual(record["status"], "parse_error")
+        self.assertEqual(record["findings"], [])
+
+    def test_empty_is_empty(self):
+        record = self._run("   \n  ")
+        self.assertEqual(record["status"], "empty")
+
+
+if __name__ == "__main__":
+    unittest.main()