diff --git a/.github/cursor-review/extract-findings.py b/.github/cursor-review/extract-findings.py
index 8c774eb..42d0a08 100644
--- a/.github/cursor-review/extract-findings.py
+++ b/.github/cursor-review/extract-findings.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python3
 """Parse raw cursor-agent output into a normalized findings record.
 
-Used by per-cell matrix steps. Each cell calls this to convert the model's
-raw stdout into a JSON file the consolidate step can ingest. The output is
-always structured — even on parse failures or empty output — so the
-consolidate step has a uniform input.
+Used by per-cell matrix steps AND by the judge/consolidate step. Each caller
+converts the model's raw stdout into a JSON file the next step can ingest. The
+output is always structured — even on parse failures or empty output — so the
+downstream step has a uniform input.
 
 Output shape:
     {
@@ -21,35 +21,110 @@
 import re
 
 
+def _try_load(snippet: str):
+    """json.loads `snippet`, returning the value only if it's a list or dict.
+
+    A bare number/string/bool is never a findings payload, so reject it — this
+    keeps the brace-scan below from "succeeding" on a stray scalar.
+    """
+    try:
+        value = json.loads(snippet)
+    except (json.JSONDecodeError, ValueError):
+        return None
+    return value if isinstance(value, (list, dict)) else None
+
+
+def _iter_json_candidates(text: str):
+    """Yield each top-level balanced {...} / [...] region embedded in `text`.
+
+    String- and escape-aware: braces or brackets inside JSON string literals
+    don't throw off the nesting count, so prose like `... the findings […] are`
+    surrounding a real array doesn't corrupt the match the way a naive
+    first-`[`/last-`]` slice does. Regions are yielded in document order; the
+    caller parses each and takes the first that loads.
+    """
+    openers = {"{", "["}
+    closers = {"}", "]"}
+    i, n = 0, len(text)
+    while i < n:
+        if text[i] not in openers:
+            i += 1
+            continue
+        depth = 0
+        in_str = False
+        escape = False
+        j = i
+        while j < n:
+            c = text[j]
+            if in_str:
+                if escape:
+                    escape = False
+                elif c == "\\":
+                    escape = True
+                elif c == '"':
+                    in_str = False
+            elif c == '"':
+                in_str = True
+            elif c in openers:
+                depth += 1
+            elif c in closers:
+                depth -= 1
+                if depth == 0:
+                    yield text[i : j + 1]
+                    break
+            j += 1
+        # Resume scanning after this region (or after an unbalanced opener).
+        i = j + 1
+
+
 def parse_json_findings(raw_text: str):
-    """Extract a JSON array from raw model output, tolerating surrounding prose.
+    """Extract a JSON value (array or object) from raw model output.
 
-    Returns the parsed value, or None if no JSON array could be located.
+    Tolerates surrounding prose and markdown fences. Returns the parsed value
+    (list or dict), or None if no JSON could be located. Layered most- to
+    least-strict so a clean response takes the fast path:
+
+    1. The whole output is JSON.
+    2. A fenced ```json (or bare ```) block holds the JSON.
+    3. A balanced {...}/[...] region is embedded in prose.
     """
     text = raw_text.strip()
-    try:
-        return json.loads(text)
-    except json.JSONDecodeError:
-        pass
-
-    fenced = re.search(r"```(?:json)?\s*\n(.*?)```", text, re.DOTALL)
-    if fenced:
-        try:
-            return json.loads(fenced.group(1).strip())
-        except json.JSONDecodeError:
-            pass
-
-    start = text.find("[")
-    end = text.rfind("]")
-    if start != -1 and end != -1 and end > start:
-        try:
-            return json.loads(text[start : end + 1])
-        except json.JSONDecodeError:
-            pass
+
+    parsed = _try_load(text)
+    if parsed is not None:
+        return parsed
+
+    for match in re.finditer(r"```(?:json)?\s*\n(.*?)```", text, re.DOTALL):
+        parsed = _try_load(match.group(1).strip())
+        if parsed is not None:
+            return parsed
+
+    for candidate in _iter_json_candidates(text):
+        parsed = _try_load(candidate)
+        if parsed is not None:
+            return parsed
 
     return None
 
 
+def coerce_findings_list(parsed):
+    """Reduce a parsed JSON value to the findings list, or None if it isn't one.
+
+    The panel cells and judge are all asked for a bare JSON array, but a model
+    intermittently wraps it as `{"findings": [...]}` (or a near-synonym key).
+    Unwrap those so a well-formed-but-wrapped response parses instead of being
+    discarded as a parse_error.
+    """
+    if isinstance(parsed, list):
+        return parsed
+    if isinstance(parsed, dict):
+        for key in ("findings", "results", "items", "reviews"):
+            value = parsed.get(key)
+            if isinstance(value, list):
+                return value
+    return None
+
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--raw", required=True, help="Path to raw cursor-agent output")
@@ -75,7 +150,7 @@ def main():
             json.dump(record, f)
         return
 
-    findings = parse_json_findings(raw)
+    findings = coerce_findings_list(parse_json_findings(raw))
 
     if findings is None:
         # Truncate raw so artifacts stay small even on chatty parse failures.
@@ -84,12 +159,6 @@ def main():
             error=f"Could not parse JSON findings from output. First 500 chars:\n{raw[:500]}",
             findings=[],
         )
-    elif not isinstance(findings, list):
-        record.update(
-            status="parse_error",
-            error=f"Output parsed but is not an array (got {type(findings).__name__}).",
-            findings=[],
-        )
     else:
         record.update(status="ok", findings=findings)
 
diff --git a/.github/cursor-review/post-review.py b/.github/cursor-review/post-review.py
index 118a953..9a83bfe 100644
--- a/.github/cursor-review/post-review.py
+++ b/.github/cursor-review/post-review.py
@@ -262,10 +262,19 @@ def main():
     parser.add_argument("--commit-sha", required=True)
     parser.add_argument("--triggered-by", default=None)
     parser.add_argument("--error-message", default=None, help="If set, post an error review with this message")
+    parser.add_argument(
+        "--notice",
+        default=None,
+        help="Banner prepended to the review body (e.g. a judge-failed degradation note).",
+    )
     args = parser.parse_args()
 
     attribution = f"\n\n_Triggered by @{args.triggered_by}._" if args.triggered_by else ""
     header = f"## 🔍 Cursor Review — Consolidated panel{attribution}"
+    if args.notice:
+        # Surface a degradation banner (judge failed → raw panel findings) right
+        # under the title so every rendered body carries it.
+        header += f"\n\n{neutralize_mentions(args.notice)}"
 
     if args.error_message:
         post_error_review(args.repo, args.pr_number, args.commit_sha, header, args.error_message)
diff --git a/.github/cursor-review/prompt-judge.md b/.github/cursor-review/prompt-judge.md
index a372fb8..7af5d28 100644
--- a/.github/cursor-review/prompt-judge.md
+++ b/.github/cursor-review/prompt-judge.md
@@ -1,3 +1,8 @@
+RESPOND WITH ONLY A JSON ARRAY. Your entire response must be a single JSON
+array of finding objects (or `[]` if none) — no prose, no preamble, no
+explanation, no markdown code fences before or after it. Do not narrate your
+reasoning. The exact element schema is specified at the end of this prompt.
+
 You are a senior software engineer adjudicating findings from a panel of AI
 code reviewers. The panel ran a 4-lab × 2-review-type matrix (8 cells total):
 - Labs: OpenAI, Anthropic, Google, Moonshot
diff --git a/.github/cursor-review/tests/test_extract_findings.py b/.github/cursor-review/tests/test_extract_findings.py
new file mode 100644
index 0000000..e735138
--- /dev/null
+++ b/.github/cursor-review/tests/test_extract_findings.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+"""Regression tests for extract-findings.py JSON recovery.
+
+The judge/consolidate step intermittently returns valid findings JSON wrapped
+in a sentence or two of prose. Discarding the whole run on that (the BE-1916
+parse_error class — hit on ComfyUI #487 and Alex's PR) is the bug these tests
+guard against: prose-wrapped JSON must be recovered, not thrown away.
+
+Run: python3 .github/cursor-review/tests/test_extract_findings.py
+"""
+
+import importlib.util
+import json
+import os
+import tempfile
+import unittest
+
+# extract-findings.py has a hyphen, so import it by path rather than `import`.
+_MODULE_PATH = os.path.join(os.path.dirname(__file__), "..", "extract-findings.py")
+_spec = importlib.util.spec_from_file_location("extract_findings", _MODULE_PATH)
+ef = importlib.util.module_from_spec(_spec)
+_spec.loader.exec_module(ef)
+
+
+# A real-finding payload reused across cases.
+FINDINGS = [
+    {
+        "file": "internal/api/handler.go",
+        "line": 42,
+        "side": "RIGHT",
+        "severity": "high",
+        "body": "User-supplied filename reaches os.Open without traversal checks.",
+    },
+    {
+        "file": "internal/worker/upload.go",
+        "line": 118,
+        "side": "RIGHT",
+        "severity": "medium",
+        "body": "Context is cancelled before the upload completes [see hunk], losing data.",
+    },
+]
+
+
+def _findings(raw):
+    """Mirror main()'s parse → coerce pipeline."""
+    return ef.coerce_findings_list(ef.parse_json_findings(raw))
+
+
+class ParseJsonFindingsTest(unittest.TestCase):
+    def test_bare_array(self):
+        self.assertEqual(_findings(json.dumps(FINDINGS)), FINDINGS)
+
+    def test_empty_array(self):
+        self.assertEqual(_findings("[]"), [])
+
+    def test_prose_wrapped_array_487(self):
+        # The #487 failure shape: a prose verdict, THEN the JSON array. The
+        # prose even contains a bracket pair, which is what broke the old naive
+        # first-`[`/last-`]` slice.
+        raw = (
+            "Based on my review of the actual code, I can adjudicate the two "
+            "findings [both raised by multiple reviewers] as follows. Here is "
+            "the consolidated result:\n\n" + json.dumps(FINDINGS) + "\n\n"
+            "These are the only items that rise to the bar."
+        )
+        self.assertEqual(_findings(raw), FINDINGS)
+
+    def test_fenced_json_block(self):
+        raw = (
+            "Sure — here are the findings:\n\n```json\n"
+            + json.dumps(FINDINGS)
+            + "\n```\nLet me know if you need more."
+        )
+        self.assertEqual(_findings(raw), FINDINGS)
+
+    def test_fenced_block_no_lang(self):
+        raw = "Result:\n\n```\n" + json.dumps(FINDINGS) + "\n```\n"
+        self.assertEqual(_findings(raw), FINDINGS)
+
+    def test_object_wrapped_findings(self):
+        raw = "Here you go:\n" + json.dumps({"findings": FINDINGS})
+        self.assertEqual(_findings(raw), FINDINGS)
+
+    def test_brackets_inside_string_literals(self):
+        # Brackets inside a body string must not confuse the balanced scanner.
+        payload = [{"file": "a.py", "line": 1, "side": "RIGHT", "body": "uses arr[i] and m['k']"}]
+        raw = "Verdict below.\n" + json.dumps(payload)
+        self.assertEqual(_findings(raw), payload)
+
+    def test_genuinely_malformed_returns_none(self):
+        self.assertIsNone(_findings("I reviewed the code and found nothing actionable."))
+
+    def test_truncated_json_returns_none(self):
+        # An unterminated array can't be recovered — must fail, not half-parse.
+        self.assertIsNone(_findings('[{"file": "a.py", "line": 1, "body": "oops"'))
+
+    def test_scalar_is_not_findings(self):
+        # A bare number is valid JSON but never a findings payload.
+        self.assertIsNone(_findings("42"))
+
+    def test_object_without_findings_key(self):
+        self.assertIsNone(_findings('{"summary": "looks good", "count": 0}'))
+
+
+class MainEndToEndTest(unittest.TestCase):
+    """Drive main() the way the workflow does, asserting the status field."""
+
+    def _run(self, raw_text):
+        with tempfile.TemporaryDirectory() as d:
+            raw_path = os.path.join(d, "raw.txt")
+            out_path = os.path.join(d, "out.json")
+            with open(raw_path, "w", encoding="utf-8") as f:
+                f.write(raw_text)
+            import sys
+
+            argv = sys.argv
+            sys.argv = [
+                "extract-findings.py",
+                "--raw", raw_path,
+                "--out", out_path,
+                "--model", "judge-model",
+                "--review-type", "judge",
+            ]
+            try:
+                ef.main()
+            finally:
+                sys.argv = argv
+            with open(out_path, encoding="utf-8") as f:
+                return json.load(f)
+
+    def test_prose_wrapped_parses_ok(self):
+        raw = "After review, my verdict is:\n\n" + json.dumps(FINDINGS)
+        record = self._run(raw)
+        self.assertEqual(record["status"], "ok")
+        self.assertEqual(record["findings"], FINDINGS)
+
+    def test_malformed_is_parse_error(self):
+        record = self._run("I could not find anything worth flagging.")
+        self.assertEqual(record["status"], "parse_error")
+        self.assertEqual(record["findings"], [])
+
+    def test_empty_is_empty(self):
+        record = self._run("   \n  ")
+        self.assertEqual(record["status"], "empty")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/.github/workflows/cursor-review.yml b/.github/workflows/cursor-review.yml
index 5531a15..1561c0f 100644
--- a/.github/workflows/cursor-review.yml
+++ b/.github/workflows/cursor-review.yml
@@ -508,12 +508,16 @@ jobs:
         env:
           CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
         run: |
-          cursor-agent \
-            --print \
-            --trust \
-            --model "$JUDGE_MODEL" \
-            < /tmp/judge-prompt.txt \
-            > /tmp/judge-raw.txt 2>/tmp/judge-stderr.txt || true
+          run_judge() {
+            cursor-agent \
+              --print \
+              --trust \
+              --model "$JUDGE_MODEL" \
+              < "$1" \
+              > /tmp/judge-raw.txt 2>/tmp/judge-stderr.txt || true
+          }
+
+          run_judge /tmp/judge-prompt.txt
 
           echo "=== Judge raw output (first 1000 chars) ==="
           head -c 1000 /tmp/judge-raw.txt
@@ -521,6 +525,35 @@ jobs:
           echo "=== Judge stderr ==="
           cat /tmp/judge-stderr.txt
 
+          # LLMs intermittently wrap the findings array in prose. extract-findings.py
+          # recovers most of that (fenced block / embedded brace-match), but if it
+          # still can't parse, retry the judge ONCE with a terse strict-JSON
+          # reminder appended — a single retry clears the large majority of these
+          # transient parse failures. The retry overwrites /tmp/judge-raw.txt, so
+          # the parse step below reads whichever attempt the judge last produced.
+          python3 "$CURSOR_REVIEW_ASSETS/extract-findings.py" \
+            --raw /tmp/judge-raw.txt \
+            --out /tmp/judge-probe.json \
+            --model "$JUDGE_MODEL" \
+            --review-type judge
+          PROBE_STATUS=$(python3 -c "import json; print(json.load(open('/tmp/judge-probe.json')).get('status','?'))")
+
+          if [ "$PROBE_STATUS" != "ok" ]; then
+            echo "Judge output did not parse (status=$PROBE_STATUS) — retrying once with a strict-JSON reminder."
+            {
+              cat /tmp/judge-prompt.txt
+              echo ""
+              echo "=== REMINDER ==="
+              echo "Your previous response could not be parsed as JSON. Return ONLY the JSON array of findings — no prose, no explanation, no markdown code fences. Your entire response must be a single JSON array (return [] if there are no findings)."
+            } > /tmp/judge-prompt-retry.txt
+            run_judge /tmp/judge-prompt-retry.txt
+            echo "=== Judge retry raw output (first 1000 chars) ==="
+            head -c 1000 /tmp/judge-raw.txt
+            echo ""
+            echo "=== Judge retry stderr ==="
+            cat /tmp/judge-stderr.txt
+          fi
+
       - name: Build consolidated findings file
         id: consolidated
         env:
@@ -550,9 +583,13 @@ jobs:
 
           # Combine judge findings with panel metadata (model, review_type,
           # status only — drop full per-cell findings to keep the post payload
-          # small) for the post-review script.
+          # small) for the post-review script. If the judge FAILED (still no
+          # parseable verdict after the retry), fall back to the raw per-cell
+          # findings so the run surfaces what the panel found instead of a bare
+          # "Review failed".
           python3 - <<'PY'
-          import json
+          import json, os
+
           with open("/tmp/panel.json", encoding="utf-8") as f:
               panel = json.load(f)
           panel_meta = [
@@ -561,11 +598,39 @@ jobs:
           ]
           with open("/tmp/judge-findings.json", encoding="utf-8") as f:
               judge = json.load(f)
-          findings = judge.get("findings") or []
+
+          degraded = judge.get("status") != "ok"
+          if not degraded:
+              findings = judge.get("findings") or []
+          else:
+              # Judge unusable — union the per-cell findings, deduping identical
+              # (file, line, body) across cells to cut the obvious cross-cell
+              # repetition the judge would normally collapse.
+              seen, findings = set(), []
+              for cell in panel:
+                  if cell.get("status") != "ok":
+                      continue
+                  for fnd in cell.get("findings") or []:
+                      if not isinstance(fnd, dict):
+                          continue
+                      key = (fnd.get("file"), fnd.get("line"), fnd.get("body"))
+                      if key in seen:
+                          continue
+                      seen.add(key)
+                      findings.append(fnd)
+
           consolidated = {"findings": findings, "panel": panel_meta}
           with open("/tmp/consolidated.json", "w", encoding="utf-8") as f:
               json.dump(consolidated, f)
-          print(f"Consolidated: {len(findings)} judge finding(s), {len(panel_meta)} panel cells.")
+
+          with open(os.environ["GITHUB_OUTPUT"], "a") as g:
+              g.write(f"consolidated_count={len(findings)}\n")
+              g.write(f"degraded={'true' if degraded else 'false'}\n")
+          print(
+              f"Consolidated: {len(findings)} finding(s) "
+              f"({'degraded panel fallback' if degraded else 'judge-adjudicated'}), "
+              f"{len(panel_meta)} panel cells."
+          )
           PY
 
       - name: Resolve triggerer
@@ -610,12 +675,21 @@ jobs:
           HEAD_SHA: ${{ github.event.pull_request.head.sha }}
           TRIGGERED_BY: ${{ steps.triggerer.outputs.triggered_by }}
           JUDGE_STATUS: ${{ steps.consolidated.outputs.judge_status }}
+          CONSOLIDATED_COUNT: ${{ steps.consolidated.outputs.consolidated_count }}
         run: |
-          # If the judge call itself failed but the panel had content, post
-          # an error review so we don't silently misreport. The "ok with
-          # zero findings" path stays normal — that's the legitimate
-          # "no high-signal findings" case.
-          if [ "$JUDGE_STATUS" != "ok" ]; then
+          # Three cases:
+          # 1. Judge ok                       → normal consolidated review.
+          # 2. Judge failed, panel has findings → post the raw per-cell findings
+          #    with a notice banner (degraded, but NOT a bare "Review failed").
+          # 3. Judge failed, no findings       → honest error review.
+          if [ "$JUDGE_STATUS" = "ok" ]; then
+            python3 "$CURSOR_REVIEW_ASSETS/post-review.py" \
+              --findings /tmp/consolidated.json \
+              --pr-number "$PR_NUMBER" \
+              --repo "$REPO" \
+              --commit-sha "$HEAD_SHA" \
+              --triggered-by "$TRIGGERED_BY"
+          elif [ "${CONSOLIDATED_COUNT:-0}" != "0" ]; then
             JUDGE_ERROR=$(python3 -c "import json; print(json.load(open('/tmp/judge-findings.json')).get('error','(no error message)'))")
             python3 "$CURSOR_REVIEW_ASSETS/post-review.py" \
               --findings /tmp/consolidated.json \
@@ -623,14 +697,16 @@ jobs:
               --repo "$REPO" \
               --commit-sha "$HEAD_SHA" \
               --triggered-by "$TRIGGERED_BY" \
-              --error-message "Judge call failed (status=${JUDGE_STATUS}): ${JUDGE_ERROR}"
+              --notice "⚠️ The judge step could not produce a parseable verdict after a retry (status=${JUDGE_STATUS}). The findings below are the raw, un-adjudicated panel output — they may contain duplicates or false positives the judge would normally filter."
           else
+            JUDGE_ERROR=$(python3 -c "import json; print(json.load(open('/tmp/judge-findings.json')).get('error','(no error message)'))")
             python3 "$CURSOR_REVIEW_ASSETS/post-review.py" \
               --findings /tmp/consolidated.json \
               --pr-number "$PR_NUMBER" \
               --repo "$REPO" \
               --commit-sha "$HEAD_SHA" \
-              --triggered-by "$TRIGGERED_BY"
+              --triggered-by "$TRIGGERED_BY" \
+              --error-message "Judge call failed (status=${JUDGE_STATUS}): ${JUDGE_ERROR}"
           fi
 
   notify-start:
diff --git a/.github/workflows/test-cursor-review-scripts.yml b/.github/workflows/test-cursor-review-scripts.yml
new file mode 100644
index 0000000..fde5e6e
--- /dev/null
+++ b/.github/workflows/test-cursor-review-scripts.yml
@@ -0,0 +1,38 @@
+name: Test cursor-review scripts
+
+# Runs the Python unit tests for the cursor-review helper scripts (the
+# extract-findings.py JSON parser in particular). These scripts drive the
+# review panel + judge consolidation, so a parser regression silently breaks
+# every consumer repo's review — cheap to guard with a unit run on change.
+
+on:
+  pull_request:
+    paths:
+      - '.github/cursor-review/**'
+      - '.github/workflows/test-cursor-review-scripts.yml'
+  push:
+    branches: [main]
+    paths:
+      - '.github/cursor-review/**'
+      - '.github/workflows/test-cursor-review-scripts.yml'
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    name: unittest
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+
+      - name: Run cursor-review script tests
+        run: python3 -m unittest discover -s .github/cursor-review/tests -p 'test_*.py' -v