diff --git a/.gitignore b/.gitignore
index 4782d94..928ed09 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,18 @@ spotify_duration_rejections.csv
 spotify_album_context_audit.csv
 spotify_orphaned_tracks.csv
 
+# Commons imagery audit artifacts (regenerate via scripts/audit_commons_imagery.py
+# + scripts/build_commons_audit_viewer.py). Transient worklists, not source.
+commons_imagery_audit_*.csv
+commons_imagery_audit_*.html
+
+# Performer->Wikipedia ground-truth pipeline. The verification *queue* and the
+# generated viewer HTML are regenerable scratch; the human-verified ground-truth
+# export (performer_wikipedia_groundtruth*.json) is NOT ignored and should be
+# committed — it's the authoritative manual dataset.
+data/ground_truth/wikipedia_queue_*.json
+data/ground_truth/*.html
+
 # Claude Code local state (agent memory, worktrees, per-user settings)
 .claude/
 
diff --git a/backend/core/commons_imagery.py b/backend/core/commons_imagery.py
index 124188a..e7b19e1 100644
--- a/backend/core/commons_imagery.py
+++ b/backend/core/commons_imagery.py
@@ -204,71 +204,86 @@ def _to_int(v) -> Optional[int]:
 # Category resolution
 # ---------------------------------------------------------------------------
 
-def resolve_commons_category(session: requests.Session, artist_name: str,
-                             wikipedia_url: Optional[str] = None) -> Optional[str]:
-    """Find the performer's Wikimedia Commons category via Wikidata P373,
-    falling back to a verified "Category:<Name>" guess."""
-    qid = None
-    if wikipedia_url:
-        m = re.search(r"/wiki/(.+)$", wikipedia_url)
-        if m:
-            title = requests.utils.unquote(m.group(1))
-            try:
-                r = session.get(WIKIPEDIA_API, params={
-                    "action": "query", "format": "json", "titles": title,
-                    "prop": "pageprops", "ppprop": "wikibase_item",
-                }, timeout=15)
-                pages = r.json().get("query", {}).get("pages", {})
-                page = next(iter(pages.values()), {})
-                qid = page.get("pageprops", {}).get("wikibase_item")
-            except Exception as e:
-                logger.debug("Wikipedia->QID lookup failed: %s", e)
+def _wikidata_claims(session: requests.Session,
+                     qids: List[str]) -> Dict[str, dict]:
+    """Return {qid: entity} (claims only) for the QIDs, in one batched request."""
+    if not qids:
+        return {}
+    try:
+        r = session.get(WIKIDATA_API, params={
+            "action": "wbgetentities", "ids": "|".join(qids),  # pipe-joined id list
+            "format": "json", "props": "claims",
+        }, timeout=15)
+        return r.json().get("entities", {})  # callers read entity["claims"]
+    except Exception as e:  # best-effort: any failure degrades to "no data"
+        logger.debug("Wikidata claims lookup failed: %s", e)
+        return {}
 
-    if not qid:
-        try:
-            r = session.get(WIKIDATA_API, params={
-                "action": "wbsearchentities", "search": artist_name,
-                "language": "en", "format": "json", "type": "item", "limit": 5,
-            }, timeout=15)
-            hits = r.json().get("search", [])
-            qid = hits[0]["id"] if hits else None
-        except Exception as e:
-            logger.debug("Wikidata search failed: %s", e)
 
-    if qid:
-        try:
-            r = session.get(WIKIDATA_API, params={
-                "action": "wbgetentities", "ids": qid, "format": "json",
-                "props": "claims",
-            }, timeout=15)
-            claims = r.json().get("entities", {}).get(qid, {}).get("claims", {})
-            p373 = claims.get("P373")
-            if p373:
-                cat = p373[0]["mainsnak"]["datavalue"]["value"]
-                logger.info("Resolved Commons category via Wikidata %s: %s", qid, cat)
-                return f"Category:{cat}"
-        except Exception as e:
-            logger.debug("Wikidata P373 lookup failed: %s", e)
+def _commons_category_from_claims(claims: Dict[str, Any]) -> Optional[str]:
+    """Return the P373 Commons category as "Category:<…>", or None if absent/malformed."""
+    p373 = claims.get("P373")
+    if not p373:
+        return None
+    try:
+        return f"Category:{p373[0]['mainsnak']['datavalue']['value']}"  # first statement only
+    except (KeyError, TypeError):  # statement lacks the expected mainsnak/datavalue shape
+        return None
 
-    guess = f"Category:{artist_name}"
-    if _category_exists(session, guess):
-        logger.info("Using guessed Commons category: %s", guess)
-        return guess
 
-    logger.warning("Could not resolve a Commons category for %r", artist_name)
-    return None
+def resolve_commons_category(session: requests.Session, artist_name: str,
+                             wikipedia_url: Optional[str] = None) -> Optional[str]:
+    """Resolve the performer's Commons category via their Wikipedia article.
+
+    We deliberately resolve imagery ONLY for performers that have a Wikipedia
+    URL. The app already does significant work to attach the *correct* Wikipedia
+    article to a performer, so it is a trusted identity anchor: its canonical
+    Wikidata item gives us the right Commons category (P373).
+
+    There is NO name-based fallback. Searching Wikidata by a bare name is
+    unreliable for common names — "Andrew Williams" matches an archaeologist
+    whose Commons category is full of catalogued coin photos, not a jazz
+    musician — and a same-name match (even a verified human) silently feeds
+    unrelated images into the ranking pipeline. A performer without a Wikipedia
+    link simply gets no Commons imagery; no imagery beats the wrong imagery.
+    """
+    if not wikipedia_url:
+        logger.info("No Wikipedia URL for %r; skipping Commons imagery", artist_name)
+        return None
 
+    m = re.search(r"/wiki/(.+)$", wikipedia_url)
+    if not m:
+        logger.warning("Unparseable Wikipedia URL %r for %r; skipping imagery",
+                       wikipedia_url, artist_name)
+        return None
 
-def _category_exists(session: requests.Session, category: str) -> bool:
+    title = requests.utils.unquote(m.group(1))
     try:
-        r = session.get(COMMONS_API, params={
-            "action": "query", "format": "json", "titles": category,
-            "prop": "info",
+        r = session.get(WIKIPEDIA_API, params={
+            "action": "query", "format": "json", "titles": title,
+            "prop": "pageprops", "ppprop": "wikibase_item",
         }, timeout=15)
         pages = r.json().get("query", {}).get("pages", {})
-        return all(int(pid) > 0 for pid in pages)
-    except Exception:
-        return False
+        page = next(iter(pages.values()), {})
+        qid = page.get("pageprops", {}).get("wikibase_item")
+    except Exception as e:
+        logger.debug("Wikipedia->QID lookup failed: %s", e)
+        return None
+
+    if not qid:
+        logger.info("Wikipedia article for %r has no Wikidata item; "
+                    "skipping imagery", artist_name)
+        return None
+
+    claims = _wikidata_claims(session, [qid]).get(qid, {}).get("claims", {})
+    cat = _commons_category_from_claims(claims)
+    if cat:
+        logger.info("Resolved Commons category via Wikidata %s: %s", qid, cat)
+        return cat
+
+    logger.info("Wikidata %s for %r has no Commons category (P373); "
+                "skipping imagery", qid, artist_name)
+    return None
 
 
 # ---------------------------------------------------------------------------
@@ -573,7 +588,13 @@ def analyze_and_rank(
     # processed one at a time and NOT retained — holding every candidate's
     # full-res bytes at once is what OOM'd the worker. The few images that get
     # reranked are re-downloaded in phase 3.
-    for r in records:
+    #
+    # This loop is the slowest silent stretch (a download + decode + analysis
+    # per candidate, up to max_candidates), so emit periodic progress to make
+    # it obvious the worker is alive rather than hung.
+    total = len(records)
+    logger.info("Phase 1: downloading + gating %d candidate(s)", total)
+    for i, r in enumerate(records, 1):
         img_bytes = download(session, r.url) or download(session, r.thumbnail_url)
         if not img_bytes:
             r.analysis = {"passed_gate": False, "reasons": ["download failed"],
@@ -592,6 +613,8 @@ def analyze_and_rank(
                                        vision=None).to_dict()
         r.quality_score = score
         del img_bytes  # release immediately
+        if i % 20 == 0 or i == total:
+            logger.info("Phase 1: analyzed %d/%d candidate(s)", i, total)
 
     # Phase 2: gate filter
     if config.do_gate:
diff --git a/backend/core/performer_commons_imagery.py b/backend/core/performer_commons_imagery.py
index 1c83f07..26c52e4 100644
--- a/backend/core/performer_commons_imagery.py
+++ b/backend/core/performer_commons_imagery.py
@@ -38,13 +38,24 @@
 def find_candidate_performer_ids(stale_days: int = DEFAULT_STALE_DAYS,
                                  limit: Optional[int] = None) -> list[str]:
     """UUIDs of performers due for an imagery (re)check: never checked, or
-    last checked more than `stale_days` ago. Newest performers first."""
+    last checked more than `stale_days` ago. Newest performers first.
+
+    Restricted to performers that have a Wikipedia URL (column or
+    external_links.wikipedia). The Commons resolver only trusts a performer's
+    validated Wikipedia article as an identity anchor — name-based matching is
+    unreliable for common names — so performers without one would be a guaranteed
+    no-op. Skipping them here avoids spending worker cycles and vision quota on
+    jobs that can never add imagery."""
     limit_clause = "LIMIT %s" if limit is not None else ""
     sql = f"""
         SELECT id
         FROM performers
-        WHERE last_imagery_check IS NULL
-           OR last_imagery_check < now() - make_interval(days => %s)
+        WHERE (last_imagery_check IS NULL
+               OR last_imagery_check < now() - make_interval(days => %s))
+          AND (
+                btrim(COALESCE(wikipedia_url, '')) <> ''
+             OR btrim(COALESCE(external_links->>'wikipedia', '')) <> ''
+          )
         ORDER BY created_at DESC
         {limit_clause}
     """
diff --git a/backend/scripts/audit_commons_imagery.py b/backend/scripts/audit_commons_imagery.py
new file mode 100644
index 0000000..6813a66
--- /dev/null
+++ b/backend/scripts/audit_commons_imagery.py
@@ -0,0 +1,232 @@
+#!/usr/bin/env python3
+"""
+Audit performer Commons imagery for wrong-category contamination.
+
+Background: earlier runs of the Commons imagery enricher fell back to a blind
+``Category:<Name>`` guess when Wikidata had no Commons-category claim. For
+common names that picked up an *unrelated* same-named person's category (e.g.
+an archaeologist's catalogued coin finds rather than photos of a musician).
+The resolver no longer does this — it now resolves a category only through the
+performer's own Wikipedia article, with no name-based fallback at all — but
+images already linked by the old path are still in the database.
+
+This script is READ-ONLY. For every performer that currently has at least one
+``wikimedia_commons`` image it:
+
+  1. Re-runs the *current* resolver (core.commons_imagery.resolve_commons_category)
+     with the same Wikipedia/external-link inputs the worker uses.
+  2. Classifies each existing Commons image:
+       NO_CATEGORY            - resolver now returns nothing (no Wikipedia URL,
+                                no Wikidata item/P373, or a failed API lookup).
+                                Whole performer is suspect; review manually.
+       NOT_IN_RESOLVED_CATEGORY - resolver returns a category, but this image's
+                                Commons pageid is NOT a member of it (walked at
+                                the worker's recurse depth of 0). Strong signal
+                                the image came from a different/old category.
+       OK                     - image's pageid is in the resolved category.
+
+Flagged rows are written to a CSV worklist for manual review / cleanup. The
+script never deletes anything.
+
+Usage:
+    python scripts/audit_commons_imagery.py                     # full sweep
+    python scripts/audit_commons_imagery.py --limit 200
+    python scripts/audit_commons_imagery.py --name "Andrew Williams"
+    python scripts/audit_commons_imagery.py --id <performer-uuid>
+    python scripts/audit_commons_imagery.py --since 2026-06-09T17:00:00
+    python scripts/audit_commons_imagery.py --all -o all_commons.csv
+"""
+
+import csv
+from datetime import datetime
+
+from script_base import ScriptBase, run_script
+from db_utils import get_db_connection
+from core import commons_imagery as ci
+
+# Mirror the worker's GatherConfig recurse depth (research_worker/handlers/commons.py
+# builds GatherConfig() without overriding recurse_subcats, so it stays 0).
+_RECURSE_SUBCATS = ci.GatherConfig().recurse_subcats
+_ACCEPTED_LICENSES = list(ci.GatherConfig().licenses)
+
+_PERFORMERS_WITH_COMMONS_SQL = """
+    SELECT
+        p.id,
+        p.name,
+        p.wikipedia_url,
+        p.external_links,
+        i.id   AS image_id,
+        i.url  AS image_url,
+        i.source_identifier,
+        i.source_page_url,
+        ai.is_primary,
+        ai.created_at AS linked_at
+    FROM artist_images ai
+    JOIN images i      ON i.id = ai.image_id
+    JOIN performers p  ON p.id = ai.performer_id
+    WHERE i.source = 'wikimedia_commons'
+    {where}
+    ORDER BY p.name, ai.display_order
+"""
+
+
+def _wikipedia_url(row) -> str | None:
+    """Pick the performer's Wikipedia URL: the column first, else external_links."""
+    column_value = (row.get("wikipedia_url") or "").strip()
+    if column_value:
+        return column_value
+    external = row.get("external_links") or {}
+    if not isinstance(external, dict):
+        return None
+    return (external.get("wikipedia") or "").strip() or None
+
+
+def _load_rows(name=None, performer_id=None, since=None, limit=None):
+    clauses, params = [], []
+    if performer_id:
+        clauses.append("p.id = %s")
+        params.append(performer_id)
+    if name:
+        clauses.append("LOWER(p.name) = LOWER(%s)")
+        params.append(name)
+    if since:
+        clauses.append("ai.created_at >= %s")
+        params.append(since)
+    where = ("AND " + " AND ".join(clauses)) if clauses else ""
+    sql = _PERFORMERS_WITH_COMMONS_SQL.format(where=where)
+    if limit:
+        sql += "\n    LIMIT %s"
+        params.append(limit)
+    with get_db_connection() as conn:
+        with conn.cursor() as cur:
+            cur.execute(sql, tuple(params))
+            return cur.fetchall()
+
+
+def _group_by_performer(rows):
+    """Bucket image rows by performer id (first row kept as "meta"), in order."""
+    by_performer = {}
+    for row in rows:
+        bundle = by_performer.setdefault(str(row["id"]), {"meta": row, "images": []})
+        bundle["images"].append(row)
+    return by_performer
+
+
+def _resolved_category_pageids(session, category):
+    """Collect the Commons pageids (stringified) of the files in `category`."""
+    files = ci.fetch_commons_category_files(
+        session, category, _ACCEPTED_LICENSES,
+        include_nkcr=False, recurse_subcats=_RECURSE_SUBCATS,
+    )
+    return {str(f.source_identifier) for f in files}
+
+
+def main() -> bool:
+    script = ScriptBase(
+        name="audit_commons_imagery",
+        description="Audit performer Commons imagery for wrong-category contamination",
+        epilog=__doc__,
+    )
+    group = script.parser.add_mutually_exclusive_group(required=False)
+    group.add_argument("--name", help="Audit a single performer by name")
+    group.add_argument("--id", help="Audit a single performer by UUID")
+    script.parser.add_argument("--since", default=None,
+                               help="Only images linked at/after this ISO timestamp "
+                                    "(e.g. 2026-06-09T17:00:00). Useful to focus on "
+                                    "a specific enrichment run.")
+    script.parser.add_argument("--limit", type=int, default=None,
+                               help="Cap the number of image rows scanned")
+    script.parser.add_argument("--all", action="store_true",
+                               help="Include OK rows in the CSV (default: flagged only)")
+    script.parser.add_argument("-o", "--output", default=None,
+                               help="Output CSV path (default: "
+                                    "commons_imagery_audit_<ts>.csv)")
+    args = script.parse_args()
+
+    script.print_header({
+        "SINGLE": args.name or args.id or False,
+        "SINCE": args.since or False,
+        "LIMIT": args.limit or False,
+        "INCLUDE OK": args.all,
+    })
+
+    rows = _load_rows(name=args.name, performer_id=args.id,
+                      since=args.since, limit=args.limit)
+    if not rows:
+        script.logger.info("No wikimedia_commons images matched the filters.")
+        return True
+
+    grouped = _group_by_performer(rows)
+    script.logger.info("Scanning %d image(s) across %d performer(s)",
+                       len(rows), len(grouped))
+
+    session = ci.make_session()
+    out_path = args.output or (
+        f"commons_imagery_audit_{datetime.now():%Y%m%d_%H%M%S}.csv")
+
+    counts = {"NO_CATEGORY": 0, "NOT_IN_RESOLVED_CATEGORY": 0, "OK": 0}
+    fieldnames = [
+        "performer_id", "performer_name", "verdict", "resolved_category",
+        "image_id", "image_url", "source_identifier", "source_page_url",
+        "is_primary", "linked_at",
+    ]
+    written = 0
+
+    with open(out_path, "w", newline="") as fh:
+        writer = csv.DictWriter(fh, fieldnames=fieldnames)
+        writer.writeheader()
+
+        for pid, bundle in grouped.items():
+            meta = bundle["meta"]
+            name = meta["name"]
+            category = ci.resolve_commons_category(
+                session, name, _wikipedia_url(meta))
+
+            member_pageids = set()
+            if category:
+                try:
+                    member_pageids = _resolved_category_pageids(session, category)
+                except Exception as e:  # network/category hiccup -> don't crash the sweep
+                    script.logger.warning(
+                        "Could not list %s for %s (%s); treating members as unknown",
+                        category, name, e)
+
+            for img in bundle["images"]:
+                if category is None:
+                    verdict = "NO_CATEGORY"
+                elif str(img["source_identifier"]) in member_pageids:
+                    verdict = "OK"
+                else:
+                    # Either the image isn't in the resolved category, or the
+                    # category couldn't be enumerated above (member_pageids
+                    # empty) — both warrant a manual look rather than a pass.
+                    verdict = "NOT_IN_RESOLVED_CATEGORY"
+                counts[verdict] += 1
+
+                if verdict == "OK" and not args.all:
+                    continue
+                writer.writerow({
+                    "performer_id": pid,
+                    "performer_name": name,
+                    "verdict": verdict,
+                    "resolved_category": category or "",
+                    "image_id": str(img["image_id"]),
+                    "image_url": img["image_url"],
+                    "source_identifier": img["source_identifier"],
+                    "source_page_url": img["source_page_url"] or "",
+                    "is_primary": img["is_primary"],
+                    "linked_at": img["linked_at"].isoformat() if img["linked_at"] else "",
+                })
+                written += 1
+
+    flagged = counts["NO_CATEGORY"] + counts["NOT_IN_RESOLVED_CATEGORY"]
+    script.logger.info("Done. flagged=%d (NO_CATEGORY=%d, "
+                       "NOT_IN_RESOLVED_CATEGORY=%d), ok=%d",
+                       flagged, counts["NO_CATEGORY"],
+                       counts["NOT_IN_RESOLVED_CATEGORY"], counts["OK"])
+    script.logger.info("Wrote %d row(s) to %s", written, out_path)
+    return True
+
+
+if __name__ == "__main__":
+    run_script(main)
diff --git a/backend/scripts/build_commons_audit_viewer.py b/backend/scripts/build_commons_audit_viewer.py
new file mode 100644
index 0000000..2599ccb
--- /dev/null
+++ b/backend/scripts/build_commons_audit_viewer.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python3
+"""
+Build a self-contained HTML viewer for a commons_imagery_audit CSV.
+
+The audit CSV (output of audit_commons_imagery.py) lists performer Commons
+images flagged for review. NO_CATEGORY means the resolver no longer finds a
+Wikipedia-anchored category for that performer — which is a *mix* of genuinely
+wrong imagery (a same-named person's photos) and genuinely correct imagery from
+performers who simply lack a Wikipedia URL. You can't bulk-delete it; you have
+to look. This viewer makes looking fast.
+
+Output is a single HTML file with the data embedded — no server, no network
+except Commons thumbnails. Open it with `open <file>.html`. It lets you:
+
+  - browse images grouped by performer, as lazy-loaded thumbnails
+  - click an image (or use per-performer bulk actions) to mark it for deletion
+  - mark a performer "reviewed" and hide reviewed ones to track progress
+  - export the marked rows as a delete-list CSV, or copy a ready DELETE
+    statement to run in psql
+
+Marks and review state persist in the browser's localStorage (keyed by the CSV
+filename), so you can close and resume. Nothing is written back to the DB.
+
+Usage:
+    python scripts/build_commons_audit_viewer.py commons_imagery_audit_<ts>.csv
+    python scripts/build_commons_audit_viewer.py audit.csv -o viewer.html
+"""
+
+import argparse
+import csv
+import json
+from pathlib import Path
+
+
+def _thumb_url(row: dict, width: int = 180) -> str:
+    """Build a width-limited Special:FilePath URL, else fall back to image_url."""
+    page_url = row.get("source_page_url") or ""
+    _, found, file_name = page_url.partition("/wiki/File:")
+    if not found:
+        return row.get("image_url") or ""
+    base = "https://commons.wikimedia.org/wiki/Special:FilePath"
+    return f"{base}/{file_name}?width={width}"
+
+
+def _load_records(csv_path: Path) -> list[dict]:
+    """Read the audit CSV into the per-image dicts embedded in the viewer.
+
+    Missing category/page-URL cells become ""; is_primary is parsed to bool."""
+    with open(csv_path, newline="") as fh:
+        return [{
+            "performer_id": row["performer_id"],
+            "performer_name": row["performer_name"],
+            "verdict": row["verdict"],
+            "resolved_category": row.get("resolved_category") or "",
+            "image_id": row["image_id"],
+            "image_url": row["image_url"],
+            "source_page_url": row.get("source_page_url") or "",
+            "is_primary": str(row.get("is_primary")).lower() == "true",
+            "thumb": _thumb_url(row),
+        } for row in csv.DictReader(fh)]
+
+
+_HTML_TEMPLATE = r"""<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>__TITLE__</title>
+<style>
+  :root { --bg:#11141a; --panel:#1b212c; --line:#2b3442; --txt:#e6edf3;
+          --muted:#8b97a7; --del:#e5484d; --ok:#3fb950; --accent:#388bfd; }
+  * { box-sizing: border-box; }
+  body { margin:0; font:14px/1.4 -apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif;
+         background:var(--bg); color:var(--txt); }
+  header { position:sticky; top:0; z-index:10; background:var(--panel);
+           border-bottom:1px solid var(--line); padding:10px 14px;
+           display:flex; flex-wrap:wrap; gap:10px; align-items:center; }
+  header h1 { font-size:15px; margin:0 12px 0 0; font-weight:600; }
+  .stat { color:var(--muted); font-size:12px; }
+  .stat b { color:var(--txt); }
+  input[type=search], select { background:var(--bg); color:var(--txt);
+           border:1px solid var(--line); border-radius:6px; padding:5px 8px; font-size:13px; }
+  input[type=search] { min-width:200px; }
+  button { background:var(--bg); color:var(--txt); border:1px solid var(--line);
+           border-radius:6px; padding:5px 10px; font-size:12px; cursor:pointer; }
+  button:hover { border-color:var(--accent); }
+  button.danger:hover { border-color:var(--del); color:var(--del); }
+  label.chk { color:var(--muted); font-size:12px; display:inline-flex; gap:5px; align-items:center; cursor:pointer; }
+  .spacer { flex:1; }
+  main { padding:14px; }
+  .perf { border:1px solid var(--line); border-radius:8px; margin-bottom:14px; background:var(--panel); }
+  .perf.reviewed { opacity:.55; }
+  .perf > .phead { display:flex; flex-wrap:wrap; gap:10px; align-items:center;
+                   padding:9px 12px; border-bottom:1px solid var(--line); position:sticky; top:52px; background:var(--panel); }
+  .pname { font-weight:600; }
+  .badge { font-size:11px; padding:2px 7px; border-radius:10px; border:1px solid var(--line); color:var(--muted); }
+  .badge.cat { color:var(--ok); border-color:#244a2e; }
+  .badge.nocat { color:var(--del); border-color:#4a2426; }
+  .pmeta { color:var(--muted); font-size:12px; }
+  .grid { display:grid; grid-template-columns:repeat(auto-fill,minmax(150px,1fr)); gap:10px; padding:12px; }
+  .card { border:2px solid transparent; border-radius:8px; overflow:hidden; background:var(--bg);
+          cursor:pointer; position:relative; }
+  .card.del { border-color:var(--del); }
+  .card.del .thumb { opacity:.4; }
+  .card .thumb { width:100%; height:150px; object-fit:cover; display:block; background:#0c0f14; }
+  .card .meta { padding:5px 7px; font-size:11px; }
+  .card .fn { color:var(--txt); word-break:break-word; max-height:32px; overflow:hidden; display:block; }
+  .card a { color:var(--accent); text-decoration:none; font-size:11px; }
+  .card .mark { position:absolute; top:6px; left:6px; background:rgba(229,72,77,.92);
+                color:#fff; font-size:10px; padding:1px 6px; border-radius:4px; display:none; }
+  .card.del .mark { display:block; }
+  .card .star { position:absolute; top:6px; right:6px; font-size:13px; color:#ffd33d; display:none; }
+  .card.primary .star { display:block; }
+  .hidden { display:none !important; }
+  .empty { color:var(--muted); padding:30px; text-align:center; }
+</style>
+</head>
+<body>
+<header>
+  <h1>Commons imagery audit</h1>
+  <span class="stat"><b id="sPerf">0</b> performers · <b id="sImg">0</b> images ·
+        <b id="sDel" style="color:var(--del)">0</b> marked · reviewed <b id="sRev">0</b>/<b id="sPerfTot">0</b></span>
+  <span class="spacer"></span>
+  <input type="search" id="q" placeholder="filter by performer name…">
+  <select id="vf">
+    <option value="">all verdicts</option>
+    <option value="NO_CATEGORY">NO_CATEGORY</option>
+    <option value="NOT_IN_RESOLVED_CATEGORY">NOT_IN_RESOLVED_CATEGORY</option>
+  </select>
+  <label class="chk"><input type="checkbox" id="hideRev"> hide reviewed</label>
+  <button id="expCsv">Export delete CSV</button>
+  <button id="expSql">Copy DELETE SQL</button>
+  <button id="clear" class="danger">Clear marks</button>
+</header>
+<main id="main"></main>
+<script>
+const DATA = /*DATA*/;
+const KEY = "__STORAGE_KEY__";
+const load = (k, d) => { try { return JSON.parse(localStorage.getItem(KEY+":"+k)) ?? d; } catch(e){ return d; } };
+const save = (k, v) => localStorage.setItem(KEY+":"+k, JSON.stringify(v));
+
+let marks = new Set(load("marks", []));        // image_id -> delete
+let reviewed = new Set(load("reviewed", []));  // performer_id -> reviewed
+const persist = () => { save("marks",[...marks]); save("reviewed",[...reviewed]); refreshStats(); };
+
+// group rows by performer, preserving CSV order
+const groups = [];
+const byId = new Map();
+for (const r of DATA) {
+  let g = byId.get(r.performer_id);
+  if (!g) { g = {id:r.performer_id, name:r.performer_name, cat:r.resolved_category, verdicts:new Set(), rows:[]};
+            byId.set(r.performer_id, g); groups.push(g); }
+  g.rows.push(r); g.verdicts.add(r.verdict);
+}
+groups.sort((a,b)=> a.name.toLowerCase().localeCompare(b.name.toLowerCase()));
+
+const main = document.getElementById("main");
+const esc = s => (s||"").replace(/[&<>"]/g, c=>({"&":"&amp;","<":"&lt;",">":"&gt;","\"":"&quot;"}[c]));
+
+function render() {
+  const q = document.getElementById("q").value.trim().toLowerCase();
+  const vf = document.getElementById("vf").value;
+  const hideRev = document.getElementById("hideRev").checked;
+  main.innerHTML = "";
+  let shown = 0;
+  for (const g of groups) {
+    if (q && !g.name.toLowerCase().includes(q)) continue;
+    if (vf && !g.verdicts.has(vf)) continue;
+    if (hideRev && reviewed.has(g.id)) continue;
+    shown++;
+    const rev = reviewed.has(g.id);
+    const sec = document.createElement("section");
+    sec.className = "perf" + (rev ? " reviewed" : "");
+    const catBadge = g.cat
+      ? `<span class="badge cat">${esc(g.cat)}</span>`
+      : `<span class="badge nocat">no Wikipedia category</span>`;
+    sec.innerHTML = `
+      <div class="phead">
+        <span class="pname">${esc(g.name)}</span>
+        <span class="pmeta">${g.rows.length} image(s)</span>
+        ${catBadge}
+        <span class="spacer"></span>
+        <button data-act="delall">Mark all delete</button>
+        <button data-act="keepall">Keep all</button>
+        <label class="chk"><input type="checkbox" data-act="rev" ${rev?"checked":""}> reviewed</label>
+      </div>
+      <div class="grid"></div>`;
+    const grid = sec.querySelector(".grid");
+    for (const r of g.rows) {
+      const card = document.createElement("div");
+      card.className = "card" + (marks.has(r.image_id)?" del":"") + (r.is_primary?" primary":"");
+      card.dataset.img = r.image_id;
+      const fn = decodeURIComponent((r.source_page_url.split("/wiki/File:")[1])||r.image_id);
+      card.innerHTML = `
+        <span class="mark">DELETE</span><span class="star" title="primary">★</span>
+        <img class="thumb" loading="lazy" src="${esc(r.thumb)}" alt="">
+        <div class="meta">
+          <span class="fn">${esc(fn)}</span>
+          <a href="${esc(r.source_page_url||r.image_url)}" target="_blank" rel="noopener">Commons ↗</a>
+        </div>`;
+      card.querySelector(".thumb").addEventListener("error", e => { e.target.style.opacity=.15; });
+      card.addEventListener("click", ev => {
+        if (ev.target.tagName === "A") return;
+        toggle(r.image_id, card);
+      });
+      grid.appendChild(card);
+    }
+    sec.querySelector('[data-act=delall]').onclick = () => {
+      g.rows.forEach(r => marks.add(r.image_id)); persist(); render();
+    };
+    sec.querySelector('[data-act=keepall]').onclick = () => {
+      g.rows.forEach(r => marks.delete(r.image_id)); persist(); render();
+    };
+    sec.querySelector('[data-act=rev]').onchange = e => {
+      if (e.target.checked) reviewed.add(g.id); else reviewed.delete(g.id);
+      persist();
+      if (document.getElementById("hideRev").checked) render();
+      else sec.classList.toggle("reviewed", e.target.checked);
+    };
+    main.appendChild(sec);
+  }
+  if (!shown) main.innerHTML = '<div class="empty">No performers match the current filter.</div>';
+}
+
+function toggle(imgId, card) {
+  if (marks.has(imgId)) marks.delete(imgId); else marks.add(imgId);
+  card.classList.toggle("del", marks.has(imgId));
+  persist();
+}
+
+function refreshStats() {
+  document.getElementById("sPerf").textContent = groups.length;
+  document.getElementById("sPerfTot").textContent = groups.length;
+  document.getElementById("sImg").textContent = DATA.length;
+  document.getElementById("sDel").textContent = marks.size;
+  document.getElementById("sRev").textContent = reviewed.size;
+}
+
+function markedRows() { return DATA.filter(r => marks.has(r.image_id)); }
+
+function download(name, text, type) {
+  const blob = new Blob([text], {type});
+  const a = document.createElement("a");
+  a.href = URL.createObjectURL(blob); a.download = name; a.click();
+  URL.revokeObjectURL(a.href);
+}
+
+document.getElementById("expCsv").onclick = () => {
+  const rows = markedRows();
+  if (!rows.length) return alert("Nothing marked for deletion.");
+  const csv = "performer_id,image_id\n" + rows.map(r=>`${r.performer_id},${r.image_id}`).join("\n") + "\n";
+  download("commons_delete_list.csv", csv, "text/csv");
+};
+
+document.getElementById("expSql").onclick = async () => {
+  const rows = markedRows();
+  if (!rows.length) return alert("Nothing marked for deletion.");
+  const vals = rows.map(r=>`  ('${r.performer_id}'::uuid,'${r.image_id}'::uuid)`).join(",\n");
+  const sql =
+`-- ${rows.length} artist_images link(s) marked for deletion
+DELETE FROM artist_images ai
+USING (VALUES
+${vals}
+) AS f(performer_id, image_id)
+WHERE ai.performer_id = f.performer_id AND ai.image_id = f.image_id;`;
+  try { await navigator.clipboard.writeText(sql); alert(`Copied DELETE for ${rows.length} link(s) to clipboard.`); }
+  catch(e) { download("commons_delete.sql", sql, "text/plain"); }
+};
+
+document.getElementById("clear").onclick = () => {
+  if (!marks.size || !confirm(`Clear all ${marks.size} deletion marks?`)) return;
+  marks.clear(); persist(); render();
+};
+
+document.getElementById("q").addEventListener("input", render);
+document.getElementById("vf").addEventListener("change", render);
+document.getElementById("hideRev").addEventListener("change", render);
+
+refreshStats();
+render();
+</script>
+</body>
+</html>
+"""
+
+
+def build_html(records: list[dict], title: str, storage_key: str) -> str:
+    """Fill the template, escaping "<" and the key so they can't break <script>."""
+    data_json = json.dumps(records, ensure_ascii=False).replace("<", "\\u003c")
+    return (_HTML_TEMPLATE.replace("__TITLE__", title)
+            .replace("__STORAGE_KEY__", json.dumps(storage_key)[1:-1])
+            .replace("/*DATA*/", data_json))
+
+
+def main() -> None:
+    p = argparse.ArgumentParser(
+        description="Build a self-contained HTML viewer for an audit CSV.",
+        formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__)
+    p.add_argument("csv", help="commons_imagery_audit_<ts>.csv path")
+    p.add_argument("-o", "--output", default=None,
+                   help="Output HTML path (default: <csv>.html)")
+    args = p.parse_args()
+
+    csv_path = Path(args.csv)
+    if not csv_path.exists():
+        raise SystemExit(f"No such file: {csv_path}")
+
+    records = _load_records(csv_path)
+    out_path = Path(args.output) if args.output else csv_path.with_suffix(".html")
+    html = build_html(records, title=csv_path.name, storage_key=csv_path.stem)
+    out_path.write_text(html, encoding="utf-8")
+
+    performers = len({r["performer_id"] for r in records})
+    print(f"Wrote {out_path} — {len(records)} image(s) across {performers} performer(s)")
+    print(f"Open it with:  open {out_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/backend/scripts/build_wikipedia_groundtruth_queue.py b/backend/scripts/build_wikipedia_groundtruth_queue.py
new file mode 100644
index 0000000..a9b807c
--- /dev/null
+++ b/backend/scripts/build_wikipedia_groundtruth_queue.py
@@ -0,0 +1,331 @@
+#!/usr/bin/env python3
+"""
+Build a verification *queue* for manually grounding performer -> Wikipedia links.
+
+Target: performers that HAVE Commons imagery but NO Wikipedia link on record.
+These are the highest-value enrichment opportunities — we already have visual
+evidence of who the person is (the Commons photos), we just haven't recorded the
+authoritative Wikipedia article. A human can confirm the link quickly and that
+confirmation becomes ground truth (a different, more trustworthy class of data
+than anything a crawler guesses).
+
+For each such performer this script derives candidate Wikipedia links:
+
+  1. CATEGORY-DERIVED (the "implicit" link): walk the Commons categories the
+     performer's own image files sit in, map each category to its Wikidata item
+     (Commons pageprops.wikibase_item), and take that item's English Wikipedia
+     sitelink. This ties the candidate directly to the evidence we already hold.
+  2. NAME-SEARCH FALLBACK: when no category yields a real biography article,
+     search Wikidata by the performer's name and surface human/group hits that
+     do have an English Wikipedia article.
+
+Junk is filtered: "Wikimedia category" items (P31=Q4167836) and sitelinks that
+are themselves Category: pages are dropped — they are topic categories, not
+people (e.g. Category:Public speaking).
+
+Output is a queue JSON under data/ground_truth/, consumed by
+build_wikipedia_groundtruth_viewer.py to produce the human-verification UI.
+This script only READS the database and public Wikimedia APIs; it writes nothing
+back. The human's decisions become the ground-truth file (exported from the
+viewer) — this is just the worklist.
+
+Usage:
+    python scripts/build_wikipedia_groundtruth_queue.py --limit 50
+    python scripts/build_wikipedia_groundtruth_queue.py            # full subset
+    python scripts/build_wikipedia_groundtruth_queue.py -o /tmp/queue.json
+"""
+
+import argparse
+import json
+import logging
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+BACKEND_DIR = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(BACKEND_DIR))  # make core.* / db_utils importable (script_base does this too)
+load_dotenv(BACKEND_DIR / ".env")
+
+from core import commons_imagery as ci  # noqa: E402  (session + endpoint constants)
+from db_utils import get_db_connection  # noqa: E402
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+logger = logging.getLogger("groundtruth_queue")
+
+WIKIMEDIA_CATEGORY_QID = "Q4167836"   # "Wikimedia category" — a topic category, not a person
+MAX_FILES_FOR_CATEGORIES = 3          # category-walk this many of a performer's files
+MAX_EVIDENCE_IMAGES = 8               # thumbnails shown to the reviewer
+NAME_SEARCH_LIMIT = 7                 # max hits requested from wbsearchentities per name
+
+_SUBSET_SQL = """
+    SELECT p.id, p.name, i.url, i.source_page_url, ai.is_primary, ai.display_order
+    FROM performers p
+    JOIN artist_images ai ON ai.performer_id = p.id
+    JOIN images i         ON i.id = ai.image_id
+    WHERE i.source = 'wikimedia_commons'
+      AND btrim(COALESCE(p.wikipedia_url, '')) = ''
+      AND btrim(COALESCE(p.external_links->>'wikipedia', '')) = ''
+    ORDER BY p.name, ai.is_primary DESC, ai.display_order
+"""
+
+
+# --------------------------------------------------------------------------- #
+# Wikimedia helpers (with in-run caches; many performers share categories/QIDs)
+# --------------------------------------------------------------------------- #
+
+class WM:
+    """Wikimedia API client; memoises successful lookups for the current run."""
+
+    def __init__(self, session, delay: float):
+        self.s = session                 # object exposing .get(url, params=..., timeout=...)
+        self.delay = delay               # seconds slept before each request (falsy: no sleep)
+        self._cat_qid: dict = {}         # Commons category title -> QID or None
+        self._qid_info: dict = {}        # QID -> summary dict built by qid_info()
+        self._name_search: dict = {}     # searched name -> list of QIDs
+
+    def _get(self, url, params):
+        """GET *url* and decode the JSON body; None means the call failed."""
+        if self.delay:
+            time.sleep(self.delay)
+        try:
+            return self.s.get(url, params=params, timeout=20).json()
+        except Exception as e:  # best-effort: one bad call must not abort the whole run
+            logger.warning("API error %s: %s", url, e)
+            return None
+
+    def file_categories(self, file_title: str) -> list[str]:
+        """Non-hidden Commons categories of *file_title* ([] if the call failed)."""
+        j = self._get(ci.COMMONS_API, {
+            "action": "query", "format": "json", "titles": file_title,
+            "prop": "categories", "cllimit": "max", "clshow": "!hidden",
+        }) or {}
+        pg = next(iter(j.get("query", {}).get("pages", {}).values()), {})
+        return [c["title"] for c in pg.get("categories", [])]
+
+    def category_qid(self, cat_title: str) -> str | None:
+        """Wikidata QID attached to a Commons category page, or None."""
+        if cat_title in self._cat_qid:
+            return self._cat_qid[cat_title]
+        j = self._get(ci.COMMONS_API, {
+            "action": "query", "format": "json", "titles": cat_title,
+            "prop": "pageprops", "ppprop": "wikibase_item",
+        })
+        pg = next(iter((j or {}).get("query", {}).get("pages", {}).values()), {})
+        qid = pg.get("pageprops", {}).get("wikibase_item")
+        if j is not None:  # a failed call is retried later instead of cached as "no QID"
+            self._cat_qid[cat_title] = qid
+        return qid
+
+    def qid_info(self, qid: str) -> dict:
+        """Summarise a Wikidata item: enwiki title, description, P31/P106 ids, P18 file."""
+        if qid in self._qid_info:
+            return self._qid_info[qid]
+        j = self._get(ci.WIKIDATA_API, {
+            "action": "wbgetentities", "ids": qid, "format": "json",
+            "props": "sitelinks|descriptions|claims",
+        })
+        e = (j or {}).get("entities", {}).get(qid, {})
+        def _values(prop):  # raw main-snak values of every *prop* statement
+            return [s.get("mainsnak", {}).get("datavalue", {}).get("value")
+                    for s in e.get("claims", {}).get(prop, [])]
+        def _ids(prop):  # item-valued statements only
+            return [v["id"] for v in _values(prop) if isinstance(v, dict) and "id" in v]
+        info = {
+            "qid": qid,
+            "enwiki_title": e.get("sitelinks", {}).get("enwiki", {}).get("title"),
+            "description": e.get("descriptions", {}).get("en", {}).get("value"),
+            "p31": _ids("P31"),
+            "p106": _ids("P106"),
+            "image": next((v for v in _values("P18") if isinstance(v, str)), None),
+        }
+        if j is not None:  # never memoise the empty record built from a failed call
+            self._qid_info[qid] = info
+        return info
+
+    def name_search_qids(self, name: str) -> list[str]:
+        """QIDs returned by an English wbsearchentities search for *name*."""
+        if name in self._name_search:
+            return self._name_search[name]
+        j = self._get(ci.WIKIDATA_API, {
+            "action": "wbsearchentities", "search": name, "language": "en",
+            "format": "json", "type": "item", "limit": NAME_SEARCH_LIMIT,
+        })
+        qids = [h["id"] for h in (j or {}).get("search", []) if h.get("id")]
+        if j is not None:  # keep failures out of the cache so the name can be retried
+            self._name_search[name] = qids
+        return qids
+
+
+# --------------------------------------------------------------------------- #
+# Candidate construction
+# --------------------------------------------------------------------------- #
+
+def _wiki_url(enwiki_title: str | None) -> str | None:
+    """Article URL ("?" percent-encoded), or None for missing / Category: sitelinks."""
+    if not enwiki_title or enwiki_title.startswith("Category:"):
+        return None
+    return "https://en.wikipedia.org/wiki/" + enwiki_title.replace(" ", "_").replace("?", "%3F")
+
+
+def _candidate_from_info(info: dict, *, method: str, commons_category: str | None) -> dict | None:
+    """Turn a qid_info() record into a reviewer-facing candidate, or None if unusable."""
+    instance_of = info["p31"]
+    # Topic categories are rejected before the sitelink is even looked at.
+    article = None if WIKIMEDIA_CATEGORY_QID in instance_of else _wiki_url(info["enwiki_title"])
+    if not article:
+        return None
+    p18 = info["image"]
+    preview = ("https://commons.wikimedia.org/wiki/Special:FilePath/"
+               + p18.replace(" ", "_") + "?width=180") if p18 else None
+    return dict(
+        method=method,
+        commons_category=commons_category,
+        wikidata_qid=info["qid"],
+        wikipedia_url=article,
+        title=info["enwiki_title"],
+        description=info["description"],
+        is_human="Q5" in instance_of,
+        thumb=preview,
+    )
+
+
+def _norm(s: str) -> str:
+    return "".join(filter(str.isalnum, s.lower()))  # lower-cased, alphanumerics only
+
+
+def derive_candidates(wm: WM, name: str, files: list[str]) -> list[dict]:
+    """Collect and rank Wikipedia-article candidates for one performer.
+
+    Categories of the first MAX_FILES_FOR_CATEGORIES files are tried first; the
+    Wikidata name search is consulted only when they produce no candidate.
+    The result is sorted category-derived first, then humans, then titles
+    whose normalised form equals the normalised performer name.
+    Returns an empty list when nothing usable was found.
+    """
+    found: list[dict] = []
+    accepted: set[str] = set()
+
+    def _offer(qid, method, category):
+        # Keep *qid* once, and only if it resolves to a usable candidate.
+        if qid in accepted:
+            return
+        cand = _candidate_from_info(wm.qid_info(qid), method=method,
+                                    commons_category=category)
+        if cand:
+            accepted.add(qid)
+            found.append(cand)
+
+    # 1. Category-derived: dict keys keep first-seen order and drop duplicates.
+    cats = dict.fromkeys(c for f in files[:MAX_FILES_FOR_CATEGORIES]
+                         for c in wm.file_categories(f))
+    for cat in cats:
+        qid = wm.category_qid(cat)
+        if qid:
+            _offer(qid, "category", cat)
+
+    # 2. Name search, strictly as a fallback.
+    if not found:
+        for qid in wm.name_search_qids(name):
+            _offer(qid, "name_search", None)
+
+    # 3. Stable sort; False orders before True, so each flag is phrased as a "demerit".
+    wanted = _norm(name)
+    found.sort(key=lambda c: (c["method"] != "category",
+                              not c["is_human"],
+                              _norm(c["title"]) != wanted))
+    return found
+
+
+# --------------------------------------------------------------------------- #
+# Main
+# --------------------------------------------------------------------------- #
+
+def _load_subset() -> list[dict]:
+    """Fetch the target subset as [{performer_id, name, images:[{url,page,title}]}].
+
+    Performers keep the order in which the query first returns them.
+    """
+    with get_db_connection() as conn, conn.cursor() as cur:
+        cur.execute(_SUBSET_SQL)
+        rows = cur.fetchall()
+    by_performer: dict[str, dict] = {}  # insertion-ordered, so no separate order list
+    for row in rows:
+        pid = str(row["id"])
+        entry = by_performer.setdefault(
+            pid, {"performer_id": pid, "name": row["name"], "images": []})
+        page = row["source_page_url"] or ""
+        _, sep, tail = page.partition("/wiki/")
+        entry["images"].append({"url": row["url"], "page": page,
+                                "title": tail if sep else None})
+    return list(by_performer.values())
+
+
+def main() -> None:
+    """Build the verification queue from the DB and Wikimedia APIs and write it as JSON."""
+    from urllib.parse import unquote  # function-scope import: only needed here
+    p = argparse.ArgumentParser(
+        description="Build the performer->Wikipedia verification queue JSON.",
+        formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__)
+    p.add_argument("--limit", type=int, default=None,
+                   help="Cap the number of performers (alphabetical) for a first pass")
+    p.add_argument("--delay", type=float, default=0.1,
+                   help="Seconds between Wikimedia API calls (politeness; default 0.1)")
+    p.add_argument("-o", "--output", default=None,
+                   help="Output queue JSON path (default: "
+                        "data/ground_truth/wikipedia_queue_<ts>.json)")
+    args = p.parse_args()
+
+    subset = _load_subset()
+    logger.info("Subset: %d performer(s) with Commons imagery and no Wikipedia link",
+                len(subset))
+    if args.limit:
+        subset = subset[: args.limit]
+        logger.info("Capped to %d performer(s)", len(subset))
+
+    wm = WM(ci.make_session(), delay=args.delay)
+    records, with_candidate = [], 0
+    for idx, perf in enumerate(subset, 1):
+        # Titles come from page URLs and may be percent-encoded; the API wants them decoded.
+        file_titles = ["File:" + unquote(img["title"].split("File:")[-1])
+                       for img in perf["images"] if img.get("title")]
+        candidates = derive_candidates(wm, perf["name"], file_titles)
+        with_candidate += bool(candidates)
+        evidence = []
+        for img in perf["images"][:MAX_EVIDENCE_IMAGES]:
+            t = img.get("title")
+            thumb = (f"https://commons.wikimedia.org/wiki/Special:FilePath/"
+                     f"{t.split('File:')[-1]}?width=180") if t and "File:" in t else img["url"]
+            evidence.append({"thumb": thumb, "page": img["page"], "title": t})
+        records.append({
+            "performer_id": perf["performer_id"],
+            "name": perf["name"],
+            "evidence_images": evidence,
+            "candidates": candidates,
+        })
+        if idx % 25 == 0 or idx == len(subset):
+            logger.info("Processed %d/%d performer(s); %d with a candidate so far",
+                        idx, len(subset), with_candidate)
+
+    now = datetime.now(timezone.utc)  # single timestamp for file name and payload
+    out_path = (Path(args.output) if args.output else
+                REPO_ROOT / "data" / "ground_truth" / f"wikipedia_queue_{now:%Y%m%d_%H%M%S}.json")
+    out_path.parent.mkdir(parents=True, exist_ok=True)  # only the directory actually written to
+    payload = {
+        "schema": "performer_wikipedia_queue/v1",
+        "generated_at": now.isoformat(),
+        "performer_count": len(records),
+        "with_candidate": with_candidate,
+        "records": records,
+    }
+    out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
+    logger.info("Wrote %s — %d performer(s), %d with >=1 candidate",
+                out_path, len(records), with_candidate)
+    print(out_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/backend/scripts/build_wikipedia_groundtruth_viewer.py b/backend/scripts/build_wikipedia_groundtruth_viewer.py
new file mode 100644
index 0000000..1cf2aa3
--- /dev/null
+++ b/backend/scripts/build_wikipedia_groundtruth_viewer.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+"""
+Build a self-contained HTML verification UI from a wikipedia_queue_*.json.
+
+Input is the queue produced by build_wikipedia_groundtruth_queue.py: performers
+with Commons imagery but no Wikipedia link, each with evidence thumbnails and
+candidate Wikipedia articles (category-derived, with name-search fallbacks).
+
+The page lets you, per performer:
+  - look at the Commons photos we already hold (the evidence),
+  - pick the candidate Wikipedia article that matches them (or paste a custom
+    URL, or mark "no match"),
+  - and export the confirmed set as a GROUND-TRUTH JSON — human-verified links,
+    stamped with manual provenance, suitable for re-ingest or for diffing
+    against automated crawlers.
+
+Decisions persist in the browser (localStorage, keyed by the queue filename),
+so you can close and resume. Nothing is written back to the database; the
+exported JSON is the deliverable.
+
+Usage:
+    python scripts/build_wikipedia_groundtruth_viewer.py data/ground_truth/wikipedia_queue_<ts>.json
+    python scripts/build_wikipedia_groundtruth_viewer.py queue.json -o verify.html
+"""
+
+import argparse
+import json
+from pathlib import Path
+
+
+_HTML = r"""<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>__TITLE__</title>
+<style>
+  :root { --bg:#11141a; --panel:#1b212c; --line:#2b3442; --txt:#e6edf3;
+          --muted:#8b97a7; --ok:#3fb950; --no:#e5484d; --accent:#388bfd; --warn:#d29922; }
+  * { box-sizing:border-box; }
+  body { margin:0; font:14px/1.45 -apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif; background:var(--bg); color:var(--txt); }
+  header { position:sticky; top:0; z-index:10; background:var(--panel); border-bottom:1px solid var(--line);
+           padding:10px 14px; display:flex; flex-wrap:wrap; gap:10px; align-items:center; }
+  header h1 { font-size:15px; margin:0 8px 0 0; font-weight:600; }
+  .stat { color:var(--muted); font-size:12px; } .stat b { color:var(--txt); }
+  input[type=search], select, input[type=text] { background:var(--bg); color:var(--txt); border:1px solid var(--line);
+           border-radius:6px; padding:5px 8px; font-size:13px; }
+  button { background:var(--bg); color:var(--txt); border:1px solid var(--line); border-radius:6px;
+           padding:5px 10px; font-size:12px; cursor:pointer; }
+  button:hover { border-color:var(--accent); }
+  button.primary { background:#1f6feb22; border-color:var(--accent); color:#cfe0ff; }
+  .spacer { flex:1; }
+  main { padding:14px; max-width:1100px; margin:0 auto; }
+  .perf { border:1px solid var(--line); border-radius:10px; margin-bottom:14px; background:var(--panel); }
+  .perf.done { border-color:#27412c; }
+  .perf.nomatch { border-color:#4a2426; }
+  .phead { display:flex; gap:10px; align-items:center; padding:10px 12px; border-bottom:1px solid var(--line);
+           position:sticky; top:52px; background:var(--panel); }
+  .pname { font-weight:600; font-size:15px; }
+  .pill { font-size:11px; padding:2px 8px; border-radius:10px; border:1px solid var(--line); color:var(--muted); }
+  .pill.ok { color:var(--ok); border-color:#27412c; } .pill.no { color:var(--no); border-color:#4a2426; }
+  .body { display:grid; grid-template-columns:300px 1fr; gap:14px; padding:12px; }
+  @media (max-width:760px){ .body { grid-template-columns:1fr; } }
+  .evidence h3, .cands h3 { margin:0 0 7px; font-size:11px; text-transform:uppercase; letter-spacing:.06em; color:var(--muted); }
+  .egrid { display:grid; grid-template-columns:repeat(auto-fill,minmax(80px,1fr)); gap:6px; }
+  .egrid a { display:block; } .egrid img { width:100%; height:80px; object-fit:cover; border-radius:6px; background:#0c0f14; display:block; }
+  .cand { display:flex; gap:10px; padding:8px; border:1px solid var(--line); border-radius:8px; margin-bottom:7px; cursor:pointer; align-items:flex-start; }
+  .cand:hover { border-color:var(--accent); }
+  .cand.sel { border-color:var(--ok); background:#10391c33; }
+  .cand img { width:64px; height:64px; object-fit:cover; border-radius:6px; background:#0c0f14; flex:none; }
+  .cand .noimg { width:64px; height:64px; border-radius:6px; background:#0c0f14; display:flex; align-items:center; justify-content:center; color:var(--muted); font-size:10px; flex:none; }
+  .cand .ct { font-weight:600; } .cand .cd { color:var(--muted); font-size:12.5px; margin:2px 0; }
+  .cand a.ext { color:var(--accent); text-decoration:none; font-size:12px; }
+  .tag { font-size:10px; padding:1px 6px; border-radius:8px; border:1px solid var(--line); color:var(--muted); margin-right:5px; }
+  .tag.cat { color:#8ad; border-color:#24405a; } .tag.name { color:var(--warn); border-color:#4a3d1c; }
+  .tag.human { color:var(--ok); border-color:#27412c; } .tag.nonhuman { color:var(--no); border-color:#4a2426; }
+  .alt { display:flex; gap:10px; align-items:center; padding:7px 8px; border:1px dashed var(--line); border-radius:8px; margin-top:7px; flex-wrap:wrap; }
+  .alt label { display:flex; gap:6px; align-items:center; cursor:pointer; color:var(--muted); font-size:13px; }
+  .alt.selno { border-color:var(--no); } .alt.selcustom { border-color:var(--ok); }
+  .nocand { color:var(--warn); font-size:13px; margin-bottom:7px; }
+  .hidden { display:none !important; }
+  a.searchlink { color:var(--accent); font-size:12px; text-decoration:none; }
+</style>
+</head>
+<body>
+<header>
+  <h1>Wikipedia ground-truth</h1>
+  <span class="stat">decided <b id="sDone">0</b>/<b id="sTot">0</b> · verified <b id="sVer" style="color:var(--ok)">0</b> · no-match <b id="sNo" style="color:var(--no)">0</b></span>
+  <span class="spacer"></span>
+  <input type="search" id="q" placeholder="filter by name…">
+  <select id="ff">
+    <option value="">all</option>
+    <option value="undecided">undecided</option>
+    <option value="verified">verified</option>
+    <option value="no_match">no-match</option>
+    <option value="hascand">has candidate</option>
+    <option value="nocand">no candidate</option>
+  </select>
+  <button id="exp" class="primary">Export ground truth JSON</button>
+</header>
+<main id="main"></main>
+<script>
+const Q = /*DATA*/;
+const KEY = "__STORAGE_KEY__";
+const SRC = "__SOURCE__";
+const load = (k,d)=>{ try{ return JSON.parse(localStorage.getItem(KEY+":"+k)) ?? d; }catch(e){ return d; } };
+const save = (k,v)=> localStorage.setItem(KEY+":"+k, JSON.stringify(v));
+
+// decisions: performer_id -> {choice, url, qid, method, category, title, at}
+let decisions = load("decisions", {});
+const persist = ()=>{ save("decisions", decisions); stats(); };
+const esc = s => (s||"").replace(/[&<>"]/g,c=>({"&":"&amp;","<":"&lt;",">":"&gt;","\"":"&quot;"}[c]));
+
+const main = document.getElementById("main");
+
+function decide(pid, d){ if(d) decisions[pid]=Object.assign({at:new Date().toISOString()},d); else delete decisions[pid]; persist(); render(); }
+
+function statusOf(pid){ const d=decisions[pid]; if(!d) return "undecided"; return d.choice==="no_match"?"no_match":"verified"; }
+
+function render(){
+  const q=document.getElementById("q").value.trim().toLowerCase();
+  const ff=document.getElementById("ff").value;
+  main.innerHTML=""; let shown=0;
+  for(const r of Q.records){
+    if(q && !r.name.toLowerCase().includes(q)) continue;
+    const st=statusOf(r.performer_id);
+    if(ff==="undecided" && st!=="undecided") continue;
+    if(ff==="verified" && st!=="verified") continue;
+    if(ff==="no_match" && st!=="no_match") continue;
+    if(ff==="hascand" && !r.candidates.length) continue;
+    if(ff==="nocand" && r.candidates.length) continue;
+    shown++;
+    const d=decisions[r.performer_id];
+    const sec=document.createElement("section");
+    sec.className="perf"+(st==="verified"?" done":st==="no_match"?" nomatch":"");
+    const statusPill = st==="verified" ? '<span class="pill ok">✓ verified</span>'
+                     : st==="no_match" ? '<span class="pill no">✗ no match</span>'
+                     : '<span class="pill">undecided</span>';
+    sec.innerHTML = `
+      <div class="phead">
+        <span class="pname">${esc(r.name)}</span>
+        <span class="pill">${r.evidence_images.length} photo(s)</span>
+        ${statusPill}
+      </div>
+      <div class="body">
+        <div class="evidence"><h3>Our Commons photos</h3><div class="egrid"></div></div>
+        <div class="cands"><h3>Candidate Wikipedia article</h3><div class="clist"></div></div>
+      </div>`;
+    const eg=sec.querySelector(".egrid");
+    for(const img of r.evidence_images){
+      const a=document.createElement("a"); a.href=img.page||img.thumb; a.target="_blank"; a.rel="noopener";
+      a.innerHTML=`<img loading="lazy" src="${esc(img.thumb)}" alt="">`;
+      a.querySelector("img").addEventListener("error",e=>e.target.style.opacity=.15);
+      eg.appendChild(a);
+    }
+    const cl=sec.querySelector(".clist");
+    if(!r.candidates.length){
+      const sr=`https://en.wikipedia.org/w/index.php?search=${encodeURIComponent(r.name)}`;
+      cl.insertAdjacentHTML("beforeend",
+        `<div class="nocand">No automatic candidate. <a class="searchlink" href="${sr}" target="_blank" rel="noopener">Search Wikipedia ↗</a></div>`);
+    }
+    r.candidates.forEach((c,i)=>{
+      const selected = d && d.choice==="cand" && d.qid===c.wikidata_qid;
+      const div=document.createElement("div");
+      div.className="cand"+(selected?" sel":"");
+      const img = c.thumb ? `<img loading="lazy" src="${esc(c.thumb)}" alt="">` : `<div class="noimg">no photo</div>`;
+      div.innerHTML=`${img}
+        <div>
+          <div class="ct">${esc(c.title)}</div>
+          <div>
+            <span class="tag ${c.method==='category'?'cat':'name'}">${c.method==='category'?'category-derived':'name search'}</span>
+            <span class="tag ${c.is_human?'human':'nonhuman'}">${c.is_human?'human':'not human'}</span>
+            <span class="tag">${esc(c.wikidata_qid)}</span>
+          </div>
+          <div class="cd">${esc(c.description||"")}</div>
+          <a class="ext" href="${esc(c.wikipedia_url)}" target="_blank" rel="noopener">${esc(c.wikipedia_url)} ↗</a>
+        </div>`;
+      div.addEventListener("click",ev=>{
+        if(ev.target.tagName==="A") return;
+        decide(r.performer_id, {choice:"cand", url:c.wikipedia_url, qid:c.wikidata_qid,
+                                method:c.method, category:c.commons_category, title:c.title});
+      });
+      cl.appendChild(div);
+    });
+    // alt row: no-match + custom URL
+    const isNo = d && d.choice==="no_match";
+    const isCustom = d && d.choice==="custom";
+    const alt=document.createElement("div");
+    alt.className="alt"+(isNo?" selno":isCustom?" selcustom":"");
+    alt.innerHTML=`
+      <label><input type="radio" name="alt-${r.performer_id}" ${isNo?"checked":""} data-no> ✗ No match</label>
+      <label><input type="radio" name="alt-${r.performer_id}" ${isCustom?"checked":""} data-custom> ✎ Custom URL:</label>
+      <input type="text" style="flex:1;min-width:180px" placeholder="https://en.wikipedia.org/wiki/…"
+             value="${isCustom?esc(d.url||""):""}" data-customurl>
+      ${d ? '<button data-clear>clear</button>' : ''}`;
+    alt.querySelector("[data-no]").addEventListener("change",()=>decide(r.performer_id,{choice:"no_match",url:null,qid:null,method:null,category:null}));
+    const cu=alt.querySelector("[data-customurl]");
+    const commitCustom=()=>{ const u=cu.value.trim(); if(u) decide(r.performer_id,{choice:"custom",url:u,qid:null,method:"custom",category:null}); };
+    alt.querySelector("[data-custom]").addEventListener("change",()=>{ if(cu.value.trim()) commitCustom(); else cu.focus(); });
+    cu.addEventListener("change",commitCustom);
+    const clr=alt.querySelector("[data-clear]"); if(clr) clr.addEventListener("click",()=>decide(r.performer_id,null));
+    sec.querySelector(".cands").appendChild(alt);
+    main.appendChild(sec);
+  }
+  if(!shown) main.innerHTML='<div style="color:var(--muted);padding:30px;text-align:center">No performers match this filter.</div>';
+}
+
+function stats(){
+  const ids=Q.records.map(r=>r.performer_id);
+  let ver=0,no=0;
+  for(const id of ids){ const s=statusOf(id); if(s==="verified")ver++; else if(s==="no_match")no++; }
+  document.getElementById("sTot").textContent=ids.length;
+  document.getElementById("sDone").textContent=ver+no;
+  document.getElementById("sVer").textContent=ver;
+  document.getElementById("sNo").textContent=no;
+}
+
+document.getElementById("exp").onclick=()=>{
+  const records={};
+  for(const r of Q.records){
+    const d=decisions[r.performer_id]; if(!d) continue;  // export only decided
+    const verified = d.choice!=="no_match";
+    records[r.performer_id]={
+      name:r.name,
+      status: verified?"verified":"no_match",
+      wikipedia_url: verified ? d.url : null,
+      wikidata_qid: verified ? (d.qid||null) : null,
+      method:"manual",
+      candidate_method: verified ? d.method : null,
+      evidence: { commons_category: d.category||null },
+      verified_at: d.at
+    };
+  }
+  const n=Object.keys(records).length;
+  if(!n) return alert("No decisions yet to export.");
+  const out={ schema:"performer_wikipedia_groundtruth/v1", exported_at:new Date().toISOString(),
+              source_queue:SRC, record_count:n, records };
+  const blob=new Blob([JSON.stringify(out,null,2)],{type:"application/json"});
+  const a=document.createElement("a"); a.href=URL.createObjectURL(blob);
+  a.download="performer_wikipedia_groundtruth.json"; a.click(); URL.revokeObjectURL(a.href);
+};
+
+document.getElementById("q").addEventListener("input",render);
+document.getElementById("ff").addEventListener("change",render);
+stats(); render();
+</script>
+</body>
+</html>
+"""
+
+
+def main() -> None:
+    """Render the self-contained verification page for one queue JSON file."""
+    p = argparse.ArgumentParser(
+        description="Build the Wikipedia ground-truth verification HTML from a queue JSON.",
+        formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__)
+    p.add_argument("queue", help="wikipedia_queue_<ts>.json path")
+    p.add_argument("-o", "--output", default=None, help="Output HTML (default: <queue>.html)")
+    args = p.parse_args()
+    qpath = Path(args.queue)
+    if not qpath.exists():
+        raise SystemExit(f"No such file: {qpath}")
+    data = json.loads(qpath.read_text(encoding="utf-8"))
+    # The JSON lands inside <script>; "<\/" is still valid JSON/JS but cannot close the tag.
+    payload = json.dumps(data, ensure_ascii=False).replace("</", "<\\/")
+    html = (_HTML
+            .replace("__TITLE__", qpath.name)
+            .replace("__STORAGE_KEY__", qpath.stem)
+            .replace("__SOURCE__", qpath.name)
+            .replace("/*DATA*/", payload))
+    out = Path(args.output) if args.output else qpath.with_suffix(".html")
+    out.write_text(html, encoding="utf-8")
+    recs = data.get("records", [])
+    withc = sum(1 for r in recs if r.get("candidates"))
+    print(f"Wrote {out} — {len(recs)} performer(s), {withc} with >=1 candidate")
+    print(f"Open it with:  open {out}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/backend/tests/test_performer_commons_imagery.py b/backend/tests/test_performer_commons_imagery.py
index 26f0cf7..c93a09e 100644
--- a/backend/tests/test_performer_commons_imagery.py
+++ b/backend/tests/test_performer_commons_imagery.py
@@ -34,8 +34,9 @@
 PERFORMER_NEVER = _NS.format(0x00001)   # last_imagery_check NULL -> due
 PERFORMER_STALE = _NS.format(0x00002)   # checked 100 days ago    -> due (>90d)
 PERFORMER_FRESH = _NS.format(0x00003)   # checked 10 days ago      -> not due
+PERFORMER_NOWIKI = _NS.format(0x00004)  # never checked but no Wikipedia URL -> skipped
 
-_ALL_FIXTURE_IDS = (PERFORMER_NEVER, PERFORMER_STALE, PERFORMER_FRESH)
+_ALL_FIXTURE_IDS = (PERFORMER_NEVER, PERFORMER_STALE, PERFORMER_FRESH, PERFORMER_NOWIKI)
 
 
 def _cleanup(conn):
@@ -58,19 +59,29 @@ def perf_fixture(db):
     _cleanup(db)
     with db.cursor() as cur:
         cur.execute(
-            "INSERT INTO performers (id, name, last_imagery_check) "
-            "VALUES (%s, %s, NULL)",
-            (PERFORMER_NEVER, "Never Checked"),
+            "INSERT INTO performers (id, name, wikipedia_url, last_imagery_check) "
+            "VALUES (%s, %s, %s, NULL)",
+            (PERFORMER_NEVER, "Never Checked",
+             "https://en.wikipedia.org/wiki/Never_Checked"),
         )
         cur.execute(
-            "INSERT INTO performers (id, name, last_imagery_check) "
-            "VALUES (%s, %s, now() - make_interval(days => 100))",
-            (PERFORMER_STALE, "Stale Checked"),
+            "INSERT INTO performers (id, name, wikipedia_url, last_imagery_check) "
+            "VALUES (%s, %s, %s, now() - make_interval(days => 100))",
+            (PERFORMER_STALE, "Stale Checked",
+             "https://en.wikipedia.org/wiki/Stale_Checked"),
+        )
+        cur.execute(
+            "INSERT INTO performers (id, name, wikipedia_url, last_imagery_check) "
+            "VALUES (%s, %s, %s, now() - make_interval(days => 10))",
+            (PERFORMER_FRESH, "Fresh Checked",
+             "https://en.wikipedia.org/wiki/Fresh_Checked"),
         )
+        # Due (never checked) but has no Wikipedia URL: the sweep must skip it,
+        # since the Commons resolver only trusts a validated Wikipedia article.
         cur.execute(
             "INSERT INTO performers (id, name, last_imagery_check) "
-            "VALUES (%s, %s, now() - make_interval(days => 10))",
-            (PERFORMER_FRESH, "Fresh Checked"),
+            "VALUES (%s, %s, NULL)",
+            (PERFORMER_NOWIKI, "No Wiki"),
         )
     db.commit()
     yield
@@ -116,6 +127,11 @@ def test_find_candidates_includes_never_and_stale_not_fresh(self, perf_fixture):
         assert PERFORMER_STALE in candidates
         assert PERFORMER_FRESH not in candidates
 
+    def test_find_candidates_excludes_performers_without_wikipedia(self, perf_fixture):
+        # Due (never checked), but no Wikipedia URL -> not a candidate.
+        candidates = sweep_mod.find_candidate_performer_ids()
+        assert PERFORMER_NOWIKI not in candidates
+
     def test_stale_days_window_excludes_within_window(self, perf_fixture):
         # With a 200-day window, the 100-day-stale row is no longer due;
         # the never-checked row always is.
diff --git a/data/ground_truth/README.md b/data/ground_truth/README.md
new file mode 100644
index 0000000..a48b4a4
--- /dev/null
+++ b/data/ground_truth/README.md
@@ -0,0 +1,95 @@
+# Ground-truth datasets
+
+Human-verified reference data, kept deliberately separate from anything a
+crawler produces automatically. The distinguishing marker is provenance: every
+record here carries `"method": "manual"`, meaning a person looked at the
+evidence and confirmed it. These files are the authoritative source for
+re-ingest and for diffing against automated crawlers.
+
+## What's tracked vs ignored
+
+Committed (authoritative):
+- `README.md` — this file.
+- `performer_wikipedia_groundtruth*.json` — verified performer → Wikipedia links.
+
+Ignored (regenerable scratch — see `.gitignore`):
+- `wikipedia_queue_*.json` — the verification *worklist* (rebuilt from the DB).
+- `*.html` — generated verification viewers.
+
+## Pipeline: performer → Wikipedia links
+
+Goal: for performers that have Commons imagery but **no** Wikipedia link on
+record, confirm the correct Wikipedia article so the verified link can be
+re-ingested and used as a reference when checking automated crawlers.
+
+```
+backend/scripts/build_wikipedia_groundtruth_queue.py     # DB + Wikimedia  -> queue JSON
+backend/scripts/build_wikipedia_groundtruth_viewer.py    # queue JSON      -> verification HTML
+# (review in browser, click Export)                       # decisions       -> ground-truth JSON
+```
+
+Candidates are **category-derived** first — the Commons category the
+performer's own photos sit in → its Wikidata item → the English Wikipedia
+sitelink (the "implicit" link) — falling back to a Wikidata name search when no
+category yields a real biography article.
+
+### Schema: `performer_wikipedia_queue/v1` (worklist, ignored)
+
+```jsonc
+{
+  "schema": "performer_wikipedia_queue/v1",
+  "generated_at": "<iso8601>",
+  "performer_count": 1224,
+  "with_candidate": 812,
+  "records": [
+    {
+      "performer_id": "<uuid>",
+      "name": "?uestlove",
+      "evidence_images": [ { "thumb": "<url>", "page": "<commons File: url>", "title": "File:…" } ],
+      "candidates": [
+        {
+          "method": "category" | "name_search",
+          "commons_category": "Category:Questlove" | null,
+          "wikidata_qid": "Q263024",
+          "wikipedia_url": "https://en.wikipedia.org/wiki/Questlove",
+          "title": "Questlove",
+          "description": "American hip hop musician, record producer and DJ",
+          "is_human": true,
+          "thumb": "<wikidata P18 thumbnail url>" | null
+        }
+      ]
+    }
+  ]
+}
+```
+
+### Schema: `performer_wikipedia_groundtruth/v1` (authoritative, committed)
+
+Exported from the viewer; only performers the reviewer actually decided on are
+included. `no_match` is a real, useful decision too: a crawler that proposes a
+link for a `no_match` performer is wrong.
+
+```jsonc
+{
+  "schema": "performer_wikipedia_groundtruth/v1",
+  "exported_at": "<iso8601>",
+  "source_queue": "wikipedia_queue_<ts>.json",
+  "record_count": 137,
+  "records": {
+    "<performer_id>": {
+      "name": "?uestlove",
+      "status": "verified" | "no_match",
+      "wikipedia_url": "https://en.wikipedia.org/wiki/Questlove" | null,
+      "wikidata_qid": "Q263024" | null,
+      "method": "manual",
+      "candidate_method": "category" | "name_search" | "custom" | null,
+      "evidence": { "commons_category": "Category:Questlove" | null },
+      "verified_at": "<iso8601>"
+    }
+  }
+}
+```
+
+Re-ingest (not yet built): read this file, `UPDATE performers SET wikipedia_url`
+for `status == "verified"` rows, stamping a manual-provenance marker
+(e.g. `updated_by = 'groundtruth_manual'`) so the distinction survives in the DB.
diff --git a/sql/migrations/add_commons_imagery_enrichment.sql b/sql/migrations/add_commons_imagery_enrichment.sql
deleted file mode 100644
index a6a02d1..0000000
--- a/sql/migrations/add_commons_imagery_enrichment.sql
+++ /dev/null
@@ -1,73 +0,0 @@
--- ============================================================================
--- Migration: Commons imagery enrichment
--- Description:
---   Adds the producer/handler support for the ('commons',
---   'enrich_performer_imagery') research job:
---
---     1. performers.last_imagery_check — when the performer was last swept for
---        Commons imagery. The producer (core/performer_commons_imagery.py)
---        treats NULL or "older than the staleness window" as due; the handler
---        (research_worker/handlers/commons.py) stamps it now() on every
---        completion (even a no-op).
---
---     2. A 'commons' daily quota row in source_quotas, used by the handler to
---        cap the number of paid Claude vision-rerank calls per day. One unit =
---        one image reranked. When the budget is spent the worker reschedules
---        the job for the next reset (QuotaExhausted).
---
--- Idempotent: ADD COLUMN IF NOT EXISTS + INSERT ... ON CONFLICT DO NOTHING, so
--- safe to re-run.
---
--- Run: psql $DATABASE_URL -f sql/migrations/add_commons_imagery_enrichment.sql
--- ============================================================================
-
-BEGIN;
-
--- ----------------------------------------------------------------------------
--- 1. performers.last_imagery_check
--- ----------------------------------------------------------------------------
-
-ALTER TABLE performers
-    ADD COLUMN IF NOT EXISTS last_imagery_check TIMESTAMPTZ;
-
-COMMENT ON COLUMN performers.last_imagery_check IS
-    'When this performer was last swept for Commons imagery by the '
-    '(commons, enrich_performer_imagery) research job. NULL = never checked. '
-    'The producer enqueues performers that are NULL or older than the '
-    'staleness window (default 90 days); the handler stamps now() on every '
-    'completion.';
-
--- Supports the producer''s "due" scan (NULL-first / oldest-first).
-CREATE INDEX IF NOT EXISTS idx_performers_last_imagery_check
-    ON performers (last_imagery_check NULLS FIRST);
-
--- ----------------------------------------------------------------------------
--- 2. 'commons' daily quota (caps paid Claude rerank calls/day)
--- ----------------------------------------------------------------------------
--- units_limit = max images reranked per day across all performers. At the
--- handler default rerank_cap of 12 images/performer, 2000 covers ~166
--- performers/day. Tune with:
---   UPDATE source_quotas SET units_limit = <n> WHERE source = 'commons';
--- resets_at uses the default 'day' window (next UTC midnight), matching
--- research_worker/quota.py's _DEFAULT_RESET_SQL.
-
-INSERT INTO source_quotas (source, window_name, units_used, units_limit, resets_at)
-VALUES (
-    'commons',
-    'day',
-    0,
-    2000,
-    (date_trunc('day', now()) + interval '1 day')
-)
-ON CONFLICT (source, window_name) DO NOTHING;
-
-COMMIT;
-
--- ============================================================================
--- ROLLBACK (manual)
--- ============================================================================
--- BEGIN;
---   DROP INDEX IF EXISTS idx_performers_last_imagery_check;
---   ALTER TABLE performers DROP COLUMN IF EXISTS last_imagery_check;
---   DELETE FROM source_quotas WHERE source = 'commons';
--- COMMIT;