iris-sast · IcebladeLabs · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026
diff --git a/README.md b/README.md
@@ -10,6 +10,7 @@
 ⚠️ Code and data for the [ICLR 2025 Paper](https://arxiv.org/pdf/2405.17238) can be found in the v1 branch, license and citation below.
 
 ## 📰 News
+* **[Jun. 23, 2026]**: The CVEs added in the previous [update](https://github.com/iris-sast/iris/commit/67061ae6ea769edcddd7286380dfd699ed516a58) to CWE-Bench-Java were the only entries missing information about the specific methods that were altered. This missing fix information has now been added. Additionally, guidelines for future contributions to the benchmark are available in [`contributing_cwe_bench_java.md`](data/contributing_cwe_bench_java.md).
 * **[Nov. 29, 2025]**: Added a dataset with manually extracted source and sinks for the vulnerabilities in CodeQL format for 50 CVEs.
 * **[Nov. 24, 2025]**: Updated queries to version 1.8.1 to work with CodeQL 2.23.2.
 * **[Nov. 24, 2025]**: Updated the Docker integration in the main IRIS pipeline so that the container images include the project dependencies. The updated images can be found in [IRIS Docker Hub](https://hub.docker.com/r/irissast/cwe-bench-java-containers-v2). The instructions to use the Docker integration can be found in the [**Using Docker containers with IRIS**](#using-docker-containers-with-iris) section below. 

diff --git a/data/contributing_cwe_bench_java.md b/data/contributing_cwe_bench_java.md
@@ -0,0 +1,26 @@
+# Contributing to CWE-Bench-Java
+
+Projects in CWE-Bench-Java follow a strict framework in how they are recorded. All projects should be logged in `project_info.csv`, `build_info.csv`, and `fix_info.csv`. Details on how each should be formatted are below.
+
+---
+
+## `project_info.csv`
+
+All fields must be filled out. 
+
+* The `project_slug` consists of: `[github_username]__[github_repository_name]_[cve_id]_[github_tag]`. 
+* If there are multiple fix commits, separate them via **semicolons** (`;`). 
+* If the commit exists in multiple branches, choose the branch closest to `main`.
+
+## `build_info.csv`
+
+Please include a single tested build configuration for each project. Do not include the project if it cannot be built using one of the supported build systems (Maven, Gradle, or Gradle Wrapper).
+
+## `fix_info.csv`
+
+Each row in this file represents a **single changed method**. Include every method for a project that fits the following criteria: 
+
+1.  An existing method is changed (a method is not purely added or removed). 
+2.  The method is related to the patch. 
+
+> **Note:** If a project has no such methods, do not include it. Line numbers should align with the fixed version, including whitespace and closing brackets.
diff --git a/data/fix_info.csv b/data/fix_info.csv
diff --git a/data/scripts/fix_info_generator.py b/data/scripts/fix_info_generator.py
@@ -0,0 +1,132 @@
+# This script was used to generate the missing fix info for the project slugs in project_info.csv that have no rows in fix_info.csv; it can be adapted to generate method-level information for any project given its key identifiers.
+# It uses the GitHub API to request a commit diff, and then uses the Gemini API to generate data in accordance with the standards defined in contributing_cwe_bench_java.md.
+
+
+import requests
+import csv
+import re
+import time
+import base64
+from google import genai
+from pydantic import BaseModel, Field
+from typing import Optional, List
+import os
+from dotenv import load_dotenv
+
+# Setup APIs and open CSVs
+load_dotenv()
+GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
+
+HEADERS = {
+    "Authorization": f"Bearer {GITHUB_TOKEN}",
+    "Accept": "application/vnd.github.patch",
+    "X-GitHub-Api-Version": "2026-03-10"
+}
+BASE_URL = "https://api.github.com"
+
+
+api_key = os.getenv("GEMINI_API_KEY")
+
+class Row(BaseModel):
+    related: List[bool] = Field(description="Is this change related to the CVE, not a pure addition, and part of a method?")
+    file_name: List[str] = Field(description="Name of file change is in.")
+    class_name: List[str] = Field(description="Class the change is in.")
+    class_start: List[int] = Field(description="Starting line number of the class.")
+    class_end: List[int] = Field(description="Ending line number of the class.")
+    method_name: List[str] = Field(description="Method the change is in, e.g. evaluate")
+    method_start: List[int] = Field(description="Starting line number of the method.")
+    method_end: List[int] = Field(description="Ending line number of the class.")
+    method_signature: List[str] = Field(description="Full method signature, e.g. boolean evaluate(byte)")
+
+client = genai.Client()
+
+projects = open('project_info.csv', newline='')
+fix = open('fix_info.csv', newline='')
+missing_fix = open('missing_fix_info.csv', 'w', newline='', encoding='utf-8')
+
+p = csv.reader(projects)
+f = csv.reader(fix)
+m = csv.writer(missing_fix)
+m.writerow(['project_slug','cve_id','github_username','github_repository_name','commit','file','class','class_start','class_end','method','method_start','method_end','signature'])
+
+# Select missing projects
+included_slugs = set()
+
+for row in f:
+    included_slugs.add(row[0])
+
+for row in p:
+    if row[1] not in included_slugs:
+        print("ran")
+        cve = row[2]
+        organization = row[5]
+        repository = row[6]
+        commit = row[11]
+        if cve == "" or organization == "" or repository == "" or commit == "":
+            print(f"Null value for project {row[1]}")
+            continue
+        if ";" in commit:
+            print(f"Multiple commits for project {row[1]}")
+
+        # Find and pull full changed files
+        response = requests.get(f"{BASE_URL}/repos/{organization}/{repository}/commits/{commit}", headers=HEADERS)
+        if not response.ok:
+            print(f"Diff call failed for project {row[1]}")
+            continue
+        diff = response.text
+        changed_paths = set(re.findall(r"b/(.*)\.java", diff))
+
+
+        all_diffs = ""
+        for diff_file in changed_paths:
+            path = diff_file + ".java"
+            response_per = requests.get(f"{BASE_URL}/repos/{organization}/{repository}/contents/{path}?ref={commit}", headers=HEADERS)
+            if not response_per.ok:
+                print(f"Diff call failed for project {row[1]}")
+                continue
+            full_file = base64.b64decode(response_per.json()['content']).decode('utf-8')
+            time.sleep(5)
+            all_diffs += full_file + "\n"
+
+        prompt = f"""
+        You are a security expert. Your goal, given a CVE, a patch diff, and the full files referenced in the diff,
+        to identify whether each change in the diff satisfies the following criteria:
+        The change occurs within a method. That method is preexisting and is modified, not a pure addition/ deletion.
+        It is directly related to patching the CVE. If the change does not satisfy the criteria, return false for
+        related and set the other fields to null. Otherwise, provide the requested information about the file, class
+        and method in which the change is made. Make certain to provide output for EVERY change in the diff. Each
+        output field is a List, provide an entry in each list for EVERY change.
+
+        CVE: {cve}
+
+        Diff: {diff}
+
+        All files referenced in the diff: {all_diffs}
+        """
+
+        response = client.models.generate_content(
+            model="gemini-3.1-pro-preview",
+            contents=prompt,
+            config={
+                "tools": [
+                    {"google_search": {}},
+                ],
+                "response_mime_type": "application/json",
+                "response_schema": Row.model_json_schema(),
+            },
+        )
+
+        output = Row.model_validate_json(response.text)
+        print(f"Project: {row[1]} successful")
+
+        # Write to CSV
+        count = 0
+        for i in output.related:
+            if i:
+                to_add = [row[1], cve, organization, repository, commit, output.file_name[count], output.class_name[count], output.class_start[count], output.class_end[count], output.method_name[count], output.method_start[count], output.method_end[count], output.method_signature[count]]
+                m.writerow(to_add)
+            count += 1
+
+projects.close()
+fix.close()
+missing_fix.close()
diff --git a/data/tests/blank_values_test.py b/data/tests/blank_values_test.py
@@ -0,0 +1,29 @@
+# Ensure fix_info.csv rows have no blank values in required columns.
+
+# Columns that must always carry a value for a row to be usable.
+REQUIRED_COLUMNS = [
+    "project_slug",
+    "cve_id",
+    "github_username",
+    "github_repository_name",
+    "commit",
+    "file",
+]
+
+
+def test_required_columns_are_not_blank(fix_info):
+    _fieldnames, rows = fix_info
+
+    offenders = []
+    for lineno, row in rows:
+        blank = [col for col in REQUIRED_COLUMNS if not (row.get(col) or "").strip()]
+        if blank:
+            offenders.append(
+                f"  line {lineno}: blank {blank} "
+                f"(project_slug={row.get('project_slug')!r})"
+            )
+
+    assert not offenders, (
+        f"{len(offenders)} fix_info.csv row(s) have blank required values:\n"
+        + "\n".join(offenders)
+    )
diff --git a/data/tests/commit_diff_alignment_test.py b/data/tests/commit_diff_alignment_test.py
@@ -0,0 +1,98 @@
+# Check that each fix_info.csv row agrees with the real commit on GitHub.
+#
+# For every row we ask the GitHub commits API (the public endpoint)
+# what the commit changed, then assert that the recorded ``file`` is one of
+# the files the commit touched. The recorded ``method`` name is not checked
+# against the file's patch yet.
+
+import csv
+import json
+import os
+import pathlib
+import urllib.error
+import urllib.request
+
+import pytest
+
+FIX_CSV = pathlib.Path(__file__).resolve().parent.parent / "fix_info.csv"
+
+
+def _auth_headers():
+    # Optional: a GITHUB_TOKEN lifts the rate limit from ~60 to 5000 req/hr,
+    # which is what lets a single run cover every distinct commit. Absent a
+    # token the test still works -- it just verifies fewer rows before the
+    # keyless limit forces a skip. CI (GitHub Actions) injects this for free.
+    headers = {
+        "User-Agent": "iris-data-tests",
+        "Accept": "application/vnd.github+json",
+    }
+    token = os.environ.get("GITHUB_TOKEN")
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+    return headers
+
+def _load_rows():
+    with FIX_CSV.open(newline="", encoding="utf-8") as fh:
+        return list(csv.DictReader(fh))
+
+
+def _commit_files(owner, repo, sha, cache):
+    """Return {filename: patch} for a commit, or raise to signal skip."""
+    if sha in cache:
+        return cache[sha]
+    url = f"https://api.github.com/repos/{owner}/{repo}/commits/{sha}"
+    req = urllib.request.Request(url, headers=_auth_headers())
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            data = json.load(resp)
+    except urllib.error.HTTPError as exc:
+        if exc.code in (403, 429):
+            pytest.skip("GitHub API rate-limited (run with a token for full coverage)")
+        cache[sha] = None  # 404 etc. -> commit not fetchable
+        return None
+    except (urllib.error.URLError, TimeoutError, OSError) as exc:
+        pytest.skip(f"GitHub API unreachable: {exc}")
+    files = {f["filename"]: (f.get("patch") or "") for f in data.get("files", [])}
+    cache[sha] = files
+    return files
+
+
+def _commit_touches(changed_files, path):
+    """True if the commit changed `path`, tolerating module-prefix differences."""
+    if path in changed_files:
+        return True
+    basename = path.rsplit("/", 1)[-1]
+    for changed in changed_files:
+        if changed.endswith("/" + path) or path.endswith("/" + changed):
+            return True
+        if changed.rsplit("/", 1)[-1] == basename:
+            return True
+    return False
+
+
+def test_fix_rows_align_with_commit_diff():
+    rows = _load_rows()
+    cache = {}
+    mismatches = []
+    for row in rows:
+        owner = (row.get("github_username") or "").strip()
+        repo = (row.get("github_repository_name") or "").strip()
+        sha = (row.get("commit") or "").strip()
+        path = (row.get("file") or "").strip()
+        if not (owner and repo and sha and path):
+            continue  # blank identity columns are the blank-values test's job
+
+        files = _commit_files(owner, repo, sha, cache)
+        if files is None:
+            continue  # commit not fetchable -> can't verify this row
+
+        if not _commit_touches(files, path):
+            mismatches.append(
+                f"  {row.get('project_slug')} @ {sha[:10]}: "
+                f"file {path!r} not changed by commit"
+            )
+
+    assert not mismatches, (
+        f"{len(mismatches)} fix_info.csv row(s) disagree with their commit:\n"
+        + "\n".join(mismatches)
+    )
diff --git a/data/tests/conftest.py b/data/tests/conftest.py
@@ -0,0 +1,42 @@
+# Shared fixtures for the data-quality tests.
+
+import csv
+import pathlib
+
+import pytest
+
+# data/tests/conftest.py -> the CSVs live one directory up, in data/.
+DATA_DIR = pathlib.Path(__file__).resolve().parent.parent
+
+
+def _load(name):
+    """Load ``data/<name>.csv`` as a list of dict rows.
+
+    Each row is paired with its 1-based line number in the file (accounting
+    for the header) so failure messages can point at the offending row.
+    """
+    path = DATA_DIR / f"{name}.csv"
+    with path.open(newline="", encoding="utf-8") as fh:
+        reader = csv.DictReader(fh)
+        fieldnames = reader.fieldnames
+        # line 1 is the header, so the first data row is line 2.
+        rows = [(lineno, row) for lineno, row in enumerate(reader, start=2)]
+    return fieldnames, rows
+
+
+@pytest.fixture(scope="session")
+def fix_info():
+    """(fieldnames, [(lineno, row), ...]) for fix_info.csv."""
+    return _load("fix_info")
+
+
+@pytest.fixture(scope="session")
+def build_info():
+    """(fieldnames, [(lineno, row), ...]) for build_info.csv."""
+    return _load("build_info")
+
+
+@pytest.fixture(scope="session")
+def project_info():
+    """(fieldnames, [(lineno, row), ...]) for project_info.csv."""
+    return _load("project_info")
diff --git a/data/tests/project_coverage_test.py b/data/tests/project_coverage_test.py
@@ -0,0 +1,33 @@
+# Ensure every project in build_info / project_info appears in fix_info.
+
+def _slug_set(loaded):
+    _fieldnames, rows = loaded
+    return {(row.get("project_slug") or "").strip() for _lineno, row in rows}
+
+
+def _missing(source_loaded, fix_slugs):
+    _fieldnames, rows = source_loaded
+    missing = []
+    for lineno, row in rows:
+        slug = (row.get("project_slug") or "").strip()
+        if slug not in fix_slugs:
+            missing.append(f"  line {lineno}: {slug!r}")
+    return missing
+
+
+def test_build_info_projects_are_in_fix_info(build_info, fix_info):
+    fix_slugs = _slug_set(fix_info)
+    missing = _missing(build_info, fix_slugs)
+    assert not missing, (
+        f"{len(missing)} build_info.csv project(s) have no row in fix_info.csv:\n"
+        + "\n".join(missing)
+    )
+
+
+def test_project_info_projects_are_in_fix_info(project_info, fix_info):
+    fix_slugs = _slug_set(fix_info)
+    missing = _missing(project_info, fix_slugs)
+    assert not missing, (
+        f"{len(missing)} project_info.csv project(s) have no row in fix_info.csv:\n"
+        + "\n".join(missing)
+    )