feat: enhance rollup aggregation with dedup logic (#1843)

gustavobtflores · web-flow · commit 1e925d5dfc45 · 2026-04-07T21:26:28.000Z
* feat: enhance tree_tests_rollup aggregation with dedup logic

* test: add unit tests for rollup entry correction behavior
diff --git a/backend/kernelCI_app/management/commands/helpers/process_pending_helpers.py b/backend/kernelCI_app/management/commands/helpers/process_pending_helpers.py
@@ -53,6 +53,8 @@ def extract_path_group(path: str) -> str:
 def accumulate_rollup_entry(
     rollup_data: dict[tuple, dict],
     entry: RollupEntryData,
+    *,
+    is_correction: bool = False,
 ) -> None:
     """Accumulate a single test entry into rollup_data in-place."""
     checkout = entry["checkout"]
@@ -91,21 +93,30 @@ def accumulate_rollup_entry(
     )
 
     counter = ROLLUP_STATUS_FIELDS.get(entry["status"], "null_tests")
-    record[counter] += 1
-    record["total_tests"] += 1
+
+    if is_correction:
+        record["null_tests"] -= 1
+        record[counter] += 1
+    else:
+        record[counter] += 1
+        record["total_tests"] += 1
 
 
 def aggregate_tests_rollup(
     ready_tests: Sequence[PendingTest],
     test_builds_by_id: dict[str, Builds],
     issues_map: dict[str, dict],
+    reprocess_test_ids: set[str] | None = None,
 ) -> dict[tuple, dict]:
     """
     Build rollup data from pending tests.
     Returns rollup data without touching the database.
     """
     rollup_data: dict[tuple, dict] = {}
 
+    if reprocess_test_ids is None:
+        reprocess_test_ids = set()
+
     for test in ready_tests:
         # shouldn't happen, but being defensive here
         try:
@@ -155,6 +166,7 @@ def aggregate_tests_rollup(
                 "is_boot": test.is_boot,
                 "status": test.full_status,
             },
+            is_correction=test.test_id in reprocess_test_ids,
         )
 
     return rollup_data
diff --git a/backend/kernelCI_app/management/commands/process_pending_aggregations.py b/backend/kernelCI_app/management/commands/process_pending_aggregations.py
@@ -85,6 +85,11 @@ def get_tree_listing_key(
     ).digest()
 
 
+def get_rollup_key(test_id: str) -> bytes:
+    """Return the SHA-256 digest of 'rollup|<test_id>' (prefix namespaces the key)."""
+    return hashlib.sha256(f"rollup|{test_id}".encode("utf-8")).digest()
+
+
 SIMPLIFIED_STATUS_TO_COUNT = {
     SimplifiedStatusChoices.PASS: (1, 0, 0),
     SimplifiedStatusChoices.FAIL: (0, 1, 0),
@@ -983,10 +988,68 @@ def _process_tests_rollup_batch(
         if not ready_tests:
             return
 
-        test_ids = [t.test_id for t in ready_tests]
+        rollup_keys_by_test_id = {
+            t.test_id: get_rollup_key(t.test_id) for t in ready_tests
+        }
+
+        existing_processed = _get_existing_processed(
+            set(rollup_keys_by_test_id.values())
+        )
+        existing_by_key = {
+            (e.listing_item_key, e.checkout_id): e for e in existing_processed
+        }
+
+        tests_to_process: list[PendingTest] = []
+        test_ids = []
+        reprocess_test_ids: set[str] = set()
+        new_processed_entries: set[ProcessedListingItems] = set()
+
+        for test in ready_tests:
+            rollup_key = rollup_keys_by_test_id[test.test_id]
+
+            try:
+                build = test_builds_by_id[test.build_id]
+                checkout_id = build.checkout.id
+            except KeyError:
+                continue
+
+            found_existing = existing_by_key.get((rollup_key, checkout_id), None)
+
+            if found_existing:
+                stored_status = found_existing.status
+
+                if stored_status is not None:
+                    continue
+                if test.status is None:
+                    # Both null - already counted as null
+                    continue
+
+                # null -> non-null: This is a correction (reprocess)
+                reprocess_test_ids.add(test.test_id)
+
+            tests_to_process.append(test)
+            test_ids.append(test.test_id)
+            new_processed_entries.add(
+                ProcessedListingItems(
+                    listing_item_key=rollup_key,
+                    checkout_id=checkout_id,
+                    status=test.status,
+                )
+            )
+
+        if not tests_to_process:
+            return
+
         issues_map = _fetch_test_issues(test_ids)
-        rollup_data = aggregate_tests_rollup(ready_tests, test_builds_by_id, issues_map)
+        rollup_data = aggregate_tests_rollup(
+            tests_to_process,
+            test_builds_by_id,
+            issues_map,
+            reprocess_test_ids=reprocess_test_ids,
+        )
+
         self._process_tests_rollup(rollup_data)
+        self._process_new_processed_entries(new_processed_entries)
 
     def _process_hardware_batch(
         self,
diff --git a/backend/kernelCI_app/tests/unitTests/commands/process_pending_helpers_test.py b/backend/kernelCI_app/tests/unitTests/commands/process_pending_helpers_test.py
@@ -447,3 +447,264 @@ def test_skips_test_with_missing_build_but_processes_others(self):
         self.assertEqual(len(result), 1)
         record = next(iter(result.values()))
         self.assertEqual(record["total_tests"], 1)
+
+
+class TestAccumulateRollupEntryCorrection(SimpleTestCase):
+    """Test is_correction=True behavior: null -> non-null transitions."""
+
+    def _make_rollup_data_with_null_entry(self, initial_null_count=1):
+        """Pre-seed rollup_data with a record having some null_tests."""
+        checkout = _make_checkout()
+        entry = _make_rollup_entry(checkout=checkout, status=None)
+        rollup_key = RollupKey(
+            origin=checkout.origin,
+            tree_name=checkout.tree_name,
+            git_repository_branch=checkout.git_repository_branch,
+            git_repository_url=checkout.git_repository_url,
+            git_commit_hash=checkout.git_commit_hash,
+            path_group=entry["path_group"],
+            config=entry["config"],
+            arch=entry["arch"],
+            compiler=entry["compiler"],
+            hardware_key=entry["hardware_key"],
+            platform=entry["platform"],
+            lab=entry["lab"],
+            test_origin=entry["origin"],
+            issue_id=entry["issue_id"],
+            issue_version=entry["issue_version"],
+            issue_uncategorized=entry["issue_uncategorized"],
+            is_boot=entry["is_boot"],
+        )
+        rollup_data = {
+            rollup_key: {
+                "pass_tests": 0,
+                "fail_tests": 0,
+                "skip_tests": 0,
+                "error_tests": 0,
+                "miss_tests": 0,
+                "done_tests": 0,
+                "null_tests": initial_null_count,
+                "total_tests": initial_null_count,
+            }
+        }
+        return rollup_data, entry, rollup_key
+
+    def test_correction_decrements_null_tests(self):
+        """Correction moves count from null_tests to the new status bucket."""
+        rollup_data, entry, rollup_key = self._make_rollup_data_with_null_entry()
+        entry["status"] = StatusChoices.PASS
+
+        accumulate_rollup_entry(rollup_data, entry, is_correction=True)
+
+        record = rollup_data[rollup_key]
+        self.assertEqual(record["null_tests"], 0)
+
+    def test_correction_increments_new_status_bucket(self):
+        """The new status bucket gets the count moved from null_tests."""
+        rollup_data, entry, rollup_key = self._make_rollup_data_with_null_entry()
+        entry["status"] = StatusChoices.PASS
+
+        accumulate_rollup_entry(rollup_data, entry, is_correction=True)
+
+        record = rollup_data[rollup_key]
+        self.assertEqual(record["pass_tests"], 1)
+
+    def test_correction_does_not_change_total_tests(self):
+        """Total should remain unchanged - just moving from null to bucket."""
+        rollup_data, entry, rollup_key = self._make_rollup_data_with_null_entry()
+        entry["status"] = StatusChoices.PASS
+
+        accumulate_rollup_entry(rollup_data, entry, is_correction=True)
+
+        record = rollup_data[rollup_key]
+        self.assertEqual(record["total_tests"], 1)
+
+    def test_correction_with_fail_status(self):
+        """Correction works with fail status too."""
+        rollup_data, entry, rollup_key = self._make_rollup_data_with_null_entry()
+        entry["status"] = StatusChoices.FAIL
+
+        accumulate_rollup_entry(rollup_data, entry, is_correction=True)
+
+        record = rollup_data[rollup_key]
+        self.assertEqual(record["null_tests"], 0)
+        self.assertEqual(record["fail_tests"], 1)
+        self.assertEqual(record["total_tests"], 1)
+
+    def test_correction_with_skip_status(self):
+        """Correction works with skip status."""
+        rollup_data, entry, rollup_key = self._make_rollup_data_with_null_entry()
+        entry["status"] = StatusChoices.SKIP
+
+        accumulate_rollup_entry(rollup_data, entry, is_correction=True)
+
+        record = rollup_data[rollup_key]
+        self.assertEqual(record["null_tests"], 0)
+        self.assertEqual(record["skip_tests"], 1)
+        self.assertEqual(record["total_tests"], 1)
+
+    def test_correction_with_null_as_new_status(self):
+        """If new status is also null, both operations hit null_tests (net zero)."""
+        rollup_data, entry, rollup_key = self._make_rollup_data_with_null_entry()
+        entry["status"] = None
+
+        accumulate_rollup_entry(rollup_data, entry, is_correction=True)
+
+        record = rollup_data[rollup_key]
+        # Decrement then increment null_tests: net change is 0
+        self.assertEqual(record["null_tests"], 1)
+        self.assertEqual(record["total_tests"], 1)
+
+    def test_multiple_corrections_on_same_rollup_key(self):
+        """Multiple corrections on the same key accumulate correctly."""
+        checkout = _make_checkout()
+        base_entry = _make_rollup_entry(checkout=checkout, status=None)
+        rollup_key = RollupKey(
+            origin=checkout.origin,
+            tree_name=checkout.tree_name,
+            git_repository_branch=checkout.git_repository_branch,
+            git_repository_url=checkout.git_repository_url,
+            git_commit_hash=checkout.git_commit_hash,
+            path_group=base_entry["path_group"],
+            config=base_entry["config"],
+            arch=base_entry["arch"],
+            compiler=base_entry["compiler"],
+            hardware_key=base_entry["hardware_key"],
+            platform=base_entry["platform"],
+            lab=base_entry["lab"],
+            test_origin=base_entry["origin"],
+            issue_id=base_entry["issue_id"],
+            issue_version=base_entry["issue_version"],
+            issue_uncategorized=base_entry["issue_uncategorized"],
+            is_boot=base_entry["is_boot"],
+        )
+        rollup_data = {
+            rollup_key: {
+                "pass_tests": 0,
+                "fail_tests": 0,
+                "skip_tests": 0,
+                "error_tests": 0,
+                "miss_tests": 0,
+                "done_tests": 0,
+                "null_tests": 3,
+                "total_tests": 3,
+            }
+        }
+
+        # Three corrections: PASS, FAIL, SKIP
+        entry1 = _make_rollup_entry(checkout=checkout, status=StatusChoices.PASS)
+        entry2 = _make_rollup_entry(checkout=checkout, status=StatusChoices.FAIL)
+        entry3 = _make_rollup_entry(checkout=checkout, status=StatusChoices.SKIP)
+
+        accumulate_rollup_entry(rollup_data, entry1, is_correction=True)
+        accumulate_rollup_entry(rollup_data, entry2, is_correction=True)
+        accumulate_rollup_entry(rollup_data, entry3, is_correction=True)
+
+        record = rollup_data[rollup_key]
+        self.assertEqual(record["null_tests"], 0)
+        self.assertEqual(record["pass_tests"], 1)
+        self.assertEqual(record["fail_tests"], 1)
+        self.assertEqual(record["skip_tests"], 1)
+        self.assertEqual(record["total_tests"], 3)
+
+
+class TestAggregateTestsRollupWithReprocess(SimpleTestCase):
+    """Test aggregate_tests_rollup with reprocess_test_ids parameter."""
+
+    def test_reprocess_test_id_applies_correction(self):
+        """Test in reprocess_test_ids gets correction: null_tests decremented."""
+        checkout = _make_checkout()
+        build = _make_build(checkout=checkout)
+        # This test will be marked as reprocess
+        test = _make_pending_test(
+            test_id="test-1",
+            build_id="build-1",
+            full_status=StatusChoices.PASS,
+        )
+
+        result = aggregate_tests_rollup(
+            [test],
+            {"build-1": build},
+            {},
+            reprocess_test_ids={"test-1"},  # Mark as correction
+        )
+
+        record = next(iter(result.values()))
+        # Correction: total_tests does not increment and pass_tests becomes 1.
+        # The batch holds no prior null count for this key, so null_tests ends at -1.
+        self.assertEqual(record["pass_tests"], 1)
+        self.assertEqual(record["null_tests"], -1)
+        self.assertEqual(record["total_tests"], 0)
+
+    def test_normal_test_not_in_reprocess(self):
+        """Test not in reprocess_test_ids behaves like normal."""
+        checkout = _make_checkout()
+        build = _make_build(checkout=checkout)
+        test = _make_pending_test(
+            test_id="test-1",
+            build_id="build-1",
+            full_status=StatusChoices.PASS,
+        )
+
+        result = aggregate_tests_rollup(
+            [test],
+            {"build-1": build},
+            {},
+            reprocess_test_ids=set(),  # Empty set
+        )
+
+        record = next(iter(result.values()))
+        self.assertEqual(record["pass_tests"], 1)
+        self.assertEqual(record["total_tests"], 1)
+        self.assertEqual(record["null_tests"], 0)
+
+    def test_mixed_batch_corrections_and_new(self):
+        """Two tests same rollup key: one correction + one normal."""
+        checkout = _make_checkout()
+        build = _make_build(checkout=checkout)
+        # Correction: was counted as null, now becoming PASS
+        test_correction = _make_pending_test(
+            test_id="t1",
+            build_id="build-1",
+            full_status=StatusChoices.PASS,
+        )
+        # New test, fresh PASS
+        test_new = _make_pending_test(
+            test_id="t2",
+            build_id="build-1",
+            full_status=StatusChoices.PASS,
+        )
+
+        result = aggregate_tests_rollup(
+            [test_correction, test_new],
+            {"build-1": build},
+            {},
+            reprocess_test_ids={"t1"},  # Only first is correction
+        )
+
+        self.assertEqual(len(result), 1)
+        record = next(iter(result.values()))
+        # Correction: null_tests -1, pass_tests +1, total_tests +0
+        # New: pass_tests +1, total_tests +1
+        # Result: null_tests == -1, pass_tests == 2, total_tests == 1
+        self.assertEqual(record["null_tests"], -1)
+        self.assertEqual(record["pass_tests"], 2)
+        self.assertEqual(record["total_tests"], 1)
+
+    def test_default_reprocess_test_ids_is_empty(self):
+        """Not passing reprocess_test_ids defaults to empty set."""
+        checkout = _make_checkout()
+        build = _make_build(checkout=checkout)
+        test = _make_pending_test(
+            test_id="test-1",
+            build_id="build-1",
+            full_status=StatusChoices.FAIL,
+        )
+
+        # Call without the reprocess_test_ids parameter
+        result = aggregate_tests_rollup([test], {"build-1": build}, {})
+
+        record = next(iter(result.values()))
+        self.assertEqual(record["fail_tests"], 1)
+        self.assertEqual(record["total_tests"], 1)
+        self.assertEqual(record["null_tests"], 0)