From bb826957f511280d4a95d5ea9754cb3bdd0f0344 Mon Sep 17 00:00:00 2001
From: William Zijie Zhang <william@gridmatic.com>
Date: Wed, 3 Jun 2026 12:54:32 -0400
Subject: [PATCH 1/3] Fix int32 overflow in CSR allocation upper bounds

Several allocation sites cap a sparse CSR/CSC size with MIN(loose_nnz_bound,
rows*cols), or seed a growable vector with rows*cols. The dense cell-count product
overflows int for large dimensions (e.g. n_vars*n_vars with n_vars=250000 = 6.25e10
wraps to a negative int32). When the product wraps negative, MIN selects it and
new_CSR_matrix does a calloc overflow -> SIGSEGV. (A different size could wrap to a
small positive value -> silent under-allocation, which is worse.) In practice this
crashes get_problem_data on e.g. a QP with >46341 (=sqrt INT_MAX) variables and the
OptimalAdvertising benchmark (transpose/reductions of a 250000-variable matrix).

Add a saturating sat_mul_int(a, b) helper (clamps to INT_MAX on overflow) and use
MIN(loose_bound, sat_mul_int(rows, cols)) at the affected MIN sites; seed the two
iVec growable vectors from the source nnz instead of J->n*m. The allocation is
unchanged whenever the product fits, and falls back to the loose bound -- always a
valid upper bound at these sites -- when the product would overflow.

Sites: problem.c (Lagrange hessian), hstack.c (wsum hess), bivariate matmul
(chain-rule jacobian), CSR_sum.c (sum_4_csr_alloc), sparse_matrix.c (index_alloc,
sum_row_partition), stacked_pd.c (sum_row_partition), and the iVec seeds in
linalg_sparse_matmuls.c and linalg_dense_sparse_matmuls.c.

Adds tests/utils/test_alloc_overflow.h: a sat_mul_int unit test and a
sparse_index_alloc regression that segfaults before this fix.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 include/utils/utils.h                   | 12 ++++
 src/atoms/affine/hstack.c               |  2 +-
 src/atoms/bivariate_full_dom/matmul.c   |  2 +-
 src/problem.c                           |  2 +-
 src/utils/CSR_sum.c                     |  2 +-
 src/utils/linalg_dense_sparse_matmuls.c |  5 +-
 src/utils/linalg_sparse_matmuls.c       |  7 ++-
 src/utils/sparse_matrix.c               |  5 +-
 src/utils/stacked_pd.c                  |  2 +-
 tests/all_tests.c                       |  3 +
 tests/utils/test_alloc_overflow.h       | 82 +++++++++++++++++++++++++
 11 files changed, 114 insertions(+), 10 deletions(-)
 create mode 100644 tests/utils/test_alloc_overflow.h

diff --git a/include/utils/utils.h b/include/utils/utils.h
index fbdcca6..ee06f50 100644
--- a/include/utils/utils.h
+++ b/include/utils/utils.h
@@ -19,6 +19,7 @@
 #define UTILS_H
 
 #include "iVec.h"
+#include <limits.h>
 #include <stdbool.h>
 
 #ifndef MAX
@@ -28,6 +29,17 @@
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #endif
 
+/* Dense cell count a*b for allocation upper bounds, clamped to INT_MAX on overflow.
+   Used as MIN(loose_nnz_bound, sat_mul_int(rows, cols)): a real nnz bound is always
+   < INT_MAX, so on overflow the MIN correctly falls back to the loose bound instead of
+   selecting a wrapped (negative or small-positive) product. */
+static inline int sat_mul_int(int a, int b)
+{
+    if (a <= 0 || b <= 0) return 0;
+    if (a > INT_MAX / b) return INT_MAX;
+    return a * b;
+}
+
 /* Sort an array of integers in ascending order */
 void sort_int_array(int *array, int size);
 
diff --git a/src/atoms/affine/hstack.c b/src/atoms/affine/hstack.c
index a139fa6..a0e0a59 100644
--- a/src/atoms/affine/hstack.c
+++ b/src/atoms/affine/hstack.c
@@ -116,7 +116,7 @@ static void wsum_hess_init_impl(expr *node)
 
     /* worst-case scenario the nnz of node->wsum_hess is the sum of children's
        nnz, capped by the output cell count */
-    int nnz_ub = MIN(nnz, node->n_vars * node->n_vars);
+    int nnz_ub = MIN(nnz, sat_mul_int(node->n_vars, node->n_vars));
     CSR_matrix *H = new_CSR_matrix(node->n_vars, node->n_vars, nnz_ub);
     hnode->CSR_work = new_CSR_matrix(node->n_vars, node->n_vars, nnz_ub);
 
diff --git a/src/atoms/bivariate_full_dom/matmul.c b/src/atoms/bivariate_full_dom/matmul.c
index 7ab72f3..769e6a9 100644
--- a/src/atoms/bivariate_full_dom/matmul.c
+++ b/src/atoms/bivariate_full_dom/matmul.c
@@ -241,7 +241,7 @@ static void jacobian_init_chain_rule(expr *node)
     mnode->term1_CSR = YT_kron_I_alloc(m, k, n, f->work->jacobian_csc);
     mnode->term2_CSR = I_kron_X_alloc(m, k, n, g->work->jacobian_csc);
     int max_nnz = mnode->term1_CSR->nnz + mnode->term2_CSR->nnz;
-    max_nnz = MIN(max_nnz, node->size * node->n_vars);
+    max_nnz = MIN(max_nnz, sat_mul_int(node->size, node->n_vars));
     CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, max_nnz);
     sum_csr_alloc(mnode->term1_CSR, mnode->term2_CSR, jac);
     node->jacobian = new_sparse_matrix(jac);
diff --git a/src/problem.c b/src/problem.c
index 4afb1b3..09cc14d 100644
--- a/src/problem.c
+++ b/src/problem.c
@@ -256,7 +256,7 @@ void problem_init_hessian(problem *prob)
         nnz += prob->constraints[i]->wsum_hess->nnz;
     }
 
-    int hess_nnz_ub = MIN(nnz, prob->n_vars * prob->n_vars);
+    int hess_nnz_ub = MIN(nnz, sat_mul_int(prob->n_vars, prob->n_vars));
     prob->lagrange_hessian = new_CSR_matrix(prob->n_vars, prob->n_vars, hess_nnz_ub);
 
     /* affine shortcut */
diff --git a/src/utils/CSR_sum.c b/src/utils/CSR_sum.c
index 7249acb..19f99a3 100644
--- a/src/utils/CSR_sum.c
+++ b/src/utils/CSR_sum.c
@@ -367,7 +367,7 @@ CSR_matrix *sum_4_csr_alloc(const CSR_matrix *A, const CSR_matrix *B,
     const CSR_matrix *inputs[4] = {A, B, C, D};
     int m = A->m;
     int n = A->n;
-    int nnz_ub = MIN(A->nnz + B->nnz + C->nnz + D->nnz, m * n);
+    int nnz_ub = MIN(A->nnz + B->nnz + C->nnz + D->nnz, sat_mul_int(m, n));
 
     /* allocate output and index maps */
     CSR_matrix *out = new_CSR_matrix(m, n, nnz_ub);
diff --git a/src/utils/linalg_dense_sparse_matmuls.c b/src/utils/linalg_dense_sparse_matmuls.c
index 6211082..9eb025a 100644
--- a/src/utils/linalg_dense_sparse_matmuls.c
+++ b/src/utils/linalg_dense_sparse_matmuls.c
@@ -35,8 +35,11 @@ CSC_matrix *I_kron_A_alloc(const matrix *A, const CSC_matrix *J, int p)
     int n = A->n;
     int i, j, jj, block, block_start, block_end, block_jj_start, row_offset;
 
+    /* Ci grows via iVec_append, so this is only a capacity hint: seed from the source's
+       true nnz, never the dense product J->n * m (which overflows int for large operands
+       and over-allocates hugely for a sparse result). */
     int *Cp = (int *) sp_malloc((J->n + 1) * sizeof(int));
-    iVec *Ci = iVec_new(J->n * m);
+    iVec *Ci = iVec_new(J->nnz > 0 ? J->nnz : 1);
     Cp[0] = 0;
 
     /* for each column of J */
diff --git a/src/utils/linalg_sparse_matmuls.c b/src/utils/linalg_sparse_matmuls.c
index f208022..c6c7f6a 100644
--- a/src/utils/linalg_sparse_matmuls.c
+++ b/src/utils/linalg_sparse_matmuls.c
@@ -95,9 +95,12 @@ CSC_matrix *block_left_multiply_fill_sparsity(const CSR_matrix *A,
     int j, jj, block, block_start, block_end, block_jj_start, block_jj_end,
         row_offset;
 
-    /* allocate column pointers and an estimate of row indices */
+    /* allocate column pointers and an estimate of row indices. Ci grows via
+       iVec_append, so this is only a capacity hint: seed from the source's true nnz,
+       never the dense product J->n * m (which overflows int for large operands and
+       over-allocates hugely for a sparse result). */
     int *Cp = (int *) sp_malloc((J->n + 1) * sizeof(int));
-    iVec *Ci = iVec_new(J->n * m);
+    iVec *Ci = iVec_new(J->nnz > 0 ? J->nnz : 1);
     Cp[0] = 0;
 
     /* for each column of J */
diff --git a/src/utils/sparse_matrix.c b/src/utils/sparse_matrix.c
index 22c3fc9..f6ae640 100644
--- a/src/utils/sparse_matrix.c
+++ b/src/utils/sparse_matrix.c
@@ -124,7 +124,8 @@ static void sparse_transpose_fill_values(const matrix *self, matrix *out)
 static matrix *sparse_index_alloc(matrix *self, const int *indices, int n_idxs)
 {
     CSR_matrix *Jx = ((sparse_matrix *) self)->csr;
-    CSR_matrix *J = new_CSR_matrix(n_idxs, self->n, MIN(Jx->nnz, n_idxs * self->n));
+    CSR_matrix *J =
+        new_CSR_matrix(n_idxs, self->n, MIN(Jx->nnz, sat_mul_int(n_idxs, self->n)));
 
     J->p[0] = 0;
     for (int i = 0; i < n_idxs; i++)
@@ -329,7 +330,7 @@ static matrix *sparse_sum_row_partition_alloc(matrix *self, int axis, int d1,
     {
         m = d1;
     }
-    int max_nnz = MIN(A->nnz, m * A->n);
+    int max_nnz = MIN(A->nnz, sat_mul_int(m, A->n));
     CSR_matrix *out = new_CSR_matrix(m, A->n, max_nnz);
     int *iwork = (int *) sp_malloc(MAX(A->n, A->nnz) * sizeof(int));
 
diff --git a/src/utils/stacked_pd.c b/src/utils/stacked_pd.c
index 5ac5a3c..c55f65a 100644
--- a/src/utils/stacked_pd.c
+++ b/src/utils/stacked_pd.c
@@ -447,7 +447,7 @@ static matrix *stacked_pd_vtable_sum_row_partition_alloc(matrix *self, int axis,
     /* axis == 0 or 1: CSR fallback */
     CSR_matrix *A = self->to_csr(self);
     int m_out = (axis == 0) ? A->m / d1 : d1;
-    int max_out_nnz = MIN(A->nnz, m_out * A->n);
+    int max_out_nnz = MIN(A->nnz, sat_mul_int(m_out, A->n));
     CSR_matrix *out = new_CSR_matrix(m_out, A->n, max_out_nnz);
     int *iwork = (int *) sp_malloc(MAX(A->n, A->nnz) * sizeof(int));
 
diff --git a/tests/all_tests.c b/tests/all_tests.c
index e571683..b680d0f 100644
--- a/tests/all_tests.c
+++ b/tests/all_tests.c
@@ -60,6 +60,7 @@
 #include "problem/test_param_prob.h"
 #include "problem/test_problem.h"
 #include "utils/test_COO_matrix.h"
+#include "utils/test_alloc_overflow.h"
 #include "utils/test_cblas.h"
 #include "utils/test_csc_matrix.h"
 #include "utils/test_csr_csc_conversion.h"
@@ -346,6 +347,8 @@ int main(void)
     mu_run_test(test_wsum_hess_matmul_X_X, tests_run);
 
     printf("\n--- Utility Tests ---\n");
+    mu_run_test(test_sat_mul_int_clamps_on_overflow, tests_run);
+    mu_run_test(test_sparse_index_alloc_no_int_overflow, tests_run);
     mu_run_test(test_cblas_ddot, tests_run);
     mu_run_test(test_diag_csr_mult, tests_run);
     mu_run_test(test_csr_sum, tests_run);
diff --git a/tests/utils/test_alloc_overflow.h b/tests/utils/test_alloc_overflow.h
new file mode 100644
index 0000000..8c4005d
--- /dev/null
+++ b/tests/utils/test_alloc_overflow.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2026 Daniel Cederberg and William Zhang
+ *
+ * This file is part of the SparseDiffEngine project.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef TEST_ALLOC_OVERFLOW_H
+#define TEST_ALLOC_OVERFLOW_H
+
+#include "minunit.h"
+#include "utils/CSR_matrix.h"
+#include "utils/matrix.h"
+#include "utils/sparse_matrix.h"
+#include "utils/utils.h"
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+/* sat_mul_int returns the exact product when it fits and clamps to INT_MAX on
+   overflow (never wraps to a negative or small-positive value). */
+const char *test_sat_mul_int_clamps_on_overflow(void)
+{
+    mu_assert("exact product", sat_mul_int(40000, 40000) == 1600000000);
+    mu_assert("zero operand", sat_mul_int(0, 12345) == 0);
+    mu_assert("negative operand", sat_mul_int(-1, 12345) == 0);
+    /* 250000 * 250000 = 6.25e10 overflows int32; must clamp, not wrap negative. */
+    mu_assert("overflow clamps to INT_MAX", sat_mul_int(250000, 250000) == INT_MAX);
+    mu_assert("boundary still positive", sat_mul_int(46341, 46341) == INT_MAX);
+    return 0;
+}
+
+/* sparse_index_alloc sized its allocation as MIN(Jx->nnz, n_idxs * self->n). For a
+   large jacobian the dense product n_idxs * self->n overflows int (e.g. a transpose of
+   a 250000-variable matrix), wrapping negative so MIN selected it -> calloc overflow ->
+   SIGSEGV. This builds an index op whose product overflows and checks the result is a
+   valid CSR with the true (subset) nnz. */
+const char *test_sparse_index_alloc_no_int_overflow(void)
+{
+    /* n_idxs * self->n = 25000 * 100000 = 2.5e9 overflows int32. */
+    const int n_idxs = 25000;
+    const int n_cols = 100000;
+
+    /* identity-like CSR: n_idxs rows, one nonzero per row */
+    CSR_matrix *Jx = new_CSR_matrix(n_idxs, n_cols, n_idxs);
+    for (int i = 0; i < n_idxs; i++)
+    {
+        Jx->p[i] = i;
+        Jx->i[i] = i;
+        Jx->x[i] = 1.0;
+    }
+    Jx->p[n_idxs] = n_idxs;
+
+    matrix *mat = new_sparse_matrix(Jx);
+
+    int *indices = (int *) malloc(n_idxs * sizeof(int));
+    for (int i = 0; i < n_idxs; i++) indices[i] = i;
+
+    /* pre-fix: crashes here in new_CSR_matrix(... negative nnz) */
+    matrix *out = mat->index_alloc(mat, indices, n_idxs);
+    CSR_matrix *out_csr = out->to_csr(out);
+
+    mu_assert("nnz must equal selected-row total", out_csr->nnz == n_idxs);
+    mu_assert("nnz must be non-negative", out_csr->nnz >= 0);
+
+    free(indices);
+    free_matrix(out);
+    free_matrix(mat);
+    return 0;
+}
+
+#endif /* TEST_ALLOC_OVERFLOW_H */

From b84220fc28f7a0493ba9318246600ede9443ffad Mon Sep 17 00:00:00 2001
From: William Zijie Zhang <william@gridmatic.com>
Date: Wed, 3 Jun 2026 13:36:20 -0400
Subject: [PATCH 2/3] Apply clang-format comment re-wrapping

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 include/utils/utils.h                   | 4 ++--
 src/utils/linalg_dense_sparse_matmuls.c | 6 +++---
 tests/utils/test_alloc_overflow.h       | 8 ++++----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/utils/utils.h b/include/utils/utils.h
index ee06f50..c2a4b97 100644
--- a/include/utils/utils.h
+++ b/include/utils/utils.h
@@ -31,8 +31,8 @@
 
 /* Dense cell count a*b for allocation upper bounds, clamped to INT_MAX on overflow.
    Used as MIN(loose_nnz_bound, sat_mul_int(rows, cols)): a real nnz bound is always
-   < INT_MAX, so on overflow the MIN correctly falls back to the loose bound instead of
-   selecting a wrapped (negative or small-positive) product. */
+   < INT_MAX, so on overflow the MIN correctly falls back to the loose bound instead
+   of selecting a wrapped (negative or small-positive) product. */
 static inline int sat_mul_int(int a, int b)
 {
     if (a <= 0 || b <= 0) return 0;
diff --git a/src/utils/linalg_dense_sparse_matmuls.c b/src/utils/linalg_dense_sparse_matmuls.c
index 9eb025a..dd4a700 100644
--- a/src/utils/linalg_dense_sparse_matmuls.c
+++ b/src/utils/linalg_dense_sparse_matmuls.c
@@ -35,9 +35,9 @@ CSC_matrix *I_kron_A_alloc(const matrix *A, const CSC_matrix *J, int p)
     int n = A->n;
     int i, j, jj, block, block_start, block_end, block_jj_start, row_offset;
 
-    /* Ci grows via iVec_append, so this is only a capacity hint: seed from the source's
-       true nnz, never the dense product J->n * m (which overflows int for large operands
-       and over-allocates hugely for a sparse result). */
+    /* Ci grows via iVec_append, so this is only a capacity hint: seed from the
+       source's true nnz, never the dense product J->n * m (which overflows int for
+       large operands and over-allocates hugely for a sparse result). */
     int *Cp = (int *) sp_malloc((J->n + 1) * sizeof(int));
     iVec *Ci = iVec_new(J->nnz > 0 ? J->nnz : 1);
     Cp[0] = 0;
diff --git a/tests/utils/test_alloc_overflow.h b/tests/utils/test_alloc_overflow.h
index 8c4005d..5e783ee 100644
--- a/tests/utils/test_alloc_overflow.h
+++ b/tests/utils/test_alloc_overflow.h
@@ -41,10 +41,10 @@ const char *test_sat_mul_int_clamps_on_overflow(void)
 }
 
 /* sparse_index_alloc sized its allocation as MIN(Jx->nnz, n_idxs * self->n). For a
-   large jacobian the dense product n_idxs * self->n overflows int (e.g. a transpose of
-   a 250000-variable matrix), wrapping negative so MIN selected it -> calloc overflow ->
-   SIGSEGV. This builds an index op whose product overflows and checks the result is a
-   valid CSR with the true (subset) nnz. */
+   large jacobian the dense product n_idxs * self->n overflows int (e.g. a transpose
+   of a 250000-variable matrix), wrapping negative so MIN selected it -> calloc
+   overflow -> SIGSEGV. This builds an index op whose product overflows and checks
+   the result is a valid CSR with the true (subset) nnz. */
 const char *test_sparse_index_alloc_no_int_overflow(void)
 {
     /* n_idxs * self->n = 25000 * 100000 = 2.5e9 overflows int32. */

From 0664e552f4faace581b296986e56588bdefef577 Mon Sep 17 00:00:00 2001
From: dance858 <danielcederberg1@gmail.com>
Date: Wed, 3 Jun 2026 11:18:01 -0700
Subject: [PATCH 3/3] very minor changes

---
 src/utils/linalg_dense_sparse_matmuls.c | 4 +---
 src/utils/linalg_sparse_matmuls.c       | 6 ++----
 src/utils/sparse_matrix.c               | 4 ++--
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/utils/linalg_dense_sparse_matmuls.c b/src/utils/linalg_dense_sparse_matmuls.c
index dd4a700..f3da7ad 100644
--- a/src/utils/linalg_dense_sparse_matmuls.c
+++ b/src/utils/linalg_dense_sparse_matmuls.c
@@ -35,9 +35,7 @@ CSC_matrix *I_kron_A_alloc(const matrix *A, const CSC_matrix *J, int p)
     int n = A->n;
     int i, j, jj, block, block_start, block_end, block_jj_start, row_offset;
 
-    /* Ci grows via iVec_append, so this is only a capacity hint: seed from the
-       source's true nnz, never the dense product J->n * m (which overflows int for
-       large operands and over-allocates hugely for a sparse result). */
+    /* capacity hint based on J->nnz and not on the dense product */
     int *Cp = (int *) sp_malloc((J->n + 1) * sizeof(int));
     iVec *Ci = iVec_new(J->nnz > 0 ? J->nnz : 1);
     Cp[0] = 0;
diff --git a/src/utils/linalg_sparse_matmuls.c b/src/utils/linalg_sparse_matmuls.c
index c6c7f6a..c029a86 100644
--- a/src/utils/linalg_sparse_matmuls.c
+++ b/src/utils/linalg_sparse_matmuls.c
@@ -95,10 +95,8 @@ CSC_matrix *block_left_multiply_fill_sparsity(const CSR_matrix *A,
     int j, jj, block, block_start, block_end, block_jj_start, block_jj_end,
         row_offset;
 
-    /* allocate column pointers and an estimate of row indices. Ci grows via
-       iVec_append, so this is only a capacity hint: seed from the source's true nnz,
-       never the dense product J->n * m (which overflows int for large operands and
-       over-allocates hugely for a sparse result). */
+    /* Allocate column pointers and an estimate of row indices. Capacity hint
+       based on J->nnz and not on the dense product */
     int *Cp = (int *) sp_malloc((J->n + 1) * sizeof(int));
     iVec *Ci = iVec_new(J->nnz > 0 ? J->nnz : 1);
     Cp[0] = 0;
diff --git a/src/utils/sparse_matrix.c b/src/utils/sparse_matrix.c
index f6ae640..d55007a 100644
--- a/src/utils/sparse_matrix.c
+++ b/src/utils/sparse_matrix.c
@@ -124,8 +124,8 @@ static void sparse_transpose_fill_values(const matrix *self, matrix *out)
 static matrix *sparse_index_alloc(matrix *self, const int *indices, int n_idxs)
 {
     CSR_matrix *Jx = ((sparse_matrix *) self)->csr;
-    CSR_matrix *J =
-        new_CSR_matrix(n_idxs, self->n, MIN(Jx->nnz, sat_mul_int(n_idxs, self->n)));
+    int alloc_ub = sat_mul_int(n_idxs, self->n);
+    CSR_matrix *J = new_CSR_matrix(n_idxs, self->n, MIN(Jx->nnz, alloc_ub));
 
     J->p[0] = 0;
     for (int i = 0; i < n_idxs; i++)