From bb826957f511280d4a95d5ea9754cb3bdd0f0344 Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Wed, 3 Jun 2026 12:54:32 -0400 Subject: [PATCH 1/3] Fix int32 overflow in CSR allocation upper bounds Several allocation sites cap a sparse CSR/CSC size with MIN(loose_nnz_bound, rows*cols), or seed a growable vector with rows*cols. The dense cell-count product overflows int for large dimensions (e.g. n_vars*n_vars with n_vars=250000 = 6.25e10 wraps to a negative int32). When the product wraps negative, MIN selects it and new_CSR_matrix does a calloc overflow -> SIGSEGV. (A different size could wrap to a small positive value -> silent under-allocation, which is worse.) In practice this crashes get_problem_data on e.g. a QP with >46341 (=sqrt INT_MAX) variables and the OptimalAdvertising benchmark (transpose/reductions of a 250000-variable matrix). Add a saturating sat_mul_int(a, b) helper (clamps to INT_MAX on overflow) and use MIN(loose_bound, sat_mul_int(rows, cols)) at the affected MIN sites; seed the two iVec growable vectors from the source nnz instead of J->n*m. The allocation is unchanged whenever the product fits, and falls back to the loose bound -- always a valid upper bound at these sites -- when the product would overflow. Sites: problem.c (Lagrange hessian), hstack.c (wsum hess), bivariate matmul (chain-rule jacobian), CSR_sum.c (sum_4_csr_alloc), sparse_matrix.c (index_alloc, sum_row_partition), stacked_pd.c (sum_row_partition), and the iVec seeds in linalg_sparse_matmuls.c and linalg_dense_sparse_matmuls.c. Adds tests/utils/test_alloc_overflow.h: a sat_mul_int unit test and a sparse_index_alloc regression that segfaults before this fix. Co-Authored-By: Claude Opus 4.8 (1M context) --- include/utils/utils.h | 12 ++++ src/atoms/affine/hstack.c | 2 +- src/atoms/bivariate_full_dom/matmul.c | 2 +- src/problem.c | 2 +- src/utils/CSR_sum.c | 2 +- src/utils/linalg_dense_sparse_matmuls.c | 5 +- src/utils/linalg_sparse_matmuls.c | 7 ++- src/utils/sparse_matrix.c | 5 +- src/utils/stacked_pd.c | 2 +- tests/all_tests.c | 3 + tests/utils/test_alloc_overflow.h | 82 +++++++++++++++++++++++++ 11 files changed, 114 insertions(+), 10 deletions(-) create mode 100644 tests/utils/test_alloc_overflow.h diff --git a/include/utils/utils.h b/include/utils/utils.h index fbdcca6..ee06f50 100644 --- a/include/utils/utils.h +++ b/include/utils/utils.h @@ -19,6 +19,7 @@ #define UTILS_H #include "iVec.h" +#include #include #ifndef MAX @@ -28,6 +29,17 @@ #define MIN(a, b) ((a) < (b) ? (a) : (b)) #endif +/* Dense cell count a*b for allocation upper bounds, clamped to INT_MAX on overflow. + Used as MIN(loose_nnz_bound, sat_mul_int(rows, cols)): a real nnz bound is always + < INT_MAX, so on overflow the MIN correctly falls back to the loose bound instead of + selecting a wrapped (negative or small-positive) product. */ +static inline int sat_mul_int(int a, int b) +{ + if (a <= 0 || b <= 0) return 0; + if (a > INT_MAX / b) return INT_MAX; + return a * b; +} + /* Sort an array of integers in ascending order */ void sort_int_array(int *array, int size); diff --git a/src/atoms/affine/hstack.c b/src/atoms/affine/hstack.c index a139fa6..a0e0a59 100644 --- a/src/atoms/affine/hstack.c +++ b/src/atoms/affine/hstack.c @@ -116,7 +116,7 @@ static void wsum_hess_init_impl(expr *node) /* worst-case scenario the nnz of node->wsum_hess is the sum of children's nnz, capped by the output cell count */ - int nnz_ub = MIN(nnz, node->n_vars * node->n_vars); + int nnz_ub = MIN(nnz, sat_mul_int(node->n_vars, node->n_vars)); CSR_matrix *H = new_CSR_matrix(node->n_vars, node->n_vars, nnz_ub); hnode->CSR_work = new_CSR_matrix(node->n_vars, node->n_vars, nnz_ub); diff --git a/src/atoms/bivariate_full_dom/matmul.c b/src/atoms/bivariate_full_dom/matmul.c index 7ab72f3..769e6a9 100644 --- a/src/atoms/bivariate_full_dom/matmul.c +++ b/src/atoms/bivariate_full_dom/matmul.c @@ -241,7 +241,7 @@ static void jacobian_init_chain_rule(expr *node) mnode->term1_CSR = YT_kron_I_alloc(m, k, n, f->work->jacobian_csc); mnode->term2_CSR = I_kron_X_alloc(m, k, n, g->work->jacobian_csc); int max_nnz = mnode->term1_CSR->nnz + mnode->term2_CSR->nnz; - max_nnz = MIN(max_nnz, node->size * node->n_vars); + max_nnz = MIN(max_nnz, sat_mul_int(node->size, node->n_vars)); CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, max_nnz); sum_csr_alloc(mnode->term1_CSR, mnode->term2_CSR, jac); node->jacobian = new_sparse_matrix(jac); diff --git a/src/problem.c b/src/problem.c index 4afb1b3..09cc14d 100644 --- a/src/problem.c +++ b/src/problem.c @@ -256,7 +256,7 @@ void problem_init_hessian(problem *prob) nnz += prob->constraints[i]->wsum_hess->nnz; } - int hess_nnz_ub = MIN(nnz, prob->n_vars * prob->n_vars); + int hess_nnz_ub = MIN(nnz, sat_mul_int(prob->n_vars, prob->n_vars)); prob->lagrange_hessian = new_CSR_matrix(prob->n_vars, prob->n_vars, hess_nnz_ub); /* affine shortcut */ diff --git a/src/utils/CSR_sum.c b/src/utils/CSR_sum.c index 7249acb..19f99a3 100644 --- a/src/utils/CSR_sum.c +++ b/src/utils/CSR_sum.c @@ -367,7 +367,7 @@ CSR_matrix *sum_4_csr_alloc(const CSR_matrix *A, const CSR_matrix *B, const CSR_matrix *inputs[4] = {A, B, C, D}; int m = A->m; int n = A->n; - int nnz_ub = MIN(A->nnz + B->nnz + C->nnz + D->nnz, m * n); + int nnz_ub = MIN(A->nnz + B->nnz + C->nnz + D->nnz, sat_mul_int(m, n)); /* allocate output and index maps */ CSR_matrix *out = new_CSR_matrix(m, n, nnz_ub); diff --git a/src/utils/linalg_dense_sparse_matmuls.c b/src/utils/linalg_dense_sparse_matmuls.c index 6211082..9eb025a 100644 --- a/src/utils/linalg_dense_sparse_matmuls.c +++ b/src/utils/linalg_dense_sparse_matmuls.c @@ -35,8 +35,11 @@ CSC_matrix *I_kron_A_alloc(const matrix *A, const CSC_matrix *J, int p) int n = A->n; int i, j, jj, block, block_start, block_end, block_jj_start, row_offset; + /* Ci grows via iVec_append, so this is only a capacity hint: seed from the source's + true nnz, never the dense product J->n * m (which overflows int for large operands + and over-allocates hugely for a sparse result). */ int *Cp = (int *) sp_malloc((J->n + 1) * sizeof(int)); - iVec *Ci = iVec_new(J->n * m); + iVec *Ci = iVec_new(J->nnz > 0 ? J->nnz : 1); Cp[0] = 0; /* for each column of J */ diff --git a/src/utils/linalg_sparse_matmuls.c b/src/utils/linalg_sparse_matmuls.c index f208022..c6c7f6a 100644 --- a/src/utils/linalg_sparse_matmuls.c +++ b/src/utils/linalg_sparse_matmuls.c @@ -95,9 +95,12 @@ CSC_matrix *block_left_multiply_fill_sparsity(const CSR_matrix *A, int j, jj, block, block_start, block_end, block_jj_start, block_jj_end, row_offset; - /* allocate column pointers and an estimate of row indices */ + /* allocate column pointers and an estimate of row indices. Ci grows via + iVec_append, so this is only a capacity hint: seed from the source's true nnz, + never the dense product J->n * m (which overflows int for large operands and + over-allocates hugely for a sparse result). */ int *Cp = (int *) sp_malloc((J->n + 1) * sizeof(int)); - iVec *Ci = iVec_new(J->n * m); + iVec *Ci = iVec_new(J->nnz > 0 ? J->nnz : 1); Cp[0] = 0; /* for each column of J */ diff --git a/src/utils/sparse_matrix.c b/src/utils/sparse_matrix.c index 22c3fc9..f6ae640 100644 --- a/src/utils/sparse_matrix.c +++ b/src/utils/sparse_matrix.c @@ -124,7 +124,8 @@ static void sparse_transpose_fill_values(const matrix *self, matrix *out) static matrix *sparse_index_alloc(matrix *self, const int *indices, int n_idxs) { CSR_matrix *Jx = ((sparse_matrix *) self)->csr; - CSR_matrix *J = new_CSR_matrix(n_idxs, self->n, MIN(Jx->nnz, n_idxs * self->n)); + CSR_matrix *J = + new_CSR_matrix(n_idxs, self->n, MIN(Jx->nnz, sat_mul_int(n_idxs, self->n))); J->p[0] = 0; for (int i = 0; i < n_idxs; i++) @@ -329,7 +330,7 @@ static matrix *sparse_sum_row_partition_alloc(matrix *self, int axis, int d1, { m = d1; } - int max_nnz = MIN(A->nnz, m * A->n); + int max_nnz = MIN(A->nnz, sat_mul_int(m, A->n)); CSR_matrix *out = new_CSR_matrix(m, A->n, max_nnz); int *iwork = (int *) sp_malloc(MAX(A->n, A->nnz) * sizeof(int)); diff --git a/src/utils/stacked_pd.c b/src/utils/stacked_pd.c index 5ac5a3c..c55f65a 100644 --- a/src/utils/stacked_pd.c +++ b/src/utils/stacked_pd.c @@ -447,7 +447,7 @@ static matrix *stacked_pd_vtable_sum_row_partition_alloc(matrix *self, int axis, /* axis == 0 or 1: CSR fallback */ CSR_matrix *A = self->to_csr(self); int m_out = (axis == 0) ? A->m / d1 : d1; - int max_out_nnz = MIN(A->nnz, m_out * A->n); + int max_out_nnz = MIN(A->nnz, sat_mul_int(m_out, A->n)); CSR_matrix *out = new_CSR_matrix(m_out, A->n, max_out_nnz); int *iwork = (int *) sp_malloc(MAX(A->n, A->nnz) * sizeof(int)); diff --git a/tests/all_tests.c b/tests/all_tests.c index e571683..b680d0f 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -60,6 +60,7 @@ #include "problem/test_param_prob.h" #include "problem/test_problem.h" #include "utils/test_COO_matrix.h" +#include "utils/test_alloc_overflow.h" #include "utils/test_cblas.h" #include "utils/test_csc_matrix.h" #include "utils/test_csr_csc_conversion.h" @@ -346,6 +347,8 @@ int main(void) mu_run_test(test_wsum_hess_matmul_X_X, tests_run); printf("\n--- Utility Tests ---\n"); + mu_run_test(test_sat_mul_int_clamps_on_overflow, tests_run); + mu_run_test(test_sparse_index_alloc_no_int_overflow, tests_run); mu_run_test(test_cblas_ddot, tests_run); mu_run_test(test_diag_csr_mult, tests_run); mu_run_test(test_csr_sum, tests_run); diff --git a/tests/utils/test_alloc_overflow.h b/tests/utils/test_alloc_overflow.h new file mode 100644 index 0000000..8c4005d --- /dev/null +++ b/tests/utils/test_alloc_overflow.h @@ -0,0 +1,82 @@ +/* + * Copyright 2026 Daniel Cederberg and William Zhang + * + * This file is part of the SparseDiffEngine project. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef TEST_ALLOC_OVERFLOW_H +#define TEST_ALLOC_OVERFLOW_H + +#include "minunit.h" +#include "utils/CSR_matrix.h" +#include "utils/matrix.h" +#include "utils/sparse_matrix.h" +#include "utils/utils.h" +#include +#include +#include + +/* sat_mul_int returns the exact product when it fits and clamps to INT_MAX on + overflow (never wraps to a negative or small-positive value). */ +const char *test_sat_mul_int_clamps_on_overflow(void) +{ + mu_assert("exact product", sat_mul_int(40000, 40000) == 1600000000); + mu_assert("zero operand", sat_mul_int(0, 12345) == 0); + mu_assert("negative operand", sat_mul_int(-1, 12345) == 0); + /* 250000 * 250000 = 6.25e10 overflows int32; must clamp, not wrap negative. */ + mu_assert("overflow clamps to INT_MAX", sat_mul_int(250000, 250000) == INT_MAX); + mu_assert("boundary still positive", sat_mul_int(46341, 46341) == INT_MAX); + return 0; +} + +/* sparse_index_alloc sized its allocation as MIN(Jx->nnz, n_idxs * self->n). For a + large jacobian the dense product n_idxs * self->n overflows int (e.g. a transpose of + a 250000-variable matrix), wrapping negative so MIN selected it -> calloc overflow -> + SIGSEGV. This builds an index op whose product overflows and checks the result is a + valid CSR with the true (subset) nnz. */ +const char *test_sparse_index_alloc_no_int_overflow(void) +{ + /* n_idxs * self->n = 25000 * 100000 = 2.5e9 overflows int32. */ + const int n_idxs = 25000; + const int n_cols = 100000; + + /* identity-like CSR: n_idxs rows, one nonzero per row */ + CSR_matrix *Jx = new_CSR_matrix(n_idxs, n_cols, n_idxs); + for (int i = 0; i < n_idxs; i++) + { + Jx->p[i] = i; + Jx->i[i] = i; + Jx->x[i] = 1.0; + } + Jx->p[n_idxs] = n_idxs; + + matrix *mat = new_sparse_matrix(Jx); + + int *indices = (int *) malloc(n_idxs * sizeof(int)); + for (int i = 0; i < n_idxs; i++) indices[i] = i; + + /* pre-fix: crashes here in new_CSR_matrix(... negative nnz) */ + matrix *out = mat->index_alloc(mat, indices, n_idxs); + CSR_matrix *out_csr = out->to_csr(out); + + mu_assert("nnz must equal selected-row total", out_csr->nnz == n_idxs); + mu_assert("nnz must be non-negative", out_csr->nnz >= 0); + + free(indices); + free_matrix(out); + free_matrix(mat); + return 0; +} + +#endif /* TEST_ALLOC_OVERFLOW_H */ From b84220fc28f7a0493ba9318246600ede9443ffad Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Wed, 3 Jun 2026 13:36:20 -0400 Subject: [PATCH 2/3] Apply clang-format comment re-wrapping Co-Authored-By: Claude Opus 4.8 (1M context) --- include/utils/utils.h | 4 ++-- src/utils/linalg_dense_sparse_matmuls.c | 6 +++--- tests/utils/test_alloc_overflow.h | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/utils/utils.h b/include/utils/utils.h index ee06f50..c2a4b97 100644 --- a/include/utils/utils.h +++ b/include/utils/utils.h @@ -31,8 +31,8 @@ /* Dense cell count a*b for allocation upper bounds, clamped to INT_MAX on overflow. Used as MIN(loose_nnz_bound, sat_mul_int(rows, cols)): a real nnz bound is always - < INT_MAX, so on overflow the MIN correctly falls back to the loose bound instead of - selecting a wrapped (negative or small-positive) product. */ + < INT_MAX, so on overflow the MIN correctly falls back to the loose bound instead + of selecting a wrapped (negative or small-positive) product. */ static inline int sat_mul_int(int a, int b) { if (a <= 0 || b <= 0) return 0; diff --git a/src/utils/linalg_dense_sparse_matmuls.c b/src/utils/linalg_dense_sparse_matmuls.c index 9eb025a..dd4a700 100644 --- a/src/utils/linalg_dense_sparse_matmuls.c +++ b/src/utils/linalg_dense_sparse_matmuls.c @@ -35,9 +35,9 @@ CSC_matrix *I_kron_A_alloc(const matrix *A, const CSC_matrix *J, int p) int n = A->n; int i, j, jj, block, block_start, block_end, block_jj_start, row_offset; - /* Ci grows via iVec_append, so this is only a capacity hint: seed from the source's - true nnz, never the dense product J->n * m (which overflows int for large operands - and over-allocates hugely for a sparse result). */ + /* Ci grows via iVec_append, so this is only a capacity hint: seed from the + source's true nnz, never the dense product J->n * m (which overflows int for + large operands and over-allocates hugely for a sparse result). */ int *Cp = (int *) sp_malloc((J->n + 1) * sizeof(int)); iVec *Ci = iVec_new(J->nnz > 0 ? J->nnz : 1); Cp[0] = 0; diff --git a/tests/utils/test_alloc_overflow.h b/tests/utils/test_alloc_overflow.h index 8c4005d..5e783ee 100644 --- a/tests/utils/test_alloc_overflow.h +++ b/tests/utils/test_alloc_overflow.h @@ -41,10 +41,10 @@ const char *test_sat_mul_int_clamps_on_overflow(void) } /* sparse_index_alloc sized its allocation as MIN(Jx->nnz, n_idxs * self->n). For a - large jacobian the dense product n_idxs * self->n overflows int (e.g. a transpose of - a 250000-variable matrix), wrapping negative so MIN selected it -> calloc overflow -> - SIGSEGV. This builds an index op whose product overflows and checks the result is a - valid CSR with the true (subset) nnz. */ + large jacobian the dense product n_idxs * self->n overflows int (e.g. a transpose + of a 250000-variable matrix), wrapping negative so MIN selected it -> calloc + overflow -> SIGSEGV. This builds an index op whose product overflows and checks + the result is a valid CSR with the true (subset) nnz. */ const char *test_sparse_index_alloc_no_int_overflow(void) { /* n_idxs * self->n = 25000 * 100000 = 2.5e9 overflows int32. */ From 0664e552f4faace581b296986e56588bdefef577 Mon Sep 17 00:00:00 2001 From: dance858 Date: Wed, 3 Jun 2026 11:18:01 -0700 Subject: [PATCH 3/3] very minor changes --- src/utils/linalg_dense_sparse_matmuls.c | 4 +--- src/utils/linalg_sparse_matmuls.c | 6 ++---- src/utils/sparse_matrix.c | 4 ++-- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/utils/linalg_dense_sparse_matmuls.c b/src/utils/linalg_dense_sparse_matmuls.c index dd4a700..f3da7ad 100644 --- a/src/utils/linalg_dense_sparse_matmuls.c +++ b/src/utils/linalg_dense_sparse_matmuls.c @@ -35,9 +35,7 @@ CSC_matrix *I_kron_A_alloc(const matrix *A, const CSC_matrix *J, int p) int n = A->n; int i, j, jj, block, block_start, block_end, block_jj_start, row_offset; - /* Ci grows via iVec_append, so this is only a capacity hint: seed from the - source's true nnz, never the dense product J->n * m (which overflows int for - large operands and over-allocates hugely for a sparse result). */ + /* capacity hint based on J->nnz and not on the dense product */ int *Cp = (int *) sp_malloc((J->n + 1) * sizeof(int)); iVec *Ci = iVec_new(J->nnz > 0 ? J->nnz : 1); Cp[0] = 0; diff --git a/src/utils/linalg_sparse_matmuls.c b/src/utils/linalg_sparse_matmuls.c index c6c7f6a..c029a86 100644 --- a/src/utils/linalg_sparse_matmuls.c +++ b/src/utils/linalg_sparse_matmuls.c @@ -95,10 +95,8 @@ CSC_matrix *block_left_multiply_fill_sparsity(const CSR_matrix *A, int j, jj, block, block_start, block_end, block_jj_start, block_jj_end, row_offset; - /* allocate column pointers and an estimate of row indices. Ci grows via - iVec_append, so this is only a capacity hint: seed from the source's true nnz, - never the dense product J->n * m (which overflows int for large operands and - over-allocates hugely for a sparse result). */ + /* Allocate column pointers and an estimate of row indices. Capacity hint + based on J->nnz and not on the dense product */ int *Cp = (int *) sp_malloc((J->n + 1) * sizeof(int)); iVec *Ci = iVec_new(J->nnz > 0 ? J->nnz : 1); Cp[0] = 0; diff --git a/src/utils/sparse_matrix.c b/src/utils/sparse_matrix.c index f6ae640..d55007a 100644 --- a/src/utils/sparse_matrix.c +++ b/src/utils/sparse_matrix.c @@ -124,8 +124,8 @@ static void sparse_transpose_fill_values(const matrix *self, matrix *out) static matrix *sparse_index_alloc(matrix *self, const int *indices, int n_idxs) { CSR_matrix *Jx = ((sparse_matrix *) self)->csr; - CSR_matrix *J = - new_CSR_matrix(n_idxs, self->n, MIN(Jx->nnz, sat_mul_int(n_idxs, self->n))); + int alloc_ub = sat_mul_int(n_idxs, self->n); + CSR_matrix *J = new_CSR_matrix(n_idxs, self->n, MIN(Jx->nnz, alloc_ub)); J->p[0] = 0; for (int i = 0; i < n_idxs; i++)