diff --git a/include/utils/utils.h b/include/utils/utils.h index fbdcca6..c2a4b97 100644 --- a/include/utils/utils.h +++ b/include/utils/utils.h @@ -19,6 +19,7 @@ #define UTILS_H #include "iVec.h" +#include #include #ifndef MAX @@ -28,6 +29,17 @@ #define MIN(a, b) ((a) < (b) ? (a) : (b)) #endif +/* Dense cell count a*b for allocation upper bounds, clamped to INT_MAX on overflow. + Used as MIN(loose_nnz_bound, sat_mul_int(rows, cols)): a real nnz bound is always + < INT_MAX, so on overflow the MIN correctly falls back to the loose bound instead + of selecting a wrapped (negative or small-positive) product. */ +static inline int sat_mul_int(int a, int b) +{ + if (a <= 0 || b <= 0) return 0; + if (a > INT_MAX / b) return INT_MAX; + return a * b; +} + /* Sort an array of integers in ascending order */ void sort_int_array(int *array, int size); diff --git a/src/atoms/affine/hstack.c b/src/atoms/affine/hstack.c index a139fa6..a0e0a59 100644 --- a/src/atoms/affine/hstack.c +++ b/src/atoms/affine/hstack.c @@ -116,7 +116,7 @@ static void wsum_hess_init_impl(expr *node) /* worst-case scenario the nnz of node->wsum_hess is the sum of children's nnz, capped by the output cell count */ - int nnz_ub = MIN(nnz, node->n_vars * node->n_vars); + int nnz_ub = MIN(nnz, sat_mul_int(node->n_vars, node->n_vars)); CSR_matrix *H = new_CSR_matrix(node->n_vars, node->n_vars, nnz_ub); hnode->CSR_work = new_CSR_matrix(node->n_vars, node->n_vars, nnz_ub); diff --git a/src/atoms/bivariate_full_dom/matmul.c b/src/atoms/bivariate_full_dom/matmul.c index 7ab72f3..769e6a9 100644 --- a/src/atoms/bivariate_full_dom/matmul.c +++ b/src/atoms/bivariate_full_dom/matmul.c @@ -241,7 +241,7 @@ static void jacobian_init_chain_rule(expr *node) mnode->term1_CSR = YT_kron_I_alloc(m, k, n, f->work->jacobian_csc); mnode->term2_CSR = I_kron_X_alloc(m, k, n, g->work->jacobian_csc); int max_nnz = mnode->term1_CSR->nnz + mnode->term2_CSR->nnz; - max_nnz = MIN(max_nnz, node->size * node->n_vars); + max_nnz = MIN(max_nnz, sat_mul_int(node->size, node->n_vars)); CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, max_nnz); sum_csr_alloc(mnode->term1_CSR, mnode->term2_CSR, jac); node->jacobian = new_sparse_matrix(jac); diff --git a/src/problem.c b/src/problem.c index 4afb1b3..09cc14d 100644 --- a/src/problem.c +++ b/src/problem.c @@ -256,7 +256,7 @@ void problem_init_hessian(problem *prob) nnz += prob->constraints[i]->wsum_hess->nnz; } - int hess_nnz_ub = MIN(nnz, prob->n_vars * prob->n_vars); + int hess_nnz_ub = MIN(nnz, sat_mul_int(prob->n_vars, prob->n_vars)); prob->lagrange_hessian = new_CSR_matrix(prob->n_vars, prob->n_vars, hess_nnz_ub); /* affine shortcut */ diff --git a/src/utils/CSR_sum.c b/src/utils/CSR_sum.c index 7249acb..19f99a3 100644 --- a/src/utils/CSR_sum.c +++ b/src/utils/CSR_sum.c @@ -367,7 +367,7 @@ CSR_matrix *sum_4_csr_alloc(const CSR_matrix *A, const CSR_matrix *B, const CSR_matrix *inputs[4] = {A, B, C, D}; int m = A->m; int n = A->n; - int nnz_ub = MIN(A->nnz + B->nnz + C->nnz + D->nnz, m * n); + int nnz_ub = MIN(A->nnz + B->nnz + C->nnz + D->nnz, sat_mul_int(m, n)); /* allocate output and index maps */ CSR_matrix *out = new_CSR_matrix(m, n, nnz_ub); diff --git a/src/utils/linalg_dense_sparse_matmuls.c b/src/utils/linalg_dense_sparse_matmuls.c index 6211082..f3da7ad 100644 --- a/src/utils/linalg_dense_sparse_matmuls.c +++ b/src/utils/linalg_dense_sparse_matmuls.c @@ -35,8 +35,9 @@ CSC_matrix *I_kron_A_alloc(const matrix *A, const CSC_matrix *J, int p) int n = A->n; int i, j, jj, block, block_start, block_end, block_jj_start, row_offset; + /* capacity hint based on J->nnz and not on the dense product */ int *Cp = (int *) sp_malloc((J->n + 1) * sizeof(int)); - iVec *Ci = iVec_new(J->n * m); + iVec *Ci = iVec_new(J->nnz > 0 ? J->nnz : 1); Cp[0] = 0; /* for each column of J */ diff --git a/src/utils/linalg_sparse_matmuls.c b/src/utils/linalg_sparse_matmuls.c index f208022..c029a86 100644 --- a/src/utils/linalg_sparse_matmuls.c +++ b/src/utils/linalg_sparse_matmuls.c @@ -95,9 +95,10 @@ CSC_matrix *block_left_multiply_fill_sparsity(const CSR_matrix *A, int j, jj, block, block_start, block_end, block_jj_start, block_jj_end, row_offset; - /* allocate column pointers and an estimate of row indices */ + /* Allocate column pointers and an estimate of row indices. Capacity hint + based on J->nnz and not on the dense product */ int *Cp = (int *) sp_malloc((J->n + 1) * sizeof(int)); - iVec *Ci = iVec_new(J->n * m); + iVec *Ci = iVec_new(J->nnz > 0 ? J->nnz : 1); Cp[0] = 0; /* for each column of J */ diff --git a/src/utils/sparse_matrix.c b/src/utils/sparse_matrix.c index 22c3fc9..d55007a 100644 --- a/src/utils/sparse_matrix.c +++ b/src/utils/sparse_matrix.c @@ -124,7 +124,8 @@ static void sparse_transpose_fill_values(const matrix *self, matrix *out) static matrix *sparse_index_alloc(matrix *self, const int *indices, int n_idxs) { CSR_matrix *Jx = ((sparse_matrix *) self)->csr; - CSR_matrix *J = new_CSR_matrix(n_idxs, self->n, MIN(Jx->nnz, n_idxs * self->n)); + int alloc_ub = sat_mul_int(n_idxs, self->n); + CSR_matrix *J = new_CSR_matrix(n_idxs, self->n, MIN(Jx->nnz, alloc_ub)); J->p[0] = 0; for (int i = 0; i < n_idxs; i++) @@ -329,7 +330,7 @@ static matrix *sparse_sum_row_partition_alloc(matrix *self, int axis, int d1, { m = d1; } - int max_nnz = MIN(A->nnz, m * A->n); + int max_nnz = MIN(A->nnz, sat_mul_int(m, A->n)); CSR_matrix *out = new_CSR_matrix(m, A->n, max_nnz); int *iwork = (int *) sp_malloc(MAX(A->n, A->nnz) * sizeof(int)); diff --git a/src/utils/stacked_pd.c b/src/utils/stacked_pd.c index 5ac5a3c..c55f65a 100644 --- a/src/utils/stacked_pd.c +++ b/src/utils/stacked_pd.c @@ -447,7 +447,7 @@ static matrix *stacked_pd_vtable_sum_row_partition_alloc(matrix *self, int axis, /* axis == 0 or 1: CSR fallback */ CSR_matrix *A = self->to_csr(self); int m_out = (axis == 0) ? A->m / d1 : d1; - int max_out_nnz = MIN(A->nnz, m_out * A->n); + int max_out_nnz = MIN(A->nnz, sat_mul_int(m_out, A->n)); CSR_matrix *out = new_CSR_matrix(m_out, A->n, max_out_nnz); int *iwork = (int *) sp_malloc(MAX(A->n, A->nnz) * sizeof(int)); diff --git a/tests/all_tests.c b/tests/all_tests.c index e571683..b680d0f 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -60,6 +60,7 @@ #include "problem/test_param_prob.h" #include "problem/test_problem.h" #include "utils/test_COO_matrix.h" +#include "utils/test_alloc_overflow.h" #include "utils/test_cblas.h" #include "utils/test_csc_matrix.h" #include "utils/test_csr_csc_conversion.h" @@ -346,6 +347,8 @@ int main(void) mu_run_test(test_wsum_hess_matmul_X_X, tests_run); printf("\n--- Utility Tests ---\n"); + mu_run_test(test_sat_mul_int_clamps_on_overflow, tests_run); + mu_run_test(test_sparse_index_alloc_no_int_overflow, tests_run); mu_run_test(test_cblas_ddot, tests_run); mu_run_test(test_diag_csr_mult, tests_run); mu_run_test(test_csr_sum, tests_run); diff --git a/tests/utils/test_alloc_overflow.h b/tests/utils/test_alloc_overflow.h new file mode 100644 index 0000000..5e783ee --- /dev/null +++ b/tests/utils/test_alloc_overflow.h @@ -0,0 +1,82 @@ +/* + * Copyright 2026 Daniel Cederberg and William Zhang + * + * This file is part of the SparseDiffEngine project. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef TEST_ALLOC_OVERFLOW_H +#define TEST_ALLOC_OVERFLOW_H + +#include "minunit.h" +#include "utils/CSR_matrix.h" +#include "utils/matrix.h" +#include "utils/sparse_matrix.h" +#include "utils/utils.h" +#include +#include +#include + +/* sat_mul_int returns the exact product when it fits and clamps to INT_MAX on + overflow (never wraps to a negative or small-positive value). */ +const char *test_sat_mul_int_clamps_on_overflow(void) +{ + mu_assert("exact product", sat_mul_int(40000, 40000) == 1600000000); + mu_assert("zero operand", sat_mul_int(0, 12345) == 0); + mu_assert("negative operand", sat_mul_int(-1, 12345) == 0); + /* 250000 * 250000 = 6.25e10 overflows int32; must clamp, not wrap negative. */ + mu_assert("overflow clamps to INT_MAX", sat_mul_int(250000, 250000) == INT_MAX); + mu_assert("boundary still positive", sat_mul_int(46341, 46341) == INT_MAX); + return 0; +} + +/* sparse_index_alloc sized its allocation as MIN(Jx->nnz, n_idxs * self->n). For a + large jacobian the dense product n_idxs * self->n overflows int (e.g. a transpose + of a 250000-variable matrix), wrapping negative so MIN selected it -> calloc + overflow -> SIGSEGV. This builds an index op whose product overflows and checks + the result is a valid CSR with the true (subset) nnz. */ +const char *test_sparse_index_alloc_no_int_overflow(void) +{ + /* n_idxs * self->n = 25000 * 100000 = 2.5e9 overflows int32. */ + const int n_idxs = 25000; + const int n_cols = 100000; + + /* identity-like CSR: n_idxs rows, one nonzero per row */ + CSR_matrix *Jx = new_CSR_matrix(n_idxs, n_cols, n_idxs); + for (int i = 0; i < n_idxs; i++) + { + Jx->p[i] = i; + Jx->i[i] = i; + Jx->x[i] = 1.0; + } + Jx->p[n_idxs] = n_idxs; + + matrix *mat = new_sparse_matrix(Jx); + + int *indices = (int *) malloc(n_idxs * sizeof(int)); + for (int i = 0; i < n_idxs; i++) indices[i] = i; + + /* pre-fix: crashes here in new_CSR_matrix(... negative nnz) */ + matrix *out = mat->index_alloc(mat, indices, n_idxs); + CSR_matrix *out_csr = out->to_csr(out); + + mu_assert("nnz must equal selected-row total", out_csr->nnz == n_idxs); + mu_assert("nnz must be non-negative", out_csr->nnz >= 0); + + free(indices); + free_matrix(out); + free_matrix(mat); + return 0; +} + +#endif /* TEST_ALLOC_OVERFLOW_H */