From 5125ef752fa8974ead08ed712c38c28c2cdd4da9 Mon Sep 17 00:00:00 2001 From: Transurgeon Date: Mon, 1 Jun 2026 22:37:17 -0400 Subject: [PATCH 1/9] initial attempt at dense quadform --- include/atoms/non_elementwise_full_dom.h | 13 ++ include/subexpr.h | 17 +- src/atoms/affine/left_matmul.c | 7 +- src/atoms/other/quad_form.c | 160 ++++++++++++++++++- src/utils/permuted_dense.c | 4 + tests/all_tests.c | 2 + tests/wsum_hess/other/test_quad_form_dense.h | 58 +++++++ 7 files changed, 257 insertions(+), 4 deletions(-) create mode 100644 tests/wsum_hess/other/test_quad_form_dense.h diff --git a/include/atoms/non_elementwise_full_dom.h b/include/atoms/non_elementwise_full_dom.h index 65c6070..1129ab4 100644 --- a/include/atoms/non_elementwise_full_dom.h +++ b/include/atoms/non_elementwise_full_dom.h @@ -24,6 +24,19 @@ expr *new_quad_form(expr *child, CSR_matrix *Q); +/* Dense / parametric quadratic form y = x' P x over a leaf variable x. + * + * P is n x n, row-major, and assumed symmetric (matching the new_quad_form + * convention where the Hessian of x'Qx is taken to be 2Q). The Hessian is + * materialized as a dense permuted_dense block. + * + * - constant P: P_data points to n*n doubles, param_source == NULL. + * - parametric P: P_data == NULL, param_source is the parameter node that + * supplies P (n*n doubles) and is refreshed each solve. + */ +expr *new_quad_form_dense(expr *child, int n, const double *P_data, + expr *param_source); + /* product of all entries, without axis argument */ expr *new_prod(expr *child); diff --git a/include/subexpr.h b/include/subexpr.h index 0dd6c8e..9ecbd3d 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -54,12 +54,27 @@ typedef struct power_expr double p; } power_expr; -/* Quadratic form: y = x'*Q*x */ +/* Quadratic form: y = x'*Q*x. + * + * Two storage paths share this struct: + * - sparse: Q (CSR) is set, P_dense == NULL. The original code path. + * - dense: P_dense (n x n, row-major, symmetric) is set, Q == NULL. Used for a + * dense or parametric quadratic matrix over a leaf variable; the Hessian + * is materialized as a permuted_dense block (2*P) instead of a sparse CSR. + */ typedef struct quad_form_expr { expr base; CSR_matrix *Q; CSC_matrix *QJf; /* Q * J_f in CSC_matrix (for chain rule hessian) */ + + /* dense path (NULL on the sparse path) */ + matrix *P; /* n x n permuted_dense, row-major, symmetric */ + int n; /* = left->size (dense path only) */ + + /* parametric dense Q: param_source feeds P_dense each solve (NULL otherwise) */ + expr *param_source; + void (*refresh_param_values)(struct quad_form_expr *); } quad_form_expr; /* Sum reduction along an axis */ diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c index edd36c3..15991c5 100644 --- a/src/atoms/affine/left_matmul.c +++ b/src/atoms/affine/left_matmul.c @@ -33,7 +33,8 @@ Here, f(x) can be a vector-valued expression and a matrix-valued expression. The dimensions are A - m x n, f(x) - n x p, y - m x p. Note that here A does not have global column indices but it is a local matrix. - This is an important distinction compared to linear_op_expr. + This is an important distinction compared to linear_op_expr. # Should we remove this + comment and all other mentions of linear_op_expr since it won't be used anymore? * To compute the forward pass: vec(y) = A_kron @ vec(f(x)), where A_kron = I_p kron A is a Kronecker product of size (m*p) x (n*p), @@ -42,6 +43,9 @@ only conceptually. This led to a 100x speedup in the initialization of the Jacobian sparsity pattern. + This comment above now uses the permuted dense abstraction. + I think we can maybe update it? + * To compute the Jacobian: J_y = A_kron @ J_f(x), where J_f(x) is the Jacobian of f(x) of size (n*p) x n_vars. @@ -189,6 +193,7 @@ static void eval_jacobian_sparse(expr *node) lnode->csc_to_csr_work); } +/* question, why doesn't the hessian have different methods for sparse vs. dense? */ static void wsum_hess_init_impl(expr *node) { /* initialize child's hessian */ diff --git a/src/atoms/other/quad_form.c b/src/atoms/other/quad_form.c index d02e52c..4e4c5aa 100644 --- a/src/atoms/other/quad_form.c +++ b/src/atoms/other/quad_form.c @@ -20,6 +20,7 @@ #include "utils/CSC_matrix.h" #include "utils/cblas_wrapper.h" #include "utils/matrix_sum.h" +#include "utils/permuted_dense.h" #include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include @@ -28,6 +29,15 @@ #include #include +/* This atom implements the quadratic form y = x' Q x. There are two storage + paths, chosen at construction time (mirroring left_matmul's sparse/dense + split): the sparse path stores Q as a CSR_matrix; the dense path stores Q as + an n x n permuted_dense P (optionally fed by a parameter) and materializes the + Hessian as a dense permuted_dense block. Both paths assume Q symmetric, so the + Hessian of x'Qx is taken to be 2Q. */ + +/* ===================== sparse path (Q is a CSR_matrix) ===================== */ + static void forward(expr *node, const double *u) { expr *x = node->left; @@ -233,16 +243,107 @@ static void eval_wsum_hess(expr *node, const double *w) } } +/* ============== dense path (P is an n x n permuted_dense block) ============= */ + +/* Copy the latest parameter value into P's dense buffer. P is symmetric, so the + column-major layout Python sends equals the row-major layout we store. */ +static void refresh_dense_P(quad_form_expr *qnode) +{ + memcpy(qnode->P->x, qnode->param_source->value, + (size_t) qnode->n * qnode->n * sizeof(double)); +} + +/* Refresh wrapper mirroring left_matmul: runs the callback once per solve. */ +static void refresh_param_values_qf(quad_form_expr *qnode) +{ + if (qnode->param_source == NULL || !qnode->base.needs_parameter_refresh) + { + return; + } + qnode->base.needs_parameter_refresh = false; + qnode->refresh_param_values(qnode); +} + +static void forward_dense(expr *node, const double *u) +{ + quad_form_expr *qnode = (quad_form_expr *) node; + expr *x = node->left; + + /* refresh P from the parameter if needed (mirrors left_matmul forward) */ + if (qnode->param_source != NULL && node->needs_parameter_refresh) + { + qnode->param_source->forward(qnode->param_source, NULL); + } + refresh_param_values_qf(qnode); + + /* child's forward pass */ + x->forward(x, u); + + /* dwork = P @ x; value = x' (P x) */ + matrix *P = qnode->P; + P->block_left_mult_vec(P, x->value, node->work->dwork, 1); + node->value[0] = cblas_ddot(qnode->n, x->value, 1, node->work->dwork, 1); +} + +static void eval_jacobian_dense(expr *node) +{ + quad_form_expr *qnode = (quad_form_expr *) node; + expr *x = node->left; + CSR_matrix *jac = node->jacobian->to_csr(node->jacobian); + + /* jacobian = 2 * (P @ x)^T (leaf x: sparsity is the variable block) */ + matrix *P = qnode->P; + P->block_left_mult_vec(P, x->value, jac->x, 1); + cblas_dscal(qnode->n, 2.0, jac->x, 1); +} + +static void wsum_hess_init_dense(expr *node) +{ + quad_form_expr *qnode = (quad_form_expr *) node; + expr *x = node->left; + int n = qnode->n; + + /* Hessian is the dense block 2P over x's contiguous variable range. */ + int *perm = (int *) sp_malloc(n * sizeof(int)); + for (int i = 0; i < n; i++) + { + perm[i] = x->var_id + i; + } + node->wsum_hess = + new_permuted_dense(node->n_vars, node->n_vars, n, n, perm, perm, NULL); + sp_free(perm); +} + +static void eval_wsum_hess_dense(expr *node, const double *w) +{ + quad_form_expr *qnode = (quad_form_expr *) node; + int nn = qnode->n * qnode->n; + + /* Hessian = 2 w P (P symmetric, constant up to the weight). The PD's value + buffer (->x) aliases its dense block, so writing it updates to_csr too. */ + memcpy(node->wsum_hess->x, qnode->P->x, nn * sizeof(double)); + cblas_dscal(nn, 2.0 * w[0], node->wsum_hess->x, 1); +} + +/* ============================= shared / ctors ============================== */ + static void free_type_data(expr *node) { quad_form_expr *qnode = (quad_form_expr *) node; - free_CSR_matrix(qnode->Q); - qnode->Q = NULL; + if (qnode->Q != NULL) + { + free_CSR_matrix(qnode->Q); + qnode->Q = NULL; + } if (qnode->QJf != NULL) { free_CSC_matrix(qnode->QJf); qnode->QJf = NULL; } + free_matrix(qnode->P); + qnode->P = NULL; + free_expr(qnode->param_source); + qnode->param_source = NULL; } static bool is_affine(const expr *node) @@ -271,3 +372,58 @@ expr *new_quad_form(expr *left, CSR_matrix *Q) node->work->dwork = (double *) sp_malloc(left->size * sizeof(double)); return node; } + +expr *new_quad_form_dense(expr *child, int n, const double *P_data, + expr *param_source) +{ + assert(child->d1 == 1 || child->d2 == 1); /* child must be a vector */ + assert(child->size == n); + /* Dense path supports a leaf variable only (the only case cvxpy emits). */ + if (child->var_id == NOT_A_VARIABLE) + { + fprintf(stderr, + "Error in new_quad_form_dense: dense path requires a leaf " + "variable child\n"); + exit(1); + } + + quad_form_expr *qnode = (quad_form_expr *) sp_calloc(1, sizeof(quad_form_expr)); + expr *node = &qnode->base; + + init_expr(node, 1, 1, child->n_vars, forward_dense, jacobian_init_impl, + eval_jacobian_dense, is_affine, wsum_hess_init_dense, + eval_wsum_hess_dense, free_type_data); + node->left = child; + expr_retain(child); + + qnode->n = n; + /* dwork stores P @ x in the forward pass */ + node->work->dwork = (double *) sp_malloc(n * sizeof(double)); + + qnode->param_source = param_source; + if (param_source != NULL) + { + if (P_data != NULL) + { + fprintf(stderr, "Error in new_quad_form_dense: param and data both " + "set\n"); + exit(1); + } + expr_retain(param_source); + qnode->refresh_param_values = refresh_dense_P; + /* P buffer is filled by refresh_dense_P on the first forward pass. */ + qnode->P = new_permuted_dense_full(n, n, NULL); + node->needs_parameter_refresh = true; + } + else + { + if (P_data == NULL) + { + fprintf(stderr, "Error in new_quad_form_dense: need P data\n"); + exit(1); + } + qnode->P = new_permuted_dense_full(n, n, P_data); + } + + return node; +} diff --git a/src/utils/permuted_dense.c b/src/utils/permuted_dense.c index ed7dca5..84c1861 100644 --- a/src/utils/permuted_dense.c +++ b/src/utils/permuted_dense.c @@ -157,6 +157,8 @@ matrix *promote_pd_alloc(const permuted_dense *A, int size) { assert(A->m0 <= 1); + /* does this actually happen? if so, when? + actually, what even is m0 here, we should have more documentation */ if (A->m0 == 0) { /* source row is all-zero; output is also structurally all-zero. */ @@ -164,6 +166,8 @@ matrix *promote_pd_alloc(const permuted_dense *A, int size) NULL); } + /* does it make sense to use the row_permutation of the original pd matrix A? + it seems like the broadcast atom does that. */ int *new_row_perm = (int *) sp_malloc(size * sizeof(int)); for (int i = 0; i < size; i++) { diff --git a/tests/all_tests.c b/tests/all_tests.c index e571683..ae54b6e 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -103,6 +103,7 @@ #include "wsum_hess/other/test_prod_axis_one.h" #include "wsum_hess/other/test_prod_axis_zero.h" #include "wsum_hess/other/test_quad_form.h" +#include "wsum_hess/other/test_quad_form_dense.h" #endif /* PROFILE_ONLY */ #ifdef PROFILE_ONLY @@ -293,6 +294,7 @@ int main(void) mu_run_test(test_wsum_hess_quad_over_lin_xy, tests_run); mu_run_test(test_wsum_hess_quad_over_lin_yx, tests_run); mu_run_test(test_wsum_hess_quad_form, tests_run); + mu_run_test(test_wsum_hess_quad_form_dense, tests_run); mu_run_test(test_wsum_hess_scalar_mult_log_vector, tests_run); mu_run_test(test_wsum_hess_scalar_mult_log_matrix, tests_run); mu_run_test(test_wsum_hess_vector_mult_log_vector, tests_run); diff --git a/tests/wsum_hess/other/test_quad_form_dense.h b/tests/wsum_hess/other/test_quad_form_dense.h new file mode 100644 index 0000000..920d5f6 --- /dev/null +++ b/tests/wsum_hess/other/test_quad_form_dense.h @@ -0,0 +1,58 @@ +#include "atoms/affine.h" +#include "atoms/non_elementwise_full_dom.h" +#include "expr.h" +#include "minunit.h" +#include "test_helpers.h" +#include +#include + +/* Dense path of quad_form: y = x' P x with a dense (permuted_dense) P. + * x is 3x1 with global index 2, total variables = 5. + * P = [1 2 0; 2 3 0; 0 0 4] (symmetric), x = [1, 2, 3]. + * value = x' P x = 57 + * gradient = 2 P x = [10, 16, 24] on columns {2, 3, 4} + * Hessian = 2 w P (full dense block over rows/cols {2, 3, 4}) + */ +const char *test_wsum_hess_quad_form_dense(void) +{ + double u_vals[5] = {0.0, 0.0, 1.0, 2.0, 3.0}; + double w = 2.0; + + /* row-major 3x3 dense P */ + double P[9] = {1.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 4.0}; + + expr *x = new_variable(3, 1, 2, 5); + expr *node = new_quad_form_dense(x, 3, P, NULL); + + jacobian_init(node); + node->forward(node, u_vals); + node->eval_jacobian(node); + wsum_hess_init(node); + node->eval_wsum_hess(node, &w); + + /* forward value */ + mu_assert("dense quad_form value fail", fabs(node->value[0] - 57.0) < 1e-9); + + /* gradient = 2 P x on columns {2,3,4} */ + double expected_grad[3] = {10.0, 16.0, 24.0}; + int expected_jp[2] = {0, 3}; + int expected_ji[3] = {2, 3, 4}; + mu_assert("dense quad_form jacobian vals fail", + cmp_values(node->jacobian, expected_grad, 3)); + mu_assert("dense quad_form jacobian sparsity fail", + cmp_sparsity(node->jacobian, expected_jp, expected_ji, 1, 3)); + + /* Hessian = 2 w P = 4 P as a dense block over rows/cols {2,3,4} */ + mu_assert("dense quad_form hessian is not permuted_dense", + node->wsum_hess->is_permuted_dense); + int expected_hp[6] = {0, 0, 0, 3, 6, 9}; + int expected_hi[9] = {2, 3, 4, 2, 3, 4, 2, 3, 4}; + double expected_hx[9] = {4.0, 8.0, 0.0, 8.0, 12.0, 0.0, 0.0, 0.0, 16.0}; + mu_assert("dense quad_form hessian sparsity fail", + cmp_sparsity(node->wsum_hess, expected_hp, expected_hi, 5, 9)); + mu_assert("dense quad_form hessian vals fail", + cmp_values(node->wsum_hess, expected_hx, 9)); + + free_expr(node); + return 0; +} From 388efe2278d5ec7fd5d2dfe0df3424b7d65e9292 Mon Sep 17 00:00:00 2001 From: Transurgeon Date: Tue, 2 Jun 2026 02:53:05 -0400 Subject: [PATCH 2/9] some cleanups to the matrix abstraction and run formatter --- include/subexpr.h | 20 +++------ src/atoms/other/quad_form.c | 82 +++++++++++++++++-------------------- 2 files changed, 43 insertions(+), 59 deletions(-) diff --git a/include/subexpr.h b/include/subexpr.h index 9ecbd3d..539bf8b 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -54,27 +54,17 @@ typedef struct power_expr double p; } power_expr; -/* Quadratic form: y = x'*Q*x. - * - * Two storage paths share this struct: - * - sparse: Q (CSR) is set, P_dense == NULL. The original code path. - * - dense: P_dense (n x n, row-major, symmetric) is set, Q == NULL. Used for a - * dense or parametric quadratic matrix over a leaf variable; the Hessian - * is materialized as a permuted_dense block (2*P) instead of a sparse CSR. - */ +/* Quadratic form: y = x'*Q*x. Q is a polymorphic matrix: a sparse (CSR) backend + on the sparse path, or a dense (permuted_dense) backend on the dense path. */ typedef struct quad_form_expr { expr base; - CSR_matrix *Q; + matrix *Q; CSC_matrix *QJf; /* Q * J_f in CSC_matrix (for chain rule hessian) */ + int n; /* = left->size (dense path only) */ - /* dense path (NULL on the sparse path) */ - matrix *P; /* n x n permuted_dense, row-major, symmetric */ - int n; /* = left->size (dense path only) */ - - /* parametric dense Q: param_source feeds P_dense each solve (NULL otherwise) */ + /* parametric dense path: param_source feeds Q each solve (NULL otherwise) */ expr *param_source; - void (*refresh_param_values)(struct quad_form_expr *); } quad_form_expr; /* Sum reduction along an axis */ diff --git a/src/atoms/other/quad_form.c b/src/atoms/other/quad_form.c index 4e4c5aa..6a8a824 100644 --- a/src/atoms/other/quad_form.c +++ b/src/atoms/other/quad_form.c @@ -29,12 +29,9 @@ #include #include -/* This atom implements the quadratic form y = x' Q x. There are two storage - paths, chosen at construction time (mirroring left_matmul's sparse/dense - split): the sparse path stores Q as a CSR_matrix; the dense path stores Q as - an n x n permuted_dense P (optionally fed by a parameter) and materializes the - Hessian as a dense permuted_dense block. Both paths assume Q symmetric, so the - Hessian of x'Qx is taken to be 2Q. */ +/* Quadratic form y = x'Qx. Sparse path: Q is a CSR matrix. Dense path: Q is an + n x n permuted_dense (optionally parameter-fed) over a leaf variable, with the + Hessian 2Q materialized as a dense block. Q is assumed symmetric. */ /* ===================== sparse path (Q is a CSR_matrix) ===================== */ @@ -46,7 +43,8 @@ static void forward(expr *node, const double *u) x->forward(x, u); /* local forward pass */ - CSR_matrix *Q = ((quad_form_expr *) node)->Q; + CSR_matrix *Q = + ((quad_form_expr *) node)->Q->to_csr(((quad_form_expr *) node)->Q); Ax_csr(Q, x->value, node->work->dwork, 0); node->value[0] = 0.0; @@ -101,7 +99,8 @@ static void jacobian_init_impl(expr *node) static void eval_jacobian(expr *node) { expr *x = node->left; - CSR_matrix *Q = ((quad_form_expr *) node)->Q; + CSR_matrix *Q = + ((quad_form_expr *) node)->Q->to_csr(((quad_form_expr *) node)->Q); CSR_matrix *jac = node->jacobian->to_csr(node->jacobian); if (x->var_id != NOT_A_VARIABLE) @@ -136,7 +135,8 @@ static void eval_jacobian(expr *node) static void wsum_hess_init_impl(expr *node) { - CSR_matrix *Q = ((quad_form_expr *) node)->Q; + CSR_matrix *Q = + ((quad_form_expr *) node)->Q->to_csr(((quad_form_expr *) node)->Q); expr *x = node->left; if (x->var_id != NOT_A_VARIABLE) @@ -193,7 +193,8 @@ static void wsum_hess_init_impl(expr *node) static void eval_wsum_hess(expr *node, const double *w) { - CSR_matrix *Q = ((quad_form_expr *) node)->Q; + CSR_matrix *Q = + ((quad_form_expr *) node)->Q->to_csr(((quad_form_expr *) node)->Q); expr *x = node->left; double two_w = 2.0 * w[0]; @@ -243,17 +244,16 @@ static void eval_wsum_hess(expr *node, const double *w) } } -/* ============== dense path (P is an n x n permuted_dense block) ============= */ +/* ============== dense path (Q is an n x n permuted_dense block) ============= */ -/* Copy the latest parameter value into P's dense buffer. P is symmetric, so the - column-major layout Python sends equals the row-major layout we store. */ -static void refresh_dense_P(quad_form_expr *qnode) +/* Copy the latest parameter value into Q (symmetric: column-major == row-major). */ +static void refresh_dense_Q(quad_form_expr *qnode) { - memcpy(qnode->P->x, qnode->param_source->value, + memcpy(qnode->Q->x, qnode->param_source->value, (size_t) qnode->n * qnode->n * sizeof(double)); } -/* Refresh wrapper mirroring left_matmul: runs the callback once per solve. */ +/* Refresh Q from the parameter once per solve. */ static void refresh_param_values_qf(quad_form_expr *qnode) { if (qnode->param_source == NULL || !qnode->base.needs_parameter_refresh) @@ -261,7 +261,7 @@ static void refresh_param_values_qf(quad_form_expr *qnode) return; } qnode->base.needs_parameter_refresh = false; - qnode->refresh_param_values(qnode); + refresh_dense_Q(qnode); } static void forward_dense(expr *node, const double *u) @@ -269,7 +269,7 @@ static void forward_dense(expr *node, const double *u) quad_form_expr *qnode = (quad_form_expr *) node; expr *x = node->left; - /* refresh P from the parameter if needed (mirrors left_matmul forward) */ + /* refresh Q from the parameter if needed */ if (qnode->param_source != NULL && node->needs_parameter_refresh) { qnode->param_source->forward(qnode->param_source, NULL); @@ -279,9 +279,9 @@ static void forward_dense(expr *node, const double *u) /* child's forward pass */ x->forward(x, u); - /* dwork = P @ x; value = x' (P x) */ - matrix *P = qnode->P; - P->block_left_mult_vec(P, x->value, node->work->dwork, 1); + /* dwork = Q @ x; value = x' (Q x) */ + matrix *Q = qnode->Q; + Q->block_left_mult_vec(Q, x->value, node->work->dwork, 1); node->value[0] = cblas_ddot(qnode->n, x->value, 1, node->work->dwork, 1); } @@ -291,9 +291,9 @@ static void eval_jacobian_dense(expr *node) expr *x = node->left; CSR_matrix *jac = node->jacobian->to_csr(node->jacobian); - /* jacobian = 2 * (P @ x)^T (leaf x: sparsity is the variable block) */ - matrix *P = qnode->P; - P->block_left_mult_vec(P, x->value, jac->x, 1); + /* jacobian = 2 * (Q @ x)^T (leaf x: sparsity is the variable block) */ + matrix *Q = qnode->Q; + Q->block_left_mult_vec(Q, x->value, jac->x, 1); cblas_dscal(qnode->n, 2.0, jac->x, 1); } @@ -319,9 +319,9 @@ static void eval_wsum_hess_dense(expr *node, const double *w) quad_form_expr *qnode = (quad_form_expr *) node; int nn = qnode->n * qnode->n; - /* Hessian = 2 w P (P symmetric, constant up to the weight). The PD's value + /* Hessian = 2 w Q (Q symmetric, constant up to the weight). The PD's value buffer (->x) aliases its dense block, so writing it updates to_csr too. */ - memcpy(node->wsum_hess->x, qnode->P->x, nn * sizeof(double)); + memcpy(node->wsum_hess->x, qnode->Q->x, nn * sizeof(double)); cblas_dscal(nn, 2.0 * w[0], node->wsum_hess->x, 1); } @@ -330,18 +330,13 @@ static void eval_wsum_hess_dense(expr *node, const double *w) static void free_type_data(expr *node) { quad_form_expr *qnode = (quad_form_expr *) node; - if (qnode->Q != NULL) - { - free_CSR_matrix(qnode->Q); - qnode->Q = NULL; - } + free_matrix(qnode->Q); + qnode->Q = NULL; if (qnode->QJf != NULL) { free_CSC_matrix(qnode->QJf); qnode->QJf = NULL; } - free_matrix(qnode->P); - qnode->P = NULL; free_expr(qnode->param_source); qnode->param_source = NULL; } @@ -364,9 +359,8 @@ expr *new_quad_form(expr *left, CSR_matrix *Q) node->left = left; expr_retain(left); - /* Set type-specific field */ - qnode->Q = new_CSR_matrix(Q->m, Q->n, Q->nnz); - copy_CSR_matrix(Q, qnode->Q); + /* Set type-specific field. new_sparse_matrix takes ownership, so clone. */ + qnode->Q = new_sparse_matrix(new_csr(Q)); /* dwork stores the result of Q @ f(x) in the forward pass */ node->work->dwork = (double *) sp_malloc(left->size * sizeof(double)); @@ -381,9 +375,8 @@ expr *new_quad_form_dense(expr *child, int n, const double *P_data, /* Dense path supports a leaf variable only (the only case cvxpy emits). */ if (child->var_id == NOT_A_VARIABLE) { - fprintf(stderr, - "Error in new_quad_form_dense: dense path requires a leaf " - "variable child\n"); + fprintf(stderr, "Error in new_quad_form_dense: dense path requires a leaf " + "variable child\n"); exit(1); } @@ -397,9 +390,11 @@ expr *new_quad_form_dense(expr *child, int n, const double *P_data, expr_retain(child); qnode->n = n; - /* dwork stores P @ x in the forward pass */ + /* dwork stores Q @ x in the forward pass */ node->work->dwork = (double *) sp_malloc(n * sizeof(double)); + /* Convention: exactly one of P_data (constant) or param_source (parametric) + is set. */ qnode->param_source = param_source; if (param_source != NULL) { @@ -410,9 +405,8 @@ expr *new_quad_form_dense(expr *child, int n, const double *P_data, exit(1); } expr_retain(param_source); - qnode->refresh_param_values = refresh_dense_P; - /* P buffer is filled by refresh_dense_P on the first forward pass. */ - qnode->P = new_permuted_dense_full(n, n, NULL); + /* Q is filled by refresh_dense_Q on the first forward pass. */ + qnode->Q = new_permuted_dense_full(n, n, NULL); node->needs_parameter_refresh = true; } else @@ -422,7 +416,7 @@ expr *new_quad_form_dense(expr *child, int n, const double *P_data, fprintf(stderr, "Error in new_quad_form_dense: need P data\n"); exit(1); } - qnode->P = new_permuted_dense_full(n, n, P_data); + qnode->Q = new_permuted_dense_full(n, n, P_data); } return node; From a8038071a46dd125f48d7fbaf585828da3347e63 Mon Sep 17 00:00:00 2001 From: Transurgeon Date: Tue, 2 Jun 2026 05:33:50 -0400 Subject: [PATCH 3/9] use more of the matrix abstraction --- include/subexpr.h | 2 +- src/atoms/affine/left_matmul.c | 7 +- src/atoms/other/quad_form.c | 125 +++++++----------- src/utils/permuted_dense.c | 4 - tests/all_tests.c | 2 + .../other/test_quad_form_dense_param.h | 81 ++++++++++++ 6 files changed, 133 insertions(+), 88 deletions(-) create mode 100644 tests/wsum_hess/other/test_quad_form_dense_param.h diff --git a/include/subexpr.h b/include/subexpr.h index 539bf8b..fe5bca3 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -61,7 +61,7 @@ typedef struct quad_form_expr expr base; matrix *Q; CSC_matrix *QJf; /* Q * J_f in CSC_matrix (for chain rule hessian) */ - int n; /* = left->size (dense path only) */ + int n; /* quadratic dimension = left->size */ /* parametric dense path: param_source feeds Q each solve (NULL otherwise) */ expr *param_source; diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c index 15991c5..edd36c3 100644 --- a/src/atoms/affine/left_matmul.c +++ b/src/atoms/affine/left_matmul.c @@ -33,8 +33,7 @@ Here, f(x) can be a vector-valued expression and a matrix-valued expression. The dimensions are A - m x n, f(x) - n x p, y - m x p. Note that here A does not have global column indices but it is a local matrix. - This is an important distinction compared to linear_op_expr. # Should we remove this - comment and all other mentions of linear_op_expr since it won't be used anymore? + This is an important distinction compared to linear_op_expr. * To compute the forward pass: vec(y) = A_kron @ vec(f(x)), where A_kron = I_p kron A is a Kronecker product of size (m*p) x (n*p), @@ -43,9 +42,6 @@ only conceptually. This led to a 100x speedup in the initialization of the Jacobian sparsity pattern. - This comment above now uses the permuted dense abstraction. - I think we can maybe update it? - * To compute the Jacobian: J_y = A_kron @ J_f(x), where J_f(x) is the Jacobian of f(x) of size (n*p) x n_vars. @@ -193,7 +189,6 @@ static void eval_jacobian_sparse(expr *node) lnode->csc_to_csr_work); } -/* question, why doesn't the hessian have different methods for sparse vs. dense? */ static void wsum_hess_init_impl(expr *node) { /* initialize child's hessian */ diff --git a/src/atoms/other/quad_form.c b/src/atoms/other/quad_form.c index 6a8a824..d68439a 100644 --- a/src/atoms/other/quad_form.c +++ b/src/atoms/other/quad_form.c @@ -33,25 +33,45 @@ n x n permuted_dense (optionally parameter-fed) over a leaf variable, with the Hessian 2Q materialized as a dense block. Q is assumed symmetric. */ -/* ===================== sparse path (Q is a CSR_matrix) ===================== */ +/* ============ shared forward + jacobian (via the matrix vtable) ============ */ + +/* Copy the latest parameter value into Q (symmetric: column-major == row-major). */ +static void refresh_dense_Q(quad_form_expr *qnode) +{ + memcpy(qnode->Q->x, qnode->param_source->value, + (size_t) qnode->n * qnode->n * sizeof(double)); +} + +/* Refresh Q from the parameter once per solve (no-op when Q is constant). */ +static void refresh_param_values_qf(quad_form_expr *qnode) +{ + if (qnode->param_source == NULL || !qnode->base.needs_parameter_refresh) + { + return; + } + qnode->base.needs_parameter_refresh = false; + refresh_dense_Q(qnode); +} static void forward(expr *node, const double *u) { + quad_form_expr *qnode = (quad_form_expr *) node; expr *x = node->left; + /* refresh Q from the parameter if needed (no-op on the constant/sparse path) */ + if (qnode->param_source != NULL && node->needs_parameter_refresh) + { + qnode->param_source->forward(qnode->param_source, NULL); + } + refresh_param_values_qf(qnode); + /* child's forward pass */ x->forward(x, u); - /* local forward pass */ - CSR_matrix *Q = - ((quad_form_expr *) node)->Q->to_csr(((quad_form_expr *) node)->Q); - Ax_csr(Q, x->value, node->work->dwork, 0); - node->value[0] = 0.0; - - for (int i = 0; i < x->size; i++) - { - node->value[0] += x->value[i] * node->work->dwork[i]; - } + /* dwork = Q @ x; value = x' (Q x) */ + matrix *Q = qnode->Q; + Q->block_left_mult_vec(Q, x->value, node->work->dwork, 1); + node->value[0] = cblas_ddot(qnode->n, x->value, 1, node->work->dwork, 1); } static void jacobian_init_impl(expr *node) @@ -98,16 +118,16 @@ static void jacobian_init_impl(expr *node) static void eval_jacobian(expr *node) { + quad_form_expr *qnode = (quad_form_expr *) node; expr *x = node->left; - CSR_matrix *Q = - ((quad_form_expr *) node)->Q->to_csr(((quad_form_expr *) node)->Q); CSR_matrix *jac = node->jacobian->to_csr(node->jacobian); if (x->var_id != NOT_A_VARIABLE) { - /* jacobian = 2 * (Q @ x)^T */ - Ax_csr(Q, x->value, jac->x, 0); - cblas_dscal(x->size, 2.0, jac->x, 1); + /* jacobian = 2 * (Q @ x)^T (leaf x: sparsity is the variable block) */ + matrix *Q = qnode->Q; + Q->block_left_mult_vec(Q, x->value, jac->x, 1); + cblas_dscal(qnode->n, 2.0, jac->x, 1); } else { @@ -133,10 +153,13 @@ static void eval_jacobian(expr *node) } } +/* ===== hessian, sparse backend (raw CSR/CSC symmetric products; the non-leaf + chain rule J_f^T Q J_f has no matrix-vtable equivalent) ===================== */ + static void wsum_hess_init_impl(expr *node) { - CSR_matrix *Q = - ((quad_form_expr *) node)->Q->to_csr(((quad_form_expr *) node)->Q); + quad_form_expr *qnode = (quad_form_expr *) node; + CSR_matrix *Q = qnode->Q->to_csr(qnode->Q); expr *x = node->left; if (x->var_id != NOT_A_VARIABLE) @@ -170,7 +193,6 @@ static void wsum_hess_init_impl(expr *node) */ /* jacobian_csc_init(x) already called in jacobian_init */ - quad_form_expr *qnode = (quad_form_expr *) node; CSC_matrix *Jf = x->work->jacobian_csc; /* term1 = Jf^T W Jf = Jf^T B*/ @@ -193,8 +215,8 @@ static void wsum_hess_init_impl(expr *node) static void eval_wsum_hess(expr *node, const double *w) { - CSR_matrix *Q = - ((quad_form_expr *) node)->Q->to_csr(((quad_form_expr *) node)->Q); + quad_form_expr *qnode = (quad_form_expr *) node; + CSR_matrix *Q = qnode->Q->to_csr(qnode->Q); expr *x = node->left; double two_w = 2.0 * w[0]; @@ -220,7 +242,7 @@ static void eval_wsum_hess(expr *node, const double *w) } } - CSC_matrix *QJf = ((quad_form_expr *) node)->QJf; + CSC_matrix *QJf = qnode->QJf; CSR_matrix *term1 = node->work->hess_term1->to_csr(node->work->hess_term1); /* term1 = J_f^T Q J_f = J_f^T B */ @@ -244,58 +266,7 @@ static void eval_wsum_hess(expr *node, const double *w) } } -/* ============== dense path (Q is an n x n permuted_dense block) ============= */ - -/* Copy the latest parameter value into Q (symmetric: column-major == row-major). */ -static void refresh_dense_Q(quad_form_expr *qnode) -{ - memcpy(qnode->Q->x, qnode->param_source->value, - (size_t) qnode->n * qnode->n * sizeof(double)); -} - -/* Refresh Q from the parameter once per solve. */ -static void refresh_param_values_qf(quad_form_expr *qnode) -{ - if (qnode->param_source == NULL || !qnode->base.needs_parameter_refresh) - { - return; - } - qnode->base.needs_parameter_refresh = false; - refresh_dense_Q(qnode); -} - -static void forward_dense(expr *node, const double *u) -{ - quad_form_expr *qnode = (quad_form_expr *) node; - expr *x = node->left; - - /* refresh Q from the parameter if needed */ - if (qnode->param_source != NULL && node->needs_parameter_refresh) - { - qnode->param_source->forward(qnode->param_source, NULL); - } - refresh_param_values_qf(qnode); - - /* child's forward pass */ - x->forward(x, u); - - /* dwork = Q @ x; value = x' (Q x) */ - matrix *Q = qnode->Q; - Q->block_left_mult_vec(Q, x->value, node->work->dwork, 1); - node->value[0] = cblas_ddot(qnode->n, x->value, 1, node->work->dwork, 1); -} - -static void eval_jacobian_dense(expr *node) -{ - quad_form_expr *qnode = (quad_form_expr *) node; - expr *x = node->left; - CSR_matrix *jac = node->jacobian->to_csr(node->jacobian); - - /* jacobian = 2 * (Q @ x)^T (leaf x: sparsity is the variable block) */ - matrix *Q = qnode->Q; - Q->block_left_mult_vec(Q, x->value, jac->x, 1); - cblas_dscal(qnode->n, 2.0, jac->x, 1); -} +/* ============== hessian, dense backend (permuted_dense block) ============== */ static void wsum_hess_init_dense(expr *node) { @@ -361,6 +332,7 @@ expr *new_quad_form(expr *left, CSR_matrix *Q) /* Set type-specific field. new_sparse_matrix takes ownership, so clone. */ qnode->Q = new_sparse_matrix(new_csr(Q)); + qnode->n = left->size; /* quadratic dimension; used by the shared forward */ /* dwork stores the result of Q @ f(x) in the forward pass */ node->work->dwork = (double *) sp_malloc(left->size * sizeof(double)); @@ -383,9 +355,8 @@ expr *new_quad_form_dense(expr *child, int n, const double *P_data, quad_form_expr *qnode = (quad_form_expr *) sp_calloc(1, sizeof(quad_form_expr)); expr *node = &qnode->base; - init_expr(node, 1, 1, child->n_vars, forward_dense, jacobian_init_impl, - eval_jacobian_dense, is_affine, wsum_hess_init_dense, - eval_wsum_hess_dense, free_type_data); + init_expr(node, 1, 1, child->n_vars, forward, jacobian_init_impl, eval_jacobian, + is_affine, wsum_hess_init_dense, eval_wsum_hess_dense, free_type_data); node->left = child; expr_retain(child); diff --git a/src/utils/permuted_dense.c b/src/utils/permuted_dense.c index 84c1861..ed7dca5 100644 --- a/src/utils/permuted_dense.c +++ b/src/utils/permuted_dense.c @@ -157,8 +157,6 @@ matrix *promote_pd_alloc(const permuted_dense *A, int size) { assert(A->m0 <= 1); - /* does this actually happen? if so, when? - actually, what even is m0 here, we should have more documentation */ if (A->m0 == 0) { /* source row is all-zero; output is also structurally all-zero. */ @@ -166,8 +164,6 @@ matrix *promote_pd_alloc(const permuted_dense *A, int size) NULL); } - /* does it make sense to use the row_permutation of the original pd matrix A? - it seems like the broadcast atom does that. */ int *new_row_perm = (int *) sp_malloc(size * sizeof(int)); for (int i = 0; i < size; i++) { diff --git a/tests/all_tests.c b/tests/all_tests.c index ae54b6e..f01684c 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -104,6 +104,7 @@ #include "wsum_hess/other/test_prod_axis_zero.h" #include "wsum_hess/other/test_quad_form.h" #include "wsum_hess/other/test_quad_form_dense.h" +#include "wsum_hess/other/test_quad_form_dense_param.h" #endif /* PROFILE_ONLY */ #ifdef PROFILE_ONLY @@ -295,6 +296,7 @@ int main(void) mu_run_test(test_wsum_hess_quad_over_lin_yx, tests_run); mu_run_test(test_wsum_hess_quad_form, tests_run); mu_run_test(test_wsum_hess_quad_form_dense, tests_run); + mu_run_test(test_wsum_hess_quad_form_dense_param, tests_run); mu_run_test(test_wsum_hess_scalar_mult_log_vector, tests_run); mu_run_test(test_wsum_hess_scalar_mult_log_matrix, tests_run); mu_run_test(test_wsum_hess_vector_mult_log_vector, tests_run); diff --git a/tests/wsum_hess/other/test_quad_form_dense_param.h b/tests/wsum_hess/other/test_quad_form_dense_param.h new file mode 100644 index 0000000..d679c50 --- /dev/null +++ b/tests/wsum_hess/other/test_quad_form_dense_param.h @@ -0,0 +1,81 @@ +#include "atoms/affine.h" +#include "atoms/non_elementwise_full_dom.h" +#include "expr.h" +#include "minunit.h" +#include "test_helpers.h" +#include +#include + +/* Parametric dense path of quad_form: y = x' P x where P is fed by a parameter + * and refreshed each solve. Same setup as the constant test (x is 3x1 at global + * index 2, total variables = 5, x = [1, 2, 3]), then the parameter is updated + * and the derivatives are re-verified. + * + * P1 = [1 2 0; 2 3 0; 0 0 4]: value = 57, grad = [10,16,24], Hessian = 4 P1. + * P2 = [2 1 0; 1 4 0; 0 0 5]: value = 67, grad = [8,18,30], Hessian = 4 P2. + */ +const char *test_wsum_hess_quad_form_dense_param(void) +{ + double u_vals[5] = {0.0, 0.0, 1.0, 2.0, 3.0}; + double w = 2.0; + + /* row-major 3x3 dense P (symmetric, so column-major == row-major) */ + double P1[9] = {1.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 4.0}; + + expr *param_P = new_parameter(3, 3, 0, 5, P1); /* param_id 0 = updatable */ + expr *x = new_variable(3, 1, 2, 5); + expr *node = + new_quad_form_dense(x, 3, NULL, param_P); /* parametric: data NULL */ + + jacobian_init(node); + wsum_hess_init(node); + + node->forward(node, u_vals); + node->eval_jacobian(node); + node->eval_wsum_hess(node, &w); + + int expected_jp[2] = {0, 3}; + int expected_ji[3] = {2, 3, 4}; + int expected_hp[6] = {0, 0, 0, 3, 6, 9}; + int expected_hi[9] = {2, 3, 4, 2, 3, 4, 2, 3, 4}; + + /* --- parameter P1 --- */ + mu_assert("param quad_form value (P1) fail", fabs(node->value[0] - 57.0) < 1e-9); + double grad1[3] = {10.0, 16.0, 24.0}; + mu_assert("param quad_form jacobian vals (P1) fail", + cmp_values(node->jacobian, grad1, 3)); + mu_assert("param quad_form jacobian sparsity (P1) fail", + cmp_sparsity(node->jacobian, expected_jp, expected_ji, 1, 3)); + mu_assert("param quad_form hessian not permuted_dense", + node->wsum_hess->is_permuted_dense); + double hess1[9] = {4.0, 8.0, 0.0, 8.0, 12.0, 0.0, 0.0, 0.0, 16.0}; + mu_assert("param quad_form hessian vals (P1) fail", + cmp_values(node->wsum_hess, hess1, 9)); + mu_assert("param quad_form hessian sparsity (P1) fail", + cmp_sparsity(node->wsum_hess, expected_hp, expected_hi, 5, 9)); + + /* --- update the parameter and re-evaluate (sparsity is reused) --- */ + double P2[9] = {2.0, 1.0, 0.0, 1.0, 4.0, 0.0, 0.0, 0.0, 5.0}; + memcpy(param_P->value, P2, 9 * sizeof(double)); + expr_set_needs_refresh(node); + + node->forward(node, u_vals); + node->eval_jacobian(node); + node->eval_wsum_hess(node, &w); + + /* --- parameter P2 --- */ + mu_assert("param quad_form value (P2) fail", fabs(node->value[0] - 67.0) < 1e-9); + double grad2[3] = {8.0, 18.0, 30.0}; + mu_assert("param quad_form jacobian vals (P2) fail", + cmp_values(node->jacobian, grad2, 3)); + mu_assert("param quad_form jacobian sparsity (P2) fail", + cmp_sparsity(node->jacobian, expected_jp, expected_ji, 1, 3)); + double hess2[9] = {8.0, 4.0, 0.0, 4.0, 16.0, 0.0, 0.0, 0.0, 20.0}; + mu_assert("param quad_form hessian vals (P2) fail", + cmp_values(node->wsum_hess, hess2, 9)); + mu_assert("param quad_form hessian sparsity (P2) fail", + cmp_sparsity(node->wsum_hess, expected_hp, expected_hi, 5, 9)); + + free_expr(node); + return 0; +} From 8ed4408330fca8f1d8e894b100534815ea633150 Mon Sep 17 00:00:00 2001 From: Transurgeon Date: Tue, 2 Jun 2026 06:49:22 -0400 Subject: [PATCH 4/9] final touchups before a first review --- src/atoms/other/quad_form.c | 23 +++--- tests/all_tests.c | 1 - tests/wsum_hess/other/test_quad_form_dense.h | 57 +++++++++++++ .../other/test_quad_form_dense_param.h | 81 ------------------- 4 files changed, 67 insertions(+), 95 deletions(-) delete mode 100644 tests/wsum_hess/other/test_quad_form_dense_param.h diff --git a/src/atoms/other/quad_form.c b/src/atoms/other/quad_form.c index d68439a..6f68f55 100644 --- a/src/atoms/other/quad_form.c +++ b/src/atoms/other/quad_form.c @@ -33,8 +33,6 @@ n x n permuted_dense (optionally parameter-fed) over a leaf variable, with the Hessian 2Q materialized as a dense block. Q is assumed symmetric. */ -/* ============ shared forward + jacobian (via the matrix vtable) ============ */ - /* Copy the latest parameter value into Q (symmetric: column-major == row-major). */ static void refresh_dense_Q(quad_form_expr *qnode) { @@ -153,9 +151,8 @@ static void eval_jacobian(expr *node) } } -/* ===== hessian, sparse backend (raw CSR/CSC symmetric products; the non-leaf - chain rule J_f^T Q J_f has no matrix-vtable equivalent) ===================== */ - +/* Sparse-backend hessian. The non-leaf chain rule J_f^T Q J_f uses raw CSR/CSC + symmetric products that have no matrix-vtable equivalent. */ static void wsum_hess_init_impl(expr *node) { quad_form_expr *qnode = (quad_form_expr *) node; @@ -245,7 +242,7 @@ static void eval_wsum_hess(expr *node, const double *w) CSC_matrix *QJf = qnode->QJf; CSR_matrix *term1 = node->work->hess_term1->to_csr(node->work->hess_term1); - /* term1 = J_f^T Q J_f = J_f^T B */ + /* term1 = J_f^T Q J_f = J_f^T B */ BA_fill_values(Q, Jf, QJf); BTDA_fill_values(Jf, QJf, NULL, term1); @@ -266,15 +263,14 @@ static void eval_wsum_hess(expr *node, const double *w) } } -/* ============== hessian, dense backend (permuted_dense block) ============== */ - +/* Dense-backend hessian: 2wQ as a permuted_dense block. */ static void wsum_hess_init_dense(expr *node) { quad_form_expr *qnode = (quad_form_expr *) node; expr *x = node->left; int n = qnode->n; - /* Hessian is the dense block 2P over x's contiguous variable range. */ + /* Hessian is the dense block 2Q over x's contiguous variable range. */ int *perm = (int *) sp_malloc(n * sizeof(int)); for (int i = 0; i < n; i++) { @@ -296,8 +292,6 @@ static void eval_wsum_hess_dense(expr *node, const double *w) cblas_dscal(nn, 2.0 * w[0], node->wsum_hess->x, 1); } -/* ============================= shared / ctors ============================== */ - static void free_type_data(expr *node) { quad_form_expr *qnode = (quad_form_expr *) node; @@ -364,8 +358,7 @@ expr *new_quad_form_dense(expr *child, int n, const double *P_data, /* dwork stores Q @ x in the forward pass */ node->work->dwork = (double *) sp_malloc(n * sizeof(double)); - /* Convention: exactly one of P_data (constant) or param_source (parametric) - is set. */ + /* parameter support */ qnode->param_source = param_source; if (param_source != NULL) { @@ -375,11 +368,14 @@ expr *new_quad_form_dense(expr *child, int n, const double *P_data, "set\n"); exit(1); } + expr_retain(param_source); + /* Q is filled by refresh_dense_Q on the first forward pass. */ qnode->Q = new_permuted_dense_full(n, n, NULL); node->needs_parameter_refresh = true; } + /* constant matrix case */ else { if (P_data == NULL) @@ -387,6 +383,7 @@ expr *new_quad_form_dense(expr *child, int n, const double *P_data, fprintf(stderr, "Error in new_quad_form_dense: need P data\n"); exit(1); } + qnode->Q = new_permuted_dense_full(n, n, P_data); } diff --git a/tests/all_tests.c b/tests/all_tests.c index f01684c..4e86892 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -104,7 +104,6 @@ #include "wsum_hess/other/test_prod_axis_zero.h" #include "wsum_hess/other/test_quad_form.h" #include "wsum_hess/other/test_quad_form_dense.h" -#include "wsum_hess/other/test_quad_form_dense_param.h" #endif /* PROFILE_ONLY */ #ifdef PROFILE_ONLY diff --git a/tests/wsum_hess/other/test_quad_form_dense.h b/tests/wsum_hess/other/test_quad_form_dense.h index 920d5f6..87961a4 100644 --- a/tests/wsum_hess/other/test_quad_form_dense.h +++ b/tests/wsum_hess/other/test_quad_form_dense.h @@ -56,3 +56,60 @@ const char *test_wsum_hess_quad_form_dense(void) free_expr(node); return 0; } + +/* Parametric dense quad_form: P is supplied by a parameter and refreshed each + * solve. Same setup as above; updating the parameter changes the values but not + * the sparsity. + * P1 = [1 2 0; 2 3 0; 0 0 4]: value 57, grad [10,16,24], Hessian 4 P1. + * P2 = [2 1 0; 1 4 0; 0 0 5]: value 67, grad [8,18,30], Hessian 4 P2. + */ +const char *test_wsum_hess_quad_form_dense_param(void) +{ + double u_vals[5] = {0.0, 0.0, 1.0, 2.0, 3.0}; + double w = 2.0; + + double P1[9] = {1.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 4.0}; + expr *param_P = new_parameter(3, 3, 0, 5, P1); /* param_id 0 = updatable */ + expr *x = new_variable(3, 1, 2, 5); + expr *node = new_quad_form_dense(x, 3, NULL, param_P); /* P fed by parameter */ + + jacobian_init(node); + wsum_hess_init(node); + node->forward(node, u_vals); + node->eval_jacobian(node); + node->eval_wsum_hess(node, &w); + + /* parameter P1: check values and sparsity */ + int expected_jp[2] = {0, 3}; + int expected_ji[3] = {2, 3, 4}; + int expected_hp[6] = {0, 0, 0, 3, 6, 9}; + int expected_hi[9] = {2, 3, 4, 2, 3, 4, 2, 3, 4}; + double grad1[3] = {10.0, 16.0, 24.0}; + double hess1[9] = {4.0, 8.0, 0.0, 8.0, 12.0, 0.0, 0.0, 0.0, 16.0}; + mu_assert("P1 value fail", fabs(node->value[0] - 57.0) < 1e-9); + mu_assert("P1 grad vals fail", cmp_values(node->jacobian, grad1, 3)); + mu_assert("P1 grad sparsity fail", + cmp_sparsity(node->jacobian, expected_jp, expected_ji, 1, 3)); + mu_assert("hessian not permuted_dense", node->wsum_hess->is_permuted_dense); + mu_assert("P1 hessian vals fail", cmp_values(node->wsum_hess, hess1, 9)); + mu_assert("P1 hessian sparsity fail", + cmp_sparsity(node->wsum_hess, expected_hp, expected_hi, 5, 9)); + + /* update the parameter; the refresh recomputes only the values */ + double P2[9] = {2.0, 1.0, 0.0, 1.0, 4.0, 0.0, 0.0, 0.0, 5.0}; + memcpy(param_P->value, P2, 9 * sizeof(double)); + expr_set_needs_refresh(node); + node->forward(node, u_vals); + node->eval_jacobian(node); + node->eval_wsum_hess(node, &w); + + /* parameter P2: only the values change, sparsity is unchanged */ + double grad2[3] = {8.0, 18.0, 30.0}; + double hess2[9] = {8.0, 4.0, 0.0, 4.0, 16.0, 0.0, 0.0, 0.0, 20.0}; + mu_assert("P2 value fail", fabs(node->value[0] - 67.0) < 1e-9); + mu_assert("P2 grad vals fail", cmp_values(node->jacobian, grad2, 3)); + mu_assert("P2 hessian vals fail", cmp_values(node->wsum_hess, hess2, 9)); + + free_expr(node); + return 0; +} diff --git a/tests/wsum_hess/other/test_quad_form_dense_param.h b/tests/wsum_hess/other/test_quad_form_dense_param.h deleted file mode 100644 index d679c50..0000000 --- a/tests/wsum_hess/other/test_quad_form_dense_param.h +++ /dev/null @@ -1,81 +0,0 @@ -#include "atoms/affine.h" -#include "atoms/non_elementwise_full_dom.h" -#include "expr.h" -#include "minunit.h" -#include "test_helpers.h" -#include -#include - -/* Parametric dense path of quad_form: y = x' P x where P is fed by a parameter - * and refreshed each solve. Same setup as the constant test (x is 3x1 at global - * index 2, total variables = 5, x = [1, 2, 3]), then the parameter is updated - * and the derivatives are re-verified. - * - * P1 = [1 2 0; 2 3 0; 0 0 4]: value = 57, grad = [10,16,24], Hessian = 4 P1. - * P2 = [2 1 0; 1 4 0; 0 0 5]: value = 67, grad = [8,18,30], Hessian = 4 P2. - */ -const char *test_wsum_hess_quad_form_dense_param(void) -{ - double u_vals[5] = {0.0, 0.0, 1.0, 2.0, 3.0}; - double w = 2.0; - - /* row-major 3x3 dense P (symmetric, so column-major == row-major) */ - double P1[9] = {1.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 4.0}; - - expr *param_P = new_parameter(3, 3, 0, 5, P1); /* param_id 0 = updatable */ - expr *x = new_variable(3, 1, 2, 5); - expr *node = - new_quad_form_dense(x, 3, NULL, param_P); /* parametric: data NULL */ - - jacobian_init(node); - wsum_hess_init(node); - - node->forward(node, u_vals); - node->eval_jacobian(node); - node->eval_wsum_hess(node, &w); - - int expected_jp[2] = {0, 3}; - int expected_ji[3] = {2, 3, 4}; - int expected_hp[6] = {0, 0, 0, 3, 6, 9}; - int expected_hi[9] = {2, 3, 4, 2, 3, 4, 2, 3, 4}; - - /* --- parameter P1 --- */ - mu_assert("param quad_form value (P1) fail", fabs(node->value[0] - 57.0) < 1e-9); - double grad1[3] = {10.0, 16.0, 24.0}; - mu_assert("param quad_form jacobian vals (P1) fail", - cmp_values(node->jacobian, grad1, 3)); - mu_assert("param quad_form jacobian sparsity (P1) fail", - cmp_sparsity(node->jacobian, expected_jp, expected_ji, 1, 3)); - mu_assert("param quad_form hessian not permuted_dense", - node->wsum_hess->is_permuted_dense); - double hess1[9] = {4.0, 8.0, 0.0, 8.0, 12.0, 0.0, 0.0, 0.0, 16.0}; - mu_assert("param quad_form hessian vals (P1) fail", - cmp_values(node->wsum_hess, hess1, 9)); - mu_assert("param quad_form hessian sparsity (P1) fail", - cmp_sparsity(node->wsum_hess, expected_hp, expected_hi, 5, 9)); - - /* --- update the parameter and re-evaluate (sparsity is reused) --- */ - double P2[9] = {2.0, 1.0, 0.0, 1.0, 4.0, 0.0, 0.0, 0.0, 5.0}; - memcpy(param_P->value, P2, 9 * sizeof(double)); - expr_set_needs_refresh(node); - - node->forward(node, u_vals); - node->eval_jacobian(node); - node->eval_wsum_hess(node, &w); - - /* --- parameter P2 --- */ - mu_assert("param quad_form value (P2) fail", fabs(node->value[0] - 67.0) < 1e-9); - double grad2[3] = {8.0, 18.0, 30.0}; - mu_assert("param quad_form jacobian vals (P2) fail", - cmp_values(node->jacobian, grad2, 3)); - mu_assert("param quad_form jacobian sparsity (P2) fail", - cmp_sparsity(node->jacobian, expected_jp, expected_ji, 1, 3)); - double hess2[9] = {8.0, 4.0, 0.0, 4.0, 16.0, 0.0, 0.0, 0.0, 20.0}; - mu_assert("param quad_form hessian vals (P2) fail", - cmp_values(node->wsum_hess, hess2, 9)); - mu_assert("param quad_form hessian sparsity (P2) fail", - cmp_sparsity(node->wsum_hess, expected_hp, expected_hi, 5, 9)); - - free_expr(node); - return 0; -} From 49d2dca8f8b896b807aa9345fea4407532c7f21d Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Thu, 4 Jun 2026 04:05:42 -0400 Subject: [PATCH 5/9] make dense path work with compositions --- include/subexpr.h | 6 +- src/atoms/other/quad_form.c | 118 +++++++++++++++++++++++++++++------- 2 files changed, 100 insertions(+), 24 deletions(-) diff --git a/include/subexpr.h b/include/subexpr.h index fe5bca3..edc75b8 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -60,8 +60,10 @@ typedef struct quad_form_expr { expr base; matrix *Q; - CSC_matrix *QJf; /* Q * J_f in CSC_matrix (for chain rule hessian) */ - int n; /* quadratic dimension = left->size */ + CSC_matrix *QJf; /* Q * J_f in CSC_matrix (sparse-path chain-rule hessian) */ + matrix *QJf_dense; /* Q * J_f as permuted_dense (dense-path composition hessian) */ + double *diag_w; /* length-n diagonal (= 2w) fed to BTDA on the dense path */ + int n; /* quadratic dimension = left->size */ /* parametric dense path: param_source feeds Q each solve (NULL otherwise) */ expr *param_source; diff --git a/src/atoms/other/quad_form.c b/src/atoms/other/quad_form.c index 6f68f55..6940fd1 100644 --- a/src/atoms/other/quad_form.c +++ b/src/atoms/other/quad_form.c @@ -19,6 +19,7 @@ #include "subexpr.h" #include "utils/CSC_matrix.h" #include "utils/cblas_wrapper.h" +#include "utils/matmul_dispatchers.h" #include "utils/matrix_sum.h" #include "utils/permuted_dense.h" #include "utils/sparse_matrix.h" @@ -30,8 +31,9 @@ #include /* Quadratic form y = x'Qx. Sparse path: Q is a CSR matrix. Dense path: Q is an - n x n permuted_dense (optionally parameter-fed) over a leaf variable, with the - Hessian 2Q materialized as a dense block. Q is assumed symmetric. */ + n x n permuted_dense (optionally parameter-fed). For a leaf x the Hessian 2Q is + materialized as a dense block; for a composition x = f(u) the dense path forms the + chain rule J_f^T Q J_f via the PD matmul dispatchers. Q is assumed symmetric. */ /* Copy the latest parameter value into Q (symmetric: column-major == row-major). */ static void refresh_dense_Q(quad_form_expr *qnode) @@ -263,33 +265,102 @@ static void eval_wsum_hess(expr *node, const double *w) } } -/* Dense-backend hessian: 2wQ as a permuted_dense block. */ +/* Dense-backend hessian. Leaf x: 2wQ materialized as a permuted_dense block (the + fast common case). Composition x = f(u): the chain rule + H = J_f^T (2w Q) J_f + sum_i (2w Q f(u))_i nabla^2 f_i = term1 + term2, + with term1 formed via the PD matmul dispatchers (Q symmetric PD so QJf = Q J_f + is PD; J_f^T Q J_f = (Q J_f)^T J_f keeps the PD operand on the dispatch key). */ static void wsum_hess_init_dense(expr *node) { quad_form_expr *qnode = (quad_form_expr *) node; expr *x = node->left; int n = qnode->n; - /* Hessian is the dense block 2Q over x's contiguous variable range. */ - int *perm = (int *) sp_malloc(n * sizeof(int)); - for (int i = 0; i < n; i++) + if (x->var_id != NOT_A_VARIABLE) + { + /* Hessian is the dense block 2Q over x's contiguous variable range. */ + int *perm = (int *) sp_malloc(n * sizeof(int)); + for (int i = 0; i < n; i++) + { + perm[i] = x->var_id + i; + } + node->wsum_hess = + new_permuted_dense(node->n_vars, node->n_vars, n, n, perm, perm, NULL); + sp_free(perm); + } + else { - perm[i] = x->var_id + i; + jacobian_init(x); + + /* The dispatchers read a sparse child jacobian through its csc_cache. */ + if (!x->jacobian->is_permuted_dense && !x->jacobian->is_stacked_pd) + { + sparse_matrix_ensure_csc_cache((sparse_matrix *) x->jacobian); + } + + /* term1 = J_f^T Q J_f = (Q J_f)^T J_f. QJf is PD; passing it as the + transposed operand B keeps the PD type on the dispatch key. */ + permuted_dense *Q_pd = (permuted_dense *) qnode->Q; + qnode->QJf_dense = BA_pd_matrices_alloc(Q_pd, x->jacobian); + node->work->hess_term1 = BTA_matrices_alloc(x->jacobian, qnode->QJf_dense); + qnode->diag_w = (double *) sp_malloc(n * sizeof(double)); + + /* term2 = sum_i (Q f(x))_i nabla^2 f_i */ + wsum_hess_init(x); + node->work->hess_term2 = x->wsum_hess->copy_sparsity(x->wsum_hess); + + /* hess = term1 + term2 (CSR-backed; sum_matrices is type-agnostic) */ + int max_nnz = node->work->hess_term1->nnz + node->work->hess_term2->nnz; + node->wsum_hess = + new_sparse_matrix_alloc(node->n_vars, node->n_vars, max_nnz); + sum_matrices_alloc(node->work->hess_term1, node->work->hess_term2, + node->wsum_hess); } - node->wsum_hess = - new_permuted_dense(node->n_vars, node->n_vars, n, n, perm, perm, NULL); - sp_free(perm); } static void eval_wsum_hess_dense(expr *node, const double *w) { quad_form_expr *qnode = (quad_form_expr *) node; - int nn = qnode->n * qnode->n; + expr *x = node->left; + double two_w = 2.0 * w[0]; + + if (x->var_id != NOT_A_VARIABLE) + { + int nn = qnode->n * qnode->n; + /* Hessian = 2 w Q (Q symmetric, constant up to the weight). The PD's value + buffer (->x) aliases its dense block, so writing it updates to_csr too. */ + memcpy(node->wsum_hess->x, qnode->Q->x, nn * sizeof(double)); + cblas_dscal(nn, two_w, node->wsum_hess->x, 1); + } + else + { + /* Refresh the child jacobian's csc_cache unconditionally: the dispatchers + read it, and jacobian_csc_filled tracks the separate work->jacobian_csc + mirror (already set by the gradient pass), so it must NOT gate this. */ + x->eval_jacobian(x); + x->jacobian->refresh_csc_values(x->jacobian); + + /* term1 = 2w J_f^T Q J_f. The dispatcher fill is B^T diag(d) A (no plain + B^T A form); a constant diagonal d = 2w carries the weight. */ + for (int i = 0; i < qnode->n; i++) + { + qnode->diag_w[i] = two_w; + } + BA_pd_matrices_fill_values((permuted_dense *) qnode->Q, x->jacobian, + (permuted_dense *) qnode->QJf_dense); + BTDA_matrices_fill_values(x->jacobian, qnode->diag_w, qnode->QJf_dense, + node->work->hess_term1); + + /* term2 = 2w sum_i (Q f(x))_i nabla^2 f_i (dwork = Q f(x) from forward) */ + x->eval_wsum_hess(x, node->work->dwork); + memcpy(node->work->hess_term2->x, x->wsum_hess->x, + x->wsum_hess->nnz * sizeof(double)); + cblas_dscal(node->work->hess_term2->nnz, two_w, node->work->hess_term2->x, + 1); - /* Hessian = 2 w Q (Q symmetric, constant up to the weight). The PD's value - buffer (->x) aliases its dense block, so writing it updates to_csr too. */ - memcpy(node->wsum_hess->x, qnode->Q->x, nn * sizeof(double)); - cblas_dscal(nn, 2.0 * w[0], node->wsum_hess->x, 1); + sum_matrices_fill_values(node->work->hess_term1, node->work->hess_term2, + node->wsum_hess); + } } static void free_type_data(expr *node) @@ -302,6 +373,16 @@ static void free_type_data(expr *node) free_CSC_matrix(qnode->QJf); qnode->QJf = NULL; } + if (qnode->QJf_dense != NULL) + { + free_matrix(qnode->QJf_dense); + qnode->QJf_dense = NULL; + } + if (qnode->diag_w != NULL) + { + sp_free(qnode->diag_w); + qnode->diag_w = NULL; + } free_expr(qnode->param_source); qnode->param_source = NULL; } @@ -338,13 +419,6 @@ expr *new_quad_form_dense(expr *child, int n, const double *P_data, { assert(child->d1 == 1 || child->d2 == 1); /* child must be a vector */ assert(child->size == n); - /* Dense path supports a leaf variable only (the only case cvxpy emits). */ - if (child->var_id == NOT_A_VARIABLE) - { - fprintf(stderr, "Error in new_quad_form_dense: dense path requires a leaf " - "variable child\n"); - exit(1); - } quad_form_expr *qnode = (quad_form_expr *) sp_calloc(1, sizeof(quad_form_expr)); expr *node = &qnode->base; From 1f3e9414ba8fa0bd2f4f21254becc621aa429c36 Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Thu, 4 Jun 2026 04:15:13 -0400 Subject: [PATCH 6/9] Tidy quad_form dense path for review Collapse the single-use refresh_dense_Q helper into refresh_param_values_qf, drop the unused include, remove low-value label comments, and reflow the quad_form_expr struct comments to be clang-format clean. No behavior change. Co-Authored-By: Claude Opus 4.8 (1M context) --- include/subexpr.h | 10 ++++++---- src/atoms/other/quad_form.c | 18 +++++------------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/include/subexpr.h b/include/subexpr.h index edc75b8..a81afea 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -60,10 +60,12 @@ typedef struct quad_form_expr { expr base; matrix *Q; - CSC_matrix *QJf; /* Q * J_f in CSC_matrix (sparse-path chain-rule hessian) */ - matrix *QJf_dense; /* Q * J_f as permuted_dense (dense-path composition hessian) */ - double *diag_w; /* length-n diagonal (= 2w) fed to BTDA on the dense path */ - int n; /* quadratic dimension = left->size */ + /* Q * J_f for the composition chain-rule hessian: CSC on the sparse path, + permuted_dense on the dense path. */ + CSC_matrix *QJf; + matrix *QJf_dense; + double *diag_w; /* length-n diagonal (= 2w) fed to BTDA on the dense path */ + int n; /* quadratic dimension = left->size */ /* parametric dense path: param_source feeds Q each solve (NULL otherwise) */ expr *param_source; diff --git a/src/atoms/other/quad_form.c b/src/atoms/other/quad_form.c index 6940fd1..c50e5a6 100644 --- a/src/atoms/other/quad_form.c +++ b/src/atoms/other/quad_form.c @@ -25,7 +25,6 @@ #include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include -#include #include #include #include @@ -35,14 +34,8 @@ materialized as a dense block; for a composition x = f(u) the dense path forms the chain rule J_f^T Q J_f via the PD matmul dispatchers. Q is assumed symmetric. */ -/* Copy the latest parameter value into Q (symmetric: column-major == row-major). */ -static void refresh_dense_Q(quad_form_expr *qnode) -{ - memcpy(qnode->Q->x, qnode->param_source->value, - (size_t) qnode->n * qnode->n * sizeof(double)); -} - -/* Refresh Q from the parameter once per solve (no-op when Q is constant). */ +/* Refresh Q from the parameter once per solve (no-op when Q is constant). + Q is symmetric, so column-major == row-major and the copy is verbatim. */ static void refresh_param_values_qf(quad_form_expr *qnode) { if (qnode->param_source == NULL || !qnode->base.needs_parameter_refresh) @@ -50,7 +43,8 @@ static void refresh_param_values_qf(quad_form_expr *qnode) return; } qnode->base.needs_parameter_refresh = false; - refresh_dense_Q(qnode); + memcpy(qnode->Q->x, qnode->param_source->value, + (size_t) qnode->n * qnode->n * sizeof(double)); } static void forward(expr *node, const double *u) @@ -432,7 +426,6 @@ expr *new_quad_form_dense(expr *child, int n, const double *P_data, /* dwork stores Q @ x in the forward pass */ node->work->dwork = (double *) sp_malloc(n * sizeof(double)); - /* parameter support */ qnode->param_source = param_source; if (param_source != NULL) { @@ -445,11 +438,10 @@ expr *new_quad_form_dense(expr *child, int n, const double *P_data, expr_retain(param_source); - /* Q is filled by refresh_dense_Q on the first forward pass. */ + /* Q is filled from the parameter on the first forward pass. */ qnode->Q = new_permuted_dense_full(n, n, NULL); node->needs_parameter_refresh = true; } - /* constant matrix case */ else { if (P_data == NULL) From bd7074e23914aeff217b4cf1689ba42f16c1a11e Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Thu, 4 Jun 2026 04:21:39 -0400 Subject: [PATCH 7/9] Clarify quad_form QJf fields are mutually exclusive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note in the quad_form_expr comment that QJf (sparse CSC) and QJf_dense (permuted_dense) are never both used — one per node — and why they can't share a single matrix* field (the sparse symmetric products have no matrix-vtable form). Doc-only; no behavior change. Co-Authored-By: Claude Opus 4.8 (1M context) --- include/subexpr.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/subexpr.h b/include/subexpr.h index a81afea..26aaaf6 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -60,8 +60,9 @@ typedef struct quad_form_expr { expr base; matrix *Q; - /* Q * J_f for the composition chain-rule hessian: CSC on the sparse path, - permuted_dense on the dense path. */ + /* Q * J_f for the composition chain-rule hessian; exactly one is used per + node. Sparse path: CSC (raw symmetric products, no matrix-vtable form). + Dense path: permuted_dense via the matrix dispatchers. */ CSC_matrix *QJf; matrix *QJf_dense; double *diag_w; /* length-n diagonal (= 2w) fed to BTDA on the dense path */ From 503ef335432d7f9e507127deb52bece8ac77ace4 Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Sat, 6 Jun 2026 06:04:18 -0400 Subject: [PATCH 8/9] respond to dance858 review comments --- include/atoms/non_elementwise_full_dom.h | 11 ++--- src/atoms/other/quad_form.c | 21 +++++---- tests/all_tests.c | 2 + .../composite/test_chain_rule_jacobian.h | 4 +- tests/jacobian_tests/other/test_quad_form.h | 4 +- .../composite/test_chain_rule_wsum_hess.h | 6 +-- tests/wsum_hess/other/test_quad_form.h | 2 +- tests/wsum_hess/other/test_quad_form_dense.h | 44 +++++++++++++++++++ 8 files changed, 70 insertions(+), 24 deletions(-) diff --git a/include/atoms/non_elementwise_full_dom.h b/include/atoms/non_elementwise_full_dom.h index 1129ab4..94a7239 100644 --- a/include/atoms/non_elementwise_full_dom.h +++ b/include/atoms/non_elementwise_full_dom.h @@ -22,13 +22,14 @@ #include "subexpr.h" #include "utils/CSR_matrix.h" -expr *new_quad_form(expr *child, CSR_matrix *Q); +expr *new_quad_form_sparse(expr *child, CSR_matrix *Q); -/* Dense / parametric quadratic form y = x' P x over a leaf variable x. +/* Dense / parametric quadratic form y = x' P x over a vector expression x (a + * leaf variable, or a composition x = f(u) handled via the chain rule). * - * P is n x n, row-major, and assumed symmetric (matching the new_quad_form - * convention where the Hessian of x'Qx is taken to be 2Q). The Hessian is - * materialized as a dense permuted_dense block. + * P is n x n, row-major, and assumed symmetric (matching the new_quad_form_sparse + * convention where the Hessian of x'Qx is taken to be 2Q). For a leaf x the + * Hessian is materialized as a dense permuted_dense block. * * - constant P: P_data points to n*n doubles, param_source == NULL. * - parametric P: P_data == NULL, param_source is the parameter node that diff --git a/src/atoms/other/quad_form.c b/src/atoms/other/quad_form.c index c50e5a6..9116569 100644 --- a/src/atoms/other/quad_form.c +++ b/src/atoms/other/quad_form.c @@ -149,7 +149,7 @@ static void eval_jacobian(expr *node) /* Sparse-backend hessian. The non-leaf chain rule J_f^T Q J_f uses raw CSR/CSC symmetric products that have no matrix-vtable equivalent. */ -static void wsum_hess_init_impl(expr *node) +static void wsum_hess_init_sparse(expr *node) { quad_form_expr *qnode = (quad_form_expr *) node; CSR_matrix *Q = qnode->Q->to_csr(qnode->Q); @@ -206,7 +206,7 @@ static void wsum_hess_init_impl(expr *node) } } -static void eval_wsum_hess(expr *node, const double *w) +static void eval_wsum_hess_sparse(expr *node, const double *w) { quad_form_expr *qnode = (quad_form_expr *) node; CSR_matrix *Q = qnode->Q->to_csr(qnode->Q); @@ -284,8 +284,6 @@ static void wsum_hess_init_dense(expr *node) } else { - jacobian_init(x); - /* The dispatchers read a sparse child jacobian through its csc_cache. */ if (!x->jacobian->is_permuted_dense && !x->jacobian->is_stacked_pd) { @@ -328,14 +326,14 @@ static void eval_wsum_hess_dense(expr *node, const double *w) } else { - /* Refresh the child jacobian's csc_cache unconditionally: the dispatchers - read it, and jacobian_csc_filled tracks the separate work->jacobian_csc - mirror (already set by the gradient pass), so it must NOT gate this. */ - x->eval_jacobian(x); + /* Mirror the child jacobian's current values into its csc_cache; the PD + dispatchers below read from it. */ x->jacobian->refresh_csc_values(x->jacobian); /* term1 = 2w J_f^T Q J_f. The dispatcher fill is B^T diag(d) A (no plain - B^T A form); a constant diagonal d = 2w carries the weight. */ + B^T A form); a constant diagonal d = 2w carries the weight. + Potential TODO: Add back BTA_matrices_fill_values_kernel so we don't have + to form diag_w. */ for (int i = 0; i < qnode->n; i++) { qnode->diag_w[i] = two_w; @@ -388,14 +386,15 @@ static bool is_affine(const expr *node) return false; } -expr *new_quad_form(expr *left, CSR_matrix *Q) +expr *new_quad_form_sparse(expr *left, CSR_matrix *Q) { assert(left->d1 == 1 || left->d2 == 1); /* left must be a vector */ quad_form_expr *qnode = (quad_form_expr *) sp_calloc(1, sizeof(quad_form_expr)); expr *node = &qnode->base; init_expr(node, 1, 1, left->n_vars, forward, jacobian_init_impl, eval_jacobian, - is_affine, wsum_hess_init_impl, eval_wsum_hess, free_type_data); + is_affine, wsum_hess_init_sparse, eval_wsum_hess_sparse, + free_type_data); node->left = left; expr_retain(left); diff --git a/tests/all_tests.c b/tests/all_tests.c index 4e86892..4c0f193 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -295,6 +295,8 @@ int main(void) mu_run_test(test_wsum_hess_quad_over_lin_yx, tests_run); mu_run_test(test_wsum_hess_quad_form, tests_run); mu_run_test(test_wsum_hess_quad_form_dense, tests_run); + mu_run_test(test_wsum_hess_quad_form_dense_affine, tests_run); + mu_run_test(test_wsum_hess_quad_form_dense_exp, tests_run); mu_run_test(test_wsum_hess_quad_form_dense_param, tests_run); mu_run_test(test_wsum_hess_scalar_mult_log_vector, tests_run); mu_run_test(test_wsum_hess_scalar_mult_log_matrix, tests_run); diff --git a/tests/jacobian_tests/composite/test_chain_rule_jacobian.h b/tests/jacobian_tests/composite/test_chain_rule_jacobian.h index 44bb966..a7870f1 100644 --- a/tests/jacobian_tests/composite/test_chain_rule_jacobian.h +++ b/tests/jacobian_tests/composite/test_chain_rule_jacobian.h @@ -135,7 +135,7 @@ const char *test_jacobian_quad_form_Ax(void) expr *x = new_variable(4, 1, 1, 6); expr *Ax = new_left_matmul(NULL, x, A); expr *sin_Ax = new_sin(Ax); - expr *node = new_quad_form(sin_Ax, Q); + expr *node = new_quad_form_sparse(sin_Ax, Q); mu_assert("check_jacobian failed", check_jacobian_num(node, u_vals, NUMERICAL_DIFF_DEFAULT_H)); @@ -162,7 +162,7 @@ const char *test_jacobian_quad_form_exp(void) expr *x = new_variable(3, 1, 0, 3); expr *exp_x = new_exp(x); - expr *node = new_quad_form(exp_x, Q); + expr *node = new_quad_form_sparse(exp_x, Q); mu_assert("check_jacobian failed", check_jacobian_num(node, u_vals, NUMERICAL_DIFF_DEFAULT_H)); diff --git a/tests/jacobian_tests/other/test_quad_form.h b/tests/jacobian_tests/other/test_quad_form.h index 22d33c0..f4c1b7e 100644 --- a/tests/jacobian_tests/other/test_quad_form.h +++ b/tests/jacobian_tests/other/test_quad_form.h @@ -21,7 +21,7 @@ const char *test_quad_form(void) memcpy(Q->x, Qx, 5 * sizeof(double)); memcpy(Q->i, Qi, 5 * sizeof(int)); memcpy(Q->p, Qp, 4 * sizeof(int)); - expr *node = new_quad_form(x, Q); + expr *node = new_quad_form_sparse(x, Q); jacobian_init(node); node->forward(node, u_vals); @@ -67,7 +67,7 @@ memcpy(A->x, Ax, 10 * sizeof(double)); memcpy(A->i, Ai, 10 * sizeof(int)); memcpy(A->p, Ap, 4 * sizeof(int)); expr *Au = new_linear(u, A, NULL); -expr *node = new_quad_form(Au, Q); +expr *node = new_quad_form_sparse(Au, Q); jacobian_init(node); node->forward(node, u_vals); diff --git a/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h b/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h index 296dd3c..ed2c405 100644 --- a/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h +++ b/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h @@ -341,7 +341,7 @@ const char *test_wsum_hess_quad_form_Ax(void) expr *x = new_variable(4, 1, 1, 6); expr *Ax = new_left_matmul(NULL, x, A); - expr *node = new_quad_form(Ax, Q); + expr *node = new_quad_form_sparse(Ax, Q); mu_assert("check_wsum_hess failed", check_wsum_hess(node, u_vals, &w, NUMERICAL_DIFF_DEFAULT_H)); @@ -371,7 +371,7 @@ const char *test_wsum_hess_quad_form_sin_Ax(void) expr *x = new_variable(4, 1, 1, 6); expr *Ax = new_left_matmul(NULL, x, A); expr *sin_Ax = new_sin(Ax); - expr *node = new_quad_form(sin_Ax, Q); + expr *node = new_quad_form_sparse(sin_Ax, Q); mu_assert("check_wsum_hess failed", check_wsum_hess(node, u_vals, &w, NUMERICAL_DIFF_DEFAULT_H)); @@ -501,7 +501,7 @@ const char *test_wsum_hess_quad_form_exp(void) expr *x = new_variable(3, 1, 0, 3); expr *exp_x = new_exp(x); - expr *node = new_quad_form(exp_x, Q); + expr *node = new_quad_form_sparse(exp_x, Q); mu_assert("check_wsum_hess failed", check_wsum_hess(node, u_vals, &w, NUMERICAL_DIFF_DEFAULT_H)); diff --git a/tests/wsum_hess/other/test_quad_form.h b/tests/wsum_hess/other/test_quad_form.h index bd62d5a..fa8956f 100644 --- a/tests/wsum_hess/other/test_quad_form.h +++ b/tests/wsum_hess/other/test_quad_form.h @@ -26,7 +26,7 @@ const char *test_wsum_hess_quad_form(void) memcpy(Q->p, Qp, 5 * sizeof(int)); expr *x = new_variable(4, 1, 3, 10); - expr *node = new_quad_form(x, Q); + expr *node = new_quad_form_sparse(x, Q); jacobian_init(node); node->forward(node, u_vals); diff --git a/tests/wsum_hess/other/test_quad_form_dense.h b/tests/wsum_hess/other/test_quad_form_dense.h index 87961a4..becaa4f 100644 --- a/tests/wsum_hess/other/test_quad_form_dense.h +++ b/tests/wsum_hess/other/test_quad_form_dense.h @@ -1,7 +1,9 @@ #include "atoms/affine.h" +#include "atoms/elementwise_full_dom.h" #include "atoms/non_elementwise_full_dom.h" #include "expr.h" #include "minunit.h" +#include "numerical_diff.h" #include "test_helpers.h" #include #include @@ -57,6 +59,48 @@ const char *test_wsum_hess_quad_form_dense(void) return 0; } +/* Dense quad_form over an affine composition x = A u (term2 vanishes). */ +const char *test_wsum_hess_quad_form_dense_affine(void) +{ + double u_vals[3] = {0.5, 1.0, 1.5}; + double w = 2.0; + + /* row-major 3x3 dense P */ + double P[9] = {1.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 4.0}; + /* row-major 3x3 A (square, so A u has size n) */ + double A[9] = {0.5, -1.0, 0.2, 0.3, 0.7, -0.4, -0.6, 0.1, 0.9}; + + expr *x = new_variable(3, 1, 0, 3); + expr *Ax = new_left_matmul_dense(NULL, x, 3, 3, A); + expr *node = new_quad_form_dense(Ax, 3, P, NULL); + + mu_assert("dense quad_form affine composition wsum_hess failed", + check_wsum_hess(node, u_vals, &w, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(node); + return 0; +} + +/* Dense quad_form over a nonlinear composition x = exp(u) (nonzero term2). */ +const char *test_wsum_hess_quad_form_dense_exp(void) +{ + double u_vals[3] = {0.5, 1.0, 1.5}; + double w = 3.0; + + /* row-major 3x3 dense P */ + double P[9] = {1.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 4.0}; + + expr *x = new_variable(3, 1, 0, 3); + expr *exp_x = new_exp(x); + expr *node = new_quad_form_dense(exp_x, 3, P, NULL); + + mu_assert("dense quad_form nonlinear composition wsum_hess failed", + check_wsum_hess(node, u_vals, &w, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(node); + return 0; +} + /* Parametric dense quad_form: P is supplied by a parameter and refreshed each * solve. Same setup as above; updating the parameter changes the values but not * the sparsity. From 960030efd10b919e0e5156fe9d3f6b33388a0d0f Mon Sep 17 00:00:00 2001 From: dance858 Date: Mon, 8 Jun 2026 18:45:58 +0200 Subject: [PATCH 9/9] minor --- include/atoms/non_elementwise_full_dom.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/include/atoms/non_elementwise_full_dom.h b/include/atoms/non_elementwise_full_dom.h index 94a7239..72832ca 100644 --- a/include/atoms/non_elementwise_full_dom.h +++ b/include/atoms/non_elementwise_full_dom.h @@ -22,20 +22,14 @@ #include "subexpr.h" #include "utils/CSR_matrix.h" +/* quad-form with sparse constant matrix Q */ expr *new_quad_form_sparse(expr *child, CSR_matrix *Q); -/* Dense / parametric quadratic form y = x' P x over a vector expression x (a - * leaf variable, or a composition x = f(u) handled via the chain rule). - * - * P is n x n, row-major, and assumed symmetric (matching the new_quad_form_sparse - * convention where the Hessian of x'Qx is taken to be 2Q). For a leaf x the - * Hessian is materialized as a dense permuted_dense block. - * - * - constant P: P_data points to n*n doubles, param_source == NULL. - * - parametric P: P_data == NULL, param_source is the parameter node that - * supplies P (n*n doubles) and is refreshed each solve. - */ -expr *new_quad_form_dense(expr *child, int n, const double *P_data, +/* quad-form with dense constant or parametric matrix Q (assumed to be + symmetric). For constant Q, Q_data should point to the values of Q in + row-major order. For parametric Q, Q_data should be NULL and param_source + should point to the parameter node. */ +expr *new_quad_form_dense(expr *child, int n, const double *Q_data, expr *param_source); /* product of all entries, without axis argument */