Commit 973676a

Respect pattern structures in sparse AD (#531)
1 parent: b14ee60

10 files changed

Lines changed: 154 additions & 120 deletions


DifferentiationInterface/Project.toml

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ PolyesterForwardDiff = "0.1.1"
 ReverseDiff = "1.15.1"
 SparseArrays = "<0.0.1,1"
 SparseConnectivityTracer = "0.5.0,0.6"
-SparseMatrixColorings = "0.4.0"
+SparseMatrixColorings = "0.4.4"
 Symbolics = "5.27.1, 6"
 Tracker = "0.2.33"
 Zygote = "0.6.69"

DifferentiationInterface/docs/src/explanation/advanced.md

Lines changed: 13 additions & 5 deletions
@@ -37,17 +37,18 @@ DifferentiationInterface does this automatically if you pass a backend of type [

 ### `AutoSparse` object

+`AutoSparse` backends only support [`jacobian`](@ref) and [`hessian`](@ref) (as well as their variants), because other operators do not output matrices.
 An `AutoSparse` backend must be constructed from three ingredients:

-1. An underlying (dense) backend
+1. An underlying (dense) backend, which can be [`SecondOrder`](@ref) or anything from [ADTypes.jl](https://github.com/SciML/ADTypes.jl)
 2. A sparsity pattern detector like:
    - [`TracerSparsityDetector`](@extref SparseConnectivityTracer.TracerSparsityDetector) from [SparseConnectivityTracer.jl](https://github.com/adrhill/SparseConnectivityTracer.jl)
    - [`SymbolicsSparsityDetector`](@extref Symbolics.SymbolicsSparsityDetector) from [Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl)
    - [`DenseSparsityDetector`](@ref) from DifferentiationInterface.jl (beware that this detector only gives a locally valid pattern)
-3. A coloring algorithm: [`GreedyColoringAlgorithm`](@extref SparseMatrixColorings.GreedyColoringAlgorithm) from [SparseMatrixColorings.jl](https://github.com/gdalle/SparseMatrixColorings.jl) is the only one we support. As a result, sparse AD is now located in a package extension which depends on SparseMatrixColorings.jl.
-
-`AutoSparse` backends only support [`jacobian`](@ref) and [`hessian`](@ref) (as well as their variants), because other operators do not output matrices.
-To obtain sparse Hessians, you need to put the `SecondOrder` backend inside `AutoSparse`, and not the other way around.
+   - [`KnownJacobianSparsityDetector`](@extref ADTypes.KnownJacobianSparsityDetector) or [`KnownHessianSparsityDetector`](@extref ADTypes.KnownHessianSparsityDetector) from [ADTypes.jl](https://github.com/SciML/ADTypes.jl) (if you already know the pattern)
+3. A coloring algorithm from [SparseMatrixColorings.jl](https://github.com/gdalle/SparseMatrixColorings.jl), such as:
+   - [`GreedyColoringAlgorithm`](@extref SparseMatrixColorings.GreedyColoringAlgorithm) (our generic recommendation)
+   - [`ConstantColoringAlgorithm`](@extref SparseMatrixColorings.ConstantColoringAlgorithm) (if you have already computed the optimal coloring and always want to return it)

 !!! note
     Symbolic backends have built-in sparsity handling, so `AutoSparse(AutoSymbolics())` and `AutoSparse(AutoFastDifferentiation())` do not need additional configuration for pattern detection or coloring.
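For illustration, the three ingredients listed above fit together as in the following sketch (not part of the diff; `AutoForwardDiff` is just one possible dense backend, and the keyword names match the package's own tutorial):

```julia
using DifferentiationInterface
import ForwardDiff
using SparseConnectivityTracer: TracerSparsityDetector
using SparseMatrixColorings: GreedyColoringAlgorithm

backend = AutoSparse(
    AutoForwardDiff();                             # 1. underlying dense backend
    sparsity_detector=TracerSparsityDetector(),    # 2. sparsity pattern detector
    coloring_algorithm=GreedyColoringAlgorithm(),  # 3. coloring algorithm
)

J = jacobian(x -> abs2.(x), backend, rand(3))  # sparse (here diagonal) Jacobian
```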
@@ -59,3 +60,10 @@ But after preparation, the more zeros are present in the matrix, the greater the

 !!! danger
     The result of preparation for an `AutoSparse` backend cannot be reused if the sparsity pattern changes.
+
+### Tuning the coloring algorithm
+
+The complexity of sparse Jacobians or Hessians grows with the number of distinct colors in a coloring of the sparsity pattern.
+To reduce this number of colors, [`GreedyColoringAlgorithm`](@ref) has two main settings: the order used for vertices and the decompression method.
+Depending on your use case, you may want to modify either of these options to increase performance.
+See the documentation of [SparseMatrixColorings.jl](https://github.com/gdalle/SparseMatrixColorings.jl) for details.
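As a rough illustration of those two settings, the sketch below builds a greedy algorithm with a non-default vertex order and substitution-based decompression. The exact names (`LargestFirst`, the `decompression` keyword) are taken from SparseMatrixColorings.jl's documented API and are an assumption here, not something this commit adds:

```julia
using SparseMatrixColorings: GreedyColoringAlgorithm, LargestFirst

# assumed API: pick a custom vertex order instead of the natural order,
# and decompress by substitution instead of direct decompression
algo = GreedyColoringAlgorithm(LargestFirst(); decompression=:substitution)
```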

DifferentiationInterface/docs/src/tutorials/advanced.md

Lines changed: 63 additions & 9 deletions
@@ -7,7 +7,7 @@ using BenchmarkTools
 using DifferentiationInterface
 import ForwardDiff, Zygote
 using SparseConnectivityTracer: TracerSparsityDetector
-using SparseMatrixColorings: GreedyColoringAlgorithm
+using SparseMatrixColorings
 ```

 ## Contexts
@@ -88,13 +88,13 @@ The following are reasonable defaults:

 ```@example tuto_advanced
 sparse_first_order_backend = AutoSparse(
-    AutoForwardDiff();
+    dense_first_order_backend;
     sparsity_detector=TracerSparsityDetector(),
     coloring_algorithm=GreedyColoringAlgorithm(),
 )

 sparse_second_order_backend = AutoSparse(
-    SecondOrder(AutoForwardDiff(), AutoZygote());
+    dense_second_order_backend;
     sparsity_detector=TracerSparsityDetector(),
     coloring_algorithm=GreedyColoringAlgorithm(),
 )
@@ -116,19 +116,73 @@ hessian(f_sparse_scalar, sparse_second_order_backend, x)
 In the examples above, we didn't use preparation.
 Sparse preparation is more costly than dense preparation, but it is even more essential.
 Indeed, once preparation is done, sparse differentiation is much faster than dense differentiation, because it makes fewer calls to the underlying function.
-The speedup becomes very visible in large dimensions.
+
+Some result analysis functions from [SparseMatrixColorings.jl](https://github.com/gdalle/SparseMatrixColorings.jl) can help you figure out what the preparation contains.
+First, it records the sparsity pattern itself (the one returned by the detector).
+
+```@example tuto_advanced
+jac_prep = prepare_jacobian(f_sparse_vector, sparse_first_order_backend, x)
+sparsity_pattern(jac_prep)
+```
+
+In forward mode, each column of the sparsity pattern gets a color.
+
+```@example tuto_advanced
+column_colors(jac_prep)
+```
+
+And the colors in turn define non-overlapping groups (for Jacobians at least, Hessians are a bit more complicated).
+
+```@example tuto_advanced
+column_groups(jac_prep)
+```
+
+### Sparsity speedup
+
+When preparation is used, the speedup due to sparsity becomes very visible in large dimensions.

 ```@example tuto_advanced
-n = 1000
-jac_prep_dense = prepare_jacobian(f_sparse_vector, dense_first_order_backend, zeros(n))
-jac_prep_sparse = prepare_jacobian(f_sparse_vector, sparse_first_order_backend, zeros(n))
+xbig = rand(1000)
 nothing # hide
 ```

 ```@example tuto_advanced
-@benchmark jacobian($f_sparse_vector, $jac_prep_dense, $dense_first_order_backend, $(randn(n)))
+jac_prep_dense = prepare_jacobian(f_sparse_vector, dense_first_order_backend, zero(xbig))
+@benchmark jacobian($f_sparse_vector, $jac_prep_dense, $dense_first_order_backend, $xbig)
 ```

 ```@example tuto_advanced
-@benchmark jacobian($f_sparse_vector, $jac_prep_sparse, $sparse_first_order_backend, $(randn(n)))
+jac_prep_sparse = prepare_jacobian(f_sparse_vector, sparse_first_order_backend, zero(xbig))
+@benchmark jacobian($f_sparse_vector, $jac_prep_sparse, $sparse_first_order_backend, $xbig)
+```
+
+Better memory use can be achieved by pre-allocating the matrix from the preparation result (so that it has the correct structure).
+
+```@example tuto_advanced
+jac_buffer = similar(sparsity_pattern(jac_prep_sparse), eltype(xbig))
+@benchmark jacobian!($f_sparse_vector, $jac_buffer, $jac_prep_sparse, $sparse_first_order_backend, $xbig)
+```
+
+And for optimal speed, one should write non-allocating and type-stable functions.
+
+```@example tuto_advanced
+function f_sparse_vector!(y::AbstractVector, x::AbstractVector)
+    n = length(x)
+    for i in eachindex(y)
+        y[i] = abs2(x[i + 1]) - abs2(x[i]) + abs2(x[n - i]) - abs2(x[n - i + 1])
+    end
+    return nothing
+end
+
+ybig = zeros(length(xbig) - 1)
+f_sparse_vector!(ybig, xbig)
+ybig ≈ f_sparse_vector(xbig)
+```
+
+In this case, the sparse Jacobian should also become non-allocating (for our specific choice of backend).
+
+```@example tuto_advanced
+jac_prep_sparse_nonallocating = prepare_jacobian(f_sparse_vector!, zero(ybig), sparse_first_order_backend, zero(xbig))
+jac_buffer = similar(sparsity_pattern(jac_prep_sparse_nonallocating), eltype(xbig))
+@benchmark jacobian!($f_sparse_vector!, $ybig, $jac_buffer, $jac_prep_sparse_nonallocating, $sparse_first_order_backend, $xbig)
 ```
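To connect the benchmarks back to the explanation above: the number of compressed pushforwards per sparse Jacobian equals the number of column groups, not the input dimension. A hedged follow-up reusing `jac_prep_sparse` and `xbig` from the tutorial (not part of the diff):

```julia
using SparseMatrixColorings: column_groups

ncolors = length(column_groups(jac_prep_sparse))  # compressed pushforwards per Jacobian
ncolors < length(xbig)                            # far fewer than one per input dimension
```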

DifferentiationInterface/ext/DifferentiationInterfaceSparseMatrixColoringsExt/DifferentiationInterfaceSparseMatrixColoringsExt.jl

Lines changed: 2 additions & 0 deletions
@@ -37,8 +37,10 @@ using SparseMatrixColorings:
     row_colors,
     column_groups,
     row_groups,
+    sparsity_pattern,
     decompress,
     decompress!
+import SparseMatrixColorings as SMC

 include("jacobian.jl")
 include("hessian.jl")

DifferentiationInterface/ext/DifferentiationInterfaceSparseMatrixColoringsExt/hessian.jl

Lines changed: 11 additions & 23 deletions
@@ -33,6 +33,10 @@ function SparseHessianPrep{B}(;
     )
 end

+SMC.sparsity_pattern(prep::SparseHessianPrep) = sparsity_pattern(prep.coloring_result)
+SMC.column_colors(prep::SparseHessianPrep) = column_colors(prep.coloring_result)
+SMC.column_groups(prep::SparseHessianPrep) = column_groups(prep.coloring_result)
+
 ## Hessian, one argument

 function DI.prepare_hessian(
@@ -68,29 +72,6 @@ function DI.prepare_hessian(
     )
 end

-function DI.hessian(
-    f::F, prep::SparseHessianPrep{B}, backend::AutoSparse, x, contexts::Vararg{Context,C}
-) where {F,B,C}
-    @compat (; coloring_result, batched_seeds, hvp_prep) = prep
-    dense_backend = dense_ad(backend)
-    Ng = length(column_groups(coloring_result))
-
-    hvp_prep_same = prepare_hvp_same_point(
-        f, hvp_prep, dense_backend, x, batched_seeds[1], contexts...
-    )
-
-    compressed_blocks = map(eachindex(batched_seeds)) do a
-        dg_batch = hvp(f, hvp_prep_same, dense_backend, x, batched_seeds[a], contexts...)
-        stack(vec, dg_batch; dims=2)
-    end
-
-    compressed_matrix = reduce(hcat, compressed_blocks)
-    if Ng < size(compressed_matrix, 2)
-        compressed_matrix = compressed_matrix[:, 1:Ng]
-    end
-    return decompress(compressed_matrix, coloring_result)
-end
-
 function DI.hessian!(
     f::F,
     hess,
@@ -132,6 +113,13 @@ function DI.hessian!(
     return hess
 end

+function DI.hessian(
+    f::F, prep::SparseHessianPrep{B}, backend::AutoSparse, x, contexts::Vararg{Context,C}
+) where {F,B,C}
+    hess = similar(sparsity_pattern(prep), eltype(x))
+    return DI.hessian!(f, hess, prep, backend, x, contexts...)
+end
+
 function DI.value_gradient_and_hessian!(
     f::F,
     grad,
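The relocated `DI.hessian` method above now allocates its output from the stored sparsity pattern and delegates to the in-place path. A hedged user-side sketch of the same idiom (the function `f` and the dimensions are made up for illustration):

```julia
using DifferentiationInterface
import ForwardDiff, Zygote
using SparseConnectivityTracer: TracerSparsityDetector
using SparseMatrixColorings: GreedyColoringAlgorithm, sparsity_pattern

f(x) = sum(abs2, diff(x))  # hypothetical test function with a banded Hessian
backend = AutoSparse(
    SecondOrder(AutoForwardDiff(), AutoZygote());  # same second-order setup as the tutorial
    sparsity_detector=TracerSparsityDetector(),
    coloring_algorithm=GreedyColoringAlgorithm(),
)
x = rand(10)

prep = prepare_hessian(f, backend, x)
H = similar(sparsity_pattern(prep), eltype(x))  # sparse matrix with the detected structure
hessian!(f, H, prep, backend, x)                # fills the nonzeros without reallocating
```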

DifferentiationInterface/ext/DifferentiationInterfaceSparseMatrixColoringsExt/jacobian.jl

Lines changed: 20 additions & 80 deletions
@@ -10,6 +10,12 @@ end

 abstract type SparseJacobianPrep <: JacobianPrep end

+SMC.sparsity_pattern(prep::SparseJacobianPrep) = sparsity_pattern(prep.coloring_result)
+SMC.column_colors(prep::SparseJacobianPrep) = column_colors(prep.coloring_result)
+SMC.column_groups(prep::SparseJacobianPrep) = column_groups(prep.coloring_result)
+SMC.row_colors(prep::SparseJacobianPrep) = row_colors(prep.coloring_result)
+SMC.row_groups(prep::SparseJacobianPrep) = row_groups(prep.coloring_result)
+
 struct PushforwardSparseJacobianPrep{
     B,
     C<:AbstractColoringResult{:nonsymmetric,:column},
@@ -148,18 +154,19 @@ end

 ## One argument

-function DI.jacobian(
-    f::F, prep::SparseJacobianPrep, backend::AutoSparse, x, contexts::Vararg{Context,C}
-) where {F,C}
-    return _sparse_jacobian_aux((f,), prep, backend, x, contexts...)
-end
-
 function DI.jacobian!(
     f::F, jac, prep::SparseJacobianPrep, backend::AutoSparse, x, contexts::Vararg{Context,C}
 ) where {F,C}
     return _sparse_jacobian_aux!((f,), jac, prep, backend, x, contexts...)
 end

+function DI.jacobian(
+    f::F, prep::SparseJacobianPrep, backend::AutoSparse, x, contexts::Vararg{Context,C}
+) where {F,C}
+    jac = similar(sparsity_pattern(prep), eltype(x))
+    return DI.jacobian!(f, jac, prep, backend, x, contexts...)
+end
+
 function DI.value_and_jacobian(
     f::F, prep::SparseJacobianPrep, backend::AutoSparse, x, contexts::Vararg{Context,C}
 ) where {F,C}
@@ -174,12 +181,6 @@

 ## Two arguments

-function DI.jacobian(
-    f!::F, y, prep::SparseJacobianPrep, backend::AutoSparse, x, contexts::Vararg{Context,C}
-) where {F,C}
-    return _sparse_jacobian_aux((f!, y), prep, backend, x, contexts...)
-end
-
 function DI.jacobian!(
     f!::F,
     y,
@@ -192,6 +193,13 @@ function DI.jacobian!(
     return _sparse_jacobian_aux!((f!, y), jac, prep, backend, x, contexts...)
 end

+function DI.jacobian(
+    f!::F, y, prep::SparseJacobianPrep, backend::AutoSparse, x, contexts::Vararg{Context,C}
+) where {F,C}
+    jac = similar(sparsity_pattern(prep), promote_type(eltype(x), eltype(y)))
+    return DI.jacobian!(f!, y, jac, prep, backend, x, contexts...)
+end
+
 function DI.value_and_jacobian(
     f!::F, y, prep::SparseJacobianPrep, backend::AutoSparse, x, contexts::Vararg{Context,C}
 ) where {F,C}
@@ -216,74 +224,6 @@

 ## Common auxiliaries

-function _sparse_jacobian_aux(
-    f_or_f!y::FY,
-    prep::PushforwardSparseJacobianPrep{B},
-    backend::AutoSparse,
-    x,
-    contexts::Vararg{Context,C},
-) where {FY,B,C}
-    @compat (; coloring_result, batched_seeds, pushforward_prep) = prep
-    dense_backend = dense_ad(backend)
-    Ng = length(column_groups(coloring_result))
-
-    pushforward_prep_same = prepare_pushforward_same_point(
-        f_or_f!y..., pushforward_prep, dense_backend, x, batched_seeds[1], contexts...
-    )
-
-    compressed_blocks = map(eachindex(batched_seeds)) do a
-        dy_batch = pushforward(
-            f_or_f!y...,
-            pushforward_prep_same,
-            dense_backend,
-            x,
-            batched_seeds[a],
-            contexts...,
-        )
-        stack(vec, dy_batch; dims=2)
-    end
-
-    compressed_matrix = reduce(hcat, compressed_blocks)
-    if Ng < size(compressed_matrix, 2)
-        compressed_matrix = compressed_matrix[:, 1:Ng]
-    end
-    return decompress(compressed_matrix, coloring_result)
-end
-
-function _sparse_jacobian_aux(
-    f_or_f!y::FY,
-    prep::PullbackSparseJacobianPrep{B},
-    backend::AutoSparse,
-    x,
-    contexts::Vararg{Context,C},
-) where {FY,B,C}
-    @compat (; coloring_result, batched_seeds, pullback_prep) = prep
-    dense_backend = dense_ad(backend)
-    Ng = length(row_groups(coloring_result))
-
-    pullback_prep_same = prepare_pullback_same_point(
-        f_or_f!y..., pullback_prep, dense_backend, x, batched_seeds[1], contexts...
-    )
-
-    compressed_blocks = map(eachindex(batched_seeds)) do a
-        dx_batch = pullback(
-            f_or_f!y...,
-            pullback_prep_same,
-            dense_backend,
-            x,
-            batched_seeds[a],
-            contexts...,
-        )
-        stack(vec, dx_batch; dims=1)
-    end
-
-    compressed_matrix = reduce(vcat, compressed_blocks)
-    if Ng < size(compressed_matrix, 1)
-        compressed_matrix = compressed_matrix[1:Ng, :]
-    end
-    return decompress(compressed_matrix, coloring_result)
-end
-
 function _sparse_jacobian_aux!(
     f_or_f!y::FY,
     jac,
