The `Cache` context in DifferentiationInterface is a really great feature for functions that mutate preallocated arrays in place, but I found that it is slow and allocates a lot with the Enzyme backend. I'm not sure whether I am using it correctly; here is a simple MWE:
using ConcreteStructs
using DifferentiationInterface
using Enzyme
using FiniteDiff
const DI = DifferentiationInterface
using BenchmarkTools
# Bundle of preallocated work buffers that is handed to DifferentiationInterface
# through `Cache(...)`. The fields carry no type annotations; `@concrete` (from
# ConcreteStructs) is relied on to turn them into concrete type parameters
# (NOTE(review): confirm against the ConcreteStructs documentation).
@concrete struct TESTCache
# Residual buffers; forwarded as the `residual` argument of `test` by `f`.
res
# State buffers; forwarded as the `y` argument of `test` by `f`.
u
end
# Helper functions to convert between a vector of arrays and a flat vector
"""
    recursive_unflatten!(y, x)

Fill the arrays stored in `y`, one after another, with consecutive chunks of the flat
vector `x`. Each array receives as many entries as it has elements; the chunks are read
through views, so no slice of `x` is copied. Returns `y`.
"""
function recursive_unflatten!(y::Vector{<:AbstractArray}, x::AbstractVector)
    offset = 0
    for block in y
        n = length(block)
        # Non-allocating window onto the next `n` entries of `x`.
        copyto!(block, view(x, (offset + 1):(offset + n)))
        offset += n
    end
    return y
end
"""
    recursive_flatten!(y, x)

Copy the arrays stored in `x`, one after another, into consecutive chunks of the flat
vector `y`. The destination chunks are views into `y`, so the data is written in place.
Returns `y`.
"""
function recursive_flatten!(y::AbstractVector, x::Vector{<:AbstractArray})
    offset = 0
    for block in x
        n = length(block)
        # Non-allocating window onto the next `n` slots of `y`.
        copyto!(view(y, (offset + 1):(offset + n)), block)
        offset += n
    end
    return y
end
"""
    recursive_length(x)

Return the total number of elements over all arrays stored in `x`.

An empty `x` yields `0`; the explicit `init` keeps the reduction well-defined when
there is nothing to sum.
"""
recursive_length(x::Vector{<:AbstractArray}) = sum(length, x; init = 0)
"""
    bc(residual, u, p, t)

Write the boundary residuals into the preallocated buffers `residual[1]` and
`residual[2]`: the first block of `u` shifted by `+π/2` and the last block of `u`
shifted by `-π/2`. `p` and `t` are accepted but not used.

The buffers are overwritten in place with `.=`, so the entries of `residual` keep their
identity and no new array is allocated per call. `residual[1]` and `residual[2]` must
therefore be broadcast-compatible with `u[1]` and `u[end]`. Returns `residual[2]`.
"""
function bc(residual, u, p, t)
    # `.=` writes into the existing buffer; a plain `=` would allocate a fresh array
    # and rebind the slot, defeating the preallocated cache.
    residual[1] .= u[1] .+ pi / 2
    residual[2] .= u[end] .- pi / 2
    return residual[2]
end
"""
    test(du, u, y, residual)

Evaluate the residual of the flat state `u` into the flat output `du`, using `y` and
`residual` as work buffers: `u` is unpacked into `y`, `bc` fills `residual` from it, and
`residual` is packed into `du`. Returns the result of `recursive_flatten!`.
"""
function test(du, u, y, residual)
    unpacked = recursive_unflatten!(y, u)
    bc(residual, unpacked, 0, 0)
    return recursive_flatten!(du, residual)
end
"""
    f(du, u, c::TESTCache)

In-place entry point handed to DifferentiationInterface: forwards to `test`, taking the
work buffers from the cache `c` (`c.u` as the state buffer, `c.res` as the residual
buffer).
"""
function f(du, u, c::TESTCache)
    state_buffer, residual_buffer = c.u, c.res
    return test(du, u, state_buffer, residual_buffer)
end
# Problem data: a flat input of 100 entries and an output of the same shape; the cache
# holds two sets of 50 buffers of length 2 each.
x = rand(100)
y = similar(x)
test_cache = TESTCache([rand(2) for _ in 1:50], [rand(2) for _ in 1:50])

backend_finitediff = AutoFiniteDiff()
backend_enzyme = AutoEnzyme(; mode = Enzyme.Forward)

# All of the above are non-constant globals. They are interpolated with `$` inside
# `@benchmark` so that the timings measure `DI.jacobian` itself rather than global
# lookups and the dynamic dispatch they cause.
prep_finitediff = DI.prepare_jacobian(f, y, backend_finitediff, x, Cache(test_cache))
@benchmark DI.jacobian(f, $y, $prep_finitediff, $backend_finitediff, $x, Cache($test_cache))

prep_enzyme = DI.prepare_jacobian(f, y, backend_enzyme, x, Cache(test_cache))
@benchmark DI.jacobian(f, $y, $prep_enzyme, $backend_enzyme, $x, Cache($test_cache))
FiniteDiff
BenchmarkTools.Trial: 10000 samples with 1 evaluation per sample.
Range (min … max): 45.334 μs … 5.164 ms ┊ GC (min … max): 0.00% … 98.88%
Time (median): 52.333 μs ┊ GC (median): 0.00%
Time (mean ± σ): 56.231 μs ± 105.995 μs ┊ GC (mean ± σ): 6.85% ± 4.87%
▅▃▁ ▄▇▄▂▄▇█▇▅▄▂▂▂▂▁▁▁ ▂
██████████████████████▇▇▆▆▆▆▅▅▄▁▄▅▄▄▃▄▁▄▁▃▄▃▄▁▄▄▁▃▅▄▄▃▁▄▄▄▄▄ █
45.3 μs Histogram: log(frequency) by time 87 μs <
Memory estimate: 94.11 KiB, allocs estimate: 411.
Enzyme
BenchmarkTools.Trial: 5592 samples with 1 evaluation per sample.
Range (min … max): 584.958 μs … 51.009 ms ┊ GC (min … max): 0.00% … 97.69%
Time (median): 665.562 μs ┊ GC (median): 0.00%
Time (mean ± σ): 890.874 μs ± 1.279 ms ┊ GC (mean ± σ): 14.73% ± 16.62%
▆█▆▃▃▁ ▁
███████▇▆▆▄▄▂▄▄▄▄▅▄▃▄▄▄▄▄▄▃▂▃▆▆▆▇▇██▇▇▆▇▆▆▅▆▆▄▆▅▅▄▂▄▄▃▄▄▄▃▃▃ █
585 μs Histogram: log(frequency) by time 3.74 ms <
Memory estimate: 2.24 MiB, allocs estimate: 23486.
The `Cache` context in DifferentiationInterface is a really great feature for functions that mutate preallocated arrays in place, but I found that it is slow and allocates a lot with the Enzyme backend. I'm not sure whether I am using it correctly; here is a simple MWE:
FiniteDiff
Enzyme