
Commit c768c3c

Merge pull request #1419 from ChrisRackauckas-Claude/docs-prefer-mooncake
[WIP] docs: prefer Mooncake over Zygote where it works end-to-end
2 parents 38ae573 + f4e8553 commit c768c3c

25 files changed, 124 additions & 81 deletions

docs/Project.toml

Lines changed: 5 additions & 1 deletion
````diff
@@ -7,6 +7,7 @@ DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def"
 DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
 DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
@@ -15,6 +16,7 @@ IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
 MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
+Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
 OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
 OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1"
@@ -35,13 +37,14 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

 [compat]
 Calculus = "0.5"
-ComponentArrays = "0.15"
+ComponentArrays = "0.15.34"
 DataInterpolations = "3.10, 4, 5, 6, 7, 8"
 DelayDiffEq = "5"
 DelimitedFiles = "1"
 DiffEqCallbacks = "2.24, 3, 4"
 DiffEqNoiseProcess = "5.14"
 DifferentialEquations = "7"
+DifferentiationInterface = "0.6, 0.7"
 Documenter = "1"
 Enzyme = "0.12, 0.13"
 Flux = "0.14, 0.15, 0.16"
@@ -50,6 +53,7 @@ IterTools = "1"
 Lux = "1"
 LuxCUDA = "0.3"
 MLUtils = "0.4"
+Mooncake = "0.5"
 Optimization = "3.9, 4, 5"
 OptimizationOptimJL = "0.2, 0.3, 0.4"
 OptimizationOptimisers = "0.2, 0.3"
````
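
For anyone mirroring this docs environment locally, the two new dependencies can be added with Pkg. This is a sketch of my own, not a command the commit contains, and the resolved versions will depend on the rest of the environment:

```julia
# Add the two packages this PR introduces into docs/Project.toml,
# constrained to the same compat ranges as above.
import Pkg
Pkg.add(name = "Mooncake", version = "0.5")
Pkg.add(name = "DifferentiationInterface", version = "0.7")
```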

docs/src/Benchmark.md

Lines changed: 5 additions & 2 deletions
````diff
@@ -40,7 +40,8 @@ Quick summary:
 import OrdinaryDiffEq as ODE
 import Lux
 import SciMLSensitivity as SMS
-import Zygote
+import Mooncake
+import DifferentiationInterface as DI
 import BenchmarkTools
 import Random
 import ComponentArrays as CA
@@ -80,7 +81,9 @@ for sensealg in (SMS.InterpolatingAdjoint(autojacvec = SMS.ZygoteVJP()),
        return loss
    end

-   t = BenchmarkTools.@belapsed Zygote.gradient($loss_neuralode, $u0, $ps, $st)
+   backend = DI.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
+   loss_ps = p -> loss_neuralode(u0, p, st)
+   t = BenchmarkTools.@belapsed DI.gradient($loss_ps, $backend, $ps)
    println("$(sensealg) took $(t)s")
 end
````
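
The new benchmarking pattern is easy to try in isolation. A minimal sketch, where the toy `f` stands in for the page's `p -> loss_neuralode(u0, p, st)` closure and everything else follows the API used in the hunk above:

```julia
# DifferentiationInterface + Mooncake gradient call: close over every argument
# except the parameters, then hand the closure and an AutoMooncake backend
# to DI.gradient.
import DifferentiationInterface as DI
import Mooncake

f(p) = sum(abs2, p)  # toy stand-in for the neural ODE loss

backend = DI.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
DI.gradient(f, backend, [1.0, 2.0, 3.0])  # == [2.0, 4.0, 6.0]
```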

docs/src/examples/dde/delay_diffeq.md

Lines changed: 10 additions & 5 deletions
````diff
@@ -10,6 +10,7 @@ import Optimization as OPT
 import SciMLSensitivity as SMS
 import OptimizationPolyalgorithms as OPA
 import DelayDiffEq as DDE
+import Mooncake

 # Define the same LV equation, but including a delay parameter
 function delay_lotka_volterra!(du, u, h, p, t)
@@ -35,7 +36,7 @@ prob_dde = DDE.DDEProblem(delay_lotka_volterra!, u0, h, (0.0, 10.0),

 function predict_dde(p)
     return Array(ODE.solve(prob_dde, DDE.MethodOfSteps(ODE.Tsit5());
-        u0, p, saveat = 0.1, sensealg = SMS.ReverseDiffAdjoint()))
+        u0, p, saveat = 0.1))
 end

 loss_dde(p) = sum(abs2, x - 1 for x in predict_dde(p))
@@ -50,14 +51,18 @@ callback = function (state, l; doplot = false)
     return false
 end

-adtype = OPT.AutoZygote()
+adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
 optf = OPT.OptimizationFunction((x, p) -> loss_dde(x), adtype)
 optprob = OPT.OptimizationProblem(optf, p)
 result_dde = OPT.solve(optprob, OPA.PolyOpt(); maxiters = 300, callback)
 ```

-Notice that we chose `sensealg = ReverseDiffAdjoint()` to utilize the ReverseDiff.jl
-reverse-mode to handle the delay differential equation.
+The `sensealg` is left at its default. For DDEs the automatic choice is
+[`ForwardDiffSensitivity`](@ref) (which differentiates through
+`MethodOfSteps` via dual numbers) for problems with fewer than 100
+parameters, and [`ReverseDiffAdjoint`](@ref) for larger ones —
+[continuous adjoints](@ref sensitivity_diffeq) are not yet defined for
+DDEs, so the discretize-then-optimize methods are the only option.

 We define a callback to display the solution at the current parameters for each step of the training:

@@ -76,7 +81,7 @@ end
 We use `Optimization.solve` to optimize the parameters for our loss function:

 ```@example dde
-adtype = OPT.AutoZygote()
+adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
 optf = OPT.OptimizationFunction((x, p) -> loss_dde(x), adtype)
 optprob = OPT.OptimizationProblem(optf, p)
 result_dde = OPT.solve(optprob, OPA.PolyOpt(); callback)
````
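
If you would rather pin the sensitivity method than rely on the default described above, it can still be passed explicitly. A sketch under the page's setup (`prob_dde`, `u0`, and `p` come from earlier cells not shown in this hunk; `predict_fwd`/`predict_rev` are hypothetical names of mine):

```julia
# Both options are discretize-then-optimize methods, consistent with the note
# above that continuous adjoints are not defined for DDEs.
import SciMLSensitivity as SMS

# Forward mode via dual numbers; the default for fewer than 100 parameters.
predict_fwd(p) = Array(ODE.solve(prob_dde, DDE.MethodOfSteps(ODE.Tsit5());
    u0, p, saveat = 0.1, sensealg = SMS.ForwardDiffSensitivity()))

# Taped reverse mode; the default for larger parameter counts.
predict_rev(p) = Array(ODE.solve(prob_dde, DDE.MethodOfSteps(ODE.Tsit5());
    u0, p, saveat = 0.1, sensealg = SMS.ReverseDiffAdjoint()))
```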

docs/src/examples/hybrid_jump/bouncing_ball.md

Lines changed: 7 additions & 2 deletions
````diff
@@ -13,6 +13,7 @@ import OptimizationPolyalgorithms as OPA
 import SciMLSensitivity as SMS
 import OrdinaryDiffEq as ODE
 import DiffEqCallbacks as DEC
+import Mooncake

 function f(du, u, p, t)
     du[1] = u[2]
@@ -44,11 +45,15 @@ the value 20:
 function loss(θ)
     sol = ODE.solve(prob, ODE.Tsit5(), p = [9.8, θ[1]]; callback)
     target = 20.0
-    abs2(sol[end][1] - target)
+    # Use `last(sol.u)[1]` instead of `sol[end][1]` — Mooncake's pullback for
+    # `getindex(::ODESolution, end)` currently has a `BoundsError` bug
+    # (`SciMLBaseMooncakeExt._scatter_pullback`). Indexing the underlying
+    # `sol.u::Vector{Vector{Float64}}` directly avoids the bad path.
+    abs2(last(sol.u)[1] - target)
 end

 loss([0.8])
-adtype = OPT.AutoZygote()
+adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
 optf = OPT.OptimizationFunction((x, p) -> loss(x), adtype)
 optprob = OPT.OptimizationProblem(optf, [0.8])
 @time res = OPT.solve(optprob, OPA.PolyOpt(), maxiters = 300)
````
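
The indexing workaround in this hunk can be checked on its own. A standalone sketch with a toy ODE of my choosing (unrelated to the page's `prob`):

```julia
# last(sol.u)[1] and sol[end][1] return the same final state; the first reads
# the plain Vector-of-states field sol.u, which is the path the diff's comment
# says Mooncake differentiates correctly, while the second goes through
# getindex(::ODESolution, end).
import OrdinaryDiffEq as ODE

prob = ODE.ODEProblem((u, p, t) -> -u, [1.0], (0.0, 1.0))
sol = ODE.solve(prob, ODE.Tsit5())

last(sol.u)[1]  # differentiation-friendly final state
sol[end][1]     # same value, but the path flagged as buggy under Mooncake
```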

docs/src/examples/hybrid_jump/hybrid_diffeq.md

Lines changed: 6 additions & 4 deletions
````diff
@@ -12,6 +12,7 @@ import ComponentArrays as CA
 import Random
 import SciMLSensitivity as SMS
 import Lux
+import Mooncake
 import OrdinaryDiffEq as ODE
 import Plots
 import Optimization as OPT
@@ -50,9 +51,7 @@ cb = DEC.PresetTimeCallback(dosetimes, affect!, save_positions = (false, false))

 function predict_n_ode(p)
     _prob = ODE.remake(prob; p)
-    Array(ODE.solve(_prob, ODE.Tsit5(); u0 = z0, p, callback = cb, saveat = t,
-        sensealg = SMS.ReverseDiffAdjoint()))[1:2, :]
-    #Array(solve(prob,Tsit5();u0=z0,p,saveat=t))[1:2,:]
+    Array(ODE.solve(_prob, ODE.Tsit5(); u0 = z0, p, callback = cb, saveat = t))[1:2, :]
 end

 function loss_n_ode(p, _)
@@ -73,7 +72,10 @@ cba = function (state, l; doplot = false) #callback function to observe training
 end

 res = OPT.solve(
-    OPT.OptimizationProblem(OPT.OptimizationFunction(loss_n_ode, OPT.AutoZygote()),
+    OPT.OptimizationProblem(
+        OPT.OptimizationFunction(
+            loss_n_ode, OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
+        ),
         CA.ComponentArray(ps)),
     OPO.Adam(0.05); callback = cba, maxiters = 1000)
 ```
````

docs/src/examples/neural_ode/simplechains.md

Lines changed: 1 addition & 0 deletions
````diff
@@ -1,5 +1,6 @@
 # Faster Neural Ordinary Differential Equations with SimpleChains

+
 [SimpleChains](https://github.com/PumasAI/SimpleChains.jl) has demonstrated performance boosts of ~5x and ~30x when compared to other mainstream deep learning frameworks like Pytorch for the training and evaluation in the specific case of small neural networks. For the nitty-gritty details, as well as, some SciML related videos around the need and applications of such a library, we can refer to this [blogpost](https://julialang.org/blog/2022/04/simple-chains/). As for doing Scientific Machine Learning, how do we even begin with training neural ODEs with any generic deep learning library?

 ## Training Data
````

docs/src/examples/ode/exogenous_input.md

Lines changed: 2 additions & 1 deletion
````diff
@@ -49,6 +49,7 @@ import OptimizationPolyalgorithms as OPA
 import OptimizationOptimisers as OPO
 import Plots
 import Random
+import Mooncake

 rng = Random.default_rng()
 tspan = (0.1, 10.0)
@@ -93,7 +94,7 @@ function loss(p)
     return sum(abs2.(y[1:N] .- sol')) / N
 end

-adtype = OPT.AutoZygote()
+adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
 optf = OPT.OptimizationFunction((x, p) -> loss(x), adtype)
 optprob = OPT.OptimizationProblem(optf, CA.ComponentArray{Float64}(p_model))
````

docs/src/examples/ode/second_order_adjoints.md

Lines changed: 8 additions & 5 deletions
````diff
@@ -13,6 +13,7 @@ optimization, while `KrylovTrustRegion` will utilize a Krylov-based method
 with Hessian-vector products (never forming the Hessian) for large parameter
 optimizations.

+
 ```@example secondorderadjoints
 import SciMLSensitivity as SMS
 import Lux
@@ -23,6 +24,7 @@ import OrdinaryDiffEq as ODE
 import Plots
 import Random
 import OptimizationOptimJL as OOJ
+import Mooncake

 u0 = Float32[2.0; 0.0]
 datasize = 30
@@ -83,13 +85,14 @@ callback = function (state, l; doplot = false)
     return l < 0.01
 end

-adtype = OPT.AutoZygote()
-optf = OPT.OptimizationFunction((x, p) -> loss_neuralode(x), adtype)
-
-optprob1 = OPT.OptimizationProblem(optf, ps)
+adtype1 = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
+optf1 = OPT.OptimizationFunction((x, p) -> loss_neuralode(x), adtype1)
+optprob1 = OPT.OptimizationProblem(optf1, ps)
 pstart = OPT.solve(optprob1, OPO.Adam(0.01); callback, maxiters = 100).u

-optprob2 = OPT.OptimizationProblem(optf, pstart)
+adtype2 = OPT.AutoZygote()
+optf2 = OPT.OptimizationFunction((x, p) -> loss_neuralode(x), adtype2)
+optprob2 = OPT.OptimizationProblem(optf2, pstart)
 pmin = OPT.solve(optprob2, OOJ.NewtonTrustRegion(); callback, maxiters = 200)
 ```
````
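
Note the split this hunk introduces: Mooncake handles the first-order Adam warm-up, while the `NewtonTrustRegion` stage keeps `AutoZygote`, since that stage performs the second-order optimization described at the top of the page. When mixing backends like this, a cheap cross-check that they agree on the gradient can be worthwhile; a sketch on a toy function of my own, not part of the commit:

```julia
# Compare Mooncake and Zygote gradients through DifferentiationInterface;
# on a smooth function the two backends should agree to numerical precision.
import DifferentiationInterface as DI
import Mooncake, Zygote

f(x) = sum(abs2, x)
x = [1.0, -2.0, 3.0]

g_mooncake = DI.gradient(f, DI.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true)), x)
g_zygote = DI.gradient(f, DI.AutoZygote(), x)
isapprox(g_mooncake, g_zygote)  # expected: true
```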

docs/src/examples/ode/second_order_neural.md

Lines changed: 2 additions & 1 deletion
````diff
@@ -29,6 +29,7 @@ import OptimizationOptimisers as OPO
 import RecursiveArrayTools
 import Random
 import ComponentArrays as CA
+import Mooncake

 u0 = Float32[0.0; 2.0]
 du0 = Float32[0.0; 0.0]
@@ -61,7 +62,7 @@ callback = function (state, l)
     l < 0.01
 end

-adtype = OPT.AutoZygote()
+adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
 optf = OPT.OptimizationFunction((x, p) -> loss_n_ode(x), adtype)
 optprob = OPT.OptimizationProblem(optf, ps)
````

docs/src/examples/optimal_control/feedback_control.md

Lines changed: 1 addition & 0 deletions
````diff
@@ -4,6 +4,7 @@ You can also mix a known differential equation and a neural differential
 equation, so that the parameters and the neural network are estimated
 simultaneously!

+
 We will assume that we know the dynamics of the second equation
 (linear dynamics), and our goal is to find a neural network that is dependent
 on the current state of the dynamical system that will control the second
````
