diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml index cc91afe1d..2989e64db 100644 --- a/.JuliaFormatter.toml +++ b/.JuliaFormatter.toml @@ -1,6 +1,3 @@ style = "blue" -align_assignment = true -align_struct_field = true -align_conditional = true -align_pair_arrow = true -align_matrix = true +format_docstrings = true +format_markdown = true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bfa4f3a3e..c3b01a95e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,8 +2,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.2.0 hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - id: check-yaml - id: check-added-large-files - id: check-merge-conflict diff --git a/DifferentiationInterface/CHANGELOG.md b/DifferentiationInterface/CHANGELOG.md index a40a9a1fa..7d097a81e 100644 --- a/DifferentiationInterface/CHANGELOG.md +++ b/DifferentiationInterface/CHANGELOG.md @@ -5,92 +5,67 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [Unreleased](https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.7.5...main) -## [0.7.5] +## [0.7.5](https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.7.4...DifferentiationInterface-v0.7.5) ### Added -- Support forward-mode Mooncake with `AutoMooncakeForward` ([#813]) + - Support forward-mode Mooncake with `AutoMooncakeForward` ([#813](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/813)) -## [0.7.4] +## [0.7.4](https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.7.3...DifferentiationInterface-v0.7.4) ### Added -- Make `AutoForwardFromPrimitive` and `AutoReverseFromPrimitive` public ([#825]) + - Make `AutoForwardFromPrimitive` and `AutoReverseFromPrimitive` public ([#825](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/825)) ### Fixed -- Replace `one` with `oneunit` in basis computation ([#826]) + - Replace `one` with `oneunit` in basis computation ([#826](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/826)) -## [0.7.3] +## [0.7.3](https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.7.2...DifferentiationInterface-v0.7.3) ### Fixed -- Bump compat for SparseConnectivityTracer v1 ([#823]) + - Bump compat for SparseConnectivityTracer v1 ([#823](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/823)) -## [0.7.2] +## [0.7.2](https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.7.1...DifferentiationInterface-v0.7.2) ### Feat -- Backend switching for Mooncake ([#768]) + - Backend switching for Mooncake ([#768](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/768)) ### Fixed -- Speed up sparse preparation for GPU arrays ([#818]) + - Speed up sparse preparation for GPU arrays ([#818](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/818)) -## [0.7.1] +## [0.7.1](https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.7.0...DifferentiationInterface-v0.7.1) ### Feat -- Use Mooncake's internal copy utilities ([#809]) + - Use Mooncake's internal copy utilities ([#809](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/809)) ### Fixed -- Take `absstep` into account for 
FiniteDiff ([#812]) -- Make basis work for `CuArray` ([#810]) + - Take `absstep` into account for FiniteDiff ([#812](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/812)) + - Make basis work for `CuArray` ([#810](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/810)) -## [0.7.0] +## [0.7.0](https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.6.54...DifferentiationInterface-v0.7.0) ### Changed -- Preparation is now strict by default ([#799]) -- New Arxiv preprint for citation ([#795]) + - Preparation is now strict by default ([#799](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/799)) + - New Arxiv preprint for citation ([#795](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/795)) -## [0.6.54] - 2025-05-11 +## [0.6.54](https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.6.53...DifferentiationInterface-v0.6.54) - 2025-05-11 ### Added -- Dependency compat bounds for extras ([#790]) -- Error hints for Enzyme ([#788]) + - Dependency compat bounds for extras ([#790](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/790)) + - Error hints for Enzyme ([#788](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/788)) -## [0.6.53] - 2025-05-07 +## [0.6.53](https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.6.52...DifferentiationInterface-v0.6.53) - 2025-05-07 ### Changed -- Allocate Enzyme shadow memory during preparation ([#782]) - -[unreleased]: https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.7.5...main -[0.7.5]: https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.7.4...DifferentiationInterface-v0.7.5 -[0.7.4]: https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.7.3...DifferentiationInterface-v0.7.4 -[0.7.3]: https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.7.2...DifferentiationInterface-v0.7.3 -[0.7.2]: https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.7.1...DifferentiationInterface-v0.7.2 -[0.7.1]: https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.7.0...DifferentiationInterface-v0.7.1 -[0.7.0]: https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.6.54...DifferentiationInterface-v0.7.0 -[0.6.54]: https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.6.53...DifferentiationInterface-v0.6.54 -[0.6.53]: https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterface-v0.6.52...DifferentiationInterface-v0.6.53 - -[#826]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/826 -[#825]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/825 -[#823]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/823 -[#818]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/818 -[#813]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/813 -[#812]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/812 -[#810]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/810 -[#809]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/809 -[#799]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/799 -[#795]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/795 
-[#790]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/790 -[#788]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/788 -[#782]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/782 -[#768]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/768 + - Allocate Enzyme shadow memory during preparation ([#782](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/782)) diff --git a/DifferentiationInterface/README.md b/DifferentiationInterface/README.md index a82202fe5..a1c7dd5ed 100644 --- a/DifferentiationInterface/README.md +++ b/DifferentiationInterface/README.md @@ -5,12 +5,12 @@ [![Build Status](https://github.com/JuliaDiff/DifferentiationInterface.jl/actions/workflows/Test.yml/badge.svg?branch=main)](https://github.com/JuliaDiff/DifferentiationInterface.jl/actions/workflows/Test.yml?query=branch%3Amain) [![Coverage](https://codecov.io/gh/JuliaDiff/DifferentiationInterface.jl/branch/main/graph/badge.svg?flag=DI)](https://app.codecov.io/gh/JuliaDiff/DifferentiationInterface.jl) [![Code Style: Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/JuliaDiff/BlueStyle) -[![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac) +[![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor%27s%20Guide-blueviolet)](https://github.com/SciML/ColPrac) [![DOI](https://zenodo.org/badge/740973714.svg)](https://zenodo.org/doi/10.5281/zenodo.11092033) -| Package | Docs | -| :--------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| DifferentiationInterface | [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterface/stable/) [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterface/dev/) | +| Package | Docs | +|:----------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| +| DifferentiationInterface | [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterface/stable/) [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterface/dev/) | | DifferentiationInterfaceTest | [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterfaceTest/stable/) [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterfaceTest/dev/) | An interface to various automatic differentiation (AD) backends in Julia. @@ -19,31 +19,31 @@ An interface to various automatic differentiation (AD) backends in Julia. 
This package provides a unified syntax to differentiate functions, including: -- First- and second-order operators (gradients, Jacobians, Hessians and more) -- In-place and out-of-place differentiation -- Preparation mechanism (e.g. to pre-allocate a cache or record a tape) -- Built-in sparsity handling -- Thorough validation on standard inputs and outputs (numbers, vectors, matrices) -- Testing and benchmarking utilities accessible to users with [DifferentiationInterfaceTest](https://github.com/JuliaDiff/DifferentiationInterface.jl/tree/main/DifferentiationInterfaceTest) + - First- and second-order operators (gradients, Jacobians, Hessians and more) + - In-place and out-of-place differentiation + - Preparation mechanism (e.g. to pre-allocate a cache or record a tape) + - Built-in sparsity handling + - Thorough validation on standard inputs and outputs (numbers, vectors, matrices) + - Testing and benchmarking utilities accessible to users with [DifferentiationInterfaceTest](https://github.com/JuliaDiff/DifferentiationInterface.jl/tree/main/DifferentiationInterfaceTest) ## Compatibility We support the following backends defined by [ADTypes.jl](https://github.com/SciML/ADTypes.jl): -- [ChainRulesCore.jl](https://github.com/JuliaDiff/ChainRulesCore.jl) -- [Diffractor.jl](https://github.com/JuliaDiff/Diffractor.jl) (currently broken) -- [Enzyme.jl](https://github.com/EnzymeAD/Enzyme.jl) (see below) -- [FastDifferentiation.jl](https://github.com/brianguenter/FastDifferentiation.jl) -- [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) -- [FiniteDifferences.jl](https://github.com/JuliaDiff/FiniteDifferences.jl) -- [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl) -- [GTPSA.jl](https://github.com/bmad-sim/GTPSA.jl) -- [Mooncake.jl](https://github.com/chalk-lab/Mooncake.jl) -- [PolyesterForwardDiff.jl](https://github.com/JuliaDiff/PolyesterForwardDiff.jl) -- [ReverseDiff.jl](https://github.com/JuliaDiff/ReverseDiff.jl) -- [Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl) -- [Tracker.jl](https://github.com/FluxML/Tracker.jl) -- [Zygote.jl](https://github.com/FluxML/Zygote.jl) + - [ChainRulesCore.jl](https://github.com/JuliaDiff/ChainRulesCore.jl) + - [Diffractor.jl](https://github.com/JuliaDiff/Diffractor.jl) (currently broken) + - [Enzyme.jl](https://github.com/EnzymeAD/Enzyme.jl) (see below) + - [FastDifferentiation.jl](https://github.com/brianguenter/FastDifferentiation.jl) + - [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) + - [FiniteDifferences.jl](https://github.com/JuliaDiff/FiniteDifferences.jl) + - [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl) + - [GTPSA.jl](https://github.com/bmad-sim/GTPSA.jl) + - [Mooncake.jl](https://github.com/chalk-lab/Mooncake.jl) + - [PolyesterForwardDiff.jl](https://github.com/JuliaDiff/PolyesterForwardDiff.jl) + - [ReverseDiff.jl](https://github.com/JuliaDiff/ReverseDiff.jl) + - [Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl) + - [Tracker.jl](https://github.com/FluxML/Tracker.jl) + - [Zygote.jl](https://github.com/FluxML/Zygote.jl) > [!CAUTION] > Note that in some cases, going through DifferentiationInterface.jl might be slower or cause more errors than a direct call to the backend's API. This is especially true for Enzyme.jl, whose handling of activities and multiple arguments is not fully supported here. We are working on this challenge, and welcome any suggestions or contributions. 
Meanwhile, if differentiation fails or takes too long, consider using Enzyme.jl through its [native API](https://enzymead.github.io/Enzyme.jl/stable/) instead. @@ -63,9 +63,9 @@ To install the development version, run this instead: ```julia using Pkg -Pkg.add( +Pkg.add(; url="https://github.com/JuliaDiff/DifferentiationInterface.jl", - subdir="DifferentiationInterface" + subdir="DifferentiationInterface", ) ``` @@ -73,15 +73,17 @@ Pkg.add( ```julia using DifferentiationInterface -import ForwardDiff, Enzyme, Zygote # AD backends you want to use +using ForwardDiff: ForwardDiff +using Enzyme: Enzyme +using Zygote: Zygote # AD backends you want to use f(x) = sum(abs2, x) x = [1.0, 2.0] value_and_gradient(f, AutoForwardDiff(), x) # returns (5.0, [2.0, 4.0]) with ForwardDiff.jl -value_and_gradient(f, AutoEnzyme(), x) # returns (5.0, [2.0, 4.0]) with Enzyme.jl -value_and_gradient(f, AutoZygote(), x) # returns (5.0, [2.0, 4.0]) with Zygote.jl +value_and_gradient(f, AutoEnzyme(), x) # returns (5.0, [2.0, 4.0]) with Enzyme.jl +value_and_gradient(f, AutoZygote(), x) # returns (5.0, [2.0, 4.0]) with Zygote.jl ``` To improve your performance by up to several orders of magnitude compared to this example, take a look at the tutorial and its section on operator preparation. @@ -90,8 +92,8 @@ To improve your performance by up to several orders of magnitude compared to thi Whenever you refer to this package or the ideas it contains, please cite: -1. our preprint [*A Common Interface for Automatic Differentiation*](https://arxiv.org/abs/2505.05542); -2. our inspiration [AbstractDifferentiation.jl](https://github.com/JuliaDiff/AbstractDifferentiation.jl). + 1. our preprint [*A Common Interface for Automatic Differentiation*](https://arxiv.org/abs/2505.05542); + 2. our inspiration [AbstractDifferentiation.jl](https://github.com/JuliaDiff/AbstractDifferentiation.jl). 
You can use the provided [`CITATION.cff`](https://github.com/JuliaDiff/DifferentiationInterface.jl/blob/main/CITATION.cff) file or the following BibTeX entries: diff --git a/DifferentiationInterface/docs/src/assets/logo.jl b/DifferentiationInterface/docs/src/assets/logo.jl index ef4609669..d21ef420e 100644 --- a/DifferentiationInterface/docs/src/assets/logo.jl +++ b/DifferentiationInterface/docs/src/assets/logo.jl @@ -13,11 +13,11 @@ end # ╔═╡ d1b44dac-8487-4581-82fd-ece0ce89f8e2 begin const purple = Luxor.julia_purple - const red = Luxor.julia_red - const green = Luxor.julia_green - const blue = Luxor.julia_blue - const white = RGB(1, 1, 1) - const black = RGB(0, 0, 0) + const red = Luxor.julia_red + const green = Luxor.julia_green + const blue = Luxor.julia_blue + const white = RGB(1, 1, 1) + const black = RGB(0, 0, 0) const colors = (purple, red, green) end; diff --git a/DifferentiationInterface/docs/src/dev_guide.md b/DifferentiationInterface/docs/src/dev_guide.md index 3fa1b66b9..4538a1117 100644 --- a/DifferentiationInterface/docs/src/dev_guide.md +++ b/DifferentiationInterface/docs/src/dev_guide.md @@ -7,14 +7,14 @@ It is not part of the public API and the content below may become outdated, in w The package is structured around 8 [operators](@ref Operators): -- [`derivative`](@ref) -- [`second_derivative`](@ref) -- [`gradient`](@ref) -- [`jacobian`](@ref) -- [`hessian`](@ref) -- [`pushforward`](@ref) -- [`pullback`](@ref) -- [`hvp`](@ref) + - [`derivative`](@ref) + - [`second_derivative`](@ref) + - [`gradient`](@ref) + - [`jacobian`](@ref) + - [`hessian`](@ref) + - [`pushforward`](@ref) + - [`pullback`](@ref) + - [`hvp`](@ref) Most operators have 4 variants, which look like this in the first order: `operator`, `operator!`, `value_and_operator`, `value_and_operator!`. @@ -39,6 +39,7 @@ In the main package, you should define a new struct `SuperDiffBackend` which sub You also have to define [`ADTypes.mode`](@extref) and [`DifferentiationInterface.inplace_support`](@ref) on `SuperDiffBackend`. !!! info + In the end, this backend struct will need to be contributed to [ADTypes.jl](https://github.com/SciML/ADTypes.jl). However, putting it in the DifferentiationInterface.jl PR is a good first step for debugging. @@ -46,7 +47,7 @@ In a [package extension](https://pkgdocs.julialang.org/v1/creating-packages/#Con The exact requirements depend on the differentiation mode you chose: | backend mode | pushforward necessary | pullback necessary | -| :------------------------------------------------ | :-------------------- | :----------------- | +|:------------------------------------------------- |:--------------------- |:------------------ | | [`ADTypes.ForwardMode`](@extref ADTypes) | yes | no | | [`ADTypes.ReverseMode`](@extref ADTypes) | no | yes | | [`ADTypes.ForwardOrReverseMode`](@extref ADTypes) | yes | yes | diff --git a/DifferentiationInterface/docs/src/explanation/advanced.md b/DifferentiationInterface/docs/src/explanation/advanced.md index 4b12c963e..5aa699291 100644 --- a/DifferentiationInterface/docs/src/explanation/advanced.md +++ b/DifferentiationInterface/docs/src/explanation/advanced.md @@ -16,6 +16,7 @@ Every context argument must be wrapped in a subtype of [`Context`](@ref) and com Right now, there are two kinds of context: [`Constant`](@ref) and [`Cache`](@ref). !!! warning + Not every backend supports every type of context. See the documentation on [Backends](@ref) for more details. 
Semantically, both of these calls compute the partial gradient of `f(x, c)` with respect to `x`, but they consider `c` differently: @@ -37,6 +38,7 @@ When faced with sparse Jacobian or Hessian matrices, one can take advantage of t DifferentiationInterface does this automatically if you pass a backend of type [`AutoSparse`](@extref ADTypes.AutoSparse). !!! tip + To know more about sparse AD, read the survey [_What Color Is Your Jacobian? Graph Coloring for Computing Derivatives_](https://epubs.siam.org/doi/10.1137/S0036144504444711) (Gebremedhin et al., 2005). ### `AutoSparse` object @@ -44,17 +46,21 @@ DifferentiationInterface does this automatically if you pass a backend of type [ `AutoSparse` backends only support [`jacobian`](@ref) and [`hessian`](@ref) (as well as their variants), because other operators do not output matrices. An `AutoSparse` backend must be constructed from three ingredients: -1. An underlying (dense) backend, which can be [`SecondOrder`](@ref) or anything from [ADTypes.jl](https://github.com/SciML/ADTypes.jl) -2. A sparsity pattern detector like: - - [`TracerSparsityDetector`](@extref SparseConnectivityTracer.TracerSparsityDetector) from [SparseConnectivityTracer.jl](https://github.com/adrhill/SparseConnectivityTracer.jl) - - [`SymbolicsSparsityDetector`](@extref Symbolics.SymbolicsSparsityDetector) from [Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl) - - [`DenseSparsityDetector`](@ref) from DifferentiationInterface.jl (beware that this detector only gives a locally valid pattern) - - [`KnownJacobianSparsityDetector`](@extref ADTypes.KnownJacobianSparsityDetector) or [`KnownHessianSparsityDetector`](@extref ADTypes.KnownHessianSparsityDetector) from [ADTypes.jl](https://github.com/SciML/ADTypes.jl) (if you already know the pattern) -3. A coloring algorithm from [SparseMatrixColorings.jl](https://github.com/gdalle/SparseMatrixColorings.jl), such as: - - [`GreedyColoringAlgorithm`](@extref SparseMatrixColorings.GreedyColoringAlgorithm) (our generic recommendation) - - [`ConstantColoringAlgorithm`](@extref SparseMatrixColorings.ConstantColoringAlgorithm) (if you have already computed the optimal coloring and always want to return it) + 1. An underlying (dense) backend, which can be [`SecondOrder`](@ref) or anything from [ADTypes.jl](https://github.com/SciML/ADTypes.jl) + + 2. A sparsity pattern detector like: + + + [`TracerSparsityDetector`](@extref SparseConnectivityTracer.TracerSparsityDetector) from [SparseConnectivityTracer.jl](https://github.com/adrhill/SparseConnectivityTracer.jl) + + [`SymbolicsSparsityDetector`](@extref Symbolics.SymbolicsSparsityDetector) from [Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl) + + [`DenseSparsityDetector`](@ref) from DifferentiationInterface.jl (beware that this detector only gives a locally valid pattern) + + [`KnownJacobianSparsityDetector`](@extref ADTypes.KnownJacobianSparsityDetector) or [`KnownHessianSparsityDetector`](@extref ADTypes.KnownHessianSparsityDetector) from [ADTypes.jl](https://github.com/SciML/ADTypes.jl) (if you already know the pattern) + 3. A coloring algorithm from [SparseMatrixColorings.jl](https://github.com/gdalle/SparseMatrixColorings.jl), such as: + + + [`GreedyColoringAlgorithm`](@extref SparseMatrixColorings.GreedyColoringAlgorithm) (our generic recommendation) + + [`ConstantColoringAlgorithm`](@extref SparseMatrixColorings.ConstantColoringAlgorithm) (if you have already computed the optimal coloring and always want to return it) !!! 
note + Symbolic backends have built-in sparsity handling, so `AutoSparse(AutoSymbolics())` and `AutoSparse(AutoFastDifferentiation())` do not need additional configuration for pattern detection or coloring. ### Cost of sparse preparation @@ -63,6 +69,7 @@ The preparation step of `jacobian` or `hessian` with an `AutoSparse` backend can But after preparation, the more zeros are present in the matrix, the greater the speedup will be compared to dense differentiation. !!! danger + The result of preparation for an `AutoSparse` backend cannot be reused if the sparsity pattern changes. ### Tuning the coloring algorithm @@ -80,9 +87,7 @@ This behavior is triggered as soon as you put a [`MixedMode`](@ref) object insid ```julia AutoSparse( - MixedMode(forward_backend, reverse_backend); - sparsity_detector, - coloring_algorithm + MixedMode(forward_backend, reverse_backend); sparsity_detector, coloring_algorithm ) ``` @@ -94,7 +99,9 @@ Thus, the right setup looks like: using StableRNGs seed = 3 -coloring_algorithm = GreedyColoringAlgorithm(RandomOrder(StableRNG(seed), seed); postprocessing=true) +coloring_algorithm = GreedyColoringAlgorithm( + RandomOrder(StableRNG(seed), seed); postprocessing=true +) ``` ## Batch mode diff --git a/DifferentiationInterface/docs/src/explanation/backends.md b/DifferentiationInterface/docs/src/explanation/backends.md index 3be877d0d..845ab4b9b 100644 --- a/DifferentiationInterface/docs/src/explanation/backends.md +++ b/DifferentiationInterface/docs/src/explanation/backends.md @@ -4,40 +4,41 @@ We support the following dense backend choices from [ADTypes.jl](https://github.com/SciML/ADTypes.jl): -- [`AutoChainRules`](@extref ADTypes.AutoChainRules) -- [`AutoDiffractor`](@extref ADTypes.AutoDiffractor) -- [`AutoEnzyme`](@extref ADTypes.AutoEnzyme) -- [`AutoFastDifferentiation`](@extref ADTypes.AutoFastDifferentiation) -- [`AutoFiniteDiff`](@extref ADTypes.AutoFiniteDiff) -- [`AutoFiniteDifferences`](@extref ADTypes.AutoFiniteDifferences) -- [`AutoForwardDiff`](@extref ADTypes.AutoForwardDiff) -- [`AutoGTPSA`](@extref ADTypes.AutoGTPSA) -- [`AutoMooncake`](@extref ADTypes.AutoMooncake) and [`AutoMooncakeForward`](@extref ADTypes.AutoMooncake) (the latter is experimental) -- [`AutoPolyesterForwardDiff`](@extref ADTypes.AutoPolyesterForwardDiff) -- [`AutoReverseDiff`](@extref ADTypes.AutoReverseDiff) -- [`AutoSymbolics`](@extref ADTypes.AutoSymbolics) -- [`AutoTracker`](@extref ADTypes.AutoTracker) -- [`AutoZygote`](@extref ADTypes.AutoZygote) + - [`AutoChainRules`](@extref ADTypes.AutoChainRules) + - [`AutoDiffractor`](@extref ADTypes.AutoDiffractor) + - [`AutoEnzyme`](@extref ADTypes.AutoEnzyme) + - [`AutoFastDifferentiation`](@extref ADTypes.AutoFastDifferentiation) + - [`AutoFiniteDiff`](@extref ADTypes.AutoFiniteDiff) + - [`AutoFiniteDifferences`](@extref ADTypes.AutoFiniteDifferences) + - [`AutoForwardDiff`](@extref ADTypes.AutoForwardDiff) + - [`AutoGTPSA`](@extref ADTypes.AutoGTPSA) + - [`AutoMooncake`](@extref ADTypes.AutoMooncake) and [`AutoMooncakeForward`](@extref ADTypes.AutoMooncake) (the latter is experimental) + - [`AutoPolyesterForwardDiff`](@extref ADTypes.AutoPolyesterForwardDiff) + - [`AutoReverseDiff`](@extref ADTypes.AutoReverseDiff) + - [`AutoSymbolics`](@extref ADTypes.AutoSymbolics) + - [`AutoTracker`](@extref ADTypes.AutoTracker) + - [`AutoZygote`](@extref ADTypes.AutoZygote) ## Features Given a backend object, you can use: -- [`check_available`](@ref) to know whether the required AD package is loaded -- [`check_inplace`](@ref) to know whether the 
backend supports in-place functions (all backends support out-of-place functions) + - [`check_available`](@ref) to know whether the required AD package is loaded + - [`check_inplace`](@ref) to know whether the backend supports in-place functions (all backends support out-of-place functions) In theory, all we need from each backend is either a `pushforward` or a `pullback`: we can deduce every other operator from these two. In practice, many AD backends have custom implementations for high-level operators like `gradient` or `jacobian`, which we reuse whenever possible. !!! details + In the rough summary table below, - - - ✅ means that we reuse the custom implementation from the backend; - - ❌ means that a custom implementation doesn't exist, so we use our default fallbacks; - - 🔀 means it's complicated or not done yet. - + + - ✅ means that we reuse the custom implementation from the backend; + - ❌ means that a custom implementation doesn't exist, so we use our default fallbacks; + - 🔀 means it's complicated or not done yet. + | | `pf` | `pb` | `der` | `grad` | `jac` | `hess` | `hvp` | `der2` | - | -------------------------- | ---- | ---- | ----- | ------ | ----- | ------ | ----- | ------ | + |:-------------------------- |:---- |:---- |:----- |:------ |:----- |:------ |:----- |:------ | | `AutoChainRules` | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | `AutoDiffractor` | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | `AutoEnzyme` (forward) | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | @@ -58,7 +59,7 @@ In practice, many AD backends have custom implementations for high-level operato Moreover, each context type is supported by a specific subset of backends: | | [`Constant`](@ref) | [`Cache`](@ref) | -| -------------------------- | ------------------ | --------------- | +|:-------------------------- |:------------------ |:--------------- | | `AutoChainRules` | ✅ | ❌ | | `AutoDiffractor` | ❌ | ❌ | | `AutoEnzyme` (forward) | ✅ | ✅ | @@ -89,6 +90,7 @@ The inner backend will be called first, and the outer backend will differentiate In general, using a forward outer backend over a reverse inner backend will yield the best performance. !!! danger + Second-order AD is tricky, and many backend combinations will fail (even if you combine a backend with itself). Be ready to experiment and open issues if necessary. @@ -115,6 +117,7 @@ Same-point preparation runs the forward sweep and returns the pullback closure. We only implement `pushforward`. !!! danger + The latest releases of Diffractor [broke DifferentiationInterface](https://github.com/JuliaDiff/Diffractor.jl/issues/290). ### Enzyme @@ -123,6 +126,7 @@ Depending on the `mode` attribute inside [`AutoEnzyme`](@extref ADTypes.AutoEnzy When necessary, preparation chooses a number of chunks (for `gradient` and `jacobian` in forward mode, for `jacobian` only in reverse mode). !!! warning + Enzyme.jl's handling of activities and multiple arguments is not fully supported here, which can cause slowdowns or errors. If differentiation fails or takes too long, consider using Enzyme.jl through its [native API](https://enzymead.github.io/Enzyme.jl/stable/) instead. @@ -131,6 +135,7 @@ When necessary, preparation chooses a number of chunks (for `gradient` and `jaco For every operator, preparation generates an [executable function](https://brianguenter.github.io/FastDifferentiation.jl/stable/makefunction/) from the symbolic expression of the differentiated function. !!! warning + Preparation can be very slow for symbolic AD. 
### FiniteDiff @@ -154,6 +159,7 @@ For all operators, preparation preallocates the input [`TPS`s](https://bmad-sim. If a GTPSA [`Descriptor`](https://bmad-sim.github.io/GTPSA.jl/stable/man/b_descriptor/) is not provided to `AutoGTPSA`, then a `Descriptor` will be generated in preparation based on the context. !!! danger + When providing a custom GTPSA `Descriptor` to `AutoGTPSA`, it is the responsibility of the user to ensure that the number of [GTPSA "variables"](https://bmad-sim.github.io/GTPSA.jl/stable/quickstart/#Calculating-a-Truncated-Power-Series) specified in the `Descriptor` is consistent with the number of inputs of the provided function. Undefined behavior and crashes may occur if this is not the case. ### PolyesterForwardDiff @@ -169,6 +175,7 @@ This tape is computed from the input `x` provided at preparation time. It is control-flow dependent, so only one branch is recorded at each `if` statement. !!! danger + If your function has value-specific control flow (like `if x[1] > 0` or `if c == 1`), you may get silently wrong results whenever it takes new branches that were not taken during preparation. You must make sure to run preparation with an input and contexts whose values trigger the correct control flow for future executions. @@ -179,6 +186,7 @@ Whenever contexts are provided, tape recording is deactivated in all cases, beca For all operators, preparation generates an [executable function](https://docs.sciml.ai/Symbolics/stable/manual/build_function/) from the symbolic expression of the differentiated function. !!! warning + Preparation can be very slow for symbolic AD. ### Mooncake diff --git a/DifferentiationInterface/docs/src/explanation/operators.md b/DifferentiationInterface/docs/src/explanation/operators.md index 55d3c0518..8ff5efe0e 100644 --- a/DifferentiationInterface/docs/src/explanation/operators.md +++ b/DifferentiationInterface/docs/src/explanation/operators.md @@ -1,19 +1,21 @@ # Operators !!! tip + If there are some concepts you do not understand, take a look at the book [_The Elements of Differentiable Programming_](https://arxiv.org/abs/2403.14606) (Blondel and Roulet, 2024). ## List of operators Given a function `f(x) = y`, there are several differentiation operators available. The terminology depends on: -- the type and shape of the input `x` -- the type and shape of the output `y` -- the order of differentiation + - the type and shape of the input `x` + - the type and shape of the output `y` + - the order of differentiation Below we list and describe all the operators we support. !!! warning + The package is thoroughly tested with inputs and outputs of the following types: `Float64`, `Vector{Float64}` and `Matrix{Float64}`. We also expect it to work on most kinds of `Number` and `AbstractArray` variables. Beyond that, you are in uncharted territory. @@ -24,7 +26,7 @@ Below we list and describe all the operators we support. These operators are computed using only the input `x`. 
| operator | order | input `x` | output `y` | operator result type | operator result shape | -| :-------------------------- | :---- | :-------------- | :-------------- | :------------------- | :----------------------- | +|:--------------------------- |:----- |:--------------- |:--------------- |:-------------------- |:------------------------ | | [`derivative`](@ref) | 1 | `Number` | `Any` | similar to `y` | `size(y)` | | [`second_derivative`](@ref) | 2 | `Number` | `Any` | similar to `y` | `size(y)` | | [`gradient`](@ref) | 1 | `Any` | `Number` | similar to `x` | `size(x)` | @@ -37,7 +39,7 @@ These operators are computed using the input `x` and another argument `t` of typ You can think of tangents as perturbations propagated through the function; they live either in the same space as `x` or in the same space as `y`. | operator | order | input `x` | output `y` | element type of `t` | operator result type | operator result shape | -| :-------------------------- | :---- | :-------- | :--------- | :------------------ | :------------------- | :-------------------- | +|:--------------------------- |:----- |:--------- |:---------- |:------------------- |:-------------------- |:--------------------- | | [`pushforward`](@ref) (JVP) | 1 | `Any` | `Any` | similar to `x` | similar to `y` | `size(y)` | | [`pullback`](@ref) (VJP) | 1 | `Any` | `Any` | similar to `y` | similar to `x` | `size(x)` | | [`hvp`](@ref) | 2 | `Any` | `Number` | similar to `x` | similar to `x` | `size(x)` | @@ -46,11 +48,11 @@ You can think of tangents as perturbations propagated through the function; they Several variants of each operator are defined: -- out-of-place operators return a new derivative object -- in-place operators mutate the provided derivative object + - out-of-place operators return a new derivative object + - in-place operators mutate the provided derivative object | out-of-place | in-place | out-of-place + primal | in-place + primal | -| :-------------------------- | :--------------------------- | :----------------------------------------------- | :------------------------------------------------ | +|:--------------------------- |:---------------------------- |:------------------------------------------------ |:------------------------------------------------- | | [`derivative`](@ref) | [`derivative!`](@ref) | [`value_and_derivative`](@ref) | [`value_and_derivative!`](@ref) | | [`second_derivative`](@ref) | [`second_derivative!`](@ref) | [`value_derivative_and_second_derivative`](@ref) | [`value_derivative_and_second_derivative!`](@ref) | | [`gradient`](@ref) | [`gradient!`](@ref) | [`value_and_gradient`](@ref) | [`value_and_gradient!`](@ref) | @@ -64,21 +66,23 @@ Several variants of each operator are defined: Two kinds of functions are supported: -- out-of-place functions `f(x) = y` -- in-place functions `f!(y, x) = nothing` + - out-of-place functions `f(x) = y` + - in-place functions `f!(y, x) = nothing` !!! warning + In-place functions only work with [`pushforward`](@ref), [`pullback`](@ref), [`derivative`](@ref) and [`jacobian`](@ref). The other operators [`hvp`](@ref), [`gradient`](@ref) and [`hessian`](@ref) require scalar outputs, so it makes no sense to mutate the number `y`. 
This results in various operator signatures (the necessary arguments and their order): | function signature | out-of-place operator (returns `result`) | in-place operator (mutates `result`) | -| :------------------------ | :--------------------------------------- | :------------------------------------ | +|:------------------------- |:---------------------------------------- |:------------------------------------- | | out-of-place function `f` | `op(f, backend, x, [t])` | `op!(f, result, backend, x, [t])` | | in-place function `f!` | `op(f!, y, backend, x, [t])` | `op!(f!, y, result, backend, x, [t])` | !!! warning + The positional arguments between `f`/`f!` and `backend` are always mutated, regardless of the bang `!` in the operator name. In particular, for in-place functions `f!(y, x)`, every variant of every operator will mutate `y`. @@ -90,7 +94,7 @@ In many cases, AD can be accelerated if the function has been called at least on This preparation procedure is backend-specific, but we expose a common syntax to achieve it. | operator | preparation (different point) | preparation (same point) | -| :------------------ | :---------------------------------- | :--------------------------------------- | +|:------------------- |:----------------------------------- |:---------------------------------------- | | `derivative` | [`prepare_derivative`](@ref) | - | | `gradient` | [`prepare_gradient`](@ref) | - | | `jacobian` | [`prepare_jacobian`](@ref) | - | @@ -103,7 +107,7 @@ This preparation procedure is backend-specific, but we expose a common syntax to In addition, the preparation syntax depends on the number of arguments accepted by the function. | function signature | preparation signature | -| :-------------------- | :----------------------------------- | +|:--------------------- |:------------------------------------ | | out-of-place function | `prepare_op(f, backend, x, [t])` | | in-place function | `prepare_op(f!, y, backend, x, [t])` | @@ -116,6 +120,7 @@ op(f, prep, backend, x, [t]) # fast because it skips preparation ``` !!! warning + The `prep` object is the last argument before `backend` and it is always mutated, regardless of the bang `!` in the operator name. As a consequence, preparation is **not thread-safe** and sharing `prep` objects between threads may lead to unexpected behavior. If you need to run differentiation concurrently, prepare separate `prep` objects for each thread. @@ -136,20 +141,22 @@ op(f, prep, [other_y], backend, other_x, [other_t, other_contexts...]) provided that the following conditions all hold: -- `f` and `backend` remain the same -- `other_x` has the same type and size as `x` -- `other_y` has the same type and size as `y` -- `other_t` has the same type and size as `t` -- all the elements of `other_contexts` have the same type and size as the corresponding elements of `contexts` + - `f` and `backend` remain the same + - `other_x` has the same type and size as `x` + - `other_y` has the same type and size as `y` + - `other_t` has the same type and size as `t` + - all the elements of `other_contexts` have the same type and size as the corresponding elements of `contexts` For same-point preparation, the same rules hold with two modifications: -- `other_x` must be _equal_ to `x` -- any element of `other_contexts` with type `Constant` must be _equal_ to the corresponding element of `contexts` + - `other_x` must be _equal_ to `x` + - any element of `other_contexts` with type `Constant` must be _equal_ to the corresponding element of `contexts` !!! 
danger + Reusing preparation with different types or sizes may work with some backends and error with others, so it is not allowed by the API of DifferentiationInterface. !!! warning + These rules hold for the majority of backends, but there are some exceptions. The most important exception is [ReverseDiff.jl](https://github.com/JuliaDiff/ReverseDiff.jl) and its taping mechanism, which is sensitive to control flow inside the function. diff --git a/DifferentiationInterface/docs/src/faq/differentiability.md b/DifferentiationInterface/docs/src/faq/differentiability.md index 1fd3be287..7c6da62bc 100644 --- a/DifferentiationInterface/docs/src/faq/differentiability.md +++ b/DifferentiationInterface/docs/src/faq/differentiability.md @@ -7,10 +7,11 @@ To make your functions compatible with several backends, you need to mind the re The list of backends available at [juliadiff.org](https://juliadiff.org/) is split into 2 main families: operator overloading and source transformation. Writing differentiable code requires a specific approach in each paradigm: -- For operator overloading, ensure type-genericity. -- For source transformation, rely on existing rules or write your own. + - For operator overloading, ensure type-genericity. + - For source transformation, rely on existing rules or write your own. !!! tip + Depending on your intended use case, you may not need to ensure compatibility with every single backend. In particular, some applications strongly suggest a specific "mode" of AD (forward or reverse), in which case backends limited to the other mode are mostly irrelevant. @@ -25,20 +26,20 @@ It performs AD at a scalar level by replacing plain numbers with [`Dual` numbers As explained in the [limitations of ForwardDiff](https://juliadiff.org/ForwardDiff.jl/stable/user/limitations/), this will only work if the differentiated code does not restrict number types too much. Otherwise, you may encounter errors like this one: -```julia +``` MethodError: no method matching Float64(::ForwardDiff.Dual{...}) ``` To prevent them, here are a few things to look out for: -- Avoid functions with overly specific type annotations. + - Avoid functions with overly specific type annotations. ```julia -f(x::Vector{Float64}) = ... # bad -f(x::AbstractVector{<:Real}) = ... # good +f(x::Vector{Float64}) = x # bad +f(x::AbstractVector{<:Real}) = x # good ``` -- When creating new containers or buffers, adapt to the input number type if necessary. + - When creating new containers or buffers, adapt to the input number type if necessary. ```julia tmp = zeros(length(x)) # bad @@ -66,7 +67,7 @@ The operator overloading aims at reconstructing a symbolic representation of the [Zygote.jl](https://github.com/FluxML/Zygote.jl) can differentiate a lot of Julia code, but it does have some major [limitations](https://fluxml.ai/Zygote.jl/stable/limitations/). 
The most frequently encountered is the lack of support for mutation: if you try to modify the contents of an array during differentiation, you will get an error like -```julia +``` ERROR: Mutating arrays is not supported ``` @@ -92,10 +93,10 @@ Its [rule system](https://chalk-lab.github.io/Mooncake.jl/stable/understanding_m To summarize, here are the main rule systems which coexist at the moment: -- `Dual` numbers in ForwardDiff.jl -- ChainRulesCore.jl -- Enzyme.jl -- Mooncake.jl + - `Dual` numbers in ForwardDiff.jl + - ChainRulesCore.jl + - Enzyme.jl + - Mooncake.jl ### Rule translation @@ -104,9 +105,9 @@ ChainRulesCore.jl is the closest thing we have to a standard, but it does not ha As a result, Enzyme.jl and Mooncake.jl both rolled out their own designs, which are not mutually compatible. There are, however, translation utilities: -- from ChainRulesCore.jl to ForwardDiff.jl with [ForwardDiffChainRules.jl](https://github.com/ThummeTo/ForwardDiffChainRules.jl) -- from ChainRulesCore.jl to Enzyme.jl with [`Enzyme.@import_rrule`](https://enzymead.github.io/Enzyme.jl/stable/api/#Enzyme.@import_rrule-Tuple) -- from ChainRulesCore.jl to Mooncake.jl with [`Mooncake.@from_rrule`](https://chalk-lab.github.io/Mooncake.jl/stable/utilities/defining_rules/#Using-ChainRules.jl) + - from ChainRulesCore.jl to ForwardDiff.jl with [ForwardDiffChainRules.jl](https://github.com/ThummeTo/ForwardDiffChainRules.jl) + - from ChainRulesCore.jl to Enzyme.jl with [`Enzyme.@import_rrule`](https://enzymead.github.io/Enzyme.jl/stable/api/#Enzyme.@import_rrule-Tuple) + - from ChainRulesCore.jl to Mooncake.jl with [`Mooncake.@from_rrule`](https://chalk-lab.github.io/Mooncake.jl/stable/utilities/defining_rules/#Using-ChainRules.jl) ### Backend switch diff --git a/DifferentiationInterface/docs/src/tutorials/advanced.md b/DifferentiationInterface/docs/src/tutorials/advanced.md index 54a216990..0751f0e22 100644 --- a/DifferentiationInterface/docs/src/tutorials/advanced.md +++ b/DifferentiationInterface/docs/src/tutorials/advanced.md @@ -6,7 +6,8 @@ We present contexts and sparsity handling with DifferentiationInterface.jl. using ADTypes using BenchmarkTools using DifferentiationInterface -import ForwardDiff, Zygote +using ForwardDiff: ForwardDiff +using Zygote: Zygote using Random using SparseConnectivityTracer using SparseMatrixColorings @@ -56,6 +57,7 @@ For additional arguments which act as mutated buffers, the [`Cache`](@ref) wrapp ## Sparsity !!! tip + If you use DifferentiationInterface's Sparse AD functionality in your research, please cite our preprint [*Sparser, Better, Faster, Stronger: Efficient Automatic Differentiation for Sparse Jacobians and Hessians*](https://arxiv.org/abs/2501.17737). @@ -168,7 +170,9 @@ Better memory use can be achieved by pre-allocating the matrix from the preparat ```@example tuto_advanced jac_buffer = similar(sparsity_pattern(jac_prep_sparse), eltype(xbig)) -@benchmark jacobian!($f_sparse_vector, $jac_buffer, $jac_prep_sparse, $sparse_forward_backend, $xbig) +@benchmark jacobian!( + $f_sparse_vector, $jac_buffer, $jac_prep_sparse, $sparse_forward_backend, $xbig +) ``` And for optimal speed, one should write non-allocating and type-stable functions. @@ -190,9 +194,18 @@ ybig ≈ f_sparse_vector(xbig) In this case, the sparse Jacobian should also become non-allocating (for our specific choice of backend). 
```@example tuto_advanced -jac_prep_sparse_nonallocating = prepare_jacobian(f_sparse_vector!, zero(ybig), sparse_forward_backend, zero(xbig)) +jac_prep_sparse_nonallocating = prepare_jacobian( + f_sparse_vector!, zero(ybig), sparse_forward_backend, zero(xbig) +) jac_buffer = similar(sparsity_pattern(jac_prep_sparse_nonallocating), eltype(xbig)) -@benchmark jacobian!($f_sparse_vector!, $ybig, $jac_buffer, $jac_prep_sparse_nonallocating, $sparse_forward_backend, $xbig) +@benchmark jacobian!( + $f_sparse_vector!, + $ybig, + $jac_buffer, + $jac_prep_sparse_nonallocating, + $sparse_forward_backend, + $xbig, +) ``` ### Mixed mode @@ -210,7 +223,7 @@ This is achieved using the [`MixedMode`](@ref) wrapper, for which we recommend a ```@example tuto_advanced sparse_mixed_backend = AutoSparse( - MixedMode(AutoForwardDiff(), AutoZygote()), + MixedMode(AutoForwardDiff(), AutoZygote()); sparsity_detector=TracerSparsityDetector(), coloring_algorithm=GreedyColoringAlgorithm(RandomOrder(MersenneTwister(), 0)), ) @@ -219,9 +232,13 @@ sparse_mixed_backend = AutoSparse( It unlocks a large speedup compared to pure forward mode, and the same would be true compared to reverse mode: ```@example tuto_advanced -@benchmark jacobian($arrowhead, prep, $sparse_forward_backend, $xbig) setup=(prep=prepare_jacobian(arrowhead, sparse_forward_backend, xbig)) +@benchmark jacobian($arrowhead, prep, $sparse_forward_backend, $xbig) setup=( + prep=prepare_jacobian(arrowhead, sparse_forward_backend, xbig) +) ``` ```@example tuto_advanced -@benchmark jacobian($arrowhead, prep, $sparse_mixed_backend, $xbig) setup=(prep=prepare_jacobian(arrowhead, sparse_mixed_backend, xbig)) +@benchmark jacobian($arrowhead, prep, $sparse_mixed_backend, $xbig) setup=( + prep=prepare_jacobian(arrowhead, sparse_mixed_backend, xbig) +) ``` diff --git a/DifferentiationInterface/docs/src/tutorials/basic.md b/DifferentiationInterface/docs/src/tutorials/basic.md index 494b77be7..bfda33933 100644 --- a/DifferentiationInterface/docs/src/tutorials/basic.md +++ b/DifferentiationInterface/docs/src/tutorials/basic.md @@ -25,12 +25,13 @@ Most backend types are defined by [ADTypes.jl](https://github.com/SciML/ADTypes. [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl) is very generic and efficient for low-dimensional inputs, so it's a good starting point: ```@example tuto_basic -import ForwardDiff +using ForwardDiff: ForwardDiff backend = AutoForwardDiff() ``` !!! tip + To avoid name conflicts, load AD packages with `import` instead of `using`. Indeed, most AD packages also export operators like `gradient` and `jacobian`, but you only want to use the ones from DifferentiationInterface.jl. @@ -107,7 +108,7 @@ The whole point of DifferentiationInterface.jl is that you can easily experiment Typically, for gradients, reverse mode AD might be a better fit, so let's try [Zygote.jl](https://github.com/FluxML/Zygote.jl)! ```@example tuto_basic -import Zygote +using Zygote: Zygote backend2 = AutoZygote() ``` diff --git a/DifferentiationInterface/src/first_order/mixed_mode.jl b/DifferentiationInterface/src/first_order/mixed_mode.jl index 5951b456c..d15a3102f 100644 --- a/DifferentiationInterface/src/first_order/mixed_mode.jl +++ b/DifferentiationInterface/src/first_order/mixed_mode.jl @@ -4,6 +4,7 @@ Combination of a forward and a reverse mode backend for mixed-mode sparse Jacobian computation. !!! danger + `MixedMode` backends only support [`jacobian`](@ref) and its variants, and it should be used inside an [`AutoSparse`](@extref ADTypes.AutoSparse) wrapper. 
# Constructor diff --git a/DifferentiationInterface/src/first_order/pullback.jl b/DifferentiationInterface/src/first_order/pullback.jl index 1606f01d6..6c6e6cc10 100644 --- a/DifferentiationInterface/src/first_order/pullback.jl +++ b/DifferentiationInterface/src/first_order/pullback.jl @@ -143,10 +143,12 @@ Compute the value and the pullback of the function `f` at point `x` with a tuple $(docstring_preparation_hint("pullback"; same_point=true)) !!! tip + Pullbacks are also commonly called vector-Jacobian products or VJPs. This function could have been named `value_and_vjp`. !!! info + Required primitive for reverse mode backends. """ function value_and_pullback( @@ -172,6 +174,7 @@ Compute the value and the pullback of the function `f` at point `x` with a tuple $(docstring_preparation_hint("pullback"; same_point=true)) !!! tip + Pullbacks are also commonly called vector-Jacobian products or VJPs. This function could have been named `value_and_vjp!`. """ @@ -198,6 +201,7 @@ Compute the pullback of the function `f` at point `x` with a tuple of tangents ` $(docstring_preparation_hint("pullback"; same_point=true)) !!! tip + Pullbacks are also commonly called vector-Jacobian products or VJPs. This function could have been named `vjp`. """ @@ -224,6 +228,7 @@ Compute the pullback of the function `f` at point `x` with a tuple of tangents ` $(docstring_preparation_hint("pullback"; same_point=true)) !!! tip + Pullbacks are also commonly called vector-Jacobian products or VJPs. This function could have been named `vjp!`. """ diff --git a/DifferentiationInterface/src/first_order/pushforward.jl b/DifferentiationInterface/src/first_order/pushforward.jl index d338c292d..46d249d67 100644 --- a/DifferentiationInterface/src/first_order/pushforward.jl +++ b/DifferentiationInterface/src/first_order/pushforward.jl @@ -147,10 +147,12 @@ Compute the value and the pushforward of the function `f` at point `x` with a tu $(docstring_preparation_hint("pushforward"; same_point=true)) !!! tip + Pushforwards are also commonly called Jacobian-vector products or JVPs. This function could have been named `value_and_jvp`. !!! info + Required primitive for forward mode backends. """ function value_and_pushforward( @@ -176,6 +178,7 @@ Compute the value and the pushforward of the function `f` at point `x` with a tu $(docstring_preparation_hint("pushforward"; same_point=true)) !!! tip + Pushforwards are also commonly called Jacobian-vector products or JVPs. This function could have been named `value_and_jvp!`. """ @@ -202,6 +205,7 @@ Compute the pushforward of the function `f` at point `x` with a tuple of tangent $(docstring_preparation_hint("pushforward"; same_point=true)) !!! tip + Pushforwards are also commonly called Jacobian-vector products or JVPs. This function could have been named `jvp`. """ @@ -228,6 +232,7 @@ Compute the pushforward of the function `f` at point `x` with a tuple of tangent $(docstring_preparation_hint("pushforward"; same_point=true)) !!! tip + Pushforwards are also commonly called Jacobian-vector products or JVPs. This function could have been named `jvp!`. 
""" diff --git a/DifferentiationInterface/src/misc/differentiate_with.jl b/DifferentiationInterface/src/misc/differentiate_with.jl index 98cb63802..eeac288d9 100644 --- a/DifferentiationInterface/src/misc/differentiate_with.jl +++ b/DifferentiationInterface/src/misc/differentiate_with.jl @@ -8,24 +8,28 @@ Then `f2 = DifferentiateWith(f, AutoEnzyme())` is a new function that behaves li Moreover, any larger algorithm `alg` that calls `f2` instead of `f` will also be differentiable with Zygote (as long as `f` was the only Zygote blocker). !!! tip + This is mainly relevant for package developers who want to produce differentiable code at low cost, without writing the differentiation rules themselves. If you sprinkle a few `DifferentiateWith` in places where some AD backends may struggle, end users can pick from a wider variety of packages to differentiate your algorithms. !!! warning + `DifferentiateWith` only supports out-of-place functions `y = f(x)` without additional context arguments. It only makes these functions differentiable if the true backend is either [ForwardDiff](https://github.com/JuliaDiff/ForwardDiff.jl), reverse-mode [Mooncake](https://github.com/chalk-lab/Mooncake.jl), or if it automatically importing rules from [ChainRules](https://github.com/JuliaDiff/ChainRules.jl) (e.g. [Zygote](https://github.com/FluxML/Zygote.jl)). Some backends are also able to [manually import rules](https://juliadiff.org/ChainRulesCore.jl/stable/#Packages-supporting-importing-rules-from-ChainRules.) from ChainRules. For any other true backend, the differentiation behavior is not altered by `DifferentiateWith` (it becomes a transparent wrapper). !!! warning + When using `DifferentiateWith(f, AutoSomething())`, the function `f` must not close over any active data. As of now, we cannot differentiate with respect to parameters stored inside `f`. # Fields -- `f`: the function in question, with signature `f(x)` -- `backend::AbstractADType`: the substitute backend to use for differentiation + - `f`: the function in question, with signature `f(x)` + - `backend::AbstractADType`: the substitute backend to use for differentiation !!! note + For the substitute AD backend to be called under the hood, its package needs to be loaded in addition to the package of the true AD backend. # Constructor @@ -37,7 +41,9 @@ Moreover, any larger algorithm `alg` that calls `f2` instead of `f` will also be ```jldoctest julia> using DifferentiationInterface -julia> import FiniteDiff, ForwardDiff, Zygote +julia> using FiniteDiff: FiniteDiff + using ForwardDiff: ForwardDiff + using Zygote: Zygote julia> function f(x::Vector{Float64}) a = Vector{Float64}(undef, 1) # type constraint breaks ForwardDiff diff --git a/DifferentiationInterface/src/misc/from_primitive.jl b/DifferentiationInterface/src/misc/from_primitive.jl index 81f7ea0b6..db8e7b276 100644 --- a/DifferentiationInterface/src/misc/from_primitive.jl +++ b/DifferentiationInterface/src/misc/from_primitive.jl @@ -32,6 +32,7 @@ end Wrapper which forces a given backend to act as a forward-mode backend, using only its native `value_and_pushforward` primitive and re-implementing the rest from scratch. !!! tip + This can be useful to circumvent high-level operators when they have impractical limitations. For instance, ForwardDiff.jl's `jacobian` does not support GPU arrays but its `pushforward` does, so `AutoForwardFromPrimitive(AutoForwardDiff())` has a GPU-friendly `jacobian`. 
""" diff --git a/DifferentiationInterface/src/misc/overloading.jl b/DifferentiationInterface/src/misc/overloading.jl index 8c11a5bcf..20a38aa5f 100644 --- a/DifferentiationInterface/src/misc/overloading.jl +++ b/DifferentiationInterface/src/misc/overloading.jl @@ -4,6 +4,7 @@ If it exists, return the overloaded input type which will be passed to the differentiated function when preparation result `prep` is reused. !!! danger + This function is experimental and not part of the public API. """ function overloaded_input_type end diff --git a/DifferentiationInterface/src/misc/sparsity_detector.jl b/DifferentiationInterface/src/misc/sparsity_detector.jl index 143b5700a..8d3823e1d 100644 --- a/DifferentiationInterface/src/misc/sparsity_detector.jl +++ b/DifferentiationInterface/src/misc/sparsity_detector.jl @@ -7,15 +7,17 @@ The nonzeros in a Jacobian or Hessian are detected by computing the relevant mat This process can be very slow, and should only be used if its output can be exploited multiple times to compute many sparse matrices. !!! danger + In general, the sparsity pattern you obtain can depend on the provided input `x`. If you want to reuse the pattern, make sure that it is input-agnostic. !!! warning + `DenseSparsityDetector` functionality is now located in a package extension, please load the SparseArrays.jl standard library before you use it. # Fields -- `backend::AbstractADType` is the dense AD backend used under the hood -- `atol::Float64` is the minimum magnitude of a matrix entry to be considered nonzero + - `backend::AbstractADType` is the dense AD backend used under the hood + - `atol::Float64` is the minimum magnitude of a matrix entry to be considered nonzero # Constructor @@ -23,8 +25,8 @@ This process can be very slow, and should only be used if its output can be expl The keyword argument `method::Symbol` can be either: -- `:iterative`: compute the matrix in a sequence of matrix-vector products (memory-efficient) -- `:direct`: compute the matrix all at once (memory-hungry but sometimes faster). + - `:iterative`: compute the matrix in a sequence of matrix-vector products (memory-efficient) + - `:direct`: compute the matrix all at once (memory-hungry but sometimes faster). Note that the constructor is type-unstable because `method` ends up being a type parameter of the `DenseSparsityDetector` object (this is not part of the API and might change). @@ -32,7 +34,7 @@ Note that the constructor is type-unstable because `method` ends up being a type ```jldoctest detector using ADTypes, DifferentiationInterface, SparseArrays -import ForwardDiff +using ForwardDiff: ForwardDiff detector = DenseSparsityDetector(AutoForwardDiff(); atol=1e-5, method=:direct) diff --git a/DifferentiationInterface/src/second_order/second_order.jl b/DifferentiationInterface/src/second_order/second_order.jl index eea89b56d..0045869dc 100644 --- a/DifferentiationInterface/src/second_order/second_order.jl +++ b/DifferentiationInterface/src/second_order/second_order.jl @@ -4,6 +4,7 @@ Combination of two backends for second-order differentiation. !!! danger + `SecondOrder` backends do not support first-order operators. # Constructor @@ -12,8 +13,8 @@ Combination of two backends for second-order differentiation. 
@@ -12,8 +13,8 @@
 # Fields
 
-- `outer::AbstractADType`: backend for the outer differentiation
-- `inner::AbstractADType`: backend for the inner differentiation
+  - `outer::AbstractADType`: backend for the outer differentiation
+  - `inner::AbstractADType`: backend for the inner differentiation
 """
 struct SecondOrder{ADO<:AbstractADType,ADI<:AbstractADType} <: AbstractADType
     outer::ADO
diff --git a/DifferentiationInterface/src/utils/batchsize.jl b/DifferentiationInterface/src/utils/batchsize.jl
index 054d5c9b9..02fe6f74e 100644
--- a/DifferentiationInterface/src/utils/batchsize.jl
+++ b/DifferentiationInterface/src/utils/batchsize.jl
@@ -5,15 +5,15 @@ Configuration for the batch size deduced from a backend and a sample array of le
 # Type parameters
 
-- `B::Int`: batch size
-- `singlebatch::Bool`: whether `B == N` (`B > N` is not allowed)
-- `aligned::Bool`: whether `N % B == 0`
+  - `B::Int`: batch size
+  - `singlebatch::Bool`: whether `B == N` (`B > N` is not allowed)
+  - `aligned::Bool`: whether `N % B == 0`
 
 # Fields
 
-- `N::Int`: array length
-- `A::Int`: number of batches `A = div(N, B, RoundUp)`
-- `B_last::Int`: size of the last batch (if `aligned` is `false`)
+  - `N::Int`: array length
+  - `A::Int`: number of batches `A = div(N, B, RoundUp)`
+  - `B_last::Int`: size of the last batch (if `aligned` is `false`)
 """
 struct BatchSizeSettings{B,singlebatch,aligned}
     N::Int
@@ -117,8 +117,8 @@ end
 Reproduces the heuristic from ForwardDiff to minimize
 
-1. the number of batches necessary to cover an array of length `N`
-2. the number of leftover indices in the last partial batch
+ 1. the number of batches necessary to cover an array of length `N`
+ 2. the number of leftover indices in the last partial batch
 
 Source: https://github.com/JuliaDiff/ForwardDiff.jl/blob/ec74fbc32b10bbf60b3c527d8961666310733728/src/prelude.jl#L19-L29
 """
diff --git a/DifferentiationInterface/src/utils/context.jl b/DifferentiationInterface/src/utils/context.jl
index 74c59efd7..f834a90e7 100644
--- a/DifferentiationInterface/src/utils/context.jl
+++ b/DifferentiationInterface/src/utils/context.jl
@@ -5,9 +5,9 @@ Abstract supertype for additional context arguments, which can be passed to diff
 # Subtypes
 
-- [`Constant`](@ref)
-- [`Cache`](@ref)
-- [`ConstantOrCache`](@ref)
+  - [`Constant`](@ref)
+  - [`Cache`](@ref)
+  - [`ConstantOrCache`](@ref)
 """
 abstract type Context end
@@ -27,6 +27,7 @@
 Note that an operator can be prepared with an arbitrary value of the constant.
 However, same-point preparation must occur with the exact value that will be reused later.
 
 !!! warning
+
     Some backends require any `Constant` context to be a `Number` or an `AbstractArray`.
 
 # Example
@@ -34,7 +35,7 @@ However, same-point preparation must occur with the exact value that will be reu
 ```jldoctest
 julia> using DifferentiationInterface
 
-julia> import ForwardDiff
+julia> using ForwardDiff: ForwardDiff
 
 julia> f(x, c) = c * sum(abs2, x);
@@ -67,6 +68,7 @@
 The initial values present inside the cache do not matter.
 For some backends, preparation allocates the required memory for `Cache` contexts with the right element type, similar to [PreallocationTools.jl](https://github.com/SciML/PreallocationTools.jl).
 
 !!! warning
+
     Some backends require any `Cache` context to be an `AbstractArray`, while others accept nested (named) tuples of `AbstractArray`s.
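As a sketch of the nested-cache variant mentioned in this warning (whether a given backend accepts nested caches must be checked against its documentation; the backend and field names here are illustrative):

```julia
using DifferentiationInterface
import ForwardDiff

# scratch space is passed as a named tuple of arrays
function f(x, c)
    copyto!(c.tmp, x)
    c.sq .= c.tmp .^ 2
    return sum(c.sq)
end

x = rand(3)
cache = Cache((; tmp=zeros(3), sq=zeros(3)))
g = gradient(f, AutoForwardDiff(), x, cache)  # equals 2 .* x
```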
 # Example
@@ -74,7 +76,7 @@ For some backends, preparation allocates the required memory for `Cache` context
 ```jldoctest
 julia> using DifferentiationInterface
 
-julia> import ForwardDiff
+julia> using ForwardDiff: ForwardDiff
 
 julia> f(x, c) = sum(copyto!(c, x));
@@ -153,6 +155,7 @@ end
     FixTail
 
 Closure around a function `f` and a set of tail arguments `tail_args` such that
+
 ```
 (ft::FixTail)(args...) = ft.f(args..., ft.tail_args...)
 ```
diff --git a/DifferentiationInterface/src/utils/traits.jl b/DifferentiationInterface/src/utils/traits.jl
index 06b23529b..8a059870c 100644
--- a/DifferentiationInterface/src/utils/traits.jl
+++ b/DifferentiationInterface/src/utils/traits.jl
@@ -148,10 +148,10 @@ const ForwardOverAnything = Union{ForwardOverForward,ForwardOverReverse}
 Return the best combination of modes for [`hvp`](@ref) and its variants, among the following options:
 
-- [`ForwardOverForward`](@ref)
-- [`ForwardOverReverse`](@ref)
-- [`ReverseOverForward`](@ref)
-- [`ReverseOverReverse`](@ref)
+  - [`ForwardOverForward`](@ref)
+  - [`ForwardOverReverse`](@ref)
+  - [`ReverseOverForward`](@ref)
+  - [`ReverseOverReverse`](@ref)
 """
 hvp_mode(backend::AbstractADType) = hvp_mode(SecondOrder(backend, backend))
diff --git a/DifferentiationInterfaceTest/CHANGELOG.md b/DifferentiationInterfaceTest/CHANGELOG.md
index 569fbe0ab..d69873d99 100644
--- a/DifferentiationInterfaceTest/CHANGELOG.md
+++ b/DifferentiationInterfaceTest/CHANGELOG.md
@@ -5,37 +5,26 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [Unreleased]
+## [Unreleased](https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterfaceTest-v0.10.0...main)
 
 ### Fixed
 
-- Bump compat for SparseConnectivityTracer v1 ([#823])
+  - Bump compat for SparseConnectivityTracer v1 ([#823](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/823))
 
-## [0.10.0]
+## [0.10.0](https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterfaceTest-v0.9.6...DifferentiationInterfaceTest-v0.10.0)
 
 ### Changed
 
-- Specify preparation arguments in DIT Scenario ([#786])
+  - Specify preparation arguments in DIT Scenario ([#786](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/786))
 
 ### Removed
 
-- Remove scenario lists from public API ([#796])
+  - Remove scenario lists from public API ([#796](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/796))
 
-## [0.9.6] - 2025-03-28
+## [0.9.6](https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterfaceTest-v0.9.5...DifferentiationInterfaceTest-v0.9.6) - 2025-03-28
 
 ### Added
 
-- Add new ConstantOrCache context ([#749])
-- Support nested tuples of arrays as Caches ([#748])
-- Test type consistency between preparation and execution ([#745])
-
-[unreleased]: https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterfaceTest-v0.10.0...main
-[0.10.0]: https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterfaceTest-v0.9.6...DifferentiationInterfaceTest-v0.10.0
-[0.9.6]: https://github.com/JuliaDiff/DifferentiationInterface.jl/compare/DifferentiationInterfaceTest-v0.9.5...DifferentiationInterfaceTest-v0.9.6
-
-[#823]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/823
-[#796]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/796
-[#786]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/786
-[#749]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/749
-[#748]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/748
-[#745]: https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/745
+  - Add new ConstantOrCache context ([#749](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/749))
+  - Support nested tuples of arrays as Caches ([#748](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/748))
+  - Test type consistency between preparation and execution ([#745](https://github.com/JuliaDiff/DifferentiationInterface.jl/pull/745))
diff --git a/DifferentiationInterfaceTest/README.md b/DifferentiationInterfaceTest/README.md
index f3036cf32..376e6d93d 100644
--- a/DifferentiationInterfaceTest/README.md
+++ b/DifferentiationInterfaceTest/README.md
@@ -3,12 +3,12 @@
 [![Build Status](https://github.com/JuliaDiff/DifferentiationInterface.jl/actions/workflows/Test.yml/badge.svg?branch=main)](https://github.com/JuliaDiff/DifferentiationInterface.jl/actions/workflows/Test.yml?query=branch%3Amain)
 [![Coverage](https://codecov.io/gh/JuliaDiff/DifferentiationInterface.jl/branch/main/graph/badge.svg?flag=DIT)](https://app.codecov.io/gh/JuliaDiff/DifferentiationInterface.jl)
 [![Code Style: Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/JuliaDiff/BlueStyle)
-[![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac)
+[![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor%27s%20Guide-blueviolet)](https://github.com/SciML/ColPrac)
 [![DOI](https://zenodo.org/badge/740973714.svg)](https://zenodo.org/doi/10.5281/zenodo.11092033)
 
-| Package | Docs |
-| :--------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
-| DifferentiationInterface | [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterface/stable/) [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterface/dev/) |
+| Package | Docs |
+|:----------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
+| DifferentiationInterface | [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterface/stable/) [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterface/dev/) |
 | DifferentiationInterfaceTest | [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterfaceTest/stable/) [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterfaceTest/dev/) |
 
 Testing and benchmarking utilities for automatic differentiation (AD) in Julia, based on [DifferentiationInterface](https://github.com/JuliaDiff/DifferentiationInterface.jl/tree/main/DifferentiationInterface).
@@ -17,16 +17,16 @@ Testing and benchmarking utilities for automatic differentiation (AD) in Julia,
 Make it easy to know, for a given function:
 
-- which AD backends can differentiate it
-- how fast they can do it
+  - which AD backends can differentiate it
+  - how fast they can do it
 
 ## Features
 
-- Definition of custom test scenarios
-- Correctness tests
-- Type stability tests
-- Count calls to the function
-- Benchmark runtime and allocations
+  - Definition of custom test scenarios
+  - Correctness tests
+  - Type stability tests
+  - Count calls to the function
+  - Benchmark runtime and allocations
 
 ## Installation
 
@@ -43,13 +43,13 @@ To install the development version, run this instead:
 
 ```julia
 using Pkg
 
-Pkg.add(
+Pkg.add(;
     url="https://github.com/JuliaDiff/DifferentiationInterface.jl",
-    subdir="DifferentiationInterface"
+    subdir="DifferentiationInterface",
 )
 
-Pkg.add(
+Pkg.add(;
     url="https://github.com/JuliaDiff/DifferentiationInterface.jl",
-    subdir="DifferentiationInterfaceTest"
+    subdir="DifferentiationInterfaceTest",
 )
 ```
diff --git a/DifferentiationInterfaceTest/docs/src/tutorial.md b/DifferentiationInterfaceTest/docs/src/tutorial.md
index 46907dbc4..57ca9e2d4 100644
--- a/DifferentiationInterfaceTest/docs/src/tutorial.md
+++ b/DifferentiationInterfaceTest/docs/src/tutorial.md
@@ -4,7 +4,8 @@ We present a typical workflow with DifferentiationInterfaceTest.jl, building on
 
 ```@repl tuto
 using DifferentiationInterface, DifferentiationInterfaceTest
-import ForwardDiff, Zygote
+using ForwardDiff: ForwardDiff
+using Zygote: Zygote
 ```
 
 ## Introduction
@@ -29,19 +30,19 @@ Of course we know the true gradient mapping:
 
 DifferentiationInterfaceTest.jl relies on so-called [`Scenario`](@ref)s, in which you encapsulate the information needed for your test:
 
-- the operator category (here `:gradient`)
-- the behavior of the operator (either `:in` or `:out` of place)
-- the function `f`
-- the input `x` of the function `f` (and possible tangents or contexts)
-- the reference first-order result `res1` (and possible second-order result `res2`) of the operator
-- the arguments `prep_args` passed during preparation
+  - the operator category (here `:gradient`)
+  - the behavior of the operator (either `:in` or `:out` of place)
+  - the function `f`
+  - the input `x` of the function `f` (and possible tangents or contexts)
+  - the reference first-order result `res1` (and possible second-order result `res2`) of the operator
+  - the arguments `prep_args` passed during preparation
 
 ```@example tuto
 xv = rand(Float32, 3)
 xm = rand(Float64, 3, 2)
 scenarios = [
     Scenario{:gradient,:out}(f, xv; res1=∇f(xv)),
-    Scenario{:gradient,:out}(f, xm; res1=∇f(xm))
+    Scenario{:gradient,:out}(f, xm; res1=∇f(xm)),
 ];
 nothing # hide
 ```
@@ -54,7 +55,7 @@ It has many options, but the main ingredients are the following:
 
 ```@repl tuto
 test_differentiation(
     backends,  # the backends you want to compare
-    scenarios, # the scenarios you defined,
+    scenarios; # the scenarios you defined
     correctness=true,      # compares values against the reference
     type_stability=:none,  # checks type stability with JET.jl
     detailed=true,         # prints a detailed test set
diff --git a/DifferentiationInterfaceTest/ext/DifferentiationInterfaceTestStaticArraysExt/DifferentiationInterfaceTestStaticArraysExt.jl b/DifferentiationInterfaceTest/ext/DifferentiationInterfaceTestStaticArraysExt/DifferentiationInterfaceTestStaticArraysExt.jl
index 9aac9fd44..af862f81b 100644
--- a/DifferentiationInterfaceTest/ext/DifferentiationInterfaceTestStaticArraysExt/DifferentiationInterfaceTestStaticArraysExt.jl
+++ b/DifferentiationInterfaceTest/ext/DifferentiationInterfaceTestStaticArraysExt/DifferentiationInterfaceTestStaticArraysExt.jl
@@ -8,11 +8,11 @@ using StaticArrays: StaticArray, MArray, MMatrix, MVector, SArray, SMatrix, SVec
 static_num_to_vec(x::Number) = sin.(SVector(1, 2) .* x)
 static_num_to_mat(x::Number) = hcat(static_num_to_vec(x), static_num_to_vec(3x))
 
-const NTV              = typeof(DIT.num_to_vec)
-const NTM              = typeof(DIT.num_to_mat)
-mystatic(f::Function)  = f
-mystatic(::NTV)        = static_num_to_vec
-mystatic(::NTM)        = static_num_to_mat
+const NTV = typeof(DIT.num_to_vec)
+const NTM = typeof(DIT.num_to_mat)
+mystatic(f::Function) = f
+mystatic(::NTV) = static_num_to_vec
+mystatic(::NTM) = static_num_to_mat
 mystatic(f::DIT.FunctionModifier) = f
 mystatic(x::Number) = x
diff --git a/DifferentiationInterfaceTest/src/scenarios/allocfree.jl b/DifferentiationInterfaceTest/src/scenarios/allocfree.jl
index 643ebde95..d6f089195 100644
--- a/DifferentiationInterfaceTest/src/scenarios/allocfree.jl
+++ b/DifferentiationInterfaceTest/src/scenarios/allocfree.jl
@@ -46,6 +46,7 @@ end
 Create a vector of [`Scenario`](@ref)s with functions that do not allocate.
 
 !!! warning
+
     At the moment, second-order scenarios are excluded.
 """
 function allocfree_scenarios()
diff --git a/DifferentiationInterfaceTest/src/scenarios/extensions.jl b/DifferentiationInterfaceTest/src/scenarios/extensions.jl
index 8d161e937..e4e6de3df 100644
--- a/DifferentiationInterfaceTest/src/scenarios/extensions.jl
+++ b/DifferentiationInterfaceTest/src/scenarios/extensions.jl
@@ -4,6 +4,7 @@
 Create a vector of [`Scenario`](@ref)s with static array types from [StaticArrays.jl](https://github.com/JuliaArrays/StaticArrays.jl).
 
 !!! warning
+
     This function requires StaticArrays.jl to be loaded (it is implemented in a package extension).
 """
 function static_scenarios end
@@ -14,6 +15,7 @@ function static_scenarios end
 Create a vector of [`Scenario`](@ref)s with component array types from [ComponentArrays.jl](https://github.com/jonniedie/ComponentArrays.jl).
 
 !!! warning
+
     This function requires ComponentArrays.jl to be loaded (it is implemented in a package extension).
 """
 function component_scenarios end
@@ -24,6 +26,7 @@ function component_scenarios end
 Create a vector of [`Scenario`](@ref)s with GPU array types from [JLArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl/tree/master/lib/JLArrays).
 
 !!! warning
+
     This function requires JLArrays.jl to be loaded (it is implemented in a package extension).
 """
 function gpu_scenarios end
@@ -34,9 +37,11 @@ function gpu_scenarios end
 Create a vector of [`Scenario`](@ref)s with neural networks from [Flux.jl](https://github.com/FluxML/Flux.jl).
 
 !!! warning
+
     This function requires FiniteDifferences.jl and Flux.jl to be loaded (it is implemented in a package extension).
 
 !!! danger
+
     These scenarios are still experimental and not part of the public API.
     Their ground truth values are computed with finite differences, and thus subject to imprecision.
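The loading requirement is the same for all of these scenario generators; as a minimal sketch with the StaticArrays variant (the backend choice is illustrative):

```julia
using DifferentiationInterface, DifferentiationInterfaceTest
using StaticArrays  # loading the package activates the extension
import ForwardDiff

scens = static_scenarios()  # would error without StaticArrays.jl loaded
test_differentiation([AutoForwardDiff()], scens; correctness=true)
```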
""" @@ -55,9 +60,11 @@ function flux_isapprox end Create a vector of [`Scenario`](@ref)s with neural networks from [Lux.jl](https://github.com/LuxDL/Lux.jl). !!! warning + This function requires ComponentArrays.jl, ForwardDiff.jl, Lux.jl and LuxTestUtils.jl to be loaded (it is implemented in a package extension). !!! danger + These scenarios are still experimental and not part of the public API. """ function lux_scenarios end diff --git a/DifferentiationInterfaceTest/src/scenarios/scenario.jl b/DifferentiationInterfaceTest/src/scenarios/scenario.jl index 03d844cd3..f092a9efd 100644 --- a/DifferentiationInterfaceTest/src/scenarios/scenario.jl +++ b/DifferentiationInterfaceTest/src/scenarios/scenario.jl @@ -7,9 +7,9 @@ This generic type should never be used directly: use the specific constructor co # Type parameters -- `op`: one of `:pushforward`, `:pullback`, `:derivative`, `:gradient`, `:jacobian`,`:second_derivative`, `:hvp`, `:hessian` -- `pl_op`: either `:in` (for `op!(f, result, backend, x)`) or `:out` (for `result = op(f, backend, x)`) -- `pl_fun`: either `:in` (for `f!(y, x)`) or `:out` (for `y = f(x)`) + - `op`: one of `:pushforward`, `:pullback`, `:derivative`, `:gradient`, `:jacobian`,`:second_derivative`, `:hvp`, `:hessian` + - `pl_op`: either `:in` (for `op!(f, result, backend, x)`) or `:out` (for `result = op(f, backend, x)`) + - `pl_fun`: either `:in` (for `f!(y, x)`) or `:out` (for `y = f(x)`) # Constructors @@ -25,9 +25,9 @@ This generic type should never be used directly: use the specific constructor co Default values: -- `prep_args = ` the result of `zero` applied to each execution argument -- `res1 = res2 = nothing` -- `name = nothing` + - `prep_args = ` the result of `zero` applied to each execution argument + - `res1 = res2 = nothing` + - `name = nothing` # Fields diff --git a/DifferentiationInterfaceTest/src/test_differentiation.jl b/DifferentiationInterfaceTest/src/test_differentiation.jl index f333150a8..3db0a5aab 100644 --- a/DifferentiationInterfaceTest/src/test_differentiation.jl +++ b/DifferentiationInterfaceTest/src/test_differentiation.jl @@ -7,64 +7,64 @@ Apply a list of `backends` on a list of `scenarios`, running a variety of differ This function always creates and runs a `@testset`, though its contents may vary. -- if `benchmark == :none`, it returns `nothing`. -- if `benchmark != :none`, it returns a `DataFrame` of benchmark results, whose columns correspond to the fields of [`DifferentiationBenchmarkDataRow`](@ref). + - if `benchmark == :none`, it returns `nothing`. + - if `benchmark != :none`, it returns a `DataFrame` of benchmark results, whose columns correspond to the fields of [`DifferentiationBenchmarkDataRow`](@ref). # Positional arguments -- `backends::Vector{<:AbstractADType}`: the backends to test -- `scenarios::Vector{<:Scenario}`: the scenarios on which to test these backends. Defaults to a standard set of first- and second-order scenarios, whose contents are not part of the public API and may change without notice. + - `backends::Vector{<:AbstractADType}`: the backends to test + - `scenarios::Vector{<:Scenario}`: the scenarios on which to test these backends. Defaults to a standard set of first- and second-order scenarios, whose contents are not part of the public API and may change without notice. 
 # Keyword arguments
 
-- `testset_name=nothing`: how to display the test set
+  - `testset_name=nothing`: how to display the test set
 
 **Test categories:**
 
-- `correctness=true`: whether to compare the differentiation results with the theoretical values specified in each scenario
-- `type_stability=:none`: whether (and how) to check type stability of operators with JET.jl.
-- `allocations=:none`: whether (and how) to check allocations inside operators with AllocCheck.jl
-- `benchmark=:none`: whether (and how) to benchmark operators with Chairmarks.jl
+  - `correctness=true`: whether to compare the differentiation results with the theoretical values specified in each scenario
+  - `type_stability=:none`: whether (and how) to check type stability of operators with JET.jl
+  - `allocations=:none`: whether (and how) to check allocations inside operators with AllocCheck.jl
+  - `benchmark=:none`: whether (and how) to benchmark operators with Chairmarks.jl
 
 For `type_stability`, `allocations` and `benchmark`, the possible values are `:none`, `:prepared` or `:full`.
 Each setting tests/benchmarks a different subset of calls:
 
-| kwarg | prepared operator | unprepared operator | preparation |
-|---|---|---|---|
-| `:none` | no | no | no |
-| `:prepared` | yes | no | no |
-| `:full` | yes | yes | yes |
+| kwarg       | prepared operator | unprepared operator | preparation |
+|:----------- |:----------------- |:------------------- |:----------- |
+| `:none`     | no                | no                  | no          |
+| `:prepared` | yes               | no                  | no          |
+| `:full`     | yes               | yes                 | yes         |
 
 **Misc options:**
 
-- `excluded::Vector{Symbol}`: list of operators to exclude, such as [`FIRST_ORDER`](@ref) or [`SECOND_ORDER`](@ref)
-- `detailed=false`: whether to create a detailed or condensed testset
-- `logging=false`: whether to log progress
+  - `excluded::Vector{Symbol}`: list of operators to exclude, such as [`FIRST_ORDER`](@ref) or [`SECOND_ORDER`](@ref)
+  - `detailed=false`: whether to create a detailed or condensed testset
+  - `logging=false`: whether to log progress
 
 **Correctness options:**
 
-- `isapprox=isapprox`: function used to compare objects approximately, with the standard signature `isapprox(x, y; atol, rtol)`
-- `atol=0`: absolute precision for correctness testing (when comparing to the reference outputs)
-- `rtol=1e-3`: relative precision for correctness testing (when comparing to the reference outputs)
-- `scenario_intact=true`: whether to check that the scenario remains unchanged after the operators are applied
-- `sparsity=false`: whether to check sparsity patterns for Jacobians / Hessians
-- `reprepare::Bool=true`: whether to modify preparation before testing when the preparation arguments have the wrong size
+  - `isapprox=isapprox`: function used to compare objects approximately, with the standard signature `isapprox(x, y; atol, rtol)`
+  - `atol=0`: absolute precision for correctness testing (when comparing to the reference outputs)
+  - `rtol=1e-3`: relative precision for correctness testing (when comparing to the reference outputs)
+  - `scenario_intact=true`: whether to check that the scenario remains unchanged after the operators are applied
+  - `sparsity=false`: whether to check sparsity patterns for Jacobians / Hessians
+  - `reprepare::Bool=true`: whether to modify preparation before testing when the preparation arguments have the wrong size
 
 **Type stability options:**
 
-- `ignored_modules=nothing`: list of modules that JET.jl should ignore
-- `function_filter`: filter for functions that JET.jl should ignore (with a reasonable default)
+  - `ignored_modules=nothing`: list of modules that JET.jl should ignore
+  - `function_filter`: filter for functions that JET.jl should ignore (with a reasonable default)
 
 **Benchmark options:**
 
-- `count_calls=true`: whether to also count function calls during benchmarking
-- `benchmark_test=true`: whether to include tests which succeed iff benchmark doesn't error
-- `benchmark_seconds=1`: how long to run each benchmark for
-- `benchmark_aggregation=minimum`: function used to aggregate sample measurements
+  - `count_calls=true`: whether to also count function calls during benchmarking
+  - `benchmark_test=true`: whether to include tests which succeed iff the benchmark doesn't error
+  - `benchmark_seconds=1`: how long to run each benchmark for
+  - `benchmark_aggregation=minimum`: function used to aggregate sample measurements
 
 **Batch size options:**
 
-- `adaptive_batchsize=true`: whether to cap the backend's preset batch size (when it exists) to prevent errors on small inputs
+  - `adaptive_batchsize=true`: whether to cap the backend's preset batch size (when it exists) to prevent errors on small inputs
 """
 function test_differentiation(
     backends::Vector{<:AbstractADType},
diff --git a/DifferentiationInterfaceTest/src/tests/benchmark.jl b/DifferentiationInterfaceTest/src/tests/benchmark.jl
index 05963afd1..fca1ce77a 100644
--- a/DifferentiationInterfaceTest/src/tests/benchmark.jl
+++ b/DifferentiationInterfaceTest/src/tests/benchmark.jl
@@ -50,7 +50,7 @@ end
 Ad-hoc storage type for differentiation benchmarking results.
 
-# Fields 
+# Fields
 
 $(TYPEDFIELDS)
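To see how the `DataFrame` of benchmark rows is produced in practice, here is a minimal sketch of a run combining the keywords documented above (the test function, tolerances and batch of scenarios are illustrative):

```julia
using DifferentiationInterface, DifferentiationInterfaceTest
import ForwardDiff

f(x) = sum(abs2, x)
x = rand(3)
scens = [Scenario{:gradient,:out}(f, x; res1=2 .* x)]

# with benchmark != :none, a DataFrame is returned whose columns
# correspond to the fields of DifferentiationBenchmarkDataRow
df = test_differentiation(
    [AutoForwardDiff()],
    scens;
    correctness=true,
    benchmark=:prepared,
    benchmark_seconds=0.5,
)
```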