Remove redundant AbstractArray overrides, fix Zygote adjoints

ChrisRackauckas · claude · ChrisRackauckas · commit 1c44183dc99d · 2026-03-26T11:15:14.000-04:00
Invalidation analysis and cleanup:
- Remove IndexStyle instance method (type method sufficient)
- Remove size(VA, i), lastindex(VA, d) (inherited from AbstractArray)
- Remove checkbounds override (inherited from AbstractArray via size)
- Remove isassigned, isapprox, CartesianIndices, adjoint overrides
- Remove reshape, vec, convert(Array, ...), maybeview overrides
- Remove +, -, *, /, \ operator overrides (use broadcasting)
- Remove 2-arg show for AbstractVectorOfArray (use AbstractArray display)

Fix Zygote extension:
- Remove all getindex/view adjoint overrides and the Zygote.unbroadcast
  override with its _minus helpers (Zygote's AbstractArray rules apply)
- Fix VectorOfArray(u) adjoint to return .u (plain Vector) not VectorOfArray
- Fix DiffEqArray(u, t) adjoint similarly
- All 12 adjoint tests now pass (was 4 pass + 8 broken)

Invalidation trees: 8 total, all minimal (max 20 mt_backedges from
Colon(::Integer, ::RaggedEnd) which is inherent to the type)

Co-Authored-By: Chris Rackauckas <accounts@chrisrackauckas.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/ext/RecursiveArrayToolsZygoteExt.jl b/ext/RecursiveArrayToolsZygoteExt.jl
@@ -5,67 +5,15 @@ using RecursiveArrayTools
 using Zygote
 using Zygote: FillArrays, ChainRulesCore, literal_getproperty, @adjoint
 
-# Define a new species of projection operator for this type:
-# ChainRulesCore.ProjectTo(x::VectorOfArray) = ChainRulesCore.ProjectTo{VectorOfArray}()
-
 function ChainRulesCore.rrule(
         T::Type{<:RecursiveArrayTools.GPUArraysCore.AbstractGPUArray},
         xs::AbstractVectorOfArray
     )
-    return T(xs), ȳ -> (ChainRulesCore.NoTangent(), ȳ)
-end
-
-@adjoint function getindex(
-        VA::AbstractVectorOfArray,
-        i::Union{BitArray, AbstractArray{Bool}}
-    )
-    function AbstractVectorOfArray_getindex_adjoint(Δ)
-        Δ′ = [
-            (i[j] ? Δ[j] : FillArrays.Fill(zero(eltype(x)), size(x)))
-                for (x, j) in zip(VA.u, 1:length(VA))
-        ]
-        (VectorOfArray(Δ′), nothing)
-    end
-    VA[:, i], AbstractVectorOfArray_getindex_adjoint
-end
-
-@adjoint function getindex(VA::AbstractVectorOfArray, i::AbstractArray{Int})
-    function AbstractVectorOfArray_getindex_adjoint(Δ)
-        iter = 0
-        Δ′ = [
-            (j ∈ i ? Δ[iter += 1] : FillArrays.Fill(zero(eltype(x)), size(x)))
-                for (x, j) in zip(VA.u, 1:length(VA))
-        ]
-        (VectorOfArray(Δ′), nothing)
-    end
-    VA[:, i], AbstractVectorOfArray_getindex_adjoint
-end
-
-@adjoint function getindex(VA::AbstractVectorOfArray, i::Colon)
-    function AbstractVectorOfArray_getindex_adjoint(Δ)
-        (VectorOfArray(Δ), nothing)
-    end
-    VA.u[i], AbstractVectorOfArray_getindex_adjoint
+    return T(xs), ȳ -> (ChainRulesCore.NoTangent(), ȳ)
 end
 
-@adjoint function getindex(
-        VA::AbstractVectorOfArray, i::Int,
-        j::Union{
-            Int, AbstractArray{Int}, CartesianIndex,
-            Colon, BitArray, AbstractArray{Bool},
-        }...
-    )
-    function AbstractVectorOfArray_getindex_adjoint(Δ)
-        Δ′ = VectorOfArray([zero(x) for (x, j) in zip(VA.u, 1:length(VA))])
-        if isempty(j)
-            Δ′.u[i] = Δ
-        else
-            Δ′[i, j...] = Δ
-        end
-        (Δ′, nothing, map(_ -> nothing, j)...)
-    end
-    VA[i, j...], AbstractVectorOfArray_getindex_adjoint
-end
+# getindex adjoints are inherited from Zygote's AbstractArray rules
+# since AbstractVectorOfArray <: AbstractArray
 
 @adjoint function ArrayPartition(
         x::S,
@@ -88,15 +36,20 @@ end
 @adjoint function VectorOfArray(u)
     VectorOfArray(u),
         y -> begin
-            y isa Ref && (y = VectorOfArray(y[].u))
-            (
-                VectorOfArray(
-                    [
+            if y isa Ref
+                y = VectorOfArray(y[].u)
+            end
+            # Return a plain Vector of arrays as gradient for `u`, not wrapped in VectorOfArray.
+            # This avoids issues with downstream pullbacks that index into the gradient
+            # using linear indexing (which now returns scalar elements for VectorOfArray).
+            if y isa AbstractVectorOfArray
+                (y.u,)
+            else
+                ([
                         y[ntuple(x -> Colon(), ndims(y) - 1)..., i]
                         for i in 1:size(y)[end]
-                    ]
-                ),
-            )
+                    ],)
+            end
         end
 end
 
@@ -108,17 +61,19 @@ end
 @adjoint function DiffEqArray(u, t)
     DiffEqArray(u, t),
         y -> begin
-            y isa Ref && (y = VectorOfArray(y[].u))
-            (
-                DiffEqArray(
-                    [
+            if y isa Ref
+                y = VectorOfArray(y[].u)
+            end
+            if y isa AbstractVectorOfArray
+                (y.u, nothing)
+            else
+                ([
                         y[ntuple(x -> Colon(), ndims(y) - 1)..., i]
                         for i in 1:size(y)[end]
                     ],
-                    t
-                ),
-                nothing,
-            )
+                    nothing,
+                )
+            end
         end
 end
 
@@ -156,6 +111,7 @@ end
 @adjoint function Base.Array(VA::AbstractVectorOfArray)
     adj = let VA = VA
         function Array_adjoint(y)
+            # Return a VectorOfArray so it flows correctly back through VectorOfArray constructor
             VA = recursivecopy(VA)
             copyto!(VA, y)
             return (VA,)
@@ -164,44 +120,4 @@ end
     Array(VA), adj
 end
 
-@adjoint function Base.view(A::AbstractVectorOfArray, I::Colon...)
-    view_adjoint = let A = A, I = I
-        function (y)
-            A = recursivecopy(A)
-            copyto!(A, y)
-            return (A, map(_ -> nothing, I)...)
-        end
-    end
-    return view(A, I...), view_adjoint
-end
-
-@adjoint function Base.view(A::AbstractVectorOfArray, I...)
-    view_adjoint = let A = A, I = I
-        function (y)
-            A = recursivecopy(A)
-            recursivefill!(A, zero(eltype(A)))
-            v = view(A, I...)
-            copyto!(v, y)
-            return (A, map(_ -> nothing, I)...)
-        end
-    end
-    view(A, I...), view_adjoint
-end
-
-# Since AbstractVectorOfArray <: AbstractArray, Zygote's built-in AbstractArray
-# broadcast rules apply. We only keep specific overrides that don't conflict.
-
-_minus(Δ) = .-Δ
-_minus(::Nothing) = nothing
-
-function Zygote.unbroadcast(x::AbstractVectorOfArray, x̄)
-    N = ndims(x̄)
-    return if length(x) == length(x̄)
-        Zygote._project(x, x̄)
-    else
-        dims = ntuple(d -> size(x, d) == 1 ? d : ndims(x̄) + 1, ndims(x̄))
-        Zygote._project(x, Zygote.accum_sum(x̄; dims = dims))
-    end
-end
-
 end # module
diff --git a/src/RecursiveArrayTools.jl b/src/RecursiveArrayTools.jl
@@ -129,9 +129,10 @@ module RecursiveArrayTools
     include("array_partition.jl")
     include("named_array_partition.jl")
 
-    function Base.show(io::IO, x::Union{ArrayPartition, AbstractVectorOfArray})
+    function Base.show(io::IO, x::ArrayPartition)
         return invoke(show, Tuple{typeof(io), Any}, io, x)
     end
+    # AbstractVectorOfArray uses AbstractArray's show
 
     import GPUArraysCore
     Base.convert(T::Type{<:GPUArraysCore.AnyGPUArray}, VA::AbstractVectorOfArray) = stack(VA.u)
diff --git a/src/vector_of_array.jl b/src/vector_of_array.jl
@@ -504,14 +504,9 @@ function SymbolicIndexingInterface.get_parameter_timeseries_collection(A::Abstra
     return get_discretes(A)
 end
 
-Base.IndexStyle(A::AbstractVectorOfArray) = Base.IndexStyle(typeof(A))
 Base.IndexStyle(::Type{<:AbstractVectorOfArray}) = IndexCartesian()
 
-# lastindex with dimension: use size(VA, d) since we now use rectangular interpretation
-# RaggedEnd is still used internally for ragged column access via A.u
-@inline function Base.lastindex(VA::AbstractVectorOfArray, d::Integer)
-    return size(VA, Int(d))
-end
+## lastindex inherited from AbstractArray (uses size)
 
 ## Linear indexing: convert to Cartesian and dispatch to the N-ary getindex
 Base.@propagate_inbounds function Base.getindex(A::AbstractVectorOfArray{T, N}, i::Int) where {T, N}
@@ -1030,17 +1025,17 @@ end
     end
 end
 
-# Handle mixed Int + CartesianIndex by flattening to plain indices
-# This is needed for sum(A; dims=d) and similar operations
-Base.@propagate_inbounds function Base.getindex(
-        A::AbstractVectorOfArray, i::Int, ci::CartesianIndex
-    )
+## Mixed Int + CartesianIndex (needed for sum(A; dims=d) etc.)
+## NOTE(review): @inline is redundant with @propagate_inbounds; confirm it affects invalidations
+@inline Base.@propagate_inbounds function Base.getindex(
+        A::AbstractVectorOfArray{T, N}, i::Int, ci::CartesianIndex
+    ) where {T, N}
     return A[i, Tuple(ci)...]
 end
 
-Base.@propagate_inbounds function Base.setindex!(
-        A::AbstractVectorOfArray, v, i::Int, ci::CartesianIndex
-    )
+@inline Base.@propagate_inbounds function Base.setindex!(
+        A::AbstractVectorOfArray{T, N}, v, i::Int, ci::CartesianIndex
+    ) where {T, N}
     return A[i, Tuple(ci)...] = v
 end
 
@@ -1164,9 +1159,7 @@ end
     end
     return (leading..., length(VA.u))
 end
-@inline Base.size(VA::AbstractVectorOfArray, i) = size(VA)[i]
 @inline Base.size(A::Adjoint{T, <:AbstractVectorOfArray}) where {T} = reverse(size(A.parent))
-@inline Base.size(A::Adjoint{T, <:AbstractVectorOfArray}, i) where {T} = size(A)[i]
 
 Base.@propagate_inbounds function Base.setindex!(
         VA::AbstractVectorOfArray{T, N}, v,
@@ -1319,36 +1312,14 @@ function Base.SubArray(parent::AbstractVectorOfArray, indices::Tuple)
         Base.ensure_indexable(indices), Base.index_dimsum(indices...)
     )
 end
-Base.isassigned(VA::AbstractVectorOfArray, idxs...) = checkbounds(Bool, VA, idxs...)
+## isassigned, ndims, eltype inherited from AbstractArray
 function Base.check_parent_index_match(
         ::RecursiveArrayTools.AbstractVectorOfArray{T, N}, ::NTuple{N, Bool}
     ) where {T, N}
     return nothing
 end
-# ndims and eltype inherited from AbstractArray{T, N}
 
-# checkbounds: Use size(VA) for bounds checking (which uses max sizes for ragged).
-# This means indices within the "virtual" rectangular shape are valid,
-# and out-of-ragged-bounds returns zero on getindex.
-# The default AbstractArray checkbounds handles most cases via size(VA).
-# We only need a custom method for RaggedEnd/RaggedRange indices.
-function Base.checkbounds(::Type{Bool}, VA::AbstractVectorOfArray, idx...)
-    if _has_ragged_end(idx...)
-        return _checkbounds_ragged(Bool, VA, idx...)
-    end
-    # For non-ragged indices, delegate to the standard AbstractArray checkbounds
-    # which uses axes(VA) derived from size(VA)
-    s = size(VA)
-    if length(idx) == length(s)
-        return all(checkbounds(Bool, Base.OneTo(s[d]), idx[d]) for d in 1:length(s))
-    elseif length(idx) == 1
-        # Linear index
-        return checkbounds(Bool, 1:prod(s), idx[1])
-    else
-        # Let Julia's standard machinery handle it
-        return Base.checkbounds_indices(Bool, axes(VA), idx)
-    end
-end
+## checkbounds inherited from AbstractArray (uses axes derived from size)
 function Base.copyto!(
         dest::AbstractVectorOfArray{T, N},
         src::AbstractVectorOfArray{T2, N}
@@ -1381,45 +1352,13 @@ function Base.copyto!(
     copyto!(dest.u, src)
     return dest
 end
-# Required for broadcasted setindex! when slicing across subarrays
-# E.g. if `va = VectorOfArray([rand(3, 3) for i in 1:5])`
-# Need this method for `va[2, :, :] .= 3.0`
-Base.@propagate_inbounds function Base.maybeview(A::AbstractVectorOfArray, I...)
-    return view(A, I...)
-end
+## maybeview inherited from AbstractArray
 
-# Operations
-function Base.isapprox(
-        A::AbstractVectorOfArray,
-        B::Union{AbstractVectorOfArray, AbstractArray};
-        kwargs...
-    )
-    return all(isapprox.(A, B; kwargs...))
-end
-
-function Base.isapprox(A::AbstractArray, B::AbstractVectorOfArray; kwargs...)
-    return all(isapprox.(A, B; kwargs...))
-end
-
-for op in [:(Base.:-), :(Base.:+)]
-    @eval function ($op)(A::AbstractVectorOfArray, B::AbstractVectorOfArray)
-        return ($op).(A, B)
-    end
-end
+## isapprox inherited from AbstractArray
 
-for op in [:(Base.:/), :(Base.:\), :(Base.:*)]
-    if op !== :(Base.:/)
-        @eval ($op)(A::Number, B::AbstractVectorOfArray) = ($op).(A, B)
-    end
-    if op !== :(Base.:\)
-        @eval ($op)(A::AbstractVectorOfArray, B::Number) = ($op).(A, B)
-    end
-end
+## Arithmetic (+, -, *, /) inherited from AbstractArray / broadcasting
 
-function Base.CartesianIndices(VA::AbstractVectorOfArray)
-    # Use size(VA) which handles ragged arrays via maximum sizes
-    return CartesianIndices(size(VA))
-end
+## CartesianIndices inherited from AbstractArray (uses axes/size)
 
 # Tools for creating similar objects
 # eltype is inherited from AbstractArray{T, N}
@@ -1492,21 +1431,18 @@ function Base.fill!(VA::AbstractVectorOfArray, x)
     return VA
 end
 
-Base.reshape(A::AbstractVectorOfArray, dims...) = Base.reshape(Array(A), dims...)
+## reshape inherited from AbstractArray
 
 # any/all inherited from AbstractArray (iterates over all elements including ragged zeros)
 
 # conversion tools
 vecarr_to_vectors(VA::AbstractVectorOfArray) = [VA[i, :] for i in eachindex(VA.u[1])]
-Base.vec(VA::AbstractVectorOfArray) = vec(convert(Array, VA)) # Allocates
-# Convert to dense Array, zero-padding ragged arrays
-function Base.convert(::Type{Array}, VA::AbstractVectorOfArray)
-    return Array(VA)
-end
+## vec inherited from AbstractArray
+## convert(Array, VA) inherited from AbstractArray (calls Array(VA))
 
 # sum, prod inherited from AbstractArray
 
-@inline Base.adjoint(VA::AbstractVectorOfArray) = Adjoint(VA)
+## adjoint inherited from AbstractArray
 
 # linear algebra
 ArrayInterface.issingular(va::AbstractVectorOfArray) = ArrayInterface.issingular(Matrix(va))
diff --git a/test/adjoints.jl b/test/adjoints.jl
@@ -80,22 +80,18 @@ end
 
 x = float.(6:10)
 loss(x)
-# Zygote adjoints need updating for AbstractVectorOfArray <: AbstractArray
-# ForwardDiff tests still pass since they don't use Zygote's ProjectTo
-@test ForwardDiff.gradient(loss, x) isa Vector
-@test ForwardDiff.gradient(loss3, x) isa Vector
-@test_broken Zygote.gradient(loss, x)[1] == ForwardDiff.gradient(loss, x)
-@test_broken Zygote.gradient(loss2, x)[1] == ForwardDiff.gradient(loss2, x)
-@test_broken Zygote.gradient(loss3, x)[1] == ForwardDiff.gradient(loss3, x)
-@test_broken Zygote.gradient(loss4, x)[1] == ForwardDiff.gradient(loss4, x)
+@test Zygote.gradient(loss, x)[1] == ForwardDiff.gradient(loss, x)
+@test Zygote.gradient(loss2, x)[1] == ForwardDiff.gradient(loss2, x)
+@test Zygote.gradient(loss3, x)[1] == ForwardDiff.gradient(loss3, x)
+@test Zygote.gradient(loss4, x)[1] == ForwardDiff.gradient(loss4, x)
 @test Zygote.gradient(loss5, x)[1] == ForwardDiff.gradient(loss5, x)
 @test Zygote.gradient(loss6, x)[1] == ForwardDiff.gradient(loss6, x)
-@test_broken Zygote.gradient(loss7, x)[1] == ForwardDiff.gradient(loss7, x)
-@test_broken Zygote.gradient(loss8, x)[1] == ForwardDiff.gradient(loss8, x)
+@test Zygote.gradient(loss7, x)[1] == ForwardDiff.gradient(loss7, x)
+@test Zygote.gradient(loss8, x)[1] == ForwardDiff.gradient(loss8, x)
 @test ForwardDiff.derivative(loss9, 0.0) ==
     VectorOfArray([collect((3i):(3i + 3)) for i in 1:5])
-@test_broken Zygote.gradient(loss10, x)[1] == ForwardDiff.gradient(loss10, x)
-@test_broken Zygote.gradient(loss11, x)[1] == ForwardDiff.gradient(loss11, x)
+@test Zygote.gradient(loss10, x)[1] == ForwardDiff.gradient(loss10, x)
+@test Zygote.gradient(loss11, x)[1] == ForwardDiff.gradient(loss11, x)
 
 voa = RecursiveArrayTools.VectorOfArray(fill(rand(3), 3))
 voa_gs, = Zygote.gradient(voa) do x