@@ -71,10 +71,10 @@ public static (bool Difference, Rectangle Bounds) DeDuplicatePixels<TPixel>(
7171 PixelOperations < TPixel > . Instance . ToRgba32 ( configuration , nextFrame . DangerousGetPixelRowMemory ( y ) . Span , next ) ;
7272 }
7373
74- ref Vector256 < byte > previousBase = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( previous ) ) ;
75- ref Vector256 < byte > currentBase = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( current ) ) ;
76- ref Vector256 < byte > nextBase = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( next ) ) ;
77- ref Vector256 < byte > resultBase = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( result ) ) ;
74+ ref Vector256 < byte > previousBase256 = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( previous ) ) ;
75+ ref Vector256 < byte > currentBase256 = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( current ) ) ;
76+ ref Vector256 < byte > nextBase256 = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( next ) ) ;
77+ ref Vector256 < byte > resultBase256 = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( result ) ) ;
7878
7979 int i = 0 ;
8080 uint x = 0 ;
@@ -93,19 +93,19 @@ public static (bool Difference, Rectangle Bounds) DeDuplicatePixels<TPixel>(
9393
9494 while ( remaining >= 8 )
9595 {
96- Vector256 < uint > p = Unsafe . Add ( ref previousBase , x ) . AsUInt32 ( ) ;
97- Vector256 < uint > c = Unsafe . Add ( ref currentBase , x ) . AsUInt32 ( ) ;
96+ Vector256 < uint > p = Unsafe . Add ( ref previousBase256 , x ) . AsUInt32 ( ) ;
97+ Vector256 < uint > c = Unsafe . Add ( ref currentBase256 , x ) . AsUInt32 ( ) ;
9898
9999 Vector256 < uint > eq = Avx2 . CompareEqual ( p , c ) ;
100100 Vector256 < uint > r = Avx2 . BlendVariable ( c , r256 , Avx2 . And ( eq , vmb256 ) ) ;
101101
102102 if ( nextFrame != null )
103103 {
104- Vector256 < int > n = Avx2 . ShiftRightLogical ( Unsafe . Add ( ref nextBase , x ) . AsUInt32 ( ) , 24 ) . AsInt32 ( ) ;
104+ Vector256 < int > n = Avx2 . ShiftRightLogical ( Unsafe . Add ( ref nextBase256 , x ) . AsUInt32 ( ) , 24 ) . AsInt32 ( ) ;
105105 eq = Avx2 . AndNot ( Avx2 . CompareGreaterThan ( Avx2 . ShiftRightLogical ( c , 24 ) . AsInt32 ( ) , n ) . AsUInt32 ( ) , eq ) ;
106106 }
107107
108- Unsafe . Add ( ref resultBase , x ) = r. AsByte ( ) ;
108+ Unsafe . Add ( ref resultBase256 , x ) = r. AsByte ( ) ;
109109
110110 uint msk = ( uint ) Avx2 . MoveMask ( eq . AsByte ( ) ) ;
111111 msk = ~ msk ;
@@ -128,9 +128,10 @@ public static (bool Difference, Rectangle Bounds) DeDuplicatePixels<TPixel>(
128128 }
129129 }
130130
131- // TODO: There's a bug here. See WebpEncoderTests.Encode_AnimatedLossless
132- if ( Sse2 . IsSupported && remaining >= 4 && false )
131+ if ( Sse2 . IsSupported && remaining >= 4 )
133132 {
133+ // The AVX2 loop above indexes x in Vector256 units (8 pixels each), while this loop indexes in Vector128 units (4 pixels each), so double x to resume at the same pixel.
134+ x *= 2 ;
134135 Vector128 < uint > r128 = previousFrame != null ? Vector128 . Create ( bg . PackedValue ) : Vector128 < uint > . Zero ;
135136 Vector128 < uint > vmb128 = Vector128 < uint > . Zero ;
136137 if ( blend )
@@ -140,19 +141,19 @@ public static (bool Difference, Rectangle Bounds) DeDuplicatePixels<TPixel>(
140141
141142 while ( remaining >= 4 )
142143 {
143- Vector128 < uint > p = Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref previousBase ) , x ) ;
144- Vector128 < uint > c = Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref currentBase ) , x ) ;
144+ Vector128 < uint > p = Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref previousBase256 ) , x ) ;
145+ Vector128 < uint > c = Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref currentBase256 ) , x ) ;
145146
146147 Vector128 < uint > eq = Sse2 . CompareEqual ( p , c ) ;
147148 Vector128 < uint > r = SimdUtils . HwIntrinsics . BlendVariable ( c , r128 , Sse2 . And ( eq , vmb128 ) ) ;
148149
149150 if ( nextFrame != null )
150151 {
151- Vector128 < int > n = Sse2 . ShiftRightLogical ( Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref nextBase ) , x ) , 24 ) . AsInt32 ( ) ;
152+ Vector128 < int > n = Sse2 . ShiftRightLogical ( Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref nextBase256 ) , x ) , 24 ) . AsInt32 ( ) ;
152153 eq = Sse2 . AndNot ( Sse2 . CompareGreaterThan ( Sse2 . ShiftRightLogical ( c , 24 ) . AsInt32 ( ) , n ) . AsUInt32 ( ) , eq ) ;
153154 }
154155
155- Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref resultBase ) , x ) = r;
156+ Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref resultBase256 ) , x ) = r;
156157
157158 ushort msk = ( ushort ) ( uint ) Sse2 . MoveMask ( eq . AsByte ( ) ) ;
158159 msk = ( ushort ) ~ msk ;
0 commit comments