66using System . Runtime . CompilerServices ;
77using System . Runtime . InteropServices ;
88using System . Runtime . Intrinsics ;
9+ using System . Runtime . Intrinsics . Arm ;
910using System . Runtime . Intrinsics . X86 ;
11+ using SixLabors . ImageSharp . Common . Helpers ;
1012using SixLabors . ImageSharp . Formats . Webp . BitReader ;
1113using SixLabors . ImageSharp . Formats . Webp . Lossless ;
1214using SixLabors . ImageSharp . Memory ;
@@ -311,8 +313,7 @@ private static void ColorIndexInverseTransformAlpha(
311313
312314 private static void HorizontalUnfilter ( Span < byte > prev , Span < byte > input , Span < byte > dst , int width )
313315 {
314- // TODO: Investigate AdvSimd support for this method.
315- if ( Sse2 . IsSupported && width >= 9 )
316+ if ( ( Sse2 . IsSupported || AdvSimd . IsSupported ) && width >= 9 )
316317 {
317318 dst [ 0 ] = ( byte ) ( input [ 0 ] + ( prev . IsEmpty ? 0 : prev [ 0 ] ) ) ;
318319 nuint i ;
@@ -323,17 +324,17 @@ private static void HorizontalUnfilter(Span<byte> prev, Span<byte> input, Span<b
323324 for ( i = 1 ; i <= ( uint ) width - 8 ; i += 8 )
324325 {
325326 Vector128 < long > a0 = Vector128 . Create ( Unsafe . As < byte , long > ( ref Unsafe . Add ( ref srcRef , i ) ) , 0 ) ;
326- Vector128 < byte > a1 = Sse2 . Add ( a0 . AsByte ( ) , last . AsByte ( ) ) ;
327- Vector128 < byte > a2 = Sse2 . ShiftLeftLogical128BitLane ( a1 , 1 ) ;
328- Vector128 < byte > a3 = Sse2 . Add ( a1 , a2 ) ;
329- Vector128 < byte > a4 = Sse2 . ShiftLeftLogical128BitLane ( a3 , 2 ) ;
330- Vector128 < byte > a5 = Sse2 . Add ( a3 , a4 ) ;
331- Vector128 < byte > a6 = Sse2 . ShiftLeftLogical128BitLane ( a5 , 4 ) ;
332- Vector128 < byte > a7 = Sse2 . Add ( a5 , a6 ) ;
327+ Vector128 < byte > a1 = a0 . AsByte ( ) + last . AsByte ( ) ;
328+ Vector128 < byte > a2 = Vector128Utilities . ShiftLeftBytesInVector ( a1 , 1 ) ;
329+ Vector128 < byte > a3 = a1 + a2 ;
330+ Vector128 < byte > a4 = Vector128Utilities . ShiftLeftBytesInVector ( a3 , 2 ) ;
331+ Vector128 < byte > a5 = a3 + a4 ;
332+ Vector128 < byte > a6 = Vector128Utilities . ShiftLeftBytesInVector ( a5 , 4 ) ;
333+ Vector128 < byte > a7 = a5 + a6 ;
333334
334335 ref byte outputRef = ref Unsafe . Add ( ref dstRef , i ) ;
335336 Unsafe . As < byte , Vector64 < byte > > ( ref outputRef ) = a7. GetLower ( ) ;
336- last = Sse2 . ShiftRightLogical ( a7 . AsInt64 ( ) , 56 ) . AsInt32 ( ) ;
337+ last = Vector128 . ShiftRightLogical ( a7 . AsInt64 ( ) , 56 ) . AsInt32 ( ) ;
337338 }
338339
339340 for ( ; i < ( uint ) width ; ++ i )
0 commit comments