88using System . Runtime . Intrinsics ;
99using System . Runtime . Intrinsics . Arm ;
1010using System . Runtime . Intrinsics . X86 ;
11+ using SixLabors . ImageSharp . Common . Helpers ;
1112using SixLabors . ImageSharp . PixelFormats ;
1213
1314namespace SixLabors . ImageSharp ;
@@ -95,15 +96,15 @@ public static void Shuffle4Reduce(
9596
9697 /// <summary>
9798 /// Shuffle 8-bit integers <paramref name="source"/>
98- /// using the control and store the results in <paramref name="dest "/>.
99+ /// using the control and store the results in <paramref name="destination "/>.
99100 /// </summary>
100101 /// <param name="source">The source span of bytes.</param>
101- /// <param name="dest ">The destination span of bytes.</param>
102+ /// <param name="destination ">The destination span of bytes.</param>
102103 /// <param name="control">The byte control.</param>
103104 [ MethodImpl ( InliningOptions . ShortMethod ) ]
104105 public static void Shuffle4Reduce (
105106 ref ReadOnlySpan < byte > source ,
106- ref Span < byte > dest ,
107+ ref Span < byte > destination ,
107108 byte control )
108109 {
109110 if ( Vector512 . IsHardwareAccelerated || Vector256 . IsHardwareAccelerated || Vector128 . IsHardwareAccelerated )
@@ -128,29 +129,29 @@ public static void Shuffle4Reduce(
128129 {
129130 Shuffle4 (
130131 source [ ..adjustedCount ] ,
131- dest [ ..adjustedCount ] ,
132+ destination [ ..adjustedCount ] ,
132133 control ) ;
133134
134135 source = source [ adjustedCount ..] ;
135- dest = dest [ adjustedCount ..] ;
136+ destination = destination [ adjustedCount ..] ;
136137 }
137138 }
138139 }
139140
140141 /// <summary>
141142 /// Shuffles 8-bit integer triplets within 128-bit lanes in <paramref name="source"/>
142- /// using the control and store the results in <paramref name="dest "/>.
143+ /// using the control and store the results in <paramref name="destination "/>.
143144 /// </summary>
144145 /// <param name="source">The source span of bytes.</param>
145- /// <param name="dest ">The destination span of bytes.</param>
146+ /// <param name="destination ">The destination span of bytes.</param>
146147 /// <param name="control">The byte control.</param>
147148 [ MethodImpl ( InliningOptions . ShortMethod ) ]
148149 public static void Shuffle3Reduce (
149150 ref ReadOnlySpan < byte > source ,
150- ref Span < byte > dest ,
151+ ref Span < byte > destination ,
151152 byte control )
152153 {
153- if ( Ssse3 . IsSupported )
154+ if ( Vector128 . IsHardwareAccelerated && Vector128Utilities . SupportsRightShift )
154155 {
155156 int remainder = source . Length % ( Vector128 < byte > . Count * 3 ) ;
156157
@@ -160,11 +161,11 @@ public static void Shuffle3Reduce(
160161 {
161162 Shuffle3 (
162163 source [ ..adjustedCount ] ,
163- dest [ ..adjustedCount ] ,
164+ destination [ ..adjustedCount ] ,
164165 control ) ;
165166
166167 source = source [ adjustedCount ..] ;
167- dest = dest [ adjustedCount ..] ;
168+ destination = destination [ adjustedCount ..] ;
168169 }
169170 }
170171 }
@@ -446,24 +447,21 @@ private static void Shuffle4(
446447 [ MethodImpl ( InliningOptions . ShortMethod ) ]
447448 private static void Shuffle3 (
448449 ReadOnlySpan < byte > source ,
449- Span < byte > dest ,
450+ Span < byte > destination ,
450451 byte control )
451452 {
452- if ( Ssse3 . IsSupported )
453+ if ( Vector128 . IsHardwareAccelerated && Vector128Utilities . SupportsRightShift )
453454 {
454- Vector128 < byte > vmask = ShuffleMaskPad4Nx16 ( ) ;
455- Vector128 < byte > vmasko = ShuffleMaskSlice4Nx16 ( ) ;
456- Vector128 < byte > vmaske = Ssse3 . AlignRight ( vmasko , vmasko , 12 ) ;
455+ Vector128 < byte > maskPad4Nx16 = ShuffleMaskPad4Nx16 ( ) ;
456+ Vector128 < byte > maskSlice4Nx16 = ShuffleMaskSlice4Nx16 ( ) ;
457+ Vector128 < byte > maskE = Vector128Utilities . AlignRight ( maskSlice4Nx16 , maskSlice4Nx16 , 12 ) ;
457458
458459 Span < byte > bytes = stackalloc byte [ Vector128 < byte > . Count ] ;
459460 Shuffle . MMShuffleSpan ( ref bytes , control ) ;
460- Vector128 < byte > vshuffle = Unsafe . As < byte , Vector128 < byte > > ( ref MemoryMarshal . GetReference ( bytes ) ) ;
461+ Vector128 < byte > mask = Unsafe . As < byte , Vector128 < byte > > ( ref MemoryMarshal . GetReference ( bytes ) ) ;
461462
462- ref Vector128 < byte > sourceBase =
463- ref Unsafe . As < byte , Vector128 < byte > > ( ref MemoryMarshal . GetReference ( source ) ) ;
464-
465- ref Vector128 < byte > destBase =
466- ref Unsafe . As < byte , Vector128 < byte > > ( ref MemoryMarshal . GetReference ( dest ) ) ;
463+ ref Vector128 < byte > sourceBase = ref Unsafe . As < byte , Vector128 < byte > > ( ref MemoryMarshal . GetReference ( source ) ) ;
464+ ref Vector128 < byte > destinationBase = ref Unsafe . As < byte , Vector128 < byte > > ( ref MemoryMarshal . GetReference ( destination ) ) ;
467465
468466 nuint n = source . Vector128Count < byte > ( ) ;
469467
@@ -472,44 +470,44 @@ private static void Shuffle3(
472470 ref Vector128 < byte > vs = ref Unsafe . Add ( ref sourceBase , i ) ;
473471
474472 Vector128 < byte > v0 = vs ;
475- Vector128 < byte > v1 = Unsafe . Add ( ref vs , 1 ) ;
476- Vector128 < byte > v2 = Unsafe . Add ( ref vs , 2 ) ;
477- Vector128 < byte > v3 = Sse2 . ShiftRightLogical128BitLane ( v2 , 4 ) ;
473+ Vector128 < byte > v1 = Unsafe . Add ( ref vs , ( nuint ) 1 ) ;
474+ Vector128 < byte > v2 = Unsafe . Add ( ref vs , ( nuint ) 2 ) ;
475+ Vector128 < byte > v3 = Vector128Utilities . ShiftRightBytesInVector ( v2 , 4 ) ;
478476
479- v2 = Ssse3 . AlignRight ( v2 , v1 , 8 ) ;
480- v1 = Ssse3 . AlignRight ( v1 , v0 , 12 ) ;
477+ v2 = Vector128Utilities . AlignRight ( v2 , v1 , 8 ) ;
478+ v1 = Vector128Utilities . AlignRight ( v1 , v0 , 12 ) ;
481479
482- v0 = Ssse3 . Shuffle ( Ssse3 . Shuffle ( v0 , vmask ) , vshuffle ) ;
483- v1 = Ssse3 . Shuffle ( Ssse3 . Shuffle ( v1 , vmask ) , vshuffle ) ;
484- v2 = Ssse3 . Shuffle ( Ssse3 . Shuffle ( v2 , vmask ) , vshuffle ) ;
485- v3 = Ssse3 . Shuffle ( Ssse3 . Shuffle ( v3 , vmask ) , vshuffle ) ;
480+ v0 = Vector128 . Shuffle ( Vector128 . Shuffle ( v0 , maskPad4Nx16 ) , mask ) ;
481+ v1 = Vector128 . Shuffle ( Vector128 . Shuffle ( v1 , maskPad4Nx16 ) , mask ) ;
482+ v2 = Vector128 . Shuffle ( Vector128 . Shuffle ( v2 , maskPad4Nx16 ) , mask ) ;
483+ v3 = Vector128 . Shuffle ( Vector128 . Shuffle ( v3 , maskPad4Nx16 ) , mask ) ;
486484
487- v0 = Ssse3 . Shuffle ( v0 , vmaske ) ;
488- v1 = Ssse3 . Shuffle ( v1 , vmasko ) ;
489- v2 = Ssse3 . Shuffle ( v2 , vmaske ) ;
490- v3 = Ssse3 . Shuffle ( v3 , vmasko ) ;
485+ v0 = Vector128 . Shuffle ( v0 , maskE ) ;
486+ v1 = Vector128 . Shuffle ( v1 , maskSlice4Nx16 ) ;
487+ v2 = Vector128 . Shuffle ( v2 , maskE ) ;
488+ v3 = Vector128 . Shuffle ( v3 , maskSlice4Nx16 ) ;
491489
492- v0 = Ssse3 . AlignRight ( v1 , v0 , 4 ) ;
493- v3 = Ssse3 . AlignRight ( v3 , v2 , 12 ) ;
490+ v0 = Vector128Utilities . AlignRight ( v1 , v0 , 4 ) ;
491+ v3 = Vector128Utilities . AlignRight ( v3 , v2 , 12 ) ;
494492
495- v1 = Sse2 . ShiftLeftLogical128BitLane ( v1 , 4 ) ;
496- v2 = Sse2 . ShiftRightLogical128BitLane ( v2 , 4 ) ;
493+ v1 = Vector128Utilities . ShiftLeftBytesInVector ( v1 , 4 ) ;
494+ v2 = Vector128Utilities . ShiftRightBytesInVector ( v2 , 4 ) ;
497495
498- v1 = Ssse3 . AlignRight ( v2 , v1 , 8 ) ;
496+ v1 = Vector128Utilities . AlignRight ( v2 , v1 , 8 ) ;
499497
500- ref Vector128 < byte > vd = ref Unsafe . Add ( ref destBase , i ) ;
498+ ref Vector128 < byte > vd = ref Unsafe . Add ( ref destinationBase , i ) ;
501499
502500 vd = v0 ;
503- Unsafe . Add ( ref vd , 1 ) = v1;
504- Unsafe . Add ( ref vd , 2 ) = v3;
501+ Unsafe . Add ( ref vd , ( nuint ) 1 ) = v1;
502+ Unsafe . Add ( ref vd , ( nuint ) 2 ) = v3;
505503 }
506504 }
507505 }
508506
509507 [ MethodImpl ( InliningOptions . ShortMethod ) ]
510508 private static void Pad3Shuffle4 (
511509 ReadOnlySpan < byte > source ,
512- Span < byte > dest ,
510+ Span < byte > destination ,
513511 byte control )
514512 {
515513 if ( Ssse3 . IsSupported )
@@ -525,7 +523,7 @@ private static void Pad3Shuffle4(
525523 ref Unsafe . As < byte , Vector128 < byte > > ( ref MemoryMarshal . GetReference ( source ) ) ;
526524
527525 ref Vector128 < byte > destBase =
528- ref Unsafe . As < byte , Vector128 < byte > > ( ref MemoryMarshal . GetReference ( dest ) ) ;
526+ ref Unsafe . As < byte , Vector128 < byte > > ( ref MemoryMarshal . GetReference ( destination ) ) ;
529527
530528 nuint n = source . Vector128Count < byte > ( ) ;
531529
0 commit comments