11// Copyright (c) Six Labors.
22// Licensed under the Six Labors Split License.
33
4- using System . Buffers . Binary ;
5- using System . Numerics ;
6- using System . Runtime . CompilerServices ;
7- using System . Runtime . InteropServices ;
8- using static SixLabors . ImageSharp . SimdUtils ;
9-
104// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
115// and ROTR (Rotate Right) emitting efficient CPU instructions:
126// https://github.com/dotnet/coreclr/pull/1830
@@ -20,7 +14,7 @@ internal interface IComponentShuffle
2014{
2115 /// <summary>
2216 /// Shuffles then slices 8-bit integers in <paramref name="source"/>
23- /// using the control and store the results in <paramref name="destination"/>.
17+ /// using a byte control and store the results in <paramref name="destination"/>.
2418 /// If successful, this method will reduce the length of <paramref name="source"/>
2519 /// by the shuffle amount.
2620 /// </summary>
@@ -40,168 +34,3 @@ internal interface IComponentShuffle
4034 /// </remarks>
4135 void Shuffle ( ReadOnlySpan < byte > source , Span < byte > destination ) ;
4236}
43-
/// <summary>
/// Contract derived from <see cref="IComponentShuffle"/> that declares no members of its own.
/// The shuffle structs in this file implement it; each of them moves bytes in groups of four.
/// </summary>
internal interface IShuffle4 : IComponentShuffle
{
}
48-
/// <summary>
/// 4-byte-group shuffle whose permutation is chosen by a control byte supplied at construction.
/// </summary>
/// <param name="control">
/// The control byte. It is passed unchanged to <c>HwIntrinsics.Shuffle4Reduce</c> and decoded by
/// <c>SimdUtils.Shuffle.InverseMMShuffle</c> for the scalar path.
/// </param>
internal readonly struct DefaultShuffle4(byte control) : IShuffle4
{
    /// <summary>
    /// Gets the control byte this instance was created with.
    /// </summary>
    public byte Control { get; } = control;

    /// <summary>
    /// Forwards both spans, by reference, to <c>HwIntrinsics.Shuffle4Reduce</c> together with
    /// <see cref="Control"/>.
    /// </summary>
    /// <param name="source">The source bytes, passed by reference to the intrinsic helper.</param>
    /// <param name="destination">The destination bytes, passed by reference to the intrinsic helper.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
        => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, this.Control);

    /// <summary>
    /// Scalar shuffle: for every complete group of four source bytes, writes
    /// <c>destination[i + k] = source[i + pk]</c> for <c>k = 0..3</c>, where <c>p0..p3</c> are the
    /// indices decoded from <see cref="Control"/>.
    /// </summary>
    /// <param name="source">The bytes to read. Trailing bytes that do not form a full group of four are ignored.</param>
    /// <param name="destination">
    /// The bytes to write. Accesses are not bounds-checked, so it must be at least as long as the
    /// processed part of <paramref name="source"/>.
    /// </param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        ref byte sBase = ref MemoryMarshal.GetReference(source);
        ref byte dBase = ref MemoryMarshal.GetReference(destination);

        // NOTE(review): each decoded index is presumably in the range 0..3 - confirm against InverseMMShuffle.
        SimdUtils.Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0);

        // Round the length down to whole groups of four. Without this, a length that is not a
        // multiple of four would make the last iteration touch memory beyond both spans, because
        // Unsafe.Add performs no bounds checks. The fixed-pattern shuffles in this file truncate
        // the same way via Length / 4.
        nuint length = (uint)source.Length & ~3u;

        for (nuint i = 0; i < length; i += 4)
        {
            Unsafe.Add(ref dBase, i + 0) = Unsafe.Add(ref sBase, p0 + i);
            Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
            Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
            Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i);
        }
    }
}
74-
/// <summary>
/// Fixed-pattern shuffle that rotates every 32-bit word of the input left by 8 bits.
/// </summary>
internal readonly struct WXYZShuffle4 : IShuffle4
{
    /// <summary>
    /// Forwards both spans to <c>HwIntrinsics.Shuffle4Reduce</c> with the
    /// <c>MMShuffle2103</c> control.
    /// </summary>
    /// <param name="source">The source bytes, passed by reference to the intrinsic helper.</param>
    /// <param name="destination">The destination bytes, passed by reference to the intrinsic helper.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
    {
        HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle2103);
    }

    /// <summary>
    /// Scalar path: reinterprets both spans as 32-bit words and writes each source word,
    /// rotated left by 8 bits, to the same word index in <paramref name="destination"/>.
    /// </summary>
    /// <param name="source">The bytes to read; only <c>Length / 4</c> whole words are processed.</param>
    /// <param name="destination">The bytes to write. Accesses are not bounds-checked.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        ref uint input = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
        ref uint output = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(destination));
        nuint wordCount = (uint)source.Length >> 2;

        for (nuint index = 0; index < wordCount; index++)
        {
            // Labelling the bytes of the word, most significant first, as [W Z Y X],
            // a left rotation by 8 bits yields [Z Y X W].
            uint word = Unsafe.Add(ref input, index);
            Unsafe.Add(ref output, index) = BitOperations.RotateLeft(word, 8);
        }
    }
}
98-
/// <summary>
/// Fixed-pattern shuffle that reverses the byte order of every 32-bit word of the input.
/// </summary>
internal readonly struct WZYXShuffle4 : IShuffle4
{
    /// <summary>
    /// Forwards both spans to <c>HwIntrinsics.Shuffle4Reduce</c> with the
    /// <c>MMShuffle0123</c> control.
    /// </summary>
    /// <param name="source">The source bytes, passed by reference to the intrinsic helper.</param>
    /// <param name="destination">The destination bytes, passed by reference to the intrinsic helper.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
    {
        HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle0123);
    }

    /// <summary>
    /// Scalar path: reinterprets both spans as 32-bit words and writes each source word,
    /// with its four bytes in reverse order, to the same word index in <paramref name="destination"/>.
    /// </summary>
    /// <param name="source">The bytes to read; only <c>Length / 4</c> whole words are processed.</param>
    /// <param name="destination">The bytes to write. Accesses are not bounds-checked.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        ref uint input = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
        ref uint output = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(destination));
        nuint wordCount = (uint)source.Length >> 2;

        nuint index = 0;
        while (index < wordCount)
        {
            // Labelling the bytes of the word, most significant first, as [W Z Y X],
            // reversing them yields [X Y Z W].
            uint word = Unsafe.Add(ref input, index);
            Unsafe.Add(ref output, index) = BinaryPrimitives.ReverseEndianness(word);
            index++;
        }
    }
}
122-
/// <summary>
/// Fixed-pattern shuffle that rotates every 32-bit word of the input right by 8 bits.
/// </summary>
internal readonly struct YZWXShuffle4 : IShuffle4
{
    /// <summary>
    /// Forwards both spans to <c>HwIntrinsics.Shuffle4Reduce</c> with the
    /// <c>MMShuffle0321</c> control.
    /// </summary>
    /// <param name="source">The source bytes, passed by reference to the intrinsic helper.</param>
    /// <param name="destination">The destination bytes, passed by reference to the intrinsic helper.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
    {
        HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle0321);
    }

    /// <summary>
    /// Scalar path: reinterprets both spans as 32-bit words and writes each source word,
    /// rotated right by 8 bits, to the same word index in <paramref name="destination"/>.
    /// </summary>
    /// <param name="source">The bytes to read; only <c>Length / 4</c> whole words are processed.</param>
    /// <param name="destination">The bytes to write. Accesses are not bounds-checked.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        ref uint input = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
        ref uint output = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(destination));
        nuint wordCount = (uint)source.Length >> 2;

        for (nuint index = 0; index < wordCount; index++)
        {
            // Labelling the bytes of the word, most significant first, as [W Z Y X],
            // a right rotation by 8 bits moves the lowest byte to the top: [X W Z Y].
            uint word = Unsafe.Add(ref input, index);
            uint rotated = BitOperations.RotateRight(word, 8);
            Unsafe.Add(ref output, index) = rotated;
        }
    }
}
146-
/// <summary>
/// Fixed-pattern shuffle that, in every 32-bit word of the input, swaps the lowest byte with the
/// third-lowest byte and leaves the other two bytes in place.
/// </summary>
internal readonly struct ZYXWShuffle4 : IShuffle4
{
    /// <summary>
    /// Forwards both spans to <c>HwIntrinsics.Shuffle4Reduce</c> with the
    /// <c>MMShuffle3012</c> control.
    /// </summary>
    /// <param name="source">The source bytes, passed by reference to the intrinsic helper.</param>
    /// <param name="destination">The destination bytes, passed by reference to the intrinsic helper.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
    {
        HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle3012);
    }

    /// <summary>
    /// Scalar path: reinterprets both spans as 32-bit words and writes each source word, with
    /// bytes 0 and 2 exchanged, to the same word index in <paramref name="destination"/>.
    /// </summary>
    /// <param name="source">The bytes to read; only <c>Length / 4</c> whole words are processed.</param>
    /// <param name="destination">The bytes to write. Accesses are not bounds-checked.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        const uint KeepMask = 0xFF00FF00;
        const uint SwapMask = 0x00FF00FF;

        ref uint input = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
        ref uint output = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(destination));
        nuint wordCount = (uint)source.Length >> 2;

        for (nuint index = 0; index < wordCount; index++)
        {
            uint word = Unsafe.Add(ref input, index);

            // Bytes listed most significant first:
            // word                = [W Z Y X]
            // kept                = [W 0 Y 0]
            // ROTL(16, [0 Z 0 X]) = [0 X 0 Z]
            // kept | swapped      = [W X Y Z]
            uint kept = word & KeepMask;
            uint swapped = BitOperations.RotateLeft(word & SwapMask, 16);

            // The two halves occupy disjoint bytes, so OR gives the same result as addition.
            Unsafe.Add(ref output, index) = kept | swapped;
        }
    }
}
177-
/// <summary>
/// Fixed-pattern shuffle that, in every 32-bit word of the input, swaps the highest byte with the
/// second-lowest byte and leaves the other two bytes in place.
/// </summary>
internal readonly struct XWZYShuffle4 : IShuffle4
{
    /// <summary>
    /// Forwards both spans to <c>HwIntrinsics.Shuffle4Reduce</c> with the
    /// <c>MMShuffle1230</c> control.
    /// </summary>
    /// <param name="source">The source bytes, passed by reference to the intrinsic helper.</param>
    /// <param name="destination">The destination bytes, passed by reference to the intrinsic helper.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
    {
        HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle1230);
    }

    /// <summary>
    /// Scalar path: reinterprets both spans as 32-bit words and writes each source word, with
    /// bytes 1 and 3 exchanged, to the same word index in <paramref name="destination"/>.
    /// </summary>
    /// <param name="source">The bytes to read; only <c>Length / 4</c> whole words are processed.</param>
    /// <param name="destination">The bytes to write. Accesses are not bounds-checked.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        const uint KeepMask = 0x00FF00FF;
        const uint SwapMask = 0xFF00FF00;

        ref uint input = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
        ref uint output = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(destination));
        nuint wordCount = (uint)source.Length >> 2;

        for (nuint index = 0; index < wordCount; index++)
        {
            uint word = Unsafe.Add(ref input, index);

            // Bytes listed most significant first:
            // word                = [W Z Y X]
            // kept                = [0 Z 0 X]
            // ROTL(16, [W 0 Y 0]) = [Y 0 W 0]
            // kept | swapped      = [Y Z W X]
            uint kept = word & KeepMask;
            uint swapped = BitOperations.RotateLeft(word & SwapMask, 16);

            // The two halves occupy disjoint bytes, so OR gives the same result as addition.
            Unsafe.Add(ref output, index) = kept | swapped;
        }
    }
}
0 commit comments