|
4 | 4 | using System.Runtime.CompilerServices; |
5 | 5 | using System.Runtime.InteropServices; |
6 | 6 | using System.Runtime.Intrinsics; |
| 7 | +using System.Runtime.Intrinsics.Arm; |
7 | 8 | using System.Runtime.Intrinsics.X86; |
8 | 9 | using SixLabors.ImageSharp.PixelFormats; |
9 | 10 |
|
@@ -221,7 +222,7 @@ private static void Shuffle4( |
221 | 222 | ref Vector256<float> destBase = |
222 | 223 | ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest)); |
223 | 224 |
|
224 | | - nint n = (nint)(uint)(dest.Length / Vector256<float>.Count); |
| 225 | + nint n = (nint)dest.Vector256Count<float>(); |
225 | 226 | nint m = Numerics.Modulo4(n); |
226 | 227 | nint u = n - m; |
227 | 228 |
|
@@ -391,7 +392,7 @@ private static void Shuffle3( |
391 | 392 | ref Vector128<byte> destBase = |
392 | 393 | ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest)); |
393 | 394 |
|
394 | | - nuint n = (uint)source.Length / (uint)Vector128<byte>.Count; |
| 395 | + nuint n = source.Vector128Count<byte>(); |
395 | 396 |
|
396 | 397 | for (nuint i = 0; i < n; i += 3) |
397 | 398 | { |
@@ -454,7 +455,7 @@ private static void Pad3Shuffle4( |
454 | 455 | ref Vector128<byte> destBase = |
455 | 456 | ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest)); |
456 | 457 |
|
457 | | - nuint n = (uint)source.Length / (uint)Vector128<byte>.Count; |
| 458 | + nuint n = source.Vector128Count<byte>(); |
458 | 459 |
|
459 | 460 | for (nuint i = 0, j = 0; i < n; i += 3, j += 4) |
460 | 461 | { |
@@ -498,7 +499,7 @@ private static void Shuffle4Slice3( |
498 | 499 | ref Vector128<byte> destBase = |
499 | 500 | ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest)); |
500 | 501 |
|
501 | | - nuint n = (uint)source.Length / (uint)Vector128<byte>.Count; |
| 502 | + nuint n = source.Vector128Count<byte>(); |
502 | 503 |
|
503 | 504 | for (nuint i = 0, j = 0; i < n; i += 4, j += 3) |
504 | 505 | { |
@@ -554,6 +555,34 @@ public static Vector256<float> MultiplyAdd( |
554 | 555 | return Avx.Add(Avx.Multiply(vm0, vm1), va); |
555 | 556 | } |
556 | 557 |
|
| 558 | + /// <summary> |
| 559 | + /// Multiplies two <see cref="Vector128{Single}"/> values and adds a third: FMA when supported, else AdvSimd, else SSE. |
| 560 | + /// TODO: Fix. The addend is the first parameter here but the last argument of the FMA intrinsic call. |
| 561 | + /// </summary> |
| 562 | + /// <remarks>ret = (vm0 * vm1) + va</remarks> |
| 563 | + /// <param name="va">The vector to add to the intermediate result.</param> |
| 564 | + /// <param name="vm0">The first vector to multiply.</param> |
| 565 | + /// <param name="vm1">The second vector to multiply.</param> |
| 566 | + /// <returns>The <see cref="Vector128{Single}"/> holding (vm0 * vm1) + va.</returns> |
| 567 | + [MethodImpl(InliningOptions.AlwaysInline)] |
| 568 | + public static Vector128<float> MultiplyAdd( |
| 569 | + Vector128<float> va, |
| 570 | + Vector128<float> vm0, |
| 571 | + Vector128<float> vm1) |
| 572 | + { |
| 573 | + if (Fma.IsSupported) |
| 574 | + { |
| 575 | + return Fma.MultiplyAdd(vm1, vm0, va); |
| 576 | + } |
| 577 | + |
| 578 | + if (AdvSimd.IsSupported) |
| 579 | + { |
| 580 | + return AdvSimd.Add(AdvSimd.Multiply(vm0, vm1), va); |
| 581 | + } |
| 582 | + |
| 583 | + return Sse.Add(Sse.Multiply(vm0, vm1), va); |
| 584 | + } |
| 585 | + |
557 | 586 | /// <summary> |
558 | 587 | /// Performs a multiplication and a subtraction of the <see cref="Vector256{Single}"/>. |
559 | 588 | /// TODO: Fix. The arguments are in a different order to the FMA intrinsic. |
@@ -650,7 +679,7 @@ internal static unsafe void ByteToNormalizedFloat( |
650 | 679 | { |
651 | 680 | VerifySpanInput(source, dest, Vector256<byte>.Count); |
652 | 681 |
|
653 | | - nuint n = (uint)dest.Length / (uint)Vector256<byte>.Count; |
| 682 | + nuint n = dest.Vector256Count<byte>(); |
654 | 683 |
|
655 | 684 | ref Vector256<float> destBase = |
656 | 685 | ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest)); |
@@ -683,7 +712,7 @@ internal static unsafe void ByteToNormalizedFloat( |
683 | 712 | // Sse |
684 | 713 | VerifySpanInput(source, dest, Vector128<byte>.Count); |
685 | 714 |
|
686 | | - nuint n = (uint)dest.Length / (uint)Vector128<byte>.Count; |
| 715 | + nuint n = dest.Vector128Count<byte>(); |
687 | 716 |
|
688 | 717 | ref Vector128<float> destBase = |
689 | 718 | ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest)); |
@@ -782,7 +811,7 @@ internal static void NormalizedFloatToByteSaturate( |
782 | 811 | { |
783 | 812 | VerifySpanInput(source, dest, Vector256<byte>.Count); |
784 | 813 |
|
785 | | - nuint n = (uint)dest.Length / (uint)Vector256<byte>.Count; |
| 814 | + nuint n = dest.Vector256Count<byte>(); |
786 | 815 |
|
787 | 816 | ref Vector256<float> sourceBase = |
788 | 817 | ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source)); |
@@ -821,7 +850,7 @@ internal static void NormalizedFloatToByteSaturate( |
821 | 850 | // Sse |
822 | 851 | VerifySpanInput(source, dest, Vector128<byte>.Count); |
823 | 852 |
|
824 | | - nuint n = (uint)dest.Length / (uint)Vector128<byte>.Count; |
| 853 | + nuint n = dest.Vector128Count<byte>(); |
825 | 854 |
|
826 | 855 | ref Vector128<float> sourceBase = |
827 | 856 | ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source)); |
@@ -864,7 +893,7 @@ internal static void PackFromRgbPlanesAvx2Reduce( |
864 | 893 | ref Vector256<byte> bBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(blueChannel)); |
865 | 894 | ref byte dBase = ref Unsafe.As<Rgb24, byte>(ref MemoryMarshal.GetReference(destination)); |
866 | 895 |
|
867 | | - nuint count = (uint)redChannel.Length / (uint)Vector256<byte>.Count; |
| 896 | + nuint count = redChannel.Vector256Count<byte>(); |
868 | 897 |
|
869 | 898 | ref byte control1Bytes = ref MemoryMarshal.GetReference(PermuteMaskEvenOdd8x32); |
870 | 899 | Vector256<uint> control1 = Unsafe.As<byte, Vector256<uint>>(ref control1Bytes); |
@@ -936,7 +965,7 @@ internal static void PackFromRgbPlanesAvx2Reduce( |
936 | 965 | ref Vector256<byte> bBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(blueChannel)); |
937 | 966 | ref Vector256<byte> dBase = ref Unsafe.As<Rgba32, Vector256<byte>>(ref MemoryMarshal.GetReference(destination)); |
938 | 967 |
|
939 | | - nuint count = (uint)redChannel.Length / (uint)Vector256<byte>.Count; |
| 968 | + nuint count = redChannel.Vector256Count<byte>(); |
940 | 969 | ref byte control1Bytes = ref MemoryMarshal.GetReference(PermuteMaskEvenOdd8x32); |
941 | 970 | Vector256<uint> control1 = Unsafe.As<byte, Vector256<uint>>(ref control1Bytes); |
942 | 971 | var a = Vector256.Create((byte)255); |
|
0 commit comments