|
| 1 | +// Copyright (c) Six Labors. |
| 2 | +// Licensed under the Six Labors Split License. |
| 3 | + |
| 4 | +using System.Runtime.CompilerServices; |
| 5 | +using System.Runtime.InteropServices; |
| 6 | +using System.Runtime.Intrinsics; |
| 7 | +using System.Runtime.Intrinsics.X86; |
| 8 | +using BenchmarkDotNet.Attributes; |
| 9 | + |
| 10 | +namespace SixLabors.ImageSharp.Drawing.Benchmarks.Drawing; |
/// <summary>
/// Benchmarks rounding the Y component of a vertex list to multiples of <c>1 / subsamplingRatio</c>,
/// comparing a plain scalar loop (baseline) against an AVX-vectorized implementation.
/// </summary>
public class Rounding
{
    // Number of vertices generated for each benchmark run.
    private const int VertexCount = 1000;

    // Ratio handed to both implementations. Being a power of two, its reciprocal is exactly
    // representable, so multiplying by it gives the same result as dividing.
    private const float SubsamplingRatio = 16F;

    private PointF[] vertices;
    private float[] destination;
    private float[] destinationAvx;

    /// <summary>
    /// Allocates the buffers and fills the vertices with deterministic pseudo-random
    /// coordinates in [0, 1) using a fixed seed.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        this.vertices = new PointF[VertexCount];
        this.destination = new float[this.vertices.Length];
        this.destinationAvx = new float[this.vertices.Length];
        Random r = new(42);
        for (int i = 0; i < this.vertices.Length; i++)
        {
            this.vertices[i] = new PointF((float)r.NextDouble(), (float)r.NextDouble());
        }
    }

    /// <summary>Vectorized implementation (falls back to the scalar loop when AVX is unavailable).</summary>
    [Benchmark]
    public void RoundYAvx() => RoundYAvx(this.vertices, this.destinationAvx, SubsamplingRatio);

    /// <summary>Scalar reference implementation.</summary>
    [Benchmark(Baseline = true)]
    public void RoundY() => RoundY(this.vertices, this.destination, SubsamplingRatio);

    /// <summary>
    /// Writes <c>round(vertices[i].Y * subsamplingRatio) / subsamplingRatio</c> into
    /// <paramref name="destination"/>, rounding midpoints away from zero. Blocks of 8 vertices are
    /// processed with AVX when supported; any remaining vertices go through the scalar loop.
    /// </summary>
    /// <param name="vertices">The source vertices.</param>
    /// <param name="destination">Receives one value per vertex; must be at least as long as <paramref name="vertices"/>.</param>
    /// <param name="subsamplingRatio">The scale applied before rounding and removed afterwards.</param>
    /// <exception cref="ArgumentException"><paramref name="destination"/> is shorter than <paramref name="vertices"/>.</exception>
    private static void RoundYAvx(ReadOnlySpan<PointF> vertices, Span<float> destination, float subsamplingRatio)
    {
        // The vector path writes through unchecked references, so validate the length up front.
        if (destination.Length < vertices.Length)
        {
            throw new ArgumentException("Destination must hold one value per vertex.", nameof(destination));
        }

        int ri = 0;
        if (Avx.IsSupported)
        {
            // Each iteration reads two Vector256<float> from the vertices (8 points, reinterpreted as
            // 16 consecutive floats: X, Y, X, Y, ...) and writes one Vector256<float> of Y values.
            // Only the largest prefix that is a multiple of 8 points is vectorized.
            int pointsPerIteration = Vector256<float>.Count;
            int iterations = vertices.Length / pointsPerIteration;

            if (iterations > 0)
            {
                // The scalar loop below resumes where the vector loop stops.
                ri = iterations * pointsPerIteration;

                ref Vector256<float> sourceBase = ref Unsafe.As<PointF, Vector256<float>>(ref MemoryMarshal.GetReference(vertices));
                ref Vector256<float> destinationBase = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(destination));

                Vector256<float> ssRatio = Vector256.Create(subsamplingRatio);

                // NOTE: multiplying by the reciprocal matches the scalar division exactly when the ratio
                // is a power of two (as in this benchmark); other ratios may differ in the last bit.
                Vector256<float> inverseSsRatio = Vector256.Create(1F / subsamplingRatio);

                // Constants for round-half-away-from-zero: truncate(x + copysign(0.49999997, x)).
                // 0.49999997 is the largest float below 0.5, so values just under .5 are not pushed up.
                Vector256<float> signMask = Vector256.Create(-0.0F);
                Vector256<float> almostHalf = Vector256.Create(0.49999997F);

                for (nint i = 0, j = 0; i < iterations; i++, j += 2)
                {
                    // Load 8 PointF
                    Vector256<float> points1 = Unsafe.Add(ref sourceBase, j);
                    Vector256<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

                    // Shuffle the points to group the Y properties: elements 1 and 3 of each 128-bit half.
                    Vector128<float> points1Y = Sse.Shuffle(points1.GetLower(), points1.GetUpper(), 0b11_01_11_01);
                    Vector128<float> points2Y = Sse.Shuffle(points2.GetLower(), points2.GetUpper(), 0b11_01_11_01);
                    Vector256<float> pointsY = Vector256.Create(points1Y, points2Y);

                    // Multiply by the subsampling ratio, round, then multiply by the inverted subsampling ratio and assign.
                    // https://www.ocf.berkeley.edu/~horie/rounding.html
                    // Midpoints are rounded away from zero so the result agrees with the scalar path.
                    Vector256<float> scaled = Avx.Multiply(pointsY, ssRatio);
                    Vector256<float> bias = Avx.Or(Avx.And(scaled, signMask), almostHalf);
                    Vector256<float> rounded = Avx.RoundToZero(Avx.Add(scaled, bias));
                    Unsafe.Add(ref destinationBase, i) = Avx.Multiply(rounded, inverseSsRatio);
                }
            }
        }

        // Scalar tail (or the whole input when AVX is not supported).
        for (; ri < vertices.Length; ri++)
        {
            destination[ri] = MathF.Round(vertices[ri].Y * subsamplingRatio, MidpointRounding.AwayFromZero) / subsamplingRatio;
        }
    }

    /// <summary>
    /// Scalar reference: writes <c>round(vertices[i].Y * subsamplingRatio) / subsamplingRatio</c>
    /// into <paramref name="destination"/>, rounding midpoints away from zero.
    /// </summary>
    /// <param name="vertices">The source vertices.</param>
    /// <param name="destination">Receives one value per vertex.</param>
    /// <param name="subsamplingRatio">The scale applied before rounding and removed afterwards.</param>
    private static void RoundY(ReadOnlySpan<PointF> vertices, Span<float> destination, float subsamplingRatio)
    {
        for (int ri = 0; ri < vertices.Length; ri++)
        {
            destination[ri] = MathF.Round(vertices[ri].Y * subsamplingRatio, MidpointRounding.AwayFromZero) / subsamplingRatio;
        }
    }
}
0 commit comments