Skip to content

Commit 3a50436

Browse files
Add Avx rounding
1 parent 3f1b4fa commit 3a50436

2 files changed

Lines changed: 135 additions & 5 deletions

File tree

src/ImageSharp.Drawing/Shapes/Rasterization/ScanEdgeCollection.Build.cs

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
using System.Buffers;
55
using System.Diagnostics;
66
using System.Runtime.CompilerServices;
7+
using System.Runtime.InteropServices;
8+
using System.Runtime.Intrinsics;
9+
using System.Runtime.Intrinsics.X86;
710
using SixLabors.ImageSharp.Memory;
811

912
namespace SixLabors.ImageSharp.Drawing.Shapes.Rasterization;
@@ -82,12 +85,48 @@ internal static ScanEdgeCollection Create(TessellatedMultipolygon multipolygon,
8285

8386
// Snaps the Y coordinate of every vertex to the nearest multiple of (1 / subsamplingRatio)
// and writes the result to the matching index of 'destination'.
// Ties are rounded away from zero on BOTH the vectorized and the scalar path, so the output
// does not depend on the hardware or on whether a vertex lands in the vector body or the tail.
//   vertices:         the points whose Y values are rounded.
//   destination:      receives one float per vertex; must hold at least vertices.Length elements.
//   subsamplingRatio: the number of subpixel steps per unit.
static void RoundY(ReadOnlySpan<PointF> vertices, Span<float> destination, float subsamplingRatio)
{
    // The vectorized path writes through unchecked references, so validate the destination
    // length up front. Slice throws ArgumentOutOfRangeException when destination is too short.
    destination = destination.Slice(0, vertices.Length);

    int ri = 0;
    if (Avx.IsSupported)
    {
        // Every iteration reads 8 PointF (2 x Vector256<float>) and writes 8 floats (1 x Vector256<float>).
        // Only the largest multiple-of-8 prefix is vectorized; the scalar loop below handles the rest.
        int pointsPerIteration = Vector256<float>.Count;
        nint maxIterations = vertices.Length / pointsPerIteration;

        if (maxIterations > 0)
        {
            ri = (int)maxIterations * pointsPerIteration;
            ref Vector256<float> sourceBase = ref Unsafe.As<PointF, Vector256<float>>(ref MemoryMarshal.GetReference(vertices));
            ref Vector256<float> destinationBase = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(destination));

            Vector256<float> ssRatio = Vector256.Create(subsamplingRatio);

            // NOTE: multiplying by the reciprocal equals the scalar division exactly when the ratio
            // is a power of two; for other ratios the two paths may differ in the last bit.
            Vector256<float> inverseSsRatio = Vector256.Create(1F / subsamplingRatio);
            Vector256<float> signMask = Vector256.Create(int.MinValue).AsSingle();
            Vector256<float> half = Vector256.Create(.5F);
            Vector256<float> one = Vector256.Create(1F);

            // For every 1 vector we add to the destination we read 2 from the vertices.
            for (nint i = 0, j = 0; i < maxIterations; i++, j += 2)
            {
                // Load 8 PointF
                Vector256<float> points1 = Unsafe.Add(ref sourceBase, j);
                Vector256<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

                // Shuffle the points to group the Y properties
                Vector128<float> points1Y = Sse.Shuffle(points1.GetLower(), points1.GetUpper(), 0b11_01_11_01);
                Vector128<float> points2Y = Sse.Shuffle(points2.GetLower(), points2.GetUpper(), 0b11_01_11_01);
                Vector256<float> pointsY = Vector256.Create(points1Y, points2Y);

                // Round half away from zero, matching MidpointRounding.AwayFromZero:
                // truncate, then step one unit further from zero wherever the discarded fraction
                // has a magnitude of at least 0.5. (A plain ceiling would round 1.1 up to 2.)
                // https://www.ocf.berkeley.edu/~horie/rounding.html
                Vector256<float> scaled = Avx.Multiply(pointsY, ssRatio);
                Vector256<float> truncated = Avx.RoundToZero(scaled);
                Vector256<float> fraction = Avx.Subtract(scaled, truncated);
                Vector256<float> absFraction = Avx.AndNot(signMask, fraction);
                Vector256<float> awayMask = Avx.Compare(absFraction, half, FloatComparisonMode.OrderedGreaterThanOrEqualNonSignaling);
                Vector256<float> signedOne = Avx.Or(one, Avx.And(scaled, signMask));
                Vector256<float> rounded = Avx.Add(truncated, Avx.And(awayMask, signedOne));

                // Scale back to the original coordinate space and assign.
                Unsafe.Add(ref destinationBase, i) = Avx.Multiply(rounded, inverseSsRatio);
            }
        }
    }

    // Scalar path: everything when AVX is unavailable, otherwise the tail the vector loop left over.
    for (; ri < vertices.Length; ri++)
    {
        destination[ri] = MathF.Round(vertices[ri].Y * subsamplingRatio, MidpointRounding.AwayFromZero) / subsamplingRatio;
    }
}
93132

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Six Labors Split License.
3+
4+
using System.Runtime.CompilerServices;
5+
using System.Runtime.InteropServices;
6+
using System.Runtime.Intrinsics;
7+
using System.Runtime.Intrinsics.X86;
8+
using BenchmarkDotNet.Attributes;
9+
10+
namespace SixLabors.ImageSharp.Drawing.Benchmarks.Drawing;
11+
/// <summary>
/// Benchmarks snapping vertex Y coordinates to a subsampling grid, comparing an AVX
/// implementation against the scalar <see cref="MathF.Round(float, MidpointRounding)"/> baseline.
/// Both implementations round ties away from zero so the comparison is like-for-like.
/// </summary>
public class Rounding
{
    private PointF[] vertices;
    private float[] destination;
    private float[] destinationAvx;

    /// <summary>
    /// Allocates the input and output buffers and fills the input with
    /// pseudo-random points generated from a fixed seed.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        this.vertices = new PointF[1000];
        this.destination = new float[this.vertices.Length];
        this.destinationAvx = new float[this.vertices.Length];
        Random r = new(42);
        for (int i = 0; i < this.vertices.Length; i++)
        {
            this.vertices[i] = new PointF((float)r.NextDouble(), (float)r.NextDouble());
        }
    }

    /// <summary>Runs the vectorized implementation with a subsampling ratio of 16.</summary>
    [Benchmark]
    public void RoundYAvx() => RoundYAvx(this.vertices, this.destinationAvx, 16);

    /// <summary>Runs the scalar baseline implementation with a subsampling ratio of 16.</summary>
    [Benchmark(Baseline = true)]
    public void RoundY() => RoundY(this.vertices, this.destination, 16);

    /// <summary>
    /// Rounds the Y coordinate of every vertex to the nearest multiple of
    /// <c>1 / subsamplingRatio</c> (ties away from zero), using AVX when it is supported
    /// and the scalar loop for any leftover elements.
    /// </summary>
    /// <param name="vertices">The points whose Y values are rounded.</param>
    /// <param name="destination">Receives one value per vertex; must hold at least <c>vertices.Length</c> elements.</param>
    /// <param name="subsamplingRatio">The number of subpixel steps per unit.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="destination"/> is shorter than <paramref name="vertices"/>.</exception>
    private static void RoundYAvx(ReadOnlySpan<PointF> vertices, Span<float> destination, float subsamplingRatio)
    {
        // The vectorized path writes through unchecked references, so validate the
        // destination length before touching it.
        destination = destination.Slice(0, vertices.Length);

        int ri = 0;
        if (Avx.IsSupported)
        {
            // Every iteration reads 8 PointF (2 x Vector256<float>) and writes 8 floats (1 x Vector256<float>).
            // Only the largest multiple-of-8 prefix is vectorized; the scalar loop below handles the rest.
            int pointsPerIteration = Vector256<float>.Count;
            nint maxIterations = vertices.Length / pointsPerIteration;

            if (maxIterations > 0)
            {
                ri = (int)maxIterations * pointsPerIteration;
                ref Vector256<float> sourceBase = ref Unsafe.As<PointF, Vector256<float>>(ref MemoryMarshal.GetReference(vertices));
                ref Vector256<float> destinationBase = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(destination));

                Vector256<float> ssRatio = Vector256.Create(subsamplingRatio);

                // NOTE: multiplying by the reciprocal equals the scalar division exactly when the
                // ratio is a power of two; for other ratios the two paths may differ in the last bit.
                Vector256<float> inverseSsRatio = Vector256.Create(1F / subsamplingRatio);
                Vector256<float> signMask = Vector256.Create(int.MinValue).AsSingle();
                Vector256<float> half = Vector256.Create(.5F);
                Vector256<float> one = Vector256.Create(1F);

                // For every 1 vector we add to the destination we read 2 from the vertices.
                for (nint i = 0, j = 0; i < maxIterations; i++, j += 2)
                {
                    // Load 8 PointF
                    Vector256<float> points1 = Unsafe.Add(ref sourceBase, j);
                    Vector256<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

                    // Shuffle the points to group the Y properties
                    Vector128<float> points1Y = Sse.Shuffle(points1.GetLower(), points1.GetUpper(), 0b11_01_11_01);
                    Vector128<float> points2Y = Sse.Shuffle(points2.GetLower(), points2.GetUpper(), 0b11_01_11_01);
                    Vector256<float> pointsY = Vector256.Create(points1Y, points2Y);

                    // Round half away from zero, matching MidpointRounding.AwayFromZero:
                    // truncate, then step one unit further from zero wherever the discarded
                    // fraction has a magnitude of at least 0.5.
                    // https://www.ocf.berkeley.edu/~horie/rounding.html
                    Vector256<float> scaled = Avx.Multiply(pointsY, ssRatio);
                    Vector256<float> truncated = Avx.RoundToZero(scaled);
                    Vector256<float> fraction = Avx.Subtract(scaled, truncated);
                    Vector256<float> absFraction = Avx.AndNot(signMask, fraction);
                    Vector256<float> awayMask = Avx.Compare(absFraction, half, FloatComparisonMode.OrderedGreaterThanOrEqualNonSignaling);
                    Vector256<float> signedOne = Avx.Or(one, Avx.And(scaled, signMask));
                    Vector256<float> rounded = Avx.Add(truncated, Avx.And(awayMask, signedOne));

                    // Scale back to the original coordinate space and assign.
                    Unsafe.Add(ref destinationBase, i) = Avx.Multiply(rounded, inverseSsRatio);
                }
            }
        }

        // Scalar path: everything when AVX is unavailable, otherwise the tail the vector loop left over.
        for (; ri < vertices.Length; ri++)
        {
            destination[ri] = MathF.Round(vertices[ri].Y * subsamplingRatio, MidpointRounding.AwayFromZero) / subsamplingRatio;
        }
    }

    /// <summary>
    /// Scalar baseline: rounds the Y coordinate of every vertex to the nearest multiple of
    /// <c>1 / subsamplingRatio</c>, with ties rounded away from zero.
    /// </summary>
    /// <param name="vertices">The points whose Y values are rounded.</param>
    /// <param name="destination">Receives one value per vertex.</param>
    /// <param name="subsamplingRatio">The number of subpixel steps per unit.</param>
    private static void RoundY(ReadOnlySpan<PointF> vertices, Span<float> destination, float subsamplingRatio)
    {
        for (int ri = 0; ri < vertices.Length; ri++)
        {
            destination[ri] = MathF.Round(vertices[ri].Y * subsamplingRatio, MidpointRounding.AwayFromZero) / subsamplingRatio;
        }
    }
}

0 commit comments

Comments
 (0)