Skip to content

Commit 6b2632f

Browse files
Use inline array, fix GPU retry counter logic
1 parent f1a0cdb commit 6b2632f

File tree

4 files changed

+63
-79
lines changed

4 files changed

+63
-79
lines changed

src/ImageSharp.Drawing.WebGPU/WebGPUDrawingBackend.cs

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,20 @@ namespace SixLabors.ImageSharp.Drawing.Processing.Backends;
2020
/// </remarks>
2121
public sealed unsafe partial class WebGPUDrawingBackend : IDrawingBackend, IDisposable
2222
{
23-
// A single flush can rerun the staged path a small number of times while the scratch
24-
// buffers converge on the capacity reported by the scheduling stages.
25-
// The prepare shader cancels early when any single buffer overflows, so each
26-
// retry only discovers one new overflow. 8 attempts covers all 7 bump buffers
27-
// plus the final successful run. Only needed on the first flush; subsequent
28-
// flushes reuse the persisted GPU-reported sizes and need zero retries.
29-
private const int MaxDynamicGrowthAttempts = 8;
23+
// Number of independently sized scratch buffers tracked by WebGPUSceneBumpSizes.
24+
// A first-use flush can expose at most one newly visible allocator overflow per
25+
// failed pass, so the retry budget is expressed in terms of this count. The
26+
// tracked allocators are Lines, Binning, PathTiles, SegCounts, Segments,
27+
// BlendSpill, and Ptcl.
28+
private const int ScratchAllocatorCount = 7;
29+
30+
// A first flush can rerun the staged path while the GPU-reported scratch capacities
31+
// converge. Earlier scheduling overflows can prevent later stages from reporting
32+
// their own demand, so one failed pass can be needed per tracked allocator. A
33+
// Failed-only report can also require one conservative force-growth pass when no
34+
// individual counter exceeded its current capacity. Add one final pass for the
35+
// successful render after the last growth.
36+
private const int MaxDynamicGrowthAttempts = ScratchAllocatorCount + 2;
3037

3138
private readonly DefaultDrawingBackend fallbackBackend;
3239

@@ -155,12 +162,10 @@ public void FlushCompositions<TPixel>(
155162
WebGPUSceneResourceArena? resourceArena = Interlocked.Exchange(ref this.cachedResourceArena, null);
156163
try
157164
{
158-
// Retry loop: bump allocators start small (Vello defaults) and the GPU discovers
159-
// the actual sizes needed. Each overflow grows the failing buffers. The prepare
160-
// shader does not cancel on overflow so all stages report true demand per pass,
161-
// but data dependencies mean later stages report zero when earlier ones overflow.
162-
// Typically converges in 3-5 attempts on first use, then zero retries thereafter
163-
// because successful sizes are persisted in this.bumpSizes.
165+
// Retry loop: scratch allocators start small and the GPU reports actual demand.
166+
// Earlier scheduling overflows can hide later-stage demand, so a first-use flush
167+
// can require several passes before the capacities converge. Successful sizes are
168+
// persisted in this.bumpSizes, so later flushes usually run without retries.
164169
for (int attempt = 0; attempt < MaxDynamicGrowthAttempts; attempt++)
165170
{
166171
if (!WebGPUSceneDispatch.TryCreateStagedScene(configuration, target, compositionScene, currentBumpSizes, ref resourceArena, out bool exceedsBindingLimit, out WebGPUSceneDispatch.BindingLimitFailure bindingLimitFailure, out WebGPUStagedScene stagedScene, out string? error))

src/ImageSharp.Drawing.WebGPU/WebGPUSceneDispatch.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -894,8 +894,10 @@ public static unsafe bool TryRenderStagedScene(
894894
return false;
895895
}
896896

897-
// Overflow: fine output is garbage but bump counters report true demand for all
898-
// buffers. Prepare shader never cancels, so one retry with doubled sizes suffices.
897+
// Overflow: the fine output is discarded, but the scheduling readback still reports
898+
// the scratch usage visible to this pass. Later-stage demand can stay hidden until
899+
// earlier overflows are resolved, so the backend retries with larger buffers until
900+
// the capacities converge or the bounded attempt budget is exhausted.
899901
if (RequiresScratchReallocation(in bumpAllocators, stagedScene.Config.BumpSizes))
900902
{
901903
requiresGrowth = true;

src/ImageSharp.Drawing/Processing/Backends/DefaultRasterizer.RasterizableGeometry.cs

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -154,31 +154,14 @@ public ReadOnlySpan<int> GetCoversForRow(int localRowIndex)
154154
/// </summary>
155155
public void Dispose()
156156
{
157-
if (this.IsX16)
157+
if (this.linesX16 is not null)
158158
{
159-
for (int i = 0; i < this.linesX16!.Length; i++)
160-
{
161-
LineArrayX16Y16Block? block = this.linesX16[i];
162-
while (block is not null)
163-
{
164-
LineArrayX16Y16Block? next = block.Next;
165-
block.Dispose();
166-
block = next;
167-
}
168-
}
159+
Array.Clear(this.linesX16);
169160
}
170-
else
161+
162+
if (this.linesX32 is not null)
171163
{
172-
for (int i = 0; i < this.linesX32!.Length; i++)
173-
{
174-
LineArrayX32Y16Block? block = this.linesX32[i];
175-
while (block is not null)
176-
{
177-
LineArrayX32Y16Block? next = block.Next;
178-
block.Dispose();
179-
block = next;
180-
}
181-
}
164+
Array.Clear(this.linesX32);
182165
}
183166

184167
for (int i = 0; i < this.startCoverTable.Length; i++)

src/ImageSharp.Drawing/Processing/Backends/DefaultRasterizer.RetainedTypes.cs

Lines changed: 36 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
// Licensed under the Six Labors Split License.
33

44
using System.Runtime.CompilerServices;
5-
using System.Runtime.InteropServices;
65

76
namespace SixLabors.ImageSharp.Drawing.Processing.Backends;
87

@@ -256,25 +255,19 @@ public void AppendLine(int x0, int y0, int x1, int y1)
256255
/// <summary>
257256
/// Represents one retained 32-bit-X line block.
258257
/// </summary>
259-
internal sealed class LineArrayX32Y16Block : ILineBlock<LineArrayX32Y16Block>, IDisposable
258+
internal sealed class LineArrayX32Y16Block : ILineBlock<LineArrayX32Y16Block>
260259
{
261-
// These retained line blocks are one of the few places where direct native allocation
262-
// consistently outperforms the upstream allocator on larger retained-fill workloads.
263-
// Keep this isolated to the retained block storage rather than broadening native allocation usage.
264-
private readonly unsafe PackedLineX32Y16* lines;
260+
private const int BlockLineCount = 32;
261+
private PackedLineX32Y16Buffer lines;
265262

266263
/// <summary>
267264
/// Initializes a new instance of the <see cref="LineArrayX32Y16Block"/> class.
268265
/// </summary>
269266
/// <param name="next">The next block in the retained chain.</param>
270-
public unsafe LineArrayX32Y16Block(LineArrayX32Y16Block? next)
271-
{
272-
this.lines = (PackedLineX32Y16*)NativeMemory.Alloc((nuint)LineCount, (nuint)sizeof(PackedLineX32Y16));
273-
this.Next = next;
274-
}
267+
public LineArrayX32Y16Block(LineArrayX32Y16Block? next) => this.Next = next;
275268

276269
/// <inheritdoc />
277-
public static int LineCount => 32;
270+
public static int LineCount => BlockLineCount;
278271

279272
/// <inheritdoc />
280273
public LineArrayX32Y16Block? Next { get; }
@@ -287,7 +280,7 @@ public unsafe LineArrayX32Y16Block(LineArrayX32Y16Block? next)
287280
/// <param name="x0">The starting X coordinate in 24.8 fixed-point.</param>
288281
/// <param name="x1">The ending X coordinate in 24.8 fixed-point.</param>
289282
[MethodImpl(MethodImplOptions.AggressiveInlining)]
290-
public unsafe void Set(int index, int packedY0Y1, int x0, int x1)
283+
public void Set(int index, int packedY0Y1, int x0, int x1)
291284
{
292285
ref PackedLineX32Y16 line = ref this.lines[index];
293286
line.PackedY0Y1 = packedY0Y1;
@@ -297,12 +290,11 @@ public unsafe void Set(int index, int packedY0Y1, int x0, int x1)
297290

298291
/// <inheritdoc />
299292
[MethodImpl(MethodImplOptions.AggressiveInlining)]
300-
public unsafe void Rasterize(int count, ref Context context)
293+
public void Rasterize(int count, ref Context context)
301294
{
302-
ReadOnlySpan<PackedLineX32Y16> lines = new(this.lines, LineCount);
303295
for (int i = 0; i < count; i++)
304296
{
305-
PackedLineX32Y16 line = lines[i];
297+
PackedLineX32Y16 line = this.lines[i];
306298
context.RasterizeLineSegment(line.X0, UnpackLo(line.PackedY0Y1), line.X1, UnpackHi(line.PackedY0Y1));
307299
}
308300
}
@@ -324,11 +316,6 @@ public void Iterate(int firstBlockLineCount, ref Context context)
324316
}
325317
}
326318

327-
/// <summary>
328-
/// Releases the native block storage.
329-
/// </summary>
330-
public unsafe void Dispose() => NativeMemory.Free(this.lines);
331-
332319
/// <summary>
333320
/// Unpacks the low signed 16-bit value from a packed endpoint pair.
334321
/// </summary>
@@ -346,7 +333,7 @@ public void Iterate(int firstBlockLineCount, ref Context context)
346333
private static int UnpackHi(int packed) => packed >> 16;
347334

348335
/// <summary>
349-
/// Holds one retained 32-bit-X line record in native block storage.
336+
/// Holds one retained 32-bit-X line record in block-local storage.
350337
/// </summary>
351338
private struct PackedLineX32Y16
352339
{
@@ -365,6 +352,15 @@ private struct PackedLineX32Y16
365352
/// </summary>
366353
public int X1;
367354
}
355+
356+
/// <summary>
357+
/// Holds the fixed-capacity retained line payload inline with the block object.
358+
/// </summary>
359+
[InlineArray(BlockLineCount)]
360+
private struct PackedLineX32Y16Buffer
361+
{
362+
private PackedLineX32Y16 element0;
363+
}
368364
}
369365

370366
/// <summary>
@@ -432,24 +428,19 @@ public void AppendLine(int x0, int y0, int x1, int y1)
432428
/// <summary>
433429
/// Represents one retained 16-bit-X line block.
434430
/// </summary>
435-
internal sealed class LineArrayX16Y16Block : ILineBlock<LineArrayX16Y16Block>, IDisposable
431+
internal sealed class LineArrayX16Y16Block : ILineBlock<LineArrayX16Y16Block>
436432
{
437-
// Match the X32 block rationale above: this tiny retained block is hot enough that
438-
// direct native backing beats allocator-owned storage on larger retained-fill workloads.
439-
private readonly unsafe PackedLineX16Y16* lines;
433+
private const int BlockLineCount = 32;
434+
private PackedLineX16Y16Buffer lines;
440435

441436
/// <summary>
442437
/// Initializes a new instance of the <see cref="LineArrayX16Y16Block"/> class.
443438
/// </summary>
444439
/// <param name="next">The next block in the retained chain.</param>
445-
public unsafe LineArrayX16Y16Block(LineArrayX16Y16Block? next)
446-
{
447-
this.lines = (PackedLineX16Y16*)NativeMemory.Alloc((nuint)LineCount, (nuint)sizeof(PackedLineX16Y16));
448-
this.Next = next;
449-
}
440+
public LineArrayX16Y16Block(LineArrayX16Y16Block? next) => this.Next = next;
450441

451442
/// <inheritdoc />
452-
public static int LineCount => 32;
443+
public static int LineCount => BlockLineCount;
453444

454445
/// <inheritdoc />
455446
public LineArrayX16Y16Block? Next { get; }
@@ -461,7 +452,7 @@ public unsafe LineArrayX16Y16Block(LineArrayX16Y16Block? next)
461452
/// <param name="packedY0Y1">The packed 16-bit Y endpoints.</param>
462453
/// <param name="packedX0X1">The packed 16-bit X endpoints.</param>
463454
[MethodImpl(MethodImplOptions.AggressiveInlining)]
464-
public unsafe void Set(int index, int packedY0Y1, int packedX0X1)
455+
public void Set(int index, int packedY0Y1, int packedX0X1)
465456
{
466457
ref PackedLineX16Y16 line = ref this.lines[index];
467458
line.PackedY0Y1 = packedY0Y1;
@@ -470,12 +461,11 @@ public unsafe void Set(int index, int packedY0Y1, int packedX0X1)
470461

471462
/// <inheritdoc />
472463
[MethodImpl(MethodImplOptions.AggressiveInlining)]
473-
public unsafe void Rasterize(int count, ref Context context)
464+
public void Rasterize(int count, ref Context context)
474465
{
475-
ReadOnlySpan<PackedLineX16Y16> lines = new(this.lines, LineCount);
476466
for (int i = 0; i < count; i++)
477467
{
478-
PackedLineX16Y16 line = lines[i];
468+
PackedLineX16Y16 line = this.lines[i];
479469
context.RasterizeLineSegment(
480470
UnpackLo(line.PackedX0X1),
481471
UnpackLo(line.PackedY0Y1),
@@ -501,11 +491,6 @@ public void Iterate(int firstBlockLineCount, ref Context context)
501491
}
502492
}
503493

504-
/// <summary>
505-
/// Releases the native block storage.
506-
/// </summary>
507-
public unsafe void Dispose() => NativeMemory.Free(this.lines);
508-
509494
/// <summary>
510495
/// Unpacks the low signed 16-bit value from a packed endpoint pair.
511496
/// </summary>
@@ -523,7 +508,7 @@ public void Iterate(int firstBlockLineCount, ref Context context)
523508
private static int UnpackHi(int packed) => packed >> 16;
524509

525510
/// <summary>
526-
/// Holds one retained 16-bit-X line record in native block storage.
511+
/// Holds one retained 16-bit-X line record in block-local storage.
527512
/// </summary>
528513
private struct PackedLineX16Y16
529514
{
@@ -537,5 +522,14 @@ private struct PackedLineX16Y16
537522
/// </summary>
538523
public int PackedX0X1;
539524
}
525+
526+
/// <summary>
527+
/// Holds the fixed-capacity retained line payload inline with the block object.
528+
/// </summary>
529+
[InlineArray(BlockLineCount)]
530+
private struct PackedLineX16Y16Buffer
531+
{
532+
private PackedLineX16Y16 element0;
533+
}
540534
}
541535
}

0 commit comments

Comments
 (0)