@@ -35,6 +35,10 @@ public sealed unsafe partial class WebGPUDrawingBackend : IDrawingBackend, IDisp
3535 // The staged pipeline keeps the most recently successful scratch capacities so later flushes
3636 // can start closer to the scene sizes the current device has already proven it needs.
3737 private WebGPUSceneBumpSizes bumpSizes = WebGPUSceneBumpSizes . Initial ( ) ;
38+
39+ // Cached scheduling arena for cross-flush buffer reuse. Rented via Interlocked.Exchange at flush
40+ // start and returned at flush end so parallel flushes on different threads don't contend for it.
41+ private WebGPUSceneSchedulingArena ? cachedSchedulingArena ;
3842 private bool isDisposed ;
3943
4044 private static readonly Dictionary < Type , CompositePixelRegistration > CompositePixelHandlers = CreateCompositePixelHandlers ( ) ;
@@ -250,9 +254,18 @@ public void FlushCompositions<TPixel>(
250254 this . TestingLastFlushUsedChunking = false ;
251255 this . TestingLastChunkingBindingFailure = WebGPUSceneDispatch . BindingLimitBuffer . None ;
252256 WebGPUSceneBumpSizes currentBumpSizes = this . bumpSizes ;
253- WebGPUSceneSchedulingArena schedulingArena = default ;
257+
258+ // Rent the cached scheduling arena. Null on first flush or if another thread has it.
259+ // Returned in the finally block for the next flush to reuse.
260+ WebGPUSceneSchedulingArena ? schedulingArena = Interlocked . Exchange ( ref this . cachedSchedulingArena , null ) ;
254261 try
255262 {
263+ // Retry loop: bump allocators start small (Vello defaults) and the GPU discovers
264+ // the actual sizes needed. Each overflow grows the failing buffers. The prepare
265+ // shader does not cancel on overflow so all stages report true demand per pass,
266+ // but data dependencies mean later stages report zero when earlier ones overflow.
267+ // Typically converges in 3-5 attempts on first use, then zero retries thereafter
268+ // because successful sizes are persisted in this.bumpSizes.
256269 for ( int attempt = 0 ; attempt < MaxDynamicGrowthAttempts ; attempt ++ )
257270 {
258271 if ( ! WebGPUSceneDispatch . TryCreateStagedScene ( configuration , target , compositionScene , currentBumpSizes , out bool exceedsBindingLimit , out WebGPUSceneDispatch . BindingLimitFailure bindingLimitFailure , out WebGPUStagedScene stagedScene , out string ? error ) )
@@ -277,13 +290,15 @@ public void FlushCompositions<TPixel>(
277290
278291 if ( WebGPUSceneDispatch . TryRenderStagedScene ( ref stagedScene , ref schedulingArena , out bool requiresGrowth , out WebGPUSceneBumpSizes grownBumpSizes , out error ) )
279292 {
293+ // Persist GPU-reported actual usage for next flush.
280294 this . bumpSizes = grownBumpSizes ;
281295 return ;
282296 }
283297
284298 this . TestingLastFlushUsedGPU = false ;
285299 if ( requiresGrowth )
286300 {
301+ // Bump overflow — retry with GPU-reported sizes.
287302 currentBumpSizes = grownBumpSizes ;
288303 continue ;
289304 }
@@ -304,7 +319,10 @@ public void FlushCompositions<TPixel>(
304319 }
305320 finally
306321 {
307- WebGPUSceneDispatch . DisposeSchedulingArena ( ref schedulingArena ) ;
322+ // Return the arena for the next flush. If another thread already returned one,
323+ // dispose the displaced arena (at most one survives in the cache).
324+ WebGPUSceneSchedulingArena ? prev = Interlocked . Exchange ( ref this . cachedSchedulingArena , schedulingArena ) ;
325+ WebGPUSceneDispatch . DisposeSchedulingArena ( prev ) ;
308326 }
309327 }
310328
@@ -563,6 +581,7 @@ public void Dispose()
563581
564582 this . TestingLastFlushUsedGPU = false ;
565583 this . TestingLastGPUInitializationFailure = null ;
584+ WebGPUSceneDispatch . DisposeSchedulingArena ( this . cachedSchedulingArena ) ;
566585 this . isDisposed = true ;
567586 }
568587
0 commit comments