Skip to content

Commit 49ebbe6

Browse files
Add start_cover fast path; skip outside edges
1 parent ccf8a4a commit 49ebbe6

1 file changed

Lines changed: 58 additions & 1 deletion

File tree

src/ImageSharp.Drawing.WebGPU/Shaders/CompositeComputeShader.cs

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,51 @@ fn clip_vertical(ex0: i32, ey0: i32, ex1: i32, ey1: i32, min_y: i32, max_y: i32)
709709
return ClippedEdge(rx0, ry0, rx1, ry1, 1);
710710
}
711711
712+
// Cover-only fast path for an edge that lies entirely to the left of the tile.
//
// Such an edge contributes no area, only start cover: for every row it crosses,
// the signed fixed-point height of the edge inside that row is added to
// tile_start_cover[row]. Downward edges (increasing y) add negative cover,
// upward edges add positive cover. This replaces the full column walk.
//
// ey0, ey1        - edge endpoint y coordinates (fixed point).
// clip_top/bottom - vertical range the endpoints are clamped to (fixed point).
// tile_top_fixed  - fixed-point y of the tile's first row; subtracted to obtain
//                   tile-local coordinates.
fn accumulate_start_cover(ey0: i32, ey1: i32, clip_top: i32, clip_bottom: i32, tile_top_fixed: i32) {
    // Clamp both endpoints to the clip range; a zero-height result adds nothing.
    let clamped_y0 = clamp(ey0, clip_top, clip_bottom);
    let clamped_y1 = clamp(ey1, clip_top, clip_bottom);
    if clamped_y0 == clamped_y1 {
        return;
    }

    // Tile-local fixed-point y coordinates.
    let local_y0 = clamped_y0 - tile_top_fixed;
    let local_y1 = clamped_y1 - tile_top_fixed;

    if local_y0 > local_y1 {
        // Upward edge: the start point is the lower end, so its row is chosen
        // with (y - 1) to keep a start exactly on a row boundary in the row above.
        let start_row = (local_y0 - 1) >> FIXED_SHIFT;
        let end_row = local_y1 >> FIXED_SHIFT;
        let start_frac = local_y0 - (start_row << FIXED_SHIFT);
        let end_frac = local_y1 - (end_row << FIXED_SHIFT);

        if start_row == end_row {
            // Whole edge sits inside a single row.
            atomicAdd(&tile_start_cover[start_row], start_frac - end_frac);
            return;
        }

        // Partial first row, full rows in between, partial last row.
        atomicAdd(&tile_start_cover[start_row], start_frac);
        var row = start_row - 1;
        while row > end_row {
            atomicAdd(&tile_start_cover[row], FIXED_ONE);
            row -= 1;
        }
        atomicAdd(&tile_start_cover[end_row], FIXED_ONE - end_frac);
        return;
    }

    // Downward edge: the end point is the lower end, so its row is chosen with
    // (y - 1) to keep an end exactly on a row boundary in the row above.
    let start_row = local_y0 >> FIXED_SHIFT;
    let end_row = (local_y1 - 1) >> FIXED_SHIFT;
    let start_frac = local_y0 - (start_row << FIXED_SHIFT);
    let end_frac = local_y1 - (end_row << FIXED_SHIFT);

    if start_row == end_row {
        // Whole edge sits inside a single row.
        atomicAdd(&tile_start_cover[start_row], start_frac - end_frac);
        return;
    }

    // Partial first row, full rows in between, partial last row.
    atomicAdd(&tile_start_cover[start_row], start_frac - FIXED_ONE);
    var row = start_row + 1;
    while row < end_row {
        atomicAdd(&tile_start_cover[row], -FIXED_ONE);
        row += 1;
    }
    atomicAdd(&tile_start_cover[end_row], -end_frac);
}
756+
712757
fn rasterize_edge(edge: Edge, band_top: i32, band_left_fixed: i32, clip_top_fixed: i32, clip_bottom_fixed: i32) {
713758
let band_top_fixed = band_top << FIXED_SHIFT;
714759
let ex0 = edge.x0 - band_left_fixed;
@@ -857,6 +902,7 @@ fn cs_main(
857902
// Cooperatively rasterize edges from the relevant CSR bands.
858903
let tile_top_fixed = band_top << FIXED_SHIFT;
859904
let tile_bottom_fixed = tile_top_fixed + (i32(16) << FIXED_SHIFT);
905+
let tile_right_fixed = band_left_fixed + (i32(16) << FIXED_SHIFT);
860906
for (var band = first_band; band <= last_band; band++) {
861907
let csr_start = csr_offsets[command.csr_offsets_start + u32(band)];
862908
let csr_end = csr_offsets[command.csr_offsets_start + u32(band) + 1u];
@@ -872,7 +918,18 @@ fn cs_main(
872918
}
873919
let edge_local_idx = csr_indices[csr_start + ei];
874920
let edge = edges[command.edge_start + edge_local_idx];
875-
rasterize_edge(edge, band_top, band_left_fixed, clip_top, clip_bottom);
921+
922+
// X-range spatial filter: skip edges right of the tile, use the cover-only fast path for edges left of it.
923+
if min(edge.x0, edge.x1) >= tile_right_fixed {
924+
// Edge entirely right of tile: no contribution.
925+
} else if max(edge.x0, edge.x1) < band_left_fixed {
926+
// Edge entirely left of tile: only affects start_cover.
927+
accumulate_start_cover(edge.y0, edge.y1, clip_top, clip_bottom, tile_top_fixed);
928+
} else {
929+
// Edge overlaps tile: full rasterization.
930+
rasterize_edge(edge, band_top, band_left_fixed, clip_top, clip_bottom);
931+
}
932+
876933
ei += 256u;
877934
}
878935
}

0 commit comments

Comments
 (0)