backdrop.comp raw
1 // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
2
3 // Propagation of tile backdrop for filling.
4 //
5 // Each thread reads one path element and calculates the number of spanned tiles
6 // based on the bounding box.
7 // In a further compaction step, the workgroup loops over the corresponding tile rows per element in parallel.
8 // For each row the per tile backdrop will be read, as calculated in the previous coarse path segment kernel,
9 // and propagated from the left to the right (prefix summed).
10 //
11 // Output state:
12 // - Each path element has an array of tiles covering the whole path based on bounding box
13 // - Each tile per path element contains the 'backdrop' and a list of subdivided path segments
14
15 #version 450
16 #extension GL_GOOGLE_include_directive : enable
17
18 #include "mem.h"
19 #include "setup.h"
20
// Workgroup size, expressed as a power of two so that the log2 value can be
// used directly as the step count of the prefix sum and the binary search in
// main(). LG_WG_FACTOR is not defined in this file (presumably it comes from
// one of the includes above -- TODO confirm).
21 #define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
// Number of invocations per workgroup: 2^LG_BACKDROP_WG.
22 #define BACKDROP_WG (1 << LG_BACKDROP_WG)
23
// One-dimensional workgroup: BACKDROP_WG invocations along x, a single row in y.
24 layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
25
// Read-only buffer at set 0, binding 1 that holds the Config record.
// main() reads conf.n_elements, conf.anno_alloc and conf.tile_alloc from it.
// The Config type itself is declared outside this file.
26 layout(set = 0, binding = 1) readonly buffer ConfigBuf {
27 Config conf;
28 };
29
30 #include "annotated.h"
31 #include "tile.h"
32
// Workgroup-shared scratch arrays, one slot per invocation (indexed by the
// local x invocation id in main()).
//
// sh_row_count: initially each element's row count; main() then overwrites it
// in place with the inclusive prefix sum of those counts.
32 shared uint sh_row_count[BACKDROP_WG];
// sh_row_alloc: Alloc covering the element's tile array. Only written for
// elements that pass the tag and fill-mode test in main().
33 shared Alloc sh_row_alloc[BACKDROP_WG];
// sh_row_width: element bounding-box width (bbox.z - bbox.x). Only written for
// the same elements as sh_row_alloc; other slots are left uninitialized.
34 shared uint sh_row_width[BACKDROP_WG];
36
// Shader entry point. Each workgroup runs two phases:
//   1. One invocation per path element: read the element's tag and, for
//      qualifying elements, publish the row count, row width and tile Alloc
//      to the shared arrays.
//   2. After an inclusive prefix sum over the row counts, one invocation per
//      (element, row) pair: walk that row from left to right and overwrite the
//      backdrop word of every tile after the first with the running sum of the
//      backdrops up to and including that tile.
// Relies on conf, mem_error and the Annotated/Path/Alloc helpers, all of which
// are declared outside this file.
37 void main() {
38 uint th_ix = gl_LocalInvocationID.x;
39 uint element_ix = gl_GlobalInvocationID.x;
40 AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
41
42 // Work assignment: 1 thread : 1 path element
// row_count stays 0 for out-of-range elements and for elements that are
// filtered out below, so they contribute no rows to the second phase.
43 uint row_count = 0;
// Error state sampled once at entry; it is passed to new_alloc and gates the
// tile reads/writes in the second phase.
44 bool mem_ok = mem_error == NO_ERROR;
45 if (element_ix < conf.n_elements) {
46 AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
// Only Image, BeginClip and Color elements are considered; any other tag
// matches no case label and leaves row_count at 0.
47 switch (tag.tag) {
48 case Annotated_Image:
49 case Annotated_BeginClip:
50 case Annotated_Color:
51 if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) {
52 break;
53 }
54 // Not a case-label fall-through: execution continues here only when the fill mode is MODE_NONZERO.
55 PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
56 Path path = Path_read(conf.tile_alloc, path_ref);
// bbox is used as (x0, y0, x1, y1) = (x, y, z, w): width is z - x, height is w - y.
// Presumably in tile units, per the file header -- confirm against tile.h.
57 sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
58 row_count = path.bbox.w - path.bbox.y;
59 // Paths that don't cross tile top edges don't have backdrops.
60 // Don't apply the optimization to paths that may cross the y = 0
61 // top edge, but clipped to 1 row.
62 if (row_count == 1 && path.bbox.y > 0) {
63 // Note: this can probably be expanded to width = 2 as
64 // long as it doesn't cross the left edge.
65 row_count = 0;
66 }
// The Alloc is sized from the full bbox height (w - y), not from row_count,
// which may have just been zeroed by the single-row optimization above.
67 Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
68 sh_row_alloc[th_ix] = path_alloc;
69 }
70 }
71
72 sh_row_count[th_ix] = row_count;
73 // Prefix sum of sh_row_count
// Inclusive scan in LG_BACKDROP_WG doubling steps: in step i every invocation
// adds the value stored 2^i slots to its left. The barrier before the read
// and the barrier before the write keep each step's reads separated from the
// writes of the same step and of the previous one.
74 for (uint i = 0; i < LG_BACKDROP_WG; i++) {
75 barrier();
76 if (th_ix >= (1 << i)) {
77 row_count += sh_row_count[th_ix - (1 << i)];
78 }
79 barrier();
80 sh_row_count[th_ix] = row_count;
81 }
// Wait for the final step's writes before any invocation reads the totals.
82 barrier();
83 // Work assignment: 1 thread : 1 path element row
// The last slot of the inclusive scan is the total number of rows to process
// in this workgroup; rows are dealt out to invocations BACKDROP_WG apart.
84 uint total_rows = sh_row_count[BACKDROP_WG - 1];
85 for (uint row = th_ix; row < total_rows; row += BACKDROP_WG) {
86 // Binary search to find element
// Result: el_ix is the first element whose inclusive prefix sum exceeds row,
// i.e. the element that owns this row. Such an element always has a non-zero
// row count, so its sh_row_width / sh_row_alloc slots were written above.
87 uint el_ix = 0;
88 for (uint i = 0; i < LG_BACKDROP_WG; i++) {
89 uint probe = el_ix + ((BACKDROP_WG / 2) >> i);
90 if (row >= sh_row_count[probe - 1]) {
91 el_ix = probe;
92 }
93 }
94 uint width = sh_row_width[el_ix];
// Skip empty rows, and skip all memory traffic if an error was already
// recorded when the shader started.
95 if (width > 0 && mem_ok) {
96 // Process one row sequentially
97 // Read backdrop value per tile and prefix sum it
98 Alloc tiles_alloc = sh_row_alloc[el_ix];
// Row index within the owning element: subtract the rows of all earlier
// elements (the exclusive prefix sum).
99 uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
// Word index of the first tile's backdrop in this row: byte offset >> 2, plus
// 1 to step over the tile's first word, plus 2 words per tile for the
// preceding rows. Assumes Tile is two 32-bit words with backdrop second --
// confirm against tile.h.
100 uint tile_el_ix = (tiles_alloc.offset >> 2) + 1 + seq_ix * 2 * width;
// The first tile is only read: its running sum equals its own backdrop.
101 uint sum = read_mem(tiles_alloc, tile_el_ix);
102 for (uint x = 1; x < width; x++) {
103 tile_el_ix += 2;
104 sum += read_mem(tiles_alloc, tile_el_ix);
105 write_mem(tiles_alloc, tile_el_ix, sum);
106 }
107 }
108 }
109 }
110