tile_alloc.comp raw
1 // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
2
3 // Allocation and initialization of tiles for paths.
4
5 #version 450
6 #extension GL_GOOGLE_include_directive : enable
7
8 #include "mem.h"
9 #include "setup.h"
10
// Base-2 logarithm of the workgroup size. LG_WG_FACTOR is not defined in
// this file (presumably it comes from one of the includes above -- confirm).
11 #define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
// Number of invocations per workgroup; always a power of two because it is
// derived by shifting. The prefix-sum loop in main() runs LG_TILE_ALLOC_WG
// rounds over this many entries.
12 #define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
13
// One-dimensional workgroup of TILE_ALLOC_WG invocations.
14 layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
15
// Read-only configuration buffer at descriptor set 0, binding 1.
// main() reads conf.tile_alloc, conf.anno_alloc, conf.n_elements,
// conf.width_in_tiles and conf.height_in_tiles from it. The Config type is
// declared outside this file (presumably in setup.h -- confirm).
16 layout(set = 0, binding = 1) readonly buffer ConfigBuf {
17 Config conf;
18 };
19
20 #include "annotated.h"
21 #include "tile.h"
22
23 // Scale factors used to convert bounding-box coordinates to tile indices:
// multiplying by SX / SY is a division by TILE_WIDTH_PX / TILE_HEIGHT_PX.
// Those two constants are defined elsewhere (presumably the tile dimensions
// in pixels -- confirm).
24 #define SX (1.0 / float(TILE_WIDTH_PX))
25 #define SY (1.0 / float(TILE_HEIGHT_PX))
26
// One slot per invocation. Initially holds each element's tile count; main()
// then overwrites it in place with the inclusive prefix sum across the
// workgroup, so the last slot ends up holding the workgroup total.
27 shared uint sh_tile_count[TILE_ALLOC_WG];
// Result of the single malloc() issued for the whole workgroup. Written by
// the last invocation only and read by every invocation after a barrier.
28 shared MallocResult sh_tile_alloc;
29
// Allocates and zero-initializes the tiles for one workgroup's worth of
// path elements (one element per invocation).
//
// Steps:
//   1. Convert the element's bounding box to a clamped range of tiles.
//   2. Compute an inclusive prefix sum of the per-element tile counts in
//      shared memory.
//   3. Have the last invocation issue a single malloc() for the whole
//      workgroup and publish the result through shared memory.
//   4. Write each element's Path record (bbox + start of its tile slice).
//   5. Cooperatively zero the freshly allocated tiles.
//
// Every barrier() is reached by all invocations: out-of-range invocations
// are treated as Annotated_Nop with zero tiles rather than exiting early,
// and the only early return happens after the last barrier.
void main() {
    uint th_ix = gl_LocalInvocationID.x;
    uint element_ix = gl_GlobalInvocationID.x;
    PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
    AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);

    // Invocations past the end of the element list keep the Nop tag, so they
    // contribute an empty bbox and zero tiles below.
    uint tag = Annotated_Nop;
    if (element_ix < conf.n_elements) {
        tag = Annotated_tag(conf.anno_alloc, ref).tag;
    }

    int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
    switch (tag) {
    case Annotated_Color:
    case Annotated_Image:
    case Annotated_BeginClip:
    case Annotated_EndClip:
        // Note: we take advantage of the fact that fills, strokes, and
        // clips have compatible layout.
        AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
        // Round outwards so that every tile touched by the bbox is covered.
        x0 = int(floor(clip.bbox.x * SX));
        y0 = int(floor(clip.bbox.y * SY));
        x1 = int(ceil(clip.bbox.z * SX));
        y1 = int(ceil(clip.bbox.w * SY));
        break;
    }

    // Clamp to the tile grid. The max corner is additionally clamped against
    // the (already clamped) min corner so that an inverted bbox collapses to
    // an empty one. Without this, a negative extent would wrap to a huge
    // unsigned tile count and be passed to malloc(), and the inverted bbox
    // would be stored in the Path record. Well-formed bboxes are unaffected.
    x0 = clamp(x0, 0, int(conf.width_in_tiles));
    y0 = clamp(y0, 0, int(conf.height_in_tiles));
    x1 = clamp(x1, x0, int(conf.width_in_tiles));
    y1 = clamp(y1, y0, int(conf.height_in_tiles));

    Path path;
    path.bbox = uvec4(x0, y0, x1, y1);
    // Both extents are non-negative here, so the conversions are exact.
    uint tile_count = uint(x1 - x0) * uint(y1 - y0);
    if (tag == Annotated_EndClip) {
        // Don't actually allocate tiles for an end clip, but we do want
        // the path structure (especially bbox) allocated for it.
        tile_count = 0;
    }

    sh_tile_count[th_ix] = tile_count;
    uint total_tile_count = tile_count;
    // Inclusive prefix sum of sh_tile_count. Each round adds the value that
    // lies `stride` slots to the left; the two barriers separate the reads
    // of a round from its writes.
    for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
        uint stride = 1u << i;
        barrier();
        if (th_ix >= stride) {
            total_tile_count += sh_tile_count[th_ix - stride];
        }
        barrier();
        sh_tile_count[th_ix] = total_tile_count;
    }

    // The last invocation holds the workgroup total in its own register, so
    // it can allocate without re-reading shared memory.
    if (th_ix == TILE_ALLOC_WG - 1) {
        sh_tile_alloc = malloc(total_tile_count * Tile_size);
    }
    // Publishes sh_tile_alloc and the final prefix sums to all invocations.
    barrier();
    MallocResult alloc_start = sh_tile_alloc;
    if (alloc_start.failed || mem_error != NO_ERROR) {
        // No barrier follows, so returning here is safe.
        return;
    }

    if (element_ix < conf.n_elements) {
        // Exclusive prefix: tiles used by all lower-indexed invocations.
        uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
        Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count);
        path.tiles = TileRef(tiles_alloc.offset);
        Path_write(conf.tile_alloc, path_ref, path);
    }

    // Zero out allocated tiles efficiently. Tile_size / 4 and offset >> 2
    // turn sizes/offsets into the word index that write_mem() takes
    // (presumably byte units to 32-bit words -- confirm against mem.h).
    uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
    uint start_ix = alloc_start.alloc.offset >> 2;
    for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
        // Note: this interleaving is faster than using Tile_write
        // by a significant amount.
        write_mem(alloc_start.alloc, start_ix + i, 0);
    }
}
105