path_coarse.comp raw
1 // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
2
3 // Coarse rasterization of path segments.
4
5 // Allocation and initialization of tiles for paths.
6
7 #version 450
8 #extension GL_GOOGLE_include_directive : enable
9
10 #include "mem.h"
11 #include "setup.h"
12
13 #define LG_COARSE_WG 5
14 #define COARSE_WG (1 << LG_COARSE_WG)
15
16 layout(local_size_x = COARSE_WG, local_size_y = 1) in;
17
18 layout(set = 0, binding = 1) readonly buffer ConfigBuf {
19 Config conf;
20 };
21
22 #include "pathseg.h"
23 #include "tile.h"
24
25 // scale factors useful for converting coordinates to tiles
26 #define SX (1.0 / float(TILE_WIDTH_PX))
27 #define SY (1.0 / float(TILE_HEIGHT_PX))
28
29 #define ACCURACY 0.25
30 #define Q_ACCURACY (ACCURACY * 0.1)
31 #define REM_ACCURACY (ACCURACY - Q_ACCURACY)
32 #define MAX_HYPOT2 (432.0 * Q_ACCURACY * Q_ACCURACY)
33
// Evaluate the quadratic Bezier with control points p0, p1, p2 at parameter t.
//
// Returns p0 at t = 0 and p2 at t = 1.
vec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t) {
    float s = 1.0 - t;
    vec2 head = p0 * (s * s);
    vec2 tail = p1 * (s * 2.0) + p2 * t;
    return head + tail * t;
}
38
// Evaluate the cubic Bezier with control points p0..p3 at parameter t.
//
// Returns p0 at t = 0 and p3 at t = 1.
vec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t) {
    float s = 1.0 - t;
    vec2 inner = p2 * (s * 3.0) + p3 * t;
    vec2 middle = p1 * (s * s * 3.0) + inner * t;
    return p0 * (s * s * s) + middle * t;
}
43
// Result of estimate_subdiv() for a single quadratic Bezier.
struct SubdivResult {
    // Subdivision estimate for the quadratic; main() sums this over all
    // quadratics of a cubic to derive the number of output line segments.
    float val;
    // approx_parabola_integral() evaluated at the quadratic's start (x0).
    float a0;
    // approx_parabola_integral() evaluated at the quadratic's end (x2).
    float a2;
};
49
/// An approximation to $\int (1 + 4x^2) ^ -0.25 dx$
///
/// This is used for flattening curves. The function is odd in x and
/// returns 0 at x = 0.
#define D 0.67
float approx_parabola_integral(float x) {
    float d4 = D * D * D * D;
    float radicand = 1.0 - D + (d4 + 0.25 * x * x);
    // radicand ^ (-1/4), written as the reciprocal square root of a square root.
    return x * inversesqrt(sqrt(radicand));
}
57
/// An approximation to the inverse parabola integral.
///
/// The function is odd in x and returns 0 at x = 0.
#define B 0.39
float approx_parabola_inv_integral(float x) {
    float radicand = 1.0 - B + (B * B + 0.25 * x * x);
    return x * sqrt(radicand);
}
63
// Estimate how finely the quadratic Bezier (p0, p1, p2) must be subdivided.
//
// sqrt_tol is the square root of the flattening tolerance. The returned
// SubdivResult carries the subdivision estimate in `val` and the parabola
// integral values at both ends of the curve in `a0` and `a2`. When the
// computed scale is not below 1e9 (this includes a NaN scale, e.g. from a
// zero cross product), `val` is left at 0.
SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
    vec2 leg_in = p1 - p0;
    vec2 leg_out = p2 - p1;
    vec2 bend = leg_in - leg_out;
    vec2 chord = p2 - p0;

    float det = chord.x * bend.y - chord.y * bend.x;
    float x0 = (leg_in.x * bend.x + leg_in.y * bend.y) / det;
    float x2 = (leg_out.x * bend.x + leg_out.y * bend.y) / det;
    float scale = abs(det / (length(bend) * (x2 - x0)));

    SubdivResult result = SubdivResult(0.0, approx_parabola_integral(x0), approx_parabola_integral(x2));
    // Written as `scale < 1e9` on purpose: a NaN scale fails the comparison
    // and keeps val at 0.
    if (scale < 1e9) {
        float da = abs(result.a2 - result.a0);
        float root_scale = sqrt(scale);
        if (sign(x0) == sign(x2)) {
            result.val = da * root_scale;
        } else {
            // x0 and x2 have different signs: use the tolerance-based bound.
            float xmin = sqrt_tol / root_scale;
            result.val = sqrt_tol * da / approx_parabola_integral(xmin);
        }
    }
    return result;
}
88
// Coarse rasterization entry point: one invocation per path segment.
//
// A cubic segment is optionally transformed, split into quadratics, and
// flattened into line segments. For every line, one TileSeg record is
// allocated per tile of its clamped bounding box; records are written for
// the tiles the line may touch and linked in by atomically exchanging the
// tile's first word (the previous value becomes the record's `next`). For
// fills, a backdrop delta is atomically added to the word after it.
// Invocations whose tag is not PathSeg_Cubic do nothing.
void main() {
    uint element_ix = gl_GlobalInvocationID.x;
    PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);

    // Invocations beyond the last path segment keep the Nop tag.
    PathSegTag tag = PathSegTag(PathSeg_Nop, 0);
    if (element_ix < conf.n_pathseg) {
        tag = PathSeg_tag(conf.pathseg_alloc, ref);
    }
    bool mem_ok = mem_error == NO_ERROR;
    if (tag.tag != PathSeg_Cubic) {
        return;
    }

    PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);

    // A nonzero trans_ix selects transform (trans_ix - 1); apply it to all
    // four control points.
    uint trans_ix = cubic.trans_ix;
    if (trans_ix > 0) {
        TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (trans_ix - 1) * TransformSeg_size);
        TransformSeg xf = TransformSeg_read(conf.trans_alloc, trans_ref);
        cubic.p0 = xf.mat.xy * cubic.p0.x + xf.mat.zw * cubic.p0.y + xf.translate;
        cubic.p1 = xf.mat.xy * cubic.p1.x + xf.mat.zw * cubic.p1.y + xf.translate;
        cubic.p2 = xf.mat.xy * cubic.p2.x + xf.mat.zw * cubic.p2.y + xf.translate;
        cubic.p3 = xf.mat.xy * cubic.p3.x + xf.mat.zw * cubic.p3.y + xf.translate;
    }

    // Choose the number of quadratics from the squared error term.
    vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
    float err = err_v.x * err_v.x + err_v.y * err_v.y;
    uint n_quads = max(uint(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1);
    float t_step = 1.0 / float(n_quads);
    float sqrt_tol = sqrt(REM_ACCURACY);

    // First pass: tote up the subdivision estimate over all quadratics.
    float val = 0.0;
    vec2 qp0 = cubic.p0;
    for (uint i = 0; i < n_quads; i++) {
        float t_end = float(i + 1) * t_step;
        vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t_end);
        vec2 mid = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t_end - 0.5 * t_step);
        vec2 qp1 = 2.0 * mid - 0.5 * (qp0 + qp2);
        val += estimate_subdiv(qp0, qp1, qp2, sqrt_tol).val;

        qp0 = qp2;
    }
    // Total number of line segments to emit for this cubic (at least one).
    uint n = max(uint(ceil(val * 0.5 / sqrt_tol)), 1);

    bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE;
    uint path_ix = cubic.path_ix;
    Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
    Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
    ivec4 bbox = ivec4(path.bbox);
    // Number of tiles per row of the path's bounding box.
    int stride = bbox.z - bbox.x;

    // Second pass: walk the quadratics again and emit the line segments.
    vec2 line_p0 = cubic.p0;
    qp0 = cubic.p0;
    float v_step = val / float(n);
    int n_out = 1;
    float val_sum = 0.0;
    for (uint i = 0; i < n_quads; i++) {
        float t_end = float(i + 1) * t_step;
        vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t_end);
        vec2 mid = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t_end - 0.5 * t_step);
        vec2 qp1 = 2.0 * mid - 0.5 * (qp0 + qp2);
        SubdivResult params = estimate_subdiv(qp0, qp1, qp2, sqrt_tol);
        float u0 = approx_parabola_inv_integral(params.a0);
        float u2 = approx_parabola_inv_integral(params.a2);
        float uscale = 1.0 / (u2 - u0);
        float target = float(n_out) * v_step;
        while (n_out == n || target < val_sum + params.val) {
            // End point of this line: the cubic's end point for the last
            // segment, otherwise a point on the current quadratic.
            vec2 line_p1;
            if (n_out == n) {
                line_p1 = cubic.p3;
            } else {
                float u = (target - val_sum) / params.val;
                float a_mix = mix(params.a0, params.a2, u);
                float quad_t = (approx_parabola_inv_integral(a_mix) - u0) * uscale;
                line_p1 = eval_quad(qp0, qp1, qp2, quad_t);
            }

            // Output line segment

            // Bounding box of element in pixel coordinates.
            float x_lo = min(line_p0.x, line_p1.x);
            float y_lo = min(line_p0.y, line_p1.y);
            float xmin = x_lo - cubic.stroke.x;
            float xmax = max(line_p0.x, line_p1.x) + cubic.stroke.x;
            float ymin = y_lo - cubic.stroke.y;
            float ymax = max(line_p0.y, line_p1.y) + cubic.stroke.y;
            vec2 delta = line_p1 - line_p0;
            float dx = delta.x;
            float dy = delta.y;

            // Set up for the per-scanline coverage formula used in the row loop.
            float invslope;
            if (abs(dy) < 1e-9) {
                invslope = 1e9;
            } else {
                invslope = dx / dy;
            }
            float half_width = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX;
            // Note: assumes square tiles, otherwise scale.
            float x_per_row = invslope;
            float x_at_row0 = (line_p0.x - (line_p0.y - 0.5 * float(TILE_HEIGHT_PX)) * x_per_row) * SX;

            // Tile-space bounding box, clamped to the path's bounding box.
            int x0 = clamp(int(floor(xmin * SX)), bbox.x, bbox.z);
            int y0 = clamp(int(floor(ymin * SY)), bbox.y, bbox.w);
            int x1 = clamp(int(floor(xmax * SX) + 1), bbox.x, bbox.z);
            int y1 = clamp(int(floor(ymax * SY) + 1), bbox.y, bbox.w);
            float xc = x_at_row0 + x_per_row * float(y0);
            int base = (y0 - bbox.y) * stride - bbox.x;
            // TODO: can be tighter, use half_width to bound width
            uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
            // Consider using subgroups to aggregate atomic add.
            MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
            if (tile_alloc.failed || !mem_ok) {
                return;
            }
            uint tile_offset = tile_alloc.alloc.offset;

            TileSeg tile_seg;

            // xray starts at the tile column of the end point with the
            // smaller y; last_xray is the column of the other end point.
            int col_p0 = int(floor(line_p0.x * SX));
            int col_p1 = int(floor(line_p1.x * SX));
            bool p0_below = line_p0.y > line_p1.y;
            int xray = p0_below ? col_p1 : col_p0;
            int last_xray = p0_below ? col_p0 : col_p1;
            // Backdrop delta, selected by the vertical direction of the line.
            int backdrop = line_p1.y < line_p0.y ? 1 : -1;

            for (int y = y0; y < y1; y++) {
                float tile_y0 = float(y * TILE_HEIGHT_PX);
                int xbackdrop = max(xray + 1, bbox.x);
                if (!is_stroke && y_lo < tile_y0 && xbackdrop < bbox.z) {
                    TileRef bd_ref = Tile_index(path.tiles, uint(base + xbackdrop));
                    uint bd_el = bd_ref.offset >> 2;
                    if (touch_mem(path_alloc, bd_el + 1)) {
                        atomicAdd(memory[bd_el + 1], backdrop);
                    }
                }

                // next_xray is the xray for the next scanline; the line segment intersects
                // all tiles between xray and next_xray.
                int next_xray = last_xray;
                if (y < y1 - 1) {
                    float tile_y1 = float((y + 1) * TILE_HEIGHT_PX);
                    float x_edge = mix(line_p0.x, line_p1.x, (tile_y1 - line_p0.y) / dy);
                    next_xray = int(floor(x_edge * SX));
                }

                int min_xray = min(xray, next_xray);
                int max_xray = max(xray, next_xray);
                int xx0 = clamp(min(int(floor(xc - half_width)), min_xray), x0, x1);
                int xx1 = clamp(max(int(ceil(xc + half_width)), max_xray + 1), x0, x1);

                for (int x = xx0; x < xx1; x++) {
                    float tile_x0 = float(x * TILE_WIDTH_PX);
                    TileRef tile_ref = Tile_index(TileRef(path.tiles.offset), uint(base + x));
                    uint tile_el = tile_ref.offset >> 2;
                    // Install the new record's offset in the tile and keep
                    // the previous value to chain via `next`.
                    uint old = 0;
                    if (touch_mem(path_alloc, tile_el)) {
                        old = atomicExchange(memory[tile_el], tile_offset);
                    }
                    tile_seg.origin = line_p0;
                    tile_seg.vector = delta;
                    float y_edge = 0.0;
                    if (!is_stroke) {
                        y_edge = mix(line_p0.y, line_p1.y, (tile_x0 - line_p0.x) / dx);
                        if (x_lo < tile_x0) {
                            // The line extends left of this tile: clip it
                            // at the tile's left edge.
                            vec2 p = vec2(tile_x0, y_edge);
                            if (line_p0.x > line_p1.x) {
                                tile_seg.vector = p - line_p0;
                            } else {
                                tile_seg.origin = p;
                                tile_seg.vector = line_p1 - p;
                            }
                            // kernel4 uses sign(vector.x) for the sign of the intersection backdrop.
                            // Nudge zeroes towards the intended sign.
                            if (tile_seg.vector.x == 0.0) {
                                tile_seg.vector.x = sign(line_p1.x - line_p0.x) * 1e-9;
                            }
                        }
                        if (!(min_xray < x && x <= max_xray)) {
                            // Reject inconsistent intersections.
                            y_edge = 1e9;
                        }
                    }
                    tile_seg.y_edge = y_edge;
                    tile_seg.next.offset = old;
                    TileSeg_write(tile_alloc.alloc, TileSegRef(tile_offset), tile_seg);
                    tile_offset += TileSeg_size;
                }
                xc += x_per_row;
                base += stride;
                xray = next_xray;
            }

            n_out += 1;
            target += v_step;
            line_p0 = line_p1;
        }
        val_sum += params.val;

        qp0 = qp2;
    }
}
291