backdrop.comp raw

   1  // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
   2  
   3  // Propagation of tile backdrop for filling.
   4  //
   5  // Each thread reads one path element and calculates the number of spanned tiles
   6  // based on the bounding box.
   7  // In a further compaction step, the workgroup loops over the corresponding tile rows per element in parallel.
   8  // For each row the per tile backdrop will be read, as calculated in the previous coarse path segment kernel,
   9  // and propagated from the left to the right (prefix summed).
  10  //
  11  // Output state:
  12  //  - Each path element has an array of tiles covering the whole path based on bounding box
  13  //  - Each tile per path element contains the 'backdrop' and a list of subdivided path segments
  14  
  15  #version 450
  16  #extension GL_GOOGLE_include_directive : enable
  17  
  18  #include "mem.h"
  19  #include "setup.h"
  20  
  21  #define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
      // Workgroup size is a power of two: BACKDROP_WG = 2^LG_BACKDROP_WG.
      // main() depends on this: both its log-step prefix sum and its binary
      // search run exactly LG_BACKDROP_WG iterations.
      // NOTE(review): LG_WG_FACTOR is not defined in this file; presumably it
      // comes from one of the included headers - confirm.
  22  #define BACKDROP_WG (1 << LG_BACKDROP_WG)
  23  
      // One-dimensional workgroup of BACKDROP_WG invocations.
  24  layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
  25  
      // Read-only configuration buffer. This shader reads conf.n_elements,
      // conf.anno_alloc and conf.tile_alloc from it.
  26  layout(set = 0, binding = 1) readonly buffer ConfigBuf {
  27      Config conf;
  28  };
  29  
  30  #include "annotated.h"
  31  #include "tile.h"
  32  
  33  shared uint sh_row_count[BACKDROP_WG]; // per-thread row count, then overwritten in place by its inclusive prefix sum
  34  shared Alloc sh_row_alloc[BACKDROP_WG]; // tile allocation of the path handled by each thread (written only for handled elements)
  35  shared uint sh_row_width[BACKDROP_WG]; // bbox width (bbox.z - bbox.x) of the path handled by each thread (written only for handled elements)
  36  
  37  void main() {
          // Propagates tile backdrops from left to right along tile rows, in two phases:
          //  1. Each thread inspects one path element and records how many tile rows
          //     it contributes, together with the tile width and tile allocation of
          //     that path.
          //  2. After a workgroup-wide prefix sum of the row counts, the threads are
          //     reassigned one per row and prefix-sum the per-tile values in place.
  38      uint th_ix = gl_LocalInvocationID.x;
  39      uint element_ix = gl_GlobalInvocationID.x;
  40      AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
  41  
  42      // Work assignment: 1 thread : 1 path element
          // row_count stays 0 for out-of-range threads and for elements that are skipped below.
  43      uint row_count = 0;
          // Snapshot of the error state. It is passed to new_alloc and, when false,
          // suppresses every tile read and write in the second phase.
  44      bool mem_ok = mem_error == NO_ERROR;
  45      if (element_ix < conf.n_elements) {
  46          AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
              // Only the tags listed here are processed; any other tag leaves row_count at 0.
  47          switch (tag.tag) {
  48          case Annotated_Image:
  49          case Annotated_BeginClip:
  50          case Annotated_Color:
                  // Elements whose fill mode is not MODE_NONZERO are skipped entirely.
  51              if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) {
  52                  break;
  53              }
  54              // Nonzero fill: carry on (no break) and register the rows of this path.
  55              PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
  56              Path path = Path_read(conf.tile_alloc, path_ref);
                  // Width and height of the bounding box: (x, y) is the start corner, (z, w) the end corner.
  57              sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
  58              row_count = path.bbox.w - path.bbox.y;
  59              // Paths that don't cross tile top edges don't have backdrops.
  60              // Don't apply the optimization to paths that may cross the y = 0
  61              // top edge, but clipped to 1 row.
  62              if (row_count == 1 && path.bbox.y > 0) {
  63                  // Note: this can probably be expanded to width = 2 as
  64                  // long as it doesn't cross the left edge.
  65                  row_count = 0;
  66              }
                  // Allocation spanning the whole tile array of the path:
                  // (bbox width * bbox height) tiles of Tile_size each. It is sized from
                  // the bbox, not from row_count, so it is unaffected by the reset above.
  67              Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
  68              sh_row_alloc[th_ix] = path_alloc;
  69          }
  70      }
  71  
  72      sh_row_count[th_ix] = row_count;
  73      // Prefix sum of sh_row_count
          // Inclusive prefix sum in LG_BACKDROP_WG steps: in step i every thread adds
          // the entry 2^i slots to its left. The first barrier makes the previous
          // writes visible before they are read; the second ensures all reads are
          // done before any thread overwrites its slot.
  74      for (uint i = 0; i < LG_BACKDROP_WG; i++) {
  75          barrier();
  76          if (th_ix >= (1 << i)) {
  77              row_count += sh_row_count[th_ix - (1 << i)];
  78          }
  79          barrier();
  80          sh_row_count[th_ix] = row_count;
  81      }
          // Make the final prefix sums visible to all threads before they are consumed.
  82      barrier();
  83      // Work assignment: 1 thread : 1 path element row
          // The last prefix-sum entry is the number of rows in the whole workgroup.
          // Threads stride over the rows in steps of BACKDROP_WG.
  84      uint total_rows = sh_row_count[BACKDROP_WG - 1];
  85      for (uint row = th_ix; row < total_rows; row += BACKDROP_WG) {
  86          // Binary search to find element
              // Finds the smallest el_ix with row < sh_row_count[el_ix], i.e. the
              // element whose range of rows contains this row. Elements with zero
              // rows are never selected, so their unwritten shared slots are not read.
  87          uint el_ix = 0;
  88          for (uint i = 0; i < LG_BACKDROP_WG; i++) {
  89              uint probe = el_ix + ((BACKDROP_WG / 2) >> i);
  90              if (row >= sh_row_count[probe - 1]) {
  91                  el_ix = probe;
  92              }
  93          }
  94          uint width = sh_row_width[el_ix];
  95          if (width > 0 && mem_ok) {
  96              // Process one row sequentially
  97              // Read backdrop value per tile and prefix sum it
  98              Alloc tiles_alloc = sh_row_alloc[el_ix];
                  // Row index within this element: the global row minus the rows of all preceding elements.
  99              uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
                  // Index of the word to accumulate in the first tile of the row. Tiles are
                  // addressed 2 words apart and the +1 selects the second word of a tile.
                  // NOTE(review): presumably offset >> 2 converts a byte offset to a 32-bit
                  // word index and word 1 is the backdrop field of Tile (see tile.h) - confirm.
 100              uint tile_el_ix = (tiles_alloc.offset >> 2) + 1 + seq_ix * 2 * width;
 101              uint sum = read_mem(tiles_alloc, tile_el_ix);
                  // The first tile keeps its value; each later tile is overwritten with the
                  // running sum of its own value and those of all tiles to its left.
 102              for (uint x = 1; x < width; x++) {
 103                  tile_el_ix += 2;
 104                  sum += read_mem(tiles_alloc, tile_el_ix);
 105                  write_mem(tiles_alloc, tile_el_ix, sum);
 106              }
 107          }
 108      }
 109  }
 110