compute.go raw
1 // SPDX-License-Identifier: Unlicense OR MIT
2
3 package gpu
4
5 import (
6 "encoding/binary"
7 "errors"
8 "fmt"
9 "image"
10 "image/color"
11 "math/bits"
12 "time"
13 "unsafe"
14
15 "github.com/p9c/p9/pkg/gel/gio/f32"
16 "github.com/p9c/p9/pkg/gel/gio/gpu/internal/driver"
17 "github.com/p9c/p9/pkg/gel/gio/internal/byteslice"
18 "github.com/p9c/p9/pkg/gel/gio/internal/f32color"
19 "github.com/p9c/p9/pkg/gel/gio/internal/ops"
20 "github.com/p9c/p9/pkg/gel/gio/internal/scene"
21 "github.com/p9c/p9/pkg/gel/gio/layout"
22 "github.com/p9c/p9/pkg/gel/gio/op"
23 )
24
25 type compute struct {
26 ctx driver.Device
27 enc encoder
28
29 drawOps drawOps
30 texOps []textureOp
31 cache *resourceCache
32 maxTextureDim int
33
34 programs struct {
35 elements driver.Program
36 tileAlloc driver.Program
37 pathCoarse driver.Program
38 backdrop driver.Program
39 binning driver.Program
40 coarse driver.Program
41 kernel4 driver.Program
42 }
43 buffers struct {
44 config driver.Buffer
45 scene sizedBuffer
46 state sizedBuffer
47 memory sizedBuffer
48 }
49 output struct {
50 size image.Point
51 // image is the output texture. Note that it is in RGBA format,
52 // but contains data in sRGB. See blitOutput for more detail.
53 image driver.Texture
54 blitProg driver.Program
55 }
56 // images contains ImageOp images packed into a texture atlas.
57 images struct {
58 packer packer
59 // positions maps imageOpData.handles to positions inside tex.
60 positions map[interface{}]image.Point
61 tex driver.Texture
62 }
63 // materials contains the pre-processed materials (transformed images for
64 // now, gradients etc. later) packed in a texture atlas. The atlas is used
65 // as source in kernel4.
66 materials struct {
67 // offsets maps texture ops to the offsets to put in their FillImage commands.
68 offsets map[textureKey]image.Point
69
70 prog driver.Program
71 layout driver.InputLayout
72
73 packer packer
74
75 tex driver.Texture
76 fbo driver.Framebuffer
77 quads []materialVertex
78
79 bufSize int
80 buffer driver.Buffer
81 }
82 timers struct {
83 profile string
84 t *timers
85 elements *timer
86 tileAlloc *timer
87 pathCoarse *timer
88 backdropBinning *timer
89 coarse *timer
90 kernel4 *timer
91 }
92
93 // The following fields hold scratch space to avoid garbage.
94 zeroSlice []byte
95 memHeader *memoryHeader
96 conf *config
97 }
98
// materialVertex is one corner of a quad used to render a transformed
// material: a position followed by the texture coordinate to sample.
type materialVertex struct {
	// posX and posY are the vertex position.
	posX float32
	posY float32
	// u and v are the texture coordinates.
	u float32
	v float32
}
105
106 // textureKey identifies textureOp.
107 type textureKey struct {
108 handle interface{}
109 transform f32.Affine2D
110 }
111
112 // textureOp represents an imageOp that requires texture space.
113 type textureOp struct {
114 // sceneIdx is the index in the scene that contains the fill image command
115 // that corresponds to the operation.
116 sceneIdx int
117 key textureKey
118 img imageOpData
119
120 // pos is the position of the untransformed image in the images texture.
121 pos image.Point
122 }
123
124 type encoder struct {
125 scene []scene.Command
126 npath int
127 npathseg int
128 ntrans int
129 }
130
131 type encodeState struct {
132 trans f32.Affine2D
133 clip f32.Rectangle
134 }
135
136 type sizedBuffer struct {
137 size int
138 buffer driver.Buffer
139 }
140
141 // config matches Config in setup.h
142 type config struct {
143 n_elements uint32 // paths
144 n_pathseg uint32
145 width_in_tiles uint32
146 height_in_tiles uint32
147 tile_alloc memAlloc
148 bin_alloc memAlloc
149 ptcl_alloc memAlloc
150 pathseg_alloc memAlloc
151 anno_alloc memAlloc
152 trans_alloc memAlloc
153 }
154
// memAlloc matches Alloc in mem.h. Only the offset is carried; the size
// member is intentionally left out.
type memAlloc struct {
	// offset is the start of the allocation.
	offset uint32
	//size uint32
}
160
// memoryHeader matches the header of Memory in mem.h: the allocation offset
// followed by the error code reported by the shaders.
type memoryHeader struct {
	mem_offset, mem_error uint32
}
166
// GPU structure sizes and constants.
const (
	// Tile dimensions in pixels.
	tileWidthPx, tileHeightPx = 32, 32

	ptclInitialAlloc = 1024

	// Image units used when binding the kernel4 output and atlas textures.
	kernel4OutputUnit, kernel4AtlasUnit = 2, 3
)

// Per-element sizes used when sizing the GPU allocations.
const (
	pathSize    = 12
	binSize     = 8
	pathsegSize = 52
	annoSize    = 32
	transSize   = 24
	stateSize   = 60
	// stateStride covers a flag plus two states.
	stateStride = 4 + 2*stateSize
)
183
// mem.h constants: error codes reported in memoryHeader.mem_error.
const (
	memNoError      = iota // NO_ERROR
	memMallocFailed        // ERR_MALLOC_FAILED
)
189
190 func newCompute(ctx driver.Device) (*compute, error) {
191 maxDim := ctx.Caps().MaxTextureSize
192 // Large atlas textures cause artifacts due to precision loss in
193 // shaders.
194 if cap := 8192; maxDim > cap {
195 maxDim = cap
196 }
197 g := &compute{
198 ctx: ctx,
199 cache: newResourceCache(),
200 maxTextureDim: maxDim,
201 conf: new(config),
202 memHeader: new(memoryHeader),
203 }
204
205 blitProg, err := ctx.NewProgram(shader_copy_vert, shader_copy_frag)
206 if err != nil {
207 g.Release()
208 return nil, err
209 }
210 g.output.blitProg = blitProg
211
212 materialProg, err := ctx.NewProgram(shader_material_vert, shader_material_frag)
213 if err != nil {
214 g.Release()
215 return nil, err
216 }
217 g.materials.prog = materialProg
218 progLayout, err := ctx.NewInputLayout(shader_material_vert, []driver.InputDesc{
219 {Type: driver.DataTypeFloat, Size: 2, Offset: 0},
220 {Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2},
221 })
222 if err != nil {
223 g.Release()
224 return nil, err
225 }
226 g.materials.layout = progLayout
227
228 g.drawOps.pathCache = newOpCache()
229 g.drawOps.compute = true
230
231 buf, err := ctx.NewBuffer(driver.BufferBindingShaderStorage, int(unsafe.Sizeof(config{})))
232 if err != nil {
233 g.Release()
234 return nil, err
235 }
236 g.buffers.config = buf
237
238 shaders := []struct {
239 prog *driver.Program
240 src driver.ShaderSources
241 }{
242 {&g.programs.elements, shader_elements_comp},
243 {&g.programs.tileAlloc, shader_tile_alloc_comp},
244 {&g.programs.pathCoarse, shader_path_coarse_comp},
245 {&g.programs.backdrop, shader_backdrop_comp},
246 {&g.programs.binning, shader_binning_comp},
247 {&g.programs.coarse, shader_coarse_comp},
248 {&g.programs.kernel4, shader_kernel4_comp},
249 }
250 for _, shader := range shaders {
251 p, err := ctx.NewComputeProgram(shader.src)
252 if err != nil {
253 g.Release()
254 return nil, err
255 }
256 *shader.prog = p
257 }
258 return g, nil
259 }
260
261 func (g *compute) Collect(viewport image.Point, ops *op.Ops) {
262 g.drawOps.reset(g.cache, viewport)
263 g.drawOps.collect(g.ctx, g.cache, ops, viewport)
264 for _, img := range g.drawOps.allImageOps {
265 expandPathOp(img.path, img.clip)
266 }
267 if g.drawOps.profile && g.timers.t == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) {
268 t := &g.timers
269 t.t = newTimers(g.ctx)
270 t.elements = g.timers.t.newTimer()
271 t.tileAlloc = g.timers.t.newTimer()
272 t.pathCoarse = g.timers.t.newTimer()
273 t.backdropBinning = g.timers.t.newTimer()
274 t.coarse = g.timers.t.newTimer()
275 t.kernel4 = g.timers.t.newTimer()
276 }
277 }
278
279 func (g *compute) Clear(col color.NRGBA) {
280 g.drawOps.clear = true
281 g.drawOps.clearColor = f32color.LinearFromSRGB(col)
282 }
283
284 func (g *compute) Frame() error {
285 viewport := g.drawOps.viewport
286 tileDims := image.Point{
287 X: (viewport.X + tileWidthPx - 1) / tileWidthPx,
288 Y: (viewport.Y + tileHeightPx - 1) / tileHeightPx,
289 }
290
291 defFBO := g.ctx.BeginFrame()
292 defer g.ctx.EndFrame()
293
294 if err := g.encode(viewport); err != nil {
295 return err
296 }
297 if err := g.uploadImages(); err != nil {
298 return err
299 }
300 if err := g.renderMaterials(); err != nil {
301 return err
302 }
303 if err := g.render(tileDims); err != nil {
304 return err
305 }
306 g.ctx.BindFramebuffer(defFBO)
307 g.blitOutput(viewport)
308 g.cache.frame()
309 g.drawOps.pathCache.frame()
310 t := &g.timers
311 if g.drawOps.profile && t.t.ready() {
312 et, tat, pct, bbt := t.elements.Elapsed, t.tileAlloc.Elapsed, t.pathCoarse.Elapsed, t.backdropBinning.Elapsed
313 ct, k4t := t.coarse.Elapsed, t.kernel4.Elapsed
314 ft := et + tat + pct + bbt + ct + k4t
315 q := 100 * time.Microsecond
316 ft = ft.Round(q)
317 et, tat, pct, bbt = et.Round(q), tat.Round(q), pct.Round(q), bbt.Round(q)
318 ct, k4t = ct.Round(q), k4t.Round(q)
319 t.profile = fmt.Sprintf("ft:%7s et:%7s tat:%7s pct:%7s bbt:%7s ct:%7s k4t:%7s", ft, et, tat, pct, bbt, ct, k4t)
320 }
321 g.drawOps.clear = false
322 return nil
323 }
324
325 func (g *compute) Profile() string {
326 return g.timers.profile
327 }
328
329 // blitOutput copies the compute render output to the output FBO. We need to
330 // copy because compute shaders can only write to textures, not FBOs. Compute
331 // shader can only write to RGBA textures, but since we actually render in sRGB
332 // format we can't use glBlitFramebuffer, because it does sRGB conversion.
333 func (g *compute) blitOutput(viewport image.Point) {
334 if !g.drawOps.clear {
335 g.ctx.BlendFunc(driver.BlendFactorOne, driver.BlendFactorOneMinusSrcAlpha)
336 g.ctx.SetBlend(true)
337 defer g.ctx.SetBlend(false)
338 }
339 g.ctx.Viewport(0, 0, viewport.X, viewport.Y)
340 g.ctx.BindTexture(0, g.output.image)
341 g.ctx.BindProgram(g.output.blitProg)
342 g.ctx.DrawArrays(driver.DrawModeTriangleStrip, 0, 4)
343 }
344
345 func (g *compute) encode(viewport image.Point) error {
346 g.texOps = g.texOps[:0]
347 g.enc.reset()
348
349 // Flip Y-axis.
350 flipY := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(1, -1)).Offset(f32.Pt(0, float32(viewport.Y)))
351 g.enc.transform(flipY)
352 if g.drawOps.clear {
353 g.enc.rect(f32.Rectangle{Max: layout.FPt(viewport)})
354 g.enc.fillColor(f32color.NRGBAToRGBA(g.drawOps.clearColor.SRGB()))
355 }
356 return g.encodeOps(flipY, viewport, g.drawOps.allImageOps)
357 }
358
359 func (g *compute) renderMaterials() error {
360 m := &g.materials
361 m.quads = m.quads[:0]
362 resize := false
363 reclaimed := false
364 restart:
365 for {
366 for _, op := range g.texOps {
367 if off, exists := m.offsets[op.key]; exists {
368 g.enc.setFillImageOffset(op.sceneIdx, off)
369 continue
370 }
371 quad, bounds := g.materialQuad(op.key.transform, op.img, op.pos)
372
373 // A material is clipped to avoid drawing outside its bounds inside the atlas. However,
374 // imprecision in the clipping may cause a single pixel overflow. Be safe.
375 size := bounds.Size().Add(image.Pt(1, 1))
376 place, fits := m.packer.tryAdd(size)
377 if !fits {
378 m.offsets = nil
379 m.quads = m.quads[:0]
380 m.packer.clear()
381 if !reclaimed {
382 // Some images may no longer be in use, try again
383 // after clearing existing maps.
384 reclaimed = true
385 } else {
386 m.packer.maxDim += 256
387 resize = true
388 if m.packer.maxDim > g.maxTextureDim {
389 return errors.New("compute: no space left in material atlas")
390 }
391 }
392 m.packer.newPage()
393 continue restart
394 }
395 // Position quad to match place.
396 offset := place.Pos.Sub(bounds.Min)
397 offsetf := layout.FPt(offset)
398 for i := range quad {
399 quad[i].posX += offsetf.X
400 quad[i].posY += offsetf.Y
401 }
402 // Draw quad as two triangles.
403 m.quads = append(m.quads, quad[0], quad[1], quad[3], quad[3], quad[1], quad[2])
404 if m.offsets == nil {
405 m.offsets = make(map[textureKey]image.Point)
406 }
407 m.offsets[op.key] = offset
408 g.enc.setFillImageOffset(op.sceneIdx, offset)
409 }
410 break
411 }
412 if len(m.quads) == 0 {
413 return nil
414 }
415 texSize := m.packer.maxDim
416 if resize {
417 if m.fbo != nil {
418 m.fbo.Release()
419 m.fbo = nil
420 }
421 if m.tex != nil {
422 m.tex.Release()
423 m.tex = nil
424 }
425 handle, err := g.ctx.NewTexture(driver.TextureFormatRGBA8, texSize, texSize,
426 driver.FilterNearest, driver.FilterNearest,
427 driver.BufferBindingShaderStorage|driver.BufferBindingFramebuffer)
428 if err != nil {
429 return fmt.Errorf("compute: failed to create material atlas: %v", err)
430 }
431 m.tex = handle
432 fbo, err := g.ctx.NewFramebuffer(handle, 0)
433 if err != nil {
434 return fmt.Errorf("compute: failed to create material framebuffer: %v", err)
435 }
436 m.fbo = fbo
437 }
438 // TODO: move to shaders.
439 // Transform to clip space: [-1, -1] - [1, 1].
440 clip := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(2/float32(texSize), 2/float32(texSize))).Offset(f32.Pt(-1, -1))
441 for i, v := range m.quads {
442 p := clip.Transform(f32.Pt(v.posX, v.posY))
443 m.quads[i].posX = p.X
444 m.quads[i].posY = p.Y
445 }
446 vertexData := byteslice.Slice(m.quads)
447 if len(vertexData) > m.bufSize {
448 if m.buffer != nil {
449 m.buffer.Release()
450 m.buffer = nil
451 }
452 n := pow2Ceil(len(vertexData))
453 buf, err := g.ctx.NewBuffer(driver.BufferBindingVertices, n)
454 if err != nil {
455 return err
456 }
457 m.bufSize = n
458 m.buffer = buf
459 }
460 m.buffer.Upload(vertexData)
461 g.ctx.BindTexture(0, g.images.tex)
462 g.ctx.BindFramebuffer(m.fbo)
463 g.ctx.Viewport(0, 0, texSize, texSize)
464 if reclaimed {
465 g.ctx.Clear(0, 0, 0, 0)
466 }
467 g.ctx.BindProgram(m.prog)
468 g.ctx.BindVertexBuffer(m.buffer, int(unsafe.Sizeof(m.quads[0])), 0)
469 g.ctx.BindInputLayout(m.layout)
470 g.ctx.DrawArrays(driver.DrawModeTriangles, 0, len(m.quads))
471 return nil
472 }
473
474 func (g *compute) uploadImages() error {
475 // padding is the number of pixels added to the right and below
476 // images, to avoid atlas filtering artifacts.
477 const padding = 1
478
479 a := &g.images
480 var uploads map[interface{}]*image.RGBA
481 resize := false
482 reclaimed := false
483 restart:
484 for {
485 for i, op := range g.texOps {
486 if pos, exists := a.positions[op.img.handle]; exists {
487 g.texOps[i].pos = pos
488 continue
489 }
490 size := op.img.src.Bounds().Size().Add(image.Pt(padding, padding))
491 place, fits := a.packer.tryAdd(size)
492 if !fits {
493 a.positions = nil
494 uploads = nil
495 a.packer.clear()
496 if !reclaimed {
497 // Some images may no longer be in use, try again
498 // after clearing existing maps.
499 reclaimed = true
500 } else {
501 a.packer.maxDim += 256
502 resize = true
503 if a.packer.maxDim > g.maxTextureDim {
504 return errors.New("compute: no space left in image atlas")
505 }
506 }
507 a.packer.newPage()
508 continue restart
509 }
510 if a.positions == nil {
511 a.positions = make(map[interface{}]image.Point)
512 }
513 a.positions[op.img.handle] = place.Pos
514 g.texOps[i].pos = place.Pos
515 if uploads == nil {
516 uploads = make(map[interface{}]*image.RGBA)
517 }
518 uploads[op.img.handle] = op.img.src
519 }
520 break
521 }
522 if len(uploads) == 0 {
523 return nil
524 }
525 if resize {
526 if a.tex != nil {
527 a.tex.Release()
528 a.tex = nil
529 }
530 sz := a.packer.maxDim
531 handle, err := g.ctx.NewTexture(driver.TextureFormatSRGB, sz, sz, driver.FilterLinear, driver.FilterLinear, driver.BufferBindingTexture)
532 if err != nil {
533 return fmt.Errorf("compute: failed to create image atlas: %v", err)
534 }
535 a.tex = handle
536 }
537 for h, img := range uploads {
538 pos, ok := a.positions[h]
539 if !ok {
540 panic("compute: internal error: image not placed")
541 }
542 size := img.Bounds().Size()
543 driver.UploadImage(a.tex, pos, img)
544 rightPadding := image.Pt(padding, size.Y)
545 a.tex.Upload(image.Pt(pos.X+size.X, pos.Y), rightPadding, g.zeros(rightPadding.X*rightPadding.Y*4))
546 bottomPadding := image.Pt(size.X, padding)
547 a.tex.Upload(image.Pt(pos.X, pos.Y+size.Y), bottomPadding, g.zeros(bottomPadding.X*bottomPadding.Y*4))
548 }
549 return nil
550 }
551
// pow2Ceil returns the smallest power of two that is greater than or equal
// to v. For v == 0 the result is 1.
func pow2Ceil(v int) int {
	u := uint(v)
	shift := bits.Len(u)
	// An exact power of two has a single bit set and is its own ceiling.
	if u != 0 && u&(u-1) == 0 {
		shift--
	}
	return 1 << shift
}
559
560 // materialQuad constructs a quad that represents the transformed image. It returns the quad
561 // and its bounds.
562 func (g *compute) materialQuad(M f32.Affine2D, img imageOpData, uvPos image.Point) ([4]materialVertex, image.Rectangle) {
563 imgSize := layout.FPt(img.src.Bounds().Size())
564 sx, hx, ox, hy, sy, oy := M.Elems()
565 transOff := f32.Pt(ox, oy)
566 // The 4 corners of the image rectangle transformed by M, excluding its offset, are:
567 //
568 // q0: M * (0, 0) q3: M * (w, 0)
569 // q1: M * (0, h) q2: M * (w, h)
570 //
571 // Note that q0 = M*0 = 0, q2 = q1 + q3.
572 q0 := f32.Pt(0, 0)
573 q1 := f32.Pt(hx*imgSize.Y, sy*imgSize.Y)
574 q3 := f32.Pt(sx*imgSize.X, hy*imgSize.X)
575 q2 := q1.Add(q3)
576 q0 = q0.Add(transOff)
577 q1 = q1.Add(transOff)
578 q2 = q2.Add(transOff)
579 q3 = q3.Add(transOff)
580
581 boundsf := f32.Rectangle{
582 Min: min(min(q0, q1), min(q2, q3)),
583 Max: max(max(q0, q1), max(q2, q3)),
584 }
585
586 bounds := boundRectF(boundsf)
587 uvPosf := layout.FPt(uvPos)
588 atlasScale := 1 / float32(g.images.packer.maxDim)
589 uvBounds := f32.Rectangle{
590 Min: uvPosf.Mul(atlasScale),
591 Max: uvPosf.Add(imgSize).Mul(atlasScale),
592 }
593 quad := [4]materialVertex{
594 {posX: q0.X, posY: q0.Y, u: uvBounds.Min.X, v: uvBounds.Min.Y},
595 {posX: q1.X, posY: q1.Y, u: uvBounds.Min.X, v: uvBounds.Max.Y},
596 {posX: q2.X, posY: q2.Y, u: uvBounds.Max.X, v: uvBounds.Max.Y},
597 {posX: q3.X, posY: q3.Y, u: uvBounds.Max.X, v: uvBounds.Min.Y},
598 }
599 return quad, bounds
600 }
601
602 func max(p1, p2 f32.Point) f32.Point {
603 p := p1
604 if p2.X > p.X {
605 p.X = p2.X
606 }
607 if p2.Y > p.Y {
608 p.Y = p2.Y
609 }
610 return p
611 }
612
613 func min(p1, p2 f32.Point) f32.Point {
614 p := p1
615 if p2.X < p.X {
616 p.X = p2.X
617 }
618 if p2.Y < p.Y {
619 p.Y = p2.Y
620 }
621 return p
622 }
623
624 func (g *compute) encodeOps(trans f32.Affine2D, viewport image.Point, ops []imageOp) error {
625 for _, op := range ops {
626 bounds := layout.FRect(op.clip)
627 // clip is the union of all drawing affected by the clipping
628 // operation. TODO: tighten.
629 clip := f32.Rect(0, 0, float32(viewport.X), float32(viewport.Y))
630 nclips := g.encodeClipStack(clip, bounds, op.path, false)
631 m := op.material
632 switch m.material {
633 case materialTexture:
634 t := trans.Mul(m.trans)
635 g.texOps = append(g.texOps, textureOp{
636 sceneIdx: len(g.enc.scene),
637 img: m.data,
638 key: textureKey{
639 transform: t,
640 handle: m.data.handle,
641 },
642 })
643 // Add fill command, its offset is resolved and filled in renderMaterials.
644 g.enc.fillImage(0)
645 case materialColor:
646 g.enc.fillColor(f32color.NRGBAToRGBA(op.material.color.SRGB()))
647 case materialLinearGradient:
648 // TODO: implement.
649 g.enc.fillColor(f32color.NRGBAToRGBA(op.material.color1.SRGB()))
650 default:
651 panic("not implemented")
652 }
653 if op.path != nil && op.path.path {
654 g.enc.fillMode(scene.FillModeNonzero)
655 g.enc.transform(op.path.trans.Invert())
656 }
657 // Pop the clip stack.
658 for i := 0; i < nclips; i++ {
659 g.enc.endClip(clip)
660 }
661 }
662 return nil
663 }
664
665 // encodeClips encodes a stack of clip paths and return the stack depth.
666 func (g *compute) encodeClipStack(clip, bounds f32.Rectangle, p *pathOp, begin bool) int {
667 nclips := 0
668 if p != nil && p.parent != nil {
669 nclips += g.encodeClipStack(clip, bounds, p.parent, true)
670 nclips += 1
671 }
672 isStroke := p.stroke.Width > 0
673 if p != nil && p.path {
674 if isStroke {
675 g.enc.fillMode(scene.FillModeStroke)
676 g.enc.lineWidth(p.stroke.Width)
677 }
678 pathData, _ := g.drawOps.pathCache.get(p.pathKey)
679 g.enc.transform(p.trans)
680 g.enc.append(pathData.computePath)
681 } else {
682 g.enc.rect(bounds)
683 }
684 if begin {
685 g.enc.beginClip(clip)
686 if isStroke {
687 g.enc.fillMode(scene.FillModeNonzero)
688 }
689 if p != nil && p.path {
690 g.enc.transform(p.trans.Invert())
691 }
692 }
693 return nclips
694 }
695
696 func encodePath(verts []byte) encoder {
697 var enc encoder
698 for len(verts) >= scene.CommandSize+4 {
699 cmd := ops.DecodeCommand(verts[4:])
700 enc.scene = append(enc.scene, cmd)
701 enc.npathseg++
702 verts = verts[scene.CommandSize+4:]
703 }
704 return enc
705 }
706
707 func (g *compute) render(tileDims image.Point) error {
708 const (
709 // wgSize is the largest and most common workgroup size.
710 wgSize = 128
711 // PARTITION_SIZE from elements.comp
712 partitionSize = 32 * 4
713 )
714 widthInBins := (tileDims.X + 15) / 16
715 heightInBins := (tileDims.Y + 7) / 8
716 if widthInBins*heightInBins > wgSize {
717 return fmt.Errorf("gpu: output too large (%dx%d)", tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx)
718 }
719
720 // Pad scene with zeroes to avoid reading garbage in elements.comp.
721 scenePadding := partitionSize - len(g.enc.scene)%partitionSize
722 g.enc.scene = append(g.enc.scene, make([]scene.Command, scenePadding)...)
723
724 realloced := false
725 scene := byteslice.Slice(g.enc.scene)
726 if s := len(scene); s > g.buffers.scene.size {
727 realloced = true
728 paddedCap := s * 11 / 10
729 if err := g.buffers.scene.ensureCapacity(g.ctx, paddedCap); err != nil {
730 return err
731 }
732 }
733 g.buffers.scene.buffer.Upload(scene)
734
735 w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx
736 if g.output.size.X != w || g.output.size.Y != h {
737 if err := g.resizeOutput(image.Pt(w, h)); err != nil {
738 return err
739 }
740 }
741 g.ctx.BindImageTexture(kernel4OutputUnit, g.output.image, driver.AccessWrite, driver.TextureFormatRGBA8)
742 if t := g.materials.tex; t != nil {
743 g.ctx.BindImageTexture(kernel4AtlasUnit, t, driver.AccessRead, driver.TextureFormatRGBA8)
744 }
745
746 // alloc is the number of allocated bytes for static buffers.
747 var alloc uint32
748 round := func(v, quantum int) int {
749 return (v + quantum - 1) &^ (quantum - 1)
750 }
751 malloc := func(size int) memAlloc {
752 size = round(size, 4)
753 offset := alloc
754 alloc += uint32(size)
755 return memAlloc{offset /*, uint32(size)*/}
756 }
757
758 *g.conf = config{
759 n_elements: uint32(g.enc.npath),
760 n_pathseg: uint32(g.enc.npathseg),
761 width_in_tiles: uint32(tileDims.X),
762 height_in_tiles: uint32(tileDims.Y),
763 tile_alloc: malloc(g.enc.npath * pathSize),
764 bin_alloc: malloc(round(g.enc.npath, wgSize) * binSize),
765 ptcl_alloc: malloc(tileDims.X * tileDims.Y * ptclInitialAlloc),
766 pathseg_alloc: malloc(g.enc.npathseg * pathsegSize),
767 anno_alloc: malloc(g.enc.npath * annoSize),
768 trans_alloc: malloc(g.enc.ntrans * transSize),
769 }
770
771 numPartitions := (g.enc.numElements() + 127) / 128
772 // clearSize is the atomic partition counter plus flag and 2 states per partition.
773 clearSize := 4 + numPartitions*stateStride
774 if clearSize > g.buffers.state.size {
775 realloced = true
776 paddedCap := clearSize * 11 / 10
777 if err := g.buffers.state.ensureCapacity(g.ctx, paddedCap); err != nil {
778 return err
779 }
780 }
781
782 g.buffers.config.Upload(byteslice.Struct(g.conf))
783
784 minSize := int(unsafe.Sizeof(memoryHeader{})) + int(alloc)
785 if minSize > g.buffers.memory.size {
786 realloced = true
787 // Add space for dynamic GPU allocations.
788 const sizeBump = 4 * 1024 * 1024
789 minSize += sizeBump
790 if err := g.buffers.memory.ensureCapacity(g.ctx, minSize); err != nil {
791 return err
792 }
793 }
794 for {
795 *g.memHeader = memoryHeader{
796 mem_offset: alloc,
797 }
798 g.buffers.memory.buffer.Upload(byteslice.Struct(g.memHeader))
799 g.buffers.state.buffer.Upload(g.zeros(clearSize))
800
801 if realloced {
802 realloced = false
803 g.bindBuffers()
804 }
805 t := &g.timers
806 g.ctx.MemoryBarrier()
807 t.elements.begin()
808 g.ctx.BindProgram(g.programs.elements)
809 g.ctx.DispatchCompute(numPartitions, 1, 1)
810 g.ctx.MemoryBarrier()
811 t.elements.end()
812 t.tileAlloc.begin()
813 g.ctx.BindProgram(g.programs.tileAlloc)
814 g.ctx.DispatchCompute((g.enc.npath+wgSize-1)/wgSize, 1, 1)
815 g.ctx.MemoryBarrier()
816 t.tileAlloc.end()
817 t.pathCoarse.begin()
818 g.ctx.BindProgram(g.programs.pathCoarse)
819 g.ctx.DispatchCompute((g.enc.npathseg+31)/32, 1, 1)
820 g.ctx.MemoryBarrier()
821 t.pathCoarse.end()
822 t.backdropBinning.begin()
823 g.ctx.BindProgram(g.programs.backdrop)
824 g.ctx.DispatchCompute((g.enc.npath+wgSize-1)/wgSize, 1, 1)
825 // No barrier needed between backdrop and binning.
826 g.ctx.BindProgram(g.programs.binning)
827 g.ctx.DispatchCompute((g.enc.npath+wgSize-1)/wgSize, 1, 1)
828 g.ctx.MemoryBarrier()
829 t.backdropBinning.end()
830 t.coarse.begin()
831 g.ctx.BindProgram(g.programs.coarse)
832 g.ctx.DispatchCompute(widthInBins, heightInBins, 1)
833 g.ctx.MemoryBarrier()
834 t.coarse.end()
835 t.kernel4.begin()
836 g.ctx.BindProgram(g.programs.kernel4)
837 g.ctx.DispatchCompute(tileDims.X, tileDims.Y, 1)
838 g.ctx.MemoryBarrier()
839 t.kernel4.end()
840
841 if err := g.buffers.memory.buffer.Download(byteslice.Struct(g.memHeader)); err != nil {
842 if err == driver.ErrContentLost {
843 continue
844 }
845 return err
846 }
847 switch errCode := g.memHeader.mem_error; errCode {
848 case memNoError:
849 return nil
850 case memMallocFailed:
851 // Resize memory and try again.
852 realloced = true
853 sz := g.buffers.memory.size * 15 / 10
854 if err := g.buffers.memory.ensureCapacity(g.ctx, sz); err != nil {
855 return err
856 }
857 continue
858 default:
859 return fmt.Errorf("compute: shader program failed with error %d", errCode)
860 }
861 }
862 }
863
864 // zeros returns a byte slice with size bytes of zeros.
865 func (g *compute) zeros(size int) []byte {
866 if cap(g.zeroSlice) < size {
867 g.zeroSlice = append(g.zeroSlice, make([]byte, size)...)
868 }
869 return g.zeroSlice[:size]
870 }
871
872 func (g *compute) resizeOutput(size image.Point) error {
873 if g.output.image != nil {
874 g.output.image.Release()
875 g.output.image = nil
876 }
877 img, err := g.ctx.NewTexture(driver.TextureFormatRGBA8, size.X, size.Y,
878 driver.FilterNearest,
879 driver.FilterNearest,
880 driver.BufferBindingShaderStorage|driver.BufferBindingTexture)
881 if err != nil {
882 return err
883 }
884 g.output.image = img
885 g.output.size = size
886 return nil
887 }
888
889 func (g *compute) Release() {
890 if g.drawOps.pathCache != nil {
891 g.drawOps.pathCache.release()
892 }
893 if g.cache != nil {
894 g.cache.release()
895 }
896 progs := []driver.Program{
897 g.programs.elements,
898 g.programs.tileAlloc,
899 g.programs.pathCoarse,
900 g.programs.backdrop,
901 g.programs.binning,
902 g.programs.coarse,
903 g.programs.kernel4,
904 }
905 if p := g.output.blitProg; p != nil {
906 p.Release()
907 }
908 for _, p := range progs {
909 if p != nil {
910 p.Release()
911 }
912 }
913 g.buffers.scene.release()
914 g.buffers.state.release()
915 g.buffers.memory.release()
916 if b := g.buffers.config; b != nil {
917 b.Release()
918 }
919 if g.output.image != nil {
920 g.output.image.Release()
921 }
922 if g.images.tex != nil {
923 g.images.tex.Release()
924 }
925 if g.materials.layout != nil {
926 g.materials.layout.Release()
927 }
928 if g.materials.prog != nil {
929 g.materials.prog.Release()
930 }
931 if g.materials.fbo != nil {
932 g.materials.fbo.Release()
933 }
934 if g.materials.tex != nil {
935 g.materials.tex.Release()
936 }
937 if g.materials.buffer != nil {
938 g.materials.buffer.Release()
939 }
940 if g.timers.t != nil {
941 g.timers.t.release()
942 }
943
944 *g = compute{}
945 }
946
947 func (g *compute) bindBuffers() {
948 bindStorageBuffers(g.programs.elements, g.buffers.memory.buffer, g.buffers.config, g.buffers.scene.buffer, g.buffers.state.buffer)
949 bindStorageBuffers(g.programs.tileAlloc, g.buffers.memory.buffer, g.buffers.config)
950 bindStorageBuffers(g.programs.pathCoarse, g.buffers.memory.buffer, g.buffers.config)
951 bindStorageBuffers(g.programs.backdrop, g.buffers.memory.buffer, g.buffers.config)
952 bindStorageBuffers(g.programs.binning, g.buffers.memory.buffer, g.buffers.config)
953 bindStorageBuffers(g.programs.coarse, g.buffers.memory.buffer, g.buffers.config)
954 bindStorageBuffers(g.programs.kernel4, g.buffers.memory.buffer, g.buffers.config)
955 }
956
957 func (b *sizedBuffer) release() {
958 if b.buffer == nil {
959 return
960 }
961 b.buffer.Release()
962 *b = sizedBuffer{}
963 }
964
965 func (b *sizedBuffer) ensureCapacity(ctx driver.Device, size int) error {
966 if b.size >= size {
967 return nil
968 }
969 if b.buffer != nil {
970 b.release()
971 }
972 buf, err := ctx.NewBuffer(driver.BufferBindingShaderStorage, size)
973 if err != nil {
974 return err
975 }
976 b.buffer = buf
977 b.size = size
978 return nil
979 }
980
981 func bindStorageBuffers(prog driver.Program, buffers ...driver.Buffer) {
982 for i, buf := range buffers {
983 prog.SetStorageBuffer(i, buf)
984 }
985 }
986
// bo is the byte order shorthand used for binary encoding: little endian.
var (
	bo = binary.LittleEndian
)
988
989 func (e *encoder) reset() {
990 e.scene = e.scene[:0]
991 e.npath = 0
992 e.npathseg = 0
993 e.ntrans = 0
994 }
995
996 func (e *encoder) numElements() int {
997 return len(e.scene)
998 }
999
1000 func (e *encoder) append(e2 encoder) {
1001 e.scene = append(e.scene, e2.scene...)
1002 e.npath += e2.npath
1003 e.npathseg += e2.npathseg
1004 e.ntrans += e2.ntrans
1005 }
1006
1007 func (e *encoder) transform(m f32.Affine2D) {
1008 e.scene = append(e.scene, scene.Transform(m))
1009 e.ntrans++
1010 }
1011
1012 func (e *encoder) lineWidth(width float32) {
1013 e.scene = append(e.scene, scene.SetLineWidth(width))
1014 }
1015
1016 func (e *encoder) fillMode(mode scene.FillMode) {
1017 e.scene = append(e.scene, scene.SetFillMode(mode))
1018 }
1019
1020 func (e *encoder) beginClip(bbox f32.Rectangle) {
1021 e.scene = append(e.scene, scene.BeginClip(bbox))
1022 e.npath++
1023 }
1024
1025 func (e *encoder) endClip(bbox f32.Rectangle) {
1026 e.scene = append(e.scene, scene.EndClip(bbox))
1027 e.npath++
1028 }
1029
1030 func (e *encoder) rect(r f32.Rectangle) {
1031 // Rectangle corners, clock-wise.
1032 c0, c1, c2, c3 := r.Min, f32.Pt(r.Min.X, r.Max.Y), r.Max, f32.Pt(r.Max.X, r.Min.Y)
1033 e.line(c0, c1)
1034 e.line(c1, c2)
1035 e.line(c2, c3)
1036 e.line(c3, c0)
1037 }
1038
1039 func (e *encoder) fillColor(col color.RGBA) {
1040 e.scene = append(e.scene, scene.FillColor(col))
1041 e.npath++
1042 }
1043
1044 func (e *encoder) setFillImageOffset(index int, offset image.Point) {
1045 x := int16(offset.X)
1046 y := int16(offset.Y)
1047 e.scene[index][2] = uint32(uint16(x)) | uint32(uint16(y))<<16
1048 }
1049
1050 func (e *encoder) fillImage(index int) {
1051 e.scene = append(e.scene, scene.FillImage(index))
1052 e.npath++
1053 }
1054
1055 func (e *encoder) line(start, end f32.Point) {
1056 e.scene = append(e.scene, scene.Line(start, end))
1057 e.npathseg++
1058 }
1059
1060 func (e *encoder) quad(start, ctrl, end f32.Point) {
1061 e.scene = append(e.scene, scene.Quad(start, ctrl, end))
1062 e.npathseg++
1063 }
1064