gpu.go raw

   1  // SPDX-License-Identifier: Unlicense OR MIT
   2  
   3  /*
   4  Package gpu implements the rendering of Gio drawing operations. It
   5  is used by package app and package app/headless and is otherwise not
   6  useful except for integrating with external window implementations.
   7  */
   8  package gpu
   9  
  10  import (
  11  	"encoding/binary"
  12  	"errors"
  13  	"fmt"
  14  	"image"
  15  	"image/color"
  16  	"math"
  17  	"os"
  18  	"reflect"
  19  	"time"
  20  	"unsafe"
  21  
  22  	"github.com/p9c/p9/pkg/gel/gio/f32"
  23  	"github.com/p9c/p9/pkg/gel/gio/gpu/internal/driver"
  24  	"github.com/p9c/p9/pkg/gel/gio/internal/byteslice"
  25  	"github.com/p9c/p9/pkg/gel/gio/internal/f32color"
  26  	"github.com/p9c/p9/pkg/gel/gio/internal/opconst"
  27  	"github.com/p9c/p9/pkg/gel/gio/internal/ops"
  28  	"github.com/p9c/p9/pkg/gel/gio/internal/scene"
  29  	"github.com/p9c/p9/pkg/gel/gio/internal/stroke"
  30  	"github.com/p9c/p9/pkg/gel/gio/layout"
  31  	"github.com/p9c/p9/pkg/gel/gio/op"
  32  	"github.com/p9c/p9/pkg/gel/gio/op/clip"
  33  
  34  	// Register backends.
  35  	_ "github.com/p9c/p9/pkg/gel/gio/gpu/internal/d3d11"
  36  	_ "github.com/p9c/p9/pkg/gel/gio/gpu/internal/opengl"
  37  )
  38  
// GPU is the renderer interface returned by New. A frame is produced by
// calling Collect followed by Frame.
type GPU interface {
	// Release non-Go resources. The GPU is no longer valid after Release.
	Release()
	// Clear sets the clear color for the next Frame.
	Clear(color color.NRGBA)
	// Collect the graphics operations from frame, given the viewport.
	Collect(viewport image.Point, frame *op.Ops)
	// Frame clears the color buffer and draws the collected operations.
	Frame() error
	// Profile returns the last available profiling information. Profiling
	// information is requested when Collect sees a ProfileOp, and the result
	// is available through Profile at some later time.
	Profile() string
}
  53  
// gpu is the GPU implementation created by newGPU.
type gpu struct {
	// cache holds per-handle resources such as textures (see texHandle).
	cache *resourceCache

	// profile is the most recent profiling summary, formatted by Frame and
	// returned by Profile.
	profile string
	// timers is created lazily by Collect the first time profiling is
	// requested and the device reports driver.FeatureTimers.
	timers *timers
	// frameStart is recorded at the end of Collect and used by Frame to
	// compute the total draw duration.
	frameStart time.Time
	// Per-phase timers, allocated from timers in Collect.
	zopsTimer, stencilTimer, coverTimer, cleanupTimer *timer
	// drawOps is the state collected from the frame's operation list.
	drawOps drawOps
	// ctx is the device all rendering is issued on.
	ctx driver.Device
	// renderer owns the blitter, pather and atlas packers.
	renderer *renderer
}
  65  
// renderer bundles the device with the helpers used to draw a frame.
type renderer struct {
	ctx     driver.Device
	blitter *blitter
	pather  *pather
	// packer assigns atlas placements to path covers (see packStencils).
	packer packer
	// intersections assigns atlas placements to images clipped by more
	// than one path (see packIntersections).
	intersections packer
}
  73  
// drawOps is the per-frame result of decoding an operation list. Its slices
// are truncated, not reallocated, by reset so their storage is reused
// between frames.
type drawOps struct {
	// profile is set when a profile op is seen during collection.
	profile bool
	reader  ops.Reader
	// states holds saved draw states indexed by state ID (see save).
	states    []drawState
	cache     *resourceCache
	vertCache []byte
	viewport  image.Point
	// clear and clearColor describe a pending clear of the color buffer;
	// they are set by gpu.Clear or when collectOps finds an opaque
	// full-viewport color paint.
	clear      bool
	clearColor f32color.RGBA
	// allImageOps is the combined list of imageOps and
	// zimageOps, in drawing order.
	allImageOps []imageOp
	imageOps    []imageOp
	// zimageOps are the rectangle clipped opaque images
	// that can use fast front-to-back rendering with z-test
	// and no blending.
	zimageOps []imageOp
	// pathOps lists the clip operations that carry path data.
	pathOps []*pathOp
	// pathOpCache is the backing storage for pathOp values handed out by
	// newPathOp.
	pathOpCache []pathOp
	qs          quadSplitter
	// pathCache caches built path data by operation key.
	pathCache *opCache
	// hack for the compute renderer to access
	// converted path data.
	compute bool
}
  99  
// drawState is the drawing state tracked while decoding operations. It is
// copied by value on save and load.
type drawState struct {
	// clip is the current clip bounds, narrowed by every clip op.
	clip f32.Rectangle
	// t is the current accumulated transform.
	t f32.Affine2D
	// cpath is the innermost clip operation, linked to its parents.
	cpath *pathOp
	// rect is true until a clip carrying path data is added.
	rect bool

	// matType selects which of the material fields below is current.
	matType materialType
	// Current paint.ImageOp
	image imageOpData
	// Current paint.ColorOp, if any.
	color color.NRGBA

	// Current paint.LinearGradientOp.
	stop1  f32.Point
	stop2  f32.Point
	color1 color.NRGBA
	color2 color.NRGBA
}
 118  
// pathOp is one entry in the chain of clip operations in effect for a paint.
type pathOp struct {
	// off is the offset part of the transform the clip was added under.
	off f32.Point
	// clip is the union of all
	// later clip rectangles.
	clip   image.Rectangle
	bounds f32.Rectangle
	// pathKey identifies the path data in the path cache.
	pathKey ops.Key
	// path is true when the clip carries path data rather than being a
	// plain rectangle.
	path bool
	// pathVerts is the path data to build; drawOps.collect sets it to nil
	// after consulting the cache.
	pathVerts []byte
	// parent is the enclosing clip operation, or nil.
	parent *pathOp
	// place is the atlas placement assigned by packStencils.
	place placement

	// For compute
	trans  f32.Affine2D
	stroke clip.StrokeStyle
}
 135  
// imageOp is a single paint operation ready for drawing.
type imageOp struct {
	// z is the depth value derived from the paint's sequence number.
	z float32
	// path is the innermost clip operation in effect for the paint.
	path *pathOp
	// clip is the integer bounding rectangle of the painted area.
	clip     image.Rectangle
	material material
	// clipType and place are filled in by packIntersections.
	clipType clipType
	place    placement
}
 144  
 145  func decodeStrokeOp(data []byte) clip.StrokeStyle {
 146  	_ = data[4]
 147  	if opconst.OpType(data[0]) != opconst.TypeStroke {
 148  		panic("invalid op")
 149  	}
 150  	bo := binary.LittleEndian
 151  	return clip.StrokeStyle{
 152  		Width: math.Float32frombits(bo.Uint32(data[1:])),
 153  	}
 154  }
 155  
// quadsOp holds the pending path data (aux) and its cache key until the
// clip operation that consumes it is decoded.
type quadsOp struct {
	key ops.Key
	aux []byte
}
 160  
// material describes how a paint operation is filled.
type material struct {
	material materialType
	// opaque is true when the material has no transparency, as computed
	// by materialFor.
	opaque bool
	// For materialTypeColor.
	color f32color.RGBA
	// For materialTypeLinearGradient.
	color1 f32color.RGBA
	color2 f32color.RGBA
	// For materialTypeTexture.
	data    imageOpData
	uvTrans f32.Affine2D

	// For the compute backend.
	trans f32.Affine2D
}
 176  
// clipOp is the shadow of clip.Op.
type clipOp struct {
	// TODO: Use image.Rectangle?
	// bounds is the clip rectangle, decoded from four 32-bit integers.
	bounds f32.Rectangle
	// outline is true when the encoded outline flag byte is 1.
	outline bool
}
 183  
// imageOpData is the shadow of paint.ImageOp.
type imageOpData struct {
	// src is the image pixels.
	src *image.RGBA
	// handle is the key under which the image's texture is cached
	// (see texHandle).
	handle interface{}
}
 189  
// linearGradientOpData is the decoded form of a linear gradient operation:
// two stop positions and the color at each stop.
type linearGradientOpData struct {
	stop1  f32.Point
	color1 color.NRGBA
	stop2  f32.Point
	color2 color.NRGBA
}
 196  
 197  func (op *clipOp) decode(data []byte) {
 198  	if opconst.OpType(data[0]) != opconst.TypeClip {
 199  		panic("invalid op")
 200  	}
 201  	bo := binary.LittleEndian
 202  	r := image.Rectangle{
 203  		Min: image.Point{
 204  			X: int(int32(bo.Uint32(data[1:]))),
 205  			Y: int(int32(bo.Uint32(data[5:]))),
 206  		},
 207  		Max: image.Point{
 208  			X: int(int32(bo.Uint32(data[9:]))),
 209  			Y: int(int32(bo.Uint32(data[13:]))),
 210  		},
 211  	}
 212  	*op = clipOp{
 213  		bounds:  layout.FRect(r),
 214  		outline: data[17] == 1,
 215  	}
 216  }
 217  
 218  func decodeImageOp(data []byte, refs []interface{}) imageOpData {
 219  	if opconst.OpType(data[0]) != opconst.TypeImage {
 220  		panic("invalid op")
 221  	}
 222  	handle := refs[1]
 223  	if handle == nil {
 224  		return imageOpData{}
 225  	}
 226  	return imageOpData{
 227  		src:    refs[0].(*image.RGBA),
 228  		handle: handle,
 229  	}
 230  }
 231  
 232  func decodeColorOp(data []byte) color.NRGBA {
 233  	if opconst.OpType(data[0]) != opconst.TypeColor {
 234  		panic("invalid op")
 235  	}
 236  	return color.NRGBA{
 237  		R: data[1],
 238  		G: data[2],
 239  		B: data[3],
 240  		A: data[4],
 241  	}
 242  }
 243  
 244  func decodeLinearGradientOp(data []byte) linearGradientOpData {
 245  	if opconst.OpType(data[0]) != opconst.TypeLinearGradient {
 246  		panic("invalid op")
 247  	}
 248  	bo := binary.LittleEndian
 249  	return linearGradientOpData{
 250  		stop1: f32.Point{
 251  			X: math.Float32frombits(bo.Uint32(data[1:])),
 252  			Y: math.Float32frombits(bo.Uint32(data[5:])),
 253  		},
 254  		stop2: f32.Point{
 255  			X: math.Float32frombits(bo.Uint32(data[9:])),
 256  			Y: math.Float32frombits(bo.Uint32(data[13:])),
 257  		},
 258  		color1: color.NRGBA{
 259  			R: data[17+0],
 260  			G: data[17+1],
 261  			B: data[17+2],
 262  			A: data[17+3],
 263  		},
 264  		color2: color.NRGBA{
 265  			R: data[21+0],
 266  			G: data[21+1],
 267  			B: data[21+2],
 268  			A: data[21+3],
 269  		},
 270  	}
 271  }
 272  
// clipType records how an imageOp is clipped; see the clipType* constants.
type clipType uint8
 274  
// resource is anything that can be released, such as a texture.
type resource interface {
	release()
}
 278  
// texture pairs a source image with its device texture. tex is nil until
// texHandle creates and uploads it.
type texture struct {
	src *image.RGBA
	tex driver.Texture
}
 283  
// blitter holds the programs, uniforms and quad geometry set up by
// newBlitter.
type blitter struct {
	ctx      driver.Device
	viewport image.Point
	// prog holds one program per materialType, indexed by its value.
	prog                   [3]*program
	layout                 driver.InputLayout
	colUniforms            *blitColUniforms
	texUniforms            *blitTexUniforms
	linearGradientUniforms *blitLinearGradientUniforms
	// quadVerts is a four-vertex buffer with a position and a texture
	// coordinate per vertex.
	quadVerts driver.Buffer
}
 294  
// blitColUniforms holds the vertex and fragment uniforms for the color
// program. The field layout is significant; do not reorder.
type blitColUniforms struct {
	vert struct {
		blitUniforms
		_ [12]byte // Padding to a multiple of 16.
	}
	frag struct {
		colorUniforms
	}
}
 304  
// blitTexUniforms holds the vertex uniforms for the texture program, which
// has no fragment uniforms. The field layout is significant; do not reorder.
type blitTexUniforms struct {
	vert struct {
		blitUniforms
		_ [12]byte // Padding to a multiple of 16.
	}
}
 311  
// blitLinearGradientUniforms holds the vertex and fragment uniforms for the
// linear gradient program. The field layout is significant; do not reorder.
type blitLinearGradientUniforms struct {
	vert struct {
		blitUniforms
		_ [12]byte // Padding to a multiple of 16.
	}
	frag struct {
		gradientUniforms
	}
}
 321  
// uniformBuffer pairs a device buffer with a byte slice.
// NOTE(review): ptr presumably aliases the Go-side uniform struct that is
// uploaded into buf — confirm against newUniformBuffer.
type uniformBuffer struct {
	buf driver.Buffer
	ptr []byte
}
 326  
// program is a device program together with its optional vertex and
// fragment uniform buffers; either buffer may be nil.
type program struct {
	prog         driver.Program
	vertUniforms *uniformBuffer
	fragUniforms *uniformBuffer
}
 332  
// blitUniforms is the vertex uniform block shared by the blit programs.
// The field layout is significant; do not reorder.
type blitUniforms struct {
	transform     [4]float32
	uvTransformR1 [4]float32
	uvTransformR2 [4]float32
	z             float32
}
 339  
// colorUniforms is the fragment uniform block for the color program.
type colorUniforms struct {
	color f32color.RGBA
}
 343  
// gradientUniforms is the fragment uniform block for the linear gradient
// program: the colors at the two stops.
type gradientUniforms struct {
	color1 f32color.RGBA
	color2 f32color.RGBA
}
 348  
// materialType selects the kind of fill; its values index the per-material
// program and uniform arrays.
type materialType uint8
 350  
const (
	// clipTypeNone is the zero value: no path clip has been assigned.
	clipTypeNone clipType = iota
	// clipTypePath is assigned by packIntersections when exactly one
	// clip in the chain carries path data.
	clipTypePath
	// clipTypeIntersection is assigned by packIntersections when more
	// than one clip in the chain carries path data.
	clipTypeIntersection
)
 356  
// The materialType values are used as array indices (see blitter.prog and
// createColorPrograms), so their order must not change.
const (
	materialColor materialType = iota
	materialLinearGradient
	materialTexture
)
 362  
 363  func New(api API) (GPU, error) {
 364  	d, err := driver.NewDevice(api)
 365  	if err != nil {
 366  		return nil, err
 367  	}
 368  	forceCompute := os.Getenv("GIORENDERER") == "forcecompute"
 369  	feats := d.Caps().Features
 370  	switch {
 371  	case !forceCompute && feats.Has(driver.FeatureFloatRenderTargets):
 372  		return newGPU(d)
 373  	case feats.Has(driver.FeatureCompute):
 374  		return newCompute(d)
 375  	default:
 376  		return nil, errors.New("gpu: no support for float render targets nor compute")
 377  	}
 378  }
 379  
 380  func newGPU(ctx driver.Device) (*gpu, error) {
 381  	g := &gpu{
 382  		cache: newResourceCache(),
 383  	}
 384  	g.drawOps.pathCache = newOpCache()
 385  	if err := g.init(ctx); err != nil {
 386  		return nil, err
 387  	}
 388  	return g, nil
 389  }
 390  
 391  func (g *gpu) init(ctx driver.Device) error {
 392  	g.ctx = ctx
 393  	g.renderer = newRenderer(ctx)
 394  	return nil
 395  }
 396  
 397  func (g *gpu) Clear(col color.NRGBA) {
 398  	g.drawOps.clear = true
 399  	g.drawOps.clearColor = f32color.LinearFromSRGB(col)
 400  }
 401  
 402  func (g *gpu) Release() {
 403  	g.renderer.release()
 404  	g.drawOps.pathCache.release()
 405  	g.cache.release()
 406  	if g.timers != nil {
 407  		g.timers.release()
 408  	}
 409  	g.ctx.Release()
 410  }
 411  
 412  func (g *gpu) Collect(viewport image.Point, frameOps *op.Ops) {
 413  	g.renderer.blitter.viewport = viewport
 414  	g.renderer.pather.viewport = viewport
 415  	g.drawOps.reset(g.cache, viewport)
 416  	g.drawOps.collect(g.ctx, g.cache, frameOps, viewport)
 417  	g.frameStart = time.Now()
 418  	if g.drawOps.profile && g.timers == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) {
 419  		g.timers = newTimers(g.ctx)
 420  		g.zopsTimer = g.timers.newTimer()
 421  		g.stencilTimer = g.timers.newTimer()
 422  		g.coverTimer = g.timers.newTimer()
 423  		g.cleanupTimer = g.timers.newTimer()
 424  	}
 425  }
 426  
// Frame draws the operations gathered by the last Collect into the default
// framebuffer. It runs four phases in order: opaque rectangle images with
// depth testing, path stencils and clip intersections, blended images, and
// cache cleanup. It currently always returns nil.
//
// NOTE(review): only zopsTimer.begin is guarded by drawOps.profile; the
// remaining timer calls and timers.ready run unconditionally, so they
// presumably tolerate nil receivers — confirm against the timer
// implementation.
func (g *gpu) Frame() error {
	defFBO := g.ctx.BeginFrame()
	defer g.ctx.EndFrame()
	viewport := g.renderer.blitter.viewport
	// Grow each clip path's rectangle to cover every image that uses it.
	for _, img := range g.drawOps.imageOps {
		expandPathOp(img.path, img.clip)
	}
	if g.drawOps.profile {
		g.zopsTimer.begin()
	}
	g.ctx.BindFramebuffer(defFBO)
	g.ctx.DepthFunc(driver.DepthFuncGreater)
	// Note that Clear must be before ClearDepth if nothing else is rendered
	// (len(zimageOps) == 0). If not, the Fairphone 2 will corrupt the depth buffer.
	if g.drawOps.clear {
		g.drawOps.clear = false
		g.ctx.Clear(g.drawOps.clearColor.Float32())
	}
	g.ctx.ClearDepth(0.0)
	g.ctx.Viewport(0, 0, viewport.X, viewport.Y)
	g.renderer.drawZOps(g.cache, g.drawOps.zimageOps)
	g.zopsTimer.end()
	g.stencilTimer.begin()
	g.ctx.SetBlend(true)
	// Place and render path covers, then the multi-path intersections.
	g.renderer.packStencils(&g.drawOps.pathOps)
	g.renderer.stencilClips(g.drawOps.pathCache, g.drawOps.pathOps)
	g.renderer.packIntersections(g.drawOps.imageOps)
	g.renderer.intersect(g.drawOps.imageOps)
	g.stencilTimer.end()
	g.coverTimer.begin()
	// The stencil phase bound other framebuffers; switch back before
	// drawing the blended images.
	g.ctx.BindFramebuffer(defFBO)
	g.ctx.Viewport(0, 0, viewport.X, viewport.Y)
	g.renderer.drawOps(g.cache, g.drawOps.imageOps)
	g.ctx.SetBlend(false)
	g.renderer.pather.stenciler.invalidateFBO()
	g.coverTimer.end()
	g.ctx.BindFramebuffer(defFBO)
	g.cleanupTimer.begin()
	g.cache.frame()
	g.drawOps.pathCache.frame()
	g.cleanupTimer.end()
	if g.drawOps.profile && g.timers.ready() {
		zt, st, covt, cleant := g.zopsTimer.Elapsed, g.stencilTimer.Elapsed, g.coverTimer.Elapsed, g.cleanupTimer.Elapsed
		// The GPU total includes the cleanup phase, which is not
		// printed separately.
		ft := zt + st + covt + cleant
		q := 100 * time.Microsecond
		zt, st, covt = zt.Round(q), st.Round(q), covt.Round(q)
		frameDur := time.Since(g.frameStart).Round(q)
		ft = ft.Round(q)
		g.profile = fmt.Sprintf("draw:%7s gpu:%7s zt:%7s st:%7s cov:%7s", frameDur, ft, zt, st, covt)
	}
	return nil
}
 479  
// Profile returns the profiling summary most recently formatted by Frame,
// or the empty string if none has been produced yet.
func (g *gpu) Profile() string {
	return g.profile
}
 483  
 484  func (r *renderer) texHandle(cache *resourceCache, data imageOpData) driver.Texture {
 485  	var tex *texture
 486  	t, exists := cache.get(data.handle)
 487  	if !exists {
 488  		t = &texture{
 489  			src: data.src,
 490  		}
 491  		cache.put(data.handle, t)
 492  	}
 493  	tex = t.(*texture)
 494  	if tex.tex != nil {
 495  		return tex.tex
 496  	}
 497  	handle, err := r.ctx.NewTexture(driver.TextureFormatSRGB, data.src.Bounds().Dx(), data.src.Bounds().Dy(), driver.FilterLinear, driver.FilterLinear, driver.BufferBindingTexture)
 498  	if err != nil {
 499  		panic(err)
 500  	}
 501  	driver.UploadImage(handle, image.Pt(0, 0), data.src)
 502  	tex.tex = handle
 503  	return tex.tex
 504  }
 505  
 506  func (t *texture) release() {
 507  	if t.tex != nil {
 508  		t.tex.Release()
 509  	}
 510  }
 511  
 512  func newRenderer(ctx driver.Device) *renderer {
 513  	r := &renderer{
 514  		ctx:     ctx,
 515  		blitter: newBlitter(ctx),
 516  		pather:  newPather(ctx),
 517  	}
 518  
 519  	maxDim := ctx.Caps().MaxTextureSize
 520  	// Large atlas textures cause artifacts due to precision loss in
 521  	// shaders.
 522  	if cap := 8192; maxDim > cap {
 523  		maxDim = cap
 524  	}
 525  
 526  	r.packer.maxDim = maxDim
 527  	r.intersections.maxDim = maxDim
 528  	return r
 529  }
 530  
// release frees the pather and then the blitter.
func (r *renderer) release() {
	r.pather.release()
	r.blitter.release()
}
 535  
 536  func newBlitter(ctx driver.Device) *blitter {
 537  	quadVerts, err := ctx.NewImmutableBuffer(driver.BufferBindingVertices,
 538  		byteslice.Slice([]float32{
 539  			-1, +1, 0, 0,
 540  			+1, +1, 1, 0,
 541  			-1, -1, 0, 1,
 542  			+1, -1, 1, 1,
 543  		}),
 544  	)
 545  	if err != nil {
 546  		panic(err)
 547  	}
 548  	b := &blitter{
 549  		ctx:       ctx,
 550  		quadVerts: quadVerts,
 551  	}
 552  	b.colUniforms = new(blitColUniforms)
 553  	b.texUniforms = new(blitTexUniforms)
 554  	b.linearGradientUniforms = new(blitLinearGradientUniforms)
 555  	prog, layout, err := createColorPrograms(ctx, shader_blit_vert, shader_blit_frag,
 556  		[3]interface{}{&b.colUniforms.vert, &b.linearGradientUniforms.vert, &b.texUniforms.vert},
 557  		[3]interface{}{&b.colUniforms.frag, &b.linearGradientUniforms.frag, nil},
 558  	)
 559  	if err != nil {
 560  		panic(err)
 561  	}
 562  	b.prog = prog
 563  	b.layout = layout
 564  	return b
 565  }
 566  
 567  func (b *blitter) release() {
 568  	b.quadVerts.Release()
 569  	for _, p := range b.prog {
 570  		p.Release()
 571  	}
 572  	b.layout.Release()
 573  }
 574  
 575  func createColorPrograms(b driver.Device, vsSrc driver.ShaderSources, fsSrc [3]driver.ShaderSources, vertUniforms, fragUniforms [3]interface{}) ([3]*program, driver.InputLayout, error) {
 576  	var progs [3]*program
 577  	{
 578  		prog, err := b.NewProgram(vsSrc, fsSrc[materialTexture])
 579  		if err != nil {
 580  			return progs, nil, err
 581  		}
 582  		var vertBuffer, fragBuffer *uniformBuffer
 583  		if u := vertUniforms[materialTexture]; u != nil {
 584  			vertBuffer = newUniformBuffer(b, u)
 585  			prog.SetVertexUniforms(vertBuffer.buf)
 586  		}
 587  		if u := fragUniforms[materialTexture]; u != nil {
 588  			fragBuffer = newUniformBuffer(b, u)
 589  			prog.SetFragmentUniforms(fragBuffer.buf)
 590  		}
 591  		progs[materialTexture] = newProgram(prog, vertBuffer, fragBuffer)
 592  	}
 593  	{
 594  		var vertBuffer, fragBuffer *uniformBuffer
 595  		prog, err := b.NewProgram(vsSrc, fsSrc[materialColor])
 596  		if err != nil {
 597  			progs[materialTexture].Release()
 598  			return progs, nil, err
 599  		}
 600  		if u := vertUniforms[materialColor]; u != nil {
 601  			vertBuffer = newUniformBuffer(b, u)
 602  			prog.SetVertexUniforms(vertBuffer.buf)
 603  		}
 604  		if u := fragUniforms[materialColor]; u != nil {
 605  			fragBuffer = newUniformBuffer(b, u)
 606  			prog.SetFragmentUniforms(fragBuffer.buf)
 607  		}
 608  		progs[materialColor] = newProgram(prog, vertBuffer, fragBuffer)
 609  	}
 610  	{
 611  		var vertBuffer, fragBuffer *uniformBuffer
 612  		prog, err := b.NewProgram(vsSrc, fsSrc[materialLinearGradient])
 613  		if err != nil {
 614  			progs[materialTexture].Release()
 615  			progs[materialColor].Release()
 616  			return progs, nil, err
 617  		}
 618  		if u := vertUniforms[materialLinearGradient]; u != nil {
 619  			vertBuffer = newUniformBuffer(b, u)
 620  			prog.SetVertexUniforms(vertBuffer.buf)
 621  		}
 622  		if u := fragUniforms[materialLinearGradient]; u != nil {
 623  			fragBuffer = newUniformBuffer(b, u)
 624  			prog.SetFragmentUniforms(fragBuffer.buf)
 625  		}
 626  		progs[materialLinearGradient] = newProgram(prog, vertBuffer, fragBuffer)
 627  	}
 628  	layout, err := b.NewInputLayout(vsSrc, []driver.InputDesc{
 629  		{Type: driver.DataTypeFloat, Size: 2, Offset: 0},
 630  		{Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2},
 631  	})
 632  	if err != nil {
 633  		progs[materialTexture].Release()
 634  		progs[materialColor].Release()
 635  		progs[materialLinearGradient].Release()
 636  		return progs, nil, err
 637  	}
 638  	return progs, layout, nil
 639  }
 640  
 641  func (r *renderer) stencilClips(pathCache *opCache, ops []*pathOp) {
 642  	if len(r.packer.sizes) == 0 {
 643  		return
 644  	}
 645  	fbo := -1
 646  	r.pather.begin(r.packer.sizes)
 647  	for _, p := range ops {
 648  		if fbo != p.place.Idx {
 649  			fbo = p.place.Idx
 650  			f := r.pather.stenciler.cover(fbo)
 651  			r.ctx.BindFramebuffer(f.fbo)
 652  			r.ctx.Clear(0.0, 0.0, 0.0, 0.0)
 653  		}
 654  		v, _ := pathCache.get(p.pathKey)
 655  		r.pather.stencilPath(p.clip, p.off, p.place.Pos, v.data)
 656  	}
 657  }
 658  
 659  func (r *renderer) intersect(ops []imageOp) {
 660  	if len(r.intersections.sizes) == 0 {
 661  		return
 662  	}
 663  	fbo := -1
 664  	r.pather.stenciler.beginIntersect(r.intersections.sizes)
 665  	r.ctx.BindVertexBuffer(r.blitter.quadVerts, 4*4, 0)
 666  	r.ctx.BindInputLayout(r.pather.stenciler.iprog.layout)
 667  	for _, img := range ops {
 668  		if img.clipType != clipTypeIntersection {
 669  			continue
 670  		}
 671  		if fbo != img.place.Idx {
 672  			fbo = img.place.Idx
 673  			f := r.pather.stenciler.intersections.fbos[fbo]
 674  			r.ctx.BindFramebuffer(f.fbo)
 675  			r.ctx.Clear(1.0, 0.0, 0.0, 0.0)
 676  		}
 677  		r.ctx.Viewport(img.place.Pos.X, img.place.Pos.Y, img.clip.Dx(), img.clip.Dy())
 678  		r.intersectPath(img.path, img.clip)
 679  	}
 680  }
 681  
 682  func (r *renderer) intersectPath(p *pathOp, clip image.Rectangle) {
 683  	if p.parent != nil {
 684  		r.intersectPath(p.parent, clip)
 685  	}
 686  	if !p.path {
 687  		return
 688  	}
 689  	uv := image.Rectangle{
 690  		Min: p.place.Pos,
 691  		Max: p.place.Pos.Add(p.clip.Size()),
 692  	}
 693  	o := clip.Min.Sub(p.clip.Min)
 694  	sub := image.Rectangle{
 695  		Min: o,
 696  		Max: o.Add(clip.Size()),
 697  	}
 698  	fbo := r.pather.stenciler.cover(p.place.Idx)
 699  	r.ctx.BindTexture(0, fbo.tex)
 700  	coverScale, coverOff := texSpaceTransform(layout.FRect(uv), fbo.size)
 701  	subScale, subOff := texSpaceTransform(layout.FRect(sub), p.clip.Size())
 702  	r.pather.stenciler.iprog.uniforms.vert.uvTransform = [4]float32{coverScale.X, coverScale.Y, coverOff.X, coverOff.Y}
 703  	r.pather.stenciler.iprog.uniforms.vert.subUVTransform = [4]float32{subScale.X, subScale.Y, subOff.X, subOff.Y}
 704  	r.pather.stenciler.iprog.prog.UploadUniforms()
 705  	r.ctx.DrawArrays(driver.DrawModeTriangleStrip, 0, 4)
 706  }
 707  
 708  func (r *renderer) packIntersections(ops []imageOp) {
 709  	r.intersections.clear()
 710  	for i, img := range ops {
 711  		var npaths int
 712  		var onePath *pathOp
 713  		for p := img.path; p != nil; p = p.parent {
 714  			if p.path {
 715  				onePath = p
 716  				npaths++
 717  			}
 718  		}
 719  		switch npaths {
 720  		case 0:
 721  		case 1:
 722  			place := onePath.place
 723  			place.Pos = place.Pos.Sub(onePath.clip.Min).Add(img.clip.Min)
 724  			ops[i].place = place
 725  			ops[i].clipType = clipTypePath
 726  		default:
 727  			sz := image.Point{X: img.clip.Dx(), Y: img.clip.Dy()}
 728  			place, ok := r.intersections.add(sz)
 729  			if !ok {
 730  				panic("internal error: if the intersection fit, the intersection should fit as well")
 731  			}
 732  			ops[i].clipType = clipTypeIntersection
 733  			ops[i].place = place
 734  		}
 735  	}
 736  }
 737  
 738  func (r *renderer) packStencils(pops *[]*pathOp) {
 739  	r.packer.clear()
 740  	ops := *pops
 741  	// Allocate atlas space for cover textures.
 742  	var i int
 743  	for i < len(ops) {
 744  		p := ops[i]
 745  		if p.clip.Empty() {
 746  			ops[i] = ops[len(ops)-1]
 747  			ops = ops[:len(ops)-1]
 748  			continue
 749  		}
 750  		sz := image.Point{X: p.clip.Dx(), Y: p.clip.Dy()}
 751  		place, ok := r.packer.add(sz)
 752  		if !ok {
 753  			// The clip area is at most the entire screen. Hopefully no
 754  			// screen is larger than GL_MAX_TEXTURE_SIZE.
 755  			panic(fmt.Errorf("clip area %v is larger than maximum texture size %dx%d", p.clip, r.packer.maxDim, r.packer.maxDim))
 756  		}
 757  		p.place = place
 758  		i++
 759  	}
 760  	*pops = ops
 761  }
 762  
 763  // intersects intersects clip and b where b is offset by off.
 764  // ceilRect returns a bounding image.Rectangle for a f32.Rectangle.
 765  func boundRectF(r f32.Rectangle) image.Rectangle {
 766  	return image.Rectangle{
 767  		Min: image.Point{
 768  			X: int(floor(r.Min.X)),
 769  			Y: int(floor(r.Min.Y)),
 770  		},
 771  		Max: image.Point{
 772  			X: int(ceil(r.Max.X)),
 773  			Y: int(ceil(r.Max.Y)),
 774  		},
 775  	}
 776  }
 777  
 778  func ceil(v float32) int {
 779  	return int(math.Ceil(float64(v)))
 780  }
 781  
 782  func floor(v float32) int {
 783  	return int(math.Floor(float64(v)))
 784  }
 785  
 786  func (d *drawOps) reset(cache *resourceCache, viewport image.Point) {
 787  	d.profile = false
 788  	d.cache = cache
 789  	d.viewport = viewport
 790  	d.imageOps = d.imageOps[:0]
 791  	d.allImageOps = d.allImageOps[:0]
 792  	d.zimageOps = d.zimageOps[:0]
 793  	d.pathOps = d.pathOps[:0]
 794  	d.pathOpCache = d.pathOpCache[:0]
 795  	d.vertCache = d.vertCache[:0]
 796  }
 797  
 798  func (d *drawOps) collect(ctx driver.Device, cache *resourceCache, root *op.Ops, viewport image.Point) {
 799  	clip := f32.Rectangle{
 800  		Max: f32.Point{X: float32(viewport.X), Y: float32(viewport.Y)},
 801  	}
 802  	d.reader.Reset(root)
 803  	state := drawState{
 804  		clip:  clip,
 805  		rect:  true,
 806  		color: color.NRGBA{A: 0xff},
 807  	}
 808  	d.collectOps(&d.reader, state)
 809  	for _, p := range d.pathOps {
 810  		if v, exists := d.pathCache.get(p.pathKey); !exists || v.data.data == nil {
 811  			data := buildPath(ctx, p.pathVerts)
 812  			var computePath encoder
 813  			if d.compute {
 814  				computePath = encodePath(p.pathVerts)
 815  			}
 816  			d.pathCache.put(p.pathKey, opCacheValue{
 817  				data:        data,
 818  				bounds:      p.bounds,
 819  				computePath: computePath,
 820  			})
 821  		}
 822  		p.pathVerts = nil
 823  	}
 824  }
 825  
 826  func (d *drawOps) newPathOp() *pathOp {
 827  	d.pathOpCache = append(d.pathOpCache, pathOp{})
 828  	return &d.pathOpCache[len(d.pathOpCache)-1]
 829  }
 830  
 831  func (d *drawOps) addClipPath(state *drawState, aux []byte, auxKey ops.Key, bounds f32.Rectangle, off f32.Point, tr f32.Affine2D, stroke clip.StrokeStyle) {
 832  	npath := d.newPathOp()
 833  	*npath = pathOp{
 834  		parent: state.cpath,
 835  		bounds: bounds,
 836  		off:    off,
 837  		trans:  tr,
 838  		stroke: stroke,
 839  	}
 840  	state.cpath = npath
 841  	if len(aux) > 0 {
 842  		state.rect = false
 843  		state.cpath.pathKey = auxKey
 844  		state.cpath.path = true
 845  		state.cpath.pathVerts = aux
 846  		d.pathOps = append(d.pathOps, state.cpath)
 847  	}
 848  }
 849  
 850  // split a transform into two parts, one which is pur offset and the
 851  // other representing the scaling, shearing and rotation part
 852  func splitTransform(t f32.Affine2D) (srs f32.Affine2D, offset f32.Point) {
 853  	sx, hx, ox, hy, sy, oy := t.Elems()
 854  	offset = f32.Point{X: ox, Y: oy}
 855  	srs = f32.NewAffine2D(sx, hx, 0, hy, sy, 0)
 856  	return
 857  }
 858  
 859  func (d *drawOps) save(id int, state drawState) {
 860  	if extra := id - len(d.states) + 1; extra > 0 {
 861  		d.states = append(d.states, make([]drawState, extra)...)
 862  	}
 863  	d.states[id] = state
 864  }
 865  
// collectOps decodes every operation from r, updating state as it goes and
// appending one imageOp per paint operation to d. state is saved under
// opconst.InitialStateID before decoding starts.
//
// Path and stroke operations are buffered in locals until the following
// clip operation consumes them. A paint with an opaque color that covers
// the whole viewport under a rectangular clip discards all earlier images
// and becomes the frame's clear color instead.
//
// It panics if more than 65535 paint operations are collected after the
// last such clear.
func (d *drawOps) collectOps(r *ops.Reader, state drawState) {
	var (
		// quads and str hold pending path data and stroke style for the
		// next clip operation.
		quads quadsOp
		str   clip.StrokeStyle
		// z counts paint operations since the last full-viewport clear.
		z int
	)
	d.save(opconst.InitialStateID, state)
loop:
	for encOp, ok := r.Decode(); ok; encOp, ok = r.Decode() {
		switch opconst.OpType(encOp.Data[0]) {
		case opconst.TypeProfile:
			d.profile = true
		case opconst.TypeTransform:
			dop := ops.DecodeTransform(encOp.Data)
			state.t = state.t.Mul(dop)

		case opconst.TypeStroke:
			str = decodeStrokeOp(encOp.Data)

		case opconst.TypePath:
			// The path data is carried by the operation that follows.
			encOp, ok = r.Decode()
			if !ok {
				break loop
			}
			quads.aux = encOp.Data[opconst.TypeAuxLen:]
			quads.key = encOp.Key

		case opconst.TypeClip:
			var op clipOp
			op.decode(encOp.Data)
			bounds := op.bounds
			trans, off := splitTransform(state.t)
			if len(quads.aux) > 0 {
				// There is a clipping path, build the gpu data and update the
				// cache key such that it will be equal only if the transform is the
				// same also. Use cached data if we have it.
				quads.key = quads.key.SetTransform(trans)
				if v, ok := d.pathCache.get(quads.key); ok {
					// Since the GPU data exists in the cache aux will not be used.
					// Why is this not used for the offset shapes?
					op.bounds = v.bounds
				} else {
					pathData, bounds := d.buildVerts(
						quads.aux, trans, op.outline, str,
					)
					op.bounds = bounds
					if !d.compute {
						quads.aux = pathData
					}
					// add it to the cache, without GPU data, so the transform can be
					// reused.
					d.pathCache.put(quads.key, opCacheValue{bounds: op.bounds})
				}
			} else {
				quads.aux, op.bounds, _ = d.boundsForTransformedRect(bounds, trans)
				quads.key = encOp.Key
				// NOTE(review): the result of SetTransform is discarded
				// here, unlike the assignment in the path branch above. If
				// SetTransform does not mutate its receiver this call has
				// no effect and the key ignores the transform — confirm.
				quads.key.SetTransform(trans)
			}
			state.clip = state.clip.Intersect(op.bounds.Add(off))
			d.addClipPath(&state, quads.aux, quads.key, op.bounds, off, state.t, str)
			// The pending path and stroke have been consumed.
			quads = quadsOp{}
			str = clip.StrokeStyle{}

		case opconst.TypeColor:
			state.matType = materialColor
			state.color = decodeColorOp(encOp.Data)
		case opconst.TypeLinearGradient:
			state.matType = materialLinearGradient
			op := decodeLinearGradientOp(encOp.Data)
			state.stop1 = op.stop1
			state.stop2 = op.stop2
			state.color1 = op.color1
			state.color2 = op.color2
		case opconst.TypeImage:
			state.matType = materialTexture
			state.image = decodeImageOp(encOp.Data, encOp.Refs)
		case opconst.TypePaint:
			// Transform (if needed) the painting rectangle and if so generate a clip path,
			// for those cases also compute a partialTrans that maps texture coordinates between
			// the new bounding rectangle and the transformed original paint rectangle.
			trans, off := splitTransform(state.t)
			// Fill the clip area, unless the material is a (bounded) image.
			// TODO: Find a tighter bound.
			inf := float32(1e6)
			dst := f32.Rect(-inf, -inf, inf, inf)
			if state.matType == materialTexture {
				dst = layout.FRect(state.image.src.Rect)
			}
			clipData, bnd, partialTrans := d.boundsForTransformedRect(dst, trans)
			cl := state.clip.Intersect(bnd.Add(off))
			if cl.Empty() {
				// Nothing visible to paint.
				continue
			}

			// Remember whether the clip was rectangular so it can be
			// restored after the temporary clip path below is removed.
			wasrect := state.rect
			if clipData != nil {
				// The paint operation is sheared or rotated, add a clip path representing
				// this transformed rectangle.
				// NOTE(review): the result of SetTransform is discarded
				// here as well; if it does not mutate its receiver the key
				// passed below ignores the transform — confirm.
				encOp.Key.SetTransform(trans)
				d.addClipPath(&state, clipData, encOp.Key, bnd, off, state.t, clip.StrokeStyle{})
			}

			bounds := boundRectF(cl)
			mat := state.materialFor(bnd, off, partialTrans, bounds, state.t)

			if bounds.Min == (image.Point{}) && bounds.Max == d.viewport && state.rect && mat.opaque && (mat.material == materialColor) {
				// The image is a uniform opaque color and takes up the whole screen.
				// Scrap images up to and including this image and set clear color.
				d.allImageOps = d.allImageOps[:0]
				d.zimageOps = d.zimageOps[:0]
				d.imageOps = d.imageOps[:0]
				z = 0
				d.clearColor = mat.color.Opaque()
				d.clear = true
				continue
			}
			z++
			if z != int(uint16(z)) {
				// TODO(eliasnaur) github.com/p9c/p9/pkg/gel/gio/issue/127.
				panic("more than 65k paint objects not supported")
			}
			// Assume 16-bit depth buffer.
			const zdepth = 1 << 16
			// Convert z to window-space, assuming depth range [0;1].
			zf := float32(z)*2/zdepth - 1.0
			img := imageOp{
				z:        zf,
				path:     state.cpath,
				clip:     bounds,
				material: mat,
			}

			d.allImageOps = append(d.allImageOps, img)
			// Opaque images under a rectangular clip go to the z-tested
			// list; everything else is drawn with blending.
			if state.rect && img.material.opaque {
				d.zimageOps = append(d.zimageOps, img)
			} else {
				d.imageOps = append(d.imageOps, img)
			}
			if clipData != nil {
				// we added a clip path that should not remain
				state.cpath = state.cpath.parent
				state.rect = wasrect
			}
		case opconst.TypeSave:
			id := ops.DecodeSave(encOp.Data)
			d.save(id, state)
		case opconst.TypeLoad:
			id, mask := ops.DecodeLoad(encOp.Data)
			s := d.states[id]
			// Restore only the transform when that is all the mask asks
			// for; any other bit restores the complete saved state.
			if mask&opconst.TransformState != 0 {
				state.t = s.t
			}
			if mask&^opconst.TransformState != 0 {
				state = s
			}
		}
	}
}
1024  
1025  func expandPathOp(p *pathOp, clip image.Rectangle) {
1026  	for p != nil {
1027  		pclip := p.clip
1028  		if !pclip.Empty() {
1029  			clip = clip.Union(pclip)
1030  		}
1031  		p.clip = clip
1032  		p = p.parent
1033  	}
1034  }
1035  
1036  func (d *drawState) materialFor(rect f32.Rectangle, off f32.Point, partTrans f32.Affine2D, clip image.Rectangle, trans f32.Affine2D) material {
1037  	var m material
1038  	switch d.matType {
1039  	case materialColor:
1040  		m.material = materialColor
1041  		m.color = f32color.LinearFromSRGB(d.color)
1042  		m.opaque = m.color.A == 1.0
1043  	case materialLinearGradient:
1044  		m.material = materialLinearGradient
1045  
1046  		m.color1 = f32color.LinearFromSRGB(d.color1)
1047  		m.color2 = f32color.LinearFromSRGB(d.color2)
1048  		m.opaque = m.color1.A == 1.0 && m.color2.A == 1.0
1049  
1050  		m.uvTrans = partTrans.Mul(gradientSpaceTransform(clip, off, d.stop1, d.stop2))
1051  	case materialTexture:
1052  		m.material = materialTexture
1053  		dr := boundRectF(rect.Add(off))
1054  		sz := d.image.src.Bounds().Size()
1055  		sr := f32.Rectangle{
1056  			Max: f32.Point{
1057  				X: float32(sz.X),
1058  				Y: float32(sz.Y),
1059  			},
1060  		}
1061  		dx := float32(dr.Dx())
1062  		sdx := sr.Dx()
1063  		sr.Min.X += float32(clip.Min.X-dr.Min.X) * sdx / dx
1064  		sr.Max.X -= float32(dr.Max.X-clip.Max.X) * sdx / dx
1065  		dy := float32(dr.Dy())
1066  		sdy := sr.Dy()
1067  		sr.Min.Y += float32(clip.Min.Y-dr.Min.Y) * sdy / dy
1068  		sr.Max.Y -= float32(dr.Max.Y-clip.Max.Y) * sdy / dy
1069  		uvScale, uvOffset := texSpaceTransform(sr, sz)
1070  		m.uvTrans = partTrans.Mul(f32.Affine2D{}.Scale(f32.Point{}, uvScale).Offset(uvOffset))
1071  		m.trans = trans
1072  		m.data = d.image
1073  	}
1074  	return m
1075  }
1076  
1077  func (r *renderer) drawZOps(cache *resourceCache, ops []imageOp) {
1078  	r.ctx.SetDepthTest(true)
1079  	r.ctx.BindVertexBuffer(r.blitter.quadVerts, 4*4, 0)
1080  	r.ctx.BindInputLayout(r.blitter.layout)
1081  	// Render front to back.
1082  	for i := len(ops) - 1; i >= 0; i-- {
1083  		img := ops[i]
1084  		m := img.material
1085  		switch m.material {
1086  		case materialTexture:
1087  			r.ctx.BindTexture(0, r.texHandle(cache, m.data))
1088  		}
1089  		drc := img.clip
1090  		scale, off := clipSpaceTransform(drc, r.blitter.viewport)
1091  		r.blitter.blit(img.z, m.material, m.color, m.color1, m.color2, scale, off, m.uvTrans)
1092  	}
1093  	r.ctx.SetDepthTest(false)
1094  }
1095  
1096  func (r *renderer) drawOps(cache *resourceCache, ops []imageOp) {
1097  	r.ctx.SetDepthTest(true)
1098  	r.ctx.DepthMask(false)
1099  	r.ctx.BlendFunc(driver.BlendFactorOne, driver.BlendFactorOneMinusSrcAlpha)
1100  	r.ctx.BindVertexBuffer(r.blitter.quadVerts, 4*4, 0)
1101  	r.ctx.BindInputLayout(r.pather.coverer.layout)
1102  	var coverTex driver.Texture
1103  	for _, img := range ops {
1104  		m := img.material
1105  		switch m.material {
1106  		case materialTexture:
1107  			r.ctx.BindTexture(0, r.texHandle(cache, m.data))
1108  		}
1109  		drc := img.clip
1110  
1111  		scale, off := clipSpaceTransform(drc, r.blitter.viewport)
1112  		var fbo stencilFBO
1113  		switch img.clipType {
1114  		case clipTypeNone:
1115  			r.blitter.blit(img.z, m.material, m.color, m.color1, m.color2, scale, off, m.uvTrans)
1116  			continue
1117  		case clipTypePath:
1118  			fbo = r.pather.stenciler.cover(img.place.Idx)
1119  		case clipTypeIntersection:
1120  			fbo = r.pather.stenciler.intersections.fbos[img.place.Idx]
1121  		}
1122  		if coverTex != fbo.tex {
1123  			coverTex = fbo.tex
1124  			r.ctx.BindTexture(1, coverTex)
1125  		}
1126  		uv := image.Rectangle{
1127  			Min: img.place.Pos,
1128  			Max: img.place.Pos.Add(drc.Size()),
1129  		}
1130  		coverScale, coverOff := texSpaceTransform(layout.FRect(uv), fbo.size)
1131  		r.pather.cover(img.z, m.material, m.color, m.color1, m.color2, scale, off, m.uvTrans, coverScale, coverOff)
1132  	}
1133  	r.ctx.DepthMask(true)
1134  	r.ctx.SetDepthTest(false)
1135  }
1136  
1137  func (b *blitter) blit(z float32, mat materialType, col f32color.RGBA, col1, col2 f32color.RGBA, scale, off f32.Point, uvTrans f32.Affine2D) {
1138  	p := b.prog[mat]
1139  	b.ctx.BindProgram(p.prog)
1140  	var uniforms *blitUniforms
1141  	switch mat {
1142  	case materialColor:
1143  		b.colUniforms.frag.color = col
1144  		uniforms = &b.colUniforms.vert.blitUniforms
1145  	case materialTexture:
1146  		t1, t2, t3, t4, t5, t6 := uvTrans.Elems()
1147  		b.texUniforms.vert.blitUniforms.uvTransformR1 = [4]float32{t1, t2, t3, 0}
1148  		b.texUniforms.vert.blitUniforms.uvTransformR2 = [4]float32{t4, t5, t6, 0}
1149  		uniforms = &b.texUniforms.vert.blitUniforms
1150  	case materialLinearGradient:
1151  		b.linearGradientUniforms.frag.color1 = col1
1152  		b.linearGradientUniforms.frag.color2 = col2
1153  
1154  		t1, t2, t3, t4, t5, t6 := uvTrans.Elems()
1155  		b.linearGradientUniforms.vert.blitUniforms.uvTransformR1 = [4]float32{t1, t2, t3, 0}
1156  		b.linearGradientUniforms.vert.blitUniforms.uvTransformR2 = [4]float32{t4, t5, t6, 0}
1157  		uniforms = &b.linearGradientUniforms.vert.blitUniforms
1158  	}
1159  	uniforms.z = z
1160  	uniforms.transform = [4]float32{scale.X, scale.Y, off.X, off.Y}
1161  	p.UploadUniforms()
1162  	b.ctx.DrawArrays(driver.DrawModeTriangleStrip, 0, 4)
1163  }
1164  
1165  // newUniformBuffer creates a new GPU uniform buffer backed by the
1166  // structure uniformBlock points to.
1167  func newUniformBuffer(b driver.Device, uniformBlock interface{}) *uniformBuffer {
1168  	ref := reflect.ValueOf(uniformBlock)
1169  	// Determine the size of the uniforms structure, *uniforms.
1170  	size := ref.Elem().Type().Size()
1171  	// Map the uniforms structure as a byte slice.
1172  	ptr := (*[1 << 30]byte)(unsafe.Pointer(ref.Pointer()))[:size:size]
1173  	ubuf, err := b.NewBuffer(driver.BufferBindingUniforms, len(ptr))
1174  	if err != nil {
1175  		panic(err)
1176  	}
1177  	return &uniformBuffer{buf: ubuf, ptr: ptr}
1178  }
1179  
1180  func (u *uniformBuffer) Upload() {
1181  	u.buf.Upload(u.ptr)
1182  }
1183  
1184  func (u *uniformBuffer) Release() {
1185  	u.buf.Release()
1186  	u.buf = nil
1187  }
1188  
1189  func newProgram(prog driver.Program, vertUniforms, fragUniforms *uniformBuffer) *program {
1190  	if vertUniforms != nil {
1191  		prog.SetVertexUniforms(vertUniforms.buf)
1192  	}
1193  	if fragUniforms != nil {
1194  		prog.SetFragmentUniforms(fragUniforms.buf)
1195  	}
1196  	return &program{prog: prog, vertUniforms: vertUniforms, fragUniforms: fragUniforms}
1197  }
1198  
1199  func (p *program) UploadUniforms() {
1200  	if p.vertUniforms != nil {
1201  		p.vertUniforms.Upload()
1202  	}
1203  	if p.fragUniforms != nil {
1204  		p.fragUniforms.Upload()
1205  	}
1206  }
1207  
1208  func (p *program) Release() {
1209  	p.prog.Release()
1210  	p.prog = nil
1211  	if p.vertUniforms != nil {
1212  		p.vertUniforms.Release()
1213  		p.vertUniforms = nil
1214  	}
1215  	if p.fragUniforms != nil {
1216  		p.fragUniforms.Release()
1217  		p.fragUniforms = nil
1218  	}
1219  }
1220  
1221  // texSpaceTransform return the scale and offset that transforms the given subimage
1222  // into quad texture coordinates.
1223  func texSpaceTransform(r f32.Rectangle, bounds image.Point) (f32.Point, f32.Point) {
1224  	size := f32.Point{X: float32(bounds.X), Y: float32(bounds.Y)}
1225  	scale := f32.Point{X: r.Dx() / size.X, Y: r.Dy() / size.Y}
1226  	offset := f32.Point{X: r.Min.X / size.X, Y: r.Min.Y / size.Y}
1227  	return scale, offset
1228  }
1229  
1230  // gradientSpaceTransform transforms stop1 and stop2 to [(0,0), (1,1)].
1231  func gradientSpaceTransform(clip image.Rectangle, off f32.Point, stop1, stop2 f32.Point) f32.Affine2D {
1232  	d := stop2.Sub(stop1)
1233  	l := float32(math.Sqrt(float64(d.X*d.X + d.Y*d.Y)))
1234  	a := float32(math.Atan2(float64(-d.Y), float64(d.X)))
1235  
1236  	// TODO: optimize
1237  	zp := f32.Point{}
1238  	return f32.Affine2D{}.
1239  		Scale(zp, layout.FPt(clip.Size())).            // scale to pixel space
1240  		Offset(zp.Sub(off).Add(layout.FPt(clip.Min))). // offset to clip space
1241  		Offset(zp.Sub(stop1)).                         // offset to first stop point
1242  		Rotate(zp, a).                                 // rotate to align gradient
1243  		Scale(zp, f32.Pt(1/l, 1/l))                    // scale gradient to right size
1244  }
1245  
1246  // clipSpaceTransform returns the scale and offset that transforms the given
1247  // rectangle from a viewport into OpenGL clip space.
1248  func clipSpaceTransform(r image.Rectangle, viewport image.Point) (f32.Point, f32.Point) {
1249  	// First, transform UI coordinates to OpenGL coordinates:
1250  	//
1251  	//	[(-1, +1) (+1, +1)]
1252  	//	[(-1, -1) (+1, -1)]
1253  	//
1254  	x, y := float32(r.Min.X), float32(r.Min.Y)
1255  	w, h := float32(r.Dx()), float32(r.Dy())
1256  	vx, vy := 2/float32(viewport.X), 2/float32(viewport.Y)
1257  	x = x*vx - 1
1258  	y = 1 - y*vy
1259  	w *= vx
1260  	h *= vy
1261  
1262  	// Then, compute the transformation from the fullscreen quad to
1263  	// the rectangle at (x, y) and dimensions (w, h).
1264  	scale := f32.Point{X: w * .5, Y: h * .5}
1265  	offset := f32.Point{X: x + w*.5, Y: y - h*.5}
1266  
1267  	return scale, offset
1268  }
1269  
1270  // Fill in maximal Y coordinates of the NW and NE corners.
1271  func fillMaxY(verts []byte) {
1272  	contour := 0
1273  	bo := binary.LittleEndian
1274  	for len(verts) > 0 {
1275  		maxy := float32(math.Inf(-1))
1276  		i := 0
1277  		for ; i+vertStride*4 <= len(verts); i += vertStride * 4 {
1278  			vert := verts[i : i+vertStride]
1279  			// MaxY contains the integer contour index.
1280  			pathContour := int(bo.Uint32(vert[int(unsafe.Offsetof(((*vertex)(nil)).MaxY)):]))
1281  			if contour != pathContour {
1282  				contour = pathContour
1283  				break
1284  			}
1285  			fromy := math.Float32frombits(bo.Uint32(vert[int(unsafe.Offsetof(((*vertex)(nil)).FromY)):]))
1286  			ctrly := math.Float32frombits(bo.Uint32(vert[int(unsafe.Offsetof(((*vertex)(nil)).CtrlY)):]))
1287  			toy := math.Float32frombits(bo.Uint32(vert[int(unsafe.Offsetof(((*vertex)(nil)).ToY)):]))
1288  			if fromy > maxy {
1289  				maxy = fromy
1290  			}
1291  			if ctrly > maxy {
1292  				maxy = ctrly
1293  			}
1294  			if toy > maxy {
1295  				maxy = toy
1296  			}
1297  		}
1298  		fillContourMaxY(maxy, verts[:i])
1299  		verts = verts[i:]
1300  	}
1301  }
1302  
1303  func fillContourMaxY(maxy float32, verts []byte) {
1304  	bo := binary.LittleEndian
1305  	for i := 0; i < len(verts); i += vertStride {
1306  		off := int(unsafe.Offsetof(((*vertex)(nil)).MaxY))
1307  		bo.PutUint32(verts[i+off:], math.Float32bits(maxy))
1308  	}
1309  }
1310  
1311  func (d *drawOps) writeVertCache(n int) []byte {
1312  	d.vertCache = append(d.vertCache, make([]byte, n)...)
1313  	return d.vertCache[len(d.vertCache)-n:]
1314  }
1315  
1316  // transform, split paths as needed, calculate maxY, bounds and create GPU vertices.
1317  func (d *drawOps) buildVerts(pathData []byte, tr f32.Affine2D, outline bool, str clip.StrokeStyle) (verts []byte, bounds f32.Rectangle) {
1318  	inf := float32(math.Inf(+1))
1319  	d.qs.bounds = f32.Rectangle{
1320  		Min: f32.Point{X: inf, Y: inf},
1321  		Max: f32.Point{X: -inf, Y: -inf},
1322  	}
1323  	d.qs.d = d
1324  	startLength := len(d.vertCache)
1325  
1326  	switch {
1327  	case str.Width > 0:
1328  		// Stroke path.
1329  		ss := stroke.StrokeStyle{
1330  			Width: str.Width,
1331  			Miter: str.Miter,
1332  			Cap:   stroke.StrokeCap(str.Cap),
1333  			Join:  stroke.StrokeJoin(str.Join),
1334  		}
1335  		quads := stroke.StrokePathCommands(ss, stroke.DashOp{}, pathData)
1336  		for _, quad := range quads {
1337  			d.qs.contour = quad.Contour
1338  			quad.Quad = quad.Quad.Transform(tr)
1339  
1340  			d.qs.splitAndEncode(quad.Quad)
1341  		}
1342  
1343  	case outline:
1344  		decodeToOutlineQuads(&d.qs, tr, pathData)
1345  	}
1346  
1347  	fillMaxY(d.vertCache[startLength:])
1348  	return d.vertCache[startLength:], d.qs.bounds
1349  }
1350  
1351  // decodeOutlineQuads decodes scene commands, splits them into quadratic béziers
1352  // as needed and feeds them to the supplied splitter.
1353  func decodeToOutlineQuads(qs *quadSplitter, tr f32.Affine2D, pathData []byte) {
1354  	for len(pathData) >= scene.CommandSize+4 {
1355  		qs.contour = bo.Uint32(pathData)
1356  		cmd := ops.DecodeCommand(pathData[4:])
1357  		switch cmd.Op() {
1358  		case scene.OpLine:
1359  			var q stroke.QuadSegment
1360  			q.From, q.To = scene.DecodeLine(cmd)
1361  			q.Ctrl = q.From.Add(q.To).Mul(.5)
1362  			q = q.Transform(tr)
1363  			qs.splitAndEncode(q)
1364  		case scene.OpQuad:
1365  			var q stroke.QuadSegment
1366  			q.From, q.Ctrl, q.To = scene.DecodeQuad(cmd)
1367  			q = q.Transform(tr)
1368  			qs.splitAndEncode(q)
1369  		case scene.OpCubic:
1370  			for _, q := range stroke.SplitCubic(scene.DecodeCubic(cmd)) {
1371  				q = q.Transform(tr)
1372  				qs.splitAndEncode(q)
1373  			}
1374  		default:
1375  			panic("unsupported scene command")
1376  		}
1377  		pathData = pathData[scene.CommandSize+4:]
1378  	}
1379  }
1380  
1381  // create GPU vertices for transformed r, find the bounds and establish texture transform.
1382  func (d *drawOps) boundsForTransformedRect(r f32.Rectangle, tr f32.Affine2D) (aux []byte, bnd f32.Rectangle, ptr f32.Affine2D) {
1383  	if isPureOffset(tr) {
1384  		// fast-path to allow blitting of pure rectangles
1385  		_, _, ox, _, _, oy := tr.Elems()
1386  		off := f32.Pt(ox, oy)
1387  		bnd.Min = r.Min.Add(off)
1388  		bnd.Max = r.Max.Add(off)
1389  		return
1390  	}
1391  
1392  	// transform all corners, find new bounds
1393  	corners := [4]f32.Point{
1394  		tr.Transform(r.Min), tr.Transform(f32.Pt(r.Max.X, r.Min.Y)),
1395  		tr.Transform(r.Max), tr.Transform(f32.Pt(r.Min.X, r.Max.Y)),
1396  	}
1397  	bnd.Min = f32.Pt(math.MaxFloat32, math.MaxFloat32)
1398  	bnd.Max = f32.Pt(-math.MaxFloat32, -math.MaxFloat32)
1399  	for _, c := range corners {
1400  		if c.X < bnd.Min.X {
1401  			bnd.Min.X = c.X
1402  		}
1403  		if c.Y < bnd.Min.Y {
1404  			bnd.Min.Y = c.Y
1405  		}
1406  		if c.X > bnd.Max.X {
1407  			bnd.Max.X = c.X
1408  		}
1409  		if c.Y > bnd.Max.Y {
1410  			bnd.Max.Y = c.Y
1411  		}
1412  	}
1413  
1414  	// build the GPU vertices
1415  	l := len(d.vertCache)
1416  	if !d.compute {
1417  		d.vertCache = append(d.vertCache, make([]byte, vertStride*4*4)...)
1418  		aux = d.vertCache[l:]
1419  		encodeQuadTo(aux, 0, corners[0], corners[0].Add(corners[1]).Mul(0.5), corners[1])
1420  		encodeQuadTo(aux[vertStride*4:], 0, corners[1], corners[1].Add(corners[2]).Mul(0.5), corners[2])
1421  		encodeQuadTo(aux[vertStride*4*2:], 0, corners[2], corners[2].Add(corners[3]).Mul(0.5), corners[3])
1422  		encodeQuadTo(aux[vertStride*4*3:], 0, corners[3], corners[3].Add(corners[0]).Mul(0.5), corners[0])
1423  		fillMaxY(aux)
1424  	} else {
1425  		d.vertCache = append(d.vertCache, make([]byte, (scene.CommandSize+4)*4)...)
1426  		aux = d.vertCache[l:]
1427  		buf := aux
1428  		bo := binary.LittleEndian
1429  		bo.PutUint32(buf, 0) // Contour
1430  		ops.EncodeCommand(buf[4:], scene.Line(r.Min, f32.Pt(r.Max.X, r.Min.Y)))
1431  		buf = buf[4+scene.CommandSize:]
1432  		bo.PutUint32(buf, 0)
1433  		ops.EncodeCommand(buf[4:], scene.Line(f32.Pt(r.Max.X, r.Min.Y), r.Max))
1434  		buf = buf[4+scene.CommandSize:]
1435  		bo.PutUint32(buf, 0)
1436  		ops.EncodeCommand(buf[4:], scene.Line(r.Max, f32.Pt(r.Min.X, r.Max.Y)))
1437  		buf = buf[4+scene.CommandSize:]
1438  		bo.PutUint32(buf, 0)
1439  		ops.EncodeCommand(buf[4:], scene.Line(f32.Pt(r.Min.X, r.Max.Y), r.Min))
1440  	}
1441  
1442  	// establish the transform mapping from bounds rectangle to transformed corners
1443  	var P1, P2, P3 f32.Point
1444  	P1.X = (corners[1].X - bnd.Min.X) / (bnd.Max.X - bnd.Min.X)
1445  	P1.Y = (corners[1].Y - bnd.Min.Y) / (bnd.Max.Y - bnd.Min.Y)
1446  	P2.X = (corners[2].X - bnd.Min.X) / (bnd.Max.X - bnd.Min.X)
1447  	P2.Y = (corners[2].Y - bnd.Min.Y) / (bnd.Max.Y - bnd.Min.Y)
1448  	P3.X = (corners[3].X - bnd.Min.X) / (bnd.Max.X - bnd.Min.X)
1449  	P3.Y = (corners[3].Y - bnd.Min.Y) / (bnd.Max.Y - bnd.Min.Y)
1450  	sx, sy := P2.X-P3.X, P2.Y-P3.Y
1451  	ptr = f32.NewAffine2D(sx, P2.X-P1.X, P1.X-sx, sy, P2.Y-P1.Y, P1.Y-sy).Invert()
1452  
1453  	return
1454  }
1455  
1456  func isPureOffset(t f32.Affine2D) bool {
1457  	a, b, _, d, e, _ := t.Elems()
1458  	return a == 1 && b == 0 && d == 0 && e == 1
1459  }
1460