order.mx raw

   1  // Copyright 2023 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package trace
   6  
   7  import (
   8  	"fmt"
   9  	"slices"
  10  	"bytes"
  11  
  12  	"internal/trace/tracev2"
  13  	"internal/trace/version"
  14  )
  15  
  16  // ordering emulates Go scheduler state for both validation and
  17  // for putting events in the right order.
  18  //
  19  // The interface to ordering consists of two methods: Advance
  20  // and Next. Advance is called to try and advance an event and
  21  // add completed events to the ordering. Next is used to pick
  22  // off events in the ordering.
  23  type ordering struct {
  24  	traceVer    version.Version
  25  	gStates     map[GoID]*gState
  26  	pStates     map[ProcID]*pState // TODO: The keys are dense, so this can be a slice.
  27  	mStates     map[ThreadID]*mState
  28  	activeTasks map[TaskID]taskState
  29  	gcSeq       uint64
  30  	gcState     gcState
  31  	initialGen  uint64
  32  	queue       queue[Event]
  33  }
  34  
  35  // Advance checks if it's valid to proceed with ev which came from thread m.
  36  //
  37  // It assumes the gen value passed to it is monotonically increasing across calls.
  38  //
  39  // If any error is returned, then the trace is broken and trace parsing must cease.
  40  // If it's not valid to advance with ev, but no error was encountered, the caller
  41  // should attempt to advance with other candidate events from other threads. If the
  42  // caller runs out of candidates, the trace is invalid.
  43  //
  44  // If this returns true, Next is guaranteed to return a complete event. However,
  45  // multiple events may be added to the ordering, so the caller should (but is not
  46  // required to) continue to call Next until it is exhausted.
  47  func (o *ordering) Advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64) (bool, error) {
  48  	if o.initialGen == 0 {
  49  		// Set the initial gen if necessary.
  50  		o.initialGen = gen
  51  	}
  52  
  53  	var curCtx, newCtx schedCtx
  54  	curCtx.M = m
  55  	newCtx.M = m
  56  
  57  	var ms *mState
  58  	if m == NoThread {
  59  		curCtx.P = NoProc
  60  		curCtx.G = NoGoroutine
  61  		newCtx = curCtx
  62  	} else {
  63  		// Pull out or create the mState for this event.
  64  		var ok bool
  65  		ms, ok = o.mStates[m]
  66  		if !ok {
  67  			ms = &mState{
  68  				g: NoGoroutine,
  69  				p: NoProc,
  70  			}
  71  			o.mStates[m] = ms
  72  		}
  73  		curCtx.P = ms.p
  74  		curCtx.G = ms.g
  75  		newCtx = curCtx
  76  	}
  77  
  78  	f := orderingDispatch[ev.typ]
  79  	if f == nil {
  80  		return false, fmt.Errorf("bad event type found while ordering: %v", ev.typ)
  81  	}
  82  	newCtx, ok, err := f(o, ev, evt, m, gen, curCtx)
  83  	if err == nil && ok && ms != nil {
  84  		// Update the mState for this event.
  85  		ms.p = newCtx.P
  86  		ms.g = newCtx.G
  87  	}
  88  	return ok, err
  89  }
  90  
  91  func (o *ordering) evName(typ tracev2.EventType) []byte {
  92  	return o.traceVer.EventName(typ)
  93  }
  94  
  95  type orderingHandleFunc func(o *ordering, ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error)
  96  
  97  var orderingDispatch = [256]orderingHandleFunc{
  98  	// Procs.
  99  	tracev2.EvProcsChange: (*ordering).advanceAnnotation,
 100  	tracev2.EvProcStart:   (*ordering).advanceProcStart,
 101  	tracev2.EvProcStop:    (*ordering).advanceProcStop,
 102  	tracev2.EvProcSteal:   (*ordering).advanceProcSteal,
 103  	tracev2.EvProcStatus:  (*ordering).advanceProcStatus,
 104  
 105  	// Goroutines.
 106  	tracev2.EvGoCreate:            (*ordering).advanceGoCreate,
 107  	tracev2.EvGoCreateSyscall:     (*ordering).advanceGoCreateSyscall,
 108  	tracev2.EvGoStart:             (*ordering).advanceGoStart,
 109  	tracev2.EvGoDestroy:           (*ordering).advanceGoStopExec,
 110  	tracev2.EvGoDestroySyscall:    (*ordering).advanceGoDestroySyscall,
 111  	tracev2.EvGoStop:              (*ordering).advanceGoStopExec,
 112  	tracev2.EvGoBlock:             (*ordering).advanceGoStopExec,
 113  	tracev2.EvGoUnblock:           (*ordering).advanceGoUnblock,
 114  	tracev2.EvGoSyscallBegin:      (*ordering).advanceGoSyscallBegin,
 115  	tracev2.EvGoSyscallEnd:        (*ordering).advanceGoSyscallEnd,
 116  	tracev2.EvGoSyscallEndBlocked: (*ordering).advanceGoSyscallEndBlocked,
 117  	tracev2.EvGoStatus:            (*ordering).advanceGoStatus,
 118  
 119  	// STW.
 120  	tracev2.EvSTWBegin: (*ordering).advanceGoRangeBegin,
 121  	tracev2.EvSTWEnd:   (*ordering).advanceGoRangeEnd,
 122  
 123  	// GC events.
 124  	tracev2.EvGCActive:           (*ordering).advanceGCActive,
 125  	tracev2.EvGCBegin:            (*ordering).advanceGCBegin,
 126  	tracev2.EvGCEnd:              (*ordering).advanceGCEnd,
 127  	tracev2.EvGCSweepActive:      (*ordering).advanceGCSweepActive,
 128  	tracev2.EvGCSweepBegin:       (*ordering).advanceGCSweepBegin,
 129  	tracev2.EvGCSweepEnd:         (*ordering).advanceGCSweepEnd,
 130  	tracev2.EvGCMarkAssistActive: (*ordering).advanceGoRangeActive,
 131  	tracev2.EvGCMarkAssistBegin:  (*ordering).advanceGoRangeBegin,
 132  	tracev2.EvGCMarkAssistEnd:    (*ordering).advanceGoRangeEnd,
 133  	tracev2.EvHeapAlloc:          (*ordering).advanceHeapMetric,
 134  	tracev2.EvHeapGoal:           (*ordering).advanceHeapMetric,
 135  
 136  	// Annotations.
 137  	tracev2.EvGoLabel:         (*ordering).advanceAnnotation,
 138  	tracev2.EvUserTaskBegin:   (*ordering).advanceUserTaskBegin,
 139  	tracev2.EvUserTaskEnd:     (*ordering).advanceUserTaskEnd,
 140  	tracev2.EvUserRegionBegin: (*ordering).advanceUserRegionBegin,
 141  	tracev2.EvUserRegionEnd:   (*ordering).advanceUserRegionEnd,
 142  	tracev2.EvUserLog:         (*ordering).advanceAnnotation,
 143  
 144  	// Coroutines. Added in Go 1.23.
 145  	tracev2.EvGoSwitch:        (*ordering).advanceGoSwitch,
 146  	tracev2.EvGoSwitchDestroy: (*ordering).advanceGoSwitch,
 147  	tracev2.EvGoCreateBlocked: (*ordering).advanceGoCreate,
 148  
 149  	// GoStatus event with a stack. Added in Go 1.23.
 150  	tracev2.EvGoStatusStack: (*ordering).advanceGoStatus,
 151  
 152  	// Experimental events.
 153  
 154  	// Experimental heap span events. Added in Go 1.23.
 155  	tracev2.EvSpan:      (*ordering).advanceAllocFree,
 156  	tracev2.EvSpanAlloc: (*ordering).advanceAllocFree,
 157  	tracev2.EvSpanFree:  (*ordering).advanceAllocFree,
 158  
 159  	// Experimental heap object events. Added in Go 1.23.
 160  	tracev2.EvHeapObject:      (*ordering).advanceAllocFree,
 161  	tracev2.EvHeapObjectAlloc: (*ordering).advanceAllocFree,
 162  	tracev2.EvHeapObjectFree:  (*ordering).advanceAllocFree,
 163  
 164  	// Experimental goroutine stack events. Added in Go 1.23.
 165  	tracev2.EvGoroutineStack:      (*ordering).advanceAllocFree,
 166  	tracev2.EvGoroutineStackAlloc: (*ordering).advanceAllocFree,
 167  	tracev2.EvGoroutineStackFree:  (*ordering).advanceAllocFree,
 168  }
 169  
 170  func (o *ordering) advanceProcStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 171  	pid := ProcID(ev.args[0])
 172  	status := tracev2.ProcStatus(ev.args[1])
 173  	if int(status) >= len(tracev2ProcStatus2ProcState) {
 174  		return curCtx, false, fmt.Errorf("invalid status for proc %d: %d", pid, status)
 175  	}
 176  	oldState := tracev2ProcStatus2ProcState[status]
 177  	if s, ok := o.pStates[pid]; ok {
 178  		if status == tracev2.ProcSyscallAbandoned && s.status == tracev2.ProcSyscall {
 179  			// ProcSyscallAbandoned is a special case of ProcSyscall. It indicates a
 180  			// potential loss of information, but if we're already in ProcSyscall,
 181  			// we haven't lost the relevant information. Promote the status and advance.
 182  			oldState = ProcRunning
 183  			ev.args[1] = uint64(tracev2.ProcSyscall)
 184  		} else if status == tracev2.ProcSyscallAbandoned && s.status == tracev2.ProcSyscallAbandoned {
 185  			// If we're passing through ProcSyscallAbandoned, then there's no promotion
 186  			// to do. We've lost the M that this P is associated with. However it got there,
 187  			// it's going to appear as idle in the API, so pass through as idle.
 188  			oldState = ProcIdle
 189  			ev.args[1] = uint64(tracev2.ProcSyscallAbandoned)
 190  		} else if s.status != status {
 191  			return curCtx, false, fmt.Errorf("inconsistent status for proc %d: old %v vs. new %v", pid, s.status, status)
 192  		}
 193  		s.seq = makeSeq(gen, 0) // Reset seq.
 194  	} else {
 195  		o.pStates[pid] = &pState{id: pid, status: status, seq: makeSeq(gen, 0)}
 196  		if gen == o.initialGen {
 197  			oldState = ProcUndetermined
 198  		} else {
 199  			oldState = ProcNotExist
 200  		}
 201  	}
 202  	ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition.
 203  
 204  	// Bind the proc to the new context, if it's running.
 205  	newCtx := curCtx
 206  	if status == tracev2.ProcRunning || status == tracev2.ProcSyscall {
 207  		newCtx.P = pid
 208  	}
 209  	// If we're advancing through ProcSyscallAbandoned *but* oldState is running then we've
 210  	// promoted it to ProcSyscall. However, because it's ProcSyscallAbandoned, we know this
 211  	// P is about to get stolen and its status very likely isn't being emitted by the same
 212  	// thread it was bound to. Since this status is Running -> Running and Running is binding,
 213  	// we need to make sure we emit it in the right context: the context to which it is bound.
 214  	// Find it, and set our current context to it.
 215  	if status == tracev2.ProcSyscallAbandoned && oldState == ProcRunning {
 216  		// N.B. This is slow but it should be fairly rare.
 217  		found := false
 218  		for mid, ms := range o.mStates {
 219  			if ms.p == pid {
 220  				curCtx.M = mid
 221  				curCtx.P = pid
 222  				curCtx.G = ms.g
 223  				found = true
 224  			}
 225  		}
 226  		if !found {
 227  			return curCtx, false, fmt.Errorf("failed to find sched context for proc %d that's about to be stolen", pid)
 228  		}
 229  	}
 230  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 231  	return newCtx, true, nil
 232  }
 233  
 234  func (o *ordering) advanceProcStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 235  	pid := ProcID(ev.args[0])
 236  	seq := makeSeq(gen, ev.args[1])
 237  
 238  	// Try to advance. We might fail here due to sequencing, because the P hasn't
 239  	// had a status emitted, or because we already have a P and we're in a syscall,
 240  	// and we haven't observed that it was stolen from us yet.
 241  	state, ok := o.pStates[pid]
 242  	if !ok || state.status != tracev2.ProcIdle || !seq.succeeds(state.seq) || curCtx.P != NoProc {
 243  		// We can't make an inference as to whether this is bad. We could just be seeing
 244  		// a ProcStart on a different M before the proc's state was emitted, or before we
 245  		// got to the right point in the trace.
 246  		//
 247  		// Note that we also don't advance here if we have a P and we're in a syscall.
 248  		return curCtx, false, nil
 249  	}
 250  	// We can advance this P. Check some invariants.
 251  	//
 252  	// We might have a goroutine if a goroutine is exiting a syscall.
 253  	reqs := schedReqs{M: mustHave, P: mustNotHave, G: mayHave}
 254  	if err := validateCtx(curCtx, reqs); err != nil {
 255  		return curCtx, false, err
 256  	}
 257  	state.status = tracev2.ProcRunning
 258  	state.seq = seq
 259  	newCtx := curCtx
 260  	newCtx.P = pid
 261  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 262  	return newCtx, true, nil
 263  }
 264  
 265  func (o *ordering) advanceProcStop(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 266  	// We must be able to advance this P.
 267  	//
 268  	// There are 2 ways a P can stop: ProcStop and ProcSteal. ProcStop is used when the P
 269  	// is stopped by the same M that started it, while ProcSteal is used when another M
 270  	// steals the P by stopping it from a distance.
 271  	//
 272  	// Since a P is bound to an M, and we're stopping on the same M we started, it must
 273  	// always be possible to advance the current M's P from a ProcStop. This is also why
 274  	// ProcStop doesn't need a sequence number.
 275  	state, ok := o.pStates[curCtx.P]
 276  	if !ok {
 277  		return curCtx, false, fmt.Errorf("event %s for proc (%v) that doesn't exist", o.evName(ev.typ), curCtx.P)
 278  	}
 279  	if state.status != tracev2.ProcRunning && state.status != tracev2.ProcSyscall {
 280  		return curCtx, false, fmt.Errorf("%s event for proc that's not %s or %s", o.evName(ev.typ), tracev2.ProcRunning, tracev2.ProcSyscall)
 281  	}
 282  	reqs := schedReqs{M: mustHave, P: mustHave, G: mayHave}
 283  	if err := validateCtx(curCtx, reqs); err != nil {
 284  		return curCtx, false, err
 285  	}
 286  	state.status = tracev2.ProcIdle
 287  	newCtx := curCtx
 288  	newCtx.P = NoProc
 289  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 290  	return newCtx, true, nil
 291  }
 292  
 293  func (o *ordering) advanceProcSteal(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 294  	pid := ProcID(ev.args[0])
 295  	seq := makeSeq(gen, ev.args[1])
 296  	state, ok := o.pStates[pid]
 297  	if !ok || (state.status != tracev2.ProcSyscall && state.status != tracev2.ProcSyscallAbandoned) || !seq.succeeds(state.seq) {
 298  		// We can't make an inference as to whether this is bad. We could just be seeing
 299  		// a ProcStart on a different M before the proc's state was emitted, or before we
 300  		// got to the right point in the trace.
 301  		return curCtx, false, nil
 302  	}
 303  	// We can advance this P. Check some invariants.
 304  	reqs := schedReqs{M: mustHave, P: mayHave, G: mayHave}
 305  	if err := validateCtx(curCtx, reqs); err != nil {
 306  		return curCtx, false, err
 307  	}
 308  	// Smuggle in the P state that let us advance so we can surface information to the event.
 309  	// Specifically, we need to make sure that the event is interpreted not as a transition of
 310  	// ProcRunning -> ProcIdle but ProcIdle -> ProcIdle instead.
 311  	//
 312  	// ProcRunning is binding, but we may be running with a P on the current M and we can't
 313  	// bind another P. This P is about to go ProcIdle anyway.
 314  	oldStatus := state.status
 315  	ev.extra(version.Go122)[0] = uint64(oldStatus)
 316  
 317  	// Update the P's status and sequence number.
 318  	state.status = tracev2.ProcIdle
 319  	state.seq = seq
 320  
 321  	// If we've lost information then don't try to do anything with the M.
 322  	// It may have moved on and we can't be sure.
 323  	if oldStatus == tracev2.ProcSyscallAbandoned {
 324  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 325  		return curCtx, true, nil
 326  	}
 327  
 328  	// Validate that the M we're stealing from is what we expect.
 329  	mid := ThreadID(ev.args[2]) // The M we're stealing from.
 330  
 331  	newCtx := curCtx
 332  	if mid == curCtx.M {
 333  		// We're stealing from ourselves. This behaves like a ProcStop.
 334  		if curCtx.P != pid {
 335  			return curCtx, false, fmt.Errorf("tried to self-steal proc %d (thread %d), but got proc %d instead", pid, mid, curCtx.P)
 336  		}
 337  		newCtx.P = NoProc
 338  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 339  		return newCtx, true, nil
 340  	}
 341  
 342  	// We're stealing from some other M.
 343  	mState, ok := o.mStates[mid]
 344  	if !ok {
 345  		return curCtx, false, fmt.Errorf("stole proc from non-existent thread %d", mid)
 346  	}
 347  
 348  	// Make sure we're actually stealing the right P.
 349  	if mState.p != pid {
 350  		return curCtx, false, fmt.Errorf("tried to steal proc %d from thread %d, but got proc %d instead", pid, mid, mState.p)
 351  	}
 352  
 353  	// Tell the M it has no P so it can proceed.
 354  	//
 355  	// This is safe because we know the P was in a syscall and
 356  	// the other M must be trying to get out of the syscall.
 357  	// GoSyscallEndBlocked cannot advance until the corresponding
 358  	// M loses its P.
 359  	mState.p = NoProc
 360  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 361  	return newCtx, true, nil
 362  }
 363  
 364  func (o *ordering) advanceGoStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 365  	gid := GoID(ev.args[0])
 366  	mid := ThreadID(ev.args[1])
 367  	status := tracev2.GoStatus(ev.args[2])
 368  
 369  	if int(status) >= len(tracev2GoStatus2GoState) {
 370  		return curCtx, false, fmt.Errorf("invalid status for goroutine %d: %d", gid, status)
 371  	}
 372  	oldState := tracev2GoStatus2GoState[status]
 373  	if s, ok := o.gStates[gid]; ok {
 374  		if s.status != status {
 375  			return curCtx, false, fmt.Errorf("inconsistent status for goroutine %d: old %v vs. new %v", gid, s.status, status)
 376  		}
 377  		s.seq = makeSeq(gen, 0) // Reset seq.
 378  	} else if gen == o.initialGen {
 379  		// Set the state.
 380  		o.gStates[gid] = &gState{id: gid, status: status, seq: makeSeq(gen, 0)}
 381  		oldState = GoUndetermined
 382  	} else {
 383  		return curCtx, false, fmt.Errorf("found goroutine status for new goroutine after the first generation: id=%v status=%v", gid, status)
 384  	}
 385  	ev.args[2] = uint64(oldState)<<32 | uint64(status) // Smuggle in the old state for StateTransition.
 386  
 387  	newCtx := curCtx
 388  	switch status {
 389  	case tracev2.GoRunning:
 390  		// Bind the goroutine to the new context, since it's running.
 391  		newCtx.G = gid
 392  	case tracev2.GoSyscall:
 393  		if mid == NoThread {
 394  			return curCtx, false, fmt.Errorf("found goroutine %d in syscall without a thread", gid)
 395  		}
 396  		// Is the syscall on this thread? If so, bind it to the context.
 397  		// Otherwise, we're talking about a G sitting in a syscall on an M.
 398  		// Validate the named M.
 399  		if mid == curCtx.M {
 400  			if gen != o.initialGen && curCtx.G != gid {
 401  				// If this isn't the first generation, we *must* have seen this
 402  				// binding occur already. Even if the G was blocked in a syscall
 403  				// for multiple generations since trace start, we would have seen
 404  				// a previous GoStatus event that bound the goroutine to an M.
 405  				return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, curCtx.G)
 406  			}
 407  			newCtx.G = gid
 408  			break
 409  		}
 410  		// Now we're talking about a thread and goroutine that have been
 411  		// blocked on a syscall for the entire generation. This case must
 412  		// not have a P; the runtime makes sure that all Ps are traced at
 413  		// the beginning of a generation, which involves taking a P back
 414  		// from every thread.
 415  		ms, ok := o.mStates[mid]
 416  		if ok {
 417  			// This M has been seen. That means we must have seen this
 418  			// goroutine go into a syscall on this thread at some point.
 419  			if ms.g != gid {
 420  				// But the G on the M doesn't match. Something's wrong.
 421  				return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, ms.g)
 422  			}
 423  			// This case is just a Syscall->Syscall event, which needs to
 424  			// appear as having the G currently bound to this M.
 425  			curCtx.G = ms.g
 426  		} else if !ok {
 427  			// The M hasn't been seen yet. That means this goroutine
 428  			// has just been sitting in a syscall on this M. Create
 429  			// a state for it.
 430  			o.mStates[mid] = &mState{g: gid, p: NoProc}
 431  			// Don't set curCtx.G in this case because this event is the
 432  			// binding event (and curCtx represents the "before" state).
 433  		}
 434  		// Update the current context to the M we're talking about.
 435  		curCtx.M = mid
 436  	}
 437  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 438  	return newCtx, true, nil
 439  }
 440  
 441  func (o *ordering) advanceGoCreate(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 442  	// Goroutines must be created on a running P, but may or may not be created
 443  	// by a running goroutine.
 444  	reqs := schedReqs{M: mustHave, P: mustHave, G: mayHave}
 445  	if err := validateCtx(curCtx, reqs); err != nil {
 446  		return curCtx, false, err
 447  	}
 448  	// If we have a goroutine, it must be running.
 449  	if state, ok := o.gStates[curCtx.G]; ok && state.status != tracev2.GoRunning {
 450  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
 451  	}
 452  	// This goroutine created another. Add a state for it.
 453  	newgid := GoID(ev.args[0])
 454  	if _, ok := o.gStates[newgid]; ok {
 455  		return curCtx, false, fmt.Errorf("tried to create goroutine (%v) that already exists", newgid)
 456  	}
 457  	status := tracev2.GoRunnable
 458  	if ev.typ == tracev2.EvGoCreateBlocked {
 459  		status = tracev2.GoWaiting
 460  	}
 461  	o.gStates[newgid] = &gState{id: newgid, status: status, seq: makeSeq(gen, 0)}
 462  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 463  	return curCtx, true, nil
 464  }
 465  
 466  func (o *ordering) advanceGoStopExec(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 467  	// These are goroutine events that all require an active running
 468  	// goroutine on some thread. They must *always* be advance-able,
 469  	// since running goroutines are bound to their M.
 470  	if err := validateCtx(curCtx, userGoReqs); err != nil {
 471  		return curCtx, false, err
 472  	}
 473  	state, ok := o.gStates[curCtx.G]
 474  	if !ok {
 475  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
 476  	}
 477  	if state.status != tracev2.GoRunning {
 478  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
 479  	}
 480  	// Handle each case slightly differently; we just group them together
 481  	// because they have shared preconditions.
 482  	newCtx := curCtx
 483  	switch ev.typ {
 484  	case tracev2.EvGoDestroy:
 485  		// This goroutine is exiting itself.
 486  		delete(o.gStates, curCtx.G)
 487  		newCtx.G = NoGoroutine
 488  	case tracev2.EvGoStop:
 489  		// Goroutine stopped (yielded). It's runnable but not running on this M.
 490  		state.status = tracev2.GoRunnable
 491  		newCtx.G = NoGoroutine
 492  	case tracev2.EvGoBlock:
 493  		// Goroutine blocked. It's waiting now and not running on this M.
 494  		state.status = tracev2.GoWaiting
 495  		newCtx.G = NoGoroutine
 496  	}
 497  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 498  	return newCtx, true, nil
 499  }
 500  
 501  func (o *ordering) advanceGoStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 502  	gid := GoID(ev.args[0])
 503  	seq := makeSeq(gen, ev.args[1])
 504  	state, ok := o.gStates[gid]
 505  	if !ok || state.status != tracev2.GoRunnable || !seq.succeeds(state.seq) {
 506  		// We can't make an inference as to whether this is bad. We could just be seeing
 507  		// a GoStart on a different M before the goroutine was created, before it had its
 508  		// state emitted, or before we got to the right point in the trace yet.
 509  		return curCtx, false, nil
 510  	}
 511  	// We can advance this goroutine. Check some invariants.
 512  	reqs := schedReqs{M: mustHave, P: mustHave, G: mustNotHave}
 513  	if err := validateCtx(curCtx, reqs); err != nil {
 514  		return curCtx, false, err
 515  	}
 516  	state.status = tracev2.GoRunning
 517  	state.seq = seq
 518  	newCtx := curCtx
 519  	newCtx.G = gid
 520  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 521  	return newCtx, true, nil
 522  }
 523  
 524  func (o *ordering) advanceGoUnblock(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 525  	// N.B. These both reference the goroutine to unblock, not the current goroutine.
 526  	gid := GoID(ev.args[0])
 527  	seq := makeSeq(gen, ev.args[1])
 528  	state, ok := o.gStates[gid]
 529  	if !ok || state.status != tracev2.GoWaiting || !seq.succeeds(state.seq) {
 530  		// We can't make an inference as to whether this is bad. We could just be seeing
 531  		// a GoUnblock on a different M before the goroutine was created and blocked itself,
 532  		// before it had its state emitted, or before we got to the right point in the trace yet.
 533  		return curCtx, false, nil
 534  	}
 535  	state.status = tracev2.GoRunnable
 536  	state.seq = seq
 537  	// N.B. No context to validate. Basically anything can unblock
 538  	// a goroutine (e.g. sysmon).
 539  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 540  	return curCtx, true, nil
 541  }
 542  
 543  func (o *ordering) advanceGoSwitch(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 544  	// GoSwitch and GoSwitchDestroy represent a trio of events:
 545  	// - Unblock of the goroutine to switch to.
 546  	// - Block or destroy of the current goroutine.
 547  	// - Start executing the next goroutine.
 548  	//
 549  	// Because it acts like a GoStart for the next goroutine, we can
 550  	// only advance it if the sequence numbers line up.
 551  	//
 552  	// The current goroutine on the thread must be actively running.
 553  	if err := validateCtx(curCtx, userGoReqs); err != nil {
 554  		return curCtx, false, err
 555  	}
 556  	curGState, ok := o.gStates[curCtx.G]
 557  	if !ok {
 558  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
 559  	}
 560  	if curGState.status != tracev2.GoRunning {
 561  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
 562  	}
 563  	nextg := GoID(ev.args[0])
 564  	seq := makeSeq(gen, ev.args[1]) // seq is for nextg, not curCtx.G.
 565  	nextGState, ok := o.gStates[nextg]
 566  	if !ok || nextGState.status != tracev2.GoWaiting || !seq.succeeds(nextGState.seq) {
 567  		// We can't make an inference as to whether this is bad. We could just be seeing
 568  		// a GoSwitch on a different M before the goroutine was created, before it had its
 569  		// state emitted, or before we got to the right point in the trace yet.
 570  		return curCtx, false, nil
 571  	}
 572  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 573  
 574  	// Update the state of the executing goroutine and emit an event for it
 575  	// (GoSwitch and GoSwitchDestroy will be interpreted as GoUnblock events
 576  	// for nextg).
 577  	switch ev.typ {
 578  	case tracev2.EvGoSwitch:
 579  		// Goroutine blocked. It's waiting now and not running on this M.
 580  		curGState.status = tracev2.GoWaiting
 581  
 582  		// Emit a GoBlock event.
 583  		// TODO(mknyszek): Emit a reason.
 584  		o.queue.push(makeEvent(evt, curCtx, tracev2.EvGoBlock, ev.time, 0 /* no reason */, 0 /* no stack */))
 585  	case tracev2.EvGoSwitchDestroy:
 586  		// This goroutine is exiting itself.
 587  		delete(o.gStates, curCtx.G)
 588  
 589  		// Emit a GoDestroy event.
 590  		o.queue.push(makeEvent(evt, curCtx, tracev2.EvGoDestroy, ev.time))
 591  	}
 592  	// Update the state of the next goroutine.
 593  	nextGState.status = tracev2.GoRunning
 594  	nextGState.seq = seq
 595  	newCtx := curCtx
 596  	newCtx.G = nextg
 597  
 598  	// Queue an event for the next goroutine starting to run.
 599  	startCtx := curCtx
 600  	startCtx.G = NoGoroutine
 601  	o.queue.push(makeEvent(evt, startCtx, tracev2.EvGoStart, ev.time, uint64(nextg), ev.args[1]))
 602  	return newCtx, true, nil
 603  }
 604  
 605  func (o *ordering) advanceGoSyscallBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 606  	// Entering a syscall requires an active running goroutine with a
 607  	// proc on some thread. It is always advancable.
 608  	if err := validateCtx(curCtx, userGoReqs); err != nil {
 609  		return curCtx, false, err
 610  	}
 611  	state, ok := o.gStates[curCtx.G]
 612  	if !ok {
 613  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
 614  	}
 615  	if state.status != tracev2.GoRunning {
 616  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
 617  	}
 618  	// Goroutine entered a syscall. It's still running on this P and M.
 619  	state.status = tracev2.GoSyscall
 620  	pState, ok := o.pStates[curCtx.P]
 621  	if !ok {
 622  		return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, o.evName(ev.typ))
 623  	}
 624  	pState.status = tracev2.ProcSyscall
 625  	// Validate the P sequence number on the event and advance it.
 626  	//
 627  	// We have a P sequence number for what is supposed to be a goroutine event
 628  	// so that we can correctly model P stealing. Without this sequence number here,
 629  	// the syscall from which a ProcSteal event is stealing can be ambiguous in the
 630  	// face of broken timestamps. See the go122-syscall-steal-proc-ambiguous test for
 631  	// more details.
 632  	//
 633  	// Note that because this sequence number only exists as a tool for disambiguation,
 634  	// we can enforce that we have the right sequence number at this point; we don't need
 635  	// to back off and see if any other events will advance. This is a running P.
 636  	pSeq := makeSeq(gen, ev.args[0])
 637  	if !pSeq.succeeds(pState.seq) {
 638  		return curCtx, false, fmt.Errorf("failed to advance %s: can't make sequence: %s -> %s", o.evName(ev.typ), pState.seq, pSeq)
 639  	}
 640  	pState.seq = pSeq
 641  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 642  	return curCtx, true, nil
 643  }
 644  
 645  func (o *ordering) advanceGoSyscallEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 646  	// This event is always advance-able because it happens on the same
 647  	// thread that EvGoSyscallStart happened, and the goroutine can't leave
 648  	// that thread until its done.
 649  	if err := validateCtx(curCtx, userGoReqs); err != nil {
 650  		return curCtx, false, err
 651  	}
 652  	state, ok := o.gStates[curCtx.G]
 653  	if !ok {
 654  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
 655  	}
 656  	if state.status != tracev2.GoSyscall {
 657  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
 658  	}
 659  	state.status = tracev2.GoRunning
 660  
 661  	// Transfer the P back to running from syscall.
 662  	pState, ok := o.pStates[curCtx.P]
 663  	if !ok {
 664  		return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, o.evName(ev.typ))
 665  	}
 666  	if pState.status != tracev2.ProcSyscall {
 667  		return curCtx, false, fmt.Errorf("expected proc %d in state %v, but got %v instead", curCtx.P, tracev2.ProcSyscall, pState.status)
 668  	}
 669  	pState.status = tracev2.ProcRunning
 670  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 671  	return curCtx, true, nil
 672  }
 673  
 674  func (o *ordering) advanceGoSyscallEndBlocked(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 675  	// This event becomes advanceable when its P is not in a syscall state
 676  	// (lack of a P altogether is also acceptable for advancing).
 677  	// The transfer out of ProcSyscall can happen either voluntarily via
 678  	// ProcStop or involuntarily via ProcSteal. We may also acquire a new P
 679  	// before we get here (after the transfer out) but that's OK: that new
 680  	// P won't be in the ProcSyscall state anymore.
 681  	//
 682  	// Basically: while we have a preemptible P, don't advance, because we
 683  	// *know* from the event that we're going to lose it at some point during
 684  	// the syscall. We shouldn't advance until that happens.
 685  	if curCtx.P != NoProc {
 686  		pState, ok := o.pStates[curCtx.P]
 687  		if !ok {
 688  			return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, o.evName(ev.typ))
 689  		}
 690  		if pState.status == tracev2.ProcSyscall {
 691  			return curCtx, false, nil
 692  		}
 693  	}
 694  	// As mentioned above, we may have a P here if we ProcStart
 695  	// before this event.
 696  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mayHave, G: mustHave}); err != nil {
 697  		return curCtx, false, err
 698  	}
 699  	state, ok := o.gStates[curCtx.G]
 700  	if !ok {
 701  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
 702  	}
 703  	if state.status != tracev2.GoSyscall {
 704  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
 705  	}
 706  	newCtx := curCtx
 707  	newCtx.G = NoGoroutine
 708  	state.status = tracev2.GoRunnable
 709  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 710  	return newCtx, true, nil
 711  }
 712  
 713  func (o *ordering) advanceGoCreateSyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 714  	// This event indicates that a goroutine is effectively
 715  	// being created out of a cgo callback. Such a goroutine
 716  	// is 'created' in the syscall state.
 717  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mayHave, G: mustNotHave}); err != nil {
 718  		return curCtx, false, err
 719  	}
 720  	// This goroutine is effectively being created. Add a state for it.
 721  	newgid := GoID(ev.args[0])
 722  	if _, ok := o.gStates[newgid]; ok {
 723  		return curCtx, false, fmt.Errorf("tried to create goroutine (%v) in syscall that already exists", newgid)
 724  	}
 725  	o.gStates[newgid] = &gState{id: newgid, status: tracev2.GoSyscall, seq: makeSeq(gen, 0)}
 726  	// Goroutine is executing. Bind it to the context.
 727  	newCtx := curCtx
 728  	newCtx.G = newgid
 729  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 730  	return newCtx, true, nil
 731  }
 732  
 733  func (o *ordering) advanceGoDestroySyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 734  	// This event indicates that a goroutine created for a
 735  	// cgo callback is disappearing, either because the callback
 736  	// ending or the C thread that called it is being destroyed.
 737  	//
 738  	// Also, treat this as if we lost our P too.
 739  	// The thread ID may be reused by the platform and we'll get
 740  	// really confused if we try to steal the P is this is running
 741  	// with later. The new M with the same ID could even try to
 742  	// steal back this P from itself!
 743  	//
 744  	// The runtime is careful to make sure that any GoCreateSyscall
 745  	// event will enter the runtime emitting events for reacquiring a P.
 746  	//
 747  	// Note: we might have a P here. The P might not be released
 748  	// eagerly by the runtime, and it might get stolen back later
 749  	// (or never again, if the program is going to exit).
 750  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mayHave, G: mustHave}); err != nil {
 751  		return curCtx, false, err
 752  	}
 753  	// Check to make sure the goroutine exists in the right state.
 754  	state, ok := o.gStates[curCtx.G]
 755  	if !ok {
 756  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
 757  	}
 758  	if state.status != tracev2.GoSyscall {
 759  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %v", o.evName(ev.typ), GoSyscall)
 760  	}
 761  	// This goroutine is exiting itself.
 762  	delete(o.gStates, curCtx.G)
 763  	newCtx := curCtx
 764  	newCtx.G = NoGoroutine
 765  
 766  	// If we have a proc, then we're dissociating from it now. See the comment at the top of the case.
 767  	if curCtx.P != NoProc {
 768  		pState, ok := o.pStates[curCtx.P]
 769  		if !ok {
 770  			return curCtx, false, fmt.Errorf("found invalid proc %d during %s", curCtx.P, o.evName(ev.typ))
 771  		}
 772  		if pState.status != tracev2.ProcSyscall {
 773  			return curCtx, false, fmt.Errorf("proc %d in unexpected state %s during %s", curCtx.P, pState.status, o.evName(ev.typ))
 774  		}
 775  		// See the go122-create-syscall-reuse-thread-id test case for more details.
 776  		pState.status = tracev2.ProcSyscallAbandoned
 777  		newCtx.P = NoProc
 778  
 779  		// Queue an extra self-ProcSteal event.
 780  		extra := makeEvent(evt, curCtx, tracev2.EvProcSteal, ev.time, uint64(curCtx.P))
 781  		extra.base.extra(version.Go122)[0] = uint64(tracev2.ProcSyscall)
 782  		o.queue.push(extra)
 783  	}
 784  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 785  	return newCtx, true, nil
 786  }
 787  
 788  func (o *ordering) advanceUserTaskBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 789  	// Handle tasks. Tasks are interesting because:
 790  	// - There's no Begin event required to reference a task.
 791  	// - End for a particular task ID can appear multiple times.
 792  	// As a result, there's very little to validate. The only
 793  	// thing we have to be sure of is that a task didn't begin
 794  	// after it had already begun. Task IDs are allowed to be
 795  	// reused, so we don't care about a Begin after an End.
 796  	id := TaskID(ev.args[0])
 797  	if _, ok := o.activeTasks[id]; ok {
 798  		return curCtx, false, fmt.Errorf("task ID conflict: %d", id)
 799  	}
 800  	// Get the parent ID, but don't validate it. There's no guarantee
 801  	// we actually have information on whether it's active.
 802  	parentID := TaskID(ev.args[1])
 803  	if parentID == BackgroundTask {
 804  		// Note: a value of 0 here actually means no parent, *not* the
 805  		// background task. Automatic background task attachment only
 806  		// applies to regions.
 807  		parentID = NoTask
 808  		ev.args[1] = uint64(NoTask)
 809  	}
 810  
 811  	// Validate the name and record it. We'll need to pass it through to
 812  	// EvUserTaskEnd.
 813  	nameID := stringID(ev.args[2])
 814  	name, ok := evt.bytes.get(nameID)
 815  	if !ok {
 816  		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
 817  	}
 818  	o.activeTasks[id] = taskState{name: name, parentID: parentID}
 819  	if err := validateCtx(curCtx, userGoReqs); err != nil {
 820  		return curCtx, false, err
 821  	}
 822  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 823  	return curCtx, true, nil
 824  }
 825  
 826  func (o *ordering) advanceUserTaskEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 827  	id := TaskID(ev.args[0])
 828  	if ts, ok := o.activeTasks[id]; ok {
 829  		// Smuggle the task info. This may happen in a different generation,
 830  		// which may not have the name in its string table. Add it to the extra
 831  		// strings table so we can look it up later.
 832  		ev.extra(version.Go122)[0] = uint64(ts.parentID)
 833  		ev.extra(version.Go122)[1] = uint64(evt.addExtraString(ts.name))
 834  		delete(o.activeTasks, id)
 835  	} else {
 836  		// Explicitly clear the task info.
 837  		ev.extra(version.Go122)[0] = uint64(NoTask)
 838  		ev.extra(version.Go122)[1] = uint64(evt.addExtraString(""))
 839  	}
 840  	if err := validateCtx(curCtx, userGoReqs); err != nil {
 841  		return curCtx, false, err
 842  	}
 843  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 844  	return curCtx, true, nil
 845  }
 846  
 847  func (o *ordering) advanceUserRegionBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 848  	if err := validateCtx(curCtx, userGoReqs); err != nil {
 849  		return curCtx, false, err
 850  	}
 851  	tid := TaskID(ev.args[0])
 852  	nameID := stringID(ev.args[1])
 853  	name, ok := evt.bytes.get(nameID)
 854  	if !ok {
 855  		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
 856  	}
 857  	gState, ok := o.gStates[curCtx.G]
 858  	if !ok {
 859  		return curCtx, false, fmt.Errorf("encountered EvUserRegionBegin without known state for current goroutine %d", curCtx.G)
 860  	}
 861  	if err := gState.beginRegion(userRegion{tid, name}); err != nil {
 862  		return curCtx, false, err
 863  	}
 864  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 865  	return curCtx, true, nil
 866  }
 867  
 868  func (o *ordering) advanceUserRegionEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 869  	if err := validateCtx(curCtx, userGoReqs); err != nil {
 870  		return curCtx, false, err
 871  	}
 872  	tid := TaskID(ev.args[0])
 873  	nameID := stringID(ev.args[1])
 874  	name, ok := evt.bytes.get(nameID)
 875  	if !ok {
 876  		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
 877  	}
 878  	gState, ok := o.gStates[curCtx.G]
 879  	if !ok {
 880  		return curCtx, false, fmt.Errorf("encountered EvUserRegionEnd without known state for current goroutine %d", curCtx.G)
 881  	}
 882  	if err := gState.endRegion(userRegion{tid, name}); err != nil {
 883  		return curCtx, false, err
 884  	}
 885  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 886  	return curCtx, true, nil
 887  }
 888  
 889  // Handle the GC mark phase.
 890  //
 891  // We have sequence numbers for both start and end because they
 892  // can happen on completely different threads. We want an explicit
 893  // partial order edge between start and end here, otherwise we're
 894  // relying entirely on timestamps to make sure we don't advance a
 895  // GCEnd for a _different_ GC cycle if timestamps are wildly broken.
 896  func (o *ordering) advanceGCActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 897  	seq := ev.args[0]
 898  	if gen == o.initialGen {
 899  		if o.gcState != gcUndetermined {
 900  			return curCtx, false, fmt.Errorf("GCActive in the first generation isn't first GC event")
 901  		}
 902  		o.gcSeq = seq
 903  		o.gcState = gcRunning
 904  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 905  		return curCtx, true, nil
 906  	}
 907  	if seq != o.gcSeq+1 {
 908  		// This is not the right GC cycle.
 909  		return curCtx, false, nil
 910  	}
 911  	if o.gcState != gcRunning {
 912  		return curCtx, false, fmt.Errorf("encountered GCActive while GC was not in progress")
 913  	}
 914  	o.gcSeq = seq
 915  	if err := validateCtx(curCtx, userGoReqs); err != nil {
 916  		return curCtx, false, err
 917  	}
 918  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 919  	return curCtx, true, nil
 920  }
 921  
 922  func (o *ordering) advanceGCBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 923  	seq := ev.args[0]
 924  	if o.gcState == gcUndetermined {
 925  		o.gcSeq = seq
 926  		o.gcState = gcRunning
 927  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 928  		return curCtx, true, nil
 929  	}
 930  	if seq != o.gcSeq+1 {
 931  		// This is not the right GC cycle.
 932  		return curCtx, false, nil
 933  	}
 934  	if o.gcState == gcRunning {
 935  		return curCtx, false, fmt.Errorf("encountered GCBegin while GC was already in progress")
 936  	}
 937  	o.gcSeq = seq
 938  	o.gcState = gcRunning
 939  	if err := validateCtx(curCtx, userGoReqs); err != nil {
 940  		return curCtx, false, err
 941  	}
 942  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 943  	return curCtx, true, nil
 944  }
 945  
 946  func (o *ordering) advanceGCEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 947  	seq := ev.args[0]
 948  	if seq != o.gcSeq+1 {
 949  		// This is not the right GC cycle.
 950  		return curCtx, false, nil
 951  	}
 952  	if o.gcState == gcNotRunning {
 953  		return curCtx, false, fmt.Errorf("encountered GCEnd when GC was not in progress")
 954  	}
 955  	if o.gcState == gcUndetermined {
 956  		return curCtx, false, fmt.Errorf("encountered GCEnd when GC was in an undetermined state")
 957  	}
 958  	o.gcSeq = seq
 959  	o.gcState = gcNotRunning
 960  	if err := validateCtx(curCtx, userGoReqs); err != nil {
 961  		return curCtx, false, err
 962  	}
 963  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 964  	return curCtx, true, nil
 965  }
 966  
 967  func (o *ordering) advanceAnnotation(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 968  	// Handle simple instantaneous events that require a G.
 969  	if err := validateCtx(curCtx, userGoReqs); err != nil {
 970  		return curCtx, false, err
 971  	}
 972  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 973  	return curCtx, true, nil
 974  }
 975  
 976  func (o *ordering) advanceHeapMetric(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 977  	// Handle allocation metrics, which don't require a G.
 978  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mustHave, G: mayHave}); err != nil {
 979  		return curCtx, false, err
 980  	}
 981  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 982  	return curCtx, true, nil
 983  }
 984  
 985  func (o *ordering) advanceGCSweepBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 986  	// Handle sweep, which is bound to a P and doesn't require a G.
 987  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mustHave, G: mayHave}); err != nil {
 988  		return curCtx, false, err
 989  	}
 990  	if err := o.pStates[curCtx.P].beginRange(makeRangeType(ev.typ, 0)); err != nil {
 991  		return curCtx, false, err
 992  	}
 993  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
 994  	return curCtx, true, nil
 995  }
 996  
 997  func (o *ordering) advanceGCSweepActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
 998  	pid := ProcID(ev.args[0])
 999  	// N.B. In practice Ps can't block while they're sweeping, so this can only
1000  	// ever reference curCtx.P. However, be lenient about this like we are with
1001  	// GCMarkAssistActive; there's no reason the runtime couldn't change to block
1002  	// in the middle of a sweep.
1003  	pState, ok := o.pStates[pid]
1004  	if !ok {
1005  		return curCtx, false, fmt.Errorf("encountered GCSweepActive for unknown proc %d", pid)
1006  	}
1007  	if err := pState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil {
1008  		return curCtx, false, err
1009  	}
1010  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
1011  	return curCtx, true, nil
1012  }
1013  
1014  func (o *ordering) advanceGCSweepEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
1015  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mustHave, G: mayHave}); err != nil {
1016  		return curCtx, false, err
1017  	}
1018  	_, err := o.pStates[curCtx.P].endRange(ev.typ)
1019  	if err != nil {
1020  		return curCtx, false, err
1021  	}
1022  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
1023  	return curCtx, true, nil
1024  }
1025  
1026  func (o *ordering) advanceGoRangeBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
1027  	// Handle special goroutine-bound event ranges.
1028  	if err := validateCtx(curCtx, userGoReqs); err != nil {
1029  		return curCtx, false, err
1030  	}
1031  	desc := stringID(0)
1032  	if ev.typ == tracev2.EvSTWBegin {
1033  		desc = stringID(ev.args[0])
1034  	}
1035  	gState, ok := o.gStates[curCtx.G]
1036  	if !ok {
1037  		return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G)
1038  	}
1039  	if err := gState.beginRange(makeRangeType(ev.typ, desc)); err != nil {
1040  		return curCtx, false, err
1041  	}
1042  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
1043  	return curCtx, true, nil
1044  }
1045  
1046  func (o *ordering) advanceGoRangeActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
1047  	gid := GoID(ev.args[0])
1048  	// N.B. Like GoStatus, this can happen at any time, because it can
1049  	// reference a non-running goroutine. Don't check anything about the
1050  	// current scheduler context.
1051  	gState, ok := o.gStates[gid]
1052  	if !ok {
1053  		return curCtx, false, fmt.Errorf("uninitialized goroutine %d found during %s", gid, o.evName(ev.typ))
1054  	}
1055  	if err := gState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil {
1056  		return curCtx, false, err
1057  	}
1058  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
1059  	return curCtx, true, nil
1060  }
1061  
1062  func (o *ordering) advanceGoRangeEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
1063  	if err := validateCtx(curCtx, userGoReqs); err != nil {
1064  		return curCtx, false, err
1065  	}
1066  	gState, ok := o.gStates[curCtx.G]
1067  	if !ok {
1068  		return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G)
1069  	}
1070  	desc, err := gState.endRange(ev.typ)
1071  	if err != nil {
1072  		return curCtx, false, err
1073  	}
1074  	if ev.typ == tracev2.EvSTWEnd {
1075  		// Smuggle the kind into the event.
1076  		// Don't use ev.extra here so we have symmetry with STWBegin.
1077  		ev.args[0] = uint64(desc)
1078  	}
1079  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
1080  	return curCtx, true, nil
1081  }
1082  
1083  func (o *ordering) advanceAllocFree(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
1084  	// Handle simple instantaneous events that may or may not have a P.
1085  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mayHave, G: mayHave}); err != nil {
1086  		return curCtx, false, err
1087  	}
1088  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
1089  	return curCtx, true, nil
1090  }
1091  
1092  // Next returns the next event in the ordering.
1093  func (o *ordering) Next() (Event, bool) {
1094  	return o.queue.pop()
1095  }
1096  
1097  // schedCtx represents the scheduling resources associated with an event.
1098  type schedCtx struct {
1099  	G GoID
1100  	P ProcID
1101  	M ThreadID
1102  }
1103  
1104  // validateCtx ensures that ctx conforms to some reqs, returning an error if
1105  // it doesn't.
1106  func validateCtx(ctx schedCtx, reqs schedReqs) error {
1107  	// Check thread requirements.
1108  	if reqs.M == mustHave && ctx.M == NoThread {
1109  		return fmt.Errorf("expected a thread but didn't have one")
1110  	} else if reqs.M == mustNotHave && ctx.M != NoThread {
1111  		return fmt.Errorf("expected no thread but had one")
1112  	}
1113  
1114  	// Check proc requirements.
1115  	if reqs.P == mustHave && ctx.P == NoProc {
1116  		return fmt.Errorf("expected a proc but didn't have one")
1117  	} else if reqs.P == mustNotHave && ctx.P != NoProc {
1118  		return fmt.Errorf("expected no proc but had one")
1119  	}
1120  
1121  	// Check goroutine requirements.
1122  	if reqs.G == mustHave && ctx.G == NoGoroutine {
1123  		return fmt.Errorf("expected a goroutine but didn't have one")
1124  	} else if reqs.G == mustNotHave && ctx.G != NoGoroutine {
1125  		return fmt.Errorf("expected no goroutine but had one")
1126  	}
1127  	return nil
1128  }
1129  
1130  // gcState is a trinary variable for the current state of the GC.
1131  //
1132  // The third state besides "enabled" and "disabled" is "undetermined."
1133  type gcState uint8
1134  
1135  const (
1136  	gcUndetermined gcState = iota
1137  	gcNotRunning
1138  	gcRunning
1139  )
1140  
1141  // String returns a human-readable string for the GC state.
1142  func (s gcState) String() string {
1143  	switch s {
1144  	case gcUndetermined:
1145  		return "Undetermined"
1146  	case gcNotRunning:
1147  		return "NotRunning"
1148  	case gcRunning:
1149  		return "Running"
1150  	}
1151  	return "Bad"
1152  }
1153  
1154  // userRegion represents a unique user region when attached to some gState.
1155  type userRegion struct {
1156  	// name must be a resolved string because the string ID for the same
1157  	// string may change across generations, but we care about checking
1158  	// the value itself.
1159  	taskID TaskID
1160  	name   []byte
1161  }
1162  
1163  // rangeType is a way to classify special ranges of time.
1164  //
1165  // These typically correspond 1:1 with "Begin" events, but
1166  // they may have an optional subtype that describes the range
1167  // in more detail.
1168  type rangeType struct {
1169  	typ  tracev2.EventType // "Begin" event.
1170  	desc stringID          // Optional subtype.
1171  }
1172  
1173  // makeRangeType constructs a new rangeType.
1174  func makeRangeType(typ tracev2.EventType, desc stringID) rangeType {
1175  	if styp := tracev2.Specs()[typ].StartEv; styp != tracev2.EvNone {
1176  		typ = styp
1177  	}
1178  	return rangeType{typ, desc}
1179  }
1180  
1181  // gState is the state of a goroutine at a point in the trace.
1182  type gState struct {
1183  	id     GoID
1184  	status tracev2.GoStatus
1185  	seq    seqCounter
1186  
1187  	// regions are the active user regions for this goroutine.
1188  	regions []userRegion
1189  
1190  	// rangeState is the state of special time ranges bound to this goroutine.
1191  	rangeState
1192  }
1193  
1194  // beginRegion starts a user region on the goroutine.
1195  func (s *gState) beginRegion(r userRegion) error {
1196  	s.regions = append(s.regions, r)
1197  	return nil
1198  }
1199  
1200  // endRegion ends a user region on the goroutine.
1201  func (s *gState) endRegion(r userRegion) error {
1202  	if len(s.regions) == 0 {
1203  		// We do not know about regions that began before tracing started.
1204  		return nil
1205  	}
1206  	if next := s.regions[len(s.regions)-1]; next != r {
1207  		return fmt.Errorf("misuse of region in goroutine %v: region end %v when the inner-most active region start event is %v", s.id, r, next)
1208  	}
1209  	s.regions = s.regions[:len(s.regions)-1]
1210  	return nil
1211  }
1212  
1213  // pState is the state of a proc at a point in the trace.
1214  type pState struct {
1215  	id     ProcID
1216  	status tracev2.ProcStatus
1217  	seq    seqCounter
1218  
1219  	// rangeState is the state of special time ranges bound to this proc.
1220  	rangeState
1221  }
1222  
1223  // mState is the state of a thread at a point in the trace.
1224  type mState struct {
1225  	g GoID   // Goroutine bound to this M. (The goroutine's state is Executing.)
1226  	p ProcID // Proc bound to this M. (The proc's state is Executing.)
1227  }
1228  
1229  // rangeState represents the state of special time ranges.
1230  type rangeState struct {
1231  	// inFlight contains the rangeTypes of any ranges bound to a resource.
1232  	inFlight []rangeType
1233  }
1234  
1235  // beginRange begins a special range in time on the goroutine.
1236  //
1237  // Returns an error if the range is already in progress.
1238  func (s *rangeState) beginRange(typ rangeType) error {
1239  	if s.hasRange(typ) {
1240  		return fmt.Errorf("discovered event already in-flight for when starting event %v", tracev2.Specs()[typ.typ].Name)
1241  	}
1242  	s.inFlight = append(s.inFlight, typ)
1243  	return nil
1244  }
1245  
1246  // activeRange marks special range in time on the goroutine as active in the
1247  // initial generation, or confirms that it is indeed active in later generations.
1248  func (s *rangeState) activeRange(typ rangeType, isInitialGen bool) error {
1249  	if isInitialGen {
1250  		if s.hasRange(typ) {
1251  			return fmt.Errorf("found named active range already in first gen: %v", typ)
1252  		}
1253  		s.inFlight = append(s.inFlight, typ)
1254  	} else if !s.hasRange(typ) {
1255  		return fmt.Errorf("resource is missing active range: %v %v", tracev2.Specs()[typ.typ].Name, s.inFlight)
1256  	}
1257  	return nil
1258  }
1259  
1260  // hasRange returns true if a special time range on the goroutine as in progress.
1261  func (s *rangeState) hasRange(typ rangeType) bool {
1262  	return slices.Contains(s.inFlight, typ)
1263  }
1264  
1265  // endRange ends a special range in time on the goroutine.
1266  //
1267  // This must line up with the start event type  of the range the goroutine is currently in.
1268  func (s *rangeState) endRange(typ tracev2.EventType) (stringID, error) {
1269  	st := tracev2.Specs()[typ].StartEv
1270  	idx := -1
1271  	for i, r := range s.inFlight {
1272  		if r.typ == st {
1273  			idx = i
1274  			break
1275  		}
1276  	}
1277  	if idx < 0 {
1278  		return 0, fmt.Errorf("tried to end event %v, but not in-flight", tracev2.Specs()[st].Name)
1279  	}
1280  	// Swap remove.
1281  	desc := s.inFlight[idx].desc
1282  	s.inFlight[idx], s.inFlight[len(s.inFlight)-1] = s.inFlight[len(s.inFlight)-1], s.inFlight[idx]
1283  	s.inFlight = s.inFlight[:len(s.inFlight)-1]
1284  	return desc, nil
1285  }
1286  
1287  // seqCounter represents a global sequence counter for a resource.
1288  type seqCounter struct {
1289  	gen uint64 // The generation for the local sequence counter seq.
1290  	seq uint64 // The sequence number local to the generation.
1291  }
1292  
1293  // makeSeq creates a new seqCounter.
1294  func makeSeq(gen, seq uint64) seqCounter {
1295  	return seqCounter{gen: gen, seq: seq}
1296  }
1297  
1298  // succeeds returns true if a is the immediate successor of b.
1299  func (a seqCounter) succeeds(b seqCounter) bool {
1300  	return a.gen == b.gen && a.seq == b.seq+1
1301  }
1302  
1303  // String returns a debug string representation of the seqCounter.
1304  func (c seqCounter) String() string {
1305  	return fmt.Sprintf("%d (gen=%d)", c.seq, c.gen)
1306  }
1307  
1308  func dumpOrdering(order *ordering) []byte {
1309  	var sb bytes.Buffer
1310  	for id, state := range order.gStates {
1311  		fmt.Fprintf(&sb, "G %d [status=%s seq=%s]\n", id, state.status, state.seq)
1312  	}
1313  	fmt.Fprintln(&sb)
1314  	for id, state := range order.pStates {
1315  		fmt.Fprintf(&sb, "P %d [status=%s seq=%s]\n", id, state.status, state.seq)
1316  	}
1317  	fmt.Fprintln(&sb)
1318  	for id, state := range order.mStates {
1319  		fmt.Fprintf(&sb, "M %d [g=%d p=%d]\n", id, state.g, state.p)
1320  	}
1321  	fmt.Fprintln(&sb)
1322  	fmt.Fprintf(&sb, "GC %d %s\n", order.gcSeq, order.gcState)
1323  	return sb.String()
1324  }
1325  
1326  // taskState represents an active task.
1327  type taskState struct {
1328  	// name is the type of the active task.
1329  	name []byte
1330  
1331  	// parentID is the parent ID of the active task.
1332  	parentID TaskID
1333  }
1334  
1335  // queue implements a growable ring buffer with a queue API.
1336  type queue[T any] struct {
1337  	start, end int
1338  	buf        []T
1339  }
1340  
1341  // push adds a new event to the back of the queue.
1342  func (q *queue[T]) push(value T) {
1343  	if q.end-q.start == len(q.buf) {
1344  		q.grow()
1345  	}
1346  	q.buf[q.end%len(q.buf)] = value
1347  	q.end++
1348  }
1349  
1350  // grow increases the size of the queue.
1351  func (q *queue[T]) grow() {
1352  	if len(q.buf) == 0 {
1353  		q.buf = []T{:2}
1354  		return
1355  	}
1356  
1357  	// Create new buf and copy data over.
1358  	newBuf := []T{:len(q.buf)*2}
1359  	pivot := q.start % len(q.buf)
1360  	first, last := q.buf[pivot:], q.buf[:pivot]
1361  	copy(newBuf[:len(first)], first)
1362  	copy(newBuf[len(first):], last)
1363  
1364  	// Update the queue state.
1365  	q.start = 0
1366  	q.end = len(q.buf)
1367  	q.buf = newBuf
1368  }
1369  
1370  // pop removes an event from the front of the queue. If the
1371  // queue is empty, it returns an EventBad event.
1372  func (q *queue[T]) pop() (T, bool) {
1373  	if q.end-q.start == 0 {
1374  		var zero T
1375  		return zero, false
1376  	}
1377  	elem := &q.buf[q.start%len(q.buf)]
1378  	value := *elem
1379  	var zero T
1380  	*elem = zero // Clear the entry before returning, so we don't hold onto old tables.
1381  	q.start++
1382  	return value, true
1383  }
1384  
1385  // makeEvent creates an Event from the provided information.
1386  //
1387  // It's just a convenience function; it's always OK to construct
1388  // an Event manually if this isn't quite the right way to express
1389  // the contents of the event.
1390  func makeEvent(table *evTable, ctx schedCtx, typ tracev2.EventType, time Time, args ...uint64) Event {
1391  	ev := Event{
1392  		table: table,
1393  		ctx:   ctx,
1394  		base: baseEvent{
1395  			typ:  typ,
1396  			time: time,
1397  		},
1398  	}
1399  	copy(ev.base.args[:], args)
1400  	return ev
1401  }
1402  
1403  // schedReqs is a set of constraints on what the scheduling
1404  // context must look like.
1405  type schedReqs struct {
1406  	M constraint
1407  	P constraint
1408  	G constraint
1409  }
1410  
1411  // constraint represents a various presence requirements.
1412  type constraint uint8
1413  
1414  const (
1415  	mustNotHave constraint = iota
1416  	mayHave
1417  	mustHave
1418  )
1419  
1420  // userGoReqs is a common requirement among events that are running
1421  // or are close to running user code.
1422  var userGoReqs = schedReqs{M: mustHave, P: mustHave, G: mustHave}
1423