1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package trace
6 7 import (
8 "fmt"
9 "slices"
10 "bytes"
11 12 "internal/trace/tracev2"
13 "internal/trace/version"
14 )
15 16 // ordering emulates Go scheduler state for both validation and
17 // for putting events in the right order.
18 //
19 // The interface to ordering consists of two methods: Advance
20 // and Next. Advance is called to try and advance an event and
21 // add completed events to the ordering. Next is used to pick
22 // off events in the ordering.
23 type ordering struct {
24 traceVer version.Version
25 gStates map[GoID]*gState
26 pStates map[ProcID]*pState // TODO: The keys are dense, so this can be a slice.
27 mStates map[ThreadID]*mState
28 activeTasks map[TaskID]taskState
29 gcSeq uint64
30 gcState gcState
31 initialGen uint64
32 queue queue[Event]
33 }
34 35 // Advance checks if it's valid to proceed with ev which came from thread m.
36 //
37 // It assumes the gen value passed to it is monotonically increasing across calls.
38 //
39 // If any error is returned, then the trace is broken and trace parsing must cease.
40 // If it's not valid to advance with ev, but no error was encountered, the caller
41 // should attempt to advance with other candidate events from other threads. If the
42 // caller runs out of candidates, the trace is invalid.
43 //
44 // If this returns true, Next is guaranteed to return a complete event. However,
45 // multiple events may be added to the ordering, so the caller should (but is not
46 // required to) continue to call Next until it is exhausted.
47 func (o *ordering) Advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64) (bool, error) {
48 if o.initialGen == 0 {
49 // Set the initial gen if necessary.
50 o.initialGen = gen
51 }
52 53 var curCtx, newCtx schedCtx
54 curCtx.M = m
55 newCtx.M = m
56 57 var ms *mState
58 if m == NoThread {
59 curCtx.P = NoProc
60 curCtx.G = NoGoroutine
61 newCtx = curCtx
62 } else {
63 // Pull out or create the mState for this event.
64 var ok bool
65 ms, ok = o.mStates[m]
66 if !ok {
67 ms = &mState{
68 g: NoGoroutine,
69 p: NoProc,
70 }
71 o.mStates[m] = ms
72 }
73 curCtx.P = ms.p
74 curCtx.G = ms.g
75 newCtx = curCtx
76 }
77 78 f := orderingDispatch[ev.typ]
79 if f == nil {
80 return false, fmt.Errorf("bad event type found while ordering: %v", ev.typ)
81 }
82 newCtx, ok, err := f(o, ev, evt, m, gen, curCtx)
83 if err == nil && ok && ms != nil {
84 // Update the mState for this event.
85 ms.p = newCtx.P
86 ms.g = newCtx.G
87 }
88 return ok, err
89 }
90 91 func (o *ordering) evName(typ tracev2.EventType) []byte {
92 return o.traceVer.EventName(typ)
93 }
94 95 type orderingHandleFunc func(o *ordering, ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error)
96 97 var orderingDispatch = [256]orderingHandleFunc{
98 // Procs.
99 tracev2.EvProcsChange: (*ordering).advanceAnnotation,
100 tracev2.EvProcStart: (*ordering).advanceProcStart,
101 tracev2.EvProcStop: (*ordering).advanceProcStop,
102 tracev2.EvProcSteal: (*ordering).advanceProcSteal,
103 tracev2.EvProcStatus: (*ordering).advanceProcStatus,
104 105 // Goroutines.
106 tracev2.EvGoCreate: (*ordering).advanceGoCreate,
107 tracev2.EvGoCreateSyscall: (*ordering).advanceGoCreateSyscall,
108 tracev2.EvGoStart: (*ordering).advanceGoStart,
109 tracev2.EvGoDestroy: (*ordering).advanceGoStopExec,
110 tracev2.EvGoDestroySyscall: (*ordering).advanceGoDestroySyscall,
111 tracev2.EvGoStop: (*ordering).advanceGoStopExec,
112 tracev2.EvGoBlock: (*ordering).advanceGoStopExec,
113 tracev2.EvGoUnblock: (*ordering).advanceGoUnblock,
114 tracev2.EvGoSyscallBegin: (*ordering).advanceGoSyscallBegin,
115 tracev2.EvGoSyscallEnd: (*ordering).advanceGoSyscallEnd,
116 tracev2.EvGoSyscallEndBlocked: (*ordering).advanceGoSyscallEndBlocked,
117 tracev2.EvGoStatus: (*ordering).advanceGoStatus,
118 119 // STW.
120 tracev2.EvSTWBegin: (*ordering).advanceGoRangeBegin,
121 tracev2.EvSTWEnd: (*ordering).advanceGoRangeEnd,
122 123 // GC events.
124 tracev2.EvGCActive: (*ordering).advanceGCActive,
125 tracev2.EvGCBegin: (*ordering).advanceGCBegin,
126 tracev2.EvGCEnd: (*ordering).advanceGCEnd,
127 tracev2.EvGCSweepActive: (*ordering).advanceGCSweepActive,
128 tracev2.EvGCSweepBegin: (*ordering).advanceGCSweepBegin,
129 tracev2.EvGCSweepEnd: (*ordering).advanceGCSweepEnd,
130 tracev2.EvGCMarkAssistActive: (*ordering).advanceGoRangeActive,
131 tracev2.EvGCMarkAssistBegin: (*ordering).advanceGoRangeBegin,
132 tracev2.EvGCMarkAssistEnd: (*ordering).advanceGoRangeEnd,
133 tracev2.EvHeapAlloc: (*ordering).advanceHeapMetric,
134 tracev2.EvHeapGoal: (*ordering).advanceHeapMetric,
135 136 // Annotations.
137 tracev2.EvGoLabel: (*ordering).advanceAnnotation,
138 tracev2.EvUserTaskBegin: (*ordering).advanceUserTaskBegin,
139 tracev2.EvUserTaskEnd: (*ordering).advanceUserTaskEnd,
140 tracev2.EvUserRegionBegin: (*ordering).advanceUserRegionBegin,
141 tracev2.EvUserRegionEnd: (*ordering).advanceUserRegionEnd,
142 tracev2.EvUserLog: (*ordering).advanceAnnotation,
143 144 // Coroutines. Added in Go 1.23.
145 tracev2.EvGoSwitch: (*ordering).advanceGoSwitch,
146 tracev2.EvGoSwitchDestroy: (*ordering).advanceGoSwitch,
147 tracev2.EvGoCreateBlocked: (*ordering).advanceGoCreate,
148 149 // GoStatus event with a stack. Added in Go 1.23.
150 tracev2.EvGoStatusStack: (*ordering).advanceGoStatus,
151 152 // Experimental events.
153 154 // Experimental heap span events. Added in Go 1.23.
155 tracev2.EvSpan: (*ordering).advanceAllocFree,
156 tracev2.EvSpanAlloc: (*ordering).advanceAllocFree,
157 tracev2.EvSpanFree: (*ordering).advanceAllocFree,
158 159 // Experimental heap object events. Added in Go 1.23.
160 tracev2.EvHeapObject: (*ordering).advanceAllocFree,
161 tracev2.EvHeapObjectAlloc: (*ordering).advanceAllocFree,
162 tracev2.EvHeapObjectFree: (*ordering).advanceAllocFree,
163 164 // Experimental goroutine stack events. Added in Go 1.23.
165 tracev2.EvGoroutineStack: (*ordering).advanceAllocFree,
166 tracev2.EvGoroutineStackAlloc: (*ordering).advanceAllocFree,
167 tracev2.EvGoroutineStackFree: (*ordering).advanceAllocFree,
168 }
169 170 func (o *ordering) advanceProcStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
171 pid := ProcID(ev.args[0])
172 status := tracev2.ProcStatus(ev.args[1])
173 if int(status) >= len(tracev2ProcStatus2ProcState) {
174 return curCtx, false, fmt.Errorf("invalid status for proc %d: %d", pid, status)
175 }
176 oldState := tracev2ProcStatus2ProcState[status]
177 if s, ok := o.pStates[pid]; ok {
178 if status == tracev2.ProcSyscallAbandoned && s.status == tracev2.ProcSyscall {
179 // ProcSyscallAbandoned is a special case of ProcSyscall. It indicates a
180 // potential loss of information, but if we're already in ProcSyscall,
181 // we haven't lost the relevant information. Promote the status and advance.
182 oldState = ProcRunning
183 ev.args[1] = uint64(tracev2.ProcSyscall)
184 } else if status == tracev2.ProcSyscallAbandoned && s.status == tracev2.ProcSyscallAbandoned {
185 // If we're passing through ProcSyscallAbandoned, then there's no promotion
186 // to do. We've lost the M that this P is associated with. However it got there,
187 // it's going to appear as idle in the API, so pass through as idle.
188 oldState = ProcIdle
189 ev.args[1] = uint64(tracev2.ProcSyscallAbandoned)
190 } else if s.status != status {
191 return curCtx, false, fmt.Errorf("inconsistent status for proc %d: old %v vs. new %v", pid, s.status, status)
192 }
193 s.seq = makeSeq(gen, 0) // Reset seq.
194 } else {
195 o.pStates[pid] = &pState{id: pid, status: status, seq: makeSeq(gen, 0)}
196 if gen == o.initialGen {
197 oldState = ProcUndetermined
198 } else {
199 oldState = ProcNotExist
200 }
201 }
202 ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition.
203 204 // Bind the proc to the new context, if it's running.
205 newCtx := curCtx
206 if status == tracev2.ProcRunning || status == tracev2.ProcSyscall {
207 newCtx.P = pid
208 }
209 // If we're advancing through ProcSyscallAbandoned *but* oldState is running then we've
210 // promoted it to ProcSyscall. However, because it's ProcSyscallAbandoned, we know this
211 // P is about to get stolen and its status very likely isn't being emitted by the same
212 // thread it was bound to. Since this status is Running -> Running and Running is binding,
213 // we need to make sure we emit it in the right context: the context to which it is bound.
214 // Find it, and set our current context to it.
215 if status == tracev2.ProcSyscallAbandoned && oldState == ProcRunning {
216 // N.B. This is slow but it should be fairly rare.
217 found := false
218 for mid, ms := range o.mStates {
219 if ms.p == pid {
220 curCtx.M = mid
221 curCtx.P = pid
222 curCtx.G = ms.g
223 found = true
224 }
225 }
226 if !found {
227 return curCtx, false, fmt.Errorf("failed to find sched context for proc %d that's about to be stolen", pid)
228 }
229 }
230 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
231 return newCtx, true, nil
232 }
233 234 func (o *ordering) advanceProcStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
235 pid := ProcID(ev.args[0])
236 seq := makeSeq(gen, ev.args[1])
237 238 // Try to advance. We might fail here due to sequencing, because the P hasn't
239 // had a status emitted, or because we already have a P and we're in a syscall,
240 // and we haven't observed that it was stolen from us yet.
241 state, ok := o.pStates[pid]
242 if !ok || state.status != tracev2.ProcIdle || !seq.succeeds(state.seq) || curCtx.P != NoProc {
243 // We can't make an inference as to whether this is bad. We could just be seeing
244 // a ProcStart on a different M before the proc's state was emitted, or before we
245 // got to the right point in the trace.
246 //
247 // Note that we also don't advance here if we have a P and we're in a syscall.
248 return curCtx, false, nil
249 }
250 // We can advance this P. Check some invariants.
251 //
252 // We might have a goroutine if a goroutine is exiting a syscall.
253 reqs := schedReqs{M: mustHave, P: mustNotHave, G: mayHave}
254 if err := validateCtx(curCtx, reqs); err != nil {
255 return curCtx, false, err
256 }
257 state.status = tracev2.ProcRunning
258 state.seq = seq
259 newCtx := curCtx
260 newCtx.P = pid
261 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
262 return newCtx, true, nil
263 }
264 265 func (o *ordering) advanceProcStop(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
266 // We must be able to advance this P.
267 //
268 // There are 2 ways a P can stop: ProcStop and ProcSteal. ProcStop is used when the P
269 // is stopped by the same M that started it, while ProcSteal is used when another M
270 // steals the P by stopping it from a distance.
271 //
272 // Since a P is bound to an M, and we're stopping on the same M we started, it must
273 // always be possible to advance the current M's P from a ProcStop. This is also why
274 // ProcStop doesn't need a sequence number.
275 state, ok := o.pStates[curCtx.P]
276 if !ok {
277 return curCtx, false, fmt.Errorf("event %s for proc (%v) that doesn't exist", o.evName(ev.typ), curCtx.P)
278 }
279 if state.status != tracev2.ProcRunning && state.status != tracev2.ProcSyscall {
280 return curCtx, false, fmt.Errorf("%s event for proc that's not %s or %s", o.evName(ev.typ), tracev2.ProcRunning, tracev2.ProcSyscall)
281 }
282 reqs := schedReqs{M: mustHave, P: mustHave, G: mayHave}
283 if err := validateCtx(curCtx, reqs); err != nil {
284 return curCtx, false, err
285 }
286 state.status = tracev2.ProcIdle
287 newCtx := curCtx
288 newCtx.P = NoProc
289 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
290 return newCtx, true, nil
291 }
292 293 func (o *ordering) advanceProcSteal(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
294 pid := ProcID(ev.args[0])
295 seq := makeSeq(gen, ev.args[1])
296 state, ok := o.pStates[pid]
297 if !ok || (state.status != tracev2.ProcSyscall && state.status != tracev2.ProcSyscallAbandoned) || !seq.succeeds(state.seq) {
298 // We can't make an inference as to whether this is bad. We could just be seeing
299 // a ProcStart on a different M before the proc's state was emitted, or before we
300 // got to the right point in the trace.
301 return curCtx, false, nil
302 }
303 // We can advance this P. Check some invariants.
304 reqs := schedReqs{M: mustHave, P: mayHave, G: mayHave}
305 if err := validateCtx(curCtx, reqs); err != nil {
306 return curCtx, false, err
307 }
308 // Smuggle in the P state that let us advance so we can surface information to the event.
309 // Specifically, we need to make sure that the event is interpreted not as a transition of
310 // ProcRunning -> ProcIdle but ProcIdle -> ProcIdle instead.
311 //
312 // ProcRunning is binding, but we may be running with a P on the current M and we can't
313 // bind another P. This P is about to go ProcIdle anyway.
314 oldStatus := state.status
315 ev.extra(version.Go122)[0] = uint64(oldStatus)
316 317 // Update the P's status and sequence number.
318 state.status = tracev2.ProcIdle
319 state.seq = seq
320 321 // If we've lost information then don't try to do anything with the M.
322 // It may have moved on and we can't be sure.
323 if oldStatus == tracev2.ProcSyscallAbandoned {
324 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
325 return curCtx, true, nil
326 }
327 328 // Validate that the M we're stealing from is what we expect.
329 mid := ThreadID(ev.args[2]) // The M we're stealing from.
330 331 newCtx := curCtx
332 if mid == curCtx.M {
333 // We're stealing from ourselves. This behaves like a ProcStop.
334 if curCtx.P != pid {
335 return curCtx, false, fmt.Errorf("tried to self-steal proc %d (thread %d), but got proc %d instead", pid, mid, curCtx.P)
336 }
337 newCtx.P = NoProc
338 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
339 return newCtx, true, nil
340 }
341 342 // We're stealing from some other M.
343 mState, ok := o.mStates[mid]
344 if !ok {
345 return curCtx, false, fmt.Errorf("stole proc from non-existent thread %d", mid)
346 }
347 348 // Make sure we're actually stealing the right P.
349 if mState.p != pid {
350 return curCtx, false, fmt.Errorf("tried to steal proc %d from thread %d, but got proc %d instead", pid, mid, mState.p)
351 }
352 353 // Tell the M it has no P so it can proceed.
354 //
355 // This is safe because we know the P was in a syscall and
356 // the other M must be trying to get out of the syscall.
357 // GoSyscallEndBlocked cannot advance until the corresponding
358 // M loses its P.
359 mState.p = NoProc
360 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
361 return newCtx, true, nil
362 }
363 364 func (o *ordering) advanceGoStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
365 gid := GoID(ev.args[0])
366 mid := ThreadID(ev.args[1])
367 status := tracev2.GoStatus(ev.args[2])
368 369 if int(status) >= len(tracev2GoStatus2GoState) {
370 return curCtx, false, fmt.Errorf("invalid status for goroutine %d: %d", gid, status)
371 }
372 oldState := tracev2GoStatus2GoState[status]
373 if s, ok := o.gStates[gid]; ok {
374 if s.status != status {
375 return curCtx, false, fmt.Errorf("inconsistent status for goroutine %d: old %v vs. new %v", gid, s.status, status)
376 }
377 s.seq = makeSeq(gen, 0) // Reset seq.
378 } else if gen == o.initialGen {
379 // Set the state.
380 o.gStates[gid] = &gState{id: gid, status: status, seq: makeSeq(gen, 0)}
381 oldState = GoUndetermined
382 } else {
383 return curCtx, false, fmt.Errorf("found goroutine status for new goroutine after the first generation: id=%v status=%v", gid, status)
384 }
385 ev.args[2] = uint64(oldState)<<32 | uint64(status) // Smuggle in the old state for StateTransition.
386 387 newCtx := curCtx
388 switch status {
389 case tracev2.GoRunning:
390 // Bind the goroutine to the new context, since it's running.
391 newCtx.G = gid
392 case tracev2.GoSyscall:
393 if mid == NoThread {
394 return curCtx, false, fmt.Errorf("found goroutine %d in syscall without a thread", gid)
395 }
396 // Is the syscall on this thread? If so, bind it to the context.
397 // Otherwise, we're talking about a G sitting in a syscall on an M.
398 // Validate the named M.
399 if mid == curCtx.M {
400 if gen != o.initialGen && curCtx.G != gid {
401 // If this isn't the first generation, we *must* have seen this
402 // binding occur already. Even if the G was blocked in a syscall
403 // for multiple generations since trace start, we would have seen
404 // a previous GoStatus event that bound the goroutine to an M.
405 return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, curCtx.G)
406 }
407 newCtx.G = gid
408 break
409 }
410 // Now we're talking about a thread and goroutine that have been
411 // blocked on a syscall for the entire generation. This case must
412 // not have a P; the runtime makes sure that all Ps are traced at
413 // the beginning of a generation, which involves taking a P back
414 // from every thread.
415 ms, ok := o.mStates[mid]
416 if ok {
417 // This M has been seen. That means we must have seen this
418 // goroutine go into a syscall on this thread at some point.
419 if ms.g != gid {
420 // But the G on the M doesn't match. Something's wrong.
421 return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, ms.g)
422 }
423 // This case is just a Syscall->Syscall event, which needs to
424 // appear as having the G currently bound to this M.
425 curCtx.G = ms.g
426 } else if !ok {
427 // The M hasn't been seen yet. That means this goroutine
428 // has just been sitting in a syscall on this M. Create
429 // a state for it.
430 o.mStates[mid] = &mState{g: gid, p: NoProc}
431 // Don't set curCtx.G in this case because this event is the
432 // binding event (and curCtx represents the "before" state).
433 }
434 // Update the current context to the M we're talking about.
435 curCtx.M = mid
436 }
437 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
438 return newCtx, true, nil
439 }
440 441 func (o *ordering) advanceGoCreate(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
442 // Goroutines must be created on a running P, but may or may not be created
443 // by a running goroutine.
444 reqs := schedReqs{M: mustHave, P: mustHave, G: mayHave}
445 if err := validateCtx(curCtx, reqs); err != nil {
446 return curCtx, false, err
447 }
448 // If we have a goroutine, it must be running.
449 if state, ok := o.gStates[curCtx.G]; ok && state.status != tracev2.GoRunning {
450 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
451 }
452 // This goroutine created another. Add a state for it.
453 newgid := GoID(ev.args[0])
454 if _, ok := o.gStates[newgid]; ok {
455 return curCtx, false, fmt.Errorf("tried to create goroutine (%v) that already exists", newgid)
456 }
457 status := tracev2.GoRunnable
458 if ev.typ == tracev2.EvGoCreateBlocked {
459 status = tracev2.GoWaiting
460 }
461 o.gStates[newgid] = &gState{id: newgid, status: status, seq: makeSeq(gen, 0)}
462 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
463 return curCtx, true, nil
464 }
465 466 func (o *ordering) advanceGoStopExec(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
467 // These are goroutine events that all require an active running
468 // goroutine on some thread. They must *always* be advance-able,
469 // since running goroutines are bound to their M.
470 if err := validateCtx(curCtx, userGoReqs); err != nil {
471 return curCtx, false, err
472 }
473 state, ok := o.gStates[curCtx.G]
474 if !ok {
475 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
476 }
477 if state.status != tracev2.GoRunning {
478 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
479 }
480 // Handle each case slightly differently; we just group them together
481 // because they have shared preconditions.
482 newCtx := curCtx
483 switch ev.typ {
484 case tracev2.EvGoDestroy:
485 // This goroutine is exiting itself.
486 delete(o.gStates, curCtx.G)
487 newCtx.G = NoGoroutine
488 case tracev2.EvGoStop:
489 // Goroutine stopped (yielded). It's runnable but not running on this M.
490 state.status = tracev2.GoRunnable
491 newCtx.G = NoGoroutine
492 case tracev2.EvGoBlock:
493 // Goroutine blocked. It's waiting now and not running on this M.
494 state.status = tracev2.GoWaiting
495 newCtx.G = NoGoroutine
496 }
497 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
498 return newCtx, true, nil
499 }
500 501 func (o *ordering) advanceGoStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
502 gid := GoID(ev.args[0])
503 seq := makeSeq(gen, ev.args[1])
504 state, ok := o.gStates[gid]
505 if !ok || state.status != tracev2.GoRunnable || !seq.succeeds(state.seq) {
506 // We can't make an inference as to whether this is bad. We could just be seeing
507 // a GoStart on a different M before the goroutine was created, before it had its
508 // state emitted, or before we got to the right point in the trace yet.
509 return curCtx, false, nil
510 }
511 // We can advance this goroutine. Check some invariants.
512 reqs := schedReqs{M: mustHave, P: mustHave, G: mustNotHave}
513 if err := validateCtx(curCtx, reqs); err != nil {
514 return curCtx, false, err
515 }
516 state.status = tracev2.GoRunning
517 state.seq = seq
518 newCtx := curCtx
519 newCtx.G = gid
520 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
521 return newCtx, true, nil
522 }
523 524 func (o *ordering) advanceGoUnblock(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
525 // N.B. These both reference the goroutine to unblock, not the current goroutine.
526 gid := GoID(ev.args[0])
527 seq := makeSeq(gen, ev.args[1])
528 state, ok := o.gStates[gid]
529 if !ok || state.status != tracev2.GoWaiting || !seq.succeeds(state.seq) {
530 // We can't make an inference as to whether this is bad. We could just be seeing
531 // a GoUnblock on a different M before the goroutine was created and blocked itself,
532 // before it had its state emitted, or before we got to the right point in the trace yet.
533 return curCtx, false, nil
534 }
535 state.status = tracev2.GoRunnable
536 state.seq = seq
537 // N.B. No context to validate. Basically anything can unblock
538 // a goroutine (e.g. sysmon).
539 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
540 return curCtx, true, nil
541 }
542 543 func (o *ordering) advanceGoSwitch(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
544 // GoSwitch and GoSwitchDestroy represent a trio of events:
545 // - Unblock of the goroutine to switch to.
546 // - Block or destroy of the current goroutine.
547 // - Start executing the next goroutine.
548 //
549 // Because it acts like a GoStart for the next goroutine, we can
550 // only advance it if the sequence numbers line up.
551 //
552 // The current goroutine on the thread must be actively running.
553 if err := validateCtx(curCtx, userGoReqs); err != nil {
554 return curCtx, false, err
555 }
556 curGState, ok := o.gStates[curCtx.G]
557 if !ok {
558 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
559 }
560 if curGState.status != tracev2.GoRunning {
561 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
562 }
563 nextg := GoID(ev.args[0])
564 seq := makeSeq(gen, ev.args[1]) // seq is for nextg, not curCtx.G.
565 nextGState, ok := o.gStates[nextg]
566 if !ok || nextGState.status != tracev2.GoWaiting || !seq.succeeds(nextGState.seq) {
567 // We can't make an inference as to whether this is bad. We could just be seeing
568 // a GoSwitch on a different M before the goroutine was created, before it had its
569 // state emitted, or before we got to the right point in the trace yet.
570 return curCtx, false, nil
571 }
572 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
573 574 // Update the state of the executing goroutine and emit an event for it
575 // (GoSwitch and GoSwitchDestroy will be interpreted as GoUnblock events
576 // for nextg).
577 switch ev.typ {
578 case tracev2.EvGoSwitch:
579 // Goroutine blocked. It's waiting now and not running on this M.
580 curGState.status = tracev2.GoWaiting
581 582 // Emit a GoBlock event.
583 // TODO(mknyszek): Emit a reason.
584 o.queue.push(makeEvent(evt, curCtx, tracev2.EvGoBlock, ev.time, 0 /* no reason */, 0 /* no stack */))
585 case tracev2.EvGoSwitchDestroy:
586 // This goroutine is exiting itself.
587 delete(o.gStates, curCtx.G)
588 589 // Emit a GoDestroy event.
590 o.queue.push(makeEvent(evt, curCtx, tracev2.EvGoDestroy, ev.time))
591 }
592 // Update the state of the next goroutine.
593 nextGState.status = tracev2.GoRunning
594 nextGState.seq = seq
595 newCtx := curCtx
596 newCtx.G = nextg
597 598 // Queue an event for the next goroutine starting to run.
599 startCtx := curCtx
600 startCtx.G = NoGoroutine
601 o.queue.push(makeEvent(evt, startCtx, tracev2.EvGoStart, ev.time, uint64(nextg), ev.args[1]))
602 return newCtx, true, nil
603 }
604 605 func (o *ordering) advanceGoSyscallBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
606 // Entering a syscall requires an active running goroutine with a
607 // proc on some thread. It is always advancable.
608 if err := validateCtx(curCtx, userGoReqs); err != nil {
609 return curCtx, false, err
610 }
611 state, ok := o.gStates[curCtx.G]
612 if !ok {
613 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
614 }
615 if state.status != tracev2.GoRunning {
616 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
617 }
618 // Goroutine entered a syscall. It's still running on this P and M.
619 state.status = tracev2.GoSyscall
620 pState, ok := o.pStates[curCtx.P]
621 if !ok {
622 return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, o.evName(ev.typ))
623 }
624 pState.status = tracev2.ProcSyscall
625 // Validate the P sequence number on the event and advance it.
626 //
627 // We have a P sequence number for what is supposed to be a goroutine event
628 // so that we can correctly model P stealing. Without this sequence number here,
629 // the syscall from which a ProcSteal event is stealing can be ambiguous in the
630 // face of broken timestamps. See the go122-syscall-steal-proc-ambiguous test for
631 // more details.
632 //
633 // Note that because this sequence number only exists as a tool for disambiguation,
634 // we can enforce that we have the right sequence number at this point; we don't need
635 // to back off and see if any other events will advance. This is a running P.
636 pSeq := makeSeq(gen, ev.args[0])
637 if !pSeq.succeeds(pState.seq) {
638 return curCtx, false, fmt.Errorf("failed to advance %s: can't make sequence: %s -> %s", o.evName(ev.typ), pState.seq, pSeq)
639 }
640 pState.seq = pSeq
641 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
642 return curCtx, true, nil
643 }
644 645 func (o *ordering) advanceGoSyscallEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
646 // This event is always advance-able because it happens on the same
647 // thread that EvGoSyscallStart happened, and the goroutine can't leave
648 // that thread until its done.
649 if err := validateCtx(curCtx, userGoReqs); err != nil {
650 return curCtx, false, err
651 }
652 state, ok := o.gStates[curCtx.G]
653 if !ok {
654 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
655 }
656 if state.status != tracev2.GoSyscall {
657 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
658 }
659 state.status = tracev2.GoRunning
660 661 // Transfer the P back to running from syscall.
662 pState, ok := o.pStates[curCtx.P]
663 if !ok {
664 return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, o.evName(ev.typ))
665 }
666 if pState.status != tracev2.ProcSyscall {
667 return curCtx, false, fmt.Errorf("expected proc %d in state %v, but got %v instead", curCtx.P, tracev2.ProcSyscall, pState.status)
668 }
669 pState.status = tracev2.ProcRunning
670 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
671 return curCtx, true, nil
672 }
673 674 func (o *ordering) advanceGoSyscallEndBlocked(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
675 // This event becomes advanceable when its P is not in a syscall state
676 // (lack of a P altogether is also acceptable for advancing).
677 // The transfer out of ProcSyscall can happen either voluntarily via
678 // ProcStop or involuntarily via ProcSteal. We may also acquire a new P
679 // before we get here (after the transfer out) but that's OK: that new
680 // P won't be in the ProcSyscall state anymore.
681 //
682 // Basically: while we have a preemptible P, don't advance, because we
683 // *know* from the event that we're going to lose it at some point during
684 // the syscall. We shouldn't advance until that happens.
685 if curCtx.P != NoProc {
686 pState, ok := o.pStates[curCtx.P]
687 if !ok {
688 return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, o.evName(ev.typ))
689 }
690 if pState.status == tracev2.ProcSyscall {
691 return curCtx, false, nil
692 }
693 }
694 // As mentioned above, we may have a P here if we ProcStart
695 // before this event.
696 if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mayHave, G: mustHave}); err != nil {
697 return curCtx, false, err
698 }
699 state, ok := o.gStates[curCtx.G]
700 if !ok {
701 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
702 }
703 if state.status != tracev2.GoSyscall {
704 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
705 }
706 newCtx := curCtx
707 newCtx.G = NoGoroutine
708 state.status = tracev2.GoRunnable
709 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
710 return newCtx, true, nil
711 }
712 713 func (o *ordering) advanceGoCreateSyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
714 // This event indicates that a goroutine is effectively
715 // being created out of a cgo callback. Such a goroutine
716 // is 'created' in the syscall state.
717 if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mayHave, G: mustNotHave}); err != nil {
718 return curCtx, false, err
719 }
720 // This goroutine is effectively being created. Add a state for it.
721 newgid := GoID(ev.args[0])
722 if _, ok := o.gStates[newgid]; ok {
723 return curCtx, false, fmt.Errorf("tried to create goroutine (%v) in syscall that already exists", newgid)
724 }
725 o.gStates[newgid] = &gState{id: newgid, status: tracev2.GoSyscall, seq: makeSeq(gen, 0)}
726 // Goroutine is executing. Bind it to the context.
727 newCtx := curCtx
728 newCtx.G = newgid
729 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
730 return newCtx, true, nil
731 }
732 733 func (o *ordering) advanceGoDestroySyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
734 // This event indicates that a goroutine created for a
735 // cgo callback is disappearing, either because the callback
736 // ending or the C thread that called it is being destroyed.
737 //
738 // Also, treat this as if we lost our P too.
739 // The thread ID may be reused by the platform and we'll get
740 // really confused if we try to steal the P is this is running
741 // with later. The new M with the same ID could even try to
742 // steal back this P from itself!
743 //
744 // The runtime is careful to make sure that any GoCreateSyscall
745 // event will enter the runtime emitting events for reacquiring a P.
746 //
747 // Note: we might have a P here. The P might not be released
748 // eagerly by the runtime, and it might get stolen back later
749 // (or never again, if the program is going to exit).
750 if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mayHave, G: mustHave}); err != nil {
751 return curCtx, false, err
752 }
753 // Check to make sure the goroutine exists in the right state.
754 state, ok := o.gStates[curCtx.G]
755 if !ok {
756 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
757 }
758 if state.status != tracev2.GoSyscall {
759 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %v", o.evName(ev.typ), GoSyscall)
760 }
761 // This goroutine is exiting itself.
762 delete(o.gStates, curCtx.G)
763 newCtx := curCtx
764 newCtx.G = NoGoroutine
765 766 // If we have a proc, then we're dissociating from it now. See the comment at the top of the case.
767 if curCtx.P != NoProc {
768 pState, ok := o.pStates[curCtx.P]
769 if !ok {
770 return curCtx, false, fmt.Errorf("found invalid proc %d during %s", curCtx.P, o.evName(ev.typ))
771 }
772 if pState.status != tracev2.ProcSyscall {
773 return curCtx, false, fmt.Errorf("proc %d in unexpected state %s during %s", curCtx.P, pState.status, o.evName(ev.typ))
774 }
775 // See the go122-create-syscall-reuse-thread-id test case for more details.
776 pState.status = tracev2.ProcSyscallAbandoned
777 newCtx.P = NoProc
778 779 // Queue an extra self-ProcSteal event.
780 extra := makeEvent(evt, curCtx, tracev2.EvProcSteal, ev.time, uint64(curCtx.P))
781 extra.base.extra(version.Go122)[0] = uint64(tracev2.ProcSyscall)
782 o.queue.push(extra)
783 }
784 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
785 return newCtx, true, nil
786 }
787 788 func (o *ordering) advanceUserTaskBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
789 // Handle tasks. Tasks are interesting because:
790 // - There's no Begin event required to reference a task.
791 // - End for a particular task ID can appear multiple times.
792 // As a result, there's very little to validate. The only
793 // thing we have to be sure of is that a task didn't begin
794 // after it had already begun. Task IDs are allowed to be
795 // reused, so we don't care about a Begin after an End.
796 id := TaskID(ev.args[0])
797 if _, ok := o.activeTasks[id]; ok {
798 return curCtx, false, fmt.Errorf("task ID conflict: %d", id)
799 }
800 // Get the parent ID, but don't validate it. There's no guarantee
801 // we actually have information on whether it's active.
802 parentID := TaskID(ev.args[1])
803 if parentID == BackgroundTask {
804 // Note: a value of 0 here actually means no parent, *not* the
805 // background task. Automatic background task attachment only
806 // applies to regions.
807 parentID = NoTask
808 ev.args[1] = uint64(NoTask)
809 }
810 811 // Validate the name and record it. We'll need to pass it through to
812 // EvUserTaskEnd.
813 nameID := stringID(ev.args[2])
814 name, ok := evt.bytes.get(nameID)
815 if !ok {
816 return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
817 }
818 o.activeTasks[id] = taskState{name: name, parentID: parentID}
819 if err := validateCtx(curCtx, userGoReqs); err != nil {
820 return curCtx, false, err
821 }
822 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
823 return curCtx, true, nil
824 }
825 826 func (o *ordering) advanceUserTaskEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
827 id := TaskID(ev.args[0])
828 if ts, ok := o.activeTasks[id]; ok {
829 // Smuggle the task info. This may happen in a different generation,
830 // which may not have the name in its string table. Add it to the extra
831 // strings table so we can look it up later.
832 ev.extra(version.Go122)[0] = uint64(ts.parentID)
833 ev.extra(version.Go122)[1] = uint64(evt.addExtraString(ts.name))
834 delete(o.activeTasks, id)
835 } else {
836 // Explicitly clear the task info.
837 ev.extra(version.Go122)[0] = uint64(NoTask)
838 ev.extra(version.Go122)[1] = uint64(evt.addExtraString(""))
839 }
840 if err := validateCtx(curCtx, userGoReqs); err != nil {
841 return curCtx, false, err
842 }
843 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
844 return curCtx, true, nil
845 }
846 847 func (o *ordering) advanceUserRegionBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
848 if err := validateCtx(curCtx, userGoReqs); err != nil {
849 return curCtx, false, err
850 }
851 tid := TaskID(ev.args[0])
852 nameID := stringID(ev.args[1])
853 name, ok := evt.bytes.get(nameID)
854 if !ok {
855 return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
856 }
857 gState, ok := o.gStates[curCtx.G]
858 if !ok {
859 return curCtx, false, fmt.Errorf("encountered EvUserRegionBegin without known state for current goroutine %d", curCtx.G)
860 }
861 if err := gState.beginRegion(userRegion{tid, name}); err != nil {
862 return curCtx, false, err
863 }
864 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
865 return curCtx, true, nil
866 }
867 868 func (o *ordering) advanceUserRegionEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
869 if err := validateCtx(curCtx, userGoReqs); err != nil {
870 return curCtx, false, err
871 }
872 tid := TaskID(ev.args[0])
873 nameID := stringID(ev.args[1])
874 name, ok := evt.bytes.get(nameID)
875 if !ok {
876 return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
877 }
878 gState, ok := o.gStates[curCtx.G]
879 if !ok {
880 return curCtx, false, fmt.Errorf("encountered EvUserRegionEnd without known state for current goroutine %d", curCtx.G)
881 }
882 if err := gState.endRegion(userRegion{tid, name}); err != nil {
883 return curCtx, false, err
884 }
885 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
886 return curCtx, true, nil
887 }
888 889 // Handle the GC mark phase.
890 //
891 // We have sequence numbers for both start and end because they
892 // can happen on completely different threads. We want an explicit
893 // partial order edge between start and end here, otherwise we're
894 // relying entirely on timestamps to make sure we don't advance a
895 // GCEnd for a _different_ GC cycle if timestamps are wildly broken.
896 func (o *ordering) advanceGCActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
897 seq := ev.args[0]
898 if gen == o.initialGen {
899 if o.gcState != gcUndetermined {
900 return curCtx, false, fmt.Errorf("GCActive in the first generation isn't first GC event")
901 }
902 o.gcSeq = seq
903 o.gcState = gcRunning
904 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
905 return curCtx, true, nil
906 }
907 if seq != o.gcSeq+1 {
908 // This is not the right GC cycle.
909 return curCtx, false, nil
910 }
911 if o.gcState != gcRunning {
912 return curCtx, false, fmt.Errorf("encountered GCActive while GC was not in progress")
913 }
914 o.gcSeq = seq
915 if err := validateCtx(curCtx, userGoReqs); err != nil {
916 return curCtx, false, err
917 }
918 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
919 return curCtx, true, nil
920 }
921 922 func (o *ordering) advanceGCBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
923 seq := ev.args[0]
924 if o.gcState == gcUndetermined {
925 o.gcSeq = seq
926 o.gcState = gcRunning
927 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
928 return curCtx, true, nil
929 }
930 if seq != o.gcSeq+1 {
931 // This is not the right GC cycle.
932 return curCtx, false, nil
933 }
934 if o.gcState == gcRunning {
935 return curCtx, false, fmt.Errorf("encountered GCBegin while GC was already in progress")
936 }
937 o.gcSeq = seq
938 o.gcState = gcRunning
939 if err := validateCtx(curCtx, userGoReqs); err != nil {
940 return curCtx, false, err
941 }
942 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
943 return curCtx, true, nil
944 }
945 946 func (o *ordering) advanceGCEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
947 seq := ev.args[0]
948 if seq != o.gcSeq+1 {
949 // This is not the right GC cycle.
950 return curCtx, false, nil
951 }
952 if o.gcState == gcNotRunning {
953 return curCtx, false, fmt.Errorf("encountered GCEnd when GC was not in progress")
954 }
955 if o.gcState == gcUndetermined {
956 return curCtx, false, fmt.Errorf("encountered GCEnd when GC was in an undetermined state")
957 }
958 o.gcSeq = seq
959 o.gcState = gcNotRunning
960 if err := validateCtx(curCtx, userGoReqs); err != nil {
961 return curCtx, false, err
962 }
963 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
964 return curCtx, true, nil
965 }
966 967 func (o *ordering) advanceAnnotation(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
968 // Handle simple instantaneous events that require a G.
969 if err := validateCtx(curCtx, userGoReqs); err != nil {
970 return curCtx, false, err
971 }
972 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
973 return curCtx, true, nil
974 }
975 976 func (o *ordering) advanceHeapMetric(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
977 // Handle allocation metrics, which don't require a G.
978 if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mustHave, G: mayHave}); err != nil {
979 return curCtx, false, err
980 }
981 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
982 return curCtx, true, nil
983 }
984 985 func (o *ordering) advanceGCSweepBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
986 // Handle sweep, which is bound to a P and doesn't require a G.
987 if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mustHave, G: mayHave}); err != nil {
988 return curCtx, false, err
989 }
990 if err := o.pStates[curCtx.P].beginRange(makeRangeType(ev.typ, 0)); err != nil {
991 return curCtx, false, err
992 }
993 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
994 return curCtx, true, nil
995 }
996 997 func (o *ordering) advanceGCSweepActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
998 pid := ProcID(ev.args[0])
999 // N.B. In practice Ps can't block while they're sweeping, so this can only
1000 // ever reference curCtx.P. However, be lenient about this like we are with
1001 // GCMarkAssistActive; there's no reason the runtime couldn't change to block
1002 // in the middle of a sweep.
1003 pState, ok := o.pStates[pid]
1004 if !ok {
1005 return curCtx, false, fmt.Errorf("encountered GCSweepActive for unknown proc %d", pid)
1006 }
1007 if err := pState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil {
1008 return curCtx, false, err
1009 }
1010 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
1011 return curCtx, true, nil
1012 }
1013 1014 func (o *ordering) advanceGCSweepEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
1015 if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mustHave, G: mayHave}); err != nil {
1016 return curCtx, false, err
1017 }
1018 _, err := o.pStates[curCtx.P].endRange(ev.typ)
1019 if err != nil {
1020 return curCtx, false, err
1021 }
1022 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
1023 return curCtx, true, nil
1024 }
1025 1026 func (o *ordering) advanceGoRangeBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
1027 // Handle special goroutine-bound event ranges.
1028 if err := validateCtx(curCtx, userGoReqs); err != nil {
1029 return curCtx, false, err
1030 }
1031 desc := stringID(0)
1032 if ev.typ == tracev2.EvSTWBegin {
1033 desc = stringID(ev.args[0])
1034 }
1035 gState, ok := o.gStates[curCtx.G]
1036 if !ok {
1037 return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G)
1038 }
1039 if err := gState.beginRange(makeRangeType(ev.typ, desc)); err != nil {
1040 return curCtx, false, err
1041 }
1042 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
1043 return curCtx, true, nil
1044 }
1045 1046 func (o *ordering) advanceGoRangeActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
1047 gid := GoID(ev.args[0])
1048 // N.B. Like GoStatus, this can happen at any time, because it can
1049 // reference a non-running goroutine. Don't check anything about the
1050 // current scheduler context.
1051 gState, ok := o.gStates[gid]
1052 if !ok {
1053 return curCtx, false, fmt.Errorf("uninitialized goroutine %d found during %s", gid, o.evName(ev.typ))
1054 }
1055 if err := gState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil {
1056 return curCtx, false, err
1057 }
1058 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
1059 return curCtx, true, nil
1060 }
1061 1062 func (o *ordering) advanceGoRangeEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
1063 if err := validateCtx(curCtx, userGoReqs); err != nil {
1064 return curCtx, false, err
1065 }
1066 gState, ok := o.gStates[curCtx.G]
1067 if !ok {
1068 return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G)
1069 }
1070 desc, err := gState.endRange(ev.typ)
1071 if err != nil {
1072 return curCtx, false, err
1073 }
1074 if ev.typ == tracev2.EvSTWEnd {
1075 // Smuggle the kind into the event.
1076 // Don't use ev.extra here so we have symmetry with STWBegin.
1077 ev.args[0] = uint64(desc)
1078 }
1079 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
1080 return curCtx, true, nil
1081 }
1082 1083 func (o *ordering) advanceAllocFree(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
1084 // Handle simple instantaneous events that may or may not have a P.
1085 if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mayHave, G: mayHave}); err != nil {
1086 return curCtx, false, err
1087 }
1088 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
1089 return curCtx, true, nil
1090 }
1091 1092 // Next returns the next event in the ordering.
1093 func (o *ordering) Next() (Event, bool) {
1094 return o.queue.pop()
1095 }
1096 1097 // schedCtx represents the scheduling resources associated with an event.
1098 type schedCtx struct {
1099 G GoID
1100 P ProcID
1101 M ThreadID
1102 }
1103 1104 // validateCtx ensures that ctx conforms to some reqs, returning an error if
1105 // it doesn't.
1106 func validateCtx(ctx schedCtx, reqs schedReqs) error {
1107 // Check thread requirements.
1108 if reqs.M == mustHave && ctx.M == NoThread {
1109 return fmt.Errorf("expected a thread but didn't have one")
1110 } else if reqs.M == mustNotHave && ctx.M != NoThread {
1111 return fmt.Errorf("expected no thread but had one")
1112 }
1113 1114 // Check proc requirements.
1115 if reqs.P == mustHave && ctx.P == NoProc {
1116 return fmt.Errorf("expected a proc but didn't have one")
1117 } else if reqs.P == mustNotHave && ctx.P != NoProc {
1118 return fmt.Errorf("expected no proc but had one")
1119 }
1120 1121 // Check goroutine requirements.
1122 if reqs.G == mustHave && ctx.G == NoGoroutine {
1123 return fmt.Errorf("expected a goroutine but didn't have one")
1124 } else if reqs.G == mustNotHave && ctx.G != NoGoroutine {
1125 return fmt.Errorf("expected no goroutine but had one")
1126 }
1127 return nil
1128 }
1129 1130 // gcState is a trinary variable for the current state of the GC.
1131 //
1132 // The third state besides "enabled" and "disabled" is "undetermined."
1133 type gcState uint8
1134 1135 const (
1136 gcUndetermined gcState = iota
1137 gcNotRunning
1138 gcRunning
1139 )
1140 1141 // String returns a human-readable string for the GC state.
1142 func (s gcState) String() string {
1143 switch s {
1144 case gcUndetermined:
1145 return "Undetermined"
1146 case gcNotRunning:
1147 return "NotRunning"
1148 case gcRunning:
1149 return "Running"
1150 }
1151 return "Bad"
1152 }
1153 1154 // userRegion represents a unique user region when attached to some gState.
1155 type userRegion struct {
1156 // name must be a resolved string because the string ID for the same
1157 // string may change across generations, but we care about checking
1158 // the value itself.
1159 taskID TaskID
1160 name []byte
1161 }
1162 1163 // rangeType is a way to classify special ranges of time.
1164 //
1165 // These typically correspond 1:1 with "Begin" events, but
1166 // they may have an optional subtype that describes the range
1167 // in more detail.
1168 type rangeType struct {
1169 typ tracev2.EventType // "Begin" event.
1170 desc stringID // Optional subtype.
1171 }
1172 1173 // makeRangeType constructs a new rangeType.
1174 func makeRangeType(typ tracev2.EventType, desc stringID) rangeType {
1175 if styp := tracev2.Specs()[typ].StartEv; styp != tracev2.EvNone {
1176 typ = styp
1177 }
1178 return rangeType{typ, desc}
1179 }
1180 1181 // gState is the state of a goroutine at a point in the trace.
1182 type gState struct {
1183 id GoID
1184 status tracev2.GoStatus
1185 seq seqCounter
1186 1187 // regions are the active user regions for this goroutine.
1188 regions []userRegion
1189 1190 // rangeState is the state of special time ranges bound to this goroutine.
1191 rangeState
1192 }
1193 1194 // beginRegion starts a user region on the goroutine.
1195 func (s *gState) beginRegion(r userRegion) error {
1196 s.regions = append(s.regions, r)
1197 return nil
1198 }
1199 1200 // endRegion ends a user region on the goroutine.
1201 func (s *gState) endRegion(r userRegion) error {
1202 if len(s.regions) == 0 {
1203 // We do not know about regions that began before tracing started.
1204 return nil
1205 }
1206 if next := s.regions[len(s.regions)-1]; next != r {
1207 return fmt.Errorf("misuse of region in goroutine %v: region end %v when the inner-most active region start event is %v", s.id, r, next)
1208 }
1209 s.regions = s.regions[:len(s.regions)-1]
1210 return nil
1211 }
1212 1213 // pState is the state of a proc at a point in the trace.
1214 type pState struct {
1215 id ProcID
1216 status tracev2.ProcStatus
1217 seq seqCounter
1218 1219 // rangeState is the state of special time ranges bound to this proc.
1220 rangeState
1221 }
1222 1223 // mState is the state of a thread at a point in the trace.
1224 type mState struct {
1225 g GoID // Goroutine bound to this M. (The goroutine's state is Executing.)
1226 p ProcID // Proc bound to this M. (The proc's state is Executing.)
1227 }
1228 1229 // rangeState represents the state of special time ranges.
1230 type rangeState struct {
1231 // inFlight contains the rangeTypes of any ranges bound to a resource.
1232 inFlight []rangeType
1233 }
1234 1235 // beginRange begins a special range in time on the goroutine.
1236 //
1237 // Returns an error if the range is already in progress.
1238 func (s *rangeState) beginRange(typ rangeType) error {
1239 if s.hasRange(typ) {
1240 return fmt.Errorf("discovered event already in-flight for when starting event %v", tracev2.Specs()[typ.typ].Name)
1241 }
1242 s.inFlight = append(s.inFlight, typ)
1243 return nil
1244 }
1245 1246 // activeRange marks special range in time on the goroutine as active in the
1247 // initial generation, or confirms that it is indeed active in later generations.
1248 func (s *rangeState) activeRange(typ rangeType, isInitialGen bool) error {
1249 if isInitialGen {
1250 if s.hasRange(typ) {
1251 return fmt.Errorf("found named active range already in first gen: %v", typ)
1252 }
1253 s.inFlight = append(s.inFlight, typ)
1254 } else if !s.hasRange(typ) {
1255 return fmt.Errorf("resource is missing active range: %v %v", tracev2.Specs()[typ.typ].Name, s.inFlight)
1256 }
1257 return nil
1258 }
1259 1260 // hasRange returns true if a special time range on the goroutine as in progress.
1261 func (s *rangeState) hasRange(typ rangeType) bool {
1262 return slices.Contains(s.inFlight, typ)
1263 }
1264 1265 // endRange ends a special range in time on the goroutine.
1266 //
1267 // This must line up with the start event type of the range the goroutine is currently in.
1268 func (s *rangeState) endRange(typ tracev2.EventType) (stringID, error) {
1269 st := tracev2.Specs()[typ].StartEv
1270 idx := -1
1271 for i, r := range s.inFlight {
1272 if r.typ == st {
1273 idx = i
1274 break
1275 }
1276 }
1277 if idx < 0 {
1278 return 0, fmt.Errorf("tried to end event %v, but not in-flight", tracev2.Specs()[st].Name)
1279 }
1280 // Swap remove.
1281 desc := s.inFlight[idx].desc
1282 s.inFlight[idx], s.inFlight[len(s.inFlight)-1] = s.inFlight[len(s.inFlight)-1], s.inFlight[idx]
1283 s.inFlight = s.inFlight[:len(s.inFlight)-1]
1284 return desc, nil
1285 }
1286 1287 // seqCounter represents a global sequence counter for a resource.
1288 type seqCounter struct {
1289 gen uint64 // The generation for the local sequence counter seq.
1290 seq uint64 // The sequence number local to the generation.
1291 }
1292 1293 // makeSeq creates a new seqCounter.
1294 func makeSeq(gen, seq uint64) seqCounter {
1295 return seqCounter{gen: gen, seq: seq}
1296 }
1297 1298 // succeeds returns true if a is the immediate successor of b.
1299 func (a seqCounter) succeeds(b seqCounter) bool {
1300 return a.gen == b.gen && a.seq == b.seq+1
1301 }
1302 1303 // String returns a debug string representation of the seqCounter.
1304 func (c seqCounter) String() string {
1305 return fmt.Sprintf("%d (gen=%d)", c.seq, c.gen)
1306 }
1307 1308 func dumpOrdering(order *ordering) []byte {
1309 var sb bytes.Buffer
1310 for id, state := range order.gStates {
1311 fmt.Fprintf(&sb, "G %d [status=%s seq=%s]\n", id, state.status, state.seq)
1312 }
1313 fmt.Fprintln(&sb)
1314 for id, state := range order.pStates {
1315 fmt.Fprintf(&sb, "P %d [status=%s seq=%s]\n", id, state.status, state.seq)
1316 }
1317 fmt.Fprintln(&sb)
1318 for id, state := range order.mStates {
1319 fmt.Fprintf(&sb, "M %d [g=%d p=%d]\n", id, state.g, state.p)
1320 }
1321 fmt.Fprintln(&sb)
1322 fmt.Fprintf(&sb, "GC %d %s\n", order.gcSeq, order.gcState)
1323 return sb.String()
1324 }
1325 1326 // taskState represents an active task.
1327 type taskState struct {
1328 // name is the type of the active task.
1329 name []byte
1330 1331 // parentID is the parent ID of the active task.
1332 parentID TaskID
1333 }
1334 1335 // queue implements a growable ring buffer with a queue API.
1336 type queue[T any] struct {
1337 start, end int
1338 buf []T
1339 }
1340 1341 // push adds a new event to the back of the queue.
1342 func (q *queue[T]) push(value T) {
1343 if q.end-q.start == len(q.buf) {
1344 q.grow()
1345 }
1346 q.buf[q.end%len(q.buf)] = value
1347 q.end++
1348 }
1349 1350 // grow increases the size of the queue.
1351 func (q *queue[T]) grow() {
1352 if len(q.buf) == 0 {
1353 q.buf = []T{:2}
1354 return
1355 }
1356 1357 // Create new buf and copy data over.
1358 newBuf := []T{:len(q.buf)*2}
1359 pivot := q.start % len(q.buf)
1360 first, last := q.buf[pivot:], q.buf[:pivot]
1361 copy(newBuf[:len(first)], first)
1362 copy(newBuf[len(first):], last)
1363 1364 // Update the queue state.
1365 q.start = 0
1366 q.end = len(q.buf)
1367 q.buf = newBuf
1368 }
1369 1370 // pop removes an event from the front of the queue. If the
1371 // queue is empty, it returns an EventBad event.
1372 func (q *queue[T]) pop() (T, bool) {
1373 if q.end-q.start == 0 {
1374 var zero T
1375 return zero, false
1376 }
1377 elem := &q.buf[q.start%len(q.buf)]
1378 value := *elem
1379 var zero T
1380 *elem = zero // Clear the entry before returning, so we don't hold onto old tables.
1381 q.start++
1382 return value, true
1383 }
1384 1385 // makeEvent creates an Event from the provided information.
1386 //
1387 // It's just a convenience function; it's always OK to construct
1388 // an Event manually if this isn't quite the right way to express
1389 // the contents of the event.
1390 func makeEvent(table *evTable, ctx schedCtx, typ tracev2.EventType, time Time, args ...uint64) Event {
1391 ev := Event{
1392 table: table,
1393 ctx: ctx,
1394 base: baseEvent{
1395 typ: typ,
1396 time: time,
1397 },
1398 }
1399 copy(ev.base.args[:], args)
1400 return ev
1401 }
1402 1403 // schedReqs is a set of constraints on what the scheduling
1404 // context must look like.
1405 type schedReqs struct {
1406 M constraint
1407 P constraint
1408 G constraint
1409 }
1410 1411 // constraint represents a various presence requirements.
1412 type constraint uint8
1413 1414 const (
1415 mustNotHave constraint = iota
1416 mayHave
1417 mustHave
1418 )
1419 1420 // userGoReqs is a common requirement among events that are running
1421 // or are close to running user code.
1422 var userGoReqs = schedReqs{M: mustHave, P: mustHave, G: mustHave}
1423