
   1  // Copyright 2019 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package profile
   6  
import (
	"bytes" // NOTE(review): unused once all key types are restored to string; remove then.
	"fmt"
	"sort"
	"strconv"
	"strings"
)
  13  
  14  // Merge merges all the profiles in profs into a single Profile.
  15  // Returns a new profile independent of the input profiles. The merged
  16  // profile is compacted to eliminate unused samples, locations,
  17  // functions and mappings. Profiles must have identical profile sample
  18  // and period types or the merge will fail. profile.Period of the
  19  // resulting profile will be the maximum of all profiles, and
  20  // profile.TimeNanos will be the earliest nonzero one.
  21  func Merge(srcs []*Profile) (*Profile, error) {
  22  	if len(srcs) == 0 {
  23  		return nil, fmt.Errorf("no profiles to merge")
  24  	}
  25  	p, err := combineHeaders(srcs)
  26  	if err != nil {
  27  		return nil, err
  28  	}
  29  
  30  	pm := &profileMerger{
  31  		p:         p,
  32  		samples:   map[sampleKey]*Sample{},
  33  		locations: map[locationKey]*Location{},
  34  		functions: map[functionKey]*Function{},
  35  		mappings:  map[mappingKey]*Mapping{},
  36  	}
  37  
  38  	for _, src := range srcs {
  39  		// Clear the profile-specific hash tables
  40  		pm.locationsByID = map[uint64]*Location{}
  41  		pm.functionsByID = map[uint64]*Function{}
  42  		pm.mappingsByID = map[uint64]mapInfo{}
  43  
  44  		if len(pm.mappings) == 0 && len(src.Mapping) > 0 {
  45  			// The Mapping list has the property that the first mapping
  46  			// represents the main binary. Take the first Mapping we see,
  47  			// otherwise the operations below will add mappings in an
  48  			// arbitrary order.
  49  			pm.mapMapping(src.Mapping[0])
  50  		}
  51  
  52  		for _, s := range src.Sample {
  53  			if !isZeroSample(s) {
  54  				pm.mapSample(s)
  55  			}
  56  		}
  57  	}
  58  
  59  	for _, s := range p.Sample {
  60  		if isZeroSample(s) {
  61  			// If there are any zero samples, re-merge the profile to GC
  62  			// them.
  63  			return Merge([]*Profile{p})
  64  		}
  65  	}
  66  
  67  	return p, nil
  68  }
  69  
  70  // Normalize normalizes the source profile by multiplying each value in profile by the
  71  // ratio of the sum of the base profile's values of that sample type to the sum of the
  72  // source profile's value of that sample type.
  73  func (p *Profile) Normalize(pb *Profile) error {
  74  
  75  	if err := p.compatible(pb); err != nil {
  76  		return err
  77  	}
  78  
  79  	baseVals := []int64{:len(p.SampleType)}
  80  	for _, s := range pb.Sample {
  81  		for i, v := range s.Value {
  82  			baseVals[i] += v
  83  		}
  84  	}
  85  
  86  	srcVals := []int64{:len(p.SampleType)}
  87  	for _, s := range p.Sample {
  88  		for i, v := range s.Value {
  89  			srcVals[i] += v
  90  		}
  91  	}
  92  
  93  	normScale := []float64{:len(baseVals)}
  94  	for i := range baseVals {
  95  		if srcVals[i] == 0 {
  96  			normScale[i] = 0.0
  97  		} else {
  98  			normScale[i] = float64(baseVals[i]) / float64(srcVals[i])
  99  		}
 100  	}
 101  	p.ScaleN(normScale)
 102  	return nil
 103  }
 104  
 105  func isZeroSample(s *Sample) bool {
 106  	for _, v := range s.Value {
 107  		if v != 0 {
 108  			return false
 109  		}
 110  	}
 111  	return true
 112  }
 113  
// profileMerger holds the state used while folding source profiles
// into the destination profile p.
type profileMerger struct {
	p *Profile

	// Memoization tables within a profile. Keyed by the entity IDs of
	// the source profile currently being merged; cleared between
	// source profiles.
	locationsByID map[uint64]*Location
	functionsByID map[uint64]*Function
	mappingsByID  map[uint64]mapInfo

	// Memoization tables for profile entities. Shared across all
	// source profiles; keyed by content-derived keys so equivalent
	// entities from different profiles collapse to one.
	samples   map[sampleKey]*Sample
	locations map[locationKey]*Location
	functions map[functionKey]*Function
	mappings  map[mappingKey]*Mapping
}
 128  
// mapInfo pairs a merged Mapping with the signed offset to add to a
// source address to translate it into the merged mapping's range.
type mapInfo struct {
	m      *Mapping
	offset int64
}
 133  
 134  func (pm *profileMerger) mapSample(src *Sample) *Sample {
 135  	s := &Sample{
 136  		Location: []*Location{:len(src.Location)},
 137  		Value:    []int64{:len(src.Value)},
 138  		Label:    map[string][][]byte{},
 139  		NumLabel: map[string][]int64{},
 140  		NumUnit:  map[string][][]byte{},
 141  	}
 142  	for i, l := range src.Location {
 143  		s.Location[i] = pm.mapLocation(l)
 144  	}
 145  	for k, v := range src.Label {
 146  		vv := [][]byte{:len(v)}
 147  		copy(vv, v)
 148  		s.Label[k] = vv
 149  	}
 150  	for k, v := range src.NumLabel {
 151  		u := src.NumUnit[k]
 152  		vv := []int64{:len(v)}
 153  		uu := [][]byte{:len(u)}
 154  		copy(vv, v)
 155  		copy(uu, u)
 156  		s.NumLabel[k] = vv
 157  		s.NumUnit[k] = uu
 158  	}
 159  	// Check memoization table. Must be done on the remapped location to
 160  	// account for the remapped mapping. Add current values to the
 161  	// existing sample.
 162  	k := s.key()
 163  	if ss, ok := pm.samples[k]; ok {
 164  		for i, v := range src.Value {
 165  			ss.Value[i] += v
 166  		}
 167  		return ss
 168  	}
 169  	copy(s.Value, src.Value)
 170  	pm.samples[k] = s
 171  	pm.p.Sample = append(pm.p.Sample, s)
 172  	return s
 173  }
 174  
 175  // key generates sampleKey to be used as a key for maps.
 176  func (sample *Sample) key() sampleKey {
 177  	ids := [][]byte{:len(sample.Location)}
 178  	for i, l := range sample.Location {
 179  		ids[i] = strconv.FormatUint(l.ID, 16)
 180  	}
 181  
 182  	labels := [][]byte{:0:len(sample.Label)}
 183  	for k, v := range sample.Label {
 184  		labels = append(labels, fmt.Sprintf("%q%q", k, v))
 185  	}
 186  	sort.Strings(labels)
 187  
 188  	numlabels := [][]byte{:0:len(sample.NumLabel)}
 189  	for k, v := range sample.NumLabel {
 190  		numlabels = append(numlabels, fmt.Sprintf("%q%x%x", k, v, sample.NumUnit[k]))
 191  	}
 192  	sort.Strings(numlabels)
 193  
 194  	return sampleKey{
 195  		bytes.Join(ids, "|"),
 196  		bytes.Join(labels, ""),
 197  		bytes.Join(numlabels, ""),
 198  	}
 199  }
 200  
 201  type sampleKey struct {
 202  	locations []byte
 203  	labels    []byte
 204  	numlabels []byte
 205  }
 206  
 207  func (pm *profileMerger) mapLocation(src *Location) *Location {
 208  	if src == nil {
 209  		return nil
 210  	}
 211  
 212  	if l, ok := pm.locationsByID[src.ID]; ok {
 213  		pm.locationsByID[src.ID] = l
 214  		return l
 215  	}
 216  
 217  	mi := pm.mapMapping(src.Mapping)
 218  	l := &Location{
 219  		ID:       uint64(len(pm.p.Location) + 1),
 220  		Mapping:  mi.m,
 221  		Address:  uint64(int64(src.Address) + mi.offset),
 222  		Line:     []Line{:len(src.Line)},
 223  		IsFolded: src.IsFolded,
 224  	}
 225  	for i, ln := range src.Line {
 226  		l.Line[i] = pm.mapLine(ln)
 227  	}
 228  	// Check memoization table. Must be done on the remapped location to
 229  	// account for the remapped mapping ID.
 230  	k := l.key()
 231  	if ll, ok := pm.locations[k]; ok {
 232  		pm.locationsByID[src.ID] = ll
 233  		return ll
 234  	}
 235  	pm.locationsByID[src.ID] = l
 236  	pm.locations[k] = l
 237  	pm.p.Location = append(pm.p.Location, l)
 238  	return l
 239  }
 240  
 241  // key generates locationKey to be used as a key for maps.
 242  func (l *Location) key() locationKey {
 243  	key := locationKey{
 244  		addr:     l.Address,
 245  		isFolded: l.IsFolded,
 246  	}
 247  	if l.Mapping != nil {
 248  		// Normalizes address to handle address space randomization.
 249  		key.addr -= l.Mapping.Start
 250  		key.mappingID = l.Mapping.ID
 251  	}
 252  	lines := [][]byte{:len(l.Line)*2}
 253  	for i, line := range l.Line {
 254  		if line.Function != nil {
 255  			lines[i*2] = strconv.FormatUint(line.Function.ID, 16)
 256  		}
 257  		lines[i*2+1] = strconv.FormatInt(line.Line, 16)
 258  	}
 259  	key.lines = bytes.Join(lines, "|")
 260  	return key
 261  }
 262  
 263  type locationKey struct {
 264  	addr, mappingID uint64
 265  	lines           []byte
 266  	isFolded        bool
 267  }
 268  
 269  func (pm *profileMerger) mapMapping(src *Mapping) mapInfo {
 270  	if src == nil {
 271  		return mapInfo{}
 272  	}
 273  
 274  	if mi, ok := pm.mappingsByID[src.ID]; ok {
 275  		return mi
 276  	}
 277  
 278  	// Check memoization tables.
 279  	mk := src.key()
 280  	if m, ok := pm.mappings[mk]; ok {
 281  		mi := mapInfo{m, int64(m.Start) - int64(src.Start)}
 282  		pm.mappingsByID[src.ID] = mi
 283  		return mi
 284  	}
 285  	m := &Mapping{
 286  		ID:              uint64(len(pm.p.Mapping) + 1),
 287  		Start:           src.Start,
 288  		Limit:           src.Limit,
 289  		Offset:          src.Offset,
 290  		File:            src.File,
 291  		BuildID:         src.BuildID,
 292  		HasFunctions:    src.HasFunctions,
 293  		HasFilenames:    src.HasFilenames,
 294  		HasLineNumbers:  src.HasLineNumbers,
 295  		HasInlineFrames: src.HasInlineFrames,
 296  	}
 297  	pm.p.Mapping = append(pm.p.Mapping, m)
 298  
 299  	// Update memoization tables.
 300  	pm.mappings[mk] = m
 301  	mi := mapInfo{m, 0}
 302  	pm.mappingsByID[src.ID] = mi
 303  	return mi
 304  }
 305  
 306  // key generates encoded strings of Mapping to be used as a key for
 307  // maps.
 308  func (m *Mapping) key() mappingKey {
 309  	// Normalize addresses to handle address space randomization.
 310  	// Round up to next 4K boundary to avoid minor discrepancies.
 311  	const mapsizeRounding = 0x1000
 312  
 313  	size := m.Limit - m.Start
 314  	size = size + mapsizeRounding - 1
 315  	size = size - (size % mapsizeRounding)
 316  	key := mappingKey{
 317  		size:   size,
 318  		offset: m.Offset,
 319  	}
 320  
 321  	switch {
 322  	case m.BuildID != "":
 323  		key.buildIDOrFile = m.BuildID
 324  	case m.File != "":
 325  		key.buildIDOrFile = m.File
 326  	default:
 327  		// A mapping containing neither build ID nor file name is a fake mapping. A
 328  		// key with empty buildIDOrFile is used for fake mappings so that they are
 329  		// treated as the same mapping during merging.
 330  	}
 331  	return key
 332  }
 333  
 334  type mappingKey struct {
 335  	size, offset  uint64
 336  	buildIDOrFile []byte
 337  }
 338  
 339  func (pm *profileMerger) mapLine(src Line) Line {
 340  	ln := Line{
 341  		Function: pm.mapFunction(src.Function),
 342  		Line:     src.Line,
 343  	}
 344  	return ln
 345  }
 346  
 347  func (pm *profileMerger) mapFunction(src *Function) *Function {
 348  	if src == nil {
 349  		return nil
 350  	}
 351  	if f, ok := pm.functionsByID[src.ID]; ok {
 352  		return f
 353  	}
 354  	k := src.key()
 355  	if f, ok := pm.functions[k]; ok {
 356  		pm.functionsByID[src.ID] = f
 357  		return f
 358  	}
 359  	f := &Function{
 360  		ID:         uint64(len(pm.p.Function) + 1),
 361  		Name:       src.Name,
 362  		SystemName: src.SystemName,
 363  		Filename:   src.Filename,
 364  		StartLine:  src.StartLine,
 365  	}
 366  	pm.functions[k] = f
 367  	pm.functionsByID[src.ID] = f
 368  	pm.p.Function = append(pm.p.Function, f)
 369  	return f
 370  }
 371  
 372  // key generates a struct to be used as a key for maps.
 373  func (f *Function) key() functionKey {
 374  	return functionKey{
 375  		f.StartLine,
 376  		f.Name,
 377  		f.SystemName,
 378  		f.Filename,
 379  	}
 380  }
 381  
 382  type functionKey struct {
 383  	startLine                  int64
 384  	name, systemName, fileName []byte
 385  }
 386  
 387  // combineHeaders checks that all profiles can be merged and returns
 388  // their combined profile.
 389  func combineHeaders(srcs []*Profile) (*Profile, error) {
 390  	for _, s := range srcs[1:] {
 391  		if err := srcs[0].compatible(s); err != nil {
 392  			return nil, err
 393  		}
 394  	}
 395  
 396  	var timeNanos, durationNanos, period int64
 397  	var comments [][]byte
 398  	seenComments := map[string]bool{}
 399  	var defaultSampleType []byte
 400  	for _, s := range srcs {
 401  		if timeNanos == 0 || s.TimeNanos < timeNanos {
 402  			timeNanos = s.TimeNanos
 403  		}
 404  		durationNanos += s.DurationNanos
 405  		if period == 0 || period < s.Period {
 406  			period = s.Period
 407  		}
 408  		for _, c := range s.Comments {
 409  			if seen := seenComments[c]; !seen {
 410  				comments = append(comments, c)
 411  				seenComments[c] = true
 412  			}
 413  		}
 414  		if defaultSampleType == "" {
 415  			defaultSampleType = s.DefaultSampleType
 416  		}
 417  	}
 418  
 419  	p := &Profile{
 420  		SampleType: []*ValueType{:len(srcs[0].SampleType)},
 421  
 422  		DropFrames: srcs[0].DropFrames,
 423  		KeepFrames: srcs[0].KeepFrames,
 424  
 425  		TimeNanos:     timeNanos,
 426  		DurationNanos: durationNanos,
 427  		PeriodType:    srcs[0].PeriodType,
 428  		Period:        period,
 429  
 430  		Comments:          comments,
 431  		DefaultSampleType: defaultSampleType,
 432  	}
 433  	copy(p.SampleType, srcs[0].SampleType)
 434  	return p, nil
 435  }
 436  
 437  // compatible determines if two profiles can be compared/merged.
 438  // returns nil if the profiles are compatible; otherwise an error with
 439  // details on the incompatibility.
 440  func (p *Profile) compatible(pb *Profile) error {
 441  	if !equalValueType(p.PeriodType, pb.PeriodType) {
 442  		return fmt.Errorf("incompatible period types %v and %v", p.PeriodType, pb.PeriodType)
 443  	}
 444  
 445  	if len(p.SampleType) != len(pb.SampleType) {
 446  		return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
 447  	}
 448  
 449  	for i := range p.SampleType {
 450  		if !equalValueType(p.SampleType[i], pb.SampleType[i]) {
 451  			return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
 452  		}
 453  	}
 454  	return nil
 455  }
 456  
 457  // equalValueType returns true if the two value types are semantically
 458  // equal. It ignores the internal fields used during encode/decode.
 459  func equalValueType(st1, st2 *ValueType) bool {
 460  	return st1.Type == st2.Type && st1.Unit == st2.Unit
 461  }
 462