merge.go raw

   1  // Copyright 2014 Google Inc. All Rights Reserved.
   2  //
   3  // Licensed under the Apache License, Version 2.0 (the "License");
   4  // you may not use this file except in compliance with the License.
   5  // You may obtain a copy of the License at
   6  //
   7  //     http://www.apache.org/licenses/LICENSE-2.0
   8  //
   9  // Unless required by applicable law or agreed to in writing, software
  10  // distributed under the License is distributed on an "AS IS" BASIS,
  11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  // See the License for the specific language governing permissions and
  13  // limitations under the License.
  14  
  15  package profile
  16  
  17  import (
  18  	"encoding/binary"
  19  	"fmt"
  20  	"slices"
  21  	"sort"
  22  	"strconv"
  23  	"strings"
  24  )
  25  
  26  // Compact performs garbage collection on a profile to remove any
  27  // unreferenced fields. This is useful to reduce the size of a profile
  28  // after samples or locations have been removed.
  29  func (p *Profile) Compact() *Profile {
  30  	p, _ = Merge([]*Profile{p})
  31  	return p
  32  }
  33  
  34  // Merge merges all the profiles in profs into a single Profile.
  35  // Returns a new profile independent of the input profiles. The merged
  36  // profile is compacted to eliminate unused samples, locations,
  37  // functions and mappings. Profiles must have identical profile sample
  38  // and period types or the merge will fail. profile.Period of the
  39  // resulting profile will be the maximum of all profiles, and
  40  // profile.TimeNanos will be the earliest nonzero one. Merges are
  41  // associative with the caveat of the first profile having some
  42  // specialization in how headers are combined. There may be other
  43  // subtleties now or in the future regarding associativity.
  44  func Merge(srcs []*Profile) (*Profile, error) {
  45  	if len(srcs) == 0 {
  46  		return nil, fmt.Errorf("no profiles to merge")
  47  	}
  48  	p, err := combineHeaders(srcs)
  49  	if err != nil {
  50  		return nil, err
  51  	}
  52  
  53  	pm := &profileMerger{
  54  		p:         p,
  55  		samples:   make(map[sampleKey]*Sample, len(srcs[0].Sample)),
  56  		locations: make(map[locationKey]*Location, len(srcs[0].Location)),
  57  		functions: make(map[functionKey]*Function, len(srcs[0].Function)),
  58  		mappings:  make(map[mappingKey]*Mapping, len(srcs[0].Mapping)),
  59  	}
  60  
  61  	for _, src := range srcs {
  62  		// Clear the profile-specific hash tables
  63  		pm.locationsByID = makeLocationIDMap(len(src.Location))
  64  		pm.functionsByID = make(map[uint64]*Function, len(src.Function))
  65  		pm.mappingsByID = make(map[uint64]mapInfo, len(src.Mapping))
  66  
  67  		if len(pm.mappings) == 0 && len(src.Mapping) > 0 {
  68  			// The Mapping list has the property that the first mapping
  69  			// represents the main binary. Take the first Mapping we see,
  70  			// otherwise the operations below will add mappings in an
  71  			// arbitrary order.
  72  			pm.mapMapping(src.Mapping[0])
  73  		}
  74  
  75  		for _, s := range src.Sample {
  76  			if !isZeroSample(s) {
  77  				pm.mapSample(s)
  78  			}
  79  		}
  80  	}
  81  
  82  	if slices.ContainsFunc(p.Sample, isZeroSample) {
  83  		// If there are any zero samples, re-merge the profile to GC
  84  		// them.
  85  		return Merge([]*Profile{p})
  86  	}
  87  
  88  	return p, nil
  89  }
  90  
  91  // Normalize normalizes the source profile by multiplying each value in profile by the
  92  // ratio of the sum of the base profile's values of that sample type to the sum of the
  93  // source profile's value of that sample type.
  94  func (p *Profile) Normalize(pb *Profile) error {
  95  
  96  	if err := p.compatible(pb); err != nil {
  97  		return err
  98  	}
  99  
 100  	baseVals := make([]int64, len(p.SampleType))
 101  	for _, s := range pb.Sample {
 102  		for i, v := range s.Value {
 103  			baseVals[i] += v
 104  		}
 105  	}
 106  
 107  	srcVals := make([]int64, len(p.SampleType))
 108  	for _, s := range p.Sample {
 109  		for i, v := range s.Value {
 110  			srcVals[i] += v
 111  		}
 112  	}
 113  
 114  	normScale := make([]float64, len(baseVals))
 115  	for i := range baseVals {
 116  		if srcVals[i] == 0 {
 117  			normScale[i] = 0.0
 118  		} else {
 119  			normScale[i] = float64(baseVals[i]) / float64(srcVals[i])
 120  		}
 121  	}
 122  	p.ScaleN(normScale)
 123  	return nil
 124  }
 125  
 126  func isZeroSample(s *Sample) bool {
 127  	for _, v := range s.Value {
 128  		if v != 0 {
 129  			return false
 130  		}
 131  	}
 132  	return true
 133  }
 134  
 135  type profileMerger struct {
 136  	p *Profile
 137  
 138  	// Memoization tables within a profile.
 139  	locationsByID locationIDMap
 140  	functionsByID map[uint64]*Function
 141  	mappingsByID  map[uint64]mapInfo
 142  
 143  	// Memoization tables for profile entities.
 144  	samples   map[sampleKey]*Sample
 145  	locations map[locationKey]*Location
 146  	functions map[functionKey]*Function
 147  	mappings  map[mappingKey]*Mapping
 148  }
 149  
 150  type mapInfo struct {
 151  	m      *Mapping
 152  	offset int64
 153  }
 154  
 155  func (pm *profileMerger) mapSample(src *Sample) *Sample {
 156  	// Check memoization table
 157  	k := pm.sampleKey(src)
 158  	if ss, ok := pm.samples[k]; ok {
 159  		for i, v := range src.Value {
 160  			ss.Value[i] += v
 161  		}
 162  		return ss
 163  	}
 164  
 165  	// Make new sample.
 166  	s := &Sample{
 167  		Location: make([]*Location, len(src.Location)),
 168  		Value:    make([]int64, len(src.Value)),
 169  		Label:    make(map[string][]string, len(src.Label)),
 170  		NumLabel: make(map[string][]int64, len(src.NumLabel)),
 171  		NumUnit:  make(map[string][]string, len(src.NumLabel)),
 172  	}
 173  	for i, l := range src.Location {
 174  		s.Location[i] = pm.mapLocation(l)
 175  	}
 176  	for k, v := range src.Label {
 177  		vv := make([]string, len(v))
 178  		copy(vv, v)
 179  		s.Label[k] = vv
 180  	}
 181  	for k, v := range src.NumLabel {
 182  		u := src.NumUnit[k]
 183  		vv := make([]int64, len(v))
 184  		uu := make([]string, len(u))
 185  		copy(vv, v)
 186  		copy(uu, u)
 187  		s.NumLabel[k] = vv
 188  		s.NumUnit[k] = uu
 189  	}
 190  	copy(s.Value, src.Value)
 191  	pm.samples[k] = s
 192  	pm.p.Sample = append(pm.p.Sample, s)
 193  	return s
 194  }
 195  
 196  func (pm *profileMerger) sampleKey(sample *Sample) sampleKey {
 197  	// Accumulate contents into a string.
 198  	var buf strings.Builder
 199  	buf.Grow(64) // Heuristic to avoid extra allocs
 200  
 201  	// encode a number
 202  	putNumber := func(v uint64) {
 203  		var num [binary.MaxVarintLen64]byte
 204  		n := binary.PutUvarint(num[:], v)
 205  		buf.Write(num[:n])
 206  	}
 207  
 208  	// encode a string prefixed with its length.
 209  	putDelimitedString := func(s string) {
 210  		putNumber(uint64(len(s)))
 211  		buf.WriteString(s)
 212  	}
 213  
 214  	for _, l := range sample.Location {
 215  		// Get the location in the merged profile, which may have a different ID.
 216  		if loc := pm.mapLocation(l); loc != nil {
 217  			putNumber(loc.ID)
 218  		}
 219  	}
 220  	putNumber(0) // Delimiter
 221  
 222  	for _, l := range sortedKeys1(sample.Label) {
 223  		putDelimitedString(l)
 224  		values := sample.Label[l]
 225  		putNumber(uint64(len(values)))
 226  		for _, v := range values {
 227  			putDelimitedString(v)
 228  		}
 229  	}
 230  
 231  	for _, l := range sortedKeys2(sample.NumLabel) {
 232  		putDelimitedString(l)
 233  		values := sample.NumLabel[l]
 234  		putNumber(uint64(len(values)))
 235  		for _, v := range values {
 236  			putNumber(uint64(v))
 237  		}
 238  		units := sample.NumUnit[l]
 239  		putNumber(uint64(len(units)))
 240  		for _, v := range units {
 241  			putDelimitedString(v)
 242  		}
 243  	}
 244  
 245  	return sampleKey(buf.String())
 246  }
 247  
 248  type sampleKey string
 249  
 250  // sortedKeys1 returns the sorted keys found in a string->[]string map.
 251  //
 252  // Note: this is currently non-generic since github pprof runs golint,
 253  // which does not support generics. When that issue is fixed, it can
 254  // be merged with sortedKeys2 and made into a generic function.
 255  func sortedKeys1(m map[string][]string) []string {
 256  	if len(m) == 0 {
 257  		return nil
 258  	}
 259  	keys := make([]string, 0, len(m))
 260  	for k := range m {
 261  		keys = append(keys, k)
 262  	}
 263  	sort.Strings(keys)
 264  	return keys
 265  }
 266  
 267  // sortedKeys2 returns the sorted keys found in a string->[]int64 map.
 268  //
 269  // Note: this is currently non-generic since github pprof runs golint,
 270  // which does not support generics. When that issue is fixed, it can
 271  // be merged with sortedKeys1 and made into a generic function.
 272  func sortedKeys2(m map[string][]int64) []string {
 273  	if len(m) == 0 {
 274  		return nil
 275  	}
 276  	keys := make([]string, 0, len(m))
 277  	for k := range m {
 278  		keys = append(keys, k)
 279  	}
 280  	sort.Strings(keys)
 281  	return keys
 282  }
 283  
 284  func (pm *profileMerger) mapLocation(src *Location) *Location {
 285  	if src == nil {
 286  		return nil
 287  	}
 288  
 289  	if l := pm.locationsByID.get(src.ID); l != nil {
 290  		return l
 291  	}
 292  
 293  	mi := pm.mapMapping(src.Mapping)
 294  	l := &Location{
 295  		ID:       uint64(len(pm.p.Location) + 1),
 296  		Mapping:  mi.m,
 297  		Address:  uint64(int64(src.Address) + mi.offset),
 298  		Line:     make([]Line, len(src.Line)),
 299  		IsFolded: src.IsFolded,
 300  	}
 301  	for i, ln := range src.Line {
 302  		l.Line[i] = pm.mapLine(ln)
 303  	}
 304  	// Check memoization table. Must be done on the remapped location to
 305  	// account for the remapped mapping ID.
 306  	k := l.key()
 307  	if ll, ok := pm.locations[k]; ok {
 308  		pm.locationsByID.set(src.ID, ll)
 309  		return ll
 310  	}
 311  	pm.locationsByID.set(src.ID, l)
 312  	pm.locations[k] = l
 313  	pm.p.Location = append(pm.p.Location, l)
 314  	return l
 315  }
 316  
 317  // key generates locationKey to be used as a key for maps.
 318  func (l *Location) key() locationKey {
 319  	key := locationKey{
 320  		addr:     l.Address,
 321  		isFolded: l.IsFolded,
 322  	}
 323  	if l.Mapping != nil {
 324  		// Normalizes address to handle address space randomization.
 325  		key.addr -= l.Mapping.Start
 326  		key.mappingID = l.Mapping.ID
 327  	}
 328  	lines := make([]string, len(l.Line)*3)
 329  	for i, line := range l.Line {
 330  		if line.Function != nil {
 331  			lines[i*2] = strconv.FormatUint(line.Function.ID, 16)
 332  		}
 333  		lines[i*2+1] = strconv.FormatInt(line.Line, 16)
 334  		lines[i*2+2] = strconv.FormatInt(line.Column, 16)
 335  	}
 336  	key.lines = strings.Join(lines, "|")
 337  	return key
 338  }
 339  
 340  type locationKey struct {
 341  	addr, mappingID uint64
 342  	lines           string
 343  	isFolded        bool
 344  }
 345  
 346  func (pm *profileMerger) mapMapping(src *Mapping) mapInfo {
 347  	if src == nil {
 348  		return mapInfo{}
 349  	}
 350  
 351  	if mi, ok := pm.mappingsByID[src.ID]; ok {
 352  		return mi
 353  	}
 354  
 355  	// Check memoization tables.
 356  	mk := src.key()
 357  	if m, ok := pm.mappings[mk]; ok {
 358  		mi := mapInfo{m, int64(m.Start) - int64(src.Start)}
 359  		pm.mappingsByID[src.ID] = mi
 360  		return mi
 361  	}
 362  	m := &Mapping{
 363  		ID:                     uint64(len(pm.p.Mapping) + 1),
 364  		Start:                  src.Start,
 365  		Limit:                  src.Limit,
 366  		Offset:                 src.Offset,
 367  		File:                   src.File,
 368  		KernelRelocationSymbol: src.KernelRelocationSymbol,
 369  		BuildID:                src.BuildID,
 370  		HasFunctions:           src.HasFunctions,
 371  		HasFilenames:           src.HasFilenames,
 372  		HasLineNumbers:         src.HasLineNumbers,
 373  		HasInlineFrames:        src.HasInlineFrames,
 374  	}
 375  	pm.p.Mapping = append(pm.p.Mapping, m)
 376  
 377  	// Update memoization tables.
 378  	pm.mappings[mk] = m
 379  	mi := mapInfo{m, 0}
 380  	pm.mappingsByID[src.ID] = mi
 381  	return mi
 382  }
 383  
 384  // key generates encoded strings of Mapping to be used as a key for
 385  // maps.
 386  func (m *Mapping) key() mappingKey {
 387  	// Normalize addresses to handle address space randomization.
 388  	// Round up to next 4K boundary to avoid minor discrepancies.
 389  	const mapsizeRounding = 0x1000
 390  
 391  	size := m.Limit - m.Start
 392  	size = size + mapsizeRounding - 1
 393  	size = size - (size % mapsizeRounding)
 394  	key := mappingKey{
 395  		size:   size,
 396  		offset: m.Offset,
 397  	}
 398  
 399  	switch {
 400  	case m.BuildID != "":
 401  		key.buildIDOrFile = m.BuildID
 402  	case m.File != "":
 403  		key.buildIDOrFile = m.File
 404  	default:
 405  		// A mapping containing neither build ID nor file name is a fake mapping. A
 406  		// key with empty buildIDOrFile is used for fake mappings so that they are
 407  		// treated as the same mapping during merging.
 408  	}
 409  	return key
 410  }
 411  
 412  type mappingKey struct {
 413  	size, offset  uint64
 414  	buildIDOrFile string
 415  }
 416  
 417  func (pm *profileMerger) mapLine(src Line) Line {
 418  	ln := Line{
 419  		Function: pm.mapFunction(src.Function),
 420  		Line:     src.Line,
 421  		Column:   src.Column,
 422  	}
 423  	return ln
 424  }
 425  
 426  func (pm *profileMerger) mapFunction(src *Function) *Function {
 427  	if src == nil {
 428  		return nil
 429  	}
 430  	if f, ok := pm.functionsByID[src.ID]; ok {
 431  		return f
 432  	}
 433  	k := src.key()
 434  	if f, ok := pm.functions[k]; ok {
 435  		pm.functionsByID[src.ID] = f
 436  		return f
 437  	}
 438  	f := &Function{
 439  		ID:         uint64(len(pm.p.Function) + 1),
 440  		Name:       src.Name,
 441  		SystemName: src.SystemName,
 442  		Filename:   src.Filename,
 443  		StartLine:  src.StartLine,
 444  	}
 445  	pm.functions[k] = f
 446  	pm.functionsByID[src.ID] = f
 447  	pm.p.Function = append(pm.p.Function, f)
 448  	return f
 449  }
 450  
 451  // key generates a struct to be used as a key for maps.
 452  func (f *Function) key() functionKey {
 453  	return functionKey{
 454  		f.StartLine,
 455  		f.Name,
 456  		f.SystemName,
 457  		f.Filename,
 458  	}
 459  }
 460  
 461  type functionKey struct {
 462  	startLine                  int64
 463  	name, systemName, fileName string
 464  }
 465  
 466  // combineHeaders checks that all profiles can be merged and returns
 467  // their combined profile.
 468  func combineHeaders(srcs []*Profile) (*Profile, error) {
 469  	for _, s := range srcs[1:] {
 470  		if err := srcs[0].compatible(s); err != nil {
 471  			return nil, err
 472  		}
 473  	}
 474  
 475  	var timeNanos, durationNanos, period int64
 476  	var comments []string
 477  	seenComments := map[string]bool{}
 478  	var docURL string
 479  	var defaultSampleType string
 480  	for _, s := range srcs {
 481  		if timeNanos == 0 || s.TimeNanos < timeNanos {
 482  			timeNanos = s.TimeNanos
 483  		}
 484  		durationNanos += s.DurationNanos
 485  		if period == 0 || period < s.Period {
 486  			period = s.Period
 487  		}
 488  		for _, c := range s.Comments {
 489  			if seen := seenComments[c]; !seen {
 490  				comments = append(comments, c)
 491  				seenComments[c] = true
 492  			}
 493  		}
 494  		if defaultSampleType == "" {
 495  			defaultSampleType = s.DefaultSampleType
 496  		}
 497  		if docURL == "" {
 498  			docURL = s.DocURL
 499  		}
 500  	}
 501  
 502  	p := &Profile{
 503  		SampleType: make([]*ValueType, len(srcs[0].SampleType)),
 504  
 505  		DropFrames: srcs[0].DropFrames,
 506  		KeepFrames: srcs[0].KeepFrames,
 507  
 508  		TimeNanos:     timeNanos,
 509  		DurationNanos: durationNanos,
 510  		PeriodType:    srcs[0].PeriodType,
 511  		Period:        period,
 512  
 513  		Comments:          comments,
 514  		DefaultSampleType: defaultSampleType,
 515  		DocURL:            docURL,
 516  	}
 517  	copy(p.SampleType, srcs[0].SampleType)
 518  	return p, nil
 519  }
 520  
 521  // compatible determines if two profiles can be compared/merged.
 522  // returns nil if the profiles are compatible; otherwise an error with
 523  // details on the incompatibility.
 524  func (p *Profile) compatible(pb *Profile) error {
 525  	if !equalValueType(p.PeriodType, pb.PeriodType) {
 526  		return fmt.Errorf("incompatible period types %v and %v", p.PeriodType, pb.PeriodType)
 527  	}
 528  
 529  	if len(p.SampleType) != len(pb.SampleType) {
 530  		return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
 531  	}
 532  
 533  	for i := range p.SampleType {
 534  		if !equalValueType(p.SampleType[i], pb.SampleType[i]) {
 535  			return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
 536  		}
 537  	}
 538  	return nil
 539  }
 540  
 541  // equalValueType returns true if the two value types are semantically
 542  // equal. It ignores the internal fields used during encode/decode.
 543  func equalValueType(st1, st2 *ValueType) bool {
 544  	return st1.Type == st2.Type && st1.Unit == st2.Unit
 545  }
 546  
 547  // locationIDMap is like a map[uint64]*Location, but provides efficiency for
 548  // ids that are densely numbered, which is often the case.
 549  type locationIDMap struct {
 550  	dense  []*Location          // indexed by id for id < len(dense)
 551  	sparse map[uint64]*Location // indexed by id for id >= len(dense)
 552  }
 553  
 554  func makeLocationIDMap(n int) locationIDMap {
 555  	return locationIDMap{
 556  		dense:  make([]*Location, n),
 557  		sparse: map[uint64]*Location{},
 558  	}
 559  }
 560  
 561  func (lm locationIDMap) get(id uint64) *Location {
 562  	if id < uint64(len(lm.dense)) {
 563  		return lm.dense[int(id)]
 564  	}
 565  	return lm.sparse[id]
 566  }
 567  
 568  func (lm locationIDMap) set(id uint64, loc *Location) {
 569  	if id < uint64(len(lm.dense)) {
 570  		lm.dense[id] = loc
 571  		return
 572  	}
 573  	lm.sparse[id] = loc
 574  }
 575  
 576  // CompatibilizeSampleTypes makes profiles compatible to be compared/merged. It
 577  // keeps sample types that appear in all profiles only and drops/reorders the
 578  // sample types as necessary.
 579  //
 580  // In the case of sample types order is not the same for given profiles the
 581  // order is derived from the first profile.
 582  //
 583  // Profiles are modified in-place.
 584  //
 585  // It returns an error if the sample type's intersection is empty.
 586  func CompatibilizeSampleTypes(ps []*Profile) error {
 587  	sTypes := commonSampleTypes(ps)
 588  	if len(sTypes) == 0 {
 589  		return fmt.Errorf("profiles have empty common sample type list")
 590  	}
 591  	for _, p := range ps {
 592  		if err := compatibilizeSampleTypes(p, sTypes); err != nil {
 593  			return err
 594  		}
 595  	}
 596  	return nil
 597  }
 598  
 599  // commonSampleTypes returns sample types that appear in all profiles in the
 600  // order how they ordered in the first profile.
 601  func commonSampleTypes(ps []*Profile) []string {
 602  	if len(ps) == 0 {
 603  		return nil
 604  	}
 605  	sTypes := map[string]int{}
 606  	for _, p := range ps {
 607  		for _, st := range p.SampleType {
 608  			sTypes[st.Type]++
 609  		}
 610  	}
 611  	var res []string
 612  	for _, st := range ps[0].SampleType {
 613  		if sTypes[st.Type] == len(ps) {
 614  			res = append(res, st.Type)
 615  		}
 616  	}
 617  	return res
 618  }
 619  
 620  // compatibilizeSampleTypes drops sample types that are not present in sTypes
 621  // list and reorder them if needed.
 622  //
 623  // It sets DefaultSampleType to sType[0] if it is not in sType list.
 624  //
 625  // It assumes that all sample types from the sTypes list are present in the
 626  // given profile otherwise it returns an error.
 627  func compatibilizeSampleTypes(p *Profile, sTypes []string) error {
 628  	if len(sTypes) == 0 {
 629  		return fmt.Errorf("sample type list is empty")
 630  	}
 631  	defaultSampleType := sTypes[0]
 632  	reMap, needToModify := make([]int, len(sTypes)), false
 633  	for i, st := range sTypes {
 634  		if st == p.DefaultSampleType {
 635  			defaultSampleType = p.DefaultSampleType
 636  		}
 637  		idx := searchValueType(p.SampleType, st)
 638  		if idx < 0 {
 639  			return fmt.Errorf("%q sample type is not found in profile", st)
 640  		}
 641  		reMap[i] = idx
 642  		if idx != i {
 643  			needToModify = true
 644  		}
 645  	}
 646  	if !needToModify && len(sTypes) == len(p.SampleType) {
 647  		return nil
 648  	}
 649  	p.DefaultSampleType = defaultSampleType
 650  	oldSampleTypes := p.SampleType
 651  	p.SampleType = make([]*ValueType, len(sTypes))
 652  	for i, idx := range reMap {
 653  		p.SampleType[i] = oldSampleTypes[idx]
 654  	}
 655  	values := make([]int64, len(sTypes))
 656  	for _, s := range p.Sample {
 657  		for i, idx := range reMap {
 658  			values[i] = s.Value[idx]
 659  		}
 660  		s.Value = s.Value[:len(values)]
 661  		copy(s.Value, values)
 662  	}
 663  	return nil
 664  }
 665  
 666  func searchValueType(vts []*ValueType, s string) int {
 667  	for i, vt := range vts {
 668  		if vt.Type == s {
 669  			return i
 670  		}
 671  	}
 672  	return -1
 673  }
 674