legacy_profile.go raw

   1  // Copyright 2014 Google Inc. All Rights Reserved.
   2  //
   3  // Licensed under the Apache License, Version 2.0 (the "License");
   4  // you may not use this file except in compliance with the License.
   5  // You may obtain a copy of the License at
   6  //
   7  //     http://www.apache.org/licenses/LICENSE-2.0
   8  //
   9  // Unless required by applicable law or agreed to in writing, software
  10  // distributed under the License is distributed on an "AS IS" BASIS,
  11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  // See the License for the specific language governing permissions and
  13  // limitations under the License.
  14  
  15  // This file implements parsers to convert legacy profiles into the
  16  // profile.proto format.
  17  
  18  package profile
  19  
  20  import (
  21  	"bufio"
  22  	"bytes"
  23  	"fmt"
  24  	"io"
  25  	"math"
  26  	"regexp"
  27  	"strconv"
  28  	"strings"
  29  )
  30  
var (
	// Go count profiles (see parseGoCount). countStartRE matches the
	// header line and captures the profile type; countRE matches a
	// sample line, capturing the count and the list of hex addresses.
	countStartRE = regexp.MustCompile(`\A(\S+) profile: total \d+\z`)
	countRE      = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\z`)

	// Heap profiles (see parseHeapHeader and parseHeapSample). Both
	// patterns capture two counters, two bracketed counters, and a
	// trailing field: for the header, the format name and an optional
	// number after '/'; for a sample, the list of addresses.
	heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
	heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)

	// Contention sample line: two numbers followed by '@' and the list
	// of addresses (see parseContentionSample).
	contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)

	// A single 0x-prefixed lower-case hex number (see parseHexAddresses).
	hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)

	// Header of a growth profile; parseHeap gives these a period of 1.
	growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz?`)

	// Header of a fragmentation profile; parseHeap gives these a period of 1.
	fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz?`)

	// Thread profiles (see parseThread). threadzStartRE matches the
	// overall header; threadStartRE matches the line that introduces
	// one thread's stack and has three capture groups.
	threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
	threadStartRE  = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)

	// Regular expressions to parse process mappings. Support the format used by Linux /proc/.../maps and other tools.
	// Recommended format:
	// Start   End     object file name     offset(optional)   linker build id
	// 0x40000-0x80000 /path/to/binary      (@FF00)            abc123456
	spaceDigits = `\s+[[:digit:]]+`
	hexPair     = `\s+[[:xdigit:]]+:[[:xdigit:]]+`
	oSpace      = `\s*`
	// Capturing expressions.
	cHex           = `(?:0x)?([[:xdigit:]]+)`
	cHexRange      = `\s*` + cHex + `[\s-]?` + oSpace + cHex + `:?`
	cSpaceString   = `(?:\s+(\S+))?`
	cSpaceHex      = `(?:\s+([[:xdigit:]]+))?`
	cSpaceAtOffset = `(?:\s+\(@([[:xdigit:]]+)\))?`
	cPerm          = `(?:\s+([-rwxp]+))?`

	// Full mapping-line patterns assembled from the fragments above.
	// NOTE(review): their consumer is outside this view; presumably
	// procMapsRE targets /proc/<pid>/maps lines and briefMapsRE the
	// shorter "recommended format" shown above -- confirm at the caller.
	procMapsRE  = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceHex + hexPair + spaceDigits + cSpaceString)
	briefMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceString + cSpaceAtOffset + cSpaceHex)

	// Regular expression to parse log data, of the form:
	// ... file:line] msg...
	logInfoRE = regexp.MustCompile(`^[^\[\]]+:[0-9]+]\s`)
)
  71  
  72  func isSpaceOrComment(line string) bool {
  73  	trimmed := strings.TrimSpace(line)
  74  	return len(trimmed) == 0 || trimmed[0] == '#'
  75  }
  76  
  77  // parseGoCount parses a Go count profile (e.g., threadcreate or
  78  // goroutine) and returns a new Profile.
  79  func parseGoCount(b []byte) (*Profile, error) {
  80  	s := bufio.NewScanner(bytes.NewBuffer(b))
  81  	// Skip comments at the beginning of the file.
  82  	for s.Scan() && isSpaceOrComment(s.Text()) {
  83  	}
  84  	if err := s.Err(); err != nil {
  85  		return nil, err
  86  	}
  87  	m := countStartRE.FindStringSubmatch(s.Text())
  88  	if m == nil {
  89  		return nil, errUnrecognized
  90  	}
  91  	profileType := m[1]
  92  	p := &Profile{
  93  		PeriodType: &ValueType{Type: profileType, Unit: "count"},
  94  		Period:     1,
  95  		SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
  96  	}
  97  	locations := make(map[uint64]*Location)
  98  	for s.Scan() {
  99  		line := s.Text()
 100  		if isSpaceOrComment(line) {
 101  			continue
 102  		}
 103  		if strings.HasPrefix(line, "---") {
 104  			break
 105  		}
 106  		m := countRE.FindStringSubmatch(line)
 107  		if m == nil {
 108  			return nil, errMalformed
 109  		}
 110  		n, err := strconv.ParseInt(m[1], 0, 64)
 111  		if err != nil {
 112  			return nil, errMalformed
 113  		}
 114  		fields := strings.Fields(m[2])
 115  		locs := make([]*Location, 0, len(fields))
 116  		for _, stk := range fields {
 117  			addr, err := strconv.ParseUint(stk, 0, 64)
 118  			if err != nil {
 119  				return nil, errMalformed
 120  			}
 121  			// Adjust all frames by -1 to land on top of the call instruction.
 122  			addr--
 123  			loc := locations[addr]
 124  			if loc == nil {
 125  				loc = &Location{
 126  					Address: addr,
 127  				}
 128  				locations[addr] = loc
 129  				p.Location = append(p.Location, loc)
 130  			}
 131  			locs = append(locs, loc)
 132  		}
 133  		p.Sample = append(p.Sample, &Sample{
 134  			Location: locs,
 135  			Value:    []int64{n},
 136  		})
 137  	}
 138  	if err := s.Err(); err != nil {
 139  		return nil, err
 140  	}
 141  
 142  	if err := parseAdditionalSections(s, p); err != nil {
 143  		return nil, err
 144  	}
 145  	return p, nil
 146  }
 147  
 148  // remapLocationIDs ensures there is a location for each address
 149  // referenced by a sample, and remaps the samples to point to the new
 150  // location ids.
 151  func (p *Profile) remapLocationIDs() {
 152  	seen := make(map[*Location]bool, len(p.Location))
 153  	var locs []*Location
 154  
 155  	for _, s := range p.Sample {
 156  		for _, l := range s.Location {
 157  			if seen[l] {
 158  				continue
 159  			}
 160  			l.ID = uint64(len(locs) + 1)
 161  			locs = append(locs, l)
 162  			seen[l] = true
 163  		}
 164  	}
 165  	p.Location = locs
 166  }
 167  
 168  func (p *Profile) remapFunctionIDs() {
 169  	seen := make(map[*Function]bool, len(p.Function))
 170  	var fns []*Function
 171  
 172  	for _, l := range p.Location {
 173  		for _, ln := range l.Line {
 174  			fn := ln.Function
 175  			if fn == nil || seen[fn] {
 176  				continue
 177  			}
 178  			fn.ID = uint64(len(fns) + 1)
 179  			fns = append(fns, fn)
 180  			seen[fn] = true
 181  		}
 182  	}
 183  	p.Function = fns
 184  }
 185  
// remapMappingIDs matches location addresses with existing mappings
// and updates them appropriately. This is O(N*M), if this ever shows
// up as a bottleneck, evaluate sorting the mappings and doing a
// binary search, which would make it O(N*log(M)).
//
// Besides attaching a mapping to every location that has a non-zero
// address and no mapping yet, it may drop or adjust entries of
// p.Mapping (see the heuristics below), may append one catch-all
// mapping, and finally renumbers all mappings starting at 1.
func (p *Profile) remapMappingIDs() {
	// Some profile handlers will incorrectly set regions for the main
	// executable if its section is remapped. Fix them through heuristics.

	if len(p.Mapping) > 0 {
		// Remove the initial mapping if named '/anon_hugepage' and has a
		// consecutive adjacent mapping.
		if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
			if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
				p.Mapping = p.Mapping[1:]
			}
		}
	}

	// Subtract the offset from the start of the main mapping if it
	// ends up at a recognizable start address.
	if len(p.Mapping) > 0 {
		const expectedStart = 0x400000
		if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
			m.Start = expectedStart
			m.Offset = 0
		}
	}

	// Associate each location with an address to the corresponding
	// mapping. Create fake mapping if a suitable one isn't found.
	var fake *Mapping
nextLocation:
	for _, l := range p.Location {
		a := l.Address
		// Leave locations that already have a mapping, or no address, alone.
		if l.Mapping != nil || a == 0 {
			continue
		}
		// First choice: a mapping whose [Start, Limit) range holds the address.
		for _, m := range p.Mapping {
			if m.Start <= a && a < m.Limit {
				l.Mapping = m
				continue nextLocation
			}
		}
		// Work around legacy handlers failing to encode the first
		// part of mappings split into adjacent ranges.
		// The mapping is widened in place, so later locations see the
		// extended range in the first loop above.
		for _, m := range p.Mapping {
			if m.Offset != 0 && m.Start-m.Offset <= a && a < m.Start {
				m.Start -= m.Offset
				m.Offset = 0
				l.Mapping = m
				continue nextLocation
			}
		}
		// If there is still no mapping, create a fake one.
		// This is important for the Go legacy handler, which produced
		// no mappings.
		if fake == nil {
			// Start is left at zero and Limit is the largest uint64, so
			// the fake mapping's range holds every address below that.
			fake = &Mapping{
				ID:    1,
				Limit: ^uint64(0),
			}
			p.Mapping = append(p.Mapping, fake)
		}
		l.Mapping = fake
	}

	// Reset all mapping IDs.
	for i, m := range p.Mapping {
		m.ID = uint64(i + 1)
	}
}
 257  
// cpuInts lists the word decoders that parseCPU tries, in this order,
// when probing the header of a binary CPU profile: 32-bit little- and
// big-endian, then 64-bit little- and big-endian.
var cpuInts = []func([]byte) (uint64, []byte){
	get32l,
	get32b,
	get64l,
	get64b,
}
 264  
 265  func get32l(b []byte) (uint64, []byte) {
 266  	if len(b) < 4 {
 267  		return 0, nil
 268  	}
 269  	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
 270  }
 271  
 272  func get32b(b []byte) (uint64, []byte) {
 273  	if len(b) < 4 {
 274  		return 0, nil
 275  	}
 276  	return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
 277  }
 278  
 279  func get64l(b []byte) (uint64, []byte) {
 280  	if len(b) < 8 {
 281  		return 0, nil
 282  	}
 283  	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
 284  }
 285  
 286  func get64b(b []byte) (uint64, []byte) {
 287  	if len(b) < 8 {
 288  		return 0, nil
 289  	}
 290  	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
 291  }
 292  
 293  // parseCPU parses a profilez legacy profile and returns a newly
 294  // populated Profile.
 295  //
 296  // The general format for profilez samples is a sequence of words in
 297  // binary format. The first words are a header with the following data:
 298  //
 299  //	1st word -- 0
 300  //	2nd word -- 3
 301  //	3rd word -- 0 if a c++ application, 1 if a java application.
 302  //	4th word -- Sampling period (in microseconds).
 303  //	5th word -- Padding.
 304  func parseCPU(b []byte) (*Profile, error) {
 305  	var parse func([]byte) (uint64, []byte)
 306  	var n1, n2, n3, n4, n5 uint64
 307  	for _, parse = range cpuInts {
 308  		var tmp []byte
 309  		n1, tmp = parse(b)
 310  		n2, tmp = parse(tmp)
 311  		n3, tmp = parse(tmp)
 312  		n4, tmp = parse(tmp)
 313  		n5, tmp = parse(tmp)
 314  
 315  		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
 316  			b = tmp
 317  			return cpuProfile(b, int64(n4), parse)
 318  		}
 319  		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 1 && n4 > 0 && n5 == 0 {
 320  			b = tmp
 321  			return javaCPUProfile(b, int64(n4), parse)
 322  		}
 323  	}
 324  	return nil, errUnrecognized
 325  }
 326  
 327  // cpuProfile returns a new Profile from C++ profilez data.
 328  // b is the profile bytes after the header, period is the profiling
 329  // period, and parse is a function to parse 8-byte chunks from the
 330  // profile in its native endianness.
 331  func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
 332  	p := &Profile{
 333  		Period:     period * 1000,
 334  		PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
 335  		SampleType: []*ValueType{
 336  			{Type: "samples", Unit: "count"},
 337  			{Type: "cpu", Unit: "nanoseconds"},
 338  		},
 339  	}
 340  	var err error
 341  	if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
 342  		return nil, err
 343  	}
 344  
 345  	// If *most* samples have the same second-to-the-bottom frame, it
 346  	// strongly suggests that it is an uninteresting artifact of
 347  	// measurement -- a stack frame pushed by the signal handler. The
 348  	// bottom frame is always correct as it is picked up from the signal
 349  	// structure, not the stack. Check if this is the case and if so,
 350  	// remove.
 351  
 352  	// Remove up to two frames.
 353  	maxiter := 2
 354  	// Allow one different sample for this many samples with the same
 355  	// second-to-last frame.
 356  	similarSamples := 32
 357  	margin := len(p.Sample) / similarSamples
 358  
 359  	for iter := 0; iter < maxiter; iter++ {
 360  		addr1 := make(map[uint64]int)
 361  		for _, s := range p.Sample {
 362  			if len(s.Location) > 1 {
 363  				a := s.Location[1].Address
 364  				addr1[a] = addr1[a] + 1
 365  			}
 366  		}
 367  
 368  		for id1, count := range addr1 {
 369  			if count >= len(p.Sample)-margin {
 370  				// Found uninteresting frame, strip it out from all samples
 371  				for _, s := range p.Sample {
 372  					if len(s.Location) > 1 && s.Location[1].Address == id1 {
 373  						s.Location = append(s.Location[:1], s.Location[2:]...)
 374  					}
 375  				}
 376  				break
 377  			}
 378  		}
 379  	}
 380  
 381  	if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
 382  		return nil, err
 383  	}
 384  
 385  	cleanupDuplicateLocations(p)
 386  	return p, nil
 387  }
 388  
 389  func cleanupDuplicateLocations(p *Profile) {
 390  	// The profile handler may duplicate the leaf frame, because it gets
 391  	// its address both from stack unwinding and from the signal
 392  	// context. Detect this and delete the duplicate, which has been
 393  	// adjusted by -1. The leaf address should not be adjusted as it is
 394  	// not a call.
 395  	for _, s := range p.Sample {
 396  		if len(s.Location) > 1 && s.Location[0].Address == s.Location[1].Address+1 {
 397  			s.Location = append(s.Location[:1], s.Location[2:]...)
 398  		}
 399  	}
 400  }
 401  
 402  // parseCPUSamples parses a collection of profilez samples from a
 403  // profile.
 404  //
 405  // profilez samples are a repeated sequence of stack frames of the
 406  // form:
 407  //
 408  //	1st word -- The number of times this stack was encountered.
 409  //	2nd word -- The size of the stack (StackSize).
 410  //	3rd word -- The first address on the stack.
 411  //	...
 412  //	StackSize + 2 -- The last address on the stack
 413  //
 414  // The last stack trace is of the form:
 415  //
 416  //	1st word -- 0
 417  //	2nd word -- 1
 418  //	3rd word -- 0
 419  //
 420  // Addresses from stack traces may point to the next instruction after
 421  // each call. Optionally adjust by -1 to land somewhere on the actual
 422  // call (except for the leaf, which is not a call).
 423  func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
 424  	locs := make(map[uint64]*Location)
 425  	for len(b) > 0 {
 426  		var count, nstk uint64
 427  		count, b = parse(b)
 428  		nstk, b = parse(b)
 429  		if b == nil || nstk > uint64(len(b)/4) {
 430  			return nil, nil, errUnrecognized
 431  		}
 432  		var sloc []*Location
 433  		addrs := make([]uint64, nstk)
 434  		for i := 0; i < int(nstk); i++ {
 435  			addrs[i], b = parse(b)
 436  		}
 437  
 438  		if count == 0 && nstk == 1 && addrs[0] == 0 {
 439  			// End of data marker
 440  			break
 441  		}
 442  		for i, addr := range addrs {
 443  			if adjust && i > 0 {
 444  				addr--
 445  			}
 446  			loc := locs[addr]
 447  			if loc == nil {
 448  				loc = &Location{
 449  					Address: addr,
 450  				}
 451  				locs[addr] = loc
 452  				p.Location = append(p.Location, loc)
 453  			}
 454  			sloc = append(sloc, loc)
 455  		}
 456  		p.Sample = append(p.Sample,
 457  			&Sample{
 458  				Value:    []int64{int64(count), int64(count) * p.Period},
 459  				Location: sloc,
 460  			})
 461  	}
 462  	// Reached the end without finding the EOD marker.
 463  	return b, locs, nil
 464  }
 465  
 466  // parseHeap parses a heapz legacy or a growthz profile and
 467  // returns a newly populated Profile.
 468  func parseHeap(b []byte) (p *Profile, err error) {
 469  	s := bufio.NewScanner(bytes.NewBuffer(b))
 470  	if !s.Scan() {
 471  		if err := s.Err(); err != nil {
 472  			return nil, err
 473  		}
 474  		return nil, errUnrecognized
 475  	}
 476  	p = &Profile{}
 477  
 478  	sampling := ""
 479  	hasAlloc := false
 480  
 481  	line := s.Text()
 482  	p.PeriodType = &ValueType{Type: "space", Unit: "bytes"}
 483  	if header := heapHeaderRE.FindStringSubmatch(line); header != nil {
 484  		sampling, p.Period, hasAlloc, err = parseHeapHeader(line)
 485  		if err != nil {
 486  			return nil, err
 487  		}
 488  	} else if header = growthHeaderRE.FindStringSubmatch(line); header != nil {
 489  		p.Period = 1
 490  	} else if header = fragmentationHeaderRE.FindStringSubmatch(line); header != nil {
 491  		p.Period = 1
 492  	} else {
 493  		return nil, errUnrecognized
 494  	}
 495  
 496  	if hasAlloc {
 497  		// Put alloc before inuse so that default pprof selection
 498  		// will prefer inuse_space.
 499  		p.SampleType = []*ValueType{
 500  			{Type: "alloc_objects", Unit: "count"},
 501  			{Type: "alloc_space", Unit: "bytes"},
 502  			{Type: "inuse_objects", Unit: "count"},
 503  			{Type: "inuse_space", Unit: "bytes"},
 504  		}
 505  	} else {
 506  		p.SampleType = []*ValueType{
 507  			{Type: "objects", Unit: "count"},
 508  			{Type: "space", Unit: "bytes"},
 509  		}
 510  	}
 511  
 512  	locs := make(map[uint64]*Location)
 513  	for s.Scan() {
 514  		line := strings.TrimSpace(s.Text())
 515  
 516  		if isSpaceOrComment(line) {
 517  			continue
 518  		}
 519  
 520  		if isMemoryMapSentinel(line) {
 521  			break
 522  		}
 523  
 524  		value, blocksize, addrs, err := parseHeapSample(line, p.Period, sampling, hasAlloc)
 525  		if err != nil {
 526  			return nil, err
 527  		}
 528  
 529  		var sloc []*Location
 530  		for _, addr := range addrs {
 531  			// Addresses from stack traces point to the next instruction after
 532  			// each call. Adjust by -1 to land somewhere on the actual call.
 533  			addr--
 534  			loc := locs[addr]
 535  			if locs[addr] == nil {
 536  				loc = &Location{
 537  					Address: addr,
 538  				}
 539  				p.Location = append(p.Location, loc)
 540  				locs[addr] = loc
 541  			}
 542  			sloc = append(sloc, loc)
 543  		}
 544  
 545  		p.Sample = append(p.Sample, &Sample{
 546  			Value:    value,
 547  			Location: sloc,
 548  			NumLabel: map[string][]int64{"bytes": {blocksize}},
 549  		})
 550  	}
 551  	if err := s.Err(); err != nil {
 552  		return nil, err
 553  	}
 554  	if err := parseAdditionalSections(s, p); err != nil {
 555  		return nil, err
 556  	}
 557  	return p, nil
 558  }
 559  
 560  func parseHeapHeader(line string) (sampling string, period int64, hasAlloc bool, err error) {
 561  	header := heapHeaderRE.FindStringSubmatch(line)
 562  	if header == nil {
 563  		return "", 0, false, errUnrecognized
 564  	}
 565  
 566  	if len(header[6]) > 0 {
 567  		if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
 568  			return "", 0, false, errUnrecognized
 569  		}
 570  	}
 571  
 572  	if (header[3] != header[1] && header[3] != "0") || (header[4] != header[2] && header[4] != "0") {
 573  		hasAlloc = true
 574  	}
 575  
 576  	switch header[5] {
 577  	case "heapz_v2", "heap_v2":
 578  		return "v2", period, hasAlloc, nil
 579  	case "heapprofile":
 580  		return "", 1, hasAlloc, nil
 581  	case "heap":
 582  		return "v2", period / 2, hasAlloc, nil
 583  	default:
 584  		return "", 0, false, errUnrecognized
 585  	}
 586  }
 587  
 588  // parseHeapSample parses a single row from a heap profile into a new Sample.
 589  func parseHeapSample(line string, rate int64, sampling string, includeAlloc bool) (value []int64, blocksize int64, addrs []uint64, err error) {
 590  	sampleData := heapSampleRE.FindStringSubmatch(line)
 591  	if len(sampleData) != 6 {
 592  		return nil, 0, nil, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
 593  	}
 594  
 595  	// This is a local-scoped helper function to avoid needing to pass
 596  	// around rate, sampling and many return parameters.
 597  	addValues := func(countString, sizeString string, label string) error {
 598  		count, err := strconv.ParseInt(countString, 10, 64)
 599  		if err != nil {
 600  			return fmt.Errorf("malformed sample: %s: %v", line, err)
 601  		}
 602  		size, err := strconv.ParseInt(sizeString, 10, 64)
 603  		if err != nil {
 604  			return fmt.Errorf("malformed sample: %s: %v", line, err)
 605  		}
 606  		if count == 0 && size != 0 {
 607  			return fmt.Errorf("%s count was 0 but %s bytes was %d", label, label, size)
 608  		}
 609  		if count != 0 {
 610  			blocksize = size / count
 611  			if sampling == "v2" {
 612  				count, size = scaleHeapSample(count, size, rate)
 613  			}
 614  		}
 615  		value = append(value, count, size)
 616  		return nil
 617  	}
 618  
 619  	if includeAlloc {
 620  		if err := addValues(sampleData[3], sampleData[4], "allocation"); err != nil {
 621  			return nil, 0, nil, err
 622  		}
 623  	}
 624  
 625  	if err := addValues(sampleData[1], sampleData[2], "inuse"); err != nil {
 626  		return nil, 0, nil, err
 627  	}
 628  
 629  	addrs, err = parseHexAddresses(sampleData[5])
 630  	if err != nil {
 631  		return nil, 0, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
 632  	}
 633  
 634  	return value, blocksize, addrs, nil
 635  }
 636  
 637  // parseHexAddresses extracts hex numbers from a string, attempts to convert
 638  // each to an unsigned 64-bit number and returns the resulting numbers as a
 639  // slice, or an error if the string contains hex numbers which are too large to
 640  // handle (which means a malformed profile).
 641  func parseHexAddresses(s string) ([]uint64, error) {
 642  	hexStrings := hexNumberRE.FindAllString(s, -1)
 643  	var addrs []uint64
 644  	for _, s := range hexStrings {
 645  		if addr, err := strconv.ParseUint(s, 0, 64); err == nil {
 646  			addrs = append(addrs, addr)
 647  		} else {
 648  			return nil, fmt.Errorf("failed to parse as hex 64-bit number: %s", s)
 649  		}
 650  	}
 651  	return addrs, nil
 652  }
 653  
 654  // scaleHeapSample adjusts the data from a heapz Sample to
 655  // account for its probability of appearing in the collected
 656  // data. heapz profiles are a sampling of the memory allocations
 657  // requests in a program. We estimate the unsampled value by dividing
 658  // each collected sample by its probability of appearing in the
 659  // profile. heapz v2 profiles rely on a poisson process to determine
 660  // which samples to collect, based on the desired average collection
 661  // rate R. The probability of a sample of size S to appear in that
 662  // profile is 1-exp(-S/R).
 663  func scaleHeapSample(count, size, rate int64) (int64, int64) {
 664  	if count == 0 || size == 0 {
 665  		return 0, 0
 666  	}
 667  
 668  	if rate <= 1 {
 669  		// if rate==1 all samples were collected so no adjustment is needed.
 670  		// if rate<1 treat as unknown and skip scaling.
 671  		return count, size
 672  	}
 673  
 674  	avgSize := float64(size) / float64(count)
 675  	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
 676  
 677  	return int64(float64(count) * scale), int64(float64(size) * scale)
 678  }
 679  
// parseContention parses a mutex or contention profile. There are 2 cases:
// "--- contentionz " for legacy C++ profiles (and backwards compatibility)
// "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
//
// After the header line comes a block of "attribute = value" lines,
// then the sample lines, then any additional sections, which are
// handed to parseAdditionalSections. errUnrecognized is returned for
// an unknown header, an unknown attribute, or an attribute value that
// fails to parse.
func parseContention(b []byte) (*Profile, error) {
	s := bufio.NewScanner(bytes.NewBuffer(b))
	if !s.Scan() {
		if err := s.Err(); err != nil {
			return nil, err
		}
		return nil, errUnrecognized
	}

	// Accept any of the three known header prefixes; the cases are
	// empty because recognition is all that is needed here.
	switch l := s.Text(); {
	case strings.HasPrefix(l, "--- contentionz "):
	case strings.HasPrefix(l, "--- mutex:"):
	case strings.HasPrefix(l, "--- contention:"):
	default:
		return nil, errUnrecognized
	}

	p := &Profile{
		PeriodType: &ValueType{Type: "contentions", Unit: "count"},
		Period:     1,
		SampleType: []*ValueType{
			{Type: "contentions", Unit: "count"},
			{Type: "delay", Unit: "nanoseconds"},
		},
	}

	// cpuHz stays 0 unless a "cycles/second" attribute is present.
	var cpuHz int64
	// Parse text of the form "attribute = value" before the samples.
	const delimiter = "="
	for s.Scan() {
		line := s.Text()
		if line = strings.TrimSpace(line); isSpaceOrComment(line) {
			continue
		}
		if strings.HasPrefix(line, "---") {
			break
		}
		attr := strings.SplitN(line, delimiter, 2)
		if len(attr) != 2 {
			// Not an attribute: this is the first sample line. The
			// scanner stays positioned on it for the sample loop below.
			break
		}
		key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
		var err error
		switch key {
		case "cycles/second":
			if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
				return nil, errUnrecognized
			}
		case "sampling period":
			if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
				return nil, errUnrecognized
			}
		case "ms since reset":
			ms, err := strconv.ParseInt(val, 0, 64)
			if err != nil {
				return nil, errUnrecognized
			}
			// Convert milliseconds to nanoseconds.
			p.DurationNanos = ms * 1000 * 1000
		case "format":
			// CPP contentionz profiles don't have format.
			return nil, errUnrecognized
		case "resolution":
			// CPP contentionz profiles don't have resolution.
			return nil, errUnrecognized
		case "discarded samples":
			// Recognized but deliberately ignored.
		default:
			return nil, errUnrecognized
		}
	}
	if err := s.Err(); err != nil {
		return nil, err
	}

	// One Location per distinct (adjusted) address, shared by all samples.
	locs := make(map[uint64]*Location)
	// The loop body runs before the next Scan because the attribute
	// loop above has already read the line to process first.
	for {
		line := strings.TrimSpace(s.Text())
		if strings.HasPrefix(line, "---") {
			break
		}
		if !isSpaceOrComment(line) {
			value, addrs, err := parseContentionSample(line, p.Period, cpuHz)
			if err != nil {
				return nil, err
			}
			var sloc []*Location
			for _, addr := range addrs {
				// Addresses from stack traces point to the next instruction after
				// each call. Adjust by -1 to land somewhere on the actual call.
				addr--
				loc := locs[addr]
				if locs[addr] == nil {
					loc = &Location{
						Address: addr,
					}
					p.Location = append(p.Location, loc)
					locs[addr] = loc
				}
				sloc = append(sloc, loc)
			}
			p.Sample = append(p.Sample, &Sample{
				Value:    value,
				Location: sloc,
			})
		}
		if !s.Scan() {
			break
		}
	}
	if err := s.Err(); err != nil {
		return nil, err
	}

	if err := parseAdditionalSections(s, p); err != nil {
		return nil, err
	}

	return p, nil
}
 801  
 802  // parseContentionSample parses a single row from a contention profile
 803  // into a new Sample.
 804  func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
 805  	sampleData := contentionSampleRE.FindStringSubmatch(line)
 806  	if sampleData == nil {
 807  		return nil, nil, errUnrecognized
 808  	}
 809  
 810  	v1, err := strconv.ParseInt(sampleData[1], 10, 64)
 811  	if err != nil {
 812  		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
 813  	}
 814  	v2, err := strconv.ParseInt(sampleData[2], 10, 64)
 815  	if err != nil {
 816  		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
 817  	}
 818  
 819  	// Unsample values if period and cpuHz are available.
 820  	// - Delays are scaled to cycles and then to nanoseconds.
 821  	// - Contentions are scaled to cycles.
 822  	if period > 0 {
 823  		if cpuHz > 0 {
 824  			cpuGHz := float64(cpuHz) / 1e9
 825  			v1 = int64(float64(v1) * float64(period) / cpuGHz)
 826  		}
 827  		v2 = v2 * period
 828  	}
 829  
 830  	value = []int64{v2, v1}
 831  	addrs, err = parseHexAddresses(sampleData[3])
 832  	if err != nil {
 833  		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
 834  	}
 835  
 836  	return value, addrs, nil
 837  }
 838  
// parseThread parses a Threadz profile and returns a new Profile.
//
// The input must start, after blank and comment lines, with either a
// "--- threadz N ---" header or directly with a thread header line;
// otherwise errUnrecognized is returned. Each thread contributes one
// sample with value 1, except that a thread whose trace comes back
// with no addresses increments the previous sample instead. Parsing
// of threads stops at the memory map sentinel or at a
// "---- no stack trace for" line; the rest of the input is handed to
// parseAdditionalSections.
func parseThread(b []byte) (*Profile, error) {
	s := bufio.NewScanner(bytes.NewBuffer(b))
	// Skip past comments and empty lines seeking a real header.
	for s.Scan() && isSpaceOrComment(s.Text()) {
	}

	line := s.Text()
	if m := threadzStartRE.FindStringSubmatch(line); m != nil {
		// Advance over initial comments until first stack trace.
		for s.Scan() {
			if line = s.Text(); isMemoryMapSentinel(line) || strings.HasPrefix(line, "-") {
				break
			}
		}
	} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
		// threadStartRE has three capture groups, so a successful match
		// always has length 4; anything else means no match.
		return nil, errUnrecognized
	}

	p := &Profile{
		SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
		PeriodType: &ValueType{Type: "thread", Unit: "count"},
		Period:     1,
	}

	// One Location per distinct (adjusted) address, shared by all samples.
	locs := make(map[uint64]*Location)
	// Recognize each thread and populate profile samples.
	// On entry to each iteration, line holds the header of the next
	// thread (or a terminating line), as left by parseThreadSample.
	for !isMemoryMapSentinel(line) {
		if strings.HasPrefix(line, "---- no stack trace for") {
			break
		}
		if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
			return nil, errUnrecognized
		}

		var addrs []uint64
		var err error
		line, addrs, err = parseThreadSample(s)
		if err != nil {
			return nil, err
		}
		if len(addrs) == 0 {
			// We got a --same as previous threads--. Bump counters.
			if len(p.Sample) > 0 {
				s := p.Sample[len(p.Sample)-1]
				s.Value[0]++
			}
			continue
		}

		var sloc []*Location
		for i, addr := range addrs {
			// Addresses from stack traces point to the next instruction after
			// each call. Adjust by -1 to land somewhere on the actual call
			// (except for the leaf, which is not a call).
			if i > 0 {
				addr--
			}
			loc := locs[addr]
			if locs[addr] == nil {
				loc = &Location{
					Address: addr,
				}
				p.Location = append(p.Location, loc)
				locs[addr] = loc
			}
			sloc = append(sloc, loc)
		}

		p.Sample = append(p.Sample, &Sample{
			Value:    []int64{1},
			Location: sloc,
		})
	}

	if err := parseAdditionalSections(s, p); err != nil {
		return nil, err
	}

	cleanupDuplicateLocations(p)
	return p, nil
}
 921  
 922  // parseThreadSample parses a symbolized or unsymbolized stack trace.
 923  // Returns the first line after the traceback, the sample (or nil if
 924  // it hits a 'same-as-previous' marker) and an error.
 925  func parseThreadSample(s *bufio.Scanner) (nextl string, addrs []uint64, err error) {
 926  	var line string
 927  	sameAsPrevious := false
 928  	for s.Scan() {
 929  		line = strings.TrimSpace(s.Text())
 930  		if line == "" {
 931  			continue
 932  		}
 933  
 934  		if strings.HasPrefix(line, "---") {
 935  			break
 936  		}
 937  		if strings.Contains(line, "same as previous thread") {
 938  			sameAsPrevious = true
 939  			continue
 940  		}
 941  
 942  		curAddrs, err := parseHexAddresses(line)
 943  		if err != nil {
 944  			return "", nil, fmt.Errorf("malformed sample: %s: %v", line, err)
 945  		}
 946  		addrs = append(addrs, curAddrs...)
 947  	}
 948  	if err := s.Err(); err != nil {
 949  		return "", nil, err
 950  	}
 951  	if sameAsPrevious {
 952  		return line, nil, nil
 953  	}
 954  	return line, addrs, nil
 955  }
 956  
 957  // parseAdditionalSections parses any additional sections in the
 958  // profile, ignoring any unrecognized sections.
 959  func parseAdditionalSections(s *bufio.Scanner, p *Profile) error {
 960  	for !isMemoryMapSentinel(s.Text()) && s.Scan() {
 961  	}
 962  	if err := s.Err(); err != nil {
 963  		return err
 964  	}
 965  	return p.ParseMemoryMapFromScanner(s)
 966  }
 967  
 968  // ParseProcMaps parses a memory map in the format of /proc/self/maps.
 969  // ParseMemoryMap should be called after setting on a profile to
 970  // associate locations to the corresponding mapping based on their
 971  // address.
 972  func ParseProcMaps(rd io.Reader) ([]*Mapping, error) {
 973  	s := bufio.NewScanner(rd)
 974  	return parseProcMapsFromScanner(s)
 975  }
 976  
 977  func parseProcMapsFromScanner(s *bufio.Scanner) ([]*Mapping, error) {
 978  	var mapping []*Mapping
 979  
 980  	var attrs []string
 981  	const delimiter = "="
 982  	r := strings.NewReplacer()
 983  	for s.Scan() {
 984  		line := r.Replace(removeLoggingInfo(s.Text()))
 985  		m, err := parseMappingEntry(line)
 986  		if err != nil {
 987  			if err == errUnrecognized {
 988  				// Recognize assignments of the form: attr=value, and replace
 989  				// $attr with value on subsequent mappings.
 990  				if attr := strings.SplitN(line, delimiter, 2); len(attr) == 2 {
 991  					attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
 992  					r = strings.NewReplacer(attrs...)
 993  				}
 994  				// Ignore any unrecognized entries
 995  				continue
 996  			}
 997  			return nil, err
 998  		}
 999  		if m == nil {
1000  			continue
1001  		}
1002  		mapping = append(mapping, m)
1003  	}
1004  	if err := s.Err(); err != nil {
1005  		return nil, err
1006  	}
1007  	return mapping, nil
1008  }
1009  
1010  // removeLoggingInfo detects and removes log prefix entries generated
1011  // by the glog package. If no logging prefix is detected, the string
1012  // is returned unmodified.
1013  func removeLoggingInfo(line string) string {
1014  	if match := logInfoRE.FindStringIndex(line); match != nil {
1015  		return line[match[1]:]
1016  	}
1017  	return line
1018  }
1019  
1020  // ParseMemoryMap parses a memory map in the format of
1021  // /proc/self/maps, and overrides the mappings in the current profile.
1022  // It renumbers the samples and locations in the profile correspondingly.
1023  func (p *Profile) ParseMemoryMap(rd io.Reader) error {
1024  	return p.ParseMemoryMapFromScanner(bufio.NewScanner(rd))
1025  }
1026  
1027  // ParseMemoryMapFromScanner parses a memory map in the format of
1028  // /proc/self/maps or a variety of legacy format, and overrides the
1029  // mappings in the current profile.  It renumbers the samples and
1030  // locations in the profile correspondingly.
1031  func (p *Profile) ParseMemoryMapFromScanner(s *bufio.Scanner) error {
1032  	mapping, err := parseProcMapsFromScanner(s)
1033  	if err != nil {
1034  		return err
1035  	}
1036  	p.Mapping = append(p.Mapping, mapping...)
1037  	p.massageMappings()
1038  	p.remapLocationIDs()
1039  	p.remapFunctionIDs()
1040  	p.remapMappingIDs()
1041  	return nil
1042  }
1043  
1044  func parseMappingEntry(l string) (*Mapping, error) {
1045  	var start, end, perm, file, offset, buildID string
1046  	if me := procMapsRE.FindStringSubmatch(l); len(me) == 6 {
1047  		start, end, perm, offset, file = me[1], me[2], me[3], me[4], me[5]
1048  	} else if me := briefMapsRE.FindStringSubmatch(l); len(me) == 7 {
1049  		start, end, perm, file, offset, buildID = me[1], me[2], me[3], me[4], me[5], me[6]
1050  	} else {
1051  		return nil, errUnrecognized
1052  	}
1053  
1054  	var err error
1055  	mapping := &Mapping{
1056  		File:    file,
1057  		BuildID: buildID,
1058  	}
1059  	if perm != "" && !strings.Contains(perm, "x") {
1060  		// Skip non-executable entries.
1061  		return nil, nil
1062  	}
1063  	if mapping.Start, err = strconv.ParseUint(start, 16, 64); err != nil {
1064  		return nil, errUnrecognized
1065  	}
1066  	if mapping.Limit, err = strconv.ParseUint(end, 16, 64); err != nil {
1067  		return nil, errUnrecognized
1068  	}
1069  	if offset != "" {
1070  		if mapping.Offset, err = strconv.ParseUint(offset, 16, 64); err != nil {
1071  			return nil, errUnrecognized
1072  		}
1073  	}
1074  	return mapping, nil
1075  }
1076  
// memoryMapSentinels lists the markers that announce the start of
// memory map information in a legacy profile.
var memoryMapSentinels = []string{
	"--- Memory map: ---",
	"MAPPED_LIBRARIES:",
}

// isMemoryMapSentinel reports whether line contains any of the known
// memory map sentinels. The sentinel may appear anywhere in the line.
func isMemoryMapSentinel(line string) bool {
	found := false
	for i := 0; i < len(memoryMapSentinels) && !found; i++ {
		found = strings.Contains(line, memoryMapSentinels[i])
	}
	return found
}
1092  
1093  func (p *Profile) addLegacyFrameInfo() {
1094  	switch {
1095  	case isProfileType(p, heapzSampleTypes):
1096  		p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
1097  	case isProfileType(p, contentionzSampleTypes):
1098  		p.DropFrames, p.KeepFrames = lockRxStr, ""
1099  	default:
1100  		p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
1101  	}
1102  }
1103  
// Sample type signatures used by addLegacyFrameInfo to classify a
// profile. Each entry is the ordered list of sample type names a
// profile must have to match.
var (
	// heapzSampleTypes are the signatures of heap profiles.
	heapzSampleTypes = [][]string{
		{"allocations", "size"}, // early Go pprof profiles
		{"objects", "space"},
		{"inuse_objects", "inuse_space"},
		{"alloc_objects", "alloc_space"},
		{"alloc_objects", "alloc_space", "inuse_objects", "inuse_space"}, // Go pprof legacy profiles
	}

	// contentionzSampleTypes are the signatures of contention profiles.
	contentionzSampleTypes = [][]string{
		{"contentions", "delay"},
	}
)
1114  
1115  func isProfileType(p *Profile, types [][]string) bool {
1116  	st := p.SampleType
1117  nextType:
1118  	for _, t := range types {
1119  		if len(st) != len(t) {
1120  			continue
1121  		}
1122  
1123  		for i := range st {
1124  			if st[i].Type != t[i] {
1125  				continue nextType
1126  			}
1127  		}
1128  		return true
1129  	}
1130  	return false
1131  }
1132  
// allocRxStr is a regular-expression alternation of memory allocation
// routine names. addLegacyFrameInfo installs it as DropFrames on heap
// profiles. The alternatives are kept in grouped tables and joined with
// "|" in declaration order.
var allocRxStr = func() string {
	groups := [][]string{
		// POSIX entry points.
		{
			`calloc`,
			`cfree`,
			`malloc`,
			`free`,
			`memalign`,
			`do_memalign`,
			`(__)?posix_memalign`,
			`pvalloc`,
			`valloc`,
			`realloc`,
		},
		// TC malloc.
		{
			`tcmalloc::.*`,
			`tc_calloc`,
			`tc_cfree`,
			`tc_malloc`,
			`tc_free`,
			`tc_memalign`,
			`tc_posix_memalign`,
			`tc_pvalloc`,
			`tc_valloc`,
			`tc_realloc`,
			`tc_new`,
			`tc_delete`,
			`tc_newarray`,
			`tc_deletearray`,
			`tc_new_nothrow`,
			`tc_newarray_nothrow`,
		},
		// Memory-allocation routines on OS X.
		{
			`malloc_zone_malloc`,
			`malloc_zone_calloc`,
			`malloc_zone_valloc`,
			`malloc_zone_realloc`,
			`malloc_zone_memalign`,
			`malloc_zone_free`,
		},
		// Go runtime.
		{
			`runtime\..*`,
		},
		// Other misc. memory allocation routines.
		{
			`BaseArena::.*`,
			`(::)?do_malloc_no_errno`,
			`(::)?do_malloc_pages`,
			`(::)?do_malloc`,
			`DoSampledAllocation`,
			`MallocedMemBlock::MallocedMemBlock`,
			`_M_allocate`,
			`__builtin_(vec_)?delete`,
			`__builtin_(vec_)?new`,
			`__gnu_cxx::new_allocator::allocate`,
			`__libc_malloc`,
			`__malloc_alloc_template::allocate`,
			`allocate`,
			`cpp_alloc`,
			`operator new(\[\])?`,
			`simple_alloc::allocate`,
		},
	}

	var alternatives []string
	for _, group := range groups {
		alternatives = append(alternatives, group...)
	}
	return strings.Join(alternatives, `|`)
}()
1193  
// allocSkipRxStr is a regular-expression alternation that
// addLegacyFrameInfo installs as KeepFrames on heap profiles. It
// preserves Go runtime frames that appear in the middle/bottom of the
// stack.
var allocSkipRxStr = `runtime\.panic` +
	`|runtime\.reflectcall` +
	`|runtime\.call[0-9]*`
1201  
// cpuProfilerRxStr is a regular-expression alternation of profiler and
// signal handler function names. addLegacyFrameInfo installs it as
// DropFrames on profiles that are neither heap nor contention profiles.
var cpuProfilerRxStr = `ProfileData::Add` +
	`|ProfileData::prof_handler` +
	`|CpuProfiler::prof_handler` +
	`|__pthread_sighandler` +
	`|__restore`
1209  
// lockRxStr is a regular-expression alternation of lock bookkeeping and
// unlock function names. addLegacyFrameInfo installs it as DropFrames
// on contention profiles. The alternatives are kept in grouped tables
// and joined with "|" in declaration order.
var lockRxStr = func() string {
	groups := [][]string{
		// Profile data recording and submission.
		{
			`RecordLockProfileData`,
			`(base::)?RecordLockProfileData.*`,
			`(base::)?SubmitMutexProfileData.*`,
			`(base::)?SubmitSpinLockProfileData.*`,
		},
		// Mutex routines with an optional base:: qualified prefix.
		{
			`(base::Mutex::)?AwaitCommon.*`,
			`(base::Mutex::)?Unlock.*`,
			`(base::Mutex::)?UnlockSlow.*`,
			`(base::Mutex::)?ReaderUnlock.*`,
			`(base::MutexLock::)?~MutexLock.*`,
		},
		// Mutex routines with an optional unqualified class prefix.
		{
			`(Mutex::)?AwaitCommon.*`,
			`(Mutex::)?Unlock.*`,
			`(Mutex::)?UnlockSlow.*`,
			`(Mutex::)?ReaderUnlock.*`,
			`(MutexLock::)?~MutexLock.*`,
		},
		// Spin lock routines.
		{
			`(SpinLock::)?Unlock.*`,
			`(SpinLock::)?SlowUnlock.*`,
			`(SpinLockHolder::)?~SpinLockHolder.*`,
		},
	}

	var alternatives []string
	for _, group := range groups {
		alternatives = append(alternatives, group...)
	}
	return strings.Join(alternatives, `|`)
}()
1229