...

Source file src/pkg/cmd/vendor/github.com/google/pprof/profile/legacy_profile.go

     1	// Copyright 2014 Google Inc. All Rights Reserved.
     2	//
     3	// Licensed under the Apache License, Version 2.0 (the "License");
     4	// you may not use this file except in compliance with the License.
     5	// You may obtain a copy of the License at
     6	//
     7	//     http://www.apache.org/licenses/LICENSE-2.0
     8	//
     9	// Unless required by applicable law or agreed to in writing, software
    10	// distributed under the License is distributed on an "AS IS" BASIS,
    11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12	// See the License for the specific language governing permissions and
    13	// limitations under the License.
    14	
    15	// This file implements parsers to convert legacy profiles into the
    16	// profile.proto format.
    17	
    18	package profile
    19	
    20	import (
    21		"bufio"
    22		"bytes"
    23		"fmt"
    24		"io"
    25		"math"
    26		"regexp"
    27		"strconv"
    28		"strings"
    29	)
    30	
var (
	// Go count profiles (parsed by parseGoCount): countStartRE matches
	// the "<name> profile: total <n>" header line and captures the
	// profile name; countRE matches a "<count> @ 0x<addr> 0x<addr>..."
	// sample line and captures the count and the address list.
	countStartRE = regexp.MustCompile(`\A(\S+) profile: total \d+\z`)
	countRE      = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\z`)

	// Heap profiles: heapHeaderRE matches the "heap profile: ..." header
	// and captures two number pairs, the format name (heap...) and an
	// optional trailing number; heapSampleRE matches one sample line and
	// captures two number pairs plus the address list after "@".
	heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
	heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)

	// contentionSampleRE matches a contention sample line: two decimal
	// numbers followed by "@" and a list of addresses.
	contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)

	// hexNumberRE matches a single 0x-prefixed, lowercase hex number.
	hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)

	// growthHeaderRE matches a heap-style header ending in "@ growth"
	// or "@ growthz".
	growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz?`)

	// fragmentationHeaderRE matches a heap-style header ending in
	// "@ fragmentation" or "@ fragmentationz".
	fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz?`)

	// Thread profiles: threadzStartRE matches the "--- threadz <n> ---"
	// banner; threadStartRE matches a per-thread header and captures
	// the hex thread id, the name and the trailing number.
	threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
	threadStartRE  = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)

	// Regular expressions to parse process mappings. Support the format used by Linux /proc/.../maps and other tools.
	// Recommended format:
	// Start   End     object file name     offset(optional)   linker build id
	// 0x40000-0x80000 /path/to/binary      (@FF00)            abc123456
	//
	// The three fragments below are non-capturing building blocks.
	spaceDigits = `\s+[[:digit:]]+`
	hexPair     = `\s+[[:xdigit:]]+:[[:xdigit:]]+`
	oSpace      = `\s*`
	// Capturing expressions.
	cHex           = `(?:0x)?([[:xdigit:]]+)`
	cHexRange      = `\s*` + cHex + `[\s-]?` + oSpace + cHex + `:?`
	cSpaceString   = `(?:\s+(\S+))?`
	cSpaceHex      = `(?:\s+([[:xdigit:]]+))?`
	cSpaceAtOffset = `(?:\s+\(@([[:xdigit:]]+)\))?`
	cPerm          = `(?:\s+([-rwxp]+))?`

	// procMapsRE is assembled for lines that carry a hex pair and a
	// decimal field between the range and the file name; briefMapsRE is
	// assembled for the shorter "range [perm] file [(@offset)] [hex]"
	// form.
	procMapsRE  = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceHex + hexPair + spaceDigits + cSpaceString)
	briefMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceString + cSpaceAtOffset + cSpaceHex)

	// Regular expression to parse log data, of the form:
	// ... file:line] msg...
	logInfoRE = regexp.MustCompile(`^[^\[\]]+:[0-9]+]\s`)
)
    71	
    72	func isSpaceOrComment(line string) bool {
    73		trimmed := strings.TrimSpace(line)
    74		return len(trimmed) == 0 || trimmed[0] == '#'
    75	}
    76	
    77	// parseGoCount parses a Go count profile (e.g., threadcreate or
    78	// goroutine) and returns a new Profile.
    79	func parseGoCount(b []byte) (*Profile, error) {
    80		s := bufio.NewScanner(bytes.NewBuffer(b))
    81		// Skip comments at the beginning of the file.
    82		for s.Scan() && isSpaceOrComment(s.Text()) {
    83		}
    84		if err := s.Err(); err != nil {
    85			return nil, err
    86		}
    87		m := countStartRE.FindStringSubmatch(s.Text())
    88		if m == nil {
    89			return nil, errUnrecognized
    90		}
    91		profileType := m[1]
    92		p := &Profile{
    93			PeriodType: &ValueType{Type: profileType, Unit: "count"},
    94			Period:     1,
    95			SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
    96		}
    97		locations := make(map[uint64]*Location)
    98		for s.Scan() {
    99			line := s.Text()
   100			if isSpaceOrComment(line) {
   101				continue
   102			}
   103			if strings.HasPrefix(line, "---") {
   104				break
   105			}
   106			m := countRE.FindStringSubmatch(line)
   107			if m == nil {
   108				return nil, errMalformed
   109			}
   110			n, err := strconv.ParseInt(m[1], 0, 64)
   111			if err != nil {
   112				return nil, errMalformed
   113			}
   114			fields := strings.Fields(m[2])
   115			locs := make([]*Location, 0, len(fields))
   116			for _, stk := range fields {
   117				addr, err := strconv.ParseUint(stk, 0, 64)
   118				if err != nil {
   119					return nil, errMalformed
   120				}
   121				// Adjust all frames by -1 to land on top of the call instruction.
   122				addr--
   123				loc := locations[addr]
   124				if loc == nil {
   125					loc = &Location{
   126						Address: addr,
   127					}
   128					locations[addr] = loc
   129					p.Location = append(p.Location, loc)
   130				}
   131				locs = append(locs, loc)
   132			}
   133			p.Sample = append(p.Sample, &Sample{
   134				Location: locs,
   135				Value:    []int64{n},
   136			})
   137		}
   138		if err := s.Err(); err != nil {
   139			return nil, err
   140		}
   141	
   142		if err := parseAdditionalSections(s, p); err != nil {
   143			return nil, err
   144		}
   145		return p, nil
   146	}
   147	
   148	// remapLocationIDs ensures there is a location for each address
   149	// referenced by a sample, and remaps the samples to point to the new
   150	// location ids.
   151	func (p *Profile) remapLocationIDs() {
   152		seen := make(map[*Location]bool, len(p.Location))
   153		var locs []*Location
   154	
   155		for _, s := range p.Sample {
   156			for _, l := range s.Location {
   157				if seen[l] {
   158					continue
   159				}
   160				l.ID = uint64(len(locs) + 1)
   161				locs = append(locs, l)
   162				seen[l] = true
   163			}
   164		}
   165		p.Location = locs
   166	}
   167	
   168	func (p *Profile) remapFunctionIDs() {
   169		seen := make(map[*Function]bool, len(p.Function))
   170		var fns []*Function
   171	
   172		for _, l := range p.Location {
   173			for _, ln := range l.Line {
   174				fn := ln.Function
   175				if fn == nil || seen[fn] {
   176					continue
   177				}
   178				fn.ID = uint64(len(fns) + 1)
   179				fns = append(fns, fn)
   180				seen[fn] = true
   181			}
   182		}
   183		p.Function = fns
   184	}
   185	
   186	// remapMappingIDs matches location addresses with existing mappings
   187	// and updates them appropriately. This is O(N*M), if this ever shows
   188	// up as a bottleneck, evaluate sorting the mappings and doing a
   189	// binary search, which would make it O(N*log(M)).
   190	func (p *Profile) remapMappingIDs() {
   191		// Some profile handlers will incorrectly set regions for the main
   192		// executable if its section is remapped. Fix them through heuristics.
   193	
   194		if len(p.Mapping) > 0 {
   195			// Remove the initial mapping if named '/anon_hugepage' and has a
   196			// consecutive adjacent mapping.
   197			if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
   198				if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
   199					p.Mapping = p.Mapping[1:]
   200				}
   201			}
   202		}
   203	
   204		// Subtract the offset from the start of the main mapping if it
   205		// ends up at a recognizable start address.
   206		if len(p.Mapping) > 0 {
   207			const expectedStart = 0x400000
   208			if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
   209				m.Start = expectedStart
   210				m.Offset = 0
   211			}
   212		}
   213	
   214		// Associate each location with an address to the corresponding
   215		// mapping. Create fake mapping if a suitable one isn't found.
   216		var fake *Mapping
   217	nextLocation:
   218		for _, l := range p.Location {
   219			a := l.Address
   220			if l.Mapping != nil || a == 0 {
   221				continue
   222			}
   223			for _, m := range p.Mapping {
   224				if m.Start <= a && a < m.Limit {
   225					l.Mapping = m
   226					continue nextLocation
   227				}
   228			}
   229			// Work around legacy handlers failing to encode the first
   230			// part of mappings split into adjacent ranges.
   231			for _, m := range p.Mapping {
   232				if m.Offset != 0 && m.Start-m.Offset <= a && a < m.Start {
   233					m.Start -= m.Offset
   234					m.Offset = 0
   235					l.Mapping = m
   236					continue nextLocation
   237				}
   238			}
   239			// If there is still no mapping, create a fake one.
   240			// This is important for the Go legacy handler, which produced
   241			// no mappings.
   242			if fake == nil {
   243				fake = &Mapping{
   244					ID:    1,
   245					Limit: ^uint64(0),
   246				}
   247				p.Mapping = append(p.Mapping, fake)
   248			}
   249			l.Mapping = fake
   250		}
   251	
   252		// Reset all mapping IDs.
   253		for i, m := range p.Mapping {
   254			m.ID = uint64(i + 1)
   255		}
   256	}
   257	
// cpuInts lists the word decoders that parseCPU tries, in order, when
// probing a binary CPU profile header: 32-bit little-endian, 32-bit
// big-endian, 64-bit little-endian and 64-bit big-endian. Each decoder
// returns the decoded word and the remaining bytes, or (0, nil) when
// the input is too short.
var cpuInts = []func([]byte) (uint64, []byte){
	get32l,
	get32b,
	get64l,
	get64b,
}
   264	
   265	func get32l(b []byte) (uint64, []byte) {
   266		if len(b) < 4 {
   267			return 0, nil
   268		}
   269		return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
   270	}
   271	
   272	func get32b(b []byte) (uint64, []byte) {
   273		if len(b) < 4 {
   274			return 0, nil
   275		}
   276		return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
   277	}
   278	
   279	func get64l(b []byte) (uint64, []byte) {
   280		if len(b) < 8 {
   281			return 0, nil
   282		}
   283		return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
   284	}
   285	
   286	func get64b(b []byte) (uint64, []byte) {
   287		if len(b) < 8 {
   288			return 0, nil
   289		}
   290		return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
   291	}
   292	
   293	// parseCPU parses a profilez legacy profile and returns a newly
   294	// populated Profile.
   295	//
   296	// The general format for profilez samples is a sequence of words in
   297	// binary format. The first words are a header with the following data:
   298	//   1st word -- 0
   299	//   2nd word -- 3
   300	//   3rd word -- 0 if a c++ application, 1 if a java application.
   301	//   4th word -- Sampling period (in microseconds).
   302	//   5th word -- Padding.
   303	func parseCPU(b []byte) (*Profile, error) {
   304		var parse func([]byte) (uint64, []byte)
   305		var n1, n2, n3, n4, n5 uint64
   306		for _, parse = range cpuInts {
   307			var tmp []byte
   308			n1, tmp = parse(b)
   309			n2, tmp = parse(tmp)
   310			n3, tmp = parse(tmp)
   311			n4, tmp = parse(tmp)
   312			n5, tmp = parse(tmp)
   313	
   314			if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
   315				b = tmp
   316				return cpuProfile(b, int64(n4), parse)
   317			}
   318			if tmp != nil && n1 == 0 && n2 == 3 && n3 == 1 && n4 > 0 && n5 == 0 {
   319				b = tmp
   320				return javaCPUProfile(b, int64(n4), parse)
   321			}
   322		}
   323		return nil, errUnrecognized
   324	}
   325	
   326	// cpuProfile returns a new Profile from C++ profilez data.
   327	// b is the profile bytes after the header, period is the profiling
   328	// period, and parse is a function to parse 8-byte chunks from the
   329	// profile in its native endianness.
   330	func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
   331		p := &Profile{
   332			Period:     period * 1000,
   333			PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
   334			SampleType: []*ValueType{
   335				{Type: "samples", Unit: "count"},
   336				{Type: "cpu", Unit: "nanoseconds"},
   337			},
   338		}
   339		var err error
   340		if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
   341			return nil, err
   342		}
   343	
   344		// If *most* samples have the same second-to-the-bottom frame, it
   345		// strongly suggests that it is an uninteresting artifact of
   346		// measurement -- a stack frame pushed by the signal handler. The
   347		// bottom frame is always correct as it is picked up from the signal
   348		// structure, not the stack. Check if this is the case and if so,
   349		// remove.
   350	
   351		// Remove up to two frames.
   352		maxiter := 2
   353		// Allow one different sample for this many samples with the same
   354		// second-to-last frame.
   355		similarSamples := 32
   356		margin := len(p.Sample) / similarSamples
   357	
   358		for iter := 0; iter < maxiter; iter++ {
   359			addr1 := make(map[uint64]int)
   360			for _, s := range p.Sample {
   361				if len(s.Location) > 1 {
   362					a := s.Location[1].Address
   363					addr1[a] = addr1[a] + 1
   364				}
   365			}
   366	
   367			for id1, count := range addr1 {
   368				if count >= len(p.Sample)-margin {
   369					// Found uninteresting frame, strip it out from all samples
   370					for _, s := range p.Sample {
   371						if len(s.Location) > 1 && s.Location[1].Address == id1 {
   372							s.Location = append(s.Location[:1], s.Location[2:]...)
   373						}
   374					}
   375					break
   376				}
   377			}
   378		}
   379	
   380		if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
   381			return nil, err
   382		}
   383	
   384		cleanupDuplicateLocations(p)
   385		return p, nil
   386	}
   387	
   388	func cleanupDuplicateLocations(p *Profile) {
   389		// The profile handler may duplicate the leaf frame, because it gets
   390		// its address both from stack unwinding and from the signal
   391		// context. Detect this and delete the duplicate, which has been
   392		// adjusted by -1. The leaf address should not be adjusted as it is
   393		// not a call.
   394		for _, s := range p.Sample {
   395			if len(s.Location) > 1 && s.Location[0].Address == s.Location[1].Address+1 {
   396				s.Location = append(s.Location[:1], s.Location[2:]...)
   397			}
   398		}
   399	}
   400	
   401	// parseCPUSamples parses a collection of profilez samples from a
   402	// profile.
   403	//
   404	// profilez samples are a repeated sequence of stack frames of the
   405	// form:
   406	//    1st word -- The number of times this stack was encountered.
   407	//    2nd word -- The size of the stack (StackSize).
   408	//    3rd word -- The first address on the stack.
   409	//    ...
   410	//    StackSize + 2 -- The last address on the stack
   411	// The last stack trace is of the form:
   412	//   1st word -- 0
   413	//   2nd word -- 1
   414	//   3rd word -- 0
   415	//
   416	// Addresses from stack traces may point to the next instruction after
   417	// each call. Optionally adjust by -1 to land somewhere on the actual
   418	// call (except for the leaf, which is not a call).
   419	func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
   420		locs := make(map[uint64]*Location)
   421		for len(b) > 0 {
   422			var count, nstk uint64
   423			count, b = parse(b)
   424			nstk, b = parse(b)
   425			if b == nil || nstk > uint64(len(b)/4) {
   426				return nil, nil, errUnrecognized
   427			}
   428			var sloc []*Location
   429			addrs := make([]uint64, nstk)
   430			for i := 0; i < int(nstk); i++ {
   431				addrs[i], b = parse(b)
   432			}
   433	
   434			if count == 0 && nstk == 1 && addrs[0] == 0 {
   435				// End of data marker
   436				break
   437			}
   438			for i, addr := range addrs {
   439				if adjust && i > 0 {
   440					addr--
   441				}
   442				loc := locs[addr]
   443				if loc == nil {
   444					loc = &Location{
   445						Address: addr,
   446					}
   447					locs[addr] = loc
   448					p.Location = append(p.Location, loc)
   449				}
   450				sloc = append(sloc, loc)
   451			}
   452			p.Sample = append(p.Sample,
   453				&Sample{
   454					Value:    []int64{int64(count), int64(count) * p.Period},
   455					Location: sloc,
   456				})
   457		}
   458		// Reached the end without finding the EOD marker.
   459		return b, locs, nil
   460	}
   461	
   462	// parseHeap parses a heapz legacy or a growthz profile and
   463	// returns a newly populated Profile.
   464	func parseHeap(b []byte) (p *Profile, err error) {
   465		s := bufio.NewScanner(bytes.NewBuffer(b))
   466		if !s.Scan() {
   467			if err := s.Err(); err != nil {
   468				return nil, err
   469			}
   470			return nil, errUnrecognized
   471		}
   472		p = &Profile{}
   473	
   474		sampling := ""
   475		hasAlloc := false
   476	
   477		line := s.Text()
   478		p.PeriodType = &ValueType{Type: "space", Unit: "bytes"}
   479		if header := heapHeaderRE.FindStringSubmatch(line); header != nil {
   480			sampling, p.Period, hasAlloc, err = parseHeapHeader(line)
   481			if err != nil {
   482				return nil, err
   483			}
   484		} else if header = growthHeaderRE.FindStringSubmatch(line); header != nil {
   485			p.Period = 1
   486		} else if header = fragmentationHeaderRE.FindStringSubmatch(line); header != nil {
   487			p.Period = 1
   488		} else {
   489			return nil, errUnrecognized
   490		}
   491	
   492		if hasAlloc {
   493			// Put alloc before inuse so that default pprof selection
   494			// will prefer inuse_space.
   495			p.SampleType = []*ValueType{
   496				{Type: "alloc_objects", Unit: "count"},
   497				{Type: "alloc_space", Unit: "bytes"},
   498				{Type: "inuse_objects", Unit: "count"},
   499				{Type: "inuse_space", Unit: "bytes"},
   500			}
   501		} else {
   502			p.SampleType = []*ValueType{
   503				{Type: "objects", Unit: "count"},
   504				{Type: "space", Unit: "bytes"},
   505			}
   506		}
   507	
   508		locs := make(map[uint64]*Location)
   509		for s.Scan() {
   510			line := strings.TrimSpace(s.Text())
   511	
   512			if isSpaceOrComment(line) {
   513				continue
   514			}
   515	
   516			if isMemoryMapSentinel(line) {
   517				break
   518			}
   519	
   520			value, blocksize, addrs, err := parseHeapSample(line, p.Period, sampling, hasAlloc)
   521			if err != nil {
   522				return nil, err
   523			}
   524	
   525			var sloc []*Location
   526			for _, addr := range addrs {
   527				// Addresses from stack traces point to the next instruction after
   528				// each call. Adjust by -1 to land somewhere on the actual call.
   529				addr--
   530				loc := locs[addr]
   531				if locs[addr] == nil {
   532					loc = &Location{
   533						Address: addr,
   534					}
   535					p.Location = append(p.Location, loc)
   536					locs[addr] = loc
   537				}
   538				sloc = append(sloc, loc)
   539			}
   540	
   541			p.Sample = append(p.Sample, &Sample{
   542				Value:    value,
   543				Location: sloc,
   544				NumLabel: map[string][]int64{"bytes": {blocksize}},
   545			})
   546		}
   547		if err := s.Err(); err != nil {
   548			return nil, err
   549		}
   550		if err := parseAdditionalSections(s, p); err != nil {
   551			return nil, err
   552		}
   553		return p, nil
   554	}
   555	
   556	func parseHeapHeader(line string) (sampling string, period int64, hasAlloc bool, err error) {
   557		header := heapHeaderRE.FindStringSubmatch(line)
   558		if header == nil {
   559			return "", 0, false, errUnrecognized
   560		}
   561	
   562		if len(header[6]) > 0 {
   563			if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
   564				return "", 0, false, errUnrecognized
   565			}
   566		}
   567	
   568		if (header[3] != header[1] && header[3] != "0") || (header[4] != header[2] && header[4] != "0") {
   569			hasAlloc = true
   570		}
   571	
   572		switch header[5] {
   573		case "heapz_v2", "heap_v2":
   574			return "v2", period, hasAlloc, nil
   575		case "heapprofile":
   576			return "", 1, hasAlloc, nil
   577		case "heap":
   578			return "v2", period / 2, hasAlloc, nil
   579		default:
   580			return "", 0, false, errUnrecognized
   581		}
   582	}
   583	
   584	// parseHeapSample parses a single row from a heap profile into a new Sample.
   585	func parseHeapSample(line string, rate int64, sampling string, includeAlloc bool) (value []int64, blocksize int64, addrs []uint64, err error) {
   586		sampleData := heapSampleRE.FindStringSubmatch(line)
   587		if len(sampleData) != 6 {
   588			return nil, 0, nil, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
   589		}
   590	
   591		// This is a local-scoped helper function to avoid needing to pass
   592		// around rate, sampling and many return parameters.
   593		addValues := func(countString, sizeString string, label string) error {
   594			count, err := strconv.ParseInt(countString, 10, 64)
   595			if err != nil {
   596				return fmt.Errorf("malformed sample: %s: %v", line, err)
   597			}
   598			size, err := strconv.ParseInt(sizeString, 10, 64)
   599			if err != nil {
   600				return fmt.Errorf("malformed sample: %s: %v", line, err)
   601			}
   602			if count == 0 && size != 0 {
   603				return fmt.Errorf("%s count was 0 but %s bytes was %d", label, label, size)
   604			}
   605			if count != 0 {
   606				blocksize = size / count
   607				if sampling == "v2" {
   608					count, size = scaleHeapSample(count, size, rate)
   609				}
   610			}
   611			value = append(value, count, size)
   612			return nil
   613		}
   614	
   615		if includeAlloc {
   616			if err := addValues(sampleData[3], sampleData[4], "allocation"); err != nil {
   617				return nil, 0, nil, err
   618			}
   619		}
   620	
   621		if err := addValues(sampleData[1], sampleData[2], "inuse"); err != nil {
   622			return nil, 0, nil, err
   623		}
   624	
   625		addrs, err = parseHexAddresses(sampleData[5])
   626		if err != nil {
   627			return nil, 0, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
   628		}
   629	
   630		return value, blocksize, addrs, nil
   631	}
   632	
   633	// parseHexAddresses extracts hex numbers from a string, attempts to convert
   634	// each to an unsigned 64-bit number and returns the resulting numbers as a
   635	// slice, or an error if the string contains hex numbers which are too large to
   636	// handle (which means a malformed profile).
   637	func parseHexAddresses(s string) ([]uint64, error) {
   638		hexStrings := hexNumberRE.FindAllString(s, -1)
   639		var addrs []uint64
   640		for _, s := range hexStrings {
   641			if addr, err := strconv.ParseUint(s, 0, 64); err == nil {
   642				addrs = append(addrs, addr)
   643			} else {
   644				return nil, fmt.Errorf("failed to parse as hex 64-bit number: %s", s)
   645			}
   646		}
   647		return addrs, nil
   648	}
   649	
   650	// scaleHeapSample adjusts the data from a heapz Sample to
   651	// account for its probability of appearing in the collected
   652	// data. heapz profiles are a sampling of the memory allocations
   653	// requests in a program. We estimate the unsampled value by dividing
   654	// each collected sample by its probability of appearing in the
   655	// profile. heapz v2 profiles rely on a poisson process to determine
   656	// which samples to collect, based on the desired average collection
   657	// rate R. The probability of a sample of size S to appear in that
   658	// profile is 1-exp(-S/R).
   659	func scaleHeapSample(count, size, rate int64) (int64, int64) {
   660		if count == 0 || size == 0 {
   661			return 0, 0
   662		}
   663	
   664		if rate <= 1 {
   665			// if rate==1 all samples were collected so no adjustment is needed.
   666			// if rate<1 treat as unknown and skip scaling.
   667			return count, size
   668		}
   669	
   670		avgSize := float64(size) / float64(count)
   671		scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
   672	
   673		return int64(float64(count) * scale), int64(float64(size) * scale)
   674	}
   675	
   676	// parseContention parses a mutex or contention profile. There are 2 cases:
   677	// "--- contentionz " for legacy C++ profiles (and backwards compatibility)
   678	// "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
   679	func parseContention(b []byte) (*Profile, error) {
   680		s := bufio.NewScanner(bytes.NewBuffer(b))
   681		if !s.Scan() {
   682			if err := s.Err(); err != nil {
   683				return nil, err
   684			}
   685			return nil, errUnrecognized
   686		}
   687	
   688		switch l := s.Text(); {
   689		case strings.HasPrefix(l, "--- contentionz "):
   690		case strings.HasPrefix(l, "--- mutex:"):
   691		case strings.HasPrefix(l, "--- contention:"):
   692		default:
   693			return nil, errUnrecognized
   694		}
   695	
   696		p := &Profile{
   697			PeriodType: &ValueType{Type: "contentions", Unit: "count"},
   698			Period:     1,
   699			SampleType: []*ValueType{
   700				{Type: "contentions", Unit: "count"},
   701				{Type: "delay", Unit: "nanoseconds"},
   702			},
   703		}
   704	
   705		var cpuHz int64
   706		// Parse text of the form "attribute = value" before the samples.
   707		const delimiter = "="
   708		for s.Scan() {
   709			line := s.Text()
   710			if line = strings.TrimSpace(line); isSpaceOrComment(line) {
   711				continue
   712			}
   713			if strings.HasPrefix(line, "---") {
   714				break
   715			}
   716			attr := strings.SplitN(line, delimiter, 2)
   717			if len(attr) != 2 {
   718				break
   719			}
   720			key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
   721			var err error
   722			switch key {
   723			case "cycles/second":
   724				if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
   725					return nil, errUnrecognized
   726				}
   727			case "sampling period":
   728				if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
   729					return nil, errUnrecognized
   730				}
   731			case "ms since reset":
   732				ms, err := strconv.ParseInt(val, 0, 64)
   733				if err != nil {
   734					return nil, errUnrecognized
   735				}
   736				p.DurationNanos = ms * 1000 * 1000
   737			case "format":
   738				// CPP contentionz profiles don't have format.
   739				return nil, errUnrecognized
   740			case "resolution":
   741				// CPP contentionz profiles don't have resolution.
   742				return nil, errUnrecognized
   743			case "discarded samples":
   744			default:
   745				return nil, errUnrecognized
   746			}
   747		}
   748		if err := s.Err(); err != nil {
   749			return nil, err
   750		}
   751	
   752		locs := make(map[uint64]*Location)
   753		for {
   754			line := strings.TrimSpace(s.Text())
   755			if strings.HasPrefix(line, "---") {
   756				break
   757			}
   758			if !isSpaceOrComment(line) {
   759				value, addrs, err := parseContentionSample(line, p.Period, cpuHz)
   760				if err != nil {
   761					return nil, err
   762				}
   763				var sloc []*Location
   764				for _, addr := range addrs {
   765					// Addresses from stack traces point to the next instruction after
   766					// each call. Adjust by -1 to land somewhere on the actual call.
   767					addr--
   768					loc := locs[addr]
   769					if locs[addr] == nil {
   770						loc = &Location{
   771							Address: addr,
   772						}
   773						p.Location = append(p.Location, loc)
   774						locs[addr] = loc
   775					}
   776					sloc = append(sloc, loc)
   777				}
   778				p.Sample = append(p.Sample, &Sample{
   779					Value:    value,
   780					Location: sloc,
   781				})
   782			}
   783			if !s.Scan() {
   784				break
   785			}
   786		}
   787		if err := s.Err(); err != nil {
   788			return nil, err
   789		}
   790	
   791		if err := parseAdditionalSections(s, p); err != nil {
   792			return nil, err
   793		}
   794	
   795		return p, nil
   796	}
   797	
   798	// parseContentionSample parses a single row from a contention profile
   799	// into a new Sample.
   800	func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
   801		sampleData := contentionSampleRE.FindStringSubmatch(line)
   802		if sampleData == nil {
   803			return nil, nil, errUnrecognized
   804		}
   805	
   806		v1, err := strconv.ParseInt(sampleData[1], 10, 64)
   807		if err != nil {
   808			return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
   809		}
   810		v2, err := strconv.ParseInt(sampleData[2], 10, 64)
   811		if err != nil {
   812			return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
   813		}
   814	
   815		// Unsample values if period and cpuHz are available.
   816		// - Delays are scaled to cycles and then to nanoseconds.
   817		// - Contentions are scaled to cycles.
   818		if period > 0 {
   819			if cpuHz > 0 {
   820				cpuGHz := float64(cpuHz) / 1e9
   821				v1 = int64(float64(v1) * float64(period) / cpuGHz)
   822			}
   823			v2 = v2 * period
   824		}
   825	
   826		value = []int64{v2, v1}
   827		addrs, err = parseHexAddresses(sampleData[3])
   828		if err != nil {
   829			return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
   830		}
   831	
   832		return value, addrs, nil
   833	}
   834	
// parseThread parses a Threadz profile and returns a new Profile.
//
// The input either starts with a "--- threadz N ---" banner followed
// by per-thread sections, or directly with a per-thread header (see
// threadStartRE). Each thread contributes one sample with value 1; a
// thread whose stack is reported as the same as the previous one
// increments the value of the previous sample instead. Anything else
// yields errUnrecognized.
func parseThread(b []byte) (*Profile, error) {
	s := bufio.NewScanner(bytes.NewBuffer(b))
	// Skip past comments and empty lines seeking a real header.
	for s.Scan() && isSpaceOrComment(s.Text()) {
	}

	line := s.Text()
	if m := threadzStartRE.FindStringSubmatch(line); m != nil {
		// Advance over initial comments until first stack trace.
		// The loop stops on the memory map sentinel or on the first
		// line starting with "-", leaving that line in line.
		for s.Scan() {
			if line = s.Text(); isMemoryMapSentinel(line) || strings.HasPrefix(line, "-") {
				break
			}
		}
	} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
		// No banner: the first line must itself be a thread header.
		return nil, errUnrecognized
	}

	p := &Profile{
		SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
		PeriodType: &ValueType{Type: "thread", Unit: "count"},
		Period:     1,
	}

	// locs deduplicates locations across samples.
	locs := make(map[uint64]*Location)
	// Recognize each thread and populate profile samples.
	// On every iteration line holds the header of the next section, as
	// returned by the previous parseThreadSample call.
	for !isMemoryMapSentinel(line) {
		if strings.HasPrefix(line, "---- no stack trace for") {
			line = ""
			break
		}
		if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
			return nil, errUnrecognized
		}

		var addrs []uint64
		var err error
		line, addrs, err = parseThreadSample(s)
		if err != nil {
			return nil, err
		}
		if len(addrs) == 0 {
			// We got a --same as previous threads--. Bump counters.
			if len(p.Sample) > 0 {
				// Note: this s is the last sample and shadows the scanner.
				s := p.Sample[len(p.Sample)-1]
				s.Value[0]++
			}
			continue
		}

		var sloc []*Location
		for i, addr := range addrs {
			// Addresses from stack traces point to the next instruction after
			// each call. Adjust by -1 to land somewhere on the actual call
			// (except for the leaf, which is not a call).
			if i > 0 {
				addr--
			}
			loc := locs[addr]
			if locs[addr] == nil {
				loc = &Location{
					Address: addr,
				}
				p.Location = append(p.Location, loc)
				locs[addr] = loc
			}
			sloc = append(sloc, loc)
		}

		p.Sample = append(p.Sample, &Sample{
			Value:    []int64{1},
			Location: sloc,
		})
	}

	// Hand the remaining input (memory map and any other sections) to
	// the shared trailer parser.
	if err := parseAdditionalSections(s, p); err != nil {
		return nil, err
	}

	cleanupDuplicateLocations(p)
	return p, nil
}
   918	
   919	// parseThreadSample parses a symbolized or unsymbolized stack trace.
   920	// Returns the first line after the traceback, the sample (or nil if
   921	// it hits a 'same-as-previous' marker) and an error.
   922	func parseThreadSample(s *bufio.Scanner) (nextl string, addrs []uint64, err error) {
   923		var line string
   924		sameAsPrevious := false
   925		for s.Scan() {
   926			line = strings.TrimSpace(s.Text())
   927			if line == "" {
   928				continue
   929			}
   930	
   931			if strings.HasPrefix(line, "---") {
   932				break
   933			}
   934			if strings.Contains(line, "same as previous thread") {
   935				sameAsPrevious = true
   936				continue
   937			}
   938	
   939			curAddrs, err := parseHexAddresses(line)
   940			if err != nil {
   941				return "", nil, fmt.Errorf("malformed sample: %s: %v", line, err)
   942			}
   943			addrs = append(addrs, curAddrs...)
   944		}
   945		if err := s.Err(); err != nil {
   946			return "", nil, err
   947		}
   948		if sameAsPrevious {
   949			return line, nil, nil
   950		}
   951		return line, addrs, nil
   952	}
   953	
   954	// parseAdditionalSections parses any additional sections in the
   955	// profile, ignoring any unrecognized sections.
   956	func parseAdditionalSections(s *bufio.Scanner, p *Profile) error {
   957		for !isMemoryMapSentinel(s.Text()) && s.Scan() {
   958		}
   959		if err := s.Err(); err != nil {
   960			return err
   961		}
   962		return p.ParseMemoryMapFromScanner(s)
   963	}
   964	
   965	// ParseProcMaps parses a memory map in the format of /proc/self/maps.
   966	// ParseMemoryMap should be called after setting on a profile to
   967	// associate locations to the corresponding mapping based on their
   968	// address.
   969	func ParseProcMaps(rd io.Reader) ([]*Mapping, error) {
   970		s := bufio.NewScanner(rd)
   971		return parseProcMapsFromScanner(s)
   972	}
   973	
   974	func parseProcMapsFromScanner(s *bufio.Scanner) ([]*Mapping, error) {
   975		var mapping []*Mapping
   976	
   977		var attrs []string
   978		const delimiter = "="
   979		r := strings.NewReplacer()
   980		for s.Scan() {
   981			line := r.Replace(removeLoggingInfo(s.Text()))
   982			m, err := parseMappingEntry(line)
   983			if err != nil {
   984				if err == errUnrecognized {
   985					// Recognize assignments of the form: attr=value, and replace
   986					// $attr with value on subsequent mappings.
   987					if attr := strings.SplitN(line, delimiter, 2); len(attr) == 2 {
   988						attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
   989						r = strings.NewReplacer(attrs...)
   990					}
   991					// Ignore any unrecognized entries
   992					continue
   993				}
   994				return nil, err
   995			}
   996			if m == nil {
   997				continue
   998			}
   999			mapping = append(mapping, m)
  1000		}
  1001		if err := s.Err(); err != nil {
  1002			return nil, err
  1003		}
  1004		return mapping, nil
  1005	}
  1006	
  1007	// removeLoggingInfo detects and removes log prefix entries generated
  1008	// by the glog package. If no logging prefix is detected, the string
  1009	// is returned unmodified.
  1010	func removeLoggingInfo(line string) string {
  1011		if match := logInfoRE.FindStringIndex(line); match != nil {
  1012			return line[match[1]:]
  1013		}
  1014		return line
  1015	}
  1016	
  1017	// ParseMemoryMap parses a memory map in the format of
  1018	// /proc/self/maps, and overrides the mappings in the current profile.
  1019	// It renumbers the samples and locations in the profile correspondingly.
  1020	func (p *Profile) ParseMemoryMap(rd io.Reader) error {
  1021		return p.ParseMemoryMapFromScanner(bufio.NewScanner(rd))
  1022	}
  1023	
  1024	// ParseMemoryMapFromScanner parses a memory map in the format of
  1025	// /proc/self/maps or a variety of legacy format, and overrides the
  1026	// mappings in the current profile.  It renumbers the samples and
  1027	// locations in the profile correspondingly.
  1028	func (p *Profile) ParseMemoryMapFromScanner(s *bufio.Scanner) error {
  1029		mapping, err := parseProcMapsFromScanner(s)
  1030		if err != nil {
  1031			return err
  1032		}
  1033		p.Mapping = append(p.Mapping, mapping...)
  1034		p.massageMappings()
  1035		p.remapLocationIDs()
  1036		p.remapFunctionIDs()
  1037		p.remapMappingIDs()
  1038		return nil
  1039	}
  1040	
  1041	func parseMappingEntry(l string) (*Mapping, error) {
  1042		var start, end, perm, file, offset, buildID string
  1043		if me := procMapsRE.FindStringSubmatch(l); len(me) == 6 {
  1044			start, end, perm, offset, file = me[1], me[2], me[3], me[4], me[5]
  1045		} else if me := briefMapsRE.FindStringSubmatch(l); len(me) == 7 {
  1046			start, end, perm, file, offset, buildID = me[1], me[2], me[3], me[4], me[5], me[6]
  1047		} else {
  1048			return nil, errUnrecognized
  1049		}
  1050	
  1051		var err error
  1052		mapping := &Mapping{
  1053			File:    file,
  1054			BuildID: buildID,
  1055		}
  1056		if perm != "" && !strings.Contains(perm, "x") {
  1057			// Skip non-executable entries.
  1058			return nil, nil
  1059		}
  1060		if mapping.Start, err = strconv.ParseUint(start, 16, 64); err != nil {
  1061			return nil, errUnrecognized
  1062		}
  1063		if mapping.Limit, err = strconv.ParseUint(end, 16, 64); err != nil {
  1064			return nil, errUnrecognized
  1065		}
  1066		if offset != "" {
  1067			if mapping.Offset, err = strconv.ParseUint(offset, 16, 64); err != nil {
  1068				return nil, errUnrecognized
  1069			}
  1070		}
  1071		return mapping, nil
  1072	}
  1073	
  1074	var memoryMapSentinels = []string{
  1075		"--- Memory map: ---",
  1076		"MAPPED_LIBRARIES:",
  1077	}
  1078	
  1079	// isMemoryMapSentinel returns true if the string contains one of the
  1080	// known sentinels for memory map information.
  1081	func isMemoryMapSentinel(line string) bool {
  1082		for _, s := range memoryMapSentinels {
  1083			if strings.Contains(line, s) {
  1084				return true
  1085			}
  1086		}
  1087		return false
  1088	}
  1089	
  1090	func (p *Profile) addLegacyFrameInfo() {
  1091		switch {
  1092		case isProfileType(p, heapzSampleTypes):
  1093			p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
  1094		case isProfileType(p, contentionzSampleTypes):
  1095			p.DropFrames, p.KeepFrames = lockRxStr, ""
  1096		default:
  1097			p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
  1098		}
  1099	}
  1100	
  1101	var heapzSampleTypes = [][]string{
  1102		{"allocations", "size"}, // early Go pprof profiles
  1103		{"objects", "space"},
  1104		{"inuse_objects", "inuse_space"},
  1105		{"alloc_objects", "alloc_space"},
  1106		{"alloc_objects", "alloc_space", "inuse_objects", "inuse_space"}, // Go pprof legacy profiles
  1107	}
  1108	var contentionzSampleTypes = [][]string{
  1109		{"contentions", "delay"},
  1110	}
  1111	
  1112	func isProfileType(p *Profile, types [][]string) bool {
  1113		st := p.SampleType
  1114	nextType:
  1115		for _, t := range types {
  1116			if len(st) != len(t) {
  1117				continue
  1118			}
  1119	
  1120			for i := range st {
  1121				if st[i].Type != t[i] {
  1122					continue nextType
  1123				}
  1124			}
  1125			return true
  1126		}
  1127		return false
  1128	}
  1129	
// allocRxStr is the source text of a regular expression built by
// joining the alternatives below with "|", in the order listed. Each
// alternative names a memory-allocation routine (or family of
// routines). addLegacyFrameInfo installs it as DropFrames on profiles
// whose sample types match heapzSampleTypes.
var allocRxStr = strings.Join([]string{
	// POSIX entry points.
	`calloc`,
	`cfree`,
	`malloc`,
	`free`,
	`memalign`,
	`do_memalign`,
	`(__)?posix_memalign`,
	`pvalloc`,
	`valloc`,
	`realloc`,

	// TC malloc.
	`tcmalloc::.*`,
	`tc_calloc`,
	`tc_cfree`,
	`tc_malloc`,
	`tc_free`,
	`tc_memalign`,
	`tc_posix_memalign`,
	`tc_pvalloc`,
	`tc_valloc`,
	`tc_realloc`,
	`tc_new`,
	`tc_delete`,
	`tc_newarray`,
	`tc_deletearray`,
	`tc_new_nothrow`,
	`tc_newarray_nothrow`,

	// Memory-allocation routines on OS X.
	`malloc_zone_malloc`,
	`malloc_zone_calloc`,
	`malloc_zone_valloc`,
	`malloc_zone_realloc`,
	`malloc_zone_memalign`,
	`malloc_zone_free`,

	// Go runtime
	`runtime\..*`,

	// Other misc. memory allocation routines
	`BaseArena::.*`,
	`(::)?do_malloc_no_errno`,
	`(::)?do_malloc_pages`,
	`(::)?do_malloc`,
	`DoSampledAllocation`,
	`MallocedMemBlock::MallocedMemBlock`,
	`_M_allocate`,
	`__builtin_(vec_)?delete`,
	`__builtin_(vec_)?new`,
	`__gnu_cxx::new_allocator::allocate`,
	`__libc_malloc`,
	`__malloc_alloc_template::allocate`,
	`allocate`,
	`cpp_alloc`,
	`operator new(\[\])?`,
	`simple_alloc::allocate`,
}, `|`)
  1190	
// allocSkipRxStr is the source text of a regular expression built by
// joining the alternatives below with "|". addLegacyFrameInfo installs
// it as KeepFrames, alongside allocRxStr as DropFrames, on profiles
// whose sample types match heapzSampleTypes.
var allocSkipRxStr = strings.Join([]string{
	// Preserve Go runtime frames that appear in the middle/bottom of
	// the stack.
	`runtime\.panic`,
	`runtime\.reflectcall`,
	`runtime\.call[0-9]*`,
}, `|`)
  1198	
// cpuProfilerRxStr is the source text of a regular expression built by
// joining the alternatives below with "|". addLegacyFrameInfo installs
// it as DropFrames (with an empty KeepFrames) on any profile that
// matches neither heapzSampleTypes nor contentionzSampleTypes.
var cpuProfilerRxStr = strings.Join([]string{
	`ProfileData::Add`,
	`ProfileData::prof_handler`,
	`CpuProfiler::prof_handler`,
	`__pthread_sighandler`,
	`__restore`,
}, `|`)
  1206	
// lockRxStr is the source text of a regular expression built by
// joining the alternatives below with "|". addLegacyFrameInfo installs
// it as DropFrames (with an empty KeepFrames) on profiles whose sample
// types match contentionzSampleTypes.
//
// NOTE(review): the bare `RecordLockProfileData` entry looks redundant
// with the `(base::)?RecordLockProfileData.*` entry that follows it;
// confirm against how DropFrames is matched before removing it.
var lockRxStr = strings.Join([]string{
	`RecordLockProfileData`,
	`(base::)?RecordLockProfileData.*`,
	`(base::)?SubmitMutexProfileData.*`,
	`(base::)?SubmitSpinLockProfileData.*`,
	`(base::Mutex::)?AwaitCommon.*`,
	`(base::Mutex::)?Unlock.*`,
	`(base::Mutex::)?UnlockSlow.*`,
	`(base::Mutex::)?ReaderUnlock.*`,
	`(base::MutexLock::)?~MutexLock.*`,
	`(Mutex::)?AwaitCommon.*`,
	`(Mutex::)?Unlock.*`,
	`(Mutex::)?UnlockSlow.*`,
	`(Mutex::)?ReaderUnlock.*`,
	`(MutexLock::)?~MutexLock.*`,
	`(SpinLock::)?Unlock.*`,
	`(SpinLock::)?SlowUnlock.*`,
	`(SpinLockHolder::)?~SpinLockHolder.*`,
}, `|`)
  1226	

View as plain text