...

Source file src/runtime/pprof/internal/profile/legacy_profile.go

     1	// Copyright 2014 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// This file implements parsers to convert legacy profiles into the
     6	// profile.proto format.
     7	
     8	package profile
     9	
    10	import (
    11		"bufio"
    12		"bytes"
    13		"fmt"
    14		"io"
    15		"math"
    16		"regexp"
    17		"strconv"
    18		"strings"
    19	)
    20	
    21	var (
    22		countStartRE = regexp.MustCompile(`\A(\w+) profile: total \d+\n\z`)
    23		countRE      = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\n\z`)
    24	
    25		heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
    26		heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)
    27	
    28		contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)
    29	
    30		hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)
    31	
    32		growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz`)
    33	
    34		fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz`)
    35	
    36		threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
    37		threadStartRE  = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)
    38	
    39		procMapsRE = regexp.MustCompile(`([[:xdigit:]]+)-([[:xdigit:]]+)\s+([-rwxp]+)\s+([[:xdigit:]]+)\s+([[:xdigit:]]+):([[:xdigit:]]+)\s+([[:digit:]]+)\s*(\S+)?`)
    40	
    41		briefMapsRE = regexp.MustCompile(`\s*([[:xdigit:]]+)-([[:xdigit:]]+):\s*(\S+)(\s.*@)?([[:xdigit:]]+)?`)
    42	
    43		// LegacyHeapAllocated instructs the heapz parsers to use the
    44		// allocated memory stats instead of the default in-use memory. Note
    45		// that tcmalloc doesn't provide all allocated memory, only in-use
    46		// stats.
    47		LegacyHeapAllocated bool
    48	)
    49	
    50	func isSpaceOrComment(line string) bool {
    51		trimmed := strings.TrimSpace(line)
    52		return len(trimmed) == 0 || trimmed[0] == '#'
    53	}
    54	
    55	// parseGoCount parses a Go count profile (e.g., threadcreate or
    56	// goroutine) and returns a new Profile.
    57	func parseGoCount(b []byte) (*Profile, error) {
    58		r := bytes.NewBuffer(b)
    59	
    60		var line string
    61		var err error
    62		for {
    63			// Skip past comments and empty lines seeking a real header.
    64			line, err = r.ReadString('\n')
    65			if err != nil {
    66				return nil, err
    67			}
    68			if !isSpaceOrComment(line) {
    69				break
    70			}
    71		}
    72	
    73		m := countStartRE.FindStringSubmatch(line)
    74		if m == nil {
    75			return nil, errUnrecognized
    76		}
    77		profileType := m[1]
    78		p := &Profile{
    79			PeriodType: &ValueType{Type: profileType, Unit: "count"},
    80			Period:     1,
    81			SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
    82		}
    83		locations := make(map[uint64]*Location)
    84		for {
    85			line, err = r.ReadString('\n')
    86			if err != nil {
    87				if err == io.EOF {
    88					break
    89				}
    90				return nil, err
    91			}
    92			if isSpaceOrComment(line) {
    93				continue
    94			}
    95			if strings.HasPrefix(line, "---") {
    96				break
    97			}
    98			m := countRE.FindStringSubmatch(line)
    99			if m == nil {
   100				return nil, errMalformed
   101			}
   102			n, err := strconv.ParseInt(m[1], 0, 64)
   103			if err != nil {
   104				return nil, errMalformed
   105			}
   106			fields := strings.Fields(m[2])
   107			locs := make([]*Location, 0, len(fields))
   108			for _, stk := range fields {
   109				addr, err := strconv.ParseUint(stk, 0, 64)
   110				if err != nil {
   111					return nil, errMalformed
   112				}
   113				// Adjust all frames by -1 to land on the call instruction.
   114				addr--
   115				loc := locations[addr]
   116				if loc == nil {
   117					loc = &Location{
   118						Address: addr,
   119					}
   120					locations[addr] = loc
   121					p.Location = append(p.Location, loc)
   122				}
   123				locs = append(locs, loc)
   124			}
   125			p.Sample = append(p.Sample, &Sample{
   126				Location: locs,
   127				Value:    []int64{n},
   128			})
   129		}
   130	
   131		if err = parseAdditionalSections(strings.TrimSpace(line), r, p); err != nil {
   132			return nil, err
   133		}
   134		return p, nil
   135	}
   136	
   137	// remapLocationIDs ensures there is a location for each address
   138	// referenced by a sample, and remaps the samples to point to the new
   139	// location ids.
   140	func (p *Profile) remapLocationIDs() {
   141		seen := make(map[*Location]bool, len(p.Location))
   142		var locs []*Location
   143	
   144		for _, s := range p.Sample {
   145			for _, l := range s.Location {
   146				if seen[l] {
   147					continue
   148				}
   149				l.ID = uint64(len(locs) + 1)
   150				locs = append(locs, l)
   151				seen[l] = true
   152			}
   153		}
   154		p.Location = locs
   155	}
   156	
   157	func (p *Profile) remapFunctionIDs() {
   158		seen := make(map[*Function]bool, len(p.Function))
   159		var fns []*Function
   160	
   161		for _, l := range p.Location {
   162			for _, ln := range l.Line {
   163				fn := ln.Function
   164				if fn == nil || seen[fn] {
   165					continue
   166				}
   167				fn.ID = uint64(len(fns) + 1)
   168				fns = append(fns, fn)
   169				seen[fn] = true
   170			}
   171		}
   172		p.Function = fns
   173	}
   174	
   175	// remapMappingIDs matches location addresses with existing mappings
   176	// and updates them appropriately. This is O(N*M), if this ever shows
   177	// up as a bottleneck, evaluate sorting the mappings and doing a
   178	// binary search, which would make it O(N*log(M)).
   179	func (p *Profile) remapMappingIDs() {
   180		if len(p.Mapping) == 0 {
   181			return
   182		}
   183	
   184		// Some profile handlers will incorrectly set regions for the main
   185		// executable if its section is remapped. Fix them through heuristics.
   186	
   187		// Remove the initial mapping if named '/anon_hugepage' and has a
   188		// consecutive adjacent mapping.
   189		if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
   190			if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
   191				p.Mapping = p.Mapping[1:]
   192			}
   193		}
   194	
   195		// Subtract the offset from the start of the main mapping if it
   196		// ends up at a recognizable start address.
   197		const expectedStart = 0x400000
   198		if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
   199			m.Start = expectedStart
   200			m.Offset = 0
   201		}
   202	
   203		for _, l := range p.Location {
   204			if a := l.Address; a != 0 {
   205				for _, m := range p.Mapping {
   206					if m.Start <= a && a < m.Limit {
   207						l.Mapping = m
   208						break
   209					}
   210				}
   211			}
   212		}
   213	
   214		// Reset all mapping IDs.
   215		for i, m := range p.Mapping {
   216			m.ID = uint64(i + 1)
   217		}
   218	}
   219	
   220	var cpuInts = []func([]byte) (uint64, []byte){
   221		get32l,
   222		get32b,
   223		get64l,
   224		get64b,
   225	}
   226	
   227	func get32l(b []byte) (uint64, []byte) {
   228		if len(b) < 4 {
   229			return 0, nil
   230		}
   231		return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
   232	}
   233	
   234	func get32b(b []byte) (uint64, []byte) {
   235		if len(b) < 4 {
   236			return 0, nil
   237		}
   238		return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
   239	}
   240	
   241	func get64l(b []byte) (uint64, []byte) {
   242		if len(b) < 8 {
   243			return 0, nil
   244		}
   245		return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
   246	}
   247	
   248	func get64b(b []byte) (uint64, []byte) {
   249		if len(b) < 8 {
   250			return 0, nil
   251		}
   252		return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
   253	}
   254	
   255	// ParseTracebacks parses a set of tracebacks and returns a newly
   256	// populated profile. It will accept any text file and generate a
   257	// Profile out of it with any hex addresses it can identify, including
   258	// a process map if it can recognize one. Each sample will include a
   259	// tag "source" with the addresses recognized in string format.
   260	func ParseTracebacks(b []byte) (*Profile, error) {
   261		r := bytes.NewBuffer(b)
   262	
   263		p := &Profile{
   264			PeriodType: &ValueType{Type: "trace", Unit: "count"},
   265			Period:     1,
   266			SampleType: []*ValueType{
   267				{Type: "trace", Unit: "count"},
   268			},
   269		}
   270	
   271		var sources []string
   272		var sloc []*Location
   273	
   274		locs := make(map[uint64]*Location)
   275		for {
   276			l, err := r.ReadString('\n')
   277			if err != nil {
   278				if err != io.EOF {
   279					return nil, err
   280				}
   281				if l == "" {
   282					break
   283				}
   284			}
   285			if sectionTrigger(l) == memoryMapSection {
   286				break
   287			}
   288			if s, addrs := extractHexAddresses(l); len(s) > 0 {
   289				for _, addr := range addrs {
   290					// Addresses from stack traces point to the next instruction after
   291					// each call. Adjust by -1 to land somewhere on the actual call.
   292					addr--
   293					loc := locs[addr]
   294					if locs[addr] == nil {
   295						loc = &Location{
   296							Address: addr,
   297						}
   298						p.Location = append(p.Location, loc)
   299						locs[addr] = loc
   300					}
   301					sloc = append(sloc, loc)
   302				}
   303	
   304				sources = append(sources, s...)
   305			} else {
   306				if len(sources) > 0 || len(sloc) > 0 {
   307					addTracebackSample(sloc, sources, p)
   308					sloc, sources = nil, nil
   309				}
   310			}
   311		}
   312	
   313		// Add final sample to save any leftover data.
   314		if len(sources) > 0 || len(sloc) > 0 {
   315			addTracebackSample(sloc, sources, p)
   316		}
   317	
   318		if err := p.ParseMemoryMap(r); err != nil {
   319			return nil, err
   320		}
   321		return p, nil
   322	}
   323	
   324	func addTracebackSample(l []*Location, s []string, p *Profile) {
   325		p.Sample = append(p.Sample,
   326			&Sample{
   327				Value:    []int64{1},
   328				Location: l,
   329				Label:    map[string][]string{"source": s},
   330			})
   331	}
   332	
   333	// parseCPU parses a profilez legacy profile and returns a newly
   334	// populated Profile.
   335	//
   336	// The general format for profilez samples is a sequence of words in
   337	// binary format. The first words are a header with the following data:
   338	//   1st word -- 0
   339	//   2nd word -- 3
   340	//   3rd word -- 0 if a c++ application, 1 if a java application.
   341	//   4th word -- Sampling period (in microseconds).
   342	//   5th word -- Padding.
   343	func parseCPU(b []byte) (*Profile, error) {
   344		var parse func([]byte) (uint64, []byte)
   345		var n1, n2, n3, n4, n5 uint64
   346		for _, parse = range cpuInts {
   347			var tmp []byte
   348			n1, tmp = parse(b)
   349			n2, tmp = parse(tmp)
   350			n3, tmp = parse(tmp)
   351			n4, tmp = parse(tmp)
   352			n5, tmp = parse(tmp)
   353	
   354			if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
   355				b = tmp
   356				return cpuProfile(b, int64(n4), parse)
   357			}
   358		}
   359		return nil, errUnrecognized
   360	}
   361	
   362	// cpuProfile returns a new Profile from C++ profilez data.
   363	// b is the profile bytes after the header, period is the profiling
   364	// period, and parse is a function to parse 8-byte chunks from the
   365	// profile in its native endianness.
   366	func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
   367		p := &Profile{
   368			Period:     period * 1000,
   369			PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
   370			SampleType: []*ValueType{
   371				{Type: "samples", Unit: "count"},
   372				{Type: "cpu", Unit: "nanoseconds"},
   373			},
   374		}
   375		var err error
   376		if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
   377			return nil, err
   378		}
   379	
   380		// If all samples have the same second-to-the-bottom frame, it
   381		// strongly suggests that it is an uninteresting artifact of
   382		// measurement -- a stack frame pushed by the signal handler. The
   383		// bottom frame is always correct as it is picked up from the signal
   384		// structure, not the stack. Check if this is the case and if so,
   385		// remove.
   386		if len(p.Sample) > 1 && len(p.Sample[0].Location) > 1 {
   387			allSame := true
   388			id1 := p.Sample[0].Location[1].Address
   389			for _, s := range p.Sample {
   390				if len(s.Location) < 2 || id1 != s.Location[1].Address {
   391					allSame = false
   392					break
   393				}
   394			}
   395			if allSame {
   396				for _, s := range p.Sample {
   397					s.Location = append(s.Location[:1], s.Location[2:]...)
   398				}
   399			}
   400		}
   401	
   402		if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
   403			return nil, err
   404		}
   405		return p, nil
   406	}
   407	
   408	// parseCPUSamples parses a collection of profilez samples from a
   409	// profile.
   410	//
   411	// profilez samples are a repeated sequence of stack frames of the
   412	// form:
   413	//    1st word -- The number of times this stack was encountered.
   414	//    2nd word -- The size of the stack (StackSize).
   415	//    3rd word -- The first address on the stack.
   416	//    ...
   417	//    StackSize + 2 -- The last address on the stack
   418	// The last stack trace is of the form:
   419	//   1st word -- 0
   420	//   2nd word -- 1
   421	//   3rd word -- 0
   422	//
   423	// Addresses from stack traces may point to the next instruction after
   424	// each call. Optionally adjust by -1 to land somewhere on the actual
   425	// call (except for the leaf, which is not a call).
   426	func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
   427		locs := make(map[uint64]*Location)
   428		for len(b) > 0 {
   429			var count, nstk uint64
   430			count, b = parse(b)
   431			nstk, b = parse(b)
   432			if b == nil || nstk > uint64(len(b)/4) {
   433				return nil, nil, errUnrecognized
   434			}
   435			var sloc []*Location
   436			addrs := make([]uint64, nstk)
   437			for i := 0; i < int(nstk); i++ {
   438				addrs[i], b = parse(b)
   439			}
   440	
   441			if count == 0 && nstk == 1 && addrs[0] == 0 {
   442				// End of data marker
   443				break
   444			}
   445			for i, addr := range addrs {
   446				if adjust && i > 0 {
   447					addr--
   448				}
   449				loc := locs[addr]
   450				if loc == nil {
   451					loc = &Location{
   452						Address: addr,
   453					}
   454					locs[addr] = loc
   455					p.Location = append(p.Location, loc)
   456				}
   457				sloc = append(sloc, loc)
   458			}
   459			p.Sample = append(p.Sample,
   460				&Sample{
   461					Value:    []int64{int64(count), int64(count) * p.Period},
   462					Location: sloc,
   463				})
   464		}
   465		// Reached the end without finding the EOD marker.
   466		return b, locs, nil
   467	}
   468	
   469	// parseHeap parses a heapz legacy or a growthz profile and
   470	// returns a newly populated Profile.
   471	func parseHeap(b []byte) (p *Profile, err error) {
   472		r := bytes.NewBuffer(b)
   473		l, err := r.ReadString('\n')
   474		if err != nil {
   475			return nil, errUnrecognized
   476		}
   477	
   478		sampling := ""
   479	
   480		if header := heapHeaderRE.FindStringSubmatch(l); header != nil {
   481			p = &Profile{
   482				SampleType: []*ValueType{
   483					{Type: "objects", Unit: "count"},
   484					{Type: "space", Unit: "bytes"},
   485				},
   486				PeriodType: &ValueType{Type: "objects", Unit: "bytes"},
   487			}
   488	
   489			var period int64
   490			if len(header[6]) > 0 {
   491				if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
   492					return nil, errUnrecognized
   493				}
   494			}
   495	
   496			switch header[5] {
   497			case "heapz_v2", "heap_v2":
   498				sampling, p.Period = "v2", period
   499			case "heapprofile":
   500				sampling, p.Period = "", 1
   501			case "heap":
   502				sampling, p.Period = "v2", period/2
   503			default:
   504				return nil, errUnrecognized
   505			}
   506		} else if header = growthHeaderRE.FindStringSubmatch(l); header != nil {
   507			p = &Profile{
   508				SampleType: []*ValueType{
   509					{Type: "objects", Unit: "count"},
   510					{Type: "space", Unit: "bytes"},
   511				},
   512				PeriodType: &ValueType{Type: "heapgrowth", Unit: "count"},
   513				Period:     1,
   514			}
   515		} else if header = fragmentationHeaderRE.FindStringSubmatch(l); header != nil {
   516			p = &Profile{
   517				SampleType: []*ValueType{
   518					{Type: "objects", Unit: "count"},
   519					{Type: "space", Unit: "bytes"},
   520				},
   521				PeriodType: &ValueType{Type: "allocations", Unit: "count"},
   522				Period:     1,
   523			}
   524		} else {
   525			return nil, errUnrecognized
   526		}
   527	
   528		if LegacyHeapAllocated {
   529			for _, st := range p.SampleType {
   530				st.Type = "alloc_" + st.Type
   531			}
   532		} else {
   533			for _, st := range p.SampleType {
   534				st.Type = "inuse_" + st.Type
   535			}
   536		}
   537	
   538		locs := make(map[uint64]*Location)
   539		for {
   540			l, err = r.ReadString('\n')
   541			if err != nil {
   542				if err != io.EOF {
   543					return nil, err
   544				}
   545	
   546				if l == "" {
   547					break
   548				}
   549			}
   550	
   551			if isSpaceOrComment(l) {
   552				continue
   553			}
   554			l = strings.TrimSpace(l)
   555	
   556			if sectionTrigger(l) != unrecognizedSection {
   557				break
   558			}
   559	
   560			value, blocksize, addrs, err := parseHeapSample(l, p.Period, sampling)
   561			if err != nil {
   562				return nil, err
   563			}
   564			var sloc []*Location
   565			for _, addr := range addrs {
   566				// Addresses from stack traces point to the next instruction after
   567				// each call. Adjust by -1 to land somewhere on the actual call.
   568				addr--
   569				loc := locs[addr]
   570				if locs[addr] == nil {
   571					loc = &Location{
   572						Address: addr,
   573					}
   574					p.Location = append(p.Location, loc)
   575					locs[addr] = loc
   576				}
   577				sloc = append(sloc, loc)
   578			}
   579	
   580			p.Sample = append(p.Sample, &Sample{
   581				Value:    value,
   582				Location: sloc,
   583				NumLabel: map[string][]int64{"bytes": {blocksize}},
   584			})
   585		}
   586	
   587		if err = parseAdditionalSections(l, r, p); err != nil {
   588			return nil, err
   589		}
   590		return p, nil
   591	}
   592	
   593	// parseHeapSample parses a single row from a heap profile into a new Sample.
   594	func parseHeapSample(line string, rate int64, sampling string) (value []int64, blocksize int64, addrs []uint64, err error) {
   595		sampleData := heapSampleRE.FindStringSubmatch(line)
   596		if len(sampleData) != 6 {
   597			return value, blocksize, addrs, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
   598		}
   599	
   600		// Use first two values by default; tcmalloc sampling generates the
   601		// same value for both, only the older heap-profile collect separate
   602		// stats for in-use and allocated objects.
   603		valueIndex := 1
   604		if LegacyHeapAllocated {
   605			valueIndex = 3
   606		}
   607	
   608		var v1, v2 int64
   609		if v1, err = strconv.ParseInt(sampleData[valueIndex], 10, 64); err != nil {
   610			return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
   611		}
   612		if v2, err = strconv.ParseInt(sampleData[valueIndex+1], 10, 64); err != nil {
   613			return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
   614		}
   615	
   616		if v1 == 0 {
   617			if v2 != 0 {
   618				return value, blocksize, addrs, fmt.Errorf("allocation count was 0 but allocation bytes was %d", v2)
   619			}
   620		} else {
   621			blocksize = v2 / v1
   622			if sampling == "v2" {
   623				v1, v2 = scaleHeapSample(v1, v2, rate)
   624			}
   625		}
   626	
   627		value = []int64{v1, v2}
   628		addrs = parseHexAddresses(sampleData[5])
   629	
   630		return value, blocksize, addrs, nil
   631	}
   632	
   633	// extractHexAddresses extracts hex numbers from a string and returns
   634	// them, together with their numeric value, in a slice.
   635	func extractHexAddresses(s string) ([]string, []uint64) {
   636		hexStrings := hexNumberRE.FindAllString(s, -1)
   637		var ids []uint64
   638		for _, s := range hexStrings {
   639			if id, err := strconv.ParseUint(s, 0, 64); err == nil {
   640				ids = append(ids, id)
   641			} else {
   642				// Do not expect any parsing failures due to the regexp matching.
   643				panic("failed to parse hex value:" + s)
   644			}
   645		}
   646		return hexStrings, ids
   647	}
   648	
   649	// parseHexAddresses parses hex numbers from a string and returns them
   650	// in a slice.
   651	func parseHexAddresses(s string) []uint64 {
   652		_, ids := extractHexAddresses(s)
   653		return ids
   654	}
   655	
   656	// scaleHeapSample adjusts the data from a heapz Sample to
   657	// account for its probability of appearing in the collected
   658	// data. heapz profiles are a sampling of the memory allocations
   659	// requests in a program. We estimate the unsampled value by dividing
   660	// each collected sample by its probability of appearing in the
   661	// profile. heapz v2 profiles rely on a poisson process to determine
   662	// which samples to collect, based on the desired average collection
   663	// rate R. The probability of a sample of size S to appear in that
   664	// profile is 1-exp(-S/R).
   665	func scaleHeapSample(count, size, rate int64) (int64, int64) {
   666		if count == 0 || size == 0 {
   667			return 0, 0
   668		}
   669	
   670		if rate <= 1 {
   671			// if rate==1 all samples were collected so no adjustment is needed.
   672			// if rate<1 treat as unknown and skip scaling.
   673			return count, size
   674		}
   675	
   676		avgSize := float64(size) / float64(count)
   677		scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
   678	
   679		return int64(float64(count) * scale), int64(float64(size) * scale)
   680	}
   681	
   682	// parseContention parses a mutex or contention profile. There are 2 cases:
   683	// "--- contentionz " for legacy C++ profiles (and backwards compatibility)
   684	// "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
   685	// This code converts the text output from runtime into a *Profile. (In the future
   686	// the runtime might write a serialized Profile directly making this unnecessary.)
   687	func parseContention(b []byte) (*Profile, error) {
   688		r := bytes.NewBuffer(b)
   689		var l string
   690		var err error
   691		for {
   692			// Skip past comments and empty lines seeking a real header.
   693			l, err = r.ReadString('\n')
   694			if err != nil {
   695				return nil, err
   696			}
   697			if !isSpaceOrComment(l) {
   698				break
   699			}
   700		}
   701	
   702		if strings.HasPrefix(l, "--- contentionz ") {
   703			return parseCppContention(r)
   704		} else if strings.HasPrefix(l, "--- mutex:") {
   705			return parseCppContention(r)
   706		} else if strings.HasPrefix(l, "--- contention:") {
   707			return parseCppContention(r)
   708		}
   709		return nil, errUnrecognized
   710	}
   711	
   712	// parseCppContention parses the output from synchronization_profiling.cc
   713	// for backward compatibility, and the compatible (non-debug) block profile
   714	// output from the Go runtime.
   715	func parseCppContention(r *bytes.Buffer) (*Profile, error) {
   716		p := &Profile{
   717			PeriodType: &ValueType{Type: "contentions", Unit: "count"},
   718			Period:     1,
   719			SampleType: []*ValueType{
   720				{Type: "contentions", Unit: "count"},
   721				{Type: "delay", Unit: "nanoseconds"},
   722			},
   723		}
   724	
   725		var cpuHz int64
   726		var l string
   727		var err error
   728		// Parse text of the form "attribute = value" before the samples.
   729		const delimiter = "="
   730		for {
   731			l, err = r.ReadString('\n')
   732			if err != nil {
   733				if err != io.EOF {
   734					return nil, err
   735				}
   736	
   737				if l == "" {
   738					break
   739				}
   740			}
   741			if isSpaceOrComment(l) {
   742				continue
   743			}
   744	
   745			if l = strings.TrimSpace(l); l == "" {
   746				continue
   747			}
   748	
   749			if strings.HasPrefix(l, "---") {
   750				break
   751			}
   752	
   753			attr := strings.SplitN(l, delimiter, 2)
   754			if len(attr) != 2 {
   755				break
   756			}
   757			key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
   758			var err error
   759			switch key {
   760			case "cycles/second":
   761				if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
   762					return nil, errUnrecognized
   763				}
   764			case "sampling period":
   765				if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
   766					return nil, errUnrecognized
   767				}
   768			case "ms since reset":
   769				ms, err := strconv.ParseInt(val, 0, 64)
   770				if err != nil {
   771					return nil, errUnrecognized
   772				}
   773				p.DurationNanos = ms * 1000 * 1000
   774			case "format":
   775				// CPP contentionz profiles don't have format.
   776				return nil, errUnrecognized
   777			case "resolution":
   778				// CPP contentionz profiles don't have resolution.
   779				return nil, errUnrecognized
   780			case "discarded samples":
   781			default:
   782				return nil, errUnrecognized
   783			}
   784		}
   785	
   786		locs := make(map[uint64]*Location)
   787		for {
   788			if !isSpaceOrComment(l) {
   789				if l = strings.TrimSpace(l); strings.HasPrefix(l, "---") {
   790					break
   791				}
   792				value, addrs, err := parseContentionSample(l, p.Period, cpuHz)
   793				if err != nil {
   794					return nil, err
   795				}
   796				var sloc []*Location
   797				for _, addr := range addrs {
   798					// Addresses from stack traces point to the next instruction after
   799					// each call. Adjust by -1 to land somewhere on the actual call.
   800					addr--
   801					loc := locs[addr]
   802					if locs[addr] == nil {
   803						loc = &Location{
   804							Address: addr,
   805						}
   806						p.Location = append(p.Location, loc)
   807						locs[addr] = loc
   808					}
   809					sloc = append(sloc, loc)
   810				}
   811				p.Sample = append(p.Sample, &Sample{
   812					Value:    value,
   813					Location: sloc,
   814				})
   815			}
   816	
   817			if l, err = r.ReadString('\n'); err != nil {
   818				if err != io.EOF {
   819					return nil, err
   820				}
   821				if l == "" {
   822					break
   823				}
   824			}
   825		}
   826	
   827		if err = parseAdditionalSections(l, r, p); err != nil {
   828			return nil, err
   829		}
   830	
   831		return p, nil
   832	}
   833	
   834	// parseContentionSample parses a single row from a contention profile
   835	// into a new Sample.
   836	func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
   837		sampleData := contentionSampleRE.FindStringSubmatch(line)
   838		if sampleData == nil {
   839			return value, addrs, errUnrecognized
   840		}
   841	
   842		v1, err := strconv.ParseInt(sampleData[1], 10, 64)
   843		if err != nil {
   844			return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
   845		}
   846		v2, err := strconv.ParseInt(sampleData[2], 10, 64)
   847		if err != nil {
   848			return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
   849		}
   850	
   851		// Unsample values if period and cpuHz are available.
   852		// - Delays are scaled to cycles and then to nanoseconds.
   853		// - Contentions are scaled to cycles.
   854		if period > 0 {
   855			if cpuHz > 0 {
   856				cpuGHz := float64(cpuHz) / 1e9
   857				v1 = int64(float64(v1) * float64(period) / cpuGHz)
   858			}
   859			v2 = v2 * period
   860		}
   861	
   862		value = []int64{v2, v1}
   863		addrs = parseHexAddresses(sampleData[3])
   864	
   865		return value, addrs, nil
   866	}
   867	
   868	// parseThread parses a Threadz profile and returns a new Profile.
   869	func parseThread(b []byte) (*Profile, error) {
   870		r := bytes.NewBuffer(b)
   871	
   872		var line string
   873		var err error
   874		for {
   875			// Skip past comments and empty lines seeking a real header.
   876			line, err = r.ReadString('\n')
   877			if err != nil {
   878				return nil, err
   879			}
   880			if !isSpaceOrComment(line) {
   881				break
   882			}
   883		}
   884	
   885		if m := threadzStartRE.FindStringSubmatch(line); m != nil {
   886			// Advance over initial comments until first stack trace.
   887			for {
   888				line, err = r.ReadString('\n')
   889				if err != nil {
   890					if err != io.EOF {
   891						return nil, err
   892					}
   893	
   894					if line == "" {
   895						break
   896					}
   897				}
   898				if sectionTrigger(line) != unrecognizedSection || line[0] == '-' {
   899					break
   900				}
   901			}
   902		} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
   903			return nil, errUnrecognized
   904		}
   905	
   906		p := &Profile{
   907			SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
   908			PeriodType: &ValueType{Type: "thread", Unit: "count"},
   909			Period:     1,
   910		}
   911	
   912		locs := make(map[uint64]*Location)
   913		// Recognize each thread and populate profile samples.
   914		for sectionTrigger(line) == unrecognizedSection {
   915			if strings.HasPrefix(line, "---- no stack trace for") {
   916				line = ""
   917				break
   918			}
   919			if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
   920				return nil, errUnrecognized
   921			}
   922	
   923			var addrs []uint64
   924			line, addrs, err = parseThreadSample(r)
   925			if err != nil {
   926				return nil, errUnrecognized
   927			}
   928			if len(addrs) == 0 {
   929				// We got a --same as previous threads--. Bump counters.
   930				if len(p.Sample) > 0 {
   931					s := p.Sample[len(p.Sample)-1]
   932					s.Value[0]++
   933				}
   934				continue
   935			}
   936	
   937			var sloc []*Location
   938			for _, addr := range addrs {
   939				// Addresses from stack traces point to the next instruction after
   940				// each call. Adjust by -1 to land somewhere on the actual call.
   941				addr--
   942				loc := locs[addr]
   943				if locs[addr] == nil {
   944					loc = &Location{
   945						Address: addr,
   946					}
   947					p.Location = append(p.Location, loc)
   948					locs[addr] = loc
   949				}
   950				sloc = append(sloc, loc)
   951			}
   952	
   953			p.Sample = append(p.Sample, &Sample{
   954				Value:    []int64{1},
   955				Location: sloc,
   956			})
   957		}
   958	
   959		if err = parseAdditionalSections(line, r, p); err != nil {
   960			return nil, err
   961		}
   962	
   963		return p, nil
   964	}
   965	
   966	// parseThreadSample parses a symbolized or unsymbolized stack trace.
   967	// Returns the first line after the traceback, the sample (or nil if
   968	// it hits a 'same-as-previous' marker) and an error.
   969	func parseThreadSample(b *bytes.Buffer) (nextl string, addrs []uint64, err error) {
   970		var l string
   971		sameAsPrevious := false
   972		for {
   973			if l, err = b.ReadString('\n'); err != nil {
   974				if err != io.EOF {
   975					return "", nil, err
   976				}
   977				if l == "" {
   978					break
   979				}
   980			}
   981			if l = strings.TrimSpace(l); l == "" {
   982				continue
   983			}
   984	
   985			if strings.HasPrefix(l, "---") {
   986				break
   987			}
   988			if strings.Contains(l, "same as previous thread") {
   989				sameAsPrevious = true
   990				continue
   991			}
   992	
   993			addrs = append(addrs, parseHexAddresses(l)...)
   994		}
   995	
   996		if sameAsPrevious {
   997			return l, nil, nil
   998		}
   999		return l, addrs, nil
  1000	}
  1001	
  1002	// parseAdditionalSections parses any additional sections in the
  1003	// profile, ignoring any unrecognized sections.
  1004	func parseAdditionalSections(l string, b *bytes.Buffer, p *Profile) (err error) {
  1005		for {
  1006			if sectionTrigger(l) == memoryMapSection {
  1007				break
  1008			}
  1009			// Ignore any unrecognized sections.
  1010			if l, err := b.ReadString('\n'); err != nil {
  1011				if err != io.EOF {
  1012					return err
  1013				}
  1014				if l == "" {
  1015					break
  1016				}
  1017			}
  1018		}
  1019		return p.ParseMemoryMap(b)
  1020	}
  1021	
  1022	// ParseMemoryMap parses a memory map in the format of
  1023	// /proc/self/maps, and overrides the mappings in the current profile.
  1024	// It renumbers the samples and locations in the profile correspondingly.
  1025	func (p *Profile) ParseMemoryMap(rd io.Reader) error {
  1026		b := bufio.NewReader(rd)
  1027	
  1028		var attrs []string
  1029		var r *strings.Replacer
  1030		const delimiter = "="
  1031		for {
  1032			l, err := b.ReadString('\n')
  1033			if err != nil {
  1034				if err != io.EOF {
  1035					return err
  1036				}
  1037				if l == "" {
  1038					break
  1039				}
  1040			}
  1041			if l = strings.TrimSpace(l); l == "" {
  1042				continue
  1043			}
  1044	
  1045			if r != nil {
  1046				l = r.Replace(l)
  1047			}
  1048			m, err := parseMappingEntry(l)
  1049			if err != nil {
  1050				if err == errUnrecognized {
  1051					// Recognize assignments of the form: attr=value, and replace
  1052					// $attr with value on subsequent mappings.
  1053					if attr := strings.SplitN(l, delimiter, 2); len(attr) == 2 {
  1054						attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
  1055						r = strings.NewReplacer(attrs...)
  1056					}
  1057					// Ignore any unrecognized entries
  1058					continue
  1059				}
  1060				return err
  1061			}
  1062			if m == nil || (m.File == "" && len(p.Mapping) != 0) {
  1063				// In some cases the first entry may include the address range
  1064				// but not the name of the file. It should be followed by
  1065				// another entry with the name.
  1066				continue
  1067			}
  1068			if len(p.Mapping) == 1 && p.Mapping[0].File == "" {
  1069				// Update the name if this is the entry following that empty one.
  1070				p.Mapping[0].File = m.File
  1071				continue
  1072			}
  1073			p.Mapping = append(p.Mapping, m)
  1074		}
  1075		p.remapLocationIDs()
  1076		p.remapFunctionIDs()
  1077		p.remapMappingIDs()
  1078		return nil
  1079	}
  1080	
  1081	func parseMappingEntry(l string) (*Mapping, error) {
  1082		mapping := &Mapping{}
  1083		var err error
  1084		if me := procMapsRE.FindStringSubmatch(l); len(me) == 9 {
  1085			if !strings.Contains(me[3], "x") {
  1086				// Skip non-executable entries.
  1087				return nil, nil
  1088			}
  1089			if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
  1090				return nil, errUnrecognized
  1091			}
  1092			if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
  1093				return nil, errUnrecognized
  1094			}
  1095			if me[4] != "" {
  1096				if mapping.Offset, err = strconv.ParseUint(me[4], 16, 64); err != nil {
  1097					return nil, errUnrecognized
  1098				}
  1099			}
  1100			mapping.File = me[8]
  1101			return mapping, nil
  1102		}
  1103	
  1104		if me := briefMapsRE.FindStringSubmatch(l); len(me) == 6 {
  1105			if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
  1106				return nil, errUnrecognized
  1107			}
  1108			if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
  1109				return nil, errUnrecognized
  1110			}
  1111			mapping.File = me[3]
  1112			if me[5] != "" {
  1113				if mapping.Offset, err = strconv.ParseUint(me[5], 16, 64); err != nil {
  1114					return nil, errUnrecognized
  1115				}
  1116			}
  1117			return mapping, nil
  1118		}
  1119	
  1120		return nil, errUnrecognized
  1121	}
  1122	
  1123	type sectionType int
  1124	
  1125	const (
  1126		unrecognizedSection sectionType = iota
  1127		memoryMapSection
  1128	)
  1129	
  1130	var memoryMapTriggers = []string{
  1131		"--- Memory map: ---",
  1132		"MAPPED_LIBRARIES:",
  1133	}
  1134	
  1135	func sectionTrigger(line string) sectionType {
  1136		for _, trigger := range memoryMapTriggers {
  1137			if strings.Contains(line, trigger) {
  1138				return memoryMapSection
  1139			}
  1140		}
  1141		return unrecognizedSection
  1142	}
  1143	
  1144	func (p *Profile) addLegacyFrameInfo() {
  1145		switch {
  1146		case isProfileType(p, heapzSampleTypes) ||
  1147			isProfileType(p, heapzInUseSampleTypes) ||
  1148			isProfileType(p, heapzAllocSampleTypes):
  1149			p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
  1150		case isProfileType(p, contentionzSampleTypes):
  1151			p.DropFrames, p.KeepFrames = lockRxStr, ""
  1152		default:
  1153			p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
  1154		}
  1155	}
  1156	
  1157	var heapzSampleTypes = []string{"allocations", "size"} // early Go pprof profiles
  1158	var heapzInUseSampleTypes = []string{"inuse_objects", "inuse_space"}
  1159	var heapzAllocSampleTypes = []string{"alloc_objects", "alloc_space"}
  1160	var contentionzSampleTypes = []string{"contentions", "delay"}
  1161	
  1162	func isProfileType(p *Profile, t []string) bool {
  1163		st := p.SampleType
  1164		if len(st) != len(t) {
  1165			return false
  1166		}
  1167	
  1168		for i := range st {
  1169			if st[i].Type != t[i] {
  1170				return false
  1171			}
  1172		}
  1173		return true
  1174	}
  1175	
  1176	var allocRxStr = strings.Join([]string{
  1177		// POSIX entry points.
  1178		`calloc`,
  1179		`cfree`,
  1180		`malloc`,
  1181		`free`,
  1182		`memalign`,
  1183		`do_memalign`,
  1184		`(__)?posix_memalign`,
  1185		`pvalloc`,
  1186		`valloc`,
  1187		`realloc`,
  1188	
  1189		// TC malloc.
  1190		`tcmalloc::.*`,
  1191		`tc_calloc`,
  1192		`tc_cfree`,
  1193		`tc_malloc`,
  1194		`tc_free`,
  1195		`tc_memalign`,
  1196		`tc_posix_memalign`,
  1197		`tc_pvalloc`,
  1198		`tc_valloc`,
  1199		`tc_realloc`,
  1200		`tc_new`,
  1201		`tc_delete`,
  1202		`tc_newarray`,
  1203		`tc_deletearray`,
  1204		`tc_new_nothrow`,
  1205		`tc_newarray_nothrow`,
  1206	
  1207		// Memory-allocation routines on OS X.
  1208		`malloc_zone_malloc`,
  1209		`malloc_zone_calloc`,
  1210		`malloc_zone_valloc`,
  1211		`malloc_zone_realloc`,
  1212		`malloc_zone_memalign`,
  1213		`malloc_zone_free`,
  1214	
  1215		// Go runtime
  1216		`runtime\..*`,
  1217	
  1218		// Other misc. memory allocation routines
  1219		`BaseArena::.*`,
  1220		`(::)?do_malloc_no_errno`,
  1221		`(::)?do_malloc_pages`,
  1222		`(::)?do_malloc`,
  1223		`DoSampledAllocation`,
  1224		`MallocedMemBlock::MallocedMemBlock`,
  1225		`_M_allocate`,
  1226		`__builtin_(vec_)?delete`,
  1227		`__builtin_(vec_)?new`,
  1228		`__gnu_cxx::new_allocator::allocate`,
  1229		`__libc_malloc`,
  1230		`__malloc_alloc_template::allocate`,
  1231		`allocate`,
  1232		`cpp_alloc`,
  1233		`operator new(\[\])?`,
  1234		`simple_alloc::allocate`,
  1235	}, `|`)
  1236	
  1237	var allocSkipRxStr = strings.Join([]string{
  1238		// Preserve Go runtime frames that appear in the middle/bottom of
  1239		// the stack.
  1240		`runtime\.panic`,
  1241		`runtime\.reflectcall`,
  1242		`runtime\.call[0-9]*`,
  1243	}, `|`)
  1244	
  1245	var cpuProfilerRxStr = strings.Join([]string{
  1246		`ProfileData::Add`,
  1247		`ProfileData::prof_handler`,
  1248		`CpuProfiler::prof_handler`,
  1249		`__pthread_sighandler`,
  1250		`__restore`,
  1251	}, `|`)
  1252	
  1253	var lockRxStr = strings.Join([]string{
  1254		`RecordLockProfileData`,
  1255		`(base::)?RecordLockProfileData.*`,
  1256		`(base::)?SubmitMutexProfileData.*`,
  1257		`(base::)?SubmitSpinLockProfileData.*`,
  1258		`(Mutex::)?AwaitCommon.*`,
  1259		`(Mutex::)?Unlock.*`,
  1260		`(Mutex::)?UnlockSlow.*`,
  1261		`(Mutex::)?ReaderUnlock.*`,
  1262		`(MutexLock::)?~MutexLock.*`,
  1263		`(SpinLock::)?Unlock.*`,
  1264		`(SpinLock::)?SlowUnlock.*`,
  1265		`(SpinLockHolder::)?~SpinLockHolder.*`,
  1266	}, `|`)
  1267	

View as plain text