...

Source file src/pkg/cmd/vendor/github.com/google/pprof/internal/binutils/binutils.go

     1	// Copyright 2014 Google Inc. All Rights Reserved.
     2	//
     3	// Licensed under the Apache License, Version 2.0 (the "License");
     4	// you may not use this file except in compliance with the License.
     5	// You may obtain a copy of the License at
     6	//
     7	//     http://www.apache.org/licenses/LICENSE-2.0
     8	//
     9	// Unless required by applicable law or agreed to in writing, software
    10	// distributed under the License is distributed on an "AS IS" BASIS,
    11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12	// See the License for the specific language governing permissions and
    13	// limitations under the License.
    14	
    15	// Package binutils provides access to the GNU binutils.
    16	package binutils
    17	
    18	import (
    19		"debug/elf"
    20		"debug/macho"
    21		"encoding/binary"
    22		"fmt"
    23		"io"
    24		"os"
    25		"os/exec"
    26		"path/filepath"
    27		"regexp"
    28		"runtime"
    29		"strings"
    30		"sync"
    31	
    32		"github.com/google/pprof/internal/elfexec"
    33		"github.com/google/pprof/internal/plugin"
    34	)
    35	
// A Binutils implements plugin.ObjTool by invoking the GNU binutils.
//
// The tool configuration lives in rep; it is read through get and swapped
// through update, both of which hold mu while touching the pointer.
type Binutils struct {
	mu  sync.Mutex
	rep *binrep
}
    41	
// binrep is an immutable representation for Binutils.  It is atomically
// replaced on every mutation to provide thread-safe access.
type binrep struct {
	// Commands to invoke. Each tool has a command string and a flag that
	// records whether findExe located it on the configured paths; when the
	// flag is false the string is just the bare tool name.
	llvmSymbolizer      string
	llvmSymbolizerFound bool
	addr2line           string
	addr2lineFound      bool
	nm                  string
	nmFound             bool
	objdump             string
	objdumpFound        bool

	// if fast, perform symbolization using nm (symbol names only),
	// instead of file-line detail from the slower addr2line.
	fast bool
}
    59	
    60	// get returns the current representation for bu, initializing it if necessary.
    61	func (bu *Binutils) get() *binrep {
    62		bu.mu.Lock()
    63		r := bu.rep
    64		if r == nil {
    65			r = &binrep{}
    66			initTools(r, "")
    67			bu.rep = r
    68		}
    69		bu.mu.Unlock()
    70		return r
    71	}
    72	
    73	// update modifies the rep for bu via the supplied function.
    74	func (bu *Binutils) update(fn func(r *binrep)) {
    75		r := &binrep{}
    76		bu.mu.Lock()
    77		defer bu.mu.Unlock()
    78		if bu.rep == nil {
    79			initTools(r, "")
    80		} else {
    81			*r = *bu.rep
    82		}
    83		fn(r)
    84		bu.rep = r
    85	}
    86	
    87	// String returns string representation of the binutils state for debug logging.
    88	func (bu *Binutils) String() string {
    89		r := bu.get()
    90		var llvmSymbolizer, addr2line, nm, objdump string
    91		if r.llvmSymbolizerFound {
    92			llvmSymbolizer = r.llvmSymbolizer
    93		}
    94		if r.addr2lineFound {
    95			addr2line = r.addr2line
    96		}
    97		if r.nmFound {
    98			nm = r.nm
    99		}
   100		if r.objdumpFound {
   101			objdump = r.objdump
   102		}
   103		return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t",
   104			llvmSymbolizer, addr2line, nm, objdump, r.fast)
   105	}
   106	
   107	// SetFastSymbolization sets a toggle that makes binutils use fast
   108	// symbolization (using nm), which is much faster than addr2line but
   109	// provides only symbol name information (no file/line).
   110	func (bu *Binutils) SetFastSymbolization(fast bool) {
   111		bu.update(func(r *binrep) { r.fast = fast })
   112	}
   113	
   114	// SetTools processes the contents of the tools option. It
   115	// expects a set of entries separated by commas; each entry is a pair
   116	// of the form t:path, where cmd will be used to look only for the
   117	// tool named t. If t is not specified, the path is searched for all
   118	// tools.
   119	func (bu *Binutils) SetTools(config string) {
   120		bu.update(func(r *binrep) { initTools(r, config) })
   121	}
   122	
   123	func initTools(b *binrep, config string) {
   124		// paths collect paths per tool; Key "" contains the default.
   125		paths := make(map[string][]string)
   126		for _, t := range strings.Split(config, ",") {
   127			name, path := "", t
   128			if ct := strings.SplitN(t, ":", 2); len(ct) == 2 {
   129				name, path = ct[0], ct[1]
   130			}
   131			paths[name] = append(paths[name], path)
   132		}
   133	
   134		defaultPath := paths[""]
   135		b.llvmSymbolizer, b.llvmSymbolizerFound = findExe("llvm-symbolizer", append(paths["llvm-symbolizer"], defaultPath...))
   136		b.addr2line, b.addr2lineFound = findExe("addr2line", append(paths["addr2line"], defaultPath...))
   137		if !b.addr2lineFound {
   138			// On MacOS, brew installs addr2line under gaddr2line name, so search for
   139			// that if the tool is not found by its default name.
   140			b.addr2line, b.addr2lineFound = findExe("gaddr2line", append(paths["addr2line"], defaultPath...))
   141		}
   142		b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
   143		b.objdump, b.objdumpFound = findExe("objdump", append(paths["objdump"], defaultPath...))
   144	}
   145	
   146	// findExe looks for an executable command on a set of paths.
   147	// If it cannot find it, returns cmd.
   148	func findExe(cmd string, paths []string) (string, bool) {
   149		for _, p := range paths {
   150			cp := filepath.Join(p, cmd)
   151			if c, err := exec.LookPath(cp); err == nil {
   152				return c, true
   153			}
   154		}
   155		return cmd, false
   156	}
   157	
   158	// Disasm returns the assembly instructions for the specified address range
   159	// of a binary.
   160	func (bu *Binutils) Disasm(file string, start, end uint64) ([]plugin.Inst, error) {
   161		b := bu.get()
   162		cmd := exec.Command(b.objdump, "-d", "-C", "--no-show-raw-insn", "-l",
   163			fmt.Sprintf("--start-address=%#x", start),
   164			fmt.Sprintf("--stop-address=%#x", end),
   165			file)
   166		out, err := cmd.Output()
   167		if err != nil {
   168			return nil, fmt.Errorf("%v: %v", cmd.Args, err)
   169		}
   170	
   171		return disassemble(out)
   172	}
   173	
   174	// Open satisfies the plugin.ObjTool interface.
   175	func (bu *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
   176		b := bu.get()
   177	
   178		// Make sure file is a supported executable.
   179		// This uses magic numbers, mainly to provide better error messages but
   180		// it should also help speed.
   181	
   182		if _, err := os.Stat(name); err != nil {
   183			// For testing, do not require file name to exist.
   184			if strings.Contains(b.addr2line, "testdata/") {
   185				return &fileAddr2Line{file: file{b: b, name: name}}, nil
   186			}
   187			return nil, err
   188		}
   189	
   190		// Read the first 4 bytes of the file.
   191	
   192		f, err := os.Open(name)
   193		if err != nil {
   194			return nil, fmt.Errorf("error opening %s: %v", name, err)
   195		}
   196		defer f.Close()
   197	
   198		var header [4]byte
   199		if _, err = io.ReadFull(f, header[:]); err != nil {
   200			return nil, fmt.Errorf("error reading magic number from %s: %v", name, err)
   201		}
   202	
   203		elfMagic := string(header[:])
   204	
   205		// Match against supported file types.
   206		if elfMagic == elf.ELFMAG {
   207			f, err := b.openELF(name, start, limit, offset)
   208			if err != nil {
   209				return nil, fmt.Errorf("error reading ELF file %s: %v", name, err)
   210			}
   211			return f, nil
   212		}
   213	
   214		// Mach-O magic numbers can be big or little endian.
   215		machoMagicLittle := binary.LittleEndian.Uint32(header[:])
   216		machoMagicBig := binary.BigEndian.Uint32(header[:])
   217	
   218		if machoMagicLittle == macho.Magic32 || machoMagicLittle == macho.Magic64 ||
   219			machoMagicBig == macho.Magic32 || machoMagicBig == macho.Magic64 {
   220			f, err := b.openMachO(name, start, limit, offset)
   221			if err != nil {
   222				return nil, fmt.Errorf("error reading Mach-O file %s: %v", name, err)
   223			}
   224			return f, nil
   225		}
   226		if machoMagicLittle == macho.MagicFat || machoMagicBig == macho.MagicFat {
   227			f, err := b.openFatMachO(name, start, limit, offset)
   228			if err != nil {
   229				return nil, fmt.Errorf("error reading fat Mach-O file %s: %v", name, err)
   230			}
   231			return f, nil
   232		}
   233	
   234		return nil, fmt.Errorf("unrecognized binary format: %s", name)
   235	}
   236	
   237	func (b *binrep) openMachOCommon(name string, of *macho.File, start, limit, offset uint64) (plugin.ObjFile, error) {
   238	
   239		// Subtract the load address of the __TEXT section. Usually 0 for shared
   240		// libraries or 0x100000000 for executables. You can check this value by
   241		// running `objdump -private-headers <file>`.
   242	
   243		textSegment := of.Segment("__TEXT")
   244		if textSegment == nil {
   245			return nil, fmt.Errorf("could not identify base for %s: no __TEXT segment", name)
   246		}
   247		if textSegment.Addr > start {
   248			return nil, fmt.Errorf("could not identify base for %s: __TEXT segment address (0x%x) > mapping start address (0x%x)",
   249				name, textSegment.Addr, start)
   250		}
   251	
   252		base := start - textSegment.Addr
   253	
   254		if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
   255			return &fileNM{file: file{b: b, name: name, base: base}}, nil
   256		}
   257		return &fileAddr2Line{file: file{b: b, name: name, base: base}}, nil
   258	}
   259	
   260	func (b *binrep) openFatMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
   261		of, err := macho.OpenFat(name)
   262		if err != nil {
   263			return nil, fmt.Errorf("error parsing %s: %v", name, err)
   264		}
   265		defer of.Close()
   266	
   267		if len(of.Arches) == 0 {
   268			return nil, fmt.Errorf("empty fat Mach-O file: %s", name)
   269		}
   270	
   271		var arch macho.Cpu
   272		// Use the host architecture.
   273		// TODO: This is not ideal because the host architecture may not be the one
   274		// that was profiled. E.g. an amd64 host can profile a 386 program.
   275		switch runtime.GOARCH {
   276		case "386":
   277			arch = macho.Cpu386
   278		case "amd64", "amd64p32":
   279			arch = macho.CpuAmd64
   280		case "arm", "armbe", "arm64", "arm64be":
   281			arch = macho.CpuArm
   282		case "ppc":
   283			arch = macho.CpuPpc
   284		case "ppc64", "ppc64le":
   285			arch = macho.CpuPpc64
   286		default:
   287			return nil, fmt.Errorf("unsupported host architecture for %s: %s", name, runtime.GOARCH)
   288		}
   289		for i := range of.Arches {
   290			if of.Arches[i].Cpu == arch {
   291				return b.openMachOCommon(name, of.Arches[i].File, start, limit, offset)
   292			}
   293		}
   294		return nil, fmt.Errorf("architecture not found in %s: %s", name, runtime.GOARCH)
   295	}
   296	
   297	func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
   298		of, err := macho.Open(name)
   299		if err != nil {
   300			return nil, fmt.Errorf("error parsing %s: %v", name, err)
   301		}
   302		defer of.Close()
   303	
   304		return b.openMachOCommon(name, of, start, limit, offset)
   305	}
   306	
   307	func (b *binrep) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
   308		ef, err := elf.Open(name)
   309		if err != nil {
   310			return nil, fmt.Errorf("error parsing %s: %v", name, err)
   311		}
   312		defer ef.Close()
   313	
   314		var stextOffset *uint64
   315		var pageAligned = func(addr uint64) bool { return addr%4096 == 0 }
   316		if strings.Contains(name, "vmlinux") || !pageAligned(start) || !pageAligned(limit) || !pageAligned(offset) {
   317			// Reading all Symbols is expensive, and we only rarely need it so
   318			// we don't want to do it every time. But if _stext happens to be
   319			// page-aligned but isn't the same as Vaddr, we would symbolize
   320			// wrong. So if the name the addresses aren't page aligned, or if
   321			// the name is "vmlinux" we read _stext. We can be wrong if: (1)
   322			// someone passes a kernel path that doesn't contain "vmlinux" AND
   323			// (2) _stext is page-aligned AND (3) _stext is not at Vaddr
   324			symbols, err := ef.Symbols()
   325			if err != nil && err != elf.ErrNoSymbols {
   326				return nil, err
   327			}
   328			for _, s := range symbols {
   329				if s.Name == "_stext" {
   330					// The kernel may use _stext as the mapping start address.
   331					stextOffset = &s.Value
   332					break
   333				}
   334			}
   335		}
   336	
   337		base, err := elfexec.GetBase(&ef.FileHeader, elfexec.FindTextProgHeader(ef), stextOffset, start, limit, offset)
   338		if err != nil {
   339			return nil, fmt.Errorf("could not identify base for %s: %v", name, err)
   340		}
   341	
   342		buildID := ""
   343		if f, err := os.Open(name); err == nil {
   344			if id, err := elfexec.GetBuildID(f); err == nil {
   345				buildID = fmt.Sprintf("%x", id)
   346			}
   347		}
   348		if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
   349			return &fileNM{file: file{b, name, base, buildID}}, nil
   350		}
   351		return &fileAddr2Line{file: file{b, name, base, buildID}}, nil
   352	}
   353	
// file implements the binutils.ObjFile interface.
//
// It is embedded by fileNM and fileAddr2Line. openELF builds it with a
// positional composite literal, so the field order below must not change.
type file struct {
	b       *binrep // tool configuration in effect when the file was opened
	name    string  // path of the object file
	base    uint64  // value returned by Base
	buildID string  // value returned by BuildID; only openELF fills it in
}
   361	
// Name returns the path the file was opened with.
func (f *file) Name() string {
	return f.name
}
   365	
// Base returns the base address stored for the file when it was opened.
func (f *file) Base() uint64 {
	return f.base
}
   369	
// BuildID returns the build ID recorded for the file, or the empty string
// when none was recorded.
func (f *file) BuildID() string {
	return f.buildID
}
   373	
// SourceLine is the placeholder implementation: it always returns an empty
// frame list and a nil error. fileNM and fileAddr2Line define their own
// SourceLine methods that perform actual lookups.
func (f *file) SourceLine(addr uint64) ([]plugin.Frame, error) {
	return []plugin.Frame{}, nil
}
   377	
// Close is a no-op for the plain file type and always returns nil.
func (f *file) Close() error {
	return nil
}
   381	
   382	func (f *file) Symbols(r *regexp.Regexp, addr uint64) ([]*plugin.Sym, error) {
   383		// Get from nm a list of symbols sorted by address.
   384		cmd := exec.Command(f.b.nm, "-n", f.name)
   385		out, err := cmd.Output()
   386		if err != nil {
   387			return nil, fmt.Errorf("%v: %v", cmd.Args, err)
   388		}
   389	
   390		return findSymbols(out, f.name, r, addr)
   391	}
   392	
// fileNM implements the binutils.ObjFile interface, using 'nm' to map
// addresses to symbols (without file/line number information). It is
// faster than fileAddr2Line.
type fileNM struct {
	file
	// addr2linernm is created on the first SourceLine call and reused by
	// later calls.
	addr2linernm *addr2LinerNM
}
   400	
   401	func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) {
   402		if f.addr2linernm == nil {
   403			addr2liner, err := newAddr2LinerNM(f.b.nm, f.name, f.base)
   404			if err != nil {
   405				return nil, err
   406			}
   407			f.addr2linernm = addr2liner
   408		}
   409		return f.addr2linernm.addrInfo(addr)
   410	}
   411	
// fileAddr2Line implements the binutils.ObjFile interface, using
// llvm-symbolizer, if that's available, or addr2line to map addresses to
// symbols (with file/line number information). It can be slow for large
// binaries with debug information.
type fileAddr2Line struct {
	// once guards init, which SourceLine triggers on its first call.
	once sync.Once
	file
	// At most one of the two backends below is set by init; Close resets
	// whichever is set back to nil.
	addr2liner     *addr2Liner
	llvmSymbolizer *llvmSymbolizer
}
   422	
   423	func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
   424		f.once.Do(f.init)
   425		if f.llvmSymbolizer != nil {
   426			return f.llvmSymbolizer.addrInfo(addr)
   427		}
   428		if f.addr2liner != nil {
   429			return f.addr2liner.addrInfo(addr)
   430		}
   431		return nil, fmt.Errorf("could not find local addr2liner")
   432	}
   433	
   434	func (f *fileAddr2Line) init() {
   435		if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base); err == nil {
   436			f.llvmSymbolizer = llvmSymbolizer
   437			return
   438		}
   439	
   440		if addr2liner, err := newAddr2Liner(f.b.addr2line, f.name, f.base); err == nil {
   441			f.addr2liner = addr2liner
   442	
   443			// When addr2line encounters some gcc compiled binaries, it
   444			// drops interesting parts of names in anonymous namespaces.
   445			// Fallback to NM for better function names.
   446			if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
   447				f.addr2liner.nm = nm
   448			}
   449		}
   450	}
   451	
   452	func (f *fileAddr2Line) Close() error {
   453		if f.llvmSymbolizer != nil {
   454			f.llvmSymbolizer.rw.close()
   455			f.llvmSymbolizer = nil
   456		}
   457		if f.addr2liner != nil {
   458			f.addr2liner.rw.close()
   459			f.addr2liner = nil
   460		}
   461		return nil
   462	}
   463	

View as plain text