Source file src/cmd/internal/goobj/read.go

     1	// Copyright 2013 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package goobj implements reading of Go object files and archives.
     6	//
     7	// TODO(rsc): Decide where this package should live. (golang.org/issue/6932)
     8	// TODO(rsc): Decide the appropriate integer types for various fields.
     9	package goobj
    10	
    11	import (
    12		"bufio"
    13		"bytes"
    14		"cmd/internal/objabi"
    15		"errors"
    16		"fmt"
    17		"io"
    18		"os"
    19		"strconv"
    20		"strings"
    21	)
    22	
    23	// A Sym is a named symbol in an object file.
    24	type Sym struct {
    25		SymID                // symbol identifier (name and version)
    26		Kind  objabi.SymKind // kind of symbol
    27		DupOK bool           // are duplicate definitions okay?
    28		Size  int64          // size of corresponding data
    29		Type  SymID          // symbol for Go type information
    30		Data  Data           // memory image of symbol
    31		Reloc []Reloc        // relocations to apply to Data
    32		Func  *Func          // additional data for functions
    33	}
    34	
    35	// A SymID - the combination of Name and Version - uniquely identifies
    36	// a symbol within a package.
    37	type SymID struct {
    38		// Name is the name of a symbol.
    39		Name string
    40	
    41		// Version is zero for symbols with global visibility.
    42		// Symbols with only file visibility (such as file-level static
    43		// declarations in C) have a non-zero version distinguishing
    44		// a symbol in one file from a symbol of the same name
    45		// in another file
    46		Version int64
    47	}
    48	
    49	func (s SymID) String() string {
    50		if s.Version == 0 {
    51			return s.Name
    52		}
    53		return fmt.Sprintf("%s<%d>", s.Name, s.Version)
    54	}
    55	
    56	// A Data is a reference to data stored in an object file.
    57	// It records the offset and size of the data, so that a client can
    58	// read the data only if necessary.
    59	type Data struct {
    60		Offset int64
    61		Size   int64
    62	}
    63	
    64	// A Reloc describes a relocation applied to a memory image to refer
    65	// to an address within a particular symbol.
    66	type Reloc struct {
    67		// The bytes at [Offset, Offset+Size) within the containing Sym
    68		// should be updated to refer to the address Add bytes after the start
    69		// of the symbol Sym.
    70		Offset int64
    71		Size   int64
    72		Sym    SymID
    73		Add    int64
    74	
    75		// The Type records the form of address expected in the bytes
    76		// described by the previous fields: absolute, PC-relative, and so on.
    77		// TODO(rsc): The interpretation of Type is not exposed by this package.
    78		Type objabi.RelocType
    79	}
    80	
    81	// A Var describes a variable in a function stack frame: a declared
    82	// local variable, an input argument, or an output result.
    83	type Var struct {
    84		// The combination of Name, Kind, and Offset uniquely
    85		// identifies a variable in a function stack frame.
    86		// Using fewer of these - in particular, using only Name - does not.
    87		Name   string // Name of variable.
    88		Kind   int64  // TODO(rsc): Define meaning.
    89		Offset int64  // Frame offset. TODO(rsc): Define meaning.
    90	
    91		Type SymID // Go type for variable.
    92	}
    93	
    94	// Func contains additional per-symbol information specific to functions.
    95	type Func struct {
    96		Args     int64      // size in bytes of argument frame: inputs and outputs
    97		Frame    int64      // size in bytes of local variable frame
    98		Leaf     bool       // function omits save of link register (ARM)
    99		NoSplit  bool       // function omits stack split prologue
   100		TopFrame bool       // function is the top of the call stack
   101		Var      []Var      // detail about local variables
   102		PCSP     Data       // PC → SP offset map
   103		PCFile   Data       // PC → file number map (index into File)
   104		PCLine   Data       // PC → line number map
   105		PCInline Data       // PC → inline tree index map
   106		PCData   []Data     // PC → runtime support data map
   107		FuncData []FuncData // non-PC-specific runtime support data
   108		File     []string   // paths indexed by PCFile
   109		InlTree  []InlinedCall
   110	}
   111	
   112	// TODO: Add PCData []byte and PCDataIter (similar to liblink).
   113	
   114	// A FuncData is a single function-specific data value.
   115	type FuncData struct {
   116		Sym    SymID // symbol holding data
   117		Offset int64 // offset into symbol for funcdata pointer
   118	}
   119	
   120	// An InlinedCall is a node in an InlTree.
   121	// See cmd/internal/obj.InlTree for details.
   122	type InlinedCall struct {
   123		Parent   int64
   124		File     string
   125		Line     int64
   126		Func     SymID
   127		ParentPC int64
   128	}
   129	
   130	// A Package is a parsed Go object file or archive defining a Go package.
   131	type Package struct {
   132		ImportPath string          // import path denoting this package
   133		Imports    []string        // packages imported by this package
   134		SymRefs    []SymID         // list of symbol names and versions referred to by this pack
   135		Syms       []*Sym          // symbols defined by this package
   136		MaxVersion int64           // maximum Version in any SymID in Syms
   137		Arch       string          // architecture
   138		Native     []*NativeReader // native object data (e.g. ELF)
   139	}
   140	
   141	type NativeReader struct {
   142		Name string
   143		io.ReaderAt
   144	}
   145	
   146	var (
   147		archiveHeader = []byte("!<arch>\n")
   148		archiveMagic  = []byte("`\n")
   149		goobjHeader   = []byte("go objec") // truncated to size of archiveHeader
   150	
   151		errCorruptArchive   = errors.New("corrupt archive")
   152		errTruncatedArchive = errors.New("truncated archive")
   153		errCorruptObject    = errors.New("corrupt object file")
   154		errNotObject        = errors.New("unrecognized object file format")
   155	)
   156	
   157	// An objReader is an object file reader.
   158	type objReader struct {
   159		p          *Package
   160		b          *bufio.Reader
   161		f          *os.File
   162		err        error
   163		offset     int64
   164		dataOffset int64
   165		limit      int64
   166		tmp        [256]byte
   167		pkgprefix  string
   168	}
   169	
   170	// init initializes r to read package p from f.
   171	func (r *objReader) init(f *os.File, p *Package) {
   172		r.f = f
   173		r.p = p
   174		r.offset, _ = f.Seek(0, io.SeekCurrent)
   175		r.limit, _ = f.Seek(0, io.SeekEnd)
   176		f.Seek(r.offset, io.SeekStart)
   177		r.b = bufio.NewReader(f)
   178		r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "."
   179	}
   180	
   181	// error records that an error occurred.
   182	// It returns only the first error, so that an error
   183	// caused by an earlier error does not discard information
   184	// about the earlier error.
   185	func (r *objReader) error(err error) error {
   186		if r.err == nil {
   187			if err == io.EOF {
   188				err = io.ErrUnexpectedEOF
   189			}
   190			r.err = err
   191		}
   192		// panic("corrupt") // useful for debugging
   193		return r.err
   194	}
   195	
   196	// peek returns the next n bytes without advancing the reader.
   197	func (r *objReader) peek(n int) ([]byte, error) {
   198		if r.err != nil {
   199			return nil, r.err
   200		}
   201		if r.offset >= r.limit {
   202			r.error(io.ErrUnexpectedEOF)
   203			return nil, r.err
   204		}
   205		b, err := r.b.Peek(n)
   206		if err != nil {
   207			if err != bufio.ErrBufferFull {
   208				r.error(err)
   209			}
   210		}
   211		return b, err
   212	}
   213	
   214	// readByte reads and returns a byte from the input file.
   215	// On I/O error or EOF, it records the error but returns byte 0.
   216	// A sequence of 0 bytes will eventually terminate any
   217	// parsing state in the object file. In particular, it ends the
   218	// reading of a varint.
   219	func (r *objReader) readByte() byte {
   220		if r.err != nil {
   221			return 0
   222		}
   223		if r.offset >= r.limit {
   224			r.error(io.ErrUnexpectedEOF)
   225			return 0
   226		}
   227		b, err := r.b.ReadByte()
   228		if err != nil {
   229			if err == io.EOF {
   230				err = io.ErrUnexpectedEOF
   231			}
   232			r.error(err)
   233			b = 0
   234		} else {
   235			r.offset++
   236		}
   237		return b
   238	}
   239	
   240	// read reads exactly len(b) bytes from the input file.
   241	// If an error occurs, read returns the error but also
   242	// records it, so it is safe for callers to ignore the result
   243	// as long as delaying the report is not a problem.
   244	func (r *objReader) readFull(b []byte) error {
   245		if r.err != nil {
   246			return r.err
   247		}
   248		if r.offset+int64(len(b)) > r.limit {
   249			return r.error(io.ErrUnexpectedEOF)
   250		}
   251		n, err := io.ReadFull(r.b, b)
   252		r.offset += int64(n)
   253		if err != nil {
   254			return r.error(err)
   255		}
   256		return nil
   257	}
   258	
   259	// readInt reads a zigzag varint from the input file.
   260	func (r *objReader) readInt() int64 {
   261		var u uint64
   262	
   263		for shift := uint(0); ; shift += 7 {
   264			if shift >= 64 {
   265				r.error(errCorruptObject)
   266				return 0
   267			}
   268			c := r.readByte()
   269			u |= uint64(c&0x7F) << shift
   270			if c&0x80 == 0 {
   271				break
   272			}
   273		}
   274	
   275		return int64(u>>1) ^ (int64(u) << 63 >> 63)
   276	}
   277	
   278	// readString reads a length-delimited string from the input file.
   279	func (r *objReader) readString() string {
   280		n := r.readInt()
   281		buf := make([]byte, n)
   282		r.readFull(buf)
   283		return string(buf)
   284	}
   285	
   286	// readSymID reads a SymID from the input file.
   287	func (r *objReader) readSymID() SymID {
   288		i := r.readInt()
   289		return r.p.SymRefs[i]
   290	}
   291	
   292	func (r *objReader) readRef() {
   293		name, abiOrStatic := r.readString(), r.readInt()
   294	
   295		// In a symbol name in an object file, "". denotes the
   296		// prefix for the package in which the object file has been found.
   297		// Expand it.
   298		name = strings.ReplaceAll(name, `"".`, r.pkgprefix)
   299	
   300		// The ABI field records either the ABI or -1 for static symbols.
   301		//
   302		// To distinguish different static symbols with the same name,
   303		// we use the symbol "version". Version 0 corresponds to
   304		// global symbols, and each file has a unique version > 0 for
   305		// all of its static symbols. The version is incremented on
   306		// each call to parseObject.
   307		//
   308		// For global symbols, we currently ignore the ABI.
   309		//
   310		// TODO(austin): Record the ABI in SymID. Since this is a
   311		// public API, we'll have to keep Version as 0 and record the
   312		// ABI in a new field (which differs from how the linker does
   313		// this, but that's okay). Show the ABI in things like
   314		// objdump.
   315		var vers int64
   316		if abiOrStatic == -1 {
   317			// Static symbol
   318			vers = r.p.MaxVersion
   319		}
   320		r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers})
   321	}
   322	
   323	// readData reads a data reference from the input file.
   324	func (r *objReader) readData() Data {
   325		n := r.readInt()
   326		d := Data{Offset: r.dataOffset, Size: n}
   327		r.dataOffset += n
   328		return d
   329	}
   330	
   331	// skip skips n bytes in the input.
   332	func (r *objReader) skip(n int64) {
   333		if n < 0 {
   334			r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
   335		}
   336		if n < int64(len(r.tmp)) {
   337			// Since the data is so small, a just reading from the buffered
   338			// reader is better than flushing the buffer and seeking.
   339			r.readFull(r.tmp[:n])
   340		} else if n <= int64(r.b.Buffered()) {
   341			// Even though the data is not small, it has already been read.
   342			// Advance the buffer instead of seeking.
   343			for n > int64(len(r.tmp)) {
   344				r.readFull(r.tmp[:])
   345				n -= int64(len(r.tmp))
   346			}
   347			r.readFull(r.tmp[:n])
   348		} else {
   349			// Seek, giving up buffered data.
   350			_, err := r.f.Seek(r.offset+n, io.SeekStart)
   351			if err != nil {
   352				r.error(err)
   353			}
   354			r.offset += n
   355			r.b.Reset(r.f)
   356		}
   357	}
   358	
   359	// Parse parses an object file or archive from f,
   360	// assuming that its import path is pkgpath.
   361	func Parse(f *os.File, pkgpath string) (*Package, error) {
   362		if pkgpath == "" {
   363			pkgpath = `""`
   364		}
   365		p := new(Package)
   366		p.ImportPath = pkgpath
   367	
   368		var rd objReader
   369		rd.init(f, p)
   370		err := rd.readFull(rd.tmp[:8])
   371		if err != nil {
   372			if err == io.EOF {
   373				err = io.ErrUnexpectedEOF
   374			}
   375			return nil, err
   376		}
   377	
   378		switch {
   379		default:
   380			return nil, errNotObject
   381	
   382		case bytes.Equal(rd.tmp[:8], archiveHeader):
   383			if err := rd.parseArchive(); err != nil {
   384				return nil, err
   385			}
   386		case bytes.Equal(rd.tmp[:8], goobjHeader):
   387			if err := rd.parseObject(goobjHeader); err != nil {
   388				return nil, err
   389			}
   390		}
   391	
   392		return p, nil
   393	}
   394	
   395	// trimSpace removes trailing spaces from b and returns the corresponding string.
   396	// This effectively parses the form used in archive headers.
   397	func trimSpace(b []byte) string {
   398		return string(bytes.TrimRight(b, " "))
   399	}
   400	
   401	// parseArchive parses a Unix archive of Go object files.
   402	func (r *objReader) parseArchive() error {
   403		for r.offset < r.limit {
   404			if err := r.readFull(r.tmp[:60]); err != nil {
   405				return err
   406			}
   407			data := r.tmp[:60]
   408	
   409			// Each file is preceded by this text header (slice indices in first column):
   410			//	 0:16	name
   411			//	16:28 date
   412			//	28:34 uid
   413			//	34:40 gid
   414			//	40:48 mode
   415			//	48:58 size
   416			//	58:60 magic - `\n
   417			// We only care about name, size, and magic.
   418			// The fields are space-padded on the right.
   419			// The size is in decimal.
   420			// The file data - size bytes - follows the header.
   421			// Headers are 2-byte aligned, so if size is odd, an extra padding
   422			// byte sits between the file data and the next header.
   423			// The file data that follows is padded to an even number of bytes:
   424			// if size is odd, an extra padding byte is inserted betw the next header.
   425			if len(data) < 60 {
   426				return errTruncatedArchive
   427			}
   428			if !bytes.Equal(data[58:60], archiveMagic) {
   429				return errCorruptArchive
   430			}
   431			name := trimSpace(data[0:16])
   432			size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64)
   433			if err != nil {
   434				return errCorruptArchive
   435			}
   436			data = data[60:]
   437			fsize := size + size&1
   438			if fsize < 0 || fsize < size {
   439				return errCorruptArchive
   440			}
   441			switch name {
   442			case "__.PKGDEF":
   443				r.skip(size)
   444			default:
   445				oldLimit := r.limit
   446				r.limit = r.offset + size
   447	
   448				p, err := r.peek(8)
   449				if err != nil {
   450					return err
   451				}
   452				if bytes.Equal(p, goobjHeader) {
   453					if err := r.parseObject(nil); err != nil {
   454						return fmt.Errorf("parsing archive member %q: %v", name, err)
   455					}
   456				} else {
   457					r.p.Native = append(r.p.Native, &NativeReader{
   458						Name:     name,
   459						ReaderAt: io.NewSectionReader(r.f, r.offset, size),
   460					})
   461				}
   462	
   463				r.skip(r.limit - r.offset)
   464				r.limit = oldLimit
   465			}
   466			if size&1 != 0 {
   467				r.skip(1)
   468			}
   469		}
   470		return nil
   471	}
   472	
   473	// parseObject parses a single Go object file.
   474	// The prefix is the bytes already read from the file,
   475	// typically in order to detect that this is an object file.
   476	// The object file consists of a textual header ending in "\n!\n"
   477	// and then the part we want to parse begins.
   478	// The format of that part is defined in a comment at the top
   479	// of src/liblink/objfile.c.
   480	func (r *objReader) parseObject(prefix []byte) error {
   481		r.p.MaxVersion++
   482		h := make([]byte, 0, 256)
   483		h = append(h, prefix...)
   484		var c1, c2, c3 byte
   485		for {
   486			c1, c2, c3 = c2, c3, r.readByte()
   487			h = append(h, c3)
   488			// The new export format can contain 0 bytes.
   489			// Don't consider them errors, only look for r.err != nil.
   490			if r.err != nil {
   491				return errCorruptObject
   492			}
   493			if c1 == '\n' && c2 == '!' && c3 == '\n' {
   494				break
   495			}
   496		}
   497	
   498		hs := strings.Fields(string(h))
   499		if len(hs) >= 4 {
   500			r.p.Arch = hs[3]
   501		}
   502		// TODO: extract OS + build ID if/when we need it
   503	
   504		r.readFull(r.tmp[:8])
   505		if !bytes.Equal(r.tmp[:8], []byte("\x00go112ld")) {
   506			return r.error(errCorruptObject)
   507		}
   508	
   509		b := r.readByte()
   510		if b != 1 {
   511			return r.error(errCorruptObject)
   512		}
   513	
   514		// Direct package dependencies.
   515		for {
   516			s := r.readString()
   517			if s == "" {
   518				break
   519			}
   520			r.p.Imports = append(r.p.Imports, s)
   521		}
   522	
   523		r.p.SymRefs = []SymID{{"", 0}}
   524		for {
   525			if b := r.readByte(); b != 0xfe {
   526				if b != 0xff {
   527					return r.error(errCorruptObject)
   528				}
   529				break
   530			}
   531	
   532			r.readRef()
   533		}
   534	
   535		dataLength := r.readInt()
   536		r.readInt() // n relocations - ignore
   537		r.readInt() // n pcdata - ignore
   538		r.readInt() // n autom - ignore
   539		r.readInt() // n funcdata - ignore
   540		r.readInt() // n files - ignore
   541	
   542		r.dataOffset = r.offset
   543		r.skip(dataLength)
   544	
   545		// Symbols.
   546		for {
   547			if b := r.readByte(); b != 0xfe {
   548				if b != 0xff {
   549					return r.error(errCorruptObject)
   550				}
   551				break
   552			}
   553	
   554			typ := r.readByte()
   555			s := &Sym{SymID: r.readSymID()}
   556			r.p.Syms = append(r.p.Syms, s)
   557			s.Kind = objabi.SymKind(typ)
   558			flags := r.readInt()
   559			s.DupOK = flags&1 != 0
   560			s.Size = r.readInt()
   561			s.Type = r.readSymID()
   562			s.Data = r.readData()
   563			s.Reloc = make([]Reloc, r.readInt())
   564			for i := range s.Reloc {
   565				rel := &s.Reloc[i]
   566				rel.Offset = r.readInt()
   567				rel.Size = r.readInt()
   568				rel.Type = objabi.RelocType(r.readInt())
   569				rel.Add = r.readInt()
   570				rel.Sym = r.readSymID()
   571			}
   572	
   573			if s.Kind == objabi.STEXT {
   574				f := new(Func)
   575				s.Func = f
   576				f.Args = r.readInt()
   577				f.Frame = r.readInt()
   578				flags := r.readInt()
   579				f.Leaf = flags&(1<<0) != 0
   580				f.TopFrame = flags&(1<<4) != 0
   581				f.NoSplit = r.readInt() != 0
   582				f.Var = make([]Var, r.readInt())
   583				for i := range f.Var {
   584					v := &f.Var[i]
   585					v.Name = r.readSymID().Name
   586					v.Offset = r.readInt()
   587					v.Kind = r.readInt()
   588					v.Type = r.readSymID()
   589				}
   590	
   591				f.PCSP = r.readData()
   592				f.PCFile = r.readData()
   593				f.PCLine = r.readData()
   594				f.PCInline = r.readData()
   595				f.PCData = make([]Data, r.readInt())
   596				for i := range f.PCData {
   597					f.PCData[i] = r.readData()
   598				}
   599				f.FuncData = make([]FuncData, r.readInt())
   600				for i := range f.FuncData {
   601					f.FuncData[i].Sym = r.readSymID()
   602				}
   603				for i := range f.FuncData {
   604					f.FuncData[i].Offset = r.readInt() // TODO
   605				}
   606				f.File = make([]string, r.readInt())
   607				for i := range f.File {
   608					f.File[i] = r.readSymID().Name
   609				}
   610				f.InlTree = make([]InlinedCall, r.readInt())
   611				for i := range f.InlTree {
   612					f.InlTree[i].Parent = r.readInt()
   613					f.InlTree[i].File = r.readSymID().Name
   614					f.InlTree[i].Line = r.readInt()
   615					f.InlTree[i].Func = r.readSymID()
   616					f.InlTree[i].ParentPC = r.readInt()
   617				}
   618			}
   619		}
   620	
   621		r.readFull(r.tmp[:7])
   622		if !bytes.Equal(r.tmp[:7], []byte("go112ld")) {
   623			return r.error(errCorruptObject)
   624		}
   625	
   626		return nil
   627	}
   628	
   629	func (r *Reloc) String(insnOffset uint64) string {
   630		delta := r.Offset - int64(insnOffset)
   631		s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type)
   632		if r.Sym.Name != "" {
   633			if r.Add != 0 {
   634				return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add)
   635			}
   636			return fmt.Sprintf("%s:%s", s, r.Sym.Name)
   637		}
   638		if r.Add != 0 {
   639			return fmt.Sprintf("%s:%d", s, r.Add)
   640		}
   641		return s
   642	}
   643
View as plain text