...

Source file src/pkg/cmd/link/internal/wasm/asm.go

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package wasm
     6	
     7	import (
     8		"bytes"
     9		"cmd/internal/objabi"
    10		"cmd/link/internal/ld"
    11		"cmd/link/internal/sym"
    12		"io"
    13		"regexp"
    14		"runtime"
    15	)
    16	
// WebAssembly value type codes. Each one is emitted as a single byte wherever
// the type of a parameter, result or global is written (see writeTypeSec and
// writeGlobalSec).
const (
	I32 = 0x7F
	I64 = 0x7E
	F32 = 0x7D
	F64 = 0x7C
)
    23	
// Section IDs of the WebAssembly binary format. writeSecHeader emits the ID
// as the first byte of a section.
const (
	sectionCustom   = 0
	sectionType     = 1
	sectionImport   = 2
	sectionFunction = 3
	sectionTable    = 4
	sectionMemory   = 5
	sectionGlobal   = 6
	sectionExport   = 7
	sectionStart    = 8
	sectionElement  = 9
	sectionCode     = 10
	sectionData     = 11
)
    38	
// funcValueOffset is the offset between the PC_F value of a function and the
// index of that function among the functions defined in the module (host
// imports are not counted): PC_F = funcValueOffset + index, see assignAddress.
// It is also the table index at which writeElementSec starts placing the
// functions.
const funcValueOffset = 0x1000 // TODO(neelance): make function addresses play nice with heap addresses
    41	
// gentext does nothing on wasm.
// NOTE(review): presumably it only exists so that the architecture provides
// the same set of hooks as the other linker backends; confirm against the
// place where this function is registered.
func gentext(ctxt *ld.Link) {
}
    44	
// wasmFunc describes one function of the output module: either a host import
// (Code stays unset) or a function that has a WebAssembly body.
type wasmFunc struct {
	Name string // name written to the import section (host imports) or to the name section
	Type uint32 // index into the list of function types written by writeTypeSec
	Code []byte // encoded function body, written verbatim to the code section
}
    50	
// wasmFuncType is a WebAssembly function signature. Every byte of Params and
// Results is a value type code (I32, I64, F32 or F64). lookupType treats a nil
// slice and an empty slice as the same list.
type wasmFuncType struct {
	Params  []byte
	Results []byte
}
    55	
// wasmFuncTypes lists, keyed by symbol name, the functions whose WebAssembly
// signature differs from the default signature of Go functions. asmb2 gives
// every function that is not listed here the default type (type index 0).
var wasmFuncTypes = map[string]*wasmFuncType{
	"_rt0_wasm_js":           {Params: []byte{}},                                         //
	"wasm_export_run":        {Params: []byte{I32, I32}},                                 // argc, argv
	"wasm_export_resume":     {Params: []byte{}},                                         //
	"wasm_export_getsp":      {Results: []byte{I32}},                                     // sp
	"wasm_pc_f_loop":         {Params: []byte{}},                                         //
	"runtime.wasmMove":       {Params: []byte{I32, I32, I32}},                            // dst, src, len
	"runtime.wasmZero":       {Params: []byte{I32, I32}},                                 // ptr, len
	"runtime.wasmDiv":        {Params: []byte{I64, I64}, Results: []byte{I64}},           // x, y -> x/y
	"runtime.wasmTruncS":     {Params: []byte{F64}, Results: []byte{I64}},                // x -> int(x)
	"runtime.wasmTruncU":     {Params: []byte{F64}, Results: []byte{I64}},                // x -> uint(x)
	"runtime.gcWriteBarrier": {Params: []byte{I64, I64}},                                 // ptr, val
	"cmpbody":                {Params: []byte{I64, I64, I64, I64}, Results: []byte{I64}}, // a, alen, b, blen -> -1/0/1
	"memeqbody":              {Params: []byte{I64, I64, I64}, Results: []byte{I64}},      // a, b, len -> 0/1
	"memcmp":                 {Params: []byte{I32, I32, I32}, Results: []byte{I32}},      // a, b, len -> <0/0/>0
	"memchr":                 {Params: []byte{I32, I32, I32}, Results: []byte{I32}},      // s, c, len -> index
}
    73	
    74	func assignAddress(ctxt *ld.Link, sect *sym.Section, n int, s *sym.Symbol, va uint64, isTramp bool) (*sym.Section, int, uint64) {
    75		// WebAssembly functions do not live in the same address space as the linear memory.
    76		// Instead, WebAssembly automatically assigns indices. Imported functions (section "import")
    77		// have indices 0 to n. They are followed by native functions (sections "function" and "code")
    78		// with indices n+1 and following.
    79		//
    80		// The following rules describe how wasm handles function indices and addresses:
    81		//   PC_F = funcValueOffset + WebAssembly function index (not including the imports)
    82		//   s.Value = PC = PC_F<<16 + PC_B
    83		//
    84		// The funcValueOffset is necessary to avoid conflicts with expectations
    85		// that the Go runtime has about function addresses.
    86		// The field "s.Value" corresponds to the concept of PC at runtime.
    87		// However, there is no PC register, only PC_F and PC_B. PC_F denotes the function,
    88		// PC_B the resume point inside of that function. The entry of the function has PC_B = 0.
    89		s.Sect = sect
    90		s.Value = int64(funcValueOffset+va/ld.MINFUNC) << 16 // va starts at zero
    91		va += uint64(ld.MINFUNC)
    92		return sect, n, va
    93	}
    94	
// asmb has nothing to do on wasm; the module binary is written by asmb2.
func asmb(ctxt *ld.Link) {} // dummy
    96	
// asmb2 writes the final WebAssembly module binary to ctxt.Out: the magic
// number and version, followed by the sections emitted by the write*Sec
// helpers in the order of the calls at the end of this function.
// Spec: https://webassembly.github.io/spec/core/binary/modules.html
func asmb2(ctxt *ld.Link) {
	if ctxt.Debugvlog != 0 {
		ctxt.Logf("%5.2f asmb\n", ld.Cputime())
	}

	// types collects every distinct function signature; lookupType appends to
	// it on demand. Index 0 is the default used for functions that are not
	// listed in wasmFuncTypes.
	types := []*wasmFuncType{
		// For normal Go functions, the single parameter is PC_B,
		// the return value is
		// 0 if the function returned normally or
		// 1 if the stack needs to be unwound.
		{Params: []byte{I32}, Results: []byte{I32}},
	}

	// collect host imports (functions that get imported from the WebAssembly host, usually JavaScript)
	// "debug" is always import 0; every R_WASMIMPORT relocation adds one more.
	hostImports := []*wasmFunc{
		{
			Name: "debug",
			Type: lookupType(&wasmFuncType{Params: []byte{I32}}, &types),
		},
	}
	// hostImportMap records the import index of each imported symbol so that
	// the relocations can be resolved in the second pass below.
	hostImportMap := make(map[*sym.Symbol]int64)
	for _, fn := range ctxt.Textp {
		for _, r := range fn.R {
			if r.Type == objabi.R_WASMIMPORT {
				hostImportMap[r.Sym] = int64(len(hostImports))
				hostImports = append(hostImports, &wasmFunc{
					Name: r.Sym.Name,
					Type: lookupType(&wasmFuncType{Params: []byte{I32}}, &types),
				})
			}
		}
	}

	// collect functions with WebAssembly body
	// fns has exactly one entry per symbol in ctxt.Textp, in the same order.
	var buildid []byte
	fns := make([]*wasmFunc, len(ctxt.Textp))
	for i, fn := range ctxt.Textp {
		wfn := new(bytes.Buffer)
		if fn.Name == "go.buildid" {
			// The build ID symbol still occupies a slot in fns, so it gets a
			// minimal body; its data is kept for the custom section written
			// by writeBuildID.
			writeUleb128(wfn, 0) // number of sets of locals
			writeI32Const(wfn, 0)
			wfn.WriteByte(0x0b) // end
			buildid = fn.P
		} else {
			// Relocations have variable length, handle them here.
			// A relocation occupies no bytes in fn.P: the code up to r.Off is
			// copied, the encoded value is inserted, and copying resumes at
			// r.Off.
			off := int32(0)
			for _, r := range fn.R {
				wfn.Write(fn.P[off:r.Off])
				off = r.Off
				switch r.Type {
				case objabi.R_ADDR:
					// value of the target symbol plus the addend
					writeSleb128(wfn, r.Sym.Value+r.Add)
				case objabi.R_CALL:
					// WebAssembly function index of the callee: the host
					// imports come first, the rest is recovered from PC_F
					// (>> binds tighter than + and -).
					writeSleb128(wfn, int64(len(hostImports))+r.Sym.Value>>16-funcValueOffset)
				case objabi.R_WASMIMPORT:
					// index of the host import recorded in the first pass
					writeSleb128(wfn, hostImportMap[r.Sym])
				default:
					ld.Errorf(fn, "bad reloc type %d (%s)", r.Type, sym.RelocName(ctxt.Arch, r.Type))
					continue
				}
			}
			wfn.Write(fn.P[off:])
		}

		// Functions without an entry in wasmFuncTypes keep the default type 0.
		typ := uint32(0)
		if sig, ok := wasmFuncTypes[fn.Name]; ok {
			typ = lookupType(sig, &types)
		}

		// Every character matched by nameRegexp is replaced by "_".
		name := nameRegexp.ReplaceAllString(fn.Name, "_")
		fns[i] = &wasmFunc{Name: name, Type: typ, Code: wfn.Bytes()}
	}

	ctxt.Out.Write([]byte{0x00, 0x61, 0x73, 0x6d}) // magic
	ctxt.Out.Write([]byte{0x01, 0x00, 0x00, 0x00}) // version

	// Add any buildid early in the binary:
	if len(buildid) != 0 {
		writeBuildID(ctxt, buildid)
	}

	writeGoVersion(ctxt)
	writeTypeSec(ctxt, types)
	writeImportSec(ctxt, hostImports)
	writeFunctionSec(ctxt, fns)
	writeTableSec(ctxt, fns)
	writeMemorySec(ctxt)
	writeGlobalSec(ctxt)
	writeExportSec(ctxt, len(hostImports))
	writeElementSec(ctxt, uint64(len(hostImports)), uint64(len(fns)))
	writeCodeSec(ctxt, fns)
	writeDataSec(ctxt)
	// The name section is left out when ld.FlagS is set.
	if !*ld.FlagS {
		writeNameSec(ctxt, len(hostImports), fns)
	}

	ctxt.Out.Flush()
}
   197	
   198	func lookupType(sig *wasmFuncType, types *[]*wasmFuncType) uint32 {
   199		for i, t := range *types {
   200			if bytes.Equal(sig.Params, t.Params) && bytes.Equal(sig.Results, t.Results) {
   201				return uint32(i)
   202			}
   203		}
   204		*types = append(*types, sig)
   205		return uint32(len(*types) - 1)
   206	}
   207	
   208	func writeSecHeader(ctxt *ld.Link, id uint8) int64 {
   209		ctxt.Out.WriteByte(id)
   210		sizeOffset := ctxt.Out.Offset()
   211		ctxt.Out.Write(make([]byte, 5)) // placeholder for length
   212		return sizeOffset
   213	}
   214	
   215	func writeSecSize(ctxt *ld.Link, sizeOffset int64) {
   216		endOffset := ctxt.Out.Offset()
   217		ctxt.Out.SeekSet(sizeOffset)
   218		writeUleb128FixedLength(ctxt.Out, uint64(endOffset-sizeOffset-5), 5)
   219		ctxt.Out.SeekSet(endOffset)
   220	}
   221	
   222	func writeBuildID(ctxt *ld.Link, buildid []byte) {
   223		sizeOffset := writeSecHeader(ctxt, sectionCustom)
   224		writeName(ctxt.Out, "go.buildid")
   225		ctxt.Out.Write(buildid)
   226		writeSecSize(ctxt, sizeOffset)
   227	}
   228	
   229	func writeGoVersion(ctxt *ld.Link) {
   230		sizeOffset := writeSecHeader(ctxt, sectionCustom)
   231		writeName(ctxt.Out, "go.version")
   232		ctxt.Out.Write([]byte(runtime.Version()))
   233		writeSecSize(ctxt, sizeOffset)
   234	}
   235	
   236	// writeTypeSec writes the section that declares all function types
   237	// so they can be referenced by index.
   238	func writeTypeSec(ctxt *ld.Link, types []*wasmFuncType) {
   239		sizeOffset := writeSecHeader(ctxt, sectionType)
   240	
   241		writeUleb128(ctxt.Out, uint64(len(types)))
   242	
   243		for _, t := range types {
   244			ctxt.Out.WriteByte(0x60) // functype
   245			writeUleb128(ctxt.Out, uint64(len(t.Params)))
   246			for _, v := range t.Params {
   247				ctxt.Out.WriteByte(byte(v))
   248			}
   249			writeUleb128(ctxt.Out, uint64(len(t.Results)))
   250			for _, v := range t.Results {
   251				ctxt.Out.WriteByte(byte(v))
   252			}
   253		}
   254	
   255		writeSecSize(ctxt, sizeOffset)
   256	}
   257	
   258	// writeImportSec writes the section that lists the functions that get
   259	// imported from the WebAssembly host, usually JavaScript.
   260	func writeImportSec(ctxt *ld.Link, hostImports []*wasmFunc) {
   261		sizeOffset := writeSecHeader(ctxt, sectionImport)
   262	
   263		writeUleb128(ctxt.Out, uint64(len(hostImports))) // number of imports
   264		for _, fn := range hostImports {
   265			writeName(ctxt.Out, "go") // provided by the import object in wasm_exec.js
   266			writeName(ctxt.Out, fn.Name)
   267			ctxt.Out.WriteByte(0x00) // func import
   268			writeUleb128(ctxt.Out, uint64(fn.Type))
   269		}
   270	
   271		writeSecSize(ctxt, sizeOffset)
   272	}
   273	
   274	// writeFunctionSec writes the section that declares the types of functions.
   275	// The bodies of these functions will later be provided in the "code" section.
   276	func writeFunctionSec(ctxt *ld.Link, fns []*wasmFunc) {
   277		sizeOffset := writeSecHeader(ctxt, sectionFunction)
   278	
   279		writeUleb128(ctxt.Out, uint64(len(fns)))
   280		for _, fn := range fns {
   281			writeUleb128(ctxt.Out, uint64(fn.Type))
   282		}
   283	
   284		writeSecSize(ctxt, sizeOffset)
   285	}
   286	
   287	// writeTableSec writes the section that declares tables. Currently there is only a single table
   288	// that is used by the CallIndirect operation to dynamically call any function.
   289	// The contents of the table get initialized by the "element" section.
   290	func writeTableSec(ctxt *ld.Link, fns []*wasmFunc) {
   291		sizeOffset := writeSecHeader(ctxt, sectionTable)
   292	
   293		numElements := uint64(funcValueOffset + len(fns))
   294		writeUleb128(ctxt.Out, 1)           // number of tables
   295		ctxt.Out.WriteByte(0x70)            // type: anyfunc
   296		ctxt.Out.WriteByte(0x00)            // no max
   297		writeUleb128(ctxt.Out, numElements) // min
   298	
   299		writeSecSize(ctxt, sizeOffset)
   300	}
   301	
   302	// writeMemorySec writes the section that declares linear memories. Currently one linear memory is being used.
   303	// Linear memory always starts at address zero. More memory can be requested with the GrowMemory instruction.
   304	func writeMemorySec(ctxt *ld.Link) {
   305		sizeOffset := writeSecHeader(ctxt, sectionMemory)
   306	
   307		const (
   308			initialSize  = 16 << 20 // 16MB, enough for runtime init without growing
   309			wasmPageSize = 64 << 10 // 64KB
   310		)
   311	
   312		writeUleb128(ctxt.Out, 1)                        // number of memories
   313		ctxt.Out.WriteByte(0x00)                         // no maximum memory size
   314		writeUleb128(ctxt.Out, initialSize/wasmPageSize) // minimum (initial) memory size
   315	
   316		writeSecSize(ctxt, sizeOffset)
   317	}
   318	
   319	// writeGlobalSec writes the section that declares global variables.
   320	func writeGlobalSec(ctxt *ld.Link) {
   321		sizeOffset := writeSecHeader(ctxt, sectionGlobal)
   322	
   323		globalRegs := []byte{
   324			I32, // 0: SP
   325			I64, // 1: CTXT
   326			I64, // 2: g
   327			I64, // 3: RET0
   328			I64, // 4: RET1
   329			I64, // 5: RET2
   330			I64, // 6: RET3
   331			I32, // 7: PAUSE
   332		}
   333	
   334		writeUleb128(ctxt.Out, uint64(len(globalRegs))) // number of globals
   335	
   336		for _, typ := range globalRegs {
   337			ctxt.Out.WriteByte(typ)
   338			ctxt.Out.WriteByte(0x01) // var
   339			switch typ {
   340			case I32:
   341				writeI32Const(ctxt.Out, 0)
   342			case I64:
   343				writeI64Const(ctxt.Out, 0)
   344			}
   345			ctxt.Out.WriteByte(0x0b) // end
   346		}
   347	
   348		writeSecSize(ctxt, sizeOffset)
   349	}
   350	
   351	// writeExportSec writes the section that declares exports.
   352	// Exports can be accessed by the WebAssembly host, usually JavaScript.
   353	// The wasm_export_* functions and the linear memory get exported.
   354	func writeExportSec(ctxt *ld.Link, lenHostImports int) {
   355		sizeOffset := writeSecHeader(ctxt, sectionExport)
   356	
   357		writeUleb128(ctxt.Out, 4) // number of exports
   358	
   359		for _, name := range []string{"run", "resume", "getsp"} {
   360			idx := uint32(lenHostImports) + uint32(ctxt.Syms.ROLookup("wasm_export_"+name, 0).Value>>16) - funcValueOffset
   361			writeName(ctxt.Out, name)           // inst.exports.run/resume/getsp in wasm_exec.js
   362			ctxt.Out.WriteByte(0x00)            // func export
   363			writeUleb128(ctxt.Out, uint64(idx)) // funcidx
   364		}
   365	
   366		writeName(ctxt.Out, "mem") // inst.exports.mem in wasm_exec.js
   367		ctxt.Out.WriteByte(0x02)   // mem export
   368		writeUleb128(ctxt.Out, 0)  // memidx
   369	
   370		writeSecSize(ctxt, sizeOffset)
   371	}
   372	
   373	// writeElementSec writes the section that initializes the tables declared by the "table" section.
   374	// The table for CallIndirect gets initialized in a very simple way so that each table index (PC_F value)
   375	// maps linearly to the function index (numImports + PC_F).
   376	func writeElementSec(ctxt *ld.Link, numImports, numFns uint64) {
   377		sizeOffset := writeSecHeader(ctxt, sectionElement)
   378	
   379		writeUleb128(ctxt.Out, 1) // number of element segments
   380	
   381		writeUleb128(ctxt.Out, 0) // tableidx
   382		writeI32Const(ctxt.Out, funcValueOffset)
   383		ctxt.Out.WriteByte(0x0b) // end
   384	
   385		writeUleb128(ctxt.Out, numFns) // number of entries
   386		for i := uint64(0); i < numFns; i++ {
   387			writeUleb128(ctxt.Out, numImports+i)
   388		}
   389	
   390		writeSecSize(ctxt, sizeOffset)
   391	}
   392	
   393	// writeElementSec writes the section that provides the function bodies for the functions
   394	// declared by the "func" section.
   395	func writeCodeSec(ctxt *ld.Link, fns []*wasmFunc) {
   396		sizeOffset := writeSecHeader(ctxt, sectionCode)
   397	
   398		writeUleb128(ctxt.Out, uint64(len(fns))) // number of code entries
   399		for _, fn := range fns {
   400			writeUleb128(ctxt.Out, uint64(len(fn.Code)))
   401			ctxt.Out.Write(fn.Code)
   402		}
   403	
   404		writeSecSize(ctxt, sizeOffset)
   405	}
   406	
// writeDataSec writes the section that provides data that will be used to initialize the linear memory.
// The contents of the listed linker sections are emitted as data segments. Longer runs of
// zero bytes are left out by splitting the data into several segments.
func writeDataSec(ctxt *ld.Link) {
	sizeOffset := writeSecHeader(ctxt, sectionData)

	// The linker sections to emit, found through the symbols named below.
	sections := []*sym.Section{
		ctxt.Syms.Lookup("runtime.rodata", 0).Sect,
		ctxt.Syms.Lookup("runtime.typelink", 0).Sect,
		ctxt.Syms.Lookup("runtime.itablink", 0).Sect,
		ctxt.Syms.Lookup("runtime.symtab", 0).Sect,
		ctxt.Syms.Lookup("runtime.pclntab", 0).Sect,
		ctxt.Syms.Lookup("runtime.noptrdata", 0).Sect,
		ctxt.Syms.Lookup("runtime.data", 0).Sect,
	}

	// dataSegment is one entry of the data section: data gets placed at the
	// address offset of the linear memory.
	type dataSegment struct {
		offset int32
		data   []byte
	}

	// Omit blocks of zeroes and instead emit data segments with offsets skipping the zeroes.
	// This reduces the size of the WebAssembly binary. We use 8 bytes as an estimate for the
	// overhead of adding a new segment (same as wasm-opt's memory-packing optimization uses).
	const segmentOverhead = 8

	// Generate at most this many segments. A higher number of segments gets rejected by some WebAssembly runtimes.
	const maxNumSegments = 100000

	var segments []*dataSegment
	for secIndex, sec := range sections {
		data := ld.DatblkBytes(ctxt, int64(sec.Vaddr), int64(sec.Length))
		offset := int32(sec.Vaddr)

		// skip leading zeroes
		for len(data) > 0 && data[0] == 0 {
			data = data[1:]
			offset++
		}

		// Invariant: data is the part of the section that is not handled yet,
		// offset is the address of data[0], and data[0] is not zero.
		for len(data) > 0 {
			dataLen := int32(len(data))
			// The next segment is data[:segmentEnd]; data[segmentEnd:zeroEnd]
			// is the run of zeroes after it that does not get emitted.
			var segmentEnd, zeroEnd int32
			if len(segments)+(len(sections)-secIndex) == maxNumSegments {
				// The budget is used up, counting one segment for this and
				// for every following section: emit the rest of this section
				// as a single segment, zeroes included.
				segmentEnd = dataLen
				zeroEnd = dataLen
			} else {
				for {
					// look for beginning of zeroes
					for segmentEnd < dataLen && data[segmentEnd] != 0 {
						segmentEnd++
					}
					// look for end of zeroes
					zeroEnd = segmentEnd
					for zeroEnd < dataLen && data[zeroEnd] == 0 {
						zeroEnd++
					}
					// emit segment if omitting zeroes reduces the output size
					if zeroEnd-segmentEnd >= segmentOverhead || zeroEnd == dataLen {
						break
					}
					// The run of zeroes is too short to pay for a new
					// segment: keep it inside the current one and go on.
					segmentEnd = zeroEnd
				}
			}

			segments = append(segments, &dataSegment{
				offset: offset,
				data:   data[:segmentEnd],
			})
			// Continue behind the omitted zeroes.
			data = data[zeroEnd:]
			offset += zeroEnd
		}
	}

	writeUleb128(ctxt.Out, uint64(len(segments))) // number of data entries
	for _, seg := range segments {
		writeUleb128(ctxt.Out, 0) // memidx
		// offset expression: the constant address of the segment
		writeI32Const(ctxt.Out, seg.offset)
		ctxt.Out.WriteByte(0x0b) // end
		// length of the data, followed by the data itself
		writeUleb128(ctxt.Out, uint64(len(seg.data)))
		ctxt.Out.Write(seg.data)
	}

	writeSecSize(ctxt, sizeOffset)
}
   490	
// nameRegexp matches every character that is neither a word character
// (letter, digit or underscore) nor a dot. asmb2 replaces each match with "_"
// in the function names that end up in the name section.
var nameRegexp = regexp.MustCompile(`[^\w\.]`)
   492	
   493	// writeNameSec writes an optional section that assigns names to the functions declared by the "func" section.
   494	// The names are only used by WebAssembly stack traces, debuggers and decompilers.
   495	// TODO(neelance): add symbol table of DATA symbols
   496	func writeNameSec(ctxt *ld.Link, firstFnIndex int, fns []*wasmFunc) {
   497		sizeOffset := writeSecHeader(ctxt, sectionCustom)
   498		writeName(ctxt.Out, "name")
   499	
   500		sizeOffset2 := writeSecHeader(ctxt, 0x01) // function names
   501		writeUleb128(ctxt.Out, uint64(len(fns)))
   502		for i, fn := range fns {
   503			writeUleb128(ctxt.Out, uint64(firstFnIndex+i))
   504			writeName(ctxt.Out, fn.Name)
   505		}
   506		writeSecSize(ctxt, sizeOffset2)
   507	
   508		writeSecSize(ctxt, sizeOffset)
   509	}
   510	
// nameWriter is the destination required by writeName: single bytes are
// written for the length prefix, a byte slice for the name itself.
type nameWriter interface {
	io.ByteWriter
	io.Writer
}
   515	
   516	func writeI32Const(w io.ByteWriter, v int32) {
   517		w.WriteByte(0x41) // i32.const
   518		writeSleb128(w, int64(v))
   519	}
   520	
   521	func writeI64Const(w io.ByteWriter, v int64) {
   522		w.WriteByte(0x42) // i64.const
   523		writeSleb128(w, v)
   524	}
   525	
   526	func writeName(w nameWriter, name string) {
   527		writeUleb128(w, uint64(len(name)))
   528		w.Write([]byte(name))
   529	}
   530	
   531	func writeUleb128(w io.ByteWriter, v uint64) {
   532		more := true
   533		for more {
   534			c := uint8(v & 0x7f)
   535			v >>= 7
   536			more = v != 0
   537			if more {
   538				c |= 0x80
   539			}
   540			w.WriteByte(c)
   541		}
   542	}
   543	
   544	func writeUleb128FixedLength(w io.ByteWriter, v uint64, length int) {
   545		for i := 0; i < length; i++ {
   546			c := uint8(v & 0x7f)
   547			v >>= 7
   548			if i < length-1 {
   549				c |= 0x80
   550			}
   551			w.WriteByte(c)
   552		}
   553		if v != 0 {
   554			panic("writeUleb128FixedLength: length too small")
   555		}
   556	}
   557	
   558	func writeSleb128(w io.ByteWriter, v int64) {
   559		more := true
   560		for more {
   561			c := uint8(v & 0x7f)
   562			s := uint8(v & 0x40)
   563			v >>= 7
   564			more = !((v == 0 && s == 0) || (v == -1 && s != 0))
   565			if more {
   566				c |= 0x80
   567			}
   568			w.WriteByte(c)
   569		}
   570	}
   571	

View as plain text