...

Source file src/runtime/mkduff.go

     1	// Copyright 2015 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build ignore
     6	
     7	// runtime·duffzero is a Duff's device for zeroing memory.
     8	// The compiler jumps to computed addresses within
     9	// the routine to zero chunks of memory.
    10	// Do not change duffzero without also
    11	// changing the uses in cmd/compile/internal/*/*.go.
    12	
    13	// runtime·duffcopy is a Duff's device for copying memory.
    14	// The compiler jumps to computed addresses within
    15	// the routine to copy chunks of memory.
    16	// Source and destination must not overlap.
    17	// Do not change duffcopy without also
    18	// changing the uses in cmd/compile/internal/*/*.go.
    19	
    20	// See the zero* and copy* generators below
    21	// for architecture-specific comments.
    22	
    23	// mkduff generates duff_*.s.
    24	package main
    25	
    26	import (
    27		"bytes"
    28		"fmt"
    29		"io"
    30		"io/ioutil"
    31		"log"
    32	)
    33	
    34	func main() {
    35		gen("amd64", notags, zeroAMD64, copyAMD64)
    36		gen("386", notags, zero386, copy386)
    37		gen("arm", notags, zeroARM, copyARM)
    38		gen("arm64", notags, zeroARM64, copyARM64)
    39		gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x)
    40		gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x)
    41	}
    42	
    43	func gen(arch string, tags, zero, copy func(io.Writer)) {
    44		var buf bytes.Buffer
    45	
    46		fmt.Fprintln(&buf, "// Code generated by mkduff.go; DO NOT EDIT.")
    47		fmt.Fprintln(&buf, "// Run go generate from src/runtime to update.")
    48		fmt.Fprintln(&buf, "// See mkduff.go for comments.")
    49		tags(&buf)
    50		fmt.Fprintln(&buf, "#include \"textflag.h\"")
    51		fmt.Fprintln(&buf)
    52		zero(&buf)
    53		fmt.Fprintln(&buf)
    54		copy(&buf)
    55	
    56		if err := ioutil.WriteFile("duff_"+arch+".s", buf.Bytes(), 0644); err != nil {
    57			log.Fatalln(err)
    58		}
    59	}
    60	
    61	func notags(w io.Writer) { fmt.Fprintln(w) }
    62	
    63	func zeroAMD64(w io.Writer) {
    64		// X0: zero
    65		// DI: ptr to memory to be zeroed
    66		// DI is updated as a side effect.
    67		fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
    68		for i := 0; i < 16; i++ {
    69			fmt.Fprintln(w, "\tMOVUPS\tX0,(DI)")
    70			fmt.Fprintln(w, "\tMOVUPS\tX0,16(DI)")
    71			fmt.Fprintln(w, "\tMOVUPS\tX0,32(DI)")
    72			fmt.Fprintln(w, "\tMOVUPS\tX0,48(DI)")
    73			fmt.Fprintln(w, "\tLEAQ\t64(DI),DI") // We use lea instead of add, to avoid clobbering flags
    74			fmt.Fprintln(w)
    75		}
    76		fmt.Fprintln(w, "\tRET")
    77	}
    78	
    79	func copyAMD64(w io.Writer) {
    80		// SI: ptr to source memory
    81		// DI: ptr to destination memory
    82		// SI and DI are updated as a side effect.
    83		//
    84		// This is equivalent to a sequence of MOVSQ but
    85		// for some reason that is 3.5x slower than this code.
    86		// The STOSQ in duffzero seem fine, though.
    87		fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
    88		for i := 0; i < 64; i++ {
    89			fmt.Fprintln(w, "\tMOVUPS\t(SI), X0")
    90			fmt.Fprintln(w, "\tADDQ\t$16, SI")
    91			fmt.Fprintln(w, "\tMOVUPS\tX0, (DI)")
    92			fmt.Fprintln(w, "\tADDQ\t$16, DI")
    93			fmt.Fprintln(w)
    94		}
    95		fmt.Fprintln(w, "\tRET")
    96	}
    97	
    98	func zero386(w io.Writer) {
    99		// AX: zero
   100		// DI: ptr to memory to be zeroed
   101		// DI is updated as a side effect.
   102		fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
   103		for i := 0; i < 128; i++ {
   104			fmt.Fprintln(w, "\tSTOSL")
   105		}
   106		fmt.Fprintln(w, "\tRET")
   107	}
   108	
   109	func copy386(w io.Writer) {
   110		// SI: ptr to source memory
   111		// DI: ptr to destination memory
   112		// SI and DI are updated as a side effect.
   113		//
   114		// This is equivalent to a sequence of MOVSL but
   115		// for some reason MOVSL is really slow.
   116		fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
   117		for i := 0; i < 128; i++ {
   118			fmt.Fprintln(w, "\tMOVL\t(SI), CX")
   119			fmt.Fprintln(w, "\tADDL\t$4, SI")
   120			fmt.Fprintln(w, "\tMOVL\tCX, (DI)")
   121			fmt.Fprintln(w, "\tADDL\t$4, DI")
   122			fmt.Fprintln(w)
   123		}
   124		fmt.Fprintln(w, "\tRET")
   125	}
   126	
   127	func zeroARM(w io.Writer) {
   128		// R0: zero
   129		// R1: ptr to memory to be zeroed
   130		// R1 is updated as a side effect.
   131		fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
   132		for i := 0; i < 128; i++ {
   133			fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R1)")
   134		}
   135		fmt.Fprintln(w, "\tRET")
   136	}
   137	
   138	func copyARM(w io.Writer) {
   139		// R0: scratch space
   140		// R1: ptr to source memory
   141		// R2: ptr to destination memory
   142		// R1 and R2 are updated as a side effect
   143		fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
   144		for i := 0; i < 128; i++ {
   145			fmt.Fprintln(w, "\tMOVW.P\t4(R1), R0")
   146			fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R2)")
   147			fmt.Fprintln(w)
   148		}
   149		fmt.Fprintln(w, "\tRET")
   150	}
   151	
   152	func zeroARM64(w io.Writer) {
   153		// ZR: always zero
   154		// R20: ptr to memory to be zeroed
   155		// On return, R20 points to the last zeroed dword.
   156		fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
   157		for i := 0; i < 63; i++ {
   158			fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R20)")
   159		}
   160		fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R20)")
   161		fmt.Fprintln(w, "\tRET")
   162	}
   163	
   164	func copyARM64(w io.Writer) {
   165		// R20: ptr to source memory
   166		// R21: ptr to destination memory
   167		// R26, R27 (aka REGTMP): scratch space
   168		// R20 and R21 are updated as a side effect
   169		fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
   170	
   171		for i := 0; i < 64; i++ {
   172			fmt.Fprintln(w, "\tLDP.P\t16(R20), (R26, R27)")
   173			fmt.Fprintln(w, "\tSTP.P\t(R26, R27), 16(R21)")
   174			fmt.Fprintln(w)
   175		}
   176		fmt.Fprintln(w, "\tRET")
   177	}
   178	
   179	func tagsPPC64x(w io.Writer) {
   180		fmt.Fprintln(w)
   181		fmt.Fprintln(w, "// +build ppc64 ppc64le")
   182		fmt.Fprintln(w)
   183	}
   184	
   185	func zeroPPC64x(w io.Writer) {
   186		// R0: always zero
   187		// R3 (aka REGRT1): ptr to memory to be zeroed - 8
   188		// On return, R3 points to the last zeroed dword.
   189		fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
   190		for i := 0; i < 128; i++ {
   191			fmt.Fprintln(w, "\tMOVDU\tR0, 8(R3)")
   192		}
   193		fmt.Fprintln(w, "\tRET")
   194	}
   195	
   196	func copyPPC64x(w io.Writer) {
   197		fmt.Fprintln(w, "// TODO: Implement runtime·duffcopy.")
   198	}
   199	
   200	func tagsMIPS64x(w io.Writer) {
   201		fmt.Fprintln(w)
   202		fmt.Fprintln(w, "// +build mips64 mips64le")
   203		fmt.Fprintln(w)
   204	}
   205	
   206	func zeroMIPS64x(w io.Writer) {
   207		// R0: always zero
   208		// R1 (aka REGRT1): ptr to memory to be zeroed - 8
   209		// On return, R1 points to the last zeroed dword.
   210		fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
   211		for i := 0; i < 128; i++ {
   212			fmt.Fprintln(w, "\tMOVV\tR0, 8(R1)")
   213			fmt.Fprintln(w, "\tADDV\t$8, R1")
   214		}
   215		fmt.Fprintln(w, "\tRET")
   216	}
   217	
   218	func copyMIPS64x(w io.Writer) {
   219		fmt.Fprintln(w, "// TODO: Implement runtime·duffcopy.")
   220	}
   221	

View as plain text