...

Source file src/pkg/testing/benchmark.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package testing
     6	
     7	import (
     8		"flag"
     9		"fmt"
    10		"internal/race"
    11		"io"
    12		"math"
    13		"os"
    14		"runtime"
    15		"sort"
    16		"strconv"
    17		"strings"
    18		"sync"
    19		"sync/atomic"
    20		"time"
    21		"unicode"
    22	)
    23	
    24	func initBenchmarkFlags() {
    25		matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
    26		benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
    27		flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d`")
    28	}
    29	
    30	var (
    31		matchBenchmarks *string
    32		benchmarkMemory *bool
    33	
    34		benchTime = benchTimeFlag{d: 1 * time.Second} // changed during test of testing package
    35	)
    36	
    37	type benchTimeFlag struct {
    38		d time.Duration
    39		n int
    40	}
    41	
    42	func (f *benchTimeFlag) String() string {
    43		if f.n > 0 {
    44			return fmt.Sprintf("%dx", f.n)
    45		}
    46		return time.Duration(f.d).String()
    47	}
    48	
    49	func (f *benchTimeFlag) Set(s string) error {
    50		if strings.HasSuffix(s, "x") {
    51			n, err := strconv.ParseInt(s[:len(s)-1], 10, 0)
    52			if err != nil || n <= 0 {
    53				return fmt.Errorf("invalid count")
    54			}
    55			*f = benchTimeFlag{n: int(n)}
    56			return nil
    57		}
    58		d, err := time.ParseDuration(s)
    59		if err != nil || d <= 0 {
    60			return fmt.Errorf("invalid duration")
    61		}
    62		*f = benchTimeFlag{d: d}
    63		return nil
    64	}
    65	
    66	// Global lock to ensure only one benchmark runs at a time.
    67	var benchmarkLock sync.Mutex
    68	
    69	// Used for every benchmark for measuring memory.
    70	var memStats runtime.MemStats
    71	
// InternalBenchmark pairs a benchmark's name with the function implementing it.
//
// An internal type but exported because it is cross-package; part of the implementation
// of the "go test" command.
type InternalBenchmark struct {
	Name string     // benchmark name; matched against the -test.bench pattern by runBenchmarks
	F    func(b *B) // the benchmark function, run as a subbenchmark under Name
}
    78	
// B is a type passed to Benchmark functions to manage benchmark
// timing and to specify the number of iterations to run.
//
// A benchmark ends when its Benchmark function returns or calls any of the methods
// FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called
// only from the goroutine running the Benchmark function.
// The other reporting methods, such as the variations of Log and Error,
// may be called simultaneously from multiple goroutines.
//
// Like in tests, benchmark logs are accumulated during execution
// and dumped to standard error when done. Unlike in tests, benchmark logs
// are always printed, so as not to hide output whose existence may be
// affecting benchmark results.
type B struct {
	common
	importPath       string        // import path of the package containing the benchmark
	context          *benchContext // set when running under "go test"; nil when run through func Benchmark
	N                int           // number of iterations for the current run; set by runN
	previousN        int           // number of iterations in the previous run
	previousDuration time.Duration // total duration of the previous run
	benchFunc        func(b *B)    // the function being benchmarked; invoked by runN
	benchTime        benchTimeFlag // target duration or iteration count used by launch
	bytes            int64         // bytes processed per operation, as set by SetBytes
	missingBytes     bool          // one of the subbenchmarks does not have bytes set.
	timerOn          bool          // whether the timer is currently running
	showAllocResult  bool          // set by ReportAllocs to print memory statistics for this benchmark
	result           BenchmarkResult
	parallelism      int // RunParallel creates parallelism*GOMAXPROCS goroutines
	// The initial states of memStats.Mallocs and memStats.TotalAlloc.
	startAllocs uint64
	startBytes  uint64
	// The net total of this test after being run.
	netAllocs uint64
	netBytes  uint64
	// Extra metrics collected by ReportMetric.
	extra map[string]float64
}
   116	
   117	// StartTimer starts timing a test. This function is called automatically
   118	// before a benchmark starts, but it can also be used to resume timing after
   119	// a call to StopTimer.
   120	func (b *B) StartTimer() {
   121		if !b.timerOn {
   122			runtime.ReadMemStats(&memStats)
   123			b.startAllocs = memStats.Mallocs
   124			b.startBytes = memStats.TotalAlloc
   125			b.start = time.Now()
   126			b.timerOn = true
   127		}
   128	}
   129	
   130	// StopTimer stops timing a test. This can be used to pause the timer
   131	// while performing complex initialization that you don't
   132	// want to measure.
   133	func (b *B) StopTimer() {
   134		if b.timerOn {
   135			b.duration += time.Since(b.start)
   136			runtime.ReadMemStats(&memStats)
   137			b.netAllocs += memStats.Mallocs - b.startAllocs
   138			b.netBytes += memStats.TotalAlloc - b.startBytes
   139			b.timerOn = false
   140		}
   141	}
   142	
   143	// ResetTimer zeroes the elapsed benchmark time and memory allocation counters
   144	// and deletes user-reported metrics.
   145	// It does not affect whether the timer is running.
   146	func (b *B) ResetTimer() {
   147		if b.extra == nil {
   148			// Allocate the extra map before reading memory stats.
   149			// Pre-size it to make more allocation unlikely.
   150			b.extra = make(map[string]float64, 16)
   151		} else {
   152			for k := range b.extra {
   153				delete(b.extra, k)
   154			}
   155		}
   156		if b.timerOn {
   157			runtime.ReadMemStats(&memStats)
   158			b.startAllocs = memStats.Mallocs
   159			b.startBytes = memStats.TotalAlloc
   160			b.start = time.Now()
   161		}
   162		b.duration = 0
   163		b.netAllocs = 0
   164		b.netBytes = 0
   165	}
   166	
   167	// SetBytes records the number of bytes processed in a single operation.
   168	// If this is called, the benchmark will report ns/op and MB/s.
   169	func (b *B) SetBytes(n int64) { b.bytes = n }
   170	
   171	// ReportAllocs enables malloc statistics for this benchmark.
   172	// It is equivalent to setting -test.benchmem, but it only affects the
   173	// benchmark function that calls ReportAllocs.
   174	func (b *B) ReportAllocs() {
   175		b.showAllocResult = true
   176	}
   177	
// runN runs a single benchmark for the specified number of iterations.
// It holds benchmarkLock for the whole call, resets and starts the timer,
// invokes benchFunc once with b.N set to n, stops the timer, and records the
// run's iteration count and duration in previousN and previousDuration.
// If the race detector reported errors during the run, the benchmark is
// marked as failed via Errorf.
func (b *B) runN(n int) {
	benchmarkLock.Lock()
	defer benchmarkLock.Unlock()
	// Try to get a comparable environment for each run
	// by clearing garbage from previous runs.
	runtime.GC()
	// Start from the negated current race-error count so that adding the
	// count again after the run leaves only the errors seen during this run.
	b.raceErrors = -race.Errors()
	b.N = n
	// Each run starts with the default parallelism; SetParallelism may raise it.
	b.parallelism = 1
	b.ResetTimer()
	b.StartTimer()
	b.benchFunc(b)
	b.StopTimer()
	b.previousN = n
	b.previousDuration = b.duration
	b.raceErrors += race.Errors()
	if b.raceErrors > 0 {
		b.Errorf("race detected during execution of benchmark")
	}
}
   199	
   200	func min(x, y int64) int64 {
   201		if x > y {
   202			return y
   203		}
   204		return x
   205	}
   206	
   207	func max(x, y int64) int64 {
   208		if x < y {
   209			return y
   210		}
   211		return x
   212	}
   213	
// run1 runs the first iteration of benchFunc. It reports whether more
// iterations of this benchmark should be run.
//
// The single iteration runs in its own goroutine and run1 waits for it on
// b.signal. It returns false when the benchmark failed, when it started
// subbenchmarks (hasSub), or when it finished; in the failure case, and in
// the others when chatty is set, the accumulated output is printed here.
func (b *B) run1() bool {
	if ctx := b.context; ctx != nil {
		// Extend maxLen, if needed.
		if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen {
			ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size.
		}
	}
	go func() {
		// Signal that we're done whether we return normally
		// or by FailNow's runtime.Goexit.
		defer func() {
			b.signal <- true
		}()

		b.runN(1)
	}()
	// Block until the goroutine above has finished its single iteration.
	<-b.signal
	if b.failed {
		fmt.Fprintf(b.w, "--- FAIL: %s\n%s", b.name, b.output)
		return false
	}
	// Only print the output if we know we are not going to proceed.
	// Otherwise it is printed in processBench.
	if atomic.LoadInt32(&b.hasSub) != 0 || b.finished {
		tag := "BENCH"
		if b.skipped {
			tag = "SKIP"
		}
		if b.chatty && (len(b.output) > 0 || b.finished) {
			b.trimOutput()
			fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output)
		}
		return false
	}
	return true
}
   252	
   253	var labelsOnce sync.Once
   254	
   255	// run executes the benchmark in a separate goroutine, including all of its
   256	// subbenchmarks. b must not have subbenchmarks.
   257	func (b *B) run() {
   258		labelsOnce.Do(func() {
   259			fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
   260			fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
   261			if b.importPath != "" {
   262				fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
   263			}
   264		})
   265		if b.context != nil {
   266			// Running go test --test.bench
   267			b.context.processBench(b) // Must call doBench.
   268		} else {
   269			// Running func Benchmark.
   270			b.doBench()
   271		}
   272	}
   273	
// doBench runs launch in a new goroutine, waits for it to signal completion
// on b.signal, and returns the result that launch stored in b.result.
func (b *B) doBench() BenchmarkResult {
	go b.launch()
	// launch always sends on b.signal from a deferred function when it exits.
	<-b.signal
	return b.result
}
   279	
// launch launches the benchmark function. It gradually increases the number
// of benchmark iterations until the benchmark runs for the requested benchtime.
// launch is run by the doBench function as a separate goroutine.
// run1 must have been called on b.
//
// When benchTime specifies an iteration count, the benchmark is run exactly
// once with that count. On exit the measurements of the last run are stored
// in b.result.
func (b *B) launch() {
	// Signal that we're done whether we return normally
	// or by FailNow's runtime.Goexit.
	defer func() {
		b.signal <- true
	}()

	// Run the benchmark for at least the specified amount of time.
	if b.benchTime.n > 0 {
		b.runN(b.benchTime.n)
	} else {
		d := b.benchTime.d
		// Keep growing n until the last run took at least d, the benchmark
		// failed, or the iteration cap of 1e9 is reached.
		for n := int64(1); !b.failed && b.duration < d && n < 1e9; {
			last := n
			// Predict required iterations.
			goalns := d.Nanoseconds()
			prevIters := int64(b.N)
			prevns := b.duration.Nanoseconds()
			if prevns <= 0 {
				// Round up, to avoid div by zero.
				prevns = 1
			}
			// Order of operations matters.
			// For very fast benchmarks, prevIters ~= prevns.
			// If you divide first, you get 0 or 1,
			// which can hide an order of magnitude in execution time.
			// So multiply first, then divide.
			n = goalns * prevIters / prevns
			// Run more iterations than we think we'll need (1.2x).
			n += n / 5
			// Don't grow too fast in case we had timing errors previously.
			n = min(n, 100*last)
			// Be sure to run at least one more than last time.
			n = max(n, last+1)
			// Don't run more than 1e9 times. (This also keeps n in int range on 32 bit platforms.)
			n = min(n, 1e9)
			b.runN(int(n))
		}
	}
	// Field order follows the BenchmarkResult declaration: N, T, Bytes,
	// MemAllocs, MemBytes, Extra.
	b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
}
   325	
   326	// ReportMetric adds "n unit" to the reported benchmark results.
   327	// If the metric is per-iteration, the caller should divide by b.N,
   328	// and by convention units should end in "/op".
   329	// ReportMetric overrides any previously reported value for the same unit.
   330	// ReportMetric panics if unit is the empty string or if unit contains
   331	// any whitespace.
   332	// If unit is a unit normally reported by the benchmark framework itself
   333	// (such as "allocs/op"), ReportMetric will override that metric.
   334	// Setting "ns/op" to 0 will suppress that built-in metric.
   335	func (b *B) ReportMetric(n float64, unit string) {
   336		if unit == "" {
   337			panic("metric unit must not be empty")
   338		}
   339		if strings.IndexFunc(unit, unicode.IsSpace) >= 0 {
   340			panic("metric unit must not contain whitespace")
   341		}
   342		b.extra[unit] = n
   343	}
   344	
   345	// The results of a benchmark run.
   346	type BenchmarkResult struct {
   347		N         int           // The number of iterations.
   348		T         time.Duration // The total time taken.
   349		Bytes     int64         // Bytes processed in one iteration.
   350		MemAllocs uint64        // The total number of memory allocations.
   351		MemBytes  uint64        // The total number of bytes allocated.
   352	
   353		// Extra records additional metrics reported by ReportMetric.
   354		Extra map[string]float64
   355	}
   356	
   357	// NsPerOp returns the "ns/op" metric.
   358	func (r BenchmarkResult) NsPerOp() int64 {
   359		if v, ok := r.Extra["ns/op"]; ok {
   360			return int64(v)
   361		}
   362		if r.N <= 0 {
   363			return 0
   364		}
   365		return r.T.Nanoseconds() / int64(r.N)
   366	}
   367	
   368	// mbPerSec returns the "MB/s" metric.
   369	func (r BenchmarkResult) mbPerSec() float64 {
   370		if v, ok := r.Extra["MB/s"]; ok {
   371			return v
   372		}
   373		if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
   374			return 0
   375		}
   376		return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
   377	}
   378	
   379	// AllocsPerOp returns the "allocs/op" metric,
   380	// which is calculated as r.MemAllocs / r.N.
   381	func (r BenchmarkResult) AllocsPerOp() int64 {
   382		if v, ok := r.Extra["allocs/op"]; ok {
   383			return int64(v)
   384		}
   385		if r.N <= 0 {
   386			return 0
   387		}
   388		return int64(r.MemAllocs) / int64(r.N)
   389	}
   390	
   391	// AllocedBytesPerOp returns the "B/op" metric,
   392	// which is calculated as r.MemBytes / r.N.
   393	func (r BenchmarkResult) AllocedBytesPerOp() int64 {
   394		if v, ok := r.Extra["B/op"]; ok {
   395			return int64(v)
   396		}
   397		if r.N <= 0 {
   398			return 0
   399		}
   400		return int64(r.MemBytes) / int64(r.N)
   401	}
   402	
   403	// String returns a summary of the benchmark results.
   404	// It follows the benchmark result line format from
   405	// https://golang.org/design/14313-benchmark-format, not including the
   406	// benchmark name.
   407	// Extra metrics override built-in metrics of the same name.
   408	// String does not include allocs/op or B/op, since those are reported
   409	// by MemString.
   410	func (r BenchmarkResult) String() string {
   411		buf := new(strings.Builder)
   412		fmt.Fprintf(buf, "%8d", r.N)
   413	
   414		// Get ns/op as a float.
   415		ns, ok := r.Extra["ns/op"]
   416		if !ok {
   417			ns = float64(r.T.Nanoseconds()) / float64(r.N)
   418		}
   419		if ns != 0 {
   420			buf.WriteByte('\t')
   421			prettyPrint(buf, ns, "ns/op")
   422		}
   423	
   424		if mbs := r.mbPerSec(); mbs != 0 {
   425			fmt.Fprintf(buf, "\t%7.2f MB/s", mbs)
   426		}
   427	
   428		// Print extra metrics that aren't represented in the standard
   429		// metrics.
   430		var extraKeys []string
   431		for k := range r.Extra {
   432			switch k {
   433			case "ns/op", "MB/s", "B/op", "allocs/op":
   434				// Built-in metrics reported elsewhere.
   435				continue
   436			}
   437			extraKeys = append(extraKeys, k)
   438		}
   439		sort.Strings(extraKeys)
   440		for _, k := range extraKeys {
   441			buf.WriteByte('\t')
   442			prettyPrint(buf, r.Extra[k], k)
   443		}
   444		return buf.String()
   445	}
   446	
   447	func prettyPrint(w io.Writer, x float64, unit string) {
   448		// Print all numbers with 10 places before the decimal point
   449		// and small numbers with three sig figs.
   450		var format string
   451		switch y := math.Abs(x); {
   452		case y == 0 || y >= 99.95:
   453			format = "%10.0f %s"
   454		case y >= 9.995:
   455			format = "%12.1f %s"
   456		case y >= 0.9995:
   457			format = "%13.2f %s"
   458		case y >= 0.09995:
   459			format = "%14.3f %s"
   460		case y >= 0.009995:
   461			format = "%15.4f %s"
   462		case y >= 0.0009995:
   463			format = "%16.5f %s"
   464		default:
   465			format = "%17.6f %s"
   466		}
   467		fmt.Fprintf(w, format, x, unit)
   468	}
   469	
   470	// MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'.
   471	func (r BenchmarkResult) MemString() string {
   472		return fmt.Sprintf("%8d B/op\t%8d allocs/op",
   473			r.AllocedBytesPerOp(), r.AllocsPerOp())
   474	}
   475	
   476	// benchmarkName returns full name of benchmark including procs suffix.
   477	func benchmarkName(name string, n int) string {
   478		if n != 1 {
   479			return fmt.Sprintf("%s-%d", name, n)
   480		}
   481		return name
   482	}
   483	
// benchContext holds state shared by all benchmarks started from one
// runBenchmarks call: the name matcher and the column width used when
// printing benchmark names.
type benchContext struct {
	match *matcher // decides which (sub)benchmark names are selected

	maxLen int // The largest recorded benchmark name.
	extLen int // Maximum extension length.
}
   490	
   491	// An internal function but exported because it is cross-package; part of the implementation
   492	// of the "go test" command.
   493	func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
   494		runBenchmarks("", matchString, benchmarks)
   495	}
   496	
   497	func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
   498		// If no flag was specified, don't run benchmarks.
   499		if len(*matchBenchmarks) == 0 {
   500			return true
   501		}
   502		// Collect matching benchmarks and determine longest name.
   503		maxprocs := 1
   504		for _, procs := range cpuList {
   505			if procs > maxprocs {
   506				maxprocs = procs
   507			}
   508		}
   509		ctx := &benchContext{
   510			match:  newMatcher(matchString, *matchBenchmarks, "-test.bench"),
   511			extLen: len(benchmarkName("", maxprocs)),
   512		}
   513		var bs []InternalBenchmark
   514		for _, Benchmark := range benchmarks {
   515			if _, matched, _ := ctx.match.fullName(nil, Benchmark.Name); matched {
   516				bs = append(bs, Benchmark)
   517				benchName := benchmarkName(Benchmark.Name, maxprocs)
   518				if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen {
   519					ctx.maxLen = l
   520				}
   521			}
   522		}
   523		main := &B{
   524			common: common{
   525				name:   "Main",
   526				w:      os.Stdout,
   527				chatty: *chatty,
   528			},
   529			importPath: importPath,
   530			benchFunc: func(b *B) {
   531				for _, Benchmark := range bs {
   532					b.Run(Benchmark.Name, Benchmark.F)
   533				}
   534			},
   535			benchTime: benchTime,
   536			context:   ctx,
   537		}
   538		main.runN(1)
   539		return !main.failed
   540	}
   541	
// processBench runs bench b for the configured CPU counts and prints the results.
//
// For every entry of cpuList the benchmark is run *count times. Each run
// prints the padded benchmark name, then either the failure output or the
// result line (plus memory statistics when requested), followed by any
// output the benchmark logged.
func (ctx *benchContext) processBench(b *B) {
	for i, procs := range cpuList {
		for j := uint(0); j < *count; j++ {
			runtime.GOMAXPROCS(procs)
			benchName := benchmarkName(b.name, procs)
			fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
			// Recompute the running time for all but the first iteration.
			if i > 0 || j > 0 {
				// Start from a fresh B so no state leaks over from the
				// previous run; run1 must be called before doBench/launch.
				b = &B{
					common: common{
						signal: make(chan bool),
						name:   b.name,
						w:      b.w,
						chatty: b.chatty,
					},
					benchFunc: b.benchFunc,
					benchTime: b.benchTime,
				}
				b.run1()
			}
			r := b.doBench()
			if b.failed {
				// The output could be very long here, but probably isn't.
				// We print it all, regardless, because we don't want to trim the reason
				// the benchmark failed.
				fmt.Fprintf(b.w, "--- FAIL: %s\n%s", benchName, b.output)
				continue
			}
			results := r.String()
			if *benchmarkMemory || b.showAllocResult {
				results += "\t" + r.MemString()
			}
			fmt.Fprintln(b.w, results)
			// Unlike with tests, we ignore the -chatty flag and always print output for
			// benchmarks since the output generation time will skew the results.
			if len(b.output) > 0 {
				b.trimOutput()
				fmt.Fprintf(b.w, "--- BENCH: %s\n%s", benchName, b.output)
			}
			// Warn when the benchmark changed GOMAXPROCS and did not restore it.
			if p := runtime.GOMAXPROCS(-1); p != procs {
				fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
			}
		}
	}
}
   588	
// Run benchmarks f as a subbenchmark with the given name. It reports
// whether there were any failures.
//
// A subbenchmark is like any other benchmark. A benchmark that calls Run at
// least once will not be measured itself and will be called once with N=1.
//
// Run also returns true when the name is not selected by the benchmark
// matcher, in which case f is not run at all.
func (b *B) Run(name string, f func(b *B)) bool {
	// Since b has subbenchmarks, we will no longer run it as a benchmark itself.
	// Release the lock and acquire it on exit to ensure locks stay paired.
	atomic.StoreInt32(&b.hasSub, 1)
	benchmarkLock.Unlock()
	defer benchmarkLock.Lock()

	// Without a context (func Benchmark) every subbenchmark is selected
	// and inherits the parent's name.
	benchName, ok, partial := b.name, true, false
	if b.context != nil {
		benchName, ok, partial = b.context.match.fullName(&b.common, name)
	}
	if !ok {
		return true
	}
	// Record the call stack of the caller as the subbenchmark's creator.
	var pc [maxStackLen]uintptr
	n := runtime.Callers(2, pc[:])
	sub := &B{
		common: common{
			signal:  make(chan bool),
			name:    benchName,
			parent:  &b.common,
			level:   b.level + 1,
			creator: pc[:n],
			w:       b.w,
			chatty:  b.chatty,
		},
		importPath: b.importPath,
		benchFunc:  f,
		benchTime:  b.benchTime,
		context:    b.context,
	}
	if partial {
		// Partial name match, like -bench=X/Y matching BenchmarkX.
		// Only process sub-benchmarks, if any.
		atomic.StoreInt32(&sub.hasSub, 1)
	}
	if sub.run1() {
		sub.run()
	}
	// Fold the subbenchmark's result into the parent's aggregate result.
	b.add(sub.result)
	return !sub.failed
}
   636	
   637	// add simulates running benchmarks in sequence in a single iteration. It is
   638	// used to give some meaningful results in case func Benchmark is used in
   639	// combination with Run.
   640	func (b *B) add(other BenchmarkResult) {
   641		r := &b.result
   642		// The aggregated BenchmarkResults resemble running all subbenchmarks as
   643		// in sequence in a single benchmark.
   644		r.N = 1
   645		r.T += time.Duration(other.NsPerOp())
   646		if other.Bytes == 0 {
   647			// Summing Bytes is meaningless in aggregate if not all subbenchmarks
   648			// set it.
   649			b.missingBytes = true
   650			r.Bytes = 0
   651		}
   652		if !b.missingBytes {
   653			r.Bytes += other.Bytes
   654		}
   655		r.MemAllocs += uint64(other.AllocsPerOp())
   656		r.MemBytes += uint64(other.AllocedBytesPerOp())
   657	}
   658	
   659	// trimOutput shortens the output from a benchmark, which can be very long.
   660	func (b *B) trimOutput() {
   661		// The output is likely to appear multiple times because the benchmark
   662		// is run multiple times, but at least it will be seen. This is not a big deal
   663		// because benchmarks rarely print, but just in case, we trim it if it's too long.
   664		const maxNewlines = 10
   665		for nlCount, j := 0, 0; j < len(b.output); j++ {
   666			if b.output[j] == '\n' {
   667				nlCount++
   668				if nlCount >= maxNewlines {
   669					b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
   670					break
   671				}
   672			}
   673		}
   674	}
   675	
   676	// A PB is used by RunParallel for running parallel benchmarks.
   677	type PB struct {
   678		globalN *uint64 // shared between all worker goroutines iteration counter
   679		grain   uint64  // acquire that many iterations from globalN at once
   680		cache   uint64  // local cache of acquired iterations
   681		bN      uint64  // total number of iterations to execute (b.N)
   682	}
   683	
   684	// Next reports whether there are more iterations to execute.
   685	func (pb *PB) Next() bool {
   686		if pb.cache == 0 {
   687			n := atomic.AddUint64(pb.globalN, pb.grain)
   688			if n <= pb.bN {
   689				pb.cache = pb.grain
   690			} else if n < pb.bN+pb.grain {
   691				pb.cache = pb.bN + pb.grain - n
   692			} else {
   693				return false
   694			}
   695		}
   696		pb.cache--
   697		return true
   698	}
   699	
   700	// RunParallel runs a benchmark in parallel.
   701	// It creates multiple goroutines and distributes b.N iterations among them.
   702	// The number of goroutines defaults to GOMAXPROCS. To increase parallelism for
   703	// non-CPU-bound benchmarks, call SetParallelism before RunParallel.
   704	// RunParallel is usually used with the go test -cpu flag.
   705	//
   706	// The body function will be run in each goroutine. It should set up any
   707	// goroutine-local state and then iterate until pb.Next returns false.
   708	// It should not use the StartTimer, StopTimer, or ResetTimer functions,
   709	// because they have global effect. It should also not call Run.
   710	func (b *B) RunParallel(body func(*PB)) {
   711		if b.N == 0 {
   712			return // Nothing to do when probing.
   713		}
   714		// Calculate grain size as number of iterations that take ~100µs.
   715		// 100µs is enough to amortize the overhead and provide sufficient
   716		// dynamic load balancing.
   717		grain := uint64(0)
   718		if b.previousN > 0 && b.previousDuration > 0 {
   719			grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
   720		}
   721		if grain < 1 {
   722			grain = 1
   723		}
   724		// We expect the inner loop and function call to take at least 10ns,
   725		// so do not do more than 100µs/10ns=1e4 iterations.
   726		if grain > 1e4 {
   727			grain = 1e4
   728		}
   729	
   730		n := uint64(0)
   731		numProcs := b.parallelism * runtime.GOMAXPROCS(0)
   732		var wg sync.WaitGroup
   733		wg.Add(numProcs)
   734		for p := 0; p < numProcs; p++ {
   735			go func() {
   736				defer wg.Done()
   737				pb := &PB{
   738					globalN: &n,
   739					grain:   grain,
   740					bN:      uint64(b.N),
   741				}
   742				body(pb)
   743			}()
   744		}
   745		wg.Wait()
   746		if n <= uint64(b.N) && !b.Failed() {
   747			b.Fatal("RunParallel: body exited without pb.Next() == false")
   748		}
   749	}
   750	
   751	// SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS.
   752	// There is usually no need to call SetParallelism for CPU-bound benchmarks.
   753	// If p is less than 1, this call will have no effect.
   754	func (b *B) SetParallelism(p int) {
   755		if p >= 1 {
   756			b.parallelism = p
   757		}
   758	}
   759	
   760	// Benchmark benchmarks a single function. It is useful for creating
   761	// custom benchmarks that do not use the "go test" command.
   762	//
   763	// If f depends on testing flags, then Init must be used to register
   764	// those flags before calling Benchmark and before calling flag.Parse.
   765	//
   766	// If f calls Run, the result will be an estimate of running all its
   767	// subbenchmarks that don't call Run in sequence in a single benchmark.
   768	func Benchmark(f func(b *B)) BenchmarkResult {
   769		b := &B{
   770			common: common{
   771				signal: make(chan bool),
   772				w:      discard{},
   773			},
   774			benchFunc: f,
   775			benchTime: benchTime,
   776		}
   777		if b.run1() {
   778			b.run()
   779		}
   780		return b.result
   781	}
   782	
   783	type discard struct{}
   784	
   785	func (discard) Write(b []byte) (n int, err error) { return len(b), nil }
   786	

View as plain text