...

Text file src/pkg/math/exp_amd64.s

     1	// Copyright 2010 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "textflag.h"
     6	
     7	// The method is based on a paper by Naoki Shibata: "Efficient evaluation
     8	// methods of elementary functions suitable for SIMD computation", Proc.
     9	// of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32
    10	// (May 2010). The paper is available at
    11	// https://www.springerlink.com/content/340228x165742104/
    12	//
    13	// The original code and the constants below are from the author's
    14	// implementation available at http://freshmeat.net/projects/sleef.
    15	// The README file says, "The software is in public domain.
    16	// You can use the software without any obligation."
    17	//
    18	// This code is a simplified version of the original.
    19	
    20	#define LN2 0.6931471805599453094172321214581766 // log_e(2)
    21	#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2
    22	#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2
    23	#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2
    24	#define PosInf 0x7FF0000000000000
    25	#define NegInf 0xFFF0000000000000
    26	#define Overflow 7.09782712893384e+02
    27	
    28	DATA exprodata<>+0(SB)/8, $0.5
    29	DATA exprodata<>+8(SB)/8, $1.0
    30	DATA exprodata<>+16(SB)/8, $2.0
    31	DATA exprodata<>+24(SB)/8, $1.6666666666666666667e-1
    32	DATA exprodata<>+32(SB)/8, $4.1666666666666666667e-2
    33	DATA exprodata<>+40(SB)/8, $8.3333333333333333333e-3
    34	DATA exprodata<>+48(SB)/8, $1.3888888888888888889e-3
    35	DATA exprodata<>+56(SB)/8, $1.9841269841269841270e-4
    36	DATA exprodata<>+64(SB)/8, $2.4801587301587301587e-5
    37	GLOBL exprodata<>+0(SB), RODATA, $72
    38	
    39	// func Exp(x float64) float64
    40	TEXT ·Exp(SB),NOSPLIT,$0
    41		// test bits for not-finite
    42		MOVQ    x+0(FP), BX
    43		MOVQ    $~(1<<63), AX // sign bit mask
    44		MOVQ    BX, DX
    45		ANDQ    AX, DX
    46		MOVQ    $PosInf, AX
    47		CMPQ    AX, DX
    48		JLE     notFinite
    49		// check if argument will overflow
    50		MOVQ    BX, X0
    51		MOVSD   $Overflow, X1
    52		COMISD  X1, X0
    53		JA      overflow
    54		MOVSD   $LOG2E, X1
    55		MULSD   X0, X1
    56		CVTSD2SL X1, BX // BX = exponent
    57		CVTSL2SD BX, X1
    58		CMPB ·useFMA(SB), $1
    59		JE   avxfma
    60		MOVSD   $LN2U, X2
    61		MULSD   X1, X2
    62		SUBSD   X2, X0
    63		MOVSD   $LN2L, X2
    64		MULSD   X1, X2
    65		SUBSD   X2, X0
    66		// reduce argument
    67		MULSD   $0.0625, X0
    68		// Taylor series evaluation
    69		MOVSD   exprodata<>+64(SB), X1
    70		MULSD   X0, X1
    71		ADDSD   exprodata<>+56(SB), X1
    72		MULSD   X0, X1
    73		ADDSD   exprodata<>+48(SB), X1
    74		MULSD   X0, X1
    75		ADDSD   exprodata<>+40(SB), X1
    76		MULSD   X0, X1
    77		ADDSD   exprodata<>+32(SB), X1
    78		MULSD   X0, X1
    79		ADDSD   exprodata<>+24(SB), X1
    80		MULSD   X0, X1
    81		ADDSD   exprodata<>+0(SB), X1
    82		MULSD   X0, X1
    83		ADDSD   exprodata<>+8(SB), X1
    84		MULSD   X1, X0
    85		MOVSD   exprodata<>+16(SB), X1
    86		ADDSD   X0, X1
    87		MULSD   X1, X0
    88		MOVSD   exprodata<>+16(SB), X1
    89		ADDSD   X0, X1
    90		MULSD   X1, X0
    91		MOVSD   exprodata<>+16(SB), X1
    92		ADDSD   X0, X1
    93		MULSD   X1, X0
    94		MOVSD   exprodata<>+16(SB), X1
    95		ADDSD   X0, X1
    96		MULSD   X1, X0
    97		ADDSD exprodata<>+8(SB), X0
    98		// return fr * 2**exponent
    99	ldexp:
   100		ADDL    $0x3FF, BX // add bias
   101		JLE     denormal
   102		CMPL    BX, $0x7FF
   103		JGE     overflow
   104	lastStep:
   105		SHLQ    $52, BX
   106		MOVQ    BX, X1
   107		MULSD   X1, X0
   108		MOVSD   X0, ret+8(FP)
   109		RET
   110	notFinite:
   111		// test bits for -Inf
   112		MOVQ    $NegInf, AX
   113		CMPQ    AX, BX
   114		JNE     notNegInf
   115		// -Inf, return 0
   116	underflow: // return 0
   117		MOVQ    $0, ret+8(FP)
   118		RET
   119	overflow: // return +Inf
   120		MOVQ    $PosInf, BX
   121	notNegInf: // NaN or +Inf, return x
   122		MOVQ    BX, ret+8(FP)
   123		RET
   124	denormal:
   125		CMPL    BX, $-52
   126		JL      underflow
   127		ADDL    $0x3FE, BX // add bias - 1
   128		SHLQ    $52, BX
   129		MOVQ    BX, X1
   130		MULSD   X1, X0
   131		MOVQ    $1, BX
   132		JMP     lastStep
   133	
   134	avxfma:
   135		MOVSD   $LN2U, X2
   136		VFNMADD231SD X2, X1, X0
   137		MOVSD   $LN2L, X2
   138		VFNMADD231SD X2, X1, X0
   139		// reduce argument
   140		MULSD   $0.0625, X0
   141		// Taylor series evaluation
   142		MOVSD   exprodata<>+64(SB), X1
   143		VFMADD213SD exprodata<>+56(SB), X0, X1
   144		VFMADD213SD exprodata<>+48(SB), X0, X1
   145		VFMADD213SD exprodata<>+40(SB), X0, X1
   146		VFMADD213SD exprodata<>+32(SB), X0, X1
   147		VFMADD213SD exprodata<>+24(SB), X0, X1
   148		VFMADD213SD exprodata<>+0(SB), X0, X1
   149		VFMADD213SD exprodata<>+8(SB), X0, X1
   150		MULSD   X1, X0
   151		VADDSD exprodata<>+16(SB), X0, X1
   152		MULSD   X1, X0
   153		VADDSD exprodata<>+16(SB), X0, X1
   154		MULSD   X1, X0
   155		VADDSD exprodata<>+16(SB), X0, X1
   156		MULSD   X1, X0
   157		VADDSD exprodata<>+16(SB), X0, X1
   158		VFMADD213SD   exprodata<>+8(SB), X1, X0
   159		JMP ldexp

View as plain text