...

Text file src/pkg/math/atan2_s390x.s

     1	// Copyright 2017 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "textflag.h"
     6	
     7	#define PosInf		0x7FF0000000000000	// float64 bit patterns, compared and stored through integer registers below: +Inf
     8	#define NegInf		0xFFF0000000000000	// -Inf
     9	#define NegZero		0x8000000000000000	// -0 (only the sign bit set)
    10	#define Pi		0x400921FB54442D18	// Pi
    11	#define NegPi		0xC00921FB54442D18	// -Pi
    12	#define Pi3Div4		0x4002D97C7F3321D2	// 3Pi/4
    13	#define NegPi3Div4	0xC002D97C7F3321D2	// -3Pi/4
    14	#define PiDiv4		0x3FE921FB54442D18	// Pi/4
    15	#define NegPiDiv4	0xBFE921FB54442D18	// -Pi/4
    16	
    17	// Minimax polynomial coefficients and other constants: twenty 8-byte floating-point values that atan2Asm reads at offsets 0..152 from R9.
    18	DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00
    19	DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00
    20	DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00
    21	DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00
    22	DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01
    23	DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01
    24	DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01
    25	DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01
    26	DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
    27	DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
    28	DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
    29	DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
    30	DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
    31	DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
    32	DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
    33	DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
    34	DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
    35	DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
    36	DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
    37	DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
    38	GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160	// 20 entries * 8 bytes, read-only
    39	
    40	DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b	// entry 0: +c; atan2Asm multiplies the selected entry by atan2xpim and adds it to the polynomial result
    41	DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b	// entry 1: -c (sign bit flipped)
    42	DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b	// entry 2: +2c (exponent one higher)
    43	DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b	// entry 3: -2c
    44	GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32	// 4 entries * 8 bytes, read-only
    45	DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000	// multiplier applied to the atan2xpi2h entry; NOTE(review): c * this value looks like Pi/2, so the four products would be +-Pi/2 and +-Pi -- confirm
    46	GLOBL ·atan2xpim<> + 0(SB), RODATA, $8
    47	
    48	// Atan2 returns the arc tangent of y/x, using
    49	// the signs of the two to determine the quadrant
    50	// of the return value.
    51	//
    52	// Special cases, written as Atan2(y, x), are (in order):
    53	//      Atan2(y, NaN) = NaN
    54	//      Atan2(NaN, x) = NaN
    55	//      Atan2(+0, x>=0) = +0
    56	//      Atan2(-0, x>=0) = -0
    57	//      Atan2(+0, x<=-0) = +Pi
    58	//      Atan2(-0, x<=-0) = -Pi
    59	//      Atan2(y>0, 0) = +Pi/2
    60	//      Atan2(y<0, 0) = -Pi/2
    61	//      Atan2(+Inf, +Inf) = +Pi/4
    62	//      Atan2(-Inf, +Inf) = -Pi/4
    63	//      Atan2(+Inf, -Inf) = 3Pi/4
    64	//      Atan2(-Inf, -Inf) = -3Pi/4
    65	//      Atan2(y, +Inf) = 0
    66	//      Atan2(y>0, -Inf) = +Pi
    67	//      Atan2(y<0, -Inf) = -Pi
    68	//      Atan2(+Inf, x) = +Pi/2
    69	//      Atan2(-Inf, x) = -Pi/2
    70	// The algorithm used is minimax polynomial approximation
    71	// with coefficients determined with a Remez exchange algorithm.
    72	
    73	TEXT	·atan2Asm(SB), NOSPLIT, $0-24
    74		// Special cases are decided on the raw 64-bit patterns of the arguments. NOTE: this code names the first argument (offset 0) x and the second (offset 8) y, the reverse of the Atan2(y, x) notation in the header comment; the comments in the body follow the code's naming.
    75		MOVD	x+0(FP), R1	// R1 = bit pattern of the first argument
    76		MOVD	y+8(FP), R2	// R2 = bit pattern of the second argument
    77	
    78		// special case Atan2(NaN, y) = NaN
    79		MOVD	$~(1<<63), R5
    80		AND	R1, R5		// R5 = bits of |x| (sign bit cleared)
    81		MOVD	$PosInf, R3
    82		CMPUBLT	R3, R5, returnX	// bits of |x| above the +Inf pattern: x is NaN, return it
    83	
    84		// special case Atan2(x, NaN) = NaN
    85		MOVD	$~(1<<63), R5
    86		AND	R2, R5		// R5 = bits of |y| (sign bit cleared)
    87		CMPUBLT R3, R5, returnY	// y is NaN, return it
    88	
    89		MOVD	$NegZero, R3
    90		CMPUBEQ	R3, R1, xIsNegZero	// x == -0
    91	
    92		MOVD	$0, R3
    93		CMPUBEQ	R3, R1, xIsPosZero	// x == +0
    94	
    95		MOVD	$PosInf, R4
    96		CMPUBEQ	R4, R2, yIsPosInf	// y == +Inf
    97	
    98		MOVD	$NegInf, R4
    99		CMPUBEQ	R4, R2, yIsNegInf	// y == -Inf
   100		BR	Normal
   101	xIsNegZero:
   102		// special case Atan2(-0, y>=0) = -0
   103		MOVD	$0, R4
   104		CMPBLE	R4, R2, returnX	// signed compare of the bit pattern: taken when the sign bit of y is clear
   105	
   106		//special case Atan2(-0, y<=-0) = -Pi
   107		MOVD	$NegZero, R4
   108		CMPBGE	R4, R2, returnNegPi	// NOTE(review): NegZero is the minimum signed 64-bit value, so this signed >= appears to match only y == -0; other negative y continue to Normal -- confirm
   109		BR	Normal
   110	xIsPosZero:
   111		//special case Atan2(0, 0) = 0
   112		MOVD	$0, R4
   113		CMPUBEQ	R4, R2, returnX	// y == +0: return x, which is +0 here
   114	
   115		//special case Atan2(0, y<=-0) = Pi
   116		MOVD	$NegZero, R4
   117		CMPBGE	R4, R2, returnPi	// NOTE(review): signed >= against the minimum signed value, so this appears to match only y == -0 -- confirm
   118		BR Normal
   119	yIsNegInf:
   120		//special case Atan2(+Inf, -Inf) = 3Pi/4
   121		MOVD	$PosInf, R3
   122		CMPUBEQ	R3, R1, posInfNegInf
   123	
   124		//special case Atan2(-Inf, -Inf) = -3Pi/4
   125		MOVD	$NegInf, R3
   126		CMPUBEQ	R3, R1, negInfNegInf
   127		BR Normal
   128	yIsPosInf:
   129		//special case Atan2(+Inf, +Inf) = Pi/4
   130		MOVD	$PosInf, R3
   131		CMPUBEQ	R3, R1, posInfPosInf
   132	
   133		//special case Atan2(-Inf, +Inf) = -Pi/4
   134		MOVD	$NegInf, R3
   135		CMPUBEQ	R3, R1, negInfPosInf
   136	
   137		//special case Atan2(-Pi, +Inf) = -0 (negPiPosInf stores NegZero); NOTE(review): only the exact bit pattern of -Pi is intercepted here, every other x falls through to Normal
   138		MOVD	$NegPi, R3
   139		CMPUBEQ	R3, R1, negPiPosInf
   140	
   141	Normal:
   142		FMOVD	x+0(FP), F0	// F0 = x
   143		FMOVD	y+8(FP), F2	// F2 = y
   144		MOVD	$·atan2rodataL25<>+0(SB), R9	// R9 = base address of the coefficient table
   145		LGDR	F0, R2	// R2 = bit pattern of x
   146		LGDR	F2, R1	// R1 = bit pattern of y
   147		RISBGNZ	$32, $63, $32, R2, R2	// R2 = high 32 bits of x, moved into the low word
   148		RISBGNZ	$32, $63, $32, R1, R1	// R1 = high 32 bits of y, moved into the low word
   149		WORD	$0xB9170032	//llgtr	%r3,%r2 -- R3 = low 31 bits of R2, i.e. the high word of |x|
   150		RISBGZ	$63, $63, $33, R2, R5	// R5 = sign bit of x (0 or 1)
   151		WORD	$0xB9170041	//llgtr	%r4,%r1 -- R4 = low 31 bits of R1, i.e. the high word of |y|
   152		WFLCDB	V0, V20	// V20 = -x, copied back at L22/L24 as |x| when x tests negative
   153		MOVW	R4, R6
   154		MOVW	R3, R7
   155		CMPUBLT	R6, R7, L17	// high word of |y| below that of |x|: L17 computes the other ratio
   156		WFDDB	V2, V0, V3	// V3 = ratio fed to the polynomial; presumably x/y (divisor written first) -- TODO confirm operand order
   157		ADDW	$2, R5, R2	// R2 = 2 + sign bit of x: index of the atan2xpi2h entry used at L18
   158		MOVW	R4, R6
   159		MOVW	R3, R7
   160		CMPUBLE	R6, R7, L20	// not below, so taken when the high words are equal: L20 compares the full magnitudes
   161	L3:
   162		WFMDB	V3, V3, V4	// V4 = V3 * V3 (square of the ratio)
   163		VLEG	$0, 152(R9), V18	// from here to the last WFMADB: coefficient loads interleaved with multiply-adds evaluating the polynomial
   164		VLEG	$0, 144(R9), V16
   165		FMOVD	136(R9), F1
   166		FMOVD	128(R9), F5
   167		FMOVD	120(R9), F6
   168		WFMADB	V4, V16, V5, V16
   169		WFMADB	V4, V6, V1, V6
   170		FMOVD	112(R9), F7
   171		WFMDB	V4, V4, V1	// V1 = V4 * V4
   172		WFMADB	V4, V7, V18, V7
   173		VLEG	$0, 104(R9), V18
   174		WFMADB	V1, V6, V16, V6
   175		CMPWU	R4, R3	// compare the high words of |y| and |x| again; the BLT/BGT after the polynomial branch on this result
   176		FMOVD	96(R9), F5
   177		VLEG	$0, 88(R9), V16
   178		WFMADB	V4, V5, V18, V5
   179		VLEG	$0, 80(R9), V18
   180		VLEG	$0, 72(R9), V22
   181		WFMADB	V4, V16, V18, V16
   182		VLEG	$0, 64(R9), V18
   183		WFMADB	V1, V7, V5, V7
   184		WFMADB	V4, V18, V22, V18
   185		WFMDB	V1, V1, V5	// V5 = V1 * V1
   186		WFMADB	V1, V16, V18, V16
   187		VLEG	$0, 56(R9), V18
   188		WFMADB	V5, V6, V7, V6
   189		VLEG	$0, 48(R9), V22
   190		FMOVD	40(R9), F7
   191		WFMADB	V4, V7, V18, V7
   192		VLEG	$0, 32(R9), V18
   193		WFMADB	V5, V6, V16, V6
   194		WFMADB	V4, V18, V22, V18
   195		VLEG	$0, 24(R9), V16
   196		WFMADB	V1, V7, V18, V7
   197		VLEG	$0, 16(R9), V18
   198		VLEG	$0, 8(R9), V22
   199		WFMADB	V4, V18, V16, V18
   200		VLEG	$0, 0(R9), V16
   201		WFMADB	V5, V6, V7, V6
   202		WFMADB	V4, V16, V22, V16
   203		FMUL	F3, F4	// F4 = F4 * F3
   204		WFMADB	V1, V18, V16, V1
   205		FMADD	F6, F5, F1
   206		WFMADB	V4, V1, V3, V4	// last multiply-add: the polynomial result is left in V4/F4
   207		BLT	L18	// from the CMPWU above: high word of |y| below that of |x|, add the table constant at L18
   208		BGT	L7	// high word of |y| above that of |x|: only the sign of y decides (L7)
   209		LTDBR	F2, F2	// high words equal: test the sign of y
   210		BLTU	L21	// y tests negative: L21 negates F2
   211	L8:
   212		LTDBR	F0, F0	// test the sign of x
   213		BLTU	L22	// x tests negative: L22 replaces V0 with V20 (-x)
   214	L9:
   215		WFCHDBS	V2, V0, V0	// compare the two magnitudes; presumably tests |y| > |x| -- TODO confirm operand order
   216		BNE	L18	// comparison not satisfied: add the table constant
   217	L7:
   218		MOVW	R1, R6
   219		CMPBGE	R6, $0, L1	// high word of y is non-negative as a signed value (sign bit clear): F4 is already the result
   220	L18:
   221		RISBGZ	$58, $60, $3, R2, R2	// R2 = index * 8, the byte offset into atan2xpi2h
   222		MOVD	$·atan2xpi2h<>+0(SB), R1	// R1 = base address of atan2xpi2h
   223		MOVD	·atan2xpim<>+0(SB), R3	// R3 = bit pattern of atan2xpim
   224		LDGR	R3, F0	// F0 = atan2xpim
   225		WORD	$0xED021000	//madb	%f4,%f0,0(%r2,%r1) -- F4 = F4 + F0 * (selected atan2xpi2h entry)
   226		BYTE	$0x40
   227		BYTE	$0x1E
   228	L1:
   229		FMOVD	F4, ret+16(FP)	// store the result
   230		RET
   231	
   232	L20:
   233		LTDBR	F2, F2	// equal high words: build |y| in F6 and |x| in F4 for a full comparison
   234		BLTU	L23	// y tests negative: L23 sets F6 = -y
   235		FMOVD	F2, F6
   236	L4:
   237		LTDBR	F0, F0
   238		BLTU	L24	// x tests negative: L24 sets V4 = V20 (-x)
   239		FMOVD	F0, F4
   240	L5:
   241		WFCHDBS	V6, V4, V4	// presumably tests |y| > |x| -- TODO confirm operand order
   242		BEQ	L3	// comparison satisfied: keep the ratio already in V3 and the index in R2
   243	L17:
   244		WFDDB	V0, V2, V4	// V4 = the other ratio; presumably y/x (divisor written first) -- TODO confirm operand order
   245		BYTE	$0x18	//lr	%r2,%r5 -- R2 = R5: the table index is the sign bit of x
   246		BYTE	$0x25
   247		WORD	$0xB3130034	//lcdbr	%f3,%f4 -- F3 = -F4, the negated ratio fed to the polynomial
   248		BR	L3
   249	L23:
   250		WORD	$0xB3130062	//lcdbr	%f6,%f2 -- F6 = -F2
   251		BR	L4
   252	L22:
   253		VLR	V20, V0	// V0 = V20 (-x)
   254		BR	L9
   255	L21:
   256		WORD	$0xB3130022	//lcdbr	%f2,%f2 -- F2 = -F2
   257		BR	L8
   258	L24:
   259		VLR	V20, V4	// V4 = V20 (-x)
   260		BR	L5
   261	returnX:	//the result is same as the first argument
   262		MOVD	R1, ret+16(FP)
   263		RET
   264	returnY:	//the result is same as the second argument
   265		MOVD	R2, ret+16(FP)
   266		RET
   267	returnPi:
   268		MOVD	$Pi, R1
   269		MOVD	R1, ret+16(FP)
   270		RET
   271	returnNegPi:
   272		MOVD	$NegPi, R1
   273		MOVD	R1, ret+16(FP)
   274		RET
   275	posInfNegInf:
   276		MOVD	$Pi3Div4, R1
   277		MOVD	R1, ret+16(FP)
   278		RET
   279	negInfNegInf:
   280		MOVD	$NegPi3Div4, R1
   281		MOVD	R1, ret+16(FP)
   282		RET
   283	posInfPosInf:
   284		MOVD	$PiDiv4, R1
   285		MOVD	R1, ret+16(FP)
   286		RET
   287	negInfPosInf:
   288		MOVD	$NegPiDiv4, R1
   289		MOVD	R1, ret+16(FP)
   290		RET
   291	negPiPosInf:
   292		MOVD	$NegZero, R1	// the result is -0
   293		MOVD	R1, ret+16(FP)
   294		RET

View as plain text