...

Text file src/math/big/arith_386.s

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build !math_big_pure_go
     6	
     7	#include "textflag.h"
     8	
     9	// This file provides fast assembly versions for the elementary
    10	// arithmetic operations on vectors implemented in arith.go.
    11	
    12	// func mulWW(x, y Word) (z1, z0 Word)
    13	TEXT ·mulWW(SB),NOSPLIT,$0	// z1:z0 = x * y (full double-word product)
    14		MOVL x+0(FP), AX
    15		MULL y+4(FP)		// unsigned multiply: DX:AX = AX * y
    16		MOVL DX, z1+8(FP)	// z1 = high word of the product
    17		MOVL AX, z0+12(FP)	// z0 = low word of the product
    18		RET
    19	
    20	// divWW divides the double-word value x1:x0 by y, returning quotient q and remainder r.
    21	// func divWW(x1, x0, y Word) (q, r Word)
    22	TEXT ·divWW(SB),NOSPLIT,$0
    23		MOVL x1+0(FP), DX	// DX = high word of the dividend
    24		MOVL x0+4(FP), AX	// AX = low word of the dividend
    25		DIVL y+8(FP)		// AX = DX:AX / y, DX = DX:AX % y; faults if y == 0 or x1 >= y (quotient overflow)
    26		MOVL AX, q+12(FP)
    27		MOVL DX, r+16(FP)
    28		RET
    29	
    30	// addVV sets z[i] = x[i] + y[i] + carry for 0 <= i < len(z) and returns the final carry (0 or 1).
    31	// func addVV(z, x, y []Word) (c Word)
    32	TEXT ·addVV(SB),NOSPLIT,$0
    33		MOVL z+0(FP), DI	// DI = data pointer of z
    34		MOVL x+12(FP), SI	// SI = data pointer of x
    35		MOVL y+24(FP), CX	// CX = data pointer of y
    36		MOVL z_len+4(FP), BP	// n = len(z); len(x) and len(y) are never read
    37		MOVL $0, BX		// i = 0
    38		MOVL $0, DX		// c = 0 (held as a 0/-1 mask inside the loop)
    39		JMP E1
    40	
    41	L1:	MOVL (SI)(BX*4), AX	// AX = x[i]
    42		ADDL DX, DX		// restore CF: -1 + -1 carries, 0 + 0 does not
    43		ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
    44		SBBL DX, DX		// save CF: DX = -CF
    45		MOVL AX, (DI)(BX*4)	// z[i] = AX
    46		ADDL $1, BX		// i++ (overwrites CF, hence the save/restore through DX)
    47	
    48	E1:	CMPL BX, BP		// i < n
    49		JL L1
    50	
    51		NEGL DX			// turn the 0/-1 mask into a 0/1 carry
    52		MOVL DX, c+36(FP)
    53		RET
    54	
    55	// subVV sets z[i] = x[i] - y[i] - borrow for 0 <= i < len(z) and returns the final borrow (0 or 1).
    56	// func subVV(z, x, y []Word) (c Word)
    57	// (same as addVV except for SBBL instead of ADCL and label names)
    58	TEXT ·subVV(SB),NOSPLIT,$0
    59		MOVL z+0(FP), DI	// DI = data pointer of z
    60		MOVL x+12(FP), SI	// SI = data pointer of x
    61		MOVL y+24(FP), CX	// CX = data pointer of y
    62		MOVL z_len+4(FP), BP	// n = len(z); len(x) and len(y) are never read
    63		MOVL $0, BX		// i = 0
    64		MOVL $0, DX		// c = 0 (held as a 0/-1 mask inside the loop)
    65		JMP E2
    66	
    67	L2:	MOVL (SI)(BX*4), AX	// AX = x[i]
    68		ADDL DX, DX		// restore CF: -1 + -1 carries, 0 + 0 does not
    69		SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
    70		SBBL DX, DX		// save CF: DX = -CF
    71		MOVL AX, (DI)(BX*4)	// z[i] = AX
    72		ADDL $1, BX		// i++ (overwrites CF, hence the save/restore through DX)
    73	
    74	E2:	CMPL BX, BP		// i < n
    75		JL L2
    76	
    77		NEGL DX			// turn the 0/-1 mask into a 0/1 borrow
    78		MOVL DX, c+36(FP)
    79		RET
    80	
    81	// addVW adds the single word y to the vector x, storing len(z) result words in z and returning the carry out.
    82	// func addVW(z, x []Word, y Word) (c Word)
    83	TEXT ·addVW(SB),NOSPLIT,$0
    84		MOVL z+0(FP), DI	// DI = data pointer of z
    85		MOVL x+12(FP), SI	// SI = data pointer of x
    86		MOVL y+24(FP), AX	// c = y; returned unchanged if len(z) == 0
    87		MOVL z_len+4(FP), BP	// n = len(z); len(x) is never read
    88		MOVL $0, BX		// i = 0
    89		JMP E3
    90	
    91	L3:	ADDL (SI)(BX*4), AX	// AX = x[i] + c
    92		MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF untouched)
    93		SBBL AX, AX		// save CF: AX = -CF
    94		NEGL AX			// c = carry as 0 or 1
    95		ADDL $1, BX		// i++
    96	
    97	E3:	CMPL BX, BP		// i < n
    98		JL L3
    99	
   100		MOVL AX, c+28(FP)
   101		RET
   102	
   103	// subVW subtracts the single word y from the vector x, storing len(z) result words in z and returning the borrow out.
   104	// func subVW(z, x []Word, y Word) (c Word)
   105	TEXT ·subVW(SB),NOSPLIT,$0
   106		MOVL z+0(FP), DI	// DI = data pointer of z
   107		MOVL x+12(FP), SI	// SI = data pointer of x
   108		MOVL y+24(FP), AX	// c = y; returned unchanged if len(z) == 0
   109		MOVL z_len+4(FP), BP	// n = len(z); len(x) is never read
   110		MOVL $0, BX		// i = 0
   111		JMP E4
   112	
   113	L4:	MOVL (SI)(BX*4), DX	// DX = x[i]
   114		SUBL AX, DX		// DX = x[i] - c
   115		MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves CF untouched)
   116		SBBL AX, AX		// save CF: AX = -CF
   117		NEGL AX			// c = borrow as 0 or 1
   118		ADDL $1, BX		// i++
   119	
   120	E4:	CMPL BX, BP		// i < n
   121		JL L4
   122	
   123		MOVL AX, c+28(FP)
   124		RET
   125	
   126	// shlVU sets z = x<<s across len(z) words and returns in c the bits shifted out of the top word x[n-1].
   127	// func shlVU(z, x []Word, s uint) (c Word)
   128	TEXT ·shlVU(SB),NOSPLIT,$0
   129		MOVL z_len+4(FP), BX	// i = n (len(z))
   130		SUBL $1, BX		// i--
   131		JL X8b			// i < 0	(n <= 0)
   132	
   133		// n > 0
   134		MOVL z+0(FP), DI
   135		MOVL x+12(FP), SI
   136		MOVL s+24(FP), CX
   137		MOVL (SI)(BX*4), AX	// w1 = x[n-1]
   138		MOVL $0, DX		// clear DX so the double shift yields only bits of w1
   139		SHLL CX, AX, DX		// w1>>ŝ, where ŝ = 32-s
   140		MOVL DX, c+28(FP)	// c = w1>>ŝ
   141	
   142		CMPL BX, $0
   143		JLE X8a			// i <= 0
   144	
   145		// i > 0; walks from the highest index down to 1
   146	L8:	MOVL AX, DX		// w = w1
   147		MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
   148		SHLL CX, AX, DX		// w<<s | w1>>ŝ
   149		MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
   150		SUBL $1, BX		// i--
   151		JG L8			// i > 0
   152	
   153		// i <= 0
   154	X8a:	SHLL CX, AX		// w1<<s
   155		MOVL AX, (DI)		// z[0] = w1<<s
   156		RET
   157	
   158	X8b:	MOVL $0, c+28(FP)	// empty z: nothing to shift, c = 0
   159		RET
   160	
   161	// shrVU sets z = x>>s across len(z) words and returns in c the bits shifted out of the bottom word x[0], as x[0]<<ŝ.
   162	// func shrVU(z, x []Word, s uint) (c Word)
   163	TEXT ·shrVU(SB),NOSPLIT,$0
   164		MOVL z_len+4(FP), BP	// BP = n (len(z))
   165		SUBL $1, BP		// n--; BP now holds n-1
   166		JL X9b			// n-1 < 0	(n <= 0)
   167	
   168		// n > 0
   169		MOVL z+0(FP), DI
   170		MOVL x+12(FP), SI
   171		MOVL s+24(FP), CX
   172		MOVL (SI), AX		// w1 = x[0]
   173		MOVL $0, DX		// clear DX so the double shift yields only bits of w1
   174		SHRL CX, AX, DX		// w1<<ŝ, where ŝ = 32-s
   175		MOVL DX, c+28(FP)	// c = w1<<ŝ
   176	
   177		MOVL $0, BX		// i = 0
   178		JMP E9
   179	
   180		// i < n-1
   181	L9:	MOVL AX, DX		// w = w1
   182		MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
   183		SHRL CX, AX, DX		// w>>s | w1<<ŝ
   184		MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
   185		ADDL $1, BX		// i++
   186	
   187	E9:	CMPL BX, BP		// BP holds n-1 here
   188		JL L9			// i < n-1
   189	
   190		// i >= n-1
   191	X9a:	SHRL CX, AX		// w1>>s
   192		MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
   193		RET
   194	
   195	X9b:	MOVL $0, c+28(FP)	// empty z: nothing to shift, c = 0
   196		RET
   197	
   198	// mulAddVWW sets z = x*y + r across len(z) words and returns the carry-out word c.
   199	// func mulAddVWW(z, x []Word, y, r Word) (c Word)
   200	TEXT ·mulAddVWW(SB),NOSPLIT,$0
   201		MOVL z+0(FP), DI
   202		MOVL x+12(FP), SI
   203		MOVL y+24(FP), BP
   204		MOVL r+28(FP), CX	// c = r
   205		MOVL z_len+4(FP), BX	// BX = n (len(z)); len(x) is never read
   206		LEAL (DI)(BX*4), DI	// DI = end of z, so (DI)(BX*4) with negative BX walks z forwards
   207		LEAL (SI)(BX*4), SI	// SI = end of x, indexed the same way
   208		NEGL BX			// i = -n
   209		JMP E5
   210	
   211	L5:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   212		MULL BP			// DX:AX = x[n+i] * y
   213		ADDL CX, AX		// low word += c
   214		ADCL $0, DX		// propagate that carry into the high word
   215		MOVL AX, (DI)(BX*4)	// z[n+i] = low word
   216		MOVL DX, CX		// c = high word
   217		ADDL $1, BX		// i++
   218	
   219	E5:	CMPL BX, $0		// i < 0
   220		JL L5
   221	
   222		MOVL CX, c+32(FP)
   223		RET
   224	
   225	// addMulVVW sets z += x*y across len(z) words and returns the carry-out word c.
   226	// func addMulVVW(z, x []Word, y Word) (c Word)
   227	TEXT ·addMulVVW(SB),NOSPLIT,$0
   228		MOVL z+0(FP), DI
   229		MOVL x+12(FP), SI
   230		MOVL y+24(FP), BP
   231		MOVL z_len+4(FP), BX	// BX = n (len(z)); len(x) is never read
   232		LEAL (DI)(BX*4), DI	// DI = end of z, so (DI)(BX*4) with negative BX walks z forwards
   233		LEAL (SI)(BX*4), SI	// SI = end of x, indexed the same way
   234		NEGL BX			// i = -n
   235		MOVL $0, CX		// c = 0
   236		JMP E6
   237	
   238	L6:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   239		MULL BP			// DX:AX = x[n+i] * y
   240		ADDL CX, AX		// low word += c
   241		ADCL $0, DX		// propagate that carry into the high word
   242		ADDL AX, (DI)(BX*4)	// z[n+i] += low word
   243		ADCL $0, DX		// propagate the carry of the memory add as well
   244		MOVL DX, CX		// c = high word
   245		ADDL $1, BX		// i++
   246	
   247	E6:	CMPL BX, $0		// i < 0
   248		JL L6
   249	
   250		MOVL CX, c+28(FP)
   251		RET
   252	
   253	// divWVW divides the multi-word value xn:x by y, storing len(z) quotient words in z and returning the remainder r.
   254	// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
   255	TEXT ·divWVW(SB),NOSPLIT,$0
   256		MOVL z+0(FP), DI
   257		MOVL xn+12(FP), DX	// r = xn
   258		MOVL x+16(FP), SI
   259		MOVL y+28(FP), CX
   260		MOVL z_len+4(FP), BX	// i = n (len(z)); len(x) is never read
   261		JMP E7
   262	
   263	L7:	MOVL (SI)(BX*4), AX	// AX = x[i]
   264		DIVL CX			// AX = r:x[i] / y, r = DX = r:x[i] % y; faults if DX >= y, so xn < y is required on entry
   265		MOVL AX, (DI)(BX*4)	// z[i] = quotient word
   266	
   267	E7:	SUBL $1, BX		// i-- (runs from the most significant word down)
   268		JGE L7			// i >= 0
   269	
   270		MOVL DX, r+32(FP)
   271		RET

View as plain text