Text file src/math/big/arith_386.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // +build !math_big_pure_go
6
7 #include "textflag.h"
8
9 // This file provides fast assembly versions for the elementary
10 // arithmetic operations on vectors implemented in arith.go.
11
12 // func mulWW(x, y Word) (z1, z0 Word)
// mulWW returns the double-word product of x and y:
// z1 receives the high word (DX) and z0 the low word (AX) left by MULL.
13 TEXT ·mulWW(SB),NOSPLIT,$0
14 MOVL x+0(FP), AX // AX = x, the implicit operand of MULL
15 MULL y+4(FP) // DX:AX = AX * y (unsigned multiply)
16 MOVL DX, z1+8(FP) // z1 = high word of the product
17 MOVL AX, z0+12(FP) // z0 = low word of the product
18 RET
19
20
21 // func divWW(x1, x0, y Word) (q, r Word)
// divWW divides the double word x1:x0 by y and returns the quotient q
// and the remainder r.
// NOTE(review): DIVL raises a divide error when y is 0 or the quotient
// does not fit in one word; no check is made here, so callers presumably
// guarantee x1 < y - confirm against arith.go.
22 TEXT ·divWW(SB),NOSPLIT,$0
23 MOVL x1+0(FP), DX // DX = high word of the dividend
24 MOVL x0+4(FP), AX // AX = low word of the dividend
25 DIVL y+8(FP) // AX = DX:AX / y, DX = DX:AX % y (unsigned divide)
26 MOVL AX, q+12(FP) // q = quotient
27 MOVL DX, r+16(FP) // r = remainder
28 RET
29
30
31 // func addVV(z, x, y []Word) (c Word)
// addVV sets z[i] = x[i] + y[i] + carry for 0 <= i < len(z) and returns
// the carry out of the last word (0 or 1) in c.
// Only len(z) is read; x and y are indexed with the same i.
// Registers: DI = z base, SI = x base, CX = y base, BP = n, BX = i,
// DX = carry kept as 0 or -1 between iterations.
32 TEXT ·addVV(SB),NOSPLIT,$0
33 MOVL z+0(FP), DI
34 MOVL x+12(FP), SI
35 MOVL y+24(FP), CX
36 MOVL z_len+4(FP), BP // n = len(z)
37 MOVL $0, BX // i = 0
38 MOVL $0, DX // c = 0
39 JMP E1 // check i < n before the first iteration
40
41 L1: MOVL (SI)(BX*4), AX // AX = x[i]
42 ADDL DX, DX // restore CF: DX is 0 or -1, and -1 + -1 carries out
43 ADCL (CX)(BX*4), AX // AX = x[i] + y[i] + CF
44 SBBL DX, DX // save CF as DX = 0 or -1 (ADDL/CMPL below overwrite CF)
45 MOVL AX, (DI)(BX*4) // z[i] = AX
46 ADDL $1, BX // i++
47
48 E1: CMPL BX, BP // i < n
49 JL L1
50
51 NEGL DX // turn the saved 0/-1 into a carry of 0/1
52 MOVL DX, c+36(FP)
53 RET
54
55
56 // func subVV(z, x, y []Word) (c Word)
57 // (same as addVV except for SBBL instead of ADCL and label names)
// subVV sets z[i] = x[i] - y[i] - borrow for 0 <= i < len(z) and returns
// the borrow out of the last word (0 or 1) in c.
// Registers: DI = z base, SI = x base, CX = y base, BP = n, BX = i,
// DX = borrow kept as 0 or -1 between iterations.
58 TEXT ·subVV(SB),NOSPLIT,$0
59 MOVL z+0(FP), DI
60 MOVL x+12(FP), SI
61 MOVL y+24(FP), CX
62 MOVL z_len+4(FP), BP // n = len(z)
63 MOVL $0, BX // i = 0
64 MOVL $0, DX // c = 0
65 JMP E2 // check i < n before the first iteration
66
67 L2: MOVL (SI)(BX*4), AX // AX = x[i]
68 ADDL DX, DX // restore CF: DX is 0 or -1, and -1 + -1 carries out
69 SBBL (CX)(BX*4), AX // AX = x[i] - y[i] - CF
70 SBBL DX, DX // save CF as DX = 0 or -1 (ADDL/CMPL below overwrite CF)
71 MOVL AX, (DI)(BX*4) // z[i] = AX
72 ADDL $1, BX // i++
73
74 E2: CMPL BX, BP // i < n
75 JL L2
76
77 NEGL DX // turn the saved 0/-1 into a borrow of 0/1
78 MOVL DX, c+36(FP)
79 RET
80
81
82 // func addVW(z, x []Word, y Word) (c Word)
// addVW adds the single word y to the vector x: z[i] = x[i] + c for
// 0 <= i < len(z), where c starts as y and is then the carry (0 or 1)
// out of the previous word. The final c is returned; when len(z) is 0
// the loop does not run and c = y is returned.
83 TEXT ·addVW(SB),NOSPLIT,$0
84 MOVL z+0(FP), DI
85 MOVL x+12(FP), SI
86 MOVL y+24(FP), AX // c = y
87 MOVL z_len+4(FP), BP // n = len(z)
88 MOVL $0, BX // i = 0
89 JMP E3 // check i < n before the first iteration
90
91 L3: ADDL (SI)(BX*4), AX // AX = x[i] + c, CF = carry out
92 MOVL AX, (DI)(BX*4) // z[i] = x[i] + c (MOVL does not change CF)
93 SBBL AX, AX // save CF
94 NEGL AX // c = 0 or 1
95 ADDL $1, BX // i++
96
97 E3: CMPL BX, BP // i < n
98 JL L3
99
100 MOVL AX, c+28(FP)
101 RET
102
103
104 // func subVW(z, x []Word, y Word) (c Word)
// subVW subtracts the single word y from the vector x: z[i] = x[i] - c
// for 0 <= i < len(z), where c starts as y and is then the borrow
// (0 or 1) out of the previous word. The final c is returned; when
// len(z) is 0 the loop does not run and c = y is returned.
105 TEXT ·subVW(SB),NOSPLIT,$0
106 MOVL z+0(FP), DI
107 MOVL x+12(FP), SI
108 MOVL y+24(FP), AX // c = y
109 MOVL z_len+4(FP), BP // n = len(z)
110 MOVL $0, BX // i = 0
111 JMP E4 // check i < n before the first iteration
112
113 L4: MOVL (SI)(BX*4), DX // DX = x[i]
114 SUBL AX, DX // DX = x[i] - c, CF = borrow out
115 MOVL DX, (DI)(BX*4) // z[i] = x[i] - c (MOVL does not change CF)
116 SBBL AX, AX // save CF
117 NEGL AX // c = 0 or 1
118 ADDL $1, BX // i++
119
120 E4: CMPL BX, BP // i < n
121 JL L4
122
123 MOVL AX, c+28(FP)
124 RET
125
126
127 // func shlVU(z, x []Word, s uint) (c Word)
// shlVU shifts the vector x left by s bits into z, working from the
// highest word (index n-1, n = len(z)) down to word 0, and returns in c
// the bits shifted out of x[n-1]. For n == 0 it only stores c = 0.
// In the comments below ŝ is the complementary shift count (32 - s for
// these 32-bit words). The three-operand SHLL is a double shift: DX is
// shifted left by CX and its vacated low bits are filled from the high
// bits of AX.
// NOTE(review): the hardware uses only the low 5 bits of the count, so
// s is presumably expected to be < 32 - confirm with callers.
128 TEXT ·shlVU(SB),NOSPLIT,$0
129 MOVL z_len+4(FP), BX // i = n (len(z))
130 SUBL $1, BX // i--
131 JL X8b // i < 0 (n <= 0)
132
133 // n > 0
134 MOVL z+0(FP), DI
135 MOVL x+12(FP), SI
136 MOVL s+24(FP), CX
137 MOVL (SI)(BX*4), AX // w1 = x[n-1]
138 MOVL $0, DX
139 SHLL CX, AX, DX // w1>>ŝ
140 MOVL DX, c+28(FP) // c = bits shifted out of the top word
141
142 CMPL BX, $0
143 JLE X8a // i <= 0
144
145 // i > 0
146 L8: MOVL AX, DX // w = w1
147 MOVL -4(SI)(BX*4), AX // w1 = x[i-1]
148 SHLL CX, AX, DX // w<<s | w1>>ŝ
149 MOVL DX, (DI)(BX*4) // z[i] = w<<s | w1>>ŝ
150 SUBL $1, BX // i--
151 JG L8 // i > 0
152
153 // i <= 0
154 X8a: SHLL CX, AX // w1<<s
155 MOVL AX, (DI) // z[0] = w1<<s
156 RET
157
158 X8b: MOVL $0, c+28(FP) // empty vector: nothing shifted out
159 RET
160
161
162 // func shrVU(z, x []Word, s uint) (c Word)
// shrVU shifts the vector x right by s bits into z, working from word 0
// up to word n-1 (n = len(z)), and returns in c the bits shifted out of
// x[0], left-aligned (x[0]<<ŝ). For n == 0 it only stores c = 0.
// In the comments below ŝ is the complementary shift count (32 - s for
// these 32-bit words). The three-operand SHRL is a double shift: DX is
// shifted right by CX and its vacated high bits are filled from the low
// bits of AX.
// NOTE(review): the hardware uses only the low 5 bits of the count, so
// s is presumably expected to be < 32 - confirm with callers.
163 TEXT ·shrVU(SB),NOSPLIT,$0
164 MOVL z_len+4(FP), BP // BP = n (len(z))
165 SUBL $1, BP // n-- (BP now holds n-1, the index of the last word)
166 JL X9b // n < 0 (n <= 0)
167
168 // n > 0
169 MOVL z+0(FP), DI
170 MOVL x+12(FP), SI
171 MOVL s+24(FP), CX
172 MOVL (SI), AX // w1 = x[0]
173 MOVL $0, DX
174 SHRL CX, AX, DX // w1<<ŝ
175 MOVL DX, c+28(FP) // c = bits shifted out of the bottom word
176
177 MOVL $0, BX // i = 0
178 JMP E9
179
180 // i < n-1
181 L9: MOVL AX, DX // w = w1
182 MOVL 4(SI)(BX*4), AX // w1 = x[i+1]
183 SHRL CX, AX, DX // w>>s | w1<<ŝ
184 MOVL DX, (DI)(BX*4) // z[i] = w>>s | w1<<ŝ
185 ADDL $1, BX // i++
186
187 E9: CMPL BX, BP // compare i with n-1 (BP was decremented above)
188 JL L9 // i < n-1
189
190 // i >= n-1
191 X9a: SHRL CX, AX // w1>>s
192 MOVL AX, (DI)(BP*4) // z[n-1] = w1>>s
193 RET
194
195 X9b: MOVL $0, c+28(FP) // empty vector: nothing shifted out
196 RET
197
198
199 // func mulAddVWW(z, x []Word, y, r Word) (c Word)
// mulAddVWW computes x*y + r word by word: for each index, z gets the
// low word of x[i]*y + c and c becomes the high word, with c starting
// as r. The final c is returned (c = r when len(z) is 0).
// DI and SI are advanced to one element past the end of z and x, and
// BX counts from -n up to 0, so (DI)(BX*4) and (SI)(BX*4) walk the
// vectors from index 0 and the loop ends when BX reaches 0.
200 TEXT ·mulAddVWW(SB),NOSPLIT,$0
201 MOVL z+0(FP), DI
202 MOVL x+12(FP), SI
203 MOVL y+24(FP), BP // BP = y (multiplier)
204 MOVL r+28(FP), CX // c = r
205 MOVL z_len+4(FP), BX // BX = n (len(z))
206 LEAL (DI)(BX*4), DI // DI = address just past z[n-1]
207 LEAL (SI)(BX*4), SI // SI = address just past x[n-1]
208 NEGL BX // i = -n
209 JMP E5 // check i < 0 before the first iteration
210
211 L5: MOVL (SI)(BX*4), AX // AX = x[n+i]
212 MULL BP // DX:AX = x[n+i] * y
213 ADDL CX, AX // low word += c
214 ADCL $0, DX // propagate the carry into the high word
215 MOVL AX, (DI)(BX*4) // z[n+i] = low word
216 MOVL DX, CX // c = high word
217 ADDL $1, BX // i++
218
219 E5: CMPL BX, $0 // i < 0
220 JL L5
221
222 MOVL CX, c+32(FP)
223 RET
224
225
226 // func addMulVVW(z, x []Word, y Word) (c Word)
// addMulVVW adds x*y into z word by word: for each index, the low word
// of x[i]*y + c is added to z[i] in place and c becomes the high word
// plus the carries from both additions, with c starting at 0. The
// final c is returned.
// DI and SI are advanced to one element past the end of z and x, and
// BX counts from -n up to 0, so the loop ends when BX reaches 0.
227 TEXT ·addMulVVW(SB),NOSPLIT,$0
228 MOVL z+0(FP), DI
229 MOVL x+12(FP), SI
230 MOVL y+24(FP), BP // BP = y (multiplier)
231 MOVL z_len+4(FP), BX // BX = n (len(z))
232 LEAL (DI)(BX*4), DI // DI = address just past z[n-1]
233 LEAL (SI)(BX*4), SI // SI = address just past x[n-1]
234 NEGL BX // i = -n
235 MOVL $0, CX // c = 0
236 JMP E6 // check i < 0 before the first iteration
237
238 L6: MOVL (SI)(BX*4), AX // AX = x[n+i]
239 MULL BP // DX:AX = x[n+i] * y
240 ADDL CX, AX // low word += c
241 ADCL $0, DX // propagate the carry into the high word
242 ADDL AX, (DI)(BX*4) // z[n+i] += low word
243 ADCL $0, DX // propagate the carry from the in-memory add
244 MOVL DX, CX // c = high word
245 ADDL $1, BX // i++
246
247 E6: CMPL BX, $0 // i < 0
248 JL L6
249
250 MOVL CX, c+28(FP)
251 RET
252
253
254 // func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
// divWVW divides the multi-word value formed by xn (most significant)
// followed by x[n-1] ... x[0] by the single word y, where n = len(z).
// It works from the top word down: each step divides r:x[i] by y,
// stores the quotient word in z[i] and keeps the remainder in DX as the
// high word of the next step. The final remainder is returned in r
// (r = xn when len(z) is 0).
// NOTE(review): DIVL raises a divide error when y is 0 or the quotient
// overflows a word, so callers presumably guarantee xn < y - confirm
// against arith.go.
255 TEXT ·divWVW(SB),NOSPLIT,$0
256 MOVL z+0(FP), DI
257 MOVL xn+12(FP), DX // r = xn
258 MOVL x+16(FP), SI
259 MOVL y+28(FP), CX
260 MOVL z_len+4(FP), BX // i = n (len(z))
261 JMP E7 // decrement and test before the first iteration
262
263 L7: MOVL (SI)(BX*4), AX // AX = x[i]; DX still holds r
264 DIVL CX // AX = r:x[i] / y, DX = r = r:x[i] % y
265 MOVL AX, (DI)(BX*4) // z[i] = quotient word
266
267 E7: SUBL $1, BX // i--
268 JGE L7 // i >= 0
269
270 MOVL DX, r+32(FP)
271 RET
View as plain text