diff options
author | Sona Sarmadi <sona.sarmadi@enea.com> | 2015-03-06 07:26:43 +0100 |
---|---|---|
committer | Tudor Florea <tudor.florea@enea.com> | 2015-07-06 20:19:39 +0200 |
commit | 13e4abbce92f9b7630563a0b7f9d8be6db3919c9 (patch) | |
tree | aee692962e07fd1e47006afe385085d4ef5c7679 /meta/recipes-connectivity/openssl/openssl/0008-CVE-2014-3570.patch | |
parent | 72bec03e72908b002355a3dba39c9b9caec2b473 (diff) | |
download | poky-13e4abbce92f9b7630563a0b7f9d8be6db3919c9.tar.gz |
openssl: multiple CVE fixes
This patch addresses the following CVEs:
CVE-2014-3569
CVE-2015-0204
CVE-2015-0205
CVE-2014-8275
CVE-2014-3571
CVE-2014-3570
Two additional patches (0004 & 0005), which were needed for CVE-2014-8275,
have been backported from the 1.0.1 stable (OpenSSL_1_0_1-stable) branch.
Reference
https://www.openssl.org/news/secadv_20150108.txt
Signed-off-by: Sona Sarmadi <sona.sarmadi@enea.com>
Diffstat (limited to 'meta/recipes-connectivity/openssl/openssl/0008-CVE-2014-3570.patch')
-rw-r--r-- | meta/recipes-connectivity/openssl/openssl/0008-CVE-2014-3570.patch | 3160 |
1 files changed, 3160 insertions, 0 deletions
diff --git a/meta/recipes-connectivity/openssl/openssl/0008-CVE-2014-3570.patch b/meta/recipes-connectivity/openssl/openssl/0008-CVE-2014-3570.patch new file mode 100644 index 0000000000..100f8a2f85 --- /dev/null +++ b/meta/recipes-connectivity/openssl/openssl/0008-CVE-2014-3570.patch | |||
@@ -0,0 +1,3160 @@ | |||
1 | From e078642ddea29bbb6ba29788a6a513796387fbbb Mon Sep 17 00:00:00 2001 | ||
2 | From: Andy Polyakov <appro@openssl.org> | ||
3 | Date: Mon, 5 Jan 2015 14:52:56 +0100 | ||
4 | Subject: [PATCH] Fix for CVE-2014-3570. | ||
5 | |||
6 | Reviewed-by: Emilia Kasper <emilia@openssl.org> | ||
7 | (cherry picked from commit e793809ba50c1e90ab592fb640a856168e50f3de) | ||
8 | (with 1.0.1-specific addendum) | ||
9 | Fixes CVE-2014-3570. | ||
10 | |||
11 | Upstream-Status: Backport | ||
12 | |||
13 | Signed-off-by: Sona Sarmadi <sona.sarmadi@enea.com> | ||
14 | --- | ||
15 | crypto/bn/asm/mips.pl | 611 +++--------- | ||
16 | crypto/bn/asm/mips3.s | 2201 -------------------------------------------- | ||
17 | crypto/bn/asm/x86_64-gcc.c | 34 +- | ||
18 | crypto/bn/bn_asm.c | 16 +- | ||
19 | crypto/bn/bntest.c | 102 +- | ||
20 | 5 files changed, 234 insertions(+), 2730 deletions(-) | ||
21 | delete mode 100644 crypto/bn/asm/mips3.s | ||
22 | |||
23 | diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl | ||
24 | index d2f3ef7..215c9a7 100644 | ||
25 | --- a/crypto/bn/asm/mips.pl | ||
26 | +++ b/crypto/bn/asm/mips.pl | ||
27 | @@ -1872,6 +1872,41 @@ ___ | ||
28 | |||
29 | ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); | ||
30 | |||
31 | +sub add_c2 () { # appends code that adds the HI:LO product twice into ($c0,$c1,$c2) | |
32 | +my ($hi,$lo,$c0,$c1,$c2, | |
33 | + $warm, # !$warm denotes the first call for a given sequence of | |
34 | + # $c_[XYZ], when there is no Z-carry to accumulate yet; | |
35 | + $an,$bn # these two are the operands of the multiplication whose | |
36 | + # result is used in the *next* step [which is why it is | |
37 | + # commented as "forward multiplication" below]; | |
38 | + )=@_; | |
39 | +$code.=<<___; | |
40 | + mflo $lo | |
41 | + mfhi $hi | |
42 | + $ADDU $c0,$lo | |
43 | + sltu $at,$c0,$lo | |
44 | + $MULTU $an,$bn # forward multiplication | |
45 | + $ADDU $c0,$lo | |
46 | + $ADDU $at,$hi | |
47 | + sltu $lo,$c0,$lo | |
48 | + $ADDU $c1,$at | |
49 | + $ADDU $hi,$lo | |
50 | +___ | |
51 | +$code.=<<___ if (!$warm); | |
52 | + sltu $c2,$c1,$at | |
53 | + $ADDU $c1,$hi | |
54 | + sltu $hi,$c1,$hi | |
55 | + $ADDU $c2,$hi | |
56 | +___ | |
57 | +$code.=<<___ if ($warm); | |
58 | + sltu $at,$c1,$at | |
59 | + $ADDU $c1,$hi | |
60 | + $ADDU $c2,$at | |
61 | + sltu $hi,$c1,$hi | |
62 | + $ADDU $c2,$hi | |
63 | +___ | |
64 | +} | |
65 | + | ||
66 | $code.=<<___; | ||
67 | |||
68 | .align 5 | ||
69 | @@ -1920,21 +1955,10 @@ $code.=<<___; | ||
70 | sltu $at,$c_2,$t_1 | ||
71 | $ADDU $c_3,$t_2,$at | ||
72 | $ST $c_2,$BNSZ($a0) | ||
73 | - | ||
74 | - mflo $t_1 | ||
75 | - mfhi $t_2 | ||
76 | - slt $c_2,$t_2,$zero | ||
77 | - $SLL $t_2,1 | ||
78 | - $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
79 | - slt $a2,$t_1,$zero | ||
80 | - $ADDU $t_2,$a2 | ||
81 | - $SLL $t_1,1 | ||
82 | - $ADDU $c_3,$t_1 | ||
83 | - sltu $at,$c_3,$t_1 | ||
84 | - $ADDU $t_2,$at | ||
85 | - $ADDU $c_1,$t_2 | ||
86 | - sltu $at,$c_1,$t_2 | ||
87 | - $ADDU $c_2,$at | ||
88 | +___ | ||
89 | + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, | ||
90 | + $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); | ||
91 | +$code.=<<___; | ||
92 | mflo $t_1 | ||
93 | mfhi $t_2 | ||
94 | $ADDU $c_3,$t_1 | ||
95 | @@ -1945,67 +1969,19 @@ $code.=<<___; | ||
96 | sltu $at,$c_1,$t_2 | ||
97 | $ADDU $c_2,$at | ||
98 | $ST $c_3,2*$BNSZ($a0) | ||
99 | - | ||
100 | - mflo $t_1 | ||
101 | - mfhi $t_2 | ||
102 | - slt $c_3,$t_2,$zero | ||
103 | - $SLL $t_2,1 | ||
104 | - $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); | ||
105 | - slt $a2,$t_1,$zero | ||
106 | - $ADDU $t_2,$a2 | ||
107 | - $SLL $t_1,1 | ||
108 | - $ADDU $c_1,$t_1 | ||
109 | - sltu $at,$c_1,$t_1 | ||
110 | - $ADDU $t_2,$at | ||
111 | - $ADDU $c_2,$t_2 | ||
112 | - sltu $at,$c_2,$t_2 | ||
113 | - $ADDU $c_3,$at | ||
114 | - mflo $t_1 | ||
115 | - mfhi $t_2 | ||
116 | - slt $at,$t_2,$zero | ||
117 | - $ADDU $c_3,$at | ||
118 | - $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1); | ||
119 | - $SLL $t_2,1 | ||
120 | - slt $a2,$t_1,$zero | ||
121 | - $ADDU $t_2,$a2 | ||
122 | - $SLL $t_1,1 | ||
123 | - $ADDU $c_1,$t_1 | ||
124 | - sltu $at,$c_1,$t_1 | ||
125 | - $ADDU $t_2,$at | ||
126 | - $ADDU $c_2,$t_2 | ||
127 | - sltu $at,$c_2,$t_2 | ||
128 | - $ADDU $c_3,$at | ||
129 | +___ | ||
130 | + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, | ||
131 | + $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); | ||
132 | + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, | ||
133 | + $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1); | ||
134 | +$code.=<<___; | ||
135 | $ST $c_1,3*$BNSZ($a0) | ||
136 | - | ||
137 | - mflo $t_1 | ||
138 | - mfhi $t_2 | ||
139 | - slt $c_1,$t_2,$zero | ||
140 | - $SLL $t_2,1 | ||
141 | - $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | ||
142 | - slt $a2,$t_1,$zero | ||
143 | - $ADDU $t_2,$a2 | ||
144 | - $SLL $t_1,1 | ||
145 | - $ADDU $c_2,$t_1 | ||
146 | - sltu $at,$c_2,$t_1 | ||
147 | - $ADDU $t_2,$at | ||
148 | - $ADDU $c_3,$t_2 | ||
149 | - sltu $at,$c_3,$t_2 | ||
150 | - $ADDU $c_1,$at | ||
151 | - mflo $t_1 | ||
152 | - mfhi $t_2 | ||
153 | - slt $at,$t_2,$zero | ||
154 | - $ADDU $c_1,$at | ||
155 | - $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
156 | - $SLL $t_2,1 | ||
157 | - slt $a2,$t_1,$zero | ||
158 | - $ADDU $t_2,$a2 | ||
159 | - $SLL $t_1,1 | ||
160 | - $ADDU $c_2,$t_1 | ||
161 | - sltu $at,$c_2,$t_1 | ||
162 | - $ADDU $t_2,$at | ||
163 | - $ADDU $c_3,$t_2 | ||
164 | - sltu $at,$c_3,$t_2 | ||
165 | - $ADDU $c_1,$at | ||
166 | +___ | ||
167 | + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, | ||
168 | + $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); | ||
169 | + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, | ||
170 | + $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); | ||
171 | +$code.=<<___; | ||
172 | mflo $t_1 | ||
173 | mfhi $t_2 | ||
174 | $ADDU $c_2,$t_1 | ||
175 | @@ -2016,97 +1992,23 @@ $code.=<<___; | ||
176 | sltu $at,$c_3,$t_2 | ||
177 | $ADDU $c_1,$at | ||
178 | $ST $c_2,4*$BNSZ($a0) | ||
179 | - | ||
180 | - mflo $t_1 | ||
181 | - mfhi $t_2 | ||
182 | - slt $c_2,$t_2,$zero | ||
183 | - $SLL $t_2,1 | ||
184 | - $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); | ||
185 | - slt $a2,$t_1,$zero | ||
186 | - $ADDU $t_2,$a2 | ||
187 | - $SLL $t_1,1 | ||
188 | - $ADDU $c_3,$t_1 | ||
189 | - sltu $at,$c_3,$t_1 | ||
190 | - $ADDU $t_2,$at | ||
191 | - $ADDU $c_1,$t_2 | ||
192 | - sltu $at,$c_1,$t_2 | ||
193 | - $ADDU $c_2,$at | ||
194 | - mflo $t_1 | ||
195 | - mfhi $t_2 | ||
196 | - slt $at,$t_2,$zero | ||
197 | - $ADDU $c_2,$at | ||
198 | - $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); | ||
199 | - $SLL $t_2,1 | ||
200 | - slt $a2,$t_1,$zero | ||
201 | - $ADDU $t_2,$a2 | ||
202 | - $SLL $t_1,1 | ||
203 | - $ADDU $c_3,$t_1 | ||
204 | - sltu $at,$c_3,$t_1 | ||
205 | - $ADDU $t_2,$at | ||
206 | - $ADDU $c_1,$t_2 | ||
207 | - sltu $at,$c_1,$t_2 | ||
208 | - $ADDU $c_2,$at | ||
209 | - mflo $t_1 | ||
210 | - mfhi $t_2 | ||
211 | - slt $at,$t_2,$zero | ||
212 | - $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3); | ||
213 | - $ADDU $c_2,$at | ||
214 | - $SLL $t_2,1 | ||
215 | - slt $a2,$t_1,$zero | ||
216 | - $ADDU $t_2,$a2 | ||
217 | - $SLL $t_1,1 | ||
218 | - $ADDU $c_3,$t_1 | ||
219 | - sltu $at,$c_3,$t_1 | ||
220 | - $ADDU $t_2,$at | ||
221 | - $ADDU $c_1,$t_2 | ||
222 | - sltu $at,$c_1,$t_2 | ||
223 | - $ADDU $c_2,$at | ||
224 | +___ | ||
225 | + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, | ||
226 | + $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); | ||
227 | + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, | ||
228 | + $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2); | ||
229 | + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, | ||
230 | + $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3); | ||
231 | +$code.=<<___; | ||
232 | $ST $c_3,5*$BNSZ($a0) | ||
233 | - | ||
234 | - mflo $t_1 | ||
235 | - mfhi $t_2 | ||
236 | - slt $c_3,$t_2,$zero | ||
237 | - $SLL $t_2,1 | ||
238 | - $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); | ||
239 | - slt $a2,$t_1,$zero | ||
240 | - $ADDU $t_2,$a2 | ||
241 | - $SLL $t_1,1 | ||
242 | - $ADDU $c_1,$t_1 | ||
243 | - sltu $at,$c_1,$t_1 | ||
244 | - $ADDU $t_2,$at | ||
245 | - $ADDU $c_2,$t_2 | ||
246 | - sltu $at,$c_2,$t_2 | ||
247 | - $ADDU $c_3,$at | ||
248 | - mflo $t_1 | ||
249 | - mfhi $t_2 | ||
250 | - slt $at,$t_2,$zero | ||
251 | - $ADDU $c_3,$at | ||
252 | - $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3); | ||
253 | - $SLL $t_2,1 | ||
254 | - slt $a2,$t_1,$zero | ||
255 | - $ADDU $t_2,$a2 | ||
256 | - $SLL $t_1,1 | ||
257 | - $ADDU $c_1,$t_1 | ||
258 | - sltu $at,$c_1,$t_1 | ||
259 | - $ADDU $t_2,$at | ||
260 | - $ADDU $c_2,$t_2 | ||
261 | - sltu $at,$c_2,$t_2 | ||
262 | - $ADDU $c_3,$at | ||
263 | - mflo $t_1 | ||
264 | - mfhi $t_2 | ||
265 | - slt $at,$t_2,$zero | ||
266 | - $ADDU $c_3,$at | ||
267 | - $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
268 | - $SLL $t_2,1 | ||
269 | - slt $a2,$t_1,$zero | ||
270 | - $ADDU $t_2,$a2 | ||
271 | - $SLL $t_1,1 | ||
272 | - $ADDU $c_1,$t_1 | ||
273 | - sltu $at,$c_1,$t_1 | ||
274 | - $ADDU $t_2,$at | ||
275 | - $ADDU $c_2,$t_2 | ||
276 | - sltu $at,$c_2,$t_2 | ||
277 | - $ADDU $c_3,$at | ||
278 | +___ | ||
279 | + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, | ||
280 | + $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3); | ||
281 | + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, | ||
282 | + $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3); | ||
283 | + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, | ||
284 | + $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); | ||
285 | +$code.=<<___; | ||
286 | mflo $t_1 | ||
287 | mfhi $t_2 | ||
288 | $ADDU $c_1,$t_1 | ||
289 | @@ -2117,112 +2019,25 @@ $code.=<<___; | ||
290 | sltu $at,$c_2,$t_2 | ||
291 | $ADDU $c_3,$at | ||
292 | $ST $c_1,6*$BNSZ($a0) | ||
293 | - | ||
294 | - mflo $t_1 | ||
295 | - mfhi $t_2 | ||
296 | - slt $c_1,$t_2,$zero | ||
297 | - $SLL $t_2,1 | ||
298 | - $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); | ||
299 | - slt $a2,$t_1,$zero | ||
300 | - $ADDU $t_2,$a2 | ||
301 | - $SLL $t_1,1 | ||
302 | - $ADDU $c_2,$t_1 | ||
303 | - sltu $at,$c_2,$t_1 | ||
304 | - $ADDU $t_2,$at | ||
305 | - $ADDU $c_3,$t_2 | ||
306 | - sltu $at,$c_3,$t_2 | ||
307 | - $ADDU $c_1,$at | ||
308 | - mflo $t_1 | ||
309 | - mfhi $t_2 | ||
310 | - slt $at,$t_2,$zero | ||
311 | - $ADDU $c_1,$at | ||
312 | - $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1); | ||
313 | - $SLL $t_2,1 | ||
314 | - slt $a2,$t_1,$zero | ||
315 | - $ADDU $t_2,$a2 | ||
316 | - $SLL $t_1,1 | ||
317 | - $ADDU $c_2,$t_1 | ||
318 | - sltu $at,$c_2,$t_1 | ||
319 | - $ADDU $t_2,$at | ||
320 | - $ADDU $c_3,$t_2 | ||
321 | - sltu $at,$c_3,$t_2 | ||
322 | - $ADDU $c_1,$at | ||
323 | - mflo $t_1 | ||
324 | - mfhi $t_2 | ||
325 | - slt $at,$t_2,$zero | ||
326 | - $ADDU $c_1,$at | ||
327 | - $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1); | ||
328 | - $SLL $t_2,1 | ||
329 | - slt $a2,$t_1,$zero | ||
330 | - $ADDU $t_2,$a2 | ||
331 | - $SLL $t_1,1 | ||
332 | - $ADDU $c_2,$t_1 | ||
333 | - sltu $at,$c_2,$t_1 | ||
334 | - $ADDU $t_2,$at | ||
335 | - $ADDU $c_3,$t_2 | ||
336 | - sltu $at,$c_3,$t_2 | ||
337 | - $ADDU $c_1,$at | ||
338 | - mflo $t_1 | ||
339 | - mfhi $t_2 | ||
340 | - slt $at,$t_2,$zero | ||
341 | - $ADDU $c_1,$at | ||
342 | - $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2); | ||
343 | - $SLL $t_2,1 | ||
344 | - slt $a2,$t_1,$zero | ||
345 | - $ADDU $t_2,$a2 | ||
346 | - $SLL $t_1,1 | ||
347 | - $ADDU $c_2,$t_1 | ||
348 | - sltu $at,$c_2,$t_1 | ||
349 | - $ADDU $t_2,$at | ||
350 | - $ADDU $c_3,$t_2 | ||
351 | - sltu $at,$c_3,$t_2 | ||
352 | - $ADDU $c_1,$at | ||
353 | +___ | ||
354 | + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, | ||
355 | + $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); | ||
356 | + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, | ||
357 | + $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1); | ||
358 | + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, | ||
359 | + $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1); | ||
360 | + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, | ||
361 | + $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2); | ||
362 | +$code.=<<___; | ||
363 | $ST $c_2,7*$BNSZ($a0) | ||
364 | - | ||
365 | - mflo $t_1 | ||
366 | - mfhi $t_2 | ||
367 | - slt $c_2,$t_2,$zero | ||
368 | - $SLL $t_2,1 | ||
369 | - $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); | ||
370 | - slt $a2,$t_1,$zero | ||
371 | - $ADDU $t_2,$a2 | ||
372 | - $SLL $t_1,1 | ||
373 | - $ADDU $c_3,$t_1 | ||
374 | - sltu $at,$c_3,$t_1 | ||
375 | - $ADDU $t_2,$at | ||
376 | - $ADDU $c_1,$t_2 | ||
377 | - sltu $at,$c_1,$t_2 | ||
378 | - $ADDU $c_2,$at | ||
379 | - mflo $t_1 | ||
380 | - mfhi $t_2 | ||
381 | - slt $at,$t_2,$zero | ||
382 | - $ADDU $c_2,$at | ||
383 | - $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2); | ||
384 | - $SLL $t_2,1 | ||
385 | - slt $a2,$t_1,$zero | ||
386 | - $ADDU $t_2,$a2 | ||
387 | - $SLL $t_1,1 | ||
388 | - $ADDU $c_3,$t_1 | ||
389 | - sltu $at,$c_3,$t_1 | ||
390 | - $ADDU $t_2,$at | ||
391 | - $ADDU $c_1,$t_2 | ||
392 | - sltu $at,$c_1,$t_2 | ||
393 | - $ADDU $c_2,$at | ||
394 | - mflo $t_1 | ||
395 | - mfhi $t_2 | ||
396 | - slt $at,$t_2,$zero | ||
397 | - $ADDU $c_2,$at | ||
398 | - $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2); | ||
399 | - $SLL $t_2,1 | ||
400 | - slt $a2,$t_1,$zero | ||
401 | - $ADDU $t_2,$a2 | ||
402 | - $SLL $t_1,1 | ||
403 | - $ADDU $c_3,$t_1 | ||
404 | - sltu $at,$c_3,$t_1 | ||
405 | - $ADDU $t_2,$at | ||
406 | - $ADDU $c_1,$t_2 | ||
407 | - sltu $at,$c_1,$t_2 | ||
408 | - $ADDU $c_2,$at | ||
409 | +___ | ||
410 | + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, | ||
411 | + $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2); | ||
412 | + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, | ||
413 | + $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2); | ||
414 | + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, | ||
415 | + $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); | ||
416 | +$code.=<<___; | ||
417 | mflo $t_1 | ||
418 | mfhi $t_2 | ||
419 | $ADDU $c_3,$t_1 | ||
420 | @@ -2233,82 +2048,21 @@ $code.=<<___; | ||
421 | sltu $at,$c_1,$t_2 | ||
422 | $ADDU $c_2,$at | ||
423 | $ST $c_3,8*$BNSZ($a0) | ||
424 | - | ||
425 | - mflo $t_1 | ||
426 | - mfhi $t_2 | ||
427 | - slt $c_3,$t_2,$zero | ||
428 | - $SLL $t_2,1 | ||
429 | - $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); | ||
430 | - slt $a2,$t_1,$zero | ||
431 | - $ADDU $t_2,$a2 | ||
432 | - $SLL $t_1,1 | ||
433 | - $ADDU $c_1,$t_1 | ||
434 | - sltu $at,$c_1,$t_1 | ||
435 | - $ADDU $t_2,$at | ||
436 | - $ADDU $c_2,$t_2 | ||
437 | - sltu $at,$c_2,$t_2 | ||
438 | - $ADDU $c_3,$at | ||
439 | - mflo $t_1 | ||
440 | - mfhi $t_2 | ||
441 | - slt $at,$t_2,$zero | ||
442 | - $ADDU $c_3,$at | ||
443 | - $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3); | ||
444 | - $SLL $t_2,1 | ||
445 | - slt $a2,$t_1,$zero | ||
446 | - $ADDU $t_2,$a2 | ||
447 | - $SLL $t_1,1 | ||
448 | - $ADDU $c_1,$t_1 | ||
449 | - sltu $at,$c_1,$t_1 | ||
450 | - $ADDU $t_2,$at | ||
451 | - $ADDU $c_2,$t_2 | ||
452 | - sltu $at,$c_2,$t_2 | ||
453 | - $ADDU $c_3,$at | ||
454 | - mflo $t_1 | ||
455 | - mfhi $t_2 | ||
456 | - slt $at,$t_2,$zero | ||
457 | - $ADDU $c_3,$at | ||
458 | - $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1); | ||
459 | - $SLL $t_2,1 | ||
460 | - slt $a2,$t_1,$zero | ||
461 | - $ADDU $t_2,$a2 | ||
462 | - $SLL $t_1,1 | ||
463 | - $ADDU $c_1,$t_1 | ||
464 | - sltu $at,$c_1,$t_1 | ||
465 | - $ADDU $t_2,$at | ||
466 | - $ADDU $c_2,$t_2 | ||
467 | - sltu $at,$c_2,$t_2 | ||
468 | - $ADDU $c_3,$at | ||
469 | +___ | ||
470 | + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, | ||
471 | + $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); | ||
472 | + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, | ||
473 | + $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3); | ||
474 | + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, | ||
475 | + $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1); | ||
476 | +$code.=<<___; | ||
477 | $ST $c_1,9*$BNSZ($a0) | ||
478 | - | ||
479 | - mflo $t_1 | ||
480 | - mfhi $t_2 | ||
481 | - slt $c_1,$t_2,$zero | ||
482 | - $SLL $t_2,1 | ||
483 | - $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); | ||
484 | - slt $a2,$t_1,$zero | ||
485 | - $ADDU $t_2,$a2 | ||
486 | - $SLL $t_1,1 | ||
487 | - $ADDU $c_2,$t_1 | ||
488 | - sltu $at,$c_2,$t_1 | ||
489 | - $ADDU $t_2,$at | ||
490 | - $ADDU $c_3,$t_2 | ||
491 | - sltu $at,$c_3,$t_2 | ||
492 | - $ADDU $c_1,$at | ||
493 | - mflo $t_1 | ||
494 | - mfhi $t_2 | ||
495 | - slt $at,$t_2,$zero | ||
496 | - $ADDU $c_1,$at | ||
497 | - $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1); | ||
498 | - $SLL $t_2,1 | ||
499 | - slt $a2,$t_1,$zero | ||
500 | - $ADDU $t_2,$a2 | ||
501 | - $SLL $t_1,1 | ||
502 | - $ADDU $c_2,$t_1 | ||
503 | - sltu $at,$c_2,$t_1 | ||
504 | - $ADDU $t_2,$at | ||
505 | - $ADDU $c_3,$t_2 | ||
506 | - sltu $at,$c_3,$t_2 | ||
507 | - $ADDU $c_1,$at | ||
508 | +___ | ||
509 | + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, | ||
510 | + $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1); | ||
511 | + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, | ||
512 | + $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); | ||
513 | +$code.=<<___; | ||
514 | mflo $t_1 | ||
515 | mfhi $t_2 | ||
516 | $ADDU $c_2,$t_1 | ||
517 | @@ -2319,52 +2073,17 @@ $code.=<<___; | ||
518 | sltu $at,$c_3,$t_2 | ||
519 | $ADDU $c_1,$at | ||
520 | $ST $c_2,10*$BNSZ($a0) | ||
521 | - | ||
522 | - mflo $t_1 | ||
523 | - mfhi $t_2 | ||
524 | - slt $c_2,$t_2,$zero | ||
525 | - $SLL $t_2,1 | ||
526 | - $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); | ||
527 | - slt $a2,$t_1,$zero | ||
528 | - $ADDU $t_2,$a2 | ||
529 | - $SLL $t_1,1 | ||
530 | - $ADDU $c_3,$t_1 | ||
531 | - sltu $at,$c_3,$t_1 | ||
532 | - $ADDU $t_2,$at | ||
533 | - $ADDU $c_1,$t_2 | ||
534 | - sltu $at,$c_1,$t_2 | ||
535 | - $ADDU $c_2,$at | ||
536 | - mflo $t_1 | ||
537 | - mfhi $t_2 | ||
538 | - slt $at,$t_2,$zero | ||
539 | - $ADDU $c_2,$at | ||
540 | - $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3); | ||
541 | - $SLL $t_2,1 | ||
542 | - slt $a2,$t_1,$zero | ||
543 | - $ADDU $t_2,$a2 | ||
544 | - $SLL $t_1,1 | ||
545 | - $ADDU $c_3,$t_1 | ||
546 | - sltu $at,$c_3,$t_1 | ||
547 | - $ADDU $t_2,$at | ||
548 | - $ADDU $c_1,$t_2 | ||
549 | - sltu $at,$c_1,$t_2 | ||
550 | - $ADDU $c_2,$at | ||
551 | +___ | ||
552 | + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, | ||
553 | + $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); | ||
554 | + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, | ||
555 | + $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3); | ||
556 | +$code.=<<___; | ||
557 | $ST $c_3,11*$BNSZ($a0) | ||
558 | - | ||
559 | - mflo $t_1 | ||
560 | - mfhi $t_2 | ||
561 | - slt $c_3,$t_2,$zero | ||
562 | - $SLL $t_2,1 | ||
563 | - $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3); | ||
564 | - slt $a2,$t_1,$zero | ||
565 | - $ADDU $t_2,$a2 | ||
566 | - $SLL $t_1,1 | ||
567 | - $ADDU $c_1,$t_1 | ||
568 | - sltu $at,$c_1,$t_1 | ||
569 | - $ADDU $t_2,$at | ||
570 | - $ADDU $c_2,$t_2 | ||
571 | - sltu $at,$c_2,$t_2 | ||
572 | - $ADDU $c_3,$at | ||
573 | +___ | ||
574 | + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, | ||
575 | + $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); | ||
576 | +$code.=<<___; | ||
577 | mflo $t_1 | ||
578 | mfhi $t_2 | ||
579 | $ADDU $c_1,$t_1 | ||
580 | @@ -2375,21 +2094,10 @@ $code.=<<___; | ||
581 | sltu $at,$c_2,$t_2 | ||
582 | $ADDU $c_3,$at | ||
583 | $ST $c_1,12*$BNSZ($a0) | ||
584 | - | ||
585 | - mflo $t_1 | ||
586 | - mfhi $t_2 | ||
587 | - slt $c_1,$t_2,$zero | ||
588 | - $SLL $t_2,1 | ||
589 | - $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2); | ||
590 | - slt $a2,$t_1,$zero | ||
591 | - $ADDU $t_2,$a2 | ||
592 | - $SLL $t_1,1 | ||
593 | - $ADDU $c_2,$t_1 | ||
594 | - sltu $at,$c_2,$t_1 | ||
595 | - $ADDU $t_2,$at | ||
596 | - $ADDU $c_3,$t_2 | ||
597 | - sltu $at,$c_3,$t_2 | ||
598 | - $ADDU $c_1,$at | ||
599 | +___ | ||
600 | + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, | ||
601 | + $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); | ||
602 | +$code.=<<___; | ||
603 | $ST $c_2,13*$BNSZ($a0) | ||
604 | |||
605 | mflo $t_1 | ||
606 | @@ -2457,21 +2165,10 @@ $code.=<<___; | ||
607 | sltu $at,$c_2,$t_1 | ||
608 | $ADDU $c_3,$t_2,$at | ||
609 | $ST $c_2,$BNSZ($a0) | ||
610 | - | ||
611 | - mflo $t_1 | ||
612 | - mfhi $t_2 | ||
613 | - slt $c_2,$t_2,$zero | ||
614 | - $SLL $t_2,1 | ||
615 | - $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
616 | - slt $a2,$t_1,$zero | ||
617 | - $ADDU $t_2,$a2 | ||
618 | - $SLL $t_1,1 | ||
619 | - $ADDU $c_3,$t_1 | ||
620 | - sltu $at,$c_3,$t_1 | ||
621 | - $ADDU $t_2,$at | ||
622 | - $ADDU $c_1,$t_2 | ||
623 | - sltu $at,$c_1,$t_2 | ||
624 | - $ADDU $c_2,$at | ||
625 | +___ | ||
626 | + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, | ||
627 | + $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); | ||
628 | +$code.=<<___; | ||
629 | mflo $t_1 | ||
630 | mfhi $t_2 | ||
631 | $ADDU $c_3,$t_1 | ||
632 | @@ -2482,52 +2179,17 @@ $code.=<<___; | ||
633 | sltu $at,$c_1,$t_2 | ||
634 | $ADDU $c_2,$at | ||
635 | $ST $c_3,2*$BNSZ($a0) | ||
636 | - | ||
637 | - mflo $t_1 | ||
638 | - mfhi $t_2 | ||
639 | - slt $c_3,$t_2,$zero | ||
640 | - $SLL $t_2,1 | ||
641 | - $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3); | ||
642 | - slt $a2,$t_1,$zero | ||
643 | - $ADDU $t_2,$a2 | ||
644 | - $SLL $t_1,1 | ||
645 | - $ADDU $c_1,$t_1 | ||
646 | - sltu $at,$c_1,$t_1 | ||
647 | - $ADDU $t_2,$at | ||
648 | - $ADDU $c_2,$t_2 | ||
649 | - sltu $at,$c_2,$t_2 | ||
650 | - $ADDU $c_3,$at | ||
651 | - mflo $t_1 | ||
652 | - mfhi $t_2 | ||
653 | - slt $at,$t_2,$zero | ||
654 | - $ADDU $c_3,$at | ||
655 | - $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | ||
656 | - $SLL $t_2,1 | ||
657 | - slt $a2,$t_1,$zero | ||
658 | - $ADDU $t_2,$a2 | ||
659 | - $SLL $t_1,1 | ||
660 | - $ADDU $c_1,$t_1 | ||
661 | - sltu $at,$c_1,$t_1 | ||
662 | - $ADDU $t_2,$at | ||
663 | - $ADDU $c_2,$t_2 | ||
664 | - sltu $at,$c_2,$t_2 | ||
665 | - $ADDU $c_3,$at | ||
666 | +___ | ||
667 | + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, | ||
668 | + $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); | ||
669 | + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, | ||
670 | + $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); | ||
671 | +$code.=<<___; | ||
672 | $ST $c_1,3*$BNSZ($a0) | ||
673 | - | ||
674 | - mflo $t_1 | ||
675 | - mfhi $t_2 | ||
676 | - slt $c_1,$t_2,$zero | ||
677 | - $SLL $t_2,1 | ||
678 | - $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
679 | - slt $a2,$t_1,$zero | ||
680 | - $ADDU $t_2,$a2 | ||
681 | - $SLL $t_1,1 | ||
682 | - $ADDU $c_2,$t_1 | ||
683 | - sltu $at,$c_2,$t_1 | ||
684 | - $ADDU $t_2,$at | ||
685 | - $ADDU $c_3,$t_2 | ||
686 | - sltu $at,$c_3,$t_2 | ||
687 | - $ADDU $c_1,$at | ||
688 | +___ | ||
689 | + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, | ||
690 | + $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); | ||
691 | +$code.=<<___; | ||
692 | mflo $t_1 | ||
693 | mfhi $t_2 | ||
694 | $ADDU $c_2,$t_1 | ||
695 | @@ -2538,21 +2200,10 @@ $code.=<<___; | ||
696 | sltu $at,$c_3,$t_2 | ||
697 | $ADDU $c_1,$at | ||
698 | $ST $c_2,4*$BNSZ($a0) | ||
699 | - | ||
700 | - mflo $t_1 | ||
701 | - mfhi $t_2 | ||
702 | - slt $c_2,$t_2,$zero | ||
703 | - $SLL $t_2,1 | ||
704 | - $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
705 | - slt $a2,$t_1,$zero | ||
706 | - $ADDU $t_2,$a2 | ||
707 | - $SLL $t_1,1 | ||
708 | - $ADDU $c_3,$t_1 | ||
709 | - sltu $at,$c_3,$t_1 | ||
710 | - $ADDU $t_2,$at | ||
711 | - $ADDU $c_1,$t_2 | ||
712 | - sltu $at,$c_1,$t_2 | ||
713 | - $ADDU $c_2,$at | ||
714 | +___ | ||
715 | + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, | ||
716 | + $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); | ||
717 | +$code.=<<___; | ||
718 | $ST $c_3,5*$BNSZ($a0) | ||
719 | |||
720 | mflo $t_1 | ||
721 | diff --git a/crypto/bn/asm/mips3.s b/crypto/bn/asm/mips3.s | ||
722 | deleted file mode 100644 | ||
723 | index dca4105..0000000 | ||
724 | --- a/crypto/bn/asm/mips3.s | ||
725 | +++ /dev/null | ||
726 | @@ -1,2201 +0,0 @@ | ||
727 | -.rdata | ||
728 | -.asciiz "mips3.s, Version 1.1" | ||
729 | -.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" | ||
730 | - | ||
731 | -/* | ||
732 | - * ==================================================================== | ||
733 | - * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
734 | - * project. | ||
735 | - * | ||
736 | - * Rights for redistribution and usage in source and binary forms are | ||
737 | - * granted according to the OpenSSL license. Warranty of any kind is | ||
738 | - * disclaimed. | ||
739 | - * ==================================================================== | ||
740 | - */ | ||
741 | - | ||
742 | -/* | ||
743 | - * This is my modest contributon to the OpenSSL project (see | ||
744 | - * http://www.openssl.org/ for more information about it) and is | ||
745 | - * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c | ||
746 | - * module. For updates see http://fy.chalmers.se/~appro/hpe/. | ||
747 | - * | ||
748 | - * The module is designed to work with either of the "new" MIPS ABI(5), | ||
749 | - * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under | ||
750 | - * IRIX 5.x not only because it doesn't support new ABIs but also | ||
751 | - * because 5.x kernels put R4x00 CPU into 32-bit mode and all those | ||
752 | - * 64-bit instructions (daddu, dmultu, etc.) found below gonna only | ||
753 | - * cause illegal instruction exception:-( | ||
754 | - * | ||
755 | - * In addition the code depends on preprocessor flags set up by MIPSpro | ||
756 | - * compiler driver (either as or cc) and therefore (probably?) can't be | ||
757 | - * compiled by the GNU assembler. GNU C driver manages fine though... | ||
758 | - * I mean as long as -mmips-as is specified or is the default option, | ||
759 | - * because then it simply invokes /usr/bin/as which in turn takes | ||
760 | - * perfect care of the preprocessor definitions. Another neat feature | ||
761 | - * offered by the MIPSpro assembler is an optimization pass. This gave | ||
762 | - * me the opportunity to have the code looking more regular as all those | ||
763 | - * architecture dependent instruction rescheduling details were left to | ||
764 | - * the assembler. Cool, huh? | ||
765 | - * | ||
766 | - * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' | ||
767 | - * goes way over 3 times faster! | ||
768 | - * | ||
769 | - * <appro@fy.chalmers.se> | ||
770 | - */ | ||
771 | -#include <asm.h> | ||
772 | -#include <regdef.h> | ||
773 | - | ||
774 | -#if _MIPS_ISA>=4 | ||
775 | -#define MOVNZ(cond,dst,src) \ | ||
776 | - movn dst,src,cond | ||
777 | -#else | ||
778 | -#define MOVNZ(cond,dst,src) \ | ||
779 | - .set noreorder; \ | ||
780 | - bnezl cond,.+8; \ | ||
781 | - move dst,src; \ | ||
782 | - .set reorder | ||
783 | -#endif | ||
784 | - | ||
785 | -.text | ||
786 | - | ||
787 | -.set noat | ||
788 | -.set reorder | ||
789 | - | ||
790 | -#define MINUS4 v1 | ||
791 | - | ||
792 | -.align 5 | ||
793 | -LEAF(bn_mul_add_words) | ||
794 | - .set noreorder | ||
795 | - bgtzl a2,.L_bn_mul_add_words_proceed | ||
796 | - ld t0,0(a1) | ||
797 | - jr ra | ||
798 | - move v0,zero | ||
799 | - .set reorder | ||
800 | - | ||
801 | -.L_bn_mul_add_words_proceed: | ||
802 | - li MINUS4,-4 | ||
803 | - and ta0,a2,MINUS4 | ||
804 | - move v0,zero | ||
805 | - beqz ta0,.L_bn_mul_add_words_tail | ||
806 | - | ||
807 | -.L_bn_mul_add_words_loop: | ||
808 | - dmultu t0,a3 | ||
809 | - ld t1,0(a0) | ||
810 | - ld t2,8(a1) | ||
811 | - ld t3,8(a0) | ||
812 | - ld ta0,16(a1) | ||
813 | - ld ta1,16(a0) | ||
814 | - daddu t1,v0 | ||
815 | - sltu v0,t1,v0 /* All manuals say it "compares 32-bit | ||
816 | - * values", but it seems to work fine | ||
817 | - * even on 64-bit registers. */ | ||
818 | - mflo AT | ||
819 | - mfhi t0 | ||
820 | - daddu t1,AT | ||
821 | - daddu v0,t0 | ||
822 | - sltu AT,t1,AT | ||
823 | - sd t1,0(a0) | ||
824 | - daddu v0,AT | ||
825 | - | ||
826 | - dmultu t2,a3 | ||
827 | - ld ta2,24(a1) | ||
828 | - ld ta3,24(a0) | ||
829 | - daddu t3,v0 | ||
830 | - sltu v0,t3,v0 | ||
831 | - mflo AT | ||
832 | - mfhi t2 | ||
833 | - daddu t3,AT | ||
834 | - daddu v0,t2 | ||
835 | - sltu AT,t3,AT | ||
836 | - sd t3,8(a0) | ||
837 | - daddu v0,AT | ||
838 | - | ||
839 | - dmultu ta0,a3 | ||
840 | - subu a2,4 | ||
841 | - PTR_ADD a0,32 | ||
842 | - PTR_ADD a1,32 | ||
843 | - daddu ta1,v0 | ||
844 | - sltu v0,ta1,v0 | ||
845 | - mflo AT | ||
846 | - mfhi ta0 | ||
847 | - daddu ta1,AT | ||
848 | - daddu v0,ta0 | ||
849 | - sltu AT,ta1,AT | ||
850 | - sd ta1,-16(a0) | ||
851 | - daddu v0,AT | ||
852 | - | ||
853 | - | ||
854 | - dmultu ta2,a3 | ||
855 | - and ta0,a2,MINUS4 | ||
856 | - daddu ta3,v0 | ||
857 | - sltu v0,ta3,v0 | ||
858 | - mflo AT | ||
859 | - mfhi ta2 | ||
860 | - daddu ta3,AT | ||
861 | - daddu v0,ta2 | ||
862 | - sltu AT,ta3,AT | ||
863 | - sd ta3,-8(a0) | ||
864 | - daddu v0,AT | ||
865 | - .set noreorder | ||
866 | - bgtzl ta0,.L_bn_mul_add_words_loop | ||
867 | - ld t0,0(a1) | ||
868 | - | ||
869 | - bnezl a2,.L_bn_mul_add_words_tail | ||
870 | - ld t0,0(a1) | ||
871 | - .set reorder | ||
872 | - | ||
873 | -.L_bn_mul_add_words_return: | ||
874 | - jr ra | ||
875 | - | ||
876 | -.L_bn_mul_add_words_tail: | ||
877 | - dmultu t0,a3 | ||
878 | - ld t1,0(a0) | ||
879 | - subu a2,1 | ||
880 | - daddu t1,v0 | ||
881 | - sltu v0,t1,v0 | ||
882 | - mflo AT | ||
883 | - mfhi t0 | ||
884 | - daddu t1,AT | ||
885 | - daddu v0,t0 | ||
886 | - sltu AT,t1,AT | ||
887 | - sd t1,0(a0) | ||
888 | - daddu v0,AT | ||
889 | - beqz a2,.L_bn_mul_add_words_return | ||
890 | - | ||
891 | - ld t0,8(a1) | ||
892 | - dmultu t0,a3 | ||
893 | - ld t1,8(a0) | ||
894 | - subu a2,1 | ||
895 | - daddu t1,v0 | ||
896 | - sltu v0,t1,v0 | ||
897 | - mflo AT | ||
898 | - mfhi t0 | ||
899 | - daddu t1,AT | ||
900 | - daddu v0,t0 | ||
901 | - sltu AT,t1,AT | ||
902 | - sd t1,8(a0) | ||
903 | - daddu v0,AT | ||
904 | - beqz a2,.L_bn_mul_add_words_return | ||
905 | - | ||
906 | - ld t0,16(a1) | ||
907 | - dmultu t0,a3 | ||
908 | - ld t1,16(a0) | ||
909 | - daddu t1,v0 | ||
910 | - sltu v0,t1,v0 | ||
911 | - mflo AT | ||
912 | - mfhi t0 | ||
913 | - daddu t1,AT | ||
914 | - daddu v0,t0 | ||
915 | - sltu AT,t1,AT | ||
916 | - sd t1,16(a0) | ||
917 | - daddu v0,AT | ||
918 | - jr ra | ||
919 | -END(bn_mul_add_words) | ||
920 | - | ||
921 | -.align 5 | ||
922 | -LEAF(bn_mul_words) | ||
923 | - .set noreorder | ||
924 | - bgtzl a2,.L_bn_mul_words_proceed | ||
925 | - ld t0,0(a1) | ||
926 | - jr ra | ||
927 | - move v0,zero | ||
928 | - .set reorder | ||
929 | - | ||
930 | -.L_bn_mul_words_proceed: | ||
931 | - li MINUS4,-4 | ||
932 | - and ta0,a2,MINUS4 | ||
933 | - move v0,zero | ||
934 | - beqz ta0,.L_bn_mul_words_tail | ||
935 | - | ||
936 | -.L_bn_mul_words_loop: | ||
937 | - dmultu t0,a3 | ||
938 | - ld t2,8(a1) | ||
939 | - ld ta0,16(a1) | ||
940 | - ld ta2,24(a1) | ||
941 | - mflo AT | ||
942 | - mfhi t0 | ||
943 | - daddu v0,AT | ||
944 | - sltu t1,v0,AT | ||
945 | - sd v0,0(a0) | ||
946 | - daddu v0,t1,t0 | ||
947 | - | ||
948 | - dmultu t2,a3 | ||
949 | - subu a2,4 | ||
950 | - PTR_ADD a0,32 | ||
951 | - PTR_ADD a1,32 | ||
952 | - mflo AT | ||
953 | - mfhi t2 | ||
954 | - daddu v0,AT | ||
955 | - sltu t3,v0,AT | ||
956 | - sd v0,-24(a0) | ||
957 | - daddu v0,t3,t2 | ||
958 | - | ||
959 | - dmultu ta0,a3 | ||
960 | - mflo AT | ||
961 | - mfhi ta0 | ||
962 | - daddu v0,AT | ||
963 | - sltu ta1,v0,AT | ||
964 | - sd v0,-16(a0) | ||
965 | - daddu v0,ta1,ta0 | ||
966 | - | ||
967 | - | ||
968 | - dmultu ta2,a3 | ||
969 | - and ta0,a2,MINUS4 | ||
970 | - mflo AT | ||
971 | - mfhi ta2 | ||
972 | - daddu v0,AT | ||
973 | - sltu ta3,v0,AT | ||
974 | - sd v0,-8(a0) | ||
975 | - daddu v0,ta3,ta2 | ||
976 | - .set noreorder | ||
977 | - bgtzl ta0,.L_bn_mul_words_loop | ||
978 | - ld t0,0(a1) | ||
979 | - | ||
980 | - bnezl a2,.L_bn_mul_words_tail | ||
981 | - ld t0,0(a1) | ||
982 | - .set reorder | ||
983 | - | ||
984 | -.L_bn_mul_words_return: | ||
985 | - jr ra | ||
986 | - | ||
987 | -.L_bn_mul_words_tail: | ||
988 | - dmultu t0,a3 | ||
989 | - subu a2,1 | ||
990 | - mflo AT | ||
991 | - mfhi t0 | ||
992 | - daddu v0,AT | ||
993 | - sltu t1,v0,AT | ||
994 | - sd v0,0(a0) | ||
995 | - daddu v0,t1,t0 | ||
996 | - beqz a2,.L_bn_mul_words_return | ||
997 | - | ||
998 | - ld t0,8(a1) | ||
999 | - dmultu t0,a3 | ||
1000 | - subu a2,1 | ||
1001 | - mflo AT | ||
1002 | - mfhi t0 | ||
1003 | - daddu v0,AT | ||
1004 | - sltu t1,v0,AT | ||
1005 | - sd v0,8(a0) | ||
1006 | - daddu v0,t1,t0 | ||
1007 | - beqz a2,.L_bn_mul_words_return | ||
1008 | - | ||
1009 | - ld t0,16(a1) | ||
1010 | - dmultu t0,a3 | ||
1011 | - mflo AT | ||
1012 | - mfhi t0 | ||
1013 | - daddu v0,AT | ||
1014 | - sltu t1,v0,AT | ||
1015 | - sd v0,16(a0) | ||
1016 | - daddu v0,t1,t0 | ||
1017 | - jr ra | ||
1018 | -END(bn_mul_words) | ||
1019 | - | ||
1020 | -.align 5 | ||
1021 | -LEAF(bn_sqr_words) | ||
1022 | - .set noreorder | ||
1023 | - bgtzl a2,.L_bn_sqr_words_proceed | ||
1024 | - ld t0,0(a1) | ||
1025 | - jr ra | ||
1026 | - move v0,zero | ||
1027 | - .set reorder | ||
1028 | - | ||
1029 | -.L_bn_sqr_words_proceed: | ||
1030 | - li MINUS4,-4 | ||
1031 | - and ta0,a2,MINUS4 | ||
1032 | - move v0,zero | ||
1033 | - beqz ta0,.L_bn_sqr_words_tail | ||
1034 | - | ||
1035 | -.L_bn_sqr_words_loop: | ||
1036 | - dmultu t0,t0 | ||
1037 | - ld t2,8(a1) | ||
1038 | - ld ta0,16(a1) | ||
1039 | - ld ta2,24(a1) | ||
1040 | - mflo t1 | ||
1041 | - mfhi t0 | ||
1042 | - sd t1,0(a0) | ||
1043 | - sd t0,8(a0) | ||
1044 | - | ||
1045 | - dmultu t2,t2 | ||
1046 | - subu a2,4 | ||
1047 | - PTR_ADD a0,64 | ||
1048 | - PTR_ADD a1,32 | ||
1049 | - mflo t3 | ||
1050 | - mfhi t2 | ||
1051 | - sd t3,-48(a0) | ||
1052 | - sd t2,-40(a0) | ||
1053 | - | ||
1054 | - dmultu ta0,ta0 | ||
1055 | - mflo ta1 | ||
1056 | - mfhi ta0 | ||
1057 | - sd ta1,-32(a0) | ||
1058 | - sd ta0,-24(a0) | ||
1059 | - | ||
1060 | - | ||
1061 | - dmultu ta2,ta2 | ||
1062 | - and ta0,a2,MINUS4 | ||
1063 | - mflo ta3 | ||
1064 | - mfhi ta2 | ||
1065 | - sd ta3,-16(a0) | ||
1066 | - sd ta2,-8(a0) | ||
1067 | - | ||
1068 | - .set noreorder | ||
1069 | - bgtzl ta0,.L_bn_sqr_words_loop | ||
1070 | - ld t0,0(a1) | ||
1071 | - | ||
1072 | - bnezl a2,.L_bn_sqr_words_tail | ||
1073 | - ld t0,0(a1) | ||
1074 | - .set reorder | ||
1075 | - | ||
1076 | -.L_bn_sqr_words_return: | ||
1077 | - move v0,zero | ||
1078 | - jr ra | ||
1079 | - | ||
1080 | -.L_bn_sqr_words_tail: | ||
1081 | - dmultu t0,t0 | ||
1082 | - subu a2,1 | ||
1083 | - mflo t1 | ||
1084 | - mfhi t0 | ||
1085 | - sd t1,0(a0) | ||
1086 | - sd t0,8(a0) | ||
1087 | - beqz a2,.L_bn_sqr_words_return | ||
1088 | - | ||
1089 | - ld t0,8(a1) | ||
1090 | - dmultu t0,t0 | ||
1091 | - subu a2,1 | ||
1092 | - mflo t1 | ||
1093 | - mfhi t0 | ||
1094 | - sd t1,16(a0) | ||
1095 | - sd t0,24(a0) | ||
1096 | - beqz a2,.L_bn_sqr_words_return | ||
1097 | - | ||
1098 | - ld t0,16(a1) | ||
1099 | - dmultu t0,t0 | ||
1100 | - mflo t1 | ||
1101 | - mfhi t0 | ||
1102 | - sd t1,32(a0) | ||
1103 | - sd t0,40(a0) | ||
1104 | - jr ra | ||
1105 | -END(bn_sqr_words) | ||
1106 | - | ||
1107 | -.align 5 | ||
1108 | -LEAF(bn_add_words) | ||
1109 | - .set noreorder | ||
1110 | - bgtzl a3,.L_bn_add_words_proceed | ||
1111 | - ld t0,0(a1) | ||
1112 | - jr ra | ||
1113 | - move v0,zero | ||
1114 | - .set reorder | ||
1115 | - | ||
1116 | -.L_bn_add_words_proceed: | ||
1117 | - li MINUS4,-4 | ||
1118 | - and AT,a3,MINUS4 | ||
1119 | - move v0,zero | ||
1120 | - beqz AT,.L_bn_add_words_tail | ||
1121 | - | ||
1122 | -.L_bn_add_words_loop: | ||
1123 | - ld ta0,0(a2) | ||
1124 | - subu a3,4 | ||
1125 | - ld t1,8(a1) | ||
1126 | - and AT,a3,MINUS4 | ||
1127 | - ld t2,16(a1) | ||
1128 | - PTR_ADD a2,32 | ||
1129 | - ld t3,24(a1) | ||
1130 | - PTR_ADD a0,32 | ||
1131 | - ld ta1,-24(a2) | ||
1132 | - PTR_ADD a1,32 | ||
1133 | - ld ta2,-16(a2) | ||
1134 | - ld ta3,-8(a2) | ||
1135 | - daddu ta0,t0 | ||
1136 | - sltu t8,ta0,t0 | ||
1137 | - daddu t0,ta0,v0 | ||
1138 | - sltu v0,t0,ta0 | ||
1139 | - sd t0,-32(a0) | ||
1140 | - daddu v0,t8 | ||
1141 | - | ||
1142 | - daddu ta1,t1 | ||
1143 | - sltu t9,ta1,t1 | ||
1144 | - daddu t1,ta1,v0 | ||
1145 | - sltu v0,t1,ta1 | ||
1146 | - sd t1,-24(a0) | ||
1147 | - daddu v0,t9 | ||
1148 | - | ||
1149 | - daddu ta2,t2 | ||
1150 | - sltu t8,ta2,t2 | ||
1151 | - daddu t2,ta2,v0 | ||
1152 | - sltu v0,t2,ta2 | ||
1153 | - sd t2,-16(a0) | ||
1154 | - daddu v0,t8 | ||
1155 | - | ||
1156 | - daddu ta3,t3 | ||
1157 | - sltu t9,ta3,t3 | ||
1158 | - daddu t3,ta3,v0 | ||
1159 | - sltu v0,t3,ta3 | ||
1160 | - sd t3,-8(a0) | ||
1161 | - daddu v0,t9 | ||
1162 | - | ||
1163 | - .set noreorder | ||
1164 | - bgtzl AT,.L_bn_add_words_loop | ||
1165 | - ld t0,0(a1) | ||
1166 | - | ||
1167 | - bnezl a3,.L_bn_add_words_tail | ||
1168 | - ld t0,0(a1) | ||
1169 | - .set reorder | ||
1170 | - | ||
1171 | -.L_bn_add_words_return: | ||
1172 | - jr ra | ||
1173 | - | ||
1174 | -.L_bn_add_words_tail: | ||
1175 | - ld ta0,0(a2) | ||
1176 | - daddu ta0,t0 | ||
1177 | - subu a3,1 | ||
1178 | - sltu t8,ta0,t0 | ||
1179 | - daddu t0,ta0,v0 | ||
1180 | - sltu v0,t0,ta0 | ||
1181 | - sd t0,0(a0) | ||
1182 | - daddu v0,t8 | ||
1183 | - beqz a3,.L_bn_add_words_return | ||
1184 | - | ||
1185 | - ld t1,8(a1) | ||
1186 | - ld ta1,8(a2) | ||
1187 | - daddu ta1,t1 | ||
1188 | - subu a3,1 | ||
1189 | - sltu t9,ta1,t1 | ||
1190 | - daddu t1,ta1,v0 | ||
1191 | - sltu v0,t1,ta1 | ||
1192 | - sd t1,8(a0) | ||
1193 | - daddu v0,t9 | ||
1194 | - beqz a3,.L_bn_add_words_return | ||
1195 | - | ||
1196 | - ld t2,16(a1) | ||
1197 | - ld ta2,16(a2) | ||
1198 | - daddu ta2,t2 | ||
1199 | - sltu t8,ta2,t2 | ||
1200 | - daddu t2,ta2,v0 | ||
1201 | - sltu v0,t2,ta2 | ||
1202 | - sd t2,16(a0) | ||
1203 | - daddu v0,t8 | ||
1204 | - jr ra | ||
1205 | -END(bn_add_words) | ||
1206 | - | ||
1207 | -.align 5 | ||
1208 | -LEAF(bn_sub_words) | ||
1209 | - .set noreorder | ||
1210 | - bgtzl a3,.L_bn_sub_words_proceed | ||
1211 | - ld t0,0(a1) | ||
1212 | - jr ra | ||
1213 | - move v0,zero | ||
1214 | - .set reorder | ||
1215 | - | ||
1216 | -.L_bn_sub_words_proceed: | ||
1217 | - li MINUS4,-4 | ||
1218 | - and AT,a3,MINUS4 | ||
1219 | - move v0,zero | ||
1220 | - beqz AT,.L_bn_sub_words_tail | ||
1221 | - | ||
1222 | -.L_bn_sub_words_loop: | ||
1223 | - ld ta0,0(a2) | ||
1224 | - subu a3,4 | ||
1225 | - ld t1,8(a1) | ||
1226 | - and AT,a3,MINUS4 | ||
1227 | - ld t2,16(a1) | ||
1228 | - PTR_ADD a2,32 | ||
1229 | - ld t3,24(a1) | ||
1230 | - PTR_ADD a0,32 | ||
1231 | - ld ta1,-24(a2) | ||
1232 | - PTR_ADD a1,32 | ||
1233 | - ld ta2,-16(a2) | ||
1234 | - ld ta3,-8(a2) | ||
1235 | - sltu t8,t0,ta0 | ||
1236 | - dsubu t0,ta0 | ||
1237 | - dsubu ta0,t0,v0 | ||
1238 | - sd ta0,-32(a0) | ||
1239 | - MOVNZ (t0,v0,t8) | ||
1240 | - | ||
1241 | - sltu t9,t1,ta1 | ||
1242 | - dsubu t1,ta1 | ||
1243 | - dsubu ta1,t1,v0 | ||
1244 | - sd ta1,-24(a0) | ||
1245 | - MOVNZ (t1,v0,t9) | ||
1246 | - | ||
1247 | - | ||
1248 | - sltu t8,t2,ta2 | ||
1249 | - dsubu t2,ta2 | ||
1250 | - dsubu ta2,t2,v0 | ||
1251 | - sd ta2,-16(a0) | ||
1252 | - MOVNZ (t2,v0,t8) | ||
1253 | - | ||
1254 | - sltu t9,t3,ta3 | ||
1255 | - dsubu t3,ta3 | ||
1256 | - dsubu ta3,t3,v0 | ||
1257 | - sd ta3,-8(a0) | ||
1258 | - MOVNZ (t3,v0,t9) | ||
1259 | - | ||
1260 | - .set noreorder | ||
1261 | - bgtzl AT,.L_bn_sub_words_loop | ||
1262 | - ld t0,0(a1) | ||
1263 | - | ||
1264 | - bnezl a3,.L_bn_sub_words_tail | ||
1265 | - ld t0,0(a1) | ||
1266 | - .set reorder | ||
1267 | - | ||
1268 | -.L_bn_sub_words_return: | ||
1269 | - jr ra | ||
1270 | - | ||
1271 | -.L_bn_sub_words_tail: | ||
1272 | - ld ta0,0(a2) | ||
1273 | - subu a3,1 | ||
1274 | - sltu t8,t0,ta0 | ||
1275 | - dsubu t0,ta0 | ||
1276 | - dsubu ta0,t0,v0 | ||
1277 | - MOVNZ (t0,v0,t8) | ||
1278 | - sd ta0,0(a0) | ||
1279 | - beqz a3,.L_bn_sub_words_return | ||
1280 | - | ||
1281 | - ld t1,8(a1) | ||
1282 | - subu a3,1 | ||
1283 | - ld ta1,8(a2) | ||
1284 | - sltu t9,t1,ta1 | ||
1285 | - dsubu t1,ta1 | ||
1286 | - dsubu ta1,t1,v0 | ||
1287 | - MOVNZ (t1,v0,t9) | ||
1288 | - sd ta1,8(a0) | ||
1289 | - beqz a3,.L_bn_sub_words_return | ||
1290 | - | ||
1291 | - ld t2,16(a1) | ||
1292 | - ld ta2,16(a2) | ||
1293 | - sltu t8,t2,ta2 | ||
1294 | - dsubu t2,ta2 | ||
1295 | - dsubu ta2,t2,v0 | ||
1296 | - MOVNZ (t2,v0,t8) | ||
1297 | - sd ta2,16(a0) | ||
1298 | - jr ra | ||
1299 | -END(bn_sub_words) | ||
1300 | - | ||
1301 | -#undef MINUS4 | ||
1302 | - | ||
1303 | -.align 5 | ||
1304 | -LEAF(bn_div_3_words) | ||
1305 | - .set reorder | ||
1306 | - move a3,a0 /* we know that bn_div_words doesn't | ||
1307 | - * touch a3, ta2, ta3 and preserves a2 | ||
1308 | - * so that we can save two arguments | ||
1309 | - * and return address in registers | ||
1310 | - * instead of stack:-) | ||
1311 | - */ | ||
1312 | - ld a0,(a3) | ||
1313 | - move ta2,a1 | ||
1314 | - ld a1,-8(a3) | ||
1315 | - bne a0,a2,.L_bn_div_3_words_proceed | ||
1316 | - li v0,-1 | ||
1317 | - jr ra | ||
1318 | -.L_bn_div_3_words_proceed: | ||
1319 | - move ta3,ra | ||
1320 | - bal bn_div_words | ||
1321 | - move ra,ta3 | ||
1322 | - dmultu ta2,v0 | ||
1323 | - ld t2,-16(a3) | ||
1324 | - move ta0,zero | ||
1325 | - mfhi t1 | ||
1326 | - mflo t0 | ||
1327 | - sltu t8,t1,v1 | ||
1328 | -.L_bn_div_3_words_inner_loop: | ||
1329 | - bnez t8,.L_bn_div_3_words_inner_loop_done | ||
1330 | - sgeu AT,t2,t0 | ||
1331 | - seq t9,t1,v1 | ||
1332 | - and AT,t9 | ||
1333 | - sltu t3,t0,ta2 | ||
1334 | - daddu v1,a2 | ||
1335 | - dsubu t1,t3 | ||
1336 | - dsubu t0,ta2 | ||
1337 | - sltu t8,t1,v1 | ||
1338 | - sltu ta0,v1,a2 | ||
1339 | - or t8,ta0 | ||
1340 | - .set noreorder | ||
1341 | - beqzl AT,.L_bn_div_3_words_inner_loop | ||
1342 | - dsubu v0,1 | ||
1343 | - .set reorder | ||
1344 | -.L_bn_div_3_words_inner_loop_done: | ||
1345 | - jr ra | ||
1346 | -END(bn_div_3_words) | ||
1347 | - | ||
1348 | -.align 5 | ||
1349 | -LEAF(bn_div_words) | ||
1350 | - .set noreorder | ||
1351 | - bnezl a2,.L_bn_div_words_proceed | ||
1352 | - move v1,zero | ||
1353 | - jr ra | ||
1354 | - li v0,-1 /* I'd rather signal div-by-zero | ||
1355 | - * which can be done with 'break 7' */ | ||
1356 | - | ||
1357 | -.L_bn_div_words_proceed: | ||
1358 | - bltz a2,.L_bn_div_words_body | ||
1359 | - move t9,v1 | ||
1360 | - dsll a2,1 | ||
1361 | - bgtz a2,.-4 | ||
1362 | - addu t9,1 | ||
1363 | - | ||
1364 | - .set reorder | ||
1365 | - negu t1,t9 | ||
1366 | - li t2,-1 | ||
1367 | - dsll t2,t1 | ||
1368 | - and t2,a0 | ||
1369 | - dsrl AT,a1,t1 | ||
1370 | - .set noreorder | ||
1371 | - bnezl t2,.+8 | ||
1372 | - break 6 /* signal overflow */ | ||
1373 | - .set reorder | ||
1374 | - dsll a0,t9 | ||
1375 | - dsll a1,t9 | ||
1376 | - or a0,AT | ||
1377 | - | ||
1378 | -#define QT ta0 | ||
1379 | -#define HH ta1 | ||
1380 | -#define DH v1 | ||
1381 | -.L_bn_div_words_body: | ||
1382 | - dsrl DH,a2,32 | ||
1383 | - sgeu AT,a0,a2 | ||
1384 | - .set noreorder | ||
1385 | - bnezl AT,.+8 | ||
1386 | - dsubu a0,a2 | ||
1387 | - .set reorder | ||
1388 | - | ||
1389 | - li QT,-1 | ||
1390 | - dsrl HH,a0,32 | ||
1391 | - dsrl QT,32 /* q=0xffffffff */ | ||
1392 | - beq DH,HH,.L_bn_div_words_skip_div1 | ||
1393 | - ddivu zero,a0,DH | ||
1394 | - mflo QT | ||
1395 | -.L_bn_div_words_skip_div1: | ||
1396 | - dmultu a2,QT | ||
1397 | - dsll t3,a0,32 | ||
1398 | - dsrl AT,a1,32 | ||
1399 | - or t3,AT | ||
1400 | - mflo t0 | ||
1401 | - mfhi t1 | ||
1402 | -.L_bn_div_words_inner_loop1: | ||
1403 | - sltu t2,t3,t0 | ||
1404 | - seq t8,HH,t1 | ||
1405 | - sltu AT,HH,t1 | ||
1406 | - and t2,t8 | ||
1407 | - sltu v0,t0,a2 | ||
1408 | - or AT,t2 | ||
1409 | - .set noreorder | ||
1410 | - beqz AT,.L_bn_div_words_inner_loop1_done | ||
1411 | - dsubu t1,v0 | ||
1412 | - dsubu t0,a2 | ||
1413 | - b .L_bn_div_words_inner_loop1 | ||
1414 | - dsubu QT,1 | ||
1415 | - .set reorder | ||
1416 | -.L_bn_div_words_inner_loop1_done: | ||
1417 | - | ||
1418 | - dsll a1,32 | ||
1419 | - dsubu a0,t3,t0 | ||
1420 | - dsll v0,QT,32 | ||
1421 | - | ||
1422 | - li QT,-1 | ||
1423 | - dsrl HH,a0,32 | ||
1424 | - dsrl QT,32 /* q=0xffffffff */ | ||
1425 | - beq DH,HH,.L_bn_div_words_skip_div2 | ||
1426 | - ddivu zero,a0,DH | ||
1427 | - mflo QT | ||
1428 | -.L_bn_div_words_skip_div2: | ||
1429 | -#undef DH | ||
1430 | - dmultu a2,QT | ||
1431 | - dsll t3,a0,32 | ||
1432 | - dsrl AT,a1,32 | ||
1433 | - or t3,AT | ||
1434 | - mflo t0 | ||
1435 | - mfhi t1 | ||
1436 | -.L_bn_div_words_inner_loop2: | ||
1437 | - sltu t2,t3,t0 | ||
1438 | - seq t8,HH,t1 | ||
1439 | - sltu AT,HH,t1 | ||
1440 | - and t2,t8 | ||
1441 | - sltu v1,t0,a2 | ||
1442 | - or AT,t2 | ||
1443 | - .set noreorder | ||
1444 | - beqz AT,.L_bn_div_words_inner_loop2_done | ||
1445 | - dsubu t1,v1 | ||
1446 | - dsubu t0,a2 | ||
1447 | - b .L_bn_div_words_inner_loop2 | ||
1448 | - dsubu QT,1 | ||
1449 | - .set reorder | ||
1450 | -.L_bn_div_words_inner_loop2_done: | ||
1451 | -#undef HH | ||
1452 | - | ||
1453 | - dsubu a0,t3,t0 | ||
1454 | - or v0,QT | ||
1455 | - dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ | ||
1456 | - dsrl a2,t9 /* restore a2 */ | ||
1457 | - jr ra | ||
1458 | -#undef QT | ||
1459 | -END(bn_div_words) | ||
1460 | - | ||
1461 | -#define a_0 t0 | ||
1462 | -#define a_1 t1 | ||
1463 | -#define a_2 t2 | ||
1464 | -#define a_3 t3 | ||
1465 | -#define b_0 ta0 | ||
1466 | -#define b_1 ta1 | ||
1467 | -#define b_2 ta2 | ||
1468 | -#define b_3 ta3 | ||
1469 | - | ||
1470 | -#define a_4 s0 | ||
1471 | -#define a_5 s2 | ||
1472 | -#define a_6 s4 | ||
1473 | -#define a_7 a1 /* once we load a[7] we don't need a anymore */ | ||
1474 | -#define b_4 s1 | ||
1475 | -#define b_5 s3 | ||
1476 | -#define b_6 s5 | ||
1477 | -#define b_7 a2 /* once we load b[7] we don't need b anymore */ | ||
1478 | - | ||
1479 | -#define t_1 t8 | ||
1480 | -#define t_2 t9 | ||
1481 | - | ||
1482 | -#define c_1 v0 | ||
1483 | -#define c_2 v1 | ||
1484 | -#define c_3 a3 | ||
1485 | - | ||
1486 | -#define FRAME_SIZE 48 | ||
1487 | - | ||
1488 | -.align 5 | ||
1489 | -LEAF(bn_mul_comba8) | ||
1490 | - .set noreorder | ||
1491 | - PTR_SUB sp,FRAME_SIZE | ||
1492 | - .frame sp,64,ra | ||
1493 | - .set reorder | ||
1494 | - ld a_0,0(a1) /* If compiled with -mips3 option on | ||
1495 | - * R5000 box assembler barks on this | ||
1496 | - * line with "shouldn't have mult/div | ||
1497 | - * as last instruction in bb (R10K | ||
1498 | - * bug)" warning. If anybody out there | ||
1499 | - * has a clue about how to circumvent | ||
1500 | - * this do send me a note. | ||
1501 | - * <appro@fy.chalmers.se> | ||
1502 | - */ | ||
1503 | - ld b_0,0(a2) | ||
1504 | - ld a_1,8(a1) | ||
1505 | - ld a_2,16(a1) | ||
1506 | - ld a_3,24(a1) | ||
1507 | - ld b_1,8(a2) | ||
1508 | - ld b_2,16(a2) | ||
1509 | - ld b_3,24(a2) | ||
1510 | - dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
1511 | - sd s0,0(sp) | ||
1512 | - sd s1,8(sp) | ||
1513 | - sd s2,16(sp) | ||
1514 | - sd s3,24(sp) | ||
1515 | - sd s4,32(sp) | ||
1516 | - sd s5,40(sp) | ||
1517 | - mflo c_1 | ||
1518 | - mfhi c_2 | ||
1519 | - | ||
1520 | - dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ | ||
1521 | - ld a_4,32(a1) | ||
1522 | - ld a_5,40(a1) | ||
1523 | - ld a_6,48(a1) | ||
1524 | - ld a_7,56(a1) | ||
1525 | - ld b_4,32(a2) | ||
1526 | - ld b_5,40(a2) | ||
1527 | - mflo t_1 | ||
1528 | - mfhi t_2 | ||
1529 | - daddu c_2,t_1 | ||
1530 | - sltu AT,c_2,t_1 | ||
1531 | - daddu c_3,t_2,AT | ||
1532 | - dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ | ||
1533 | - ld b_6,48(a2) | ||
1534 | - ld b_7,56(a2) | ||
1535 | - sd c_1,0(a0) /* r[0]=c1; */ | ||
1536 | - mflo t_1 | ||
1537 | - mfhi t_2 | ||
1538 | - daddu c_2,t_1 | ||
1539 | - sltu AT,c_2,t_1 | ||
1540 | - daddu t_2,AT | ||
1541 | - daddu c_3,t_2 | ||
1542 | - sltu c_1,c_3,t_2 | ||
1543 | - sd c_2,8(a0) /* r[1]=c2; */ | ||
1544 | - | ||
1545 | - dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ | ||
1546 | - mflo t_1 | ||
1547 | - mfhi t_2 | ||
1548 | - daddu c_3,t_1 | ||
1549 | - sltu AT,c_3,t_1 | ||
1550 | - daddu t_2,AT | ||
1551 | - daddu c_1,t_2 | ||
1552 | - dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
1553 | - mflo t_1 | ||
1554 | - mfhi t_2 | ||
1555 | - daddu c_3,t_1 | ||
1556 | - sltu AT,c_3,t_1 | ||
1557 | - daddu t_2,AT | ||
1558 | - daddu c_1,t_2 | ||
1559 | - sltu c_2,c_1,t_2 | ||
1560 | - dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ | ||
1561 | - mflo t_1 | ||
1562 | - mfhi t_2 | ||
1563 | - daddu c_3,t_1 | ||
1564 | - sltu AT,c_3,t_1 | ||
1565 | - daddu t_2,AT | ||
1566 | - daddu c_1,t_2 | ||
1567 | - sltu AT,c_1,t_2 | ||
1568 | - daddu c_2,AT | ||
1569 | - sd c_3,16(a0) /* r[2]=c3; */ | ||
1570 | - | ||
1571 | - dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ | ||
1572 | - mflo t_1 | ||
1573 | - mfhi t_2 | ||
1574 | - daddu c_1,t_1 | ||
1575 | - sltu AT,c_1,t_1 | ||
1576 | - daddu t_2,AT | ||
1577 | - daddu c_2,t_2 | ||
1578 | - sltu c_3,c_2,t_2 | ||
1579 | - dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ | ||
1580 | - mflo t_1 | ||
1581 | - mfhi t_2 | ||
1582 | - daddu c_1,t_1 | ||
1583 | - sltu AT,c_1,t_1 | ||
1584 | - daddu t_2,AT | ||
1585 | - daddu c_2,t_2 | ||
1586 | - sltu AT,c_2,t_2 | ||
1587 | - daddu c_3,AT | ||
1588 | - dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ | ||
1589 | - mflo t_1 | ||
1590 | - mfhi t_2 | ||
1591 | - daddu c_1,t_1 | ||
1592 | - sltu AT,c_1,t_1 | ||
1593 | - daddu t_2,AT | ||
1594 | - daddu c_2,t_2 | ||
1595 | - sltu AT,c_2,t_2 | ||
1596 | - daddu c_3,AT | ||
1597 | - dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ | ||
1598 | - mflo t_1 | ||
1599 | - mfhi t_2 | ||
1600 | - daddu c_1,t_1 | ||
1601 | - sltu AT,c_1,t_1 | ||
1602 | - daddu t_2,AT | ||
1603 | - daddu c_2,t_2 | ||
1604 | - sltu AT,c_2,t_2 | ||
1605 | - daddu c_3,AT | ||
1606 | - sd c_1,24(a0) /* r[3]=c1; */ | ||
1607 | - | ||
1608 | - dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */ | ||
1609 | - mflo t_1 | ||
1610 | - mfhi t_2 | ||
1611 | - daddu c_2,t_1 | ||
1612 | - sltu AT,c_2,t_1 | ||
1613 | - daddu t_2,AT | ||
1614 | - daddu c_3,t_2 | ||
1615 | - sltu c_1,c_3,t_2 | ||
1616 | - dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ | ||
1617 | - mflo t_1 | ||
1618 | - mfhi t_2 | ||
1619 | - daddu c_2,t_1 | ||
1620 | - sltu AT,c_2,t_1 | ||
1621 | - daddu t_2,AT | ||
1622 | - daddu c_3,t_2 | ||
1623 | - sltu AT,c_3,t_2 | ||
1624 | - daddu c_1,AT | ||
1625 | - dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
1626 | - mflo t_1 | ||
1627 | - mfhi t_2 | ||
1628 | - daddu c_2,t_1 | ||
1629 | - sltu AT,c_2,t_1 | ||
1630 | - daddu t_2,AT | ||
1631 | - daddu c_3,t_2 | ||
1632 | - sltu AT,c_3,t_2 | ||
1633 | - daddu c_1,AT | ||
1634 | - dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ | ||
1635 | - mflo t_1 | ||
1636 | - mfhi t_2 | ||
1637 | - daddu c_2,t_1 | ||
1638 | - sltu AT,c_2,t_1 | ||
1639 | - daddu t_2,AT | ||
1640 | - daddu c_3,t_2 | ||
1641 | - sltu AT,c_3,t_2 | ||
1642 | - daddu c_1,AT | ||
1643 | - dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */ | ||
1644 | - mflo t_1 | ||
1645 | - mfhi t_2 | ||
1646 | - daddu c_2,t_1 | ||
1647 | - sltu AT,c_2,t_1 | ||
1648 | - daddu t_2,AT | ||
1649 | - daddu c_3,t_2 | ||
1650 | - sltu AT,c_3,t_2 | ||
1651 | - daddu c_1,AT | ||
1652 | - sd c_2,32(a0) /* r[4]=c2; */ | ||
1653 | - | ||
1654 | - dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */ | ||
1655 | - mflo t_1 | ||
1656 | - mfhi t_2 | ||
1657 | - daddu c_3,t_1 | ||
1658 | - sltu AT,c_3,t_1 | ||
1659 | - daddu t_2,AT | ||
1660 | - daddu c_1,t_2 | ||
1661 | - sltu c_2,c_1,t_2 | ||
1662 | - dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ | ||
1663 | - mflo t_1 | ||
1664 | - mfhi t_2 | ||
1665 | - daddu c_3,t_1 | ||
1666 | - sltu AT,c_3,t_1 | ||
1667 | - daddu t_2,AT | ||
1668 | - daddu c_1,t_2 | ||
1669 | - sltu AT,c_1,t_2 | ||
1670 | - daddu c_2,AT | ||
1671 | - dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ | ||
1672 | - mflo t_1 | ||
1673 | - mfhi t_2 | ||
1674 | - daddu c_3,t_1 | ||
1675 | - sltu AT,c_3,t_1 | ||
1676 | - daddu t_2,AT | ||
1677 | - daddu c_1,t_2 | ||
1678 | - sltu AT,c_1,t_2 | ||
1679 | - daddu c_2,AT | ||
1680 | - dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ | ||
1681 | - mflo t_1 | ||
1682 | - mfhi t_2 | ||
1683 | - daddu c_3,t_1 | ||
1684 | - sltu AT,c_3,t_1 | ||
1685 | - daddu t_2,AT | ||
1686 | - daddu c_1,t_2 | ||
1687 | - sltu AT,c_1,t_2 | ||
1688 | - daddu c_2,AT | ||
1689 | - dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */ | ||
1690 | - mflo t_1 | ||
1691 | - mfhi t_2 | ||
1692 | - daddu c_3,t_1 | ||
1693 | - sltu AT,c_3,t_1 | ||
1694 | - daddu t_2,AT | ||
1695 | - daddu c_1,t_2 | ||
1696 | - sltu AT,c_1,t_2 | ||
1697 | - daddu c_2,AT | ||
1698 | - dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */ | ||
1699 | - mflo t_1 | ||
1700 | - mfhi t_2 | ||
1701 | - daddu c_3,t_1 | ||
1702 | - sltu AT,c_3,t_1 | ||
1703 | - daddu t_2,AT | ||
1704 | - daddu c_1,t_2 | ||
1705 | - sltu AT,c_1,t_2 | ||
1706 | - daddu c_2,AT | ||
1707 | - sd c_3,40(a0) /* r[5]=c3; */ | ||
1708 | - | ||
1709 | - dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */ | ||
1710 | - mflo t_1 | ||
1711 | - mfhi t_2 | ||
1712 | - daddu c_1,t_1 | ||
1713 | - sltu AT,c_1,t_1 | ||
1714 | - daddu t_2,AT | ||
1715 | - daddu c_2,t_2 | ||
1716 | - sltu c_3,c_2,t_2 | ||
1717 | - dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ | ||
1718 | - mflo t_1 | ||
1719 | - mfhi t_2 | ||
1720 | - daddu c_1,t_1 | ||
1721 | - sltu AT,c_1,t_1 | ||
1722 | - daddu t_2,AT | ||
1723 | - daddu c_2,t_2 | ||
1724 | - sltu AT,c_2,t_2 | ||
1725 | - daddu c_3,AT | ||
1726 | - dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ | ||
1727 | - mflo t_1 | ||
1728 | - mfhi t_2 | ||
1729 | - daddu c_1,t_1 | ||
1730 | - sltu AT,c_1,t_1 | ||
1731 | - daddu t_2,AT | ||
1732 | - daddu c_2,t_2 | ||
1733 | - sltu AT,c_2,t_2 | ||
1734 | - daddu c_3,AT | ||
1735 | - dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
1736 | - mflo t_1 | ||
1737 | - mfhi t_2 | ||
1738 | - daddu c_1,t_1 | ||
1739 | - sltu AT,c_1,t_1 | ||
1740 | - daddu t_2,AT | ||
1741 | - daddu c_2,t_2 | ||
1742 | - sltu AT,c_2,t_2 | ||
1743 | - daddu c_3,AT | ||
1744 | - dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */ | ||
1745 | - mflo t_1 | ||
1746 | - mfhi t_2 | ||
1747 | - daddu c_1,t_1 | ||
1748 | - sltu AT,c_1,t_1 | ||
1749 | - daddu t_2,AT | ||
1750 | - daddu c_2,t_2 | ||
1751 | - sltu AT,c_2,t_2 | ||
1752 | - daddu c_3,AT | ||
1753 | - dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */ | ||
1754 | - mflo t_1 | ||
1755 | - mfhi t_2 | ||
1756 | - daddu c_1,t_1 | ||
1757 | - sltu AT,c_1,t_1 | ||
1758 | - daddu t_2,AT | ||
1759 | - daddu c_2,t_2 | ||
1760 | - sltu AT,c_2,t_2 | ||
1761 | - daddu c_3,AT | ||
1762 | - dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */ | ||
1763 | - mflo t_1 | ||
1764 | - mfhi t_2 | ||
1765 | - daddu c_1,t_1 | ||
1766 | - sltu AT,c_1,t_1 | ||
1767 | - daddu t_2,AT | ||
1768 | - daddu c_2,t_2 | ||
1769 | - sltu AT,c_2,t_2 | ||
1770 | - daddu c_3,AT | ||
1771 | - sd c_1,48(a0) /* r[6]=c1; */ | ||
1772 | - | ||
1773 | - dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */ | ||
1774 | - mflo t_1 | ||
1775 | - mfhi t_2 | ||
1776 | - daddu c_2,t_1 | ||
1777 | - sltu AT,c_2,t_1 | ||
1778 | - daddu t_2,AT | ||
1779 | - daddu c_3,t_2 | ||
1780 | - sltu c_1,c_3,t_2 | ||
1781 | - dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ | ||
1782 | - mflo t_1 | ||
1783 | - mfhi t_2 | ||
1784 | - daddu c_2,t_1 | ||
1785 | - sltu AT,c_2,t_1 | ||
1786 | - daddu t_2,AT | ||
1787 | - daddu c_3,t_2 | ||
1788 | - sltu AT,c_3,t_2 | ||
1789 | - daddu c_1,AT | ||
1790 | - dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ | ||
1791 | - mflo t_1 | ||
1792 | - mfhi t_2 | ||
1793 | - daddu c_2,t_1 | ||
1794 | - sltu AT,c_2,t_1 | ||
1795 | - daddu t_2,AT | ||
1796 | - daddu c_3,t_2 | ||
1797 | - sltu AT,c_3,t_2 | ||
1798 | - daddu c_1,AT | ||
1799 | - dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */ | ||
1800 | - mflo t_1 | ||
1801 | - mfhi t_2 | ||
1802 | - daddu c_2,t_1 | ||
1803 | - sltu AT,c_2,t_1 | ||
1804 | - daddu t_2,AT | ||
1805 | - daddu c_3,t_2 | ||
1806 | - sltu AT,c_3,t_2 | ||
1807 | - daddu c_1,AT | ||
1808 | - dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */ | ||
1809 | - mflo t_1 | ||
1810 | - mfhi t_2 | ||
1811 | - daddu c_2,t_1 | ||
1812 | - sltu AT,c_2,t_1 | ||
1813 | - daddu t_2,AT | ||
1814 | - daddu c_3,t_2 | ||
1815 | - sltu AT,c_3,t_2 | ||
1816 | - daddu c_1,AT | ||
1817 | - dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */ | ||
1818 | - mflo t_1 | ||
1819 | - mfhi t_2 | ||
1820 | - daddu c_2,t_1 | ||
1821 | - sltu AT,c_2,t_1 | ||
1822 | - daddu t_2,AT | ||
1823 | - daddu c_3,t_2 | ||
1824 | - sltu AT,c_3,t_2 | ||
1825 | - daddu c_1,AT | ||
1826 | - dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */ | ||
1827 | - mflo t_1 | ||
1828 | - mfhi t_2 | ||
1829 | - daddu c_2,t_1 | ||
1830 | - sltu AT,c_2,t_1 | ||
1831 | - daddu t_2,AT | ||
1832 | - daddu c_3,t_2 | ||
1833 | - sltu AT,c_3,t_2 | ||
1834 | - daddu c_1,AT | ||
1835 | - dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */ | ||
1836 | - mflo t_1 | ||
1837 | - mfhi t_2 | ||
1838 | - daddu c_2,t_1 | ||
1839 | - sltu AT,c_2,t_1 | ||
1840 | - daddu t_2,AT | ||
1841 | - daddu c_3,t_2 | ||
1842 | - sltu AT,c_3,t_2 | ||
1843 | - daddu c_1,AT | ||
1844 | - sd c_2,56(a0) /* r[7]=c2; */ | ||
1845 | - | ||
1846 | - dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */ | ||
1847 | - mflo t_1 | ||
1848 | - mfhi t_2 | ||
1849 | - daddu c_3,t_1 | ||
1850 | - sltu AT,c_3,t_1 | ||
1851 | - daddu t_2,AT | ||
1852 | - daddu c_1,t_2 | ||
1853 | - sltu c_2,c_1,t_2 | ||
1854 | - dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ | ||
1855 | - mflo t_1 | ||
1856 | - mfhi t_2 | ||
1857 | - daddu c_3,t_1 | ||
1858 | - sltu AT,c_3,t_1 | ||
1859 | - daddu t_2,AT | ||
1860 | - daddu c_1,t_2 | ||
1861 | - sltu AT,c_1,t_2 | ||
1862 | - daddu c_2,AT | ||
1863 | - dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ | ||
1864 | - mflo t_1 | ||
1865 | - mfhi t_2 | ||
1866 | - daddu c_3,t_1 | ||
1867 | - sltu AT,c_3,t_1 | ||
1868 | - daddu t_2,AT | ||
1869 | - daddu c_1,t_2 | ||
1870 | - sltu AT,c_1,t_2 | ||
1871 | - daddu c_2,AT | ||
1872 | - dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ | ||
1873 | - mflo t_1 | ||
1874 | - mfhi t_2 | ||
1875 | - daddu c_3,t_1 | ||
1876 | - sltu AT,c_3,t_1 | ||
1877 | - daddu t_2,AT | ||
1878 | - daddu c_1,t_2 | ||
1879 | - sltu AT,c_1,t_2 | ||
1880 | - daddu c_2,AT | ||
1881 | - dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */ | ||
1882 | - mflo t_1 | ||
1883 | - mfhi t_2 | ||
1884 | - daddu c_3,t_1 | ||
1885 | - sltu AT,c_3,t_1 | ||
1886 | - daddu t_2,AT | ||
1887 | - daddu c_1,t_2 | ||
1888 | - sltu AT,c_1,t_2 | ||
1889 | - daddu c_2,AT | ||
1890 | - dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */ | ||
1891 | - mflo t_1 | ||
1892 | - mfhi t_2 | ||
1893 | - daddu c_3,t_1 | ||
1894 | - sltu AT,c_3,t_1 | ||
1895 | - daddu t_2,AT | ||
1896 | - daddu c_1,t_2 | ||
1897 | - sltu AT,c_1,t_2 | ||
1898 | - daddu c_2,AT | ||
1899 | - dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */ | ||
1900 | - mflo t_1 | ||
1901 | - mfhi t_2 | ||
1902 | - daddu c_3,t_1 | ||
1903 | - sltu AT,c_3,t_1 | ||
1904 | - daddu t_2,AT | ||
1905 | - daddu c_1,t_2 | ||
1906 | - sltu AT,c_1,t_2 | ||
1907 | - daddu c_2,AT | ||
1908 | - sd c_3,64(a0) /* r[8]=c3; */ | ||
1909 | - | ||
1910 | - dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */ | ||
1911 | - mflo t_1 | ||
1912 | - mfhi t_2 | ||
1913 | - daddu c_1,t_1 | ||
1914 | - sltu AT,c_1,t_1 | ||
1915 | - daddu t_2,AT | ||
1916 | - daddu c_2,t_2 | ||
1917 | - sltu c_3,c_2,t_2 | ||
1918 | - dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ | ||
1919 | - mflo t_1 | ||
1920 | - mfhi t_2 | ||
1921 | - daddu c_1,t_1 | ||
1922 | - sltu AT,c_1,t_1 | ||
1923 | - daddu t_2,AT | ||
1924 | - daddu c_2,t_2 | ||
1925 | - sltu AT,c_2,t_2 | ||
1926 | - daddu c_3,AT | ||
1927 | - dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ | ||
1928 | - mflo t_1 | ||
1929 | - mfhi t_2 | ||
1930 | - daddu c_1,t_1 | ||
1931 | - sltu AT,c_1,t_1 | ||
1932 | - daddu t_2,AT | ||
1933 | - daddu c_2,t_2 | ||
1934 | - sltu AT,c_2,t_2 | ||
1935 | - daddu c_3,AT | ||
1936 | - dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */ | ||
1937 | - mflo t_1 | ||
1938 | - mfhi t_2 | ||
1939 | - daddu c_1,t_1 | ||
1940 | - sltu AT,c_1,t_1 | ||
1941 | - daddu t_2,AT | ||
1942 | - daddu c_2,t_2 | ||
1943 | - sltu AT,c_2,t_2 | ||
1944 | - daddu c_3,AT | ||
1945 | - dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */ | ||
1946 | - mflo t_1 | ||
1947 | - mfhi t_2 | ||
1948 | - daddu c_1,t_1 | ||
1949 | - sltu AT,c_1,t_1 | ||
1950 | - daddu t_2,AT | ||
1951 | - daddu c_2,t_2 | ||
1952 | - sltu AT,c_2,t_2 | ||
1953 | - daddu c_3,AT | ||
1954 | - dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */ | ||
1955 | - mflo t_1 | ||
1956 | - mfhi t_2 | ||
1957 | - daddu c_1,t_1 | ||
1958 | - sltu AT,c_1,t_1 | ||
1959 | - daddu t_2,AT | ||
1960 | - daddu c_2,t_2 | ||
1961 | - sltu AT,c_2,t_2 | ||
1962 | - daddu c_3,AT | ||
1963 | - sd c_1,72(a0) /* r[9]=c1; */ | ||
1964 | - | ||
1965 | - dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */ | ||
1966 | - mflo t_1 | ||
1967 | - mfhi t_2 | ||
1968 | - daddu c_2,t_1 | ||
1969 | - sltu AT,c_2,t_1 | ||
1970 | - daddu t_2,AT | ||
1971 | - daddu c_3,t_2 | ||
1972 | - sltu c_1,c_3,t_2 | ||
1973 | - dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ | ||
1974 | - mflo t_1 | ||
1975 | - mfhi t_2 | ||
1976 | - daddu c_2,t_1 | ||
1977 | - sltu AT,c_2,t_1 | ||
1978 | - daddu t_2,AT | ||
1979 | - daddu c_3,t_2 | ||
1980 | - sltu AT,c_3,t_2 | ||
1981 | - daddu c_1,AT | ||
1982 | - dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ | ||
1983 | - mflo t_1 | ||
1984 | - mfhi t_2 | ||
1985 | - daddu c_2,t_1 | ||
1986 | - sltu AT,c_2,t_1 | ||
1987 | - daddu t_2,AT | ||
1988 | - daddu c_3,t_2 | ||
1989 | - sltu AT,c_3,t_2 | ||
1990 | - daddu c_1,AT | ||
1991 | - dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */ | ||
1992 | - mflo t_1 | ||
1993 | - mfhi t_2 | ||
1994 | - daddu c_2,t_1 | ||
1995 | - sltu AT,c_2,t_1 | ||
1996 | - daddu t_2,AT | ||
1997 | - daddu c_3,t_2 | ||
1998 | - sltu AT,c_3,t_2 | ||
1999 | - daddu c_1,AT | ||
2000 | - dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */ | ||
2001 | - mflo t_1 | ||
2002 | - mfhi t_2 | ||
2003 | - daddu c_2,t_1 | ||
2004 | - sltu AT,c_2,t_1 | ||
2005 | - daddu t_2,AT | ||
2006 | - daddu c_3,t_2 | ||
2007 | - sltu AT,c_3,t_2 | ||
2008 | - daddu c_1,AT | ||
2009 | - sd c_2,80(a0) /* r[10]=c2; */ | ||
2010 | - | ||
2011 | - dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */ | ||
2012 | - mflo t_1 | ||
2013 | - mfhi t_2 | ||
2014 | - daddu c_3,t_1 | ||
2015 | - sltu AT,c_3,t_1 | ||
2016 | - daddu t_2,AT | ||
2017 | - daddu c_1,t_2 | ||
2018 | - sltu c_2,c_1,t_2 | ||
2019 | - dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ | ||
2020 | - mflo t_1 | ||
2021 | - mfhi t_2 | ||
2022 | - daddu c_3,t_1 | ||
2023 | - sltu AT,c_3,t_1 | ||
2024 | - daddu t_2,AT | ||
2025 | - daddu c_1,t_2 | ||
2026 | - sltu AT,c_1,t_2 | ||
2027 | - daddu c_2,AT | ||
2028 | - dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ | ||
2029 | - mflo t_1 | ||
2030 | - mfhi t_2 | ||
2031 | - daddu c_3,t_1 | ||
2032 | - sltu AT,c_3,t_1 | ||
2033 | - daddu t_2,AT | ||
2034 | - daddu c_1,t_2 | ||
2035 | - sltu AT,c_1,t_2 | ||
2036 | - daddu c_2,AT | ||
2037 | - dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */ | ||
2038 | - mflo t_1 | ||
2039 | - mfhi t_2 | ||
2040 | - daddu c_3,t_1 | ||
2041 | - sltu AT,c_3,t_1 | ||
2042 | - daddu t_2,AT | ||
2043 | - daddu c_1,t_2 | ||
2044 | - sltu AT,c_1,t_2 | ||
2045 | - daddu c_2,AT | ||
2046 | - sd c_3,88(a0) /* r[11]=c3; */ | ||
2047 | - | ||
2048 | - dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */ | ||
2049 | - mflo t_1 | ||
2050 | - mfhi t_2 | ||
2051 | - daddu c_1,t_1 | ||
2052 | - sltu AT,c_1,t_1 | ||
2053 | - daddu t_2,AT | ||
2054 | - daddu c_2,t_2 | ||
2055 | - sltu c_3,c_2,t_2 | ||
2056 | - dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ | ||
2057 | - mflo t_1 | ||
2058 | - mfhi t_2 | ||
2059 | - daddu c_1,t_1 | ||
2060 | - sltu AT,c_1,t_1 | ||
2061 | - daddu t_2,AT | ||
2062 | - daddu c_2,t_2 | ||
2063 | - sltu AT,c_2,t_2 | ||
2064 | - daddu c_3,AT | ||
2065 | - dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ | ||
2066 | - mflo t_1 | ||
2067 | - mfhi t_2 | ||
2068 | - daddu c_1,t_1 | ||
2069 | - sltu AT,c_1,t_1 | ||
2070 | - daddu t_2,AT | ||
2071 | - daddu c_2,t_2 | ||
2072 | - sltu AT,c_2,t_2 | ||
2073 | - daddu c_3,AT | ||
2074 | - sd c_1,96(a0) /* r[12]=c1; */ | ||
2075 | - | ||
2076 | - dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */ | ||
2077 | - mflo t_1 | ||
2078 | - mfhi t_2 | ||
2079 | - daddu c_2,t_1 | ||
2080 | - sltu AT,c_2,t_1 | ||
2081 | - daddu t_2,AT | ||
2082 | - daddu c_3,t_2 | ||
2083 | - sltu c_1,c_3,t_2 | ||
2084 | - dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ | ||
2085 | - mflo t_1 | ||
2086 | - mfhi t_2 | ||
2087 | - daddu c_2,t_1 | ||
2088 | - sltu AT,c_2,t_1 | ||
2089 | - daddu t_2,AT | ||
2090 | - daddu c_3,t_2 | ||
2091 | - sltu AT,c_3,t_2 | ||
2092 | - daddu c_1,AT | ||
2093 | - sd c_2,104(a0) /* r[13]=c2; */ | ||
2094 | - | ||
2095 | - dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ | ||
2096 | - ld s0,0(sp) | ||
2097 | - ld s1,8(sp) | ||
2098 | - ld s2,16(sp) | ||
2099 | - ld s3,24(sp) | ||
2100 | - ld s4,32(sp) | ||
2101 | - ld s5,40(sp) | ||
2102 | - mflo t_1 | ||
2103 | - mfhi t_2 | ||
2104 | - daddu c_3,t_1 | ||
2105 | - sltu AT,c_3,t_1 | ||
2106 | - daddu t_2,AT | ||
2107 | - daddu c_1,t_2 | ||
2108 | - sd c_3,112(a0) /* r[14]=c3; */ | ||
2109 | - sd c_1,120(a0) /* r[15]=c1; */ | ||
2110 | - | ||
2111 | - PTR_ADD sp,FRAME_SIZE | ||
2112 | - | ||
2113 | - jr ra | ||
2114 | -END(bn_mul_comba8) | ||
2115 | - | ||
2116 | -.align 5 | ||
2117 | -LEAF(bn_mul_comba4) | ||
2118 | - .set reorder | ||
2119 | - ld a_0,0(a1) | ||
2120 | - ld b_0,0(a2) | ||
2121 | - ld a_1,8(a1) | ||
2122 | - ld a_2,16(a1) | ||
2123 | - dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
2124 | - ld a_3,24(a1) | ||
2125 | - ld b_1,8(a2) | ||
2126 | - ld b_2,16(a2) | ||
2127 | - ld b_3,24(a2) | ||
2128 | - mflo c_1 | ||
2129 | - mfhi c_2 | ||
2130 | - sd c_1,0(a0) | ||
2131 | - | ||
2132 | - dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ | ||
2133 | - mflo t_1 | ||
2134 | - mfhi t_2 | ||
2135 | - daddu c_2,t_1 | ||
2136 | - sltu AT,c_2,t_1 | ||
2137 | - daddu c_3,t_2,AT | ||
2138 | - dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ | ||
2139 | - mflo t_1 | ||
2140 | - mfhi t_2 | ||
2141 | - daddu c_2,t_1 | ||
2142 | - sltu AT,c_2,t_1 | ||
2143 | - daddu t_2,AT | ||
2144 | - daddu c_3,t_2 | ||
2145 | - sltu c_1,c_3,t_2 | ||
2146 | - sd c_2,8(a0) | ||
2147 | - | ||
2148 | - dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ | ||
2149 | - mflo t_1 | ||
2150 | - mfhi t_2 | ||
2151 | - daddu c_3,t_1 | ||
2152 | - sltu AT,c_3,t_1 | ||
2153 | - daddu t_2,AT | ||
2154 | - daddu c_1,t_2 | ||
2155 | - dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
2156 | - mflo t_1 | ||
2157 | - mfhi t_2 | ||
2158 | - daddu c_3,t_1 | ||
2159 | - sltu AT,c_3,t_1 | ||
2160 | - daddu t_2,AT | ||
2161 | - daddu c_1,t_2 | ||
2162 | - sltu c_2,c_1,t_2 | ||
2163 | - dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ | ||
2164 | - mflo t_1 | ||
2165 | - mfhi t_2 | ||
2166 | - daddu c_3,t_1 | ||
2167 | - sltu AT,c_3,t_1 | ||
2168 | - daddu t_2,AT | ||
2169 | - daddu c_1,t_2 | ||
2170 | - sltu AT,c_1,t_2 | ||
2171 | - daddu c_2,AT | ||
2172 | - sd c_3,16(a0) | ||
2173 | - | ||
2174 | - dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ | ||
2175 | - mflo t_1 | ||
2176 | - mfhi t_2 | ||
2177 | - daddu c_1,t_1 | ||
2178 | - sltu AT,c_1,t_1 | ||
2179 | - daddu t_2,AT | ||
2180 | - daddu c_2,t_2 | ||
2181 | - sltu c_3,c_2,t_2 | ||
2182 | - dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ | ||
2183 | - mflo t_1 | ||
2184 | - mfhi t_2 | ||
2185 | - daddu c_1,t_1 | ||
2186 | - sltu AT,c_1,t_1 | ||
2187 | - daddu t_2,AT | ||
2188 | - daddu c_2,t_2 | ||
2189 | - sltu AT,c_2,t_2 | ||
2190 | - daddu c_3,AT | ||
2191 | - dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ | ||
2192 | - mflo t_1 | ||
2193 | - mfhi t_2 | ||
2194 | - daddu c_1,t_1 | ||
2195 | - sltu AT,c_1,t_1 | ||
2196 | - daddu t_2,AT | ||
2197 | - daddu c_2,t_2 | ||
2198 | - sltu AT,c_2,t_2 | ||
2199 | - daddu c_3,AT | ||
2200 | - dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ | ||
2201 | - mflo t_1 | ||
2202 | - mfhi t_2 | ||
2203 | - daddu c_1,t_1 | ||
2204 | - sltu AT,c_1,t_1 | ||
2205 | - daddu t_2,AT | ||
2206 | - daddu c_2,t_2 | ||
2207 | - sltu AT,c_2,t_2 | ||
2208 | - daddu c_3,AT | ||
2209 | - sd c_1,24(a0) | ||
2210 | - | ||
2211 | - dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ | ||
2212 | - mflo t_1 | ||
2213 | - mfhi t_2 | ||
2214 | - daddu c_2,t_1 | ||
2215 | - sltu AT,c_2,t_1 | ||
2216 | - daddu t_2,AT | ||
2217 | - daddu c_3,t_2 | ||
2218 | - sltu c_1,c_3,t_2 | ||
2219 | - dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
2220 | - mflo t_1 | ||
2221 | - mfhi t_2 | ||
2222 | - daddu c_2,t_1 | ||
2223 | - sltu AT,c_2,t_1 | ||
2224 | - daddu t_2,AT | ||
2225 | - daddu c_3,t_2 | ||
2226 | - sltu AT,c_3,t_2 | ||
2227 | - daddu c_1,AT | ||
2228 | - dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ | ||
2229 | - mflo t_1 | ||
2230 | - mfhi t_2 | ||
2231 | - daddu c_2,t_1 | ||
2232 | - sltu AT,c_2,t_1 | ||
2233 | - daddu t_2,AT | ||
2234 | - daddu c_3,t_2 | ||
2235 | - sltu AT,c_3,t_2 | ||
2236 | - daddu c_1,AT | ||
2237 | - sd c_2,32(a0) | ||
2238 | - | ||
2239 | - dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ | ||
2240 | - mflo t_1 | ||
2241 | - mfhi t_2 | ||
2242 | - daddu c_3,t_1 | ||
2243 | - sltu AT,c_3,t_1 | ||
2244 | - daddu t_2,AT | ||
2245 | - daddu c_1,t_2 | ||
2246 | - sltu c_2,c_1,t_2 | ||
2247 | - dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ | ||
2248 | - mflo t_1 | ||
2249 | - mfhi t_2 | ||
2250 | - daddu c_3,t_1 | ||
2251 | - sltu AT,c_3,t_1 | ||
2252 | - daddu t_2,AT | ||
2253 | - daddu c_1,t_2 | ||
2254 | - sltu AT,c_1,t_2 | ||
2255 | - daddu c_2,AT | ||
2256 | - sd c_3,40(a0) | ||
2257 | - | ||
2258 | - dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
2259 | - mflo t_1 | ||
2260 | - mfhi t_2 | ||
2261 | - daddu c_1,t_1 | ||
2262 | - sltu AT,c_1,t_1 | ||
2263 | - daddu t_2,AT | ||
2264 | - daddu c_2,t_2 | ||
2265 | - sd c_1,48(a0) | ||
2266 | - sd c_2,56(a0) | ||
2267 | - | ||
2268 | - jr ra | ||
2269 | -END(bn_mul_comba4) | ||
2270 | - | ||
2271 | -#undef a_4 | ||
2272 | -#undef a_5 | ||
2273 | -#undef a_6 | ||
2274 | -#undef a_7 | ||
2275 | -#define a_4 b_0 | ||
2276 | -#define a_5 b_1 | ||
2277 | -#define a_6 b_2 | ||
2278 | -#define a_7 b_3 | ||
2279 | - | ||
2280 | -.align 5 | ||
2281 | -LEAF(bn_sqr_comba8) | ||
2282 | - .set reorder | ||
2283 | - ld a_0,0(a1) | ||
2284 | - ld a_1,8(a1) | ||
2285 | - ld a_2,16(a1) | ||
2286 | - ld a_3,24(a1) | ||
2287 | - | ||
2288 | - dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
2289 | - ld a_4,32(a1) | ||
2290 | - ld a_5,40(a1) | ||
2291 | - ld a_6,48(a1) | ||
2292 | - ld a_7,56(a1) | ||
2293 | - mflo c_1 | ||
2294 | - mfhi c_2 | ||
2295 | - sd c_1,0(a0) | ||
2296 | - | ||
2297 | - dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ | ||
2298 | - mflo t_1 | ||
2299 | - mfhi t_2 | ||
2300 | - slt c_1,t_2,zero | ||
2301 | - dsll t_2,1 | ||
2302 | - slt a2,t_1,zero | ||
2303 | - daddu t_2,a2 | ||
2304 | - dsll t_1,1 | ||
2305 | - daddu c_2,t_1 | ||
2306 | - sltu AT,c_2,t_1 | ||
2307 | - daddu c_3,t_2,AT | ||
2308 | - sd c_2,8(a0) | ||
2309 | - | ||
2310 | - dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ | ||
2311 | - mflo t_1 | ||
2312 | - mfhi t_2 | ||
2313 | - slt c_2,t_2,zero | ||
2314 | - dsll t_2,1 | ||
2315 | - slt a2,t_1,zero | ||
2316 | - daddu t_2,a2 | ||
2317 | - dsll t_1,1 | ||
2318 | - daddu c_3,t_1 | ||
2319 | - sltu AT,c_3,t_1 | ||
2320 | - daddu t_2,AT | ||
2321 | - daddu c_1,t_2 | ||
2322 | - sltu AT,c_1,t_2 | ||
2323 | - daddu c_2,AT | ||
2324 | - dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
2325 | - mflo t_1 | ||
2326 | - mfhi t_2 | ||
2327 | - daddu c_3,t_1 | ||
2328 | - sltu AT,c_3,t_1 | ||
2329 | - daddu t_2,AT | ||
2330 | - daddu c_1,t_2 | ||
2331 | - sltu AT,c_1,t_2 | ||
2332 | - daddu c_2,AT | ||
2333 | - sd c_3,16(a0) | ||
2334 | - | ||
2335 | - dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ | ||
2336 | - mflo t_1 | ||
2337 | - mfhi t_2 | ||
2338 | - slt c_3,t_2,zero | ||
2339 | - dsll t_2,1 | ||
2340 | - slt a2,t_1,zero | ||
2341 | - daddu t_2,a2 | ||
2342 | - dsll t_1,1 | ||
2343 | - daddu c_1,t_1 | ||
2344 | - sltu AT,c_1,t_1 | ||
2345 | - daddu t_2,AT | ||
2346 | - daddu c_2,t_2 | ||
2347 | - sltu AT,c_2,t_2 | ||
2348 | - daddu c_3,AT | ||
2349 | - dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ | ||
2350 | - mflo t_1 | ||
2351 | - mfhi t_2 | ||
2352 | - slt AT,t_2,zero | ||
2353 | - daddu c_3,AT | ||
2354 | - dsll t_2,1 | ||
2355 | - slt a2,t_1,zero | ||
2356 | - daddu t_2,a2 | ||
2357 | - dsll t_1,1 | ||
2358 | - daddu c_1,t_1 | ||
2359 | - sltu AT,c_1,t_1 | ||
2360 | - daddu t_2,AT | ||
2361 | - daddu c_2,t_2 | ||
2362 | - sltu AT,c_2,t_2 | ||
2363 | - daddu c_3,AT | ||
2364 | - sd c_1,24(a0) | ||
2365 | - | ||
2366 | - dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ | ||
2367 | - mflo t_1 | ||
2368 | - mfhi t_2 | ||
2369 | - slt c_1,t_2,zero | ||
2370 | - dsll t_2,1 | ||
2371 | - slt a2,t_1,zero | ||
2372 | - daddu t_2,a2 | ||
2373 | - dsll t_1,1 | ||
2374 | - daddu c_2,t_1 | ||
2375 | - sltu AT,c_2,t_1 | ||
2376 | - daddu t_2,AT | ||
2377 | - daddu c_3,t_2 | ||
2378 | - sltu AT,c_3,t_2 | ||
2379 | - daddu c_1,AT | ||
2380 | - dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ | ||
2381 | - mflo t_1 | ||
2382 | - mfhi t_2 | ||
2383 | - slt AT,t_2,zero | ||
2384 | - daddu c_1,AT | ||
2385 | - dsll t_2,1 | ||
2386 | - slt a2,t_1,zero | ||
2387 | - daddu t_2,a2 | ||
2388 | - dsll t_1,1 | ||
2389 | - daddu c_2,t_1 | ||
2390 | - sltu AT,c_2,t_1 | ||
2391 | - daddu t_2,AT | ||
2392 | - daddu c_3,t_2 | ||
2393 | - sltu AT,c_3,t_2 | ||
2394 | - daddu c_1,AT | ||
2395 | - dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
2396 | - mflo t_1 | ||
2397 | - mfhi t_2 | ||
2398 | - daddu c_2,t_1 | ||
2399 | - sltu AT,c_2,t_1 | ||
2400 | - daddu t_2,AT | ||
2401 | - daddu c_3,t_2 | ||
2402 | - sltu AT,c_3,t_2 | ||
2403 | - daddu c_1,AT | ||
2404 | - sd c_2,32(a0) | ||
2405 | - | ||
2406 | - dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ | ||
2407 | - mflo t_1 | ||
2408 | - mfhi t_2 | ||
2409 | - slt c_2,t_2,zero | ||
2410 | - dsll t_2,1 | ||
2411 | - slt a2,t_1,zero | ||
2412 | - daddu t_2,a2 | ||
2413 | - dsll t_1,1 | ||
2414 | - daddu c_3,t_1 | ||
2415 | - sltu AT,c_3,t_1 | ||
2416 | - daddu t_2,AT | ||
2417 | - daddu c_1,t_2 | ||
2418 | - sltu AT,c_1,t_2 | ||
2419 | - daddu c_2,AT | ||
2420 | - dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ | ||
2421 | - mflo t_1 | ||
2422 | - mfhi t_2 | ||
2423 | - slt AT,t_2,zero | ||
2424 | - daddu c_2,AT | ||
2425 | - dsll t_2,1 | ||
2426 | - slt a2,t_1,zero | ||
2427 | - daddu t_2,a2 | ||
2428 | - dsll t_1,1 | ||
2429 | - daddu c_3,t_1 | ||
2430 | - sltu AT,c_3,t_1 | ||
2431 | - daddu t_2,AT | ||
2432 | - daddu c_1,t_2 | ||
2433 | - sltu AT,c_1,t_2 | ||
2434 | - daddu c_2,AT | ||
2435 | - dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ | ||
2436 | - mflo t_1 | ||
2437 | - mfhi t_2 | ||
2438 | - slt AT,t_2,zero | ||
2439 | - daddu c_2,AT | ||
2440 | - dsll t_2,1 | ||
2441 | - slt a2,t_1,zero | ||
2442 | - daddu t_2,a2 | ||
2443 | - dsll t_1,1 | ||
2444 | - daddu c_3,t_1 | ||
2445 | - sltu AT,c_3,t_1 | ||
2446 | - daddu t_2,AT | ||
2447 | - daddu c_1,t_2 | ||
2448 | - sltu AT,c_1,t_2 | ||
2449 | - daddu c_2,AT | ||
2450 | - sd c_3,40(a0) | ||
2451 | - | ||
2452 | - dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ | ||
2453 | - mflo t_1 | ||
2454 | - mfhi t_2 | ||
2455 | - slt c_3,t_2,zero | ||
2456 | - dsll t_2,1 | ||
2457 | - slt a2,t_1,zero | ||
2458 | - daddu t_2,a2 | ||
2459 | - dsll t_1,1 | ||
2460 | - daddu c_1,t_1 | ||
2461 | - sltu AT,c_1,t_1 | ||
2462 | - daddu t_2,AT | ||
2463 | - daddu c_2,t_2 | ||
2464 | - sltu AT,c_2,t_2 | ||
2465 | - daddu c_3,AT | ||
2466 | - dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ | ||
2467 | - mflo t_1 | ||
2468 | - mfhi t_2 | ||
2469 | - slt AT,t_2,zero | ||
2470 | - daddu c_3,AT | ||
2471 | - dsll t_2,1 | ||
2472 | - slt a2,t_1,zero | ||
2473 | - daddu t_2,a2 | ||
2474 | - dsll t_1,1 | ||
2475 | - daddu c_1,t_1 | ||
2476 | - sltu AT,c_1,t_1 | ||
2477 | - daddu t_2,AT | ||
2478 | - daddu c_2,t_2 | ||
2479 | - sltu AT,c_2,t_2 | ||
2480 | - daddu c_3,AT | ||
2481 | - dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ | ||
2482 | - mflo t_1 | ||
2483 | - mfhi t_2 | ||
2484 | - slt AT,t_2,zero | ||
2485 | - daddu c_3,AT | ||
2486 | - dsll t_2,1 | ||
2487 | - slt a2,t_1,zero | ||
2488 | - daddu t_2,a2 | ||
2489 | - dsll t_1,1 | ||
2490 | - daddu c_1,t_1 | ||
2491 | - sltu AT,c_1,t_1 | ||
2492 | - daddu t_2,AT | ||
2493 | - daddu c_2,t_2 | ||
2494 | - sltu AT,c_2,t_2 | ||
2495 | - daddu c_3,AT | ||
2496 | - dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
2497 | - mflo t_1 | ||
2498 | - mfhi t_2 | ||
2499 | - daddu c_1,t_1 | ||
2500 | - sltu AT,c_1,t_1 | ||
2501 | - daddu t_2,AT | ||
2502 | - daddu c_2,t_2 | ||
2503 | - sltu AT,c_2,t_2 | ||
2504 | - daddu c_3,AT | ||
2505 | - sd c_1,48(a0) | ||
2506 | - | ||
2507 | - dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ | ||
2508 | - mflo t_1 | ||
2509 | - mfhi t_2 | ||
2510 | - slt c_1,t_2,zero | ||
2511 | - dsll t_2,1 | ||
2512 | - slt a2,t_1,zero | ||
2513 | - daddu t_2,a2 | ||
2514 | - dsll t_1,1 | ||
2515 | - daddu c_2,t_1 | ||
2516 | - sltu AT,c_2,t_1 | ||
2517 | - daddu t_2,AT | ||
2518 | - daddu c_3,t_2 | ||
2519 | - sltu AT,c_3,t_2 | ||
2520 | - daddu c_1,AT | ||
2521 | - dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ | ||
2522 | - mflo t_1 | ||
2523 | - mfhi t_2 | ||
2524 | - slt AT,t_2,zero | ||
2525 | - daddu c_1,AT | ||
2526 | - dsll t_2,1 | ||
2527 | - slt a2,t_1,zero | ||
2528 | - daddu t_2,a2 | ||
2529 | - dsll t_1,1 | ||
2530 | - daddu c_2,t_1 | ||
2531 | - sltu AT,c_2,t_1 | ||
2532 | - daddu t_2,AT | ||
2533 | - daddu c_3,t_2 | ||
2534 | - sltu AT,c_3,t_2 | ||
2535 | - daddu c_1,AT | ||
2536 | - dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ | ||
2537 | - mflo t_1 | ||
2538 | - mfhi t_2 | ||
2539 | - slt AT,t_2,zero | ||
2540 | - daddu c_1,AT | ||
2541 | - dsll t_2,1 | ||
2542 | - slt a2,t_1,zero | ||
2543 | - daddu t_2,a2 | ||
2544 | - dsll t_1,1 | ||
2545 | - daddu c_2,t_1 | ||
2546 | - sltu AT,c_2,t_1 | ||
2547 | - daddu t_2,AT | ||
2548 | - daddu c_3,t_2 | ||
2549 | - sltu AT,c_3,t_2 | ||
2550 | - daddu c_1,AT | ||
2551 | - dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ | ||
2552 | - mflo t_1 | ||
2553 | - mfhi t_2 | ||
2554 | - slt AT,t_2,zero | ||
2555 | - daddu c_1,AT | ||
2556 | - dsll t_2,1 | ||
2557 | - slt a2,t_1,zero | ||
2558 | - daddu t_2,a2 | ||
2559 | - dsll t_1,1 | ||
2560 | - daddu c_2,t_1 | ||
2561 | - sltu AT,c_2,t_1 | ||
2562 | - daddu t_2,AT | ||
2563 | - daddu c_3,t_2 | ||
2564 | - sltu AT,c_3,t_2 | ||
2565 | - daddu c_1,AT | ||
2566 | - sd c_2,56(a0) | ||
2567 | - | ||
2568 | - dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ | ||
2569 | - mflo t_1 | ||
2570 | - mfhi t_2 | ||
2571 | - slt c_2,t_2,zero | ||
2572 | - dsll t_2,1 | ||
2573 | - slt a2,t_1,zero | ||
2574 | - daddu t_2,a2 | ||
2575 | - dsll t_1,1 | ||
2576 | - daddu c_3,t_1 | ||
2577 | - sltu AT,c_3,t_1 | ||
2578 | - daddu t_2,AT | ||
2579 | - daddu c_1,t_2 | ||
2580 | - sltu AT,c_1,t_2 | ||
2581 | - daddu c_2,AT | ||
2582 | - dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ | ||
2583 | - mflo t_1 | ||
2584 | - mfhi t_2 | ||
2585 | - slt AT,t_2,zero | ||
2586 | - daddu c_2,AT | ||
2587 | - dsll t_2,1 | ||
2588 | - slt a2,t_1,zero | ||
2589 | - daddu t_2,a2 | ||
2590 | - dsll t_1,1 | ||
2591 | - daddu c_3,t_1 | ||
2592 | - sltu AT,c_3,t_1 | ||
2593 | - daddu t_2,AT | ||
2594 | - daddu c_1,t_2 | ||
2595 | - sltu AT,c_1,t_2 | ||
2596 | - daddu c_2,AT | ||
2597 | - dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ | ||
2598 | - mflo t_1 | ||
2599 | - mfhi t_2 | ||
2600 | - slt AT,t_2,zero | ||
2601 | - daddu c_2,AT | ||
2602 | - dsll t_2,1 | ||
2603 | - slt a2,t_1,zero | ||
2604 | - daddu t_2,a2 | ||
2605 | - dsll t_1,1 | ||
2606 | - daddu c_3,t_1 | ||
2607 | - sltu AT,c_3,t_1 | ||
2608 | - daddu t_2,AT | ||
2609 | - daddu c_1,t_2 | ||
2610 | - sltu AT,c_1,t_2 | ||
2611 | - daddu c_2,AT | ||
2612 | - dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ | ||
2613 | - mflo t_1 | ||
2614 | - mfhi t_2 | ||
2615 | - daddu c_3,t_1 | ||
2616 | - sltu AT,c_3,t_1 | ||
2617 | - daddu t_2,AT | ||
2618 | - daddu c_1,t_2 | ||
2619 | - sltu AT,c_1,t_2 | ||
2620 | - daddu c_2,AT | ||
2621 | - sd c_3,64(a0) | ||
2622 | - | ||
2623 | - dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ | ||
2624 | - mflo t_1 | ||
2625 | - mfhi t_2 | ||
2626 | - slt c_3,t_2,zero | ||
2627 | - dsll t_2,1 | ||
2628 | - slt a2,t_1,zero | ||
2629 | - daddu t_2,a2 | ||
2630 | - dsll t_1,1 | ||
2631 | - daddu c_1,t_1 | ||
2632 | - sltu AT,c_1,t_1 | ||
2633 | - daddu t_2,AT | ||
2634 | - daddu c_2,t_2 | ||
2635 | - sltu AT,c_2,t_2 | ||
2636 | - daddu c_3,AT | ||
2637 | - dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ | ||
2638 | - mflo t_1 | ||
2639 | - mfhi t_2 | ||
2640 | - slt AT,t_2,zero | ||
2641 | - daddu c_3,AT | ||
2642 | - dsll t_2,1 | ||
2643 | - slt a2,t_1,zero | ||
2644 | - daddu t_2,a2 | ||
2645 | - dsll t_1,1 | ||
2646 | - daddu c_1,t_1 | ||
2647 | - sltu AT,c_1,t_1 | ||
2648 | - daddu t_2,AT | ||
2649 | - daddu c_2,t_2 | ||
2650 | - sltu AT,c_2,t_2 | ||
2651 | - daddu c_3,AT | ||
2652 | - dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ | ||
2653 | - mflo t_1 | ||
2654 | - mfhi t_2 | ||
2655 | - slt AT,t_2,zero | ||
2656 | - daddu c_3,AT | ||
2657 | - dsll t_2,1 | ||
2658 | - slt a2,t_1,zero | ||
2659 | - daddu t_2,a2 | ||
2660 | - dsll t_1,1 | ||
2661 | - daddu c_1,t_1 | ||
2662 | - sltu AT,c_1,t_1 | ||
2663 | - daddu t_2,AT | ||
2664 | - daddu c_2,t_2 | ||
2665 | - sltu AT,c_2,t_2 | ||
2666 | - daddu c_3,AT | ||
2667 | - sd c_1,72(a0) | ||
2668 | - | ||
2669 | - dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ | ||
2670 | - mflo t_1 | ||
2671 | - mfhi t_2 | ||
2672 | - slt c_1,t_2,zero | ||
2673 | - dsll t_2,1 | ||
2674 | - slt a2,t_1,zero | ||
2675 | - daddu t_2,a2 | ||
2676 | - dsll t_1,1 | ||
2677 | - daddu c_2,t_1 | ||
2678 | - sltu AT,c_2,t_1 | ||
2679 | - daddu t_2,AT | ||
2680 | - daddu c_3,t_2 | ||
2681 | - sltu AT,c_3,t_2 | ||
2682 | - daddu c_1,AT | ||
2683 | - dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ | ||
2684 | - mflo t_1 | ||
2685 | - mfhi t_2 | ||
2686 | - slt AT,t_2,zero | ||
2687 | - daddu c_1,AT | ||
2688 | - dsll t_2,1 | ||
2689 | - slt a2,t_1,zero | ||
2690 | - daddu t_2,a2 | ||
2691 | - dsll t_1,1 | ||
2692 | - daddu c_2,t_1 | ||
2693 | - sltu AT,c_2,t_1 | ||
2694 | - daddu t_2,AT | ||
2695 | - daddu c_3,t_2 | ||
2696 | - sltu AT,c_3,t_2 | ||
2697 | - daddu c_1,AT | ||
2698 | - dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ | ||
2699 | - mflo t_1 | ||
2700 | - mfhi t_2 | ||
2701 | - daddu c_2,t_1 | ||
2702 | - sltu AT,c_2,t_1 | ||
2703 | - daddu t_2,AT | ||
2704 | - daddu c_3,t_2 | ||
2705 | - sltu AT,c_3,t_2 | ||
2706 | - daddu c_1,AT | ||
2707 | - sd c_2,80(a0) | ||
2708 | - | ||
2709 | - dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ | ||
2710 | - mflo t_1 | ||
2711 | - mfhi t_2 | ||
2712 | - slt c_2,t_2,zero | ||
2713 | - dsll t_2,1 | ||
2714 | - slt a2,t_1,zero | ||
2715 | - daddu t_2,a2 | ||
2716 | - dsll t_1,1 | ||
2717 | - daddu c_3,t_1 | ||
2718 | - sltu AT,c_3,t_1 | ||
2719 | - daddu t_2,AT | ||
2720 | - daddu c_1,t_2 | ||
2721 | - sltu AT,c_1,t_2 | ||
2722 | - daddu c_2,AT | ||
2723 | - dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ | ||
2724 | - mflo t_1 | ||
2725 | - mfhi t_2 | ||
2726 | - slt AT,t_2,zero | ||
2727 | - daddu c_2,AT | ||
2728 | - dsll t_2,1 | ||
2729 | - slt a2,t_1,zero | ||
2730 | - daddu t_2,a2 | ||
2731 | - dsll t_1,1 | ||
2732 | - daddu c_3,t_1 | ||
2733 | - sltu AT,c_3,t_1 | ||
2734 | - daddu t_2,AT | ||
2735 | - daddu c_1,t_2 | ||
2736 | - sltu AT,c_1,t_2 | ||
2737 | - daddu c_2,AT | ||
2738 | - sd c_3,88(a0) | ||
2739 | - | ||
2740 | - dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ | ||
2741 | - mflo t_1 | ||
2742 | - mfhi t_2 | ||
2743 | - slt c_3,t_2,zero | ||
2744 | - dsll t_2,1 | ||
2745 | - slt a2,t_1,zero | ||
2746 | - daddu t_2,a2 | ||
2747 | - dsll t_1,1 | ||
2748 | - daddu c_1,t_1 | ||
2749 | - sltu AT,c_1,t_1 | ||
2750 | - daddu t_2,AT | ||
2751 | - daddu c_2,t_2 | ||
2752 | - sltu AT,c_2,t_2 | ||
2753 | - daddu c_3,AT | ||
2754 | - dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ | ||
2755 | - mflo t_1 | ||
2756 | - mfhi t_2 | ||
2757 | - daddu c_1,t_1 | ||
2758 | - sltu AT,c_1,t_1 | ||
2759 | - daddu t_2,AT | ||
2760 | - daddu c_2,t_2 | ||
2761 | - sltu AT,c_2,t_2 | ||
2762 | - daddu c_3,AT | ||
2763 | - sd c_1,96(a0) | ||
2764 | - | ||
2765 | - dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ | ||
2766 | - mflo t_1 | ||
2767 | - mfhi t_2 | ||
2768 | - slt c_1,t_2,zero | ||
2769 | - dsll t_2,1 | ||
2770 | - slt a2,t_1,zero | ||
2771 | - daddu t_2,a2 | ||
2772 | - dsll t_1,1 | ||
2773 | - daddu c_2,t_1 | ||
2774 | - sltu AT,c_2,t_1 | ||
2775 | - daddu t_2,AT | ||
2776 | - daddu c_3,t_2 | ||
2777 | - sltu AT,c_3,t_2 | ||
2778 | - daddu c_1,AT | ||
2779 | - sd c_2,104(a0) | ||
2780 | - | ||
2781 | - dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ | ||
2782 | - mflo t_1 | ||
2783 | - mfhi t_2 | ||
2784 | - daddu c_3,t_1 | ||
2785 | - sltu AT,c_3,t_1 | ||
2786 | - daddu t_2,AT | ||
2787 | - daddu c_1,t_2 | ||
2788 | - sd c_3,112(a0) | ||
2789 | - sd c_1,120(a0) | ||
2790 | - | ||
2791 | - jr ra | ||
2792 | -END(bn_sqr_comba8) | ||
2793 | - | ||
2794 | -.align 5 | ||
2795 | -LEAF(bn_sqr_comba4) | ||
2796 | - .set reorder | ||
2797 | - ld a_0,0(a1) | ||
2798 | - ld a_1,8(a1) | ||
2799 | - ld a_2,16(a1) | ||
2800 | - ld a_3,24(a1) | ||
2801 | - dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
2802 | - mflo c_1 | ||
2803 | - mfhi c_2 | ||
2804 | - sd c_1,0(a0) | ||
2805 | - | ||
2806 | - dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ | ||
2807 | - mflo t_1 | ||
2808 | - mfhi t_2 | ||
2809 | - slt c_1,t_2,zero | ||
2810 | - dsll t_2,1 | ||
2811 | - slt a2,t_1,zero | ||
2812 | - daddu t_2,a2 | ||
2813 | - dsll t_1,1 | ||
2814 | - daddu c_2,t_1 | ||
2815 | - sltu AT,c_2,t_1 | ||
2816 | - daddu c_3,t_2,AT | ||
2817 | - sd c_2,8(a0) | ||
2818 | - | ||
2819 | - dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ | ||
2820 | - mflo t_1 | ||
2821 | - mfhi t_2 | ||
2822 | - slt c_2,t_2,zero | ||
2823 | - dsll t_2,1 | ||
2824 | - slt a2,t_1,zero | ||
2825 | - daddu t_2,a2 | ||
2826 | - dsll t_1,1 | ||
2827 | - daddu c_3,t_1 | ||
2828 | - sltu AT,c_3,t_1 | ||
2829 | - daddu t_2,AT | ||
2830 | - daddu c_1,t_2 | ||
2831 | - sltu AT,c_1,t_2 | ||
2832 | - daddu c_2,AT | ||
2833 | - dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
2834 | - mflo t_1 | ||
2835 | - mfhi t_2 | ||
2836 | - daddu c_3,t_1 | ||
2837 | - sltu AT,c_3,t_1 | ||
2838 | - daddu t_2,AT | ||
2839 | - daddu c_1,t_2 | ||
2840 | - sltu AT,c_1,t_2 | ||
2841 | - daddu c_2,AT | ||
2842 | - sd c_3,16(a0) | ||
2843 | - | ||
2844 | - dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ | ||
2845 | - mflo t_1 | ||
2846 | - mfhi t_2 | ||
2847 | - slt c_3,t_2,zero | ||
2848 | - dsll t_2,1 | ||
2849 | - slt a2,t_1,zero | ||
2850 | - daddu t_2,a2 | ||
2851 | - dsll t_1,1 | ||
2852 | - daddu c_1,t_1 | ||
2853 | - sltu AT,c_1,t_1 | ||
2854 | - daddu t_2,AT | ||
2855 | - daddu c_2,t_2 | ||
2856 | - sltu AT,c_2,t_2 | ||
2857 | - daddu c_3,AT | ||
2858 | - dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */ | ||
2859 | - mflo t_1 | ||
2860 | - mfhi t_2 | ||
2861 | - slt AT,t_2,zero | ||
2862 | - daddu c_3,AT | ||
2863 | - dsll t_2,1 | ||
2864 | - slt a2,t_1,zero | ||
2865 | - daddu t_2,a2 | ||
2866 | - dsll t_1,1 | ||
2867 | - daddu c_1,t_1 | ||
2868 | - sltu AT,c_1,t_1 | ||
2869 | - daddu t_2,AT | ||
2870 | - daddu c_2,t_2 | ||
2871 | - sltu AT,c_2,t_2 | ||
2872 | - daddu c_3,AT | ||
2873 | - sd c_1,24(a0) | ||
2874 | - | ||
2875 | - dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ | ||
2876 | - mflo t_1 | ||
2877 | - mfhi t_2 | ||
2878 | - slt c_1,t_2,zero | ||
2879 | - dsll t_2,1 | ||
2880 | - slt a2,t_1,zero | ||
2881 | - daddu t_2,a2 | ||
2882 | - dsll t_1,1 | ||
2883 | - daddu c_2,t_1 | ||
2884 | - sltu AT,c_2,t_1 | ||
2885 | - daddu t_2,AT | ||
2886 | - daddu c_3,t_2 | ||
2887 | - sltu AT,c_3,t_2 | ||
2888 | - daddu c_1,AT | ||
2889 | - dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
2890 | - mflo t_1 | ||
2891 | - mfhi t_2 | ||
2892 | - daddu c_2,t_1 | ||
2893 | - sltu AT,c_2,t_1 | ||
2894 | - daddu t_2,AT | ||
2895 | - daddu c_3,t_2 | ||
2896 | - sltu AT,c_3,t_2 | ||
2897 | - daddu c_1,AT | ||
2898 | - sd c_2,32(a0) | ||
2899 | - | ||
2900 | - dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ | ||
2901 | - mflo t_1 | ||
2902 | - mfhi t_2 | ||
2903 | - slt c_2,t_2,zero | ||
2904 | - dsll t_2,1 | ||
2905 | - slt a2,t_1,zero | ||
2906 | - daddu t_2,a2 | ||
2907 | - dsll t_1,1 | ||
2908 | - daddu c_3,t_1 | ||
2909 | - sltu AT,c_3,t_1 | ||
2910 | - daddu t_2,AT | ||
2911 | - daddu c_1,t_2 | ||
2912 | - sltu AT,c_1,t_2 | ||
2913 | - daddu c_2,AT | ||
2914 | - sd c_3,40(a0) | ||
2915 | - | ||
2916 | - dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
2917 | - mflo t_1 | ||
2918 | - mfhi t_2 | ||
2919 | - daddu c_1,t_1 | ||
2920 | - sltu AT,c_1,t_1 | ||
2921 | - daddu t_2,AT | ||
2922 | - daddu c_2,t_2 | ||
2923 | - sd c_1,48(a0) | ||
2924 | - sd c_2,56(a0) | ||
2925 | - | ||
2926 | - jr ra | ||
2927 | -END(bn_sqr_comba4) | ||
2928 | diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c | ||
2929 | index 31476ab..2d39407 100644 | ||
2930 | --- a/crypto/bn/asm/x86_64-gcc.c | ||
2931 | +++ b/crypto/bn/asm/x86_64-gcc.c | ||
2932 | @@ -273,6 +273,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | ||
2933 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | ||
2934 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | ||
2935 | |||
2936 | +/* | ||
2937 | + * Keep in mind that carrying into high part of multiplication result | ||
2938 | + * can not overflow, because it cannot be all-ones. | ||
2939 | + */ | ||
2940 | #if 0 | ||
2941 | /* original macros are kept for reference purposes */ | ||
2942 | #define mul_add_c(a,b,c0,c1,c2) { \ | ||
2943 | @@ -287,10 +291,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | ||
2944 | BN_ULONG ta=(a),tb=(b),t0; \ | ||
2945 | t1 = BN_UMULT_HIGH(ta,tb); \ | ||
2946 | t0 = ta * tb; \ | ||
2947 | - t2 = t1+t1; c2 += (t2<t1)?1:0; \ | ||
2948 | - t1 = t0+t0; t2 += (t1<t0)?1:0; \ | ||
2949 | - c0 += t1; t2 += (c0<t1)?1:0; \ | ||
2950 | + c0 += t0; t2 = t1+((c0<t0)?1:0);\ | ||
2951 | c1 += t2; c2 += (c1<t2)?1:0; \ | ||
2952 | + c0 += t0; t1 += (c0<t0)?1:0; \ | ||
2953 | + c1 += t1; c2 += (c1<t1)?1:0; \ | ||
2954 | } | ||
2955 | #else | ||
2956 | #define mul_add_c(a,b,c0,c1,c2) do { \ | ||
2957 | @@ -328,22 +332,14 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | ||
2958 | : "=a"(t1),"=d"(t2) \ | ||
2959 | : "a"(a),"m"(b) \ | ||
2960 | : "cc"); \ | ||
2961 | - asm ("addq %0,%0; adcq %2,%1" \ | ||
2962 | - : "+d"(t2),"+r"(c2) \ | ||
2963 | - : "g"(0) \ | ||
2964 | - : "cc"); \ | ||
2965 | - asm ("addq %0,%0; adcq %2,%1" \ | ||
2966 | - : "+a"(t1),"+d"(t2) \ | ||
2967 | - : "g"(0) \ | ||
2968 | - : "cc"); \ | ||
2969 | - asm ("addq %2,%0; adcq %3,%1" \ | ||
2970 | - : "+r"(c0),"+d"(t2) \ | ||
2971 | - : "a"(t1),"g"(0) \ | ||
2972 | - : "cc"); \ | ||
2973 | - asm ("addq %2,%0; adcq %3,%1" \ | ||
2974 | - : "+r"(c1),"+r"(c2) \ | ||
2975 | - : "d"(t2),"g"(0) \ | ||
2976 | - : "cc"); \ | ||
2977 | + asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ | ||
2978 | + : "+r"(c0),"+r"(c1),"+r"(c2) \ | ||
2979 | + : "r"(t1),"r"(t2),"g"(0) \ | ||
2980 | + : "cc"); \ | ||
2981 | + asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ | ||
2982 | + : "+r"(c0),"+r"(c1),"+r"(c2) \ | ||
2983 | + : "r"(t1),"r"(t2),"g"(0) \ | ||
2984 | + : "cc"); \ | ||
2985 | } while (0) | ||
2986 | #endif | ||
2987 | |||
2988 | diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c | ||
2989 | index c43c91c..a33b634 100644 | ||
2990 | --- a/crypto/bn/bn_asm.c | ||
2991 | +++ b/crypto/bn/bn_asm.c | ||
2992 | @@ -438,6 +438,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | ||
2993 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | ||
2994 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | ||
2995 | |||
2996 | +/* | ||
2997 | + * Keep in mind that carrying into high part of multiplication result | ||
2998 | + * can not overflow, because it cannot be all-ones. | ||
2999 | + */ | ||
3000 | #ifdef BN_LLONG | ||
3001 | #define mul_add_c(a,b,c0,c1,c2) \ | ||
3002 | t=(BN_ULLONG)a*b; \ | ||
3003 | @@ -478,10 +482,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | ||
3004 | #define mul_add_c2(a,b,c0,c1,c2) { \ | ||
3005 | BN_ULONG ta=(a),tb=(b),t0; \ | ||
3006 | BN_UMULT_LOHI(t0,t1,ta,tb); \ | ||
3007 | - t2 = t1+t1; c2 += (t2<t1)?1:0; \ | ||
3008 | - t1 = t0+t0; t2 += (t1<t0)?1:0; \ | ||
3009 | - c0 += t1; t2 += (c0<t1)?1:0; \ | ||
3010 | + c0 += t0; t2 = t1+((c0<t0)?1:0);\ | ||
3011 | c1 += t2; c2 += (c1<t2)?1:0; \ | ||
3012 | + c0 += t0; t1 += (c0<t0)?1:0; \ | ||
3013 | + c1 += t1; c2 += (c1<t1)?1:0; \ | ||
3014 | } | ||
3015 | |||
3016 | #define sqr_add_c(a,i,c0,c1,c2) { \ | ||
3017 | @@ -508,10 +512,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | ||
3018 | BN_ULONG ta=(a),tb=(b),t0; \ | ||
3019 | t1 = BN_UMULT_HIGH(ta,tb); \ | ||
3020 | t0 = ta * tb; \ | ||
3021 | - t2 = t1+t1; c2 += (t2<t1)?1:0; \ | ||
3022 | - t1 = t0+t0; t2 += (t1<t0)?1:0; \ | ||
3023 | - c0 += t1; t2 += (c0<t1)?1:0; \ | ||
3024 | + c0 += t0; t2 = t1+((c0<t0)?1:0);\ | ||
3025 | c1 += t2; c2 += (c1<t2)?1:0; \ | ||
3026 | + c0 += t0; t1 += (c0<t0)?1:0; \ | ||
3027 | + c1 += t1; c2 += (c1<t1)?1:0; \ | ||
3028 | } | ||
3029 | |||
3030 | #define sqr_add_c(a,i,c0,c1,c2) { \ | ||
3031 | diff --git a/crypto/bn/bntest.c b/crypto/bn/bntest.c | ||
3032 | index 7771e92..48bc633 100644 | ||
3033 | --- a/crypto/bn/bntest.c | ||
3034 | +++ b/crypto/bn/bntest.c | ||
3035 | @@ -678,44 +678,98 @@ int test_mul(BIO *bp) | ||
3036 | |||
3037 | int test_sqr(BIO *bp, BN_CTX *ctx) | ||
3038 | { | ||
3039 | - BIGNUM a,c,d,e; | ||
3040 | - int i; | ||
3041 | + BIGNUM *a,*c,*d,*e; | ||
3042 | + int i, ret = 0; | ||
3043 | |||
3044 | - BN_init(&a); | ||
3045 | - BN_init(&c); | ||
3046 | - BN_init(&d); | ||
3047 | - BN_init(&e); | ||
3048 | + a = BN_new(); | ||
3049 | + c = BN_new(); | ||
3050 | + d = BN_new(); | ||
3051 | + e = BN_new(); | ||
3052 | + if (a == NULL || c == NULL || d == NULL || e == NULL) | ||
3053 | + { | ||
3054 | + goto err; | ||
3055 | + } | ||
3056 | |||
3057 | for (i=0; i<num0; i++) | ||
3058 | { | ||
3059 | - BN_bntest_rand(&a,40+i*10,0,0); | ||
3060 | - a.neg=rand_neg(); | ||
3061 | - BN_sqr(&c,&a,ctx); | ||
3062 | + BN_bntest_rand(a,40+i*10,0,0); | ||
3063 | + a->neg=rand_neg(); | ||
3064 | + BN_sqr(c,a,ctx); | ||
3065 | if (bp != NULL) | ||
3066 | { | ||
3067 | if (!results) | ||
3068 | { | ||
3069 | - BN_print(bp,&a); | ||
3070 | + BN_print(bp,a); | ||
3071 | BIO_puts(bp," * "); | ||
3072 | - BN_print(bp,&a); | ||
3073 | + BN_print(bp,a); | ||
3074 | BIO_puts(bp," - "); | ||
3075 | } | ||
3076 | - BN_print(bp,&c); | ||
3077 | + BN_print(bp,c); | ||
3078 | BIO_puts(bp,"\n"); | ||
3079 | } | ||
3080 | - BN_div(&d,&e,&c,&a,ctx); | ||
3081 | - BN_sub(&d,&d,&a); | ||
3082 | - if(!BN_is_zero(&d) || !BN_is_zero(&e)) | ||
3083 | - { | ||
3084 | - fprintf(stderr,"Square test failed!\n"); | ||
3085 | - return 0; | ||
3086 | - } | ||
3087 | + BN_div(d,e,c,a,ctx); | ||
3088 | + BN_sub(d,d,a); | ||
3089 | + if(!BN_is_zero(d) || !BN_is_zero(e)) | ||
3090 | + { | ||
3091 | + fprintf(stderr,"Square test failed!\n"); | ||
3092 | + goto err; | ||
3093 | + } | ||
3094 | } | ||
3095 | - BN_free(&a); | ||
3096 | - BN_free(&c); | ||
3097 | - BN_free(&d); | ||
3098 | - BN_free(&e); | ||
3099 | - return(1); | ||
3100 | + | ||
3101 | + /* Regression test for a BN_sqr overflow bug. */ | ||
3102 | + BN_hex2bn(&a, | ||
3103 | + "80000000000000008000000000000001FFFFFFFFFFFFFFFE0000000000000000"); | ||
3104 | + BN_sqr(c, a, ctx); | ||
3105 | + if (bp != NULL) | ||
3106 | + { | ||
3107 | + if (!results) | ||
3108 | + { | ||
3109 | + BN_print(bp,a); | ||
3110 | + BIO_puts(bp," * "); | ||
3111 | + BN_print(bp,a); | ||
3112 | + BIO_puts(bp," - "); | ||
3113 | + } | ||
3114 | + BN_print(bp,c); | ||
3115 | + BIO_puts(bp,"\n"); | ||
3116 | + } | ||
3117 | + BN_mul(d, a, a, ctx); | ||
3118 | + if (BN_cmp(c, d)) | ||
3119 | + { | ||
3120 | + fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " | ||
3121 | + "different results!\n"); | ||
3122 | + goto err; | ||
3123 | + } | ||
3124 | + | ||
3125 | + /* Regression test for a BN_sqr overflow bug. */ | ||
3126 | + BN_hex2bn(&a, | ||
3127 | + "80000000000000000000000080000001FFFFFFFE000000000000000000000000"); | ||
3128 | + BN_sqr(c, a, ctx); | ||
3129 | + if (bp != NULL) | ||
3130 | + { | ||
3131 | + if (!results) | ||
3132 | + { | ||
3133 | + BN_print(bp,a); | ||
3134 | + BIO_puts(bp," * "); | ||
3135 | + BN_print(bp,a); | ||
3136 | + BIO_puts(bp," - "); | ||
3137 | + } | ||
3138 | + BN_print(bp,c); | ||
3139 | + BIO_puts(bp,"\n"); | ||
3140 | + } | ||
3141 | + BN_mul(d, a, a, ctx); | ||
3142 | + if (BN_cmp(c, d)) | ||
3143 | + { | ||
3144 | + fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " | ||
3145 | + "different results!\n"); | ||
3146 | + goto err; | ||
3147 | + } | ||
3148 | + ret = 1; | ||
3149 | +err: | ||
3150 | + if (a != NULL) BN_free(a); | ||
3151 | + if (c != NULL) BN_free(c); | ||
3152 | + if (d != NULL) BN_free(d); | ||
3153 | + if (e != NULL) BN_free(e); | ||
3154 | + return ret; | ||
3155 | } | ||
3156 | |||
3157 | int test_mont(BIO *bp, BN_CTX *ctx) | ||
3158 | -- | ||
3159 | 1.9.1 | ||
3160 | |||