summary refs log tree commit diff stats
path: root/meta/recipes-connectivity/openssl/openssl/0008-CVE-2014-3570.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-connectivity/openssl/openssl/0008-CVE-2014-3570.patch')
-rw-r--r-- meta/recipes-connectivity/openssl/openssl/0008-CVE-2014-3570.patch 3160
1 files changed, 3160 insertions, 0 deletions
diff --git a/meta/recipes-connectivity/openssl/openssl/0008-CVE-2014-3570.patch b/meta/recipes-connectivity/openssl/openssl/0008-CVE-2014-3570.patch
new file mode 100644
index 0000000000..100f8a2f85
--- /dev/null
+++ b/meta/recipes-connectivity/openssl/openssl/0008-CVE-2014-3570.patch
@@ -0,0 +1,3160 @@
1From e078642ddea29bbb6ba29788a6a513796387fbbb Mon Sep 17 00:00:00 2001
2From: Andy Polyakov <appro@openssl.org>
3Date: Mon, 5 Jan 2015 14:52:56 +0100
4Subject: [PATCH] Fix for CVE-2014-3570.
5
6Reviewed-by: Emilia Kasper <emilia@openssl.org>
7(cherry picked from commit e793809ba50c1e90ab592fb640a856168e50f3de)
8(with 1.0.1-specific addendum)
9Fixes CVE-2014-3570.
10
11Upstream-Status: Backport
12
13Signed-off-by: Sona Sarmadi <sona.sarmadi@enea.com>
14---
15 crypto/bn/asm/mips.pl | 611 +++---------
16 crypto/bn/asm/mips3.s | 2201 --------------------------------------------
17 crypto/bn/asm/x86_64-gcc.c | 34 +-
18 crypto/bn/bn_asm.c | 16 +-
19 crypto/bn/bntest.c | 102 +-
20 5 files changed, 234 insertions(+), 2730 deletions(-)
21 delete mode 100644 crypto/bn/asm/mips3.s
22
23diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl
24index d2f3ef7..215c9a7 100644
25--- a/crypto/bn/asm/mips.pl
26+++ b/crypto/bn/asm/mips.pl
27@@ -1872,6 +1872,41 @@ ___
28
29 ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3);
30
31+sub add_c2 () {
32+my ($hi,$lo,$c0,$c1,$c2,
33+ $warm, # !$warm denotes first call with specific sequence of
34+ # $c_[XYZ] when there is no Z-carry to accumulate yet;
35+ $an,$bn # these two are arguments for the multiplication whose
36+ # result is used in the *next* step [which is why it's
37+ # commented as "forward multiplication" below];
38+ )=@_;
39+$code.=<<___;
40+ mflo $lo
41+ mfhi $hi
42+ $ADDU $c0,$lo
43+ sltu $at,$c0,$lo
44+ $MULTU $an,$bn # forward multiplication
45+ $ADDU $c0,$lo
46+ $ADDU $at,$hi
47+ sltu $lo,$c0,$lo
48+ $ADDU $c1,$at
49+ $ADDU $hi,$lo
50+___
51+$code.=<<___ if (!$warm);
52+ sltu $c2,$c1,$at
53+ $ADDU $c1,$hi
54+ sltu $hi,$c1,$hi
55+ $ADDU $c2,$hi
56+___
57+$code.=<<___ if ($warm);
58+ sltu $at,$c1,$at
59+ $ADDU $c1,$hi
60+ $ADDU $c2,$at
61+ sltu $hi,$c1,$hi
62+ $ADDU $c2,$hi
63+___
64+}
65+
66 $code.=<<___;
67
68 .align 5
69@@ -1920,21 +1955,10 @@ $code.=<<___;
70 sltu $at,$c_2,$t_1
71 $ADDU $c_3,$t_2,$at
72 $ST $c_2,$BNSZ($a0)
73-
74- mflo $t_1
75- mfhi $t_2
76- slt $c_2,$t_2,$zero
77- $SLL $t_2,1
78- $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2);
79- slt $a2,$t_1,$zero
80- $ADDU $t_2,$a2
81- $SLL $t_1,1
82- $ADDU $c_3,$t_1
83- sltu $at,$c_3,$t_1
84- $ADDU $t_2,$at
85- $ADDU $c_1,$t_2
86- sltu $at,$c_1,$t_2
87- $ADDU $c_2,$at
88+___
89+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
90+ $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
91+$code.=<<___;
92 mflo $t_1
93 mfhi $t_2
94 $ADDU $c_3,$t_1
95@@ -1945,67 +1969,19 @@ $code.=<<___;
96 sltu $at,$c_1,$t_2
97 $ADDU $c_2,$at
98 $ST $c_3,2*$BNSZ($a0)
99-
100- mflo $t_1
101- mfhi $t_2
102- slt $c_3,$t_2,$zero
103- $SLL $t_2,1
104- $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3);
105- slt $a2,$t_1,$zero
106- $ADDU $t_2,$a2
107- $SLL $t_1,1
108- $ADDU $c_1,$t_1
109- sltu $at,$c_1,$t_1
110- $ADDU $t_2,$at
111- $ADDU $c_2,$t_2
112- sltu $at,$c_2,$t_2
113- $ADDU $c_3,$at
114- mflo $t_1
115- mfhi $t_2
116- slt $at,$t_2,$zero
117- $ADDU $c_3,$at
118- $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1);
119- $SLL $t_2,1
120- slt $a2,$t_1,$zero
121- $ADDU $t_2,$a2
122- $SLL $t_1,1
123- $ADDU $c_1,$t_1
124- sltu $at,$c_1,$t_1
125- $ADDU $t_2,$at
126- $ADDU $c_2,$t_2
127- sltu $at,$c_2,$t_2
128- $ADDU $c_3,$at
129+___
130+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
131+ $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3);
132+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
133+ $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1);
134+$code.=<<___;
135 $ST $c_1,3*$BNSZ($a0)
136-
137- mflo $t_1
138- mfhi $t_2
139- slt $c_1,$t_2,$zero
140- $SLL $t_2,1
141- $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1);
142- slt $a2,$t_1,$zero
143- $ADDU $t_2,$a2
144- $SLL $t_1,1
145- $ADDU $c_2,$t_1
146- sltu $at,$c_2,$t_1
147- $ADDU $t_2,$at
148- $ADDU $c_3,$t_2
149- sltu $at,$c_3,$t_2
150- $ADDU $c_1,$at
151- mflo $t_1
152- mfhi $t_2
153- slt $at,$t_2,$zero
154- $ADDU $c_1,$at
155- $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1);
156- $SLL $t_2,1
157- slt $a2,$t_1,$zero
158- $ADDU $t_2,$a2
159- $SLL $t_1,1
160- $ADDU $c_2,$t_1
161- sltu $at,$c_2,$t_1
162- $ADDU $t_2,$at
163- $ADDU $c_3,$t_2
164- sltu $at,$c_3,$t_2
165- $ADDU $c_1,$at
166+___
167+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
168+ $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1);
169+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
170+ $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
171+$code.=<<___;
172 mflo $t_1
173 mfhi $t_2
174 $ADDU $c_2,$t_1
175@@ -2016,97 +1992,23 @@ $code.=<<___;
176 sltu $at,$c_3,$t_2
177 $ADDU $c_1,$at
178 $ST $c_2,4*$BNSZ($a0)
179-
180- mflo $t_1
181- mfhi $t_2
182- slt $c_2,$t_2,$zero
183- $SLL $t_2,1
184- $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2);
185- slt $a2,$t_1,$zero
186- $ADDU $t_2,$a2
187- $SLL $t_1,1
188- $ADDU $c_3,$t_1
189- sltu $at,$c_3,$t_1
190- $ADDU $t_2,$at
191- $ADDU $c_1,$t_2
192- sltu $at,$c_1,$t_2
193- $ADDU $c_2,$at
194- mflo $t_1
195- mfhi $t_2
196- slt $at,$t_2,$zero
197- $ADDU $c_2,$at
198- $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2);
199- $SLL $t_2,1
200- slt $a2,$t_1,$zero
201- $ADDU $t_2,$a2
202- $SLL $t_1,1
203- $ADDU $c_3,$t_1
204- sltu $at,$c_3,$t_1
205- $ADDU $t_2,$at
206- $ADDU $c_1,$t_2
207- sltu $at,$c_1,$t_2
208- $ADDU $c_2,$at
209- mflo $t_1
210- mfhi $t_2
211- slt $at,$t_2,$zero
212- $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3);
213- $ADDU $c_2,$at
214- $SLL $t_2,1
215- slt $a2,$t_1,$zero
216- $ADDU $t_2,$a2
217- $SLL $t_1,1
218- $ADDU $c_3,$t_1
219- sltu $at,$c_3,$t_1
220- $ADDU $t_2,$at
221- $ADDU $c_1,$t_2
222- sltu $at,$c_1,$t_2
223- $ADDU $c_2,$at
224+___
225+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
226+ $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2);
227+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
228+ $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2);
229+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
230+ $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3);
231+$code.=<<___;
232 $ST $c_3,5*$BNSZ($a0)
233-
234- mflo $t_1
235- mfhi $t_2
236- slt $c_3,$t_2,$zero
237- $SLL $t_2,1
238- $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3);
239- slt $a2,$t_1,$zero
240- $ADDU $t_2,$a2
241- $SLL $t_1,1
242- $ADDU $c_1,$t_1
243- sltu $at,$c_1,$t_1
244- $ADDU $t_2,$at
245- $ADDU $c_2,$t_2
246- sltu $at,$c_2,$t_2
247- $ADDU $c_3,$at
248- mflo $t_1
249- mfhi $t_2
250- slt $at,$t_2,$zero
251- $ADDU $c_3,$at
252- $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3);
253- $SLL $t_2,1
254- slt $a2,$t_1,$zero
255- $ADDU $t_2,$a2
256- $SLL $t_1,1
257- $ADDU $c_1,$t_1
258- sltu $at,$c_1,$t_1
259- $ADDU $t_2,$at
260- $ADDU $c_2,$t_2
261- sltu $at,$c_2,$t_2
262- $ADDU $c_3,$at
263- mflo $t_1
264- mfhi $t_2
265- slt $at,$t_2,$zero
266- $ADDU $c_3,$at
267- $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3);
268- $SLL $t_2,1
269- slt $a2,$t_1,$zero
270- $ADDU $t_2,$a2
271- $SLL $t_1,1
272- $ADDU $c_1,$t_1
273- sltu $at,$c_1,$t_1
274- $ADDU $t_2,$at
275- $ADDU $c_2,$t_2
276- sltu $at,$c_2,$t_2
277- $ADDU $c_3,$at
278+___
279+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
280+ $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3);
281+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
282+ $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3);
283+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
284+ $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
285+$code.=<<___;
286 mflo $t_1
287 mfhi $t_2
288 $ADDU $c_1,$t_1
289@@ -2117,112 +2019,25 @@ $code.=<<___;
290 sltu $at,$c_2,$t_2
291 $ADDU $c_3,$at
292 $ST $c_1,6*$BNSZ($a0)
293-
294- mflo $t_1
295- mfhi $t_2
296- slt $c_1,$t_2,$zero
297- $SLL $t_2,1
298- $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1);
299- slt $a2,$t_1,$zero
300- $ADDU $t_2,$a2
301- $SLL $t_1,1
302- $ADDU $c_2,$t_1
303- sltu $at,$c_2,$t_1
304- $ADDU $t_2,$at
305- $ADDU $c_3,$t_2
306- sltu $at,$c_3,$t_2
307- $ADDU $c_1,$at
308- mflo $t_1
309- mfhi $t_2
310- slt $at,$t_2,$zero
311- $ADDU $c_1,$at
312- $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1);
313- $SLL $t_2,1
314- slt $a2,$t_1,$zero
315- $ADDU $t_2,$a2
316- $SLL $t_1,1
317- $ADDU $c_2,$t_1
318- sltu $at,$c_2,$t_1
319- $ADDU $t_2,$at
320- $ADDU $c_3,$t_2
321- sltu $at,$c_3,$t_2
322- $ADDU $c_1,$at
323- mflo $t_1
324- mfhi $t_2
325- slt $at,$t_2,$zero
326- $ADDU $c_1,$at
327- $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1);
328- $SLL $t_2,1
329- slt $a2,$t_1,$zero
330- $ADDU $t_2,$a2
331- $SLL $t_1,1
332- $ADDU $c_2,$t_1
333- sltu $at,$c_2,$t_1
334- $ADDU $t_2,$at
335- $ADDU $c_3,$t_2
336- sltu $at,$c_3,$t_2
337- $ADDU $c_1,$at
338- mflo $t_1
339- mfhi $t_2
340- slt $at,$t_2,$zero
341- $ADDU $c_1,$at
342- $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2);
343- $SLL $t_2,1
344- slt $a2,$t_1,$zero
345- $ADDU $t_2,$a2
346- $SLL $t_1,1
347- $ADDU $c_2,$t_1
348- sltu $at,$c_2,$t_1
349- $ADDU $t_2,$at
350- $ADDU $c_3,$t_2
351- sltu $at,$c_3,$t_2
352- $ADDU $c_1,$at
353+___
354+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
355+ $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1);
356+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
357+ $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1);
358+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
359+ $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1);
360+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
361+ $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2);
362+$code.=<<___;
363 $ST $c_2,7*$BNSZ($a0)
364-
365- mflo $t_1
366- mfhi $t_2
367- slt $c_2,$t_2,$zero
368- $SLL $t_2,1
369- $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2);
370- slt $a2,$t_1,$zero
371- $ADDU $t_2,$a2
372- $SLL $t_1,1
373- $ADDU $c_3,$t_1
374- sltu $at,$c_3,$t_1
375- $ADDU $t_2,$at
376- $ADDU $c_1,$t_2
377- sltu $at,$c_1,$t_2
378- $ADDU $c_2,$at
379- mflo $t_1
380- mfhi $t_2
381- slt $at,$t_2,$zero
382- $ADDU $c_2,$at
383- $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2);
384- $SLL $t_2,1
385- slt $a2,$t_1,$zero
386- $ADDU $t_2,$a2
387- $SLL $t_1,1
388- $ADDU $c_3,$t_1
389- sltu $at,$c_3,$t_1
390- $ADDU $t_2,$at
391- $ADDU $c_1,$t_2
392- sltu $at,$c_1,$t_2
393- $ADDU $c_2,$at
394- mflo $t_1
395- mfhi $t_2
396- slt $at,$t_2,$zero
397- $ADDU $c_2,$at
398- $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2);
399- $SLL $t_2,1
400- slt $a2,$t_1,$zero
401- $ADDU $t_2,$a2
402- $SLL $t_1,1
403- $ADDU $c_3,$t_1
404- sltu $at,$c_3,$t_1
405- $ADDU $t_2,$at
406- $ADDU $c_1,$t_2
407- sltu $at,$c_1,$t_2
408- $ADDU $c_2,$at
409+___
410+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
411+ $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2);
412+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
413+ $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2);
414+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
415+ $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2);
416+$code.=<<___;
417 mflo $t_1
418 mfhi $t_2
419 $ADDU $c_3,$t_1
420@@ -2233,82 +2048,21 @@ $code.=<<___;
421 sltu $at,$c_1,$t_2
422 $ADDU $c_2,$at
423 $ST $c_3,8*$BNSZ($a0)
424-
425- mflo $t_1
426- mfhi $t_2
427- slt $c_3,$t_2,$zero
428- $SLL $t_2,1
429- $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3);
430- slt $a2,$t_1,$zero
431- $ADDU $t_2,$a2
432- $SLL $t_1,1
433- $ADDU $c_1,$t_1
434- sltu $at,$c_1,$t_1
435- $ADDU $t_2,$at
436- $ADDU $c_2,$t_2
437- sltu $at,$c_2,$t_2
438- $ADDU $c_3,$at
439- mflo $t_1
440- mfhi $t_2
441- slt $at,$t_2,$zero
442- $ADDU $c_3,$at
443- $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3);
444- $SLL $t_2,1
445- slt $a2,$t_1,$zero
446- $ADDU $t_2,$a2
447- $SLL $t_1,1
448- $ADDU $c_1,$t_1
449- sltu $at,$c_1,$t_1
450- $ADDU $t_2,$at
451- $ADDU $c_2,$t_2
452- sltu $at,$c_2,$t_2
453- $ADDU $c_3,$at
454- mflo $t_1
455- mfhi $t_2
456- slt $at,$t_2,$zero
457- $ADDU $c_3,$at
458- $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1);
459- $SLL $t_2,1
460- slt $a2,$t_1,$zero
461- $ADDU $t_2,$a2
462- $SLL $t_1,1
463- $ADDU $c_1,$t_1
464- sltu $at,$c_1,$t_1
465- $ADDU $t_2,$at
466- $ADDU $c_2,$t_2
467- sltu $at,$c_2,$t_2
468- $ADDU $c_3,$at
469+___
470+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
471+ $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3);
472+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
473+ $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3);
474+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
475+ $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1);
476+$code.=<<___;
477 $ST $c_1,9*$BNSZ($a0)
478-
479- mflo $t_1
480- mfhi $t_2
481- slt $c_1,$t_2,$zero
482- $SLL $t_2,1
483- $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1);
484- slt $a2,$t_1,$zero
485- $ADDU $t_2,$a2
486- $SLL $t_1,1
487- $ADDU $c_2,$t_1
488- sltu $at,$c_2,$t_1
489- $ADDU $t_2,$at
490- $ADDU $c_3,$t_2
491- sltu $at,$c_3,$t_2
492- $ADDU $c_1,$at
493- mflo $t_1
494- mfhi $t_2
495- slt $at,$t_2,$zero
496- $ADDU $c_1,$at
497- $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1);
498- $SLL $t_2,1
499- slt $a2,$t_1,$zero
500- $ADDU $t_2,$a2
501- $SLL $t_1,1
502- $ADDU $c_2,$t_1
503- sltu $at,$c_2,$t_1
504- $ADDU $t_2,$at
505- $ADDU $c_3,$t_2
506- sltu $at,$c_3,$t_2
507- $ADDU $c_1,$at
508+___
509+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
510+ $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1);
511+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
512+ $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1);
513+$code.=<<___;
514 mflo $t_1
515 mfhi $t_2
516 $ADDU $c_2,$t_1
517@@ -2319,52 +2073,17 @@ $code.=<<___;
518 sltu $at,$c_3,$t_2
519 $ADDU $c_1,$at
520 $ST $c_2,10*$BNSZ($a0)
521-
522- mflo $t_1
523- mfhi $t_2
524- slt $c_2,$t_2,$zero
525- $SLL $t_2,1
526- $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2);
527- slt $a2,$t_1,$zero
528- $ADDU $t_2,$a2
529- $SLL $t_1,1
530- $ADDU $c_3,$t_1
531- sltu $at,$c_3,$t_1
532- $ADDU $t_2,$at
533- $ADDU $c_1,$t_2
534- sltu $at,$c_1,$t_2
535- $ADDU $c_2,$at
536- mflo $t_1
537- mfhi $t_2
538- slt $at,$t_2,$zero
539- $ADDU $c_2,$at
540- $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3);
541- $SLL $t_2,1
542- slt $a2,$t_1,$zero
543- $ADDU $t_2,$a2
544- $SLL $t_1,1
545- $ADDU $c_3,$t_1
546- sltu $at,$c_3,$t_1
547- $ADDU $t_2,$at
548- $ADDU $c_1,$t_2
549- sltu $at,$c_1,$t_2
550- $ADDU $c_2,$at
551+___
552+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
553+ $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2);
554+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
555+ $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3);
556+$code.=<<___;
557 $ST $c_3,11*$BNSZ($a0)
558-
559- mflo $t_1
560- mfhi $t_2
561- slt $c_3,$t_2,$zero
562- $SLL $t_2,1
563- $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3);
564- slt $a2,$t_1,$zero
565- $ADDU $t_2,$a2
566- $SLL $t_1,1
567- $ADDU $c_1,$t_1
568- sltu $at,$c_1,$t_1
569- $ADDU $t_2,$at
570- $ADDU $c_2,$t_2
571- sltu $at,$c_2,$t_2
572- $ADDU $c_3,$at
573+___
574+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
575+ $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3);
576+$code.=<<___;
577 mflo $t_1
578 mfhi $t_2
579 $ADDU $c_1,$t_1
580@@ -2375,21 +2094,10 @@ $code.=<<___;
581 sltu $at,$c_2,$t_2
582 $ADDU $c_3,$at
583 $ST $c_1,12*$BNSZ($a0)
584-
585- mflo $t_1
586- mfhi $t_2
587- slt $c_1,$t_2,$zero
588- $SLL $t_2,1
589- $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2);
590- slt $a2,$t_1,$zero
591- $ADDU $t_2,$a2
592- $SLL $t_1,1
593- $ADDU $c_2,$t_1
594- sltu $at,$c_2,$t_1
595- $ADDU $t_2,$at
596- $ADDU $c_3,$t_2
597- sltu $at,$c_3,$t_2
598- $ADDU $c_1,$at
599+___
600+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
601+ $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2);
602+$code.=<<___;
603 $ST $c_2,13*$BNSZ($a0)
604
605 mflo $t_1
606@@ -2457,21 +2165,10 @@ $code.=<<___;
607 sltu $at,$c_2,$t_1
608 $ADDU $c_3,$t_2,$at
609 $ST $c_2,$BNSZ($a0)
610-
611- mflo $t_1
612- mfhi $t_2
613- slt $c_2,$t_2,$zero
614- $SLL $t_2,1
615- $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2);
616- slt $a2,$t_1,$zero
617- $ADDU $t_2,$a2
618- $SLL $t_1,1
619- $ADDU $c_3,$t_1
620- sltu $at,$c_3,$t_1
621- $ADDU $t_2,$at
622- $ADDU $c_1,$t_2
623- sltu $at,$c_1,$t_2
624- $ADDU $c_2,$at
625+___
626+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
627+ $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
628+$code.=<<___;
629 mflo $t_1
630 mfhi $t_2
631 $ADDU $c_3,$t_1
632@@ -2482,52 +2179,17 @@ $code.=<<___;
633 sltu $at,$c_1,$t_2
634 $ADDU $c_2,$at
635 $ST $c_3,2*$BNSZ($a0)
636-
637- mflo $t_1
638- mfhi $t_2
639- slt $c_3,$t_2,$zero
640- $SLL $t_2,1
641- $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3);
642- slt $a2,$t_1,$zero
643- $ADDU $t_2,$a2
644- $SLL $t_1,1
645- $ADDU $c_1,$t_1
646- sltu $at,$c_1,$t_1
647- $ADDU $t_2,$at
648- $ADDU $c_2,$t_2
649- sltu $at,$c_2,$t_2
650- $ADDU $c_3,$at
651- mflo $t_1
652- mfhi $t_2
653- slt $at,$t_2,$zero
654- $ADDU $c_3,$at
655- $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1);
656- $SLL $t_2,1
657- slt $a2,$t_1,$zero
658- $ADDU $t_2,$a2
659- $SLL $t_1,1
660- $ADDU $c_1,$t_1
661- sltu $at,$c_1,$t_1
662- $ADDU $t_2,$at
663- $ADDU $c_2,$t_2
664- sltu $at,$c_2,$t_2
665- $ADDU $c_3,$at
666+___
667+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
668+ $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3);
669+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
670+ $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1);
671+$code.=<<___;
672 $ST $c_1,3*$BNSZ($a0)
673-
674- mflo $t_1
675- mfhi $t_2
676- slt $c_1,$t_2,$zero
677- $SLL $t_2,1
678- $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1);
679- slt $a2,$t_1,$zero
680- $ADDU $t_2,$a2
681- $SLL $t_1,1
682- $ADDU $c_2,$t_1
683- sltu $at,$c_2,$t_1
684- $ADDU $t_2,$at
685- $ADDU $c_3,$t_2
686- sltu $at,$c_3,$t_2
687- $ADDU $c_1,$at
688+___
689+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
690+ $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
691+$code.=<<___;
692 mflo $t_1
693 mfhi $t_2
694 $ADDU $c_2,$t_1
695@@ -2538,21 +2200,10 @@ $code.=<<___;
696 sltu $at,$c_3,$t_2
697 $ADDU $c_1,$at
698 $ST $c_2,4*$BNSZ($a0)
699-
700- mflo $t_1
701- mfhi $t_2
702- slt $c_2,$t_2,$zero
703- $SLL $t_2,1
704- $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3);
705- slt $a2,$t_1,$zero
706- $ADDU $t_2,$a2
707- $SLL $t_1,1
708- $ADDU $c_3,$t_1
709- sltu $at,$c_3,$t_1
710- $ADDU $t_2,$at
711- $ADDU $c_1,$t_2
712- sltu $at,$c_1,$t_2
713- $ADDU $c_2,$at
714+___
715+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
716+ $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
717+$code.=<<___;
718 $ST $c_3,5*$BNSZ($a0)
719
720 mflo $t_1
721diff --git a/crypto/bn/asm/mips3.s b/crypto/bn/asm/mips3.s
722deleted file mode 100644
723index dca4105..0000000
724--- a/crypto/bn/asm/mips3.s
725+++ /dev/null
726@@ -1,2201 +0,0 @@
727-.rdata
728-.asciiz "mips3.s, Version 1.1"
729-.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
730-
731-/*
732- * ====================================================================
733- * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
734- * project.
735- *
736- * Rights for redistribution and usage in source and binary forms are
737- * granted according to the OpenSSL license. Warranty of any kind is
738- * disclaimed.
739- * ====================================================================
740- */
741-
742-/*
743- * This is my modest contributon to the OpenSSL project (see
744- * http://www.openssl.org/ for more information about it) and is
745- * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
746- * module. For updates see http://fy.chalmers.se/~appro/hpe/.
747- *
748- * The module is designed to work with either of the "new" MIPS ABI(5),
749- * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under
750- * IRIX 5.x not only because it doesn't support new ABIs but also
751- * because 5.x kernels put R4x00 CPU into 32-bit mode and all those
752- * 64-bit instructions (daddu, dmultu, etc.) found below gonna only
753- * cause illegal instruction exception:-(
754- *
755- * In addition the code depends on preprocessor flags set up by MIPSpro
756- * compiler driver (either as or cc) and therefore (probably?) can't be
757- * compiled by the GNU assembler. GNU C driver manages fine though...
758- * I mean as long as -mmips-as is specified or is the default option,
759- * because then it simply invokes /usr/bin/as which in turn takes
760- * perfect care of the preprocessor definitions. Another neat feature
761- * offered by the MIPSpro assembler is an optimization pass. This gave
762- * me the opportunity to have the code looking more regular as all those
763- * architecture dependent instruction rescheduling details were left to
764- * the assembler. Cool, huh?
765- *
766- * Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
767- * goes way over 3 times faster!
768- *
769- * <appro@fy.chalmers.se>
770- */
771-#include <asm.h>
772-#include <regdef.h>
773-
774-#if _MIPS_ISA>=4
775-#define MOVNZ(cond,dst,src) \
776- movn dst,src,cond
777-#else
778-#define MOVNZ(cond,dst,src) \
779- .set noreorder; \
780- bnezl cond,.+8; \
781- move dst,src; \
782- .set reorder
783-#endif
784-
785-.text
786-
787-.set noat
788-.set reorder
789-
790-#define MINUS4 v1
791-
792-.align 5
793-LEAF(bn_mul_add_words)
794- .set noreorder
795- bgtzl a2,.L_bn_mul_add_words_proceed
796- ld t0,0(a1)
797- jr ra
798- move v0,zero
799- .set reorder
800-
801-.L_bn_mul_add_words_proceed:
802- li MINUS4,-4
803- and ta0,a2,MINUS4
804- move v0,zero
805- beqz ta0,.L_bn_mul_add_words_tail
806-
807-.L_bn_mul_add_words_loop:
808- dmultu t0,a3
809- ld t1,0(a0)
810- ld t2,8(a1)
811- ld t3,8(a0)
812- ld ta0,16(a1)
813- ld ta1,16(a0)
814- daddu t1,v0
815- sltu v0,t1,v0 /* All manuals say it "compares 32-bit
816- * values", but it seems to work fine
817- * even on 64-bit registers. */
818- mflo AT
819- mfhi t0
820- daddu t1,AT
821- daddu v0,t0
822- sltu AT,t1,AT
823- sd t1,0(a0)
824- daddu v0,AT
825-
826- dmultu t2,a3
827- ld ta2,24(a1)
828- ld ta3,24(a0)
829- daddu t3,v0
830- sltu v0,t3,v0
831- mflo AT
832- mfhi t2
833- daddu t3,AT
834- daddu v0,t2
835- sltu AT,t3,AT
836- sd t3,8(a0)
837- daddu v0,AT
838-
839- dmultu ta0,a3
840- subu a2,4
841- PTR_ADD a0,32
842- PTR_ADD a1,32
843- daddu ta1,v0
844- sltu v0,ta1,v0
845- mflo AT
846- mfhi ta0
847- daddu ta1,AT
848- daddu v0,ta0
849- sltu AT,ta1,AT
850- sd ta1,-16(a0)
851- daddu v0,AT
852-
853-
854- dmultu ta2,a3
855- and ta0,a2,MINUS4
856- daddu ta3,v0
857- sltu v0,ta3,v0
858- mflo AT
859- mfhi ta2
860- daddu ta3,AT
861- daddu v0,ta2
862- sltu AT,ta3,AT
863- sd ta3,-8(a0)
864- daddu v0,AT
865- .set noreorder
866- bgtzl ta0,.L_bn_mul_add_words_loop
867- ld t0,0(a1)
868-
869- bnezl a2,.L_bn_mul_add_words_tail
870- ld t0,0(a1)
871- .set reorder
872-
873-.L_bn_mul_add_words_return:
874- jr ra
875-
876-.L_bn_mul_add_words_tail:
877- dmultu t0,a3
878- ld t1,0(a0)
879- subu a2,1
880- daddu t1,v0
881- sltu v0,t1,v0
882- mflo AT
883- mfhi t0
884- daddu t1,AT
885- daddu v0,t0
886- sltu AT,t1,AT
887- sd t1,0(a0)
888- daddu v0,AT
889- beqz a2,.L_bn_mul_add_words_return
890-
891- ld t0,8(a1)
892- dmultu t0,a3
893- ld t1,8(a0)
894- subu a2,1
895- daddu t1,v0
896- sltu v0,t1,v0
897- mflo AT
898- mfhi t0
899- daddu t1,AT
900- daddu v0,t0
901- sltu AT,t1,AT
902- sd t1,8(a0)
903- daddu v0,AT
904- beqz a2,.L_bn_mul_add_words_return
905-
906- ld t0,16(a1)
907- dmultu t0,a3
908- ld t1,16(a0)
909- daddu t1,v0
910- sltu v0,t1,v0
911- mflo AT
912- mfhi t0
913- daddu t1,AT
914- daddu v0,t0
915- sltu AT,t1,AT
916- sd t1,16(a0)
917- daddu v0,AT
918- jr ra
919-END(bn_mul_add_words)
920-
921-.align 5
922-LEAF(bn_mul_words)
923- .set noreorder
924- bgtzl a2,.L_bn_mul_words_proceed
925- ld t0,0(a1)
926- jr ra
927- move v0,zero
928- .set reorder
929-
930-.L_bn_mul_words_proceed:
931- li MINUS4,-4
932- and ta0,a2,MINUS4
933- move v0,zero
934- beqz ta0,.L_bn_mul_words_tail
935-
936-.L_bn_mul_words_loop:
937- dmultu t0,a3
938- ld t2,8(a1)
939- ld ta0,16(a1)
940- ld ta2,24(a1)
941- mflo AT
942- mfhi t0
943- daddu v0,AT
944- sltu t1,v0,AT
945- sd v0,0(a0)
946- daddu v0,t1,t0
947-
948- dmultu t2,a3
949- subu a2,4
950- PTR_ADD a0,32
951- PTR_ADD a1,32
952- mflo AT
953- mfhi t2
954- daddu v0,AT
955- sltu t3,v0,AT
956- sd v0,-24(a0)
957- daddu v0,t3,t2
958-
959- dmultu ta0,a3
960- mflo AT
961- mfhi ta0
962- daddu v0,AT
963- sltu ta1,v0,AT
964- sd v0,-16(a0)
965- daddu v0,ta1,ta0
966-
967-
968- dmultu ta2,a3
969- and ta0,a2,MINUS4
970- mflo AT
971- mfhi ta2
972- daddu v0,AT
973- sltu ta3,v0,AT
974- sd v0,-8(a0)
975- daddu v0,ta3,ta2
976- .set noreorder
977- bgtzl ta0,.L_bn_mul_words_loop
978- ld t0,0(a1)
979-
980- bnezl a2,.L_bn_mul_words_tail
981- ld t0,0(a1)
982- .set reorder
983-
984-.L_bn_mul_words_return:
985- jr ra
986-
987-.L_bn_mul_words_tail:
988- dmultu t0,a3
989- subu a2,1
990- mflo AT
991- mfhi t0
992- daddu v0,AT
993- sltu t1,v0,AT
994- sd v0,0(a0)
995- daddu v0,t1,t0
996- beqz a2,.L_bn_mul_words_return
997-
998- ld t0,8(a1)
999- dmultu t0,a3
1000- subu a2,1
1001- mflo AT
1002- mfhi t0
1003- daddu v0,AT
1004- sltu t1,v0,AT
1005- sd v0,8(a0)
1006- daddu v0,t1,t0
1007- beqz a2,.L_bn_mul_words_return
1008-
1009- ld t0,16(a1)
1010- dmultu t0,a3
1011- mflo AT
1012- mfhi t0
1013- daddu v0,AT
1014- sltu t1,v0,AT
1015- sd v0,16(a0)
1016- daddu v0,t1,t0
1017- jr ra
1018-END(bn_mul_words)
1019-
1020-.align 5
1021-LEAF(bn_sqr_words)
1022- .set noreorder
1023- bgtzl a2,.L_bn_sqr_words_proceed
1024- ld t0,0(a1)
1025- jr ra
1026- move v0,zero
1027- .set reorder
1028-
1029-.L_bn_sqr_words_proceed:
1030- li MINUS4,-4
1031- and ta0,a2,MINUS4
1032- move v0,zero
1033- beqz ta0,.L_bn_sqr_words_tail
1034-
1035-.L_bn_sqr_words_loop:
1036- dmultu t0,t0
1037- ld t2,8(a1)
1038- ld ta0,16(a1)
1039- ld ta2,24(a1)
1040- mflo t1
1041- mfhi t0
1042- sd t1,0(a0)
1043- sd t0,8(a0)
1044-
1045- dmultu t2,t2
1046- subu a2,4
1047- PTR_ADD a0,64
1048- PTR_ADD a1,32
1049- mflo t3
1050- mfhi t2
1051- sd t3,-48(a0)
1052- sd t2,-40(a0)
1053-
1054- dmultu ta0,ta0
1055- mflo ta1
1056- mfhi ta0
1057- sd ta1,-32(a0)
1058- sd ta0,-24(a0)
1059-
1060-
1061- dmultu ta2,ta2
1062- and ta0,a2,MINUS4
1063- mflo ta3
1064- mfhi ta2
1065- sd ta3,-16(a0)
1066- sd ta2,-8(a0)
1067-
1068- .set noreorder
1069- bgtzl ta0,.L_bn_sqr_words_loop
1070- ld t0,0(a1)
1071-
1072- bnezl a2,.L_bn_sqr_words_tail
1073- ld t0,0(a1)
1074- .set reorder
1075-
1076-.L_bn_sqr_words_return:
1077- move v0,zero
1078- jr ra
1079-
1080-.L_bn_sqr_words_tail:
1081- dmultu t0,t0
1082- subu a2,1
1083- mflo t1
1084- mfhi t0
1085- sd t1,0(a0)
1086- sd t0,8(a0)
1087- beqz a2,.L_bn_sqr_words_return
1088-
1089- ld t0,8(a1)
1090- dmultu t0,t0
1091- subu a2,1
1092- mflo t1
1093- mfhi t0
1094- sd t1,16(a0)
1095- sd t0,24(a0)
1096- beqz a2,.L_bn_sqr_words_return
1097-
1098- ld t0,16(a1)
1099- dmultu t0,t0
1100- mflo t1
1101- mfhi t0
1102- sd t1,32(a0)
1103- sd t0,40(a0)
1104- jr ra
1105-END(bn_sqr_words)
1106-
1107-.align 5
1108-LEAF(bn_add_words)
1109- .set noreorder
1110- bgtzl a3,.L_bn_add_words_proceed
1111- ld t0,0(a1)
1112- jr ra
1113- move v0,zero
1114- .set reorder
1115-
1116-.L_bn_add_words_proceed:
1117- li MINUS4,-4
1118- and AT,a3,MINUS4
1119- move v0,zero
1120- beqz AT,.L_bn_add_words_tail
1121-
1122-.L_bn_add_words_loop:
1123- ld ta0,0(a2)
1124- subu a3,4
1125- ld t1,8(a1)
1126- and AT,a3,MINUS4
1127- ld t2,16(a1)
1128- PTR_ADD a2,32
1129- ld t3,24(a1)
1130- PTR_ADD a0,32
1131- ld ta1,-24(a2)
1132- PTR_ADD a1,32
1133- ld ta2,-16(a2)
1134- ld ta3,-8(a2)
1135- daddu ta0,t0
1136- sltu t8,ta0,t0
1137- daddu t0,ta0,v0
1138- sltu v0,t0,ta0
1139- sd t0,-32(a0)
1140- daddu v0,t8
1141-
1142- daddu ta1,t1
1143- sltu t9,ta1,t1
1144- daddu t1,ta1,v0
1145- sltu v0,t1,ta1
1146- sd t1,-24(a0)
1147- daddu v0,t9
1148-
1149- daddu ta2,t2
1150- sltu t8,ta2,t2
1151- daddu t2,ta2,v0
1152- sltu v0,t2,ta2
1153- sd t2,-16(a0)
1154- daddu v0,t8
1155-
1156- daddu ta3,t3
1157- sltu t9,ta3,t3
1158- daddu t3,ta3,v0
1159- sltu v0,t3,ta3
1160- sd t3,-8(a0)
1161- daddu v0,t9
1162-
1163- .set noreorder
1164- bgtzl AT,.L_bn_add_words_loop
1165- ld t0,0(a1)
1166-
1167- bnezl a3,.L_bn_add_words_tail
1168- ld t0,0(a1)
1169- .set reorder
1170-
1171-.L_bn_add_words_return:
1172- jr ra
1173-
1174-.L_bn_add_words_tail:
1175- ld ta0,0(a2)
1176- daddu ta0,t0
1177- subu a3,1
1178- sltu t8,ta0,t0
1179- daddu t0,ta0,v0
1180- sltu v0,t0,ta0
1181- sd t0,0(a0)
1182- daddu v0,t8
1183- beqz a3,.L_bn_add_words_return
1184-
1185- ld t1,8(a1)
1186- ld ta1,8(a2)
1187- daddu ta1,t1
1188- subu a3,1
1189- sltu t9,ta1,t1
1190- daddu t1,ta1,v0
1191- sltu v0,t1,ta1
1192- sd t1,8(a0)
1193- daddu v0,t9
1194- beqz a3,.L_bn_add_words_return
1195-
1196- ld t2,16(a1)
1197- ld ta2,16(a2)
1198- daddu ta2,t2
1199- sltu t8,ta2,t2
1200- daddu t2,ta2,v0
1201- sltu v0,t2,ta2
1202- sd t2,16(a0)
1203- daddu v0,t8
1204- jr ra
1205-END(bn_add_words)
1206-
1207-.align 5
1208-LEAF(bn_sub_words)
1209- .set noreorder
1210- bgtzl a3,.L_bn_sub_words_proceed
1211- ld t0,0(a1)
1212- jr ra
1213- move v0,zero
1214- .set reorder
1215-
1216-.L_bn_sub_words_proceed:
1217- li MINUS4,-4
1218- and AT,a3,MINUS4
1219- move v0,zero
1220- beqz AT,.L_bn_sub_words_tail
1221-
1222-.L_bn_sub_words_loop:
1223- ld ta0,0(a2)
1224- subu a3,4
1225- ld t1,8(a1)
1226- and AT,a3,MINUS4
1227- ld t2,16(a1)
1228- PTR_ADD a2,32
1229- ld t3,24(a1)
1230- PTR_ADD a0,32
1231- ld ta1,-24(a2)
1232- PTR_ADD a1,32
1233- ld ta2,-16(a2)
1234- ld ta3,-8(a2)
1235- sltu t8,t0,ta0
1236- dsubu t0,ta0
1237- dsubu ta0,t0,v0
1238- sd ta0,-32(a0)
1239- MOVNZ (t0,v0,t8)
1240-
1241- sltu t9,t1,ta1
1242- dsubu t1,ta1
1243- dsubu ta1,t1,v0
1244- sd ta1,-24(a0)
1245- MOVNZ (t1,v0,t9)
1246-
1247-
1248- sltu t8,t2,ta2
1249- dsubu t2,ta2
1250- dsubu ta2,t2,v0
1251- sd ta2,-16(a0)
1252- MOVNZ (t2,v0,t8)
1253-
1254- sltu t9,t3,ta3
1255- dsubu t3,ta3
1256- dsubu ta3,t3,v0
1257- sd ta3,-8(a0)
1258- MOVNZ (t3,v0,t9)
1259-
1260- .set noreorder
1261- bgtzl AT,.L_bn_sub_words_loop
1262- ld t0,0(a1)
1263-
1264- bnezl a3,.L_bn_sub_words_tail
1265- ld t0,0(a1)
1266- .set reorder
1267-
1268-.L_bn_sub_words_return:
1269- jr ra
1270-
1271-.L_bn_sub_words_tail:
1272- ld ta0,0(a2)
1273- subu a3,1
1274- sltu t8,t0,ta0
1275- dsubu t0,ta0
1276- dsubu ta0,t0,v0
1277- MOVNZ (t0,v0,t8)
1278- sd ta0,0(a0)
1279- beqz a3,.L_bn_sub_words_return
1280-
1281- ld t1,8(a1)
1282- subu a3,1
1283- ld ta1,8(a2)
1284- sltu t9,t1,ta1
1285- dsubu t1,ta1
1286- dsubu ta1,t1,v0
1287- MOVNZ (t1,v0,t9)
1288- sd ta1,8(a0)
1289- beqz a3,.L_bn_sub_words_return
1290-
1291- ld t2,16(a1)
1292- ld ta2,16(a2)
1293- sltu t8,t2,ta2
1294- dsubu t2,ta2
1295- dsubu ta2,t2,v0
1296- MOVNZ (t2,v0,t8)
1297- sd ta2,16(a0)
1298- jr ra
1299-END(bn_sub_words)
1300-
1301-#undef MINUS4
1302-
1303-.align 5
1304-LEAF(bn_div_3_words)
1305- .set reorder
1306- move a3,a0 /* we know that bn_div_words doesn't
1307- * touch a3, ta2, ta3 and preserves a2
1308- * so that we can save two arguments
1309- * and return address in registers
1310- * instead of stack:-)
1311- */
1312- ld a0,(a3)
1313- move ta2,a1
1314- ld a1,-8(a3)
1315- bne a0,a2,.L_bn_div_3_words_proceed
1316- li v0,-1
1317- jr ra
1318-.L_bn_div_3_words_proceed:
1319- move ta3,ra
1320- bal bn_div_words
1321- move ra,ta3
1322- dmultu ta2,v0
1323- ld t2,-16(a3)
1324- move ta0,zero
1325- mfhi t1
1326- mflo t0
1327- sltu t8,t1,v1
1328-.L_bn_div_3_words_inner_loop:
1329- bnez t8,.L_bn_div_3_words_inner_loop_done
1330- sgeu AT,t2,t0
1331- seq t9,t1,v1
1332- and AT,t9
1333- sltu t3,t0,ta2
1334- daddu v1,a2
1335- dsubu t1,t3
1336- dsubu t0,ta2
1337- sltu t8,t1,v1
1338- sltu ta0,v1,a2
1339- or t8,ta0
1340- .set noreorder
1341- beqzl AT,.L_bn_div_3_words_inner_loop
1342- dsubu v0,1
1343- .set reorder
1344-.L_bn_div_3_words_inner_loop_done:
1345- jr ra
1346-END(bn_div_3_words)
1347-
1348-.align 5
1349-LEAF(bn_div_words)
1350- .set noreorder
1351- bnezl a2,.L_bn_div_words_proceed
1352- move v1,zero
1353- jr ra
1354- li v0,-1 /* I'd rather signal div-by-zero
1355- * which can be done with 'break 7' */
1356-
1357-.L_bn_div_words_proceed:
1358- bltz a2,.L_bn_div_words_body
1359- move t9,v1
1360- dsll a2,1
1361- bgtz a2,.-4
1362- addu t9,1
1363-
1364- .set reorder
1365- negu t1,t9
1366- li t2,-1
1367- dsll t2,t1
1368- and t2,a0
1369- dsrl AT,a1,t1
1370- .set noreorder
1371- bnezl t2,.+8
1372- break 6 /* signal overflow */
1373- .set reorder
1374- dsll a0,t9
1375- dsll a1,t9
1376- or a0,AT
1377-
1378-#define QT ta0
1379-#define HH ta1
1380-#define DH v1
1381-.L_bn_div_words_body:
1382- dsrl DH,a2,32
1383- sgeu AT,a0,a2
1384- .set noreorder
1385- bnezl AT,.+8
1386- dsubu a0,a2
1387- .set reorder
1388-
1389- li QT,-1
1390- dsrl HH,a0,32
1391- dsrl QT,32 /* q=0xffffffff */
1392- beq DH,HH,.L_bn_div_words_skip_div1
1393- ddivu zero,a0,DH
1394- mflo QT
1395-.L_bn_div_words_skip_div1:
1396- dmultu a2,QT
1397- dsll t3,a0,32
1398- dsrl AT,a1,32
1399- or t3,AT
1400- mflo t0
1401- mfhi t1
1402-.L_bn_div_words_inner_loop1:
1403- sltu t2,t3,t0
1404- seq t8,HH,t1
1405- sltu AT,HH,t1
1406- and t2,t8
1407- sltu v0,t0,a2
1408- or AT,t2
1409- .set noreorder
1410- beqz AT,.L_bn_div_words_inner_loop1_done
1411- dsubu t1,v0
1412- dsubu t0,a2
1413- b .L_bn_div_words_inner_loop1
1414- dsubu QT,1
1415- .set reorder
1416-.L_bn_div_words_inner_loop1_done:
1417-
1418- dsll a1,32
1419- dsubu a0,t3,t0
1420- dsll v0,QT,32
1421-
1422- li QT,-1
1423- dsrl HH,a0,32
1424- dsrl QT,32 /* q=0xffffffff */
1425- beq DH,HH,.L_bn_div_words_skip_div2
1426- ddivu zero,a0,DH
1427- mflo QT
1428-.L_bn_div_words_skip_div2:
1429-#undef DH
1430- dmultu a2,QT
1431- dsll t3,a0,32
1432- dsrl AT,a1,32
1433- or t3,AT
1434- mflo t0
1435- mfhi t1
1436-.L_bn_div_words_inner_loop2:
1437- sltu t2,t3,t0
1438- seq t8,HH,t1
1439- sltu AT,HH,t1
1440- and t2,t8
1441- sltu v1,t0,a2
1442- or AT,t2
1443- .set noreorder
1444- beqz AT,.L_bn_div_words_inner_loop2_done
1445- dsubu t1,v1
1446- dsubu t0,a2
1447- b .L_bn_div_words_inner_loop2
1448- dsubu QT,1
1449- .set reorder
1450-.L_bn_div_words_inner_loop2_done:
1451-#undef HH
1452-
1453- dsubu a0,t3,t0
1454- or v0,QT
1455- dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */
1456- dsrl a2,t9 /* restore a2 */
1457- jr ra
1458-#undef QT
1459-END(bn_div_words)
1460-
1461-#define a_0 t0
1462-#define a_1 t1
1463-#define a_2 t2
1464-#define a_3 t3
1465-#define b_0 ta0
1466-#define b_1 ta1
1467-#define b_2 ta2
1468-#define b_3 ta3
1469-
1470-#define a_4 s0
1471-#define a_5 s2
1472-#define a_6 s4
1473-#define a_7 a1 /* once we load a[7] we don't need a anymore */
1474-#define b_4 s1
1475-#define b_5 s3
1476-#define b_6 s5
1477-#define b_7 a2 /* once we load b[7] we don't need b anymore */
1478-
1479-#define t_1 t8
1480-#define t_2 t9
1481-
1482-#define c_1 v0
1483-#define c_2 v1
1484-#define c_3 a3
1485-
1486-#define FRAME_SIZE 48
1487-
1488-.align 5
1489-LEAF(bn_mul_comba8)
1490- .set noreorder
1491- PTR_SUB sp,FRAME_SIZE
1492- .frame sp,64,ra
1493- .set reorder
1494- ld a_0,0(a1) /* If compiled with -mips3 option on
1495- * R5000 box assembler barks on this
1496- * line with "shouldn't have mult/div
1497- * as last instruction in bb (R10K
1498- * bug)" warning. If anybody out there
1499- * has a clue about how to circumvent
1500- * this do send me a note.
1501- * <appro@fy.chalmers.se>
1502- */
1503- ld b_0,0(a2)
1504- ld a_1,8(a1)
1505- ld a_2,16(a1)
1506- ld a_3,24(a1)
1507- ld b_1,8(a2)
1508- ld b_2,16(a2)
1509- ld b_3,24(a2)
1510- dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
1511- sd s0,0(sp)
1512- sd s1,8(sp)
1513- sd s2,16(sp)
1514- sd s3,24(sp)
1515- sd s4,32(sp)
1516- sd s5,40(sp)
1517- mflo c_1
1518- mfhi c_2
1519-
1520- dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
1521- ld a_4,32(a1)
1522- ld a_5,40(a1)
1523- ld a_6,48(a1)
1524- ld a_7,56(a1)
1525- ld b_4,32(a2)
1526- ld b_5,40(a2)
1527- mflo t_1
1528- mfhi t_2
1529- daddu c_2,t_1
1530- sltu AT,c_2,t_1
1531- daddu c_3,t_2,AT
1532- dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
1533- ld b_6,48(a2)
1534- ld b_7,56(a2)
1535- sd c_1,0(a0) /* r[0]=c1; */
1536- mflo t_1
1537- mfhi t_2
1538- daddu c_2,t_1
1539- sltu AT,c_2,t_1
1540- daddu t_2,AT
1541- daddu c_3,t_2
1542- sltu c_1,c_3,t_2
1543- sd c_2,8(a0) /* r[1]=c2; */
1544-
1545- dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
1546- mflo t_1
1547- mfhi t_2
1548- daddu c_3,t_1
1549- sltu AT,c_3,t_1
1550- daddu t_2,AT
1551- daddu c_1,t_2
1552- dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
1553- mflo t_1
1554- mfhi t_2
1555- daddu c_3,t_1
1556- sltu AT,c_3,t_1
1557- daddu t_2,AT
1558- daddu c_1,t_2
1559- sltu c_2,c_1,t_2
1560- dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
1561- mflo t_1
1562- mfhi t_2
1563- daddu c_3,t_1
1564- sltu AT,c_3,t_1
1565- daddu t_2,AT
1566- daddu c_1,t_2
1567- sltu AT,c_1,t_2
1568- daddu c_2,AT
1569- sd c_3,16(a0) /* r[2]=c3; */
1570-
1571- dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
1572- mflo t_1
1573- mfhi t_2
1574- daddu c_1,t_1
1575- sltu AT,c_1,t_1
1576- daddu t_2,AT
1577- daddu c_2,t_2
1578- sltu c_3,c_2,t_2
1579- dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
1580- mflo t_1
1581- mfhi t_2
1582- daddu c_1,t_1
1583- sltu AT,c_1,t_1
1584- daddu t_2,AT
1585- daddu c_2,t_2
1586- sltu AT,c_2,t_2
1587- daddu c_3,AT
1588- dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
1589- mflo t_1
1590- mfhi t_2
1591- daddu c_1,t_1
1592- sltu AT,c_1,t_1
1593- daddu t_2,AT
1594- daddu c_2,t_2
1595- sltu AT,c_2,t_2
1596- daddu c_3,AT
1597- dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
1598- mflo t_1
1599- mfhi t_2
1600- daddu c_1,t_1
1601- sltu AT,c_1,t_1
1602- daddu t_2,AT
1603- daddu c_2,t_2
1604- sltu AT,c_2,t_2
1605- daddu c_3,AT
1606- sd c_1,24(a0) /* r[3]=c1; */
1607-
1608- dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */
1609- mflo t_1
1610- mfhi t_2
1611- daddu c_2,t_1
1612- sltu AT,c_2,t_1
1613- daddu t_2,AT
1614- daddu c_3,t_2
1615- sltu c_1,c_3,t_2
1616- dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
1617- mflo t_1
1618- mfhi t_2
1619- daddu c_2,t_1
1620- sltu AT,c_2,t_1
1621- daddu t_2,AT
1622- daddu c_3,t_2
1623- sltu AT,c_3,t_2
1624- daddu c_1,AT
1625- dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
1626- mflo t_1
1627- mfhi t_2
1628- daddu c_2,t_1
1629- sltu AT,c_2,t_1
1630- daddu t_2,AT
1631- daddu c_3,t_2
1632- sltu AT,c_3,t_2
1633- daddu c_1,AT
1634- dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
1635- mflo t_1
1636- mfhi t_2
1637- daddu c_2,t_1
1638- sltu AT,c_2,t_1
1639- daddu t_2,AT
1640- daddu c_3,t_2
1641- sltu AT,c_3,t_2
1642- daddu c_1,AT
1643- dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */
1644- mflo t_1
1645- mfhi t_2
1646- daddu c_2,t_1
1647- sltu AT,c_2,t_1
1648- daddu t_2,AT
1649- daddu c_3,t_2
1650- sltu AT,c_3,t_2
1651- daddu c_1,AT
1652- sd c_2,32(a0) /* r[4]=c2; */
1653-
1654- dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */
1655- mflo t_1
1656- mfhi t_2
1657- daddu c_3,t_1
1658- sltu AT,c_3,t_1
1659- daddu t_2,AT
1660- daddu c_1,t_2
1661- sltu c_2,c_1,t_2
1662- dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */
1663- mflo t_1
1664- mfhi t_2
1665- daddu c_3,t_1
1666- sltu AT,c_3,t_1
1667- daddu t_2,AT
1668- daddu c_1,t_2
1669- sltu AT,c_1,t_2
1670- daddu c_2,AT
1671- dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
1672- mflo t_1
1673- mfhi t_2
1674- daddu c_3,t_1
1675- sltu AT,c_3,t_1
1676- daddu t_2,AT
1677- daddu c_1,t_2
1678- sltu AT,c_1,t_2
1679- daddu c_2,AT
1680- dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
1681- mflo t_1
1682- mfhi t_2
1683- daddu c_3,t_1
1684- sltu AT,c_3,t_1
1685- daddu t_2,AT
1686- daddu c_1,t_2
1687- sltu AT,c_1,t_2
1688- daddu c_2,AT
1689- dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */
1690- mflo t_1
1691- mfhi t_2
1692- daddu c_3,t_1
1693- sltu AT,c_3,t_1
1694- daddu t_2,AT
1695- daddu c_1,t_2
1696- sltu AT,c_1,t_2
1697- daddu c_2,AT
1698- dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */
1699- mflo t_1
1700- mfhi t_2
1701- daddu c_3,t_1
1702- sltu AT,c_3,t_1
1703- daddu t_2,AT
1704- daddu c_1,t_2
1705- sltu AT,c_1,t_2
1706- daddu c_2,AT
1707- sd c_3,40(a0) /* r[5]=c3; */
1708-
1709- dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */
1710- mflo t_1
1711- mfhi t_2
1712- daddu c_1,t_1
1713- sltu AT,c_1,t_1
1714- daddu t_2,AT
1715- daddu c_2,t_2
1716- sltu c_3,c_2,t_2
1717- dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */
1718- mflo t_1
1719- mfhi t_2
1720- daddu c_1,t_1
1721- sltu AT,c_1,t_1
1722- daddu t_2,AT
1723- daddu c_2,t_2
1724- sltu AT,c_2,t_2
1725- daddu c_3,AT
1726- dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */
1727- mflo t_1
1728- mfhi t_2
1729- daddu c_1,t_1
1730- sltu AT,c_1,t_1
1731- daddu t_2,AT
1732- daddu c_2,t_2
1733- sltu AT,c_2,t_2
1734- daddu c_3,AT
1735- dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1736- mflo t_1
1737- mfhi t_2
1738- daddu c_1,t_1
1739- sltu AT,c_1,t_1
1740- daddu t_2,AT
1741- daddu c_2,t_2
1742- sltu AT,c_2,t_2
1743- daddu c_3,AT
1744- dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */
1745- mflo t_1
1746- mfhi t_2
1747- daddu c_1,t_1
1748- sltu AT,c_1,t_1
1749- daddu t_2,AT
1750- daddu c_2,t_2
1751- sltu AT,c_2,t_2
1752- daddu c_3,AT
1753- dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */
1754- mflo t_1
1755- mfhi t_2
1756- daddu c_1,t_1
1757- sltu AT,c_1,t_1
1758- daddu t_2,AT
1759- daddu c_2,t_2
1760- sltu AT,c_2,t_2
1761- daddu c_3,AT
1762- dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */
1763- mflo t_1
1764- mfhi t_2
1765- daddu c_1,t_1
1766- sltu AT,c_1,t_1
1767- daddu t_2,AT
1768- daddu c_2,t_2
1769- sltu AT,c_2,t_2
1770- daddu c_3,AT
1771- sd c_1,48(a0) /* r[6]=c1; */
1772-
1773- dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */
1774- mflo t_1
1775- mfhi t_2
1776- daddu c_2,t_1
1777- sltu AT,c_2,t_1
1778- daddu t_2,AT
1779- daddu c_3,t_2
1780- sltu c_1,c_3,t_2
1781- dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */
1782- mflo t_1
1783- mfhi t_2
1784- daddu c_2,t_1
1785- sltu AT,c_2,t_1
1786- daddu t_2,AT
1787- daddu c_3,t_2
1788- sltu AT,c_3,t_2
1789- daddu c_1,AT
1790- dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */
1791- mflo t_1
1792- mfhi t_2
1793- daddu c_2,t_1
1794- sltu AT,c_2,t_1
1795- daddu t_2,AT
1796- daddu c_3,t_2
1797- sltu AT,c_3,t_2
1798- daddu c_1,AT
1799- dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */
1800- mflo t_1
1801- mfhi t_2
1802- daddu c_2,t_1
1803- sltu AT,c_2,t_1
1804- daddu t_2,AT
1805- daddu c_3,t_2
1806- sltu AT,c_3,t_2
1807- daddu c_1,AT
1808- dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */
1809- mflo t_1
1810- mfhi t_2
1811- daddu c_2,t_1
1812- sltu AT,c_2,t_1
1813- daddu t_2,AT
1814- daddu c_3,t_2
1815- sltu AT,c_3,t_2
1816- daddu c_1,AT
1817- dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */
1818- mflo t_1
1819- mfhi t_2
1820- daddu c_2,t_1
1821- sltu AT,c_2,t_1
1822- daddu t_2,AT
1823- daddu c_3,t_2
1824- sltu AT,c_3,t_2
1825- daddu c_1,AT
1826- dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */
1827- mflo t_1
1828- mfhi t_2
1829- daddu c_2,t_1
1830- sltu AT,c_2,t_1
1831- daddu t_2,AT
1832- daddu c_3,t_2
1833- sltu AT,c_3,t_2
1834- daddu c_1,AT
1835- dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */
1836- mflo t_1
1837- mfhi t_2
1838- daddu c_2,t_1
1839- sltu AT,c_2,t_1
1840- daddu t_2,AT
1841- daddu c_3,t_2
1842- sltu AT,c_3,t_2
1843- daddu c_1,AT
1844- sd c_2,56(a0) /* r[7]=c2; */
1845-
1846- dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */
1847- mflo t_1
1848- mfhi t_2
1849- daddu c_3,t_1
1850- sltu AT,c_3,t_1
1851- daddu t_2,AT
1852- daddu c_1,t_2
1853- sltu c_2,c_1,t_2
1854- dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */
1855- mflo t_1
1856- mfhi t_2
1857- daddu c_3,t_1
1858- sltu AT,c_3,t_1
1859- daddu t_2,AT
1860- daddu c_1,t_2
1861- sltu AT,c_1,t_2
1862- daddu c_2,AT
1863- dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */
1864- mflo t_1
1865- mfhi t_2
1866- daddu c_3,t_1
1867- sltu AT,c_3,t_1
1868- daddu t_2,AT
1869- daddu c_1,t_2
1870- sltu AT,c_1,t_2
1871- daddu c_2,AT
1872- dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
1873- mflo t_1
1874- mfhi t_2
1875- daddu c_3,t_1
1876- sltu AT,c_3,t_1
1877- daddu t_2,AT
1878- daddu c_1,t_2
1879- sltu AT,c_1,t_2
1880- daddu c_2,AT
1881- dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */
1882- mflo t_1
1883- mfhi t_2
1884- daddu c_3,t_1
1885- sltu AT,c_3,t_1
1886- daddu t_2,AT
1887- daddu c_1,t_2
1888- sltu AT,c_1,t_2
1889- daddu c_2,AT
1890- dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */
1891- mflo t_1
1892- mfhi t_2
1893- daddu c_3,t_1
1894- sltu AT,c_3,t_1
1895- daddu t_2,AT
1896- daddu c_1,t_2
1897- sltu AT,c_1,t_2
1898- daddu c_2,AT
1899- dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */
1900- mflo t_1
1901- mfhi t_2
1902- daddu c_3,t_1
1903- sltu AT,c_3,t_1
1904- daddu t_2,AT
1905- daddu c_1,t_2
1906- sltu AT,c_1,t_2
1907- daddu c_2,AT
1908- sd c_3,64(a0) /* r[8]=c3; */
1909-
1910- dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */
1911- mflo t_1
1912- mfhi t_2
1913- daddu c_1,t_1
1914- sltu AT,c_1,t_1
1915- daddu t_2,AT
1916- daddu c_2,t_2
1917- sltu c_3,c_2,t_2
1918- dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */
1919- mflo t_1
1920- mfhi t_2
1921- daddu c_1,t_1
1922- sltu AT,c_1,t_1
1923- daddu t_2,AT
1924- daddu c_2,t_2
1925- sltu AT,c_2,t_2
1926- daddu c_3,AT
1927- dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */
1928- mflo t_1
1929- mfhi t_2
1930- daddu c_1,t_1
1931- sltu AT,c_1,t_1
1932- daddu t_2,AT
1933- daddu c_2,t_2
1934- sltu AT,c_2,t_2
1935- daddu c_3,AT
1936- dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */
1937- mflo t_1
1938- mfhi t_2
1939- daddu c_1,t_1
1940- sltu AT,c_1,t_1
1941- daddu t_2,AT
1942- daddu c_2,t_2
1943- sltu AT,c_2,t_2
1944- daddu c_3,AT
1945- dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */
1946- mflo t_1
1947- mfhi t_2
1948- daddu c_1,t_1
1949- sltu AT,c_1,t_1
1950- daddu t_2,AT
1951- daddu c_2,t_2
1952- sltu AT,c_2,t_2
1953- daddu c_3,AT
1954- dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */
1955- mflo t_1
1956- mfhi t_2
1957- daddu c_1,t_1
1958- sltu AT,c_1,t_1
1959- daddu t_2,AT
1960- daddu c_2,t_2
1961- sltu AT,c_2,t_2
1962- daddu c_3,AT
1963- sd c_1,72(a0) /* r[9]=c1; */
1964-
1965- dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */
1966- mflo t_1
1967- mfhi t_2
1968- daddu c_2,t_1
1969- sltu AT,c_2,t_1
1970- daddu t_2,AT
1971- daddu c_3,t_2
1972- sltu c_1,c_3,t_2
1973- dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */
1974- mflo t_1
1975- mfhi t_2
1976- daddu c_2,t_1
1977- sltu AT,c_2,t_1
1978- daddu t_2,AT
1979- daddu c_3,t_2
1980- sltu AT,c_3,t_2
1981- daddu c_1,AT
1982- dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
1983- mflo t_1
1984- mfhi t_2
1985- daddu c_2,t_1
1986- sltu AT,c_2,t_1
1987- daddu t_2,AT
1988- daddu c_3,t_2
1989- sltu AT,c_3,t_2
1990- daddu c_1,AT
1991- dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */
1992- mflo t_1
1993- mfhi t_2
1994- daddu c_2,t_1
1995- sltu AT,c_2,t_1
1996- daddu t_2,AT
1997- daddu c_3,t_2
1998- sltu AT,c_3,t_2
1999- daddu c_1,AT
2000- dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */
2001- mflo t_1
2002- mfhi t_2
2003- daddu c_2,t_1
2004- sltu AT,c_2,t_1
2005- daddu t_2,AT
2006- daddu c_3,t_2
2007- sltu AT,c_3,t_2
2008- daddu c_1,AT
2009- sd c_2,80(a0) /* r[10]=c2; */
2010-
2011- dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */
2012- mflo t_1
2013- mfhi t_2
2014- daddu c_3,t_1
2015- sltu AT,c_3,t_1
2016- daddu t_2,AT
2017- daddu c_1,t_2
2018- sltu c_2,c_1,t_2
2019- dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */
2020- mflo t_1
2021- mfhi t_2
2022- daddu c_3,t_1
2023- sltu AT,c_3,t_1
2024- daddu t_2,AT
2025- daddu c_1,t_2
2026- sltu AT,c_1,t_2
2027- daddu c_2,AT
2028- dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */
2029- mflo t_1
2030- mfhi t_2
2031- daddu c_3,t_1
2032- sltu AT,c_3,t_1
2033- daddu t_2,AT
2034- daddu c_1,t_2
2035- sltu AT,c_1,t_2
2036- daddu c_2,AT
2037- dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */
2038- mflo t_1
2039- mfhi t_2
2040- daddu c_3,t_1
2041- sltu AT,c_3,t_1
2042- daddu t_2,AT
2043- daddu c_1,t_2
2044- sltu AT,c_1,t_2
2045- daddu c_2,AT
2046- sd c_3,88(a0) /* r[11]=c3; */
2047-
2048- dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */
2049- mflo t_1
2050- mfhi t_2
2051- daddu c_1,t_1
2052- sltu AT,c_1,t_1
2053- daddu t_2,AT
2054- daddu c_2,t_2
2055- sltu c_3,c_2,t_2
2056- dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
2057- mflo t_1
2058- mfhi t_2
2059- daddu c_1,t_1
2060- sltu AT,c_1,t_1
2061- daddu t_2,AT
2062- daddu c_2,t_2
2063- sltu AT,c_2,t_2
2064- daddu c_3,AT
2065- dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */
2066- mflo t_1
2067- mfhi t_2
2068- daddu c_1,t_1
2069- sltu AT,c_1,t_1
2070- daddu t_2,AT
2071- daddu c_2,t_2
2072- sltu AT,c_2,t_2
2073- daddu c_3,AT
2074- sd c_1,96(a0) /* r[12]=c1; */
2075-
2076- dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */
2077- mflo t_1
2078- mfhi t_2
2079- daddu c_2,t_1
2080- sltu AT,c_2,t_1
2081- daddu t_2,AT
2082- daddu c_3,t_2
2083- sltu c_1,c_3,t_2
2084- dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */
2085- mflo t_1
2086- mfhi t_2
2087- daddu c_2,t_1
2088- sltu AT,c_2,t_1
2089- daddu t_2,AT
2090- daddu c_3,t_2
2091- sltu AT,c_3,t_2
2092- daddu c_1,AT
2093- sd c_2,104(a0) /* r[13]=c2; */
2094-
2095- dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
2096- ld s0,0(sp)
2097- ld s1,8(sp)
2098- ld s2,16(sp)
2099- ld s3,24(sp)
2100- ld s4,32(sp)
2101- ld s5,40(sp)
2102- mflo t_1
2103- mfhi t_2
2104- daddu c_3,t_1
2105- sltu AT,c_3,t_1
2106- daddu t_2,AT
2107- daddu c_1,t_2
2108- sd c_3,112(a0) /* r[14]=c3; */
2109- sd c_1,120(a0) /* r[15]=c1; */
2110-
2111- PTR_ADD sp,FRAME_SIZE
2112-
2113- jr ra
2114-END(bn_mul_comba8)
2115-
2116-.align 5
2117-LEAF(bn_mul_comba4)
2118- .set reorder
2119- ld a_0,0(a1)
2120- ld b_0,0(a2)
2121- ld a_1,8(a1)
2122- ld a_2,16(a1)
2123- dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
2124- ld a_3,24(a1)
2125- ld b_1,8(a2)
2126- ld b_2,16(a2)
2127- ld b_3,24(a2)
2128- mflo c_1
2129- mfhi c_2
2130- sd c_1,0(a0)
2131-
2132- dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
2133- mflo t_1
2134- mfhi t_2
2135- daddu c_2,t_1
2136- sltu AT,c_2,t_1
2137- daddu c_3,t_2,AT
2138- dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
2139- mflo t_1
2140- mfhi t_2
2141- daddu c_2,t_1
2142- sltu AT,c_2,t_1
2143- daddu t_2,AT
2144- daddu c_3,t_2
2145- sltu c_1,c_3,t_2
2146- sd c_2,8(a0)
2147-
2148- dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
2149- mflo t_1
2150- mfhi t_2
2151- daddu c_3,t_1
2152- sltu AT,c_3,t_1
2153- daddu t_2,AT
2154- daddu c_1,t_2
2155- dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
2156- mflo t_1
2157- mfhi t_2
2158- daddu c_3,t_1
2159- sltu AT,c_3,t_1
2160- daddu t_2,AT
2161- daddu c_1,t_2
2162- sltu c_2,c_1,t_2
2163- dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
2164- mflo t_1
2165- mfhi t_2
2166- daddu c_3,t_1
2167- sltu AT,c_3,t_1
2168- daddu t_2,AT
2169- daddu c_1,t_2
2170- sltu AT,c_1,t_2
2171- daddu c_2,AT
2172- sd c_3,16(a0)
2173-
2174- dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
2175- mflo t_1
2176- mfhi t_2
2177- daddu c_1,t_1
2178- sltu AT,c_1,t_1
2179- daddu t_2,AT
2180- daddu c_2,t_2
2181- sltu c_3,c_2,t_2
2182- dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
2183- mflo t_1
2184- mfhi t_2
2185- daddu c_1,t_1
2186- sltu AT,c_1,t_1
2187- daddu t_2,AT
2188- daddu c_2,t_2
2189- sltu AT,c_2,t_2
2190- daddu c_3,AT
2191- dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
2192- mflo t_1
2193- mfhi t_2
2194- daddu c_1,t_1
2195- sltu AT,c_1,t_1
2196- daddu t_2,AT
2197- daddu c_2,t_2
2198- sltu AT,c_2,t_2
2199- daddu c_3,AT
2200- dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
2201- mflo t_1
2202- mfhi t_2
2203- daddu c_1,t_1
2204- sltu AT,c_1,t_1
2205- daddu t_2,AT
2206- daddu c_2,t_2
2207- sltu AT,c_2,t_2
2208- daddu c_3,AT
2209- sd c_1,24(a0)
2210-
2211- dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
2212- mflo t_1
2213- mfhi t_2
2214- daddu c_2,t_1
2215- sltu AT,c_2,t_1
2216- daddu t_2,AT
2217- daddu c_3,t_2
2218- sltu c_1,c_3,t_2
2219- dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
2220- mflo t_1
2221- mfhi t_2
2222- daddu c_2,t_1
2223- sltu AT,c_2,t_1
2224- daddu t_2,AT
2225- daddu c_3,t_2
2226- sltu AT,c_3,t_2
2227- daddu c_1,AT
2228- dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
2229- mflo t_1
2230- mfhi t_2
2231- daddu c_2,t_1
2232- sltu AT,c_2,t_1
2233- daddu t_2,AT
2234- daddu c_3,t_2
2235- sltu AT,c_3,t_2
2236- daddu c_1,AT
2237- sd c_2,32(a0)
2238-
2239- dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
2240- mflo t_1
2241- mfhi t_2
2242- daddu c_3,t_1
2243- sltu AT,c_3,t_1
2244- daddu t_2,AT
2245- daddu c_1,t_2
2246- sltu c_2,c_1,t_2
2247- dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
2248- mflo t_1
2249- mfhi t_2
2250- daddu c_3,t_1
2251- sltu AT,c_3,t_1
2252- daddu t_2,AT
2253- daddu c_1,t_2
2254- sltu AT,c_1,t_2
2255- daddu c_2,AT
2256- sd c_3,40(a0)
2257-
2258- dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
2259- mflo t_1
2260- mfhi t_2
2261- daddu c_1,t_1
2262- sltu AT,c_1,t_1
2263- daddu t_2,AT
2264- daddu c_2,t_2
2265- sd c_1,48(a0)
2266- sd c_2,56(a0)
2267-
2268- jr ra
2269-END(bn_mul_comba4)
2270-
2271-#undef a_4
2272-#undef a_5
2273-#undef a_6
2274-#undef a_7
2275-#define a_4 b_0
2276-#define a_5 b_1
2277-#define a_6 b_2
2278-#define a_7 b_3
2279-
2280-.align 5
2281-LEAF(bn_sqr_comba8)
2282- .set reorder
2283- ld a_0,0(a1)
2284- ld a_1,8(a1)
2285- ld a_2,16(a1)
2286- ld a_3,24(a1)
2287-
2288- dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
2289- ld a_4,32(a1)
2290- ld a_5,40(a1)
2291- ld a_6,48(a1)
2292- ld a_7,56(a1)
2293- mflo c_1
2294- mfhi c_2
2295- sd c_1,0(a0)
2296-
2297- dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
2298- mflo t_1
2299- mfhi t_2
2300- slt c_1,t_2,zero
2301- dsll t_2,1
2302- slt a2,t_1,zero
2303- daddu t_2,a2
2304- dsll t_1,1
2305- daddu c_2,t_1
2306- sltu AT,c_2,t_1
2307- daddu c_3,t_2,AT
2308- sd c_2,8(a0)
2309-
2310- dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
2311- mflo t_1
2312- mfhi t_2
2313- slt c_2,t_2,zero
2314- dsll t_2,1
2315- slt a2,t_1,zero
2316- daddu t_2,a2
2317- dsll t_1,1
2318- daddu c_3,t_1
2319- sltu AT,c_3,t_1
2320- daddu t_2,AT
2321- daddu c_1,t_2
2322- sltu AT,c_1,t_2
2323- daddu c_2,AT
2324- dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
2325- mflo t_1
2326- mfhi t_2
2327- daddu c_3,t_1
2328- sltu AT,c_3,t_1
2329- daddu t_2,AT
2330- daddu c_1,t_2
2331- sltu AT,c_1,t_2
2332- daddu c_2,AT
2333- sd c_3,16(a0)
2334-
2335- dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
2336- mflo t_1
2337- mfhi t_2
2338- slt c_3,t_2,zero
2339- dsll t_2,1
2340- slt a2,t_1,zero
2341- daddu t_2,a2
2342- dsll t_1,1
2343- daddu c_1,t_1
2344- sltu AT,c_1,t_1
2345- daddu t_2,AT
2346- daddu c_2,t_2
2347- sltu AT,c_2,t_2
2348- daddu c_3,AT
2349- dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */
2350- mflo t_1
2351- mfhi t_2
2352- slt AT,t_2,zero
2353- daddu c_3,AT
2354- dsll t_2,1
2355- slt a2,t_1,zero
2356- daddu t_2,a2
2357- dsll t_1,1
2358- daddu c_1,t_1
2359- sltu AT,c_1,t_1
2360- daddu t_2,AT
2361- daddu c_2,t_2
2362- sltu AT,c_2,t_2
2363- daddu c_3,AT
2364- sd c_1,24(a0)
2365-
2366- dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */
2367- mflo t_1
2368- mfhi t_2
2369- slt c_1,t_2,zero
2370- dsll t_2,1
2371- slt a2,t_1,zero
2372- daddu t_2,a2
2373- dsll t_1,1
2374- daddu c_2,t_1
2375- sltu AT,c_2,t_1
2376- daddu t_2,AT
2377- daddu c_3,t_2
2378- sltu AT,c_3,t_2
2379- daddu c_1,AT
2380- dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
2381- mflo t_1
2382- mfhi t_2
2383- slt AT,t_2,zero
2384- daddu c_1,AT
2385- dsll t_2,1
2386- slt a2,t_1,zero
2387- daddu t_2,a2
2388- dsll t_1,1
2389- daddu c_2,t_1
2390- sltu AT,c_2,t_1
2391- daddu t_2,AT
2392- daddu c_3,t_2
2393- sltu AT,c_3,t_2
2394- daddu c_1,AT
2395- dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
2396- mflo t_1
2397- mfhi t_2
2398- daddu c_2,t_1
2399- sltu AT,c_2,t_1
2400- daddu t_2,AT
2401- daddu c_3,t_2
2402- sltu AT,c_3,t_2
2403- daddu c_1,AT
2404- sd c_2,32(a0)
2405-
2406- dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */
2407- mflo t_1
2408- mfhi t_2
2409- slt c_2,t_2,zero
2410- dsll t_2,1
2411- slt a2,t_1,zero
2412- daddu t_2,a2
2413- dsll t_1,1
2414- daddu c_3,t_1
2415- sltu AT,c_3,t_1
2416- daddu t_2,AT
2417- daddu c_1,t_2
2418- sltu AT,c_1,t_2
2419- daddu c_2,AT
2420- dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */
2421- mflo t_1
2422- mfhi t_2
2423- slt AT,t_2,zero
2424- daddu c_2,AT
2425- dsll t_2,1
2426- slt a2,t_1,zero
2427- daddu t_2,a2
2428- dsll t_1,1
2429- daddu c_3,t_1
2430- sltu AT,c_3,t_1
2431- daddu t_2,AT
2432- daddu c_1,t_2
2433- sltu AT,c_1,t_2
2434- daddu c_2,AT
2435- dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
2436- mflo t_1
2437- mfhi t_2
2438- slt AT,t_2,zero
2439- daddu c_2,AT
2440- dsll t_2,1
2441- slt a2,t_1,zero
2442- daddu t_2,a2
2443- dsll t_1,1
2444- daddu c_3,t_1
2445- sltu AT,c_3,t_1
2446- daddu t_2,AT
2447- daddu c_1,t_2
2448- sltu AT,c_1,t_2
2449- daddu c_2,AT
2450- sd c_3,40(a0)
2451-
2452- dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */
2453- mflo t_1
2454- mfhi t_2
2455- slt c_3,t_2,zero
2456- dsll t_2,1
2457- slt a2,t_1,zero
2458- daddu t_2,a2
2459- dsll t_1,1
2460- daddu c_1,t_1
2461- sltu AT,c_1,t_1
2462- daddu t_2,AT
2463- daddu c_2,t_2
2464- sltu AT,c_2,t_2
2465- daddu c_3,AT
2466- dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */
2467- mflo t_1
2468- mfhi t_2
2469- slt AT,t_2,zero
2470- daddu c_3,AT
2471- dsll t_2,1
2472- slt a2,t_1,zero
2473- daddu t_2,a2
2474- dsll t_1,1
2475- daddu c_1,t_1
2476- sltu AT,c_1,t_1
2477- daddu t_2,AT
2478- daddu c_2,t_2
2479- sltu AT,c_2,t_2
2480- daddu c_3,AT
2481- dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */
2482- mflo t_1
2483- mfhi t_2
2484- slt AT,t_2,zero
2485- daddu c_3,AT
2486- dsll t_2,1
2487- slt a2,t_1,zero
2488- daddu t_2,a2
2489- dsll t_1,1
2490- daddu c_1,t_1
2491- sltu AT,c_1,t_1
2492- daddu t_2,AT
2493- daddu c_2,t_2
2494- sltu AT,c_2,t_2
2495- daddu c_3,AT
2496- dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
2497- mflo t_1
2498- mfhi t_2
2499- daddu c_1,t_1
2500- sltu AT,c_1,t_1
2501- daddu t_2,AT
2502- daddu c_2,t_2
2503- sltu AT,c_2,t_2
2504- daddu c_3,AT
2505- sd c_1,48(a0)
2506-
2507- dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */
2508- mflo t_1
2509- mfhi t_2
2510- slt c_1,t_2,zero
2511- dsll t_2,1
2512- slt a2,t_1,zero
2513- daddu t_2,a2
2514- dsll t_1,1
2515- daddu c_2,t_1
2516- sltu AT,c_2,t_1
2517- daddu t_2,AT
2518- daddu c_3,t_2
2519- sltu AT,c_3,t_2
2520- daddu c_1,AT
2521- dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */
2522- mflo t_1
2523- mfhi t_2
2524- slt AT,t_2,zero
2525- daddu c_1,AT
2526- dsll t_2,1
2527- slt a2,t_1,zero
2528- daddu t_2,a2
2529- dsll t_1,1
2530- daddu c_2,t_1
2531- sltu AT,c_2,t_1
2532- daddu t_2,AT
2533- daddu c_3,t_2
2534- sltu AT,c_3,t_2
2535- daddu c_1,AT
2536- dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */
2537- mflo t_1
2538- mfhi t_2
2539- slt AT,t_2,zero
2540- daddu c_1,AT
2541- dsll t_2,1
2542- slt a2,t_1,zero
2543- daddu t_2,a2
2544- dsll t_1,1
2545- daddu c_2,t_1
2546- sltu AT,c_2,t_1
2547- daddu t_2,AT
2548- daddu c_3,t_2
2549- sltu AT,c_3,t_2
2550- daddu c_1,AT
2551- dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */
2552- mflo t_1
2553- mfhi t_2
2554- slt AT,t_2,zero
2555- daddu c_1,AT
2556- dsll t_2,1
2557- slt a2,t_1,zero
2558- daddu t_2,a2
2559- dsll t_1,1
2560- daddu c_2,t_1
2561- sltu AT,c_2,t_1
2562- daddu t_2,AT
2563- daddu c_3,t_2
2564- sltu AT,c_3,t_2
2565- daddu c_1,AT
2566- sd c_2,56(a0)
2567-
2568- dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */
2569- mflo t_1
2570- mfhi t_2
2571- slt c_2,t_2,zero
2572- dsll t_2,1
2573- slt a2,t_1,zero
2574- daddu t_2,a2
2575- dsll t_1,1
2576- daddu c_3,t_1
2577- sltu AT,c_3,t_1
2578- daddu t_2,AT
2579- daddu c_1,t_2
2580- sltu AT,c_1,t_2
2581- daddu c_2,AT
2582- dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */
2583- mflo t_1
2584- mfhi t_2
2585- slt AT,t_2,zero
2586- daddu c_2,AT
2587- dsll t_2,1
2588- slt a2,t_1,zero
2589- daddu t_2,a2
2590- dsll t_1,1
2591- daddu c_3,t_1
2592- sltu AT,c_3,t_1
2593- daddu t_2,AT
2594- daddu c_1,t_2
2595- sltu AT,c_1,t_2
2596- daddu c_2,AT
2597- dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */
2598- mflo t_1
2599- mfhi t_2
2600- slt AT,t_2,zero
2601- daddu c_2,AT
2602- dsll t_2,1
2603- slt a2,t_1,zero
2604- daddu t_2,a2
2605- dsll t_1,1
2606- daddu c_3,t_1
2607- sltu AT,c_3,t_1
2608- daddu t_2,AT
2609- daddu c_1,t_2
2610- sltu AT,c_1,t_2
2611- daddu c_2,AT
2612- dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
2613- mflo t_1
2614- mfhi t_2
2615- daddu c_3,t_1
2616- sltu AT,c_3,t_1
2617- daddu t_2,AT
2618- daddu c_1,t_2
2619- sltu AT,c_1,t_2
2620- daddu c_2,AT
2621- sd c_3,64(a0)
2622-
2623- dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */
2624- mflo t_1
2625- mfhi t_2
2626- slt c_3,t_2,zero
2627- dsll t_2,1
2628- slt a2,t_1,zero
2629- daddu t_2,a2
2630- dsll t_1,1
2631- daddu c_1,t_1
2632- sltu AT,c_1,t_1
2633- daddu t_2,AT
2634- daddu c_2,t_2
2635- sltu AT,c_2,t_2
2636- daddu c_3,AT
2637- dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */
2638- mflo t_1
2639- mfhi t_2
2640- slt AT,t_2,zero
2641- daddu c_3,AT
2642- dsll t_2,1
2643- slt a2,t_1,zero
2644- daddu t_2,a2
2645- dsll t_1,1
2646- daddu c_1,t_1
2647- sltu AT,c_1,t_1
2648- daddu t_2,AT
2649- daddu c_2,t_2
2650- sltu AT,c_2,t_2
2651- daddu c_3,AT
2652- dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */
2653- mflo t_1
2654- mfhi t_2
2655- slt AT,t_2,zero
2656- daddu c_3,AT
2657- dsll t_2,1
2658- slt a2,t_1,zero
2659- daddu t_2,a2
2660- dsll t_1,1
2661- daddu c_1,t_1
2662- sltu AT,c_1,t_1
2663- daddu t_2,AT
2664- daddu c_2,t_2
2665- sltu AT,c_2,t_2
2666- daddu c_3,AT
2667- sd c_1,72(a0)
2668-
2669- dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */
2670- mflo t_1
2671- mfhi t_2
2672- slt c_1,t_2,zero
2673- dsll t_2,1
2674- slt a2,t_1,zero
2675- daddu t_2,a2
2676- dsll t_1,1
2677- daddu c_2,t_1
2678- sltu AT,c_2,t_1
2679- daddu t_2,AT
2680- daddu c_3,t_2
2681- sltu AT,c_3,t_2
2682- daddu c_1,AT
2683- dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */
2684- mflo t_1
2685- mfhi t_2
2686- slt AT,t_2,zero
2687- daddu c_1,AT
2688- dsll t_2,1
2689- slt a2,t_1,zero
2690- daddu t_2,a2
2691- dsll t_1,1
2692- daddu c_2,t_1
2693- sltu AT,c_2,t_1
2694- daddu t_2,AT
2695- daddu c_3,t_2
2696- sltu AT,c_3,t_2
2697- daddu c_1,AT
2698- dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
2699- mflo t_1
2700- mfhi t_2
2701- daddu c_2,t_1
2702- sltu AT,c_2,t_1
2703- daddu t_2,AT
2704- daddu c_3,t_2
2705- sltu AT,c_3,t_2
2706- daddu c_1,AT
2707- sd c_2,80(a0)
2708-
2709- dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */
2710- mflo t_1
2711- mfhi t_2
2712- slt c_2,t_2,zero
2713- dsll t_2,1
2714- slt a2,t_1,zero
2715- daddu t_2,a2
2716- dsll t_1,1
2717- daddu c_3,t_1
2718- sltu AT,c_3,t_1
2719- daddu t_2,AT
2720- daddu c_1,t_2
2721- sltu AT,c_1,t_2
2722- daddu c_2,AT
2723- dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */
2724- mflo t_1
2725- mfhi t_2
2726- slt AT,t_2,zero
2727- daddu c_2,AT
2728- dsll t_2,1
2729- slt a2,t_1,zero
2730- daddu t_2,a2
2731- dsll t_1,1
2732- daddu c_3,t_1
2733- sltu AT,c_3,t_1
2734- daddu t_2,AT
2735- daddu c_1,t_2
2736- sltu AT,c_1,t_2
2737- daddu c_2,AT
2738- sd c_3,88(a0)
2739-
2740- dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */
2741- mflo t_1
2742- mfhi t_2
2743- slt c_3,t_2,zero
2744- dsll t_2,1
2745- slt a2,t_1,zero
2746- daddu t_2,a2
2747- dsll t_1,1
2748- daddu c_1,t_1
2749- sltu AT,c_1,t_1
2750- daddu t_2,AT
2751- daddu c_2,t_2
2752- sltu AT,c_2,t_2
2753- daddu c_3,AT
2754- dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
2755- mflo t_1
2756- mfhi t_2
2757- daddu c_1,t_1
2758- sltu AT,c_1,t_1
2759- daddu t_2,AT
2760- daddu c_2,t_2
2761- sltu AT,c_2,t_2
2762- daddu c_3,AT
2763- sd c_1,96(a0)
2764-
2765- dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */
2766- mflo t_1
2767- mfhi t_2
2768- slt c_1,t_2,zero
2769- dsll t_2,1
2770- slt a2,t_1,zero
2771- daddu t_2,a2
2772- dsll t_1,1
2773- daddu c_2,t_1
2774- sltu AT,c_2,t_1
2775- daddu t_2,AT
2776- daddu c_3,t_2
2777- sltu AT,c_3,t_2
2778- daddu c_1,AT
2779- sd c_2,104(a0)
2780-
2781- dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
2782- mflo t_1
2783- mfhi t_2
2784- daddu c_3,t_1
2785- sltu AT,c_3,t_1
2786- daddu t_2,AT
2787- daddu c_1,t_2
2788- sd c_3,112(a0)
2789- sd c_1,120(a0)
2790-
2791- jr ra
2792-END(bn_sqr_comba8)
2793-
2794-.align 5
2795-LEAF(bn_sqr_comba4)
2796- .set reorder
2797- ld a_0,0(a1)
2798- ld a_1,8(a1)
2799- ld a_2,16(a1)
2800- ld a_3,24(a1)
2801- dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
2802- mflo c_1
2803- mfhi c_2
2804- sd c_1,0(a0)
2805-
2806- dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
2807- mflo t_1
2808- mfhi t_2
2809- slt c_1,t_2,zero
2810- dsll t_2,1
2811- slt a2,t_1,zero
2812- daddu t_2,a2
2813- dsll t_1,1
2814- daddu c_2,t_1
2815- sltu AT,c_2,t_1
2816- daddu c_3,t_2,AT
2817- sd c_2,8(a0)
2818-
2819- dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
2820- mflo t_1
2821- mfhi t_2
2822- slt c_2,t_2,zero
2823- dsll t_2,1
2824- slt a2,t_1,zero
2825- daddu t_2,a2
2826- dsll t_1,1
2827- daddu c_3,t_1
2828- sltu AT,c_3,t_1
2829- daddu t_2,AT
2830- daddu c_1,t_2
2831- sltu AT,c_1,t_2
2832- daddu c_2,AT
2833- dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
2834- mflo t_1
2835- mfhi t_2
2836- daddu c_3,t_1
2837- sltu AT,c_3,t_1
2838- daddu t_2,AT
2839- daddu c_1,t_2
2840- sltu AT,c_1,t_2
2841- daddu c_2,AT
2842- sd c_3,16(a0)
2843-
2844- dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
2845- mflo t_1
2846- mfhi t_2
2847- slt c_3,t_2,zero
2848- dsll t_2,1
2849- slt a2,t_1,zero
2850- daddu t_2,a2
2851- dsll t_1,1
2852- daddu c_1,t_1
2853- sltu AT,c_1,t_1
2854- daddu t_2,AT
2855- daddu c_2,t_2
2856- sltu AT,c_2,t_2
2857- daddu c_3,AT
2858- dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */
2859- mflo t_1
2860- mfhi t_2
2861- slt AT,t_2,zero
2862- daddu c_3,AT
2863- dsll t_2,1
2864- slt a2,t_1,zero
2865- daddu t_2,a2
2866- dsll t_1,1
2867- daddu c_1,t_1
2868- sltu AT,c_1,t_1
2869- daddu t_2,AT
2870- daddu c_2,t_2
2871- sltu AT,c_2,t_2
2872- daddu c_3,AT
2873- sd c_1,24(a0)
2874-
2875- dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
2876- mflo t_1
2877- mfhi t_2
2878- slt c_1,t_2,zero
2879- dsll t_2,1
2880- slt a2,t_1,zero
2881- daddu t_2,a2
2882- dsll t_1,1
2883- daddu c_2,t_1
2884- sltu AT,c_2,t_1
2885- daddu t_2,AT
2886- daddu c_3,t_2
2887- sltu AT,c_3,t_2
2888- daddu c_1,AT
2889- dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
2890- mflo t_1
2891- mfhi t_2
2892- daddu c_2,t_1
2893- sltu AT,c_2,t_1
2894- daddu t_2,AT
2895- daddu c_3,t_2
2896- sltu AT,c_3,t_2
2897- daddu c_1,AT
2898- sd c_2,32(a0)
2899-
2900- dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
2901- mflo t_1
2902- mfhi t_2
2903- slt c_2,t_2,zero
2904- dsll t_2,1
2905- slt a2,t_1,zero
2906- daddu t_2,a2
2907- dsll t_1,1
2908- daddu c_3,t_1
2909- sltu AT,c_3,t_1
2910- daddu t_2,AT
2911- daddu c_1,t_2
2912- sltu AT,c_1,t_2
2913- daddu c_2,AT
2914- sd c_3,40(a0)
2915-
2916- dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
2917- mflo t_1
2918- mfhi t_2
2919- daddu c_1,t_1
2920- sltu AT,c_1,t_1
2921- daddu t_2,AT
2922- daddu c_2,t_2
2923- sd c_1,48(a0)
2924- sd c_2,56(a0)
2925-
2926- jr ra
2927-END(bn_sqr_comba4)
2928diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c
2929index 31476ab..2d39407 100644
2930--- a/crypto/bn/asm/x86_64-gcc.c
2931+++ b/crypto/bn/asm/x86_64-gcc.c
2932@@ -273,6 +273,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
2933 /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
2934 /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
2935
2936+/*
2937+ * Keep in mind that carrying into high part of multiplication result
2938+ * can not overflow, because it cannot be all-ones.
2939+ */
2940 #if 0
2941 /* original macros are kept for reference purposes */
2942 #define mul_add_c(a,b,c0,c1,c2) { \
2943@@ -287,10 +291,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
2944 BN_ULONG ta=(a),tb=(b),t0; \
2945 t1 = BN_UMULT_HIGH(ta,tb); \
2946 t0 = ta * tb; \
2947- t2 = t1+t1; c2 += (t2<t1)?1:0; \
2948- t1 = t0+t0; t2 += (t1<t0)?1:0; \
2949- c0 += t1; t2 += (c0<t1)?1:0; \
2950+ c0 += t0; t2 = t1+((c0<t0)?1:0);\
2951 c1 += t2; c2 += (c1<t2)?1:0; \
2952+ c0 += t0; t1 += (c0<t0)?1:0; \
2953+ c1 += t1; c2 += (c1<t1)?1:0; \
2954 }
2955 #else
2956 #define mul_add_c(a,b,c0,c1,c2) do { \
2957@@ -328,22 +332,14 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
2958 : "=a"(t1),"=d"(t2) \
2959 : "a"(a),"m"(b) \
2960 : "cc"); \
2961- asm ("addq %0,%0; adcq %2,%1" \
2962- : "+d"(t2),"+r"(c2) \
2963- : "g"(0) \
2964- : "cc"); \
2965- asm ("addq %0,%0; adcq %2,%1" \
2966- : "+a"(t1),"+d"(t2) \
2967- : "g"(0) \
2968- : "cc"); \
2969- asm ("addq %2,%0; adcq %3,%1" \
2970- : "+r"(c0),"+d"(t2) \
2971- : "a"(t1),"g"(0) \
2972- : "cc"); \
2973- asm ("addq %2,%0; adcq %3,%1" \
2974- : "+r"(c1),"+r"(c2) \
2975- : "d"(t2),"g"(0) \
2976- : "cc"); \
2977+ asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
2978+ : "+r"(c0),"+r"(c1),"+r"(c2) \
2979+ : "r"(t1),"r"(t2),"g"(0) \
2980+ : "cc"); \
2981+ asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
2982+ : "+r"(c0),"+r"(c1),"+r"(c2) \
2983+ : "r"(t1),"r"(t2),"g"(0) \
2984+ : "cc"); \
2985 } while (0)
2986 #endif
2987
2988diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c
2989index c43c91c..a33b634 100644
2990--- a/crypto/bn/bn_asm.c
2991+++ b/crypto/bn/bn_asm.c
2992@@ -438,6 +438,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
2993 /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
2994 /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
2995
2996+/*
2997+ * Keep in mind that carrying into high part of multiplication result
2998+ * can not overflow, because it cannot be all-ones.
2999+ */
3000 #ifdef BN_LLONG
3001 #define mul_add_c(a,b,c0,c1,c2) \
3002 t=(BN_ULLONG)a*b; \
3003@@ -478,10 +482,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
3004 #define mul_add_c2(a,b,c0,c1,c2) { \
3005 BN_ULONG ta=(a),tb=(b),t0; \
3006 BN_UMULT_LOHI(t0,t1,ta,tb); \
3007- t2 = t1+t1; c2 += (t2<t1)?1:0; \
3008- t1 = t0+t0; t2 += (t1<t0)?1:0; \
3009- c0 += t1; t2 += (c0<t1)?1:0; \
3010+ c0 += t0; t2 = t1+((c0<t0)?1:0);\
3011 c1 += t2; c2 += (c1<t2)?1:0; \
3012+ c0 += t0; t1 += (c0<t0)?1:0; \
3013+ c1 += t1; c2 += (c1<t1)?1:0; \
3014 }
3015
3016 #define sqr_add_c(a,i,c0,c1,c2) { \
3017@@ -508,10 +512,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
3018 BN_ULONG ta=(a),tb=(b),t0; \
3019 t1 = BN_UMULT_HIGH(ta,tb); \
3020 t0 = ta * tb; \
3021- t2 = t1+t1; c2 += (t2<t1)?1:0; \
3022- t1 = t0+t0; t2 += (t1<t0)?1:0; \
3023- c0 += t1; t2 += (c0<t1)?1:0; \
3024+ c0 += t0; t2 = t1+((c0<t0)?1:0);\
3025 c1 += t2; c2 += (c1<t2)?1:0; \
3026+ c0 += t0; t1 += (c0<t0)?1:0; \
3027+ c1 += t1; c2 += (c1<t1)?1:0; \
3028 }
3029
3030 #define sqr_add_c(a,i,c0,c1,c2) { \
3031diff --git a/crypto/bn/bntest.c b/crypto/bn/bntest.c
3032index 7771e92..48bc633 100644
3033--- a/crypto/bn/bntest.c
3034+++ b/crypto/bn/bntest.c
3035@@ -678,44 +678,98 @@ int test_mul(BIO *bp)
3036
3037 int test_sqr(BIO *bp, BN_CTX *ctx)
3038 {
3039- BIGNUM a,c,d,e;
3040- int i;
3041+ BIGNUM *a,*c,*d,*e;
3042+ int i, ret = 0;
3043
3044- BN_init(&a);
3045- BN_init(&c);
3046- BN_init(&d);
3047- BN_init(&e);
3048+ a = BN_new();
3049+ c = BN_new();
3050+ d = BN_new();
3051+ e = BN_new();
3052+ if (a == NULL || c == NULL || d == NULL || e == NULL)
3053+ {
3054+ goto err;
3055+ }
3056
3057 for (i=0; i<num0; i++)
3058 {
3059- BN_bntest_rand(&a,40+i*10,0,0);
3060- a.neg=rand_neg();
3061- BN_sqr(&c,&a,ctx);
3062+ BN_bntest_rand(a,40+i*10,0,0);
3063+ a->neg=rand_neg();
3064+ BN_sqr(c,a,ctx);
3065 if (bp != NULL)
3066 {
3067 if (!results)
3068 {
3069- BN_print(bp,&a);
3070+ BN_print(bp,a);
3071 BIO_puts(bp," * ");
3072- BN_print(bp,&a);
3073+ BN_print(bp,a);
3074 BIO_puts(bp," - ");
3075 }
3076- BN_print(bp,&c);
3077+ BN_print(bp,c);
3078 BIO_puts(bp,"\n");
3079 }
3080- BN_div(&d,&e,&c,&a,ctx);
3081- BN_sub(&d,&d,&a);
3082- if(!BN_is_zero(&d) || !BN_is_zero(&e))
3083- {
3084- fprintf(stderr,"Square test failed!\n");
3085- return 0;
3086- }
3087+ BN_div(d,e,c,a,ctx);
3088+ BN_sub(d,d,a);
3089+ if(!BN_is_zero(d) || !BN_is_zero(e))
3090+ {
3091+ fprintf(stderr,"Square test failed!\n");
3092+ goto err;
3093+ }
3094 }
3095- BN_free(&a);
3096- BN_free(&c);
3097- BN_free(&d);
3098- BN_free(&e);
3099- return(1);
3100+
3101+ /* Regression test for a BN_sqr overflow bug. */
3102+ BN_hex2bn(&a,
3103+ "80000000000000008000000000000001FFFFFFFFFFFFFFFE0000000000000000");
3104+ BN_sqr(c, a, ctx);
3105+ if (bp != NULL)
3106+ {
3107+ if (!results)
3108+ {
3109+ BN_print(bp,a);
3110+ BIO_puts(bp," * ");
3111+ BN_print(bp,a);
3112+ BIO_puts(bp," - ");
3113+ }
3114+ BN_print(bp,c);
3115+ BIO_puts(bp,"\n");
3116+ }
3117+ BN_mul(d, a, a, ctx);
3118+ if (BN_cmp(c, d))
3119+ {
3120+ fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce "
3121+ "different results!\n");
3122+ goto err;
3123+ }
3124+
3125+ /* Regression test for a BN_sqr overflow bug. */
3126+ BN_hex2bn(&a,
3127+ "80000000000000000000000080000001FFFFFFFE000000000000000000000000");
3128+ BN_sqr(c, a, ctx);
3129+ if (bp != NULL)
3130+ {
3131+ if (!results)
3132+ {
3133+ BN_print(bp,a);
3134+ BIO_puts(bp," * ");
3135+ BN_print(bp,a);
3136+ BIO_puts(bp," - ");
3137+ }
3138+ BN_print(bp,c);
3139+ BIO_puts(bp,"\n");
3140+ }
3141+ BN_mul(d, a, a, ctx);
3142+ if (BN_cmp(c, d))
3143+ {
3144+ fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce "
3145+ "different results!\n");
3146+ goto err;
3147+ }
3148+ ret = 1;
3149+err:
3150+ if (a != NULL) BN_free(a);
3151+ if (c != NULL) BN_free(c);
3152+ if (d != NULL) BN_free(d);
3153+ if (e != NULL) BN_free(e);
3154+ return ret;
3155 }
3156
3157 int test_mont(BIO *bp, BN_CTX *ctx)
3158--
31591.9.1
3160