diff options
3 files changed, 2049 insertions, 0 deletions
diff --git a/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-1.patch b/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-1.patch new file mode 100644 index 0000000000..eb3fc52dca --- /dev/null +++ b/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-1.patch | |||
@@ -0,0 +1,1918 @@ | |||
1 | From 14ac0f0e4e1f36793d09b41ffd5e482575289ab2 Mon Sep 17 00:00:00 2001 | ||
2 | From: Danny Tsen <dtsen@us.ibm.com> | ||
3 | Date: Tue, 11 Feb 2025 13:48:01 -0500 | ||
4 | Subject: [PATCH] Fix Minerva timing side-channel signal for P-384 curve on PPC | ||
5 | |||
6 | 1. bn_ppc.c: Used bn_mul_mont_int() instead of bn_mul_mont_300_fixed_n6() | ||
7 | for Montgomery multiplication. | ||
8 | 2. ecp_nistp384-ppc64.pl: | ||
9 | - Re-wrote p384_felem_mul and p384_felem_square for easier maintenance with | ||
10 | minimum perl wrapper. | ||
11 | - Implemented p384_felem_reduce, p384_felem_mul_reduce and p384_felem_square_reduce. | ||
12 | - Implemented p384_felem_diff64, felem_diff_128_64 and felem_diff128 in assembly. | ||
13 | 3. ecp_nistp384.c: | ||
14 | - Added wrapper function for p384_felem_mul_reduce and p384_felem_square_reduce. | ||
15 | |||
16 | Signed-off-by: Danny Tsen <dtsen@us.ibm.com> | ||
17 | |||
18 | Reviewed-by: Dmitry Belyavskiy <beldmit@gmail.com> | ||
19 | Reviewed-by: Tomas Mraz <tomas@openssl.org> | ||
20 | (Merged from https://github.com/openssl/openssl/pull/26709) | ||
21 | |||
22 | (cherry picked from commit 85cabd94958303859b1551364a609d4ff40b67a5) | ||
23 | |||
24 | CVE: CVE-2025-27587 | ||
25 | Upstream-Status: Backport [https://github.com/openssl/openssl/commit/14ac0f0e4e1f36793d09b41ffd5e482575289ab2] | ||
26 | Signed-off-by: Peter Marko <peter.marko@siemens.com> | ||
27 | --- | ||
28 | crypto/bn/bn_ppc.c | 3 + | ||
29 | crypto/ec/asm/ecp_nistp384-ppc64.pl | 1724 +++++++++++++++++++++++---- | ||
30 | crypto/ec/ecp_nistp384.c | 28 +- | ||
31 | 3 files changed, 1504 insertions(+), 251 deletions(-) | ||
32 | |||
33 | diff --git a/crypto/bn/bn_ppc.c b/crypto/bn/bn_ppc.c | ||
34 | index 1e9421bee2..29293bad55 100644 | ||
35 | --- a/crypto/bn/bn_ppc.c | ||
36 | +++ b/crypto/bn/bn_ppc.c | ||
37 | @@ -41,12 +41,15 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | ||
38 | */ | ||
39 | |||
40 | #if defined(_ARCH_PPC64) && !defined(__ILP32__) | ||
41 | + /* Minerva side-channel fix danny */ | ||
42 | +# if defined(USE_FIXED_N6) | ||
43 | if (num == 6) { | ||
44 | if (OPENSSL_ppccap_P & PPC_MADD300) | ||
45 | return bn_mul_mont_300_fixed_n6(rp, ap, bp, np, n0, num); | ||
46 | else | ||
47 | return bn_mul_mont_fixed_n6(rp, ap, bp, np, n0, num); | ||
48 | } | ||
49 | +# endif | ||
50 | #endif | ||
51 | |||
52 | return bn_mul_mont_int(rp, ap, bp, np, n0, num); | ||
53 | diff --git a/crypto/ec/asm/ecp_nistp384-ppc64.pl b/crypto/ec/asm/ecp_nistp384-ppc64.pl | ||
54 | index 28f4168e52..b663bddfc6 100755 | ||
55 | --- a/crypto/ec/asm/ecp_nistp384-ppc64.pl | ||
56 | +++ b/crypto/ec/asm/ecp_nistp384-ppc64.pl | ||
57 | @@ -7,13 +7,15 @@ | ||
58 | # https://www.openssl.org/source/license.html | ||
59 | # | ||
60 | # ==================================================================== | ||
61 | -# Written by Rohan McLure <rmclure@linux.ibm.com> for the OpenSSL | ||
62 | -# project. | ||
63 | +# Written by Danny Tsen <dtsen@us.ibm.com> for the OpenSSL project. | ||
64 | +# | ||
65 | +# Copyright 2025- IBM Corp. | ||
66 | # ==================================================================== | ||
67 | # | ||
68 | -# p384 lower-level primitives for PPC64 using vector instructions. | ||
69 | +# p384 lower-level primitives for PPC64. | ||
70 | # | ||
71 | |||
72 | + | ||
73 | use strict; | ||
74 | use warnings; | ||
75 | |||
76 | @@ -21,7 +23,7 @@ my $flavour = shift; | ||
77 | my $output = ""; | ||
78 | while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} | ||
79 | if (!$output) { | ||
80 | - $output = "-"; | ||
81 | + $output = "-"; | ||
82 | } | ||
83 | |||
84 | my ($xlate, $dir); | ||
85 | @@ -35,271 +37,1495 @@ open OUT,"| \"$^X\" $xlate $flavour $output"; | ||
86 | |||
87 | my $code = ""; | ||
88 | |||
89 | -my ($sp, $outp, $savelr, $savesp) = ("r1", "r3", "r10", "r12"); | ||
90 | - | ||
91 | -my $vzero = "v32"; | ||
92 | - | ||
93 | -sub startproc($) | ||
94 | -{ | ||
95 | - my ($name) = @_; | ||
96 | - | ||
97 | - $code.=<<___; | ||
98 | - .globl ${name} | ||
99 | - .align 5 | ||
100 | -${name}: | ||
101 | - | ||
102 | -___ | ||
103 | -} | ||
104 | - | ||
105 | -sub endproc($) | ||
106 | -{ | ||
107 | - my ($name) = @_; | ||
108 | - | ||
109 | - $code.=<<___; | ||
110 | - blr | ||
111 | - .size ${name},.-${name} | ||
112 | - | ||
113 | -___ | ||
114 | -} | ||
115 | - | ||
116 | -sub load_vrs($$) | ||
117 | -{ | ||
118 | - my ($pointer, $reg_list) = @_; | ||
119 | - | ||
120 | - for (my $i = 0; $i <= 6; $i++) { | ||
121 | - my $offset = $i * 8; | ||
122 | - $code.=<<___; | ||
123 | - lxsd $reg_list->[$i],$offset($pointer) | ||
124 | -___ | ||
125 | - } | ||
126 | - | ||
127 | - $code.=<<___; | ||
128 | - | ||
129 | -___ | ||
130 | -} | ||
131 | - | ||
132 | -sub store_vrs($$) | ||
133 | -{ | ||
134 | - my ($pointer, $reg_list) = @_; | ||
135 | - | ||
136 | - for (my $i = 0; $i <= 12; $i++) { | ||
137 | - my $offset = $i * 16; | ||
138 | - $code.=<<___; | ||
139 | - stxv $reg_list->[$i],$offset($pointer) | ||
140 | -___ | ||
141 | - } | ||
142 | - | ||
143 | - $code.=<<___; | ||
144 | - | ||
145 | -___ | ||
146 | -} | ||
147 | - | ||
148 | $code.=<<___; | ||
149 | -.machine "any" | ||
150 | +.machine "any" | ||
151 | .text | ||
152 | |||
153 | -___ | ||
154 | +.globl p384_felem_mul | ||
155 | +.type p384_felem_mul,\@function | ||
156 | +.align 4 | ||
157 | +p384_felem_mul: | ||
158 | |||
159 | -{ | ||
160 | - # mul/square common | ||
161 | - my ($t1, $t2, $t3, $t4) = ("v33", "v34", "v42", "v43"); | ||
162 | - my ($zero, $one) = ("r8", "r9"); | ||
163 | - my $out = "v51"; | ||
164 | + stdu 1, -176(1) | ||
165 | + mflr 0 | ||
166 | + std 14, 56(1) | ||
167 | + std 15, 64(1) | ||
168 | + std 16, 72(1) | ||
169 | + std 17, 80(1) | ||
170 | + std 18, 88(1) | ||
171 | + std 19, 96(1) | ||
172 | + std 20, 104(1) | ||
173 | + std 21, 112(1) | ||
174 | + std 22, 120(1) | ||
175 | |||
176 | - { | ||
177 | - # | ||
178 | - # p384_felem_mul | ||
179 | - # | ||
180 | + bl _p384_felem_mul_core | ||
181 | |||
182 | - my ($in1p, $in2p) = ("r4", "r5"); | ||
183 | - my @in1 = map("v$_",(44..50)); | ||
184 | - my @in2 = map("v$_",(35..41)); | ||
185 | + mtlr 0 | ||
186 | + ld 14, 56(1) | ||
187 | + ld 15, 64(1) | ||
188 | + ld 16, 72(1) | ||
189 | + ld 17, 80(1) | ||
190 | + ld 18, 88(1) | ||
191 | + ld 19, 96(1) | ||
192 | + ld 20, 104(1) | ||
193 | + ld 21, 112(1) | ||
194 | + ld 22, 120(1) | ||
195 | + addi 1, 1, 176 | ||
196 | + blr | ||
197 | +.size p384_felem_mul,.-p384_felem_mul | ||
198 | |||
199 | - startproc("p384_felem_mul"); | ||
200 | +.globl p384_felem_square | ||
201 | +.type p384_felem_square,\@function | ||
202 | +.align 4 | ||
203 | +p384_felem_square: | ||
204 | |||
205 | - $code.=<<___; | ||
206 | - vspltisw $vzero,0 | ||
207 | + stdu 1, -176(1) | ||
208 | + mflr 0 | ||
209 | + std 14, 56(1) | ||
210 | + std 15, 64(1) | ||
211 | + std 16, 72(1) | ||
212 | + std 17, 80(1) | ||
213 | |||
214 | -___ | ||
215 | + bl _p384_felem_square_core | ||
216 | |||
217 | - load_vrs($in1p, \@in1); | ||
218 | - load_vrs($in2p, \@in2); | ||
219 | - | ||
220 | - $code.=<<___; | ||
221 | - vmsumudm $out,$in1[0],$in2[0],$vzero | ||
222 | - stxv $out,0($outp) | ||
223 | - | ||
224 | - xxpermdi $t1,$in1[0],$in1[1],0b00 | ||
225 | - xxpermdi $t2,$in2[1],$in2[0],0b00 | ||
226 | - vmsumudm $out,$t1,$t2,$vzero | ||
227 | - stxv $out,16($outp) | ||
228 | - | ||
229 | - xxpermdi $t2,$in2[2],$in2[1],0b00 | ||
230 | - vmsumudm $out,$t1,$t2,$vzero | ||
231 | - vmsumudm $out,$in1[2],$in2[0],$out | ||
232 | - stxv $out,32($outp) | ||
233 | - | ||
234 | - xxpermdi $t2,$in2[1],$in2[0],0b00 | ||
235 | - xxpermdi $t3,$in1[2],$in1[3],0b00 | ||
236 | - xxpermdi $t4,$in2[3],$in2[2],0b00 | ||
237 | - vmsumudm $out,$t1,$t4,$vzero | ||
238 | - vmsumudm $out,$t3,$t2,$out | ||
239 | - stxv $out,48($outp) | ||
240 | - | ||
241 | - xxpermdi $t2,$in2[4],$in2[3],0b00 | ||
242 | - xxpermdi $t4,$in2[2],$in2[1],0b00 | ||
243 | - vmsumudm $out,$t1,$t2,$vzero | ||
244 | - vmsumudm $out,$t3,$t4,$out | ||
245 | - vmsumudm $out,$in1[4],$in2[0],$out | ||
246 | - stxv $out,64($outp) | ||
247 | - | ||
248 | - xxpermdi $t2,$in2[5],$in2[4],0b00 | ||
249 | - xxpermdi $t4,$in2[3],$in2[2],0b00 | ||
250 | - vmsumudm $out,$t1,$t2,$vzero | ||
251 | - vmsumudm $out,$t3,$t4,$out | ||
252 | - xxpermdi $t4,$in2[1],$in2[0],0b00 | ||
253 | - xxpermdi $t1,$in1[4],$in1[5],0b00 | ||
254 | - vmsumudm $out,$t1,$t4,$out | ||
255 | - stxv $out,80($outp) | ||
256 | - | ||
257 | - xxpermdi $t1,$in1[0],$in1[1],0b00 | ||
258 | - xxpermdi $t2,$in2[6],$in2[5],0b00 | ||
259 | - xxpermdi $t4,$in2[4],$in2[3],0b00 | ||
260 | - vmsumudm $out,$t1,$t2,$vzero | ||
261 | - vmsumudm $out,$t3,$t4,$out | ||
262 | - xxpermdi $t2,$in2[2],$in2[1],0b00 | ||
263 | - xxpermdi $t1,$in1[4],$in1[5],0b00 | ||
264 | - vmsumudm $out,$t1,$t2,$out | ||
265 | - vmsumudm $out,$in1[6],$in2[0],$out | ||
266 | - stxv $out,96($outp) | ||
267 | - | ||
268 | - xxpermdi $t1,$in1[1],$in1[2],0b00 | ||
269 | - xxpermdi $t2,$in2[6],$in2[5],0b00 | ||
270 | - xxpermdi $t3,$in1[3],$in1[4],0b00 | ||
271 | - vmsumudm $out,$t1,$t2,$vzero | ||
272 | - vmsumudm $out,$t3,$t4,$out | ||
273 | - xxpermdi $t3,$in2[2],$in2[1],0b00 | ||
274 | - xxpermdi $t1,$in1[5],$in1[6],0b00 | ||
275 | - vmsumudm $out,$t1,$t3,$out | ||
276 | - stxv $out,112($outp) | ||
277 | - | ||
278 | - xxpermdi $t1,$in1[2],$in1[3],0b00 | ||
279 | - xxpermdi $t3,$in1[4],$in1[5],0b00 | ||
280 | - vmsumudm $out,$t1,$t2,$vzero | ||
281 | - vmsumudm $out,$t3,$t4,$out | ||
282 | - vmsumudm $out,$in1[6],$in2[2],$out | ||
283 | - stxv $out,128($outp) | ||
284 | - | ||
285 | - xxpermdi $t1,$in1[3],$in1[4],0b00 | ||
286 | - vmsumudm $out,$t1,$t2,$vzero | ||
287 | - xxpermdi $t1,$in1[5],$in1[6],0b00 | ||
288 | - vmsumudm $out,$t1,$t4,$out | ||
289 | - stxv $out,144($outp) | ||
290 | - | ||
291 | - vmsumudm $out,$t3,$t2,$vzero | ||
292 | - vmsumudm $out,$in1[6],$in2[4],$out | ||
293 | - stxv $out,160($outp) | ||
294 | - | ||
295 | - vmsumudm $out,$t1,$t2,$vzero | ||
296 | - stxv $out,176($outp) | ||
297 | - | ||
298 | - vmsumudm $out,$in1[6],$in2[6],$vzero | ||
299 | - stxv $out,192($outp) | ||
300 | -___ | ||
301 | + mtlr 0 | ||
302 | + ld 14, 56(1) | ||
303 | + ld 15, 64(1) | ||
304 | + ld 16, 72(1) | ||
305 | + ld 17, 80(1) | ||
306 | + addi 1, 1, 176 | ||
307 | + blr | ||
308 | +.size p384_felem_square,.-p384_felem_square | ||
309 | |||
310 | - endproc("p384_felem_mul"); | ||
311 | - } | ||
312 | +# | ||
313 | +# Felem mul core function - | ||
314 | +# r3, r4 and r5 need to be pre-loaded. | ||
315 | +# | ||
316 | +.type _p384_felem_mul_core,\@function | ||
317 | +.align 4 | ||
318 | +_p384_felem_mul_core: | ||
319 | |||
320 | - { | ||
321 | - # | ||
322 | - # p384_felem_square | ||
323 | - # | ||
324 | + ld 6,0(4) | ||
325 | + ld 14,0(5) | ||
326 | + ld 7,8(4) | ||
327 | + ld 15,8(5) | ||
328 | + ld 8,16(4) | ||
329 | + ld 16,16(5) | ||
330 | + ld 9,24(4) | ||
331 | + ld 17,24(5) | ||
332 | + ld 10,32(4) | ||
333 | + ld 18,32(5) | ||
334 | + ld 11,40(4) | ||
335 | + ld 19,40(5) | ||
336 | + ld 12,48(4) | ||
337 | + ld 20,48(5) | ||
338 | |||
339 | - my ($inp) = ("r4"); | ||
340 | - my @in = map("v$_",(44..50)); | ||
341 | - my @inx2 = map("v$_",(35..41)); | ||
342 | + # out0 | ||
343 | + mulld 21, 14, 6 | ||
344 | + mulhdu 22, 14, 6 | ||
345 | + std 21, 0(3) | ||
346 | + std 22, 8(3) | ||
347 | |||
348 | - startproc("p384_felem_square"); | ||
349 | + vxor 0, 0, 0 | ||
350 | |||
351 | - $code.=<<___; | ||
352 | - vspltisw $vzero,0 | ||
353 | + # out1 | ||
354 | + mtvsrdd 32+13, 14, 6 | ||
355 | + mtvsrdd 32+14, 7, 15 | ||
356 | + vmsumudm 1, 13, 14, 0 | ||
357 | |||
358 | -___ | ||
359 | + # out2 | ||
360 | + mtvsrdd 32+15, 15, 6 | ||
361 | + mtvsrdd 32+16, 7, 16 | ||
362 | + mtvsrdd 32+17, 0, 8 | ||
363 | + mtvsrdd 32+18, 0, 14 | ||
364 | + vmsumudm 19, 15, 16, 0 | ||
365 | + vmsumudm 2, 17, 18, 19 | ||
366 | |||
367 | - load_vrs($inp, \@in); | ||
368 | + # out3 | ||
369 | + mtvsrdd 32+13, 16, 6 | ||
370 | + mtvsrdd 32+14, 7, 17 | ||
371 | + mtvsrdd 32+15, 14, 8 | ||
372 | + mtvsrdd 32+16, 9, 15 | ||
373 | + vmsumudm 19, 13, 14, 0 | ||
374 | + vmsumudm 3, 15, 16, 19 | ||
375 | |||
376 | - $code.=<<___; | ||
377 | - li $zero,0 | ||
378 | - li $one,1 | ||
379 | - mtvsrdd $t1,$one,$zero | ||
380 | -___ | ||
381 | + # out4 | ||
382 | + mtvsrdd 32+13, 17, 6 | ||
383 | + mtvsrdd 32+14, 7, 18 | ||
384 | + mtvsrdd 32+15, 15, 8 | ||
385 | + mtvsrdd 32+16, 9, 16 | ||
386 | + mtvsrdd 32+17, 0, 10 | ||
387 | + mtvsrdd 32+18, 0, 14 | ||
388 | + vmsumudm 19, 13, 14, 0 | ||
389 | + vmsumudm 4, 15, 16, 19 | ||
390 | + vmsumudm 4, 17, 18, 4 | ||
391 | |||
392 | - for (my $i = 0; $i <= 6; $i++) { | ||
393 | - $code.=<<___; | ||
394 | - vsld $inx2[$i],$in[$i],$t1 | ||
395 | -___ | ||
396 | - } | ||
397 | - | ||
398 | - $code.=<<___; | ||
399 | - vmsumudm $out,$in[0],$in[0],$vzero | ||
400 | - stxv $out,0($outp) | ||
401 | - | ||
402 | - vmsumudm $out,$in[0],$inx2[1],$vzero | ||
403 | - stxv $out,16($outp) | ||
404 | - | ||
405 | - vmsumudm $out,$in[0],$inx2[2],$vzero | ||
406 | - vmsumudm $out,$in[1],$in[1],$out | ||
407 | - stxv $out,32($outp) | ||
408 | - | ||
409 | - xxpermdi $t1,$in[0],$in[1],0b00 | ||
410 | - xxpermdi $t2,$inx2[3],$inx2[2],0b00 | ||
411 | - vmsumudm $out,$t1,$t2,$vzero | ||
412 | - stxv $out,48($outp) | ||
413 | - | ||
414 | - xxpermdi $t4,$inx2[4],$inx2[3],0b00 | ||
415 | - vmsumudm $out,$t1,$t4,$vzero | ||
416 | - vmsumudm $out,$in[2],$in[2],$out | ||
417 | - stxv $out,64($outp) | ||
418 | - | ||
419 | - xxpermdi $t2,$inx2[5],$inx2[4],0b00 | ||
420 | - vmsumudm $out,$t1,$t2,$vzero | ||
421 | - vmsumudm $out,$in[2],$inx2[3],$out | ||
422 | - stxv $out,80($outp) | ||
423 | - | ||
424 | - xxpermdi $t2,$inx2[6],$inx2[5],0b00 | ||
425 | - vmsumudm $out,$t1,$t2,$vzero | ||
426 | - vmsumudm $out,$in[2],$inx2[4],$out | ||
427 | - vmsumudm $out,$in[3],$in[3],$out | ||
428 | - stxv $out,96($outp) | ||
429 | - | ||
430 | - xxpermdi $t3,$in[1],$in[2],0b00 | ||
431 | - vmsumudm $out,$t3,$t2,$vzero | ||
432 | - vmsumudm $out,$in[3],$inx2[4],$out | ||
433 | - stxv $out,112($outp) | ||
434 | - | ||
435 | - xxpermdi $t1,$in[2],$in[3],0b00 | ||
436 | - vmsumudm $out,$t1,$t2,$vzero | ||
437 | - vmsumudm $out,$in[4],$in[4],$out | ||
438 | - stxv $out,128($outp) | ||
439 | - | ||
440 | - xxpermdi $t1,$in[3],$in[4],0b00 | ||
441 | - vmsumudm $out,$t1,$t2,$vzero | ||
442 | - stxv $out,144($outp) | ||
443 | - | ||
444 | - vmsumudm $out,$in[4],$inx2[6],$vzero | ||
445 | - vmsumudm $out,$in[5],$in[5],$out | ||
446 | - stxv $out,160($outp) | ||
447 | - | ||
448 | - vmsumudm $out,$in[5],$inx2[6],$vzero | ||
449 | - stxv $out,176($outp) | ||
450 | - | ||
451 | - vmsumudm $out,$in[6],$in[6],$vzero | ||
452 | - stxv $out,192($outp) | ||
453 | -___ | ||
454 | + # out5 | ||
455 | + mtvsrdd 32+13, 18, 6 | ||
456 | + mtvsrdd 32+14, 7, 19 | ||
457 | + mtvsrdd 32+15, 16, 8 | ||
458 | + mtvsrdd 32+16, 9, 17 | ||
459 | + mtvsrdd 32+17, 14, 10 | ||
460 | + mtvsrdd 32+18, 11, 15 | ||
461 | + vmsumudm 19, 13, 14, 0 | ||
462 | + vmsumudm 5, 15, 16, 19 | ||
463 | + vmsumudm 5, 17, 18, 5 | ||
464 | + | ||
465 | + stxv 32+1, 16(3) | ||
466 | + stxv 32+2, 32(3) | ||
467 | + stxv 32+3, 48(3) | ||
468 | + stxv 32+4, 64(3) | ||
469 | + stxv 32+5, 80(3) | ||
470 | + | ||
471 | + # out6 | ||
472 | + mtvsrdd 32+13, 19, 6 | ||
473 | + mtvsrdd 32+14, 7, 20 | ||
474 | + mtvsrdd 32+15, 17, 8 | ||
475 | + mtvsrdd 32+16, 9, 18 | ||
476 | + mtvsrdd 32+17, 15, 10 | ||
477 | + mtvsrdd 32+18, 11, 16 | ||
478 | + vmsumudm 19, 13, 14, 0 | ||
479 | + vmsumudm 6, 15, 16, 19 | ||
480 | + mtvsrdd 32+13, 0, 12 | ||
481 | + mtvsrdd 32+14, 0, 14 | ||
482 | + vmsumudm 19, 17, 18, 6 | ||
483 | + vmsumudm 6, 13, 14, 19 | ||
484 | + | ||
485 | + # out7 | ||
486 | + mtvsrdd 32+13, 19, 7 | ||
487 | + mtvsrdd 32+14, 8, 20 | ||
488 | + mtvsrdd 32+15, 17, 9 | ||
489 | + mtvsrdd 32+16, 10, 18 | ||
490 | + mtvsrdd 32+17, 15, 11 | ||
491 | + mtvsrdd 32+18, 12, 16 | ||
492 | + vmsumudm 19, 13, 14, 0 | ||
493 | + vmsumudm 7, 15, 16, 19 | ||
494 | + vmsumudm 7, 17, 18, 7 | ||
495 | + | ||
496 | + # out8 | ||
497 | + mtvsrdd 32+13, 19, 8 | ||
498 | + mtvsrdd 32+14, 9, 20 | ||
499 | + mtvsrdd 32+15, 17, 10 | ||
500 | + mtvsrdd 32+16, 11, 18 | ||
501 | + mtvsrdd 32+17, 0, 12 | ||
502 | + mtvsrdd 32+18, 0, 16 | ||
503 | + vmsumudm 19, 13, 14, 0 | ||
504 | + vmsumudm 8, 15, 16, 19 | ||
505 | + vmsumudm 8, 17, 18, 8 | ||
506 | + | ||
507 | + # out9 | ||
508 | + mtvsrdd 32+13, 19, 9 | ||
509 | + mtvsrdd 32+14, 10, 20 | ||
510 | + mtvsrdd 32+15, 17, 11 | ||
511 | + mtvsrdd 32+16, 12, 18 | ||
512 | + vmsumudm 19, 13, 14, 0 | ||
513 | + vmsumudm 9, 15, 16, 19 | ||
514 | + | ||
515 | + # out10 | ||
516 | + mtvsrdd 32+13, 19, 10 | ||
517 | + mtvsrdd 32+14, 11, 20 | ||
518 | + mtvsrdd 32+15, 0, 12 | ||
519 | + mtvsrdd 32+16, 0, 18 | ||
520 | + vmsumudm 19, 13, 14, 0 | ||
521 | + vmsumudm 10, 15, 16, 19 | ||
522 | + | ||
523 | + # out11 | ||
524 | + mtvsrdd 32+17, 19, 11 | ||
525 | + mtvsrdd 32+18, 12, 20 | ||
526 | + vmsumudm 11, 17, 18, 0 | ||
527 | + | ||
528 | + stxv 32+6, 96(3) | ||
529 | + stxv 32+7, 112(3) | ||
530 | + stxv 32+8, 128(3) | ||
531 | + stxv 32+9, 144(3) | ||
532 | + stxv 32+10, 160(3) | ||
533 | + stxv 32+11, 176(3) | ||
534 | + | ||
535 | + # out12 | ||
536 | + mulld 21, 20, 12 | ||
537 | + mulhdu 22, 20, 12 # out12 | ||
538 | + | ||
539 | + std 21, 192(3) | ||
540 | + std 22, 200(3) | ||
541 | + | ||
542 | + blr | ||
543 | +.size _p384_felem_mul_core,.-_p384_felem_mul_core | ||
544 | + | ||
545 | +# | ||
546 | +# Felem square core function - | ||
547 | +# r3 and r4 need to be pre-loaded. | ||
548 | +# | ||
549 | +.type _p384_felem_square_core,\@function | ||
550 | +.align 4 | ||
551 | +_p384_felem_square_core: | ||
552 | + | ||
553 | + ld 6, 0(4) | ||
554 | + ld 7, 8(4) | ||
555 | + ld 8, 16(4) | ||
556 | + ld 9, 24(4) | ||
557 | + ld 10, 32(4) | ||
558 | + ld 11, 40(4) | ||
559 | + ld 12, 48(4) | ||
560 | + | ||
561 | + vxor 0, 0, 0 | ||
562 | + | ||
563 | + # out0 | ||
564 | + mulld 14, 6, 6 | ||
565 | + mulhdu 15, 6, 6 | ||
566 | + std 14, 0(3) | ||
567 | + std 15, 8(3) | ||
568 | + | ||
569 | + # out1 | ||
570 | + add 14, 6, 6 | ||
571 | + mtvsrdd 32+13, 0, 14 | ||
572 | + mtvsrdd 32+14, 0, 7 | ||
573 | + vmsumudm 1, 13, 14, 0 | ||
574 | + | ||
575 | + # out2 | ||
576 | + mtvsrdd 32+15, 7, 14 | ||
577 | + mtvsrdd 32+16, 7, 8 | ||
578 | + vmsumudm 2, 15, 16, 0 | ||
579 | + | ||
580 | + # out3 | ||
581 | + add 15, 7, 7 | ||
582 | + mtvsrdd 32+13, 8, 14 | ||
583 | + mtvsrdd 32+14, 15, 9 | ||
584 | + vmsumudm 3, 13, 14, 0 | ||
585 | + | ||
586 | + # out4 | ||
587 | + mtvsrdd 32+13, 9, 14 | ||
588 | + mtvsrdd 32+14, 15, 10 | ||
589 | + mtvsrdd 32+15, 0, 8 | ||
590 | + vmsumudm 4, 13, 14, 0 | ||
591 | + vmsumudm 4, 15, 15, 4 | ||
592 | + | ||
593 | + # out5 | ||
594 | + mtvsrdd 32+13, 10, 14 | ||
595 | + mtvsrdd 32+14, 15, 11 | ||
596 | + add 16, 8, 8 | ||
597 | + mtvsrdd 32+15, 0, 16 | ||
598 | + mtvsrdd 32+16, 0, 9 | ||
599 | + vmsumudm 5, 13, 14, 0 | ||
600 | + vmsumudm 5, 15, 16, 5 | ||
601 | + | ||
602 | + stxv 32+1, 16(3) | ||
603 | + stxv 32+2, 32(3) | ||
604 | + stxv 32+3, 48(3) | ||
605 | + stxv 32+4, 64(3) | ||
606 | + | ||
607 | + # out6 | ||
608 | + mtvsrdd 32+13, 11, 14 | ||
609 | + mtvsrdd 32+14, 15, 12 | ||
610 | + mtvsrdd 32+15, 9, 16 | ||
611 | + mtvsrdd 32+16, 9, 10 | ||
612 | + stxv 32+5, 80(3) | ||
613 | + vmsumudm 19, 13, 14, 0 | ||
614 | + vmsumudm 6, 15, 16, 19 | ||
615 | + | ||
616 | + # out7 | ||
617 | + add 17, 9, 9 | ||
618 | + mtvsrdd 32+13, 11, 15 | ||
619 | + mtvsrdd 32+14, 16, 12 | ||
620 | + mtvsrdd 32+15, 0, 17 | ||
621 | + mtvsrdd 32+16, 0, 10 | ||
622 | + vmsumudm 19, 13, 14, 0 | ||
623 | + vmsumudm 7, 15, 16, 19 | ||
624 | + | ||
625 | + # out8 | ||
626 | + mtvsrdd 32+13, 11, 16 | ||
627 | + mtvsrdd 32+14, 17, 12 | ||
628 | + mtvsrdd 32+15, 0, 10 | ||
629 | + vmsumudm 19, 13, 14, 0 | ||
630 | + vmsumudm 8, 15, 15, 19 | ||
631 | + | ||
632 | + # out9 | ||
633 | + add 14, 10, 10 | ||
634 | + mtvsrdd 32+13, 11, 17 | ||
635 | + mtvsrdd 32+14, 14, 12 | ||
636 | + vmsumudm 9, 13, 14, 0 | ||
637 | + | ||
638 | + # out10 | ||
639 | + mtvsrdd 32+13, 11, 14 | ||
640 | + mtvsrdd 32+14, 11, 12 | ||
641 | + vmsumudm 10, 13, 14, 0 | ||
642 | + | ||
643 | + stxv 32+6, 96(3) | ||
644 | + stxv 32+7, 112(3) | ||
645 | + | ||
646 | + # out11 | ||
647 | + #add 14, 11, 11 | ||
648 | + #mtvsrdd 32+13, 0, 14 | ||
649 | + #mtvsrdd 32+14, 0, 12 | ||
650 | + #vmsumudm 11, 13, 14, 0 | ||
651 | + | ||
652 | + mulld 6, 12, 11 | ||
653 | + mulhdu 7, 12, 11 | ||
654 | + addc 8, 6, 6 | ||
655 | + adde 9, 7, 7 | ||
656 | + | ||
657 | + stxv 32+8, 128(3) | ||
658 | + stxv 32+9, 144(3) | ||
659 | + stxv 32+10, 160(3) | ||
660 | + #stxv 32+11, 176(3) | ||
661 | + | ||
662 | + # out12 | ||
663 | + mulld 14, 12, 12 | ||
664 | + mulhdu 15, 12, 12 | ||
665 | + | ||
666 | + std 8, 176(3) | ||
667 | + std 9, 184(3) | ||
668 | + std 14, 192(3) | ||
669 | + std 15, 200(3) | ||
670 | + | ||
671 | + blr | ||
672 | +.size _p384_felem_square_core,.-_p384_felem_square_core | ||
673 | + | ||
674 | +# | ||
675 | +# widefelem (128 bits) * 8 | ||
676 | +# | ||
677 | +.macro F128_X_8 _off1 _off2 | ||
678 | + ld 9,\\_off1(3) | ||
679 | + ld 8,\\_off2(3) | ||
680 | + srdi 10,9,61 | ||
681 | + rldimi 10,8,3,0 | ||
682 | + sldi 9,9,3 | ||
683 | + std 9,\\_off1(3) | ||
684 | + std 10,\\_off2(3) | ||
685 | +.endm | ||
686 | + | ||
687 | +.globl p384_felem128_mul_by_8 | ||
688 | +.type p384_felem128_mul_by_8, \@function | ||
689 | +.align 4 | ||
690 | +p384_felem128_mul_by_8: | ||
691 | + | ||
692 | + F128_X_8 0, 8 | ||
693 | + | ||
694 | + F128_X_8 16, 24 | ||
695 | + | ||
696 | + F128_X_8 32, 40 | ||
697 | + | ||
698 | + F128_X_8 48, 56 | ||
699 | + | ||
700 | + F128_X_8 64, 72 | ||
701 | + | ||
702 | + F128_X_8 80, 88 | ||
703 | + | ||
704 | + F128_X_8 96, 104 | ||
705 | + | ||
706 | + F128_X_8 112, 120 | ||
707 | + | ||
708 | + F128_X_8 128, 136 | ||
709 | + | ||
710 | + F128_X_8 144, 152 | ||
711 | + | ||
712 | + F128_X_8 160, 168 | ||
713 | + | ||
714 | + F128_X_8 176, 184 | ||
715 | + | ||
716 | + F128_X_8 192, 200 | ||
717 | + | ||
718 | + blr | ||
719 | +.size p384_felem128_mul_by_8,.-p384_felem128_mul_by_8 | ||
720 | + | ||
721 | +# | ||
722 | +# widefelem (128 bits) * 2 | ||
723 | +# | ||
724 | +.macro F128_X_2 _off1 _off2 | ||
725 | + ld 9,\\_off1(3) | ||
726 | + ld 8,\\_off2(3) | ||
727 | + srdi 10,9,63 | ||
728 | + rldimi 10,8,1,0 | ||
729 | + sldi 9,9,1 | ||
730 | + std 9,\\_off1(3) | ||
731 | + std 10,\\_off2(3) | ||
732 | +.endm | ||
733 | + | ||
734 | +.globl p384_felem128_mul_by_2 | ||
735 | +.type p384_felem128_mul_by_2, \@function | ||
736 | +.align 4 | ||
737 | +p384_felem128_mul_by_2: | ||
738 | + | ||
739 | + F128_X_2 0, 8 | ||
740 | + | ||
741 | + F128_X_2 16, 24 | ||
742 | + | ||
743 | + F128_X_2 32, 40 | ||
744 | + | ||
745 | + F128_X_2 48, 56 | ||
746 | + | ||
747 | + F128_X_2 64, 72 | ||
748 | + | ||
749 | + F128_X_2 80, 88 | ||
750 | + | ||
751 | + F128_X_2 96, 104 | ||
752 | + | ||
753 | + F128_X_2 112, 120 | ||
754 | + | ||
755 | + F128_X_2 128, 136 | ||
756 | + | ||
757 | + F128_X_2 144, 152 | ||
758 | + | ||
759 | + F128_X_2 160, 168 | ||
760 | + | ||
761 | + F128_X_2 176, 184 | ||
762 | + | ||
763 | + F128_X_2 192, 200 | ||
764 | + | ||
765 | + blr | ||
766 | +.size p384_felem128_mul_by_2,.-p384_felem128_mul_by_2 | ||
767 | + | ||
768 | +.globl p384_felem_diff128 | ||
769 | +.type p384_felem_diff128, \@function | ||
770 | +.align 4 | ||
771 | +p384_felem_diff128: | ||
772 | + | ||
773 | + addis 5, 2, .LConst_two127\@toc\@ha | ||
774 | + addi 5, 5, .LConst_two127\@toc\@l | ||
775 | + | ||
776 | + ld 10, 0(3) | ||
777 | + ld 8, 8(3) | ||
778 | + li 9, 0 | ||
779 | + addc 10, 10, 9 | ||
780 | + li 7, -1 | ||
781 | + rldicr 7, 7, 0, 0 # two127 | ||
782 | + adde 8, 8, 7 | ||
783 | + ld 11, 0(4) | ||
784 | + ld 12, 8(4) | ||
785 | + subfc 11, 11, 10 | ||
786 | + subfe 12, 12, 8 | ||
787 | + std 11, 0(3) # out0 | ||
788 | + std 12, 8(3) | ||
789 | + | ||
790 | + # two127m71 = (r10, r9) | ||
791 | + ld 8, 16(3) | ||
792 | + ld 7, 24(3) | ||
793 | + ld 10, 24(5) # two127m71 | ||
794 | + addc 8, 8, 9 | ||
795 | + adde 7, 7, 10 | ||
796 | + ld 11, 16(4) | ||
797 | + ld 12, 24(4) | ||
798 | + subfc 11, 11, 8 | ||
799 | + subfe 12, 12, 7 | ||
800 | + std 11, 16(3) # out1 | ||
801 | + std 12, 24(3) | ||
802 | + | ||
803 | + ld 8, 32(3) | ||
804 | + ld 7, 40(3) | ||
805 | + addc 8, 8, 9 | ||
806 | + adde 7, 7, 10 | ||
807 | + ld 11, 32(4) | ||
808 | + ld 12, 40(4) | ||
809 | + subfc 11, 11, 8 | ||
810 | + subfe 12, 12, 7 | ||
811 | + std 11, 32(3) # out2 | ||
812 | + std 12, 40(3) | ||
813 | + | ||
814 | + ld 8, 48(3) | ||
815 | + ld 7, 56(3) | ||
816 | + addc 8, 8, 9 | ||
817 | + adde 7, 7, 10 | ||
818 | + ld 11, 48(4) | ||
819 | + ld 12, 56(4) | ||
820 | + subfc 11, 11, 8 | ||
821 | + subfe 12, 12, 7 | ||
822 | + std 11, 48(3) # out3 | ||
823 | + std 12, 56(3) | ||
824 | + | ||
825 | + ld 8, 64(3) | ||
826 | + ld 7, 72(3) | ||
827 | + addc 8, 8, 9 | ||
828 | + adde 7, 7, 10 | ||
829 | + ld 11, 64(4) | ||
830 | + ld 12, 72(4) | ||
831 | + subfc 11, 11, 8 | ||
832 | + subfe 12, 12, 7 | ||
833 | + std 11, 64(3) # out4 | ||
834 | + std 12, 72(3) | ||
835 | + | ||
836 | + ld 8, 80(3) | ||
837 | + ld 7, 88(3) | ||
838 | + addc 8, 8, 9 | ||
839 | + adde 7, 7, 10 | ||
840 | + ld 11, 80(4) | ||
841 | + ld 12, 88(4) | ||
842 | + subfc 11, 11, 8 | ||
843 | + subfe 12, 12, 7 | ||
844 | + std 11, 80(3) # out5 | ||
845 | + std 12, 88(3) | ||
846 | + | ||
847 | + ld 8, 96(3) | ||
848 | + ld 7, 104(3) | ||
849 | + ld 6, 40(5) # two127p111m79m71 | ||
850 | + addc 8, 8, 9 | ||
851 | + adde 7, 7, 6 | ||
852 | + ld 11, 96(4) | ||
853 | + ld 12, 104(4) | ||
854 | + subfc 11, 11, 8 | ||
855 | + subfe 12, 12, 7 | ||
856 | + std 11, 96(3) # out6 | ||
857 | + std 12, 104(3) | ||
858 | + | ||
859 | + ld 8, 112(3) | ||
860 | + ld 7, 120(3) | ||
861 | + ld 6, 56(5) # two127m119m71 | ||
862 | + addc 8, 8, 9 | ||
863 | + adde 7, 7, 6 | ||
864 | + ld 11, 112(4) | ||
865 | + ld 12, 120(4) | ||
866 | + subfc 11, 11, 8 | ||
867 | + subfe 12, 12, 7 | ||
868 | + std 11, 112(3) # out7 | ||
869 | + std 12, 120(3) | ||
870 | + | ||
871 | + ld 8, 128(3) | ||
872 | + ld 7, 136(3) | ||
873 | + ld 6, 72(5) # two127m95m71 | ||
874 | + addc 8, 8, 9 | ||
875 | + adde 7, 7, 6 | ||
876 | + ld 11, 128(4) | ||
877 | + ld 12, 136(4) | ||
878 | + subfc 11, 11, 8 | ||
879 | + subfe 12, 12, 7 | ||
880 | + std 11, 128(3) # out8 | ||
881 | + std 12, 136(3) | ||
882 | + | ||
883 | + ld 8, 144(3) | ||
884 | + ld 7, 152(3) | ||
885 | + addc 8, 8, 9 | ||
886 | + adde 7, 7, 10 | ||
887 | + ld 11, 144(4) | ||
888 | + ld 12, 152(4) | ||
889 | + subfc 11, 11, 8 | ||
890 | + subfe 12, 12, 7 | ||
891 | + std 11, 144(3) # out9 | ||
892 | + std 12, 152(3) | ||
893 | + | ||
894 | + ld 8, 160(3) | ||
895 | + ld 7, 168(3) | ||
896 | + addc 8, 8, 9 | ||
897 | + adde 7, 7, 10 | ||
898 | + ld 11, 160(4) | ||
899 | + ld 12, 168(4) | ||
900 | + subfc 11, 11, 8 | ||
901 | + subfe 12, 12, 7 | ||
902 | + std 11, 160(3) # out10 | ||
903 | + std 12, 168(3) | ||
904 | + | ||
905 | + ld 8, 176(3) | ||
906 | + ld 7, 184(3) | ||
907 | + addc 8, 8, 9 | ||
908 | + adde 7, 7, 10 | ||
909 | + ld 11, 176(4) | ||
910 | + ld 12, 184(4) | ||
911 | + subfc 11, 11, 8 | ||
912 | + subfe 12, 12, 7 | ||
913 | + std 11, 176(3) # out11 | ||
914 | + std 12, 184(3) | ||
915 | + | ||
916 | + ld 8, 192(3) | ||
917 | + ld 7, 200(3) | ||
918 | + addc 8, 8, 9 | ||
919 | + adde 7, 7, 10 | ||
920 | + ld 11, 192(4) | ||
921 | + ld 12, 200(4) | ||
922 | + subfc 11, 11, 8 | ||
923 | + subfe 12, 12, 7 | ||
924 | + std 11, 192(3) # out12 | ||
925 | + std 12, 200(3) | ||
926 | + | ||
927 | + blr | ||
928 | +.size p384_felem_diff128,.-p384_felem_diff128 | ||
929 | + | ||
930 | +.data | ||
931 | +.align 4 | ||
932 | +.LConst_two127: | ||
933 | +#two127 | ||
934 | +.long 0x00000000, 0x00000000, 0x00000000, 0x80000000 | ||
935 | +#two127m71 | ||
936 | +.long 0x00000000, 0x00000000, 0xffffff80, 0x7fffffff | ||
937 | +#two127p111m79m71 | ||
938 | +.long 0x00000000, 0x00000000, 0xffff7f80, 0x80007fff | ||
939 | +#two127m119m71 | ||
940 | +.long 0x00000000, 0x00000000, 0xffffff80, 0x7f7fffff | ||
941 | +#two127m95m71 | ||
942 | +.long 0x00000000, 0x00000000, 0x7fffff80, 0x7fffffff | ||
943 | + | ||
944 | +.text | ||
945 | + | ||
946 | +.globl p384_felem_diff_128_64 | ||
947 | +.type p384_felem_diff_128_64, \@function | ||
948 | +.align 4 | ||
949 | +p384_felem_diff_128_64: | ||
950 | + addis 5, 2, .LConst_128_two64\@toc\@ha | ||
951 | + addi 5, 5, .LConst_128_two64\@toc\@l | ||
952 | + | ||
953 | + ld 9, 0(3) | ||
954 | + ld 10, 8(3) | ||
955 | + ld 8, 48(5) # two64p48m16 | ||
956 | + li 7, 0 | ||
957 | + addc 9, 9, 8 | ||
958 | + li 6, 1 | ||
959 | + adde 10, 10, 6 | ||
960 | + ld 11, 0(4) | ||
961 | + subfc 8, 11, 9 | ||
962 | + subfe 12, 7, 10 | ||
963 | + std 8, 0(3) # out0 | ||
964 | + std 12, 8(3) | ||
965 | + | ||
966 | + ld 9, 16(3) | ||
967 | + ld 10, 24(3) | ||
968 | + ld 8, 0(5) # two64m56m8 | ||
969 | + addc 9, 9, 8 | ||
970 | + addze 10, 10 | ||
971 | + ld 11, 8(4) | ||
972 | + subfc 11, 11, 9 | ||
973 | + subfe 12, 7, 10 | ||
974 | + std 11, 16(3) # out1 | ||
975 | + std 12, 24(3) | ||
976 | + | ||
977 | + ld 9, 32(3) | ||
978 | + ld 10, 40(3) | ||
979 | + ld 8, 16(5) # two64m32m8 | ||
980 | + addc 9, 9, 8 | ||
981 | + addze 10, 10 | ||
982 | + ld 11, 16(4) | ||
983 | + subfc 11, 11, 9 | ||
984 | + subfe 12, 7, 10 | ||
985 | + std 11, 32(3) # out2 | ||
986 | + std 12, 40(3) | ||
987 | + | ||
988 | + ld 10, 48(3) | ||
989 | + ld 8, 56(3) | ||
990 | + #ld 9, 32(5) # two64m8 | ||
991 | + li 9, -256 # two64m8 | ||
992 | + addc 10, 10, 9 | ||
993 | + addze 8, 8 | ||
994 | + ld 11, 24(4) | ||
995 | + subfc 11, 11, 10 | ||
996 | + subfe 12, 7, 8 | ||
997 | + std 11, 48(3) # out3 | ||
998 | + std 12, 56(3) | ||
999 | + | ||
1000 | + ld 10, 64(3) | ||
1001 | + ld 8, 72(3) | ||
1002 | + addc 10, 10, 9 | ||
1003 | + addze 8, 8 | ||
1004 | + ld 11, 32(4) | ||
1005 | + subfc 11, 11, 10 | ||
1006 | + subfe 12, 7, 8 | ||
1007 | + std 11, 64(3) # out4 | ||
1008 | + std 12, 72(3) | ||
1009 | + | ||
1010 | + ld 10, 80(3) | ||
1011 | + ld 8, 88(3) | ||
1012 | + addc 10, 10, 9 | ||
1013 | + addze 8, 8 | ||
1014 | + ld 11, 40(4) | ||
1015 | + subfc 11, 11, 10 | ||
1016 | + subfe 12, 7, 8 | ||
1017 | + std 11, 80(3) # out5 | ||
1018 | + std 12, 88(3) | ||
1019 | + | ||
1020 | + ld 10, 96(3) | ||
1021 | + ld 8, 104(3) | ||
1022 | + addc 10, 10, 9 | ||
1023 | + addze 9, 8 | ||
1024 | + ld 11, 48(4) | ||
1025 | + subfc 11, 11, 10 | ||
1026 | + subfe 12, 7, 9 | ||
1027 | + std 11, 96(3) # out6 | ||
1028 | + std 12, 104(3) | ||
1029 | + | ||
1030 | + blr | ||
1031 | +.size p384_felem_diff_128_64,.-p384_felem_diff_128_64 | ||
1032 | + | ||
1033 | +.data | ||
1034 | +.align 4 | ||
1035 | +.LConst_128_two64: | ||
1036 | +#two64m56m8 | ||
1037 | +.long 0xffffff00, 0xfeffffff, 0x00000000, 0x00000000 | ||
1038 | +#two64m32m8 | ||
1039 | +.long 0xffffff00, 0xfffffffe, 0x00000000, 0x00000000 | ||
1040 | +#two64m8 | ||
1041 | +.long 0xffffff00, 0xffffffff, 0x00000000, 0x00000000 | ||
1042 | +#two64p48m16 | ||
1043 | +.long 0xffff0000, 0x0000ffff, 0x00000001, 0x00000000 | ||
1044 | + | ||
1045 | +.LConst_two60: | ||
1046 | +#two60m52m4 | ||
1047 | +.long 0xfffffff0, 0x0fefffff, 0x0, 0x0 | ||
1048 | +#two60p44m12 | ||
1049 | +.long 0xfffff000, 0x10000fff, 0x0, 0x0 | ||
1050 | +#two60m28m4 | ||
1051 | +.long 0xeffffff0, 0x0fffffff, 0x0, 0x0 | ||
1052 | +#two60m4 | ||
1053 | +.long 0xfffffff0, 0x0fffffff, 0x0, 0x0 | ||
1054 | + | ||
1055 | +.text | ||
1056 | +# | ||
1057 | +# static void felem_diff64(felem out, const felem in) | ||
1058 | +# | ||
1059 | +.globl p384_felem_diff64 | ||
1060 | +.type p384_felem_diff64, \@function | ||
1061 | +.align 4 | ||
1062 | +p384_felem_diff64: | ||
1063 | + addis 5, 2, .LConst_two60\@toc\@ha | ||
1064 | + addi 5, 5, .LConst_two60\@toc\@l | ||
1065 | + | ||
1066 | + ld 9, 0(3) | ||
1067 | + ld 8, 16(5) # two60p44m12 | ||
1068 | + li 7, 0 | ||
1069 | + add 9, 9, 8 | ||
1070 | + ld 11, 0(4) | ||
1071 | + subf 8, 11, 9 | ||
1072 | + std 8, 0(3) # out0 | ||
1073 | + | ||
1074 | + ld 9, 8(3) | ||
1075 | + ld 8, 0(5) # two60m52m4 | ||
1076 | + add 9, 9, 8 | ||
1077 | + ld 11, 8(4) | ||
1078 | + subf 11, 11, 9 | ||
1079 | + std 11, 8(3) # out1 | ||
1080 | + | ||
1081 | + ld 9, 16(3) | ||
1082 | + ld 8, 32(5) # two60m28m4 | ||
1083 | + add 9, 9, 8 | ||
1084 | + ld 11, 16(4) | ||
1085 | + subf 11, 11, 9 | ||
1086 | + std 11, 16(3) # out2 | ||
1087 | + | ||
1088 | + ld 10, 24(3) | ||
1089 | + ld 9, 48(5) # two60m4 | ||
1090 | + add 10, 10, 9 | ||
1091 | + ld 12, 24(4) | ||
1092 | + subf 12, 12, 10 | ||
1093 | + std 12, 24(3) # out3 | ||
1094 | + | ||
1095 | + ld 10, 32(3) | ||
1096 | + add 10, 10, 9 | ||
1097 | + ld 11, 32(4) | ||
1098 | + subf 11, 11, 10 | ||
1099 | + std 11, 32(3) # out4 | ||
1100 | + | ||
1101 | + ld 10, 40(3) | ||
1102 | + add 10, 10, 9 | ||
1103 | + ld 12, 40(4) | ||
1104 | + subf 12, 12, 10 | ||
1105 | + std 12, 40(3) # out5 | ||
1106 | |||
1107 | - endproc("p384_felem_square"); | ||
1108 | - } | ||
1109 | -} | ||
1110 | + ld 10, 48(3) | ||
1111 | + add 10, 10, 9 | ||
1112 | + ld 11, 48(4) | ||
1113 | + subf 11, 11, 10 | ||
1114 | + std 11, 48(3) # out6 | ||
1115 | + | ||
1116 | + blr | ||
1117 | +.size p384_felem_diff64,.-p384_felem_diff64 | ||
1118 | + | ||
1119 | +.text | ||
1120 | +# | ||
1121 | +# Shift 128 bits right <nbits> | ||
1122 | +# | ||
1123 | +.macro SHR o_h o_l in_h in_l nbits | ||
1124 | + srdi \\o_l, \\in_l, \\nbits # shift lower right <nbits> | ||
1125 | + rldimi \\o_l, \\in_h, 64-\\nbits, 0 # insert <64-nbits> from hi | ||
1126 | + srdi \\o_h, \\in_h, \\nbits # shift higher right <nbits> | ||
1127 | +.endm | ||
1128 | + | ||
1129 | +# | ||
1130 | +# void p384_felem_reduce(felem out, const widefelem in) | ||
1131 | +# | ||
1132 | +.global p384_felem_reduce | ||
1133 | +.type p384_felem_reduce,\@function | ||
1134 | +.align 4 | ||
1135 | +p384_felem_reduce: | ||
1136 | + | ||
1137 | + stdu 1, -208(1) | ||
1138 | + mflr 0 | ||
1139 | + std 14, 56(1) | ||
1140 | + std 15, 64(1) | ||
1141 | + std 16, 72(1) | ||
1142 | + std 17, 80(1) | ||
1143 | + std 18, 88(1) | ||
1144 | + std 19, 96(1) | ||
1145 | + std 20, 104(1) | ||
1146 | + std 21, 112(1) | ||
1147 | + std 22, 120(1) | ||
1148 | + std 23, 128(1) | ||
1149 | + std 24, 136(1) | ||
1150 | + std 25, 144(1) | ||
1151 | + std 26, 152(1) | ||
1152 | + std 27, 160(1) | ||
1153 | + std 28, 168(1) | ||
1154 | + std 29, 176(1) | ||
1155 | + std 30, 184(1) | ||
1156 | + std 31, 192(1) | ||
1157 | + | ||
1158 | + bl _p384_felem_reduce_core | ||
1159 | + | ||
1160 | + mtlr 0 | ||
1161 | + ld 14, 56(1) | ||
1162 | + ld 15, 64(1) | ||
1163 | + ld 16, 72(1) | ||
1164 | + ld 17, 80(1) | ||
1165 | + ld 18, 88(1) | ||
1166 | + ld 19, 96(1) | ||
1167 | + ld 20, 104(1) | ||
1168 | + ld 21, 112(1) | ||
1169 | + ld 22, 120(1) | ||
1170 | + ld 23, 128(1) | ||
1171 | + ld 24, 136(1) | ||
1172 | + ld 25, 144(1) | ||
1173 | + ld 26, 152(1) | ||
1174 | + ld 27, 160(1) | ||
1175 | + ld 28, 168(1) | ||
1176 | + ld 29, 176(1) | ||
1177 | + ld 30, 184(1) | ||
1178 | + ld 31, 192(1) | ||
1179 | + addi 1, 1, 208 | ||
1180 | + blr | ||
1181 | +.size p384_felem_reduce,.-p384_felem_reduce | ||
1182 | + | ||
1183 | +# | ||
1184 | +# Felem reduction core function - | ||
1185 | +# r3 and r4 need to be pre-loaded. | ||
1186 | +# | ||
1187 | +.type _p384_felem_reduce_core,\@function | ||
1188 | +.align 4 | ||
1189 | +_p384_felem_reduce_core: | ||
1190 | + addis 12, 2, .LConst\@toc\@ha | ||
1191 | + addi 12, 12, .LConst\@toc\@l | ||
1192 | + | ||
1193 | + # load constant p | ||
1194 | + ld 11, 8(12) # hi - two124m68 | ||
1195 | + | ||
1196 | + # acc[6] = in[6] + two124m68; | ||
1197 | + ld 26, 96(4) # in[6].l | ||
1198 | + ld 27, 96+8(4) # in[6].h | ||
1199 | + add 27, 27, 11 | ||
1200 | + | ||
1201 | + # acc[5] = in[5] + two124m68; | ||
1202 | + ld 24, 80(4) # in[5].l | ||
1203 | + ld 25, 80+8(4) # in[5].h | ||
1204 | + add 25, 25, 11 | ||
1205 | + | ||
1206 | + # acc[4] = in[4] + two124m68; | ||
1207 | + ld 22, 64(4) # in[4].l | ||
1208 | + ld 23, 64+8(4) # in[4].h | ||
1209 | + add 23, 23, 11 | ||
1210 | + | ||
1211 | + # acc[3] = in[3] + two124m68; | ||
1212 | + ld 20, 48(4) # in[3].l | ||
1213 | + ld 21, 48+8(4) # in[3].h | ||
1214 | + add 21, 21, 11 | ||
1215 | + | ||
1216 | + ld 11, 48+8(12) # hi - two124m92m68 | ||
1217 | + | ||
1218 | + # acc[2] = in[2] + two124m92m68; | ||
1219 | + ld 18, 32(4) # in[2].l | ||
1220 | + ld 19, 32+8(4) # in[2].h | ||
1221 | + add 19, 19, 11 | ||
1222 | + | ||
1223 | + ld 11, 16+8(12) # high - two124m116m68 | ||
1224 | + | ||
1225 | + # acc[1] = in[1] + two124m116m68; | ||
1226 | + ld 16, 16(4) # in[1].l | ||
1227 | + ld 17, 16+8(4) # in[1].h | ||
1228 | + add 17, 17, 11 | ||
1229 | + | ||
1230 | + ld 11, 32+8(12) # high - two124p108m76 | ||
1231 | + | ||
1232 | + # acc[0] = in[0] + two124p108m76; | ||
1233 | + ld 14, 0(4) # in[0].l | ||
1234 | + ld 15, 0+8(4) # in[0].h | ||
1235 | + add 15, 15, 11 | ||
1236 | + | ||
1237 | + # compute mask | ||
1238 | + li 7, -1 | ||
1239 | + | ||
1240 | + # Eliminate in[12] | ||
1241 | + | ||
1242 | + # acc[8] += in[12] >> 32; | ||
1243 | + ld 5, 192(4) # in[12].l | ||
1244 | + ld 6, 192+8(4) # in[12].h | ||
1245 | + SHR 9, 10, 6, 5, 32 | ||
1246 | + ld 30, 128(4) # in[8].l | ||
1247 | + ld 31, 136(4) # in[8].h | ||
1248 | + addc 30, 30, 10 | ||
1249 | + adde 31, 31, 9 | ||
1250 | + | ||
1251 | + # acc[7] += (in[12] & 0xffffffff) << 24; | ||
1252 | + srdi 11, 7, 32 # 0xffffffff | ||
1253 | + and 11, 11, 5 | ||
1254 | + sldi 11, 11, 24 # << 24 | ||
1255 | + ld 28, 112(4) # in[7].l | ||
1256 | + ld 29, 120(4) # in[7].h | ||
1257 | + addc 28, 28, 11 | ||
1258 | + addze 29, 29 | ||
1259 | + | ||
1260 | + # acc[7] += in[12] >> 8; | ||
1261 | + SHR 9, 10, 6, 5, 8 | ||
1262 | + addc 28, 28, 10 | ||
1263 | + adde 29, 29, 9 | ||
1264 | + | ||
1265 | + # acc[6] += (in[12] & 0xff) << 48; | ||
1266 | + andi. 11, 5, 0xff | ||
1267 | + sldi 11, 11, 48 | ||
1268 | + addc 26, 26, 11 | ||
1269 | + addze 27, 27 | ||
1270 | + | ||
1271 | + # acc[6] -= in[12] >> 16; | ||
1272 | + SHR 9, 10, 6, 5, 16 | ||
1273 | + subfc 26, 10, 26 | ||
1274 | + subfe 27, 9, 27 | ||
1275 | + | ||
1276 | + # acc[5] -= (in[12] & 0xffff) << 40; | ||
1277 | + srdi 11, 7, 48 # 0xffff | ||
1278 | + and 11, 11, 5 | ||
1279 | + sldi 11, 11, 40 # << 40 | ||
1280 | + li 9, 0 | ||
1281 | + subfc 24, 11, 24 | ||
1282 | + subfe 25, 9, 25 | ||
1283 | + | ||
1284 | + # acc[6] += in[12] >> 48; | ||
1285 | + SHR 9, 10, 6, 5, 48 | ||
1286 | + addc 26, 26, 10 | ||
1287 | + adde 27, 27, 9 | ||
1288 | + | ||
1289 | + # acc[5] += (in[12] & 0xffffffffffff) << 8; | ||
1290 | + srdi 11, 7, 16 # 0xffffffffffff | ||
1291 | + and 11, 11, 5 | ||
1292 | + sldi 11, 11, 8 # << 8 | ||
1293 | + addc 24, 24, 11 | ||
1294 | + addze 25, 25 | ||
1295 | + | ||
1296 | + # Eliminate in[11] | ||
1297 | + | ||
1298 | + # acc[7] += in[11] >> 32; | ||
1299 | + ld 5, 176(4) # in[11].l | ||
1300 | + ld 6, 176+8(4) # in[11].h | ||
1301 | + SHR 9, 10, 6, 5, 32 | ||
1302 | + addc 28, 28, 10 | ||
1303 | + adde 29, 29, 9 | ||
1304 | + | ||
1305 | + # acc[6] += (in[11] & 0xffffffff) << 24; | ||
1306 | + srdi 11, 7, 32 # 0xffffffff | ||
1307 | + and 11, 11, 5 | ||
1308 | + sldi 11, 11, 24 # << 24 | ||
1309 | + addc 26, 26, 11 | ||
1310 | + addze 27, 27 | ||
1311 | + | ||
1312 | + # acc[6] += in[11] >> 8; | ||
1313 | + SHR 9, 10, 6, 5, 8 | ||
1314 | + addc 26, 26, 10 | ||
1315 | + adde 27, 27, 9 | ||
1316 | + | ||
1317 | + # acc[5] += (in[11] & 0xff) << 48; | ||
1318 | + andi. 11, 5, 0xff | ||
1319 | + sldi 11, 11, 48 | ||
1320 | + addc 24, 24, 11 | ||
1321 | + addze 25, 25 | ||
1322 | + | ||
1323 | + # acc[5] -= in[11] >> 16; | ||
1324 | + SHR 9, 10, 6, 5, 16 | ||
1325 | + subfc 24, 10, 24 | ||
1326 | + subfe 25, 9, 25 | ||
1327 | + | ||
1328 | + # acc[4] -= (in[11] & 0xffff) << 40; | ||
1329 | + srdi 11, 7, 48 # 0xffff | ||
1330 | + and 11, 11, 5 | ||
1331 | + sldi 11, 11, 40 # << 40 | ||
1332 | + li 9, 0 | ||
1333 | + subfc 22, 11, 22 | ||
1334 | + subfe 23, 9, 23 | ||
1335 | + | ||
1336 | + # acc[5] += in[11] >> 48; | ||
1337 | + SHR 9, 10, 6, 5, 48 | ||
1338 | + addc 24, 24, 10 | ||
1339 | + adde 25, 25, 9 | ||
1340 | + | ||
1341 | + # acc[4] += (in[11] & 0xffffffffffff) << 8; | ||
1342 | + srdi 11, 7, 16 # 0xffffffffffff | ||
1343 | + and 11, 11, 5 | ||
1344 | + sldi 11, 11, 8 # << 8 | ||
1345 | + addc 22, 22, 11 | ||
1346 | + addze 23, 23 | ||
1347 | + | ||
1348 | + # Eliminate in[10] | ||
1349 | + | ||
1350 | + # acc[6] += in[10] >> 32; | ||
1351 | + ld 5, 160(4) # in[10].l | ||
1352 | + ld 6, 160+8(4) # in[10].h | ||
1353 | + SHR 9, 10, 6, 5, 32 | ||
1354 | + addc 26, 26, 10 | ||
1355 | + adde 27, 27, 9 | ||
1356 | + | ||
1357 | + # acc[5] += (in[10] & 0xffffffff) << 24; | ||
1358 | + srdi 11, 7, 32 # 0xffffffff | ||
1359 | + and 11, 11, 5 | ||
1360 | + sldi 11, 11, 24 # << 24 | ||
1361 | + addc 24, 24, 11 | ||
1362 | + addze 25, 25 | ||
1363 | + | ||
1364 | + # acc[5] += in[10] >> 8; | ||
1365 | + SHR 9, 10, 6, 5, 8 | ||
1366 | + addc 24, 24, 10 | ||
1367 | + adde 25, 25, 9 | ||
1368 | + | ||
1369 | + # acc[4] += (in[10] & 0xff) << 48; | ||
1370 | + andi. 11, 5, 0xff | ||
1371 | + sldi 11, 11, 48 | ||
1372 | + addc 22, 22, 11 | ||
1373 | + addze 23, 23 | ||
1374 | + | ||
1375 | + # acc[4] -= in[10] >> 16; | ||
1376 | + SHR 9, 10, 6, 5, 16 | ||
1377 | + subfc 22, 10, 22 | ||
1378 | + subfe 23, 9, 23 | ||
1379 | + | ||
1380 | + # acc[3] -= (in[10] & 0xffff) << 40; | ||
1381 | + srdi 11, 7, 48 # 0xffff | ||
1382 | + and 11, 11, 5 | ||
1383 | + sldi 11, 11, 40 # << 40 | ||
1384 | + li 9, 0 | ||
1385 | + subfc 20, 11, 20 | ||
1386 | + subfe 21, 9, 21 | ||
1387 | + | ||
1388 | + # acc[4] += in[10] >> 48; | ||
1389 | + SHR 9, 10, 6, 5, 48 | ||
1390 | + addc 22, 22, 10 | ||
1391 | + adde 23, 23, 9 | ||
1392 | + | ||
1393 | + # acc[3] += (in[10] & 0xffffffffffff) << 8; | ||
1394 | + srdi 11, 7, 16 # 0xffffffffffff | ||
1395 | + and 11, 11, 5 | ||
1396 | + sldi 11, 11, 8 # << 8 | ||
1397 | + addc 20, 20, 11 | ||
1398 | + addze 21, 21 | ||
1399 | + | ||
1400 | + # Eliminate in[9] | ||
1401 | + | ||
1402 | + # acc[5] += in[9] >> 32; | ||
1403 | + ld 5, 144(4) # in[9].l | ||
1404 | + ld 6, 144+8(4) # in[9].h | ||
1405 | + SHR 9, 10, 6, 5, 32 | ||
1406 | + addc 24, 24, 10 | ||
1407 | + adde 25, 25, 9 | ||
1408 | + | ||
1409 | + # acc[4] += (in[9] & 0xffffffff) << 24; | ||
1410 | + srdi 11, 7, 32 # 0xffffffff | ||
1411 | + and 11, 11, 5 | ||
1412 | + sldi 11, 11, 24 # << 24 | ||
1413 | + addc 22, 22, 11 | ||
1414 | + addze 23, 23 | ||
1415 | + | ||
1416 | + # acc[4] += in[9] >> 8; | ||
1417 | + SHR 9, 10, 6, 5, 8 | ||
1418 | + addc 22, 22, 10 | ||
1419 | + adde 23, 23, 9 | ||
1420 | + | ||
1421 | + # acc[3] += (in[9] & 0xff) << 48; | ||
1422 | + andi. 11, 5, 0xff | ||
1423 | + sldi 11, 11, 48 | ||
1424 | + addc 20, 20, 11 | ||
1425 | + addze 21, 21 | ||
1426 | + | ||
1427 | + # acc[3] -= in[9] >> 16; | ||
1428 | + SHR 9, 10, 6, 5, 16 | ||
1429 | + subfc 20, 10, 20 | ||
1430 | + subfe 21, 9, 21 | ||
1431 | + | ||
1432 | + # acc[2] -= (in[9] & 0xffff) << 40; | ||
1433 | + srdi 11, 7, 48 # 0xffff | ||
1434 | + and 11, 11, 5 | ||
1435 | + sldi 11, 11, 40 # << 40 | ||
1436 | + li 9, 0 | ||
1437 | + subfc 18, 11, 18 | ||
1438 | + subfe 19, 9, 19 | ||
1439 | + | ||
1440 | + # acc[3] += in[9] >> 48; | ||
1441 | + SHR 9, 10, 6, 5, 48 | ||
1442 | + addc 20, 20, 10 | ||
1443 | + adde 21, 21, 9 | ||
1444 | + | ||
1445 | + # acc[2] += (in[9] & 0xffffffffffff) << 8; | ||
1446 | + srdi 11, 7, 16 # 0xffffffffffff | ||
1447 | + and 11, 11, 5 | ||
1448 | + sldi 11, 11, 8 # << 8 | ||
1449 | + addc 18, 18, 11 | ||
1450 | + addze 19, 19 | ||
1451 | + | ||
1452 | + # Eliminate acc[8] | ||
1453 | + | ||
1454 | + # acc[4] += acc[8] >> 32; | ||
1455 | + mr 5, 30 # acc[8].l | ||
1456 | + mr 6, 31 # acc[8].h | ||
1457 | + SHR 9, 10, 6, 5, 32 | ||
1458 | + addc 22, 22, 10 | ||
1459 | + adde 23, 23, 9 | ||
1460 | + | ||
1461 | + # acc[3] += (acc[8] & 0xffffffff) << 24; | ||
1462 | + srdi 11, 7, 32 # 0xffffffff | ||
1463 | + and 11, 11, 5 | ||
1464 | + sldi 11, 11, 24 # << 24 | ||
1465 | + addc 20, 20, 11 | ||
1466 | + addze 21, 21 | ||
1467 | + | ||
1468 | + # acc[3] += acc[8] >> 8; | ||
1469 | + SHR 9, 10, 6, 5, 8 | ||
1470 | + addc 20, 20, 10 | ||
1471 | + adde 21, 21, 9 | ||
1472 | + | ||
1473 | + # acc[2] += (acc[8] & 0xff) << 48; | ||
1474 | + andi. 11, 5, 0xff | ||
1475 | + sldi 11, 11, 48 | ||
1476 | + addc 18, 18, 11 | ||
1477 | + addze 19, 19 | ||
1478 | + | ||
1479 | + # acc[2] -= acc[8] >> 16; | ||
1480 | + SHR 9, 10, 6, 5, 16 | ||
1481 | + subfc 18, 10, 18 | ||
1482 | + subfe 19, 9, 19 | ||
1483 | + | ||
1484 | + # acc[1] -= (acc[8] & 0xffff) << 40; | ||
1485 | + srdi 11, 7, 48 # 0xffff | ||
1486 | + and 11, 11, 5 | ||
1487 | + sldi 11, 11, 40 # << 40 | ||
1488 | + li 9, 0 | ||
1489 | + subfc 16, 11, 16 | ||
1490 | + subfe 17, 9, 17 | ||
1491 | + | ||
1492 | + #acc[2] += acc[8] >> 48; | ||
1493 | + SHR 9, 10, 6, 5, 48 | ||
1494 | + addc 18, 18, 10 | ||
1495 | + adde 19, 19, 9 | ||
1496 | + | ||
1497 | + # acc[1] += (acc[8] & 0xffffffffffff) << 8; | ||
1498 | + srdi 11, 7, 16 # 0xffffffffffff | ||
1499 | + and 11, 11, 5 | ||
1500 | + sldi 11, 11, 8 # << 8 | ||
1501 | + addc 16, 16, 11 | ||
1502 | + addze 17, 17 | ||
1503 | + | ||
1504 | + # Eliminate acc[7] | ||
1505 | + | ||
1506 | + # acc[3] += acc[7] >> 32; | ||
1507 | + mr 5, 28 # acc[7].l | ||
1508 | + mr 6, 29 # acc[7].h | ||
1509 | + SHR 9, 10, 6, 5, 32 | ||
1510 | + addc 20, 20, 10 | ||
1511 | + adde 21, 21, 9 | ||
1512 | + | ||
1513 | + # acc[2] += (acc[7] & 0xffffffff) << 24; | ||
1514 | + srdi 11, 7, 32 # 0xffffffff | ||
1515 | + and 11, 11, 5 | ||
1516 | + sldi 11, 11, 24 # << 24 | ||
1517 | + addc 18, 18, 11 | ||
1518 | + addze 19, 19 | ||
1519 | + | ||
1520 | + # acc[2] += acc[7] >> 8; | ||
1521 | + SHR 9, 10, 6, 5, 8 | ||
1522 | + addc 18, 18, 10 | ||
1523 | + adde 19, 19, 9 | ||
1524 | + | ||
1525 | + # acc[1] += (acc[7] & 0xff) << 48; | ||
1526 | + andi. 11, 5, 0xff | ||
1527 | + sldi 11, 11, 48 | ||
1528 | + addc 16, 16, 11 | ||
1529 | + addze 17, 17 | ||
1530 | + | ||
1531 | + # acc[1] -= acc[7] >> 16; | ||
1532 | + SHR 9, 10, 6, 5, 16 | ||
1533 | + subfc 16, 10, 16 | ||
1534 | + subfe 17, 9, 17 | ||
1535 | + | ||
1536 | + # acc[0] -= (acc[7] & 0xffff) << 40; | ||
1537 | + srdi 11, 7, 48 # 0xffff | ||
1538 | + and 11, 11, 5 | ||
1539 | + sldi 11, 11, 40 # << 40 | ||
1540 | + li 9, 0 | ||
1541 | + subfc 14, 11, 14 | ||
1542 | + subfe 15, 9, 15 | ||
1543 | + | ||
1544 | + # acc[1] += acc[7] >> 48; | ||
1545 | + SHR 9, 10, 6, 5, 48 | ||
1546 | + addc 16, 16, 10 | ||
1547 | + adde 17, 17, 9 | ||
1548 | + | ||
1549 | + # acc[0] += (acc[7] & 0xffffffffffff) << 8; | ||
1550 | + srdi 11, 7, 16 # 0xffffffffffff | ||
1551 | + and 11, 11, 5 | ||
1552 | + sldi 11, 11, 8 # << 8 | ||
1553 | + addc 14, 14, 11 | ||
1554 | + addze 15, 15 | ||
1555 | + | ||
1556 | + # | ||
1557 | + # Carry 4 -> 5 -> 6 | ||
1558 | + # | ||
1559 | + # acc[5] += acc[4] >> 56; | ||
1560 | + # acc[4] &= 0x00ffffffffffffff; | ||
1561 | + SHR 9, 10, 23, 22, 56 | ||
1562 | + addc 24, 24, 10 | ||
1563 | + adde 25, 25, 9 | ||
1564 | + srdi 11, 7, 8 # 0x00ffffffffffffff | ||
1565 | + and 22, 22, 11 | ||
1566 | + li 23, 0 | ||
1567 | + | ||
1568 | + # acc[6] += acc[5] >> 56; | ||
1569 | + # acc[5] &= 0x00ffffffffffffff; | ||
1570 | + SHR 9, 10, 25, 24, 56 | ||
1571 | + addc 26, 26, 10 | ||
1572 | + adde 27, 27, 9 | ||
1573 | + and 24, 24, 11 | ||
1574 | + li 25, 0 | ||
1575 | + | ||
1576 | + # [3]: Eliminate high bits of acc[6] | ||
1577 | + # temp = acc[6] >> 48; | ||
1578 | + # acc[6] &= 0x0000ffffffffffff; | ||
1579 | + SHR 31, 30, 27, 26, 48 # temp = acc[6] >> 48 | ||
1580 | + srdi 11, 7, 16 # 0x0000ffffffffffff | ||
1581 | + and 26, 26, 11 | ||
1582 | + li 27, 0 | ||
1583 | + | ||
1584 | + # temp < 2^80 | ||
1585 | + # acc[3] += temp >> 40; | ||
1586 | + SHR 9, 10, 31, 30, 40 | ||
1587 | + addc 20, 20, 10 | ||
1588 | + adde 21, 21, 9 | ||
1589 | + | ||
1590 | + # acc[2] += (temp & 0xffffffffff) << 16; | ||
1591 | + srdi 11, 7, 24 # 0xffffffffff | ||
1592 | + and 10, 30, 11 | ||
1593 | + sldi 10, 10, 16 | ||
1594 | + addc 18, 18, 10 | ||
1595 | + addze 19, 19 | ||
1596 | + | ||
1597 | + # acc[2] += temp >> 16; | ||
1598 | + SHR 9, 10, 31, 30, 16 | ||
1599 | + addc 18, 18, 10 | ||
1600 | + adde 19, 19, 9 | ||
1601 | + | ||
1602 | + # acc[1] += (temp & 0xffff) << 40; | ||
1603 | + srdi 11, 7, 48 # 0xffff | ||
1604 | + and 10, 30, 11 | ||
1605 | + sldi 10, 10, 40 | ||
1606 | + addc 16, 16, 10 | ||
1607 | + addze 17, 17 | ||
1608 | + | ||
1609 | + # acc[1] -= temp >> 24; | ||
1610 | + SHR 9, 10, 31, 30, 24 | ||
1611 | + subfc 16, 10, 16 | ||
1612 | + subfe 17, 9, 17 | ||
1613 | + | ||
1614 | + # acc[0] -= (temp & 0xffffff) << 32; | ||
1615 | + srdi 11, 7, 40 # 0xffffff | ||
1616 | + and 10, 30, 11 | ||
1617 | + sldi 10, 10, 32 | ||
1618 | + li 9, 0 | ||
1619 | + subfc 14, 10, 14 | ||
1620 | + subfe 15, 9, 15 | ||
1621 | + | ||
1622 | + # acc[0] += temp; | ||
1623 | + addc 14, 14, 30 | ||
1624 | + adde 15, 15, 31 | ||
1625 | + | ||
1626 | + # Carry 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6 | ||
1627 | + # | ||
1628 | + # acc[1] += acc[0] >> 56; /* acc[1] < acc_old[1] + 2^72 */ | ||
1629 | + SHR 9, 10, 15, 14, 56 | ||
1630 | + addc 16, 16, 10 | ||
1631 | + adde 17, 17, 9 | ||
1632 | + | ||
1633 | + # acc[0] &= 0x00ffffffffffffff; | ||
1634 | + srdi 11, 7, 8 # 0x00ffffffffffffff | ||
1635 | + and 14, 14, 11 | ||
1636 | + li 15, 0 | ||
1637 | + | ||
1638 | + # acc[2] += acc[1] >> 56; /* acc[2] < acc_old[2] + 2^72 + 2^16 */ | ||
1639 | + SHR 9, 10, 17, 16, 56 | ||
1640 | + addc 18, 18, 10 | ||
1641 | + adde 19, 19, 9 | ||
1642 | + | ||
1643 | + # acc[1] &= 0x00ffffffffffffff; | ||
1644 | + and 16, 16, 11 | ||
1645 | + li 17, 0 | ||
1646 | + | ||
1647 | + # acc[3] += acc[2] >> 56; /* acc[3] < acc_old[3] + 2^72 + 2^16 */ | ||
1648 | + SHR 9, 10, 19, 18, 56 | ||
1649 | + addc 20, 20, 10 | ||
1650 | + adde 21, 21, 9 | ||
1651 | + | ||
1652 | + # acc[2] &= 0x00ffffffffffffff; | ||
1653 | + and 18, 18, 11 | ||
1654 | + li 19, 0 | ||
1655 | + | ||
1656 | + # acc[4] += acc[3] >> 56; | ||
1657 | + SHR 9, 10, 21, 20, 56 | ||
1658 | + addc 22, 22, 10 | ||
1659 | + adde 23, 23, 9 | ||
1660 | + | ||
1661 | + # acc[3] &= 0x00ffffffffffffff; | ||
1662 | + and 20, 20, 11 | ||
1663 | + li 21, 0 | ||
1664 | + | ||
1665 | + # acc[5] += acc[4] >> 56; | ||
1666 | + SHR 9, 10, 23, 22, 56 | ||
1667 | + addc 24, 24, 10 | ||
1668 | + adde 25, 25, 9 | ||
1669 | + | ||
1670 | + # acc[4] &= 0x00ffffffffffffff; | ||
1671 | + and 22, 22, 11 | ||
1672 | + | ||
1673 | + # acc[6] += acc[5] >> 56; | ||
1674 | + SHR 9, 10, 25, 24, 56 | ||
1675 | + addc 26, 26, 10 | ||
1676 | + adde 27, 27, 9 | ||
1677 | + | ||
1678 | + # acc[5] &= 0x00ffffffffffffff; | ||
1679 | + and 24, 24, 11 | ||
1680 | + | ||
1681 | + std 14, 0(3) | ||
1682 | + std 16, 8(3) | ||
1683 | + std 18, 16(3) | ||
1684 | + std 20, 24(3) | ||
1685 | + std 22, 32(3) | ||
1686 | + std 24, 40(3) | ||
1687 | + std 26, 48(3) | ||
1688 | + blr | ||
1689 | +.size _p384_felem_reduce_core,.-_p384_felem_reduce_core | ||
1690 | + | ||
1691 | +.data | ||
1692 | +.align 4 | ||
1693 | +.LConst: | ||
1694 | +# two124m68: | ||
1695 | +.long 0x0, 0x0, 0xfffffff0, 0xfffffff | ||
1696 | +# two124m116m68: | ||
1697 | +.long 0x0, 0x0, 0xfffffff0, 0xfefffff | ||
1698 | +#two124p108m76: | ||
1699 | +.long 0x0, 0x0, 0xfffff000, 0x10000fff | ||
1700 | +#two124m92m68: | ||
1701 | +.long 0x0, 0x0, 0xeffffff0, 0xfffffff | ||
1702 | + | ||
1703 | +.text | ||
1704 | + | ||
1705 | +# | ||
1706 | +# void p384_felem_square_reduce(felem out, const felem in) | ||
1707 | +# | ||
1708 | +.global p384_felem_square_reduce | ||
1709 | +.type p384_felem_square_reduce,\@function | ||
1710 | +.align 4 | ||
1711 | +p384_felem_square_reduce: | ||
1712 | + stdu 1, -512(1) | ||
1713 | + mflr 0 | ||
1714 | + std 14, 56(1) | ||
1715 | + std 15, 64(1) | ||
1716 | + std 16, 72(1) | ||
1717 | + std 17, 80(1) | ||
1718 | + std 18, 88(1) | ||
1719 | + std 19, 96(1) | ||
1720 | + std 20, 104(1) | ||
1721 | + std 21, 112(1) | ||
1722 | + std 22, 120(1) | ||
1723 | + std 23, 128(1) | ||
1724 | + std 24, 136(1) | ||
1725 | + std 25, 144(1) | ||
1726 | + std 26, 152(1) | ||
1727 | + std 27, 160(1) | ||
1728 | + std 28, 168(1) | ||
1729 | + std 29, 176(1) | ||
1730 | + std 30, 184(1) | ||
1731 | + std 31, 192(1) | ||
1732 | + | ||
1733 | + std 3, 496(1) | ||
1734 | + addi 3, 1, 208 | ||
1735 | + bl _p384_felem_square_core | ||
1736 | + | ||
1737 | + mr 4, 3 | ||
1738 | + ld 3, 496(1) | ||
1739 | + bl _p384_felem_reduce_core | ||
1740 | + | ||
1741 | + ld 14, 56(1) | ||
1742 | + ld 15, 64(1) | ||
1743 | + ld 16, 72(1) | ||
1744 | + ld 17, 80(1) | ||
1745 | + ld 18, 88(1) | ||
1746 | + ld 19, 96(1) | ||
1747 | + ld 20, 104(1) | ||
1748 | + ld 21, 112(1) | ||
1749 | + ld 22, 120(1) | ||
1750 | + ld 23, 128(1) | ||
1751 | + ld 24, 136(1) | ||
1752 | + ld 25, 144(1) | ||
1753 | + ld 26, 152(1) | ||
1754 | + ld 27, 160(1) | ||
1755 | + ld 28, 168(1) | ||
1756 | + ld 29, 176(1) | ||
1757 | + ld 30, 184(1) | ||
1758 | + ld 31, 192(1) | ||
1759 | + addi 1, 1, 512 | ||
1760 | + mtlr 0 | ||
1761 | + blr | ||
1762 | +.size p384_felem_square_reduce,.-p384_felem_square_reduce | ||
1763 | + | ||
1764 | +# | ||
1765 | +# void p384_felem_mul_reduce(felem out, const felem in1, const felem in2) | ||
1766 | +# | ||
1767 | +.global p384_felem_mul_reduce | ||
1768 | +.type p384_felem_mul_reduce,\@function | ||
1769 | +.align 5 | ||
1770 | +p384_felem_mul_reduce: | ||
1771 | + stdu 1, -512(1) | ||
1772 | + mflr 0 | ||
1773 | + std 14, 56(1) | ||
1774 | + std 15, 64(1) | ||
1775 | + std 16, 72(1) | ||
1776 | + std 17, 80(1) | ||
1777 | + std 18, 88(1) | ||
1778 | + std 19, 96(1) | ||
1779 | + std 20, 104(1) | ||
1780 | + std 21, 112(1) | ||
1781 | + std 22, 120(1) | ||
1782 | + std 23, 128(1) | ||
1783 | + std 24, 136(1) | ||
1784 | + std 25, 144(1) | ||
1785 | + std 26, 152(1) | ||
1786 | + std 27, 160(1) | ||
1787 | + std 28, 168(1) | ||
1788 | + std 29, 176(1) | ||
1789 | + std 30, 184(1) | ||
1790 | + std 31, 192(1) | ||
1791 | + | ||
1792 | + std 3, 496(1) | ||
1793 | + addi 3, 1, 208 | ||
1794 | + bl _p384_felem_mul_core | ||
1795 | + | ||
1796 | + mr 4, 3 | ||
1797 | + ld 3, 496(1) | ||
1798 | + bl _p384_felem_reduce_core | ||
1799 | + | ||
1800 | + ld 14, 56(1) | ||
1801 | + ld 15, 64(1) | ||
1802 | + ld 16, 72(1) | ||
1803 | + ld 17, 80(1) | ||
1804 | + ld 18, 88(1) | ||
1805 | + ld 19, 96(1) | ||
1806 | + ld 20, 104(1) | ||
1807 | + ld 21, 112(1) | ||
1808 | + ld 22, 120(1) | ||
1809 | + ld 23, 128(1) | ||
1810 | + ld 24, 136(1) | ||
1811 | + ld 25, 144(1) | ||
1812 | + ld 26, 152(1) | ||
1813 | + ld 27, 160(1) | ||
1814 | + ld 28, 168(1) | ||
1815 | + ld 29, 176(1) | ||
1816 | + ld 30, 184(1) | ||
1817 | + ld 31, 192(1) | ||
1818 | + addi 1, 1, 512 | ||
1819 | + mtlr 0 | ||
1820 | + blr | ||
1821 | +.size p384_felem_mul_reduce,.-p384_felem_mul_reduce | ||
1822 | +___ | ||
1823 | |||
1824 | $code =~ s/\`([^\`]*)\`/eval $1/gem; | ||
1825 | print $code; | ||
1826 | diff --git a/crypto/ec/ecp_nistp384.c b/crypto/ec/ecp_nistp384.c | ||
1827 | index 3fd7a40020..e0b5786bc1 100644 | ||
1828 | --- a/crypto/ec/ecp_nistp384.c | ||
1829 | +++ b/crypto/ec/ecp_nistp384.c | ||
1830 | @@ -252,6 +252,16 @@ static void felem_neg(felem out, const felem in) | ||
1831 | out[6] = two60m4 - in[6]; | ||
1832 | } | ||
1833 | |||
1834 | +#if defined(ECP_NISTP384_ASM) | ||
1835 | +void p384_felem_diff64(felem out, const felem in); | ||
1836 | +void p384_felem_diff128(widefelem out, const widefelem in); | ||
1837 | +void p384_felem_diff_128_64(widefelem out, const felem in); | ||
1838 | + | ||
1839 | +# define felem_diff64 p384_felem_diff64 | ||
1840 | +# define felem_diff128 p384_felem_diff128 | ||
1841 | +# define felem_diff_128_64 p384_felem_diff_128_64 | ||
1842 | + | ||
1843 | +#else | ||
1844 | /*- | ||
1845 | * felem_diff64 subtracts |in| from |out| | ||
1846 | * On entry: | ||
1847 | @@ -369,6 +379,7 @@ static void felem_diff128(widefelem out, const widefelem in) | ||
1848 | for (i = 0; i < 2*NLIMBS-1; i++) | ||
1849 | out[i] -= in[i]; | ||
1850 | } | ||
1851 | +#endif /* ECP_NISTP384_ASM */ | ||
1852 | |||
1853 | static void felem_square_ref(widefelem out, const felem in) | ||
1854 | { | ||
1855 | @@ -503,7 +514,7 @@ static void felem_mul_ref(widefelem out, const felem in1, const felem in2) | ||
1856 | * [3]: Y = 2^48 (acc[6] >> 48) | ||
1857 | * (Where a | b | c | d = (2^56)^3 a + (2^56)^2 b + (2^56) c + d) | ||
1858 | */ | ||
1859 | -static void felem_reduce(felem out, const widefelem in) | ||
1860 | +static void felem_reduce_ref(felem out, const widefelem in) | ||
1861 | { | ||
1862 | /* | ||
1863 | * In order to prevent underflow, we add a multiple of p before subtracting. | ||
1864 | @@ -682,8 +693,11 @@ static void (*felem_square_p)(widefelem out, const felem in) = | ||
1865 | static void (*felem_mul_p)(widefelem out, const felem in1, const felem in2) = | ||
1866 | felem_mul_wrapper; | ||
1867 | |||
1868 | +static void (*felem_reduce_p)(felem out, const widefelem in) = felem_reduce_ref; | ||
1869 | + | ||
1870 | void p384_felem_square(widefelem out, const felem in); | ||
1871 | void p384_felem_mul(widefelem out, const felem in1, const felem in2); | ||
1872 | +void p384_felem_reduce(felem out, const widefelem in); | ||
1873 | |||
1874 | # if defined(_ARCH_PPC64) | ||
1875 | # include "crypto/ppc_arch.h" | ||
1876 | @@ -695,6 +709,7 @@ static void felem_select(void) | ||
1877 | if ((OPENSSL_ppccap_P & PPC_MADD300) && (OPENSSL_ppccap_P & PPC_ALTIVEC)) { | ||
1878 | felem_square_p = p384_felem_square; | ||
1879 | felem_mul_p = p384_felem_mul; | ||
1880 | + felem_reduce_p = p384_felem_reduce; | ||
1881 | |||
1882 | return; | ||
1883 | } | ||
1884 | @@ -703,6 +718,7 @@ static void felem_select(void) | ||
1885 | /* Default */ | ||
1886 | felem_square_p = felem_square_ref; | ||
1887 | felem_mul_p = felem_mul_ref; | ||
1888 | + felem_reduce_p = p384_felem_reduce; | ||
1889 | } | ||
1890 | |||
1891 | static void felem_square_wrapper(widefelem out, const felem in) | ||
1892 | @@ -719,10 +735,17 @@ static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2) | ||
1893 | |||
1894 | # define felem_square felem_square_p | ||
1895 | # define felem_mul felem_mul_p | ||
1896 | +# define felem_reduce felem_reduce_p | ||
1897 | + | ||
1898 | +void p384_felem_square_reduce(felem out, const felem in); | ||
1899 | +void p384_felem_mul_reduce(felem out, const felem in1, const felem in2); | ||
1900 | + | ||
1901 | +# define felem_square_reduce p384_felem_square_reduce | ||
1902 | +# define felem_mul_reduce p384_felem_mul_reduce | ||
1903 | #else | ||
1904 | # define felem_square felem_square_ref | ||
1905 | # define felem_mul felem_mul_ref | ||
1906 | -#endif | ||
1907 | +# define felem_reduce felem_reduce_ref | ||
1908 | |||
1909 | static ossl_inline void felem_square_reduce(felem out, const felem in) | ||
1910 | { | ||
1911 | @@ -739,6 +762,7 @@ static ossl_inline void felem_mul_reduce(felem out, const felem in1, const felem | ||
1912 | felem_mul(tmp, in1, in2); | ||
1913 | felem_reduce(out, tmp); | ||
1914 | } | ||
1915 | +#endif | ||
1916 | |||
1917 | /*- | ||
1918 | * felem_inv calculates |out| = |in|^{-1} | ||
diff --git a/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-2.patch b/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-2.patch new file mode 100644 index 0000000000..0659a9d6d9 --- /dev/null +++ b/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-2.patch | |||
@@ -0,0 +1,129 @@ | |||
1 | From 6b1646e472c9e8c08bb14066ba2a7c3eed45f84a Mon Sep 17 00:00:00 2001 | ||
2 | From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> | ||
3 | Date: Thu, 17 Apr 2025 08:51:53 -0500 | ||
4 | Subject: [PATCH] Fix P-384 curve on lower-than-P9 PPC64 targets | ||
5 | |||
6 | The change adding an asm implementation of p384_felem_reduce incorrectly | ||
7 | uses the accelerated version on both targets that support the intrinsics | ||
8 | *and* targets that don't, instead of falling back to the generics on older | ||
9 | targets. This results in crashes when trying to use P-384 on < Power9. | ||
10 | |||
11 | Signed-off-by: Anna Wilcox <AWilcox@Wilcox-Tech.com> | ||
12 | Closes: #27350 | ||
13 | Fixes: 85cabd94 ("Fix Minerva timing side-channel signal for P-384 curve on PPC") | ||
14 | |||
15 | Reviewed-by: Dmitry Belyavskiy <beldmit@gmail.com> | ||
16 | Reviewed-by: Tomas Mraz <tomas@openssl.org> | ||
17 | (Merged from https://github.com/openssl/openssl/pull/27429) | ||
18 | |||
19 | (cherry picked from commit 29864f2b0f1046177e8048a5b17440893d3f9425) | ||
20 | |||
21 | CVE: CVE-2025-27587 | ||
22 | Upstream-Status: Backport [https://github.com/openssl/openssl/commit/6b1646e472c9e8c08bb14066ba2a7c3eed45f84a] | ||
23 | Signed-off-by: Peter Marko <peter.marko@siemens.com> | ||
24 | --- | ||
25 | crypto/ec/ecp_nistp384.c | 54 ++++++++++++++++++++++++---------------- | ||
26 | 1 file changed, 33 insertions(+), 21 deletions(-) | ||
27 | |||
28 | diff --git a/crypto/ec/ecp_nistp384.c b/crypto/ec/ecp_nistp384.c | ||
29 | index e0b5786bc1..439b4d03a3 100644 | ||
30 | --- a/crypto/ec/ecp_nistp384.c | ||
31 | +++ b/crypto/ec/ecp_nistp384.c | ||
32 | @@ -684,6 +684,22 @@ static void felem_reduce_ref(felem out, const widefelem in) | ||
33 | out[i] = acc[i]; | ||
34 | } | ||
35 | |||
36 | +static ossl_inline void felem_square_reduce_ref(felem out, const felem in) | ||
37 | +{ | ||
38 | + widefelem tmp; | ||
39 | + | ||
40 | + felem_square_ref(tmp, in); | ||
41 | + felem_reduce_ref(out, tmp); | ||
42 | +} | ||
43 | + | ||
44 | +static ossl_inline void felem_mul_reduce_ref(felem out, const felem in1, const felem in2) | ||
45 | +{ | ||
46 | + widefelem tmp; | ||
47 | + | ||
48 | + felem_mul_ref(tmp, in1, in2); | ||
49 | + felem_reduce_ref(out, tmp); | ||
50 | +} | ||
51 | + | ||
52 | #if defined(ECP_NISTP384_ASM) | ||
53 | static void felem_square_wrapper(widefelem out, const felem in); | ||
54 | static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2); | ||
55 | @@ -695,10 +711,18 @@ static void (*felem_mul_p)(widefelem out, const felem in1, const felem in2) = | ||
56 | |||
57 | static void (*felem_reduce_p)(felem out, const widefelem in) = felem_reduce_ref; | ||
58 | |||
59 | +static void (*felem_square_reduce_p)(felem out, const felem in) = | ||
60 | + felem_square_reduce_ref; | ||
61 | +static void (*felem_mul_reduce_p)(felem out, const felem in1, const felem in2) = | ||
62 | + felem_mul_reduce_ref; | ||
63 | + | ||
64 | void p384_felem_square(widefelem out, const felem in); | ||
65 | void p384_felem_mul(widefelem out, const felem in1, const felem in2); | ||
66 | void p384_felem_reduce(felem out, const widefelem in); | ||
67 | |||
68 | +void p384_felem_square_reduce(felem out, const felem in); | ||
69 | +void p384_felem_mul_reduce(felem out, const felem in1, const felem in2); | ||
70 | + | ||
71 | # if defined(_ARCH_PPC64) | ||
72 | # include "crypto/ppc_arch.h" | ||
73 | # endif | ||
74 | @@ -710,6 +734,8 @@ static void felem_select(void) | ||
75 | felem_square_p = p384_felem_square; | ||
76 | felem_mul_p = p384_felem_mul; | ||
77 | felem_reduce_p = p384_felem_reduce; | ||
78 | + felem_square_reduce_p = p384_felem_square_reduce; | ||
79 | + felem_mul_reduce_p = p384_felem_mul_reduce; | ||
80 | |||
81 | return; | ||
82 | } | ||
83 | @@ -718,7 +744,9 @@ static void felem_select(void) | ||
84 | /* Default */ | ||
85 | felem_square_p = felem_square_ref; | ||
86 | felem_mul_p = felem_mul_ref; | ||
87 | - felem_reduce_p = p384_felem_reduce; | ||
88 | + felem_reduce_p = felem_reduce_ref; | ||
89 | + felem_square_reduce_p = felem_square_reduce_ref; | ||
90 | + felem_mul_reduce_p = felem_mul_reduce_ref; | ||
91 | } | ||
92 | |||
93 | static void felem_square_wrapper(widefelem out, const felem in) | ||
94 | @@ -737,31 +765,15 @@ static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2) | ||
95 | # define felem_mul felem_mul_p | ||
96 | # define felem_reduce felem_reduce_p | ||
97 | |||
98 | -void p384_felem_square_reduce(felem out, const felem in); | ||
99 | -void p384_felem_mul_reduce(felem out, const felem in1, const felem in2); | ||
100 | - | ||
101 | -# define felem_square_reduce p384_felem_square_reduce | ||
102 | -# define felem_mul_reduce p384_felem_mul_reduce | ||
103 | +# define felem_square_reduce felem_square_reduce_p | ||
104 | +# define felem_mul_reduce felem_mul_reduce_p | ||
105 | #else | ||
106 | # define felem_square felem_square_ref | ||
107 | # define felem_mul felem_mul_ref | ||
108 | # define felem_reduce felem_reduce_ref | ||
109 | |||
110 | -static ossl_inline void felem_square_reduce(felem out, const felem in) | ||
111 | -{ | ||
112 | - widefelem tmp; | ||
113 | - | ||
114 | - felem_square(tmp, in); | ||
115 | - felem_reduce(out, tmp); | ||
116 | -} | ||
117 | - | ||
118 | -static ossl_inline void felem_mul_reduce(felem out, const felem in1, const felem in2) | ||
119 | -{ | ||
120 | - widefelem tmp; | ||
121 | - | ||
122 | - felem_mul(tmp, in1, in2); | ||
123 | - felem_reduce(out, tmp); | ||
124 | -} | ||
125 | +# define felem_square_reduce felem_square_reduce_ref | ||
126 | +# define felem_mul_reduce felem_mul_reduce_ref | ||
127 | #endif | ||
128 | |||
129 | /*- | ||
diff --git a/meta/recipes-connectivity/openssl/openssl_3.2.4.bb b/meta/recipes-connectivity/openssl/openssl_3.2.4.bb index d6bf32d989..fd98b32007 100644 --- a/meta/recipes-connectivity/openssl/openssl_3.2.4.bb +++ b/meta/recipes-connectivity/openssl/openssl_3.2.4.bb | |||
@@ -13,6 +13,8 @@ SRC_URI = "https://github.com/openssl/openssl/releases/download/openssl-${PV}/op | |||
13 | file://0001-Configure-do-not-tweak-mips-cflags.patch \ | 13 | file://0001-Configure-do-not-tweak-mips-cflags.patch \ |
14 | file://0001-Added-handshake-history-reporting-when-test-fails.patch \ | 14 | file://0001-Added-handshake-history-reporting-when-test-fails.patch \ |
15 | file://CVE-2024-41996.patch \ | 15 | file://CVE-2024-41996.patch \ |
16 | file://CVE-2025-27587-1.patch \ | ||
17 | file://CVE-2025-27587-2.patch \ | ||
16 | " | 18 | " |
17 | 19 | ||
18 | SRC_URI:append:class-nativesdk = " \ | 20 | SRC_URI:append:class-nativesdk = " \ |