summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-1.patch1918
-rw-r--r--meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-2.patch129
-rw-r--r--meta/recipes-connectivity/openssl/openssl_3.2.4.bb2
3 files changed, 2049 insertions, 0 deletions
diff --git a/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-1.patch b/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-1.patch
new file mode 100644
index 0000000000..eb3fc52dca
--- /dev/null
+++ b/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-1.patch
@@ -0,0 +1,1918 @@
1From 14ac0f0e4e1f36793d09b41ffd5e482575289ab2 Mon Sep 17 00:00:00 2001
2From: Danny Tsen <dtsen@us.ibm.com>
3Date: Tue, 11 Feb 2025 13:48:01 -0500
4Subject: [PATCH] Fix Minerva timing side-channel signal for P-384 curve on PPC
5
61. bn_ppc.c: Used bn_mul_mont_int() instead of bn_mul_mont_300_fixed_n6()
7 for Montgomery multiplication.
82. ecp_nistp384-ppc64.pl:
9 - Re-wrote p384_felem_mul and p384_felem_square for easier maintenance with
10 minimum perl wrapper.
11 - Implemented p384_felem_reduce, p384_felem_mul_reduce and p384_felem_square_reduce.
12 - Implemented p384_felem_diff64, felem_diff_128_64 and felem_diff128 in assembly.
133. ecp_nistp384.c:
14 - Added wrapper function for p384_felem_mul_reduce and p384_felem_square_reduce.
15
16Signed-off-by: Danny Tsen <dtsen@us.ibm.com>
17
18Reviewed-by: Dmitry Belyavskiy <beldmit@gmail.com>
19Reviewed-by: Tomas Mraz <tomas@openssl.org>
20(Merged from https://github.com/openssl/openssl/pull/26709)
21
22(cherry picked from commit 85cabd94958303859b1551364a609d4ff40b67a5)
23
24CVE: CVE-2025-27587
25Upstream-Status: Backport [https://github.com/openssl/openssl/commit/14ac0f0e4e1f36793d09b41ffd5e482575289ab2]
26Signed-off-by: Peter Marko <peter.marko@siemens.com>
27---
28 crypto/bn/bn_ppc.c | 3 +
29 crypto/ec/asm/ecp_nistp384-ppc64.pl | 1724 +++++++++++++++++++++++----
30 crypto/ec/ecp_nistp384.c | 28 +-
31 3 files changed, 1504 insertions(+), 251 deletions(-)
32
33diff --git a/crypto/bn/bn_ppc.c b/crypto/bn/bn_ppc.c
34index 1e9421bee2..29293bad55 100644
35--- a/crypto/bn/bn_ppc.c
36+++ b/crypto/bn/bn_ppc.c
37@@ -41,12 +41,15 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
38 */
39
40 #if defined(_ARCH_PPC64) && !defined(__ILP32__)
41+ /* Minerva side-channel fix danny */
42+# if defined(USE_FIXED_N6)
43 if (num == 6) {
44 if (OPENSSL_ppccap_P & PPC_MADD300)
45 return bn_mul_mont_300_fixed_n6(rp, ap, bp, np, n0, num);
46 else
47 return bn_mul_mont_fixed_n6(rp, ap, bp, np, n0, num);
48 }
49+# endif
50 #endif
51
52 return bn_mul_mont_int(rp, ap, bp, np, n0, num);
53diff --git a/crypto/ec/asm/ecp_nistp384-ppc64.pl b/crypto/ec/asm/ecp_nistp384-ppc64.pl
54index 28f4168e52..b663bddfc6 100755
55--- a/crypto/ec/asm/ecp_nistp384-ppc64.pl
56+++ b/crypto/ec/asm/ecp_nistp384-ppc64.pl
57@@ -7,13 +7,15 @@
58 # https://www.openssl.org/source/license.html
59 #
60 # ====================================================================
61-# Written by Rohan McLure <rmclure@linux.ibm.com> for the OpenSSL
62-# project.
63+# Written by Danny Tsen <dtsen@us.ibm.com> # for the OpenSSL project.
64+#
65+# Copyright 2025- IBM Corp.
66 # ====================================================================
67 #
68-# p384 lower-level primitives for PPC64 using vector instructions.
69+# p384 lower-level primitives for PPC64.
70 #
71
72+
73 use strict;
74 use warnings;
75
76@@ -21,7 +23,7 @@ my $flavour = shift;
77 my $output = "";
78 while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
79 if (!$output) {
80- $output = "-";
81+ $output = "-";
82 }
83
84 my ($xlate, $dir);
85@@ -35,271 +37,1495 @@ open OUT,"| \"$^X\" $xlate $flavour $output";
86
87 my $code = "";
88
89-my ($sp, $outp, $savelr, $savesp) = ("r1", "r3", "r10", "r12");
90-
91-my $vzero = "v32";
92-
93-sub startproc($)
94-{
95- my ($name) = @_;
96-
97- $code.=<<___;
98- .globl ${name}
99- .align 5
100-${name}:
101-
102-___
103-}
104-
105-sub endproc($)
106-{
107- my ($name) = @_;
108-
109- $code.=<<___;
110- blr
111- .size ${name},.-${name}
112-
113-___
114-}
115-
116-sub load_vrs($$)
117-{
118- my ($pointer, $reg_list) = @_;
119-
120- for (my $i = 0; $i <= 6; $i++) {
121- my $offset = $i * 8;
122- $code.=<<___;
123- lxsd $reg_list->[$i],$offset($pointer)
124-___
125- }
126-
127- $code.=<<___;
128-
129-___
130-}
131-
132-sub store_vrs($$)
133-{
134- my ($pointer, $reg_list) = @_;
135-
136- for (my $i = 0; $i <= 12; $i++) {
137- my $offset = $i * 16;
138- $code.=<<___;
139- stxv $reg_list->[$i],$offset($pointer)
140-___
141- }
142-
143- $code.=<<___;
144-
145-___
146-}
147-
148 $code.=<<___;
149-.machine "any"
150+.machine "any"
151 .text
152
153-___
154+.globl p384_felem_mul
155+.type p384_felem_mul,\@function
156+.align 4
157+p384_felem_mul:
158
159-{
160- # mul/square common
161- my ($t1, $t2, $t3, $t4) = ("v33", "v34", "v42", "v43");
162- my ($zero, $one) = ("r8", "r9");
163- my $out = "v51";
164+ stdu 1, -176(1)
165+ mflr 0
166+ std 14, 56(1)
167+ std 15, 64(1)
168+ std 16, 72(1)
169+ std 17, 80(1)
170+ std 18, 88(1)
171+ std 19, 96(1)
172+ std 20, 104(1)
173+ std 21, 112(1)
174+ std 22, 120(1)
175
176- {
177- #
178- # p384_felem_mul
179- #
180+ bl _p384_felem_mul_core
181
182- my ($in1p, $in2p) = ("r4", "r5");
183- my @in1 = map("v$_",(44..50));
184- my @in2 = map("v$_",(35..41));
185+ mtlr 0
186+ ld 14, 56(1)
187+ ld 15, 64(1)
188+ ld 16, 72(1)
189+ ld 17, 80(1)
190+ ld 18, 88(1)
191+ ld 19, 96(1)
192+ ld 20, 104(1)
193+ ld 21, 112(1)
194+ ld 22, 120(1)
195+ addi 1, 1, 176
196+ blr
197+.size p384_felem_mul,.-p384_felem_mul
198
199- startproc("p384_felem_mul");
200+.globl p384_felem_square
201+.type p384_felem_square,\@function
202+.align 4
203+p384_felem_square:
204
205- $code.=<<___;
206- vspltisw $vzero,0
207+ stdu 1, -176(1)
208+ mflr 0
209+ std 14, 56(1)
210+ std 15, 64(1)
211+ std 16, 72(1)
212+ std 17, 80(1)
213
214-___
215+ bl _p384_felem_square_core
216
217- load_vrs($in1p, \@in1);
218- load_vrs($in2p, \@in2);
219-
220- $code.=<<___;
221- vmsumudm $out,$in1[0],$in2[0],$vzero
222- stxv $out,0($outp)
223-
224- xxpermdi $t1,$in1[0],$in1[1],0b00
225- xxpermdi $t2,$in2[1],$in2[0],0b00
226- vmsumudm $out,$t1,$t2,$vzero
227- stxv $out,16($outp)
228-
229- xxpermdi $t2,$in2[2],$in2[1],0b00
230- vmsumudm $out,$t1,$t2,$vzero
231- vmsumudm $out,$in1[2],$in2[0],$out
232- stxv $out,32($outp)
233-
234- xxpermdi $t2,$in2[1],$in2[0],0b00
235- xxpermdi $t3,$in1[2],$in1[3],0b00
236- xxpermdi $t4,$in2[3],$in2[2],0b00
237- vmsumudm $out,$t1,$t4,$vzero
238- vmsumudm $out,$t3,$t2,$out
239- stxv $out,48($outp)
240-
241- xxpermdi $t2,$in2[4],$in2[3],0b00
242- xxpermdi $t4,$in2[2],$in2[1],0b00
243- vmsumudm $out,$t1,$t2,$vzero
244- vmsumudm $out,$t3,$t4,$out
245- vmsumudm $out,$in1[4],$in2[0],$out
246- stxv $out,64($outp)
247-
248- xxpermdi $t2,$in2[5],$in2[4],0b00
249- xxpermdi $t4,$in2[3],$in2[2],0b00
250- vmsumudm $out,$t1,$t2,$vzero
251- vmsumudm $out,$t3,$t4,$out
252- xxpermdi $t4,$in2[1],$in2[0],0b00
253- xxpermdi $t1,$in1[4],$in1[5],0b00
254- vmsumudm $out,$t1,$t4,$out
255- stxv $out,80($outp)
256-
257- xxpermdi $t1,$in1[0],$in1[1],0b00
258- xxpermdi $t2,$in2[6],$in2[5],0b00
259- xxpermdi $t4,$in2[4],$in2[3],0b00
260- vmsumudm $out,$t1,$t2,$vzero
261- vmsumudm $out,$t3,$t4,$out
262- xxpermdi $t2,$in2[2],$in2[1],0b00
263- xxpermdi $t1,$in1[4],$in1[5],0b00
264- vmsumudm $out,$t1,$t2,$out
265- vmsumudm $out,$in1[6],$in2[0],$out
266- stxv $out,96($outp)
267-
268- xxpermdi $t1,$in1[1],$in1[2],0b00
269- xxpermdi $t2,$in2[6],$in2[5],0b00
270- xxpermdi $t3,$in1[3],$in1[4],0b00
271- vmsumudm $out,$t1,$t2,$vzero
272- vmsumudm $out,$t3,$t4,$out
273- xxpermdi $t3,$in2[2],$in2[1],0b00
274- xxpermdi $t1,$in1[5],$in1[6],0b00
275- vmsumudm $out,$t1,$t3,$out
276- stxv $out,112($outp)
277-
278- xxpermdi $t1,$in1[2],$in1[3],0b00
279- xxpermdi $t3,$in1[4],$in1[5],0b00
280- vmsumudm $out,$t1,$t2,$vzero
281- vmsumudm $out,$t3,$t4,$out
282- vmsumudm $out,$in1[6],$in2[2],$out
283- stxv $out,128($outp)
284-
285- xxpermdi $t1,$in1[3],$in1[4],0b00
286- vmsumudm $out,$t1,$t2,$vzero
287- xxpermdi $t1,$in1[5],$in1[6],0b00
288- vmsumudm $out,$t1,$t4,$out
289- stxv $out,144($outp)
290-
291- vmsumudm $out,$t3,$t2,$vzero
292- vmsumudm $out,$in1[6],$in2[4],$out
293- stxv $out,160($outp)
294-
295- vmsumudm $out,$t1,$t2,$vzero
296- stxv $out,176($outp)
297-
298- vmsumudm $out,$in1[6],$in2[6],$vzero
299- stxv $out,192($outp)
300-___
301+ mtlr 0
302+ ld 14, 56(1)
303+ ld 15, 64(1)
304+ ld 16, 72(1)
305+ ld 17, 80(1)
306+ addi 1, 1, 176
307+ blr
308+.size p384_felem_square,.-p384_felem_square
309
310- endproc("p384_felem_mul");
311- }
312+#
313+# Felem mul core function -
314+# r3, r4 and r5 need to be pre-loaded.
315+#
316+.type _p384_felem_mul_core,\@function
317+.align 4
318+_p384_felem_mul_core:
319
320- {
321- #
322- # p384_felem_square
323- #
324+ ld 6,0(4)
325+ ld 14,0(5)
326+ ld 7,8(4)
327+ ld 15,8(5)
328+ ld 8,16(4)
329+ ld 16,16(5)
330+ ld 9,24(4)
331+ ld 17,24(5)
332+ ld 10,32(4)
333+ ld 18,32(5)
334+ ld 11,40(4)
335+ ld 19,40(5)
336+ ld 12,48(4)
337+ ld 20,48(5)
338
339- my ($inp) = ("r4");
340- my @in = map("v$_",(44..50));
341- my @inx2 = map("v$_",(35..41));
342+ # out0
343+ mulld 21, 14, 6
344+ mulhdu 22, 14, 6
345+ std 21, 0(3)
346+ std 22, 8(3)
347
348- startproc("p384_felem_square");
349+ vxor 0, 0, 0
350
351- $code.=<<___;
352- vspltisw $vzero,0
353+ # out1
354+ mtvsrdd 32+13, 14, 6
355+ mtvsrdd 32+14, 7, 15
356+ vmsumudm 1, 13, 14, 0
357
358-___
359+ # out2
360+ mtvsrdd 32+15, 15, 6
361+ mtvsrdd 32+16, 7, 16
362+ mtvsrdd 32+17, 0, 8
363+ mtvsrdd 32+18, 0, 14
364+ vmsumudm 19, 15, 16, 0
365+ vmsumudm 2, 17, 18, 19
366
367- load_vrs($inp, \@in);
368+ # out3
369+ mtvsrdd 32+13, 16, 6
370+ mtvsrdd 32+14, 7, 17
371+ mtvsrdd 32+15, 14, 8
372+ mtvsrdd 32+16, 9, 15
373+ vmsumudm 19, 13, 14, 0
374+ vmsumudm 3, 15, 16, 19
375
376- $code.=<<___;
377- li $zero,0
378- li $one,1
379- mtvsrdd $t1,$one,$zero
380-___
381+ # out4
382+ mtvsrdd 32+13, 17, 6
383+ mtvsrdd 32+14, 7, 18
384+ mtvsrdd 32+15, 15, 8
385+ mtvsrdd 32+16, 9, 16
386+ mtvsrdd 32+17, 0, 10
387+ mtvsrdd 32+18, 0, 14
388+ vmsumudm 19, 13, 14, 0
389+ vmsumudm 4, 15, 16, 19
390+ vmsumudm 4, 17, 18, 4
391
392- for (my $i = 0; $i <= 6; $i++) {
393- $code.=<<___;
394- vsld $inx2[$i],$in[$i],$t1
395-___
396- }
397-
398- $code.=<<___;
399- vmsumudm $out,$in[0],$in[0],$vzero
400- stxv $out,0($outp)
401-
402- vmsumudm $out,$in[0],$inx2[1],$vzero
403- stxv $out,16($outp)
404-
405- vmsumudm $out,$in[0],$inx2[2],$vzero
406- vmsumudm $out,$in[1],$in[1],$out
407- stxv $out,32($outp)
408-
409- xxpermdi $t1,$in[0],$in[1],0b00
410- xxpermdi $t2,$inx2[3],$inx2[2],0b00
411- vmsumudm $out,$t1,$t2,$vzero
412- stxv $out,48($outp)
413-
414- xxpermdi $t4,$inx2[4],$inx2[3],0b00
415- vmsumudm $out,$t1,$t4,$vzero
416- vmsumudm $out,$in[2],$in[2],$out
417- stxv $out,64($outp)
418-
419- xxpermdi $t2,$inx2[5],$inx2[4],0b00
420- vmsumudm $out,$t1,$t2,$vzero
421- vmsumudm $out,$in[2],$inx2[3],$out
422- stxv $out,80($outp)
423-
424- xxpermdi $t2,$inx2[6],$inx2[5],0b00
425- vmsumudm $out,$t1,$t2,$vzero
426- vmsumudm $out,$in[2],$inx2[4],$out
427- vmsumudm $out,$in[3],$in[3],$out
428- stxv $out,96($outp)
429-
430- xxpermdi $t3,$in[1],$in[2],0b00
431- vmsumudm $out,$t3,$t2,$vzero
432- vmsumudm $out,$in[3],$inx2[4],$out
433- stxv $out,112($outp)
434-
435- xxpermdi $t1,$in[2],$in[3],0b00
436- vmsumudm $out,$t1,$t2,$vzero
437- vmsumudm $out,$in[4],$in[4],$out
438- stxv $out,128($outp)
439-
440- xxpermdi $t1,$in[3],$in[4],0b00
441- vmsumudm $out,$t1,$t2,$vzero
442- stxv $out,144($outp)
443-
444- vmsumudm $out,$in[4],$inx2[6],$vzero
445- vmsumudm $out,$in[5],$in[5],$out
446- stxv $out,160($outp)
447-
448- vmsumudm $out,$in[5],$inx2[6],$vzero
449- stxv $out,176($outp)
450-
451- vmsumudm $out,$in[6],$in[6],$vzero
452- stxv $out,192($outp)
453-___
454+ # out5
455+ mtvsrdd 32+13, 18, 6
456+ mtvsrdd 32+14, 7, 19
457+ mtvsrdd 32+15, 16, 8
458+ mtvsrdd 32+16, 9, 17
459+ mtvsrdd 32+17, 14, 10
460+ mtvsrdd 32+18, 11, 15
461+ vmsumudm 19, 13, 14, 0
462+ vmsumudm 5, 15, 16, 19
463+ vmsumudm 5, 17, 18, 5
464+
465+ stxv 32+1, 16(3)
466+ stxv 32+2, 32(3)
467+ stxv 32+3, 48(3)
468+ stxv 32+4, 64(3)
469+ stxv 32+5, 80(3)
470+
471+ # out6
472+ mtvsrdd 32+13, 19, 6
473+ mtvsrdd 32+14, 7, 20
474+ mtvsrdd 32+15, 17, 8
475+ mtvsrdd 32+16, 9, 18
476+ mtvsrdd 32+17, 15, 10
477+ mtvsrdd 32+18, 11, 16
478+ vmsumudm 19, 13, 14, 0
479+ vmsumudm 6, 15, 16, 19
480+ mtvsrdd 32+13, 0, 12
481+ mtvsrdd 32+14, 0, 14
482+ vmsumudm 19, 17, 18, 6
483+ vmsumudm 6, 13, 14, 19
484+
485+ # out7
486+ mtvsrdd 32+13, 19, 7
487+ mtvsrdd 32+14, 8, 20
488+ mtvsrdd 32+15, 17, 9
489+ mtvsrdd 32+16, 10, 18
490+ mtvsrdd 32+17, 15, 11
491+ mtvsrdd 32+18, 12, 16
492+ vmsumudm 19, 13, 14, 0
493+ vmsumudm 7, 15, 16, 19
494+ vmsumudm 7, 17, 18, 7
495+
496+ # out8
497+ mtvsrdd 32+13, 19, 8
498+ mtvsrdd 32+14, 9, 20
499+ mtvsrdd 32+15, 17, 10
500+ mtvsrdd 32+16, 11, 18
501+ mtvsrdd 32+17, 0, 12
502+ mtvsrdd 32+18, 0, 16
503+ vmsumudm 19, 13, 14, 0
504+ vmsumudm 8, 15, 16, 19
505+ vmsumudm 8, 17, 18, 8
506+
507+ # out9
508+ mtvsrdd 32+13, 19, 9
509+ mtvsrdd 32+14, 10, 20
510+ mtvsrdd 32+15, 17, 11
511+ mtvsrdd 32+16, 12, 18
512+ vmsumudm 19, 13, 14, 0
513+ vmsumudm 9, 15, 16, 19
514+
515+ # out10
516+ mtvsrdd 32+13, 19, 10
517+ mtvsrdd 32+14, 11, 20
518+ mtvsrdd 32+15, 0, 12
519+ mtvsrdd 32+16, 0, 18
520+ vmsumudm 19, 13, 14, 0
521+ vmsumudm 10, 15, 16, 19
522+
523+ # out11
524+ mtvsrdd 32+17, 19, 11
525+ mtvsrdd 32+18, 12, 20
526+ vmsumudm 11, 17, 18, 0
527+
528+ stxv 32+6, 96(3)
529+ stxv 32+7, 112(3)
530+ stxv 32+8, 128(3)
531+ stxv 32+9, 144(3)
532+ stxv 32+10, 160(3)
533+ stxv 32+11, 176(3)
534+
535+ # out12
536+ mulld 21, 20, 12
537+ mulhdu 22, 20, 12 # out12
538+
539+ std 21, 192(3)
540+ std 22, 200(3)
541+
542+ blr
543+.size _p384_felem_mul_core,.-_p384_felem_mul_core
544+
545+#
546+# Felem square core function -
547+# r3 and r4 need to be pre-loaded.
548+#
549+.type _p384_felem_square_core,\@function
550+.align 4
551+_p384_felem_square_core:
552+
553+ ld 6, 0(4)
554+ ld 7, 8(4)
555+ ld 8, 16(4)
556+ ld 9, 24(4)
557+ ld 10, 32(4)
558+ ld 11, 40(4)
559+ ld 12, 48(4)
560+
561+ vxor 0, 0, 0
562+
563+ # out0
564+ mulld 14, 6, 6
565+ mulhdu 15, 6, 6
566+ std 14, 0(3)
567+ std 15, 8(3)
568+
569+ # out1
570+ add 14, 6, 6
571+ mtvsrdd 32+13, 0, 14
572+ mtvsrdd 32+14, 0, 7
573+ vmsumudm 1, 13, 14, 0
574+
575+ # out2
576+ mtvsrdd 32+15, 7, 14
577+ mtvsrdd 32+16, 7, 8
578+ vmsumudm 2, 15, 16, 0
579+
580+ # out3
581+ add 15, 7, 7
582+ mtvsrdd 32+13, 8, 14
583+ mtvsrdd 32+14, 15, 9
584+ vmsumudm 3, 13, 14, 0
585+
586+ # out4
587+ mtvsrdd 32+13, 9, 14
588+ mtvsrdd 32+14, 15, 10
589+ mtvsrdd 32+15, 0, 8
590+ vmsumudm 4, 13, 14, 0
591+ vmsumudm 4, 15, 15, 4
592+
593+ # out5
594+ mtvsrdd 32+13, 10, 14
595+ mtvsrdd 32+14, 15, 11
596+ add 16, 8, 8
597+ mtvsrdd 32+15, 0, 16
598+ mtvsrdd 32+16, 0, 9
599+ vmsumudm 5, 13, 14, 0
600+ vmsumudm 5, 15, 16, 5
601+
602+ stxv 32+1, 16(3)
603+ stxv 32+2, 32(3)
604+ stxv 32+3, 48(3)
605+ stxv 32+4, 64(3)
606+
607+ # out6
608+ mtvsrdd 32+13, 11, 14
609+ mtvsrdd 32+14, 15, 12
610+ mtvsrdd 32+15, 9, 16
611+ mtvsrdd 32+16, 9, 10
612+ stxv 32+5, 80(3)
613+ vmsumudm 19, 13, 14, 0
614+ vmsumudm 6, 15, 16, 19
615+
616+ # out7
617+ add 17, 9, 9
618+ mtvsrdd 32+13, 11, 15
619+ mtvsrdd 32+14, 16, 12
620+ mtvsrdd 32+15, 0, 17
621+ mtvsrdd 32+16, 0, 10
622+ vmsumudm 19, 13, 14, 0
623+ vmsumudm 7, 15, 16, 19
624+
625+ # out8
626+ mtvsrdd 32+13, 11, 16
627+ mtvsrdd 32+14, 17, 12
628+ mtvsrdd 32+15, 0, 10
629+ vmsumudm 19, 13, 14, 0
630+ vmsumudm 8, 15, 15, 19
631+
632+ # out9
633+ add 14, 10, 10
634+ mtvsrdd 32+13, 11, 17
635+ mtvsrdd 32+14, 14, 12
636+ vmsumudm 9, 13, 14, 0
637+
638+ # out10
639+ mtvsrdd 32+13, 11, 14
640+ mtvsrdd 32+14, 11, 12
641+ vmsumudm 10, 13, 14, 0
642+
643+ stxv 32+6, 96(3)
644+ stxv 32+7, 112(3)
645+
646+ # out11
647+ #add 14, 11, 11
648+ #mtvsrdd 32+13, 0, 14
649+ #mtvsrdd 32+14, 0, 12
650+ #vmsumudm 11, 13, 14, 0
651+
652+ mulld 6, 12, 11
653+ mulhdu 7, 12, 11
654+ addc 8, 6, 6
655+ adde 9, 7, 7
656+
657+ stxv 32+8, 128(3)
658+ stxv 32+9, 144(3)
659+ stxv 32+10, 160(3)
660+ #stxv 32+11, 176(3)
661+
662+ # out12
663+ mulld 14, 12, 12
664+ mulhdu 15, 12, 12
665+
666+ std 8, 176(3)
667+ std 9, 184(3)
668+ std 14, 192(3)
669+ std 15, 200(3)
670+
671+ blr
672+.size _p384_felem_square_core,.-_p384_felem_square_core
673+
674+#
675+# widefelem (128 bits) * 8
676+#
677+.macro F128_X_8 _off1 _off2
678+ ld 9,\\_off1(3)
679+ ld 8,\\_off2(3)
680+ srdi 10,9,61
681+ rldimi 10,8,3,0
682+ sldi 9,9,3
683+ std 9,\\_off1(3)
684+ std 10,\\_off2(3)
685+.endm
686+
687+.globl p384_felem128_mul_by_8
688+.type p384_felem128_mul_by_8, \@function
689+.align 4
690+p384_felem128_mul_by_8:
691+
692+ F128_X_8 0, 8
693+
694+ F128_X_8 16, 24
695+
696+ F128_X_8 32, 40
697+
698+ F128_X_8 48, 56
699+
700+ F128_X_8 64, 72
701+
702+ F128_X_8 80, 88
703+
704+ F128_X_8 96, 104
705+
706+ F128_X_8 112, 120
707+
708+ F128_X_8 128, 136
709+
710+ F128_X_8 144, 152
711+
712+ F128_X_8 160, 168
713+
714+ F128_X_8 176, 184
715+
716+ F128_X_8 192, 200
717+
718+ blr
719+.size p384_felem128_mul_by_8,.-p384_felem128_mul_by_8
720+
721+#
722+# widefelem (128 bits) * 2
723+#
724+.macro F128_X_2 _off1 _off2
725+ ld 9,\\_off1(3)
726+ ld 8,\\_off2(3)
727+ srdi 10,9,63
728+ rldimi 10,8,1,0
729+ sldi 9,9,1
730+ std 9,\\_off1(3)
731+ std 10,\\_off2(3)
732+.endm
733+
734+.globl p384_felem128_mul_by_2
735+.type p384_felem128_mul_by_2, \@function
736+.align 4
737+p384_felem128_mul_by_2:
738+
739+ F128_X_2 0, 8
740+
741+ F128_X_2 16, 24
742+
743+ F128_X_2 32, 40
744+
745+ F128_X_2 48, 56
746+
747+ F128_X_2 64, 72
748+
749+ F128_X_2 80, 88
750+
751+ F128_X_2 96, 104
752+
753+ F128_X_2 112, 120
754+
755+ F128_X_2 128, 136
756+
757+ F128_X_2 144, 152
758+
759+ F128_X_2 160, 168
760+
761+ F128_X_2 176, 184
762+
763+ F128_X_2 192, 200
764+
765+ blr
766+.size p384_felem128_mul_by_2,.-p384_felem128_mul_by_2
767+
768+.globl p384_felem_diff128
769+.type p384_felem_diff128, \@function
770+.align 4
771+p384_felem_diff128:
772+
773+ addis 5, 2, .LConst_two127\@toc\@ha
774+ addi 5, 5, .LConst_two127\@toc\@l
775+
776+ ld 10, 0(3)
777+ ld 8, 8(3)
778+ li 9, 0
779+ addc 10, 10, 9
780+ li 7, -1
781+ rldicr 7, 7, 0, 0 # two127
782+ adde 8, 8, 7
783+ ld 11, 0(4)
784+ ld 12, 8(4)
785+ subfc 11, 11, 10
786+ subfe 12, 12, 8
787+ std 11, 0(3) # out0
788+ std 12, 8(3)
789+
790+ # two127m71 = (r10, r9)
791+ ld 8, 16(3)
792+ ld 7, 24(3)
793+ ld 10, 24(5) # two127m71
794+ addc 8, 8, 9
795+ adde 7, 7, 10
796+ ld 11, 16(4)
797+ ld 12, 24(4)
798+ subfc 11, 11, 8
799+ subfe 12, 12, 7
800+ std 11, 16(3) # out1
801+ std 12, 24(3)
802+
803+ ld 8, 32(3)
804+ ld 7, 40(3)
805+ addc 8, 8, 9
806+ adde 7, 7, 10
807+ ld 11, 32(4)
808+ ld 12, 40(4)
809+ subfc 11, 11, 8
810+ subfe 12, 12, 7
811+ std 11, 32(3) # out2
812+ std 12, 40(3)
813+
814+ ld 8, 48(3)
815+ ld 7, 56(3)
816+ addc 8, 8, 9
817+ adde 7, 7, 10
818+ ld 11, 48(4)
819+ ld 12, 56(4)
820+ subfc 11, 11, 8
821+ subfe 12, 12, 7
822+ std 11, 48(3) # out3
823+ std 12, 56(3)
824+
825+ ld 8, 64(3)
826+ ld 7, 72(3)
827+ addc 8, 8, 9
828+ adde 7, 7, 10
829+ ld 11, 64(4)
830+ ld 12, 72(4)
831+ subfc 11, 11, 8
832+ subfe 12, 12, 7
833+ std 11, 64(3) # out4
834+ std 12, 72(3)
835+
836+ ld 8, 80(3)
837+ ld 7, 88(3)
838+ addc 8, 8, 9
839+ adde 7, 7, 10
840+ ld 11, 80(4)
841+ ld 12, 88(4)
842+ subfc 11, 11, 8
843+ subfe 12, 12, 7
844+ std 11, 80(3) # out5
845+ std 12, 88(3)
846+
847+ ld 8, 96(3)
848+ ld 7, 104(3)
849+ ld 6, 40(5) # two127p111m79m71
850+ addc 8, 8, 9
851+ adde 7, 7, 6
852+ ld 11, 96(4)
853+ ld 12, 104(4)
854+ subfc 11, 11, 8
855+ subfe 12, 12, 7
856+ std 11, 96(3) # out6
857+ std 12, 104(3)
858+
859+ ld 8, 112(3)
860+ ld 7, 120(3)
861+ ld 6, 56(5) # two127m119m71
862+ addc 8, 8, 9
863+ adde 7, 7, 6
864+ ld 11, 112(4)
865+ ld 12, 120(4)
866+ subfc 11, 11, 8
867+ subfe 12, 12, 7
868+ std 11, 112(3) # out7
869+ std 12, 120(3)
870+
871+ ld 8, 128(3)
872+ ld 7, 136(3)
873+ ld 6, 72(5) # two127m95m71
874+ addc 8, 8, 9
875+ adde 7, 7, 6
876+ ld 11, 128(4)
877+ ld 12, 136(4)
878+ subfc 11, 11, 8
879+ subfe 12, 12, 7
880+ std 11, 128(3) # out8
881+ std 12, 136(3)
882+
883+ ld 8, 144(3)
884+ ld 7, 152(3)
885+ addc 8, 8, 9
886+ adde 7, 7, 10
887+ ld 11, 144(4)
888+ ld 12, 152(4)
889+ subfc 11, 11, 8
890+ subfe 12, 12, 7
891+ std 11, 144(3) # out9
892+ std 12, 152(3)
893+
894+ ld 8, 160(3)
895+ ld 7, 168(3)
896+ addc 8, 8, 9
897+ adde 7, 7, 10
898+ ld 11, 160(4)
899+ ld 12, 168(4)
900+ subfc 11, 11, 8
901+ subfe 12, 12, 7
902+ std 11, 160(3) # out10
903+ std 12, 168(3)
904+
905+ ld 8, 176(3)
906+ ld 7, 184(3)
907+ addc 8, 8, 9
908+ adde 7, 7, 10
909+ ld 11, 176(4)
910+ ld 12, 184(4)
911+ subfc 11, 11, 8
912+ subfe 12, 12, 7
913+ std 11, 176(3) # out11
914+ std 12, 184(3)
915+
916+ ld 8, 192(3)
917+ ld 7, 200(3)
918+ addc 8, 8, 9
919+ adde 7, 7, 10
920+ ld 11, 192(4)
921+ ld 12, 200(4)
922+ subfc 11, 11, 8
923+ subfe 12, 12, 7
924+ std 11, 192(3) # out12
925+ std 12, 200(3)
926+
927+ blr
928+.size p384_felem_diff128,.-p384_felem_diff128
929+
930+.data
931+.align 4
932+.LConst_two127:
933+#two127
934+.long 0x00000000, 0x00000000, 0x00000000, 0x80000000
935+#two127m71
936+.long 0x00000000, 0x00000000, 0xffffff80, 0x7fffffff
937+#two127p111m79m71
938+.long 0x00000000, 0x00000000, 0xffff7f80, 0x80007fff
939+#two127m119m71
940+.long 0x00000000, 0x00000000, 0xffffff80, 0x7f7fffff
941+#two127m95m71
942+.long 0x00000000, 0x00000000, 0x7fffff80, 0x7fffffff
943+
944+.text
945+
946+.globl p384_felem_diff_128_64
947+.type p384_felem_diff_128_64, \@function
948+.align 4
949+p384_felem_diff_128_64:
950+ addis 5, 2, .LConst_128_two64\@toc\@ha
951+ addi 5, 5, .LConst_128_two64\@toc\@l
952+
953+ ld 9, 0(3)
954+ ld 10, 8(3)
955+ ld 8, 48(5) # two64p48m16
956+ li 7, 0
957+ addc 9, 9, 8
958+ li 6, 1
959+ adde 10, 10, 6
960+ ld 11, 0(4)
961+ subfc 8, 11, 9
962+ subfe 12, 7, 10
963+ std 8, 0(3) # out0
964+ std 12, 8(3)
965+
966+ ld 9, 16(3)
967+ ld 10, 24(3)
968+ ld 8, 0(5) # two64m56m8
969+ addc 9, 9, 8
970+ addze 10, 10
971+ ld 11, 8(4)
972+ subfc 11, 11, 9
973+ subfe 12, 7, 10
974+ std 11, 16(3) # out1
975+ std 12, 24(3)
976+
977+ ld 9, 32(3)
978+ ld 10, 40(3)
979+ ld 8, 16(5) # two64m32m8
980+ addc 9, 9, 8
981+ addze 10, 10
982+ ld 11, 16(4)
983+ subfc 11, 11, 9
984+ subfe 12, 7, 10
985+ std 11, 32(3) # out2
986+ std 12, 40(3)
987+
988+ ld 10, 48(3)
989+ ld 8, 56(3)
990+ #ld 9, 32(5) # two64m8
991+ li 9, -256 # two64m8
992+ addc 10, 10, 9
993+ addze 8, 8
994+ ld 11, 24(4)
995+ subfc 11, 11, 10
996+ subfe 12, 7, 8
997+ std 11, 48(3) # out3
998+ std 12, 56(3)
999+
1000+ ld 10, 64(3)
1001+ ld 8, 72(3)
1002+ addc 10, 10, 9
1003+ addze 8, 8
1004+ ld 11, 32(4)
1005+ subfc 11, 11, 10
1006+ subfe 12, 7, 8
1007+ std 11, 64(3) # out4
1008+ std 12, 72(3)
1009+
1010+ ld 10, 80(3)
1011+ ld 8, 88(3)
1012+ addc 10, 10, 9
1013+ addze 8, 8
1014+ ld 11, 40(4)
1015+ subfc 11, 11, 10
1016+ subfe 12, 7, 8
1017+ std 11, 80(3) # out5
1018+ std 12, 88(3)
1019+
1020+ ld 10, 96(3)
1021+ ld 8, 104(3)
1022+ addc 10, 10, 9
1023+ addze 9, 8
1024+ ld 11, 48(4)
1025+ subfc 11, 11, 10
1026+ subfe 12, 7, 9
1027+ std 11, 96(3) # out6
1028+ std 12, 104(3)
1029+
1030+ blr
1031+.size p384_felem_diff_128_64,.-p384_felem_diff_128_64
1032+
1033+.data
1034+.align 4
1035+.LConst_128_two64:
1036+#two64m56m8
1037+.long 0xffffff00, 0xfeffffff, 0x00000000, 0x00000000
1038+#two64m32m8
1039+.long 0xffffff00, 0xfffffffe, 0x00000000, 0x00000000
1040+#two64m8
1041+.long 0xffffff00, 0xffffffff, 0x00000000, 0x00000000
1042+#two64p48m16
1043+.long 0xffff0000, 0x0000ffff, 0x00000001, 0x00000000
1044+
1045+.LConst_two60:
1046+#two60m52m4
1047+.long 0xfffffff0, 0x0fefffff, 0x0, 0x0
1048+#two60p44m12
1049+.long 0xfffff000, 0x10000fff, 0x0, 0x0
1050+#two60m28m4
1051+.long 0xeffffff0, 0x0fffffff, 0x0, 0x0
1052+#two60m4
1053+.long 0xfffffff0, 0x0fffffff, 0x0, 0x0
1054+
1055+.text
1056+#
1057+# static void felem_diff64(felem out, const felem in)
1058+#
1059+.globl p384_felem_diff64
1060+.type p384_felem_diff64, \@function
1061+.align 4
1062+p384_felem_diff64:
1063+ addis 5, 2, .LConst_two60\@toc\@ha
1064+ addi 5, 5, .LConst_two60\@toc\@l
1065+
1066+ ld 9, 0(3)
1067+ ld 8, 16(5) # two60p44m12
1068+ li 7, 0
1069+ add 9, 9, 8
1070+ ld 11, 0(4)
1071+ subf 8, 11, 9
1072+ std 8, 0(3) # out0
1073+
1074+ ld 9, 8(3)
1075+ ld 8, 0(5) # two60m52m4
1076+ add 9, 9, 8
1077+ ld 11, 8(4)
1078+ subf 11, 11, 9
1079+ std 11, 8(3) # out1
1080+
1081+ ld 9, 16(3)
1082+ ld 8, 32(5) # two60m28m4
1083+ add 9, 9, 8
1084+ ld 11, 16(4)
1085+ subf 11, 11, 9
1086+ std 11, 16(3) # out2
1087+
1088+ ld 10, 24(3)
1089+ ld 9, 48(5) # two60m4
1090+ add 10, 10, 9
1091+ ld 12, 24(4)
1092+ subf 12, 12, 10
1093+ std 12, 24(3) # out3
1094+
1095+ ld 10, 32(3)
1096+ add 10, 10, 9
1097+ ld 11, 32(4)
1098+ subf 11, 11, 10
1099+ std 11, 32(3) # out4
1100+
1101+ ld 10, 40(3)
1102+ add 10, 10, 9
1103+ ld 12, 40(4)
1104+ subf 12, 12, 10
1105+ std 12, 40(3) # out5
1106
1107- endproc("p384_felem_square");
1108- }
1109-}
1110+ ld 10, 48(3)
1111+ add 10, 10, 9
1112+ ld 11, 48(4)
1113+ subf 11, 11, 10
1114+ std 11, 48(3) # out6
1115+
1116+ blr
1117+.size p384_felem_diff64,.-p384_felem_diff64
1118+
1119+.text
1120+#
1121+# Shift 128 bits right <nbits>
1122+#
1123+.macro SHR o_h o_l in_h in_l nbits
1124+ srdi \\o_l, \\in_l, \\nbits # shift lower right <nbits>
1125+ rldimi \\o_l, \\in_h, 64-\\nbits, 0 # insert <64-nbits> from hi
1126+ srdi \\o_h, \\in_h, \\nbits # shift higher right <nbits>
1127+.endm
1128+
1129+#
1130+# static void felem_reduce(felem out, const widefelem in)
1131+#
1132+.global p384_felem_reduce
1133+.type p384_felem_reduce,\@function
1134+.align 4
1135+p384_felem_reduce:
1136+
1137+ stdu 1, -208(1)
1138+ mflr 0
1139+ std 14, 56(1)
1140+ std 15, 64(1)
1141+ std 16, 72(1)
1142+ std 17, 80(1)
1143+ std 18, 88(1)
1144+ std 19, 96(1)
1145+ std 20, 104(1)
1146+ std 21, 112(1)
1147+ std 22, 120(1)
1148+ std 23, 128(1)
1149+ std 24, 136(1)
1150+ std 25, 144(1)
1151+ std 26, 152(1)
1152+ std 27, 160(1)
1153+ std 28, 168(1)
1154+ std 29, 176(1)
1155+ std 30, 184(1)
1156+ std 31, 192(1)
1157+
1158+ bl _p384_felem_reduce_core
1159+
1160+ mtlr 0
1161+ ld 14, 56(1)
1162+ ld 15, 64(1)
1163+ ld 16, 72(1)
1164+ ld 17, 80(1)
1165+ ld 18, 88(1)
1166+ ld 19, 96(1)
1167+ ld 20, 104(1)
1168+ ld 21, 112(1)
1169+ ld 22, 120(1)
1170+ ld 23, 128(1)
1171+ ld 24, 136(1)
1172+ ld 25, 144(1)
1173+ ld 26, 152(1)
1174+ ld 27, 160(1)
1175+ ld 28, 168(1)
1176+ ld 29, 176(1)
1177+ ld 30, 184(1)
1178+ ld 31, 192(1)
1179+ addi 1, 1, 208
1180+ blr
1181+.size p384_felem_reduce,.-p384_felem_reduce
1182+
1183+#
1184+# Felem reduction core function -
1185+# r3 and r4 need to be pre-loaded.
1186+#
1187+.type _p384_felem_reduce_core,\@function
1188+.align 4
1189+_p384_felem_reduce_core:
1190+ addis 12, 2, .LConst\@toc\@ha
1191+ addi 12, 12, .LConst\@toc\@l
1192+
1193+ # load constant p
1194+ ld 11, 8(12) # hi - two124m68
1195+
1196+ # acc[6] = in[6] + two124m68;
1197+ ld 26, 96(4) # in[6].l
1198+ ld 27, 96+8(4) # in[6].h
1199+ add 27, 27, 11
1200+
1201+ # acc[5] = in[5] + two124m68;
1202+ ld 24, 80(4) # in[5].l
1203+ ld 25, 80+8(4) # in[5].h
1204+ add 25, 25, 11
1205+
1206+ # acc[4] = in[4] + two124m68;
1207+ ld 22, 64(4) # in[4].l
1208+ ld 23, 64+8(4) # in[4].h
1209+ add 23, 23, 11
1210+
1211+ # acc[3] = in[3] + two124m68;
1212+ ld 20, 48(4) # in[3].l
1213+ ld 21, 48+8(4) # in[3].h
1214+ add 21, 21, 11
1215+
1216+ ld 11, 48+8(12) # hi - two124m92m68
1217+
1218+ # acc[2] = in[2] + two124m92m68;
1219+ ld 18, 32(4) # in[2].l
1220+ ld 19, 32+8(4) # in[2].h
1221+ add 19, 19, 11
1222+
1223+ ld 11, 16+8(12) # high - two124m116m68
1224+
1225+ # acc[1] = in[1] + two124m116m68;
1226+ ld 16, 16(4) # in[1].l
1227+ ld 17, 16+8(4) # in[1].h
1228+ add 17, 17, 11
1229+
1230+ ld 11, 32+8(12) # high - two124p108m76
1231+
1232+ # acc[0] = in[0] + two124p108m76;
1233+ ld 14, 0(4) # in[0].l
1234+ ld 15, 0+8(4) # in[0].h
1235+ add 15, 15, 11
1236+
1237+ # compute mask
1238+ li 7, -1
1239+
1240+ # Eliminate in[12]
1241+
1242+ # acc[8] += in[12] >> 32;
1243+ ld 5, 192(4) # in[12].l
1244+ ld 6, 192+8(4) # in[12].h
1245+ SHR 9, 10, 6, 5, 32
1246+ ld 30, 128(4) # in[8].l
1247+ ld 31, 136(4) # in[8].h
1248+ addc 30, 30, 10
1249+ adde 31, 31, 9
1250+
1251+ # acc[7] += (in[12] & 0xffffffff) << 24;
1252+ srdi 11, 7, 32 # 0xffffffff
1253+ and 11, 11, 5
1254+ sldi 11, 11, 24 # << 24
1255+ ld 28, 112(4) # in[7].l
1256+ ld 29, 120(4) # in[7].h
1257+ addc 28, 28, 11
1258+ addze 29, 29
1259+
1260+ # acc[7] += in[12] >> 8;
1261+ SHR 9, 10, 6, 5, 8
1262+ addc 28, 28, 10
1263+ adde 29, 29, 9
1264+
1265+ # acc[6] += (in[12] & 0xff) << 48;
1266+ andi. 11, 5, 0xff
1267+ sldi 11, 11, 48
1268+ addc 26, 26, 11
1269+ addze 27, 27
1270+
1271+ # acc[6] -= in[12] >> 16;
1272+ SHR 9, 10, 6, 5, 16
1273+ subfc 26, 10, 26
1274+ subfe 27, 9, 27
1275+
1276+ # acc[5] -= (in[12] & 0xffff) << 40;
1277+ srdi 11, 7, 48 # 0xffff
1278+ and 11, 11, 5
1279+ sldi 11, 11, 40 # << 40
1280+ li 9, 0
1281+ subfc 24, 11, 24
1282+ subfe 25, 9, 25
1283+
1284+ # acc[6] += in[12] >> 48;
1285+ SHR 9, 10, 6, 5, 48
1286+ addc 26, 26, 10
1287+ adde 27, 27, 9
1288+
1289+ # acc[5] += (in[12] & 0xffffffffffff) << 8;
1290+ srdi 11, 7, 16 # 0xffffffffffff
1291+ and 11, 11, 5
1292+ sldi 11, 11, 8 # << 8
1293+ addc 24, 24, 11
1294+ addze 25, 25
1295+
1296+ # Eliminate in[11]
1297+
1298+ # acc[7] += in[11] >> 32;
1299+ ld 5, 176(4) # in[11].l
1300+ ld 6, 176+8(4) # in[11].h
1301+ SHR 9, 10, 6, 5, 32
1302+ addc 28, 28, 10
1303+ adde 29, 29, 9
1304+
1305+ # acc[6] += (in[11] & 0xffffffff) << 24;
1306+ srdi 11, 7, 32 # 0xffffffff
1307+ and 11, 11, 5
1308+ sldi 11, 11, 24 # << 24
1309+ addc 26, 26, 11
1310+ addze 27, 27
1311+
1312+ # acc[6] += in[11] >> 8;
1313+ SHR 9, 10, 6, 5, 8
1314+ addc 26, 26, 10
1315+ adde 27, 27, 9
1316+
1317+ # acc[5] += (in[11] & 0xff) << 48;
1318+ andi. 11, 5, 0xff
1319+ sldi 11, 11, 48
1320+ addc 24, 24, 11
1321+ addze 25, 25
1322+
1323+ # acc[5] -= in[11] >> 16;
1324+ SHR 9, 10, 6, 5, 16
1325+ subfc 24, 10, 24
1326+ subfe 25, 9, 25
1327+
1328+ # acc[4] -= (in[11] & 0xffff) << 40;
1329+ srdi 11, 7, 48 # 0xffff
1330+ and 11, 11, 5
1331+ sldi 11, 11, 40 # << 40
1332+ li 9, 0
1333+ subfc 22, 11, 22
1334+ subfe 23, 9, 23
1335+
1336+ # acc[5] += in[11] >> 48;
1337+ SHR 9, 10, 6, 5, 48
1338+ addc 24, 24, 10
1339+ adde 25, 25, 9
1340+
1341+ # acc[4] += (in[11] & 0xffffffffffff) << 8;
1342+ srdi 11, 7, 16 # 0xffffffffffff
1343+ and 11, 11, 5
1344+ sldi 11, 11, 8 # << 8
1345+ addc 22, 22, 11
1346+ addze 23, 23
1347+
1348+ # Eliminate in[10]
1349+
1350+ # acc[6] += in[10] >> 32;
1351+ ld 5, 160(4) # in[10].l
1352+ ld 6, 160+8(4) # in[10].h
1353+ SHR 9, 10, 6, 5, 32
1354+ addc 26, 26, 10
1355+ adde 27, 27, 9
1356+
1357+ # acc[5] += (in[10] & 0xffffffff) << 24;
1358+ srdi 11, 7, 32 # 0xffffffff
1359+ and 11, 11, 5
1360+ sldi 11, 11, 24 # << 24
1361+ addc 24, 24, 11
1362+ addze 25, 25
1363+
1364+ # acc[5] += in[10] >> 8;
1365+ SHR 9, 10, 6, 5, 8
1366+ addc 24, 24, 10
1367+ adde 25, 25, 9
1368+
1369+ # acc[4] += (in[10] & 0xff) << 48;
1370+ andi. 11, 5, 0xff
1371+ sldi 11, 11, 48
1372+ addc 22, 22, 11
1373+ addze 23, 23
1374+
1375+ # acc[4] -= in[10] >> 16;
1376+ SHR 9, 10, 6, 5, 16
1377+ subfc 22, 10, 22
1378+ subfe 23, 9, 23
1379+
1380+ # acc[3] -= (in[10] & 0xffff) << 40;
1381+ srdi 11, 7, 48 # 0xffff
1382+ and 11, 11, 5
1383+ sldi 11, 11, 40 # << 40
1384+ li 9, 0
1385+ subfc 20, 11, 20
1386+ subfe 21, 9, 21
1387+
1388+ # acc[4] += in[10] >> 48;
1389+ SHR 9, 10, 6, 5, 48
1390+ addc 22, 22, 10
1391+ adde 23, 23, 9
1392+
1393+ # acc[3] += (in[10] & 0xffffffffffff) << 8;
1394+ srdi 11, 7, 16 # 0xffffffffffff
1395+ and 11, 11, 5
1396+ sldi 11, 11, 8 # << 8
1397+ addc 20, 20, 11
1398+ addze 21, 21
1399+
1400+ # Eliminate in[9]
1401+
1402+ # acc[5] += in[9] >> 32;
1403+ ld 5, 144(4) # in[9].l
1404+ ld 6, 144+8(4) # in[9].h
1405+ SHR 9, 10, 6, 5, 32
1406+ addc 24, 24, 10
1407+ adde 25, 25, 9
1408+
1409+ # acc[4] += (in[9] & 0xffffffff) << 24;
1410+ srdi 11, 7, 32 # 0xffffffff
1411+ and 11, 11, 5
1412+ sldi 11, 11, 24 # << 24
1413+ addc 22, 22, 11
1414+ addze 23, 23
1415+
1416+ # acc[4] += in[9] >> 8;
1417+ SHR 9, 10, 6, 5, 8
1418+ addc 22, 22, 10
1419+ adde 23, 23, 9
1420+
1421+ # acc[3] += (in[9] & 0xff) << 48;
1422+ andi. 11, 5, 0xff
1423+ sldi 11, 11, 48
1424+ addc 20, 20, 11
1425+ addze 21, 21
1426+
1427+ # acc[3] -= in[9] >> 16;
1428+ SHR 9, 10, 6, 5, 16
1429+ subfc 20, 10, 20
1430+ subfe 21, 9, 21
1431+
1432+ # acc[2] -= (in[9] & 0xffff) << 40;
1433+ srdi 11, 7, 48 # 0xffff
1434+ and 11, 11, 5
1435+ sldi 11, 11, 40 # << 40
1436+ li 9, 0
1437+ subfc 18, 11, 18
1438+ subfe 19, 9, 19
1439+
1440+ # acc[3] += in[9] >> 48;
1441+ SHR 9, 10, 6, 5, 48
1442+ addc 20, 20, 10
1443+ adde 21, 21, 9
1444+
1445+ # acc[2] += (in[9] & 0xffffffffffff) << 8;
1446+ srdi 11, 7, 16 # 0xffffffffffff
1447+ and 11, 11, 5
1448+ sldi 11, 11, 8 # << 8
1449+ addc 18, 18, 11
1450+ addze 19, 19
1451+
1452+ # Eliminate acc[8]
1453+
1454+ # acc[4] += acc[8] >> 32;
1455+ mr 5, 30 # acc[8].l
1456+ mr 6, 31 # acc[8].h
1457+ SHR 9, 10, 6, 5, 32
1458+ addc 22, 22, 10
1459+ adde 23, 23, 9
1460+
1461+ # acc[3] += (acc[8] & 0xffffffff) << 24;
1462+ srdi 11, 7, 32 # 0xffffffff
1463+ and 11, 11, 5
1464+ sldi 11, 11, 24 # << 24
1465+ addc 20, 20, 11
1466+ addze 21, 21
1467+
1468+ # acc[3] += acc[8] >> 8;
1469+ SHR 9, 10, 6, 5, 8
1470+ addc 20, 20, 10
1471+ adde 21, 21, 9
1472+
1473+ # acc[2] += (acc[8] & 0xff) << 48;
1474+ andi. 11, 5, 0xff
1475+ sldi 11, 11, 48
1476+ addc 18, 18, 11
1477+ addze 19, 19
1478+
1479+ # acc[2] -= acc[8] >> 16;
1480+ SHR 9, 10, 6, 5, 16
1481+ subfc 18, 10, 18
1482+ subfe 19, 9, 19
1483+
1484+ # acc[1] -= (acc[8] & 0xffff) << 40;
1485+ srdi 11, 7, 48 # 0xffff
1486+ and 11, 11, 5
1487+ sldi 11, 11, 40 # << 40
1488+ li 9, 0
1489+ subfc 16, 11, 16
1490+ subfe 17, 9, 17
1491+
1492+ # acc[2] += acc[8] >> 48;
1493+ SHR 9, 10, 6, 5, 48
1494+ addc 18, 18, 10
1495+ adde 19, 19, 9
1496+
1497+ # acc[1] += (acc[8] & 0xffffffffffff) << 8;
1498+ srdi 11, 7, 16 # 0xffffffffffff
1499+ and 11, 11, 5
1500+ sldi 11, 11, 8 # << 8
1501+ addc 16, 16, 11
1502+ addze 17, 17
1503+
1504+ # Eliminate acc[7]
1505+
1506+ # acc[3] += acc[7] >> 32;
1507+ mr 5, 28 # acc[7].l
1508+ mr 6, 29 # acc[7].h
1509+ SHR 9, 10, 6, 5, 32
1510+ addc 20, 20, 10
1511+ adde 21, 21, 9
1512+
1513+ # acc[2] += (acc[7] & 0xffffffff) << 24;
1514+ srdi 11, 7, 32 # 0xffffffff
1515+ and 11, 11, 5
1516+ sldi 11, 11, 24 # << 24
1517+ addc 18, 18, 11
1518+ addze 19, 19
1519+
1520+ # acc[2] += acc[7] >> 8;
1521+ SHR 9, 10, 6, 5, 8
1522+ addc 18, 18, 10
1523+ adde 19, 19, 9
1524+
1525+ # acc[1] += (acc[7] & 0xff) << 48;
1526+ andi. 11, 5, 0xff
1527+ sldi 11, 11, 48
1528+ addc 16, 16, 11
1529+ addze 17, 17
1530+
1531+ # acc[1] -= acc[7] >> 16;
1532+ SHR 9, 10, 6, 5, 16
1533+ subfc 16, 10, 16
1534+ subfe 17, 9, 17
1535+
1536+ # acc[0] -= (acc[7] & 0xffff) << 40;
1537+ srdi 11, 7, 48 # 0xffff
1538+ and 11, 11, 5
1539+ sldi 11, 11, 40 # << 40
1540+ li 9, 0
1541+ subfc 14, 11, 14
1542+ subfe 15, 9, 15
1543+
1544+ # acc[1] += acc[7] >> 48;
1545+ SHR 9, 10, 6, 5, 48
1546+ addc 16, 16, 10
1547+ adde 17, 17, 9
1548+
1549+ # acc[0] += (acc[7] & 0xffffffffffff) << 8;
1550+ srdi 11, 7, 16 # 0xffffffffffff
1551+ and 11, 11, 5
1552+ sldi 11, 11, 8 # << 8
1553+ addc 14, 14, 11
1554+ addze 15, 15
1555+
1556+ #
1557+ # Carry 4 -> 5 -> 6
1558+ #
1559+ # acc[5] += acc[4] >> 56;
1560+ # acc[4] &= 0x00ffffffffffffff;
1561+ SHR 9, 10, 23, 22, 56
1562+ addc 24, 24, 10
1563+ adde 25, 25, 9
1564+ srdi 11, 7, 8 # 0x00ffffffffffffff
1565+ and 22, 22, 11
1566+ li 23, 0
1567+
1568+ # acc[6] += acc[5] >> 56;
1569+ # acc[5] &= 0x00ffffffffffffff;
1570+ SHR 9, 10, 25, 24, 56
1571+ addc 26, 26, 10
1572+ adde 27, 27, 9
1573+ and 24, 24, 11
1574+ li 25, 0
1575+
1576+ # [3]: Eliminate high bits of acc[6]
1577+ # temp = acc[6] >> 48;
1578+ # acc[6] &= 0x0000ffffffffffff;
1579+ SHR 31, 30, 27, 26, 48 # temp = acc[6] >> 48
1580+ srdi 11, 7, 16 # 0x0000ffffffffffff
1581+ and 26, 26, 11
1582+ li 27, 0
1583+
1584+ # temp < 2^80
1585+ # acc[3] += temp >> 40;
1586+ SHR 9, 10, 31, 30, 40
1587+ addc 20, 20, 10
1588+ adde 21, 21, 9
1589+
1590+ # acc[2] += (temp & 0xffffffffff) << 16;
1591+ srdi 11, 7, 24 # 0xffffffffff
1592+ and 10, 30, 11
1593+ sldi 10, 10, 16
1594+ addc 18, 18, 10
1595+ addze 19, 19
1596+
1597+ # acc[2] += temp >> 16;
1598+ SHR 9, 10, 31, 30, 16
1599+ addc 18, 18, 10
1600+ adde 19, 19, 9
1601+
1602+ # acc[1] += (temp & 0xffff) << 40;
1603+ srdi 11, 7, 48 # 0xffff
1604+ and 10, 30, 11
1605+ sldi 10, 10, 40
1606+ addc 16, 16, 10
1607+ addze 17, 17
1608+
1609+ # acc[1] -= temp >> 24;
1610+ SHR 9, 10, 31, 30, 24
1611+ subfc 16, 10, 16
1612+ subfe 17, 9, 17
1613+
1614+ # acc[0] -= (temp & 0xffffff) << 32;
1615+ srdi 11, 7, 40 # 0xffffff
1616+ and 10, 30, 11
1617+ sldi 10, 10, 32
1618+ li 9, 0
1619+ subfc 14, 10, 14
1620+ subfe 15, 9, 15
1621+
1622+ # acc[0] += temp;
1623+ addc 14, 14, 30
1624+ adde 15, 15, 31
1625+
1626+ # Carry 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6
1627+ #
1628+ # acc[1] += acc[0] >> 56; /* acc[1] < acc_old[1] + 2^72 */
1629+ SHR 9, 10, 15, 14, 56
1630+ addc 16, 16, 10
1631+ adde 17, 17, 9
1632+
1633+ # acc[0] &= 0x00ffffffffffffff;
1634+ srdi 11, 7, 8 # 0x00ffffffffffffff
1635+ and 14, 14, 11
1636+ li 15, 0
1637+
1638+ # acc[2] += acc[1] >> 56; /* acc[2] < acc_old[2] + 2^72 + 2^16 */
1639+ SHR 9, 10, 17, 16, 56
1640+ addc 18, 18, 10
1641+ adde 19, 19, 9
1642+
1643+ # acc[1] &= 0x00ffffffffffffff;
1644+ and 16, 16, 11
1645+ li 17, 0
1646+
1647+ # acc[3] += acc[2] >> 56; /* acc[3] < acc_old[3] + 2^72 + 2^16 */
1648+ SHR 9, 10, 19, 18, 56
1649+ addc 20, 20, 10
1650+ adde 21, 21, 9
1651+
1652+ # acc[2] &= 0x00ffffffffffffff;
1653+ and 18, 18, 11
1654+ li 19, 0
1655+
1656+ # acc[4] += acc[3] >> 56;
1657+ SHR 9, 10, 21, 20, 56
1658+ addc 22, 22, 10
1659+ adde 23, 23, 9
1660+
1661+ # acc[3] &= 0x00ffffffffffffff;
1662+ and 20, 20, 11
1663+ li 21, 0
1664+
1665+ # acc[5] += acc[4] >> 56;
1666+ SHR 9, 10, 23, 22, 56
1667+ addc 24, 24, 10
1668+ adde 25, 25, 9
1669+
1670+ # acc[4] &= 0x00ffffffffffffff;
1671+ and 22, 22, 11
1672+
1673+ # acc[6] += acc[5] >> 56;
1674+ SHR 9, 10, 25, 24, 56
1675+ addc 26, 26, 10
1676+ adde 27, 27, 9
1677+
1678+ # acc[5] &= 0x00ffffffffffffff;
1679+ and 24, 24, 11
1680+
1681+ std 14, 0(3)
1682+ std 16, 8(3)
1683+ std 18, 16(3)
1684+ std 20, 24(3)
1685+ std 22, 32(3)
1686+ std 24, 40(3)
1687+ std 26, 48(3)
1688+ blr
1689+.size _p384_felem_reduce_core,.-_p384_felem_reduce_core
1690+
1691+.data
1692+.align 4
1693+.LConst:
1694+# two124m68: 2^124 - 2^68 as a 128-bit value (assuming the four 32-bit words below are read low word first - TODO confirm for big-endian builds)
1695+.long 0x0, 0x0, 0xfffffff0, 0xfffffff
1696+# two124m116m68: 2^124 - 2^116 - 2^68 (same word-order assumption)
1697+.long 0x0, 0x0, 0xfffffff0, 0xfefffff
1698+# two124p108m76: 2^124 + 2^108 - 2^76 (same word-order assumption)
1699+.long 0x0, 0x0, 0xfffff000, 0x10000fff
1700+# two124m92m68: 2^124 - 2^92 - 2^68 (same word-order assumption)
1701+.long 0x0, 0x0, 0xeffffff0, 0xfffffff
1702+
1703+.text
1704+
1705+# Exported wrapper: runs _p384_felem_square_core into a stack scratch buffer, then _p384_felem_reduce_core from that buffer into out.
1706+# void p384_felem_square_reduce(felem out, const felem in)
1707+# r3 = out and r4 = in on entry (standard PPC64 argument registers); r14-r31 are preserved for the caller.
1708+.global p384_felem_square_reduce
1709+.type p384_felem_square_reduce,\@function
1710+.align 4
1711+p384_felem_square_reduce:
1712+ stdu 1, -512(1) # allocate a 512-byte stack frame, storing the old r1 at its base
1713+ mflr 0 # keep the return address in r0; it is never spilled to the stack. NOTE(review): relies on both core routines leaving r0 untouched - confirm
1714+ std 14, 56(1) # save r14-r31 at 56..192(r1); the reduce core overwrites r14-r31 as accumulators
1715+ std 15, 64(1)
1716+ std 16, 72(1)
1717+ std 17, 80(1)
1718+ std 18, 88(1)
1719+ std 19, 96(1)
1720+ std 20, 104(1)
1721+ std 21, 112(1)
1722+ std 22, 120(1)
1723+ std 23, 128(1)
1724+ std 24, 136(1)
1725+ std 25, 144(1)
1726+ std 26, 152(1)
1727+ std 27, 160(1)
1728+ std 28, 168(1)
1729+ std 29, 176(1)
1730+ std 30, 184(1)
1731+ std 31, 192(1)
1732+
1733+ std 3, 496(1) # stash the caller's out pointer; r3 is about to be redirected
1734+ addi 3, 1, 208 # r3 = scratch buffer inside this frame for the unreduced result
1735+ bl _p384_felem_square_core # r4 (in) is passed through unchanged
1736+
1737+ mr 4, 3 # reduce input = scratch buffer. NOTE(review): assumes the square core returns with r3 unchanged - confirm
1738+ ld 3, 496(1) # reduce output = the caller's original out pointer
1739+ bl _p384_felem_reduce_core
1740+
1741+ ld 14, 56(1) # restore r14-r31 from the slots written in the prologue
1742+ ld 15, 64(1)
1743+ ld 16, 72(1)
1744+ ld 17, 80(1)
1745+ ld 18, 88(1)
1746+ ld 19, 96(1)
1747+ ld 20, 104(1)
1748+ ld 21, 112(1)
1749+ ld 22, 120(1)
1750+ ld 23, 128(1)
1751+ ld 24, 136(1)
1752+ ld 25, 144(1)
1753+ ld 26, 152(1)
1754+ ld 27, 160(1)
1755+ ld 28, 168(1)
1756+ ld 29, 176(1)
1757+ ld 30, 184(1)
1758+ ld 31, 192(1)
1759+ addi 1, 1, 512 # release the 512-byte frame
1760+ mtlr 0 # put the return address held in r0 back into LR
1761+ blr
1762+.size p384_felem_square_reduce,.-p384_felem_square_reduce
1763+
1764+# Exported wrapper: runs _p384_felem_mul_core into a stack scratch buffer, then _p384_felem_reduce_core from that buffer into out.
1765+# void p384_felem_mul_reduce(felem out, const felem in1, const felem in2)
1766+# r3 = out, r4 = in1, r5 = in2 on entry (standard PPC64 argument registers); r14-r31 are preserved for the caller.
1767+.global p384_felem_mul_reduce
1768+.type p384_felem_mul_reduce,\@function
1769+.align 5
1770+p384_felem_mul_reduce:
1771+ stdu 1, -512(1) # allocate a 512-byte stack frame, storing the old r1 at its base
1772+ mflr 0 # keep the return address in r0; it is never spilled to the stack. NOTE(review): relies on both core routines leaving r0 untouched - confirm
1773+ std 14, 56(1) # save r14-r31 at 56..192(r1); the reduce core overwrites r14-r31 as accumulators
1774+ std 15, 64(1)
1775+ std 16, 72(1)
1776+ std 17, 80(1)
1777+ std 18, 88(1)
1778+ std 19, 96(1)
1779+ std 20, 104(1)
1780+ std 21, 112(1)
1781+ std 22, 120(1)
1782+ std 23, 128(1)
1783+ std 24, 136(1)
1784+ std 25, 144(1)
1785+ std 26, 152(1)
1786+ std 27, 160(1)
1787+ std 28, 168(1)
1788+ std 29, 176(1)
1789+ std 30, 184(1)
1790+ std 31, 192(1)
1791+
1792+ std 3, 496(1) # stash the caller's out pointer; r3 is about to be redirected
1793+ addi 3, 1, 208 # r3 = scratch buffer inside this frame for the unreduced product
1794+ bl _p384_felem_mul_core # r4 (in1) and r5 (in2) are passed through unchanged
1795+
1796+ mr 4, 3 # reduce input = scratch buffer. NOTE(review): assumes the mul core returns with r3 unchanged - confirm
1797+ ld 3, 496(1) # reduce output = the caller's original out pointer
1798+ bl _p384_felem_reduce_core
1799+
1800+ ld 14, 56(1) # restore r14-r31 from the slots written in the prologue
1801+ ld 15, 64(1)
1802+ ld 16, 72(1)
1803+ ld 17, 80(1)
1804+ ld 18, 88(1)
1805+ ld 19, 96(1)
1806+ ld 20, 104(1)
1807+ ld 21, 112(1)
1808+ ld 22, 120(1)
1809+ ld 23, 128(1)
1810+ ld 24, 136(1)
1811+ ld 25, 144(1)
1812+ ld 26, 152(1)
1813+ ld 27, 160(1)
1814+ ld 28, 168(1)
1815+ ld 29, 176(1)
1816+ ld 30, 184(1)
1817+ ld 31, 192(1)
1818+ addi 1, 1, 512 # release the 512-byte frame
1819+ mtlr 0 # put the return address held in r0 back into LR
1820+ blr
1821+.size p384_felem_mul_reduce,.-p384_felem_mul_reduce
1822+___
1823
1824 $code =~ s/\`([^\`]*)\`/eval $1/gem;
1825 print $code;
1826diff --git a/crypto/ec/ecp_nistp384.c b/crypto/ec/ecp_nistp384.c
1827index 3fd7a40020..e0b5786bc1 100644
1828--- a/crypto/ec/ecp_nistp384.c
1829+++ b/crypto/ec/ecp_nistp384.c
1830@@ -252,6 +252,16 @@ static void felem_neg(felem out, const felem in)
1831 out[6] = two60m4 - in[6];
1832 }
1833
1834+#if defined(ECP_NISTP384_ASM)
1835+void p384_felem_diff64(felem out, const felem in);
1836+void p384_felem_diff128(widefelem out, const widefelem in);
1837+void p384_felem_diff_128_64(widefelem out, const felem in);
1838+
1839+# define felem_diff64 p384_felem_diff64
1840+# define felem_diff128 p384_felem_diff128
1841+# define felem_diff_128_64 p384_felem_diff_128_64
1842+
1843+#else
1844 /*-
1845 * felem_diff64 subtracts |in| from |out|
1846 * On entry:
1847@@ -369,6 +379,7 @@ static void felem_diff128(widefelem out, const widefelem in)
1848 for (i = 0; i < 2*NLIMBS-1; i++)
1849 out[i] -= in[i];
1850 }
1851+#endif /* ECP_NISTP384_ASM */
1852
1853 static void felem_square_ref(widefelem out, const felem in)
1854 {
1855@@ -503,7 +514,7 @@ static void felem_mul_ref(widefelem out, const felem in1, const felem in2)
1856 * [3]: Y = 2^48 (acc[6] >> 48)
1857 * (Where a | b | c | d = (2^56)^3 a + (2^56)^2 b + (2^56) c + d)
1858 */
1859-static void felem_reduce(felem out, const widefelem in)
1860+static void felem_reduce_ref(felem out, const widefelem in)
1861 {
1862 /*
1863 * In order to prevent underflow, we add a multiple of p before subtracting.
1864@@ -682,8 +693,11 @@ static void (*felem_square_p)(widefelem out, const felem in) =
1865 static void (*felem_mul_p)(widefelem out, const felem in1, const felem in2) =
1866 felem_mul_wrapper;
1867
1868+static void (*felem_reduce_p)(felem out, const widefelem in) = felem_reduce_ref;
1869+
1870 void p384_felem_square(widefelem out, const felem in);
1871 void p384_felem_mul(widefelem out, const felem in1, const felem in2);
1872+void p384_felem_reduce(felem out, const widefelem in);
1873
1874 # if defined(_ARCH_PPC64)
1875 # include "crypto/ppc_arch.h"
1876@@ -695,6 +709,7 @@ static void felem_select(void)
1877 if ((OPENSSL_ppccap_P & PPC_MADD300) && (OPENSSL_ppccap_P & PPC_ALTIVEC)) {
1878 felem_square_p = p384_felem_square;
1879 felem_mul_p = p384_felem_mul;
1880+ felem_reduce_p = p384_felem_reduce;
1881
1882 return;
1883 }
1884@@ -703,6 +718,7 @@ static void felem_select(void)
1885 /* Default */
1886 felem_square_p = felem_square_ref;
1887 felem_mul_p = felem_mul_ref;
1888+ felem_reduce_p = p384_felem_reduce;
1889 }
1890
1891 static void felem_square_wrapper(widefelem out, const felem in)
1892@@ -719,10 +735,17 @@ static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2)
1893
1894 # define felem_square felem_square_p
1895 # define felem_mul felem_mul_p
1896+# define felem_reduce felem_reduce_p
1897+
1898+void p384_felem_square_reduce(felem out, const felem in);
1899+void p384_felem_mul_reduce(felem out, const felem in1, const felem in2);
1900+
1901+# define felem_square_reduce p384_felem_square_reduce
1902+# define felem_mul_reduce p384_felem_mul_reduce
1903 #else
1904 # define felem_square felem_square_ref
1905 # define felem_mul felem_mul_ref
1906-#endif
1907+# define felem_reduce felem_reduce_ref
1908
1909 static ossl_inline void felem_square_reduce(felem out, const felem in)
1910 {
1911@@ -739,6 +762,7 @@ static ossl_inline void felem_mul_reduce(felem out, const felem in1, const felem
1912 felem_mul(tmp, in1, in2);
1913 felem_reduce(out, tmp);
1914 }
1915+#endif
1916
1917 /*-
1918 * felem_inv calculates |out| = |in|^{-1}
diff --git a/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-2.patch b/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-2.patch
new file mode 100644
index 0000000000..0659a9d6d9
--- /dev/null
+++ b/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-2.patch
@@ -0,0 +1,129 @@
1From 6b1646e472c9e8c08bb14066ba2a7c3eed45f84a Mon Sep 17 00:00:00 2001
2From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
3Date: Thu, 17 Apr 2025 08:51:53 -0500
4Subject: [PATCH] Fix P-384 curve on lower-than-P9 PPC64 targets
5
6The change adding an asm implementation of p384_felem_reduce incorrectly
7uses the accelerated version on both targets that support the intrinsics
8*and* targets that don't, instead of falling back to the generics on older
9targets. This results in crashes when trying to use P-384 on < Power9.
10
11Signed-off-by: Anna Wilcox <AWilcox@Wilcox-Tech.com>
12Closes: #27350
13Fixes: 85cabd94 ("Fix Minerva timing side-channel signal for P-384 curve on PPC")
14
15Reviewed-by: Dmitry Belyavskiy <beldmit@gmail.com>
16Reviewed-by: Tomas Mraz <tomas@openssl.org>
17(Merged from https://github.com/openssl/openssl/pull/27429)
18
19(cherry picked from commit 29864f2b0f1046177e8048a5b17440893d3f9425)
20
21CVE: CVE-2025-27587
22Upstream-Status: Backport [https://github.com/openssl/openssl/commit/6b1646e472c9e8c08bb14066ba2a7c3eed45f84a]
23Signed-off-by: Peter Marko <peter.marko@siemens.com>
24---
25 crypto/ec/ecp_nistp384.c | 54 ++++++++++++++++++++++++----------------
26 1 file changed, 33 insertions(+), 21 deletions(-)
27
28diff --git a/crypto/ec/ecp_nistp384.c b/crypto/ec/ecp_nistp384.c
29index e0b5786bc1..439b4d03a3 100644
30--- a/crypto/ec/ecp_nistp384.c
31+++ b/crypto/ec/ecp_nistp384.c
32@@ -684,6 +684,22 @@ static void felem_reduce_ref(felem out, const widefelem in)
33 out[i] = acc[i];
34 }
35
36+static ossl_inline void felem_square_reduce_ref(felem out, const felem in)
37+{
38+ widefelem tmp;
39+
40+ felem_square_ref(tmp, in);
41+ felem_reduce_ref(out, tmp);
42+}
43+
44+static ossl_inline void felem_mul_reduce_ref(felem out, const felem in1, const felem in2)
45+{
46+ widefelem tmp;
47+
48+ felem_mul_ref(tmp, in1, in2);
49+ felem_reduce_ref(out, tmp);
50+}
51+
52 #if defined(ECP_NISTP384_ASM)
53 static void felem_square_wrapper(widefelem out, const felem in);
54 static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2);
55@@ -695,10 +711,18 @@ static void (*felem_mul_p)(widefelem out, const felem in1, const felem in2) =
56
57 static void (*felem_reduce_p)(felem out, const widefelem in) = felem_reduce_ref;
58
59+static void (*felem_square_reduce_p)(felem out, const felem in) =
60+ felem_square_reduce_ref;
61+static void (*felem_mul_reduce_p)(felem out, const felem in1, const felem in2) =
62+ felem_mul_reduce_ref;
63+
64 void p384_felem_square(widefelem out, const felem in);
65 void p384_felem_mul(widefelem out, const felem in1, const felem in2);
66 void p384_felem_reduce(felem out, const widefelem in);
67
68+void p384_felem_square_reduce(felem out, const felem in);
69+void p384_felem_mul_reduce(felem out, const felem in1, const felem in2);
70+
71 # if defined(_ARCH_PPC64)
72 # include "crypto/ppc_arch.h"
73 # endif
74@@ -710,6 +734,8 @@ static void felem_select(void)
75 felem_square_p = p384_felem_square;
76 felem_mul_p = p384_felem_mul;
77 felem_reduce_p = p384_felem_reduce;
78+ felem_square_reduce_p = p384_felem_square_reduce;
79+ felem_mul_reduce_p = p384_felem_mul_reduce;
80
81 return;
82 }
83@@ -718,7 +744,9 @@ static void felem_select(void)
84 /* Default */
85 felem_square_p = felem_square_ref;
86 felem_mul_p = felem_mul_ref;
87- felem_reduce_p = p384_felem_reduce;
88+ felem_reduce_p = felem_reduce_ref;
89+ felem_square_reduce_p = felem_square_reduce_ref;
90+ felem_mul_reduce_p = felem_mul_reduce_ref;
91 }
92
93 static void felem_square_wrapper(widefelem out, const felem in)
94@@ -737,31 +765,15 @@ static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2)
95 # define felem_mul felem_mul_p
96 # define felem_reduce felem_reduce_p
97
98-void p384_felem_square_reduce(felem out, const felem in);
99-void p384_felem_mul_reduce(felem out, const felem in1, const felem in2);
100-
101-# define felem_square_reduce p384_felem_square_reduce
102-# define felem_mul_reduce p384_felem_mul_reduce
103+# define felem_square_reduce felem_square_reduce_p
104+# define felem_mul_reduce felem_mul_reduce_p
105 #else
106 # define felem_square felem_square_ref
107 # define felem_mul felem_mul_ref
108 # define felem_reduce felem_reduce_ref
109
110-static ossl_inline void felem_square_reduce(felem out, const felem in)
111-{
112- widefelem tmp;
113-
114- felem_square(tmp, in);
115- felem_reduce(out, tmp);
116-}
117-
118-static ossl_inline void felem_mul_reduce(felem out, const felem in1, const felem in2)
119-{
120- widefelem tmp;
121-
122- felem_mul(tmp, in1, in2);
123- felem_reduce(out, tmp);
124-}
125+# define felem_square_reduce felem_square_reduce_ref
126+# define felem_mul_reduce felem_mul_reduce_ref
127 #endif
128
129 /*-
diff --git a/meta/recipes-connectivity/openssl/openssl_3.2.4.bb b/meta/recipes-connectivity/openssl/openssl_3.2.4.bb
index d6bf32d989..fd98b32007 100644
--- a/meta/recipes-connectivity/openssl/openssl_3.2.4.bb
+++ b/meta/recipes-connectivity/openssl/openssl_3.2.4.bb
@@ -13,6 +13,8 @@ SRC_URI = "https://github.com/openssl/openssl/releases/download/openssl-${PV}/op
13 file://0001-Configure-do-not-tweak-mips-cflags.patch \ 13 file://0001-Configure-do-not-tweak-mips-cflags.patch \
14 file://0001-Added-handshake-history-reporting-when-test-fails.patch \ 14 file://0001-Added-handshake-history-reporting-when-test-fails.patch \
15 file://CVE-2024-41996.patch \ 15 file://CVE-2024-41996.patch \
16 file://CVE-2025-27587-1.patch \
17 file://CVE-2025-27587-2.patch \
16 " 18 "
17 19
18SRC_URI:append:class-nativesdk = " \ 20SRC_URI:append:class-nativesdk = " \