1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
|
Upstream-Status: Backport
https://git.lysator.liu.se/nettle/nettle/commit/fa269b6ad06dd13c901dbd84a12e52b918a09cd7
CVE: CVE-2015-8804
Signed-off-by: Armin Kuster <akuster@mvista.com>
Index: nettle-2.7.1/ChangeLog
===================================================================
--- nettle-2.7.1.orig/ChangeLog
+++ nettle-2.7.1/ChangeLog
@@ -1,3 +1,11 @@
+2015-12-15 Niels Möller <nisse@lysator.liu.se>
+
+ * x86_64/ecc-384-modp.asm: Fixed carry propagation bug. Problem
+ reported by Hanno Böck. Simplified the folding to always use
+ non-negative carry, the old code attempted to add in a carry which
+ could be either positive or negative, but didn't get that case
+ right.
+
2015-12-10 Niels Möller <nisse@lysator.liu.se>
* ecc-256.c (ecc_256_modp): Fixed carry propagation bug. Problem
Index: nettle-2.7.1/x86_64/ecc-384-modp.asm
===================================================================
--- nettle-2.7.1.orig/x86_64/ecc-384-modp.asm
+++ nettle-2.7.1/x86_64/ecc-384-modp.asm
@@ -20,7 +20,7 @@ C MA 02111-1301, USA.
.file "ecc-384-modp.asm"
define(<RP>, <%rsi>)
-define(<D4>, <%rax>)
+define(<D5>, <%rax>)
define(<T0>, <%rbx>)
define(<T1>, <%rcx>)
define(<T2>, <%rdx>)
@@ -35,8 +35,8 @@ define(<H4>, <%r13>)
define(<H5>, <%r14>)
define(<C2>, <%r15>)
define(<C0>, H5) C Overlap
-define(<D0>, RP) C Overlap
-define(<TMP>, H4) C Overlap
+define(<TMP>, RP) C Overlap
+
PROLOGUE(nettle_ecc_384_modp)
W64_ENTRY(2, 0)
@@ -48,34 +48,38 @@ PROLOGUE(nettle_ecc_384_modp)
push %r14
push %r15
- C First get top 2 limbs, which need folding twice
+ C First get top 2 limbs, which need folding twice.
+ C B^10 = B^6 + B^4 + 2^32 (B-1)B^4.
+ C We handle the terms as follows:
C
- C H5 H4
- C -H5
- C ------
- C H0 D4
+ C B^6: Folded immediately.
C
- C Then shift right, (H1,H0,D4) <-- (H0,D4) << 32
- C and add
+ C B^4: Delayed, added in during the next folding.
C
- C H5 H4
- C H1 H0
- C ----------
- C C2 H1 H0
-
- mov 80(RP), D4
- mov 88(RP), H0
- mov D4, H4
- mov H0, H5
- sub H0, D4
- sbb $0, H0
-
- mov D4, T2
- mov H0, H1
- shl $32, H0
- shr $32, T2
+ C 2^32(B-1) B^4: Low half limb delayed until the next
+ C folding. Top 1.5 limbs subtracted and shifted now, resulting
+ C in 2.5 limbs. The low limb saved in D5, high 1.5 limbs added
+ C in.
+
+ mov 80(RP), H4
+ mov 88(RP), H5
+ C Shift right 32 bits, into H1, H0
+ mov H4, H0
+ mov H5, H1
+ mov H5, D5
shr $32, H1
- or T2, H0
+ shl $32, D5
+ shr $32, H0
+ or D5, H0
+
+ C H1 H0
+ C - H1 H0
+ C --------
+ C H1 H0 D5
+ mov H0, D5
+ neg D5
+ sbb H1, H0
+ sbb $0, H1
xor C2, C2
add H4, H0
@@ -114,118 +118,95 @@ PROLOGUE(nettle_ecc_384_modp)
adc H3, T5
adc $0, C0
- C H3 H2 H1 H0 0
- C - H4 H3 H2 H1 H0
- C ---------------
- C H3 H2 H1 H0 D0
-
- mov XREG(D4), XREG(D4)
- mov H0, D0
- neg D0
- sbb H1, H0
- sbb H2, H1
- sbb H3, H2
- sbb H4, H3
- sbb $0, D4
-
- C Shift right. High bits are sign, to be added to C0.
- mov D4, TMP
- sar $32, TMP
- shl $32, D4
- add TMP, C0
-
+ C Shift left, including low half of H4
mov H3, TMP
+ shl $32, H4
shr $32, TMP
- shl $32, H3
- or TMP, D4
+ or TMP, H4
mov H2, TMP
+ shl $32, H3
shr $32, TMP
- shl $32, H2
or TMP, H3
mov H1, TMP
+ shl $32, H2
shr $32, TMP
- shl $32, H1
or TMP, H2
mov H0, TMP
+ shl $32, H1
shr $32, TMP
- shl $32, H0
or TMP, H1
- mov D0, TMP
- shr $32, TMP
- shl $32, D0
- or TMP, H0
+ shl $32, H0
+
+ C H4 H3 H2 H1 H0 0
+ C - H4 H3 H2 H1 H0
+ C ---------------
+ C H4 H3 H2 H1 H0 TMP
- add D0, T0
+ mov H0, TMP
+ neg TMP
+ sbb H1, H0
+ sbb H2, H1
+ sbb H3, H2
+ sbb H4, H3
+ sbb $0, H4
+
+ add TMP, T0
adc H0, T1
adc H1, T2
adc H2, T3
adc H3, T4
- adc D4, T5
+ adc H4, T5
adc $0, C0
C Remains to add in C2 and C0
- C C0 C0<<32 (-2^32+1)C0
- C C2 C2<<32 (-2^32+1)C2
- C where C2 is always positive, while C0 may be -1.
+ C Set H1, H0 = (2^96 - 2^32 + 1) C0
mov C0, H0
mov C0, H1
- mov C0, H2
- sar $63, C0 C Get sign
shl $32, H1
- sub H1, H0 C Gives borrow iff C0 > 0
+ sub H1, H0
sbb $0, H1
- add C0, H2
+ C Set H3, H2 = (2^96 - 2^32 + 1) C2
+ mov C2, H2
+ mov C2, H3
+ shl $32, H3
+ sub H3, H2
+ sbb $0, H3
+ add C0, H2 C No carry. Could use lea trick
+
+ xor C0, C0
add H0, T0
adc H1, T1
- adc $0, H2
- adc $0, C0
-
- C Set (H1 H0) <-- C2 << 96 - C2 << 32 + 1
- mov C2, H0
- mov C2, H1
- shl $32, H1
- sub H1, H0
- sbb $0, H1
-
- add H2, H0
- adc C0, H1
- adc C2, C0
- mov C0, H2
- sar $63, C0
- add H0, T2
- adc H1, T3
- adc H2, T4
- adc C0, T5
- sbb C0, C0
+ adc H2, T2
+ adc H3, T3
+ adc C2, T4
+ adc D5, T5 C Value delayed from initial folding
+ adc $0, C0 C Use sbb and switch sign?
C Final unlikely carry
mov C0, H0
mov C0, H1
- mov C0, H2
- sar $63, C0
shl $32, H1
sub H1, H0
sbb $0, H1
- add C0, H2
pop RP
- sub H0, T0
+ add H0, T0
mov T0, (RP)
- sbb H1, T1
+ adc H1, T1
mov T1, 8(RP)
- sbb H2, T2
+ adc C0, T2
mov T2, 16(RP)
- sbb C0, T3
+ adc $0, T3
mov T3, 24(RP)
- sbb C0, T4
+ adc $0, T4
mov T4, 32(RP)
- sbb C0, T5
+ adc $0, T5
mov T5, 40(RP)
pop %r15
|