diff options
author | Ross Burton <ross.burton@arm.com> | 2023-09-12 18:24:34 +0100 |
---|---|---|
committer | Steve Sakoman <steve@sakoman.com> | 2023-09-29 04:29:01 -1000 |
commit | d32656ac2425c5d0394b61e2dd684179bd8c9ca4 (patch) | |
tree | 8d5f272f6e0d71b6682019ede9dc004e6e5a1f9b | |
parent | 669079e7c0a64f36720bea2d256bd60496713300 (diff) | |
download | poky-d32656ac2425c5d0394b61e2dd684179bd8c9ca4.tar.gz |
gcc: Fix -fstack-protector issue on aarch64
This series of patches fixes deficiencies in GCC's -fstack-protector
implementation for AArch64 when using dynamically allocated stack space.
This is CVE-2023-4039. See:
https://developer.arm.com/Arm%20Security%20Center/GCC%20Stack%20Protector%20Vulnerability%20AArch64
https://github.com/metaredteam/external-disclosures/security/advisories/GHSA-x7ch-h5rf-w2mf
for more details.
(From OE-Core rev: d411ef9f660c443c00eee9bfbbc8c60c3cd0e92d)
Signed-off-by: Ross Burton <ross.burton@arm.com>
Signed-off-by: Steve Sakoman <steve@sakoman.com>
-rw-r--r-- | meta/recipes-devtools/gcc/gcc-9.5.inc | 1 | ||||
-rw-r--r-- | meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch | 1506 |
2 files changed, 1507 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-9.5.inc b/meta/recipes-devtools/gcc/gcc-9.5.inc index 23bfb1a9db..9bb41bbe24 100644 --- a/meta/recipes-devtools/gcc/gcc-9.5.inc +++ b/meta/recipes-devtools/gcc/gcc-9.5.inc | |||
@@ -70,6 +70,7 @@ SRC_URI = "\ | |||
70 | file://0038-gentypes-genmodes-Do-not-use-__LINE__-for-maintainin.patch \ | 70 | file://0038-gentypes-genmodes-Do-not-use-__LINE__-for-maintainin.patch \ |
71 | file://0039-process_alt_operands-Don-t-match-user-defined-regs-o.patch \ | 71 | file://0039-process_alt_operands-Don-t-match-user-defined-regs-o.patch \ |
72 | file://0002-libstdc-Fix-inconsistent-noexcept-specific-for-valar.patch \ | 72 | file://0002-libstdc-Fix-inconsistent-noexcept-specific-for-valar.patch \ |
73 | file://CVE-2023-4039.patch \ | ||
73 | " | 74 | " |
74 | S = "${TMPDIR}/work-shared/gcc-${PV}-${PR}/gcc-${PV}" | 75 | S = "${TMPDIR}/work-shared/gcc-${PV}-${PR}/gcc-${PV}" |
75 | SRC_URI[sha256sum] = "27769f64ef1d4cd5e2be8682c0c93f9887983e6cfd1a927ce5a0a2915a95cf8f" | 76 | SRC_URI[sha256sum] = "27769f64ef1d4cd5e2be8682c0c93f9887983e6cfd1a927ce5a0a2915a95cf8f" |
diff --git a/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch b/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch new file mode 100644 index 0000000000..56d229066f --- /dev/null +++ b/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch | |||
@@ -0,0 +1,1506 @@ | |||
1 | From: Richard Sandiford <richard.sandiford@arm.com> | ||
2 | Subject: [PATCH 00/19] aarch64: Fix -fstack-protector issue | ||
3 | Date: Tue, 12 Sep 2023 16:25:10 +0100 | ||
4 | |||
5 | This series of patches fixes deficiencies in GCC's -fstack-protector | ||
6 | implementation for AArch64 when using dynamically allocated stack space. | ||
7 | This is CVE-2023-4039. See: | ||
8 | |||
9 | https://developer.arm.com/Arm%20Security%20Center/GCC%20Stack%20Protector%20Vulnerability%20AArch64 | ||
10 | https://github.com/metaredteam/external-disclosures/security/advisories/GHSA-x7ch-h5rf-w2mf | ||
11 | |||
12 | for more details. | ||
13 | |||
14 | The fix is to put the saved registers above the locals area when | ||
15 | -fstack-protector is used. | ||
16 | |||
17 | The series also fixes a stack-clash problem that I found while working | ||
18 | on the CVE. In unpatched sources, the stack-clash problem would only | ||
19 | trigger for unrealistic numbers of arguments (8K 64-bit arguments, or an | ||
20 | equivalent). But it would be a more significant issue with the new | ||
21 | -fstack-protector frame layout. It's therefore important that both | ||
22 | problems are fixed together. | ||
23 | |||
24 | Some reorganisation of the code seemed necessary to fix the problems in a | ||
25 | cleanish way. The series is therefore quite long, but only a handful of | ||
26 | patches should have any effect on code generation. | ||
27 | |||
28 | See the individual patches for a detailed description. | ||
29 | |||
30 | Tested on aarch64-linux-gnu. Pushed to trunk and to all active branches. | ||
31 | I've also pushed backports to GCC 7+ to vendors/ARM/heads/CVE-2023-4039. | ||
32 | |||
33 | CVE: CVE-2023-4039 | ||
34 | Upstream-Status: Submitted | ||
35 | Signed-off-by: Ross Burton <ross.burton@arm.com> | ||
36 | |||
37 | |||
38 | From 78ebdb7b12d5e258b9811bab715734454268fd0c Mon Sep 17 00:00:00 2001 | ||
39 | From: Richard Sandiford <richard.sandiford@arm.com> | ||
40 | Date: Fri, 16 Jun 2023 17:00:51 +0100 | ||
41 | Subject: [PATCH 01/10] aarch64: Explicitly handle frames with no saved | ||
42 | registers | ||
43 | |||
44 | If a frame has no saved registers, it can be allocated in one go. | ||
45 | There is no need to treat the areas below and above the saved | ||
46 | registers as separate. | ||
47 | |||
48 | And if we allocate the frame in one go, it should be allocated | ||
49 | as the initial_adjust rather than the final_adjust. This allows the | ||
50 | frame size to grow to guard_size - guard_used_by_caller before a stack | ||
51 | probe is needed. (A frame with no register saves is necessarily a | ||
52 | leaf frame.) | ||
53 | |||
54 | This is a no-op as things stand, since a leaf function will have | |
55 | no outgoing arguments, and so all the frame will be above where | ||
56 | the saved registers normally go. | ||
57 | |||
58 | gcc/ | ||
59 | * config/aarch64/aarch64.c (aarch64_layout_frame): Explicitly | ||
60 | allocate the frame in one go if there are no saved registers. | ||
61 | --- | ||
62 | gcc/config/aarch64/aarch64.c | 8 +++++--- | ||
63 | 1 file changed, 5 insertions(+), 3 deletions(-) | ||
64 | |||
65 | diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c | ||
66 | index a35dceab9fc..e9dad682738 100644 | ||
67 | --- a/gcc/config/aarch64/aarch64.c | ||
68 | +++ b/gcc/config/aarch64/aarch64.c | ||
69 | @@ -4771,9 +4771,11 @@ aarch64_layout_frame (void) | ||
70 | max_push_offset = 256; | ||
71 | |||
72 | HOST_WIDE_INT const_size, const_fp_offset; | ||
73 | - if (cfun->machine->frame.frame_size.is_constant (&const_size) | ||
74 | - && const_size < max_push_offset | ||
75 | - && known_eq (crtl->outgoing_args_size, 0)) | ||
76 | + if (cfun->machine->frame.saved_regs_size == 0) | ||
77 | + cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size; | ||
78 | + else if (cfun->machine->frame.frame_size.is_constant (&const_size) | ||
79 | + && const_size < max_push_offset | ||
80 | + && known_eq (crtl->outgoing_args_size, 0)) | ||
81 | { | ||
82 | /* Simple, small frame with no outgoing arguments: | ||
83 | stp reg1, reg2, [sp, -frame_size]! | ||
84 | -- | ||
85 | 2.34.1 | ||
86 | |||
87 | |||
88 | From 347487fffa0266d43bf18f1f91878410881f596e Mon Sep 17 00:00:00 2001 | ||
89 | From: Richard Sandiford <richard.sandiford@arm.com> | ||
90 | Date: Fri, 16 Jun 2023 16:55:12 +0100 | ||
91 | Subject: [PATCH 02/10] aarch64: Add bytes_below_hard_fp to frame info | ||
92 | |||
93 | The frame layout code currently hard-codes the assumption that | ||
94 | the number of bytes below the saved registers is equal to the | ||
95 | size of the outgoing arguments. This patch abstracts that | ||
96 | value into a new field of aarch64_frame. | ||
97 | |||
98 | gcc/ | ||
99 | * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New | ||
100 | field. | ||
101 | * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize it, | ||
102 | and use it instead of crtl->outgoing_args_size. | ||
103 | (aarch64_get_separate_components): Use bytes_below_hard_fp instead | ||
104 | of outgoing_args_size. | ||
105 | (aarch64_process_components): Likewise. | ||
106 | --- | ||
107 | gcc/config/aarch64/aarch64.c | 50 +++++++++++++++++++----------------- | ||
108 | gcc/config/aarch64/aarch64.h | 6 ++++- | ||
109 | 2 files changed, 32 insertions(+), 24 deletions(-) | ||
110 | |||
111 | diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c | ||
112 | index e9dad682738..25cf10cc4b9 100644 | ||
113 | --- a/gcc/config/aarch64/aarch64.c | ||
114 | +++ b/gcc/config/aarch64/aarch64.c | ||
115 | @@ -4684,6 +4684,8 @@ aarch64_layout_frame (void) | ||
116 | last_fp_reg = regno; | ||
117 | } | ||
118 | |||
119 | + cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size; | ||
120 | + | ||
121 | if (cfun->machine->frame.emit_frame_chain) | ||
122 | { | ||
123 | /* FP and LR are placed in the linkage record. */ | ||
124 | @@ -4751,11 +4753,11 @@ aarch64_layout_frame (void) | ||
125 | STACK_BOUNDARY / BITS_PER_UNIT); | ||
126 | |||
127 | /* Both these values are already aligned. */ | ||
128 | - gcc_assert (multiple_p (crtl->outgoing_args_size, | ||
129 | + gcc_assert (multiple_p (cfun->machine->frame.bytes_below_hard_fp, | ||
130 | STACK_BOUNDARY / BITS_PER_UNIT)); | ||
131 | cfun->machine->frame.frame_size | ||
132 | = (cfun->machine->frame.hard_fp_offset | ||
133 | - + crtl->outgoing_args_size); | ||
134 | + + cfun->machine->frame.bytes_below_hard_fp); | ||
135 | |||
136 | cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size; | ||
137 | |||
138 | @@ -4775,23 +4777,23 @@ aarch64_layout_frame (void) | ||
139 | cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size; | ||
140 | else if (cfun->machine->frame.frame_size.is_constant (&const_size) | ||
141 | && const_size < max_push_offset | ||
142 | - && known_eq (crtl->outgoing_args_size, 0)) | ||
143 | + && known_eq (cfun->machine->frame.bytes_below_hard_fp, 0)) | ||
144 | { | ||
145 | - /* Simple, small frame with no outgoing arguments: | ||
146 | + /* Simple, small frame with no data below the saved registers. | ||
147 | stp reg1, reg2, [sp, -frame_size]! | ||
148 | stp reg3, reg4, [sp, 16] */ | ||
149 | cfun->machine->frame.callee_adjust = const_size; | ||
150 | } | ||
151 | - else if (known_lt (crtl->outgoing_args_size | ||
152 | + else if (known_lt (cfun->machine->frame.bytes_below_hard_fp | ||
153 | + cfun->machine->frame.saved_regs_size, 512) | ||
154 | && !(cfun->calls_alloca | ||
155 | && known_lt (cfun->machine->frame.hard_fp_offset, | ||
156 | max_push_offset))) | ||
157 | { | ||
158 | - /* Frame with small outgoing arguments: | ||
159 | + /* Frame with small area below the saved registers: | ||
160 | sub sp, sp, frame_size | ||
161 | - stp reg1, reg2, [sp, outgoing_args_size] | ||
162 | - stp reg3, reg4, [sp, outgoing_args_size + 16] */ | ||
163 | + stp reg1, reg2, [sp, bytes_below_hard_fp] | ||
164 | + stp reg3, reg4, [sp, bytes_below_hard_fp + 16] */ | ||
165 | cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size; | ||
166 | cfun->machine->frame.callee_offset | ||
167 | = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset; | ||
168 | @@ -4799,22 +4801,23 @@ aarch64_layout_frame (void) | ||
169 | else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset) | ||
170 | && const_fp_offset < max_push_offset) | ||
171 | { | ||
172 | - /* Frame with large outgoing arguments but a small local area: | ||
173 | + /* Frame with large area below the saved registers, but with a | ||
174 | + small area above: | ||
175 | stp reg1, reg2, [sp, -hard_fp_offset]! | ||
176 | stp reg3, reg4, [sp, 16] | ||
177 | - sub sp, sp, outgoing_args_size */ | ||
178 | + sub sp, sp, bytes_below_hard_fp */ | ||
179 | cfun->machine->frame.callee_adjust = const_fp_offset; | ||
180 | cfun->machine->frame.final_adjust | ||
181 | = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust; | ||
182 | } | ||
183 | else | ||
184 | { | ||
185 | - /* Frame with large local area and outgoing arguments using frame pointer: | ||
186 | + /* General case: | ||
187 | sub sp, sp, hard_fp_offset | ||
188 | stp x29, x30, [sp, 0] | ||
189 | add x29, sp, 0 | ||
190 | stp reg3, reg4, [sp, 16] | ||
191 | - sub sp, sp, outgoing_args_size */ | ||
192 | + sub sp, sp, bytes_below_hard_fp */ | ||
193 | cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset; | ||
194 | cfun->machine->frame.final_adjust | ||
195 | = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust; | ||
196 | @@ -5243,9 +5246,11 @@ aarch64_get_separate_components (void) | ||
197 | if (aarch64_register_saved_on_entry (regno)) | ||
198 | { | ||
199 | poly_int64 offset = cfun->machine->frame.reg_offset[regno]; | ||
200 | + | ||
201 | + /* Get the offset relative to the register we'll use. */ | ||
202 | if (!frame_pointer_needed) | ||
203 | - offset += cfun->machine->frame.frame_size | ||
204 | - - cfun->machine->frame.hard_fp_offset; | ||
205 | + offset += cfun->machine->frame.bytes_below_hard_fp; | ||
206 | + | ||
207 | /* Check that we can access the stack slot of the register with one | ||
208 | direct load with no adjustments needed. */ | ||
209 | if (offset_12bit_unsigned_scaled_p (DImode, offset)) | ||
210 | @@ -5367,8 +5372,8 @@ aarch64_process_components (sbitmap components, bool prologue_p) | ||
211 | rtx reg = gen_rtx_REG (mode, regno); | ||
212 | poly_int64 offset = cfun->machine->frame.reg_offset[regno]; | ||
213 | if (!frame_pointer_needed) | ||
214 | - offset += cfun->machine->frame.frame_size | ||
215 | - - cfun->machine->frame.hard_fp_offset; | ||
216 | + offset += cfun->machine->frame.bytes_below_hard_fp; | ||
217 | + | ||
218 | rtx addr = plus_constant (Pmode, ptr_reg, offset); | ||
219 | rtx mem = gen_frame_mem (mode, addr); | ||
220 | |||
221 | @@ -5410,8 +5415,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) | ||
222 | /* REGNO2 can be saved/restored in a pair with REGNO. */ | ||
223 | rtx reg2 = gen_rtx_REG (mode, regno2); | ||
224 | if (!frame_pointer_needed) | ||
225 | - offset2 += cfun->machine->frame.frame_size | ||
226 | - - cfun->machine->frame.hard_fp_offset; | ||
227 | + offset2 += cfun->machine->frame.bytes_below_hard_fp; | ||
228 | rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); | ||
229 | rtx mem2 = gen_frame_mem (mode, addr2); | ||
230 | rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) | ||
231 | @@ -5478,10 +5482,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void) | ||
232 | registers. If POLY_SIZE is not large enough to require a probe this function | ||
233 | will only adjust the stack. When allocating the stack space | ||
234 | FRAME_RELATED_P is then used to indicate if the allocation is frame related. | ||
235 | - FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing | ||
236 | - arguments. If we are then we ensure that any allocation larger than the ABI | ||
237 | - defined buffer needs a probe so that the invariant of having a 1KB buffer is | ||
238 | - maintained. | ||
239 | + FINAL_ADJUSTMENT_P indicates whether we are allocating the area below | ||
240 | + the saved registers. If we are then we ensure that any allocation | ||
241 | + larger than the ABI defined buffer needs a probe so that the | ||
242 | + invariant of having a 1KB buffer is maintained. | ||
243 | |||
244 | We emit barriers after each stack adjustment to prevent optimizations from | ||
245 | breaking the invariant that we never drop the stack more than a page. This | ||
246 | @@ -5671,7 +5675,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, | ||
247 | /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to | ||
248 | be probed. This maintains the requirement that each page is probed at | ||
249 | least once. For initial probing we probe only if the allocation is | ||
250 | - more than GUARD_SIZE - buffer, and for the outgoing arguments we probe | ||
251 | + more than GUARD_SIZE - buffer, and below the saved registers we probe | ||
252 | if the amount is larger than buffer. GUARD_SIZE - buffer + buffer == | ||
253 | GUARD_SIZE. This works that for any allocation that is large enough to | ||
254 | trigger a probe here, we'll have at least one, and if they're not large | ||
255 | diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h | ||
256 | index af0bc3f1881..95831637ba7 100644 | ||
257 | --- a/gcc/config/aarch64/aarch64.h | ||
258 | +++ b/gcc/config/aarch64/aarch64.h | ||
259 | @@ -712,9 +712,13 @@ struct GTY (()) aarch64_frame | ||
260 | HOST_WIDE_INT saved_varargs_size; | ||
261 | |||
262 | /* The size of the saved callee-save int/FP registers. */ | ||
263 | - | ||
264 | HOST_WIDE_INT saved_regs_size; | ||
265 | |||
266 | + /* The number of bytes between the bottom of the static frame (the bottom | ||
267 | + of the outgoing arguments) and the hard frame pointer. This value is | ||
268 | + always a multiple of STACK_BOUNDARY. */ | ||
269 | + poly_int64 bytes_below_hard_fp; | ||
270 | + | ||
271 | /* Offset from the base of the frame (incomming SP) to the | ||
272 | top of the locals area. This value is always a multiple of | ||
273 | STACK_BOUNDARY. */ | ||
274 | -- | ||
275 | 2.34.1 | ||
276 | |||
277 | |||
278 | From 4604c4cd0a6c4c26d6594ec9a0383b4d9197d9df Mon Sep 17 00:00:00 2001 | ||
279 | From: Richard Sandiford <richard.sandiford@arm.com> | ||
280 | Date: Tue, 27 Jun 2023 11:25:40 +0100 | ||
281 | Subject: [PATCH 03/10] aarch64: Rename locals_offset to bytes_above_locals | ||
282 | MIME-Version: 1.0 | ||
283 | Content-Type: text/plain; charset=UTF-8 | ||
284 | Content-Transfer-Encoding: 8bit | ||
285 | |||
286 | locals_offset was described as: | ||
287 | |||
288 | /* Offset from the base of the frame (incomming SP) to the | ||
289 | top of the locals area. This value is always a multiple of | ||
290 | STACK_BOUNDARY. */ | ||
291 | |||
292 | This is implicitly an “upside down” view of the frame: the incoming | ||
293 | SP is at offset 0, and anything N bytes below the incoming SP is at | ||
294 | offset N (rather than -N). | ||
295 | |||
296 | However, reg_offset instead uses a “right way up” view; that is, | ||
297 | it views offsets in address terms. Something above X is at a | ||
298 | positive offset from X and something below X is at a negative | ||
299 | offset from X. | ||
300 | |||
301 | Also, even on FRAME_GROWS_DOWNWARD targets like AArch64, | ||
302 | target-independent code views offsets in address terms too: | ||
303 | locals are allocated at negative offsets to virtual_stack_vars. | ||
304 | |||
305 | It seems confusing to have *_offset fields of the same structure | ||
306 | using different polarities like this. This patch tries to avoid | ||
307 | that by renaming locals_offset to bytes_above_locals. | ||
308 | |||
309 | gcc/ | ||
310 | * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to... | ||
311 | (aarch64_frame::bytes_above_locals): ...this. | ||
312 | * config/aarch64/aarch64.c (aarch64_layout_frame) | ||
313 | (aarch64_initial_elimination_offset): Update accordingly. | ||
314 | --- | ||
315 | gcc/config/aarch64/aarch64.c | 9 +++++---- | ||
316 | gcc/config/aarch64/aarch64.h | 6 +++--- | ||
317 | 2 files changed, 8 insertions(+), 7 deletions(-) | ||
318 | |||
319 | diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c | ||
320 | index 25cf10cc4b9..dcaf491af42 100644 | ||
321 | --- a/gcc/config/aarch64/aarch64.c | ||
322 | +++ b/gcc/config/aarch64/aarch64.c | ||
323 | @@ -4759,7 +4759,8 @@ aarch64_layout_frame (void) | ||
324 | = (cfun->machine->frame.hard_fp_offset | ||
325 | + cfun->machine->frame.bytes_below_hard_fp); | ||
326 | |||
327 | - cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size; | ||
328 | + cfun->machine->frame.bytes_above_locals | ||
329 | + = cfun->machine->frame.saved_varargs_size; | ||
330 | |||
331 | cfun->machine->frame.initial_adjust = 0; | ||
332 | cfun->machine->frame.final_adjust = 0; | ||
333 | @@ -8566,14 +8567,14 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) | ||
334 | |||
335 | if (from == FRAME_POINTER_REGNUM) | ||
336 | return cfun->machine->frame.hard_fp_offset | ||
337 | - - cfun->machine->frame.locals_offset; | ||
338 | + - cfun->machine->frame.bytes_above_locals; | ||
339 | } | ||
340 | |||
341 | if (to == STACK_POINTER_REGNUM) | ||
342 | { | ||
343 | if (from == FRAME_POINTER_REGNUM) | ||
344 | - return cfun->machine->frame.frame_size | ||
345 | - - cfun->machine->frame.locals_offset; | ||
346 | + return cfun->machine->frame.frame_size | ||
347 | + - cfun->machine->frame.bytes_above_locals; | ||
348 | } | ||
349 | |||
350 | return cfun->machine->frame.frame_size; | ||
351 | diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h | ||
352 | index 95831637ba7..a079a88b4f4 100644 | ||
353 | --- a/gcc/config/aarch64/aarch64.h | ||
354 | +++ b/gcc/config/aarch64/aarch64.h | ||
355 | @@ -719,10 +719,10 @@ struct GTY (()) aarch64_frame | ||
356 | always a multiple of STACK_BOUNDARY. */ | ||
357 | poly_int64 bytes_below_hard_fp; | ||
358 | |||
359 | - /* Offset from the base of the frame (incomming SP) to the | ||
360 | - top of the locals area. This value is always a multiple of | ||
361 | + /* The number of bytes between the top of the locals area and the top | ||
362 | + of the frame (the incomming SP). This value is always a multiple of | ||
363 | STACK_BOUNDARY. */ | ||
364 | - poly_int64 locals_offset; | ||
365 | + poly_int64 bytes_above_locals; | ||
366 | |||
367 | /* Offset from the base of the frame (incomming SP) to the | ||
368 | hard_frame_pointer. This value is always a multiple of | ||
369 | -- | ||
370 | 2.34.1 | ||
371 | |||
372 | |||
373 | From 16016465ff28a75f5e0540cbaeb4eb102fdc3230 Mon Sep 17 00:00:00 2001 | ||
374 | From: Richard Sandiford <richard.sandiford@arm.com> | ||
375 | Date: Tue, 27 Jun 2023 11:28:11 +0100 | ||
376 | Subject: [PATCH 04/10] aarch64: Rename hard_fp_offset to bytes_above_hard_fp | ||
377 | MIME-Version: 1.0 | ||
378 | Content-Type: text/plain; charset=UTF-8 | ||
379 | Content-Transfer-Encoding: 8bit | ||
380 | |||
381 | Similarly to the previous locals_offset patch, hard_fp_offset | ||
382 | was described as: | ||
383 | |||
384 | /* Offset from the base of the frame (incomming SP) to the | ||
385 | hard_frame_pointer. This value is always a multiple of | ||
386 | STACK_BOUNDARY. */ | ||
387 | poly_int64 hard_fp_offset; | ||
388 | |||
389 | which again took an “upside-down” view: higher offsets meant lower | ||
390 | addresses. This patch renames the field to bytes_above_hard_fp instead. | ||
391 | |||
392 | gcc/ | ||
393 | * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename | ||
394 | to... | ||
395 | (aarch64_frame::bytes_above_hard_fp): ...this. | ||
396 | * config/aarch64/aarch64.c (aarch64_layout_frame) | ||
397 | (aarch64_expand_prologue): Update accordingly. | ||
398 | (aarch64_initial_elimination_offset): Likewise. | ||
399 | --- | ||
400 | gcc/config/aarch64/aarch64.c | 21 +++++++++++---------- | ||
401 | gcc/config/aarch64/aarch64.h | 6 +++--- | ||
402 | 2 files changed, 14 insertions(+), 13 deletions(-) | ||
403 | |||
404 | diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c | ||
405 | index dcaf491af42..2681e0c2bb9 100644 | ||
406 | --- a/gcc/config/aarch64/aarch64.c | ||
407 | +++ b/gcc/config/aarch64/aarch64.c | ||
408 | @@ -4747,7 +4747,7 @@ aarch64_layout_frame (void) | ||
409 | HOST_WIDE_INT varargs_and_saved_regs_size | ||
410 | = offset + cfun->machine->frame.saved_varargs_size; | ||
411 | |||
412 | - cfun->machine->frame.hard_fp_offset | ||
413 | + cfun->machine->frame.bytes_above_hard_fp | ||
414 | = aligned_upper_bound (varargs_and_saved_regs_size | ||
415 | + get_frame_size (), | ||
416 | STACK_BOUNDARY / BITS_PER_UNIT); | ||
417 | @@ -4756,7 +4756,7 @@ aarch64_layout_frame (void) | ||
418 | gcc_assert (multiple_p (cfun->machine->frame.bytes_below_hard_fp, | ||
419 | STACK_BOUNDARY / BITS_PER_UNIT)); | ||
420 | cfun->machine->frame.frame_size | ||
421 | - = (cfun->machine->frame.hard_fp_offset | ||
422 | + = (cfun->machine->frame.bytes_above_hard_fp | ||
423 | + cfun->machine->frame.bytes_below_hard_fp); | ||
424 | |||
425 | cfun->machine->frame.bytes_above_locals | ||
426 | @@ -4788,7 +4788,7 @@ aarch64_layout_frame (void) | ||
427 | else if (known_lt (cfun->machine->frame.bytes_below_hard_fp | ||
428 | + cfun->machine->frame.saved_regs_size, 512) | ||
429 | && !(cfun->calls_alloca | ||
430 | - && known_lt (cfun->machine->frame.hard_fp_offset, | ||
431 | + && known_lt (cfun->machine->frame.bytes_above_hard_fp, | ||
432 | max_push_offset))) | ||
433 | { | ||
434 | /* Frame with small area below the saved registers: | ||
435 | @@ -4797,14 +4797,14 @@ aarch64_layout_frame (void) | ||
436 | stp reg3, reg4, [sp, bytes_below_hard_fp + 16] */ | ||
437 | cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size; | ||
438 | cfun->machine->frame.callee_offset | ||
439 | - = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset; | ||
440 | + = cfun->machine->frame.frame_size - cfun->machine->frame.bytes_above_hard_fp; | ||
441 | } | ||
442 | - else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset) | ||
443 | + else if (cfun->machine->frame.bytes_above_hard_fp.is_constant (&const_fp_offset) | ||
444 | && const_fp_offset < max_push_offset) | ||
445 | { | ||
446 | /* Frame with large area below the saved registers, but with a | ||
447 | small area above: | ||
448 | - stp reg1, reg2, [sp, -hard_fp_offset]! | ||
449 | + stp reg1, reg2, [sp, -bytes_above_hard_fp]! | ||
450 | stp reg3, reg4, [sp, 16] | ||
451 | sub sp, sp, bytes_below_hard_fp */ | ||
452 | cfun->machine->frame.callee_adjust = const_fp_offset; | ||
453 | @@ -4814,12 +4814,13 @@ aarch64_layout_frame (void) | ||
454 | else | ||
455 | { | ||
456 | /* General case: | ||
457 | - sub sp, sp, hard_fp_offset | ||
458 | + sub sp, sp, bytes_above_hard_fp | ||
459 | stp x29, x30, [sp, 0] | ||
460 | add x29, sp, 0 | ||
461 | stp reg3, reg4, [sp, 16] | ||
462 | sub sp, sp, bytes_below_hard_fp */ | ||
463 | - cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset; | ||
464 | + cfun->machine->frame.initial_adjust | ||
465 | + = cfun->machine->frame.bytes_above_hard_fp; | ||
466 | cfun->machine->frame.final_adjust | ||
467 | = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust; | ||
468 | } | ||
469 | @@ -8563,10 +8564,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) | ||
470 | if (to == HARD_FRAME_POINTER_REGNUM) | ||
471 | { | ||
472 | if (from == ARG_POINTER_REGNUM) | ||
473 | - return cfun->machine->frame.hard_fp_offset; | ||
474 | + return cfun->machine->frame.bytes_above_hard_fp; | ||
475 | |||
476 | if (from == FRAME_POINTER_REGNUM) | ||
477 | - return cfun->machine->frame.hard_fp_offset | ||
478 | + return cfun->machine->frame.bytes_above_hard_fp | ||
479 | - cfun->machine->frame.bytes_above_locals; | ||
480 | } | ||
481 | |||
482 | diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h | ||
483 | index a079a88b4f4..eab6da84a02 100644 | ||
484 | --- a/gcc/config/aarch64/aarch64.h | ||
485 | +++ b/gcc/config/aarch64/aarch64.h | ||
486 | @@ -724,10 +724,10 @@ struct GTY (()) aarch64_frame | ||
487 | STACK_BOUNDARY. */ | ||
488 | poly_int64 bytes_above_locals; | ||
489 | |||
490 | - /* Offset from the base of the frame (incomming SP) to the | ||
491 | - hard_frame_pointer. This value is always a multiple of | ||
492 | + /* The number of bytes between the hard_frame_pointer and the top of | ||
493 | + the frame (the incomming SP). This value is always a multiple of | ||
494 | STACK_BOUNDARY. */ | ||
495 | - poly_int64 hard_fp_offset; | ||
496 | + poly_int64 bytes_above_hard_fp; | ||
497 | |||
498 | /* The size of the frame. This value is the offset from base of the | ||
499 | frame (incomming SP) to the stack_pointer. This value is always | ||
500 | -- | ||
501 | 2.34.1 | ||
502 | |||
503 | |||
504 | From eb2271eb6bb68ec3c9aa9ae4746ea1ee5f18874a Mon Sep 17 00:00:00 2001 | ||
505 | From: Richard Sandiford <richard.sandiford@arm.com> | ||
506 | Date: Thu, 22 Jun 2023 22:26:30 +0100 | ||
507 | Subject: [PATCH 05/10] aarch64: Tweak frame_size comment | ||
508 | MIME-Version: 1.0 | ||
509 | Content-Type: text/plain; charset=UTF-8 | ||
510 | Content-Transfer-Encoding: 8bit | ||
511 | |||
512 | This patch fixes another case in which a value was described with | ||
513 | an “upside-down” view. | ||
514 | |||
515 | gcc/ | ||
516 | * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment. | ||
517 | --- | ||
518 | gcc/config/aarch64/aarch64.h | 4 ++-- | ||
519 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
520 | |||
521 | diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h | ||
522 | index eab6da84a02..7c4b65ec55b 100644 | ||
523 | --- a/gcc/config/aarch64/aarch64.h | ||
524 | +++ b/gcc/config/aarch64/aarch64.h | ||
525 | @@ -729,8 +729,8 @@ struct GTY (()) aarch64_frame | ||
526 | STACK_BOUNDARY. */ | ||
527 | poly_int64 bytes_above_hard_fp; | ||
528 | |||
529 | - /* The size of the frame. This value is the offset from base of the | ||
530 | - frame (incomming SP) to the stack_pointer. This value is always | ||
531 | + /* The size of the frame, i.e. the number of bytes between the bottom | ||
532 | + of the outgoing arguments and the incoming SP. This value is always | ||
533 | a multiple of STACK_BOUNDARY. */ | ||
534 | poly_int64 frame_size; | ||
535 | |||
536 | -- | ||
537 | 2.34.1 | ||
538 | |||
539 | |||
540 | From cfed3b87e9351edff1568ade4ef666edc9887639 Mon Sep 17 00:00:00 2001 | ||
541 | From: Richard Sandiford <richard.sandiford@arm.com> | ||
542 | Date: Tue, 15 Aug 2023 19:05:30 +0100 | ||
543 | Subject: [PATCH 06/10] Backport check-function-bodies support | ||
544 | |||
545 | --- | ||
546 | gcc/testsuite/lib/scanasm.exp | 191 ++++++++++++++++++++++++++++++++++ | ||
547 | 1 file changed, 191 insertions(+) | ||
548 | |||
549 | diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp | ||
550 | index 35ccbc86fc0..c9af27bf47a 100644 | ||
551 | --- a/gcc/testsuite/lib/scanasm.exp | ||
552 | +++ b/gcc/testsuite/lib/scanasm.exp | ||
553 | @@ -546,3 +546,194 @@ proc scan-lto-assembler { args } { | ||
554 | verbose "output_file: $output_file" | ||
555 | dg-scan "scan-lto-assembler" 1 $testcase $output_file $args | ||
556 | } | ||
557 | + | ||
558 | +# Read assembly file FILENAME and store a mapping from function names | ||
559 | +# to function bodies in array RESULT. FILENAME has already been uploaded | ||
560 | +# locally where necessary and is known to exist. | ||
561 | + | ||
562 | +proc parse_function_bodies { filename result } { | ||
563 | + upvar $result up_result | ||
564 | + | ||
565 | + # Regexp for the start of a function definition (name in \1). | ||
566 | + set label {^([a-zA-Z_]\S+):$} | ||
567 | + | ||
568 | + # Regexp for the end of a function definition. | ||
569 | + set terminator {^\s*\.size} | ||
570 | + | ||
571 | + # Regexp for lines that aren't interesting. | ||
572 | + set fluff {^\s*(?:\.|//|@|$)} | ||
573 | + | ||
574 | + set fd [open $filename r] | ||
575 | + set in_function 0 | ||
576 | + while { [gets $fd line] >= 0 } { | ||
577 | + if { [regexp $label $line dummy function_name] } { | ||
578 | + set in_function 1 | ||
579 | + set function_body "" | ||
580 | + } elseif { $in_function } { | ||
581 | + if { [regexp $terminator $line] } { | ||
582 | + set up_result($function_name) $function_body | ||
583 | + set in_function 0 | ||
584 | + } elseif { ![regexp $fluff $line] } { | ||
585 | + append function_body $line "\n" | ||
586 | + } | ||
587 | + } | ||
588 | + } | ||
589 | + close $fd | ||
590 | +} | ||
591 | + | ||
592 | +# FUNCTIONS is an array that maps function names to function bodies. | ||
593 | +# Return true if it contains a definition of function NAME and if | ||
594 | +# that definition matches BODY_REGEXP. | ||
595 | + | ||
596 | +proc check_function_body { functions name body_regexp } { | ||
597 | + upvar $functions up_functions | ||
598 | + | ||
599 | + if { ![info exists up_functions($name)] } { | ||
600 | + return 0 | ||
601 | + } | ||
602 | + set fn_res [regexp "^$body_regexp\$" $up_functions($name)] | ||
603 | + if { !$fn_res } { | ||
604 | + verbose -log "body: $body_regexp" | ||
605 | + verbose -log "against: $up_functions($name)" | ||
606 | + } | ||
607 | + return $fn_res | ||
608 | +} | ||
609 | + | ||
610 | +# Check the implementations of functions against expected output. Used as: | ||
611 | +# | ||
612 | +# { dg-do { check-function-bodies PREFIX TERMINATOR[ OPTION[ SELECTOR]] } } | ||
613 | +# | ||
614 | +# See sourcebuild.texi for details. | ||
615 | + | ||
616 | +proc check-function-bodies { args } { | ||
617 | + if { [llength $args] < 2 } { | ||
618 | + error "too few arguments to check-function-bodies" | ||
619 | + } | ||
620 | + if { [llength $args] > 4 } { | ||
621 | + error "too many arguments to check-function-bodies" | ||
622 | + } | ||
623 | + | ||
624 | + if { [llength $args] >= 3 } { | ||
625 | + set required_flags [lindex $args 2] | ||
626 | + | ||
627 | + upvar 2 dg-extra-tool-flags extra_tool_flags | ||
628 | + set flags $extra_tool_flags | ||
629 | + | ||
630 | + global torture_current_flags | ||
631 | + if { [info exists torture_current_flags] } { | ||
632 | + append flags " " $torture_current_flags | ||
633 | + } | ||
634 | + foreach required_flag $required_flags { | ||
635 | + switch -- $required_flag { | ||
636 | + target - | ||
637 | + xfail { | ||
638 | + error "misplaced $required_flag in check-function-bodies" | ||
639 | + } | ||
640 | + } | ||
641 | + } | ||
642 | + foreach required_flag $required_flags { | ||
643 | + if { ![regexp " $required_flag " $flags] } { | ||
644 | + return | ||
645 | + } | ||
646 | + } | ||
647 | + } | ||
648 | + | ||
649 | + set xfail_all 0 | ||
650 | + if { [llength $args] >= 4 } { | ||
651 | + switch [dg-process-target [lindex $args 3]] { | ||
652 | + "S" { } | ||
653 | + "N" { return } | ||
654 | + "F" { set xfail_all 1 } | ||
655 | + "P" { } | ||
656 | + } | ||
657 | + } | ||
658 | + | ||
659 | + set testcase [testname-for-summary] | ||
660 | + # The name might include a list of options; extract the file name. | ||
661 | + set filename [lindex $testcase 0] | ||
662 | + | ||
663 | + global srcdir | ||
664 | + set input_filename "$srcdir/$filename" | ||
665 | + set output_filename "[file rootname [file tail $filename]].s" | ||
666 | + | ||
667 | + set prefix [lindex $args 0] | ||
668 | + set prefix_len [string length $prefix] | ||
669 | + set terminator [lindex $args 1] | ||
670 | + if { [string equal $terminator ""] } { | ||
671 | + set terminator "*/" | ||
672 | + } | ||
673 | + set terminator_len [string length $terminator] | ||
674 | + | ||
675 | + set have_bodies 0 | ||
676 | + if { [is_remote host] } { | ||
677 | + remote_upload host "$filename" | ||
678 | + } | ||
679 | + if { [file exists $output_filename] } { | ||
680 | + parse_function_bodies $output_filename functions | ||
681 | + set have_bodies 1 | ||
682 | + } else { | ||
683 | + verbose -log "$testcase: output file does not exist" | ||
684 | + } | ||
685 | + | ||
686 | + set count 0 | ||
687 | + set function_regexp "" | ||
688 | + set label {^(\S+):$} | ||
689 | + | ||
690 | + set lineno 1 | ||
691 | + set fd [open $input_filename r] | ||
692 | + set in_function 0 | ||
693 | + while { [gets $fd line] >= 0 } { | ||
694 | + if { [string equal -length $prefix_len $line $prefix] } { | ||
695 | + set line [string trim [string range $line $prefix_len end]] | ||
696 | + if { !$in_function } { | ||
697 | + if { [regexp "^(.*?\\S)\\s+{(.*)}\$" $line dummy \ | ||
698 | + line selector] } { | ||
699 | + set selector [dg-process-target $selector] | ||
700 | + } else { | ||
701 | + set selector "P" | ||
702 | + } | ||
703 | + if { ![regexp $label $line dummy function_name] } { | ||
704 | + close $fd | ||
705 | + error "check-function-bodies: line $lineno does not have a function label" | ||
706 | + } | ||
707 | + set in_function 1 | ||
708 | + set function_regexp "" | ||
709 | + } elseif { [string equal $line "("] } { | ||
710 | + append function_regexp "(?:" | ||
711 | + } elseif { [string equal $line "|"] } { | ||
712 | + append function_regexp "|" | ||
713 | + } elseif { [string equal $line ")"] } { | ||
714 | + append function_regexp ")" | ||
715 | + } elseif { [string equal $line "..."] } { | ||
716 | + append function_regexp ".*" | ||
717 | + } else { | ||
718 | + append function_regexp "\t" $line "\n" | ||
719 | + } | ||
720 | + } elseif { [string equal -length $terminator_len $line $terminator] } { | ||
721 | + if { ![string equal $selector "N"] } { | ||
722 | + if { $xfail_all || [string equal $selector "F"] } { | ||
723 | + setup_xfail "*-*-*" | ||
724 | + } | ||
725 | + set testname "$testcase check-function-bodies $function_name" | ||
726 | + if { !$have_bodies } { | ||
727 | + unresolved $testname | ||
728 | + } elseif { [check_function_body functions $function_name \ | ||
729 | + $function_regexp] } { | ||
730 | + pass $testname | ||
731 | + } else { | ||
732 | + fail $testname | ||
733 | + } | ||
734 | + } | ||
735 | + set in_function 0 | ||
736 | + incr count | ||
737 | + } | ||
738 | + incr lineno | ||
739 | + } | ||
740 | + close $fd | ||
741 | + if { $in_function } { | ||
742 | + error "check-function-bodies: missing \"$terminator\"" | ||
743 | + } | ||
744 | + if { $count == 0 } { | ||
745 | + error "check-function-bodies: no matches found" | ||
746 | + } | ||
747 | +} | ||
748 | -- | ||
749 | 2.34.1 | ||
750 | |||
751 | |||
752 | From 4dd8925d95d3d6d89779b494b5f4cfadcf9fa96e Mon Sep 17 00:00:00 2001 | ||
753 | From: Richard Sandiford <richard.sandiford@arm.com> | ||
754 | Date: Tue, 27 Jun 2023 15:11:44 +0100 | ||
755 | Subject: [PATCH 07/10] aarch64: Tweak stack clash boundary condition | ||
756 | |||
757 | The AArch64 ABI says that, when stack clash protection is used, | ||
758 | there can be a maximum of 1KiB of unprobed space at sp on entry | ||
759 | to a function. Therefore, we need to probe when allocating | ||
760 | >= guard_size - 1KiB of data (>= rather than >). This is what | ||
761 | GCC does. | ||
762 | |||
763 | If an allocation is exactly guard_size bytes, it is enough to allocate | ||
764 | those bytes and probe once at offset 1024. It isn't possible to use a | ||
765 | single probe at any other offset: higher would complicate later code, | |
766 | by leaving more unprobed space than usual, while lower would risk | ||
767 | leaving an entire page unprobed. For simplicity, the code probes all | ||
768 | allocations at offset 1024. | ||
769 | |||
770 | Some register saves also act as probes. If we need to allocate | ||
771 | more space below the last such register save probe, we need to | ||
772 | probe the allocation if it is > 1KiB. Again, this allocation is | ||
773 | then sometimes (but not always) probed at offset 1024. This sort of | ||
774 | allocation is currently only used for outgoing arguments, which are | ||
775 | rarely this big. | ||
776 | |||
777 | However, the code also probed if this final outgoing-arguments | ||
778 | allocation was == 1KiB, rather than just > 1KiB. This isn't | ||
779 | necessary, since the register save then probes at offset 1024 | ||
780 | as required. Continuing to probe allocations of exactly 1KiB | ||
781 | would complicate later patches. | ||
782 | |||
783 | gcc/ | ||
784 | * config/aarch64/aarch64.c (aarch64_allocate_and_probe_stack_space): | ||
785 | Don't probe final allocations that are exactly 1KiB in size (after | ||
786 | unprobed space above the final allocation has been deducted). | ||
787 | |||
788 | gcc/testsuite/ | ||
789 | * gcc.target/aarch64/stack-check-prologue-17.c: New test. | ||
790 | --- | ||
791 | gcc/config/aarch64/aarch64.c | 6 +- | ||
792 | .../aarch64/stack-check-prologue-17.c | 55 +++++++++++++++++++ | ||
793 | 2 files changed, 60 insertions(+), 1 deletion(-) | ||
794 | create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c | ||
795 | |||
796 | diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c | ||
797 | index 2681e0c2bb9..4c9e11cd7cf 100644 | ||
798 | --- a/gcc/config/aarch64/aarch64.c | ||
799 | +++ b/gcc/config/aarch64/aarch64.c | ||
800 | @@ -5506,6 +5506,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, | ||
801 | HOST_WIDE_INT guard_size | ||
802 | = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE); | ||
803 | HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; | ||
804 | + HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT; | ||
805 | + gcc_assert (multiple_p (poly_size, byte_sp_alignment)); | ||
806 | /* When doing the final adjustment for the outgoing argument size we can't | ||
807 | assume that LR was saved at position 0. So subtract it's offset from the | ||
808 | ABI safe buffer so that we don't accidentally allow an adjustment that | ||
809 | @@ -5513,7 +5515,9 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, | ||
810 | probing. */ | ||
811 | HOST_WIDE_INT min_probe_threshold | ||
812 | = final_adjustment_p | ||
813 | - ? guard_used_by_caller - cfun->machine->frame.reg_offset[LR_REGNUM] | ||
814 | + ? (guard_used_by_caller | ||
815 | + + byte_sp_alignment | ||
816 | + - cfun->machine->frame.reg_offset[LR_REGNUM]) | ||
817 | : guard_size - guard_used_by_caller; | ||
818 | |||
819 | poly_int64 frame_size = cfun->machine->frame.frame_size; | ||
820 | diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c | ||
821 | new file mode 100644 | ||
822 | index 00000000000..0d8a25d73a2 | ||
823 | --- /dev/null | ||
824 | +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c | ||
825 | @@ -0,0 +1,55 @@ | ||
826 | +/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ | ||
827 | +/* { dg-final { check-function-bodies "**" "" } } */ | ||
828 | + | ||
829 | +void f(int, ...); | ||
830 | +void g(); | ||
831 | + | ||
832 | +/* | ||
833 | +** test1: | ||
834 | +** ... | ||
835 | +** str x30, \[sp\] | ||
836 | +** sub sp, sp, #1024 | ||
837 | +** cbnz w0, .* | ||
838 | +** bl g | ||
839 | +** ... | ||
840 | +*/ | ||
841 | +int test1(int z) { | ||
842 | + __uint128_t x = 0; | ||
843 | + int y[0x400]; | ||
844 | + if (z) | ||
845 | + { | ||
846 | + f(0, 0, 0, 0, 0, 0, 0, &y, | ||
847 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
848 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
849 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
850 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); | ||
851 | + } | ||
852 | + g(); | ||
853 | + return 1; | ||
854 | +} | ||
855 | + | ||
856 | +/* | ||
857 | +** test2: | ||
858 | +** ... | ||
859 | +** str x30, \[sp\] | ||
860 | +** sub sp, sp, #1040 | ||
861 | +** str xzr, \[sp\] | ||
862 | +** cbnz w0, .* | ||
863 | +** bl g | ||
864 | +** ... | ||
865 | +*/ | ||
866 | +int test2(int z) { | ||
867 | + __uint128_t x = 0; | ||
868 | + int y[0x400]; | ||
869 | + if (z) | ||
870 | + { | ||
871 | + f(0, 0, 0, 0, 0, 0, 0, &y, | ||
872 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
873 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
874 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
875 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
876 | + x); | ||
877 | + } | ||
878 | + g(); | ||
879 | + return 1; | ||
880 | +} | ||
881 | -- | ||
882 | 2.34.1 | ||
883 | |||
884 | |||
885 | From 12517baf6c88447e3bda3a459ac4c29d61f84e6c Mon Sep 17 00:00:00 2001 | ||
886 | From: Richard Sandiford <richard.sandiford@arm.com> | ||
887 | Date: Tue, 27 Jun 2023 15:12:55 +0100 | ||
888 | Subject: [PATCH 08/10] aarch64: Put LR save probe in first 16 bytes | ||
889 | |||
890 | -fstack-clash-protection uses the save of LR as a probe for the next | ||
891 | allocation. The next allocation could be: | ||
892 | |||
893 | * another part of the static frame, e.g. when allocating SVE save slots | ||
894 | or outgoing arguments | ||
895 | |||
896 | * an alloca in the same function | ||
897 | |||
898 | * an allocation made by a callee function | ||
899 | |||
900 | However, when -fomit-frame-pointer is used, the LR save slot is placed | ||
901 | above the other GPR save slots. It could therefore be up to 80 bytes | ||
902 | above the base of the GPR save area (which is also the hard fp address). | ||
903 | |||
904 | aarch64_allocate_and_probe_stack_space took this into account when | ||
905 | deciding how much subsequent space could be allocated without needing | ||
906 | a probe. However, it interacted badly with: | ||
907 | |||
908 | /* If doing a small final adjustment, we always probe at offset 0. | ||
909 | This is done to avoid issues when LR is not at position 0 or when | ||
910 | the final adjustment is smaller than the probing offset. */ | ||
911 | else if (final_adjustment_p && rounded_size == 0) | ||
912 | residual_probe_offset = 0; | ||
913 | |||
914 | which forces any allocation that is smaller than the guard page size | ||
915 | to be probed at offset 0 rather than the usual offset 1024. It was | ||
916 | therefore possible to construct cases in which we had: | ||
917 | |||
918 | * a probe using LR at SP + 80 bytes (or some other value >= 16) | ||
919 | * an allocation of the guard page size - 16 bytes | ||
920 | * a probe at SP + 0 | ||
921 | |||
922 | which allocates guard page size + 64 consecutive unprobed bytes. | ||
923 | |||
924 | This patch requires the LR probe to be in the first 16 bytes of the | ||
925 | save area when stack clash protection is active. Doing it | ||
926 | unconditionally would cause code-quality regressions. | ||
927 | |||
928 | gcc/ | ||
929 | * config/aarch64/aarch64.c (aarch64_layout_frame): Ensure that | ||
930 | the LR save slot is in the first 16 bytes of the register save area. | ||
931 | (aarch64_allocate_and_probe_stack_space): Remove workaround for | ||
932 | when LR was not in the first 16 bytes. | ||
933 | |||
934 | gcc/testsuite/ | ||
935 | * gcc.target/aarch64/stack-check-prologue-18.c: New test. | ||
936 | --- | ||
937 | gcc/config/aarch64/aarch64.c | 50 +++++---- | ||
938 | .../aarch64/stack-check-prologue-18.c | 100 ++++++++++++++++++ | ||
939 | 2 files changed, 127 insertions(+), 23 deletions(-) | ||
940 | create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c | ||
941 | |||
942 | diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c | ||
943 | index 4c9e11cd7cf..1e8467fdd03 100644 | ||
944 | --- a/gcc/config/aarch64/aarch64.c | ||
945 | +++ b/gcc/config/aarch64/aarch64.c | ||
946 | @@ -4686,15 +4686,31 @@ aarch64_layout_frame (void) | ||
947 | |||
948 | cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size; | ||
949 | |||
950 | +#define ALLOCATE_GPR_SLOT(REGNO) \ | ||
951 | + do \ | ||
952 | + { \ | ||
953 | + cfun->machine->frame.reg_offset[REGNO] = offset; \ | ||
954 | + if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM) \ | ||
955 | + cfun->machine->frame.wb_candidate1 = (REGNO); \ | ||
956 | + else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM) \ | ||
957 | + cfun->machine->frame.wb_candidate2 = (REGNO); \ | ||
958 | + offset += UNITS_PER_WORD; \ | ||
959 | + } \ | ||
960 | + while (0) | ||
961 | + | ||
962 | if (cfun->machine->frame.emit_frame_chain) | ||
963 | { | ||
964 | /* FP and LR are placed in the linkage record. */ | ||
965 | - cfun->machine->frame.reg_offset[R29_REGNUM] = 0; | ||
966 | - cfun->machine->frame.wb_candidate1 = R29_REGNUM; | ||
967 | - cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD; | ||
968 | - cfun->machine->frame.wb_candidate2 = R30_REGNUM; | ||
969 | - offset = 2 * UNITS_PER_WORD; | ||
970 | + ALLOCATE_GPR_SLOT (R29_REGNUM); | ||
971 | + ALLOCATE_GPR_SLOT (R30_REGNUM); | ||
972 | } | ||
973 | + else if (flag_stack_clash_protection | ||
974 | + && cfun->machine->frame.reg_offset[R30_REGNUM] == SLOT_REQUIRED) | ||
975 | + /* Put the LR save slot first, since it makes a good choice of probe | ||
976 | + for stack clash purposes. The idea is that the link register usually | ||
977 | + has to be saved before a call anyway, and so we lose little by | ||
978 | + stopping it from being individually shrink-wrapped. */ | ||
979 | + ALLOCATE_GPR_SLOT (R30_REGNUM); | ||
980 | |||
981 | /* With stack-clash, LR must be saved in non-leaf functions. */ | ||
982 | gcc_assert (crtl->is_leaf | ||
983 | @@ -4704,14 +4720,9 @@ aarch64_layout_frame (void) | ||
984 | /* Now assign stack slots for them. */ | ||
985 | for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) | ||
986 | if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) | ||
987 | - { | ||
988 | - cfun->machine->frame.reg_offset[regno] = offset; | ||
989 | - if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM) | ||
990 | - cfun->machine->frame.wb_candidate1 = regno; | ||
991 | - else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM) | ||
992 | - cfun->machine->frame.wb_candidate2 = regno; | ||
993 | - offset += UNITS_PER_WORD; | ||
994 | - } | ||
995 | + ALLOCATE_GPR_SLOT (regno); | ||
996 | + | ||
997 | +#undef ALLOCATE_GPR_SLOT | ||
998 | |||
999 | HOST_WIDE_INT max_int_offset = offset; | ||
1000 | offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); | ||
1001 | @@ -5508,16 +5519,9 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, | ||
1002 | HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; | ||
1003 | HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT; | ||
1004 | gcc_assert (multiple_p (poly_size, byte_sp_alignment)); | ||
1005 | - /* When doing the final adjustment for the outgoing argument size we can't | ||
1006 | - assume that LR was saved at position 0. So subtract it's offset from the | ||
1007 | - ABI safe buffer so that we don't accidentally allow an adjustment that | ||
1008 | - would result in an allocation larger than the ABI buffer without | ||
1009 | - probing. */ | ||
1010 | HOST_WIDE_INT min_probe_threshold | ||
1011 | = final_adjustment_p | ||
1012 | - ? (guard_used_by_caller | ||
1013 | - + byte_sp_alignment | ||
1014 | - - cfun->machine->frame.reg_offset[LR_REGNUM]) | ||
1015 | + ? guard_used_by_caller + byte_sp_alignment | ||
1016 | : guard_size - guard_used_by_caller; | ||
1017 | |||
1018 | poly_int64 frame_size = cfun->machine->frame.frame_size; | ||
1019 | @@ -5697,8 +5701,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, | ||
1020 | if (final_adjustment_p && rounded_size != 0) | ||
1021 | min_probe_threshold = 0; | ||
1022 | /* If doing a small final adjustment, we always probe at offset 0. | ||
1023 | - This is done to avoid issues when LR is not at position 0 or when | ||
1024 | - the final adjustment is smaller than the probing offset. */ | ||
1025 | + This is done to avoid issues when the final adjustment is smaller | ||
1026 | + than the probing offset. */ | ||
1027 | else if (final_adjustment_p && rounded_size == 0) | ||
1028 | residual_probe_offset = 0; | ||
1029 | |||
1030 | diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c | ||
1031 | new file mode 100644 | ||
1032 | index 00000000000..82447d20fff | ||
1033 | --- /dev/null | ||
1034 | +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c | ||
1035 | @@ -0,0 +1,100 @@ | ||
1036 | +/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ | ||
1037 | +/* { dg-final { check-function-bodies "**" "" } } */ | ||
1038 | + | ||
1039 | +void f(int, ...); | ||
1040 | +void g(); | ||
1041 | + | ||
1042 | +/* | ||
1043 | +** test1: | ||
1044 | +** ... | ||
1045 | +** str x30, \[sp\] | ||
1046 | +** sub sp, sp, #4064 | ||
1047 | +** str xzr, \[sp\] | ||
1048 | +** cbnz w0, .* | ||
1049 | +** bl g | ||
1050 | +** ... | ||
1051 | +** str x26, \[sp, #?4128\] | ||
1052 | +** ... | ||
1053 | +*/ | ||
1054 | +int test1(int z) { | ||
1055 | + __uint128_t x = 0; | ||
1056 | + int y[0x400]; | ||
1057 | + if (z) | ||
1058 | + { | ||
1059 | + asm volatile ("" ::: | ||
1060 | + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); | ||
1061 | + f(0, 0, 0, 0, 0, 0, 0, &y, | ||
1062 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1063 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1064 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1065 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1066 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1067 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1068 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1069 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1070 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1071 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1072 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1073 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1074 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1075 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1076 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1077 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x); | ||
1078 | + } | ||
1079 | + g(); | ||
1080 | + return 1; | ||
1081 | +} | ||
1082 | + | ||
1083 | +/* | ||
1084 | +** test2: | ||
1085 | +** ... | ||
1086 | +** str x30, \[sp\] | ||
1087 | +** sub sp, sp, #1040 | ||
1088 | +** str xzr, \[sp\] | ||
1089 | +** cbnz w0, .* | ||
1090 | +** bl g | ||
1091 | +** ... | ||
1092 | +*/ | ||
1093 | +int test2(int z) { | ||
1094 | + __uint128_t x = 0; | ||
1095 | + int y[0x400]; | ||
1096 | + if (z) | ||
1097 | + { | ||
1098 | + asm volatile ("" ::: | ||
1099 | + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); | ||
1100 | + f(0, 0, 0, 0, 0, 0, 0, &y, | ||
1101 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1102 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1103 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1104 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1105 | + x); | ||
1106 | + } | ||
1107 | + g(); | ||
1108 | + return 1; | ||
1109 | +} | ||
1110 | + | ||
1111 | +/* | ||
1112 | +** test3: | ||
1113 | +** ... | ||
1114 | +** str x30, \[sp\] | ||
1115 | +** sub sp, sp, #1024 | ||
1116 | +** cbnz w0, .* | ||
1117 | +** bl g | ||
1118 | +** ... | ||
1119 | +*/ | ||
1120 | +int test3(int z) { | ||
1121 | + __uint128_t x = 0; | ||
1122 | + int y[0x400]; | ||
1123 | + if (z) | ||
1124 | + { | ||
1125 | + asm volatile ("" ::: | ||
1126 | + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); | ||
1127 | + f(0, 0, 0, 0, 0, 0, 0, &y, | ||
1128 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1129 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1130 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, | ||
1131 | + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); | ||
1132 | + } | ||
1133 | + g(); | ||
1134 | + return 1; | ||
1135 | +} | ||
1136 | -- | ||
1137 | 2.34.1 | ||
1138 | |||
1139 | |||
1140 | From f2684e63652bb251d22c79e40081c646df1f36b6 Mon Sep 17 00:00:00 2001 | ||
1141 | From: Richard Sandiford <richard.sandiford@arm.com> | ||
1142 | Date: Tue, 8 Aug 2023 01:57:26 +0100 | ||
1143 | Subject: [PATCH 09/10] aarch64: Simplify probe of final frame allocation | ||
1144 | |||
1145 | Previous patches ensured that the final frame allocation only needs | ||
1146 | a probe when the size is strictly greater than 1KiB. It's therefore | ||
1147 | safe to use the normal 1024 probe offset in all cases. | ||
1148 | |||
1149 | The main motivation for doing this is to simplify the code and | ||
1150 | reduce the number of special cases. | |
1151 | |||
1152 | gcc/ | ||
1153 | * config/aarch64/aarch64.c (aarch64_allocate_and_probe_stack_space): | ||
1154 | Always probe the residual allocation at offset 1024, asserting | ||
1155 | that that is in range. | ||
1156 | |||
1157 | gcc/testsuite/ | ||
1158 | * gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe | ||
1159 | to be at offset 1024 rather than offset 0. | ||
1160 | * gcc.target/aarch64/stack-check-prologue-18.c: Likewise. | ||
1161 | --- | ||
1162 | gcc/config/aarch64/aarch64.c | 12 ++++-------- | ||
1163 | .../gcc.target/aarch64/stack-check-prologue-17.c | 2 +- | ||
1164 | .../gcc.target/aarch64/stack-check-prologue-18.c | 7 +++++-- | ||
1165 | 3 files changed, 10 insertions(+), 11 deletions(-) | ||
1166 | |||
1167 | diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c | ||
1168 | index 1e8467fdd03..705f719a2ea 100644 | ||
1169 | --- a/gcc/config/aarch64/aarch64.c | ||
1170 | +++ b/gcc/config/aarch64/aarch64.c | ||
1171 | @@ -5695,16 +5695,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, | ||
1172 | are still safe. */ | ||
1173 | if (residual) | ||
1174 | { | ||
1175 | - HOST_WIDE_INT residual_probe_offset = guard_used_by_caller; | ||
1176 | + gcc_assert (guard_used_by_caller + byte_sp_alignment <= size); | ||
1177 | + | ||
1178 | /* If we're doing final adjustments, and we've done any full page | ||
1179 | allocations then any residual needs to be probed. */ | ||
1180 | if (final_adjustment_p && rounded_size != 0) | ||
1181 | min_probe_threshold = 0; | ||
1182 | - /* If doing a small final adjustment, we always probe at offset 0. | ||
1183 | - This is done to avoid issues when the final adjustment is smaller | ||
1184 | - than the probing offset. */ | ||
1185 | - else if (final_adjustment_p && rounded_size == 0) | ||
1186 | - residual_probe_offset = 0; | ||
1187 | |||
1188 | aarch64_sub_sp (temp1, temp2, residual, frame_related_p); | ||
1189 | if (residual >= min_probe_threshold) | ||
1190 | @@ -5715,8 +5711,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, | ||
1191 | HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required." | ||
1192 | "\n", residual); | ||
1193 | |||
1194 | - emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, | ||
1195 | - residual_probe_offset)); | ||
1196 | + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, | ||
1197 | + guard_used_by_caller)); | ||
1198 | emit_insn (gen_blockage ()); | ||
1199 | } | ||
1200 | } | ||
1201 | diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c | ||
1202 | index 0d8a25d73a2..f0ec1389771 100644 | ||
1203 | --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c | ||
1204 | +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c | ||
1205 | @@ -33,7 +33,7 @@ int test1(int z) { | ||
1206 | ** ... | ||
1207 | ** str x30, \[sp\] | ||
1208 | ** sub sp, sp, #1040 | ||
1209 | -** str xzr, \[sp\] | ||
1210 | +** str xzr, \[sp, #?1024\] | ||
1211 | ** cbnz w0, .* | ||
1212 | ** bl g | ||
1213 | ** ... | ||
1214 | diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c | ||
1215 | index 82447d20fff..71d33ba34e9 100644 | ||
1216 | --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c | ||
1217 | +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c | ||
1218 | @@ -8,8 +8,9 @@ void g(); | ||
1219 | ** test1: | ||
1220 | ** ... | ||
1221 | ** str x30, \[sp\] | ||
1222 | +** ... | ||
1223 | ** sub sp, sp, #4064 | ||
1224 | -** str xzr, \[sp\] | ||
1225 | +** str xzr, \[sp, #?1024\] | ||
1226 | ** cbnz w0, .* | ||
1227 | ** bl g | ||
1228 | ** ... | ||
1229 | @@ -49,8 +50,9 @@ int test1(int z) { | ||
1230 | ** test2: | ||
1231 | ** ... | ||
1232 | ** str x30, \[sp\] | ||
1233 | +** ... | ||
1234 | ** sub sp, sp, #1040 | ||
1235 | -** str xzr, \[sp\] | ||
1236 | +** str xzr, \[sp, #?1024\] | ||
1237 | ** cbnz w0, .* | ||
1238 | ** bl g | ||
1239 | ** ... | ||
1240 | @@ -77,6 +79,7 @@ int test2(int z) { | ||
1241 | ** test3: | ||
1242 | ** ... | ||
1243 | ** str x30, \[sp\] | ||
1244 | +** ... | ||
1245 | ** sub sp, sp, #1024 | ||
1246 | ** cbnz w0, .* | ||
1247 | ** bl g | ||
1248 | -- | ||
1249 | 2.34.1 | ||
1250 | |||
1251 | |||
1252 | From bf3eeaa0182a92987570d9c787bd45079eebf528 Mon Sep 17 00:00:00 2001 | ||
1253 | From: Richard Sandiford <richard.sandiford@arm.com> | ||
1254 | Date: Thu, 15 Jun 2023 19:16:52 +0100 | ||
1255 | Subject: [PATCH 10/10] aarch64: Make stack smash canary protect saved | ||
1256 | registers | ||
1257 | |||
1258 | AArch64 normally puts the saved registers near the bottom of the frame, | ||
1259 | immediately above any dynamic allocations. But this means that a | ||
1260 | stack-smash attack on those dynamic allocations could overwrite the | ||
1261 | saved registers without needing to reach as far as the stack smash | ||
1262 | canary. | ||
1263 | |||
1264 | The same thing could also happen for variable-sized arguments that are | ||
1265 | passed by value, since those are allocated before a call and popped on | ||
1266 | return. | ||
1267 | |||
1268 | This patch avoids that by putting the locals (and thus the canary) below | ||
1269 | the saved registers when stack smash protection is active. | ||
1270 | |||
1271 | The patch fixes CVE-2023-4039. | ||
1272 | |||
1273 | gcc/ | ||
1274 | * config/aarch64/aarch64.c (aarch64_save_regs_above_locals_p): | ||
1275 | New function. | ||
1276 | (aarch64_layout_frame): Use it to decide whether locals should | ||
1277 | go above or below the saved registers. | ||
1278 | (aarch64_expand_prologue): Update stack layout comment. | ||
1279 | Emit a stack tie after the final adjustment. | ||
1280 | |||
1281 | gcc/testsuite/ | ||
1282 | * gcc.target/aarch64/stack-protector-8.c: New test. | ||
1283 | * gcc.target/aarch64/stack-protector-9.c: Likewise. | ||
1284 | --- | ||
1285 | gcc/config/aarch64/aarch64.c | 46 +++++++++++++-- | ||
1286 | .../gcc.target/aarch64/stack-protector-8.c | 58 +++++++++++++++++++ | ||
1287 | .../gcc.target/aarch64/stack-protector-9.c | 33 +++++++++++ | ||
1288 | 3 files changed, 133 insertions(+), 4 deletions(-) | ||
1289 | create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c | ||
1290 | create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c | ||
1291 | |||
1292 | diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c | ||
1293 | index 705f719a2ea..3d094214fac 100644 | ||
1294 | --- a/gcc/config/aarch64/aarch64.c | ||
1295 | +++ b/gcc/config/aarch64/aarch64.c | ||
1296 | @@ -4622,6 +4622,20 @@ aarch64_needs_frame_chain (void) | ||
1297 | return aarch64_use_frame_pointer; | ||
1298 | } | ||
1299 | |||
1300 | +/* Return true if the current function should save registers above | ||
1301 | + the locals area, rather than below it. */ | ||
1302 | + | ||
1303 | +static bool | ||
1304 | +aarch64_save_regs_above_locals_p () | ||
1305 | +{ | ||
1306 | + /* When using stack smash protection, make sure that the canary slot | ||
1307 | + comes between the locals and the saved registers. Otherwise, | ||
1308 | + it would be possible for a carefully sized smash attack to change | ||
1309 | + the saved registers (particularly LR and FP) without reaching the | ||
1310 | + canary. */ | ||
1311 | + return crtl->stack_protect_guard; | ||
1312 | +} | ||
1313 | + | ||
1314 | /* Mark the registers that need to be saved by the callee and calculate | ||
1315 | the size of the callee-saved registers area and frame record (both FP | ||
1316 | and LR may be omitted). */ | ||
1317 | @@ -4686,6 +4700,16 @@ aarch64_layout_frame (void) | ||
1318 | |||
1319 | cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size; | ||
1320 | |||
1321 | + bool regs_at_top_p = aarch64_save_regs_above_locals_p (); | ||
1322 | + | ||
1323 | + if (regs_at_top_p) | ||
1324 | + { | ||
1325 | + cfun->machine->frame.bytes_below_hard_fp += get_frame_size (); | ||
1326 | + cfun->machine->frame.bytes_below_hard_fp | ||
1327 | + = aligned_upper_bound (cfun->machine->frame.bytes_below_hard_fp, | ||
1328 | + STACK_BOUNDARY / BITS_PER_UNIT); | ||
1329 | + } | ||
1330 | + | ||
1331 | #define ALLOCATE_GPR_SLOT(REGNO) \ | ||
1332 | do \ | ||
1333 | { \ | ||
1334 | @@ -4758,9 +4782,11 @@ aarch64_layout_frame (void) | ||
1335 | HOST_WIDE_INT varargs_and_saved_regs_size | ||
1336 | = offset + cfun->machine->frame.saved_varargs_size; | ||
1337 | |||
1338 | + cfun->machine->frame.bytes_above_hard_fp = varargs_and_saved_regs_size; | ||
1339 | + if (!regs_at_top_p) | ||
1340 | + cfun->machine->frame.bytes_above_hard_fp += get_frame_size (); | ||
1341 | cfun->machine->frame.bytes_above_hard_fp | ||
1342 | - = aligned_upper_bound (varargs_and_saved_regs_size | ||
1343 | - + get_frame_size (), | ||
1344 | + = aligned_upper_bound (cfun->machine->frame.bytes_above_hard_fp, | ||
1345 | STACK_BOUNDARY / BITS_PER_UNIT); | ||
1346 | |||
1347 | /* Both these values are already aligned. */ | ||
1348 | @@ -4772,6 +4798,9 @@ aarch64_layout_frame (void) | ||
1349 | |||
1350 | cfun->machine->frame.bytes_above_locals | ||
1351 | = cfun->machine->frame.saved_varargs_size; | ||
1352 | + if (regs_at_top_p) | ||
1353 | + cfun->machine->frame.bytes_above_locals | ||
1354 | + += cfun->machine->frame.saved_regs_size; | ||
1355 | |||
1356 | cfun->machine->frame.initial_adjust = 0; | ||
1357 | cfun->machine->frame.final_adjust = 0; | ||
1358 | @@ -5764,10 +5793,10 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg, | ||
1359 | | for register varargs | | ||
1360 | | | | ||
1361 | +-------------------------------+ | ||
1362 | - | local variables | <-- frame_pointer_rtx | ||
1363 | + | local variables (1) | <-- frame_pointer_rtx | ||
1364 | | | | ||
1365 | +-------------------------------+ | ||
1366 | - | padding | \ | ||
1367 | + | padding (1) | \ | ||
1368 | +-------------------------------+ | | ||
1369 | | callee-saved registers | | frame.saved_regs_size | ||
1370 | +-------------------------------+ | | ||
1371 | @@ -5775,6 +5804,10 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg, | ||
1372 | +-------------------------------+ | | ||
1373 | | FP' | / <- hard_frame_pointer_rtx (aligned) | ||
1374 | +-------------------------------+ | ||
1375 | + | local variables (2) | | ||
1376 | + +-------------------------------+ | ||
1377 | + | padding (2) | | ||
1378 | + +-------------------------------+ | ||
1379 | | dynamic allocation | | ||
1380 | +-------------------------------+ | ||
1381 | | padding | | ||
1382 | @@ -5784,6 +5817,9 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg, | ||
1383 | +-------------------------------+ | ||
1384 | | | <-- stack_pointer_rtx (aligned) | ||
1385 | |||
1386 | + The regions marked (1) and (2) are mutually exclusive. (2) is used | ||
1387 | + when aarch64_save_regs_above_locals_p is true. | ||
1388 | + | ||
1389 | Dynamic stack allocations via alloca() decrease stack_pointer_rtx | ||
1390 | but leave frame_pointer_rtx and hard_frame_pointer_rtx | ||
1391 | unchanged. | ||
1392 | @@ -5937,6 +5973,8 @@ aarch64_expand_prologue (void) | ||
1393 | that is assumed by the called. */ | ||
1394 | aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, | ||
1395 | !frame_pointer_needed, true); | ||
1396 | + if (emit_frame_chain && maybe_ne (final_adjust, 0)) | ||
1397 | + emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); | ||
1398 | } | ||
1399 | |||
1400 | /* Return TRUE if we can use a simple_return insn. | ||
1401 | diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c | ||
1402 | new file mode 100644 | ||
1403 | index 00000000000..c5e7deef6c1 | ||
1404 | --- /dev/null | ||
1405 | +++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c | ||
1406 | @@ -0,0 +1,58 @@ | ||
1407 | +/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */ | ||
1408 | +/* { dg-final { check-function-bodies "**" "" } } */ | ||
1409 | + | ||
1410 | +void g(void *); | ||
1411 | + | ||
1412 | +/* | ||
1413 | +** test1: | ||
1414 | +** sub sp, sp, #288 | ||
1415 | +** stp x29, x30, \[sp, #?272\] | ||
1416 | +** add x29, sp, #?272 | ||
1417 | +** mrs (x[0-9]+), tpidr2_el0 | ||
1418 | +** ldr (x[0-9]+), \[\1, #?16\] | ||
1419 | +** str \2, \[sp, #?264\] | ||
1420 | +** mov \2, *0 | ||
1421 | +** add x0, sp, #?8 | ||
1422 | +** bl g | ||
1423 | +** ... | ||
1424 | +** mrs .* | ||
1425 | +** ... | ||
1426 | +** bne .* | ||
1427 | +** ... | ||
1428 | +** ldp x29, x30, \[sp, #?272\] | ||
1429 | +** add sp, sp, #?288 | ||
1430 | +** ret | ||
1431 | +** bl __stack_chk_fail | ||
1432 | +*/ | ||
1433 | +int test1() { | ||
1434 | + int y[0x40]; | ||
1435 | + g(y); | ||
1436 | + return 1; | ||
1437 | +} | ||
1438 | + | ||
1439 | +/* | ||
1440 | +** test2: | ||
1441 | +** stp x29, x30, \[sp, #?-16\]! | ||
1442 | +** mov x29, sp | ||
1443 | +** sub sp, sp, #1040 | ||
1444 | +** mrs (x[0-9]+), tpidr2_el0 | ||
1445 | +** ldr (x[0-9]+), \[\1, #?16\] | ||
1446 | +** str \2, \[sp, #?1032\] | ||
1447 | +** mov \2, *0 | ||
1448 | +** add x0, sp, #?8 | ||
1449 | +** bl g | ||
1450 | +** ... | ||
1451 | +** mrs .* | ||
1452 | +** ... | ||
1453 | +** bne .* | ||
1454 | +** ... | ||
1455 | +** add sp, sp, #?1040 | ||
1456 | +** ldp x29, x30, \[sp\], #?16 | ||
1457 | +** ret | ||
1458 | +** bl __stack_chk_fail | ||
1459 | +*/ | ||
1460 | +int test2() { | ||
1461 | + int y[0x100]; | ||
1462 | + g(y); | ||
1463 | + return 1; | ||
1464 | +} | ||
1465 | diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c | ||
1466 | new file mode 100644 | ||
1467 | index 00000000000..58f322aa480 | ||
1468 | --- /dev/null | ||
1469 | +++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c | ||
1470 | @@ -0,0 +1,33 @@ | ||
1471 | +/* { dg-options "-O2 -mcpu=neoverse-v1 -fstack-protector-all" } */ | ||
1472 | +/* { dg-final { check-function-bodies "**" "" } } */ | ||
1473 | + | ||
1474 | +/* | ||
1475 | +** main: | ||
1476 | +** ... | ||
1477 | +** stp x29, x30, \[sp, #?-[0-9]+\]! | ||
1478 | +** ... | ||
1479 | +** sub sp, sp, #[0-9]+ | ||
1480 | +** ... | ||
1481 | +** str x[0-9]+, \[x29, #?-8\] | ||
1482 | +** ... | ||
1483 | +*/ | ||
1484 | +int f(const char *); | ||
1485 | +void g(void *); | ||
1486 | +int main(int argc, char* argv[]) | ||
1487 | +{ | ||
1488 | + int a; | ||
1489 | + int b; | ||
1490 | + char c[2+f(argv[1])]; | ||
1491 | + int d[0x100]; | ||
1492 | + char y; | ||
1493 | + | ||
1494 | + y=42; a=4; b=10; | ||
1495 | + c[0] = 'h'; c[1] = '\0'; | ||
1496 | + | ||
1497 | + c[f(argv[2])] = '\0'; | ||
1498 | + | ||
1499 | + __builtin_printf("%d %d\n%s\n", a, b, c); | ||
1500 | + g(d); | ||
1501 | + | ||
1502 | + return 0; | ||
1503 | +} | ||
1504 | -- | ||
1505 | 2.34.1 | ||
1506 | |||