summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch')
-rw-r--r--meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch1506
1 files changed, 1506 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch b/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch
new file mode 100644
index 0000000000..56d229066f
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch
@@ -0,0 +1,1506 @@
1From: Richard Sandiford <richard.sandiford@arm.com>
2Subject: [PATCH 00/19] aarch64: Fix -fstack-protector issue
3Date: Tue, 12 Sep 2023 16:25:10 +0100
4
5This series of patches fixes deficiencies in GCC's -fstack-protector
6implementation for AArch64 when using dynamically allocated stack space.
7This is CVE-2023-4039. See:
8
9https://developer.arm.com/Arm%20Security%20Center/GCC%20Stack%20Protector%20Vulnerability%20AArch64
10https://github.com/metaredteam/external-disclosures/security/advisories/GHSA-x7ch-h5rf-w2mf
11
12for more details.
13
14The fix is to put the saved registers above the locals area when
15-fstack-protector is used.
16
17The series also fixes a stack-clash problem that I found while working
18on the CVE. In unpatched sources, the stack-clash problem would only
19trigger for unrealistic numbers of arguments (8K 64-bit arguments, or an
20equivalent). But it would be a more significant issue with the new
21-fstack-protector frame layout. It's therefore important that both
22problems are fixed together.
23
24Some reorganisation of the code seemed necessary to fix the problems in a
25cleanish way. The series is therefore quite long, but only a handful of
26patches should have any effect on code generation.
27
28See the individual patches for a detailed description.
29
30Tested on aarch64-linux-gnu. Pushed to trunk and to all active branches.
31I've also pushed backports to GCC 7+ to vendors/ARM/heads/CVE-2023-4039.
32
33CVE: CVE-2023-4039
34Upstream-Status: Submitted
35Signed-off-by: Ross Burton <ross.burton@arm.com>
36
37
38From 78ebdb7b12d5e258b9811bab715734454268fd0c Mon Sep 17 00:00:00 2001
39From: Richard Sandiford <richard.sandiford@arm.com>
40Date: Fri, 16 Jun 2023 17:00:51 +0100
41Subject: [PATCH 01/10] aarch64: Explicitly handle frames with no saved
42 registers
43
44If a frame has no saved registers, it can be allocated in one go.
45There is no need to treat the areas below and above the saved
46registers as separate.
47
48And if we allocate the frame in one go, it should be allocated
49as the initial_adjust rather than the final_adjust. This allows the
50frame size to grow to guard_size - guard_used_by_caller before a stack
51probe is needed. (A frame with no register saves is necessarily a
52leaf frame.)
53
54This is a no-op as things stand, since a leaf function will have
55no outgoing arguments, and so all the frame will be above where
56the saved registers normally go.
57
58gcc/
59 * config/aarch64/aarch64.c (aarch64_layout_frame): Explicitly
60 allocate the frame in one go if there are no saved registers.
61---
62 gcc/config/aarch64/aarch64.c | 8 +++++---
63 1 file changed, 5 insertions(+), 3 deletions(-)
64
65diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
66index a35dceab9fc..e9dad682738 100644
67--- a/gcc/config/aarch64/aarch64.c
68+++ b/gcc/config/aarch64/aarch64.c
69@@ -4771,9 +4771,11 @@ aarch64_layout_frame (void)
70 max_push_offset = 256;
71
72 HOST_WIDE_INT const_size, const_fp_offset;
73- if (cfun->machine->frame.frame_size.is_constant (&const_size)
74- && const_size < max_push_offset
75- && known_eq (crtl->outgoing_args_size, 0))
76+ if (cfun->machine->frame.saved_regs_size == 0)
77+ cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
78+ else if (cfun->machine->frame.frame_size.is_constant (&const_size)
79+ && const_size < max_push_offset
80+ && known_eq (crtl->outgoing_args_size, 0))
81 {
82 /* Simple, small frame with no outgoing arguments:
83 stp reg1, reg2, [sp, -frame_size]!
84--
852.34.1
86
87
88From 347487fffa0266d43bf18f1f91878410881f596e Mon Sep 17 00:00:00 2001
89From: Richard Sandiford <richard.sandiford@arm.com>
90Date: Fri, 16 Jun 2023 16:55:12 +0100
91Subject: [PATCH 02/10] aarch64: Add bytes_below_hard_fp to frame info
92
93The frame layout code currently hard-codes the assumption that
94the number of bytes below the saved registers is equal to the
95size of the outgoing arguments. This patch abstracts that
96value into a new field of aarch64_frame.
97
98gcc/
99 * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New
100 field.
101 * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize it,
102 and use it instead of crtl->outgoing_args_size.
103 (aarch64_get_separate_components): Use bytes_below_hard_fp instead
104 of outgoing_args_size.
105 (aarch64_process_components): Likewise.
106---
107 gcc/config/aarch64/aarch64.c | 50 +++++++++++++++++++-----------------
108 gcc/config/aarch64/aarch64.h | 6 ++++-
109 2 files changed, 32 insertions(+), 24 deletions(-)
110
111diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
112index e9dad682738..25cf10cc4b9 100644
113--- a/gcc/config/aarch64/aarch64.c
114+++ b/gcc/config/aarch64/aarch64.c
115@@ -4684,6 +4684,8 @@ aarch64_layout_frame (void)
116 last_fp_reg = regno;
117 }
118
119+ cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size;
120+
121 if (cfun->machine->frame.emit_frame_chain)
122 {
123 /* FP and LR are placed in the linkage record. */
124@@ -4751,11 +4753,11 @@ aarch64_layout_frame (void)
125 STACK_BOUNDARY / BITS_PER_UNIT);
126
127 /* Both these values are already aligned. */
128- gcc_assert (multiple_p (crtl->outgoing_args_size,
129+ gcc_assert (multiple_p (cfun->machine->frame.bytes_below_hard_fp,
130 STACK_BOUNDARY / BITS_PER_UNIT));
131 cfun->machine->frame.frame_size
132 = (cfun->machine->frame.hard_fp_offset
133- + crtl->outgoing_args_size);
134+ + cfun->machine->frame.bytes_below_hard_fp);
135
136 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
137
138@@ -4775,23 +4777,23 @@ aarch64_layout_frame (void)
139 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
140 else if (cfun->machine->frame.frame_size.is_constant (&const_size)
141 && const_size < max_push_offset
142- && known_eq (crtl->outgoing_args_size, 0))
143+ && known_eq (cfun->machine->frame.bytes_below_hard_fp, 0))
144 {
145- /* Simple, small frame with no outgoing arguments:
146+ /* Simple, small frame with no data below the saved registers.
147 stp reg1, reg2, [sp, -frame_size]!
148 stp reg3, reg4, [sp, 16] */
149 cfun->machine->frame.callee_adjust = const_size;
150 }
151- else if (known_lt (crtl->outgoing_args_size
152+ else if (known_lt (cfun->machine->frame.bytes_below_hard_fp
153 + cfun->machine->frame.saved_regs_size, 512)
154 && !(cfun->calls_alloca
155 && known_lt (cfun->machine->frame.hard_fp_offset,
156 max_push_offset)))
157 {
158- /* Frame with small outgoing arguments:
159+ /* Frame with small area below the saved registers:
160 sub sp, sp, frame_size
161- stp reg1, reg2, [sp, outgoing_args_size]
162- stp reg3, reg4, [sp, outgoing_args_size + 16] */
163+ stp reg1, reg2, [sp, bytes_below_hard_fp]
164+ stp reg3, reg4, [sp, bytes_below_hard_fp + 16] */
165 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
166 cfun->machine->frame.callee_offset
167 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
168@@ -4799,22 +4801,23 @@ aarch64_layout_frame (void)
169 else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
170 && const_fp_offset < max_push_offset)
171 {
172- /* Frame with large outgoing arguments but a small local area:
173+ /* Frame with large area below the saved registers, but with a
174+ small area above:
175 stp reg1, reg2, [sp, -hard_fp_offset]!
176 stp reg3, reg4, [sp, 16]
177- sub sp, sp, outgoing_args_size */
178+ sub sp, sp, bytes_below_hard_fp */
179 cfun->machine->frame.callee_adjust = const_fp_offset;
180 cfun->machine->frame.final_adjust
181 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
182 }
183 else
184 {
185- /* Frame with large local area and outgoing arguments using frame pointer:
186+ /* General case:
187 sub sp, sp, hard_fp_offset
188 stp x29, x30, [sp, 0]
189 add x29, sp, 0
190 stp reg3, reg4, [sp, 16]
191- sub sp, sp, outgoing_args_size */
192+ sub sp, sp, bytes_below_hard_fp */
193 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
194 cfun->machine->frame.final_adjust
195 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
196@@ -5243,9 +5246,11 @@ aarch64_get_separate_components (void)
197 if (aarch64_register_saved_on_entry (regno))
198 {
199 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
200+
201+ /* Get the offset relative to the register we'll use. */
202 if (!frame_pointer_needed)
203- offset += cfun->machine->frame.frame_size
204- - cfun->machine->frame.hard_fp_offset;
205+ offset += cfun->machine->frame.bytes_below_hard_fp;
206+
207 /* Check that we can access the stack slot of the register with one
208 direct load with no adjustments needed. */
209 if (offset_12bit_unsigned_scaled_p (DImode, offset))
210@@ -5367,8 +5372,8 @@ aarch64_process_components (sbitmap components, bool prologue_p)
211 rtx reg = gen_rtx_REG (mode, regno);
212 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
213 if (!frame_pointer_needed)
214- offset += cfun->machine->frame.frame_size
215- - cfun->machine->frame.hard_fp_offset;
216+ offset += cfun->machine->frame.bytes_below_hard_fp;
217+
218 rtx addr = plus_constant (Pmode, ptr_reg, offset);
219 rtx mem = gen_frame_mem (mode, addr);
220
221@@ -5410,8 +5415,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
222 /* REGNO2 can be saved/restored in a pair with REGNO. */
223 rtx reg2 = gen_rtx_REG (mode, regno2);
224 if (!frame_pointer_needed)
225- offset2 += cfun->machine->frame.frame_size
226- - cfun->machine->frame.hard_fp_offset;
227+ offset2 += cfun->machine->frame.bytes_below_hard_fp;
228 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
229 rtx mem2 = gen_frame_mem (mode, addr2);
230 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
231@@ -5478,10 +5482,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
232 registers. If POLY_SIZE is not large enough to require a probe this function
233 will only adjust the stack. When allocating the stack space
234 FRAME_RELATED_P is then used to indicate if the allocation is frame related.
235- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
236- arguments. If we are then we ensure that any allocation larger than the ABI
237- defined buffer needs a probe so that the invariant of having a 1KB buffer is
238- maintained.
239+ FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
240+ the saved registers. If we are then we ensure that any allocation
241+ larger than the ABI defined buffer needs a probe so that the
242+ invariant of having a 1KB buffer is maintained.
243
244 We emit barriers after each stack adjustment to prevent optimizations from
245 breaking the invariant that we never drop the stack more than a page. This
246@@ -5671,7 +5675,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
247 /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
248 be probed. This maintains the requirement that each page is probed at
249 least once. For initial probing we probe only if the allocation is
250- more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
251+ more than GUARD_SIZE - buffer, and below the saved registers we probe
252 if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
253 GUARD_SIZE. This works that for any allocation that is large enough to
254 trigger a probe here, we'll have at least one, and if they're not large
255diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
256index af0bc3f1881..95831637ba7 100644
257--- a/gcc/config/aarch64/aarch64.h
258+++ b/gcc/config/aarch64/aarch64.h
259@@ -712,9 +712,13 @@ struct GTY (()) aarch64_frame
260 HOST_WIDE_INT saved_varargs_size;
261
262 /* The size of the saved callee-save int/FP registers. */
263-
264 HOST_WIDE_INT saved_regs_size;
265
266+ /* The number of bytes between the bottom of the static frame (the bottom
267+ of the outgoing arguments) and the hard frame pointer. This value is
268+ always a multiple of STACK_BOUNDARY. */
269+ poly_int64 bytes_below_hard_fp;
270+
271 /* Offset from the base of the frame (incomming SP) to the
272 top of the locals area. This value is always a multiple of
273 STACK_BOUNDARY. */
274--
2752.34.1
276
277
278From 4604c4cd0a6c4c26d6594ec9a0383b4d9197d9df Mon Sep 17 00:00:00 2001
279From: Richard Sandiford <richard.sandiford@arm.com>
280Date: Tue, 27 Jun 2023 11:25:40 +0100
281Subject: [PATCH 03/10] aarch64: Rename locals_offset to bytes_above_locals
282MIME-Version: 1.0
283Content-Type: text/plain; charset=UTF-8
284Content-Transfer-Encoding: 8bit
285
286locals_offset was described as:
287
288 /* Offset from the base of the frame (incomming SP) to the
289 top of the locals area. This value is always a multiple of
290 STACK_BOUNDARY. */
291
292This is implicitly an “upside down” view of the frame: the incoming
293SP is at offset 0, and anything N bytes below the incoming SP is at
294offset N (rather than -N).
295
296However, reg_offset instead uses a “right way up” view; that is,
297it views offsets in address terms. Something above X is at a
298positive offset from X and something below X is at a negative
299offset from X.
300
301Also, even on FRAME_GROWS_DOWNWARD targets like AArch64,
302target-independent code views offsets in address terms too:
303locals are allocated at negative offsets to virtual_stack_vars.
304
305It seems confusing to have *_offset fields of the same structure
306using different polarities like this. This patch tries to avoid
307that by renaming locals_offset to bytes_above_locals.
308
309gcc/
310 * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to...
311 (aarch64_frame::bytes_above_locals): ...this.
312 * config/aarch64/aarch64.c (aarch64_layout_frame)
313 (aarch64_initial_elimination_offset): Update accordingly.
314---
315 gcc/config/aarch64/aarch64.c | 9 +++++----
316 gcc/config/aarch64/aarch64.h | 6 +++---
317 2 files changed, 8 insertions(+), 7 deletions(-)
318
319diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
320index 25cf10cc4b9..dcaf491af42 100644
321--- a/gcc/config/aarch64/aarch64.c
322+++ b/gcc/config/aarch64/aarch64.c
323@@ -4759,7 +4759,8 @@ aarch64_layout_frame (void)
324 = (cfun->machine->frame.hard_fp_offset
325 + cfun->machine->frame.bytes_below_hard_fp);
326
327- cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
328+ cfun->machine->frame.bytes_above_locals
329+ = cfun->machine->frame.saved_varargs_size;
330
331 cfun->machine->frame.initial_adjust = 0;
332 cfun->machine->frame.final_adjust = 0;
333@@ -8566,14 +8567,14 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
334
335 if (from == FRAME_POINTER_REGNUM)
336 return cfun->machine->frame.hard_fp_offset
337- - cfun->machine->frame.locals_offset;
338+ - cfun->machine->frame.bytes_above_locals;
339 }
340
341 if (to == STACK_POINTER_REGNUM)
342 {
343 if (from == FRAME_POINTER_REGNUM)
344- return cfun->machine->frame.frame_size
345- - cfun->machine->frame.locals_offset;
346+ return cfun->machine->frame.frame_size
347+ - cfun->machine->frame.bytes_above_locals;
348 }
349
350 return cfun->machine->frame.frame_size;
351diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
352index 95831637ba7..a079a88b4f4 100644
353--- a/gcc/config/aarch64/aarch64.h
354+++ b/gcc/config/aarch64/aarch64.h
355@@ -719,10 +719,10 @@ struct GTY (()) aarch64_frame
356 always a multiple of STACK_BOUNDARY. */
357 poly_int64 bytes_below_hard_fp;
358
359- /* Offset from the base of the frame (incomming SP) to the
360- top of the locals area. This value is always a multiple of
361+ /* The number of bytes between the top of the locals area and the top
362+ of the frame (the incomming SP). This value is always a multiple of
363 STACK_BOUNDARY. */
364- poly_int64 locals_offset;
365+ poly_int64 bytes_above_locals;
366
367 /* Offset from the base of the frame (incomming SP) to the
368 hard_frame_pointer. This value is always a multiple of
369--
3702.34.1
371
372
373From 16016465ff28a75f5e0540cbaeb4eb102fdc3230 Mon Sep 17 00:00:00 2001
374From: Richard Sandiford <richard.sandiford@arm.com>
375Date: Tue, 27 Jun 2023 11:28:11 +0100
376Subject: [PATCH 04/10] aarch64: Rename hard_fp_offset to bytes_above_hard_fp
377MIME-Version: 1.0
378Content-Type: text/plain; charset=UTF-8
379Content-Transfer-Encoding: 8bit
380
381Similarly to the previous locals_offset patch, hard_fp_offset
382was described as:
383
384 /* Offset from the base of the frame (incomming SP) to the
385 hard_frame_pointer. This value is always a multiple of
386 STACK_BOUNDARY. */
387 poly_int64 hard_fp_offset;
388
389which again took an “upside-down” view: higher offsets meant lower
390addresses. This patch renames the field to bytes_above_hard_fp instead.
391
392gcc/
393 * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename
394 to...
395 (aarch64_frame::bytes_above_hard_fp): ...this.
396 * config/aarch64/aarch64.c (aarch64_layout_frame)
397 (aarch64_expand_prologue): Update accordingly.
398 (aarch64_initial_elimination_offset): Likewise.
399---
400 gcc/config/aarch64/aarch64.c | 21 +++++++++++----------
401 gcc/config/aarch64/aarch64.h | 6 +++---
402 2 files changed, 14 insertions(+), 13 deletions(-)
403
404diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
405index dcaf491af42..2681e0c2bb9 100644
406--- a/gcc/config/aarch64/aarch64.c
407+++ b/gcc/config/aarch64/aarch64.c
408@@ -4747,7 +4747,7 @@ aarch64_layout_frame (void)
409 HOST_WIDE_INT varargs_and_saved_regs_size
410 = offset + cfun->machine->frame.saved_varargs_size;
411
412- cfun->machine->frame.hard_fp_offset
413+ cfun->machine->frame.bytes_above_hard_fp
414 = aligned_upper_bound (varargs_and_saved_regs_size
415 + get_frame_size (),
416 STACK_BOUNDARY / BITS_PER_UNIT);
417@@ -4756,7 +4756,7 @@ aarch64_layout_frame (void)
418 gcc_assert (multiple_p (cfun->machine->frame.bytes_below_hard_fp,
419 STACK_BOUNDARY / BITS_PER_UNIT));
420 cfun->machine->frame.frame_size
421- = (cfun->machine->frame.hard_fp_offset
422+ = (cfun->machine->frame.bytes_above_hard_fp
423 + cfun->machine->frame.bytes_below_hard_fp);
424
425 cfun->machine->frame.bytes_above_locals
426@@ -4788,7 +4788,7 @@ aarch64_layout_frame (void)
427 else if (known_lt (cfun->machine->frame.bytes_below_hard_fp
428 + cfun->machine->frame.saved_regs_size, 512)
429 && !(cfun->calls_alloca
430- && known_lt (cfun->machine->frame.hard_fp_offset,
431+ && known_lt (cfun->machine->frame.bytes_above_hard_fp,
432 max_push_offset)))
433 {
434 /* Frame with small area below the saved registers:
435@@ -4797,14 +4797,14 @@ aarch64_layout_frame (void)
436 stp reg3, reg4, [sp, bytes_below_hard_fp + 16] */
437 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
438 cfun->machine->frame.callee_offset
439- = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
440+ = cfun->machine->frame.frame_size - cfun->machine->frame.bytes_above_hard_fp;
441 }
442- else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
443+ else if (cfun->machine->frame.bytes_above_hard_fp.is_constant (&const_fp_offset)
444 && const_fp_offset < max_push_offset)
445 {
446 /* Frame with large area below the saved registers, but with a
447 small area above:
448- stp reg1, reg2, [sp, -hard_fp_offset]!
449+ stp reg1, reg2, [sp, -bytes_above_hard_fp]!
450 stp reg3, reg4, [sp, 16]
451 sub sp, sp, bytes_below_hard_fp */
452 cfun->machine->frame.callee_adjust = const_fp_offset;
453@@ -4814,12 +4814,13 @@ aarch64_layout_frame (void)
454 else
455 {
456 /* General case:
457- sub sp, sp, hard_fp_offset
458+ sub sp, sp, bytes_above_hard_fp
459 stp x29, x30, [sp, 0]
460 add x29, sp, 0
461 stp reg3, reg4, [sp, 16]
462 sub sp, sp, bytes_below_hard_fp */
463- cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
464+ cfun->machine->frame.initial_adjust
465+ = cfun->machine->frame.bytes_above_hard_fp;
466 cfun->machine->frame.final_adjust
467 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
468 }
469@@ -8563,10 +8564,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
470 if (to == HARD_FRAME_POINTER_REGNUM)
471 {
472 if (from == ARG_POINTER_REGNUM)
473- return cfun->machine->frame.hard_fp_offset;
474+ return cfun->machine->frame.bytes_above_hard_fp;
475
476 if (from == FRAME_POINTER_REGNUM)
477- return cfun->machine->frame.hard_fp_offset
478+ return cfun->machine->frame.bytes_above_hard_fp
479 - cfun->machine->frame.bytes_above_locals;
480 }
481
482diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
483index a079a88b4f4..eab6da84a02 100644
484--- a/gcc/config/aarch64/aarch64.h
485+++ b/gcc/config/aarch64/aarch64.h
486@@ -724,10 +724,10 @@ struct GTY (()) aarch64_frame
487 STACK_BOUNDARY. */
488 poly_int64 bytes_above_locals;
489
490- /* Offset from the base of the frame (incomming SP) to the
491- hard_frame_pointer. This value is always a multiple of
492+ /* The number of bytes between the hard_frame_pointer and the top of
493+ the frame (the incomming SP). This value is always a multiple of
494 STACK_BOUNDARY. */
495- poly_int64 hard_fp_offset;
496+ poly_int64 bytes_above_hard_fp;
497
498 /* The size of the frame. This value is the offset from base of the
499 frame (incomming SP) to the stack_pointer. This value is always
500--
5012.34.1
502
503
504From eb2271eb6bb68ec3c9aa9ae4746ea1ee5f18874a Mon Sep 17 00:00:00 2001
505From: Richard Sandiford <richard.sandiford@arm.com>
506Date: Thu, 22 Jun 2023 22:26:30 +0100
507Subject: [PATCH 05/10] aarch64: Tweak frame_size comment
508MIME-Version: 1.0
509Content-Type: text/plain; charset=UTF-8
510Content-Transfer-Encoding: 8bit
511
512This patch fixes another case in which a value was described with
513an “upside-down” view.
514
515gcc/
516 * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment.
517---
518 gcc/config/aarch64/aarch64.h | 4 ++--
519 1 file changed, 2 insertions(+), 2 deletions(-)
520
521diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
522index eab6da84a02..7c4b65ec55b 100644
523--- a/gcc/config/aarch64/aarch64.h
524+++ b/gcc/config/aarch64/aarch64.h
525@@ -729,8 +729,8 @@ struct GTY (()) aarch64_frame
526 STACK_BOUNDARY. */
527 poly_int64 bytes_above_hard_fp;
528
529- /* The size of the frame. This value is the offset from base of the
530- frame (incomming SP) to the stack_pointer. This value is always
531+ /* The size of the frame, i.e. the number of bytes between the bottom
532+ of the outgoing arguments and the incoming SP. This value is always
533 a multiple of STACK_BOUNDARY. */
534 poly_int64 frame_size;
535
536--
5372.34.1
538
539
540From cfed3b87e9351edff1568ade4ef666edc9887639 Mon Sep 17 00:00:00 2001
541From: Richard Sandiford <richard.sandiford@arm.com>
542Date: Tue, 15 Aug 2023 19:05:30 +0100
543Subject: [PATCH 06/10] Backport check-function-bodies support
544
545---
546 gcc/testsuite/lib/scanasm.exp | 191 ++++++++++++++++++++++++++++++++++
547 1 file changed, 191 insertions(+)
548
549diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
550index 35ccbc86fc0..c9af27bf47a 100644
551--- a/gcc/testsuite/lib/scanasm.exp
552+++ b/gcc/testsuite/lib/scanasm.exp
553@@ -546,3 +546,194 @@ proc scan-lto-assembler { args } {
554 verbose "output_file: $output_file"
555 dg-scan "scan-lto-assembler" 1 $testcase $output_file $args
556 }
557+
558+# Read assembly file FILENAME and store a mapping from function names
559+# to function bodies in array RESULT. FILENAME has already been uploaded
560+# locally where necessary and is known to exist.
561+
562+proc parse_function_bodies { filename result } {
563+ upvar $result up_result
564+
565+ # Regexp for the start of a function definition (name in \1).
566+ set label {^([a-zA-Z_]\S+):$}
567+
568+ # Regexp for the end of a function definition.
569+ set terminator {^\s*\.size}
570+
571+ # Regexp for lines that aren't interesting.
572+ set fluff {^\s*(?:\.|//|@|$)}
573+
574+ set fd [open $filename r]
575+ set in_function 0
576+ while { [gets $fd line] >= 0 } {
577+ if { [regexp $label $line dummy function_name] } {
578+ set in_function 1
579+ set function_body ""
580+ } elseif { $in_function } {
581+ if { [regexp $terminator $line] } {
582+ set up_result($function_name) $function_body
583+ set in_function 0
584+ } elseif { ![regexp $fluff $line] } {
585+ append function_body $line "\n"
586+ }
587+ }
588+ }
589+ close $fd
590+}
591+
592+# FUNCTIONS is an array that maps function names to function bodies.
593+# Return true if it contains a definition of function NAME and if
594+# that definition matches BODY_REGEXP.
595+
596+proc check_function_body { functions name body_regexp } {
597+ upvar $functions up_functions
598+
599+ if { ![info exists up_functions($name)] } {
600+ return 0
601+ }
602+ set fn_res [regexp "^$body_regexp\$" $up_functions($name)]
603+ if { !$fn_res } {
604+ verbose -log "body: $body_regexp"
605+ verbose -log "against: $up_functions($name)"
606+ }
607+ return $fn_res
608+}
609+
610+# Check the implementations of functions against expected output. Used as:
611+#
612+# { dg-do { check-function-bodies PREFIX TERMINATOR[ OPTION[ SELECTOR]] } }
613+#
614+# See sourcebuild.texi for details.
615+
616+proc check-function-bodies { args } {
617+ if { [llength $args] < 2 } {
618+ error "too few arguments to check-function-bodies"
619+ }
620+ if { [llength $args] > 4 } {
621+ error "too many arguments to check-function-bodies"
622+ }
623+
624+ if { [llength $args] >= 3 } {
625+ set required_flags [lindex $args 2]
626+
627+ upvar 2 dg-extra-tool-flags extra_tool_flags
628+ set flags $extra_tool_flags
629+
630+ global torture_current_flags
631+ if { [info exists torture_current_flags] } {
632+ append flags " " $torture_current_flags
633+ }
634+ foreach required_flag $required_flags {
635+ switch -- $required_flag {
636+ target -
637+ xfail {
638+ error "misplaced $required_flag in check-function-bodies"
639+ }
640+ }
641+ }
642+ foreach required_flag $required_flags {
643+ if { ![regexp " $required_flag " $flags] } {
644+ return
645+ }
646+ }
647+ }
648+
649+ set xfail_all 0
650+ if { [llength $args] >= 4 } {
651+ switch [dg-process-target [lindex $args 3]] {
652+ "S" { }
653+ "N" { return }
654+ "F" { set xfail_all 1 }
655+ "P" { }
656+ }
657+ }
658+
659+ set testcase [testname-for-summary]
660+ # The name might include a list of options; extract the file name.
661+ set filename [lindex $testcase 0]
662+
663+ global srcdir
664+ set input_filename "$srcdir/$filename"
665+ set output_filename "[file rootname [file tail $filename]].s"
666+
667+ set prefix [lindex $args 0]
668+ set prefix_len [string length $prefix]
669+ set terminator [lindex $args 1]
670+ if { [string equal $terminator ""] } {
671+ set terminator "*/"
672+ }
673+ set terminator_len [string length $terminator]
674+
675+ set have_bodies 0
676+ if { [is_remote host] } {
677+ remote_upload host "$filename"
678+ }
679+ if { [file exists $output_filename] } {
680+ parse_function_bodies $output_filename functions
681+ set have_bodies 1
682+ } else {
683+ verbose -log "$testcase: output file does not exist"
684+ }
685+
686+ set count 0
687+ set function_regexp ""
688+ set label {^(\S+):$}
689+
690+ set lineno 1
691+ set fd [open $input_filename r]
692+ set in_function 0
693+ while { [gets $fd line] >= 0 } {
694+ if { [string equal -length $prefix_len $line $prefix] } {
695+ set line [string trim [string range $line $prefix_len end]]
696+ if { !$in_function } {
697+ if { [regexp "^(.*?\\S)\\s+{(.*)}\$" $line dummy \
698+ line selector] } {
699+ set selector [dg-process-target $selector]
700+ } else {
701+ set selector "P"
702+ }
703+ if { ![regexp $label $line dummy function_name] } {
704+ close $fd
705+ error "check-function-bodies: line $lineno does not have a function label"
706+ }
707+ set in_function 1
708+ set function_regexp ""
709+ } elseif { [string equal $line "("] } {
710+ append function_regexp "(?:"
711+ } elseif { [string equal $line "|"] } {
712+ append function_regexp "|"
713+ } elseif { [string equal $line ")"] } {
714+ append function_regexp ")"
715+ } elseif { [string equal $line "..."] } {
716+ append function_regexp ".*"
717+ } else {
718+ append function_regexp "\t" $line "\n"
719+ }
720+ } elseif { [string equal -length $terminator_len $line $terminator] } {
721+ if { ![string equal $selector "N"] } {
722+ if { $xfail_all || [string equal $selector "F"] } {
723+ setup_xfail "*-*-*"
724+ }
725+ set testname "$testcase check-function-bodies $function_name"
726+ if { !$have_bodies } {
727+ unresolved $testname
728+ } elseif { [check_function_body functions $function_name \
729+ $function_regexp] } {
730+ pass $testname
731+ } else {
732+ fail $testname
733+ }
734+ }
735+ set in_function 0
736+ incr count
737+ }
738+ incr lineno
739+ }
740+ close $fd
741+ if { $in_function } {
742+ error "check-function-bodies: missing \"$terminator\""
743+ }
744+ if { $count == 0 } {
745+ error "check-function-bodies: no matches found"
746+ }
747+}
748--
7492.34.1
750
751
752From 4dd8925d95d3d6d89779b494b5f4cfadcf9fa96e Mon Sep 17 00:00:00 2001
753From: Richard Sandiford <richard.sandiford@arm.com>
754Date: Tue, 27 Jun 2023 15:11:44 +0100
755Subject: [PATCH 07/10] aarch64: Tweak stack clash boundary condition
756
757The AArch64 ABI says that, when stack clash protection is used,
758there can be a maximum of 1KiB of unprobed space at sp on entry
759to a function. Therefore, we need to probe when allocating
760>= guard_size - 1KiB of data (>= rather than >). This is what
761GCC does.
762
763If an allocation is exactly guard_size bytes, it is enough to allocate
764those bytes and probe once at offset 1024. It isn't possible to use a
765single probe at any other offset: higher would complicate later code,
766by leaving more unprobed space than usual, while lower would risk
767leaving an entire page unprobed. For simplicity, the code probes all
768allocations at offset 1024.
769
770Some register saves also act as probes. If we need to allocate
771more space below the last such register save probe, we need to
772probe the allocation if it is > 1KiB. Again, this allocation is
773then sometimes (but not always) probed at offset 1024. This sort of
774allocation is currently only used for outgoing arguments, which are
775rarely this big.
776
777However, the code also probed if this final outgoing-arguments
778allocation was == 1KiB, rather than just > 1KiB. This isn't
779necessary, since the register save then probes at offset 1024
780as required. Continuing to probe allocations of exactly 1KiB
781would complicate later patches.
782
783gcc/
784 * config/aarch64/aarch64.c (aarch64_allocate_and_probe_stack_space):
785 Don't probe final allocations that are exactly 1KiB in size (after
786 unprobed space above the final allocation has been deducted).
787
788gcc/testsuite/
789 * gcc.target/aarch64/stack-check-prologue-17.c: New test.
790---
791 gcc/config/aarch64/aarch64.c | 6 +-
792 .../aarch64/stack-check-prologue-17.c | 55 +++++++++++++++++++
793 2 files changed, 60 insertions(+), 1 deletion(-)
794 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
795
796diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
797index 2681e0c2bb9..4c9e11cd7cf 100644
798--- a/gcc/config/aarch64/aarch64.c
799+++ b/gcc/config/aarch64/aarch64.c
800@@ -5506,6 +5506,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
801 HOST_WIDE_INT guard_size
802 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
803 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
804+ HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
805+ gcc_assert (multiple_p (poly_size, byte_sp_alignment));
806 /* When doing the final adjustment for the outgoing argument size we can't
807 assume that LR was saved at position 0. So subtract it's offset from the
808 ABI safe buffer so that we don't accidentally allow an adjustment that
809@@ -5513,7 +5515,9 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
810 probing. */
811 HOST_WIDE_INT min_probe_threshold
812 = final_adjustment_p
813- ? guard_used_by_caller - cfun->machine->frame.reg_offset[LR_REGNUM]
814+ ? (guard_used_by_caller
815+ + byte_sp_alignment
816+ - cfun->machine->frame.reg_offset[LR_REGNUM])
817 : guard_size - guard_used_by_caller;
818
819 poly_int64 frame_size = cfun->machine->frame.frame_size;
820diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
821new file mode 100644
822index 00000000000..0d8a25d73a2
823--- /dev/null
824+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
825@@ -0,0 +1,55 @@
826+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
827+/* { dg-final { check-function-bodies "**" "" } } */
828+
829+void f(int, ...);
830+void g();
831+
832+/*
833+** test1:
834+** ...
835+** str x30, \[sp\]
836+** sub sp, sp, #1024
837+** cbnz w0, .*
838+** bl g
839+** ...
840+*/
841+int test1(int z) {
842+ __uint128_t x = 0;
843+ int y[0x400];
844+ if (z)
845+ {
846+ f(0, 0, 0, 0, 0, 0, 0, &y,
847+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
848+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
849+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
850+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
851+ }
852+ g();
853+ return 1;
854+}
855+
856+/*
857+** test2:
858+** ...
859+** str x30, \[sp\]
860+** sub sp, sp, #1040
861+** str xzr, \[sp\]
862+** cbnz w0, .*
863+** bl g
864+** ...
865+*/
866+int test2(int z) {
867+ __uint128_t x = 0;
868+ int y[0x400];
869+ if (z)
870+ {
871+ f(0, 0, 0, 0, 0, 0, 0, &y,
872+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
873+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
874+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
875+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
876+ x);
877+ }
878+ g();
879+ return 1;
880+}
881--
8822.34.1
883
884
885From 12517baf6c88447e3bda3a459ac4c29d61f84e6c Mon Sep 17 00:00:00 2001
886From: Richard Sandiford <richard.sandiford@arm.com>
887Date: Tue, 27 Jun 2023 15:12:55 +0100
888Subject: [PATCH 08/10] aarch64: Put LR save probe in first 16 bytes
889
890-fstack-clash-protection uses the save of LR as a probe for the next
891allocation. The next allocation could be:
892
893* another part of the static frame, e.g. when allocating SVE save slots
894 or outgoing arguments
895
896* an alloca in the same function
897
898* an allocation made by a callee function
899
900However, when -fomit-frame-pointer is used, the LR save slot is placed
901above the other GPR save slots. It could therefore be up to 80 bytes
902above the base of the GPR save area (which is also the hard fp address).
903
904aarch64_allocate_and_probe_stack_space took this into account when
905deciding how much subsequent space could be allocated without needing
906a probe. However, it interacted badly with:
907
908 /* If doing a small final adjustment, we always probe at offset 0.
909 This is done to avoid issues when LR is not at position 0 or when
910 the final adjustment is smaller than the probing offset. */
911 else if (final_adjustment_p && rounded_size == 0)
912 residual_probe_offset = 0;
913
914which forces any allocation that is smaller than the guard page size
915to be probed at offset 0 rather than the usual offset 1024. It was
916therefore possible to construct cases in which we had:
917
918* a probe using LR at SP + 80 bytes (or some other value >= 16)
919* an allocation of the guard page size - 16 bytes
920* a probe at SP + 0
921
922which allocates guard page size + 64 consecutive unprobed bytes.
923
924This patch requires the LR probe to be in the first 16 bytes of the
925save area when stack clash protection is active. Doing it
926unconditionally would cause code-quality regressions.
927
928gcc/
929 * config/aarch64/aarch64.c (aarch64_layout_frame): Ensure that
930 the LR save slot is in the first 16 bytes of the register save area.
931 (aarch64_allocate_and_probe_stack_space): Remove workaround for
932 when LR was not in the first 16 bytes.
933
934gcc/testsuite/
935 * gcc.target/aarch64/stack-check-prologue-18.c: New test.
936---
937 gcc/config/aarch64/aarch64.c | 50 +++++----
938 .../aarch64/stack-check-prologue-18.c | 100 ++++++++++++++++++
939 2 files changed, 127 insertions(+), 23 deletions(-)
940 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
941
942diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
943index 4c9e11cd7cf..1e8467fdd03 100644
944--- a/gcc/config/aarch64/aarch64.c
945+++ b/gcc/config/aarch64/aarch64.c
946@@ -4686,15 +4686,31 @@ aarch64_layout_frame (void)
947
948 cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size;
949
950+#define ALLOCATE_GPR_SLOT(REGNO) \
951+ do \
952+ { \
953+ cfun->machine->frame.reg_offset[REGNO] = offset; \
954+ if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM) \
955+ cfun->machine->frame.wb_candidate1 = (REGNO); \
956+ else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM) \
957+ cfun->machine->frame.wb_candidate2 = (REGNO); \
958+ offset += UNITS_PER_WORD; \
959+ } \
960+ while (0)
961+
962 if (cfun->machine->frame.emit_frame_chain)
963 {
964 /* FP and LR are placed in the linkage record. */
965- cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
966- cfun->machine->frame.wb_candidate1 = R29_REGNUM;
967- cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
968- cfun->machine->frame.wb_candidate2 = R30_REGNUM;
969- offset = 2 * UNITS_PER_WORD;
970+ ALLOCATE_GPR_SLOT (R29_REGNUM);
971+ ALLOCATE_GPR_SLOT (R30_REGNUM);
972 }
973+ else if (flag_stack_clash_protection
974+ && cfun->machine->frame.reg_offset[R30_REGNUM] == SLOT_REQUIRED)
975+ /* Put the LR save slot first, since it makes a good choice of probe
976+ for stack clash purposes. The idea is that the link register usually
977+ has to be saved before a call anyway, and so we lose little by
978+ stopping it from being individually shrink-wrapped. */
979+ ALLOCATE_GPR_SLOT (R30_REGNUM);
980
981 /* With stack-clash, LR must be saved in non-leaf functions. */
982 gcc_assert (crtl->is_leaf
983@@ -4704,14 +4720,9 @@ aarch64_layout_frame (void)
984 /* Now assign stack slots for them. */
985 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
986 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
987- {
988- cfun->machine->frame.reg_offset[regno] = offset;
989- if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
990- cfun->machine->frame.wb_candidate1 = regno;
991- else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
992- cfun->machine->frame.wb_candidate2 = regno;
993- offset += UNITS_PER_WORD;
994- }
995+ ALLOCATE_GPR_SLOT (regno);
996+
997+#undef ALLOCATE_GPR_SLOT
998
999 HOST_WIDE_INT max_int_offset = offset;
1000 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1001@@ -5508,16 +5519,9 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
1002 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
1003 HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
1004 gcc_assert (multiple_p (poly_size, byte_sp_alignment));
1005- /* When doing the final adjustment for the outgoing argument size we can't
1006- assume that LR was saved at position 0. So subtract it's offset from the
1007- ABI safe buffer so that we don't accidentally allow an adjustment that
1008- would result in an allocation larger than the ABI buffer without
1009- probing. */
1010 HOST_WIDE_INT min_probe_threshold
1011 = final_adjustment_p
1012- ? (guard_used_by_caller
1013- + byte_sp_alignment
1014- - cfun->machine->frame.reg_offset[LR_REGNUM])
1015+ ? guard_used_by_caller + byte_sp_alignment
1016 : guard_size - guard_used_by_caller;
1017
1018 poly_int64 frame_size = cfun->machine->frame.frame_size;
1019@@ -5697,8 +5701,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
1020 if (final_adjustment_p && rounded_size != 0)
1021 min_probe_threshold = 0;
1022 /* If doing a small final adjustment, we always probe at offset 0.
1023- This is done to avoid issues when LR is not at position 0 or when
1024- the final adjustment is smaller than the probing offset. */
1025+ This is done to avoid issues when the final adjustment is smaller
1026+ than the probing offset. */
1027 else if (final_adjustment_p && rounded_size == 0)
1028 residual_probe_offset = 0;
1029
1030diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
1031new file mode 100644
1032index 00000000000..82447d20fff
1033--- /dev/null
1034+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
1035@@ -0,0 +1,100 @@
1036+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
1037+/* { dg-final { check-function-bodies "**" "" } } */
1038+
1039+void f(int, ...);
1040+void g();
1041+
1042+/*
1043+** test1:
1044+** ...
1045+** str x30, \[sp\]
1046+** sub sp, sp, #4064
1047+** str xzr, \[sp\]
1048+** cbnz w0, .*
1049+** bl g
1050+** ...
1051+** str x26, \[sp, #?4128\]
1052+** ...
1053+*/
1054+int test1(int z) {
1055+ __uint128_t x = 0;
1056+ int y[0x400];
1057+ if (z)
1058+ {
1059+ asm volatile ("" :::
1060+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
1061+ f(0, 0, 0, 0, 0, 0, 0, &y,
1062+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1063+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1064+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1065+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1066+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1067+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1068+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1069+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1070+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1071+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1072+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1073+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1074+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1075+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1076+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1077+ x, x, x, x, x, x, x, x, x, x, x, x, x, x);
1078+ }
1079+ g();
1080+ return 1;
1081+}
1082+
1083+/*
1084+** test2:
1085+** ...
1086+** str x30, \[sp\]
1087+** sub sp, sp, #1040
1088+** str xzr, \[sp\]
1089+** cbnz w0, .*
1090+** bl g
1091+** ...
1092+*/
1093+int test2(int z) {
1094+ __uint128_t x = 0;
1095+ int y[0x400];
1096+ if (z)
1097+ {
1098+ asm volatile ("" :::
1099+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
1100+ f(0, 0, 0, 0, 0, 0, 0, &y,
1101+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1102+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1103+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1104+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1105+ x);
1106+ }
1107+ g();
1108+ return 1;
1109+}
1110+
1111+/*
1112+** test3:
1113+** ...
1114+** str x30, \[sp\]
1115+** sub sp, sp, #1024
1116+** cbnz w0, .*
1117+** bl g
1118+** ...
1119+*/
1120+int test3(int z) {
1121+ __uint128_t x = 0;
1122+ int y[0x400];
1123+ if (z)
1124+ {
1125+ asm volatile ("" :::
1126+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
1127+ f(0, 0, 0, 0, 0, 0, 0, &y,
1128+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1129+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1130+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1131+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
1132+ }
1133+ g();
1134+ return 1;
1135+}
1136--
11372.34.1
1138
1139
1140From f2684e63652bb251d22c79e40081c646df1f36b6 Mon Sep 17 00:00:00 2001
1141From: Richard Sandiford <richard.sandiford@arm.com>
1142Date: Tue, 8 Aug 2023 01:57:26 +0100
1143Subject: [PATCH 09/10] aarch64: Simplify probe of final frame allocation
1144
1145Previous patches ensured that the final frame allocation only needs
1146a probe when the size is strictly greater than 1KiB. It's therefore
1147safe to use the normal 1024 probe offset in all cases.
1148
1149The main motivation for doing this is to simplify the code and
1150remove the number of special cases.
1151
1152gcc/
1153 * config/aarch64/aarch64.c (aarch64_allocate_and_probe_stack_space):
1154 Always probe the residual allocation at offset 1024, asserting
1155 that that is in range.
1156
1157gcc/testsuite/
1158 * gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
1159 to be at offset 1024 rather than offset 0.
1160 * gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
1161---
1162 gcc/config/aarch64/aarch64.c | 12 ++++--------
1163 .../gcc.target/aarch64/stack-check-prologue-17.c | 2 +-
1164 .../gcc.target/aarch64/stack-check-prologue-18.c | 7 +++++--
1165 3 files changed, 10 insertions(+), 11 deletions(-)
1166
1167diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
1168index 1e8467fdd03..705f719a2ea 100644
1169--- a/gcc/config/aarch64/aarch64.c
1170+++ b/gcc/config/aarch64/aarch64.c
1171@@ -5695,16 +5695,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
1172 are still safe. */
1173 if (residual)
1174 {
1175- HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
1176+ gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
1177+
1178 /* If we're doing final adjustments, and we've done any full page
1179 allocations then any residual needs to be probed. */
1180 if (final_adjustment_p && rounded_size != 0)
1181 min_probe_threshold = 0;
1182- /* If doing a small final adjustment, we always probe at offset 0.
1183- This is done to avoid issues when the final adjustment is smaller
1184- than the probing offset. */
1185- else if (final_adjustment_p && rounded_size == 0)
1186- residual_probe_offset = 0;
1187
1188 aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
1189 if (residual >= min_probe_threshold)
1190@@ -5715,8 +5711,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
1191 HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
1192 "\n", residual);
1193
1194- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
1195- residual_probe_offset));
1196+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
1197+ guard_used_by_caller));
1198 emit_insn (gen_blockage ());
1199 }
1200 }
1201diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
1202index 0d8a25d73a2..f0ec1389771 100644
1203--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
1204+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
1205@@ -33,7 +33,7 @@ int test1(int z) {
1206 ** ...
1207 ** str x30, \[sp\]
1208 ** sub sp, sp, #1040
1209-** str xzr, \[sp\]
1210+** str xzr, \[sp, #?1024\]
1211 ** cbnz w0, .*
1212 ** bl g
1213 ** ...
1214diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
1215index 82447d20fff..71d33ba34e9 100644
1216--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
1217+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
1218@@ -8,8 +8,9 @@ void g();
1219 ** test1:
1220 ** ...
1221 ** str x30, \[sp\]
1222+** ...
1223 ** sub sp, sp, #4064
1224-** str xzr, \[sp\]
1225+** str xzr, \[sp, #?1024\]
1226 ** cbnz w0, .*
1227 ** bl g
1228 ** ...
1229@@ -49,8 +50,9 @@ int test1(int z) {
1230 ** test2:
1231 ** ...
1232 ** str x30, \[sp\]
1233+** ...
1234 ** sub sp, sp, #1040
1235-** str xzr, \[sp\]
1236+** str xzr, \[sp, #?1024\]
1237 ** cbnz w0, .*
1238 ** bl g
1239 ** ...
1240@@ -77,6 +79,7 @@ int test2(int z) {
1241 ** test3:
1242 ** ...
1243 ** str x30, \[sp\]
1244+** ...
1245 ** sub sp, sp, #1024
1246 ** cbnz w0, .*
1247 ** bl g
1248--
12492.34.1
1250
1251
1252From bf3eeaa0182a92987570d9c787bd45079eebf528 Mon Sep 17 00:00:00 2001
1253From: Richard Sandiford <richard.sandiford@arm.com>
1254Date: Thu, 15 Jun 2023 19:16:52 +0100
1255Subject: [PATCH 10/10] aarch64: Make stack smash canary protect saved
1256 registers
1257
1258AArch64 normally puts the saved registers near the bottom of the frame,
1259immediately above any dynamic allocations. But this means that a
1260stack-smash attack on those dynamic allocations could overwrite the
1261saved registers without needing to reach as far as the stack smash
1262canary.
1263
1264The same thing could also happen for variable-sized arguments that are
1265passed by value, since those are allocated before a call and popped on
1266return.
1267
1268This patch avoids that by putting the locals (and thus the canary) below
1269the saved registers when stack smash protection is active.
1270
1271The patch fixes CVE-2023-4039.
1272
1273gcc/
1274 * config/aarch64/aarch64.c (aarch64_save_regs_above_locals_p):
1275 New function.
1276 (aarch64_layout_frame): Use it to decide whether locals should
1277 go above or below the saved registers.
1278 (aarch64_expand_prologue): Update stack layout comment.
1279 Emit a stack tie after the final adjustment.
1280
1281gcc/testsuite/
1282 * gcc.target/aarch64/stack-protector-8.c: New test.
1283 * gcc.target/aarch64/stack-protector-9.c: Likewise.
1284---
1285 gcc/config/aarch64/aarch64.c | 46 +++++++++++++--
1286 .../gcc.target/aarch64/stack-protector-8.c | 58 +++++++++++++++++++
1287 .../gcc.target/aarch64/stack-protector-9.c | 33 +++++++++++
1288 3 files changed, 133 insertions(+), 4 deletions(-)
1289 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
1290 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
1291
1292diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
1293index 705f719a2ea..3d094214fac 100644
1294--- a/gcc/config/aarch64/aarch64.c
1295+++ b/gcc/config/aarch64/aarch64.c
1296@@ -4622,6 +4622,20 @@ aarch64_needs_frame_chain (void)
1297 return aarch64_use_frame_pointer;
1298 }
1299
1300+/* Return true if the current function should save registers above
1301+ the locals area, rather than below it. */
1302+
1303+static bool
1304+aarch64_save_regs_above_locals_p ()
1305+{
1306+ /* When using stack smash protection, make sure that the canary slot
1307+ comes between the locals and the saved registers. Otherwise,
1308+ it would be possible for a carefully sized smash attack to change
1309+ the saved registers (particularly LR and FP) without reaching the
1310+ canary. */
1311+ return crtl->stack_protect_guard;
1312+}
1313+
1314 /* Mark the registers that need to be saved by the callee and calculate
1315 the size of the callee-saved registers area and frame record (both FP
1316 and LR may be omitted). */
1317@@ -4686,6 +4700,16 @@ aarch64_layout_frame (void)
1318
1319 cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size;
1320
1321+ bool regs_at_top_p = aarch64_save_regs_above_locals_p ();
1322+
1323+ if (regs_at_top_p)
1324+ {
1325+ cfun->machine->frame.bytes_below_hard_fp += get_frame_size ();
1326+ cfun->machine->frame.bytes_below_hard_fp
1327+ = aligned_upper_bound (cfun->machine->frame.bytes_below_hard_fp,
1328+ STACK_BOUNDARY / BITS_PER_UNIT);
1329+ }
1330+
1331 #define ALLOCATE_GPR_SLOT(REGNO) \
1332 do \
1333 { \
1334@@ -4758,9 +4782,11 @@ aarch64_layout_frame (void)
1335 HOST_WIDE_INT varargs_and_saved_regs_size
1336 = offset + cfun->machine->frame.saved_varargs_size;
1337
1338+ cfun->machine->frame.bytes_above_hard_fp = varargs_and_saved_regs_size;
1339+ if (!regs_at_top_p)
1340+ cfun->machine->frame.bytes_above_hard_fp += get_frame_size ();
1341 cfun->machine->frame.bytes_above_hard_fp
1342- = aligned_upper_bound (varargs_and_saved_regs_size
1343- + get_frame_size (),
1344+ = aligned_upper_bound (cfun->machine->frame.bytes_above_hard_fp,
1345 STACK_BOUNDARY / BITS_PER_UNIT);
1346
1347 /* Both these values are already aligned. */
1348@@ -4772,6 +4798,9 @@ aarch64_layout_frame (void)
1349
1350 cfun->machine->frame.bytes_above_locals
1351 = cfun->machine->frame.saved_varargs_size;
1352+ if (regs_at_top_p)
1353+ cfun->machine->frame.bytes_above_locals
1354+ += cfun->machine->frame.saved_regs_size;
1355
1356 cfun->machine->frame.initial_adjust = 0;
1357 cfun->machine->frame.final_adjust = 0;
1358@@ -5764,10 +5793,10 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
1359 | for register varargs |
1360 | |
1361 +-------------------------------+
1362- | local variables | <-- frame_pointer_rtx
1363+ | local variables (1) | <-- frame_pointer_rtx
1364 | |
1365 +-------------------------------+
1366- | padding | \
1367+ | padding (1) | \
1368 +-------------------------------+ |
1369 | callee-saved registers | | frame.saved_regs_size
1370 +-------------------------------+ |
1371@@ -5775,6 +5804,10 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
1372 +-------------------------------+ |
1373 | FP' | / <- hard_frame_pointer_rtx (aligned)
1374 +-------------------------------+
1375+ | local variables (2) |
1376+ +-------------------------------+
1377+ | padding (2) |
1378+ +-------------------------------+
1379 | dynamic allocation |
1380 +-------------------------------+
1381 | padding |
1382@@ -5784,6 +5817,9 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
1383 +-------------------------------+
1384 | | <-- stack_pointer_rtx (aligned)
1385
1386+ The regions marked (1) and (2) are mutually exclusive. (2) is used
1387+ when aarch64_save_regs_above_locals_p is true.
1388+
1389 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
1390 but leave frame_pointer_rtx and hard_frame_pointer_rtx
1391 unchanged.
1392@@ -5937,6 +5973,8 @@ aarch64_expand_prologue (void)
1393 that is assumed by the called. */
1394 aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
1395 !frame_pointer_needed, true);
1396+ if (emit_frame_chain && maybe_ne (final_adjust, 0))
1397+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
1398 }
1399
1400 /* Return TRUE if we can use a simple_return insn.
1401diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
1402new file mode 100644
1403index 00000000000..c5e7deef6c1
1404--- /dev/null
1405+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
1406@@ -0,0 +1,58 @@
1407+/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */
1408+/* { dg-final { check-function-bodies "**" "" } } */
1409+
1410+void g(void *);
1411+
1412+/*
1413+** test1:
1414+** sub sp, sp, #288
1415+** stp x29, x30, \[sp, #?272\]
1416+** add x29, sp, #?272
1417+** mrs (x[0-9]+), tpidr2_el0
1418+** ldr (x[0-9]+), \[\1, #?16\]
1419+** str \2, \[sp, #?264\]
1420+** mov \2, *0
1421+** add x0, sp, #?8
1422+** bl g
1423+** ...
1424+** mrs .*
1425+** ...
1426+** bne .*
1427+** ...
1428+** ldp x29, x30, \[sp, #?272\]
1429+** add sp, sp, #?288
1430+** ret
1431+** bl __stack_chk_fail
1432+*/
1433+int test1() {
1434+ int y[0x40];
1435+ g(y);
1436+ return 1;
1437+}
1438+
1439+/*
1440+** test2:
1441+** stp x29, x30, \[sp, #?-16\]!
1442+** mov x29, sp
1443+** sub sp, sp, #1040
1444+** mrs (x[0-9]+), tpidr2_el0
1445+** ldr (x[0-9]+), \[\1, #?16\]
1446+** str \2, \[sp, #?1032\]
1447+** mov \2, *0
1448+** add x0, sp, #?8
1449+** bl g
1450+** ...
1451+** mrs .*
1452+** ...
1453+** bne .*
1454+** ...
1455+** add sp, sp, #?1040
1456+** ldp x29, x30, \[sp\], #?16
1457+** ret
1458+** bl __stack_chk_fail
1459+*/
1460+int test2() {
1461+ int y[0x100];
1462+ g(y);
1463+ return 1;
1464+}
1465diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
1466new file mode 100644
1467index 00000000000..58f322aa480
1468--- /dev/null
1469+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
1470@@ -0,0 +1,33 @@
1471+/* { dg-options "-O2 -mcpu=neoverse-v1 -fstack-protector-all" } */
1472+/* { dg-final { check-function-bodies "**" "" } } */
1473+
1474+/*
1475+** main:
1476+** ...
1477+** stp x29, x30, \[sp, #?-[0-9]+\]!
1478+** ...
1479+** sub sp, sp, #[0-9]+
1480+** ...
1481+** str x[0-9]+, \[x29, #?-8\]
1482+** ...
1483+*/
1484+int f(const char *);
1485+void g(void *);
1486+int main(int argc, char* argv[])
1487+{
1488+ int a;
1489+ int b;
1490+ char c[2+f(argv[1])];
1491+ int d[0x100];
1492+ char y;
1493+
1494+ y=42; a=4; b=10;
1495+ c[0] = 'h'; c[1] = '\0';
1496+
1497+ c[f(argv[2])] = '\0';
1498+
1499+ __builtin_printf("%d %d\n%s\n", a, b, c);
1500+ g(d);
1501+
1502+ return 0;
1503+}
1504--
15052.34.1
1506