diff options
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch')
-rw-r--r-- | meta/recipes-devtools/gcc/gcc/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch | 658 |
1 files changed, 658 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch b/meta/recipes-devtools/gcc/gcc/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch new file mode 100644 index 0000000000..716a367172 --- /dev/null +++ b/meta/recipes-devtools/gcc/gcc/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch | |||
@@ -0,0 +1,658 @@ | |||
1 | Upstream-Status: Backport | ||
2 | Signed-off-by: Ross Burton <ross.burton@arm.com> | ||
3 | |||
4 | From a5e7efc40ed841934c1d913f39476afa17d8e5f7 Mon Sep 17 00:00:00 2001 | ||
5 | From: Matthew Malcomson <matthew.malcomson@arm.com> | ||
6 | Date: Thu, 9 Jul 2020 09:11:59 +0100 | ||
7 | Subject: [PATCH 3/3] aarch64: Mitigate SLS for BLR instruction | ||
8 | |||
9 | This patch introduces the mitigation for Straight Line Speculation past | ||
10 | the BLR instruction. | ||
11 | |||
12 | This mitigation replaces BLR instructions with a BL to a stub which uses | ||
13 | a BR to jump to the original value. These function stubs are then | ||
14 | appended with a speculation barrier to ensure no straight line | ||
15 | speculation happens after these jumps. | ||
16 | |||
17 | When optimising for speed we use a set of stubs for each function since | ||
18 | this should help the branch predictor make more accurate predictions | ||
19 | about where a stub should branch. | ||
20 | |||
21 | When optimising for size we use one set of stubs for all functions. | ||
22 | This set of stubs can have human readable names, and we are using | ||
23 | `__call_indirect_x<N>` for register x<N>. | ||
24 | |||
25 | When BTI branch protection is enabled the BLR instruction can jump to a | ||
26 | `BTI c` instruction using any register, while the BR instruction can | ||
27 | only jump to a `BTI c` instruction using the x16 or x17 registers. | ||
28 | Hence, in order to ensure this transformation is safe we mov the value | ||
29 | of the original register into x16 and use x16 for the BR. | ||
30 | |||
31 | As an example when optimising for size: | ||
32 | a | ||
33 | BLR x0 | ||
34 | instruction would get transformed to something like | ||
35 | BL __call_indirect_x0 | ||
36 | where __call_indirect_x0 labels a thunk that contains | ||
37 | __call_indirect_x0: | ||
38 | MOV X16, X0 | ||
39 | BR X16 | ||
40 | <speculation barrier> | ||
41 | |||
42 | The first version of this patch used local symbols specific to a | ||
43 | compilation unit to try and avoid relocations. | ||
44 | This was mistaken since functions coming from the same compilation unit | ||
45 | can still be in different sections, and the assembler will insert | ||
46 | relocations at jumps between sections. | ||
47 | |||
48 | On any relocation the linker is permitted to emit a veneer to handle | ||
49 | jumps between symbols that are very far apart. The registers x16 and | ||
50 | x17 may be clobbered by these veneers. | ||
51 | Hence the function stubs cannot rely on the values of x16 and x17 being | ||
52 | the same as just before the function stub is called. | ||
53 | |||
54 | The same can be said for the hot/cold partitioning of single functions, | |
55 | so function-local stubs have the same restriction. | ||
56 | |||
57 | This updated version of the patch never emits function stubs for x16 and | ||
58 | x17, and instead forces other registers to be used. | ||
59 | |||
60 | Given the above, there is now no benefit to local symbols (since they | ||
61 | are not enough to avoid dealing with linker intricacies). This patch | ||
62 | now uses global symbols with hidden visibility each stored in their own | ||
63 | COMDAT section. This means stubs can be shared between compilation | ||
64 | units while still avoiding the PLT indirection. | ||
65 | |||
66 | This patch also removes the `__call_indirect_x30` stub (and | ||
67 | function-local equivalent) which would simply jump back to the original | ||
68 | location. | ||
69 | |||
70 | The function-local stubs are emitted to the assembly output file in one | ||
71 | chunk, which means we need not add the speculation barrier directly | ||
72 | after each one. | ||
73 | This is because we know for certain that the instructions directly after | ||
74 | the BR in all but the last function stub will be from another one of | ||
75 | these stubs and hence will not contain a speculation gadget. | ||
76 | Instead we add a speculation barrier at the end of the sequence of | ||
77 | stubs. | ||
78 | |||
79 | The global stubs are emitted in COMDAT/.linkonce sections by | ||
80 | themselves so that the linker can remove duplicates from multiple object | ||
81 | files. This means they are not emitted in one chunk, and each one must | ||
82 | include the speculation barrier. | ||
83 | |||
84 | Another difference is that since the global stubs are shared across | ||
85 | compilation units we do not know that all functions will be targeting an | ||
86 | architecture supporting the SB instruction. | ||
87 | Rather than provide multiple stubs for each architecture, we provide a | ||
88 | stub that will work for all architectures -- using the DSB+ISB barrier. | ||
89 | |||
90 | This mitigation does not apply for BLR instructions in the following | ||
91 | places: | ||
92 | - Some accesses to thread-local variables use a code sequence with a BLR | ||
93 | instruction. This code sequence is part of the binary interface between | ||
94 | compiler and linker. If this BLR instruction needs to be mitigated, it'd | ||
95 | probably be best to do so in the linker. It seems that the code sequence | ||
96 | for thread-local variable access is unlikely to lead to a Spectre Revelation | |
97 | Gadget. | ||
98 | - PLT stubs are produced by the linker and each contain a BLR instruction. | ||
99 | It seems that at most only after the last PLT stub a Spectre Revelation | |
100 | Gadget might appear. | ||
101 | |||
102 | Testing: | ||
103 | Bootstrap and regtest on AArch64 | ||
104 | (with BOOT_CFLAGS="-mharden-sls=retbr,blr") | ||
105 | Used a temporary hack(1) in gcc-dg.exp to use these options on every | ||
106 | test in the testsuite, a slight modification to emit the speculation | ||
107 | barrier after every function stub, and a script to check that the | ||
108 | output never emitted a BLR, or unmitigated BR or RET instruction. | ||
109 | Similarly tested on an aarch64-none-elf cross-compiler. | |
110 | |||
111 | 1) Temporary hack emitted a speculation barrier at the end of every stub | ||
112 | function, and used a script to ensure that: | ||
113 | a) Every RET or BR is immediately followed by a speculation barrier. | ||
114 | b) No BLR instruction is emitted by compiler. | ||
115 | |||
116 | gcc/ChangeLog: | ||
117 | |||
118 | * config/aarch64/aarch64-protos.h (aarch64_indirect_call_asm): | ||
119 | New declaration. | ||
120 | * config/aarch64/aarch64.c (aarch64_regno_regclass): Handle new | ||
121 | stub registers class. | ||
122 | (aarch64_class_max_nregs): Likewise. | ||
123 | (aarch64_register_move_cost): Likewise. | ||
124 | (aarch64_sls_shared_thunks): Global array to store stub labels. | ||
125 | (aarch64_sls_emit_function_stub): New. | ||
126 | (aarch64_sls_create_blr_label): New. | |
127 | (aarch64_sls_emit_blr_function_thunks): New. | ||
128 | (aarch64_sls_emit_shared_blr_thunks): New. | ||
129 | (aarch64_asm_file_end): New. | ||
130 | (aarch64_indirect_call_asm): New. | ||
131 | (TARGET_ASM_FILE_END): Use aarch64_asm_file_end. | ||
132 | (TARGET_ASM_FUNCTION_EPILOGUE): Use | ||
133 | aarch64_sls_emit_blr_function_thunks. | ||
134 | * config/aarch64/aarch64.h (STUB_REGNUM_P): New. | |
135 | (enum reg_class): Add STUB_REGS class. | ||
136 | (machine_function): Introduce `call_via` array for | ||
137 | function-local stub labels. | ||
138 | * config/aarch64/aarch64.md (*call_insn, *call_value_insn): Use | ||
139 | aarch64_indirect_call_asm to emit code when hardening BLR | ||
140 | instructions. | ||
141 | * config/aarch64/constraints.md (Ucr): New constraint | ||
142 | representing registers for indirect calls. Is GENERAL_REGS | ||
143 | usually, and STUB_REGS when hardening BLR instruction against | ||
144 | SLS. | ||
145 | * config/aarch64/predicates.md (aarch64_general_reg): STUB_REGS class | ||
146 | is also a general register. | ||
147 | |||
148 | gcc/testsuite/ChangeLog: | ||
149 | |||
150 | * gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c: New test. | ||
151 | * gcc.target/aarch64/sls-mitigation/sls-miti-blr.c: New test. | ||
152 | --- | ||
153 | gcc/config/aarch64/aarch64-protos.h | 1 + | ||
154 | gcc/config/aarch64/aarch64.c | 225 ++++++++++++++++++++- | ||
155 | gcc/config/aarch64/aarch64.h | 15 ++ | ||
156 | gcc/config/aarch64/aarch64.md | 11 +- | ||
157 | gcc/config/aarch64/constraints.md | 9 + | ||
158 | gcc/config/aarch64/predicates.md | 3 +- | ||
159 | .../aarch64/sls-mitigation/sls-miti-blr-bti.c | 40 ++++ | ||
160 | .../aarch64/sls-mitigation/sls-miti-blr.c | 33 +++ | ||
161 | 8 files changed, 328 insertions(+), 9 deletions(-) | ||
162 | create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c | ||
163 | create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c | ||
164 | |||
165 | diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h | ||
166 | index ee0ffde..839f801 100644 | ||
167 | --- a/gcc/config/aarch64/aarch64-protos.h | ||
168 | +++ b/gcc/config/aarch64/aarch64-protos.h | ||
169 | @@ -782,6 +782,7 @@ extern const atomic_ool_names aarch64_ool_ldeor_names; | ||
170 | tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *); | ||
171 | |||
172 | const char *aarch64_sls_barrier (int); | ||
173 | +const char *aarch64_indirect_call_asm (rtx); | ||
174 | extern bool aarch64_harden_sls_retbr_p (void); | ||
175 | extern bool aarch64_harden_sls_blr_p (void); | ||
176 | |||
177 | diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c | ||
178 | index 2389d49..0f7bba3 100644 | ||
179 | --- a/gcc/config/aarch64/aarch64.c | ||
180 | +++ b/gcc/config/aarch64/aarch64.c | ||
181 | @@ -10605,6 +10605,9 @@ aarch64_label_mentioned_p (rtx x) | ||
182 | enum reg_class | ||
183 | aarch64_regno_regclass (unsigned regno) | ||
184 | { | ||
185 | + if (STUB_REGNUM_P (regno)) | ||
186 | + return STUB_REGS; | ||
187 | + | ||
188 | if (GP_REGNUM_P (regno)) | ||
189 | return GENERAL_REGS; | ||
190 | |||
191 | @@ -10939,6 +10942,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) | ||
192 | unsigned int nregs, vec_flags; | ||
193 | switch (regclass) | ||
194 | { | ||
195 | + case STUB_REGS: | ||
196 | case TAILCALL_ADDR_REGS: | ||
197 | case POINTER_REGS: | ||
198 | case GENERAL_REGS: | ||
199 | @@ -13155,10 +13159,12 @@ aarch64_register_move_cost (machine_mode mode, | ||
200 | = aarch64_tune_params.regmove_cost; | ||
201 | |||
202 | /* Caller save and pointer regs are equivalent to GENERAL_REGS. */ | ||
203 | - if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS) | ||
204 | + if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS | ||
205 | + || to == STUB_REGS) | ||
206 | to = GENERAL_REGS; | ||
207 | |||
208 | - if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS) | ||
209 | + if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS | ||
210 | + || from == STUB_REGS) | ||
211 | from = GENERAL_REGS; | ||
212 | |||
213 | /* Make RDFFR very expensive. In particular, if we know that the FFR | ||
214 | @@ -22957,6 +22963,215 @@ aarch64_sls_barrier (int mitigation_required) | ||
215 | : ""; | ||
216 | } | ||
217 | |||
218 | +static GTY (()) tree aarch64_sls_shared_thunks[30]; | ||
219 | +static GTY (()) bool aarch64_sls_shared_thunks_needed = false; | ||
220 | +const char *indirect_symbol_names[30] = { | ||
221 | + "__call_indirect_x0", | ||
222 | + "__call_indirect_x1", | ||
223 | + "__call_indirect_x2", | ||
224 | + "__call_indirect_x3", | ||
225 | + "__call_indirect_x4", | ||
226 | + "__call_indirect_x5", | ||
227 | + "__call_indirect_x6", | ||
228 | + "__call_indirect_x7", | ||
229 | + "__call_indirect_x8", | ||
230 | + "__call_indirect_x9", | ||
231 | + "__call_indirect_x10", | ||
232 | + "__call_indirect_x11", | ||
233 | + "__call_indirect_x12", | ||
234 | + "__call_indirect_x13", | ||
235 | + "__call_indirect_x14", | ||
236 | + "__call_indirect_x15", | ||
237 | + "", /* "__call_indirect_x16", */ | ||
238 | + "", /* "__call_indirect_x17", */ | ||
239 | + "__call_indirect_x18", | ||
240 | + "__call_indirect_x19", | ||
241 | + "__call_indirect_x20", | ||
242 | + "__call_indirect_x21", | ||
243 | + "__call_indirect_x22", | ||
244 | + "__call_indirect_x23", | ||
245 | + "__call_indirect_x24", | ||
246 | + "__call_indirect_x25", | ||
247 | + "__call_indirect_x26", | ||
248 | + "__call_indirect_x27", | ||
249 | + "__call_indirect_x28", | ||
250 | + "__call_indirect_x29", | ||
251 | +}; | ||
252 | + | ||
253 | +/* Function to create a BLR thunk. This thunk is used to mitigate straight | ||
254 | + line speculation. Instead of a simple BLR that can be speculated past, | ||
255 | + we emit a BL to this thunk, and this thunk contains a BR to the relevant | ||
256 | + register. These thunks have the relevant speculation barriers put after | |
257 | + their indirect branch so that speculation is blocked. | ||
258 | + | ||
259 | + We use such a thunk so the speculation barriers are kept off the | ||
260 | + architecturally executed path in order to reduce the performance overhead. | ||
261 | + | ||
262 | + When optimizing for size we use stubs shared by the linked object. | ||
263 | + When optimizing for performance we emit stubs for each function in the hope | ||
264 | + that the branch predictor can better train on jumps specific for a given | ||
265 | + function. */ | ||
266 | +rtx | ||
267 | +aarch64_sls_create_blr_label (int regnum) | ||
268 | +{ | ||
269 | + gcc_assert (STUB_REGNUM_P (regnum)); | ||
270 | + if (optimize_function_for_size_p (cfun)) | ||
271 | + { | ||
272 | + /* For the thunks shared between different functions in this compilation | ||
273 | + unit we use a named symbol -- this is just for users to more easily | ||
274 | + understand the generated assembly. */ | ||
275 | + aarch64_sls_shared_thunks_needed = true; | ||
276 | + const char *thunk_name = indirect_symbol_names[regnum]; | ||
277 | + if (aarch64_sls_shared_thunks[regnum] == NULL) | ||
278 | + { | ||
279 | + /* Build a decl representing this function stub and record it for | ||
280 | + later. We build a decl here so we can use the GCC machinery for | ||
281 | + handling sections automatically (through `get_named_section` and | ||
282 | + `make_decl_one_only`). That saves us a lot of trouble handling | ||
283 | + the specifics of different output file formats. */ | ||
284 | + tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, | ||
285 | + get_identifier (thunk_name), | ||
286 | + build_function_type_list (void_type_node, | ||
287 | + NULL_TREE)); | ||
288 | + DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, | ||
289 | + NULL_TREE, void_type_node); | ||
290 | + TREE_PUBLIC (decl) = 1; | ||
291 | + TREE_STATIC (decl) = 1; | ||
292 | + DECL_IGNORED_P (decl) = 1; | ||
293 | + DECL_ARTIFICIAL (decl) = 1; | ||
294 | + make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); | ||
295 | + resolve_unique_section (decl, 0, false); | ||
296 | + aarch64_sls_shared_thunks[regnum] = decl; | ||
297 | + } | ||
298 | + | ||
299 | + return gen_rtx_SYMBOL_REF (Pmode, thunk_name); | ||
300 | + } | ||
301 | + | ||
302 | + if (cfun->machine->call_via[regnum] == NULL) | ||
303 | + cfun->machine->call_via[regnum] | ||
304 | + = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ()); | ||
305 | + return cfun->machine->call_via[regnum]; | ||
306 | +} | ||
307 | + | ||
308 | +/* Helper function for aarch64_sls_emit_blr_function_thunks and | ||
309 | + aarch64_sls_emit_shared_blr_thunks below. */ | ||
310 | +static void | ||
311 | +aarch64_sls_emit_function_stub (FILE *out_file, int regnum) | ||
312 | +{ | ||
313 | + /* Save in x16 and branch to that function so this transformation does | ||
314 | + not prevent jumping to `BTI c` instructions. */ | ||
315 | + asm_fprintf (out_file, "\tmov\tx16, x%d\n", regnum); | ||
316 | + asm_fprintf (out_file, "\tbr\tx16\n"); | ||
317 | +} | ||
318 | + | ||
319 | +/* Emit all BLR stubs for this particular function. | ||
320 | + Here we emit all the BLR stubs needed for the current function. Since we | ||
321 | + emit these stubs in a consecutive block we know there will be no speculation | ||
322 | + gadgets between each stub, and hence we only emit a speculation barrier at | ||
323 | + the end of the stub sequences. | ||
324 | + | ||
325 | + This is called in the TARGET_ASM_FUNCTION_EPILOGUE hook. */ | ||
326 | +void | ||
327 | +aarch64_sls_emit_blr_function_thunks (FILE *out_file) | ||
328 | +{ | ||
329 | + if (! aarch64_harden_sls_blr_p ()) | ||
330 | + return; | ||
331 | + | ||
332 | + bool any_functions_emitted = false; | ||
333 | + /* We must save and restore the current function section since this assembly | ||
334 | + is emitted at the end of the function. This means it can be emitted *just | ||
335 | + after* the cold section of a function. That cold part would be emitted in | ||
336 | + a different section. That switch would trigger a `.cfi_endproc` directive | ||
337 | + to be emitted in the original section and a `.cfi_startproc` directive to | ||
338 | + be emitted in the new section. Switching to the original section without | ||
339 | + restoring would mean that the `.cfi_endproc` emitted as a function ends | ||
340 | + would happen in a different section -- leaving an unmatched | ||
341 | + `.cfi_startproc` in the cold text section and an unmatched `.cfi_endproc` | ||
342 | + in the standard text section. */ | ||
343 | + section *save_text_section = in_section; | ||
344 | + switch_to_section (function_section (current_function_decl)); | ||
345 | + for (int regnum = 0; regnum < 30; ++regnum) | ||
346 | + { | ||
347 | + rtx specu_label = cfun->machine->call_via[regnum]; | ||
348 | + if (specu_label == NULL) | ||
349 | + continue; | ||
350 | + | ||
351 | + targetm.asm_out.print_operand (out_file, specu_label, 0); | ||
352 | + asm_fprintf (out_file, ":\n"); | ||
353 | + aarch64_sls_emit_function_stub (out_file, regnum); | ||
354 | + any_functions_emitted = true; | ||
355 | + } | ||
356 | + if (any_functions_emitted) | ||
357 | + /* Can use the SB if needs be here, since this stub will only be used | ||
358 | + by the current function, and hence for the current target. */ | ||
359 | + asm_fprintf (out_file, "\t%s\n", aarch64_sls_barrier (true)); | ||
360 | + switch_to_section (save_text_section); | ||
361 | +} | ||
362 | + | ||
363 | +/* Emit shared BLR stubs for the current compilation unit. | ||
364 | + Over the course of compiling this unit we may have converted some BLR | ||
365 | + instructions to a BL to a shared stub function. This is where we emit those | ||
366 | + stub functions. | ||
367 | + This function is for the stubs shared between different functions in this | ||
368 | + compilation unit. We share when optimizing for size instead of speed. | ||
369 | + | ||
370 | + This function is called through the TARGET_ASM_FILE_END hook. */ | ||
371 | +void | ||
372 | +aarch64_sls_emit_shared_blr_thunks (FILE *out_file) | ||
373 | +{ | ||
374 | + if (! aarch64_sls_shared_thunks_needed) | ||
375 | + return; | ||
376 | + | ||
377 | + for (int regnum = 0; regnum < 30; ++regnum) | ||
378 | + { | ||
379 | + tree decl = aarch64_sls_shared_thunks[regnum]; | ||
380 | + if (!decl) | ||
381 | + continue; | ||
382 | + | ||
383 | + const char *name = indirect_symbol_names[regnum]; | ||
384 | + switch_to_section (get_named_section (decl, NULL, 0)); | ||
385 | + ASM_OUTPUT_ALIGN (out_file, 2); | ||
386 | + targetm.asm_out.globalize_label (out_file, name); | ||
387 | + /* Only emits if the compiler is configured for an assembler that can | ||
388 | + handle visibility directives. */ | ||
389 | + targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN); | ||
390 | + ASM_OUTPUT_TYPE_DIRECTIVE (out_file, name, "function"); | ||
391 | + ASM_OUTPUT_LABEL (out_file, name); | ||
392 | + aarch64_sls_emit_function_stub (out_file, regnum); | ||
393 | + /* Use the most conservative target to ensure it can always be used by any | ||
394 | + function in the translation unit. */ | ||
395 | + asm_fprintf (out_file, "\tdsb\tsy\n\tisb\n"); | ||
396 | + ASM_DECLARE_FUNCTION_SIZE (out_file, name, decl); | ||
397 | + } | ||
398 | +} | ||
399 | + | ||
400 | +/* Implement TARGET_ASM_FILE_END. */ | ||
401 | +void | ||
402 | +aarch64_asm_file_end () | ||
403 | +{ | ||
404 | + aarch64_sls_emit_shared_blr_thunks (asm_out_file); | ||
405 | + /* Since this function will be called for the ASM_FILE_END hook, we ensure | ||
406 | + that what would be called otherwise (e.g. `file_end_indicate_exec_stack` | ||
407 | + for FreeBSD) still gets called. */ | ||
408 | +#ifdef TARGET_ASM_FILE_END | ||
409 | + TARGET_ASM_FILE_END (); | ||
410 | +#endif | ||
411 | +} | ||
412 | + | ||
413 | +const char * | ||
414 | +aarch64_indirect_call_asm (rtx addr) | ||
415 | +{ | ||
416 | + gcc_assert (REG_P (addr)); | ||
417 | + if (aarch64_harden_sls_blr_p ()) | ||
418 | + { | ||
419 | + rtx stub_label = aarch64_sls_create_blr_label (REGNO (addr)); | ||
420 | + output_asm_insn ("bl\t%0", &stub_label); | ||
421 | + } | ||
422 | + else | ||
423 | + output_asm_insn ("blr\t%0", &addr); | ||
424 | + return ""; | ||
425 | +} | ||
426 | + | ||
427 | /* Target-specific selftests. */ | ||
428 | |||
429 | #if CHECKING_P | ||
430 | @@ -23507,6 +23722,12 @@ aarch64_libgcc_floating_mode_supported_p | ||
431 | #undef TARGET_MD_ASM_ADJUST | ||
432 | #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust | ||
433 | |||
434 | +#undef TARGET_ASM_FILE_END | ||
435 | +#define TARGET_ASM_FILE_END aarch64_asm_file_end | ||
436 | + | ||
437 | +#undef TARGET_ASM_FUNCTION_EPILOGUE | ||
438 | +#define TARGET_ASM_FUNCTION_EPILOGUE aarch64_sls_emit_blr_function_thunks | ||
439 | + | ||
440 | struct gcc_target targetm = TARGET_INITIALIZER; | ||
441 | |||
442 | #include "gt-aarch64.h" | ||
443 | diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h | ||
444 | index 8e0fc37..7331450 100644 | ||
445 | --- a/gcc/config/aarch64/aarch64.h | ||
446 | +++ b/gcc/config/aarch64/aarch64.h | ||
447 | @@ -643,6 +643,16 @@ extern unsigned aarch64_architecture_version; | ||
448 | #define GP_REGNUM_P(REGNO) \ | ||
449 | (((unsigned) (REGNO - R0_REGNUM)) <= (R30_REGNUM - R0_REGNUM)) | ||
450 | |||
451 | +/* Registers known to be preserved over a BL instruction. This consists of the | ||
452 | + GENERAL_REGS without x16, x17, and x30. The x30 register is changed by the | ||
453 | + BL instruction itself, while the x16 and x17 registers may be used by | ||
454 | + veneers which can be inserted by the linker. */ | ||
455 | +#define STUB_REGNUM_P(REGNO) \ | ||
456 | + (GP_REGNUM_P (REGNO) \ | ||
457 | + && (REGNO) != R16_REGNUM \ | ||
458 | + && (REGNO) != R17_REGNUM \ | ||
459 | + && (REGNO) != R30_REGNUM) \ | ||
460 | + | ||
461 | #define FP_REGNUM_P(REGNO) \ | ||
462 | (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM)) | ||
463 | |||
464 | @@ -667,6 +677,7 @@ enum reg_class | ||
465 | { | ||
466 | NO_REGS, | ||
467 | TAILCALL_ADDR_REGS, | ||
468 | + STUB_REGS, | ||
469 | GENERAL_REGS, | ||
470 | STACK_REG, | ||
471 | POINTER_REGS, | ||
472 | @@ -689,6 +700,7 @@ enum reg_class | ||
473 | { \ | ||
474 | "NO_REGS", \ | ||
475 | "TAILCALL_ADDR_REGS", \ | ||
476 | + "STUB_REGS", \ | ||
477 | "GENERAL_REGS", \ | ||
478 | "STACK_REG", \ | ||
479 | "POINTER_REGS", \ | ||
480 | @@ -708,6 +720,7 @@ enum reg_class | ||
481 | { \ | ||
482 | { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ | ||
483 | { 0x00030000, 0x00000000, 0x00000000 }, /* TAILCALL_ADDR_REGS */\ | ||
484 | + { 0x3ffcffff, 0x00000000, 0x00000000 }, /* STUB_REGS */ \ | ||
485 | { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ | ||
486 | { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ | ||
487 | { 0xffffffff, 0x00000000, 0x00000003 }, /* POINTER_REGS */ \ | ||
488 | @@ -862,6 +875,8 @@ typedef struct GTY (()) machine_function | ||
489 | struct aarch64_frame frame; | ||
490 | /* One entry for each hard register. */ | ||
491 | bool reg_is_wrapped_separately[LAST_SAVED_REGNUM]; | ||
492 | + /* One entry for each general purpose register. */ | ||
493 | + rtx call_via[SP_REGNUM]; | ||
494 | bool label_is_assembled; | ||
495 | } machine_function; | ||
496 | #endif | ||
497 | diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md | ||
498 | index dda04ee..43da754 100644 | ||
499 | --- a/gcc/config/aarch64/aarch64.md | ||
500 | +++ b/gcc/config/aarch64/aarch64.md | ||
501 | @@ -1022,16 +1022,15 @@ | ||
502 | ) | ||
503 | |||
504 | (define_insn "*call_insn" | ||
505 | - [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "r, Usf")) | ||
506 | + [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucr, Usf")) | ||
507 | (match_operand 1 "" "")) | ||
508 | (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) | ||
509 | (clobber (reg:DI LR_REGNUM))] | ||
510 | "" | ||
511 | "@ | ||
512 | - blr\\t%0 | ||
513 | + * return aarch64_indirect_call_asm (operands[0]); | ||
514 | bl\\t%c0" | ||
515 | - [(set_attr "type" "call, call")] | ||
516 | -) | ||
517 | + [(set_attr "type" "call, call")]) | ||
518 | |||
519 | (define_expand "call_value" | ||
520 | [(parallel | ||
521 | @@ -1050,13 +1049,13 @@ | ||
522 | |||
523 | (define_insn "*call_value_insn" | ||
524 | [(set (match_operand 0 "" "") | ||
525 | - (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "r, Usf")) | ||
526 | + (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "Ucr, Usf")) | ||
527 | (match_operand 2 "" ""))) | ||
528 | (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) | ||
529 | (clobber (reg:DI LR_REGNUM))] | ||
530 | "" | ||
531 | "@ | ||
532 | - blr\\t%1 | ||
533 | + * return aarch64_indirect_call_asm (operands[1]); | ||
534 | bl\\t%c1" | ||
535 | [(set_attr "type" "call, call")] | ||
536 | ) | ||
537 | diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md | ||
538 | index d993268..8cc6f50 100644 | ||
539 | --- a/gcc/config/aarch64/constraints.md | ||
540 | +++ b/gcc/config/aarch64/constraints.md | ||
541 | @@ -24,6 +24,15 @@ | ||
542 | (define_register_constraint "Ucs" "TAILCALL_ADDR_REGS" | ||
543 | "@internal Registers suitable for an indirect tail call") | ||
544 | |||
545 | +(define_register_constraint "Ucr" | ||
546 | + "aarch64_harden_sls_blr_p () ? STUB_REGS : GENERAL_REGS" | ||
547 | + "@internal Registers to be used for an indirect call. | ||
548 | + This is usually the general registers, but when we are hardening against | ||
549 | + Straight Line Speculation we disallow x16, x17, and x30 so we can use | ||
550 | + indirection stubs. These indirection stubs cannot use the above registers | ||
551 | + since they will be reached by a BL that may have to go through a linker | ||
552 | + veneer.") | ||
553 | + | ||
554 | (define_register_constraint "w" "FP_REGS" | ||
555 | "Floating point and SIMD vector registers.") | ||
556 | |||
557 | diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md | ||
558 | index 215fcec..1754b1e 100644 | ||
559 | --- a/gcc/config/aarch64/predicates.md | ||
560 | +++ b/gcc/config/aarch64/predicates.md | ||
561 | @@ -32,7 +32,8 @@ | ||
562 | |||
563 | (define_predicate "aarch64_general_reg" | ||
564 | (and (match_operand 0 "register_operand") | ||
565 | - (match_test "REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS"))) | ||
566 | + (match_test "REGNO_REG_CLASS (REGNO (op)) == STUB_REGS | ||
567 | + || REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS"))) | ||
568 | |||
569 | ;; Return true if OP a (const_int 0) operand. | ||
570 | (define_predicate "const0_operand" | ||
571 | diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c | ||
572 | new file mode 100644 | ||
573 | index 0000000..b1fb754 | ||
574 | --- /dev/null | ||
575 | +++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c | ||
576 | @@ -0,0 +1,40 @@ | ||
577 | +/* { dg-do compile } */ | ||
578 | +/* { dg-additional-options "-mharden-sls=blr -mbranch-protection=bti" } */ | ||
579 | +/* | ||
580 | + Ensure that the SLS hardening of BLR leaves no BLR instructions. | ||
581 | + Here we also check that there are no BR instructions with anything except an | ||
582 | + x16 or x17 register. This is because a `BTI c` instruction can be branched | ||
583 | + to using a BLR instruction using any register, but can only be branched to | ||
584 | + with a BR using an x16 or x17 register. | ||
585 | + */ | ||
586 | +typedef int (foo) (int, int); | ||
587 | +typedef void (bar) (int, int); | ||
588 | +struct sls_testclass { | ||
589 | + foo *x; | ||
590 | + bar *y; | ||
591 | + int left; | ||
592 | + int right; | ||
593 | +}; | ||
594 | + | ||
595 | +/* We test both RTL patterns for a call which returns a value and a call which | ||
596 | + does not. */ | ||
597 | +int blr_call_value (struct sls_testclass x) | ||
598 | +{ | ||
599 | + int retval = x.x(x.left, x.right); | ||
600 | + if (retval % 10) | ||
601 | + return 100; | ||
602 | + return 9; | ||
603 | +} | ||
604 | + | ||
605 | +int blr_call (struct sls_testclass x) | ||
606 | +{ | ||
607 | + x.y(x.left, x.right); | ||
608 | + if (x.left % 10) | ||
609 | + return 100; | ||
610 | + return 9; | ||
611 | +} | ||
612 | + | ||
613 | +/* { dg-final { scan-assembler-not {\tblr\t} } } */ | ||
614 | +/* { dg-final { scan-assembler-not {\tbr\tx(?!16|17)} } } */ | ||
615 | +/* { dg-final { scan-assembler {\tbr\tx(16|17)} } } */ | ||
616 | + | ||
617 | diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c | ||
618 | new file mode 100644 | ||
619 | index 0000000..88bafff | ||
620 | --- /dev/null | ||
621 | +++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c | ||
622 | @@ -0,0 +1,33 @@ | ||
623 | +/* { dg-additional-options "-mharden-sls=blr -save-temps" } */ | ||
624 | +/* Ensure that the SLS hardening of BLR leaves no BLR instructions. | ||
625 | + We only test that all BLR instructions have been removed, not that the | ||
626 | + resulting code makes sense. */ | ||
627 | +typedef int (foo) (int, int); | ||
628 | +typedef void (bar) (int, int); | ||
629 | +struct sls_testclass { | ||
630 | + foo *x; | ||
631 | + bar *y; | ||
632 | + int left; | ||
633 | + int right; | ||
634 | +}; | ||
635 | + | ||
636 | +/* We test both RTL patterns for a call which returns a value and a call which | ||
637 | + does not. */ | ||
638 | +int blr_call_value (struct sls_testclass x) | ||
639 | +{ | ||
640 | + int retval = x.x(x.left, x.right); | ||
641 | + if (retval % 10) | ||
642 | + return 100; | ||
643 | + return 9; | ||
644 | +} | ||
645 | + | ||
646 | +int blr_call (struct sls_testclass x) | ||
647 | +{ | ||
648 | + x.y(x.left, x.right); | ||
649 | + if (x.left % 10) | ||
650 | + return 100; | ||
651 | + return 9; | ||
652 | +} | ||
653 | + | ||
654 | +/* { dg-final { scan-assembler-not {\tblr\t} } } */ | ||
655 | +/* { dg-final { scan-assembler {\tbr\tx[0-9][0-9]?} } } */ | ||
656 | -- | ||
657 | 2.7.4 | ||
658 | |||