| author | Ross Burton <ross.burton@arm.com> | 2020-07-22 15:10:09 +0100 |
|---|---|---|
| committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2020-07-22 23:47:57 +0100 |
| commit | 4ad6e1182e1b2d1b0047e250fc3596c66aa3f85e (patch) | |
| tree | 661f8aa27475bb811bafb962e547957fc4cd9048 | |
| parent | d128ded2d16f9163d225397a28132fabdd960fc2 (diff) | |
gcc: mitigate the Straight-line Speculation attack
Straight-line Speculation is a SPECTRE-like attack on Armv8-A; further
details can be found in the white paper here:
https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability/downloads/straight-line-speculation
Backport the GCC patches to mitigate the attack.
CVE: CVE-2020-13844
(From OE-Core rev: 3415e0ccdf75575014fb8c600edb707bbec0f566)
Signed-off-by: Ross Burton <ross.burton@arm.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
4 files changed, 1470 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-10.1.inc b/meta/recipes-devtools/gcc/gcc-10.1.inc
index a3de91a2c6..7c1201a2e3 100644
--- a/meta/recipes-devtools/gcc/gcc-10.1.inc
+++ b/meta/recipes-devtools/gcc/gcc-10.1.inc
@@ -66,6 +66,9 @@ SRC_URI = "\
| 66 | file://0036-Enable-CET-in-cross-compiler-if-possible.patch \ | 66 | file://0036-Enable-CET-in-cross-compiler-if-possible.patch \ |
| 67 | file://0037-mingw32-Enable-operation_not_supported.patch \ | 67 | file://0037-mingw32-Enable-operation_not_supported.patch \ |
| 68 | file://0038-libatomic-Do-not-enforce-march-on-aarch64.patch \ | 68 | file://0038-libatomic-Do-not-enforce-march-on-aarch64.patch \ |
| 69 | file://0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch \ | ||
| 70 | file://0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch \ | ||
| 71 | file://0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch \ | ||
| 69 | " | 72 | " |
| 70 | SRC_URI[sha256sum] = "b6898a23844b656f1b68691c5c012036c2e694ac4b53a8918d4712ad876e7ea2" | 73 | SRC_URI[sha256sum] = "b6898a23844b656f1b68691c5c012036c2e694ac4b53a8918d4712ad876e7ea2" |
| 71 | 74 | ||
diff --git a/meta/recipes-devtools/gcc/gcc-10.1/0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch b/meta/recipes-devtools/gcc/gcc-10.1/0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch
new file mode 100644
index 0000000000..73de4c7590
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-10.1/0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch
@@ -0,0 +1,202 @@
| 1 | CVE: CVE-2020-13844 | ||
| 2 | Upstream-Status: Backport | ||
| 3 | Signed-off-by: Ross Burton <ross.burton@arm.com> | ||
| 4 | |||
| 5 | From 1ff243934ac443b5f58cd02a5012ce58ecc31fb2 Mon Sep 17 00:00:00 2001 | ||
| 6 | From: Matthew Malcomson <matthew.malcomson@arm.com> | ||
| 7 | Date: Thu, 9 Jul 2020 09:11:58 +0100 | ||
| 8 | Subject: [PATCH 1/3] aarch64: New Straight Line Speculation (SLS) mitigation | ||
| 9 | flags | ||
| 10 | |||
| 11 | Here we introduce the flags that will be used for straight line speculation. | ||
| 12 | |||
| 13 | The new flag introduced is `-mharden-sls=`. | ||
| 14 | This flag can take arguments of `none`, `all`, or a comma-separated list of one | ||
| 15 | or more of `retbr` or `blr`. | ||
| 16 | `none` indicates no special mitigation of the straight line speculation | ||
| 17 | vulnerability. | ||
| 18 | `all` requests all mitigations currently implemented. | ||
| 19 | `retbr` requests that the RET and BR instructions have a speculation barrier | ||
| 20 | inserted after them. | ||
| 21 | `blr` requests that BLR instructions are replaced by a BL to a function stub | ||
| 22 | using a BR with a speculation barrier after it. | ||
| 23 | |||
| 24 | Setting this on a per-function basis using attributes or the like is not | ||
| 25 | enabled, but may be in the future. | ||
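
For illustration only (not part of the patch or its commit message), the option values above are expected to combine as sketched below. The file name and the aarch64-linux-gnu-gcc invocation are assumptions for the example; the accepted/rejected behaviour follows the option parsing added to aarch64.c further down.

```c
/* sls_demo.c -- a minimal, hypothetical translation unit for exercising
   the new -mharden-sls= option; the invocations in the comments are
   illustrative sketches, not output captured from the patch.  */

int sls_demo (int a, int b)
{
  return a + b;
}

/* Accepted (sketch):
     aarch64-linux-gnu-gcc -O2 -S -mharden-sls=none      sls_demo.c
     aarch64-linux-gnu-gcc -O2 -S -mharden-sls=all       sls_demo.c
     aarch64-linux-gnu-gcc -O2 -S -mharden-sls=retbr     sls_demo.c
     aarch64-linux-gnu-gcc -O2 -S -mharden-sls=retbr,blr sls_demo.c

   Rejected by the new option parser (sketch):
     -mharden-sls=all,retbr   -- "all" (and "none") must be given by itself
     -mharden-sls=foo         -- not a recognised mitigation  */
```
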
| 26 | |||
| 27 | gcc/ChangeLog: | ||
| 28 | |||
| 29 | 2020-06-02 Matthew Malcomson <matthew.malcomson@arm.com> | ||
| 30 | |||
| 31 | * config/aarch64/aarch64-protos.h (aarch64_harden_sls_retbr_p): | ||
| 32 | New. | ||
| 33 | (aarch64_harden_sls_blr_p): New. | ||
| 34 | * config/aarch64/aarch64.c (enum aarch64_sls_hardening_type): | ||
| 35 | New. | ||
| 36 | (aarch64_harden_sls_retbr_p): New. | ||
| 37 | (aarch64_harden_sls_blr_p): New. | ||
| 38 | (aarch64_validate_sls_mitigation): New. | ||
| 39 | (aarch64_override_options): Parse options for SLS mitigation. | ||
| 40 | * config/aarch64/aarch64.opt (-mharden-sls): New option. | ||
| 41 | * doc/invoke.texi: Document new option. | ||
| 42 | --- | ||
| 43 | gcc/config/aarch64/aarch64-protos.h | 3 ++ | ||
| 44 | gcc/config/aarch64/aarch64.c | 76 +++++++++++++++++++++++++++++++++++++ | ||
| 45 | gcc/config/aarch64/aarch64.opt | 4 ++ | ||
| 46 | gcc/doc/invoke.texi | 12 ++++++ | ||
| 47 | 4 files changed, 95 insertions(+) | ||
| 48 | |||
| 49 | diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h | ||
| 50 | index 723d9ba..eb5f4b4 100644 | ||
| 51 | --- a/gcc/config/aarch64/aarch64-protos.h | ||
| 52 | +++ b/gcc/config/aarch64/aarch64-protos.h | ||
| 53 | @@ -781,4 +781,7 @@ extern const atomic_ool_names aarch64_ool_ldeor_names; | ||
| 54 | |||
| 55 | tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *); | ||
| 56 | |||
| 57 | +extern bool aarch64_harden_sls_retbr_p (void); | ||
| 58 | +extern bool aarch64_harden_sls_blr_p (void); | ||
| 59 | + | ||
| 60 | #endif /* GCC_AARCH64_PROTOS_H */ | ||
| 61 | diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c | ||
| 62 | index b86434a..437a9cf 100644 | ||
| 63 | --- a/gcc/config/aarch64/aarch64.c | ||
| 64 | +++ b/gcc/config/aarch64/aarch64.c | ||
| 65 | @@ -14494,6 +14494,79 @@ aarch64_validate_mcpu (const char *str, const struct processor **res, | ||
| 66 | return false; | ||
| 67 | } | ||
| 68 | |||
| 69 | +/* Straight line speculation indicators. */ | ||
| 70 | +enum aarch64_sls_hardening_type | ||
| 71 | +{ | ||
| 72 | + SLS_NONE = 0, | ||
| 73 | + SLS_RETBR = 1, | ||
| 74 | + SLS_BLR = 2, | ||
| 75 | + SLS_ALL = 3, | ||
| 76 | +}; | ||
| 77 | +static enum aarch64_sls_hardening_type aarch64_sls_hardening; | ||
| 78 | + | ||
| 79 | +/* Return whether we should mitigate Straight Line Speculation for the RET | ||
| 80 | + and BR instructions. */ | ||
| 81 | +bool | ||
| 82 | +aarch64_harden_sls_retbr_p (void) | ||
| 83 | +{ | ||
| 84 | + return aarch64_sls_hardening & SLS_RETBR; | ||
| 85 | +} | ||
| 86 | + | ||
| 87 | +/* Return whether we should mitigate Straight Line Speculation for the BLR | ||
| 88 | + instruction. */ | ||
| 89 | +bool | ||
| 90 | +aarch64_harden_sls_blr_p (void) | ||
| 91 | +{ | ||
| 92 | + return aarch64_sls_hardening & SLS_BLR; | ||
| 93 | +} | ||
| 94 | + | ||
| 95 | +/* As of yet we only allow setting these options globally, in the future we may | ||
| 96 | + allow setting them per function. */ | ||
| 97 | +static void | ||
| 98 | +aarch64_validate_sls_mitigation (const char *const_str) | ||
| 99 | +{ | ||
| 100 | + char *token_save = NULL; | ||
| 101 | + char *str = NULL; | ||
| 102 | + | ||
| 103 | + if (strcmp (const_str, "none") == 0) | ||
| 104 | + { | ||
| 105 | + aarch64_sls_hardening = SLS_NONE; | ||
| 106 | + return; | ||
| 107 | + } | ||
| 108 | + if (strcmp (const_str, "all") == 0) | ||
| 109 | + { | ||
| 110 | + aarch64_sls_hardening = SLS_ALL; | ||
| 111 | + return; | ||
| 112 | + } | ||
| 113 | + | ||
| 114 | + char *str_root = xstrdup (const_str); | ||
| 115 | + str = strtok_r (str_root, ",", &token_save); | ||
| 116 | + if (!str) | ||
| 117 | + error ("invalid argument given to %<-mharden-sls=%>"); | ||
| 118 | + | ||
| 119 | + int temp = SLS_NONE; | ||
| 120 | + while (str) | ||
| 121 | + { | ||
| 122 | + if (strcmp (str, "blr") == 0) | ||
| 123 | + temp |= SLS_BLR; | ||
| 124 | + else if (strcmp (str, "retbr") == 0) | ||
| 125 | + temp |= SLS_RETBR; | ||
| 126 | + else if (strcmp (str, "none") == 0 || strcmp (str, "all") == 0) | ||
| 127 | + { | ||
| 128 | + error ("%<%s%> must be by itself for %<-mharden-sls=%>", str); | ||
| 129 | + break; | ||
| 130 | + } | ||
| 131 | + else | ||
| 132 | + { | ||
| 133 | + error ("invalid argument %<%s%> for %<-mharden-sls=%>", str); | ||
| 134 | + break; | ||
| 135 | + } | ||
| 136 | + str = strtok_r (NULL, ",", &token_save); | ||
| 137 | + } | ||
| 138 | + aarch64_sls_hardening = (aarch64_sls_hardening_type) temp; | ||
| 139 | + free (str_root); | ||
| 140 | +} | ||
| 141 | + | ||
| 142 | /* Parses CONST_STR for branch protection features specified in | ||
| 143 | aarch64_branch_protect_types, and set any global variables required. Returns | ||
| 144 | the parsing result and assigns LAST_STR to the last processed token from | ||
| 145 | @@ -14738,6 +14811,9 @@ aarch64_override_options (void) | ||
| 146 | selected_arch = NULL; | ||
| 147 | selected_tune = NULL; | ||
| 148 | |||
| 149 | + if (aarch64_harden_sls_string) | ||
| 150 | + aarch64_validate_sls_mitigation (aarch64_harden_sls_string); | ||
| 151 | + | ||
| 152 | if (aarch64_branch_protection_string) | ||
| 153 | aarch64_validate_mbranch_protection (aarch64_branch_protection_string); | ||
| 154 | |||
| 155 | diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt | ||
| 156 | index d99d14c..5170361 100644 | ||
| 157 | --- a/gcc/config/aarch64/aarch64.opt | ||
| 158 | +++ b/gcc/config/aarch64/aarch64.opt | ||
| 159 | @@ -71,6 +71,10 @@ mgeneral-regs-only | ||
| 160 | Target Report RejectNegative Mask(GENERAL_REGS_ONLY) Save | ||
| 161 | Generate code which uses only the general registers. | ||
| 162 | |||
| 163 | +mharden-sls= | ||
| 164 | +Target RejectNegative Joined Var(aarch64_harden_sls_string) | ||
| 165 | +Generate code to mitigate against straight line speculation. | ||
| 166 | + | ||
| 167 | mfix-cortex-a53-835769 | ||
| 168 | Target Report Var(aarch64_fix_a53_err835769) Init(2) Save | ||
| 169 | Workaround for ARM Cortex-A53 Erratum number 835769. | ||
| 170 | diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi | ||
| 171 | index a2794a6..bd5b77a 100644 | ||
| 172 | --- a/gcc/doc/invoke.texi | ||
| 173 | +++ b/gcc/doc/invoke.texi | ||
| 174 | @@ -696,6 +696,7 @@ Objective-C and Objective-C++ Dialects}. | ||
| 175 | -msign-return-address=@var{scope} @gol | ||
| 176 | -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf} | ||
| 177 | +@var{b-key}]|@var{bti} @gol | ||
| 178 | +-mharden-sls=@var{opts} @gol | ||
| 179 | -march=@var{name} -mcpu=@var{name} -mtune=@var{name} @gol | ||
| 180 | -moverride=@var{string} -mverbose-cost-dump @gol | ||
| 181 | -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg} @gol | ||
| 182 | @@ -17065,6 +17066,17 @@ functions. The optional argument @samp{b-key} can be used to sign the functions | ||
| 183 | with the B-key instead of the A-key. | ||
| 184 | @samp{bti} turns on branch target identification mechanism. | ||
| 185 | |||
| 186 | +@item -mharden-sls=@var{opts} | ||
| 187 | +@opindex mharden-sls | ||
| 188 | +Enable compiler hardening against straight line speculation (SLS). | ||
| 189 | +@var{opts} is a comma-separated list of the following options: | ||
| 190 | +@table @samp | ||
| 191 | +@item retbr | ||
| 192 | +@item blr | ||
| 193 | +@end table | ||
| 194 | +In addition, @samp{-mharden-sls=all} enables all SLS hardening while | ||
| 195 | +@samp{-mharden-sls=none} disables all SLS hardening. | ||
| 196 | + | ||
| 197 | @item -msve-vector-bits=@var{bits} | ||
| 198 | @opindex msve-vector-bits | ||
| 199 | Specify the number of bits in an SVE vector register. This option only has | ||
| 200 | -- | ||
| 201 | 2.7.4 | ||
| 202 | |||
diff --git a/meta/recipes-devtools/gcc/gcc-10.1/0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch b/meta/recipes-devtools/gcc/gcc-10.1/0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch
new file mode 100644
index 0000000000..823cc8b668
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-10.1/0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch
@@ -0,0 +1,607 @@
| 1 | Upstream-Status: Backport | ||
| 2 | Signed-off-by: Ross Burton <ross.burton@arm.com> | ||
| 3 | |||
| 4 | From b1204d16e1ec96a4aa89e44de8990e2499ffdb22 Mon Sep 17 00:00:00 2001 | ||
| 5 | From: Matthew Malcomson <matthew.malcomson@arm.com> | ||
| 6 | Date: Thu, 9 Jul 2020 09:11:59 +0100 | ||
| 7 | Subject: [PATCH 2/3] aarch64: Introduce SLS mitigation for RET and BR | ||
| 8 | instructions | ||
| 9 | |||
| 10 | Instructions following RET or BR are not necessarily executed. In order | ||
| 11 | to avoid speculation past RET and BR we can simply append a speculation | ||
| 12 | barrier. | ||
| 13 | |||
| 14 | Since these speculation barriers will not be architecturally executed, | ||
| 15 | they are not expected to add a high performance penalty. | ||
| 16 | |||
| 17 | The speculation barrier is to be SB when targeting architectures which | ||
| 18 | have this enabled, and DSB SY + ISB otherwise. | ||
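
As a rough sketch (not from the patch), the change to a simple return sequence is expected to look like the comments below; the function is illustrative and the epilogues shown are the expected shape, not verbatim compiler output.

```c
/* Illustrative only: a leaf function whose epilogue ends in RET.  */
long sls_retbr_demo (long x)
{
  return x * 3;
}

/* Expected epilogue shape (sketch):

   -mharden-sls=none (or option absent):    ret

   -mharden-sls=retbr, no SB extension:     ret
                                            dsb  sy
                                            isb

   -mharden-sls=retbr, +sb available
   (e.g. -march=armv8-a+sb):                ret
                                            sb                          */
```
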
| 19 | |||
| 20 | We add tests for each of the cases where such an instruction was seen. | ||
| 21 | |||
| 22 | This is implemented by modifying each machine description pattern that | ||
| 23 | emits either a RET or a BR instruction. We choose not to use something | ||
| 24 | like `TARGET_ASM_FUNCTION_EPILOGUE` since it does not affect the | ||
| 25 | `indirect_jump`, `jump`, `sibcall_insn` and `sibcall_value_insn` | ||
| 26 | patterns and we find it preferable to implement the functionality in the | ||
| 27 | same way for every pattern. | ||
| 28 | |||
| 29 | There is one particular case which is slightly tricky. The | ||
| 30 | implementation of TARGET_ASM_TRAMPOLINE_TEMPLATE uses a BR which needs | ||
| 31 | to be mitigated against. The trampoline template is used *once* per | ||
| 32 | compilation unit, and the TRAMPOLINE_SIZE is exposed to the user via the | ||
| 33 | builtin macro __LIBGCC_TRAMPOLINE_SIZE__. | ||
| 34 | In the future we may implement function specific attributes to turn on | ||
| 35 | and off hardening on a per-function basis. | ||
| 36 | The fixed nature of the trampoline described above implies it will be | ||
| 37 | safer to ensure this speculation barrier is always used. | ||
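
For reference, a hedged C sketch of code that reaches this path: a GNU C nested function that uses its enclosing frame and whose address escapes forces GCC to materialise an on-stack trampoline from the template discussed above. The run_callback helper is hypothetical.

```c
/* Illustrative only: forces a trampoline (GNU C nested function whose
   address escapes), so the template emitted by
   aarch64_asm_trampoline_template is copied onto the stack; with this
   patch that template always ends in "dsb sy; isb".  */
extern void run_callback (void (*cb) (int));   /* hypothetical helper */

void trampoline_demo (int base)
{
  void adder (int v)          /* nested function: needs the static chain */
  {
    base += v;
  }
  run_callback (adder);       /* address escapes -> trampoline emitted */
}
```
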
| 38 | |||
| 39 | Testing: | ||
| 40 | Bootstrap and regtest done on aarch64-none-linux | ||
| 41 | Used a temporary hack(1) to use these options on every test in the | ||
| 42 | testsuite and a script to check that the output never emitted an | ||
| 43 | unmitigated RET or BR. | ||
| 44 | |||
| 45 | 1) Temporary hack was a change to the testsuite to always use | ||
| 46 | `-save-temps` and run a script on the assembly output of those | ||
| 47 | compilations which produced one to ensure every RET or BR is immediately | ||
| 48 | followed by a speculation barrier. | ||
| 49 | |||
| 50 | gcc/ChangeLog: | ||
| 51 | |||
| 52 | * config/aarch64/aarch64-protos.h (aarch64_sls_barrier): New. | ||
| 53 | * config/aarch64/aarch64.c (aarch64_output_casesi): Emit | ||
| 54 | speculation barrier after BR instruction if needs be. | ||
| 55 | (aarch64_trampoline_init): Handle ptr_mode value & adjust size | ||
| 56 | of code copied. | ||
| 57 | (aarch64_sls_barrier): New. | ||
| 58 | (aarch64_asm_trampoline_template): Add needed barriers. | ||
| 59 | * config/aarch64/aarch64.h (AARCH64_ISA_SB): New. | ||
| 60 | (TARGET_SB): New. | ||
| 61 | (TRAMPOLINE_SIZE): Account for barrier. | ||
| 62 | * config/aarch64/aarch64.md (indirect_jump, *casesi_dispatch, | ||
| 63 | simple_return, *do_return, *sibcall_insn, *sibcall_value_insn): | ||
| 64 | Emit barrier if needs be, also account for possible barrier using | ||
| 65 | "sls_length" attribute. | ||
| 66 | (sls_length): New attribute. | ||
| 67 | (length): Determine default using any non-default sls_length | ||
| 68 | value. | ||
| 69 | |||
| 70 | gcc/testsuite/ChangeLog: | ||
| 71 | |||
| 72 | * gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c: New test. | ||
| 73 | * gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c: | ||
| 74 | New test. | ||
| 75 | * gcc.target/aarch64/sls-mitigation/sls-mitigation.exp: New file. | ||
| 76 | * lib/target-supports.exp (check_effective_target_aarch64_asm_sb_ok): | ||
| 77 | New proc. | ||
| 78 | --- | ||
| 79 | gcc/config/aarch64/aarch64-protos.h | 1 + | ||
| 80 | gcc/config/aarch64/aarch64.c | 41 ++++++- | ||
| 81 | gcc/config/aarch64/aarch64.h | 10 +- | ||
| 82 | gcc/config/aarch64/aarch64.md | 76 +++++++++---- | ||
| 83 | .../aarch64/sls-mitigation/sls-miti-retbr-pacret.c | 21 ++++ | ||
| 84 | .../aarch64/sls-mitigation/sls-miti-retbr.c | 119 +++++++++++++++++++++ | ||
| 85 | .../aarch64/sls-mitigation/sls-mitigation.exp | 73 +++++++++++++ | ||
| 86 | gcc/testsuite/lib/target-supports.exp | 2 +- | ||
| 87 | 8 files changed, 318 insertions(+), 25 deletions(-) | ||
| 88 | create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c | ||
| 89 | create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c | ||
| 90 | create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-mitigation.exp | ||
| 91 | |||
| 92 | diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h | ||
| 93 | index eb5f4b4..ee0ffde 100644 | ||
| 94 | --- a/gcc/config/aarch64/aarch64-protos.h | ||
| 95 | +++ b/gcc/config/aarch64/aarch64-protos.h | ||
| 96 | @@ -781,6 +781,7 @@ extern const atomic_ool_names aarch64_ool_ldeor_names; | ||
| 97 | |||
| 98 | tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *); | ||
| 99 | |||
| 100 | +const char *aarch64_sls_barrier (int); | ||
| 101 | extern bool aarch64_harden_sls_retbr_p (void); | ||
| 102 | extern bool aarch64_harden_sls_blr_p (void); | ||
| 103 | |||
| 104 | diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c | ||
| 105 | index 437a9cf..44e3d1f 100644 | ||
| 106 | --- a/gcc/config/aarch64/aarch64.c | ||
| 107 | +++ b/gcc/config/aarch64/aarch64.c | ||
| 108 | @@ -10852,8 +10852,8 @@ aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) | ||
| 109 | static void | ||
| 110 | aarch64_asm_trampoline_template (FILE *f) | ||
| 111 | { | ||
| 112 | - int offset1 = 16; | ||
| 113 | - int offset2 = 20; | ||
| 114 | + int offset1 = 24; | ||
| 115 | + int offset2 = 28; | ||
| 116 | |||
| 117 | if (aarch64_bti_enabled ()) | ||
| 118 | { | ||
| 119 | @@ -10876,6 +10876,17 @@ aarch64_asm_trampoline_template (FILE *f) | ||
| 120 | } | ||
| 121 | asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]); | ||
| 122 | |||
| 123 | + /* We always emit a speculation barrier. | ||
| 124 | + This is because the same trampoline template is used for every nested | ||
| 125 | + function. Since nested functions are not particularly common or | ||
| 126 | + performant we don't worry too much about the extra instructions to copy | ||
| 127 | + around. | ||
| 128 | + This is not yet a problem, since we have not yet implemented function | ||
| 129 | + specific attributes to choose between hardening against straight line | ||
| 130 | + speculation or not, but such function specific attributes are likely to | ||
| 131 | + happen in the future. */ | ||
| 132 | + asm_fprintf (f, "\tdsb\tsy\n\tisb\n"); | ||
| 133 | + | ||
| 134 | /* The trampoline needs an extra padding instruction. In case if BTI is | ||
| 135 | enabled the padding instruction is replaced by the BTI instruction at | ||
| 136 | the beginning. */ | ||
| 137 | @@ -10890,10 +10901,14 @@ static void | ||
| 138 | aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) | ||
| 139 | { | ||
| 140 | rtx fnaddr, mem, a_tramp; | ||
| 141 | - const int tramp_code_sz = 16; | ||
| 142 | + const int tramp_code_sz = 24; | ||
| 143 | |||
| 144 | /* Don't need to copy the trailing D-words, we fill those in below. */ | ||
| 145 | - emit_block_move (m_tramp, assemble_trampoline_template (), | ||
| 146 | + /* We create our own memory address in Pmode so that `emit_block_move` can | ||
| 147 | + use parts of the backend which expect Pmode addresses. */ | ||
| 148 | + rtx temp = convert_memory_address (Pmode, XEXP (m_tramp, 0)); | ||
| 149 | + emit_block_move (gen_rtx_MEM (BLKmode, temp), | ||
| 150 | + assemble_trampoline_template (), | ||
| 151 | GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL); | ||
| 152 | mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz); | ||
| 153 | fnaddr = XEXP (DECL_RTL (fndecl), 0); | ||
| 154 | @@ -11084,6 +11099,8 @@ aarch64_output_casesi (rtx *operands) | ||
| 155 | output_asm_insn (buf, operands); | ||
| 156 | output_asm_insn (patterns[index][1], operands); | ||
| 157 | output_asm_insn ("br\t%3", operands); | ||
| 158 | + output_asm_insn (aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()), | ||
| 159 | + operands); | ||
| 160 | assemble_label (asm_out_file, label); | ||
| 161 | return ""; | ||
| 162 | } | ||
| 163 | @@ -22924,6 +22941,22 @@ aarch64_file_end_indicate_exec_stack () | ||
| 164 | #undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI | ||
| 165 | #undef GNU_PROPERTY_AARCH64_FEATURE_1_AND | ||
| 166 | |||
| 167 | +/* Helper function for straight line speculation. | ||
| 168 | + Return what barrier should be emitted for straight line speculation | ||
| 169 | + mitigation. | ||
| 170 | + When not mitigating against straight line speculation this function returns | ||
| 171 | + an empty string. | ||
| 172 | + When mitigating against straight line speculation, use: | ||
| 173 | + * SB when the v8.5-A SB extension is enabled. | ||
| 174 | + * DSB+ISB otherwise. */ | ||
| 175 | +const char * | ||
| 176 | +aarch64_sls_barrier (int mitigation_required) | ||
| 177 | +{ | ||
| 178 | + return mitigation_required | ||
| 179 | + ? (TARGET_SB ? "sb" : "dsb\tsy\n\tisb") | ||
| 180 | + : ""; | ||
| 181 | +} | ||
| 182 | + | ||
| 183 | /* Target-specific selftests. */ | ||
| 184 | |||
| 185 | #if CHECKING_P | ||
| 186 | diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h | ||
| 187 | index 1ce23c6..c21015f 100644 | ||
| 188 | --- a/gcc/config/aarch64/aarch64.h | ||
| 189 | +++ b/gcc/config/aarch64/aarch64.h | ||
| 190 | @@ -281,6 +281,7 @@ extern unsigned aarch64_architecture_version; | ||
| 191 | #define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM) | ||
| 192 | #define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM) | ||
| 193 | #define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16) | ||
| 194 | +#define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB) | ||
| 195 | |||
| 196 | /* Crypto is an optional extension to AdvSIMD. */ | ||
| 197 | #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) | ||
| 198 | @@ -378,6 +379,9 @@ extern unsigned aarch64_architecture_version; | ||
| 199 | #define TARGET_FIX_ERR_A53_835769_DEFAULT 1 | ||
| 200 | #endif | ||
| 201 | |||
| 202 | +/* SB instruction is enabled through +sb. */ | ||
| 203 | +#define TARGET_SB (AARCH64_ISA_SB) | ||
| 204 | + | ||
| 205 | /* Apply the workaround for Cortex-A53 erratum 835769. */ | ||
| 206 | #define TARGET_FIX_ERR_A53_835769 \ | ||
| 207 | ((aarch64_fix_a53_err835769 == 2) \ | ||
| 208 | @@ -1058,8 +1062,10 @@ typedef struct | ||
| 209 | |||
| 210 | #define RETURN_ADDR_RTX aarch64_return_addr | ||
| 211 | |||
| 212 | -/* BTI c + 3 insns + 2 pointer-sized entries. */ | ||
| 213 | -#define TRAMPOLINE_SIZE (TARGET_ILP32 ? 24 : 32) | ||
| 214 | +/* BTI c + 3 insns | ||
| 215 | + + sls barrier of DSB + ISB. | ||
| 216 | + + 2 pointer-sized entries. */ | ||
| 217 | +#define TRAMPOLINE_SIZE (24 + (TARGET_ILP32 ? 8 : 16)) | ||
| 218 | |||
| 219 | /* Trampolines contain dwords, so must be dword aligned. */ | ||
| 220 | #define TRAMPOLINE_ALIGNMENT 64 | ||
| 221 | diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md | ||
| 222 | index 8c8be3c..dda04ee 100644 | ||
| 223 | --- a/gcc/config/aarch64/aarch64.md | ||
| 224 | +++ b/gcc/config/aarch64/aarch64.md | ||
| 225 | @@ -407,10 +407,25 @@ | ||
| 226 | ;; Attribute that specifies whether the alternative uses MOVPRFX. | ||
| 227 | (define_attr "movprfx" "no,yes" (const_string "no")) | ||
| 228 | |||
| 229 | +;; Attribute to specify that an alternative has the length of a single | ||
| 230 | +;; instruction plus a speculation barrier. | ||
| 231 | +(define_attr "sls_length" "none,retbr,casesi" (const_string "none")) | ||
| 232 | + | ||
| 233 | (define_attr "length" "" | ||
| 234 | (cond [(eq_attr "movprfx" "yes") | ||
| 235 | (const_int 8) | ||
| 236 | - ] (const_int 4))) | ||
| 237 | + | ||
| 238 | + (eq_attr "sls_length" "retbr") | ||
| 239 | + (cond [(match_test "!aarch64_harden_sls_retbr_p ()") (const_int 4) | ||
| 240 | + (match_test "TARGET_SB") (const_int 8)] | ||
| 241 | + (const_int 12)) | ||
| 242 | + | ||
| 243 | + (eq_attr "sls_length" "casesi") | ||
| 244 | + (cond [(match_test "!aarch64_harden_sls_retbr_p ()") (const_int 16) | ||
| 245 | + (match_test "TARGET_SB") (const_int 20)] | ||
| 246 | + (const_int 24)) | ||
| 247 | + ] | ||
| 248 | + (const_int 4))) | ||
| 249 | |||
| 250 | ;; Strictly for compatibility with AArch32 in pipeline models, since AArch64 has | ||
| 251 | ;; no predicated insns. | ||
| 252 | @@ -447,8 +462,12 @@ | ||
| 253 | (define_insn "indirect_jump" | ||
| 254 | [(set (pc) (match_operand:DI 0 "register_operand" "r"))] | ||
| 255 | "" | ||
| 256 | - "br\\t%0" | ||
| 257 | - [(set_attr "type" "branch")] | ||
| 258 | + { | ||
| 259 | + output_asm_insn ("br\\t%0", operands); | ||
| 260 | + return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); | ||
| 261 | + } | ||
| 262 | + [(set_attr "type" "branch") | ||
| 263 | + (set_attr "sls_length" "retbr")] | ||
| 264 | ) | ||
| 265 | |||
| 266 | (define_insn "jump" | ||
| 267 | @@ -765,7 +784,7 @@ | ||
| 268 | "* | ||
| 269 | return aarch64_output_casesi (operands); | ||
| 270 | " | ||
| 271 | - [(set_attr "length" "16") | ||
| 272 | + [(set_attr "sls_length" "casesi") | ||
| 273 | (set_attr "type" "branch")] | ||
| 274 | ) | ||
| 275 | |||
| 276 | @@ -844,18 +863,23 @@ | ||
| 277 | [(return)] | ||
| 278 | "" | ||
| 279 | { | ||
| 280 | + const char *ret = NULL; | ||
| 281 | if (aarch64_return_address_signing_enabled () | ||
| 282 | && TARGET_ARMV8_3 | ||
| 283 | && !crtl->calls_eh_return) | ||
| 284 | { | ||
| 285 | if (aarch64_ra_sign_key == AARCH64_KEY_B) | ||
| 286 | - return "retab"; | ||
| 287 | + ret = "retab"; | ||
| 288 | else | ||
| 289 | - return "retaa"; | ||
| 290 | + ret = "retaa"; | ||
| 291 | } | ||
| 292 | - return "ret"; | ||
| 293 | + else | ||
| 294 | + ret = "ret"; | ||
| 295 | + output_asm_insn (ret, operands); | ||
| 296 | + return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); | ||
| 297 | } | ||
| 298 | - [(set_attr "type" "branch")] | ||
| 299 | + [(set_attr "type" "branch") | ||
| 300 | + (set_attr "sls_length" "retbr")] | ||
| 301 | ) | ||
| 302 | |||
| 303 | (define_expand "return" | ||
| 304 | @@ -867,8 +891,12 @@ | ||
| 305 | (define_insn "simple_return" | ||
| 306 | [(simple_return)] | ||
| 307 | "" | ||
| 308 | - "ret" | ||
| 309 | - [(set_attr "type" "branch")] | ||
| 310 | + { | ||
| 311 | + output_asm_insn ("ret", operands); | ||
| 312 | + return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); | ||
| 313 | + } | ||
| 314 | + [(set_attr "type" "branch") | ||
| 315 | + (set_attr "sls_length" "retbr")] | ||
| 316 | ) | ||
| 317 | |||
| 318 | (define_insn "*cb<optab><mode>1" | ||
| 319 | @@ -1066,10 +1094,16 @@ | ||
| 320 | (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) | ||
| 321 | (return)] | ||
| 322 | "SIBLING_CALL_P (insn)" | ||
| 323 | - "@ | ||
| 324 | - br\\t%0 | ||
| 325 | - b\\t%c0" | ||
| 326 | - [(set_attr "type" "branch, branch")] | ||
| 327 | + { | ||
| 328 | + if (which_alternative == 0) | ||
| 329 | + { | ||
| 330 | + output_asm_insn ("br\\t%0", operands); | ||
| 331 | + return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); | ||
| 332 | + } | ||
| 333 | + return "b\\t%c0"; | ||
| 334 | + } | ||
| 335 | + [(set_attr "type" "branch, branch") | ||
| 336 | + (set_attr "sls_length" "retbr,none")] | ||
| 337 | ) | ||
| 338 | |||
| 339 | (define_insn "*sibcall_value_insn" | ||
| 340 | @@ -1080,10 +1114,16 @@ | ||
| 341 | (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) | ||
| 342 | (return)] | ||
| 343 | "SIBLING_CALL_P (insn)" | ||
| 344 | - "@ | ||
| 345 | - br\\t%1 | ||
| 346 | - b\\t%c1" | ||
| 347 | - [(set_attr "type" "branch, branch")] | ||
| 348 | + { | ||
| 349 | + if (which_alternative == 0) | ||
| 350 | + { | ||
| 351 | + output_asm_insn ("br\\t%1", operands); | ||
| 352 | + return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); | ||
| 353 | + } | ||
| 354 | + return "b\\t%c1"; | ||
| 355 | + } | ||
| 356 | + [(set_attr "type" "branch, branch") | ||
| 357 | + (set_attr "sls_length" "retbr,none")] | ||
| 358 | ) | ||
| 359 | |||
| 360 | ;; Call subroutine returning any type. | ||
| 361 | diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c | ||
| 362 | new file mode 100644 | ||
| 363 | index 0000000..fa1887a | ||
| 364 | --- /dev/null | ||
| 365 | +++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c | ||
| 366 | @@ -0,0 +1,21 @@ | ||
| 367 | +/* Avoid ILP32 since pacret is only available for LP64 */ | ||
| 368 | +/* { dg-do compile { target { ! ilp32 } } } */ | ||
| 369 | +/* { dg-additional-options "-mharden-sls=retbr -mbranch-protection=pac-ret -march=armv8.3-a" } */ | ||
| 370 | + | ||
| 371 | +/* Testing the do_return pattern for retaa and retab. */ | ||
| 372 | +long retbr_subcall(void); | ||
| 373 | +long retbr_do_return_retaa(void) | ||
| 374 | +{ | ||
| 375 | + return retbr_subcall()+1; | ||
| 376 | +} | ||
| 377 | + | ||
| 378 | +__attribute__((target("branch-protection=pac-ret+b-key"))) | ||
| 379 | +long retbr_do_return_retab(void) | ||
| 380 | +{ | ||
| 381 | + return retbr_subcall()+1; | ||
| 382 | +} | ||
| 383 | + | ||
| 384 | +/* Ensure there are no BR or RET instructions which are not directly followed | ||
| 385 | + by a speculation barrier. */ | ||
| 386 | +/* { dg-final { scan-assembler-not {\t(br|ret|retaa|retab)\tx[0-9][0-9]?\n\t(?!dsb\tsy\n\tisb)} } } */ | ||
| 387 | +/* { dg-final { scan-assembler-not {ret\t} } } */ | ||
| 388 | diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c | ||
| 389 | new file mode 100644 | ||
| 390 | index 0000000..76b8d03 | ||
| 391 | --- /dev/null | ||
| 392 | +++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c | ||
| 393 | @@ -0,0 +1,119 @@ | ||
| 394 | +/* We ensure that -Wpedantic is off since it complains about the trampolines | ||
| 395 | + we explicitly want to test. */ | ||
| 396 | +/* { dg-additional-options "-mharden-sls=retbr -Wno-pedantic " } */ | ||
| 397 | +/* | ||
| 398 | + Ensure that the SLS hardening of RET and BR leaves no unprotected RET/BR | ||
| 399 | + instructions. | ||
| 400 | + */ | ||
| 401 | +typedef int (foo) (int, int); | ||
| 402 | +typedef void (bar) (int, int); | ||
| 403 | +struct sls_testclass { | ||
| 404 | + foo *x; | ||
| 405 | + bar *y; | ||
| 406 | + int left; | ||
| 407 | + int right; | ||
| 408 | +}; | ||
| 409 | + | ||
| 410 | +int | ||
| 411 | +retbr_sibcall_value_insn (struct sls_testclass x) | ||
| 412 | +{ | ||
| 413 | + return x.x(x.left, x.right); | ||
| 414 | +} | ||
| 415 | + | ||
| 416 | +void | ||
| 417 | +retbr_sibcall_insn (struct sls_testclass x) | ||
| 418 | +{ | ||
| 419 | + x.y(x.left, x.right); | ||
| 420 | +} | ||
| 421 | + | ||
| 422 | +/* Aim to test two different returns. | ||
| 423 | + One that introduces a tail call in the middle of the function, and one that | ||
| 424 | + has a normal return. */ | ||
| 425 | +int | ||
| 426 | +retbr_multiple_returns (struct sls_testclass x) | ||
| 427 | +{ | ||
| 428 | + int temp; | ||
| 429 | + if (x.left % 10) | ||
| 430 | + return x.x(x.left, 100); | ||
| 431 | + else if (x.right % 20) | ||
| 432 | + { | ||
| 433 | + return x.x(x.left * x.right, 100); | ||
| 434 | + } | ||
| 435 | + temp = x.left % x.right; | ||
| 436 | + temp *= 100; | ||
| 437 | + temp /= 2; | ||
| 438 | + return temp % 3; | ||
| 439 | +} | ||
| 440 | + | ||
| 441 | +void | ||
| 442 | +retbr_multiple_returns_void (struct sls_testclass x) | ||
| 443 | +{ | ||
| 444 | + if (x.left % 10) | ||
| 445 | + { | ||
| 446 | + x.y(x.left, 100); | ||
| 447 | + } | ||
| 448 | + else if (x.right % 20) | ||
| 449 | + { | ||
| 450 | + x.y(x.left * x.right, 100); | ||
| 451 | + } | ||
| 452 | + return; | ||
| 453 | +} | ||
| 454 | + | ||
| 455 | +/* Testing the casesi jump via register. */ | ||
| 456 | +__attribute__ ((optimize ("Os"))) | ||
| 457 | +int | ||
| 458 | +retbr_casesi_dispatch (struct sls_testclass x) | ||
| 459 | +{ | ||
| 460 | + switch (x.left) | ||
| 461 | + { | ||
| 462 | + case -5: | ||
| 463 | + return -2; | ||
| 464 | + case -3: | ||
| 465 | + return -1; | ||
| 466 | + case 0: | ||
| 467 | + return 0; | ||
| 468 | + case 3: | ||
| 469 | + return 1; | ||
| 470 | + case 5: | ||
| 471 | + break; | ||
| 472 | + default: | ||
| 473 | + __builtin_unreachable (); | ||
| 474 | + } | ||
| 475 | + return x.right; | ||
| 476 | +} | ||
| 477 | + | ||
| 478 | +/* Testing the BR in trampolines is mitigated against. */ | ||
| 479 | +void f1 (void *); | ||
| 480 | +void f3 (void *, void (*)(void *)); | ||
| 481 | +void f2 (void *); | ||
| 482 | + | ||
| 483 | +int | ||
| 484 | +retbr_trampolines (void *a, int b) | ||
| 485 | +{ | ||
| 486 | + if (!b) | ||
| 487 | + { | ||
| 488 | + f1 (a); | ||
| 489 | + return 1; | ||
| 490 | + } | ||
| 491 | + if (b) | ||
| 492 | + { | ||
| 493 | + void retbr_tramp_internal (void *c) | ||
| 494 | + { | ||
| 495 | + if (c == a) | ||
| 496 | + f2 (c); | ||
| 497 | + } | ||
| 498 | + f3 (a, retbr_tramp_internal); | ||
| 499 | + } | ||
| 500 | + return 0; | ||
| 501 | +} | ||
| 502 | + | ||
| 503 | +/* Testing the indirect_jump pattern. */ | ||
| 504 | +void | ||
| 505 | +retbr_indirect_jump (int *buf) | ||
| 506 | +{ | ||
| 507 | + __builtin_longjmp(buf, 1); | ||
| 508 | +} | ||
| 509 | + | ||
| 510 | +/* Ensure there are no BR or RET instructions which are not directly followed | ||
| 511 | + by a speculation barrier. */ | ||
| 512 | +/* { dg-final { scan-assembler-not {\t(br|ret|retaa|retab)\tx[0-9][0-9]?\n\t(?!dsb\tsy\n\tisb|sb)} } } */ | ||
| 513 | diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-mitigation.exp b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-mitigation.exp | ||
| 514 | new file mode 100644 | ||
| 515 | index 0000000..8122503 | ||
| 516 | --- /dev/null | ||
| 517 | +++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-mitigation.exp | ||
| 518 | @@ -0,0 +1,73 @@ | ||
| 519 | +# Regression driver for SLS mitigation on AArch64. | ||
| 520 | +# Copyright (C) 2020 Free Software Foundation, Inc. | ||
| 521 | +# Contributed by ARM Ltd. | ||
| 522 | +# | ||
| 523 | +# This file is part of GCC. | ||
| 524 | +# | ||
| 525 | +# GCC is free software; you can redistribute it and/or modify it | ||
| 526 | +# under the terms of the GNU General Public License as published by | ||
| 527 | +# the Free Software Foundation; either version 3, or (at your option) | ||
| 528 | +# any later version. | ||
| 529 | +# | ||
| 530 | +# GCC is distributed in the hope that it will be useful, but | ||
| 531 | +# WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 532 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 533 | +# General Public License for more details. | ||
| 534 | +# | ||
| 535 | +# You should have received a copy of the GNU General Public License | ||
| 536 | +# along with GCC; see the file COPYING3. If not see | ||
| 537 | +# <http://www.gnu.org/licenses/>. */ | ||
| 538 | + | ||
| 539 | +# Exit immediately if this isn't an AArch64 target. | ||
| 540 | +if {![istarget aarch64*-*-*] } then { | ||
| 541 | + return | ||
| 542 | +} | ||
| 543 | + | ||
| 544 | +# Load support procs. | ||
| 545 | +load_lib gcc-dg.exp | ||
| 546 | +load_lib torture-options.exp | ||
| 547 | + | ||
| 548 | +# If a testcase doesn't have special options, use these. | ||
| 549 | +global DEFAULT_CFLAGS | ||
| 550 | +if ![info exists DEFAULT_CFLAGS] then { | ||
| 551 | + set DEFAULT_CFLAGS " " | ||
| 552 | +} | ||
| 553 | + | ||
| 554 | +# Initialize `dg'. | ||
| 555 | +dg-init | ||
| 556 | +torture-init | ||
| 557 | + | ||
| 558 | +# Use different architectures as well as the normal optimisation options. | ||
| 559 | +# (i.e. use both SB and DSB+ISB barriers). | ||
| 560 | + | ||
| 561 | +set save-dg-do-what-default ${dg-do-what-default} | ||
| 562 | +# Main loop. | ||
| 563 | +# Run with torture tests (i.e. a bunch of different optimisation levels) just | ||
| 564 | +# to increase test coverage. | ||
| 565 | +set dg-do-what-default assemble | ||
| 566 | +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ | ||
| 567 | + "-save-temps" $DEFAULT_CFLAGS | ||
| 568 | + | ||
| 569 | +# Run the same tests but this time with SB extension. | ||
| 570 | +# Since not all supported assemblers will support that extension we decide | ||
| 571 | +# whether to assemble or just compile based on whether the extension is | ||
| 572 | +# supported for the available assembler. | ||
| 573 | + | ||
| 574 | +set templist {} | ||
| 575 | +foreach x $DG_TORTURE_OPTIONS { | ||
| 576 | + lappend templist "$x -march=armv8.3-a+sb " | ||
| 577 | + lappend templist "$x -march=armv8-a+sb " | ||
| 578 | +} | ||
| 579 | +set-torture-options $templist | ||
| 580 | +if { [check_effective_target_aarch64_asm_sb_ok] } { | ||
| 581 | + set dg-do-what-default assemble | ||
| 582 | +} else { | ||
| 583 | + set dg-do-what-default compile | ||
| 584 | +} | ||
| 585 | +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ | ||
| 586 | + "-save-temps" $DEFAULT_CFLAGS | ||
| 587 | +set dg-do-what-default ${save-dg-do-what-default} | ||
| 588 | + | ||
| 589 | +# All done. | ||
| 590 | +torture-finish | ||
| 591 | +dg-finish | ||
| 592 | diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp | ||
| 593 | index 8a186dd..9d2e093 100644 | ||
| 594 | --- a/gcc/testsuite/lib/target-supports.exp | ||
| 595 | +++ b/gcc/testsuite/lib/target-supports.exp | ||
| 596 | @@ -9432,7 +9432,7 @@ proc check_effective_target_aarch64_tiny { } { | ||
| 597 | # various architecture extensions via the .arch_extension pseudo-op. | ||
| 598 | |||
| 599 | foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" | ||
| 600 | - "i8mm" "f32mm" "f64mm" "bf16" } { | ||
| 601 | + "i8mm" "f32mm" "f64mm" "bf16" "sb" } { | ||
| 602 | eval [string map [list FUNC $aarch64_ext] { | ||
| 603 | proc check_effective_target_aarch64_asm_FUNC_ok { } { | ||
| 604 | if { [istarget aarch64*-*-*] } { | ||
| 605 | -- | ||
| 606 | 2.7.4 | ||
| 607 | |||
diff --git a/meta/recipes-devtools/gcc/gcc-10.1/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch b/meta/recipes-devtools/gcc/gcc-10.1/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch
new file mode 100644
index 0000000000..716a367172
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-10.1/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch
@@ -0,0 +1,658 @@
| 1 | Upstream-Status: Backport | ||
| 2 | Signed-off-by: Ross Burton <ross.burton@arm.com> | ||
| 3 | |||
| 4 | From a5e7efc40ed841934c1d913f39476afa17d8e5f7 Mon Sep 17 00:00:00 2001 | ||
| 5 | From: Matthew Malcomson <matthew.malcomson@arm.com> | ||
| 6 | Date: Thu, 9 Jul 2020 09:11:59 +0100 | ||
| 7 | Subject: [PATCH 3/3] aarch64: Mitigate SLS for BLR instruction | ||
| 8 | |||
| 9 | This patch introduces the mitigation for Straight Line Speculation past | ||
| 10 | the BLR instruction. | ||
| 11 | |||
| 12 | This mitigation replaces BLR instructions with a BL to a stub which uses | ||
| 13 | a BR to jump to the original value. These function stubs are then | ||
| 14 | appended with a speculation barrier to ensure no straight line | ||
| 15 | speculation happens after these jumps. | ||
| 16 | |||
| 17 | When optimising for speed we use a set of stubs for each function since | ||
| 18 | this should help the branch predictor make more accurate predictions | ||
| 19 | about where a stub should branch. | ||
| 20 | |||
| 21 | When optimising for size we use one set of stubs for all functions. | ||
| 22 | This set of stubs can have human readable names, and we are using | ||
| 23 | `__call_indirect_x<N>` for register x<N>. | ||
| 24 | |||
| 25 | When BTI branch protection is enabled the BLR instruction can jump to a | ||
| 26 | `BTI c` instruction using any register, while the BR instruction can | ||
| 27 | only jump to a `BTI c` instruction using the x16 or x17 registers. | ||
| 28 | Hence, in order to ensure this transformation is safe we mov the value | ||
| 29 | of the original register into x16 and use x16 for the BR. | ||
| 30 | |||
| 31 | As an example when optimising for size: | ||
| 32 | a | ||
| 33 | BLR x0 | ||
| 34 | instruction would get transformed to something like | ||
| 35 | BL __call_indirect_x0 | ||
| 36 | where __call_indirect_x0 labels a thunk that contains | ||
| 37 | __call_indirect_x0: | ||
| 38 | MOV X16, X0 | ||
| 39 | BR X16 | ||
| 40 | <speculation barrier> | ||
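
To connect this to source code, a minimal hedged sketch (not part of the patch) of a call site that would normally compile to a BLR; the register number N and the choice of shared versus function-local stub depend on register allocation and on whether the function is optimised for size.

```c
/* Illustrative only: an indirect call through a function pointer.  The
   "+ 1" prevents a tail call, so the call is a BLR rather than a BR.  */
typedef int (*int_fn) (int);

int call_indirectly (int_fn fn, int value)
{
  /* With -mharden-sls=blr this call is expected to become
         bl   __call_indirect_x<N>     (shared stub, when optimising for size)
     where the stub contains
         mov  x16, x<N>
         br   x16
         <speculation barrier>
     as described above.  */
  return fn (value) + 1;
}
```
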
| 41 | |||
| 42 | The first version of this patch used local symbols specific to a | ||
| 43 | compilation unit to try and avoid relocations. | ||
| 44 | This was mistaken since functions coming from the same compilation unit | ||
| 45 | can still be in different sections, and the assembler will insert | ||
| 46 | relocations at jumps between sections. | ||
| 47 | |||
| 48 | On any relocation the linker is permitted to emit a veneer to handle | ||
| 49 | jumps between symbols that are very far apart. The registers x16 and | ||
| 50 | x17 may be clobbered by these veneers. | ||
| 51 | Hence the function stubs cannot rely on the values of x16 and x17 being | ||
| 52 | the same as just before the function stub is called. | ||
| 53 | |||
| 54 | Similar can be said for the hot/cold partitioning of single functions, | ||
| 55 | so function-local stubs have the same restriction. | ||
| 56 | |||
| 57 | This updated version of the patch never emits function stubs for x16 and | ||
| 58 | x17, and instead forces other registers to be used. | ||
| 59 | |||
| 60 | Given the above, there is now no benefit to local symbols (since they | ||
| 61 | are not enough to avoid dealing with linker intricacies). This patch | ||
| 62 | now uses global symbols with hidden visibility each stored in their own | ||
| 63 | COMDAT section. This means stubs can be shared between compilation | ||
| 64 | units while still avoiding the PLT indirection. | ||
| 65 | |||
| 66 | This patch also removes the `__call_indirect_x30` stub (and | ||
| 67 | function-local equivalent) which would simply jump back to the original | ||
| 68 | location. | ||
| 69 | |||
| 70 | The function-local stubs are emitted to the assembly output file in one | ||
| 71 | chunk, which means we need not add the speculation barrier directly | ||
| 72 | after each one. | ||
| 73 | This is because we know for certain that the instructions directly after | ||
| 74 | the BR in all but the last function stub will be from another one of | ||
| 75 | these stubs and hence will not contain a speculation gadget. | ||
| 76 | Instead we add a speculation barrier at the end of the sequence of | ||
| 77 | stubs. | ||
| 78 | |||
| 79 | The global stubs are emitted in COMDAT/.linkonce sections by | ||
| 80 | themselves so that the linker can remove duplicates from multiple object | ||
| 81 | files. This means they are not emitted in one chunk, and each one must | ||
| 82 | include the speculation barrier. | ||
| 83 | |||
| 84 | Another difference is that since the global stubs are shared across | ||
| 85 | compilation units we do not know that all functions will be targeting an | ||
| 86 | architecture supporting the SB instruction. | ||
| 87 | Rather than provide multiple stubs for each architecture, we provide a | ||
| 88 | stub that will work for all architectures -- using the DSB+ISB barrier. | ||
| 89 | |||
| 90 | This mitigation does not apply for BLR instructions in the following | ||
| 91 | places: | ||
| 92 | - Some accesses to thread-local variables use a code sequence with a BLR | ||
| 93 | instruction. This code sequence is part of the binary interface between | ||
| 94 | compiler and linker. If this BLR instruction needs to be mitigated, it'd | ||
| 95 | probably be best to do so in the linker. It seems that the code sequence | ||
| 96 | for thread-local variable access is unlikely to lead to a Spectre Revelation | ||
| 97 | Gadget. | ||
| 98 | - PLT stubs are produced by the linker and each contain a BLR instruction. | ||
| 99 | It seems that at most only after the last PLT stub a Spectre Revelation | ||
| 100 | Gadget might appear. | ||
| 101 | |||
| 102 | Testing: | ||
| 103 | Bootstrap and regtest on AArch64 | ||
| 104 | (with BOOT_CFLAGS="-mharden-sls=retbr,blr") | ||
| 105 | Used a temporary hack(1) in gcc-dg.exp to use these options on every | ||
| 106 | test in the testsuite, a slight modification to emit the speculation | ||
| 107 | barrier after every function stub, and a script to check that the | ||
| 108 | output never emitted a BLR, or unmitigated BR or RET instruction. | ||
| 109 | Similar on an aarch64-none-elf cross-compiler. | ||
| 110 | |||
| 111 | 1) Temporary hack emitted a speculation barrier at the end of every stub | ||
| 112 | function, and used a script to ensure that: | ||
| 113 | a) Every RET or BR is immediately followed by a speculation barrier. | ||
| 114 | b) No BLR instruction is emitted by compiler. | ||
| 115 | |||
| 116 | gcc/ChangeLog: | ||
| 117 | |||
| 118 | * config/aarch64/aarch64-protos.h (aarch64_indirect_call_asm): | ||
| 119 | New declaration. | ||
| 120 | * config/aarch64/aarch64.c (aarch64_regno_regclass): Handle new | ||
| 121 | stub registers class. | ||
| 122 | (aarch64_class_max_nregs): Likewise. | ||
| 123 | (aarch64_register_move_cost): Likewise. | ||
| 124 | (aarch64_sls_shared_thunks): Global array to store stub labels. | ||
| 125 | (aarch64_sls_emit_function_stub): New. | ||
| 126 | (aarch64_create_blr_label): New. | ||
| 127 | (aarch64_sls_emit_blr_function_thunks): New. | ||
| 128 | (aarch64_sls_emit_shared_blr_thunks): New. | ||
| 129 | (aarch64_asm_file_end): New. | ||
| 130 | (aarch64_indirect_call_asm): New. | ||
| 131 | (TARGET_ASM_FILE_END): Use aarch64_asm_file_end. | ||
| 132 | (TARGET_ASM_FUNCTION_EPILOGUE): Use | ||
| 133 | aarch64_sls_emit_blr_function_thunks. | ||
| 134 | * config/aarch64/aarch64.h (STB_REGNUM_P): New. | ||
| 135 | (enum reg_class): Add STUB_REGS class. | ||
| 136 | (machine_function): Introduce `call_via` array for | ||
| 137 | function-local stub labels. | ||
| 138 | * config/aarch64/aarch64.md (*call_insn, *call_value_insn): Use | ||
| 139 | aarch64_indirect_call_asm to emit code when hardening BLR | ||
| 140 | instructions. | ||
| 141 | * config/aarch64/constraints.md (Ucr): New constraint | ||
| 142 | representing registers for indirect calls. Is GENERAL_REGS | ||
| 143 | usually, and STUB_REGS when hardening BLR instruction against | ||
| 144 | SLS. | ||
| 145 | * config/aarch64/predicates.md (aarch64_general_reg): STUB_REGS class | ||
| 146 | is also a general register. | ||
| 147 | |||
| 148 | gcc/testsuite/ChangeLog: | ||
| 149 | |||
| 150 | * gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c: New test. | ||
| 151 | * gcc.target/aarch64/sls-mitigation/sls-miti-blr.c: New test. | ||
| 152 | --- | ||
| 153 | gcc/config/aarch64/aarch64-protos.h | 1 + | ||
| 154 | gcc/config/aarch64/aarch64.c | 225 ++++++++++++++++++++- | ||
| 155 | gcc/config/aarch64/aarch64.h | 15 ++ | ||
| 156 | gcc/config/aarch64/aarch64.md | 11 +- | ||
| 157 | gcc/config/aarch64/constraints.md | 9 + | ||
| 158 | gcc/config/aarch64/predicates.md | 3 +- | ||
| 159 | .../aarch64/sls-mitigation/sls-miti-blr-bti.c | 40 ++++ | ||
| 160 | .../aarch64/sls-mitigation/sls-miti-blr.c | 33 +++ | ||
| 161 | 8 files changed, 328 insertions(+), 9 deletions(-) | ||
| 162 | create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c | ||
| 163 | create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c | ||
| 164 | |||
| 165 | diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h | ||
| 166 | index ee0ffde..839f801 100644 | ||
| 167 | --- a/gcc/config/aarch64/aarch64-protos.h | ||
| 168 | +++ b/gcc/config/aarch64/aarch64-protos.h | ||
| 169 | @@ -782,6 +782,7 @@ extern const atomic_ool_names aarch64_ool_ldeor_names; | ||
| 170 | tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *); | ||
| 171 | |||
| 172 | const char *aarch64_sls_barrier (int); | ||
| 173 | +const char *aarch64_indirect_call_asm (rtx); | ||
| 174 | extern bool aarch64_harden_sls_retbr_p (void); | ||
| 175 | extern bool aarch64_harden_sls_blr_p (void); | ||
| 176 | |||
| 177 | diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c | ||
| 178 | index 2389d49..0f7bba3 100644 | ||
| 179 | --- a/gcc/config/aarch64/aarch64.c | ||
| 180 | +++ b/gcc/config/aarch64/aarch64.c | ||
| 181 | @@ -10605,6 +10605,9 @@ aarch64_label_mentioned_p (rtx x) | ||
| 182 | enum reg_class | ||
| 183 | aarch64_regno_regclass (unsigned regno) | ||
| 184 | { | ||
| 185 | + if (STUB_REGNUM_P (regno)) | ||
| 186 | + return STUB_REGS; | ||
| 187 | + | ||
| 188 | if (GP_REGNUM_P (regno)) | ||
| 189 | return GENERAL_REGS; | ||
| 190 | |||
| 191 | @@ -10939,6 +10942,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) | ||
| 192 | unsigned int nregs, vec_flags; | ||
| 193 | switch (regclass) | ||
| 194 | { | ||
| 195 | + case STUB_REGS: | ||
| 196 | case TAILCALL_ADDR_REGS: | ||
| 197 | case POINTER_REGS: | ||
| 198 | case GENERAL_REGS: | ||
| 199 | @@ -13155,10 +13159,12 @@ aarch64_register_move_cost (machine_mode mode, | ||
| 200 | = aarch64_tune_params.regmove_cost; | ||
| 201 | |||
| 202 | /* Caller save and pointer regs are equivalent to GENERAL_REGS. */ | ||
| 203 | - if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS) | ||
| 204 | + if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS | ||
| 205 | + || to == STUB_REGS) | ||
| 206 | to = GENERAL_REGS; | ||
| 207 | |||
| 208 | - if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS) | ||
| 209 | + if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS | ||
| 210 | + || from == STUB_REGS) | ||
| 211 | from = GENERAL_REGS; | ||
| 212 | |||
| 213 | /* Make RDFFR very expensive. In particular, if we know that the FFR | ||
| 214 | @@ -22957,6 +22963,215 @@ aarch64_sls_barrier (int mitigation_required) | ||
| 215 | : ""; | ||
| 216 | } | ||
| 217 | |||
| 218 | +static GTY (()) tree aarch64_sls_shared_thunks[30]; | ||
| 219 | +static GTY (()) bool aarch64_sls_shared_thunks_needed = false; | ||
| 220 | +const char *indirect_symbol_names[30] = { | ||
| 221 | + "__call_indirect_x0", | ||
| 222 | + "__call_indirect_x1", | ||
| 223 | + "__call_indirect_x2", | ||
| 224 | + "__call_indirect_x3", | ||
| 225 | + "__call_indirect_x4", | ||
| 226 | + "__call_indirect_x5", | ||
| 227 | + "__call_indirect_x6", | ||
| 228 | + "__call_indirect_x7", | ||
| 229 | + "__call_indirect_x8", | ||
| 230 | + "__call_indirect_x9", | ||
| 231 | + "__call_indirect_x10", | ||
| 232 | + "__call_indirect_x11", | ||
| 233 | + "__call_indirect_x12", | ||
| 234 | + "__call_indirect_x13", | ||
| 235 | + "__call_indirect_x14", | ||
| 236 | + "__call_indirect_x15", | ||
| 237 | + "", /* "__call_indirect_x16", */ | ||
| 238 | + "", /* "__call_indirect_x17", */ | ||
| 239 | + "__call_indirect_x18", | ||
| 240 | + "__call_indirect_x19", | ||
| 241 | + "__call_indirect_x20", | ||
| 242 | + "__call_indirect_x21", | ||
| 243 | + "__call_indirect_x22", | ||
| 244 | + "__call_indirect_x23", | ||
| 245 | + "__call_indirect_x24", | ||
| 246 | + "__call_indirect_x25", | ||
| 247 | + "__call_indirect_x26", | ||
| 248 | + "__call_indirect_x27", | ||
| 249 | + "__call_indirect_x28", | ||
| 250 | + "__call_indirect_x29", | ||
| 251 | +}; | ||
| 252 | + | ||
| 253 | +/* Function to create a BLR thunk. This thunk is used to mitigate straight | ||
| 254 | + line speculation. Instead of a simple BLR that can be speculated past, | ||
| 255 | + we emit a BL to this thunk, and this thunk contains a BR to the relevant | ||
| 256 | + register. These thunks have the relevant speculation barriers put after | ||
| 257 | + their indirect branch so that speculation is blocked. | ||
| 258 | + | ||
| 259 | + We use such a thunk so the speculation barriers are kept off the | ||
| 260 | + architecturally executed path in order to reduce the performance overhead. | ||
| 261 | + | ||
| 262 | + When optimizing for size we use stubs shared by the linked object. | ||
| 263 | + When optimizing for performance we emit stubs for each function in the hope | ||
| 264 | + that the branch predictor can better train on jumps specific for a given | ||
| 265 | + function. */ | ||
| 266 | +rtx | ||
| 267 | +aarch64_sls_create_blr_label (int regnum) | ||
| 268 | +{ | ||
| 269 | + gcc_assert (STUB_REGNUM_P (regnum)); | ||
| 270 | + if (optimize_function_for_size_p (cfun)) | ||
| 271 | + { | ||
| 272 | + /* For the thunks shared between different functions in this compilation | ||
| 273 | + unit we use a named symbol -- this is just for users to more easily | ||
| 274 | + understand the generated assembly. */ | ||
| 275 | + aarch64_sls_shared_thunks_needed = true; | ||
| 276 | + const char *thunk_name = indirect_symbol_names[regnum]; | ||
| 277 | + if (aarch64_sls_shared_thunks[regnum] == NULL) | ||
| 278 | + { | ||
| 279 | + /* Build a decl representing this function stub and record it for | ||
| 280 | + later. We build a decl here so we can use the GCC machinery for | ||
| 281 | + handling sections automatically (through `get_named_section` and | ||
| 282 | + `make_decl_one_only`). That saves us a lot of trouble handling | ||
| 283 | + the specifics of different output file formats. */ | ||
| 284 | + tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, | ||
| 285 | + get_identifier (thunk_name), | ||
| 286 | + build_function_type_list (void_type_node, | ||
| 287 | + NULL_TREE)); | ||
| 288 | + DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, | ||
| 289 | + NULL_TREE, void_type_node); | ||
| 290 | + TREE_PUBLIC (decl) = 1; | ||
| 291 | + TREE_STATIC (decl) = 1; | ||
| 292 | + DECL_IGNORED_P (decl) = 1; | ||
| 293 | + DECL_ARTIFICIAL (decl) = 1; | ||
| 294 | + make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); | ||
| 295 | + resolve_unique_section (decl, 0, false); | ||
| 296 | + aarch64_sls_shared_thunks[regnum] = decl; | ||
| 297 | + } | ||
| 298 | + | ||
| 299 | + return gen_rtx_SYMBOL_REF (Pmode, thunk_name); | ||
| 300 | + } | ||
| 301 | + | ||
| 302 | + if (cfun->machine->call_via[regnum] == NULL) | ||
| 303 | + cfun->machine->call_via[regnum] | ||
| 304 | + = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ()); | ||
| 305 | + return cfun->machine->call_via[regnum]; | ||
| 306 | +} | ||
| 307 | + | ||
| 308 | +/* Helper function for aarch64_sls_emit_blr_function_thunks and | ||
| 309 | + aarch64_sls_emit_shared_blr_thunks below. */ | ||
| 310 | +static void | ||
| 311 | +aarch64_sls_emit_function_stub (FILE *out_file, int regnum) | ||
| 312 | +{ | ||
| 313 | + /* Save in x16 and branch to that function so this transformation does | ||
| 314 | + not prevent jumping to `BTI c` instructions. */ | ||
| 315 | + asm_fprintf (out_file, "\tmov\tx16, x%d\n", regnum); | ||
| 316 | + asm_fprintf (out_file, "\tbr\tx16\n"); | ||
| 317 | +} | ||
| 318 | + | ||
| 319 | +/* Emit all BLR stubs for this particular function. | ||
| 320 | + Here we emit all the BLR stubs needed for the current function. Since we | ||
| 321 | + emit these stubs in a consecutive block we know there will be no speculation | ||
| 322 | + gadgets between each stub, and hence we only emit a speculation barrier at | ||
| 323 | + the end of the stub sequences. | ||
| 324 | + | ||
| 325 | + This is called in the TARGET_ASM_FUNCTION_EPILOGUE hook. */ | ||
| 326 | +void | ||
| 327 | +aarch64_sls_emit_blr_function_thunks (FILE *out_file) | ||
| 328 | +{ | ||
| 329 | + if (! aarch64_harden_sls_blr_p ()) | ||
| 330 | + return; | ||
| 331 | + | ||
| 332 | + bool any_functions_emitted = false; | ||
| 333 | + /* We must save and restore the current function section since this assembly | ||
| 334 | + is emitted at the end of the function. This means it can be emitted *just | ||
| 335 | + after* the cold section of a function. That cold part would be emitted in | ||
| 336 | + a different section. That switch would trigger a `.cfi_endproc` directive | ||
| 337 | + to be emitted in the original section and a `.cfi_startproc` directive to | ||
| 338 | + be emitted in the new section. Switching to the original section without | ||
| 339 | + restoring would mean that the `.cfi_endproc` emitted as a function ends | ||
| 340 | + would happen in a different section -- leaving an unmatched | ||
| 341 | + `.cfi_startproc` in the cold text section and an unmatched `.cfi_endproc` | ||
| 342 | + in the standard text section. */ | ||
| 343 | + section *save_text_section = in_section; | ||
| 344 | + switch_to_section (function_section (current_function_decl)); | ||
| 345 | + for (int regnum = 0; regnum < 30; ++regnum) | ||
| 346 | + { | ||
| 347 | + rtx specu_label = cfun->machine->call_via[regnum]; | ||
| 348 | + if (specu_label == NULL) | ||
| 349 | + continue; | ||
| 350 | + | ||
| 351 | + targetm.asm_out.print_operand (out_file, specu_label, 0); | ||
| 352 | + asm_fprintf (out_file, ":\n"); | ||
| 353 | + aarch64_sls_emit_function_stub (out_file, regnum); | ||
| 354 | + any_functions_emitted = true; | ||
| 355 | + } | ||
| 356 | + if (any_functions_emitted) | ||
| 357 | + /* Can use the SB if needs be here, since this stub will only be used | ||
| 358 | + by the current function, and hence for the current target. */ | ||
| 359 | + asm_fprintf (out_file, "\t%s\n", aarch64_sls_barrier (true)); | ||
| 360 | + switch_to_section (save_text_section); | ||
| 361 | +} | ||
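When a function is optimized for speed, its stubs are private to it: the epilogue hook above prints one `mov`/`br` pair per address register the function called through, then a single speculation barrier for the whole block. A hedged sketch of code that exercises this path follows; the file name, toolchain prefix, and the assembly shape in the comments are assumptions, not taken from the patch.

```c
/* per-function-stub-sketch.c -- a minimal sketch, not part of the patch.
   Assumed compile command:
     aarch64-poky-linux-gcc -O2 -mharden-sls=blr -S per-function-stub-sketch.c
   Expected shape of the output (approximate; the local label is
   compiler-generated, and the barrier is "sb" only when the target
   supports it, otherwise "dsb sy; isb"):
       dispatch:
         ...
         bl   .L<label>        @ was: blr x<N>
         ...
       .L<label>:
         mov  x16, x<N>
         br   x16
         sb                    */

typedef int (*handler_t) (int);

int dispatch (handler_t h, int v)
{
  int r = h (v);
  return r + 1;    /* prevents a tail call, so a BLR-class call is used */
}
```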
| 362 | + | ||
| 363 | +/* Emit shared BLR stubs for the current compilation unit. | ||
| 364 | + Over the course of compiling this unit we may have converted some BLR | ||
| 365 | + instructions to a BL to a shared stub function. This is where we emit those | ||
| 366 | + stub functions. | ||
| 367 | + This function is for the stubs shared between different functions in this | ||
| 368 | + compilation unit. We share when optimizing for size instead of speed. | ||
| 369 | + | ||
| 370 | + This function is called through the TARGET_ASM_FILE_END hook. */ | ||
| 371 | +void | ||
| 372 | +aarch64_sls_emit_shared_blr_thunks (FILE *out_file) | ||
| 373 | +{ | ||
| 374 | + if (! aarch64_sls_shared_thunks_needed) | ||
| 375 | + return; | ||
| 376 | + | ||
| 377 | + for (int regnum = 0; regnum < 30; ++regnum) | ||
| 378 | + { | ||
| 379 | + tree decl = aarch64_sls_shared_thunks[regnum]; | ||
| 380 | + if (!decl) | ||
| 381 | + continue; | ||
| 382 | + | ||
| 383 | + const char *name = indirect_symbol_names[regnum]; | ||
| 384 | + switch_to_section (get_named_section (decl, NULL, 0)); | ||
| 385 | + ASM_OUTPUT_ALIGN (out_file, 2); | ||
| 386 | + targetm.asm_out.globalize_label (out_file, name); | ||
| 387 | + /* Only emits if the compiler is configured for an assembler that can | ||
| 388 | + handle visibility directives. */ | ||
| 389 | + targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN); | ||
| 390 | + ASM_OUTPUT_TYPE_DIRECTIVE (out_file, name, "function"); | ||
| 391 | + ASM_OUTPUT_LABEL (out_file, name); | ||
| 392 | + aarch64_sls_emit_function_stub (out_file, regnum); | ||
| 393 | + /* Use the most conservative target to ensure it can always be used by any | ||
| 394 | + function in the translation unit. */ | ||
| 395 | + asm_fprintf (out_file, "\tdsb\tsy\n\tisb\n"); | ||
| 396 | + ASM_DECLARE_FUNCTION_SIZE (out_file, name, decl); | ||
| 397 | + } | ||
| 398 | +} | ||
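At `-Os` the stubs are instead shared across the translation unit: they are emitted once at file end as hidden, one-only functions named after the register (via `indirect_symbol_names`), and they always end with the conservative `dsb sy; isb` pair so any caller, whatever its target options, can use them. A hedged sketch follows; the symbol name and the output shape in the comment are assumptions about the usual form, not quoted from the patch.

```c
/* shared-stub-sketch.c -- a minimal sketch, not part of the patch.
   Assumed compile command:
     aarch64-poky-linux-gcc -Os -mharden-sls=blr -S shared-stub-sketch.c
   Expected shape of a shared stub (approximate; name is an assumption
   based on indirect_symbol_names):
       .hidden __call_indirect_x<N>
       .type   __call_indirect_x<N>, %function
     __call_indirect_x<N>:
       mov  x16, x<N>
       br   x16
       dsb  sy
       isb                      */

typedef void (*cb_t) (void);
int runs;

void run_twice (cb_t cb)
{
  cb ();       /* indirect calls become "bl __call_indirect_x<N>" (name assumed) */
  cb ();
  runs += 2;   /* avoids tail-calling the second invocation */
}
```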
| 399 | + | ||
| 400 | +/* Implement TARGET_ASM_FILE_END. */ | ||
| 401 | +void | ||
| 402 | +aarch64_asm_file_end () | ||
| 403 | +{ | ||
| 404 | + aarch64_sls_emit_shared_blr_thunks (asm_out_file); | ||
| 405 | + /* Since this function will be called for the ASM_FILE_END hook, we ensure | ||
| 406 | + that what would be called otherwise (e.g. `file_end_indicate_exec_stack` | ||
| 407 | + for FreeBSD) still gets called. */ | ||
| 408 | +#ifdef TARGET_ASM_FILE_END | ||
| 409 | + TARGET_ASM_FILE_END (); | ||
| 410 | +#endif | ||
| 411 | +} | ||
| 412 | + | ||
| 413 | +const char * | ||
| 414 | +aarch64_indirect_call_asm (rtx addr) | ||
| 415 | +{ | ||
| 416 | + gcc_assert (REG_P (addr)); | ||
| 417 | + if (aarch64_harden_sls_blr_p ()) | ||
| 418 | + { | ||
| 419 | + rtx stub_label = aarch64_sls_create_blr_label (REGNO (addr)); | ||
| 420 | + output_asm_insn ("bl\t%0", &stub_label); | ||
| 421 | + } | ||
| 422 | + else | ||
| 423 | + output_asm_insn ("blr\t%0", &addr); | ||
| 424 | + return ""; | ||
| 425 | +} | ||
| 426 | + | ||
| 427 | /* Target-specific selftests. */ | ||
| 428 | |||
| 429 | #if CHECKING_P | ||
| 430 | @@ -23507,6 +23722,12 @@ aarch64_libgcc_floating_mode_supported_p | ||
| 431 | #undef TARGET_MD_ASM_ADJUST | ||
| 432 | #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust | ||
| 433 | |||
| 434 | +#undef TARGET_ASM_FILE_END | ||
| 435 | +#define TARGET_ASM_FILE_END aarch64_asm_file_end | ||
| 436 | + | ||
| 437 | +#undef TARGET_ASM_FUNCTION_EPILOGUE | ||
| 438 | +#define TARGET_ASM_FUNCTION_EPILOGUE aarch64_sls_emit_blr_function_thunks | ||
| 439 | + | ||
| 440 | struct gcc_target targetm = TARGET_INITIALIZER; | ||
| 441 | |||
| 442 | #include "gt-aarch64.h" | ||
| 443 | diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h | ||
| 444 | index 8e0fc37..7331450 100644 | ||
| 445 | --- a/gcc/config/aarch64/aarch64.h | ||
| 446 | +++ b/gcc/config/aarch64/aarch64.h | ||
| 447 | @@ -643,6 +643,16 @@ extern unsigned aarch64_architecture_version; | ||
| 448 | #define GP_REGNUM_P(REGNO) \ | ||
| 449 | (((unsigned) (REGNO - R0_REGNUM)) <= (R30_REGNUM - R0_REGNUM)) | ||
| 450 | |||
| 451 | +/* Registers known to be preserved over a BL instruction. This consists of the | ||
| 452 | + GENERAL_REGS without x16, x17, and x30. The x30 register is changed by the | ||
| 453 | + BL instruction itself, while the x16 and x17 registers may be used by | ||
| 454 | + veneers which can be inserted by the linker. */ | ||
| 455 | +#define STUB_REGNUM_P(REGNO) \ | ||
| 456 | + (GP_REGNUM_P (REGNO) \ | ||
| 457 | + && (REGNO) != R16_REGNUM \ | ||
| 458 | + && (REGNO) != R17_REGNUM \ | ||
| 459 | + && (REGNO) != R30_REGNUM) \ | ||
| 460 | + | ||
| 461 | #define FP_REGNUM_P(REGNO) \ | ||
| 462 | (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM)) | ||
| 463 | |||
| 464 | @@ -667,6 +677,7 @@ enum reg_class | ||
| 465 | { | ||
| 466 | NO_REGS, | ||
| 467 | TAILCALL_ADDR_REGS, | ||
| 468 | + STUB_REGS, | ||
| 469 | GENERAL_REGS, | ||
| 470 | STACK_REG, | ||
| 471 | POINTER_REGS, | ||
| 472 | @@ -689,6 +700,7 @@ enum reg_class | ||
| 473 | { \ | ||
| 474 | "NO_REGS", \ | ||
| 475 | "TAILCALL_ADDR_REGS", \ | ||
| 476 | + "STUB_REGS", \ | ||
| 477 | "GENERAL_REGS", \ | ||
| 478 | "STACK_REG", \ | ||
| 479 | "POINTER_REGS", \ | ||
| 480 | @@ -708,6 +720,7 @@ enum reg_class | ||
| 481 | { \ | ||
| 482 | { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ | ||
| 483 | { 0x00030000, 0x00000000, 0x00000000 }, /* TAILCALL_ADDR_REGS */\ | ||
| 484 | + { 0x3ffcffff, 0x00000000, 0x00000000 }, /* STUB_REGS */ \ | ||
| 485 | { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ | ||
| 486 | { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ | ||
| 487 | { 0xffffffff, 0x00000000, 0x00000003 }, /* POINTER_REGS */ \ | ||
| 488 | @@ -862,6 +875,8 @@ typedef struct GTY (()) machine_function | ||
| 489 | struct aarch64_frame frame; | ||
| 490 | /* One entry for each hard register. */ | ||
| 491 | bool reg_is_wrapped_separately[LAST_SAVED_REGNUM]; | ||
| 492 | + /* One entry for each general purpose register. */ | ||
| 493 | + rtx call_via[SP_REGNUM]; | ||
| 494 | bool label_is_assembled; | ||
| 495 | } machine_function; | ||
| 496 | #endif | ||
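`STUB_REGNUM_P`, and the `STUB_REGS` class built from it, carves x16, x17 and x30 out of the general registers: x30 is overwritten by the `BL` that reaches the stub, and x16/x17 may be clobbered by a linker veneer on the way there, so none of them can safely carry the call target. A small sketch of the practical effect; the behaviour described in the comment is an inference from the constraint, not quoted from the patch.

```c
/* stub-regs-sketch.c -- a minimal sketch, not part of the patch.
   Assumed compile command:
     aarch64-poky-linux-gcc -O2 -mharden-sls=blr -S stub-regs-sketch.c
   With hardening enabled, the register holding 'op' is allocated from
   STUB_REGS, so the generated code never stages the call target in x16,
   x17 or x30 before the "bl" to the stub.  */

long apply (long (*op) (long, long), long a, long b)
{
  return op (a, b) + 1;   /* the +1 keeps this from becoming a tail call */
}
```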
| 497 | diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md | ||
| 498 | index dda04ee..43da754 100644 | ||
| 499 | --- a/gcc/config/aarch64/aarch64.md | ||
| 500 | +++ b/gcc/config/aarch64/aarch64.md | ||
| 501 | @@ -1022,16 +1022,15 @@ | ||
| 502 | ) | ||
| 503 | |||
| 504 | (define_insn "*call_insn" | ||
| 505 | - [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "r, Usf")) | ||
| 506 | + [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucr, Usf")) | ||
| 507 | (match_operand 1 "" "")) | ||
| 508 | (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) | ||
| 509 | (clobber (reg:DI LR_REGNUM))] | ||
| 510 | "" | ||
| 511 | "@ | ||
| 512 | - blr\\t%0 | ||
| 513 | + * return aarch64_indirect_call_asm (operands[0]); | ||
| 514 | bl\\t%c0" | ||
| 515 | - [(set_attr "type" "call, call")] | ||
| 516 | -) | ||
| 517 | + [(set_attr "type" "call, call")]) | ||
| 518 | |||
| 519 | (define_expand "call_value" | ||
| 520 | [(parallel | ||
| 521 | @@ -1050,13 +1049,13 @@ | ||
| 522 | |||
| 523 | (define_insn "*call_value_insn" | ||
| 524 | [(set (match_operand 0 "" "") | ||
| 525 | - (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "r, Usf")) | ||
| 526 | + (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "Ucr, Usf")) | ||
| 527 | (match_operand 2 "" ""))) | ||
| 528 | (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) | ||
| 529 | (clobber (reg:DI LR_REGNUM))] | ||
| 530 | "" | ||
| 531 | "@ | ||
| 532 | - blr\\t%1 | ||
| 533 | + * return aarch64_indirect_call_asm (operands[1]); | ||
| 534 | bl\\t%c1" | ||
| 535 | [(set_attr "type" "call, call")] | ||
| 536 | ) | ||
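Only the register alternative of the call patterns is rewritten: the `Usf` alternative for direct calls to a symbol still prints a plain `bl`, so direct calls are unaffected by `-mharden-sls=blr`. A minimal sketch contrasting the two alternatives; the expected mnemonics in the comments are inferences, not quoted compiler output.

```c
/* direct-vs-indirect-sketch.c -- a minimal sketch, not part of the patch.
   Assumed compile command:
     aarch64-poky-linux-gcc -O2 -mharden-sls=blr -S direct-vs-indirect-sketch.c  */

void callee (void);
int counter;

void direct_call (void)
{
  callee ();    /* "Usf" alternative: expected to stay a plain "bl callee" */
  counter++;    /* keeps this from becoming a tail call */
}

void indirect_call (void (*fp) (void))
{
  fp ();        /* "Ucr" alternative: printed via aarch64_indirect_call_asm */
  counter++;
}
```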
| 537 | diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md | ||
| 538 | index d993268..8cc6f50 100644 | ||
| 539 | --- a/gcc/config/aarch64/constraints.md | ||
| 540 | +++ b/gcc/config/aarch64/constraints.md | ||
| 541 | @@ -24,6 +24,15 @@ | ||
| 542 | (define_register_constraint "Ucs" "TAILCALL_ADDR_REGS" | ||
| 543 | "@internal Registers suitable for an indirect tail call") | ||
| 544 | |||
| 545 | +(define_register_constraint "Ucr" | ||
| 546 | + "aarch64_harden_sls_blr_p () ? STUB_REGS : GENERAL_REGS" | ||
| 547 | + "@internal Registers to be used for an indirect call. | ||
| 548 | + This is usually the general registers, but when we are hardening against | ||
| 549 | + Straight Line Speculation we disallow x16, x17, and x30 so we can use | ||
| 550 | + indirection stubs. These indirection stubs cannot use the above registers | ||
| 551 | + since they will be reached by a BL that may have to go through a linker | ||
| 552 | + veneer.") | ||
| 553 | + | ||
| 554 | (define_register_constraint "w" "FP_REGS" | ||
| 555 | "Floating point and SIMD vector registers.") | ||
| 556 | |||
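Note that `Ucr` is deliberately conditional: when BLR hardening is not requested (the default), it evaluates to `GENERAL_REGS` and the call patterns behave exactly as before, so the mitigation costs nothing unless it is enabled. A tiny sketch of that default case; the compile commands and output characterization are assumptions.

```c
/* default-off-sketch.c -- a minimal sketch, not part of the patch.
   Assumed comparison:
     aarch64-poky-linux-gcc -O2 -S default-off-sketch.c                   # plain "blr"
     aarch64-poky-linux-gcc -O2 -mharden-sls=blr -S default-off-sketch.c  # "bl" + stub
   Without the option, "Ucr" falls back to GENERAL_REGS and no stubs or
   barriers are emitted.  */

int call_it (int (*f) (void))
{
  return f () + 1;   /* the +1 keeps this from becoming a tail call */
}
```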
| 557 | diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md | ||
| 558 | index 215fcec..1754b1e 100644 | ||
| 559 | --- a/gcc/config/aarch64/predicates.md | ||
| 560 | +++ b/gcc/config/aarch64/predicates.md | ||
| 561 | @@ -32,7 +32,8 @@ | ||
| 562 | |||
| 563 | (define_predicate "aarch64_general_reg" | ||
| 564 | (and (match_operand 0 "register_operand") | ||
| 565 | - (match_test "REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS"))) | ||
| 566 | + (match_test "REGNO_REG_CLASS (REGNO (op)) == STUB_REGS | ||
| 567 | + || REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS"))) | ||
| 568 | |||
| 569 | ;; Return true if OP a (const_int 0) operand. | ||
| 570 | (define_predicate "const0_operand" | ||
| 571 | diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c | ||
| 572 | new file mode 100644 | ||
| 573 | index 0000000..b1fb754 | ||
| 574 | --- /dev/null | ||
| 575 | +++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c | ||
| 576 | @@ -0,0 +1,40 @@ | ||
| 577 | +/* { dg-do compile } */ | ||
| 578 | +/* { dg-additional-options "-mharden-sls=blr -mbranch-protection=bti" } */ | ||
| 579 | +/* | ||
| 580 | + Ensure that the SLS hardening of BLR leaves no BLR instructions. | ||
| 581 | + Here we also check that there are no BR instructions with anything except an | ||
| 582 | + x16 or x17 register. This is because a `BTI c` instruction can be branched | ||
| 583 | + to using a BLR instruction using any register, but can only be branched to | ||
| 584 | + with a BR using an x16 or x17 register. | ||
| 585 | + */ | ||
| 586 | +typedef int (foo) (int, int); | ||
| 587 | +typedef void (bar) (int, int); | ||
| 588 | +struct sls_testclass { | ||
| 589 | + foo *x; | ||
| 590 | + bar *y; | ||
| 591 | + int left; | ||
| 592 | + int right; | ||
| 593 | +}; | ||
| 594 | + | ||
| 595 | +/* We test both RTL patterns for a call which returns a value and a call which | ||
| 596 | + does not. */ | ||
| 597 | +int blr_call_value (struct sls_testclass x) | ||
| 598 | +{ | ||
| 599 | + int retval = x.x(x.left, x.right); | ||
| 600 | + if (retval % 10) | ||
| 601 | + return 100; | ||
| 602 | + return 9; | ||
| 603 | +} | ||
| 604 | + | ||
| 605 | +int blr_call (struct sls_testclass x) | ||
| 606 | +{ | ||
| 607 | + x.y(x.left, x.right); | ||
| 608 | + if (x.left % 10) | ||
| 609 | + return 100; | ||
| 610 | + return 9; | ||
| 611 | +} | ||
| 612 | + | ||
| 613 | +/* { dg-final { scan-assembler-not {\tblr\t} } } */ | ||
| 614 | +/* { dg-final { scan-assembler-not {\tbr\tx(?!16|17)} } } */ | ||
| 615 | +/* { dg-final { scan-assembler {\tbr\tx(16|17)} } } */ | ||
| 616 | + | ||
| 617 | diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c | ||
| 618 | new file mode 100644 | ||
| 619 | index 0000000..88bafff | ||
| 620 | --- /dev/null | ||
| 621 | +++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c | ||
| 622 | @@ -0,0 +1,33 @@ | ||
| 623 | +/* { dg-additional-options "-mharden-sls=blr -save-temps" } */ | ||
| 624 | +/* Ensure that the SLS hardening of BLR leaves no BLR instructions. | ||
| 625 | + We only test that all BLR instructions have been removed, not that the | ||
| 626 | + resulting code makes sense. */ | ||
| 627 | +typedef int (foo) (int, int); | ||
| 628 | +typedef void (bar) (int, int); | ||
| 629 | +struct sls_testclass { | ||
| 630 | + foo *x; | ||
| 631 | + bar *y; | ||
| 632 | + int left; | ||
| 633 | + int right; | ||
| 634 | +}; | ||
| 635 | + | ||
| 636 | +/* We test both RTL patterns for a call which returns a value and a call which | ||
| 637 | + does not. */ | ||
| 638 | +int blr_call_value (struct sls_testclass x) | ||
| 639 | +{ | ||
| 640 | + int retval = x.x(x.left, x.right); | ||
| 641 | + if (retval % 10) | ||
| 642 | + return 100; | ||
| 643 | + return 9; | ||
| 644 | +} | ||
| 645 | + | ||
| 646 | +int blr_call (struct sls_testclass x) | ||
| 647 | +{ | ||
| 648 | + x.y(x.left, x.right); | ||
| 649 | + if (x.left % 10) | ||
| 650 | + return 100; | ||
| 651 | + return 9; | ||
| 652 | +} | ||
| 653 | + | ||
| 654 | +/* { dg-final { scan-assembler-not {\tblr\t} } } */ | ||
| 655 | +/* { dg-final { scan-assembler {\tbr\tx[0-9][0-9]?} } } */ | ||
| 656 | -- | ||
| 657 | 2.7.4 | ||
| 658 | |||
