author    Ross Burton <ross@burtonini.com>                       2020-10-23 09:39:56 +0100
committer Richard Purdie <richard.purdie@linuxfoundation.org>   2020-10-30 17:23:19 +0000
commit    f3be15a8a0e3c53d9d9edde3bdc2caa6431c1b67 (patch)
tree      dfe807990f6ffc50bed314121e7522caeb54d233 /meta/recipes-devtools
parent    13936f5f35fcb36d23f45f9fedebf0b115c99db0 (diff)
download  poky-f3be15a8a0e3c53d9d9edde3bdc2caa6431c1b67.tar.gz
gcc: mitigate the Straight-line Speculation attack
Straight-line Speculation is a SPECTRE-like attack on Armv8-A. Further details
can be found in the Arm white paper:
https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability/downloads/straight-line-speculation

Backport the GCC patches to mitigate the attack.

CVE: CVE-2020-13844

(From OE-Core rev: 406a1636e1111729e3bb0fe804ef28c8a51c22e3)

Signed-off-by: Ross Burton <ross.burton@arm.com>
Signed-off-by: Steve Sakoman <steve@sakoman.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
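As a quick illustration of what the backported mitigation does (a minimal sketch; the file name, function, and compiler invocations below are ours, not part of the commit): with `-mharden-sls=retbr` the compiler appends a speculation barrier after every RET and BR it emits, using SB on cores with the v8.5-A SB extension and DSB SY plus ISB otherwise.

```c
/* sls-demo.c -- illustrative only, not part of this commit.
 *
 * Without hardening:
 *   aarch64-linux-gnu-gcc -O2 -S sls-demo.c
 *   ...the epilogue ends in a bare "ret".
 * With hardening:
 *   aarch64-linux-gnu-gcc -O2 -mharden-sls=retbr -S sls-demo.c
 *   ...the epilogue becomes "ret; dsb sy; isb"
 *   (or "ret; sb" when built with an -march value that includes +sb).
 */
int add_one (int x)
{
  return x + 1;   /* the function return compiles to a RET, which gets a barrier */
}
```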
Diffstat (limited to 'meta/recipes-devtools')
-rw-r--r-- meta/recipes-devtools/gcc/gcc-9.3.inc                                                              |   3
-rw-r--r-- meta/recipes-devtools/gcc/gcc-9.3/0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch |  204
-rw-r--r-- meta/recipes-devtools/gcc/gcc-9.3/0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch |  600
-rw-r--r-- meta/recipes-devtools/gcc/gcc-9.3/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch             |  659
4 files changed, 1466 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-9.3.inc b/meta/recipes-devtools/gcc/gcc-9.3.inc
index 38dee001d4..4c54ba250a 100644
--- a/meta/recipes-devtools/gcc/gcc-9.3.inc
+++ b/meta/recipes-devtools/gcc/gcc-9.3.inc
@@ -69,6 +69,9 @@ SRC_URI = "\
     file://0037-CVE-2019-14250-Check-zero-value-in-simple_object_elf.patch \
     file://0038-gentypes-genmodes-Do-not-use-__LINE__-for-maintainin.patch \
     file://0039-process_alt_operands-Don-t-match-user-defined-regs-o.patch \
+    file://0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch \
+    file://0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch \
+    file://0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch \
 "
 S = "${TMPDIR}/work-shared/gcc-${PV}-${PR}/gcc-${PV}"
 SRC_URI[sha256sum] = "71e197867611f6054aa1119b13a0c0abac12834765fe2d81f35ac57f84f742d1"
diff --git a/meta/recipes-devtools/gcc/gcc-9.3/0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch b/meta/recipes-devtools/gcc/gcc-9.3/0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch
new file mode 100644
index 0000000000..a7e29f4bd7
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-9.3/0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch
@@ -0,0 +1,204 @@
CVE: CVE-2020-13844
Upstream-Status: Backport
Signed-off-by: Ross Burton <ross.burton@arm.com>

From 20da13e395bde597d8337167c712039c8f923c3b Mon Sep 17 00:00:00 2001
From: Matthew Malcomson <matthew.malcomson@arm.com>
Date: Thu, 9 Jul 2020 09:11:58 +0100
Subject: [PATCH 1/3] aarch64: New Straight Line Speculation (SLS) mitigation
 flags

Here we introduce the flags that will be used for straight line speculation.

The new flag introduced is `-mharden-sls=`.
This flag can take arguments of `none`, `all`, or a comma-separated list
of one or more of `retbr` or `blr`.
`none` indicates no special mitigation of the straight line speculation
vulnerability.
`all` requests all mitigations currently implemented.
`retbr` requests that the RET and BR instructions have a speculation
barrier inserted after them.
`blr` requests that BLR instructions are replaced by a BL to a function
stub using a BR with a speculation barrier after it.

Setting this on a per-function basis using attributes or the like is not
enabled, but may be in the future.

(cherry picked from commit a9ba2a9b77bec7eacaf066801f22d1c366a2bc86)

gcc/ChangeLog:

2020-06-02  Matthew Malcomson  <matthew.malcomson@arm.com>

	* config/aarch64/aarch64-protos.h (aarch64_harden_sls_retbr_p):
	New.
	(aarch64_harden_sls_blr_p): New.
	* config/aarch64/aarch64.c (enum aarch64_sls_hardening_type):
	New.
	(aarch64_harden_sls_retbr_p): New.
	(aarch64_harden_sls_blr_p): New.
	(aarch64_validate_sls_mitigation): New.
	(aarch64_override_options): Parse options for SLS mitigation.
	* config/aarch64/aarch64.opt (-mharden-sls): New option.
	* doc/invoke.texi: Document new option.
---
 gcc/config/aarch64/aarch64-protos.h |  3 ++
 gcc/config/aarch64/aarch64.c        | 76 +++++++++++++++++++++++++++++
 gcc/config/aarch64/aarch64.opt      |  4 ++
 gcc/doc/invoke.texi                 | 12 +++++
 4 files changed, 95 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index c083cad53..31493f412 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -644,4 +644,7 @@ poly_uint64 aarch64_regmode_natural_size (machine_mode);
 
 bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
 
+extern bool aarch64_harden_sls_retbr_p (void);
+extern bool aarch64_harden_sls_blr_p (void);
+
 #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b452a53af..269ff6c92 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11734,6 +11734,79 @@ aarch64_validate_mcpu (const char *str, const struct processor **res,
   return false;
 }
 
+/* Straight line speculation indicators.  */
+enum aarch64_sls_hardening_type
+{
+  SLS_NONE = 0,
+  SLS_RETBR = 1,
+  SLS_BLR = 2,
+  SLS_ALL = 3,
+};
+static enum aarch64_sls_hardening_type aarch64_sls_hardening;
+
+/* Return whether we should mitigate Straight Line Speculation for the RET
+   and BR instructions.  */
+bool
+aarch64_harden_sls_retbr_p (void)
+{
+  return aarch64_sls_hardening & SLS_RETBR;
+}
+
+/* Return whether we should mitigate Straight Line Speculation for the BLR
+   instruction.  */
+bool
+aarch64_harden_sls_blr_p (void)
+{
+  return aarch64_sls_hardening & SLS_BLR;
+}
+
+/* As of yet we only allow setting these options globally, in the future we may
+   allow setting them per function.  */
+static void
+aarch64_validate_sls_mitigation (const char *const_str)
+{
+  char *token_save = NULL;
+  char *str = NULL;
+
+  if (strcmp (const_str, "none") == 0)
+    {
+      aarch64_sls_hardening = SLS_NONE;
+      return;
+    }
+  if (strcmp (const_str, "all") == 0)
+    {
+      aarch64_sls_hardening = SLS_ALL;
+      return;
+    }
+
+  char *str_root = xstrdup (const_str);
+  str = strtok_r (str_root, ",", &token_save);
+  if (!str)
+    error ("invalid argument given to %<-mharden-sls=%>");
+
+  int temp = SLS_NONE;
+  while (str)
+    {
+      if (strcmp (str, "blr") == 0)
+	temp |= SLS_BLR;
+      else if (strcmp (str, "retbr") == 0)
+	temp |= SLS_RETBR;
+      else if (strcmp (str, "none") == 0 || strcmp (str, "all") == 0)
+	{
+	  error ("%<%s%> must be by itself for %<-mharden-sls=%>", str);
+	  break;
+	}
+      else
+	{
+	  error ("invalid argument %<%s%> for %<-mharden-sls=%>", str);
+	  break;
+	}
+      str = strtok_r (NULL, ",", &token_save);
+    }
+  aarch64_sls_hardening = (aarch64_sls_hardening_type) temp;
+  free (str_root);
+}
+
 /* Parses CONST_STR for branch protection features specified in
    aarch64_branch_protect_types, and set any global variables required.  Returns
    the parsing result and assigns LAST_STR to the last processed token from
@@ -11972,6 +12045,9 @@ aarch64_override_options (void)
   selected_arch = NULL;
   selected_tune = NULL;
 
+  if (aarch64_harden_sls_string)
+    aarch64_validate_sls_mitigation (aarch64_harden_sls_string);
+
   if (aarch64_branch_protection_string)
     aarch64_validate_mbranch_protection (aarch64_branch_protection_string);
 
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 3c6d1cc90..d27ab6df8 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -71,6 +71,10 @@ mgeneral-regs-only
 Target Report RejectNegative Mask(GENERAL_REGS_ONLY) Save
 Generate code which uses only the general registers.
 
+mharden-sls=
+Target RejectNegative Joined Var(aarch64_harden_sls_string)
+Generate code to mitigate against straight line speculation.
+
 mfix-cortex-a53-835769
 Target Report Var(aarch64_fix_a53_err835769) Init(2) Save
 Workaround for ARM Cortex-A53 Erratum number 835769.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 2f7ffe456..5f04a7d2b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -638,6 +638,7 @@ Objective-C and Objective-C++ Dialects}.
 -mpc-relative-literal-loads @gol
 -msign-return-address=@var{scope} @gol
 -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}]|@var{bti} @gol
+-mharden-sls=@var{opts} @gol
 -march=@var{name}  -mcpu=@var{name}  -mtune=@var{name} @gol
 -moverride=@var{string}  -mverbose-cost-dump @gol
 -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg} @gol
@@ -15955,6 +15956,17 @@ argument @samp{leaf} can be used to extend the signing to include leaf
 functions.
 @samp{bti} turns on branch target identification mechanism.
 
+@item -mharden-sls=@var{opts}
+@opindex mharden-sls
+Enable compiler hardening against straight line speculation (SLS).
+@var{opts} is a comma-separated list of the following options:
+@table @samp
+@item retbr
+@item blr
+@end table
+In addition, @samp{-mharden-sls=all} enables all SLS hardening while
+@samp{-mharden-sls=none} disables all SLS hardening.
+
 @item -msve-vector-bits=@var{bits}
 @opindex msve-vector-bits
 Specify the number of bits in an SVE vector register.  This option only has
-- 
2.25.1

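To make the accepted argument grammar concrete, here is a small standalone sketch (our code, not GCC's) that mirrors the accept/reject behaviour of aarch64_validate_sls_mitigation() above; parse_harden_sls and its return convention are invented for illustration, and where GCC reports an error we simply return -1.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { SLS_NONE = 0, SLS_RETBR = 1, SLS_BLR = 2, SLS_ALL = 3 };

/* Returns the hardening mask, or -1 where GCC would emit an error.  */
static int parse_harden_sls (const char *arg)
{
  if (strcmp (arg, "none") == 0) return SLS_NONE;
  if (strcmp (arg, "all") == 0)  return SLS_ALL;

  int mask = SLS_NONE;
  char *dup = strdup (arg);
  char *save = NULL;
  char *tok = strtok_r (dup, ",", &save);
  if (!tok)
    mask = -1;                            /* empty argument is rejected */
  for (; tok; tok = strtok_r (NULL, ",", &save))
    {
      if (strcmp (tok, "retbr") == 0)     mask |= SLS_RETBR;
      else if (strcmp (tok, "blr") == 0)  mask |= SLS_BLR;
      else { mask = -1; break; }          /* incl. "none"/"all" inside a list */
    }
  free (dup);
  return mask;
}

int main (void)
{
  printf ("%d\n", parse_harden_sls ("retbr,blr"));  /* 3, same as "all" */
  printf ("%d\n", parse_harden_sls ("retbr,none")); /* -1: "none" must be alone */
  printf ("%d\n", parse_harden_sls ("bogus"));      /* -1: invalid argument */
  return 0;
}
```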
diff --git a/meta/recipes-devtools/gcc/gcc-9.3/0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch b/meta/recipes-devtools/gcc/gcc-9.3/0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch
new file mode 100644
index 0000000000..c972088d2b
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-9.3/0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch
@@ -0,0 +1,600 @@
CVE: CVE-2020-13844
Upstream-Status: Backport
Signed-off-by: Ross Burton <ross.burton@arm.com>

From dc586a749228ecfb71f72ec2ca10e6f7b6874af3 Mon Sep 17 00:00:00 2001
From: Matthew Malcomson <matthew.malcomson@arm.com>
Date: Thu, 9 Jul 2020 09:11:59 +0100
Subject: [PATCH 2/3] aarch64: Introduce SLS mitigation for RET and BR
 instructions

Instructions following RET or BR are not necessarily executed.  In order
to avoid speculation past RET and BR we can simply append a speculation
barrier.

Since these speculation barriers will not be architecturally executed,
they are not expected to add a high performance penalty.

The speculation barrier is to be SB when targeting architectures which
have this enabled, and DSB SY + ISB otherwise.

We add tests for each of the cases where such an instruction was seen.

This is implemented by modifying each machine description pattern that
emits either a RET or a BR instruction.  We choose not to use something
like `TARGET_ASM_FUNCTION_EPILOGUE` since it does not affect the
`indirect_jump`, `jump`, `sibcall_insn` and `sibcall_value_insn`
patterns and we find it preferable to implement the functionality in the
same way for every pattern.

There is one particular case which is slightly tricky.  The
implementation of TARGET_ASM_TRAMPOLINE_TEMPLATE uses a BR which needs
to be mitigated against.  The trampoline template is used *once* per
compilation unit, and the TRAMPOLINE_SIZE is exposed to the user via the
builtin macro __LIBGCC_TRAMPOLINE_SIZE__.
In the future we may implement function specific attributes to turn on
and off hardening on a per-function basis.
The fixed nature of the trampoline described above implies it will be
safer to ensure this speculation barrier is always used.

Testing:
  Bootstrap and regtest done on aarch64-none-linux
  Used a temporary hack(1) to use these options on every test in the
  testsuite and a script to check that the output never emitted an
  unmitigated RET or BR.

1) Temporary hack was a change to the testsuite to always use
`-save-temps` and run a script on the assembly output of those
compilations which produced one to ensure every RET or BR is immediately
followed by a speculation barrier.

(cherry picked from be178ecd5ac1fe1510d960ff95c66d0ff831afe1)

gcc/ChangeLog:

	* config/aarch64/aarch64-protos.h (aarch64_sls_barrier): New.
	* config/aarch64/aarch64.c (aarch64_output_casesi): Emit
	speculation barrier after BR instruction if needs be.
	(aarch64_trampoline_init): Handle ptr_mode value & adjust size
	of code copied.
	(aarch64_sls_barrier): New.
	(aarch64_asm_trampoline_template): Add needed barriers.
	* config/aarch64/aarch64.h (AARCH64_ISA_SB): New.
	(TARGET_SB): New.
	(TRAMPOLINE_SIZE): Account for barrier.
	* config/aarch64/aarch64.md (indirect_jump, *casesi_dispatch,
	simple_return, *do_return, *sibcall_insn, *sibcall_value_insn):
	Emit barrier if needs be, also account for possible barrier using
	"sls_length" attribute.
	(sls_length): New attribute.
	(length): Determine default using any non-default sls_length
	value.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c: New test.
	* gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c:
	New test.
	* gcc.target/aarch64/sls-mitigation/sls-mitigation.exp: New file.
	* lib/target-supports.exp (check_effective_target_aarch64_asm_sb_ok):
	New proc.
---
 gcc/config/aarch64/aarch64-protos.h           |   1 +
 gcc/config/aarch64/aarch64.c                  |  41 +++++-
 gcc/config/aarch64/aarch64.h                  |  10 +-
 gcc/config/aarch64/aarch64.md                 |  75 ++++++++---
 .../sls-mitigation/sls-miti-retbr-pacret.c    |  15 +++
 .../aarch64/sls-mitigation/sls-miti-retbr.c   | 119 ++++++++++++++++++
 .../aarch64/sls-mitigation/sls-mitigation.exp |  73 +++++++++++
 gcc/testsuite/lib/target-supports.exp         |   3 +-
 8 files changed, 312 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-mitigation.exp

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 31493f412..885eae893 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -644,6 +644,7 @@ poly_uint64 aarch64_regmode_natural_size (machine_mode);
 
 bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
 
+const char *aarch64_sls_barrier (int);
 extern bool aarch64_harden_sls_retbr_p (void);
 extern bool aarch64_harden_sls_blr_p (void);
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 269ff6c92..dff61105c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -8412,8 +8412,8 @@ aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
 static void
 aarch64_asm_trampoline_template (FILE *f)
 {
-  int offset1 = 16;
-  int offset2 = 20;
+  int offset1 = 24;
+  int offset2 = 28;
 
   if (aarch64_bti_enabled ())
     {
@@ -8436,6 +8436,17 @@ aarch64_asm_trampoline_template (FILE *f)
     }
   asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
 
+  /* We always emit a speculation barrier.
+     This is because the same trampoline template is used for every nested
+     function.  Since nested functions are not particularly common or
+     performant we don't worry too much about the extra instructions to copy
+     around.
+     This is not yet a problem, since we have not yet implemented function
+     specific attributes to choose between hardening against straight line
+     speculation or not, but such function specific attributes are likely to
+     happen in the future.  */
+  asm_fprintf (f, "\tdsb\tsy\n\tisb\n");
+
   /* The trampoline needs an extra padding instruction.  In case if BTI is
      enabled the padding instruction is replaced by the BTI instruction at
      the beginning.  */
@@ -8450,10 +8461,14 @@ static void
 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
 {
   rtx fnaddr, mem, a_tramp;
-  const int tramp_code_sz = 16;
+  const int tramp_code_sz = 24;
 
   /* Don't need to copy the trailing D-words, we fill those in below.  */
-  emit_block_move (m_tramp, assemble_trampoline_template (),
+  /* We create our own memory address in Pmode so that `emit_block_move` can
+     use parts of the backend which expect Pmode addresses.  */
+  rtx temp = convert_memory_address (Pmode, XEXP (m_tramp, 0));
+  emit_block_move (gen_rtx_MEM (BLKmode, temp),
+		   assemble_trampoline_template (),
 		   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
   fnaddr = XEXP (DECL_RTL (fndecl), 0);
@@ -8640,6 +8655,8 @@ aarch64_output_casesi (rtx *operands)
   output_asm_insn (buf, operands);
   output_asm_insn (patterns[index][1], operands);
   output_asm_insn ("br\t%3", operands);
+  output_asm_insn (aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()),
+		   operands);
   assemble_label (asm_out_file, label);
   return "";
 }
@@ -18976,6 +18993,22 @@ aarch64_file_end_indicate_exec_stack ()
 #undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI
 #undef GNU_PROPERTY_AARCH64_FEATURE_1_AND
 
+/* Helper function for straight line speculation.
+   Return what barrier should be emitted for straight line speculation
+   mitigation.
+   When not mitigating against straight line speculation this function returns
+   an empty string.
+   When mitigating against straight line speculation, use:
+   * SB when the v8.5-A SB extension is enabled.
+   * DSB+ISB otherwise.  */
+const char *
+aarch64_sls_barrier (int mitigation_required)
+{
+  return mitigation_required
+    ? (TARGET_SB ? "sb" : "dsb\tsy\n\tisb")
+    : "";
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 772a97296..72ddc6fd9 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -235,6 +235,7 @@ extern unsigned aarch64_architecture_version;
 #define AARCH64_ISA_F16FML	   (aarch64_isa_flags & AARCH64_FL_F16FML)
 #define AARCH64_ISA_RCPC8_4	   (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
 #define AARCH64_ISA_V8_5	   (aarch64_isa_flags & AARCH64_FL_V8_5)
+#define AARCH64_ISA_SB		   (aarch64_isa_flags & AARCH64_FL_SB)
 
 /* Crypto is an optional extension to AdvSIMD.  */
 #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
@@ -285,6 +286,9 @@ extern unsigned aarch64_architecture_version;
 #define TARGET_FIX_ERR_A53_835769_DEFAULT 1
 #endif
 
+/* SB instruction is enabled through +sb.  */
+#define TARGET_SB (AARCH64_ISA_SB)
+
 /* Apply the workaround for Cortex-A53 erratum 835769.  */
 #define TARGET_FIX_ERR_A53_835769 \
   ((aarch64_fix_a53_err835769 == 2) \
@@ -931,8 +935,10 @@ typedef struct
 
 #define RETURN_ADDR_RTX aarch64_return_addr
 
-/* BTI c + 3 insns + 2 pointer-sized entries.  */
-#define TRAMPOLINE_SIZE (TARGET_ILP32 ? 24 : 32)
+/* BTI c + 3 insns
+   + sls barrier of DSB + ISB.
+   + 2 pointer-sized entries.  */
+#define TRAMPOLINE_SIZE (24 + (TARGET_ILP32 ? 8 : 16))
 
 /* Trampolines contain dwords, so must be dword aligned.  */
 #define TRAMPOLINE_ALIGNMENT 64
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index cc5a887d4..494aee964 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -331,10 +331,25 @@
 ;; Attribute that specifies whether the alternative uses MOVPRFX.
 (define_attr "movprfx" "no,yes" (const_string "no"))
 
+;; Attribute to specify that an alternative has the length of a single
+;; instruction plus a speculation barrier.
+(define_attr "sls_length" "none,retbr,casesi" (const_string "none"))
+
 (define_attr "length" ""
   (cond [(eq_attr "movprfx" "yes")
            (const_int 8)
-        ] (const_int 4)))
+
+         (eq_attr "sls_length" "retbr")
+           (cond [(match_test "!aarch64_harden_sls_retbr_p ()") (const_int 4)
+                  (match_test "TARGET_SB") (const_int 8)]
+                 (const_int 12))
+
+         (eq_attr "sls_length" "casesi")
+           (cond [(match_test "!aarch64_harden_sls_retbr_p ()") (const_int 16)
+                  (match_test "TARGET_SB") (const_int 20)]
+                 (const_int 24))
+        ]
+          (const_int 4)))
 
 ;; Strictly for compatibility with AArch32 in pipeline models, since AArch64 has
 ;; no predicated insns.
@@ -370,8 +385,12 @@
 (define_insn "indirect_jump"
   [(set (pc) (match_operand:DI 0 "register_operand" "r"))]
   ""
-  "br\\t%0"
-  [(set_attr "type" "branch")]
+  {
+    output_asm_insn ("br\\t%0", operands);
+    return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
+  }
+  [(set_attr "type" "branch")
+   (set_attr "sls_length" "retbr")]
 )
 
 (define_insn "jump"
@@ -657,7 +676,7 @@
   "*
   return aarch64_output_casesi (operands);
   "
-  [(set_attr "length" "16")
+  [(set_attr "sls_length" "casesi")
   (set_attr "type" "branch")]
 )
 
@@ -736,14 +755,18 @@
   [(return)]
   ""
   {
+    const char *ret = NULL;
    if (aarch64_return_address_signing_enabled ()
	&& TARGET_ARMV8_3
	&& !crtl->calls_eh_return)
-      return "retaa";
-
-    return "ret";
+      ret = "retaa";
+    else
+      ret = "ret";
+    output_asm_insn (ret, operands);
+    return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
  }
-  [(set_attr "type" "branch")]
+  [(set_attr "type" "branch")
+   (set_attr "sls_length" "retbr")]
 )
 
 (define_expand "return"
@@ -755,8 +778,12 @@
 (define_insn "simple_return"
   [(simple_return)]
   "aarch64_use_simple_return_insn_p ()"
-  "ret"
-  [(set_attr "type" "branch")]
+  {
+    output_asm_insn ("ret", operands);
+    return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
+  }
+  [(set_attr "type" "branch")
+   (set_attr "sls_length" "retbr")]
 )
 
 (define_insn "*cb<optab><mode>1"
@@ -947,10 +974,16 @@
    (match_operand 1 "" ""))
   (return)]
   "SIBLING_CALL_P (insn)"
-  "@
-   br\\t%0
-   b\\t%c0"
-  [(set_attr "type" "branch, branch")]
+  {
+    if (which_alternative == 0)
+      {
+	output_asm_insn ("br\\t%0", operands);
+	return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
+      }
+    return "b\\t%c0";
+  }
+  [(set_attr "type" "branch, branch")
+   (set_attr "sls_length" "retbr,none")]
 )
 
 (define_insn "*sibcall_value_insn"
@@ -960,10 +993,16 @@
    (match_operand 2 "" "")))
   (return)]
   "SIBLING_CALL_P (insn)"
-  "@
-   br\\t%1
-   b\\t%c1"
-  [(set_attr "type" "branch, branch")]
+  {
+    if (which_alternative == 0)
+      {
+	output_asm_insn ("br\\t%1", operands);
+	return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
+      }
+    return "b\\t%c1";
+  }
+  [(set_attr "type" "branch, branch")
+   (set_attr "sls_length" "retbr,none")]
 )
 
 ;; Call subroutine returning any type.
diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c
new file mode 100644
index 000000000..7656123ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c
@@ -0,0 +1,15 @@
+/* Avoid ILP32 since pacret is only available for LP64 */
+/* { dg-do compile { target { ! ilp32 } } } */
+/* { dg-additional-options "-mharden-sls=retbr -mbranch-protection=pac-ret -march=armv8.3-a" } */
+
+/* Testing the do_return pattern for retaa.  */
+long retbr_subcall(void);
+long retbr_do_return_retaa(void)
+{
+  return retbr_subcall()+1;
+}
+
+/* Ensure there are no BR or RET instructions which are not directly followed
+   by a speculation barrier.  */
+/* { dg-final { scan-assembler-not {\t(br|ret|retaa)\tx[0-9][0-9]?\n\t(?!dsb\tsy\n\tisb)} } } */
+/* { dg-final { scan-assembler-not {ret\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c
new file mode 100644
index 000000000..573b30cdc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c
@@ -0,0 +1,119 @@
+/* We ensure that -Wpedantic is off since it complains about the trampolines
+   we explicitly want to test.  */
+/* { dg-additional-options "-mharden-sls=retbr -Wno-pedantic " } */
+/*
+   Ensure that the SLS hardening of RET and BR leaves no unprotected RET/BR
+   instructions.
+  */
+typedef int (foo) (int, int);
+typedef void (bar) (int, int);
+struct sls_testclass {
+  foo *x;
+  bar *y;
+  int left;
+  int right;
+};
+
+int
+retbr_sibcall_value_insn (struct sls_testclass x)
+{
+  return x.x(x.left, x.right);
+}
+
+void
+retbr_sibcall_insn (struct sls_testclass x)
+{
+  x.y(x.left, x.right);
+}
+
+/* Aim to test two different returns.
+   One that introduces a tail call in the middle of the function, and one that
+   has a normal return.  */
+int
+retbr_multiple_returns (struct sls_testclass x)
+{
+  int temp;
+  if (x.left % 10)
+    return x.x(x.left, 100);
+  else if (x.right % 20)
+    {
+      return x.x(x.left * x.right, 100);
+    }
+  temp = x.left % x.right;
+  temp *= 100;
+  temp /= 2;
+  return temp % 3;
+}
+
+void
+retbr_multiple_returns_void (struct sls_testclass x)
+{
+  if (x.left % 10)
+    {
+      x.y(x.left, 100);
+    }
+  else if (x.right % 20)
+    {
+      x.y(x.left * x.right, 100);
+    }
+  return;
+}
+
+/* Testing the casesi jump via register.  */
+__attribute__ ((optimize ("Os")))
+int
+retbr_casesi_dispatch (struct sls_testclass x)
+{
+  switch (x.left)
+    {
+    case -5:
+      return -2;
+    case -3:
+      return -1;
+    case 0:
+      return 0;
+    case 3:
+      return 1;
+    case 5:
+      break;
+    default:
+      __builtin_unreachable ();
+    }
+  return x.right;
+}
+
+/* Testing the BR in trampolines is mitigated against.  */
+void f1 (void *);
+void f3 (void *, void (*)(void *));
+void f2 (void *);
+
+int
+retbr_trampolines (void *a, int b)
+{
+  if (!b)
+    {
+      f1 (a);
+      return 1;
+    }
+  if (b)
+    {
+      void retbr_tramp_internal (void *c)
+      {
+	if (c == a)
+	  f2 (c);
+      }
+      f3 (a, retbr_tramp_internal);
+    }
+  return 0;
+}
+
+/* Testing the indirect_jump pattern.  */
+void
+retbr_indirect_jump (int *buf)
+{
+  __builtin_longjmp(buf, 1);
+}
+
+/* Ensure there are no BR or RET instructions which are not directly followed
+   by a speculation barrier.  */
+/* { dg-final { scan-assembler-not {\t(br|ret|retaa)\tx[0-9][0-9]?\n\t(?!dsb\tsy\n\tisb|sb)} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-mitigation.exp b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-mitigation.exp
new file mode 100644
index 000000000..812250379
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-mitigation.exp
@@ -0,0 +1,73 @@
+# Regression driver for SLS mitigation on AArch64.
+# Copyright (C) 2020 Free Software Foundation, Inc.
+# Contributed by ARM Ltd.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.  */
+
+# Exit immediately if this isn't an AArch64 target.
+if {![istarget aarch64*-*-*] } then {
+  return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+load_lib torture-options.exp
+
+# If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+    set DEFAULT_CFLAGS " "
+}
+
+# Initialize `dg'.
+dg-init
+torture-init
+
+# Use different architectures as well as the normal optimisation options.
+# (i.e. use both SB and DSB+ISB barriers).
+
+set save-dg-do-what-default ${dg-do-what-default}
+# Main loop.
+# Run with torture tests (i.e. a bunch of different optimisation levels) just
+# to increase test coverage.
+set dg-do-what-default assemble
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
+	"-save-temps" $DEFAULT_CFLAGS
+
+# Run the same tests but this time with SB extension.
+# Since not all supported assemblers will support that extension we decide
+# whether to assemble or just compile based on whether the extension is
+# supported for the available assembler.
+
+set templist {}
+foreach x $DG_TORTURE_OPTIONS {
+  lappend templist "$x -march=armv8.3-a+sb "
+  lappend templist "$x -march=armv8-a+sb "
+}
+set-torture-options $templist
+if { [check_effective_target_aarch64_asm_sb_ok] } {
+    set dg-do-what-default assemble
+} else {
+    set dg-do-what-default compile
+}
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
+	"-save-temps" $DEFAULT_CFLAGS
+set dg-do-what-default ${save-dg-do-what-default}
+
+# All done.
+torture-finish
+dg-finish
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index ea9a50ccb..79482f9b6 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8579,7 +8579,8 @@ proc check_effective_target_aarch64_tiny { } {
 # Create functions to check that the AArch64 assembler supports the
 # various architecture extensions via the .arch_extension pseudo-op.
 
-foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"} {
+foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
+			  "sb"} {
     eval [string map [list FUNC $aarch64_ext] {
	proc check_effective_target_aarch64_asm_FUNC_ok { } {
	    if { [istarget aarch64*-*-*] } {
-- 
2.25.1

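The new "sls_length" attribute above feeds the instruction-length calculation that the compiler uses for branch-range decisions. A small standalone sketch of that arithmetic (our code; the helper names are invented), matching the const_int values in the aarch64.md hunk:

```c
/* Every AArch64 instruction is 4 bytes; hardening appends "sb" (1 insn)
   or "dsb sy; isb" (2 insns) after the RET/BR.  */
#include <stdio.h>

/* Length of a RET/BR alternative tagged sls_length=retbr.  */
static int len_retbr (int harden, int have_sb)
{
  if (!harden)
    return 4;             /* bare ret/br */
  return have_sb ? 8      /* ret/br + sb */
                 : 12;    /* ret/br + dsb sy + isb */
}

/* Length of the casesi dispatch: 3 setup insns (12 bytes) plus a final BR
   that is sized exactly like the retbr case.  */
static int len_casesi (int harden, int have_sb)
{
  return 12 + len_retbr (harden, have_sb);
}

int main (void)
{
  printf ("retbr:  %d %d %d\n", len_retbr (0, 0), len_retbr (1, 1), len_retbr (1, 0));
  printf ("casesi: %d %d %d\n", len_casesi (0, 0), len_casesi (1, 1), len_casesi (1, 0));
  /* Prints "retbr:  4 8 12" and "casesi: 16 20 24", the same values the
     sls_length cond in the machine description yields.  */
  return 0;
}
```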
diff --git a/meta/recipes-devtools/gcc/gcc-9.3/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch b/meta/recipes-devtools/gcc/gcc-9.3/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch
new file mode 100644
index 0000000000..6dffef0a34
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-9.3/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch
@@ -0,0 +1,659 @@
CVE: CVE-2020-13844
Upstream-Status: Backport
Signed-off-by: Ross Burton <ross.burton@arm.com>

From 2155170525f93093b90a1a065e7ed71a925566e9 Mon Sep 17 00:00:00 2001
From: Matthew Malcomson <matthew.malcomson@arm.com>
Date: Thu, 9 Jul 2020 09:11:59 +0100
Subject: [PATCH 3/3] aarch64: Mitigate SLS for BLR instruction

This patch introduces the mitigation for Straight Line Speculation past
the BLR instruction.

This mitigation replaces BLR instructions with a BL to a stub which uses
a BR to jump to the original value.  These function stubs are then
appended with a speculation barrier to ensure no straight line
speculation happens after these jumps.

When optimising for speed we use a set of stubs for each function since
this should help the branch predictor make more accurate predictions
about where a stub should branch.

When optimising for size we use one set of stubs for all functions.
This set of stubs can have human readable names, and we are using
`__call_indirect_x<N>` for register x<N>.

When BTI branch protection is enabled the BLR instruction can jump to a
`BTI c` instruction using any register, while the BR instruction can
only jump to a `BTI c` instruction using the x16 or x17 registers.
Hence, in order to ensure this transformation is safe we mov the value
of the original register into x16 and use x16 for the BR.

As an example when optimising for size: a
    BLR x0
instruction would get transformed to something like
    BL __call_indirect_x0
where __call_indirect_x0 labels a thunk that contains
__call_indirect_x0:
    MOV X16, X0
    BR X16
    <speculation barrier>

The first version of this patch used local symbols specific to a
compilation unit to try and avoid relocations.
This was mistaken since functions coming from the same compilation unit
can still be in different sections, and the assembler will insert
relocations at jumps between sections.

On any relocation the linker is permitted to emit a veneer to handle
jumps between symbols that are very far apart.  The registers x16 and
x17 may be clobbered by these veneers.
Hence the function stubs cannot rely on the values of x16 and x17 being
the same as just before the function stub is called.

Similar can be said for the hot/cold partitioning of single functions,
so function-local stubs have the same restriction.

This updated version of the patch never emits function stubs for x16 and
x17, and instead forces other registers to be used.

Given the above, there is now no benefit to local symbols (since they
are not enough to avoid dealing with linker intricacies).  This patch
now uses global symbols with hidden visibility each stored in their own
COMDAT section.  This means stubs can be shared between compilation
units while still avoiding the PLT indirection.

This patch also removes the `__call_indirect_x30` stub (and
function-local equivalent) which would simply jump back to the original
location.

The function-local stubs are emitted to the assembly output file in one
chunk, which means we need not add the speculation barrier directly
after each one.
This is because we know for certain that the instructions directly after
the BR in all but the last function stub will be from another one of
these stubs and hence will not contain a speculation gadget.
Instead we add a speculation barrier at the end of the sequence of
stubs.

The global stubs are emitted in COMDAT/.linkonce sections by
themselves so that the linker can remove duplicates from multiple object
files.  This means they are not emitted in one chunk, and each one must
include the speculation barrier.

Another difference is that since the global stubs are shared across
compilation units we do not know that all functions will be targeting an
architecture supporting the SB instruction.
Rather than provide multiple stubs for each architecture, we provide a
stub that will work for all architectures -- using the DSB+ISB barrier.

This mitigation does not apply for BLR instructions in the following
places:
- Some accesses to thread-local variables use a code sequence with a BLR
  instruction.  This code sequence is part of the binary interface between
  compiler and linker.  If this BLR instruction needs to be mitigated, it'd
  probably be best to do so in the linker.  It seems that the code sequence
  for thread-local variable access is unlikely to lead to a Spectre Revelation
  Gadget.
- PLT stubs are produced by the linker and each contain a BLR instruction.
  It seems that at most only after the last PLT stub a Spectre Revelation
  Gadget might appear.

Testing:
  Bootstrap and regtest on AArch64
  (with BOOT_CFLAGS="-mharden-sls=retbr,blr")
  Used a temporary hack(1) in gcc-dg.exp to use these options on every
  test in the testsuite, a slight modification to emit the speculation
  barrier after every function stub, and a script to check that the
  output never emitted a BLR, or unmitigated BR or RET instruction.
  Similar on an aarch64-none-elf cross-compiler.

1) Temporary hack emitted a speculation barrier at the end of every stub
function, and used a script to ensure that:
  a) Every RET or BR is immediately followed by a speculation barrier.
  b) No BLR instruction is emitted by compiler.

(cherry picked from 96b7f495f9269d5448822e4fc28882edb35a58d7)

gcc/ChangeLog:

	* config/aarch64/aarch64-protos.h (aarch64_indirect_call_asm):
	New declaration.
	* config/aarch64/aarch64.c (aarch64_regno_regclass): Handle new
	stub registers class.
	(aarch64_class_max_nregs): Likewise.
	(aarch64_register_move_cost): Likewise.
	(aarch64_sls_shared_thunks): Global array to store stub labels.
	(aarch64_sls_emit_function_stub): New.
	(aarch64_create_blr_label): New.
	(aarch64_sls_emit_blr_function_thunks): New.
	(aarch64_sls_emit_shared_blr_thunks): New.
	(aarch64_asm_file_end): New.
	(aarch64_indirect_call_asm): New.
	(TARGET_ASM_FILE_END): Use aarch64_asm_file_end.
	(TARGET_ASM_FUNCTION_EPILOGUE): Use
	aarch64_sls_emit_blr_function_thunks.
	* config/aarch64/aarch64.h (STUB_REGNUM_P): New.
	(enum reg_class): Add STUB_REGS class.
	(machine_function): Introduce `call_via` array for
	function-local stub labels.
	* config/aarch64/aarch64.md (*call_insn, *call_value_insn): Use
	aarch64_indirect_call_asm to emit code when hardening BLR
	instructions.
	* config/aarch64/constraints.md (Ucr): New constraint
	representing registers for indirect calls.  Is GENERAL_REGS
	usually, and STUB_REGS when hardening BLR instruction against
	SLS.
	* config/aarch64/predicates.md (aarch64_general_reg): STUB_REGS class
	is also a general register.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c: New test.
	* gcc.target/aarch64/sls-mitigation/sls-miti-blr.c: New test.
---
 gcc/config/aarch64/aarch64-protos.h           |   1 +
 gcc/config/aarch64/aarch64.c                  | 225 +++++++++++++++++-
 gcc/config/aarch64/aarch64.h                  |  15 ++
 gcc/config/aarch64/aarch64.md                 |  11 +-
 gcc/config/aarch64/constraints.md             |   9 +
 gcc/config/aarch64/predicates.md              |   3 +-
 .../aarch64/sls-mitigation/sls-miti-blr-bti.c |  40 ++++
 .../aarch64/sls-mitigation/sls-miti-blr.c     |  33 +++
 8 files changed, 328 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 885eae893..2676e43ae 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -645,6 +645,7 @@ poly_uint64 aarch64_regmode_natural_size (machine_mode);
 bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
 
 const char *aarch64_sls_barrier (int);
+const char *aarch64_indirect_call_asm (rtx);
 extern bool aarch64_harden_sls_retbr_p (void);
 extern bool aarch64_harden_sls_blr_p (void);
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index dff61105c..bc6c02c3a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -8190,6 +8190,9 @@ aarch64_label_mentioned_p (rtx x)
 enum reg_class
 aarch64_regno_regclass (unsigned regno)
 {
+  if (STUB_REGNUM_P (regno))
+    return STUB_REGS;
+
   if (GP_REGNUM_P (regno))
     return GENERAL_REGS;
 
@@ -8499,6 +8502,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
   unsigned int nregs;
   switch (regclass)
     {
+    case STUB_REGS:
     case TAILCALL_ADDR_REGS:
     case POINTER_REGS:
     case GENERAL_REGS:
@@ -10693,10 +10697,12 @@ aarch64_register_move_cost (machine_mode mode,
     = aarch64_tune_params.regmove_cost;
 
   /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
-  if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS)
+  if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS
+      || to == STUB_REGS)
     to = GENERAL_REGS;
 
-  if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
+  if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS
+      || from == STUB_REGS)
     from = GENERAL_REGS;
 
   /* Moving between GPR and stack cost is the same as GP2GP.  */
@@ -19009,6 +19015,215 @@ aarch64_sls_barrier (int mitigation_required)
     : "";
 }
 
+static GTY (()) tree aarch64_sls_shared_thunks[30];
+static GTY (()) bool aarch64_sls_shared_thunks_needed = false;
+const char *indirect_symbol_names[30] = {
+    "__call_indirect_x0",
+    "__call_indirect_x1",
+    "__call_indirect_x2",
+    "__call_indirect_x3",
+    "__call_indirect_x4",
+    "__call_indirect_x5",
+    "__call_indirect_x6",
+    "__call_indirect_x7",
+    "__call_indirect_x8",
+    "__call_indirect_x9",
+    "__call_indirect_x10",
+    "__call_indirect_x11",
+    "__call_indirect_x12",
+    "__call_indirect_x13",
+    "__call_indirect_x14",
+    "__call_indirect_x15",
+    "", /* "__call_indirect_x16", */
+    "", /* "__call_indirect_x17", */
+    "__call_indirect_x18",
+    "__call_indirect_x19",
+    "__call_indirect_x20",
+    "__call_indirect_x21",
+    "__call_indirect_x22",
+    "__call_indirect_x23",
+    "__call_indirect_x24",
+    "__call_indirect_x25",
+    "__call_indirect_x26",
+    "__call_indirect_x27",
+    "__call_indirect_x28",
+    "__call_indirect_x29",
+};
+
+/* Function to create a BLR thunk.  This thunk is used to mitigate straight
+   line speculation.  Instead of a simple BLR that can be speculated past,
+   we emit a BL to this thunk, and this thunk contains a BR to the relevant
+   register.  These thunks have the relevant speculation barriers put after
+   their indirect branch so that speculation is blocked.
+
+   We use such a thunk so the speculation barriers are kept off the
+   architecturally executed path in order to reduce the performance overhead.
+
+   When optimizing for size we use stubs shared by the linked object.
+   When optimizing for performance we emit stubs for each function in the hope
+   that the branch predictor can better train on jumps specific for a given
+   function.  */
+rtx
+aarch64_sls_create_blr_label (int regnum)
+{
+  gcc_assert (STUB_REGNUM_P (regnum));
+  if (optimize_function_for_size_p (cfun))
+    {
+      /* For the thunks shared between different functions in this compilation
+	 unit we use a named symbol -- this is just for users to more easily
+	 understand the generated assembly.  */
+      aarch64_sls_shared_thunks_needed = true;
+      const char *thunk_name = indirect_symbol_names[regnum];
+      if (aarch64_sls_shared_thunks[regnum] == NULL)
+	{
+	  /* Build a decl representing this function stub and record it for
+	     later.  We build a decl here so we can use the GCC machinery for
+	     handling sections automatically (through `get_named_section` and
+	     `make_decl_one_only`).  That saves us a lot of trouble handling
+	     the specifics of different output file formats.  */
+	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
+				  get_identifier (thunk_name),
+				  build_function_type_list (void_type_node,
+							    NULL_TREE));
+	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
+					   NULL_TREE, void_type_node);
+	  TREE_PUBLIC (decl) = 1;
+	  TREE_STATIC (decl) = 1;
+	  DECL_IGNORED_P (decl) = 1;
+	  DECL_ARTIFICIAL (decl) = 1;
+	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+	  resolve_unique_section (decl, 0, false);
+	  aarch64_sls_shared_thunks[regnum] = decl;
+	}
+
+      return gen_rtx_SYMBOL_REF (Pmode, thunk_name);
+    }
+
+  if (cfun->machine->call_via[regnum] == NULL)
+    cfun->machine->call_via[regnum]
+      = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
+  return cfun->machine->call_via[regnum];
+}
+
+/* Helper function for aarch64_sls_emit_blr_function_thunks and
+   aarch64_sls_emit_shared_blr_thunks below.  */
+static void
+aarch64_sls_emit_function_stub (FILE *out_file, int regnum)
+{
+  /* Save in x16 and branch to that function so this transformation does
+     not prevent jumping to `BTI c` instructions.  */
+  asm_fprintf (out_file, "\tmov\tx16, x%d\n", regnum);
+  asm_fprintf (out_file, "\tbr\tx16\n");
+}
+
+/* Emit all BLR stubs for this particular function.
+   Here we emit all the BLR stubs needed for the current function.  Since we
+   emit these stubs in a consecutive block we know there will be no speculation
+   gadgets between each stub, and hence we only emit a speculation barrier at
+   the end of the stub sequences.
+
+   This is called in the TARGET_ASM_FUNCTION_EPILOGUE hook.  */
+void
+aarch64_sls_emit_blr_function_thunks (FILE *out_file)
+{
+  if (! aarch64_harden_sls_blr_p ())
+    return;
+
+  bool any_functions_emitted = false;
+  /* We must save and restore the current function section since this assembly
+     is emitted at the end of the function.  This means it can be emitted *just
+     after* the cold section of a function.  That cold part would be emitted in
+     a different section.  That switch would trigger a `.cfi_endproc` directive
+     to be emitted in the original section and a `.cfi_startproc` directive to
+     be emitted in the new section.  Switching to the original section without
+     restoring would mean that the `.cfi_endproc` emitted as a function ends
+     would happen in a different section -- leaving an unmatched
+     `.cfi_startproc` in the cold text section and an unmatched `.cfi_endproc`
+     in the standard text section.  */
+  section *save_text_section = in_section;
+  switch_to_section (function_section (current_function_decl));
+  for (int regnum = 0; regnum < 30; ++regnum)
+    {
+      rtx specu_label = cfun->machine->call_via[regnum];
+      if (specu_label == NULL)
+	continue;
+
+      targetm.asm_out.print_operand (out_file, specu_label, 0);
+      asm_fprintf (out_file, ":\n");
+      aarch64_sls_emit_function_stub (out_file, regnum);
+      any_functions_emitted = true;
+    }
+  if (any_functions_emitted)
+    /* Can use the SB if needs be here, since this stub will only be used
+       by the current function, and hence for the current target.  */
+    asm_fprintf (out_file, "\t%s\n", aarch64_sls_barrier (true));
+  switch_to_section (save_text_section);
+}
+
+/* Emit shared BLR stubs for the current compilation unit.
+   Over the course of compiling this unit we may have converted some BLR
+   instructions to a BL to a shared stub function.  This is where we emit those
+   stub functions.
+   This function is for the stubs shared between different functions in this
+   compilation unit.  We share when optimizing for size instead of speed.
+
+   This function is called through the TARGET_ASM_FILE_END hook.  */
+void
+aarch64_sls_emit_shared_blr_thunks (FILE *out_file)
+{
+  if (! aarch64_sls_shared_thunks_needed)
+    return;
+
+  for (int regnum = 0; regnum < 30; ++regnum)
+    {
+      tree decl = aarch64_sls_shared_thunks[regnum];
+      if (!decl)
+	continue;
+
+      const char *name = indirect_symbol_names[regnum];
+      switch_to_section (get_named_section (decl, NULL, 0));
+      ASM_OUTPUT_ALIGN (out_file, 2);
+      targetm.asm_out.globalize_label (out_file, name);
+      /* Only emits if the compiler is configured for an assembler that can
+	 handle visibility directives.  */
+      targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
+      ASM_OUTPUT_TYPE_DIRECTIVE (out_file, name, "function");
+      ASM_OUTPUT_LABEL (out_file, name);
+      aarch64_sls_emit_function_stub (out_file, regnum);
+      /* Use the most conservative target to ensure it can always be used by any
+	 function in the translation unit.  */
+      asm_fprintf (out_file, "\tdsb\tsy\n\tisb\n");
+      ASM_DECLARE_FUNCTION_SIZE (out_file, name, decl);
+    }
+}
+
+/* Implement TARGET_ASM_FILE_END.  */
+void
+aarch64_asm_file_end ()
+{
+  aarch64_sls_emit_shared_blr_thunks (asm_out_file);
+  /* Since this function will be called for the ASM_FILE_END hook, we ensure
+     that what would be called otherwise (e.g. `file_end_indicate_exec_stack`
+     for FreeBSD) still gets called.  */
+#ifdef TARGET_ASM_FILE_END
+  TARGET_ASM_FILE_END ();
+#endif
+}
+
+const char *
+aarch64_indirect_call_asm (rtx addr)
+{
+  gcc_assert (REG_P (addr));
+  if (aarch64_harden_sls_blr_p ())
+    {
+      rtx stub_label = aarch64_sls_create_blr_label (REGNO (addr));
+      output_asm_insn ("bl\t%0", &stub_label);
+    }
+  else
+    output_asm_insn ("blr\t%0", &addr);
+  return "";
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
@@ -19529,6 +19744,12 @@ aarch64_libgcc_floating_mode_supported_p
 #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
 #endif /* #if CHECKING_P */
 
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END aarch64_asm_file_end
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE aarch64_sls_emit_blr_function_thunks
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-aarch64.h"
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 72ddc6fd9..60682a100 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -540,6 +540,16 @@ extern unsigned aarch64_architecture_version;
 #define GP_REGNUM_P(REGNO) \
   (((unsigned) (REGNO - R0_REGNUM)) <= (R30_REGNUM - R0_REGNUM))
 
+/* Registers known to be preserved over a BL instruction.  This consists of the
+   GENERAL_REGS without x16, x17, and x30.  The x30 register is changed by the
+   BL instruction itself, while the x16 and x17 registers may be used by
+   veneers which can be inserted by the linker.  */
+#define STUB_REGNUM_P(REGNO) \
+  (GP_REGNUM_P (REGNO) \
+   && (REGNO) != R16_REGNUM \
+   && (REGNO) != R17_REGNUM \
+   && (REGNO) != R30_REGNUM) \
+
 #define FP_REGNUM_P(REGNO) \
   (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM))
 
@@ -561,6 +571,7 @@ enum reg_class
 {
   NO_REGS,
   TAILCALL_ADDR_REGS,
+  STUB_REGS,
   GENERAL_REGS,
   STACK_REG,
   POINTER_REGS,
@@ -580,6 +591,7 @@ enum reg_class
 {						\
   "NO_REGS",					\
   "TAILCALL_ADDR_REGS",				\
+  "STUB_REGS",					\
  "GENERAL_REGS",				\
  "STACK_REG",					\
  "POINTER_REGS",				\
@@ -596,6 +608,7 @@ enum reg_class
 {							\
   { 0x00000000, 0x00000000, 0x00000000 },	/* NO_REGS */		\
   { 0x00030000, 0x00000000, 0x00000000 },	/* TAILCALL_ADDR_REGS */\
+  { 0x3ffcffff, 0x00000000, 0x00000000 },	/* STUB_REGS */		\
   { 0x7fffffff, 0x00000000, 0x00000003 },	/* GENERAL_REGS */	\
   { 0x80000000, 0x00000000, 0x00000000 },	/* STACK_REG */		\
   { 0xffffffff, 0x00000000, 0x00000003 },	/* POINTER_REGS */	\
@@ -735,6 +748,8 @@ typedef struct GTY (()) machine_function
   struct aarch64_frame frame;
   /* One entry for each hard register.  */
   bool reg_is_wrapped_separately[LAST_SAVED_REGNUM];
+  /* One entry for each general purpose register.  */
+  rtx call_via[SP_REGNUM];
   bool label_is_assembled;
 } machine_function;
 #endif
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 494aee964..ed8cf8ece 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -908,15 +908,14 @@
 )
 
 (define_insn "*call_insn"
-  [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "r, Usf"))
+  [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucr, Usf"))
	 (match_operand 1 "" ""))
   (clobber (reg:DI LR_REGNUM))]
   ""
   "@
-   blr\\t%0
+   * return aarch64_indirect_call_asm (operands[0]);
    bl\\t%c0"
-  [(set_attr "type" "call, call")]
-)
+  [(set_attr "type" "call, call")])
 
 (define_expand "call_value"
   [(parallel [(set (match_operand 0 "" "")
@@ -934,12 +933,12 @@
 
 (define_insn "*call_value_insn"
   [(set (match_operand 0 "" "")
-	(call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "r, Usf"))
+	(call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "Ucr, Usf"))
	      (match_operand 2 "" "")))
   (clobber (reg:DI LR_REGNUM))]
   ""
   "@
-   blr\\t%1
+   * return aarch64_indirect_call_asm (operands[1]);
    bl\\t%c1"
   [(set_attr "type" "call, call")]
 )
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 21f9549e6..7756dbe83 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -24,6 +24,15 @@
 (define_register_constraint "Ucs" "TAILCALL_ADDR_REGS"
   "@internal Registers suitable for an indirect tail call")
 
+(define_register_constraint "Ucr"
+  "aarch64_harden_sls_blr_p () ? STUB_REGS : GENERAL_REGS"
+  "@internal Registers to be used for an indirect call.
+   This is usually the general registers, but when we are hardening against
+   Straight Line Speculation we disallow x16, x17, and x30 so we can use
+   indirection stubs.  These indirection stubs cannot use the above registers
+   since they will be reached by a BL that may have to go through a linker
+   veneer.")
+
 (define_register_constraint "w" "FP_REGS"
   "Floating point and SIMD vector registers.")
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 8e1b78421..4250aecb3 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -32,7 +32,8 @@
 
 (define_predicate "aarch64_general_reg"
   (and (match_operand 0 "register_operand")
-       (match_test "REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS")))
+       (match_test "REGNO_REG_CLASS (REGNO (op)) == STUB_REGS
+		    || REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS")))
 
 ;; Return true if OP a (const_int 0) operand.
 (define_predicate "const0_operand"
diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c
new file mode 100644
index 000000000..b1fb754c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-mharden-sls=blr -mbranch-protection=bti" } */
+/*
+   Ensure that the SLS hardening of BLR leaves no BLR instructions.
+   Here we also check that there are no BR instructions with anything except an
+   x16 or x17 register.  This is because a `BTI c` instruction can be branched
+   to using a BLR instruction using any register, but can only be branched to
+   with a BR using an x16 or x17 register.
+  */
+typedef int (foo) (int, int);
+typedef void (bar) (int, int);
+struct sls_testclass {
+  foo *x;
+  bar *y;
+  int left;
+  int right;
+};
+
+/* We test both RTL patterns for a call which returns a value and a call which
+   does not.  */
+int blr_call_value (struct sls_testclass x)
+{
+  int retval = x.x(x.left, x.right);
+  if (retval % 10)
+    return 100;
+  return 9;
+}
+
+int blr_call (struct sls_testclass x)
+{
+  x.y(x.left, x.right);
+  if (x.left % 10)
+    return 100;
+  return 9;
+}
+
+/* { dg-final { scan-assembler-not {\tblr\t} } } */
+/* { dg-final { scan-assembler-not {\tbr\tx(?!16|17)} } } */
+/* { dg-final { scan-assembler {\tbr\tx(16|17)} } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c
new file mode 100644
index 000000000..88baffffe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c
@@ -0,0 +1,33 @@
+/* { dg-additional-options "-mharden-sls=blr -save-temps" } */
+/* Ensure that the SLS hardening of BLR leaves no BLR instructions.
+   We only test that all BLR instructions have been removed, not that the
+   resulting code makes sense.  */
+typedef int (foo) (int, int);
+typedef void (bar) (int, int);
+struct sls_testclass {
+  foo *x;
+  bar *y;
+  int left;
+  int right;
+};
+
+/* We test both RTL patterns for a call which returns a value and a call which
+   does not.  */
+int blr_call_value (struct sls_testclass x)
+{
+  int retval = x.x(x.left, x.right);
+  if (retval % 10)
+    return 100;
+  return 9;
+}
+
+int blr_call (struct sls_testclass x)
+{
+  x.y(x.left, x.right);
+  if (x.left % 10)
+    return 100;
+  return 9;
+}
+
+/* { dg-final { scan-assembler-not {\tblr\t} } } */
+/* { dg-final { scan-assembler {\tbr\tx[0-9][0-9]?} } } */
-- 
2.25.1

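Finally, a minimal sketch of the user-visible effect of this third patch (our example, not from the test suite; the exact register the compiler picks may vary):

```c
/* blr-demo.c -- illustrative only.
 *
 * aarch64-linux-gnu-gcc -Os -mharden-sls=blr -S blr-demo.c
 *
 * The indirect call below is emitted as "bl __call_indirect_x<N>" rather
 * than "blr x<N>"; the shared stub contains "mov x16, x<N>; br x16"
 * followed by "dsb sy; isb" (the most conservative barrier, since shared
 * stubs must work for every function in the link).
 */
int call_through (int (*fn) (int), int v)
{
  return fn (v) + 1;   /* the +1 prevents a tail call, so a BLR (now a
                          BL to a stub) is used rather than a BR */
}
```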