summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--meta/recipes-devtools/gcc/gcc-10.1.inc3
-rw-r--r--meta/recipes-devtools/gcc/gcc-10.1/0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch202
-rw-r--r--meta/recipes-devtools/gcc/gcc-10.1/0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch607
-rw-r--r--meta/recipes-devtools/gcc/gcc-10.1/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch658
4 files changed, 1470 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-10.1.inc b/meta/recipes-devtools/gcc/gcc-10.1.inc
index a3de91a2c6..7c1201a2e3 100644
--- a/meta/recipes-devtools/gcc/gcc-10.1.inc
+++ b/meta/recipes-devtools/gcc/gcc-10.1.inc
@@ -66,6 +66,9 @@ SRC_URI = "\
66 file://0036-Enable-CET-in-cross-compiler-if-possible.patch \ 66 file://0036-Enable-CET-in-cross-compiler-if-possible.patch \
67 file://0037-mingw32-Enable-operation_not_supported.patch \ 67 file://0037-mingw32-Enable-operation_not_supported.patch \
68 file://0038-libatomic-Do-not-enforce-march-on-aarch64.patch \ 68 file://0038-libatomic-Do-not-enforce-march-on-aarch64.patch \
69 file://0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch \
70 file://0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch \
71 file://0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch \
69" 72"
70SRC_URI[sha256sum] = "b6898a23844b656f1b68691c5c012036c2e694ac4b53a8918d4712ad876e7ea2" 73SRC_URI[sha256sum] = "b6898a23844b656f1b68691c5c012036c2e694ac4b53a8918d4712ad876e7ea2"
71 74
diff --git a/meta/recipes-devtools/gcc/gcc-10.1/0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch b/meta/recipes-devtools/gcc/gcc-10.1/0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch
new file mode 100644
index 0000000000..73de4c7590
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-10.1/0001-aarch64-New-Straight-Line-Speculation-SLS-mitigation.patch
@@ -0,0 +1,202 @@
1CVE: CVE-2020-13844
2Upstream-Status: Backport
3Signed-off-by: Ross Burton <ross.burton@arm.com>
4
5From 1ff243934ac443b5f58cd02a5012ce58ecc31fb2 Mon Sep 17 00:00:00 2001
6From: Matthew Malcomson <matthew.malcomson@arm.com>
7Date: Thu, 9 Jul 2020 09:11:58 +0100
8Subject: [PATCH 1/3] aarch64: New Straight Line Speculation (SLS) mitigation
9 flags
10
11Here we introduce the flags that will be used for straight line speculation.
12
13The new flag introduced is `-mharden-sls=`.
14This flag can take arguments of `none`, `all`, or a comma-separated list of one
15or more of `retbr` or `blr`.
16`none` indicates no special mitigation of the straight line speculation
17vulnerability.
18`all` requests all mitigations currently implemented.
19`retbr` requests that the RET and BR instructions have a speculation barrier
20inserted after them.
21`blr` requests that BLR instructions are replaced by a BL to a function stub
22using a BR with a speculation barrier after it.
23
24Setting this on a per-function basis using attributes or the like is not
25enabled, but may be in the future.
26
27gcc/ChangeLog:
28
292020-06-02 Matthew Malcomson <matthew.malcomson@arm.com>
30
31 * config/aarch64/aarch64-protos.h (aarch64_harden_sls_retbr_p):
32 New.
33 (aarch64_harden_sls_blr_p): New.
34 * config/aarch64/aarch64.c (enum aarch64_sls_hardening_type):
35 New.
36 (aarch64_harden_sls_retbr_p): New.
37 (aarch64_harden_sls_blr_p): New.
38 (aarch64_validate_sls_mitigation): New.
39 (aarch64_override_options): Parse options for SLS mitigation.
40 * config/aarch64/aarch64.opt (-mharden-sls): New option.
41 * doc/invoke.texi: Document new option.
42---
43 gcc/config/aarch64/aarch64-protos.h | 3 ++
44 gcc/config/aarch64/aarch64.c | 76 +++++++++++++++++++++++++++++++++++++
45 gcc/config/aarch64/aarch64.opt | 4 ++
46 gcc/doc/invoke.texi | 12 ++++++
47 4 files changed, 95 insertions(+)
48
49diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
50index 723d9ba..eb5f4b4 100644
51--- a/gcc/config/aarch64/aarch64-protos.h
52+++ b/gcc/config/aarch64/aarch64-protos.h
53@@ -781,4 +781,7 @@ extern const atomic_ool_names aarch64_ool_ldeor_names;
54
55 tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *);
56
57+extern bool aarch64_harden_sls_retbr_p (void);
58+extern bool aarch64_harden_sls_blr_p (void);
59+
60 #endif /* GCC_AARCH64_PROTOS_H */
61diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
62index b86434a..437a9cf 100644
63--- a/gcc/config/aarch64/aarch64.c
64+++ b/gcc/config/aarch64/aarch64.c
65@@ -14494,6 +14494,79 @@ aarch64_validate_mcpu (const char *str, const struct processor **res,
66 return false;
67 }
68
69+/* Straight line speculation indicators. */
70+enum aarch64_sls_hardening_type
71+{
72+ SLS_NONE = 0,
73+ SLS_RETBR = 1,
74+ SLS_BLR = 2,
75+ SLS_ALL = 3,
76+};
77+static enum aarch64_sls_hardening_type aarch64_sls_hardening;
78+
79+/* Return whether we should mitigate Straight Line Speculation for the RET
80+ and BR instructions. */
81+bool
82+aarch64_harden_sls_retbr_p (void)
83+{
84+ return aarch64_sls_hardening & SLS_RETBR;
85+}
86+
87+/* Return whether we should mitigate Straight Line Speculation for the BLR
88+ instruction. */
89+bool
90+aarch64_harden_sls_blr_p (void)
91+{
92+ return aarch64_sls_hardening & SLS_BLR;
93+}
94+
95+/* As of yet we only allow setting these options globally, in the future we may
96+ allow setting them per function. */
97+static void
98+aarch64_validate_sls_mitigation (const char *const_str)
99+{
100+ char *token_save = NULL;
101+ char *str = NULL;
102+
103+ if (strcmp (const_str, "none") == 0)
104+ {
105+ aarch64_sls_hardening = SLS_NONE;
106+ return;
107+ }
108+ if (strcmp (const_str, "all") == 0)
109+ {
110+ aarch64_sls_hardening = SLS_ALL;
111+ return;
112+ }
113+
114+ char *str_root = xstrdup (const_str);
115+ str = strtok_r (str_root, ",", &token_save);
116+ if (!str)
117+ error ("invalid argument given to %<-mharden-sls=%>");
118+
119+ int temp = SLS_NONE;
120+ while (str)
121+ {
122+ if (strcmp (str, "blr") == 0)
123+ temp |= SLS_BLR;
124+ else if (strcmp (str, "retbr") == 0)
125+ temp |= SLS_RETBR;
126+ else if (strcmp (str, "none") == 0 || strcmp (str, "all") == 0)
127+ {
128+ error ("%<%s%> must be by itself for %<-mharden-sls=%>", str);
129+ break;
130+ }
131+ else
132+ {
133+ error ("invalid argument %<%s%> for %<-mharden-sls=%>", str);
134+ break;
135+ }
136+ str = strtok_r (NULL, ",", &token_save);
137+ }
138+ aarch64_sls_hardening = (aarch64_sls_hardening_type) temp;
139+ free (str_root);
140+}
141+
142 /* Parses CONST_STR for branch protection features specified in
143 aarch64_branch_protect_types, and set any global variables required. Returns
144 the parsing result and assigns LAST_STR to the last processed token from
145@@ -14738,6 +14811,9 @@ aarch64_override_options (void)
146 selected_arch = NULL;
147 selected_tune = NULL;
148
149+ if (aarch64_harden_sls_string)
150+ aarch64_validate_sls_mitigation (aarch64_harden_sls_string);
151+
152 if (aarch64_branch_protection_string)
153 aarch64_validate_mbranch_protection (aarch64_branch_protection_string);
154
155diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
156index d99d14c..5170361 100644
157--- a/gcc/config/aarch64/aarch64.opt
158+++ b/gcc/config/aarch64/aarch64.opt
159@@ -71,6 +71,10 @@ mgeneral-regs-only
160 Target Report RejectNegative Mask(GENERAL_REGS_ONLY) Save
161 Generate code which uses only the general registers.
162
163+mharden-sls=
164+Target RejectNegative Joined Var(aarch64_harden_sls_string)
165+Generate code to mitigate against straight line speculation.
166+
167 mfix-cortex-a53-835769
168 Target Report Var(aarch64_fix_a53_err835769) Init(2) Save
169 Workaround for ARM Cortex-A53 Erratum number 835769.
170diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
171index a2794a6..bd5b77a 100644
172--- a/gcc/doc/invoke.texi
173+++ b/gcc/doc/invoke.texi
174@@ -696,6 +696,7 @@ Objective-C and Objective-C++ Dialects}.
175 -msign-return-address=@var{scope} @gol
176 -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}
177 +@var{b-key}]|@var{bti} @gol
178+-mharden-sls=@var{opts} @gol
179 -march=@var{name} -mcpu=@var{name} -mtune=@var{name} @gol
180 -moverride=@var{string} -mverbose-cost-dump @gol
181 -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg} @gol
182@@ -17065,6 +17066,17 @@ functions. The optional argument @samp{b-key} can be used to sign the functions
183 with the B-key instead of the A-key.
184 @samp{bti} turns on branch target identification mechanism.
185
186+@item -mharden-sls=@var{opts}
187+@opindex mharden-sls
188+Enable compiler hardening against straight line speculation (SLS).
189+@var{opts} is a comma-separated list of the following options:
190+@table @samp
191+@item retbr
192+@item blr
193+@end table
194+In addition, @samp{-mharden-sls=all} enables all SLS hardening while
195+@samp{-mharden-sls=none} disables all SLS hardening.
196+
197 @item -msve-vector-bits=@var{bits}
198 @opindex msve-vector-bits
199 Specify the number of bits in an SVE vector register. This option only has
200--
2012.7.4
202
diff --git a/meta/recipes-devtools/gcc/gcc-10.1/0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch b/meta/recipes-devtools/gcc/gcc-10.1/0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch
new file mode 100644
index 0000000000..823cc8b668
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-10.1/0002-aarch64-Introduce-SLS-mitigation-for-RET-and-BR-inst.patch
@@ -0,0 +1,607 @@
1Upstream-Status: Backport
2Signed-off-by: Ross Burton <ross.burton@arm.com>
3
4From b1204d16e1ec96a4aa89e44de8990e2499ffdb22 Mon Sep 17 00:00:00 2001
5From: Matthew Malcomson <matthew.malcomson@arm.com>
6Date: Thu, 9 Jul 2020 09:11:59 +0100
7Subject: [PATCH 2/3] aarch64: Introduce SLS mitigation for RET and BR
8 instructions
9
10Instructions following RET or BR are not necessarily executed. In order
11to avoid speculation past RET and BR we can simply append a speculation
12barrier.
13
14Since these speculation barriers will not be architecturally executed,
15they are not expected to add a high performance penalty.
16
17The speculation barrier is to be SB when targeting architectures which
18have this enabled, and DSB SY + ISB otherwise.
19
20We add tests for each of the cases where such an instruction was seen.
21
22This is implemented by modifying each machine description pattern that
23emits either a RET or a BR instruction. We choose not to use something
24like `TARGET_ASM_FUNCTION_EPILOGUE` since it does not affect the
25`indirect_jump`, `jump`, `sibcall_insn` and `sibcall_value_insn`
26patterns and we find it preferable to implement the functionality in the
27same way for every pattern.
28
29There is one particular case which is slightly tricky. The
30implementation of TARGET_ASM_TRAMPOLINE_TEMPLATE uses a BR which needs
31to be mitigated against. The trampoline template is used *once* per
32compilation unit, and the TRAMPOLINE_SIZE is exposed to the user via the
33builtin macro __LIBGCC_TRAMPOLINE_SIZE__.
34In the future we may implement function specific attributes to turn on
35and off hardening on a per-function basis.
36The fixed nature of the trampoline described above implies it will be
37safer to ensure this speculation barrier is always used.
38
39Testing:
40 Bootstrap and regtest done on aarch64-none-linux
41 Used a temporary hack(1) to use these options on every test in the
42 testsuite and a script to check that the output never emitted an
43 unmitigated RET or BR.
44
451) Temporary hack was a change to the testsuite to always use
46`-save-temps` and run a script on the assembly output of those
47compilations which produced one to ensure every RET or BR is immediately
48followed by a speculation barrier.
49
50gcc/ChangeLog:
51
52 * config/aarch64/aarch64-protos.h (aarch64_sls_barrier): New.
53 * config/aarch64/aarch64.c (aarch64_output_casesi): Emit
54 speculation barrier after BR instruction if needs be.
55 (aarch64_trampoline_init): Handle ptr_mode value & adjust size
56 of code copied.
57 (aarch64_sls_barrier): New.
58 (aarch64_asm_trampoline_template): Add needed barriers.
59 * config/aarch64/aarch64.h (AARCH64_ISA_SB): New.
60 (TARGET_SB): New.
61 (TRAMPOLINE_SIZE): Account for barrier.
62 * config/aarch64/aarch64.md (indirect_jump, *casesi_dispatch,
63 simple_return, *do_return, *sibcall_insn, *sibcall_value_insn):
64 Emit barrier if needs be, also account for possible barrier using
65 "sls_length" attribute.
66 (sls_length): New attribute.
67 (length): Determine default using any non-default sls_length
68 value.
69
70gcc/testsuite/ChangeLog:
71
72 * gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c: New test.
73 * gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c:
74 New test.
75 * gcc.target/aarch64/sls-mitigation/sls-mitigation.exp: New file.
76 * lib/target-supports.exp (check_effective_target_aarch64_asm_sb_ok):
77 New proc.
78---
79 gcc/config/aarch64/aarch64-protos.h | 1 +
80 gcc/config/aarch64/aarch64.c | 41 ++++++-
81 gcc/config/aarch64/aarch64.h | 10 +-
82 gcc/config/aarch64/aarch64.md | 76 +++++++++----
83 .../aarch64/sls-mitigation/sls-miti-retbr-pacret.c | 21 ++++
84 .../aarch64/sls-mitigation/sls-miti-retbr.c | 119 +++++++++++++++++++++
85 .../aarch64/sls-mitigation/sls-mitigation.exp | 73 +++++++++++++
86 gcc/testsuite/lib/target-supports.exp | 2 +-
87 8 files changed, 318 insertions(+), 25 deletions(-)
88 create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c
89 create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c
90 create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-mitigation.exp
91
92diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
93index eb5f4b4..ee0ffde 100644
94--- a/gcc/config/aarch64/aarch64-protos.h
95+++ b/gcc/config/aarch64/aarch64-protos.h
96@@ -781,6 +781,7 @@ extern const atomic_ool_names aarch64_ool_ldeor_names;
97
98 tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *);
99
100+const char *aarch64_sls_barrier (int);
101 extern bool aarch64_harden_sls_retbr_p (void);
102 extern bool aarch64_harden_sls_blr_p (void);
103
104diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
105index 437a9cf..44e3d1f 100644
106--- a/gcc/config/aarch64/aarch64.c
107+++ b/gcc/config/aarch64/aarch64.c
108@@ -10852,8 +10852,8 @@ aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
109 static void
110 aarch64_asm_trampoline_template (FILE *f)
111 {
112- int offset1 = 16;
113- int offset2 = 20;
114+ int offset1 = 24;
115+ int offset2 = 28;
116
117 if (aarch64_bti_enabled ())
118 {
119@@ -10876,6 +10876,17 @@ aarch64_asm_trampoline_template (FILE *f)
120 }
121 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
122
123+ /* We always emit a speculation barrier.
124+ This is because the same trampoline template is used for every nested
125+ function. Since nested functions are not particularly common or
126+ performant we don't worry too much about the extra instructions to copy
127+ around.
128+ This is not yet a problem, since we have not yet implemented function
129+ specific attributes to choose between hardening against straight line
130+ speculation or not, but such function specific attributes are likely to
131+ happen in the future. */
132+ asm_fprintf (f, "\tdsb\tsy\n\tisb\n");
133+
134 /* The trampoline needs an extra padding instruction. In case if BTI is
135 enabled the padding instruction is replaced by the BTI instruction at
136 the beginning. */
137@@ -10890,10 +10901,14 @@ static void
138 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
139 {
140 rtx fnaddr, mem, a_tramp;
141- const int tramp_code_sz = 16;
142+ const int tramp_code_sz = 24;
143
144 /* Don't need to copy the trailing D-words, we fill those in below. */
145- emit_block_move (m_tramp, assemble_trampoline_template (),
146+ /* We create our own memory address in Pmode so that `emit_block_move` can
147+ use parts of the backend which expect Pmode addresses. */
148+ rtx temp = convert_memory_address (Pmode, XEXP (m_tramp, 0));
149+ emit_block_move (gen_rtx_MEM (BLKmode, temp),
150+ assemble_trampoline_template (),
151 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
152 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
153 fnaddr = XEXP (DECL_RTL (fndecl), 0);
154@@ -11084,6 +11099,8 @@ aarch64_output_casesi (rtx *operands)
155 output_asm_insn (buf, operands);
156 output_asm_insn (patterns[index][1], operands);
157 output_asm_insn ("br\t%3", operands);
158+ output_asm_insn (aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()),
159+ operands);
160 assemble_label (asm_out_file, label);
161 return "";
162 }
163@@ -22924,6 +22941,22 @@ aarch64_file_end_indicate_exec_stack ()
164 #undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI
165 #undef GNU_PROPERTY_AARCH64_FEATURE_1_AND
166
167+/* Helper function for straight line speculation.
168+ Return what barrier should be emitted for straight line speculation
169+ mitigation.
170+ When not mitigating against straight line speculation this function returns
171+ an empty string.
172+ When mitigating against straight line speculation, use:
173+ * SB when the v8.5-A SB extension is enabled.
174+ * DSB+ISB otherwise. */
175+const char *
176+aarch64_sls_barrier (int mitigation_required)
177+{
178+ return mitigation_required
179+ ? (TARGET_SB ? "sb" : "dsb\tsy\n\tisb")
180+ : "";
181+}
182+
183 /* Target-specific selftests. */
184
185 #if CHECKING_P
186diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
187index 1ce23c6..c21015f 100644
188--- a/gcc/config/aarch64/aarch64.h
189+++ b/gcc/config/aarch64/aarch64.h
190@@ -281,6 +281,7 @@ extern unsigned aarch64_architecture_version;
191 #define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM)
192 #define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM)
193 #define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16)
194+#define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB)
195
196 /* Crypto is an optional extension to AdvSIMD. */
197 #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
198@@ -378,6 +379,9 @@ extern unsigned aarch64_architecture_version;
199 #define TARGET_FIX_ERR_A53_835769_DEFAULT 1
200 #endif
201
202+/* SB instruction is enabled through +sb. */
203+#define TARGET_SB (AARCH64_ISA_SB)
204+
205 /* Apply the workaround for Cortex-A53 erratum 835769. */
206 #define TARGET_FIX_ERR_A53_835769 \
207 ((aarch64_fix_a53_err835769 == 2) \
208@@ -1058,8 +1062,10 @@ typedef struct
209
210 #define RETURN_ADDR_RTX aarch64_return_addr
211
212-/* BTI c + 3 insns + 2 pointer-sized entries. */
213-#define TRAMPOLINE_SIZE (TARGET_ILP32 ? 24 : 32)
214+/* BTI c + 3 insns
215+ + sls barrier of DSB + ISB.
216+ + 2 pointer-sized entries. */
217+#define TRAMPOLINE_SIZE (24 + (TARGET_ILP32 ? 8 : 16))
218
219 /* Trampolines contain dwords, so must be dword aligned. */
220 #define TRAMPOLINE_ALIGNMENT 64
221diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
222index 8c8be3c..dda04ee 100644
223--- a/gcc/config/aarch64/aarch64.md
224+++ b/gcc/config/aarch64/aarch64.md
225@@ -407,10 +407,25 @@
226 ;; Attribute that specifies whether the alternative uses MOVPRFX.
227 (define_attr "movprfx" "no,yes" (const_string "no"))
228
229+;; Attribute to specify that an alternative has the length of a single
230+;; instruction plus a speculation barrier.
231+(define_attr "sls_length" "none,retbr,casesi" (const_string "none"))
232+
233 (define_attr "length" ""
234 (cond [(eq_attr "movprfx" "yes")
235 (const_int 8)
236- ] (const_int 4)))
237+
238+ (eq_attr "sls_length" "retbr")
239+ (cond [(match_test "!aarch64_harden_sls_retbr_p ()") (const_int 4)
240+ (match_test "TARGET_SB") (const_int 8)]
241+ (const_int 12))
242+
243+ (eq_attr "sls_length" "casesi")
244+ (cond [(match_test "!aarch64_harden_sls_retbr_p ()") (const_int 16)
245+ (match_test "TARGET_SB") (const_int 20)]
246+ (const_int 24))
247+ ]
248+ (const_int 4)))
249
250 ;; Strictly for compatibility with AArch32 in pipeline models, since AArch64 has
251 ;; no predicated insns.
252@@ -447,8 +462,12 @@
253 (define_insn "indirect_jump"
254 [(set (pc) (match_operand:DI 0 "register_operand" "r"))]
255 ""
256- "br\\t%0"
257- [(set_attr "type" "branch")]
258+ {
259+ output_asm_insn ("br\\t%0", operands);
260+ return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
261+ }
262+ [(set_attr "type" "branch")
263+ (set_attr "sls_length" "retbr")]
264 )
265
266 (define_insn "jump"
267@@ -765,7 +784,7 @@
268 "*
269 return aarch64_output_casesi (operands);
270 "
271- [(set_attr "length" "16")
272+ [(set_attr "sls_length" "casesi")
273 (set_attr "type" "branch")]
274 )
275
276@@ -844,18 +863,23 @@
277 [(return)]
278 ""
279 {
280+ const char *ret = NULL;
281 if (aarch64_return_address_signing_enabled ()
282 && TARGET_ARMV8_3
283 && !crtl->calls_eh_return)
284 {
285 if (aarch64_ra_sign_key == AARCH64_KEY_B)
286- return "retab";
287+ ret = "retab";
288 else
289- return "retaa";
290+ ret = "retaa";
291 }
292- return "ret";
293+ else
294+ ret = "ret";
295+ output_asm_insn (ret, operands);
296+ return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
297 }
298- [(set_attr "type" "branch")]
299+ [(set_attr "type" "branch")
300+ (set_attr "sls_length" "retbr")]
301 )
302
303 (define_expand "return"
304@@ -867,8 +891,12 @@
305 (define_insn "simple_return"
306 [(simple_return)]
307 ""
308- "ret"
309- [(set_attr "type" "branch")]
310+ {
311+ output_asm_insn ("ret", operands);
312+ return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
313+ }
314+ [(set_attr "type" "branch")
315+ (set_attr "sls_length" "retbr")]
316 )
317
318 (define_insn "*cb<optab><mode>1"
319@@ -1066,10 +1094,16 @@
320 (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI)
321 (return)]
322 "SIBLING_CALL_P (insn)"
323- "@
324- br\\t%0
325- b\\t%c0"
326- [(set_attr "type" "branch, branch")]
327+ {
328+ if (which_alternative == 0)
329+ {
330+ output_asm_insn ("br\\t%0", operands);
331+ return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
332+ }
333+ return "b\\t%c0";
334+ }
335+ [(set_attr "type" "branch, branch")
336+ (set_attr "sls_length" "retbr,none")]
337 )
338
339 (define_insn "*sibcall_value_insn"
340@@ -1080,10 +1114,16 @@
341 (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI)
342 (return)]
343 "SIBLING_CALL_P (insn)"
344- "@
345- br\\t%1
346- b\\t%c1"
347- [(set_attr "type" "branch, branch")]
348+ {
349+ if (which_alternative == 0)
350+ {
351+ output_asm_insn ("br\\t%1", operands);
352+ return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
353+ }
354+ return "b\\t%c1";
355+ }
356+ [(set_attr "type" "branch, branch")
357+ (set_attr "sls_length" "retbr,none")]
358 )
359
360 ;; Call subroutine returning any type.
361diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c
362new file mode 100644
363index 0000000..fa1887a
364--- /dev/null
365+++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr-pacret.c
366@@ -0,0 +1,21 @@
367+/* Avoid ILP32 since pacret is only available for LP64 */
368+/* { dg-do compile { target { ! ilp32 } } } */
369+/* { dg-additional-options "-mharden-sls=retbr -mbranch-protection=pac-ret -march=armv8.3-a" } */
370+
371+/* Testing the do_return pattern for retaa and retab. */
372+long retbr_subcall(void);
373+long retbr_do_return_retaa(void)
374+{
375+ return retbr_subcall()+1;
376+}
377+
378+__attribute__((target("branch-protection=pac-ret+b-key")))
379+long retbr_do_return_retab(void)
380+{
381+ return retbr_subcall()+1;
382+}
383+
384+/* Ensure there are no BR or RET instructions which are not directly followed
385+ by a speculation barrier. */
386+/* { dg-final { scan-assembler-not {\t(br|ret|retaa|retab)\tx[0-9][0-9]?\n\t(?!dsb\tsy\n\tisb)} } } */
387+/* { dg-final { scan-assembler-not {ret\t} } } */
388diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c
389new file mode 100644
390index 0000000..76b8d03
391--- /dev/null
392+++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-retbr.c
393@@ -0,0 +1,119 @@
394+/* We ensure that -Wpedantic is off since it complains about the trampolines
395+ we explicitly want to test. */
396+/* { dg-additional-options "-mharden-sls=retbr -Wno-pedantic " } */
397+/*
398+ Ensure that the SLS hardening of RET and BR leaves no unprotected RET/BR
399+ instructions.
400+ */
401+typedef int (foo) (int, int);
402+typedef void (bar) (int, int);
403+struct sls_testclass {
404+ foo *x;
405+ bar *y;
406+ int left;
407+ int right;
408+};
409+
410+int
411+retbr_sibcall_value_insn (struct sls_testclass x)
412+{
413+ return x.x(x.left, x.right);
414+}
415+
416+void
417+retbr_sibcall_insn (struct sls_testclass x)
418+{
419+ x.y(x.left, x.right);
420+}
421+
422+/* Aim to test two different returns.
423+ One that introduces a tail call in the middle of the function, and one that
424+ has a normal return. */
425+int
426+retbr_multiple_returns (struct sls_testclass x)
427+{
428+ int temp;
429+ if (x.left % 10)
430+ return x.x(x.left, 100);
431+ else if (x.right % 20)
432+ {
433+ return x.x(x.left * x.right, 100);
434+ }
435+ temp = x.left % x.right;
436+ temp *= 100;
437+ temp /= 2;
438+ return temp % 3;
439+}
440+
441+void
442+retbr_multiple_returns_void (struct sls_testclass x)
443+{
444+ if (x.left % 10)
445+ {
446+ x.y(x.left, 100);
447+ }
448+ else if (x.right % 20)
449+ {
450+ x.y(x.left * x.right, 100);
451+ }
452+ return;
453+}
454+
455+/* Testing the casesi jump via register. */
456+__attribute__ ((optimize ("Os")))
457+int
458+retbr_casesi_dispatch (struct sls_testclass x)
459+{
460+ switch (x.left)
461+ {
462+ case -5:
463+ return -2;
464+ case -3:
465+ return -1;
466+ case 0:
467+ return 0;
468+ case 3:
469+ return 1;
470+ case 5:
471+ break;
472+ default:
473+ __builtin_unreachable ();
474+ }
475+ return x.right;
476+}
477+
478+/* Testing the BR in trampolines is mitigated against. */
479+void f1 (void *);
480+void f3 (void *, void (*)(void *));
481+void f2 (void *);
482+
483+int
484+retbr_trampolines (void *a, int b)
485+{
486+ if (!b)
487+ {
488+ f1 (a);
489+ return 1;
490+ }
491+ if (b)
492+ {
493+ void retbr_tramp_internal (void *c)
494+ {
495+ if (c == a)
496+ f2 (c);
497+ }
498+ f3 (a, retbr_tramp_internal);
499+ }
500+ return 0;
501+}
502+
503+/* Testing the indirect_jump pattern. */
504+void
505+retbr_indirect_jump (int *buf)
506+{
507+ __builtin_longjmp(buf, 1);
508+}
509+
510+/* Ensure there are no BR or RET instructions which are not directly followed
511+ by a speculation barrier. */
512+/* { dg-final { scan-assembler-not {\t(br|ret|retaa|retab)\tx[0-9][0-9]?\n\t(?!dsb\tsy\n\tisb|sb)} } } */
513diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-mitigation.exp b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-mitigation.exp
514new file mode 100644
515index 0000000..8122503
516--- /dev/null
517+++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-mitigation.exp
518@@ -0,0 +1,73 @@
519+# Regression driver for SLS mitigation on AArch64.
520+# Copyright (C) 2020 Free Software Foundation, Inc.
521+# Contributed by ARM Ltd.
522+#
523+# This file is part of GCC.
524+#
525+# GCC is free software; you can redistribute it and/or modify it
526+# under the terms of the GNU General Public License as published by
527+# the Free Software Foundation; either version 3, or (at your option)
528+# any later version.
529+#
530+# GCC is distributed in the hope that it will be useful, but
531+# WITHOUT ANY WARRANTY; without even the implied warranty of
532+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
533+# General Public License for more details.
534+#
535+# You should have received a copy of the GNU General Public License
536+# along with GCC; see the file COPYING3. If not see
537+# <http://www.gnu.org/licenses/>.
538+
539+# Exit immediately if this isn't an AArch64 target.
540+if {![istarget aarch64*-*-*] } then {
541+ return
542+}
543+
544+# Load support procs.
545+load_lib gcc-dg.exp
546+load_lib torture-options.exp
547+
548+# If a testcase doesn't have special options, use these.
549+global DEFAULT_CFLAGS
550+if ![info exists DEFAULT_CFLAGS] then {
551+ set DEFAULT_CFLAGS " "
552+}
553+
554+# Initialize `dg'.
555+dg-init
556+torture-init
557+
558+# Use different architectures as well as the normal optimisation options.
559+# (i.e. use both SB and DSB+ISB barriers).
560+
561+set save-dg-do-what-default ${dg-do-what-default}
562+# Main loop.
563+# Run with torture tests (i.e. a bunch of different optimisation levels) just
564+# to increase test coverage.
565+set dg-do-what-default assemble
566+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
567+ "-save-temps" $DEFAULT_CFLAGS
568+
569+# Run the same tests but this time with SB extension.
570+# Since not all supported assemblers will support that extension we decide
571+# whether to assemble or just compile based on whether the extension is
572+# supported for the available assembler.
573+
574+set templist {}
575+foreach x $DG_TORTURE_OPTIONS {
576+ lappend templist "$x -march=armv8.3-a+sb "
577+ lappend templist "$x -march=armv8-a+sb "
578+}
579+set-torture-options $templist
580+if { [check_effective_target_aarch64_asm_sb_ok] } {
581+ set dg-do-what-default assemble
582+} else {
583+ set dg-do-what-default compile
584+}
585+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
586+ "-save-temps" $DEFAULT_CFLAGS
587+set dg-do-what-default ${save-dg-do-what-default}
588+
589+# All done.
590+torture-finish
591+dg-finish
592diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
593index 8a186dd..9d2e093 100644
594--- a/gcc/testsuite/lib/target-supports.exp
595+++ b/gcc/testsuite/lib/target-supports.exp
596@@ -9432,7 +9432,7 @@ proc check_effective_target_aarch64_tiny { } {
597 # various architecture extensions via the .arch_extension pseudo-op.
598
599 foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
600- "i8mm" "f32mm" "f64mm" "bf16" } {
601+ "i8mm" "f32mm" "f64mm" "bf16" "sb" } {
602 eval [string map [list FUNC $aarch64_ext] {
603 proc check_effective_target_aarch64_asm_FUNC_ok { } {
604 if { [istarget aarch64*-*-*] } {
605--
6062.7.4
607
diff --git a/meta/recipes-devtools/gcc/gcc-10.1/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch b/meta/recipes-devtools/gcc/gcc-10.1/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch
new file mode 100644
index 0000000000..716a367172
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-10.1/0003-aarch64-Mitigate-SLS-for-BLR-instruction.patch
@@ -0,0 +1,658 @@
1Upstream-Status: Backport
2Signed-off-by: Ross Burton <ross.burton@arm.com>
3
4From a5e7efc40ed841934c1d913f39476afa17d8e5f7 Mon Sep 17 00:00:00 2001
5From: Matthew Malcomson <matthew.malcomson@arm.com>
6Date: Thu, 9 Jul 2020 09:11:59 +0100
7Subject: [PATCH 3/3] aarch64: Mitigate SLS for BLR instruction
8
9This patch introduces the mitigation for Straight Line Speculation past
10the BLR instruction.
11
12This mitigation replaces BLR instructions with a BL to a stub which uses
13a BR to jump to the original value. These function stubs are then
14appended with a speculation barrier to ensure no straight line
15speculation happens after these jumps.
16
17When optimising for speed we use a set of stubs for each function since
18this should help the branch predictor make more accurate predictions
19about where a stub should branch.
20
21When optimising for size we use one set of stubs for all functions.
22This set of stubs can have human readable names, and we are using
23`__call_indirect_x<N>` for register x<N>.
24
25When BTI branch protection is enabled the BLR instruction can jump to a
26`BTI c` instruction using any register, while the BR instruction can
27only jump to a `BTI c` instruction using the x16 or x17 registers.
28Hence, in order to ensure this transformation is safe we mov the value
29of the original register into x16 and use x16 for the BR.
30
31As an example when optimising for size:
32a
33 BLR x0
34instruction would get transformed to something like
35 BL __call_indirect_x0
36where __call_indirect_x0 labels a thunk that contains
37__call_indirect_x0:
38 MOV X16, X0
39 BR X16
40 <speculation barrier>
41
42The first version of this patch used local symbols specific to a
43compilation unit to try and avoid relocations.
44This was mistaken since functions coming from the same compilation unit
45can still be in different sections, and the assembler will insert
46relocations at jumps between sections.
47
48On any relocation the linker is permitted to emit a veneer to handle
49jumps between symbols that are very far apart. The registers x16 and
50x17 may be clobbered by these veneers.
51Hence the function stubs cannot rely on the values of x16 and x17 being
52the same as just before the function stub is called.
53
54Similar can be said for the hot/cold partitioning of single functions,
55so function-local stubs have the same restriction.
56
57This updated version of the patch never emits function stubs for x16 and
58x17, and instead forces other registers to be used.
59
60Given the above, there is now no benefit to local symbols (since they
61are not enough to avoid dealing with linker intricacies). This patch
62now uses global symbols with hidden visibility each stored in their own
63COMDAT section. This means stubs can be shared between compilation
64units while still avoiding the PLT indirection.
65
66This patch also removes the `__call_indirect_x30` stub (and
67function-local equivalent) which would simply jump back to the original
68location.
69
70The function-local stubs are emitted to the assembly output file in one
71chunk, which means we need not add the speculation barrier directly
72after each one.
73This is because we know for certain that the instructions directly after
74the BR in all but the last function stub will be from another one of
75these stubs and hence will not contain a speculation gadget.
76Instead we add a speculation barrier at the end of the sequence of
77stubs.
78
79The global stubs are emitted in COMDAT/.linkonce sections by
80themselves so that the linker can remove duplicates from multiple object
81files. This means they are not emitted in one chunk, and each one must
82include the speculation barrier.
83
84Another difference is that since the global stubs are shared across
85compilation units we do not know that all functions will be targeting an
86architecture supporting the SB instruction.
87Rather than provide multiple stubs for each architecture, we provide a
88stub that will work for all architectures -- using the DSB+ISB barrier.
89
90This mitigation does not apply for BLR instructions in the following
91places:
92- Some accesses to thread-local variables use a code sequence with a BLR
93 instruction. This code sequence is part of the binary interface between
94 compiler and linker. If this BLR instruction needs to be mitigated, it'd
95 probably be best to do so in the linker. It seems that the code sequence
96 for thread-local variable access is unlikely to lead to a Spectre Revelation
97 Gadget.
98- PLT stubs are produced by the linker and each contain a BLR instruction.
99 It seems that at most only after the last PLT stub a Spectre Revelation
100 Gadget might appear.
101
102Testing:
103 Bootstrap and regtest on AArch64
104 (with BOOT_CFLAGS="-mharden-sls=retbr,blr")
105 Used a temporary hack(1) in gcc-dg.exp to use these options on every
106 test in the testsuite, a slight modification to emit the speculation
107 barrier after every function stub, and a script to check that the
108 output never emitted a BLR, or unmitigated BR or RET instruction.
109 Similar on an aarch64-none-elf cross-compiler.
110
1111) Temporary hack emitted a speculation barrier at the end of every stub
112function, and used a script to ensure that:
113 a) Every RET or BR is immediately followed by a speculation barrier.
114 b) No BLR instruction is emitted by compiler.
115
116gcc/ChangeLog:
117
118 * config/aarch64/aarch64-protos.h (aarch64_indirect_call_asm):
119 New declaration.
120 * config/aarch64/aarch64.c (aarch64_regno_regclass): Handle new
121 stub registers class.
122 (aarch64_class_max_nregs): Likewise.
123 (aarch64_register_move_cost): Likewise.
124 (aarch64_sls_shared_thunks): Global array to store stub labels.
125 (aarch64_sls_emit_function_stub): New.
126 (aarch64_sls_create_blr_label): New.
127 (aarch64_sls_emit_blr_function_thunks): New.
128 (aarch64_sls_emit_shared_blr_thunks): New.
129 (aarch64_asm_file_end): New.
130 (aarch64_indirect_call_asm): New.
131 (TARGET_ASM_FILE_END): Use aarch64_asm_file_end.
132 (TARGET_ASM_FUNCTION_EPILOGUE): Use
133 aarch64_sls_emit_blr_function_thunks.
134 * config/aarch64/aarch64.h (STUB_REGNUM_P): New.
135 (enum reg_class): Add STUB_REGS class.
136 (machine_function): Introduce `call_via` array for
137 function-local stub labels.
138 * config/aarch64/aarch64.md (*call_insn, *call_value_insn): Use
139 aarch64_indirect_call_asm to emit code when hardening BLR
140 instructions.
141 * config/aarch64/constraints.md (Ucr): New constraint
142 representing registers for indirect calls. Is GENERAL_REGS
143 usually, and STUB_REGS when hardening BLR instruction against
144 SLS.
145 * config/aarch64/predicates.md (aarch64_general_reg): STUB_REGS class
146 is also a general register.
147
148gcc/testsuite/ChangeLog:
149
150 * gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c: New test.
151 * gcc.target/aarch64/sls-mitigation/sls-miti-blr.c: New test.
152---
153 gcc/config/aarch64/aarch64-protos.h | 1 +
154 gcc/config/aarch64/aarch64.c | 225 ++++++++++++++++++++-
155 gcc/config/aarch64/aarch64.h | 15 ++
156 gcc/config/aarch64/aarch64.md | 11 +-
157 gcc/config/aarch64/constraints.md | 9 +
158 gcc/config/aarch64/predicates.md | 3 +-
159 .../aarch64/sls-mitigation/sls-miti-blr-bti.c | 40 ++++
160 .../aarch64/sls-mitigation/sls-miti-blr.c | 33 +++
161 8 files changed, 328 insertions(+), 9 deletions(-)
162 create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c
163 create mode 100644 gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c
164
165diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
166index ee0ffde..839f801 100644
167--- a/gcc/config/aarch64/aarch64-protos.h
168+++ b/gcc/config/aarch64/aarch64-protos.h
169@@ -782,6 +782,7 @@ extern const atomic_ool_names aarch64_ool_ldeor_names;
170 tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *);
171
172 const char *aarch64_sls_barrier (int);
173+const char *aarch64_indirect_call_asm (rtx);
174 extern bool aarch64_harden_sls_retbr_p (void);
175 extern bool aarch64_harden_sls_blr_p (void);
176
177diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
178index 2389d49..0f7bba3 100644
179--- a/gcc/config/aarch64/aarch64.c
180+++ b/gcc/config/aarch64/aarch64.c
181@@ -10605,6 +10605,9 @@ aarch64_label_mentioned_p (rtx x)
182 enum reg_class
183 aarch64_regno_regclass (unsigned regno)
184 {
185+ if (STUB_REGNUM_P (regno))
186+ return STUB_REGS;
187+
188 if (GP_REGNUM_P (regno))
189 return GENERAL_REGS;
190
191@@ -10939,6 +10942,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
192 unsigned int nregs, vec_flags;
193 switch (regclass)
194 {
195+ case STUB_REGS:
196 case TAILCALL_ADDR_REGS:
197 case POINTER_REGS:
198 case GENERAL_REGS:
199@@ -13155,10 +13159,12 @@ aarch64_register_move_cost (machine_mode mode,
200 = aarch64_tune_params.regmove_cost;
201
202 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
203- if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS)
204+ if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS
205+ || to == STUB_REGS)
206 to = GENERAL_REGS;
207
208- if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
209+ if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS
210+ || from == STUB_REGS)
211 from = GENERAL_REGS;
212
213 /* Make RDFFR very expensive. In particular, if we know that the FFR
214@@ -22957,6 +22963,215 @@ aarch64_sls_barrier (int mitigation_required)
215 : "";
216 }
217
218+static GTY (()) tree aarch64_sls_shared_thunks[30];
219+static GTY (()) bool aarch64_sls_shared_thunks_needed = false;
220+const char *indirect_symbol_names[30] = {
221+ "__call_indirect_x0",
222+ "__call_indirect_x1",
223+ "__call_indirect_x2",
224+ "__call_indirect_x3",
225+ "__call_indirect_x4",
226+ "__call_indirect_x5",
227+ "__call_indirect_x6",
228+ "__call_indirect_x7",
229+ "__call_indirect_x8",
230+ "__call_indirect_x9",
231+ "__call_indirect_x10",
232+ "__call_indirect_x11",
233+ "__call_indirect_x12",
234+ "__call_indirect_x13",
235+ "__call_indirect_x14",
236+ "__call_indirect_x15",
237+ "", /* "__call_indirect_x16", */
238+ "", /* "__call_indirect_x17", */
239+ "__call_indirect_x18",
240+ "__call_indirect_x19",
241+ "__call_indirect_x20",
242+ "__call_indirect_x21",
243+ "__call_indirect_x22",
244+ "__call_indirect_x23",
245+ "__call_indirect_x24",
246+ "__call_indirect_x25",
247+ "__call_indirect_x26",
248+ "__call_indirect_x27",
249+ "__call_indirect_x28",
250+ "__call_indirect_x29",
251+};
252+
253+/* Function to create a BLR thunk. This thunk is used to mitigate straight
254+ line speculation. Instead of a simple BLR that can be speculated past,
255+ we emit a BL to this thunk, and this thunk contains a BR to the relevant
256+ register. These thunks have the relevant speculation barriers put after
257+ their indirect branch so that speculation is blocked.
258+
259+ We use such a thunk so the speculation barriers are kept off the
260+ architecturally executed path in order to reduce the performance overhead.
261+
262+ When optimizing for size we use stubs shared by the linked object.
263+ When optimizing for performance we emit stubs for each function in the hope
264+ that the branch predictor can better train on jumps specific for a given
265+ function. */
266+rtx
267+aarch64_sls_create_blr_label (int regnum)
268+{
269+ gcc_assert (STUB_REGNUM_P (regnum));
270+ if (optimize_function_for_size_p (cfun))
271+ {
272+ /* For the thunks shared between different functions in this compilation
273+ unit we use a named symbol -- this is just for users to more easily
274+ understand the generated assembly. */
275+ aarch64_sls_shared_thunks_needed = true;
276+ const char *thunk_name = indirect_symbol_names[regnum];
277+ if (aarch64_sls_shared_thunks[regnum] == NULL)
278+ {
279+ /* Build a decl representing this function stub and record it for
280+ later. We build a decl here so we can use the GCC machinery for
281+ handling sections automatically (through `get_named_section` and
282+ `make_decl_one_only`). That saves us a lot of trouble handling
283+ the specifics of different output file formats. */
284+ tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
285+ get_identifier (thunk_name),
286+ build_function_type_list (void_type_node,
287+ NULL_TREE));
288+ DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
289+ NULL_TREE, void_type_node);
290+ TREE_PUBLIC (decl) = 1;
291+ TREE_STATIC (decl) = 1;
292+ DECL_IGNORED_P (decl) = 1;
293+ DECL_ARTIFICIAL (decl) = 1;
294+ make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
295+ resolve_unique_section (decl, 0, false);
296+ aarch64_sls_shared_thunks[regnum] = decl;
297+ }
298+
299+ return gen_rtx_SYMBOL_REF (Pmode, thunk_name);
300+ }
301+
302+ if (cfun->machine->call_via[regnum] == NULL)
303+ cfun->machine->call_via[regnum]
304+ = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
305+ return cfun->machine->call_via[regnum];
306+}
307+
308+/* Helper function for aarch64_sls_emit_blr_function_thunks and
309+ aarch64_sls_emit_shared_blr_thunks below. */
310+static void
311+aarch64_sls_emit_function_stub (FILE *out_file, int regnum)
312+{
313+ /* Save in x16 and branch to that function so this transformation does
314+ not prevent jumping to `BTI c` instructions. */
315+ asm_fprintf (out_file, "\tmov\tx16, x%d\n", regnum);
316+ asm_fprintf (out_file, "\tbr\tx16\n");
317+}
318+
319+/* Emit all BLR stubs for this particular function.
320+ Here we emit all the BLR stubs needed for the current function. Since we
321+ emit these stubs in a consecutive block we know there will be no speculation
322+ gadgets between each stub, and hence we only emit a speculation barrier at
323+ the end of the stub sequences.
324+
325+ This is called in the TARGET_ASM_FUNCTION_EPILOGUE hook. */
326+void
327+aarch64_sls_emit_blr_function_thunks (FILE *out_file)
328+{
329+ if (! aarch64_harden_sls_blr_p ())
330+ return;
331+
332+ bool any_functions_emitted = false;
333+ /* We must save and restore the current function section since this assembly
334+ is emitted at the end of the function. This means it can be emitted *just
335+ after* the cold section of a function. That cold part would be emitted in
336+ a different section. That switch would trigger a `.cfi_endproc` directive
337+ to be emitted in the original section and a `.cfi_startproc` directive to
338+ be emitted in the new section. Switching to the original section without
339+ restoring would mean that the `.cfi_endproc` emitted as a function ends
340+ would happen in a different section -- leaving an unmatched
341+ `.cfi_startproc` in the cold text section and an unmatched `.cfi_endproc`
342+ in the standard text section. */
343+ section *save_text_section = in_section;
344+ switch_to_section (function_section (current_function_decl));
345+ for (int regnum = 0; regnum < 30; ++regnum)
346+ {
347+ rtx specu_label = cfun->machine->call_via[regnum];
348+ if (specu_label == NULL)
349+ continue;
350+
351+ targetm.asm_out.print_operand (out_file, specu_label, 0);
352+ asm_fprintf (out_file, ":\n");
353+ aarch64_sls_emit_function_stub (out_file, regnum);
354+ any_functions_emitted = true;
355+ }
356+ if (any_functions_emitted)
357+ /* Can use the SB if needs be here, since this stub will only be used
358+ by the current function, and hence for the current target. */
359+ asm_fprintf (out_file, "\t%s\n", aarch64_sls_barrier (true));
360+ switch_to_section (save_text_section);
361+}
362+
363+/* Emit shared BLR stubs for the current compilation unit.
364+ Over the course of compiling this unit we may have converted some BLR
365+ instructions to a BL to a shared stub function. This is where we emit those
366+ stub functions.
367+ This function is for the stubs shared between different functions in this
368+ compilation unit. We share when optimizing for size instead of speed.
369+
370+ This function is called through the TARGET_ASM_FILE_END hook. */
371+void
372+aarch64_sls_emit_shared_blr_thunks (FILE *out_file)
373+{
374+ if (! aarch64_sls_shared_thunks_needed)
375+ return;
376+
377+ for (int regnum = 0; regnum < 30; ++regnum)
378+ {
379+ tree decl = aarch64_sls_shared_thunks[regnum];
380+ if (!decl)
381+ continue;
382+
383+ const char *name = indirect_symbol_names[regnum];
384+ switch_to_section (get_named_section (decl, NULL, 0));
385+ ASM_OUTPUT_ALIGN (out_file, 2);
386+ targetm.asm_out.globalize_label (out_file, name);
387+ /* Only emits if the compiler is configured for an assembler that can
388+ handle visibility directives. */
389+ targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
390+ ASM_OUTPUT_TYPE_DIRECTIVE (out_file, name, "function");
391+ ASM_OUTPUT_LABEL (out_file, name);
392+ aarch64_sls_emit_function_stub (out_file, regnum);
393+ /* Use the most conservative target to ensure it can always be used by any
394+ function in the translation unit. */
395+ asm_fprintf (out_file, "\tdsb\tsy\n\tisb\n");
396+ ASM_DECLARE_FUNCTION_SIZE (out_file, name, decl);
397+ }
398+}
399+
400+/* Implement TARGET_ASM_FILE_END. */
401+void
402+aarch64_asm_file_end ()
403+{
404+ aarch64_sls_emit_shared_blr_thunks (asm_out_file);
405+ /* Since this function will be called for the ASM_FILE_END hook, we ensure
406+ that what would be called otherwise (e.g. `file_end_indicate_exec_stack`
407+ for FreeBSD) still gets called. */
408+#ifdef TARGET_ASM_FILE_END
409+ TARGET_ASM_FILE_END ();
410+#endif
411+}
412+
413+const char *
414+aarch64_indirect_call_asm (rtx addr)
415+{
416+ gcc_assert (REG_P (addr));
417+ if (aarch64_harden_sls_blr_p ())
418+ {
419+ rtx stub_label = aarch64_sls_create_blr_label (REGNO (addr));
420+ output_asm_insn ("bl\t%0", &stub_label);
421+ }
422+ else
423+ output_asm_insn ("blr\t%0", &addr);
424+ return "";
425+}
426+
427 /* Target-specific selftests. */
428
429 #if CHECKING_P
430@@ -23507,6 +23722,12 @@ aarch64_libgcc_floating_mode_supported_p
431 #undef TARGET_MD_ASM_ADJUST
432 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
433
434+#undef TARGET_ASM_FILE_END
435+#define TARGET_ASM_FILE_END aarch64_asm_file_end
436+
437+#undef TARGET_ASM_FUNCTION_EPILOGUE
438+#define TARGET_ASM_FUNCTION_EPILOGUE aarch64_sls_emit_blr_function_thunks
439+
440 struct gcc_target targetm = TARGET_INITIALIZER;
441
442 #include "gt-aarch64.h"
443diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
444index 8e0fc37..7331450 100644
445--- a/gcc/config/aarch64/aarch64.h
446+++ b/gcc/config/aarch64/aarch64.h
447@@ -643,6 +643,16 @@ extern unsigned aarch64_architecture_version;
448 #define GP_REGNUM_P(REGNO) \
449 (((unsigned) (REGNO - R0_REGNUM)) <= (R30_REGNUM - R0_REGNUM))
450
451+/* Registers known to be preserved over a BL instruction. This consists of the
452+ GENERAL_REGS without x16, x17, and x30. The x30 register is changed by the
453+ BL instruction itself, while the x16 and x17 registers may be used by
454+ veneers which can be inserted by the linker. */
455+#define STUB_REGNUM_P(REGNO) \
456+ (GP_REGNUM_P (REGNO) \
457+ && (REGNO) != R16_REGNUM \
458+ && (REGNO) != R17_REGNUM \
459+ && (REGNO) != R30_REGNUM) \
460+
461 #define FP_REGNUM_P(REGNO) \
462 (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM))
463
464@@ -667,6 +677,7 @@ enum reg_class
465 {
466 NO_REGS,
467 TAILCALL_ADDR_REGS,
468+ STUB_REGS,
469 GENERAL_REGS,
470 STACK_REG,
471 POINTER_REGS,
472@@ -689,6 +700,7 @@ enum reg_class
473 { \
474 "NO_REGS", \
475 "TAILCALL_ADDR_REGS", \
476+ "STUB_REGS", \
477 "GENERAL_REGS", \
478 "STACK_REG", \
479 "POINTER_REGS", \
480@@ -708,6 +720,7 @@ enum reg_class
481 { \
482 { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
483 { 0x00030000, 0x00000000, 0x00000000 }, /* TAILCALL_ADDR_REGS */\
484+ { 0x3ffcffff, 0x00000000, 0x00000000 }, /* STUB_REGS */ \
485 { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \
486 { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \
487 { 0xffffffff, 0x00000000, 0x00000003 }, /* POINTER_REGS */ \
488@@ -862,6 +875,8 @@ typedef struct GTY (()) machine_function
489 struct aarch64_frame frame;
490 /* One entry for each hard register. */
491 bool reg_is_wrapped_separately[LAST_SAVED_REGNUM];
492+ /* One entry for each general purpose register. */
493+ rtx call_via[SP_REGNUM];
494 bool label_is_assembled;
495 } machine_function;
496 #endif
497diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
498index dda04ee..43da754 100644
499--- a/gcc/config/aarch64/aarch64.md
500+++ b/gcc/config/aarch64/aarch64.md
501@@ -1022,16 +1022,15 @@
502 )
503
504 (define_insn "*call_insn"
505- [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "r, Usf"))
506+ [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucr, Usf"))
507 (match_operand 1 "" ""))
508 (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI)
509 (clobber (reg:DI LR_REGNUM))]
510 ""
511 "@
512- blr\\t%0
513+ * return aarch64_indirect_call_asm (operands[0]);
514 bl\\t%c0"
515- [(set_attr "type" "call, call")]
516-)
517+ [(set_attr "type" "call, call")])
518
519 (define_expand "call_value"
520 [(parallel
521@@ -1050,13 +1049,13 @@
522
523 (define_insn "*call_value_insn"
524 [(set (match_operand 0 "" "")
525- (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "r, Usf"))
526+ (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "Ucr, Usf"))
527 (match_operand 2 "" "")))
528 (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI)
529 (clobber (reg:DI LR_REGNUM))]
530 ""
531 "@
532- blr\\t%1
533+ * return aarch64_indirect_call_asm (operands[1]);
534 bl\\t%c1"
535 [(set_attr "type" "call, call")]
536 )
537diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
538index d993268..8cc6f50 100644
539--- a/gcc/config/aarch64/constraints.md
540+++ b/gcc/config/aarch64/constraints.md
541@@ -24,6 +24,15 @@
542 (define_register_constraint "Ucs" "TAILCALL_ADDR_REGS"
543 "@internal Registers suitable for an indirect tail call")
544
545+(define_register_constraint "Ucr"
546+ "aarch64_harden_sls_blr_p () ? STUB_REGS : GENERAL_REGS"
547+ "@internal Registers to be used for an indirect call.
548+ This is usually the general registers, but when we are hardening against
549+ Straight Line Speculation we disallow x16, x17, and x30 so we can use
550+ indirection stubs. These indirection stubs cannot use the above registers
551+ since they will be reached by a BL that may have to go through a linker
552+ veneer.")
553+
554 (define_register_constraint "w" "FP_REGS"
555 "Floating point and SIMD vector registers.")
556
557diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
558index 215fcec..1754b1e 100644
559--- a/gcc/config/aarch64/predicates.md
560+++ b/gcc/config/aarch64/predicates.md
561@@ -32,7 +32,8 @@
562
563 (define_predicate "aarch64_general_reg"
564 (and (match_operand 0 "register_operand")
565- (match_test "REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS")))
566+ (match_test "REGNO_REG_CLASS (REGNO (op)) == STUB_REGS
567+ || REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS")))
568
569 ;; Return true if OP a (const_int 0) operand.
570 (define_predicate "const0_operand"
571diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c
572new file mode 100644
573index 0000000..b1fb754
574--- /dev/null
575+++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr-bti.c
576@@ -0,0 +1,40 @@
577+/* { dg-do compile } */
578+/* { dg-additional-options "-mharden-sls=blr -mbranch-protection=bti" } */
579+/*
580+ Ensure that the SLS hardening of BLR leaves no BLR instructions.
581+ Here we also check that there are no BR instructions with anything except an
582+ x16 or x17 register. This is because a `BTI c` instruction can be branched
583+ to using a BLR instruction using any register, but can only be branched to
584+ with a BR using an x16 or x17 register.
585+ */
586+typedef int (foo) (int, int);
587+typedef void (bar) (int, int);
588+struct sls_testclass {
589+ foo *x;
590+ bar *y;
591+ int left;
592+ int right;
593+};
594+
595+/* We test both RTL patterns for a call which returns a value and a call which
596+ does not. */
597+int blr_call_value (struct sls_testclass x)
598+{
599+ int retval = x.x(x.left, x.right);
600+ if (retval % 10)
601+ return 100;
602+ return 9;
603+}
604+
605+int blr_call (struct sls_testclass x)
606+{
607+ x.y(x.left, x.right);
608+ if (x.left % 10)
609+ return 100;
610+ return 9;
611+}
612+
613+/* { dg-final { scan-assembler-not {\tblr\t} } } */
614+/* { dg-final { scan-assembler-not {\tbr\tx(?!16|17)} } } */
615+/* { dg-final { scan-assembler {\tbr\tx(16|17)} } } */
616+
617diff --git a/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c
618new file mode 100644
619index 0000000..88bafff
620--- /dev/null
621+++ b/gcc/testsuite/gcc.target/aarch64/sls-mitigation/sls-miti-blr.c
622@@ -0,0 +1,33 @@
623+/* { dg-additional-options "-mharden-sls=blr -save-temps" } */
624+/* Ensure that the SLS hardening of BLR leaves no BLR instructions.
625+ We only test that all BLR instructions have been removed, not that the
626+ resulting code makes sense. */
627+typedef int (foo) (int, int);
628+typedef void (bar) (int, int);
629+struct sls_testclass {
630+ foo *x;
631+ bar *y;
632+ int left;
633+ int right;
634+};
635+
636+/* We test both RTL patterns for a call which returns a value and a call which
637+ does not. */
638+int blr_call_value (struct sls_testclass x)
639+{
640+ int retval = x.x(x.left, x.right);
641+ if (retval % 10)
642+ return 100;
643+ return 9;
644+}
645+
646+int blr_call (struct sls_testclass x)
647+{
648+ x.y(x.left, x.right);
649+ if (x.left % 10)
650+ return 100;
651+ return 9;
652+}
653+
654+/* { dg-final { scan-assembler-not {\tblr\t} } } */
655+/* { dg-final { scan-assembler {\tbr\tx[0-9][0-9]?} } } */
656--
6572.7.4
658