summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author    Bin Lan <bin.lan.cn@windriver.com>  2024-12-17 18:47:29 +0800
committer Richard Purdie <richard.purdie@linuxfoundation.org>  2024-12-18 11:11:55 +0000
commit    5c3a3cf22543f8e04873aa7e39f451117c6b9035 (patch)
tree      c810d6b1a4d602c1677adeb2700fe387a67c3d7c
parent    1db6f52e721ad6c970f1907bfe053221eca43e6d (diff)
download  poky-5c3a3cf22543f8e04873aa7e39f451117c6b9035.tar.gz
gcc: backport patch to fix data relocation to !ENDBR: stpcpy

There is the following warning when building linux-yocto with the default
configuration on x86-64 with gcc-14.2:

  AR      built-in.a
  AR      vmlinux.a
  LD      vmlinux.o
  vmlinux.o: warning: objtool: .export_symbol+0x332a0: data relocation to !ENDBR: stpcpy+0x0

This change set removes the warning.

PR target/116174 [https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116174]

(From OE-Core rev: 30d4f18d1e11b3336c8668dccd96b9ff35c7bc76)

Signed-off-by: Bin Lan <bin.lan.cn@windriver.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r--meta/recipes-devtools/gcc/gcc-14.2.inc1
-rw-r--r--meta/recipes-devtools/gcc/gcc/0027-gcc-backport-patch-to-fix-data-relocation-to-ENDBR-s.patch447
2 files changed, 448 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-14.2.inc b/meta/recipes-devtools/gcc/gcc-14.2.inc
index 932a27995b..3d65bed92a 100644
--- a/meta/recipes-devtools/gcc/gcc-14.2.inc
+++ b/meta/recipes-devtools/gcc/gcc-14.2.inc
@@ -69,6 +69,7 @@ SRC_URI = "${BASEURI} \
69 file://0024-Avoid-hardcoded-build-paths-into-ppc-libgcc.patch \ 69 file://0024-Avoid-hardcoded-build-paths-into-ppc-libgcc.patch \
70 file://0025-gcc-testsuite-tweaks-for-mips-OE.patch \ 70 file://0025-gcc-testsuite-tweaks-for-mips-OE.patch \
71 file://0026-gcc-Fix-c-tweak-for-Wrange-loop-construct.patch \ 71 file://0026-gcc-Fix-c-tweak-for-Wrange-loop-construct.patch \
72 file://0027-gcc-backport-patch-to-fix-data-relocation-to-ENDBR-s.patch \
72 file://gcc.git-ab884fffe3fc82a710bea66ad651720d71c938b8.patch \ 73 file://gcc.git-ab884fffe3fc82a710bea66ad651720d71c938b8.patch \
73" 74"
74 75
diff --git a/meta/recipes-devtools/gcc/gcc/0027-gcc-backport-patch-to-fix-data-relocation-to-ENDBR-s.patch b/meta/recipes-devtools/gcc/gcc/0027-gcc-backport-patch-to-fix-data-relocation-to-ENDBR-s.patch
new file mode 100644
index 0000000000..5bede60816
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc/0027-gcc-backport-patch-to-fix-data-relocation-to-ENDBR-s.patch
@@ -0,0 +1,447 @@
1From 4e7735a8d87559bbddfe3a985786996e22241f8d Mon Sep 17 00:00:00 2001
2From: liuhongt <hongtao.liu@intel.com>
3Date: Mon, 12 Aug 2024 14:35:31 +0800
4Subject: [PATCH] Move ix86_align_loops into a separate pass and insert the
5 pass after pass_endbr_and_patchable_area.
6
7gcc/ChangeLog:
8
9 PR target/116174
10 * config/i386/i386.cc (ix86_align_loops): Move this to ..
11 * config/i386/i386-features.cc (ix86_align_loops): .. here.
12 (class pass_align_tight_loops): New class.
13 (make_pass_align_tight_loops): New function.
14 * config/i386/i386-passes.def: Insert pass_align_tight_loops
15 after pass_insert_endbr_and_patchable_area.
16 * config/i386/i386-protos.h (make_pass_align_tight_loops): New
17 declare.
18
19gcc/testsuite/ChangeLog:
20
21 * gcc.target/i386/pr116174.c: New test.
22
23(cherry picked from commit c3c83d22d212a35cb1bfb8727477819463f0dcd8)
24
25Upstream-Status: Backport [https://gcc.gnu.org/git/?p=gcc.git;a=patch;h=4e7735a8d87559bbddfe3a985786996e22241f8d]
26
27Signed-off-by: Bin Lan <bin.lan.cn@windriver.com>
28---
29 gcc/config/i386/i386-features.cc | 191 +++++++++++++++++++++++
30 gcc/config/i386/i386-passes.def | 3 +
31 gcc/config/i386/i386-protos.h | 1 +
32 gcc/config/i386/i386.cc | 146 -----------------
33 gcc/testsuite/gcc.target/i386/pr116174.c | 12 ++
34 5 files changed, 207 insertions(+), 146 deletions(-)
35 create mode 100644 gcc/testsuite/gcc.target/i386/pr116174.c
36
37diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
38index e3e004d55267..7de19d423637 100644
39--- a/gcc/config/i386/i386-features.cc
40+++ b/gcc/config/i386/i386-features.cc
41@@ -3253,6 +3253,197 @@ make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
42 return new pass_remove_partial_avx_dependency (ctxt);
43 }
44
45+/* When a hot loop can be fit into one cacheline,
46+ force align the loop without considering the max skip. */
47+static void
48+ix86_align_loops ()
49+{
50+ basic_block bb;
51+
52+ /* Don't do this when we don't know cache line size. */
53+ if (ix86_cost->prefetch_block == 0)
54+ return;
55+
56+ loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
57+ profile_count count_threshold = cfun->cfg->count_max / param_align_threshold;
58+ FOR_EACH_BB_FN (bb, cfun)
59+ {
60+ rtx_insn *label = BB_HEAD (bb);
61+ bool has_fallthru = 0;
62+ edge e;
63+ edge_iterator ei;
64+
65+ if (!LABEL_P (label))
66+ continue;
67+
68+ profile_count fallthru_count = profile_count::zero ();
69+ profile_count branch_count = profile_count::zero ();
70+
71+ FOR_EACH_EDGE (e, ei, bb->preds)
72+ {
73+ if (e->flags & EDGE_FALLTHRU)
74+ has_fallthru = 1, fallthru_count += e->count ();
75+ else
76+ branch_count += e->count ();
77+ }
78+
79+ if (!fallthru_count.initialized_p () || !branch_count.initialized_p ())
80+ continue;
81+
82+ if (bb->loop_father
83+ && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun)
84+ && (has_fallthru
85+ ? (!(single_succ_p (bb)
86+ && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
87+ && optimize_bb_for_speed_p (bb)
88+ && branch_count + fallthru_count > count_threshold
89+ && (branch_count > fallthru_count * param_align_loop_iterations))
90+ /* In case there'no fallthru for the loop.
91+ Nops inserted won't be executed. */
92+ : (branch_count > count_threshold
93+ || (bb->count > bb->prev_bb->count * 10
94+ && (bb->prev_bb->count
95+ <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2)))))
96+ {
97+ rtx_insn* insn, *end_insn;
98+ HOST_WIDE_INT size = 0;
99+ bool padding_p = true;
100+ basic_block tbb = bb;
101+ unsigned cond_branch_num = 0;
102+ bool detect_tight_loop_p = false;
103+
104+ for (unsigned int i = 0; i != bb->loop_father->num_nodes;
105+ i++, tbb = tbb->next_bb)
106+ {
107+ /* Only handle continuous cfg layout. */
108+ if (bb->loop_father != tbb->loop_father)
109+ {
110+ padding_p = false;
111+ break;
112+ }
113+
114+ FOR_BB_INSNS (tbb, insn)
115+ {
116+ if (!NONDEBUG_INSN_P (insn))
117+ continue;
118+ size += ix86_min_insn_size (insn);
119+
120+ /* We don't know size of inline asm.
121+ Don't align loop for call. */
122+ if (asm_noperands (PATTERN (insn)) >= 0
123+ || CALL_P (insn))
124+ {
125+ size = -1;
126+ break;
127+ }
128+ }
129+
130+ if (size == -1 || size > ix86_cost->prefetch_block)
131+ {
132+ padding_p = false;
133+ break;
134+ }
135+
136+ FOR_EACH_EDGE (e, ei, tbb->succs)
137+ {
138+ /* It could be part of the loop. */
139+ if (e->dest == bb)
140+ {
141+ detect_tight_loop_p = true;
142+ break;
143+ }
144+ }
145+
146+ if (detect_tight_loop_p)
147+ break;
148+
149+ end_insn = BB_END (tbb);
150+ if (JUMP_P (end_insn))
151+ {
152+ /* For decoded icache:
153+ 1. Up to two branches are allowed per Way.
154+ 2. A non-conditional branch is the last micro-op in a Way.
155+ */
156+ if (onlyjump_p (end_insn)
157+ && (any_uncondjump_p (end_insn)
158+ || single_succ_p (tbb)))
159+ {
160+ padding_p = false;
161+ break;
162+ }
163+ else if (++cond_branch_num >= 2)
164+ {
165+ padding_p = false;
166+ break;
167+ }
168+ }
169+
170+ }
171+
172+ if (padding_p && detect_tight_loop_p)
173+ {
174+ emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 (size)),
175+ GEN_INT (0)), label);
176+ /* End of function. */
177+ if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
178+ break;
179+ /* Skip bb which already fits into one cacheline. */
180+ bb = tbb;
181+ }
182+ }
183+ }
184+
185+ loop_optimizer_finalize ();
186+ free_dominance_info (CDI_DOMINATORS);
187+}
188+
189+namespace {
190+
191+const pass_data pass_data_align_tight_loops =
192+{
193+ RTL_PASS, /* type */
194+ "align_tight_loops", /* name */
195+ OPTGROUP_NONE, /* optinfo_flags */
196+ TV_MACH_DEP, /* tv_id */
197+ 0, /* properties_required */
198+ 0, /* properties_provided */
199+ 0, /* properties_destroyed */
200+ 0, /* todo_flags_start */
201+ 0, /* todo_flags_finish */
202+};
203+
204+class pass_align_tight_loops : public rtl_opt_pass
205+{
206+public:
207+ pass_align_tight_loops (gcc::context *ctxt)
208+ : rtl_opt_pass (pass_data_align_tight_loops, ctxt)
209+ {}
210+
211+ /* opt_pass methods: */
212+ bool gate (function *) final override
213+ {
214+ return optimize && optimize_function_for_speed_p (cfun);
215+ }
216+
217+ unsigned int execute (function *) final override
218+ {
219+ timevar_push (TV_MACH_DEP);
220+#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
221+ ix86_align_loops ();
222+#endif
223+ timevar_pop (TV_MACH_DEP);
224+ return 0;
225+ }
226+}; // class pass_align_tight_loops
227+
228+} // anon namespace
229+
230+rtl_opt_pass *
231+make_pass_align_tight_loops (gcc::context *ctxt)
232+{
233+ return new pass_align_tight_loops (ctxt);
234+}
235+
236 /* This compares the priority of target features in function DECL1
237 and DECL2. It returns positive value if DECL1 is higher priority,
238 negative value if DECL2 is higher priority and 0 if they are the
239diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def
240index 7d96766f7b96..e500f15c9971 100644
241--- a/gcc/config/i386/i386-passes.def
242+++ b/gcc/config/i386/i386-passes.def
243@@ -31,5 +31,8 @@ along with GCC; see the file COPYING3. If not see
244 INSERT_PASS_BEFORE (pass_cse2, 1, pass_stv, true /* timode_p */);
245
246 INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_endbr_and_patchable_area);
247+ /* pass_align_tight_loops must be after pass_insert_endbr_and_patchable_area.
248+ PR116174. */
249+ INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops);
250
251 INSERT_PASS_AFTER (pass_combine, 1, pass_remove_partial_avx_dependency);
252diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
253index 46214a63974d..36c7b1aed42b 100644
254--- a/gcc/config/i386/i386-protos.h
255+++ b/gcc/config/i386/i386-protos.h
256@@ -419,6 +419,7 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area
257 (gcc::context *);
258 extern rtl_opt_pass *make_pass_remove_partial_avx_dependency
259 (gcc::context *);
260+extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *);
261
262 extern bool ix86_has_no_direct_extern_access;
263
264diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
265index 6f89891d3cb5..288c69467d62 100644
266--- a/gcc/config/i386/i386.cc
267+++ b/gcc/config/i386/i386.cc
268@@ -23444,150 +23444,6 @@ ix86_split_stlf_stall_load ()
269 }
270 }
271
272-/* When a hot loop can be fit into one cacheline,
273- force align the loop without considering the max skip. */
274-static void
275-ix86_align_loops ()
276-{
277- basic_block bb;
278-
279- /* Don't do this when we don't know cache line size. */
280- if (ix86_cost->prefetch_block == 0)
281- return;
282-
283- loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
284- profile_count count_threshold = cfun->cfg->count_max / param_align_threshold;
285- FOR_EACH_BB_FN (bb, cfun)
286- {
287- rtx_insn *label = BB_HEAD (bb);
288- bool has_fallthru = 0;
289- edge e;
290- edge_iterator ei;
291-
292- if (!LABEL_P (label))
293- continue;
294-
295- profile_count fallthru_count = profile_count::zero ();
296- profile_count branch_count = profile_count::zero ();
297-
298- FOR_EACH_EDGE (e, ei, bb->preds)
299- {
300- if (e->flags & EDGE_FALLTHRU)
301- has_fallthru = 1, fallthru_count += e->count ();
302- else
303- branch_count += e->count ();
304- }
305-
306- if (!fallthru_count.initialized_p () || !branch_count.initialized_p ())
307- continue;
308-
309- if (bb->loop_father
310- && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun)
311- && (has_fallthru
312- ? (!(single_succ_p (bb)
313- && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
314- && optimize_bb_for_speed_p (bb)
315- && branch_count + fallthru_count > count_threshold
316- && (branch_count > fallthru_count * param_align_loop_iterations))
317- /* In case there'no fallthru for the loop.
318- Nops inserted won't be executed. */
319- : (branch_count > count_threshold
320- || (bb->count > bb->prev_bb->count * 10
321- && (bb->prev_bb->count
322- <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2)))))
323- {
324- rtx_insn* insn, *end_insn;
325- HOST_WIDE_INT size = 0;
326- bool padding_p = true;
327- basic_block tbb = bb;
328- unsigned cond_branch_num = 0;
329- bool detect_tight_loop_p = false;
330-
331- for (unsigned int i = 0; i != bb->loop_father->num_nodes;
332- i++, tbb = tbb->next_bb)
333- {
334- /* Only handle continuous cfg layout. */
335- if (bb->loop_father != tbb->loop_father)
336- {
337- padding_p = false;
338- break;
339- }
340-
341- FOR_BB_INSNS (tbb, insn)
342- {
343- if (!NONDEBUG_INSN_P (insn))
344- continue;
345- size += ix86_min_insn_size (insn);
346-
347- /* We don't know size of inline asm.
348- Don't align loop for call. */
349- if (asm_noperands (PATTERN (insn)) >= 0
350- || CALL_P (insn))
351- {
352- size = -1;
353- break;
354- }
355- }
356-
357- if (size == -1 || size > ix86_cost->prefetch_block)
358- {
359- padding_p = false;
360- break;
361- }
362-
363- FOR_EACH_EDGE (e, ei, tbb->succs)
364- {
365- /* It could be part of the loop. */
366- if (e->dest == bb)
367- {
368- detect_tight_loop_p = true;
369- break;
370- }
371- }
372-
373- if (detect_tight_loop_p)
374- break;
375-
376- end_insn = BB_END (tbb);
377- if (JUMP_P (end_insn))
378- {
379- /* For decoded icache:
380- 1. Up to two branches are allowed per Way.
381- 2. A non-conditional branch is the last micro-op in a Way.
382- */
383- if (onlyjump_p (end_insn)
384- && (any_uncondjump_p (end_insn)
385- || single_succ_p (tbb)))
386- {
387- padding_p = false;
388- break;
389- }
390- else if (++cond_branch_num >= 2)
391- {
392- padding_p = false;
393- break;
394- }
395- }
396-
397- }
398-
399- if (padding_p && detect_tight_loop_p)
400- {
401- emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 (size)),
402- GEN_INT (0)), label);
403- /* End of function. */
404- if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
405- break;
406- /* Skip bb which already fits into one cacheline. */
407- bb = tbb;
408- }
409- }
410- }
411-
412- loop_optimizer_finalize ();
413- free_dominance_info (CDI_DOMINATORS);
414-}
415-
416 /* Implement machine specific optimizations. We implement padding of returns
417 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
418 static void
419@@ -23611,8 +23467,6 @@ ix86_reorg (void)
420 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
421 if (TARGET_FOUR_JUMP_LIMIT)
422 ix86_avoid_jump_mispredicts ();
423-
424- ix86_align_loops ();
425 #endif
426 }
427 }
428diff --git a/gcc/testsuite/gcc.target/i386/pr116174.c b/gcc/testsuite/gcc.target/i386/pr116174.c
429new file mode 100644
430index 000000000000..8877d0b51af1
431--- /dev/null
432+++ b/gcc/testsuite/gcc.target/i386/pr116174.c
433@@ -0,0 +1,12 @@
434+/* { dg-do compile { target *-*-linux* } } */
435+/* { dg-options "-O2 -fcf-protection=branch" } */
436+
437+char *
438+foo (char *dest, const char *src)
439+{
440+ while ((*dest++ = *src++) != '\0')
441+ /* nothing */;
442+ return --dest;
443+}
444+
445+/* { dg-final { scan-assembler "\t\.cfi_startproc\n\tendbr(32|64)\n" } } */
446--
4472.43.5