1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
|
From 536b8318974495cde2b42c3c2742748e2b271be0 Mon Sep 17 00:00:00 2001
From: ktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Wed, 27 May 2015 13:25:01 +0000
Subject: [PATCH] PR target/65358 Avoid clobbering partial argument during
sibcall
PR target/65358
* expr.c (memory_load_overlap): New function.
(emit_push_insn): When pushing partial args to the stack would
clobber the register part load the overlapping part into a pseudo
and put it into the hard reg after pushing. Change return type
to bool. Add bool argument.
* expr.h (emit_push_insn): Change return type to bool.
Add bool argument.
* calls.c (expand_call): Cancel sibcall optimization when encountering
partial argument on targets with ARGS_GROW_DOWNWARD and
!STACK_GROWS_DOWNWARD.
(emit_library_call_value_1): Update callsite of emit_push_insn.
(store_one_arg): Likewise.
PR target/65358
* gcc.dg/pr65358.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@223753 138bc75d-0d04-0410-961f-82ee72b054a4
Upstream-Status: Backport from 6.0
Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com>
---
gcc/calls.c | 17 ++++++--
gcc/expr.c | 90 +++++++++++++++++++++++++++++++++++++-----
gcc/expr.h | 4 +-
gcc/testsuite/gcc.dg/pr65358.c | 33 ++++++++++++++++
4 files changed, 129 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/pr65358.c
diff --git a/gcc/calls.c b/gcc/calls.c
index ee8ea5f..2334381 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -3236,6 +3236,14 @@ expand_call (tree exp, rtx target, int ignore)
{
rtx_insn *before_arg = get_last_insn ();
+ /* On targets with weird calling conventions (e.g. PA) it's
+ hard to ensure that all cases of argument overlap between
+ stack and registers work. Play it safe and bail out. */
+#if defined(ARGS_GROW_DOWNWARD) && !defined(STACK_GROWS_DOWNWARD)
+ sibcall_failure = 1;
+ break;
+#endif
+
if (store_one_arg (&args[i], argblock, flags,
adjusted_args_size.var != 0,
reg_parm_stack_space)
@@ -4279,7 +4287,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
partial, reg, 0, argblock,
GEN_INT (argvec[argnum].locate.offset.constant),
reg_parm_stack_space,
- ARGS_SIZE_RTX (argvec[argnum].locate.alignment_pad));
+ ARGS_SIZE_RTX (argvec[argnum].locate.alignment_pad), false);
/* Now mark the segment we just used. */
if (ACCUMULATE_OUTGOING_ARGS)
@@ -4886,10 +4894,11 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
/* This isn't already where we want it on the stack, so put it there.
This can either be done with push or copy insns. */
- emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), NULL_RTX,
+ if (!emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), NULL_RTX,
parm_align, partial, reg, used - size, argblock,
ARGS_SIZE_RTX (arg->locate.offset), reg_parm_stack_space,
- ARGS_SIZE_RTX (arg->locate.alignment_pad));
+ ARGS_SIZE_RTX (arg->locate.alignment_pad), true))
+ sibcall_failure = 1;
/* Unless this is a partially-in-register argument, the argument is now
in the stack. */
@@ -5001,7 +5010,7 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), size_rtx,
parm_align, partial, reg, excess, argblock,
ARGS_SIZE_RTX (arg->locate.offset), reg_parm_stack_space,
- ARGS_SIZE_RTX (arg->locate.alignment_pad));
+ ARGS_SIZE_RTX (arg->locate.alignment_pad), false);
/* Unless this is a partially-in-register argument, the argument is now
in the stack.
diff --git a/gcc/expr.c b/gcc/expr.c
index 5c09550..24a6293 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -4121,12 +4121,35 @@ emit_single_push_insn (machine_mode mode, rtx x, tree type)
}
#endif
+/* If reading SIZE bytes from address X would also read from address Y,
+ return the overlap in bytes: (X + SIZE) - Y when that is in [1, SIZE].
+ Return -1 if there is no overlap, or -2 if the difference does not fold
+ to a CONST_INT (e.g. when X and Y have different base registers). */
+
+static int
+memory_load_overlap (rtx x, rtx y, HOST_WIDE_INT size)
+{
+ rtx tmp = plus_constant (Pmode, x, size); /* X + SIZE. */
+ rtx sub = simplify_gen_binary (MINUS, Pmode, tmp, y); /* (X + SIZE) - Y. */
+
+ if (!CONST_INT_P (sub))
+ return -2; /* Distance is not a known constant. */
+
+ HOST_WIDE_INT val = INTVAL (sub);
+
+ return IN_RANGE (val, 1, size) ? val : -1; /* val <= SIZE when returned. */
+}
+
/* Generate code to push X onto the stack, assuming it has mode MODE and
type TYPE.
MODE is redundant except when X is a CONST_INT (since they don't
carry mode info).
SIZE is an rtx for the size of data to be copied (in bytes),
needed only if X is BLKmode.
+ Return true if successful. May return false if asked to push a
+ partial argument during a sibcall optimization (as specified by
+ SIBCALL_P) and the incoming and outgoing pointers cannot be shown
+ to not overlap.
ALIGN (in bits) is maximum alignment we can assume.
@@ -4152,11 +4175,11 @@ emit_single_push_insn (machine_mode mode, rtx x, tree type)
for arguments passed in registers. If nonzero, it will be the number
of bytes required. */
-void
+bool
emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
unsigned int align, int partial, rtx reg, int extra,
rtx args_addr, rtx args_so_far, int reg_parm_stack_space,
- rtx alignment_pad)
+ rtx alignment_pad, bool sibcall_p)
{
rtx xinner;
enum direction stack_direction
@@ -4179,6 +4202,10 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
xinner = x;
+ int nregs = partial / UNITS_PER_WORD;
+ rtx *tmp_regs = NULL;
+ int overlapping = 0;
+
if (mode == BLKmode
|| (STRICT_ALIGNMENT && align < GET_MODE_ALIGNMENT (mode)))
{
@@ -4309,6 +4336,43 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
PARM_BOUNDARY. Assume the caller isn't lying. */
set_mem_align (target, align);
+ /* If part should go in registers and pushing to that part would
+ overwrite some of the values that need to go into regs, load the
+ overlapping values into temporary pseudos to be moved into the hard
+ regs at the end after the stack pushing has completed.
+ We cannot load them directly into the hard regs here because
+ they can be clobbered by the block move expansions.
+ See PR 65358. */
+
+ if (partial > 0 && reg != 0 && mode == BLKmode
+ && GET_CODE (reg) != PARALLEL)
+ {
+ overlapping = memory_load_overlap (XEXP (x, 0), temp, partial);
+ if (overlapping > 0)
+ {
+ gcc_assert (overlapping % UNITS_PER_WORD == 0);
+ overlapping /= UNITS_PER_WORD;
+
+ tmp_regs = XALLOCAVEC (rtx, overlapping);
+
+ for (int i = 0; i < overlapping; i++)
+ tmp_regs[i] = gen_reg_rtx (word_mode);
+
+ for (int i = 0; i < overlapping; i++)
+ emit_move_insn (tmp_regs[i],
+ operand_subword_force (target, i, mode));
+ }
+ else if (overlapping == -1)
+ overlapping = 0;
+ /* Could not determine whether there is overlap.
+ Fail the sibcall. */
+ else
+ {
+ overlapping = 0;
+ if (sibcall_p)
+ return false;
+ }
+ }
emit_block_move (target, xinner, size, BLOCK_OP_CALL_PARM);
}
}
@@ -4363,12 +4427,13 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
has a size a multiple of a word. */
for (i = size - 1; i >= not_stack; i--)
if (i >= not_stack + offset)
- emit_push_insn (operand_subword_force (x, i, mode),
+ if (!emit_push_insn (operand_subword_force (x, i, mode),
word_mode, NULL_TREE, NULL_RTX, align, 0, NULL_RTX,
0, args_addr,
GEN_INT (args_offset + ((i - not_stack + skip)
* UNITS_PER_WORD)),
- reg_parm_stack_space, alignment_pad);
+ reg_parm_stack_space, alignment_pad, sibcall_p))
+ return false;
}
else
{
@@ -4411,9 +4476,8 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
}
}
- /* If part should go in registers, copy that part
- into the appropriate registers. Do this now, at the end,
- since mem-to-mem copies above may do function calls. */
+ /* Move the partial arguments into the registers and any overlapping
+ values that we moved into the pseudos in tmp_regs. */
if (partial > 0 && reg != 0)
{
/* Handle calls that pass values in multiple non-contiguous locations.
@@ -4421,9 +4485,15 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
if (GET_CODE (reg) == PARALLEL)
emit_group_load (reg, x, type, -1);
else
- {
+ {
gcc_assert (partial % UNITS_PER_WORD == 0);
- move_block_to_reg (REGNO (reg), x, partial / UNITS_PER_WORD, mode);
+ move_block_to_reg (REGNO (reg), x, nregs - overlapping, mode);
+
+ for (int i = 0; i < overlapping; i++)
+ emit_move_insn (gen_rtx_REG (word_mode, REGNO (reg)
+ + nregs - overlapping + i),
+ tmp_regs[i]);
+
}
}
@@ -4432,6 +4502,8 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
if (alignment_pad && args_addr == 0)
anti_adjust_stack (alignment_pad);
+
+ return true;
}
/* Return X if X can be used as a subtarget in a sequence of arithmetic
diff --git a/gcc/expr.h b/gcc/expr.h
index 867852e..5fcc13f 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -218,8 +218,8 @@ extern rtx emit_move_resolve_push (machine_mode, rtx);
extern rtx push_block (rtx, int, int);
/* Generate code to push something onto the stack, given its mode and type. */
-extern void emit_push_insn (rtx, machine_mode, tree, rtx, unsigned int,
- int, rtx, int, rtx, rtx, int, rtx);
+extern bool emit_push_insn (rtx, machine_mode, tree, rtx, unsigned int,
+ int, rtx, int, rtx, rtx, int, rtx, bool);
/* Expand an assignment that stores the value of FROM into TO. */
extern void expand_assignment (tree, tree, bool);
diff --git a/gcc/testsuite/gcc.dg/pr65358.c b/gcc/testsuite/gcc.dg/pr65358.c
new file mode 100644
index 0000000..ba89fd4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr65358.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+struct pack
+{
+ int fine; /* Set to 40 in main, checked in bar. */
+ int victim; /* Set to 50 in main, checked in bar. */
+ int killer; /* Set to 60 in main, checked in bar. */
+};
+
+int __attribute__ ((__noinline__, __noclone__)) /* Keep bar out of line. */
+bar (int a, int b, struct pack p)
+{
+ if (a != 20 || b != 30) /* Scalars forwarded from foo's arg2/arg3. */
+ __builtin_abort ();
+ if (p.fine != 40 || p.victim != 50 || p.killer != 60) /* P must be intact. */
+ __builtin_abort ();
+ return 0;
+}
+
+int __attribute__ ((__noinline__, __noclone__)) /* Keep foo out of line. */
+foo (int arg1, int arg2, int arg3, struct pack p)
+{
+ return bar (arg2, arg3, p); /* Call in tail position; P moves from 4th to 3rd argument. */
+}
+
+int main (void)
+{
+ struct pack p = { 40, 50, 60 }; /* Values checked field by field in bar. */
+
+ (void) foo (10, 20, 30, p); /* bar aborts if any forwarded value is wrong. */
+ return 0;
+}
--
2.7.0
|