Merge dataflow branch into mainline
[gcc.git] / gcc / combine-stack-adj.c
1 /* Combine stack adjustments.
2 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997,
3 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 02110-1301, USA. */
22
23 /* Track stack adjustments and stack memory references. Attempt to
24 reduce the number of stack adjustments by back-propagating across
25 the memory references.
26
27 This is intended primarily for use with targets that do not define
28 ACCUMULATE_OUTGOING_ARGS. It is of significantly more value to
29 targets that define PREFERRED_STACK_BOUNDARY more aligned than
30 STACK_BOUNDARY (e.g. x86), or if not all registers can be pushed
31 (e.g. x86 fp regs) which would ordinarily have to be implemented
32 as a sub/mov pair due to restrictions in calls.c.
33
34 Propagation stops when any of the insns that need adjusting are
35 (a) no longer valid because we've exceeded their range, (b) a
36 non-trivial push instruction, or (c) a call instruction.
37
38 Restriction B is based on the assumption that push instructions
39 are smaller or faster. If a port really wants to remove all
40 pushes, it should have defined ACCUMULATE_OUTGOING_ARGS. The
41 one exception that is made is for an add immediately followed
42 by a push. */
43
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tm.h"
48 #include "rtl.h"
49 #include "tm_p.h"
50 #include "insn-config.h"
51 #include "recog.h"
52 #include "output.h"
53 #include "regs.h"
54 #include "hard-reg-set.h"
55 #include "flags.h"
56 #include "function.h"
57 #include "expr.h"
58 #include "basic-block.h"
59 #include "df.h"
60 #include "except.h"
61 #include "toplev.h"
62 #include "reload.h"
63 #include "timevar.h"
64 #include "tree-pass.h"
65
66 \f
67 /* Turn STACK_GROWS_DOWNWARD into a boolean. */
68 #ifdef STACK_GROWS_DOWNWARD
69 #undef STACK_GROWS_DOWNWARD
70 #define STACK_GROWS_DOWNWARD 1
71 #else
72 #define STACK_GROWS_DOWNWARD 0
73 #endif
74
75 /* This structure records stack memory references between stack adjusting
76 instructions. */
77
78 struct csa_memlist
79 {
80 HOST_WIDE_INT sp_offset;
81 rtx insn, *mem;
82 struct csa_memlist *next;
83 };
84
85 static int stack_memref_p (rtx);
86 static rtx single_set_for_csa (rtx);
87 static void free_csa_memlist (struct csa_memlist *);
88 static struct csa_memlist *record_one_stack_memref (rtx, rtx *,
89 struct csa_memlist *);
90 static int try_apply_stack_adjustment (rtx, struct csa_memlist *,
91 HOST_WIDE_INT, HOST_WIDE_INT);
92 static void combine_stack_adjustments_for_block (basic_block);
93 static int record_stack_memrefs (rtx *, void *);
94
95
96 /* Main entry point for stack adjustment combination. */
97
98 static void
99 combine_stack_adjustments (void)
100 {
101 basic_block bb;
102
103 FOR_EACH_BB (bb)
104 combine_stack_adjustments_for_block (bb);
105 }
106
107 /* Recognize a MEM of the form (sp) or (plus sp const). */
108
109 static int
110 stack_memref_p (rtx x)
111 {
112 if (!MEM_P (x))
113 return 0;
114 x = XEXP (x, 0);
115
116 if (x == stack_pointer_rtx)
117 return 1;
118 if (GET_CODE (x) == PLUS
119 && XEXP (x, 0) == stack_pointer_rtx
120 && GET_CODE (XEXP (x, 1)) == CONST_INT)
121 return 1;
122
123 return 0;
124 }
125
126 /* Recognize either normal single_set or the hack in i386.md for
127 tying fp and sp adjustments. */
128
129 static rtx
130 single_set_for_csa (rtx insn)
131 {
132 int i;
133 rtx tmp = single_set (insn);
134 if (tmp)
135 return tmp;
136
137 if (!NONJUMP_INSN_P (insn)
138 || GET_CODE (PATTERN (insn)) != PARALLEL)
139 return NULL_RTX;
140
141 tmp = PATTERN (insn);
142 if (GET_CODE (XVECEXP (tmp, 0, 0)) != SET)
143 return NULL_RTX;
144
145 for (i = 1; i < XVECLEN (tmp, 0); ++i)
146 {
147 rtx this = XVECEXP (tmp, 0, i);
148
149 /* The special case is allowing a no-op set. */
150 if (GET_CODE (this) == SET
151 && SET_SRC (this) == SET_DEST (this))
152 ;
153 else if (GET_CODE (this) != CLOBBER
154 && GET_CODE (this) != USE)
155 return NULL_RTX;
156 }
157
158 return XVECEXP (tmp, 0, 0);
159 }
160
161 /* Free the list of csa_memlist nodes. */
162
163 static void
164 free_csa_memlist (struct csa_memlist *memlist)
165 {
166 struct csa_memlist *next;
167 for (; memlist ; memlist = next)
168 {
169 next = memlist->next;
170 free (memlist);
171 }
172 }
173
174 /* Create a new csa_memlist node from the given memory reference.
175 It is already known that the memory is stack_memref_p. */
176
177 static struct csa_memlist *
178 record_one_stack_memref (rtx insn, rtx *mem, struct csa_memlist *next_memlist)
179 {
180 struct csa_memlist *ml;
181
182 ml = XNEW (struct csa_memlist);
183
184 if (XEXP (*mem, 0) == stack_pointer_rtx)
185 ml->sp_offset = 0;
186 else
187 ml->sp_offset = INTVAL (XEXP (XEXP (*mem, 0), 1));
188
189 ml->insn = insn;
190 ml->mem = mem;
191 ml->next = next_memlist;
192
193 return ml;
194 }
195
196 /* Attempt to apply ADJUST to the stack adjusting insn INSN, as well
197 as each of the memories in MEMLIST. Return true on success. */
198
199 static int
200 try_apply_stack_adjustment (rtx insn, struct csa_memlist *memlist, HOST_WIDE_INT new_adjust,
201 HOST_WIDE_INT delta)
202 {
203 struct csa_memlist *ml;
204 rtx set;
205
206 set = single_set_for_csa (insn);
207 validate_change (insn, &XEXP (SET_SRC (set), 1), GEN_INT (new_adjust), 1);
208
209 for (ml = memlist; ml ; ml = ml->next)
210 validate_change
211 (ml->insn, ml->mem,
212 replace_equiv_address_nv (*ml->mem,
213 plus_constant (stack_pointer_rtx,
214 ml->sp_offset - delta)), 1);
215
216 if (apply_change_group ())
217 {
218 /* Succeeded. Update our knowledge of the memory references. */
219 for (ml = memlist; ml ; ml = ml->next)
220 ml->sp_offset -= delta;
221
222 return 1;
223 }
224 else
225 return 0;
226 }
227
228 /* Called via for_each_rtx and used to record all stack memory references in
229 the insn and discard all other stack pointer references. */
230 struct record_stack_memrefs_data
231 {
232 rtx insn;
233 struct csa_memlist *memlist;
234 };
235
236 static int
237 record_stack_memrefs (rtx *xp, void *data)
238 {
239 rtx x = *xp;
240 struct record_stack_memrefs_data *d =
241 (struct record_stack_memrefs_data *) data;
242 if (!x)
243 return 0;
244 switch (GET_CODE (x))
245 {
246 case MEM:
247 if (!reg_mentioned_p (stack_pointer_rtx, x))
248 return -1;
249 /* We are not able to handle correctly all possible memrefs containing
250 stack pointer, so this check is necessary. */
251 if (stack_memref_p (x))
252 {
253 d->memlist = record_one_stack_memref (d->insn, xp, d->memlist);
254 return -1;
255 }
256 return 1;
257 case REG:
258 /* ??? We want be able to handle non-memory stack pointer
259 references later. For now just discard all insns referring to
260 stack pointer outside mem expressions. We would probably
261 want to teach validate_replace to simplify expressions first.
262
263 We can't just compare with STACK_POINTER_RTX because the
264 reference to the stack pointer might be in some other mode.
265 In particular, an explicit clobber in an asm statement will
266 result in a QImode clobber. */
267 if (REGNO (x) == STACK_POINTER_REGNUM)
268 return 1;
269 break;
270 default:
271 break;
272 }
273 return 0;
274 }
275
276 /* Subroutine of combine_stack_adjustments, called for each basic block. */
277
278 static void
279 combine_stack_adjustments_for_block (basic_block bb)
280 {
281 HOST_WIDE_INT last_sp_adjust = 0;
282 rtx last_sp_set = NULL_RTX;
283 struct csa_memlist *memlist = NULL;
284 rtx insn, next, set;
285 struct record_stack_memrefs_data data;
286 bool end_of_block = false;
287
288 for (insn = BB_HEAD (bb); !end_of_block ; insn = next)
289 {
290 end_of_block = insn == BB_END (bb);
291 next = NEXT_INSN (insn);
292
293 if (! INSN_P (insn))
294 continue;
295
296 set = single_set_for_csa (insn);
297 if (set)
298 {
299 rtx dest = SET_DEST (set);
300 rtx src = SET_SRC (set);
301
302 /* Find constant additions to the stack pointer. */
303 if (dest == stack_pointer_rtx
304 && GET_CODE (src) == PLUS
305 && XEXP (src, 0) == stack_pointer_rtx
306 && GET_CODE (XEXP (src, 1)) == CONST_INT)
307 {
308 HOST_WIDE_INT this_adjust = INTVAL (XEXP (src, 1));
309
310 /* If we've not seen an adjustment previously, record
311 it now and continue. */
312 if (! last_sp_set)
313 {
314 last_sp_set = insn;
315 last_sp_adjust = this_adjust;
316 continue;
317 }
318
319 /* If not all recorded memrefs can be adjusted, or the
320 adjustment is now too large for a constant addition,
321 we cannot merge the two stack adjustments.
322
323 Also we need to be careful to not move stack pointer
324 such that we create stack accesses outside the allocated
325 area. We can combine an allocation into the first insn,
326 or a deallocation into the second insn. We can not
327 combine an allocation followed by a deallocation.
328
329 The only somewhat frequent occurrence of the later is when
330 a function allocates a stack frame but does not use it.
331 For this case, we would need to analyze rtl stream to be
332 sure that allocated area is really unused. This means not
333 only checking the memory references, but also all registers
334 or global memory references possibly containing a stack
335 frame address.
336
337 Perhaps the best way to address this problem is to teach
338 gcc not to allocate stack for objects never used. */
339
340 /* Combine an allocation into the first instruction. */
341 if (STACK_GROWS_DOWNWARD ? this_adjust <= 0 : this_adjust >= 0)
342 {
343 if (try_apply_stack_adjustment (last_sp_set, memlist,
344 last_sp_adjust + this_adjust,
345 this_adjust))
346 {
347 /* It worked! */
348 delete_insn (insn);
349 last_sp_adjust += this_adjust;
350 continue;
351 }
352 }
353
354 /* Otherwise we have a deallocation. Do not combine with
355 a previous allocation. Combine into the second insn. */
356 else if (STACK_GROWS_DOWNWARD
357 ? last_sp_adjust >= 0 : last_sp_adjust <= 0)
358 {
359 if (try_apply_stack_adjustment (insn, memlist,
360 last_sp_adjust + this_adjust,
361 -last_sp_adjust))
362 {
363 /* It worked! */
364 delete_insn (last_sp_set);
365 last_sp_set = insn;
366 last_sp_adjust += this_adjust;
367 free_csa_memlist (memlist);
368 memlist = NULL;
369 continue;
370 }
371 }
372
373 /* Combination failed. Restart processing from here. If
374 deallocation+allocation conspired to cancel, we can
375 delete the old deallocation insn. */
376 if (last_sp_set && last_sp_adjust == 0)
377 delete_insn (insn);
378 free_csa_memlist (memlist);
379 memlist = NULL;
380 last_sp_set = insn;
381 last_sp_adjust = this_adjust;
382 continue;
383 }
384
385 /* Find a predecrement of exactly the previous adjustment and
386 turn it into a direct store. Obviously we can't do this if
387 there were any intervening uses of the stack pointer. */
388 if (memlist == NULL
389 && MEM_P (dest)
390 && ((GET_CODE (XEXP (dest, 0)) == PRE_DEC
391 && (last_sp_adjust
392 == (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (dest))))
393 || (GET_CODE (XEXP (dest, 0)) == PRE_MODIFY
394 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == PLUS
395 && XEXP (XEXP (XEXP (dest, 0), 1), 0) == stack_pointer_rtx
396 && (GET_CODE (XEXP (XEXP (XEXP (dest, 0), 1), 1))
397 == CONST_INT)
398 && (INTVAL (XEXP (XEXP (XEXP (dest, 0), 1), 1))
399 == -last_sp_adjust)))
400 && XEXP (XEXP (dest, 0), 0) == stack_pointer_rtx
401 && ! reg_mentioned_p (stack_pointer_rtx, src)
402 && memory_address_p (GET_MODE (dest), stack_pointer_rtx)
403 && validate_change (insn, &SET_DEST (set),
404 replace_equiv_address (dest,
405 stack_pointer_rtx),
406 0))
407 {
408 delete_insn (last_sp_set);
409 free_csa_memlist (memlist);
410 memlist = NULL;
411 last_sp_set = NULL_RTX;
412 last_sp_adjust = 0;
413 continue;
414 }
415 }
416
417 data.insn = insn;
418 data.memlist = memlist;
419 if (!CALL_P (insn) && last_sp_set
420 && !for_each_rtx (&PATTERN (insn), record_stack_memrefs, &data))
421 {
422 memlist = data.memlist;
423 continue;
424 }
425 memlist = data.memlist;
426
427 /* Otherwise, we were not able to process the instruction.
428 Do not continue collecting data across such a one. */
429 if (last_sp_set
430 && (CALL_P (insn)
431 || reg_mentioned_p (stack_pointer_rtx, PATTERN (insn))))
432 {
433 if (last_sp_set && last_sp_adjust == 0)
434 delete_insn (last_sp_set);
435 free_csa_memlist (memlist);
436 memlist = NULL;
437 last_sp_set = NULL_RTX;
438 last_sp_adjust = 0;
439 }
440 }
441
442 if (last_sp_set && last_sp_adjust == 0)
443 delete_insn (last_sp_set);
444
445 if (memlist)
446 free_csa_memlist (memlist);
447 }
448 \f
449
450 static bool
451 gate_handle_stack_adjustments (void)
452 {
453 return (optimize > 0);
454 }
455
456 static unsigned int
457 rest_of_handle_stack_adjustments (void)
458 {
459 cleanup_cfg (flag_crossjumping ? CLEANUP_CROSSJUMP : 0);
460
461 /* This is kind of a heuristic. We need to run combine_stack_adjustments
462 even for machines with possibly nonzero RETURN_POPS_ARGS
463 and ACCUMULATE_OUTGOING_ARGS. We expect that only ports having
464 push instructions will have popping returns. */
465 #ifndef PUSH_ROUNDING
466 if (!ACCUMULATE_OUTGOING_ARGS)
467 #endif
468 {
469 df_note_add_problem ();
470 df_analyze ();
471 combine_stack_adjustments ();
472 }
473 return 0;
474 }
475
476 struct tree_opt_pass pass_stack_adjustments =
477 {
478 "csa", /* name */
479 gate_handle_stack_adjustments, /* gate */
480 rest_of_handle_stack_adjustments, /* execute */
481 NULL, /* sub */
482 NULL, /* next */
483 0, /* static_pass_number */
484 0, /* tv_id */
485 0, /* properties_required */
486 0, /* properties_provided */
487 0, /* properties_destroyed */
488 0, /* todo_flags_start */
489 TODO_df_finish |
490 TODO_dump_func |
491 TODO_ggc_collect, /* todo_flags_finish */
492 0 /* letter */
493 };
494