/* Tail merging for gimple.
   Copyright (C) 2011-2015 Free Software Foundation, Inc.
   Contributed by Tom de Vries (tom@codesourcery.com)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Pass overview.


   MOTIVATIONAL EXAMPLE

   gimple representation of gcc/testsuite/gcc.dg/pr43864.c at

   hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
   {
     struct FILED.1638 * fpD.2605;
     charD.1 fileNameD.2604[1000];
     intD.0 D.3915;
     const charD.1 * restrict outputFileName.0D.3914;

     # BLOCK 2 freq:10000
     # PRED: ENTRY [100.0%]  (fallthru,exec)
     # PT = nonlocal { D.3926 } (restr)
     outputFileName.0D.3914_3
       = (const charD.1 * restrict) outputFileNameD.2600_2(D);
     # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
     # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
     if (D.3915_4 == 0)
       goto <bb 3>;
     else
       goto <bb 4>;
     # SUCC: 3 [10.0%]  (true,exec) 4 [90.0%]  (false,exec)

     # BLOCK 3 freq:1000
     # PRED: 2 [10.0%]  (true,exec)
     # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     freeD.898 (ctxD.2601_5(D));
     goto <bb 7>;
     # SUCC: 7 [100.0%]  (fallthru,exec)

     # BLOCK 4 freq:9000
     # PRED: 2 [90.0%]  (false,exec)
     # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
     # PT = nonlocal escaped
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
     if (fpD.2605_8 == 0B)
       goto <bb 5>;
     else
       goto <bb 6>;
     # SUCC: 5 [1.9%]  (true,exec) 6 [98.1%]  (false,exec)

     # BLOCK 5 freq:173
     # PRED: 4 [1.9%]  (true,exec)
     # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     freeD.898 (ctxD.2601_5(D));
     goto <bb 7>;
     # SUCC: 7 [100.0%]  (fallthru,exec)

     # BLOCK 6 freq:8827
     # PRED: 4 [98.1%]  (false,exec)
     # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
     # SUCC: 7 [100.0%]  (fallthru,exec)

     # BLOCK 7 freq:10000
     # PRED: 3 [100.0%]  (fallthru,exec) 5 [100.0%]  (fallthru,exec)
             6 [100.0%]  (fallthru,exec)
     # PT = nonlocal null

     # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
     # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
                            .MEMD.3923_18(6)>
     # VUSE <.MEMD.3923_11>
     return ctxD.2601_1;
     # SUCC: EXIT [100.0%]
   }

   bb 3 and bb 5 can be merged.  The blocks have different predecessors, but
   the same successors, and the same operations.
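
   At the source level, the function has roughly this shape (a hand-written
   reconstruction for illustration, not the verbatim testcase):

     char *
     hprofStartupp (char *outputFileName, char *ctx)
     {
       char fileName[1000];
       FILE *fp;

       sprintf (fileName, outputFileName);
       if (access (fileName, 1) == 0)
         {
           free (ctx);
           return 0;
         }

       fp = fopen (fileName, 0);
       if (fp == 0)
         {
           free (ctx);
           return 0;
         }

       foo (outputFileName, fp);
       return ctx;
     }

   The two 'free (ctx); return 0;' arms correspond to bb 3 and bb 5 above.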


   CONTEXT

   A technique called tail merging (or cross jumping) can fix the example
   above.  For a block, we look for common code at the end (the tail) of the
   predecessor blocks, and insert jumps from one block to the other.
   The example is a special case for tail merging, in that 2 whole blocks
   can be merged, rather than just the end parts of them.
   We currently only focus on whole block merging, so in that sense
   calling this pass tail merge is a bit of a misnomer.

   We distinguish 2 kinds of situations in which blocks can be merged:
   - same operations, same predecessors.  The successor edges coming from one
     block are redirected to come from the other block.
   - same operations, same successors.  The predecessor edges entering one
     block are redirected to enter the other block.  Note that this operation
     might involve introducing phi operations.
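
   As a small illustration (hypothetical code, not taken from the testcase):
   in

     if (c)
       { a (); x = 1; }
     else
       { a (); x = 1; }
     use (x);

   the two arms have both the same predecessor (the block ending in the
   condition) and the same successor (the block containing the use), so
   either kind of redirection applies.  In the motivational example above,
   bb 3 and bb 5 only share their successor, so it is their predecessor
   edges that get redirected.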

   For efficient implementation, we would like to value number the blocks,
   and have a comparison operator that tells us whether the blocks are equal.
   Besides being runtime efficient, block value numbering should also abstract
   from irrelevant differences in order of operations, much like normal value
   numbering abstracts from irrelevant order of operations.

   For the first situation (same operations, same predecessors), normal value
   numbering fits well.  We can calculate a block value number based on the
   value numbers of the defs and vdefs.

   For the second situation (same operations, same successors), this approach
   doesn't work so well.  We can illustrate this using the example.  The calls
   to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
   remain different in value numbering, since they represent different memory
   states.  So the resulting vdefs of the frees will be different in value
   numbering, so the block value numbers will be different.

   The reason why we call the blocks equal is not because they define the same
   values, but because uses in the blocks use (possibly different) defs in the
   same way.  To be able to detect this efficiently, we need to do some kind
   of reverse value numbering, meaning number the uses rather than the defs,
   and calculate a block value number based on the value numbers of the uses.
   Ideally, a block comparison operator will also indicate which phis are
   needed to merge the blocks.

   For the moment, we don't do block value numbering, but we do insn-by-insn
   matching, using scc value numbers to match operations with results, and
   structural comparison otherwise, while ignoring vop mismatches.
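
   For example (hypothetical SSA names), the statements

     x_5 = a_1 + b_2;   (in bb 3)
     y_8 = a_1 + b_2;   (in bb 5)

   match: their rhs compare equal structurally, and scc value numbering
   assigns x_5 and y_8 the same value number, so later statements using x_5
   resp. y_8 can be matched as well.  Two otherwise identical statements are
   not rejected merely because their vops differ.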


   IMPLEMENTATION

   1. The pass first determines all groups of blocks with the same successor
      blocks.
   2. Within each group, it tries to determine clusters of equal basic blocks.
   3. The clusters are applied.
   4. The same successor groups are updated.
   5. This process is repeated from 2 onwards, until no more changes.
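
   As a rough sketch of the driver (see tail_merge_optimize at the end of
   this file; the real loop also limits the number of iterations and
   recomputes dominance info between iterations):

     init_worklist ();                  <-- step 1
     while (!worklist.is_empty ())
       {
         find_clusters ();              <-- step 2
         if (apply_clusters () == 0)    <-- step 3
           break;
         update_worklist ();            <-- steps 4 and 5
       }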


   LIMITATIONS/TODO

   - block only
   - handles only 'same operations, same successors'.
     It handles same predecessors as a special subcase though.
   - does not implement the reverse value numbering and block value numbering.
   - improve memory allocation: use garbage collected memory, obstacks,
     allocpools where appropriate.
   - no insertion of gimple_reg phis.  We only introduce vop-phis.
   - handle blocks with gimple_reg phi_nodes.


   PASS PLACEMENT
   This 'pass' is not a stand-alone gimple pass, but runs as part of
   pass_pre, in order to share the value numbering.


   SWITCHES

   - ftree-tail-merge.  On at -O2.  We may have to enable it only at -Os.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "trans-mem.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "flags.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-into-ssa.h"
#include "params.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-sccvn.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "tree-pass.h"

/* Describes a group of bbs with the same successors.  The successor bbs are
   cached in succs, and the successor edge flags are cached in succ_flags.
   If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
   it's marked in inverse.
   Additionally, the hash value for the struct is cached in hashval, and
   in_worklist indicates whether it's currently part of the worklist.  */

struct same_succ_def : pointer_hash <same_succ_def>
{
  /* The bbs that have the same successor bbs.  */
  bitmap bbs;
  /* The successor bbs.  */
  bitmap succs;
  /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped
     for bb.  */
  bitmap inverse;
  /* The edge flags for each of the successor bbs.  */
  vec<int> succ_flags;
  /* Indicates whether the struct is currently in the worklist.  */
  bool in_worklist;
  /* The hash value of the struct.  */
  hashval_t hashval;

  /* hash_table support.  */
  static inline hashval_t hash (const same_succ_def *);
  static int equal (const same_succ_def *, const same_succ_def *);
  static void remove (same_succ_def *);
};
typedef struct same_succ_def *same_succ;
typedef const struct same_succ_def *const_same_succ;

/* Hash routine for hash_table support; returns the hash value of E.  */

inline hashval_t
same_succ_def::hash (const same_succ_def *e)
{
  return e->hashval;
}

/* A group of bbs where 1 bb from bbs can replace the other bbs.  */

struct bb_cluster_def
{
  /* The bbs in the cluster.  */
  bitmap bbs;
  /* The preds of the bbs in the cluster.  */
  bitmap preds;
  /* Index in all_clusters vector.  */
  int index;
  /* The bb to replace the cluster with.  */
  basic_block rep_bb;
};
typedef struct bb_cluster_def *bb_cluster;
typedef const struct bb_cluster_def *const_bb_cluster;

/* Per bb-info.  */

struct aux_bb_info
{
  /* The number of non-debug statements in the bb.  */
  int size;
  /* The same_succ that this bb is a member of.  */
  same_succ bb_same_succ;
  /* The cluster that this bb is a member of.  */
  bb_cluster cluster;
  /* The vop state at the exit of a bb.  This is shortlived data, used to
     communicate data between update_block_by and update_vuses.  */
  tree vop_at_exit;
  /* The bb that either contains or is dominated by the dependencies of the
     bb.  */
  basic_block dep_bb;
};

/* Macros to access the fields of struct aux_bb_info.  */

#define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
#define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
#define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
#define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
#define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)

/* Returns true if the only effect statement STMT has is to define locally
   used SSA_NAMEs.  */

static bool
stmt_local_def (gimple stmt)
{
  basic_block bb, def_bb;
  imm_use_iterator iter;
  use_operand_p use_p;
  tree val;
  def_operand_p def_p;

  if (gimple_vdef (stmt) != NULL_TREE
      || gimple_has_side_effects (stmt)
      || gimple_could_trap_p_1 (stmt, false, false)
      || gimple_vuse (stmt) != NULL_TREE)
    return false;

  def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
  if (def_p == NULL)
    return false;

  val = DEF_FROM_PTR (def_p);
  if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
    return false;

  def_bb = gimple_bb (stmt);

  FOR_EACH_IMM_USE_FAST (use_p, iter, val)
    {
      if (is_gimple_debug (USE_STMT (use_p)))
        continue;
      bb = gimple_bb (USE_STMT (use_p));
      if (bb == def_bb)
        continue;

      if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
          && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
        continue;

      return false;
    }

  return true;
}
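
/* For example (hypothetical SSA names), in

     <bb 2>:
     tem_3 = x_1 + 1;
     if (tem_3 > y_2)
       goto <bb 3>;
     else
       goto <bb 4>;

   the statement 'tem_3 = x_1 + 1' is a local def: it has no vdef or vuse,
   no side effects, cannot trap, and tem_3 is only used in bb 2.  Such
   statements are skipped (see the gsi_advance_* functions below) when
   blocks are compared.  */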

/* Let GSI skip forwards over local defs.  */

static void
gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
{
  gimple stmt;

  while (true)
    {
      if (gsi_end_p (*gsi))
        return;
      stmt = gsi_stmt (*gsi);
      if (!stmt_local_def (stmt))
        return;
      gsi_next_nondebug (gsi);
    }
}

/* VAL1 and VAL2 are either:
   - uses in BB1 and BB2, or
   - phi alternatives for BB1 and BB2.
   Return true if the uses have the same gvn value.  */

static bool
gvn_uses_equal (tree val1, tree val2)
{
  gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);

  if (val1 == val2)
    return true;

  if (vn_valueize (val1) != vn_valueize (val2))
    return false;

  return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
          && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
}

/* Prints E to FILE.  */

static void
same_succ_print (FILE *file, const same_succ e)
{
  unsigned int i;
  bitmap_print (file, e->bbs, "bbs:", "\n");
  bitmap_print (file, e->succs, "succs:", "\n");
  bitmap_print (file, e->inverse, "inverse:", "\n");
  fprintf (file, "flags:");
  for (i = 0; i < e->succ_flags.length (); ++i)
    fprintf (file, " %x", e->succ_flags[i]);
  fprintf (file, "\n");
}

/* Prints same_succ *PE to FILE.  */

inline int
ssa_same_succ_print_traverse (same_succ *pe, FILE *file)
{
  const same_succ e = *pe;
  same_succ_print (file, e);
  return 1;
}

/* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB.  */

static void
update_dep_bb (basic_block use_bb, tree val)
{
  basic_block dep_bb;

  /* Not a dep.  */
  if (TREE_CODE (val) != SSA_NAME)
    return;

  /* Skip use of global def.  */
  if (SSA_NAME_IS_DEFAULT_DEF (val))
    return;

  /* Skip use of local def.  */
  dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
  if (dep_bb == use_bb)
    return;

  if (BB_DEP_BB (use_bb) == NULL
      || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
    BB_DEP_BB (use_bb) = dep_bb;
}

/* Update BB_DEP_BB, given the dependencies in STMT.  */

static void
stmt_update_dep_bb (gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use;

  FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
    update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
}

/* Calculates hash value for same_succ E.  */

static hashval_t
same_succ_hash (const_same_succ e)
{
  inchash::hash hstate (bitmap_hash (e->succs));
  int flags;
  unsigned int i;
  unsigned int first = bitmap_first_set_bit (e->bbs);
  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
  int size = 0;
  gimple stmt;
  tree arg;
  unsigned int s;
  bitmap_iterator bs;

  for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
       !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
    {
      stmt = gsi_stmt (gsi);
      stmt_update_dep_bb (stmt);
      if (stmt_local_def (stmt))
        continue;
      size++;

      hstate.add_int (gimple_code (stmt));
      if (is_gimple_assign (stmt))
        hstate.add_int (gimple_assign_rhs_code (stmt));
      if (!is_gimple_call (stmt))
        continue;
      if (gimple_call_internal_p (stmt))
        hstate.add_int (gimple_call_internal_fn (stmt));
      else
        {
          inchash::add_expr (gimple_call_fn (stmt), hstate);
          if (gimple_call_chain (stmt))
            inchash::add_expr (gimple_call_chain (stmt), hstate);
        }
      for (i = 0; i < gimple_call_num_args (stmt); i++)
        {
          arg = gimple_call_arg (stmt, i);
          arg = vn_valueize (arg);
          inchash::add_expr (arg, hstate);
        }
    }

  hstate.add_int (size);
  BB_SIZE (bb) = size;

  for (i = 0; i < e->succ_flags.length (); ++i)
    {
      flags = e->succ_flags[i];
      flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      hstate.add_int (flags);
    }

  EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
    {
      int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
      for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s));
           !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          tree lhs = gimple_phi_result (phi);
          tree val = gimple_phi_arg_def (phi, n);

          if (virtual_operand_p (lhs))
            continue;
          update_dep_bb (bb, val);
        }
    }

  return hstate.end ();
}

/* Returns true if E1 and E2 have 2 successors, and if the successor flags
   are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal
   for the other edge flags.  */

static bool
inverse_flags (const_same_succ e1, const_same_succ e2)
{
  int f1a, f1b, f2a, f2b;
  int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);

  if (e1->succ_flags.length () != 2)
    return false;

  f1a = e1->succ_flags[0];
  f1b = e1->succ_flags[1];
  f2a = e2->succ_flags[0];
  f2b = e2->succ_flags[1];

  if (f1a == f2a && f1b == f2b)
    return false;

  return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
}
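
/* For example, two blocks ending in

     if (x_1 == 0) goto <bb 4>; else goto <bb 5>;
     if (x_1 != 0) goto <bb 5>; else goto <bb 4>;

   (hypothetical SSA names and block numbers) have the same two successors,
   but with the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags swapped.
   inverse_flags detects this; gimple_equal_p below compensates by inverting
   the comparison code of one of the conditions.  */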

/* Compares SAME_SUCCs E1 and E2.  */

int
same_succ_def::equal (const same_succ_def *e1, const same_succ_def *e2)
{
  unsigned int i, first1, first2;
  gimple_stmt_iterator gsi1, gsi2;
  gimple s1, s2;
  basic_block bb1, bb2;

  if (e1->hashval != e2->hashval)
    return 0;

  if (e1->succ_flags.length () != e2->succ_flags.length ())
    return 0;

  if (!bitmap_equal_p (e1->succs, e2->succs))
    return 0;

  if (!inverse_flags (e1, e2))
    {
      for (i = 0; i < e1->succ_flags.length (); ++i)
        if (e1->succ_flags[i] != e2->succ_flags[i])
          return 0;
    }

  first1 = bitmap_first_set_bit (e1->bbs);
  first2 = bitmap_first_set_bit (e2->bbs);

  bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
  bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);

  if (BB_SIZE (bb1) != BB_SIZE (bb2))
    return 0;

  gsi1 = gsi_start_nondebug_bb (bb1);
  gsi2 = gsi_start_nondebug_bb (bb2);
  gsi_advance_fw_nondebug_nonlocal (&gsi1);
  gsi_advance_fw_nondebug_nonlocal (&gsi2);
  while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
    {
      s1 = gsi_stmt (gsi1);
      s2 = gsi_stmt (gsi2);
      if (gimple_code (s1) != gimple_code (s2))
        return 0;
      if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
        return 0;
      gsi_next_nondebug (&gsi1);
      gsi_next_nondebug (&gsi2);
      gsi_advance_fw_nondebug_nonlocal (&gsi1);
      gsi_advance_fw_nondebug_nonlocal (&gsi2);
    }

  return 1;
}

/* Alloc and init a new SAME_SUCC.  */

static same_succ
same_succ_alloc (void)
{
  same_succ same = XNEW (struct same_succ_def);

  same->bbs = BITMAP_ALLOC (NULL);
  same->succs = BITMAP_ALLOC (NULL);
  same->inverse = BITMAP_ALLOC (NULL);
  same->succ_flags.create (10);
  same->in_worklist = false;

  return same;
}

/* Delete same_succ E.  */

void
same_succ_def::remove (same_succ e)
{
  BITMAP_FREE (e->bbs);
  BITMAP_FREE (e->succs);
  BITMAP_FREE (e->inverse);
  e->succ_flags.release ();

  XDELETE (e);
}

/* Reset same_succ SAME.  */

static void
same_succ_reset (same_succ same)
{
  bitmap_clear (same->bbs);
  bitmap_clear (same->succs);
  bitmap_clear (same->inverse);
  same->succ_flags.truncate (0);
}

static hash_table<same_succ_def> *same_succ_htab;

/* Array that is used to store the edge flags for a successor.  */

static int *same_succ_edge_flags;

/* Bitmap that is used to mark bbs that are recently deleted.  */

static bitmap deleted_bbs;

/* Bitmap that is used to mark predecessors of bbs that are
   deleted.  */

static bitmap deleted_bb_preds;

/* Prints same_succ_htab to stderr.  */

extern void debug_same_succ (void);
DEBUG_FUNCTION void
debug_same_succ (void)
{
  same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
}


/* Vector of same_succ entries to process.  */

static vec<same_succ> worklist;

/* Prints worklist to FILE.  */

static void
print_worklist (FILE *file)
{
  unsigned int i;
  for (i = 0; i < worklist.length (); ++i)
    same_succ_print (file, worklist[i]);
}

/* Adds SAME to worklist.  */

static void
add_to_worklist (same_succ same)
{
  if (same->in_worklist)
    return;

  if (bitmap_count_bits (same->bbs) < 2)
    return;

  same->in_worklist = true;
  worklist.safe_push (same);
}

/* Add BB to same_succ_htab.  */

static void
find_same_succ_bb (basic_block bb, same_succ *same_p)
{
  unsigned int j;
  bitmap_iterator bj;
  same_succ same = *same_p;
  same_succ *slot;
  edge_iterator ei;
  edge e;

  if (bb == NULL
      /* Be conservative with loop structure.  It's not evident that this test
         is sufficient.  Before tail-merge, we've just called
         loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now
         set, so there's no guarantee that the loop->latch value is still
         valid.  But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES
         at the start of pre, we've kept that property intact throughout pre,
         and are keeping it throughout tail-merge using this test.  */
      || bb->loop_father->latch == bb)
    return;
  bitmap_set_bit (same->bbs, bb->index);
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      int index = e->dest->index;
      bitmap_set_bit (same->succs, index);
      same_succ_edge_flags[index] = e->flags;
    }
  EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
    same->succ_flags.safe_push (same_succ_edge_flags[j]);

  same->hashval = same_succ_hash (same);

  slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT);
  if (*slot == NULL)
    {
      *slot = same;
      BB_SAME_SUCC (bb) = same;
      add_to_worklist (same);
      *same_p = NULL;
    }
  else
    {
      bitmap_set_bit ((*slot)->bbs, bb->index);
      BB_SAME_SUCC (bb) = *slot;
      add_to_worklist (*slot);
      if (inverse_flags (same, *slot))
        bitmap_set_bit ((*slot)->inverse, bb->index);
      same_succ_reset (same);
    }
}

/* Find bbs with same successors.  */

static void
find_same_succ (void)
{
  same_succ same = same_succ_alloc ();
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      find_same_succ_bb (bb, &same);
      if (same == NULL)
        same = same_succ_alloc ();
    }

  same_succ_def::remove (same);
}

/* Initializes worklist administration.  */

static void
init_worklist (void)
{
  alloc_aux_for_blocks (sizeof (struct aux_bb_info));
  same_succ_htab = new hash_table<same_succ_def> (n_basic_blocks_for_fn (cfun));
  same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
  deleted_bbs = BITMAP_ALLOC (NULL);
  deleted_bb_preds = BITMAP_ALLOC (NULL);
  worklist.create (n_basic_blocks_for_fn (cfun));
  find_same_succ ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "initial worklist:\n");
      print_worklist (dump_file);
    }
}

/* Deletes worklist administration.  */

static void
delete_worklist (void)
{
  free_aux_for_blocks ();
  delete same_succ_htab;
  same_succ_htab = NULL;
  XDELETEVEC (same_succ_edge_flags);
  same_succ_edge_flags = NULL;
  BITMAP_FREE (deleted_bbs);
  BITMAP_FREE (deleted_bb_preds);
  worklist.release ();
}

/* Mark BB as deleted, and mark its predecessors.  */

static void
mark_basic_block_deleted (basic_block bb)
{
  edge e;
  edge_iterator ei;

  bitmap_set_bit (deleted_bbs, bb->index);

  FOR_EACH_EDGE (e, ei, bb->preds)
    bitmap_set_bit (deleted_bb_preds, e->src->index);
}

/* Removes BB from its corresponding same_succ.  */

static void
same_succ_flush_bb (basic_block bb)
{
  same_succ same = BB_SAME_SUCC (bb);
  BB_SAME_SUCC (bb) = NULL;
  if (bitmap_single_bit_set_p (same->bbs))
    same_succ_htab->remove_elt_with_hash (same, same->hashval);
  else
    bitmap_clear_bit (same->bbs, bb->index);
}

/* Removes all bbs in BBS from their corresponding same_succ.  */

static void
same_succ_flush_bbs (bitmap bbs)
{
  unsigned int i;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
    same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
}

/* Release the last vdef in BB, either normal or phi result.  */

static void
release_last_vdef (basic_block bb)
{
  for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i);
       gsi_prev_nondebug (&i))
    {
      gimple stmt = gsi_stmt (i);
      if (gimple_vdef (stmt) == NULL_TREE)
        continue;

      mark_virtual_operand_for_renaming (gimple_vdef (stmt));
      return;
    }

  for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i);
       gsi_next (&i))
    {
      gphi *phi = i.phi ();
      tree res = gimple_phi_result (phi);

      if (!virtual_operand_p (res))
        continue;

      mark_virtual_phi_result_for_renaming (phi);
      return;
    }
}

/* For deleted_bb_preds, find bbs with same successors.  */

static void
update_worklist (void)
{
  unsigned int i;
  bitmap_iterator bi;
  basic_block bb;
  same_succ same;

  bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
  bitmap_clear (deleted_bbs);

  bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
  same_succ_flush_bbs (deleted_bb_preds);

  same = same_succ_alloc ();
  EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      gcc_assert (bb != NULL);
      find_same_succ_bb (bb, &same);
      if (same == NULL)
        same = same_succ_alloc ();
    }
  same_succ_def::remove (same);
  bitmap_clear (deleted_bb_preds);
}

/* Prints cluster C to FILE.  */

static void
print_cluster (FILE *file, bb_cluster c)
{
  if (c == NULL)
    return;
  bitmap_print (file, c->bbs, "bbs:", "\n");
  bitmap_print (file, c->preds, "preds:", "\n");
}

/* Prints cluster C to stderr.  */

extern void debug_cluster (bb_cluster);
DEBUG_FUNCTION void
debug_cluster (bb_cluster c)
{
  print_cluster (stderr, c);
}

/* Update C->rep_bb, given that BB is added to the cluster.  */

static void
update_rep_bb (bb_cluster c, basic_block bb)
{
  /* Initial.  */
  if (c->rep_bb == NULL)
    {
      c->rep_bb = bb;
      return;
    }

  /* Current needs no deps, keep it.  */
  if (BB_DEP_BB (c->rep_bb) == NULL)
    return;

  /* Bb needs no deps, change rep_bb.  */
  if (BB_DEP_BB (bb) == NULL)
    {
      c->rep_bb = bb;
      return;
    }

  /* Bb needs last deps earlier than current, change rep_bb.  A potential
     problem with this is that the first deps might also be earlier, which
     would mean we prefer longer lifetimes for the deps.  To be able to check
     for this, we would have to trace BB_FIRST_DEP_BB as well, besides
     BB_DEP_BB, which is really BB_LAST_DEP_BB.
     The benefit of choosing the bb with last deps earlier is that it can
     potentially be used as replacement for more bbs.  */
  if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
    c->rep_bb = bb;
}

/* Add BB to cluster C.  Sets BB in C->bbs, and preds of BB in C->preds.  */

static void
add_bb_to_cluster (bb_cluster c, basic_block bb)
{
  edge e;
  edge_iterator ei;

  bitmap_set_bit (c->bbs, bb->index);

  FOR_EACH_EDGE (e, ei, bb->preds)
    bitmap_set_bit (c->preds, e->src->index);

  update_rep_bb (c, bb);
}

/* Allocate and init new cluster.  */

static bb_cluster
new_cluster (void)
{
  bb_cluster c;
  c = XCNEW (struct bb_cluster_def);
  c->bbs = BITMAP_ALLOC (NULL);
  c->preds = BITMAP_ALLOC (NULL);
  c->rep_bb = NULL;
  return c;
}

/* Delete cluster C.  */

static void
delete_cluster (bb_cluster c)
{
  if (c == NULL)
    return;
  BITMAP_FREE (c->bbs);
  BITMAP_FREE (c->preds);
  XDELETE (c);
}


/* Array that contains all clusters.  */

static vec<bb_cluster> all_clusters;

/* Allocate all cluster vectors.  */

static void
alloc_cluster_vectors (void)
{
  all_clusters.create (n_basic_blocks_for_fn (cfun));
}

/* Reset all cluster vectors.  */

static void
reset_cluster_vectors (void)
{
  unsigned int i;
  basic_block bb;
  for (i = 0; i < all_clusters.length (); ++i)
    delete_cluster (all_clusters[i]);
  all_clusters.truncate (0);
  FOR_EACH_BB_FN (bb, cfun)
    BB_CLUSTER (bb) = NULL;
}

/* Delete all cluster vectors.  */

static void
delete_cluster_vectors (void)
{
  unsigned int i;
  for (i = 0; i < all_clusters.length (); ++i)
    delete_cluster (all_clusters[i]);
  all_clusters.release ();
}

/* Merge cluster C2 into C1.  */

static void
merge_clusters (bb_cluster c1, bb_cluster c2)
{
  bitmap_ior_into (c1->bbs, c2->bbs);
  bitmap_ior_into (c1->preds, c2->preds);
}

/* Register equivalence of BB1 and BB2 by placing them in the same cluster.
   Store the cluster in all_clusters, or merge it with an existing
   cluster.  */

static void
set_cluster (basic_block bb1, basic_block bb2)
{
  basic_block merge_bb, other_bb;
  bb_cluster merge, old, c;

  if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
    {
      c = new_cluster ();
      add_bb_to_cluster (c, bb1);
      add_bb_to_cluster (c, bb2);
      BB_CLUSTER (bb1) = c;
      BB_CLUSTER (bb2) = c;
      c->index = all_clusters.length ();
      all_clusters.safe_push (c);
    }
  else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
    {
      merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
      other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
      merge = BB_CLUSTER (merge_bb);
      add_bb_to_cluster (merge, other_bb);
      BB_CLUSTER (other_bb) = merge;
    }
  else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
    {
      unsigned int i;
      bitmap_iterator bi;

      old = BB_CLUSTER (bb2);
      merge = BB_CLUSTER (bb1);
      merge_clusters (merge, old);
      EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
        BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
      all_clusters[old->index] = NULL;
      update_rep_bb (merge, old->rep_bb);
      delete_cluster (old);
    }
  else
    gcc_unreachable ();
}

/* Return true if gimple operands T1 and T2 have the same value.  */

static bool
gimple_operand_equal_value_p (tree t1, tree t2)
{
  if (t1 == t2)
    return true;

  if (t1 == NULL_TREE
      || t2 == NULL_TREE)
    return false;

  if (operand_equal_p (t1, t2, 0))
    return true;

  return gvn_uses_equal (t1, t2);
}

/* Return true if gimple statements S1 and S2 are equal.  Gimple_bb (s1) and
   gimple_bb (s2) are members of SAME_SUCC.  */

static bool
gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
{
  unsigned int i;
  tree lhs1, lhs2;
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
  tree t1, t2;
  bool inv_cond;
  enum tree_code code1, code2;

  if (gimple_code (s1) != gimple_code (s2))
    return false;

  switch (gimple_code (s1))
    {
    case GIMPLE_CALL:
      if (!gimple_call_same_target_p (s1, s2))
        return false;

      t1 = gimple_call_chain (s1);
      t2 = gimple_call_chain (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
        return false;

      if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
        return false;

      for (i = 0; i < gimple_call_num_args (s1); ++i)
        {
          t1 = gimple_call_arg (s1, i);
          t2 = gimple_call_arg (s2, i);
          if (!gimple_operand_equal_value_p (t1, t2))
            return false;
        }

      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
        return true;
      if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
        return false;
      if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
        return vn_valueize (lhs1) == vn_valueize (lhs2);
      return operand_equal_p (lhs1, lhs2, 0);

    case GIMPLE_ASSIGN:
      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (TREE_CODE (lhs1) != SSA_NAME
          && TREE_CODE (lhs2) != SSA_NAME)
        return (operand_equal_p (lhs1, lhs2, 0)
                && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
                                                 gimple_assign_rhs1 (s2)));
      else if (TREE_CODE (lhs1) == SSA_NAME
               && TREE_CODE (lhs2) == SSA_NAME)
        return operand_equal_p (gimple_assign_rhs1 (s1),
                                gimple_assign_rhs1 (s2), 0);
      return false;

    case GIMPLE_COND:
      t1 = gimple_cond_lhs (s1);
      t2 = gimple_cond_lhs (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
        return false;

      t1 = gimple_cond_rhs (s1);
      t2 = gimple_cond_rhs (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
        return false;

      code1 = gimple_expr_code (s1);
      code2 = gimple_expr_code (s2);
      inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
                  != bitmap_bit_p (same_succ->inverse, bb2->index));
      if (inv_cond)
        {
          bool honor_nans = HONOR_NANS (t1);
          code2 = invert_tree_comparison (code2, honor_nans);
        }
      return code1 == code2;

    default:
      return false;
    }
}

/* Let GSI skip backwards over local defs.  Return the earliest vuse in VUSE.
   Return true in VUSE_ESCAPED if the vuse influenced an SSA_OP_DEF of one of
   the processed statements.  */

static void
gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
                                  bool *vuse_escaped)
{
  gimple stmt;
  tree lvuse;

  while (true)
    {
      if (gsi_end_p (*gsi))
        return;
      stmt = gsi_stmt (*gsi);

      lvuse = gimple_vuse (stmt);
      if (lvuse != NULL_TREE)
        {
          *vuse = lvuse;
          if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
            *vuse_escaped = true;
        }

      if (!stmt_local_def (stmt))
        return;
      gsi_prev_nondebug (gsi);
    }
}

/* Determines whether BB1 and BB2 (members of same_succ) are duplicates.  If
   so, clusters them.  */

static void
find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
{
  gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
  gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
  tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
  bool vuse_escaped = false;

  gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
  gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);

  while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
    {
      gimple stmt1 = gsi_stmt (gsi1);
      gimple stmt2 = gsi_stmt (gsi2);

      /* A better fix than checking here would be to blacklist the bb
         containing the stmt, when encountering the stmt f.i. in
         same_succ_hash.  */
      if (is_tm_ending (stmt1)
          || is_tm_ending (stmt2))
        return;

      if (!gimple_equal_p (same_succ, stmt1, stmt2))
        return;

      gsi_prev_nondebug (&gsi1);
      gsi_prev_nondebug (&gsi2);
      gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
      gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
    }

  if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
    return;

  /* If the incoming vuses are not the same, and the vuse escaped into an
     SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
     which potentially means the semantics of one of the blocks will be
     changed.  TODO: make this check more precise.  */
  if (vuse_escaped && vuse1 != vuse2)
    return;

  if (dump_file)
    fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
             bb1->index, bb2->index);

  set_cluster (bb1, bb2);
}
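
/* For example (hypothetical SSA names), two candidate blocks each consisting
   of just

     # VUSE <.MEM_5>          resp.   # VUSE <.MEM_9>
     x_2 = *p_1;                      x_7 = *p_1;

   compare equal statement by statement, but the loads read from different
   memory states, so the loaded values may differ.  Here the vuse escaped
   into the SSA_OP_DEF of the load and vuse1 != vuse2, so such blocks are
   not clustered.  */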

/* Returns whether for all phis in DEST the phi alternatives for E1 and
   E2 are equal.  */

static bool
same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
{
  int n1 = e1->dest_idx, n2 = e2->dest_idx;
  gphi_iterator gsi;

  for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree lhs = gimple_phi_result (phi);
      tree val1 = gimple_phi_arg_def (phi, n1);
      tree val2 = gimple_phi_arg_def (phi, n2);

      if (virtual_operand_p (lhs))
        continue;

      if (operand_equal_for_phi_arg_p (val1, val2))
        continue;
      if (gvn_uses_equal (val1, val2))
        continue;

      return false;
    }

  return true;
}

/* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC),
   the phi alternatives for BB1 and BB2 are equal.  */

static bool
same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
{
  unsigned int s;
  bitmap_iterator bs;
  edge e1, e2;
  basic_block succ;

  EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
    {
      succ = BASIC_BLOCK_FOR_FN (cfun, s);
      e1 = find_edge (bb1, succ);
      e2 = find_edge (bb2, succ);
      if (e1->flags & EDGE_COMPLEX
          || e2->flags & EDGE_COMPLEX)
        return false;

      /* For all phis in bb, the phi alternatives for e1 and e2 need to have
         the same value.  */
      if (!same_phi_alternatives_1 (succ, e1, e2))
        return false;
    }

  return true;
}

/* Return true if BB has non-vop phis.  */

static bool
bb_has_non_vop_phi (basic_block bb)
{
  gimple_seq phis = phi_nodes (bb);
  gimple phi;

  if (phis == NULL)
    return false;

  if (!gimple_seq_singleton_p (phis))
    return true;

  phi = gimple_seq_first_stmt (phis);
  return !virtual_operand_p (gimple_phi_result (phi));
}

/* Returns true if redirecting the incoming edges of FROM to TO maintains the
   invariant that uses in FROM are dominated by their defs.  */

static bool
deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
{
  basic_block cd, dep_bb = BB_DEP_BB (to);
  edge_iterator ei;
  edge e;

  if (dep_bb == NULL)
    return true;

  /* Allocate FROM_PREDS only after the early exit above, so it cannot
     leak.  */
  bitmap from_preds = BITMAP_ALLOC (NULL);
  FOR_EACH_EDGE (e, ei, from->preds)
    bitmap_set_bit (from_preds, e->src->index);
  cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
  BITMAP_FREE (from_preds);

  return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
}

/* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
   replacement bb) and vice versa maintains the invariant that uses in the
   replacement are dominated by their defs.  */

static bool
deps_ok_for_redirect (basic_block bb1, basic_block bb2)
{
  if (BB_CLUSTER (bb1) != NULL)
    bb1 = BB_CLUSTER (bb1)->rep_bb;

  if (BB_CLUSTER (bb2) != NULL)
    bb2 = BB_CLUSTER (bb2)->rep_bb;

  return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
          && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
}

/* Within SAME_SUCC->bbs, find clusters of bbs which can be merged.  */

static void
find_clusters_1 (same_succ same_succ)
{
  basic_block bb1, bb2;
  unsigned int i, j;
  bitmap_iterator bi, bj;
  int nr_comparisons;
  int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);

  EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
    {
      bb1 = BASIC_BLOCK_FOR_FN (cfun, i);

      /* TODO: handle blocks with phi-nodes.  We'll have to find corresponding
         phi-nodes in bb1 and bb2, with the same alternatives for the same
         preds.  */
      if (bb_has_non_vop_phi (bb1))
        continue;

      nr_comparisons = 0;
      EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
        {
          bb2 = BASIC_BLOCK_FOR_FN (cfun, j);

          if (bb_has_non_vop_phi (bb2))
            continue;

          if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
            continue;

          /* Limit quadratic behaviour.  */
          nr_comparisons++;
          if (nr_comparisons > max_comparisons)
            break;

          /* This is a conservative dependency check.  We could test more
             precisely for the allowed replacement direction.  */
          if (!deps_ok_for_redirect (bb1, bb2))
            continue;

          if (!(same_phi_alternatives (same_succ, bb1, bb2)))
            continue;

          find_duplicate (same_succ, bb1, bb2);
        }
    }
}

/* Find clusters of bbs which can be merged.  */

static void
find_clusters (void)
{
  same_succ same;

  while (!worklist.is_empty ())
    {
      same = worklist.pop ();
      same->in_worklist = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "processing worklist entry\n");
          same_succ_print (dump_file, same);
        }
      find_clusters_1 (same);
    }
}

/* Returns the vop phi of BB, if any.  */

static gphi *
vop_phi (basic_block bb)
{
  gphi *stmt;
  gphi_iterator gsi;
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi.phi ();
      if (! virtual_operand_p (gimple_phi_result (stmt)))
        continue;
      return stmt;
    }
  return NULL;
}

/* Redirect all edges from BB1 to BB2, mark BB1 as deleted, and remove it.  */

static void
replace_block_by (basic_block bb1, basic_block bb2)
{
  edge pred_edge;
  edge e1, e2;
  edge_iterator ei;
  unsigned int i;
  gphi *bb2_phi;

  bb2_phi = vop_phi (bb2);

  /* Mark the basic block as deleted.  */
  mark_basic_block_deleted (bb1);

  /* Redirect the incoming edges of bb1 to bb2.  */
  for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
    {
      pred_edge = EDGE_PRED (bb1, i - 1);
      pred_edge = redirect_edge_and_branch (pred_edge, bb2);
      gcc_assert (pred_edge != NULL);

      if (bb2_phi == NULL)
        continue;

      /* The phi might have run out of capacity when the redirect added an
         argument, which means it could have been replaced.  Refresh it.  */
      bb2_phi = vop_phi (bb2);

      add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
                   pred_edge, UNKNOWN_LOCATION);
    }

  bb2->frequency += bb1->frequency;
  if (bb2->frequency > BB_FREQ_MAX)
    bb2->frequency = BB_FREQ_MAX;

  bb2->count += bb1->count;

  /* Merge the outgoing edge counts from bb1 onto bb2.  */
  gcov_type out_sum = 0;
  FOR_EACH_EDGE (e1, ei, bb1->succs)
    {
      e2 = find_edge (bb2, e1->dest);
      gcc_assert (e2);
      e2->count += e1->count;
      out_sum += e2->count;
    }
  /* Recompute the edge probabilities from the new merged edge count.
     Use the sum of the new merged edge counts computed above instead
     of bb2's merged count, in case there are profile count insanities
     making the bb count inconsistent with the edge weights.  */
  FOR_EACH_EDGE (e2, ei, bb2->succs)
    {
      e2->probability = GCOV_COMPUTE_SCALE (e2->count, out_sum);
    }

  /* Do updates that use bb1, before deleting bb1.  */
  release_last_vdef (bb1);
  same_succ_flush_bb (bb1);

  delete_basic_block (bb1);
}

/* Bbs for which update_debug_stmt needs to be called.  */

static bitmap update_bbs;

/* For each cluster in all_clusters, merge all cluster->bbs.  Returns
   number of bbs removed.  */

static int
apply_clusters (void)
{
  basic_block bb1, bb2;
  bb_cluster c;
  unsigned int i, j;
  bitmap_iterator bj;
  int nr_bbs_removed = 0;

  for (i = 0; i < all_clusters.length (); ++i)
    {
      c = all_clusters[i];
      if (c == NULL)
        continue;

      bb2 = c->rep_bb;
      bitmap_set_bit (update_bbs, bb2->index);

      bitmap_clear_bit (c->bbs, bb2->index);
      EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
        {
          bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
          bitmap_clear_bit (update_bbs, bb1->index);

          replace_block_by (bb1, bb2);
          nr_bbs_removed++;
        }
    }

  return nr_bbs_removed;
}

/* Resets debug statement STMT if it has uses that are not dominated by their
   defs.  */

static void
update_debug_stmt (gimple stmt)
{
  use_operand_p use_p;
  ssa_op_iter oi;
  basic_block bbuse;

  if (!gimple_debug_bind_p (stmt))
    return;

  bbuse = gimple_bb (stmt);
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
    {
      tree name = USE_FROM_PTR (use_p);
      gimple def_stmt = SSA_NAME_DEF_STMT (name);
      basic_block bbdef = gimple_bb (def_stmt);
      if (bbdef == NULL || bbuse == bbdef
          || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
        continue;

      gimple_debug_bind_reset_value (stmt);
      update_stmt (stmt);
      break;
    }
}

/* Resets all debug statements that have uses that are not
   dominated by their defs.  */

static void
update_debug_stmts (void)
{
  basic_block bb;
  bitmap_iterator bi;
  unsigned int i;

  EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
    {
      gimple stmt;
      gimple_stmt_iterator gsi;

      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          stmt = gsi_stmt (gsi);
          if (!is_gimple_debug (stmt))
            continue;
          update_debug_stmt (stmt);
        }
    }
}

/* Runs tail merge optimization.  */

unsigned int
tail_merge_optimize (unsigned int todo)
{
  int nr_bbs_removed_total = 0;
  int nr_bbs_removed;
  bool loop_entered = false;
  int iteration_nr = 0;
  int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);

  if (!flag_tree_tail_merge
      || max_iterations == 0)
    return 0;

  timevar_push (TV_TREE_TAIL_MERGE);

  if (!dom_info_available_p (CDI_DOMINATORS))
    {
      /* PRE can leave us with unreachable blocks, remove them now.  */
      delete_unreachable_blocks ();
      calculate_dominance_info (CDI_DOMINATORS);
    }
  init_worklist ();

  while (!worklist.is_empty ())
    {
      if (!loop_entered)
        {
          loop_entered = true;
          alloc_cluster_vectors ();
          update_bbs = BITMAP_ALLOC (NULL);
        }
      else
        reset_cluster_vectors ();

      iteration_nr++;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);

      find_clusters ();
      gcc_assert (worklist.is_empty ());
      if (all_clusters.is_empty ())
        break;

      nr_bbs_removed = apply_clusters ();
      nr_bbs_removed_total += nr_bbs_removed;
      if (nr_bbs_removed == 0)
        break;

      free_dominance_info (CDI_DOMINATORS);

      if (iteration_nr == max_iterations)
        break;

      calculate_dominance_info (CDI_DOMINATORS);
      update_worklist ();
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "htab collision / search: %f\n",
             same_succ_htab->collisions ());

  if (nr_bbs_removed_total > 0)
    {
      if (MAY_HAVE_DEBUG_STMTS)
        {
          calculate_dominance_info (CDI_DOMINATORS);
          update_debug_stmts ();
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Before TODOs.\n");
          dump_function_to_file (current_function_decl, dump_file, dump_flags);
        }

      mark_virtual_operands_for_renaming (cfun);
    }

  delete_worklist ();
  if (loop_entered)
    {
      delete_cluster_vectors ();
      BITMAP_FREE (update_bbs);
    }

  timevar_pop (TV_TREE_TAIL_MERGE);

  return todo;
}