lto-cgraph.c (output_profile_summary, [...]): Use gcov streaming; stream hot bb thres...
[gcc.git] / gcc / tree-ssa-tail-merge.c
1 /* Tail merging for gimple.
2 Copyright (C) 2011-2013 Free Software Foundation, Inc.
3 Contributed by Tom de Vries (tom@codesourcery.com)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Pass overview.
22
23
24 MOTIVATIONAL EXAMPLE
25
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
27
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
29 {
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
32 intD.0 D.3915;
33 const charD.1 * restrict outputFileName.0D.3914;
34
35 # BLOCK 2 freq:10000
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48 if (D.3915_4 == 0)
49 goto <bb 3>;
50 else
51 goto <bb 4>;
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
53
54 # BLOCK 3 freq:1000
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
60 goto <bb 7>;
61 # SUCC: 7 [100.0%] (fallthru,exec)
62
63 # BLOCK 4 freq:9000
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70 if (fpD.2605_8 == 0B)
71 goto <bb 5>;
72 else
73 goto <bb 6>;
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
75
76 # BLOCK 5 freq:173
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
82 goto <bb 7>;
83 # SUCC: 7 [100.0%] (fallthru,exec)
84
85 # BLOCK 6 freq:8827
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
92
93 # BLOCK 7 freq:10000
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
96 # PT = nonlocal null
97
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 .MEMD.3923_18(6)>
101 # VUSE <.MEMD.3923_11>
102 return ctxD.2601_1;
103 # SUCC: EXIT [100.0%]
104 }
105
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
108
109
110 CONTEXT
111
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just the end parts of it.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
119
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
126
127 For efficient implementation, we would like to value numbers the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
132
133 For the first situation (same_operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
136
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, so the block value numbers will be different.
143
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
150 to merge the blocks.
151
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using scc value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
155
156
157 IMPLEMENTATION
158
159 1. The pass first determines all groups of blocks with the same successor
160 blocks.
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
165
166
167 LIMITATIONS/TODO
168
169 - block only
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
175 - no insertion of gimple_reg phis, We only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
177
178
179 SWITCHES
180
181 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
182
183 #include "config.h"
184 #include "system.h"
185 #include "coretypes.h"
186 #include "tm.h"
187 #include "tree.h"
188 #include "tm_p.h"
189 #include "basic-block.h"
190 #include "flags.h"
191 #include "function.h"
192 #include "tree-flow.h"
193 #include "bitmap.h"
194 #include "tree-ssa-alias.h"
195 #include "params.h"
196 #include "hash-table.h"
197 #include "gimple-pretty-print.h"
198 #include "tree-ssa-sccvn.h"
199 #include "tree-dump.h"
200
201 /* ??? This currently runs as part of tree-ssa-pre. Why is this not
202 a stand-alone GIMPLE pass? */
203 #include "tree-pass.h"
204
205 /* Describes a group of bbs with the same successors. The successor bbs are
206 cached in succs, and the successor edge flags are cached in succ_flags.
207 If a bb has the EDGE_TRUE/VALSE_VALUE flags swapped compared to succ_flags,
208 it's marked in inverse.
209 Additionally, the hash value for the struct is cached in hashval, and
210 in_worklist indicates whether it's currently part of worklist. */
211
212 struct same_succ_def
213 {
214 /* The bbs that have the same successor bbs. */
215 bitmap bbs;
216 /* The successor bbs. */
217 bitmap succs;
218 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
219 bb. */
220 bitmap inverse;
221 /* The edge flags for each of the successor bbs. */
222 vec<int> succ_flags;
223 /* Indicates whether the struct is currently in the worklist. */
224 bool in_worklist;
225 /* The hash value of the struct. */
226 hashval_t hashval;
227
228 /* hash_table support. */
229 typedef same_succ_def value_type;
230 typedef same_succ_def compare_type;
231 static inline hashval_t hash (const value_type *);
232 static int equal (const value_type *, const compare_type *);
233 static void remove (value_type *);
234 };
235 typedef struct same_succ_def *same_succ;
236 typedef const struct same_succ_def *const_same_succ;
237
238 /* hash routine for hash_table support, returns hashval of E. */
239
240 inline hashval_t
241 same_succ_def::hash (const value_type *e)
242 {
243 return e->hashval;
244 }
245
246 /* A group of bbs where 1 bb from bbs can replace the other bbs. */
247
248 struct bb_cluster_def
249 {
250 /* The bbs in the cluster. */
251 bitmap bbs;
252 /* The preds of the bbs in the cluster. */
253 bitmap preds;
254 /* Index in all_clusters vector. */
255 int index;
256 /* The bb to replace the cluster with. */
257 basic_block rep_bb;
258 };
259 typedef struct bb_cluster_def *bb_cluster;
260 typedef const struct bb_cluster_def *const_bb_cluster;
261
262 /* Per bb-info. */
263
264 struct aux_bb_info
265 {
266 /* The number of non-debug statements in the bb. */
267 int size;
268 /* The same_succ that this bb is a member of. */
269 same_succ bb_same_succ;
270 /* The cluster that this bb is a member of. */
271 bb_cluster cluster;
272 /* The vop state at the exit of a bb. This is shortlived data, used to
273 communicate data between update_block_by and update_vuses. */
274 tree vop_at_exit;
275 /* The bb that either contains or is dominated by the dependencies of the
276 bb. */
277 basic_block dep_bb;
278 };
279
280 /* Macros to access the fields of struct aux_bb_info. */
281
282 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
283 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
284 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
285 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
286 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
287
288 /* Returns true if the only effect a statement STMT has, is to define locally
289 used SSA_NAMEs. */
290
291 static bool
292 stmt_local_def (gimple stmt)
293 {
294 basic_block bb, def_bb;
295 imm_use_iterator iter;
296 use_operand_p use_p;
297 tree val;
298 def_operand_p def_p;
299
300 if (gimple_has_side_effects (stmt))
301 return false;
302
303 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
304 if (def_p == NULL)
305 return false;
306
307 val = DEF_FROM_PTR (def_p);
308 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
309 return false;
310
311 def_bb = gimple_bb (stmt);
312
313 FOR_EACH_IMM_USE_FAST (use_p, iter, val)
314 {
315 if (is_gimple_debug (USE_STMT (use_p)))
316 continue;
317 bb = gimple_bb (USE_STMT (use_p));
318 if (bb == def_bb)
319 continue;
320
321 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
322 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
323 continue;
324
325 return false;
326 }
327
328 return true;
329 }
330
331 /* Let GSI skip forwards over local defs. */
332
333 static void
334 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
335 {
336 gimple stmt;
337
338 while (true)
339 {
340 if (gsi_end_p (*gsi))
341 return;
342 stmt = gsi_stmt (*gsi);
343 if (!stmt_local_def (stmt))
344 return;
345 gsi_next_nondebug (gsi);
346 }
347 }
348
349 /* VAL1 and VAL2 are either:
350 - uses in BB1 and BB2, or
351 - phi alternatives for BB1 and BB2.
352 Return true if the uses have the same gvn value. */
353
354 static bool
355 gvn_uses_equal (tree val1, tree val2)
356 {
357 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
358
359 if (val1 == val2)
360 return true;
361
362 if (vn_valueize (val1) != vn_valueize (val2))
363 return false;
364
365 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
366 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
367 }
368
369 /* Prints E to FILE. */
370
371 static void
372 same_succ_print (FILE *file, const same_succ e)
373 {
374 unsigned int i;
375 bitmap_print (file, e->bbs, "bbs:", "\n");
376 bitmap_print (file, e->succs, "succs:", "\n");
377 bitmap_print (file, e->inverse, "inverse:", "\n");
378 fprintf (file, "flags:");
379 for (i = 0; i < e->succ_flags.length (); ++i)
380 fprintf (file, " %x", e->succ_flags[i]);
381 fprintf (file, "\n");
382 }
383
384 /* Prints same_succ VE to VFILE. */
385
386 inline int
387 ssa_same_succ_print_traverse (same_succ *pe, FILE *file)
388 {
389 const same_succ e = *pe;
390 same_succ_print (file, e);
391 return 1;
392 }
393
394 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
395
396 static void
397 update_dep_bb (basic_block use_bb, tree val)
398 {
399 basic_block dep_bb;
400
401 /* Not a dep. */
402 if (TREE_CODE (val) != SSA_NAME)
403 return;
404
405 /* Skip use of global def. */
406 if (SSA_NAME_IS_DEFAULT_DEF (val))
407 return;
408
409 /* Skip use of local def. */
410 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
411 if (dep_bb == use_bb)
412 return;
413
414 if (BB_DEP_BB (use_bb) == NULL
415 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
416 BB_DEP_BB (use_bb) = dep_bb;
417 }
418
419 /* Update BB_DEP_BB, given the dependencies in STMT. */
420
421 static void
422 stmt_update_dep_bb (gimple stmt)
423 {
424 ssa_op_iter iter;
425 use_operand_p use;
426
427 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
428 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
429 }
430
431 /* Calculates hash value for same_succ VE. */
432
433 static hashval_t
434 same_succ_hash (const_same_succ e)
435 {
436 hashval_t hashval = bitmap_hash (e->succs);
437 int flags;
438 unsigned int i;
439 unsigned int first = bitmap_first_set_bit (e->bbs);
440 basic_block bb = BASIC_BLOCK (first);
441 int size = 0;
442 gimple_stmt_iterator gsi;
443 gimple stmt;
444 tree arg;
445 unsigned int s;
446 bitmap_iterator bs;
447
448 for (gsi = gsi_start_nondebug_bb (bb);
449 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
450 {
451 stmt = gsi_stmt (gsi);
452 stmt_update_dep_bb (stmt);
453 if (stmt_local_def (stmt))
454 continue;
455 size++;
456
457 hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval);
458 if (is_gimple_assign (stmt))
459 hashval = iterative_hash_hashval_t (gimple_assign_rhs_code (stmt),
460 hashval);
461 if (!is_gimple_call (stmt))
462 continue;
463 if (gimple_call_internal_p (stmt))
464 hashval = iterative_hash_hashval_t
465 ((hashval_t) gimple_call_internal_fn (stmt), hashval);
466 else
467 hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval);
468 for (i = 0; i < gimple_call_num_args (stmt); i++)
469 {
470 arg = gimple_call_arg (stmt, i);
471 arg = vn_valueize (arg);
472 hashval = iterative_hash_expr (arg, hashval);
473 }
474 }
475
476 hashval = iterative_hash_hashval_t (size, hashval);
477 BB_SIZE (bb) = size;
478
479 for (i = 0; i < e->succ_flags.length (); ++i)
480 {
481 flags = e->succ_flags[i];
482 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
483 hashval = iterative_hash_hashval_t (flags, hashval);
484 }
485
486 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
487 {
488 int n = find_edge (bb, BASIC_BLOCK (s))->dest_idx;
489 for (gsi = gsi_start_phis (BASIC_BLOCK (s)); !gsi_end_p (gsi);
490 gsi_next (&gsi))
491 {
492 gimple phi = gsi_stmt (gsi);
493 tree lhs = gimple_phi_result (phi);
494 tree val = gimple_phi_arg_def (phi, n);
495
496 if (virtual_operand_p (lhs))
497 continue;
498 update_dep_bb (bb, val);
499 }
500 }
501
502 return hashval;
503 }
504
505 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
506 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
507 the other edge flags. */
508
509 static bool
510 inverse_flags (const_same_succ e1, const_same_succ e2)
511 {
512 int f1a, f1b, f2a, f2b;
513 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
514
515 if (e1->succ_flags.length () != 2)
516 return false;
517
518 f1a = e1->succ_flags[0];
519 f1b = e1->succ_flags[1];
520 f2a = e2->succ_flags[0];
521 f2b = e2->succ_flags[1];
522
523 if (f1a == f2a && f1b == f2b)
524 return false;
525
526 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
527 }
528
529 /* Compares SAME_SUCCs E1 and E2. */
530
531 int
532 same_succ_def::equal (const value_type *e1, const compare_type *e2)
533 {
534 unsigned int i, first1, first2;
535 gimple_stmt_iterator gsi1, gsi2;
536 gimple s1, s2;
537 basic_block bb1, bb2;
538
539 if (e1->hashval != e2->hashval)
540 return 0;
541
542 if (e1->succ_flags.length () != e2->succ_flags.length ())
543 return 0;
544
545 if (!bitmap_equal_p (e1->succs, e2->succs))
546 return 0;
547
548 if (!inverse_flags (e1, e2))
549 {
550 for (i = 0; i < e1->succ_flags.length (); ++i)
551 if (e1->succ_flags[i] != e1->succ_flags[i])
552 return 0;
553 }
554
555 first1 = bitmap_first_set_bit (e1->bbs);
556 first2 = bitmap_first_set_bit (e2->bbs);
557
558 bb1 = BASIC_BLOCK (first1);
559 bb2 = BASIC_BLOCK (first2);
560
561 if (BB_SIZE (bb1) != BB_SIZE (bb2))
562 return 0;
563
564 gsi1 = gsi_start_nondebug_bb (bb1);
565 gsi2 = gsi_start_nondebug_bb (bb2);
566 gsi_advance_fw_nondebug_nonlocal (&gsi1);
567 gsi_advance_fw_nondebug_nonlocal (&gsi2);
568 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
569 {
570 s1 = gsi_stmt (gsi1);
571 s2 = gsi_stmt (gsi2);
572 if (gimple_code (s1) != gimple_code (s2))
573 return 0;
574 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
575 return 0;
576 gsi_next_nondebug (&gsi1);
577 gsi_next_nondebug (&gsi2);
578 gsi_advance_fw_nondebug_nonlocal (&gsi1);
579 gsi_advance_fw_nondebug_nonlocal (&gsi2);
580 }
581
582 return 1;
583 }
584
585 /* Alloc and init a new SAME_SUCC. */
586
587 static same_succ
588 same_succ_alloc (void)
589 {
590 same_succ same = XNEW (struct same_succ_def);
591
592 same->bbs = BITMAP_ALLOC (NULL);
593 same->succs = BITMAP_ALLOC (NULL);
594 same->inverse = BITMAP_ALLOC (NULL);
595 same->succ_flags.create (10);
596 same->in_worklist = false;
597
598 return same;
599 }
600
601 /* Delete same_succ E. */
602
603 void
604 same_succ_def::remove (same_succ e)
605 {
606 BITMAP_FREE (e->bbs);
607 BITMAP_FREE (e->succs);
608 BITMAP_FREE (e->inverse);
609 e->succ_flags.release ();
610
611 XDELETE (e);
612 }
613
614 /* Reset same_succ SAME. */
615
616 static void
617 same_succ_reset (same_succ same)
618 {
619 bitmap_clear (same->bbs);
620 bitmap_clear (same->succs);
621 bitmap_clear (same->inverse);
622 same->succ_flags.truncate (0);
623 }
624
625 static hash_table <same_succ_def> same_succ_htab;
626
627 /* Array that is used to store the edge flags for a successor. */
628
629 static int *same_succ_edge_flags;
630
631 /* Bitmap that is used to mark bbs that are recently deleted. */
632
633 static bitmap deleted_bbs;
634
635 /* Bitmap that is used to mark predecessors of bbs that are
636 deleted. */
637
638 static bitmap deleted_bb_preds;
639
640 /* Prints same_succ_htab to stderr. */
641
642 extern void debug_same_succ (void);
643 DEBUG_FUNCTION void
644 debug_same_succ ( void)
645 {
646 same_succ_htab.traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
647 }
648
649
650 /* Vector of bbs to process. */
651
652 static vec<same_succ> worklist;
653
654 /* Prints worklist to FILE. */
655
656 static void
657 print_worklist (FILE *file)
658 {
659 unsigned int i;
660 for (i = 0; i < worklist.length (); ++i)
661 same_succ_print (file, worklist[i]);
662 }
663
664 /* Adds SAME to worklist. */
665
666 static void
667 add_to_worklist (same_succ same)
668 {
669 if (same->in_worklist)
670 return;
671
672 if (bitmap_count_bits (same->bbs) < 2)
673 return;
674
675 same->in_worklist = true;
676 worklist.safe_push (same);
677 }
678
679 /* Add BB to same_succ_htab. */
680
681 static void
682 find_same_succ_bb (basic_block bb, same_succ *same_p)
683 {
684 unsigned int j;
685 bitmap_iterator bj;
686 same_succ same = *same_p;
687 same_succ *slot;
688 edge_iterator ei;
689 edge e;
690
691 if (bb == NULL)
692 return;
693 bitmap_set_bit (same->bbs, bb->index);
694 FOR_EACH_EDGE (e, ei, bb->succs)
695 {
696 int index = e->dest->index;
697 bitmap_set_bit (same->succs, index);
698 same_succ_edge_flags[index] = e->flags;
699 }
700 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
701 same->succ_flags.safe_push (same_succ_edge_flags[j]);
702
703 same->hashval = same_succ_hash (same);
704
705 slot = same_succ_htab.find_slot_with_hash (same, same->hashval, INSERT);
706 if (*slot == NULL)
707 {
708 *slot = same;
709 BB_SAME_SUCC (bb) = same;
710 add_to_worklist (same);
711 *same_p = NULL;
712 }
713 else
714 {
715 bitmap_set_bit ((*slot)->bbs, bb->index);
716 BB_SAME_SUCC (bb) = *slot;
717 add_to_worklist (*slot);
718 if (inverse_flags (same, *slot))
719 bitmap_set_bit ((*slot)->inverse, bb->index);
720 same_succ_reset (same);
721 }
722 }
723
724 /* Find bbs with same successors. */
725
726 static void
727 find_same_succ (void)
728 {
729 same_succ same = same_succ_alloc ();
730 basic_block bb;
731
732 FOR_EACH_BB (bb)
733 {
734 find_same_succ_bb (bb, &same);
735 if (same == NULL)
736 same = same_succ_alloc ();
737 }
738
739 same_succ_def::remove (same);
740 }
741
742 /* Initializes worklist administration. */
743
744 static void
745 init_worklist (void)
746 {
747 alloc_aux_for_blocks (sizeof (struct aux_bb_info));
748 same_succ_htab.create (n_basic_blocks);
749 same_succ_edge_flags = XCNEWVEC (int, last_basic_block);
750 deleted_bbs = BITMAP_ALLOC (NULL);
751 deleted_bb_preds = BITMAP_ALLOC (NULL);
752 worklist.create (n_basic_blocks);
753 find_same_succ ();
754
755 if (dump_file && (dump_flags & TDF_DETAILS))
756 {
757 fprintf (dump_file, "initial worklist:\n");
758 print_worklist (dump_file);
759 }
760 }
761
762 /* Deletes worklist administration. */
763
764 static void
765 delete_worklist (void)
766 {
767 free_aux_for_blocks ();
768 same_succ_htab.dispose ();
769 XDELETEVEC (same_succ_edge_flags);
770 same_succ_edge_flags = NULL;
771 BITMAP_FREE (deleted_bbs);
772 BITMAP_FREE (deleted_bb_preds);
773 worklist.release ();
774 }
775
776 /* Mark BB as deleted, and mark its predecessors. */
777
778 static void
779 mark_basic_block_deleted (basic_block bb)
780 {
781 edge e;
782 edge_iterator ei;
783
784 bitmap_set_bit (deleted_bbs, bb->index);
785
786 FOR_EACH_EDGE (e, ei, bb->preds)
787 bitmap_set_bit (deleted_bb_preds, e->src->index);
788 }
789
790 /* Removes BB from its corresponding same_succ. */
791
792 static void
793 same_succ_flush_bb (basic_block bb)
794 {
795 same_succ same = BB_SAME_SUCC (bb);
796 BB_SAME_SUCC (bb) = NULL;
797 if (bitmap_single_bit_set_p (same->bbs))
798 same_succ_htab.remove_elt_with_hash (same, same->hashval);
799 else
800 bitmap_clear_bit (same->bbs, bb->index);
801 }
802
803 /* Removes all bbs in BBS from their corresponding same_succ. */
804
805 static void
806 same_succ_flush_bbs (bitmap bbs)
807 {
808 unsigned int i;
809 bitmap_iterator bi;
810
811 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
812 same_succ_flush_bb (BASIC_BLOCK (i));
813 }
814
815 /* Release the last vdef in BB, either normal or phi result. */
816
817 static void
818 release_last_vdef (basic_block bb)
819 {
820 gimple_stmt_iterator i;
821
822 for (i = gsi_last_bb (bb); !gsi_end_p (i); gsi_prev_nondebug (&i))
823 {
824 gimple stmt = gsi_stmt (i);
825 if (gimple_vdef (stmt) == NULL_TREE)
826 continue;
827
828 mark_virtual_operand_for_renaming (gimple_vdef (stmt));
829 return;
830 }
831
832 for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i))
833 {
834 gimple phi = gsi_stmt (i);
835 tree res = gimple_phi_result (phi);
836
837 if (!virtual_operand_p (res))
838 continue;
839
840 mark_virtual_phi_result_for_renaming (phi);
841 return;
842 }
843
844 }
845
846 /* For deleted_bb_preds, find bbs with same successors. */
847
848 static void
849 update_worklist (void)
850 {
851 unsigned int i;
852 bitmap_iterator bi;
853 basic_block bb;
854 same_succ same;
855
856 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
857 bitmap_clear (deleted_bbs);
858
859 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
860 same_succ_flush_bbs (deleted_bb_preds);
861
862 same = same_succ_alloc ();
863 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
864 {
865 bb = BASIC_BLOCK (i);
866 gcc_assert (bb != NULL);
867 find_same_succ_bb (bb, &same);
868 if (same == NULL)
869 same = same_succ_alloc ();
870 }
871 same_succ_def::remove (same);
872 bitmap_clear (deleted_bb_preds);
873 }
874
875 /* Prints cluster C to FILE. */
876
877 static void
878 print_cluster (FILE *file, bb_cluster c)
879 {
880 if (c == NULL)
881 return;
882 bitmap_print (file, c->bbs, "bbs:", "\n");
883 bitmap_print (file, c->preds, "preds:", "\n");
884 }
885
886 /* Prints cluster C to stderr. */
887
888 extern void debug_cluster (bb_cluster);
889 DEBUG_FUNCTION void
890 debug_cluster (bb_cluster c)
891 {
892 print_cluster (stderr, c);
893 }
894
895 /* Update C->rep_bb, given that BB is added to the cluster. */
896
897 static void
898 update_rep_bb (bb_cluster c, basic_block bb)
899 {
900 /* Initial. */
901 if (c->rep_bb == NULL)
902 {
903 c->rep_bb = bb;
904 return;
905 }
906
907 /* Current needs no deps, keep it. */
908 if (BB_DEP_BB (c->rep_bb) == NULL)
909 return;
910
911 /* Bb needs no deps, change rep_bb. */
912 if (BB_DEP_BB (bb) == NULL)
913 {
914 c->rep_bb = bb;
915 return;
916 }
917
918 /* Bb needs last deps earlier than current, change rep_bb. A potential
919 problem with this, is that the first deps might also be earlier, which
920 would mean we prefer longer lifetimes for the deps. To be able to check
921 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
922 BB_DEP_BB, which is really BB_LAST_DEP_BB.
923 The benefit of choosing the bb with last deps earlier, is that it can
924 potentially be used as replacement for more bbs. */
925 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
926 c->rep_bb = bb;
927 }
928
929 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
930
931 static void
932 add_bb_to_cluster (bb_cluster c, basic_block bb)
933 {
934 edge e;
935 edge_iterator ei;
936
937 bitmap_set_bit (c->bbs, bb->index);
938
939 FOR_EACH_EDGE (e, ei, bb->preds)
940 bitmap_set_bit (c->preds, e->src->index);
941
942 update_rep_bb (c, bb);
943 }
944
945 /* Allocate and init new cluster. */
946
947 static bb_cluster
948 new_cluster (void)
949 {
950 bb_cluster c;
951 c = XCNEW (struct bb_cluster_def);
952 c->bbs = BITMAP_ALLOC (NULL);
953 c->preds = BITMAP_ALLOC (NULL);
954 c->rep_bb = NULL;
955 return c;
956 }
957
958 /* Delete clusters. */
959
960 static void
961 delete_cluster (bb_cluster c)
962 {
963 if (c == NULL)
964 return;
965 BITMAP_FREE (c->bbs);
966 BITMAP_FREE (c->preds);
967 XDELETE (c);
968 }
969
970
971 /* Array that contains all clusters. */
972
973 static vec<bb_cluster> all_clusters;
974
975 /* Allocate all cluster vectors. */
976
977 static void
978 alloc_cluster_vectors (void)
979 {
980 all_clusters.create (n_basic_blocks);
981 }
982
983 /* Reset all cluster vectors. */
984
985 static void
986 reset_cluster_vectors (void)
987 {
988 unsigned int i;
989 basic_block bb;
990 for (i = 0; i < all_clusters.length (); ++i)
991 delete_cluster (all_clusters[i]);
992 all_clusters.truncate (0);
993 FOR_EACH_BB (bb)
994 BB_CLUSTER (bb) = NULL;
995 }
996
997 /* Delete all cluster vectors. */
998
999 static void
1000 delete_cluster_vectors (void)
1001 {
1002 unsigned int i;
1003 for (i = 0; i < all_clusters.length (); ++i)
1004 delete_cluster (all_clusters[i]);
1005 all_clusters.release ();
1006 }
1007
1008 /* Merge cluster C2 into C1. */
1009
1010 static void
1011 merge_clusters (bb_cluster c1, bb_cluster c2)
1012 {
1013 bitmap_ior_into (c1->bbs, c2->bbs);
1014 bitmap_ior_into (c1->preds, c2->preds);
1015 }
1016
1017 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
1018 all_clusters, or merge c with existing cluster. */
1019
1020 static void
1021 set_cluster (basic_block bb1, basic_block bb2)
1022 {
1023 basic_block merge_bb, other_bb;
1024 bb_cluster merge, old, c;
1025
1026 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1027 {
1028 c = new_cluster ();
1029 add_bb_to_cluster (c, bb1);
1030 add_bb_to_cluster (c, bb2);
1031 BB_CLUSTER (bb1) = c;
1032 BB_CLUSTER (bb2) = c;
1033 c->index = all_clusters.length ();
1034 all_clusters.safe_push (c);
1035 }
1036 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1037 {
1038 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1039 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1040 merge = BB_CLUSTER (merge_bb);
1041 add_bb_to_cluster (merge, other_bb);
1042 BB_CLUSTER (other_bb) = merge;
1043 }
1044 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1045 {
1046 unsigned int i;
1047 bitmap_iterator bi;
1048
1049 old = BB_CLUSTER (bb2);
1050 merge = BB_CLUSTER (bb1);
1051 merge_clusters (merge, old);
1052 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1053 BB_CLUSTER (BASIC_BLOCK (i)) = merge;
1054 all_clusters[old->index] = NULL;
1055 update_rep_bb (merge, old->rep_bb);
1056 delete_cluster (old);
1057 }
1058 else
1059 gcc_unreachable ();
1060 }
1061
1062 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
1063 gimple_bb (s2) are members of SAME_SUCC. */
1064
1065 static bool
1066 gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
1067 {
1068 unsigned int i;
1069 tree lhs1, lhs2;
1070 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1071 tree t1, t2;
1072 bool equal, inv_cond;
1073 enum tree_code code1, code2;
1074
1075 if (gimple_code (s1) != gimple_code (s2))
1076 return false;
1077
1078 switch (gimple_code (s1))
1079 {
1080 case GIMPLE_CALL:
1081 if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1082 return false;
1083 if (!gimple_call_same_target_p (s1, s2))
1084 return false;
1085
1086 /* Eventually, we'll significantly complicate the CFG by adding
1087 back edges to properly model the effects of transaction restart.
1088 For the bulk of optimization this does not matter, but what we
1089 cannot recover from is tail merging blocks between two separate
1090 transactions. Avoid that by making commit not match. */
1091 if (gimple_call_builtin_p (s1, BUILT_IN_TM_COMMIT))
1092 return false;
1093
1094 equal = true;
1095 for (i = 0; i < gimple_call_num_args (s1); ++i)
1096 {
1097 t1 = gimple_call_arg (s1, i);
1098 t2 = gimple_call_arg (s2, i);
1099 if (operand_equal_p (t1, t2, 0))
1100 continue;
1101 if (gvn_uses_equal (t1, t2))
1102 continue;
1103 equal = false;
1104 break;
1105 }
1106 if (!equal)
1107 return false;
1108
1109 lhs1 = gimple_get_lhs (s1);
1110 lhs2 = gimple_get_lhs (s2);
1111 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1112 return true;
1113 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1114 return false;
1115 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1116 return vn_valueize (lhs1) == vn_valueize (lhs2);
1117 return operand_equal_p (lhs1, lhs2, 0);
1118
1119 case GIMPLE_ASSIGN:
1120 lhs1 = gimple_get_lhs (s1);
1121 lhs2 = gimple_get_lhs (s2);
1122 if (TREE_CODE (lhs1) != SSA_NAME
1123 && TREE_CODE (lhs2) != SSA_NAME)
1124 return (vn_valueize (gimple_vdef (s1))
1125 == vn_valueize (gimple_vdef (s2)));
1126 else if (TREE_CODE (lhs1) == SSA_NAME
1127 && TREE_CODE (lhs2) == SSA_NAME)
1128 return vn_valueize (lhs1) == vn_valueize (lhs2);
1129 return false;
1130
1131 case GIMPLE_COND:
1132 t1 = gimple_cond_lhs (s1);
1133 t2 = gimple_cond_lhs (s2);
1134 if (!operand_equal_p (t1, t2, 0)
1135 && !gvn_uses_equal (t1, t2))
1136 return false;
1137
1138 t1 = gimple_cond_rhs (s1);
1139 t2 = gimple_cond_rhs (s2);
1140 if (!operand_equal_p (t1, t2, 0)
1141 && !gvn_uses_equal (t1, t2))
1142 return false;
1143
1144 code1 = gimple_expr_code (s1);
1145 code2 = gimple_expr_code (s2);
1146 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1147 != bitmap_bit_p (same_succ->inverse, bb2->index));
1148 if (inv_cond)
1149 {
1150 bool honor_nans
1151 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1))));
1152 code2 = invert_tree_comparison (code2, honor_nans);
1153 }
1154 return code1 == code2;
1155
1156 default:
1157 return false;
1158 }
1159 }
1160
1161 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE.
1162 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1163 processed statements. */
1164
1165 static void
1166 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1167 bool *vuse_escaped)
1168 {
1169 gimple stmt;
1170 tree lvuse;
1171
1172 while (true)
1173 {
1174 if (gsi_end_p (*gsi))
1175 return;
1176 stmt = gsi_stmt (*gsi);
1177
1178 lvuse = gimple_vuse (stmt);
1179 if (lvuse != NULL_TREE)
1180 {
1181 *vuse = lvuse;
1182 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1183 *vuse_escaped = true;
1184 }
1185
1186 if (!stmt_local_def (stmt))
1187 return;
1188 gsi_prev_nondebug (gsi);
1189 }
1190 }
1191
1192 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
1193 clusters them. */
1194
1195 static void
1196 find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
1197 {
1198 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1199 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1200 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1201 bool vuse_escaped = false;
1202
1203 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1204 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1205
1206 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1207 {
1208 gimple stmt1 = gsi_stmt (gsi1);
1209 gimple stmt2 = gsi_stmt (gsi2);
1210
1211 if (!gimple_equal_p (same_succ, stmt1, stmt2))
1212 return;
1213
1214 // We cannot tail-merge the builtins that end transactions.
1215 // ??? The alternative being unsharing of BBs in the tm_init pass.
1216 if (flag_tm
1217 && is_gimple_call (stmt1)
1218 && (gimple_call_flags (stmt1) & ECF_TM_BUILTIN)
1219 && is_tm_ending_fndecl (gimple_call_fndecl (stmt1)))
1220 return;
1221
1222 gsi_prev_nondebug (&gsi1);
1223 gsi_prev_nondebug (&gsi2);
1224 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1225 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1226 }
1227
1228 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1229 return;
1230
1231 /* If the incoming vuses are not the same, and the vuse escaped into an
1232 SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1233 which potentially means the semantics of one of the blocks will be changed.
1234 TODO: make this check more precise. */
1235 if (vuse_escaped && vuse1 != vuse2)
1236 return;
1237
1238 if (dump_file)
1239 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1240 bb1->index, bb2->index);
1241
1242 set_cluster (bb1, bb2);
1243 }
1244
1245 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1246 E2 are equal. */
1247
1248 static bool
1249 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1250 {
1251 int n1 = e1->dest_idx, n2 = e2->dest_idx;
1252 gimple_stmt_iterator gsi;
1253
1254 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1255 {
1256 gimple phi = gsi_stmt (gsi);
1257 tree lhs = gimple_phi_result (phi);
1258 tree val1 = gimple_phi_arg_def (phi, n1);
1259 tree val2 = gimple_phi_arg_def (phi, n2);
1260
1261 if (virtual_operand_p (lhs))
1262 continue;
1263
1264 if (operand_equal_for_phi_arg_p (val1, val2))
1265 continue;
1266 if (gvn_uses_equal (val1, val2))
1267 continue;
1268
1269 return false;
1270 }
1271
1272 return true;
1273 }
1274
1275 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1276 phi alternatives for BB1 and BB2 are equal. */
1277
1278 static bool
1279 same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
1280 {
1281 unsigned int s;
1282 bitmap_iterator bs;
1283 edge e1, e2;
1284 basic_block succ;
1285
1286 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1287 {
1288 succ = BASIC_BLOCK (s);
1289 e1 = find_edge (bb1, succ);
1290 e2 = find_edge (bb2, succ);
1291 if (e1->flags & EDGE_COMPLEX
1292 || e2->flags & EDGE_COMPLEX)
1293 return false;
1294
1295 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1296 the same value. */
1297 if (!same_phi_alternatives_1 (succ, e1, e2))
1298 return false;
1299 }
1300
1301 return true;
1302 }
1303
1304 /* Return true if BB has non-vop phis. */
1305
1306 static bool
1307 bb_has_non_vop_phi (basic_block bb)
1308 {
1309 gimple_seq phis = phi_nodes (bb);
1310 gimple phi;
1311
1312 if (phis == NULL)
1313 return false;
1314
1315 if (!gimple_seq_singleton_p (phis))
1316 return true;
1317
1318 phi = gimple_seq_first_stmt (phis);
1319 return !virtual_operand_p (gimple_phi_result (phi));
1320 }
1321
1322 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1323 invariant that uses in FROM are dominates by their defs. */
1324
1325 static bool
1326 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1327 {
1328 basic_block cd, dep_bb = BB_DEP_BB (to);
1329 edge_iterator ei;
1330 edge e;
1331 bitmap from_preds = BITMAP_ALLOC (NULL);
1332
1333 if (dep_bb == NULL)
1334 return true;
1335
1336 FOR_EACH_EDGE (e, ei, from->preds)
1337 bitmap_set_bit (from_preds, e->src->index);
1338 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1339 BITMAP_FREE (from_preds);
1340
1341 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1342 }
1343
1344 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1345 replacement bb) and vice versa maintains the invariant that uses in the
1346 replacement are dominates by their defs. */
1347
1348 static bool
1349 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1350 {
1351 if (BB_CLUSTER (bb1) != NULL)
1352 bb1 = BB_CLUSTER (bb1)->rep_bb;
1353
1354 if (BB_CLUSTER (bb2) != NULL)
1355 bb2 = BB_CLUSTER (bb2)->rep_bb;
1356
1357 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1358 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1359 }
1360
1361 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */
1362
1363 static void
1364 find_clusters_1 (same_succ same_succ)
1365 {
1366 basic_block bb1, bb2;
1367 unsigned int i, j;
1368 bitmap_iterator bi, bj;
1369 int nr_comparisons;
1370 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
1371
1372 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1373 {
1374 bb1 = BASIC_BLOCK (i);
1375
1376 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding
1377 phi-nodes in bb1 and bb2, with the same alternatives for the same
1378 preds. */
1379 if (bb_has_non_vop_phi (bb1))
1380 continue;
1381
1382 nr_comparisons = 0;
1383 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1384 {
1385 bb2 = BASIC_BLOCK (j);
1386
1387 if (bb_has_non_vop_phi (bb2))
1388 continue;
1389
1390 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1391 continue;
1392
1393 /* Limit quadratic behaviour. */
1394 nr_comparisons++;
1395 if (nr_comparisons > max_comparisons)
1396 break;
1397
1398 /* This is a conservative dependency check. We could test more
1399 precise for allowed replacement direction. */
1400 if (!deps_ok_for_redirect (bb1, bb2))
1401 continue;
1402
1403 if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1404 continue;
1405
1406 find_duplicate (same_succ, bb1, bb2);
1407 }
1408 }
1409 }
1410
1411 /* Find clusters of bbs which can be merged. */
1412
1413 static void
1414 find_clusters (void)
1415 {
1416 same_succ same;
1417
1418 while (!worklist.is_empty ())
1419 {
1420 same = worklist.pop ();
1421 same->in_worklist = false;
1422 if (dump_file && (dump_flags & TDF_DETAILS))
1423 {
1424 fprintf (dump_file, "processing worklist entry\n");
1425 same_succ_print (dump_file, same);
1426 }
1427 find_clusters_1 (same);
1428 }
1429 }
1430
1431 /* Returns the vop phi of BB, if any. */
1432
1433 static gimple
1434 vop_phi (basic_block bb)
1435 {
1436 gimple stmt;
1437 gimple_stmt_iterator gsi;
1438 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1439 {
1440 stmt = gsi_stmt (gsi);
1441 if (! virtual_operand_p (gimple_phi_result (stmt)))
1442 continue;
1443 return stmt;
1444 }
1445 return NULL;
1446 }
1447
1448 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */
1449
1450 static void
1451 replace_block_by (basic_block bb1, basic_block bb2)
1452 {
1453 edge pred_edge;
1454 unsigned int i;
1455 gimple bb2_phi;
1456
1457 bb2_phi = vop_phi (bb2);
1458
1459 /* Mark the basic block as deleted. */
1460 mark_basic_block_deleted (bb1);
1461
1462 /* Redirect the incoming edges of bb1 to bb2. */
1463 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
1464 {
1465 pred_edge = EDGE_PRED (bb1, i - 1);
1466 pred_edge = redirect_edge_and_branch (pred_edge, bb2);
1467 gcc_assert (pred_edge != NULL);
1468
1469 if (bb2_phi == NULL)
1470 continue;
1471
1472 /* The phi might have run out of capacity when the redirect added an
1473 argument, which means it could have been replaced. Refresh it. */
1474 bb2_phi = vop_phi (bb2);
1475
1476 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
1477 pred_edge, UNKNOWN_LOCATION);
1478 }
1479
1480 bb2->frequency += bb1->frequency;
1481 if (bb2->frequency > BB_FREQ_MAX)
1482 bb2->frequency = BB_FREQ_MAX;
1483
1484 bb2->count += bb1->count;
1485
1486 /* Do updates that use bb1, before deleting bb1. */
1487 release_last_vdef (bb1);
1488 same_succ_flush_bb (bb1);
1489
1490 delete_basic_block (bb1);
1491 }
1492
1493 /* Bbs for which update_debug_stmt need to be called. */
1494
1495 static bitmap update_bbs;
1496
1497 /* For each cluster in all_clusters, merge all cluster->bbs. Returns
1498 number of bbs removed. */
1499
1500 static int
1501 apply_clusters (void)
1502 {
1503 basic_block bb1, bb2;
1504 bb_cluster c;
1505 unsigned int i, j;
1506 bitmap_iterator bj;
1507 int nr_bbs_removed = 0;
1508
1509 for (i = 0; i < all_clusters.length (); ++i)
1510 {
1511 c = all_clusters[i];
1512 if (c == NULL)
1513 continue;
1514
1515 bb2 = c->rep_bb;
1516 bitmap_set_bit (update_bbs, bb2->index);
1517
1518 bitmap_clear_bit (c->bbs, bb2->index);
1519 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1520 {
1521 bb1 = BASIC_BLOCK (j);
1522 bitmap_clear_bit (update_bbs, bb1->index);
1523
1524 replace_block_by (bb1, bb2);
1525 nr_bbs_removed++;
1526 }
1527 }
1528
1529 return nr_bbs_removed;
1530 }
1531
1532 /* Resets debug statement STMT if it has uses that are not dominated by their
1533 defs. */
1534
1535 static void
1536 update_debug_stmt (gimple stmt)
1537 {
1538 use_operand_p use_p;
1539 ssa_op_iter oi;
1540 basic_block bbdef, bbuse;
1541 gimple def_stmt;
1542 tree name;
1543
1544 if (!gimple_debug_bind_p (stmt))
1545 return;
1546
1547 bbuse = gimple_bb (stmt);
1548 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1549 {
1550 name = USE_FROM_PTR (use_p);
1551 gcc_assert (TREE_CODE (name) == SSA_NAME);
1552
1553 def_stmt = SSA_NAME_DEF_STMT (name);
1554 gcc_assert (def_stmt != NULL);
1555
1556 bbdef = gimple_bb (def_stmt);
1557 if (bbdef == NULL || bbuse == bbdef
1558 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1559 continue;
1560
1561 gimple_debug_bind_reset_value (stmt);
1562 update_stmt (stmt);
1563 }
1564 }
1565
1566 /* Resets all debug statements that have uses that are not
1567 dominated by their defs. */
1568
1569 static void
1570 update_debug_stmts (void)
1571 {
1572 basic_block bb;
1573 bitmap_iterator bi;
1574 unsigned int i;
1575
1576 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1577 {
1578 gimple stmt;
1579 gimple_stmt_iterator gsi;
1580
1581 bb = BASIC_BLOCK (i);
1582 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1583 {
1584 stmt = gsi_stmt (gsi);
1585 if (!is_gimple_debug (stmt))
1586 continue;
1587 update_debug_stmt (stmt);
1588 }
1589 }
1590 }
1591
1592 /* Runs tail merge optimization. */
1593
1594 unsigned int
1595 tail_merge_optimize (unsigned int todo)
1596 {
1597 int nr_bbs_removed_total = 0;
1598 int nr_bbs_removed;
1599 bool loop_entered = false;
1600 int iteration_nr = 0;
1601 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);
1602
1603 if (!flag_tree_tail_merge || max_iterations == 0)
1604 return 0;
1605
1606 timevar_push (TV_TREE_TAIL_MERGE);
1607
1608 if (!dom_info_available_p (CDI_DOMINATORS))
1609 {
1610 /* PRE can leave us with unreachable blocks, remove them now. */
1611 delete_unreachable_blocks ();
1612 calculate_dominance_info (CDI_DOMINATORS);
1613 }
1614 init_worklist ();
1615
1616 while (!worklist.is_empty ())
1617 {
1618 if (!loop_entered)
1619 {
1620 loop_entered = true;
1621 alloc_cluster_vectors ();
1622 update_bbs = BITMAP_ALLOC (NULL);
1623 }
1624 else
1625 reset_cluster_vectors ();
1626
1627 iteration_nr++;
1628 if (dump_file && (dump_flags & TDF_DETAILS))
1629 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
1630
1631 find_clusters ();
1632 gcc_assert (worklist.is_empty ());
1633 if (all_clusters.is_empty ())
1634 break;
1635
1636 nr_bbs_removed = apply_clusters ();
1637 nr_bbs_removed_total += nr_bbs_removed;
1638 if (nr_bbs_removed == 0)
1639 break;
1640
1641 free_dominance_info (CDI_DOMINATORS);
1642
1643 if (iteration_nr == max_iterations)
1644 break;
1645
1646 calculate_dominance_info (CDI_DOMINATORS);
1647 update_worklist ();
1648 }
1649
1650 if (dump_file && (dump_flags & TDF_DETAILS))
1651 fprintf (dump_file, "htab collision / search: %f\n",
1652 same_succ_htab.collisions ());
1653
1654 if (nr_bbs_removed_total > 0)
1655 {
1656 if (MAY_HAVE_DEBUG_STMTS)
1657 {
1658 calculate_dominance_info (CDI_DOMINATORS);
1659 update_debug_stmts ();
1660 }
1661
1662 if (dump_file && (dump_flags & TDF_DETAILS))
1663 {
1664 fprintf (dump_file, "Before TODOs.\n");
1665 dump_function_to_file (current_function_decl, dump_file, dump_flags);
1666 }
1667
1668 todo |= (TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow);
1669 mark_virtual_operands_for_renaming (cfun);
1670 }
1671
1672 delete_worklist ();
1673 if (loop_entered)
1674 {
1675 delete_cluster_vectors ();
1676 BITMAP_FREE (update_bbs);
1677 }
1678
1679 timevar_pop (TV_TREE_TAIL_MERGE);
1680
1681 return todo;
1682 }