* g++.dg/cpp0x/nullptr21.c: Remove printfs, make self-checking.
[gcc.git] / gcc / tree-ssa-tail-merge.c
1 /* Tail merging for gimple.
2 Copyright (C) 2011, 2012 Free Software Foundation, Inc.
3 Contributed by Tom de Vries (tom@codesourcery.com)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Pass overview.
22
23
24 MOTIVATIONAL EXAMPLE
25
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
27
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
29 {
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
32 intD.0 D.3915;
33 const charD.1 * restrict outputFileName.0D.3914;
34
35 # BLOCK 2 freq:10000
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48 if (D.3915_4 == 0)
49 goto <bb 3>;
50 else
51 goto <bb 4>;
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
53
54 # BLOCK 3 freq:1000
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
60 goto <bb 7>;
61 # SUCC: 7 [100.0%] (fallthru,exec)
62
63 # BLOCK 4 freq:9000
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70 if (fpD.2605_8 == 0B)
71 goto <bb 5>;
72 else
73 goto <bb 6>;
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
75
76 # BLOCK 5 freq:173
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
82 goto <bb 7>;
83 # SUCC: 7 [100.0%] (fallthru,exec)
84
85 # BLOCK 6 freq:8827
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
92
93 # BLOCK 7 freq:10000
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
96 # PT = nonlocal null
97
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 .MEMD.3923_18(6)>
101 # VUSE <.MEMD.3923_11>
102 return ctxD.2601_1;
103 # SUCC: EXIT [100.0%]
104 }
105
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
108
109
110 CONTEXT
111
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just the end parts of it.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
119
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
126
127 For efficient implementation, we would like to value number the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
132
133 For the first situation (same operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
136
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, so the block value numbers will be different.
143
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
150 to merge the blocks.
151
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using scc value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
155
156
157 IMPLEMENTATION
158
159 1. The pass first determines all groups of blocks with the same successor
160 blocks.
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
165
166
167 LIMITATIONS/TODO
168
169 - block only
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
175 - no insertion of gimple_reg phis; we only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
177
178
179 SWITCHES
180
181 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
182
183 #include "config.h"
184 #include "system.h"
185 #include "coretypes.h"
186 #include "tm.h"
187 #include "tree.h"
188 #include "tm_p.h"
189 #include "basic-block.h"
190 #include "flags.h"
191 #include "function.h"
192 #include "tree-flow.h"
193 #include "bitmap.h"
194 #include "tree-ssa-alias.h"
195 #include "params.h"
196 #include "hashtab.h"
197 #include "gimple-pretty-print.h"
198 #include "tree-ssa-sccvn.h"
199 #include "tree-dump.h"
200
201 /* ??? This currently runs as part of tree-ssa-pre. Why is this not
202 a stand-alone GIMPLE pass? */
203 #include "tree-pass.h"
204
205 /* Describes a group of bbs with the same successors. The successor bbs are
206 cached in succs, and the successor edge flags are cached in succ_flags.
207 If a bb has the EDGE_TRUE/VALSE_VALUE flags swapped compared to succ_flags,
208 it's marked in inverse.
209 Additionally, the hash value for the struct is cached in hashval, and
210 in_worklist indicates whether it's currently part of worklist. */
211
212 struct same_succ_def
213 {
214 /* The bbs that have the same successor bbs. */
215 bitmap bbs;
216 /* The successor bbs. */
217 bitmap succs;
218 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
219 bb. */
220 bitmap inverse;
221 /* The edge flags for each of the successor bbs. */
222 VEC (int, heap) *succ_flags;
223 /* Indicates whether the struct is currently in the worklist. */
224 bool in_worklist;
225 /* The hash value of the struct. */
226 hashval_t hashval;
227 };
228 typedef struct same_succ_def *same_succ;
229 typedef const struct same_succ_def *const_same_succ;
230
231 /* A group of bbs where 1 bb from bbs can replace the other bbs. */
232
233 struct bb_cluster_def
234 {
235 /* The bbs in the cluster. */
236 bitmap bbs;
237 /* The preds of the bbs in the cluster. */
238 bitmap preds;
239 /* Index in all_clusters vector. */
240 int index;
241 /* The bb to replace the cluster with. */
242 basic_block rep_bb;
243 };
244 typedef struct bb_cluster_def *bb_cluster;
245 typedef const struct bb_cluster_def *const_bb_cluster;
246
247 /* Per bb-info. */
248
249 struct aux_bb_info
250 {
251 /* The number of non-debug statements in the bb. */
252 int size;
253 /* The same_succ that this bb is a member of. */
254 same_succ bb_same_succ;
255 /* The cluster that this bb is a member of. */
256 bb_cluster cluster;
257 /* The vop state at the exit of a bb. This is shortlived data, used to
258 communicate data between update_block_by and update_vuses. */
259 tree vop_at_exit;
260 /* The bb that either contains or is dominated by the dependencies of the
261 bb. */
262 basic_block dep_bb;
263 };
264
/* Accessors for the fields of the struct aux_bb_info that hangs off the aux
   field of BB.  Each one expands to an lvalue, so it can be read as well as
   assigned.  The argument is parenthesized so that any pointer-valued
   expression, not just a plain identifier, can be passed as BB.  */

#define BB_SIZE(bb) (((struct aux_bb_info *)(bb)->aux)->size)
#define BB_SAME_SUCC(bb) (((struct aux_bb_info *)(bb)->aux)->bb_same_succ)
#define BB_CLUSTER(bb) (((struct aux_bb_info *)(bb)->aux)->cluster)
#define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)(bb)->aux)->vop_at_exit)
#define BB_DEP_BB(bb) (((struct aux_bb_info *)(bb)->aux)->dep_bb)
272
273 /* Returns true if the only effect a statement STMT has, is to define locally
274 used SSA_NAMEs. */
275
276 static bool
277 stmt_local_def (gimple stmt)
278 {
279 basic_block bb, def_bb;
280 imm_use_iterator iter;
281 use_operand_p use_p;
282 tree val;
283 def_operand_p def_p;
284
285 if (gimple_has_side_effects (stmt))
286 return false;
287
288 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
289 if (def_p == NULL)
290 return false;
291
292 val = DEF_FROM_PTR (def_p);
293 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
294 return false;
295
296 def_bb = gimple_bb (stmt);
297
298 FOR_EACH_IMM_USE_FAST (use_p, iter, val)
299 {
300 if (is_gimple_debug (USE_STMT (use_p)))
301 continue;
302 bb = gimple_bb (USE_STMT (use_p));
303 if (bb == def_bb)
304 continue;
305
306 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
307 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
308 continue;
309
310 return false;
311 }
312
313 return true;
314 }
315
316 /* Let GSI skip forwards over local defs. */
317
318 static void
319 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
320 {
321 gimple stmt;
322
323 while (true)
324 {
325 if (gsi_end_p (*gsi))
326 return;
327 stmt = gsi_stmt (*gsi);
328 if (!stmt_local_def (stmt))
329 return;
330 gsi_next_nondebug (gsi);
331 }
332 }
333
334 /* VAL1 and VAL2 are either:
335 - uses in BB1 and BB2, or
336 - phi alternatives for BB1 and BB2.
337 Return true if the uses have the same gvn value. */
338
339 static bool
340 gvn_uses_equal (tree val1, tree val2)
341 {
342 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
343
344 if (val1 == val2)
345 return true;
346
347 if (vn_valueize (val1) != vn_valueize (val2))
348 return false;
349
350 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
351 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
352 }
353
354 /* Prints E to FILE. */
355
356 static void
357 same_succ_print (FILE *file, const same_succ e)
358 {
359 unsigned int i;
360 bitmap_print (file, e->bbs, "bbs:", "\n");
361 bitmap_print (file, e->succs, "succs:", "\n");
362 bitmap_print (file, e->inverse, "inverse:", "\n");
363 fprintf (file, "flags:");
364 for (i = 0; i < VEC_length (int, e->succ_flags); ++i)
365 fprintf (file, " %x", VEC_index (int, e->succ_flags, i));
366 fprintf (file, "\n");
367 }
368
369 /* Prints same_succ VE to VFILE. */
370
371 static int
372 same_succ_print_traverse (void **ve, void *vfile)
373 {
374 const same_succ e = *((const same_succ *)ve);
375 FILE *file = ((FILE*)vfile);
376 same_succ_print (file, e);
377 return 1;
378 }
379
380 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
381
382 static void
383 update_dep_bb (basic_block use_bb, tree val)
384 {
385 basic_block dep_bb;
386
387 /* Not a dep. */
388 if (TREE_CODE (val) != SSA_NAME)
389 return;
390
391 /* Skip use of global def. */
392 if (SSA_NAME_IS_DEFAULT_DEF (val))
393 return;
394
395 /* Skip use of local def. */
396 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
397 if (dep_bb == use_bb)
398 return;
399
400 if (BB_DEP_BB (use_bb) == NULL
401 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
402 BB_DEP_BB (use_bb) = dep_bb;
403 }
404
405 /* Update BB_DEP_BB, given the dependencies in STMT. */
406
407 static void
408 stmt_update_dep_bb (gimple stmt)
409 {
410 ssa_op_iter iter;
411 use_operand_p use;
412
413 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
414 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
415 }
416
417 /* Calculates hash value for same_succ VE. */
418
419 static hashval_t
420 same_succ_hash (const void *ve)
421 {
422 const_same_succ e = (const_same_succ)ve;
423 hashval_t hashval = bitmap_hash (e->succs);
424 int flags;
425 unsigned int i;
426 unsigned int first = bitmap_first_set_bit (e->bbs);
427 basic_block bb = BASIC_BLOCK (first);
428 int size = 0;
429 gimple_stmt_iterator gsi;
430 gimple stmt;
431 tree arg;
432 unsigned int s;
433 bitmap_iterator bs;
434
435 for (gsi = gsi_start_nondebug_bb (bb);
436 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
437 {
438 stmt = gsi_stmt (gsi);
439 stmt_update_dep_bb (stmt);
440 if (stmt_local_def (stmt))
441 continue;
442 size++;
443
444 hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval);
445 if (is_gimple_assign (stmt))
446 hashval = iterative_hash_hashval_t (gimple_assign_rhs_code (stmt),
447 hashval);
448 if (!is_gimple_call (stmt))
449 continue;
450 if (gimple_call_internal_p (stmt))
451 hashval = iterative_hash_hashval_t
452 ((hashval_t) gimple_call_internal_fn (stmt), hashval);
453 else
454 hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval);
455 for (i = 0; i < gimple_call_num_args (stmt); i++)
456 {
457 arg = gimple_call_arg (stmt, i);
458 arg = vn_valueize (arg);
459 hashval = iterative_hash_expr (arg, hashval);
460 }
461 }
462
463 hashval = iterative_hash_hashval_t (size, hashval);
464 BB_SIZE (bb) = size;
465
466 for (i = 0; i < VEC_length (int, e->succ_flags); ++i)
467 {
468 flags = VEC_index (int, e->succ_flags, i);
469 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
470 hashval = iterative_hash_hashval_t (flags, hashval);
471 }
472
473 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
474 {
475 int n = find_edge (bb, BASIC_BLOCK (s))->dest_idx;
476 for (gsi = gsi_start_phis (BASIC_BLOCK (s)); !gsi_end_p (gsi);
477 gsi_next (&gsi))
478 {
479 gimple phi = gsi_stmt (gsi);
480 tree lhs = gimple_phi_result (phi);
481 tree val = gimple_phi_arg_def (phi, n);
482
483 if (!is_gimple_reg (lhs))
484 continue;
485 update_dep_bb (bb, val);
486 }
487 }
488
489 return hashval;
490 }
491
492 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
493 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
494 the other edge flags. */
495
496 static bool
497 inverse_flags (const_same_succ e1, const_same_succ e2)
498 {
499 int f1a, f1b, f2a, f2b;
500 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
501
502 if (VEC_length (int, e1->succ_flags) != 2)
503 return false;
504
505 f1a = VEC_index (int, e1->succ_flags, 0);
506 f1b = VEC_index (int, e1->succ_flags, 1);
507 f2a = VEC_index (int, e2->succ_flags, 0);
508 f2b = VEC_index (int, e2->succ_flags, 1);
509
510 if (f1a == f2a && f1b == f2b)
511 return false;
512
513 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
514 }
515
516 /* Compares SAME_SUCCs VE1 and VE2. */
517
518 static int
519 same_succ_equal (const void *ve1, const void *ve2)
520 {
521 const_same_succ e1 = (const_same_succ)ve1;
522 const_same_succ e2 = (const_same_succ)ve2;
523 unsigned int i, first1, first2;
524 gimple_stmt_iterator gsi1, gsi2;
525 gimple s1, s2;
526 basic_block bb1, bb2;
527
528 if (e1->hashval != e2->hashval)
529 return 0;
530
531 if (VEC_length (int, e1->succ_flags) != VEC_length (int, e2->succ_flags))
532 return 0;
533
534 if (!bitmap_equal_p (e1->succs, e2->succs))
535 return 0;
536
537 if (!inverse_flags (e1, e2))
538 {
539 for (i = 0; i < VEC_length (int, e1->succ_flags); ++i)
540 if (VEC_index (int, e1->succ_flags, i)
541 != VEC_index (int, e1->succ_flags, i))
542 return 0;
543 }
544
545 first1 = bitmap_first_set_bit (e1->bbs);
546 first2 = bitmap_first_set_bit (e2->bbs);
547
548 bb1 = BASIC_BLOCK (first1);
549 bb2 = BASIC_BLOCK (first2);
550
551 if (BB_SIZE (bb1) != BB_SIZE (bb2))
552 return 0;
553
554 gsi1 = gsi_start_nondebug_bb (bb1);
555 gsi2 = gsi_start_nondebug_bb (bb2);
556 gsi_advance_fw_nondebug_nonlocal (&gsi1);
557 gsi_advance_fw_nondebug_nonlocal (&gsi2);
558 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
559 {
560 s1 = gsi_stmt (gsi1);
561 s2 = gsi_stmt (gsi2);
562 if (gimple_code (s1) != gimple_code (s2))
563 return 0;
564 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
565 return 0;
566 gsi_next_nondebug (&gsi1);
567 gsi_next_nondebug (&gsi2);
568 gsi_advance_fw_nondebug_nonlocal (&gsi1);
569 gsi_advance_fw_nondebug_nonlocal (&gsi2);
570 }
571
572 return 1;
573 }
574
575 /* Alloc and init a new SAME_SUCC. */
576
577 static same_succ
578 same_succ_alloc (void)
579 {
580 same_succ same = XNEW (struct same_succ_def);
581
582 same->bbs = BITMAP_ALLOC (NULL);
583 same->succs = BITMAP_ALLOC (NULL);
584 same->inverse = BITMAP_ALLOC (NULL);
585 same->succ_flags = VEC_alloc (int, heap, 10);
586 same->in_worklist = false;
587
588 return same;
589 }
590
591 /* Delete same_succ VE. */
592
593 static void
594 same_succ_delete (void *ve)
595 {
596 same_succ e = (same_succ)ve;
597
598 BITMAP_FREE (e->bbs);
599 BITMAP_FREE (e->succs);
600 BITMAP_FREE (e->inverse);
601 VEC_free (int, heap, e->succ_flags);
602
603 XDELETE (ve);
604 }
605
606 /* Reset same_succ SAME. */
607
608 static void
609 same_succ_reset (same_succ same)
610 {
611 bitmap_clear (same->bbs);
612 bitmap_clear (same->succs);
613 bitmap_clear (same->inverse);
614 VEC_truncate (int, same->succ_flags, 0);
615 }
616
617 /* Hash table with all same_succ entries. */
618
619 static htab_t same_succ_htab;
620
621 /* Array that is used to store the edge flags for a successor. */
622
623 static int *same_succ_edge_flags;
624
625 /* Bitmap that is used to mark bbs that are recently deleted. */
626
627 static bitmap deleted_bbs;
628
629 /* Bitmap that is used to mark predecessors of bbs that are
630 deleted. */
631
632 static bitmap deleted_bb_preds;
633
634 /* Prints same_succ_htab to stderr. */
635
636 extern void debug_same_succ (void);
637 DEBUG_FUNCTION void
638 debug_same_succ ( void)
639 {
640 htab_traverse (same_succ_htab, same_succ_print_traverse, stderr);
641 }
642
643 DEF_VEC_P (same_succ);
644 DEF_VEC_ALLOC_P (same_succ, heap);
645
646 /* Vector of bbs to process. */
647
648 static VEC (same_succ, heap) *worklist;
649
650 /* Prints worklist to FILE. */
651
652 static void
653 print_worklist (FILE *file)
654 {
655 unsigned int i;
656 for (i = 0; i < VEC_length (same_succ, worklist); ++i)
657 same_succ_print (file, VEC_index (same_succ, worklist, i));
658 }
659
660 /* Adds SAME to worklist. */
661
662 static void
663 add_to_worklist (same_succ same)
664 {
665 if (same->in_worklist)
666 return;
667
668 if (bitmap_count_bits (same->bbs) < 2)
669 return;
670
671 same->in_worklist = true;
672 VEC_safe_push (same_succ, heap, worklist, same);
673 }
674
675 /* Add BB to same_succ_htab. */
676
677 static void
678 find_same_succ_bb (basic_block bb, same_succ *same_p)
679 {
680 unsigned int j;
681 bitmap_iterator bj;
682 same_succ same = *same_p;
683 same_succ *slot;
684 edge_iterator ei;
685 edge e;
686
687 if (bb == NULL)
688 return;
689 bitmap_set_bit (same->bbs, bb->index);
690 FOR_EACH_EDGE (e, ei, bb->succs)
691 {
692 int index = e->dest->index;
693 bitmap_set_bit (same->succs, index);
694 same_succ_edge_flags[index] = e->flags;
695 }
696 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
697 VEC_safe_push (int, heap, same->succ_flags, same_succ_edge_flags[j]);
698
699 same->hashval = same_succ_hash (same);
700
701 slot = (same_succ *) htab_find_slot_with_hash (same_succ_htab, same,
702 same->hashval, INSERT);
703 if (*slot == NULL)
704 {
705 *slot = same;
706 BB_SAME_SUCC (bb) = same;
707 add_to_worklist (same);
708 *same_p = NULL;
709 }
710 else
711 {
712 bitmap_set_bit ((*slot)->bbs, bb->index);
713 BB_SAME_SUCC (bb) = *slot;
714 add_to_worklist (*slot);
715 if (inverse_flags (same, *slot))
716 bitmap_set_bit ((*slot)->inverse, bb->index);
717 same_succ_reset (same);
718 }
719 }
720
721 /* Find bbs with same successors. */
722
723 static void
724 find_same_succ (void)
725 {
726 same_succ same = same_succ_alloc ();
727 basic_block bb;
728
729 FOR_EACH_BB (bb)
730 {
731 find_same_succ_bb (bb, &same);
732 if (same == NULL)
733 same = same_succ_alloc ();
734 }
735
736 same_succ_delete (same);
737 }
738
739 /* Initializes worklist administration. */
740
741 static void
742 init_worklist (void)
743 {
744 alloc_aux_for_blocks (sizeof (struct aux_bb_info));
745 same_succ_htab
746 = htab_create (n_basic_blocks, same_succ_hash, same_succ_equal,
747 same_succ_delete);
748 same_succ_edge_flags = XCNEWVEC (int, last_basic_block);
749 deleted_bbs = BITMAP_ALLOC (NULL);
750 deleted_bb_preds = BITMAP_ALLOC (NULL);
751 worklist = VEC_alloc (same_succ, heap, n_basic_blocks);
752 find_same_succ ();
753
754 if (dump_file && (dump_flags & TDF_DETAILS))
755 {
756 fprintf (dump_file, "initial worklist:\n");
757 print_worklist (dump_file);
758 }
759 }
760
761 /* Deletes worklist administration. */
762
763 static void
764 delete_worklist (void)
765 {
766 free_aux_for_blocks ();
767 htab_delete (same_succ_htab);
768 same_succ_htab = NULL;
769 XDELETEVEC (same_succ_edge_flags);
770 same_succ_edge_flags = NULL;
771 BITMAP_FREE (deleted_bbs);
772 BITMAP_FREE (deleted_bb_preds);
773 VEC_free (same_succ, heap, worklist);
774 }
775
776 /* Mark BB as deleted, and mark its predecessors. */
777
778 static void
779 mark_basic_block_deleted (basic_block bb)
780 {
781 edge e;
782 edge_iterator ei;
783
784 bitmap_set_bit (deleted_bbs, bb->index);
785
786 FOR_EACH_EDGE (e, ei, bb->preds)
787 bitmap_set_bit (deleted_bb_preds, e->src->index);
788 }
789
790 /* Removes BB from its corresponding same_succ. */
791
792 static void
793 same_succ_flush_bb (basic_block bb)
794 {
795 same_succ same = BB_SAME_SUCC (bb);
796 BB_SAME_SUCC (bb) = NULL;
797 if (bitmap_single_bit_set_p (same->bbs))
798 htab_remove_elt_with_hash (same_succ_htab, same, same->hashval);
799 else
800 bitmap_clear_bit (same->bbs, bb->index);
801 }
802
803 /* Removes all bbs in BBS from their corresponding same_succ. */
804
805 static void
806 same_succ_flush_bbs (bitmap bbs)
807 {
808 unsigned int i;
809 bitmap_iterator bi;
810
811 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
812 same_succ_flush_bb (BASIC_BLOCK (i));
813 }
814
815 /* Release the last vdef in BB, either normal or phi result. */
816
817 static void
818 release_last_vdef (basic_block bb)
819 {
820 gimple_stmt_iterator i;
821
822 for (i = gsi_last_bb (bb); !gsi_end_p (i); gsi_prev_nondebug (&i))
823 {
824 gimple stmt = gsi_stmt (i);
825 if (gimple_vdef (stmt) == NULL_TREE)
826 continue;
827
828 mark_virtual_operand_for_renaming (gimple_vdef (stmt));
829 return;
830 }
831
832 for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i))
833 {
834 gimple phi = gsi_stmt (i);
835 tree res = gimple_phi_result (phi);
836
837 if (is_gimple_reg (res))
838 continue;
839
840 mark_virtual_phi_result_for_renaming (phi);
841 return;
842 }
843
844 }
845
846 /* For deleted_bb_preds, find bbs with same successors. */
847
848 static void
849 update_worklist (void)
850 {
851 unsigned int i;
852 bitmap_iterator bi;
853 basic_block bb;
854 same_succ same;
855
856 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
857 bitmap_clear (deleted_bbs);
858
859 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
860 same_succ_flush_bbs (deleted_bb_preds);
861
862 same = same_succ_alloc ();
863 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
864 {
865 bb = BASIC_BLOCK (i);
866 gcc_assert (bb != NULL);
867 find_same_succ_bb (bb, &same);
868 if (same == NULL)
869 same = same_succ_alloc ();
870 }
871 same_succ_delete (same);
872 bitmap_clear (deleted_bb_preds);
873 }
874
875 /* Prints cluster C to FILE. */
876
877 static void
878 print_cluster (FILE *file, bb_cluster c)
879 {
880 if (c == NULL)
881 return;
882 bitmap_print (file, c->bbs, "bbs:", "\n");
883 bitmap_print (file, c->preds, "preds:", "\n");
884 }
885
886 /* Prints cluster C to stderr. */
887
888 extern void debug_cluster (bb_cluster);
889 DEBUG_FUNCTION void
890 debug_cluster (bb_cluster c)
891 {
892 print_cluster (stderr, c);
893 }
894
895 /* Update C->rep_bb, given that BB is added to the cluster. */
896
897 static void
898 update_rep_bb (bb_cluster c, basic_block bb)
899 {
900 /* Initial. */
901 if (c->rep_bb == NULL)
902 {
903 c->rep_bb = bb;
904 return;
905 }
906
907 /* Current needs no deps, keep it. */
908 if (BB_DEP_BB (c->rep_bb) == NULL)
909 return;
910
911 /* Bb needs no deps, change rep_bb. */
912 if (BB_DEP_BB (bb) == NULL)
913 {
914 c->rep_bb = bb;
915 return;
916 }
917
918 /* Bb needs last deps earlier than current, change rep_bb. A potential
919 problem with this, is that the first deps might also be earlier, which
920 would mean we prefer longer lifetimes for the deps. To be able to check
921 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
922 BB_DEP_BB, which is really BB_LAST_DEP_BB.
923 The benefit of choosing the bb with last deps earlier, is that it can
924 potentially be used as replacement for more bbs. */
925 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
926 c->rep_bb = bb;
927 }
928
929 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
930
931 static void
932 add_bb_to_cluster (bb_cluster c, basic_block bb)
933 {
934 edge e;
935 edge_iterator ei;
936
937 bitmap_set_bit (c->bbs, bb->index);
938
939 FOR_EACH_EDGE (e, ei, bb->preds)
940 bitmap_set_bit (c->preds, e->src->index);
941
942 update_rep_bb (c, bb);
943 }
944
945 /* Allocate and init new cluster. */
946
947 static bb_cluster
948 new_cluster (void)
949 {
950 bb_cluster c;
951 c = XCNEW (struct bb_cluster_def);
952 c->bbs = BITMAP_ALLOC (NULL);
953 c->preds = BITMAP_ALLOC (NULL);
954 c->rep_bb = NULL;
955 return c;
956 }
957
958 /* Delete clusters. */
959
960 static void
961 delete_cluster (bb_cluster c)
962 {
963 if (c == NULL)
964 return;
965 BITMAP_FREE (c->bbs);
966 BITMAP_FREE (c->preds);
967 XDELETE (c);
968 }
969
970 DEF_VEC_P (bb_cluster);
971 DEF_VEC_ALLOC_P (bb_cluster, heap);
972
973 /* Array that contains all clusters. */
974
975 static VEC (bb_cluster, heap) *all_clusters;
976
977 /* Allocate all cluster vectors. */
978
979 static void
980 alloc_cluster_vectors (void)
981 {
982 all_clusters = VEC_alloc (bb_cluster, heap, n_basic_blocks);
983 }
984
985 /* Reset all cluster vectors. */
986
987 static void
988 reset_cluster_vectors (void)
989 {
990 unsigned int i;
991 basic_block bb;
992 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i)
993 delete_cluster (VEC_index (bb_cluster, all_clusters, i));
994 VEC_truncate (bb_cluster, all_clusters, 0);
995 FOR_EACH_BB (bb)
996 BB_CLUSTER (bb) = NULL;
997 }
998
999 /* Delete all cluster vectors. */
1000
1001 static void
1002 delete_cluster_vectors (void)
1003 {
1004 unsigned int i;
1005 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i)
1006 delete_cluster (VEC_index (bb_cluster, all_clusters, i));
1007 VEC_free (bb_cluster, heap, all_clusters);
1008 }
1009
1010 /* Merge cluster C2 into C1. */
1011
1012 static void
1013 merge_clusters (bb_cluster c1, bb_cluster c2)
1014 {
1015 bitmap_ior_into (c1->bbs, c2->bbs);
1016 bitmap_ior_into (c1->preds, c2->preds);
1017 }
1018
1019 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
1020 all_clusters, or merge c with existing cluster. */
1021
1022 static void
1023 set_cluster (basic_block bb1, basic_block bb2)
1024 {
1025 basic_block merge_bb, other_bb;
1026 bb_cluster merge, old, c;
1027
1028 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1029 {
1030 c = new_cluster ();
1031 add_bb_to_cluster (c, bb1);
1032 add_bb_to_cluster (c, bb2);
1033 BB_CLUSTER (bb1) = c;
1034 BB_CLUSTER (bb2) = c;
1035 c->index = VEC_length (bb_cluster, all_clusters);
1036 VEC_safe_push (bb_cluster, heap, all_clusters, c);
1037 }
1038 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1039 {
1040 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1041 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1042 merge = BB_CLUSTER (merge_bb);
1043 add_bb_to_cluster (merge, other_bb);
1044 BB_CLUSTER (other_bb) = merge;
1045 }
1046 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1047 {
1048 unsigned int i;
1049 bitmap_iterator bi;
1050
1051 old = BB_CLUSTER (bb2);
1052 merge = BB_CLUSTER (bb1);
1053 merge_clusters (merge, old);
1054 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1055 BB_CLUSTER (BASIC_BLOCK (i)) = merge;
1056 VEC_replace (bb_cluster, all_clusters, old->index, NULL);
1057 update_rep_bb (merge, old->rep_bb);
1058 delete_cluster (old);
1059 }
1060 else
1061 gcc_unreachable ();
1062 }
1063
1064 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
1065 gimple_bb (s2) are members of SAME_SUCC. */
1066
1067 static bool
1068 gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
1069 {
1070 unsigned int i;
1071 tree lhs1, lhs2;
1072 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1073 tree t1, t2;
1074 bool equal, inv_cond;
1075 enum tree_code code1, code2;
1076
1077 if (gimple_code (s1) != gimple_code (s2))
1078 return false;
1079
1080 switch (gimple_code (s1))
1081 {
1082 case GIMPLE_CALL:
1083 if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1084 return false;
1085 if (!gimple_call_same_target_p (s1, s2))
1086 return false;
1087
1088 /* Eventually, we'll significantly complicate the CFG by adding
1089 back edges to properly model the effects of transaction restart.
1090 For the bulk of optimization this does not matter, but what we
1091 cannot recover from is tail merging blocks between two separate
1092 transactions. Avoid that by making commit not match. */
1093 if (gimple_call_builtin_p (s1, BUILT_IN_TM_COMMIT))
1094 return false;
1095
1096 equal = true;
1097 for (i = 0; i < gimple_call_num_args (s1); ++i)
1098 {
1099 t1 = gimple_call_arg (s1, i);
1100 t2 = gimple_call_arg (s2, i);
1101 if (operand_equal_p (t1, t2, 0))
1102 continue;
1103 if (gvn_uses_equal (t1, t2))
1104 continue;
1105 equal = false;
1106 break;
1107 }
1108 if (!equal)
1109 return false;
1110
1111 lhs1 = gimple_get_lhs (s1);
1112 lhs2 = gimple_get_lhs (s2);
1113 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1114 return true;
1115 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1116 return false;
1117 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1118 return vn_valueize (lhs1) == vn_valueize (lhs2);
1119 return operand_equal_p (lhs1, lhs2, 0);
1120
1121 case GIMPLE_ASSIGN:
1122 lhs1 = gimple_get_lhs (s1);
1123 lhs2 = gimple_get_lhs (s2);
1124 if (gimple_vdef (s1))
1125 {
1126 if (vn_valueize (gimple_vdef (s1)) != vn_valueize (gimple_vdef (s2)))
1127 return false;
1128 if (TREE_CODE (lhs1) != SSA_NAME
1129 && TREE_CODE (lhs2) != SSA_NAME)
1130 return true;
1131 }
1132 return (TREE_CODE (lhs1) == SSA_NAME
1133 && TREE_CODE (lhs2) == SSA_NAME
1134 && vn_valueize (lhs1) == vn_valueize (lhs2));
1135
1136 case GIMPLE_COND:
1137 t1 = gimple_cond_lhs (s1);
1138 t2 = gimple_cond_lhs (s2);
1139 if (!operand_equal_p (t1, t2, 0)
1140 && !gvn_uses_equal (t1, t2))
1141 return false;
1142
1143 t1 = gimple_cond_rhs (s1);
1144 t2 = gimple_cond_rhs (s2);
1145 if (!operand_equal_p (t1, t2, 0)
1146 && !gvn_uses_equal (t1, t2))
1147 return false;
1148
1149 code1 = gimple_expr_code (s1);
1150 code2 = gimple_expr_code (s2);
1151 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1152 != bitmap_bit_p (same_succ->inverse, bb2->index));
1153 if (inv_cond)
1154 {
1155 bool honor_nans
1156 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1))));
1157 code2 = invert_tree_comparison (code2, honor_nans);
1158 }
1159 return code1 == code2;
1160
1161 default:
1162 return false;
1163 }
1164 }
1165
1166 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE.
1167 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1168 processed statements. */
1169
1170 static void
1171 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1172 bool *vuse_escaped)
1173 {
1174 gimple stmt;
1175 tree lvuse;
1176
1177 while (true)
1178 {
1179 if (gsi_end_p (*gsi))
1180 return;
1181 stmt = gsi_stmt (*gsi);
1182
1183 lvuse = gimple_vuse (stmt);
1184 if (lvuse != NULL_TREE)
1185 {
1186 *vuse = lvuse;
1187 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1188 *vuse_escaped = true;
1189 }
1190
1191 if (!stmt_local_def (stmt))
1192 return;
1193 gsi_prev_nondebug (gsi);
1194 }
1195 }
1196
1197 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
1198 clusters them. */
1199
1200 static void
1201 find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
1202 {
1203 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1204 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1205 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1206 bool vuse_escaped = false;
1207
1208 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1209 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1210
1211 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1212 {
1213 if (!gimple_equal_p (same_succ, gsi_stmt (gsi1), gsi_stmt (gsi2)))
1214 return;
1215
1216 gsi_prev_nondebug (&gsi1);
1217 gsi_prev_nondebug (&gsi2);
1218 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1219 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1220 }
1221
1222 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1223 return;
1224
1225 /* If the incoming vuses are not the same, and the vuse escaped into an
1226 SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1227 which potentially means the semantics of one of the blocks will be changed.
1228 TODO: make this check more precise. */
1229 if (vuse_escaped && vuse1 != vuse2)
1230 return;
1231
1232 if (dump_file)
1233 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1234 bb1->index, bb2->index);
1235
1236 set_cluster (bb1, bb2);
1237 }
1238
1239 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1240 E2 are equal. */
1241
1242 static bool
1243 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1244 {
1245 int n1 = e1->dest_idx, n2 = e2->dest_idx;
1246 gimple_stmt_iterator gsi;
1247
1248 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1249 {
1250 gimple phi = gsi_stmt (gsi);
1251 tree lhs = gimple_phi_result (phi);
1252 tree val1 = gimple_phi_arg_def (phi, n1);
1253 tree val2 = gimple_phi_arg_def (phi, n2);
1254
1255 if (!is_gimple_reg (lhs))
1256 continue;
1257
1258 if (operand_equal_for_phi_arg_p (val1, val2))
1259 continue;
1260 if (gvn_uses_equal (val1, val2))
1261 continue;
1262
1263 return false;
1264 }
1265
1266 return true;
1267 }
1268
1269 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1270 phi alternatives for BB1 and BB2 are equal. */
1271
1272 static bool
1273 same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
1274 {
1275 unsigned int s;
1276 bitmap_iterator bs;
1277 edge e1, e2;
1278 basic_block succ;
1279
1280 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1281 {
1282 succ = BASIC_BLOCK (s);
1283 e1 = find_edge (bb1, succ);
1284 e2 = find_edge (bb2, succ);
1285 if (e1->flags & EDGE_COMPLEX
1286 || e2->flags & EDGE_COMPLEX)
1287 return false;
1288
1289 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1290 the same value. */
1291 if (!same_phi_alternatives_1 (succ, e1, e2))
1292 return false;
1293 }
1294
1295 return true;
1296 }
1297
1298 /* Return true if BB has non-vop phis. */
1299
1300 static bool
1301 bb_has_non_vop_phi (basic_block bb)
1302 {
1303 gimple_seq phis = phi_nodes (bb);
1304 gimple phi;
1305
1306 if (phis == NULL)
1307 return false;
1308
1309 if (!gimple_seq_singleton_p (phis))
1310 return true;
1311
1312 phi = gimple_seq_first_stmt (phis);
1313 return is_gimple_reg (gimple_phi_result (phi));
1314 }
1315
1316 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1317 invariant that uses in FROM are dominates by their defs. */
1318
1319 static bool
1320 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1321 {
1322 basic_block cd, dep_bb = BB_DEP_BB (to);
1323 edge_iterator ei;
1324 edge e;
1325 bitmap from_preds = BITMAP_ALLOC (NULL);
1326
1327 if (dep_bb == NULL)
1328 return true;
1329
1330 FOR_EACH_EDGE (e, ei, from->preds)
1331 bitmap_set_bit (from_preds, e->src->index);
1332 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1333 BITMAP_FREE (from_preds);
1334
1335 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1336 }
1337
1338 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1339 replacement bb) and vice versa maintains the invariant that uses in the
1340 replacement are dominates by their defs. */
1341
1342 static bool
1343 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1344 {
1345 if (BB_CLUSTER (bb1) != NULL)
1346 bb1 = BB_CLUSTER (bb1)->rep_bb;
1347
1348 if (BB_CLUSTER (bb2) != NULL)
1349 bb2 = BB_CLUSTER (bb2)->rep_bb;
1350
1351 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1352 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1353 }
1354
1355 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */
1356
1357 static void
1358 find_clusters_1 (same_succ same_succ)
1359 {
1360 basic_block bb1, bb2;
1361 unsigned int i, j;
1362 bitmap_iterator bi, bj;
1363 int nr_comparisons;
1364 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
1365
1366 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1367 {
1368 bb1 = BASIC_BLOCK (i);
1369
1370 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding
1371 phi-nodes in bb1 and bb2, with the same alternatives for the same
1372 preds. */
1373 if (bb_has_non_vop_phi (bb1))
1374 continue;
1375
1376 nr_comparisons = 0;
1377 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1378 {
1379 bb2 = BASIC_BLOCK (j);
1380
1381 if (bb_has_non_vop_phi (bb2))
1382 continue;
1383
1384 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1385 continue;
1386
1387 /* Limit quadratic behaviour. */
1388 nr_comparisons++;
1389 if (nr_comparisons > max_comparisons)
1390 break;
1391
1392 /* This is a conservative dependency check. We could test more
1393 precise for allowed replacement direction. */
1394 if (!deps_ok_for_redirect (bb1, bb2))
1395 continue;
1396
1397 if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1398 continue;
1399
1400 find_duplicate (same_succ, bb1, bb2);
1401 }
1402 }
1403 }
1404
1405 /* Find clusters of bbs which can be merged. */
1406
1407 static void
1408 find_clusters (void)
1409 {
1410 same_succ same;
1411
1412 while (!VEC_empty (same_succ, worklist))
1413 {
1414 same = VEC_pop (same_succ, worklist);
1415 same->in_worklist = false;
1416 if (dump_file && (dump_flags & TDF_DETAILS))
1417 {
1418 fprintf (dump_file, "processing worklist entry\n");
1419 same_succ_print (dump_file, same);
1420 }
1421 find_clusters_1 (same);
1422 }
1423 }
1424
1425 /* Returns the vop phi of BB, if any. */
1426
1427 static gimple
1428 vop_phi (basic_block bb)
1429 {
1430 gimple stmt;
1431 gimple_stmt_iterator gsi;
1432 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1433 {
1434 stmt = gsi_stmt (gsi);
1435 if (is_gimple_reg (gimple_phi_result (stmt)))
1436 continue;
1437 return stmt;
1438 }
1439 return NULL;
1440 }
1441
1442 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */
1443
1444 static void
1445 replace_block_by (basic_block bb1, basic_block bb2)
1446 {
1447 edge pred_edge;
1448 unsigned int i;
1449 gimple bb2_phi;
1450
1451 bb2_phi = vop_phi (bb2);
1452
1453 /* Mark the basic block as deleted. */
1454 mark_basic_block_deleted (bb1);
1455
1456 /* Redirect the incoming edges of bb1 to bb2. */
1457 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
1458 {
1459 pred_edge = EDGE_PRED (bb1, i - 1);
1460 pred_edge = redirect_edge_and_branch (pred_edge, bb2);
1461 gcc_assert (pred_edge != NULL);
1462
1463 if (bb2_phi == NULL)
1464 continue;
1465
1466 /* The phi might have run out of capacity when the redirect added an
1467 argument, which means it could have been replaced. Refresh it. */
1468 bb2_phi = vop_phi (bb2);
1469
1470 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
1471 pred_edge, UNKNOWN_LOCATION);
1472 }
1473
1474 bb2->frequency += bb1->frequency;
1475 if (bb2->frequency > BB_FREQ_MAX)
1476 bb2->frequency = BB_FREQ_MAX;
1477 bb1->frequency = 0;
1478
1479 /* Do updates that use bb1, before deleting bb1. */
1480 release_last_vdef (bb1);
1481 same_succ_flush_bb (bb1);
1482
1483 delete_basic_block (bb1);
1484 }
1485
/* Bbs for which update_debug_stmt needs to be called.  The bitmap is
   allocated and freed by tail_merge_optimize.  Apply_clusters sets the bit
   of every cluster representative and clears the bit of every bb it
   replaces; update_debug_stmts walks the bbs that remain set.  */

static bitmap update_bbs;
1489
1490 /* For each cluster in all_clusters, merge all cluster->bbs. Returns
1491 number of bbs removed. */
1492
1493 static int
1494 apply_clusters (void)
1495 {
1496 basic_block bb1, bb2;
1497 bb_cluster c;
1498 unsigned int i, j;
1499 bitmap_iterator bj;
1500 int nr_bbs_removed = 0;
1501
1502 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i)
1503 {
1504 c = VEC_index (bb_cluster, all_clusters, i);
1505 if (c == NULL)
1506 continue;
1507
1508 bb2 = c->rep_bb;
1509 bitmap_set_bit (update_bbs, bb2->index);
1510
1511 bitmap_clear_bit (c->bbs, bb2->index);
1512 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1513 {
1514 bb1 = BASIC_BLOCK (j);
1515 bitmap_clear_bit (update_bbs, bb1->index);
1516
1517 replace_block_by (bb1, bb2);
1518 nr_bbs_removed++;
1519 }
1520 }
1521
1522 return nr_bbs_removed;
1523 }
1524
1525 /* Resets debug statement STMT if it has uses that are not dominated by their
1526 defs. */
1527
1528 static void
1529 update_debug_stmt (gimple stmt)
1530 {
1531 use_operand_p use_p;
1532 ssa_op_iter oi;
1533 basic_block bbdef, bbuse;
1534 gimple def_stmt;
1535 tree name;
1536
1537 if (!gimple_debug_bind_p (stmt))
1538 return;
1539
1540 bbuse = gimple_bb (stmt);
1541 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1542 {
1543 name = USE_FROM_PTR (use_p);
1544 gcc_assert (TREE_CODE (name) == SSA_NAME);
1545
1546 def_stmt = SSA_NAME_DEF_STMT (name);
1547 gcc_assert (def_stmt != NULL);
1548
1549 bbdef = gimple_bb (def_stmt);
1550 if (bbdef == NULL || bbuse == bbdef
1551 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1552 continue;
1553
1554 gimple_debug_bind_reset_value (stmt);
1555 update_stmt (stmt);
1556 }
1557 }
1558
1559 /* Resets all debug statements that have uses that are not
1560 dominated by their defs. */
1561
1562 static void
1563 update_debug_stmts (void)
1564 {
1565 basic_block bb;
1566 bitmap_iterator bi;
1567 unsigned int i;
1568
1569 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1570 {
1571 gimple stmt;
1572 gimple_stmt_iterator gsi;
1573
1574 bb = BASIC_BLOCK (i);
1575 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1576 {
1577 stmt = gsi_stmt (gsi);
1578 if (!is_gimple_debug (stmt))
1579 continue;
1580 update_debug_stmt (stmt);
1581 }
1582 }
1583 }
1584
1585 /* Runs tail merge optimization. */
1586
1587 unsigned int
1588 tail_merge_optimize (unsigned int todo)
1589 {
1590 int nr_bbs_removed_total = 0;
1591 int nr_bbs_removed;
1592 bool loop_entered = false;
1593 int iteration_nr = 0;
1594 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);
1595
1596 if (!flag_tree_tail_merge || max_iterations == 0)
1597 return 0;
1598
1599 timevar_push (TV_TREE_TAIL_MERGE);
1600
1601 calculate_dominance_info (CDI_DOMINATORS);
1602 init_worklist ();
1603
1604 while (!VEC_empty (same_succ, worklist))
1605 {
1606 if (!loop_entered)
1607 {
1608 loop_entered = true;
1609 alloc_cluster_vectors ();
1610 update_bbs = BITMAP_ALLOC (NULL);
1611 }
1612 else
1613 reset_cluster_vectors ();
1614
1615 iteration_nr++;
1616 if (dump_file && (dump_flags & TDF_DETAILS))
1617 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
1618
1619 find_clusters ();
1620 gcc_assert (VEC_empty (same_succ, worklist));
1621 if (VEC_empty (bb_cluster, all_clusters))
1622 break;
1623
1624 nr_bbs_removed = apply_clusters ();
1625 nr_bbs_removed_total += nr_bbs_removed;
1626 if (nr_bbs_removed == 0)
1627 break;
1628
1629 free_dominance_info (CDI_DOMINATORS);
1630
1631 if (iteration_nr == max_iterations)
1632 break;
1633
1634 calculate_dominance_info (CDI_DOMINATORS);
1635 update_worklist ();
1636 }
1637
1638 if (dump_file && (dump_flags & TDF_DETAILS))
1639 fprintf (dump_file, "htab collision / search: %f\n",
1640 htab_collisions (same_succ_htab));
1641
1642 if (nr_bbs_removed_total > 0)
1643 {
1644 if (MAY_HAVE_DEBUG_STMTS)
1645 {
1646 calculate_dominance_info (CDI_DOMINATORS);
1647 update_debug_stmts ();
1648 }
1649
1650 if (dump_file && (dump_flags & TDF_DETAILS))
1651 {
1652 fprintf (dump_file, "Before TODOs.\n");
1653 dump_function_to_file (current_function_decl, dump_file, dump_flags);
1654 }
1655
1656 todo |= (TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow);
1657 mark_sym_for_renaming (gimple_vop (cfun));
1658 }
1659
1660 delete_worklist ();
1661 if (loop_entered)
1662 {
1663 delete_cluster_vectors ();
1664 BITMAP_FREE (update_bbs);
1665 }
1666
1667 timevar_pop (TV_TREE_TAIL_MERGE);
1668
1669 return todo;
1670 }