1 /* Tail merging for gimple.
2 Copyright (C) 2011-2018 Free Software Foundation, Inc.
3 Contributed by Tom de Vries (tom@codesourcery.com)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Pass overview.
22
23
24 MOTIVATIONAL EXAMPLE
25
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
27
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
29 {
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
32 intD.0 D.3915;
33 const charD.1 * restrict outputFileName.0D.3914;
34
35 # BLOCK 2 freq:10000
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48 if (D.3915_4 == 0)
49 goto <bb 3>;
50 else
51 goto <bb 4>;
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
53
54 # BLOCK 3 freq:1000
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
60 goto <bb 7>;
61 # SUCC: 7 [100.0%] (fallthru,exec)
62
63 # BLOCK 4 freq:9000
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70 if (fpD.2605_8 == 0B)
71 goto <bb 5>;
72 else
73 goto <bb 6>;
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
75
76 # BLOCK 5 freq:173
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
82 goto <bb 7>;
83 # SUCC: 7 [100.0%] (fallthru,exec)
84
85 # BLOCK 6 freq:8827
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
92
93 # BLOCK 7 freq:10000
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
96 # PT = nonlocal null
97
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 .MEMD.3923_18(6)>
101 # VUSE <.MEMD.3923_11>
102 return ctxD.2601_1;
103 # SUCC: EXIT [100.0%]
104 }
105
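    Reconstructed C source for the dump above (an approximate sketch; the
    actual testcase gcc.dg/pr43864.c may differ in details):

      #include <stdio.h>
      #include <stdlib.h>
      #include <unistd.h>

      extern void foo (char *, FILE *);

      char *
      hprofStartupp (char *outputFileName, char *ctx)
      {
        char fileName[1000];
        FILE *fp;

        sprintf (fileName, outputFileName);
        if (access (fileName, 1) == 0)
          {
            free (ctx);
            return 0;
          }
        fp = fopen (fileName, 0);
        if (fp == 0)
          {
            free (ctx);
            return 0;
          }
        foo (outputFileName, fp);
        return ctx;
      }
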
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
108
109
110 CONTEXT
111
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just their tail parts.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
119
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
126
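    In the motivational example, bb 3 and bb 5 are an instance of the second
    situation: both consist of the call freeD.898 (ctxD.2601_5(D)) and both
    have bb 7 as their single successor, so the predecessor edges 2->3 and
    4->5 can be redirected to one copy of the block; only a vop phi is
    needed to complete the merge.
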
127 For efficient implementation, we would like to value number the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
132
133 For the first situation (same operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
136
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, and hence the block value numbers will be different.
143
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
150 to merge the blocks.
151
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using SCC value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
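
    In the example, this is what makes the two calls to free in bb 3 and
    bb 5 match: they have the same callee and the same argument value, while
    the mismatch between their vuses (.MEMD.3923_14 and .MEMD.3923_16) is
    ignored.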
155
156
157 IMPLEMENTATION
158
159 1. The pass first determines all groups of blocks with the same successor
160 blocks.
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
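
    In the code below, step 1 corresponds to find_same_succ (called from
    init_worklist), step 2 to find_clusters, step 3 to apply_clusters, and
    steps 4 and 5 to update_worklist, driven by the loop in
    tail_merge_optimize.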
165
166
167 LIMITATIONS/TODO
168
169 - block only
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
175    - no insertion of gimple_reg phis.  We only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
177
178
179 PASS PLACEMENT
180 This 'pass' is not a stand-alone gimple pass, but runs as part of
181 pass_pre, in order to share the value numbering.
182
183
184 SWITCHES
185
186 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
187
188 #include "config.h"
189 #include "system.h"
190 #include "coretypes.h"
191 #include "backend.h"
192 #include "tree.h"
193 #include "gimple.h"
194 #include "cfghooks.h"
195 #include "tree-pass.h"
196 #include "ssa.h"
197 #include "fold-const.h"
198 #include "trans-mem.h"
199 #include "cfganal.h"
200 #include "cfgcleanup.h"
201 #include "gimple-iterator.h"
202 #include "tree-cfg.h"
203 #include "tree-into-ssa.h"
204 #include "params.h"
205 #include "tree-ssa-sccvn.h"
206 #include "cfgloop.h"
207 #include "tree-eh.h"
208 #include "tree-cfgcleanup.h"
209
210 const int ignore_edge_flags = EDGE_DFS_BACK | EDGE_EXECUTABLE;
211
212 /* Describes a group of bbs with the same successors. The successor bbs are
213 cached in succs, and the successor edge flags are cached in succ_flags.
214 If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
215 it's marked in inverse.
216 Additionally, the hash value for the struct is cached in hashval, and
217    in_worklist indicates whether it's currently part of the worklist.  */
218
219 struct same_succ : pointer_hash <same_succ>
220 {
221 /* The bbs that have the same successor bbs. */
222 bitmap bbs;
223 /* The successor bbs. */
224 bitmap succs;
225 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
226 bb. */
227 bitmap inverse;
228 /* The edge flags for each of the successor bbs. */
229 vec<int> succ_flags;
230 /* Indicates whether the struct is currently in the worklist. */
231 bool in_worklist;
232 /* The hash value of the struct. */
233 hashval_t hashval;
234
235 /* hash_table support. */
236 static inline hashval_t hash (const same_succ *);
237 static int equal (const same_succ *, const same_succ *);
238 static void remove (same_succ *);
239 };
240
241 /* hash routine for hash_table support, returns hashval of E. */
242
243 inline hashval_t
244 same_succ::hash (const same_succ *e)
245 {
246 return e->hashval;
247 }
248
249 /* A group of bbs where 1 bb from bbs can replace the other bbs. */
250
251 struct bb_cluster
252 {
253 /* The bbs in the cluster. */
254 bitmap bbs;
255 /* The preds of the bbs in the cluster. */
256 bitmap preds;
257 /* Index in all_clusters vector. */
258 int index;
259 /* The bb to replace the cluster with. */
260 basic_block rep_bb;
261 };
262
263 /* Per bb-info. */
264
265 struct aux_bb_info
266 {
267 /* The number of non-debug statements in the bb. */
268 int size;
269 /* The same_succ that this bb is a member of. */
270 same_succ *bb_same_succ;
271 /* The cluster that this bb is a member of. */
272 bb_cluster *cluster;
273 /* The vop state at the exit of a bb. This is shortlived data, used to
274 communicate data between update_block_by and update_vuses. */
275 tree vop_at_exit;
276 /* The bb that either contains or is dominated by the dependencies of the
277 bb. */
278 basic_block dep_bb;
279 };
280
281 /* Macros to access the fields of struct aux_bb_info. */
282
283 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
284 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
285 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
286 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
287 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
288
289 /* Returns true if the only effect statement STMT has is to define locally
290    used SSA_NAMEs.  */
291
292 static bool
293 stmt_local_def (gimple *stmt)
294 {
295 basic_block bb, def_bb;
296 imm_use_iterator iter;
297 use_operand_p use_p;
298 tree val;
299 def_operand_p def_p;
300
301 if (gimple_vdef (stmt) != NULL_TREE
302 || gimple_has_side_effects (stmt)
303 || gimple_could_trap_p_1 (stmt, false, false)
304 || gimple_vuse (stmt) != NULL_TREE
305 /* Copied from tree-ssa-ifcombine.c:bb_no_side_effects_p():
306 const calls don't match any of the above, yet they could
307 still have some side-effects - they could contain
308 gimple_could_trap_p statements, like floating point
309 exceptions or integer division by zero. See PR70586.
310 FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p
311 should handle this. */
312 || is_gimple_call (stmt))
313 return false;
314
315 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
316 if (def_p == NULL)
317 return false;
318
319 val = DEF_FROM_PTR (def_p);
320 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
321 return false;
322
323 def_bb = gimple_bb (stmt);
324
325 FOR_EACH_IMM_USE_FAST (use_p, iter, val)
326 {
327 if (is_gimple_debug (USE_STMT (use_p)))
328 continue;
329 bb = gimple_bb (USE_STMT (use_p));
330 if (bb == def_bb)
331 continue;
332
333 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
334 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
335 continue;
336
337 return false;
338 }
339
340 return true;
341 }
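
/* For instance, an assignment tmp_1 = x_2 + y_3 (names invented for
   illustration) is a local def if all non-debug uses of tmp_1 are in the
   defining bb, or are phi arguments on an edge leaving that bb; such
   statements are skipped during block comparison by
   gsi_advance_fw_nondebug_nonlocal below.  */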
342
343 /* Let GSI skip forwards over local defs. */
344
345 static void
346 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
347 {
348 gimple *stmt;
349
350 while (true)
351 {
352 if (gsi_end_p (*gsi))
353 return;
354 stmt = gsi_stmt (*gsi);
355 if (!stmt_local_def (stmt))
356 return;
357 gsi_next_nondebug (gsi);
358 }
359 }
360
361 /* VAL1 and VAL2 are either:
362 - uses in BB1 and BB2, or
363 - phi alternatives for BB1 and BB2.
364 Return true if the uses have the same gvn value. */
365
366 static bool
367 gvn_uses_equal (tree val1, tree val2)
368 {
369 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
370
371 if (val1 == val2)
372 return true;
373
374 if (vn_valueize (val1) != vn_valueize (val2))
375 return false;
376
377 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
378 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
379 }
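
/* For instance, given a_1 = b_2 + 1 in one bb and c_3 = b_2 + 1 in another
   (names invented for illustration), SCCVN assigns a_1 and c_3 the same
   value number, so gvn_uses_equal returns true for uses of a_1 and c_3 even
   though the SSA names differ.  */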
380
381 /* Prints E to FILE. */
382
383 static void
384 same_succ_print (FILE *file, const same_succ *e)
385 {
386 unsigned int i;
387 bitmap_print (file, e->bbs, "bbs:", "\n");
388 bitmap_print (file, e->succs, "succs:", "\n");
389 bitmap_print (file, e->inverse, "inverse:", "\n");
390 fprintf (file, "flags:");
391 for (i = 0; i < e->succ_flags.length (); ++i)
392 fprintf (file, " %x", e->succ_flags[i]);
393 fprintf (file, "\n");
394 }
395
396 /* Prints same_succ *PE to FILE.  */
397
398 inline int
399 ssa_same_succ_print_traverse (same_succ **pe, FILE *file)
400 {
401 const same_succ *e = *pe;
402 same_succ_print (file, e);
403 return 1;
404 }
405
406 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
407
408 static void
409 update_dep_bb (basic_block use_bb, tree val)
410 {
411 basic_block dep_bb;
412
413 /* Not a dep. */
414 if (TREE_CODE (val) != SSA_NAME)
415 return;
416
417 /* Skip use of global def. */
418 if (SSA_NAME_IS_DEFAULT_DEF (val))
419 return;
420
421 /* Skip use of local def. */
422 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
423 if (dep_bb == use_bb)
424 return;
425
426 if (BB_DEP_BB (use_bb) == NULL
427 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
428 BB_DEP_BB (use_bb) = dep_bb;
429 }
430
431 /* Update BB_DEP_BB, given the dependencies in STMT. */
432
433 static void
434 stmt_update_dep_bb (gimple *stmt)
435 {
436 ssa_op_iter iter;
437 use_operand_p use;
438
439 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
440 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
441 }
442
443 /* Calculates hash value for same_succ E.  */
444
445 static hashval_t
446 same_succ_hash (const same_succ *e)
447 {
448 inchash::hash hstate (bitmap_hash (e->succs));
449 int flags;
450 unsigned int i;
451 unsigned int first = bitmap_first_set_bit (e->bbs);
452 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
453 int size = 0;
454 gimple *stmt;
455 tree arg;
456 unsigned int s;
457 bitmap_iterator bs;
458
459 for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
460 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
461 {
462 stmt = gsi_stmt (gsi);
463 stmt_update_dep_bb (stmt);
464 if (stmt_local_def (stmt))
465 continue;
466 size++;
467
468 hstate.add_int (gimple_code (stmt));
469 if (is_gimple_assign (stmt))
470 hstate.add_int (gimple_assign_rhs_code (stmt));
471 if (!is_gimple_call (stmt))
472 continue;
473 if (gimple_call_internal_p (stmt))
474 hstate.add_int (gimple_call_internal_fn (stmt));
475 else
476 {
477 inchash::add_expr (gimple_call_fn (stmt), hstate);
478 if (gimple_call_chain (stmt))
479 inchash::add_expr (gimple_call_chain (stmt), hstate);
480 }
481 for (i = 0; i < gimple_call_num_args (stmt); i++)
482 {
483 arg = gimple_call_arg (stmt, i);
484 arg = vn_valueize (arg);
485 inchash::add_expr (arg, hstate);
486 }
487 }
488
489 hstate.add_int (size);
490 BB_SIZE (bb) = size;
491
492 hstate.add_int (bb->loop_father->num);
493
494 for (i = 0; i < e->succ_flags.length (); ++i)
495 {
496 flags = e->succ_flags[i];
497 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
498 hstate.add_int (flags);
499 }
500
501 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
502 {
503 int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
504 for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s));
505 !gsi_end_p (gsi);
506 gsi_next (&gsi))
507 {
508 gphi *phi = gsi.phi ();
509 tree lhs = gimple_phi_result (phi);
510 tree val = gimple_phi_arg_def (phi, n);
511
512 if (virtual_operand_p (lhs))
513 continue;
514 update_dep_bb (bb, val);
515 }
516 }
517
518 return hstate.end ();
519 }
520
521 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
522 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
523 the other edge flags. */
524
525 static bool
526 inverse_flags (const same_succ *e1, const same_succ *e2)
527 {
528 int f1a, f1b, f2a, f2b;
529 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
530
531 if (e1->succ_flags.length () != 2)
532 return false;
533
534 f1a = e1->succ_flags[0];
535 f1b = e1->succ_flags[1];
536 f2a = e2->succ_flags[0];
537 f2b = e2->succ_flags[1];
538
539 if (f1a == f2a && f1b == f2b)
540 return false;
541
542 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
543 }
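
/* E.g. the succ_flags vectors { EDGE_TRUE_VALUE, EDGE_FALSE_VALUE } and
   { EDGE_FALSE_VALUE, EDGE_TRUE_VALUE } (with equal remaining flag bits)
   are inverse; gimple_equal_p compensates by inverting the comparison code
   of one of the GIMPLE_CONDs.  */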
544
545 /* Compares SAME_SUCCs E1 and E2. */
546
547 int
548 same_succ::equal (const same_succ *e1, const same_succ *e2)
549 {
550 unsigned int i, first1, first2;
551 gimple_stmt_iterator gsi1, gsi2;
552 gimple *s1, *s2;
553 basic_block bb1, bb2;
554
555 if (e1 == e2)
556 return 1;
557
558 if (e1->hashval != e2->hashval)
559 return 0;
560
561 if (e1->succ_flags.length () != e2->succ_flags.length ())
562 return 0;
563
564 if (!bitmap_equal_p (e1->succs, e2->succs))
565 return 0;
566
567 if (!inverse_flags (e1, e2))
568 {
569 for (i = 0; i < e1->succ_flags.length (); ++i)
570 if (e1->succ_flags[i] != e2->succ_flags[i])
571 return 0;
572 }
573
574 first1 = bitmap_first_set_bit (e1->bbs);
575 first2 = bitmap_first_set_bit (e2->bbs);
576
577 bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
578 bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);
579
580 if (BB_SIZE (bb1) != BB_SIZE (bb2))
581 return 0;
582
583 if (bb1->loop_father != bb2->loop_father)
584 return 0;
585
586 gsi1 = gsi_start_nondebug_bb (bb1);
587 gsi2 = gsi_start_nondebug_bb (bb2);
588 gsi_advance_fw_nondebug_nonlocal (&gsi1);
589 gsi_advance_fw_nondebug_nonlocal (&gsi2);
590 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
591 {
592 s1 = gsi_stmt (gsi1);
593 s2 = gsi_stmt (gsi2);
594 if (gimple_code (s1) != gimple_code (s2))
595 return 0;
596 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
597 return 0;
598 gsi_next_nondebug (&gsi1);
599 gsi_next_nondebug (&gsi2);
600 gsi_advance_fw_nondebug_nonlocal (&gsi1);
601 gsi_advance_fw_nondebug_nonlocal (&gsi2);
602 }
603
604 return 1;
605 }
606
607 /* Alloc and init a new SAME_SUCC. */
608
609 static same_succ *
610 same_succ_alloc (void)
611 {
612 same_succ *same = XNEW (struct same_succ);
613
614 same->bbs = BITMAP_ALLOC (NULL);
615 same->succs = BITMAP_ALLOC (NULL);
616 same->inverse = BITMAP_ALLOC (NULL);
617 same->succ_flags.create (10);
618 same->in_worklist = false;
619
620 return same;
621 }
622
623 /* Delete same_succ E. */
624
625 void
626 same_succ::remove (same_succ *e)
627 {
628 BITMAP_FREE (e->bbs);
629 BITMAP_FREE (e->succs);
630 BITMAP_FREE (e->inverse);
631 e->succ_flags.release ();
632
633 XDELETE (e);
634 }
635
636 /* Reset same_succ SAME. */
637
638 static void
639 same_succ_reset (same_succ *same)
640 {
641 bitmap_clear (same->bbs);
642 bitmap_clear (same->succs);
643 bitmap_clear (same->inverse);
644 same->succ_flags.truncate (0);
645 }
646
647 static hash_table<same_succ> *same_succ_htab;
648
649 /* Array that is used to store the edge flags for a successor. */
650
651 static int *same_succ_edge_flags;
652
653 /* Bitmap that is used to mark bbs that are recently deleted. */
654
655 static bitmap deleted_bbs;
656
657 /* Bitmap that is used to mark predecessors of bbs that are
658 deleted. */
659
660 static bitmap deleted_bb_preds;
661
662 /* Prints same_succ_htab to stderr. */
663
664 extern void debug_same_succ (void);
665 DEBUG_FUNCTION void
666 debug_same_succ (void)
667 {
668 same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
669 }
670
671
672 /* Vector of bbs to process. */
673
674 static vec<same_succ *> worklist;
675
676 /* Prints worklist to FILE. */
677
678 static void
679 print_worklist (FILE *file)
680 {
681 unsigned int i;
682 for (i = 0; i < worklist.length (); ++i)
683 same_succ_print (file, worklist[i]);
684 }
685
686 /* Adds SAME to worklist. */
687
688 static void
689 add_to_worklist (same_succ *same)
690 {
691 if (same->in_worklist)
692 return;
693
694 if (bitmap_count_bits (same->bbs) < 2)
695 return;
696
697 same->in_worklist = true;
698 worklist.safe_push (same);
699 }
700
701 /* Add BB to same_succ_htab. */
702
703 static void
704 find_same_succ_bb (basic_block bb, same_succ **same_p)
705 {
706 unsigned int j;
707 bitmap_iterator bj;
708 same_succ *same = *same_p;
709 same_succ **slot;
710 edge_iterator ei;
711 edge e;
712
713 if (bb == NULL)
714 return;
715 bitmap_set_bit (same->bbs, bb->index);
716 FOR_EACH_EDGE (e, ei, bb->succs)
717 {
718 int index = e->dest->index;
719 bitmap_set_bit (same->succs, index);
720 same_succ_edge_flags[index] = (e->flags & ~ignore_edge_flags);
721 }
722 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
723 same->succ_flags.safe_push (same_succ_edge_flags[j]);
724
725 same->hashval = same_succ_hash (same);
726
727 slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT);
728 if (*slot == NULL)
729 {
730 *slot = same;
731 BB_SAME_SUCC (bb) = same;
732 add_to_worklist (same);
733 *same_p = NULL;
734 }
735 else
736 {
737 bitmap_set_bit ((*slot)->bbs, bb->index);
738 BB_SAME_SUCC (bb) = *slot;
739 add_to_worklist (*slot);
740 if (inverse_flags (same, *slot))
741 bitmap_set_bit ((*slot)->inverse, bb->index);
742 same_succ_reset (same);
743 }
744 }
745
746 /* Find bbs with same successors. */
747
748 static void
749 find_same_succ (void)
750 {
751 same_succ *same = same_succ_alloc ();
752 basic_block bb;
753
754 FOR_EACH_BB_FN (bb, cfun)
755 {
756 find_same_succ_bb (bb, &same);
757 if (same == NULL)
758 same = same_succ_alloc ();
759 }
760
761 same_succ::remove (same);
762 }
763
764 /* Initializes worklist administration. */
765
766 static void
767 init_worklist (void)
768 {
769 alloc_aux_for_blocks (sizeof (struct aux_bb_info));
770 same_succ_htab = new hash_table<same_succ> (n_basic_blocks_for_fn (cfun));
771 same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
772 deleted_bbs = BITMAP_ALLOC (NULL);
773 deleted_bb_preds = BITMAP_ALLOC (NULL);
774 worklist.create (n_basic_blocks_for_fn (cfun));
775 find_same_succ ();
776
777 if (dump_file && (dump_flags & TDF_DETAILS))
778 {
779 fprintf (dump_file, "initial worklist:\n");
780 print_worklist (dump_file);
781 }
782 }
783
784 /* Deletes worklist administration. */
785
786 static void
787 delete_worklist (void)
788 {
789 free_aux_for_blocks ();
790 delete same_succ_htab;
791 same_succ_htab = NULL;
792 XDELETEVEC (same_succ_edge_flags);
793 same_succ_edge_flags = NULL;
794 BITMAP_FREE (deleted_bbs);
795 BITMAP_FREE (deleted_bb_preds);
796 worklist.release ();
797 }
798
799 /* Mark BB as deleted, and mark its predecessors. */
800
801 static void
802 mark_basic_block_deleted (basic_block bb)
803 {
804 edge e;
805 edge_iterator ei;
806
807 bitmap_set_bit (deleted_bbs, bb->index);
808
809 FOR_EACH_EDGE (e, ei, bb->preds)
810 bitmap_set_bit (deleted_bb_preds, e->src->index);
811 }
812
813 /* Removes BB from its corresponding same_succ. */
814
815 static void
816 same_succ_flush_bb (basic_block bb)
817 {
818 same_succ *same = BB_SAME_SUCC (bb);
819 if (! same)
820 return;
821
822 BB_SAME_SUCC (bb) = NULL;
823 if (bitmap_single_bit_set_p (same->bbs))
824 same_succ_htab->remove_elt_with_hash (same, same->hashval);
825 else
826 bitmap_clear_bit (same->bbs, bb->index);
827 }
828
829 /* Removes all bbs in BBS from their corresponding same_succ. */
830
831 static void
832 same_succ_flush_bbs (bitmap bbs)
833 {
834 unsigned int i;
835 bitmap_iterator bi;
836
837 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
838 same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
839 }
840
841 /* Release the last vdef in BB, either normal or phi result. */
842
843 static void
844 release_last_vdef (basic_block bb)
845 {
846 for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i);
847 gsi_prev_nondebug (&i))
848 {
849 gimple *stmt = gsi_stmt (i);
850 if (gimple_vdef (stmt) == NULL_TREE)
851 continue;
852
853 mark_virtual_operand_for_renaming (gimple_vdef (stmt));
854 return;
855 }
856
857 for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i);
858 gsi_next (&i))
859 {
860 gphi *phi = i.phi ();
861 tree res = gimple_phi_result (phi);
862
863 if (!virtual_operand_p (res))
864 continue;
865
866 mark_virtual_phi_result_for_renaming (phi);
867 return;
868 }
869 }
870
871 /* For deleted_bb_preds, find bbs with same successors. */
872
873 static void
874 update_worklist (void)
875 {
876 unsigned int i;
877 bitmap_iterator bi;
878 basic_block bb;
879 same_succ *same;
880
881 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
882 bitmap_clear (deleted_bbs);
883
884 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
885 same_succ_flush_bbs (deleted_bb_preds);
886
887 same = same_succ_alloc ();
888 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
889 {
890 bb = BASIC_BLOCK_FOR_FN (cfun, i);
891 gcc_assert (bb != NULL);
892 find_same_succ_bb (bb, &same);
893 if (same == NULL)
894 same = same_succ_alloc ();
895 }
896 same_succ::remove (same);
897 bitmap_clear (deleted_bb_preds);
898 }
899
900 /* Prints cluster C to FILE. */
901
902 static void
903 print_cluster (FILE *file, bb_cluster *c)
904 {
905 if (c == NULL)
906 return;
907 bitmap_print (file, c->bbs, "bbs:", "\n");
908 bitmap_print (file, c->preds, "preds:", "\n");
909 }
910
911 /* Prints cluster C to stderr. */
912
913 extern void debug_cluster (bb_cluster *);
914 DEBUG_FUNCTION void
915 debug_cluster (bb_cluster *c)
916 {
917 print_cluster (stderr, c);
918 }
919
920 /* Update C->rep_bb, given that BB is added to the cluster. */
921
922 static void
923 update_rep_bb (bb_cluster *c, basic_block bb)
924 {
925 /* Initial. */
926 if (c->rep_bb == NULL)
927 {
928 c->rep_bb = bb;
929 return;
930 }
931
932 /* Current needs no deps, keep it. */
933 if (BB_DEP_BB (c->rep_bb) == NULL)
934 return;
935
936 /* Bb needs no deps, change rep_bb. */
937 if (BB_DEP_BB (bb) == NULL)
938 {
939 c->rep_bb = bb;
940 return;
941 }
942
943 /* Bb needs last deps earlier than current, change rep_bb. A potential
944 problem with this, is that the first deps might also be earlier, which
945 would mean we prefer longer lifetimes for the deps. To be able to check
946 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
947 BB_DEP_BB, which is really BB_LAST_DEP_BB.
948 The benefit of choosing the bb with last deps earlier, is that it can
949 potentially be used as replacement for more bbs. */
950 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
951 c->rep_bb = bb;
952 }
953
954 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
955
956 static void
957 add_bb_to_cluster (bb_cluster *c, basic_block bb)
958 {
959 edge e;
960 edge_iterator ei;
961
962 bitmap_set_bit (c->bbs, bb->index);
963
964 FOR_EACH_EDGE (e, ei, bb->preds)
965 bitmap_set_bit (c->preds, e->src->index);
966
967 update_rep_bb (c, bb);
968 }
969
970 /* Allocate and init new cluster. */
971
972 static bb_cluster *
973 new_cluster (void)
974 {
975 bb_cluster *c;
976 c = XCNEW (bb_cluster);
977 c->bbs = BITMAP_ALLOC (NULL);
978 c->preds = BITMAP_ALLOC (NULL);
979 c->rep_bb = NULL;
980 return c;
981 }
982
983 /* Delete clusters. */
984
985 static void
986 delete_cluster (bb_cluster *c)
987 {
988 if (c == NULL)
989 return;
990 BITMAP_FREE (c->bbs);
991 BITMAP_FREE (c->preds);
992 XDELETE (c);
993 }
994
995
996 /* Array that contains all clusters. */
997
998 static vec<bb_cluster *> all_clusters;
999
1000 /* Allocate all cluster vectors. */
1001
1002 static void
1003 alloc_cluster_vectors (void)
1004 {
1005 all_clusters.create (n_basic_blocks_for_fn (cfun));
1006 }
1007
1008 /* Reset all cluster vectors. */
1009
1010 static void
1011 reset_cluster_vectors (void)
1012 {
1013 unsigned int i;
1014 basic_block bb;
1015 for (i = 0; i < all_clusters.length (); ++i)
1016 delete_cluster (all_clusters[i]);
1017 all_clusters.truncate (0);
1018 FOR_EACH_BB_FN (bb, cfun)
1019 BB_CLUSTER (bb) = NULL;
1020 }
1021
1022 /* Delete all cluster vectors. */
1023
1024 static void
1025 delete_cluster_vectors (void)
1026 {
1027 unsigned int i;
1028 for (i = 0; i < all_clusters.length (); ++i)
1029 delete_cluster (all_clusters[i]);
1030 all_clusters.release ();
1031 }
1032
1033 /* Merge cluster C2 into C1. */
1034
1035 static void
1036 merge_clusters (bb_cluster *c1, bb_cluster *c2)
1037 {
1038 bitmap_ior_into (c1->bbs, c2->bbs);
1039 bitmap_ior_into (c1->preds, c2->preds);
1040 }
1041
1042 /* Register equivalence of BB1 and BB2 (members of cluster C).  Store C in
1043    all_clusters, or merge C with an existing cluster.  */
1044
1045 static void
1046 set_cluster (basic_block bb1, basic_block bb2)
1047 {
1048 basic_block merge_bb, other_bb;
1049 bb_cluster *merge, *old, *c;
1050
1051 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1052 {
1053 c = new_cluster ();
1054 add_bb_to_cluster (c, bb1);
1055 add_bb_to_cluster (c, bb2);
1056 BB_CLUSTER (bb1) = c;
1057 BB_CLUSTER (bb2) = c;
1058 c->index = all_clusters.length ();
1059 all_clusters.safe_push (c);
1060 }
1061 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1062 {
1063 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1064 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1065 merge = BB_CLUSTER (merge_bb);
1066 add_bb_to_cluster (merge, other_bb);
1067 BB_CLUSTER (other_bb) = merge;
1068 }
1069 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1070 {
1071 unsigned int i;
1072 bitmap_iterator bi;
1073
1074 old = BB_CLUSTER (bb2);
1075 merge = BB_CLUSTER (bb1);
1076 merge_clusters (merge, old);
1077 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1078 BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
1079 all_clusters[old->index] = NULL;
1080 update_rep_bb (merge, old->rep_bb);
1081 delete_cluster (old);
1082 }
1083 else
1084 gcc_unreachable ();
1085 }
1086
1087 /* Return true if gimple operands T1 and T2 have the same value. */
1088
1089 static bool
1090 gimple_operand_equal_value_p (tree t1, tree t2)
1091 {
1092 if (t1 == t2)
1093 return true;
1094
1095 if (t1 == NULL_TREE
1096 || t2 == NULL_TREE)
1097 return false;
1098
1099 if (operand_equal_p (t1, t2, OEP_MATCH_SIDE_EFFECTS))
1100 return true;
1101
1102 return gvn_uses_equal (t1, t2);
1103 }
1104
1105 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
1106 gimple_bb (s2) are members of SAME_SUCC. */
1107
1108 static bool
1109 gimple_equal_p (same_succ *same_succ, gimple *s1, gimple *s2)
1110 {
1111 unsigned int i;
1112 tree lhs1, lhs2;
1113 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1114 tree t1, t2;
1115 bool inv_cond;
1116 enum tree_code code1, code2;
1117
1118 if (gimple_code (s1) != gimple_code (s2))
1119 return false;
1120
1121 switch (gimple_code (s1))
1122 {
1123 case GIMPLE_CALL:
1124 if (!gimple_call_same_target_p (s1, s2))
1125 return false;
1126
1127 t1 = gimple_call_chain (s1);
1128 t2 = gimple_call_chain (s2);
1129 if (!gimple_operand_equal_value_p (t1, t2))
1130 return false;
1131
1132 if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1133 return false;
1134
1135 for (i = 0; i < gimple_call_num_args (s1); ++i)
1136 {
1137 t1 = gimple_call_arg (s1, i);
1138 t2 = gimple_call_arg (s2, i);
1139 if (!gimple_operand_equal_value_p (t1, t2))
1140 return false;
1141 }
1142
1143 lhs1 = gimple_get_lhs (s1);
1144 lhs2 = gimple_get_lhs (s2);
1145 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1146 return true;
1147 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1148 return false;
1149 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1150 return vn_valueize (lhs1) == vn_valueize (lhs2);
1151 return operand_equal_p (lhs1, lhs2, 0);
1152
1153 case GIMPLE_ASSIGN:
1154 lhs1 = gimple_get_lhs (s1);
1155 lhs2 = gimple_get_lhs (s2);
1156 if (TREE_CODE (lhs1) != SSA_NAME
1157 && TREE_CODE (lhs2) != SSA_NAME)
1158 return (operand_equal_p (lhs1, lhs2, 0)
1159 && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
1160 gimple_assign_rhs1 (s2)));
1161 else if (TREE_CODE (lhs1) == SSA_NAME
1162 && TREE_CODE (lhs2) == SSA_NAME)
1163 return operand_equal_p (gimple_assign_rhs1 (s1),
1164 gimple_assign_rhs1 (s2), 0);
1165 return false;
1166
1167 case GIMPLE_COND:
1168 t1 = gimple_cond_lhs (s1);
1169 t2 = gimple_cond_lhs (s2);
1170 if (!gimple_operand_equal_value_p (t1, t2))
1171 return false;
1172
1173 t1 = gimple_cond_rhs (s1);
1174 t2 = gimple_cond_rhs (s2);
1175 if (!gimple_operand_equal_value_p (t1, t2))
1176 return false;
1177
1178 code1 = gimple_expr_code (s1);
1179 code2 = gimple_expr_code (s2);
1180 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1181 != bitmap_bit_p (same_succ->inverse, bb2->index));
1182 if (inv_cond)
1183 {
1184 bool honor_nans = HONOR_NANS (t1);
1185 code2 = invert_tree_comparison (code2, honor_nans);
1186 }
1187 return code1 == code2;
1188
1189 default:
1190 return false;
1191 }
1192 }
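
/* For example, with inverse successor flags, if (a_1 < b_2) in one bb
   compares equal to if (a_1 >= b_2) in the other (names invented for
   illustration), since the second comparison code is inverted before the
   final check above.  */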
1193
1194 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE.
1195    Return true in VUSE_ESCAPED if the vuse influenced an SSA_OP_DEF of one of the
1196 processed statements. */
1197
1198 static void
1199 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1200 bool *vuse_escaped)
1201 {
1202 gimple *stmt;
1203 tree lvuse;
1204
1205 while (true)
1206 {
1207 if (gsi_end_p (*gsi))
1208 return;
1209 stmt = gsi_stmt (*gsi);
1210
1211 lvuse = gimple_vuse (stmt);
1212 if (lvuse != NULL_TREE)
1213 {
1214 *vuse = lvuse;
1215 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1216 *vuse_escaped = true;
1217 }
1218
1219 if (!stmt_local_def (stmt))
1220 return;
1221 gsi_prev_nondebug (gsi);
1222 }
1223 }
1224
1225 /* Return true if equal (in the sense of gimple_equal_p) statements STMT1 and
1226 STMT2 are allowed to be merged. */
1227
1228 static bool
1229 merge_stmts_p (gimple *stmt1, gimple *stmt2)
1230 {
1231   /* A better approach than checking here would be to blacklist the bb
1232      containing the stmt, when encountering the stmt e.g. in
1233      same_succ_hash.  */
1234 if (is_tm_ending (stmt1))
1235 return false;
1236
1237 /* Verify EH landing pads. */
1238 if (lookup_stmt_eh_lp_fn (cfun, stmt1) != lookup_stmt_eh_lp_fn (cfun, stmt2))
1239 return false;
1240
1241 if (is_gimple_call (stmt1)
1242 && gimple_call_internal_p (stmt1))
1243 switch (gimple_call_internal_fn (stmt1))
1244 {
1245 case IFN_UBSAN_NULL:
1246 case IFN_UBSAN_BOUNDS:
1247 case IFN_UBSAN_VPTR:
1248 case IFN_UBSAN_CHECK_ADD:
1249 case IFN_UBSAN_CHECK_SUB:
1250 case IFN_UBSAN_CHECK_MUL:
1251 case IFN_UBSAN_OBJECT_SIZE:
1252 case IFN_UBSAN_PTR:
1253 case IFN_ASAN_CHECK:
1254 /* For these internal functions, gimple_location is an implicit
1255 parameter, which will be used explicitly after expansion.
1256 Merging these statements may cause confusing line numbers in
1257 sanitizer messages. */
1258 return gimple_location (stmt1) == gimple_location (stmt2);
1259 default:
1260 break;
1261 }
1262
1263 return true;
1264 }
1265
1266 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
1267 clusters them. */
1268
1269 static void
1270 find_duplicate (same_succ *same_succ, basic_block bb1, basic_block bb2)
1271 {
1272 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1273 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1274 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1275 bool vuse_escaped = false;
1276
1277 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1278 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1279
1280 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1281 {
1282 gimple *stmt1 = gsi_stmt (gsi1);
1283 gimple *stmt2 = gsi_stmt (gsi2);
1284
1285 if (gimple_code (stmt1) == GIMPLE_LABEL
1286 && gimple_code (stmt2) == GIMPLE_LABEL)
1287 break;
1288
1289 if (!gimple_equal_p (same_succ, stmt1, stmt2))
1290 return;
1291
1292 if (!merge_stmts_p (stmt1, stmt2))
1293 return;
1294
1295 gsi_prev_nondebug (&gsi1);
1296 gsi_prev_nondebug (&gsi2);
1297 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1298 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1299 }
1300
1301 while (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
1302 {
1303 tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
1304 if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1305 return;
1306 gsi_prev (&gsi1);
1307 }
1308 while (!gsi_end_p (gsi2) && gimple_code (gsi_stmt (gsi2)) == GIMPLE_LABEL)
1309 {
1310 tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi2)));
1311 if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1312 return;
1313 gsi_prev (&gsi2);
1314 }
1315 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1316 return;
1317
1318 /* If the incoming vuses are not the same, and the vuse escaped into an
1319 SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1320 which potentially means the semantics of one of the blocks will be changed.
1321 TODO: make this check more precise. */
1322 if (vuse_escaped && vuse1 != vuse2)
1323 return;
1324
1325 if (dump_file)
1326 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1327 bb1->index, bb2->index);
1328
1329 set_cluster (bb1, bb2);
1330 }
1331
1332 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1333 E2 are equal. */
1334
1335 static bool
1336 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1337 {
1338 int n1 = e1->dest_idx, n2 = e2->dest_idx;
1339 gphi_iterator gsi;
1340
1341 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1342 {
1343 gphi *phi = gsi.phi ();
1344 tree lhs = gimple_phi_result (phi);
1345 tree val1 = gimple_phi_arg_def (phi, n1);
1346 tree val2 = gimple_phi_arg_def (phi, n2);
1347
1348 if (virtual_operand_p (lhs))
1349 continue;
1350
1351 if (operand_equal_for_phi_arg_p (val1, val2))
1352 continue;
1353 if (gvn_uses_equal (val1, val2))
1354 continue;
1355
1356 return false;
1357 }
1358
1359 return true;
1360 }
1361
1362 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1363 phi alternatives for BB1 and BB2 are equal. */
1364
1365 static bool
1366 same_phi_alternatives (same_succ *same_succ, basic_block bb1, basic_block bb2)
1367 {
1368 unsigned int s;
1369 bitmap_iterator bs;
1370 edge e1, e2;
1371 basic_block succ;
1372
1373 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1374 {
1375 succ = BASIC_BLOCK_FOR_FN (cfun, s);
1376 e1 = find_edge (bb1, succ);
1377 e2 = find_edge (bb2, succ);
1378 if (e1->flags & EDGE_COMPLEX
1379 || e2->flags & EDGE_COMPLEX)
1380 return false;
1381
1382 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1383 the same value. */
1384 if (!same_phi_alternatives_1 (succ, e1, e2))
1385 return false;
1386 }
1387
1388 return true;
1389 }
1390
1391 /* Return true if BB has non-vop phis. */
1392
1393 static bool
1394 bb_has_non_vop_phi (basic_block bb)
1395 {
1396 gimple_seq phis = phi_nodes (bb);
1397 gimple *phi;
1398
1399 if (phis == NULL)
1400 return false;
1401
1402 if (!gimple_seq_singleton_p (phis))
1403 return true;
1404
1405 phi = gimple_seq_first_stmt (phis);
1406 return !virtual_operand_p (gimple_phi_result (phi));
1407 }
1408
1409 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1410    invariant that uses in FROM are dominated by their defs.  */
1411
1412 static bool
1413 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1414 {
1415 basic_block cd, dep_bb = BB_DEP_BB (to);
1416 edge_iterator ei;
1417 edge e;
1418
1419 if (dep_bb == NULL)
1420 return true;
1421
1422 bitmap from_preds = BITMAP_ALLOC (NULL);
1423 FOR_EACH_EDGE (e, ei, from->preds)
1424 bitmap_set_bit (from_preds, e->src->index);
1425 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1426 BITMAP_FREE (from_preds);
1427
1428 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1429 }
1430
1431 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1432 replacement bb) and vice versa maintains the invariant that uses in the
1433    replacement are dominated by their defs.  */
1434
1435 static bool
1436 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1437 {
1438 if (BB_CLUSTER (bb1) != NULL)
1439 bb1 = BB_CLUSTER (bb1)->rep_bb;
1440
1441 if (BB_CLUSTER (bb2) != NULL)
1442 bb2 = BB_CLUSTER (bb2)->rep_bb;
1443
1444 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1445 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1446 }
1447
1448 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */
1449
1450 static void
1451 find_clusters_1 (same_succ *same_succ)
1452 {
1453 basic_block bb1, bb2;
1454 unsigned int i, j;
1455 bitmap_iterator bi, bj;
1456 int nr_comparisons;
1457 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
1458
1459 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1460 {
1461 bb1 = BASIC_BLOCK_FOR_FN (cfun, i);
1462
1463 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding
1464 phi-nodes in bb1 and bb2, with the same alternatives for the same
1465 preds. */
1466 if (bb_has_non_vop_phi (bb1) || bb_has_eh_pred (bb1)
1467 || bb_has_abnormal_pred (bb1))
1468 continue;
1469
1470 nr_comparisons = 0;
1471 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1472 {
1473 bb2 = BASIC_BLOCK_FOR_FN (cfun, j);
1474
1475 if (bb_has_non_vop_phi (bb2) || bb_has_eh_pred (bb2)
1476 || bb_has_abnormal_pred (bb2))
1477 continue;
1478
1479 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1480 continue;
1481
1482 /* Limit quadratic behavior. */
1483 nr_comparisons++;
1484 if (nr_comparisons > max_comparisons)
1485 break;
1486
1487 /* This is a conservative dependency check. We could test more
1488 precise for allowed replacement direction. */
1489 if (!deps_ok_for_redirect (bb1, bb2))
1490 continue;
1491
1492 if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1493 continue;
1494
1495 find_duplicate (same_succ, bb1, bb2);
1496 }
1497 }
1498 }
1499
1500 /* Find clusters of bbs which can be merged. */
1501
1502 static void
1503 find_clusters (void)
1504 {
1505 same_succ *same;
1506
1507 while (!worklist.is_empty ())
1508 {
1509 same = worklist.pop ();
1510 same->in_worklist = false;
1511 if (dump_file && (dump_flags & TDF_DETAILS))
1512 {
1513 fprintf (dump_file, "processing worklist entry\n");
1514 same_succ_print (dump_file, same);
1515 }
1516 find_clusters_1 (same);
1517 }
1518 }
1519
1520 /* Returns the vop phi of BB, if any. */
1521
1522 static gphi *
1523 vop_phi (basic_block bb)
1524 {
1525 gphi *stmt;
1526 gphi_iterator gsi;
1527 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1528 {
1529 stmt = gsi.phi ();
1530 if (! virtual_operand_p (gimple_phi_result (stmt)))
1531 continue;
1532 return stmt;
1533 }
1534 return NULL;
1535 }
1536
1537 /* Redirect all edges from BB1 to BB2, remove BB1 and mark it as deleted.  */
1538
1539 static void
1540 replace_block_by (basic_block bb1, basic_block bb2)
1541 {
1542 edge pred_edge;
1543 unsigned int i;
1544 gphi *bb2_phi;
1545
1546 bb2_phi = vop_phi (bb2);
1547
1548 /* Mark the basic block as deleted. */
1549 mark_basic_block_deleted (bb1);
1550
1551 /* Redirect the incoming edges of bb1 to bb2. */
1552 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
1553 {
1554 pred_edge = EDGE_PRED (bb1, i - 1);
1555 pred_edge = redirect_edge_and_branch (pred_edge, bb2);
1556 gcc_assert (pred_edge != NULL);
1557
1558 if (bb2_phi == NULL)
1559 continue;
1560
1561 /* The phi might have run out of capacity when the redirect added an
1562 argument, which means it could have been replaced. Refresh it. */
1563 bb2_phi = vop_phi (bb2);
1564
1565 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
1566 pred_edge, UNKNOWN_LOCATION);
1567 }
1568
1569
1570 /* Merge the outgoing edge counts from bb1 onto bb2. */
1571 edge e1, e2;
1572 edge_iterator ei;
1573
1574 if (bb2->count.initialized_p ())
1575 FOR_EACH_EDGE (e1, ei, bb1->succs)
1576 {
1577 e2 = find_edge (bb2, e1->dest);
1578 gcc_assert (e2);
1579
1580 	/* If the probabilities are the same, we are done.
1581 	   If the counts are nonzero we can distribute accordingly.  In the
1582 	   remaining cases just average the values and hope for the best.  */
1583 e2->probability = e1->probability.combine_with_count
1584 (bb1->count, e2->probability, bb2->count);
1585 }
1586 bb2->count += bb1->count;
1587
1588 /* Move over any user labels from bb1 after the bb2 labels. */
1589 gimple_stmt_iterator gsi1 = gsi_start_bb (bb1);
1590 if (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
1591 {
1592 gimple_stmt_iterator gsi2 = gsi_after_labels (bb2);
1593 while (!gsi_end_p (gsi1)
1594 && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
1595 {
1596 tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
1597 gcc_assert (!DECL_NONLOCAL (label) && !FORCED_LABEL (label));
1598 if (DECL_ARTIFICIAL (label))
1599 gsi_next (&gsi1);
1600 else
1601 gsi_move_before (&gsi1, &gsi2);
1602 }
1603 }
1604
1605 /* Clear range info from all stmts in BB2 -- this transformation
1606 could make them out of date. */
1607 reset_flow_sensitive_info_in_bb (bb2);
1608
1609 /* Do updates that use bb1, before deleting bb1. */
1610 release_last_vdef (bb1);
1611 same_succ_flush_bb (bb1);
1612
1613 delete_basic_block (bb1);
1614 }
1615
1616 /* Bbs for which update_debug_stmt needs to be called.  */
1617
1618 static bitmap update_bbs;
1619
1620 /* For each cluster in all_clusters, merge all cluster->bbs. Returns
1621    the number of bbs removed.  */
1622
1623 static int
1624 apply_clusters (void)
1625 {
1626 basic_block bb1, bb2;
1627 bb_cluster *c;
1628 unsigned int i, j;
1629 bitmap_iterator bj;
1630 int nr_bbs_removed = 0;
1631
1632 for (i = 0; i < all_clusters.length (); ++i)
1633 {
1634 c = all_clusters[i];
1635 if (c == NULL)
1636 continue;
1637
1638 bb2 = c->rep_bb;
1639 bitmap_set_bit (update_bbs, bb2->index);
1640
1641 bitmap_clear_bit (c->bbs, bb2->index);
1642 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1643 {
1644 bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
1645 bitmap_clear_bit (update_bbs, bb1->index);
1646
1647 replace_block_by (bb1, bb2);
1648 nr_bbs_removed++;
1649 }
1650 }
1651
1652 return nr_bbs_removed;
1653 }
1654
1655 /* Resets debug statement STMT if it has uses that are not dominated by their
1656 defs. */
1657
1658 static void
1659 update_debug_stmt (gimple *stmt)
1660 {
1661 use_operand_p use_p;
1662 ssa_op_iter oi;
1663 basic_block bbuse;
1664
1665 if (!gimple_debug_bind_p (stmt))
1666 return;
1667
1668 bbuse = gimple_bb (stmt);
1669 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1670 {
1671 tree name = USE_FROM_PTR (use_p);
1672 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1673 basic_block bbdef = gimple_bb (def_stmt);
1674 if (bbdef == NULL || bbuse == bbdef
1675 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1676 continue;
1677
1678 gimple_debug_bind_reset_value (stmt);
1679 update_stmt (stmt);
1680 break;
1681 }
1682 }
1683
1684 /* Resets all debug statements that have uses that are not
1685 dominated by their defs. */
1686
1687 static void
1688 update_debug_stmts (void)
1689 {
1690 basic_block bb;
1691 bitmap_iterator bi;
1692 unsigned int i;
1693
1694 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1695 {
1696 gimple *stmt;
1697 gimple_stmt_iterator gsi;
1698
1699 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1700 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1701 {
1702 stmt = gsi_stmt (gsi);
1703 if (!is_gimple_debug (stmt))
1704 continue;
1705 update_debug_stmt (stmt);
1706 }
1707 }
1708 }
1709
1710 /* Runs tail merge optimization. */
1711
1712 unsigned int
1713 tail_merge_optimize (unsigned int todo)
1714 {
1715 int nr_bbs_removed_total = 0;
1716 int nr_bbs_removed;
1717 bool loop_entered = false;
1718 int iteration_nr = 0;
1719 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);
1720
1721 if (!flag_tree_tail_merge
1722 || max_iterations == 0)
1723 return 0;
1724
1725 timevar_push (TV_TREE_TAIL_MERGE);
1726
1727   /* We enter from PRE which has critical edges split.  Elimination
1728      does not process trivially dead code, so clean up the CFG if we
1729      are told to, and then re-split the critical edges.  */
1730 if (todo & TODO_cleanup_cfg)
1731 {
1732 cleanup_tree_cfg ();
1733 todo &= ~TODO_cleanup_cfg;
1734 split_critical_edges ();
1735 }
1736
1737 if (!dom_info_available_p (CDI_DOMINATORS))
1738 {
1739 /* PRE can leave us with unreachable blocks, remove them now. */
1740 delete_unreachable_blocks ();
1741 calculate_dominance_info (CDI_DOMINATORS);
1742 }
1743 init_worklist ();
1744
1745 while (!worklist.is_empty ())
1746 {
1747 if (!loop_entered)
1748 {
1749 loop_entered = true;
1750 alloc_cluster_vectors ();
1751 update_bbs = BITMAP_ALLOC (NULL);
1752 }
1753 else
1754 reset_cluster_vectors ();
1755
1756 iteration_nr++;
1757 if (dump_file && (dump_flags & TDF_DETAILS))
1758 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
1759
1760 find_clusters ();
1761 gcc_assert (worklist.is_empty ());
1762 if (all_clusters.is_empty ())
1763 break;
1764
1765 nr_bbs_removed = apply_clusters ();
1766 nr_bbs_removed_total += nr_bbs_removed;
1767 if (nr_bbs_removed == 0)
1768 break;
1769
1770 free_dominance_info (CDI_DOMINATORS);
1771
1772 if (iteration_nr == max_iterations)
1773 break;
1774
1775 calculate_dominance_info (CDI_DOMINATORS);
1776 update_worklist ();
1777 }
1778
1779 if (dump_file && (dump_flags & TDF_DETAILS))
1780 fprintf (dump_file, "htab collision / search: %f\n",
1781 same_succ_htab->collisions ());
1782
1783 if (nr_bbs_removed_total > 0)
1784 {
1785 if (MAY_HAVE_DEBUG_BIND_STMTS)
1786 {
1787 calculate_dominance_info (CDI_DOMINATORS);
1788 update_debug_stmts ();
1789 }
1790
1791 if (dump_file && (dump_flags & TDF_DETAILS))
1792 {
1793 fprintf (dump_file, "Before TODOs.\n");
1794 dump_function_to_file (current_function_decl, dump_file, dump_flags);
1795 }
1796
1797 mark_virtual_operands_for_renaming (cfun);
1798 }
1799
1800 delete_worklist ();
1801 if (loop_entered)
1802 {
1803 delete_cluster_vectors ();
1804 BITMAP_FREE (update_bbs);
1805 }
1806
1807 timevar_pop (TV_TREE_TAIL_MERGE);
1808
1809 return todo;
1810 }