/* gcc/gimple-ssa-split-paths.c  */
1 /* Support routines for Splitting Paths to loop backedges
2 Copyright (C) 2015-2020 Free Software Foundation, Inc.
3 Contributed by Ajit Kumar Agarwal <ajitkum@xilinx.com>.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "tree-cfg.h"
29 #include "cfganal.h"
30 #include "cfgloop.h"
31 #include "gimple-iterator.h"
32 #include "tracer.h"
33 #include "predict.h"
34 #include "gimple-ssa.h"
35 #include "tree-phinodes.h"
36 #include "ssa-iterators.h"
37
38 /* Given LATCH, the latch block in a loop, see if the shape of the
39 path reaching LATCH is suitable for being split by duplication.
40 If so, return the block that will be duplicated into its predecessor
41 paths. Else return NULL. */
42
43 static basic_block
44 find_block_to_duplicate_for_splitting_paths (basic_block latch)
45 {
46 /* We should have simple latches at this point. So the latch should
47 have a single successor. This implies the predecessor of the latch
48 likely has the loop exit. And it's that predecessor we're most
49 interested in. To keep things simple, we're going to require that
50 the latch have a single predecessor too. */
51 if (single_succ_p (latch) && single_pred_p (latch))
52 {
53 basic_block bb = get_immediate_dominator (CDI_DOMINATORS, latch);
54 gcc_assert (single_pred_edge (latch)->src == bb);
55
56 /* If BB has been marked as not to be duplicated, then honor that
57 request. */
58 if (ignore_bb_p (bb))
59 return NULL;
60
61 gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb));
62 /* The immediate dominator of the latch must end in a conditional. */
63 if (!last || gimple_code (last) != GIMPLE_COND)
64 return NULL;
65
66 /* We're hoping that BB is a join point for an IF-THEN-ELSE diamond
67 region. Verify that it is.
68
69 First, verify that BB has two predecessors (each arm of the
70 IF-THEN-ELSE) and two successors (the latch and exit). */
71 if (EDGE_COUNT (bb->preds) == 2 && EDGE_COUNT (bb->succs) == 2)
72 {
73 /* Now verify that BB's immediate dominator ends in a
74 conditional as well. */
75 basic_block bb_idom = get_immediate_dominator (CDI_DOMINATORS, bb);
76 gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb_idom));
77 if (!last || gimple_code (last) != GIMPLE_COND)
78 return NULL;
79
80 /* And that BB's immediate dominator's successors are the
81 predecessors of BB or BB itself. */
82 if (!(EDGE_PRED (bb, 0)->src == bb_idom
83 || find_edge (bb_idom, EDGE_PRED (bb, 0)->src))
84 || !(EDGE_PRED (bb, 1)->src == bb_idom
85 || find_edge (bb_idom, EDGE_PRED (bb, 1)->src)))
86 return NULL;
87
88 /* And that the predecessors of BB each have a single successor
89 or are BB's immediate domiator itself. */
90 if (!(EDGE_PRED (bb, 0)->src == bb_idom
91 || single_succ_p (EDGE_PRED (bb, 0)->src))
92 || !(EDGE_PRED (bb, 1)->src == bb_idom
93 || single_succ_p (EDGE_PRED (bb, 1)->src)))
94 return NULL;
95
96 /* So at this point we have a simple diamond for an IF-THEN-ELSE
97 construct starting at BB_IDOM, with a join point at BB. BB
98 pass control outside the loop or to the loop latch.
99
100 We're going to want to create two duplicates of BB, one for
101 each successor of BB_IDOM. */
102 return bb;
103 }
104 }
105 return NULL;
106 }
107
108 /* Return the number of non-debug statements in a block. */
109 static unsigned int
110 count_stmts_in_block (basic_block bb)
111 {
112 gimple_stmt_iterator gsi;
113 unsigned int num_stmts = 0;
114
115 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
116 {
117 gimple *stmt = gsi_stmt (gsi);
118 if (!is_gimple_debug (stmt))
119 num_stmts++;
120 }
121 return num_stmts;
122 }
123
124 /* Return TRUE if CODE represents a tree code that is not likely to
125 be easily if-convertable because it likely expands into multiple
126 insns, FALSE otherwise. */
127 static bool
128 poor_ifcvt_candidate_code (enum tree_code code)
129 {
130 return (code == MIN_EXPR
131 || code == MAX_EXPR
132 || code == ABS_EXPR
133 || code == COND_EXPR
134 || code == CALL_EXPR);
135 }
136
/* Return TRUE if BB is a reasonable block to duplicate by examining
   its size, false otherwise.  BB will always be a loop latch block.

   Things to consider:

     We do not want to spoil if-conversion if at all possible.

     Most of the benefit seems to be from eliminating the unconditional
     jump rather than CSE/DCE opportunities.  So favor duplicating
     small latches.  A latch with just a conditional branch is ideal.

     CSE/DCE opportunities crop up when statements from the predecessors
     feed statements in the latch and allow statements in the latch to
     simplify.  */

static bool
is_feasible_trace (basic_block bb)
{
  basic_block pred1 = EDGE_PRED (bb, 0)->src;
  basic_block pred2 = EDGE_PRED (bb, 1)->src;
  int num_stmts_in_join = count_stmts_in_block (bb);
  /* Only count statements in a predecessor if it has a single
     successor; a multi-successor predecessor cannot absorb a
     duplicate of BB anyway.  */
  int num_stmts_in_pred1
    = EDGE_COUNT (pred1->succs) == 1 ? count_stmts_in_block (pred1) : 0;
  int num_stmts_in_pred2
    = EDGE_COUNT (pred2->succs) == 1 ? count_stmts_in_block (pred2) : 0;

  /* This is meant to catch cases that are likely opportunities for
     if-conversion.  Essentially we look for the case where
     BB's predecessors are both single statement blocks where
     the output of that statement feed the same PHI in BB.  */
  if (num_stmts_in_pred1 == 1 && num_stmts_in_pred2 == 1)
    {
      gimple *stmt1 = last_and_only_stmt (pred1);
      gimple *stmt2 = last_and_only_stmt (pred2);

      if (stmt1 && stmt2
	  && gimple_code (stmt1) == GIMPLE_ASSIGN
	  && gimple_code (stmt2) == GIMPLE_ASSIGN)
	{
	  enum tree_code code1 = gimple_assign_rhs_code (stmt1);
	  enum tree_code code2 = gimple_assign_rhs_code (stmt2);

	  if (!poor_ifcvt_candidate_code (code1)
	      && !poor_ifcvt_candidate_code (code2))
	    {
	      tree lhs1 = gimple_assign_lhs (stmt1);
	      tree lhs2 = gimple_assign_lhs (stmt2);
	      gimple_stmt_iterator gsi;
	      /* Look for a PHI in BB that merges LHS1 and LHS2; such a
		 diamond is an if-conversion candidate we must not split.  */
	      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
		{
		  gimple *phi = gsi_stmt (gsi);
		  if ((gimple_phi_arg_def (phi, 0) == lhs1
		       && gimple_phi_arg_def (phi, 1) == lhs2)
		      || (gimple_phi_arg_def (phi, 1) == lhs1
			  && gimple_phi_arg_def (phi, 0) == lhs2))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file,
				 "Block %d appears to be a join point for "
				 "if-convertable diamond.\n",
				 bb->index);
		      return false;
		    }
		}
	    }
	}
    }

  /* Canonicalize the form so the non-empty predecessor is PRED1.  */
  if (num_stmts_in_pred1 == 0 && num_stmts_in_pred2 == 1)
    {
      std::swap (pred1, pred2);
      std::swap (num_stmts_in_pred1, num_stmts_in_pred2);
    }

  /* Another variant.  This one is half-diamond.  */
  if (num_stmts_in_pred1 == 1 && num_stmts_in_pred2 == 0
      && dominated_by_p (CDI_DOMINATORS, pred1, pred2))
    {
      gimple *stmt1 = last_and_only_stmt (pred1);

      /* The only statement in PRED1 must be an assignment that is
	 not a good candidate for if-conversion.   This may need some
	 generalization.  */
      if (stmt1 && gimple_code (stmt1) == GIMPLE_ASSIGN)
	{
	  enum tree_code code1 = gimple_assign_rhs_code (stmt1);

	  if (!poor_ifcvt_candidate_code (code1))
	    {
	      tree lhs1 = gimple_assign_lhs (stmt1);
	      tree rhs1 = gimple_assign_rhs1 (stmt1);

	      gimple_stmt_iterator gsi;
	      /* A PHI merging the assignment's LHS with its own RHS is
		 the half-diamond if-conversion shape.  */
	      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
		{
		  gimple *phi = gsi_stmt (gsi);
		  if ((gimple_phi_arg_def (phi, 0) == lhs1
		       && gimple_phi_arg_def (phi, 1) == rhs1)
		      || (gimple_phi_arg_def (phi, 1) == lhs1
			  && gimple_phi_arg_def (phi, 0) == rhs1))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file,
				 "Block %d appears to be a join point for "
				 "if-convertable half-diamond.\n",
				 bb->index);
		      return false;
		    }
		}
	    }
	}
    }

  /* If the joiner has no PHIs with useful uses there is zero chance
     of CSE/DCE/jump-threading possibilities exposed by duplicating it.  */
  bool found_useful_phi = false;
  for (gphi_iterator si = gsi_start_phis (bb); ! gsi_end_p (si);
       gsi_next (&si))
    {
      gphi *phi = si.phi ();
      use_operand_p use_p;
      imm_use_iterator iter;
      FOR_EACH_IMM_USE_FAST (use_p, iter, gimple_phi_result (phi))
	{
	  gimple *stmt = USE_STMT (use_p);
	  if (is_gimple_debug (stmt))
	    continue;
	  /* If there's a use in the joiner this might be a CSE/DCE
	     opportunity, but not if the use is in a conditional
	     which makes this a likely if-conversion candidate.  */
	  if (gimple_bb (stmt) == bb
	      && (!is_gimple_assign (stmt)
		  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
		      != tcc_comparison)))
	    {
	      found_useful_phi = true;
	      break;
	    }
	  /* If the use is on a loop header PHI and on one path the
	     value is unchanged this might expose a jump threading
	     opportunity.  */
	  if (gimple_code (stmt) == GIMPLE_PHI
	      && gimple_bb (stmt) == bb->loop_father->header
	      /* But for memory the PHI alone isn't good enough.  */
	      && ! virtual_operand_p (gimple_phi_result (stmt)))
	    {
	      /* An "unchanged path" is a PHI arg that is itself the loop
		 header PHI's result, i.e. the value cycles unmodified.  */
	      bool found_unchanged_path = false;
	      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
		if (gimple_phi_arg_def (phi, i) == gimple_phi_result (stmt))
		  {
		    found_unchanged_path = true;
		    break;
		  }
	      /* If we found an unchanged path this can only be a threading
		 opportunity if we have uses of the loop header PHI result
		 in a stmt dominating the merge block.  Otherwise the
		 splitting may prevent if-conversion.  */
	      if (found_unchanged_path)
		{
		  use_operand_p use2_p;
		  imm_use_iterator iter2;
		  FOR_EACH_IMM_USE_FAST (use2_p, iter2, gimple_phi_result (stmt))
		    {
		      gimple *use_stmt = USE_STMT (use2_p);
		      if (is_gimple_debug (use_stmt))
			continue;
		      basic_block use_bb = gimple_bb (use_stmt);
		      if (use_bb != bb
			  && dominated_by_p (CDI_DOMINATORS, bb, use_bb))
			{
			  /* Only equality tests are considered
			     threadable here.  */
			  if (gcond *cond = dyn_cast <gcond *> (use_stmt))
			    if (gimple_cond_code (cond) == EQ_EXPR
				|| gimple_cond_code (cond) == NE_EXPR)
			      found_useful_phi = true;
			  break;
			}
		    }
		}
	      if (found_useful_phi)
		break;
	    }
	}
      if (found_useful_phi)
	break;
    }
  /* There is one exception namely a controlling condition we can propagate
     an equivalence from to the joiner.  */
  bool found_cprop_opportunity = false;
  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
  /* DOM is known to end in a GIMPLE_COND (checked when BB was found by
     find_block_to_duplicate_for_splitting_paths), hence as_a is safe.  */
  gcond *cond = as_a <gcond *> (last_stmt (dom));
  if (gimple_cond_code (cond) == EQ_EXPR
      || gimple_cond_code (cond) == NE_EXPR)
    for (unsigned i = 0; i < 2; ++i)
      {
	tree op = gimple_op (cond, i);
	if (TREE_CODE (op) == SSA_NAME)
	  {
	    use_operand_p use_p;
	    imm_use_iterator iter;
	    FOR_EACH_IMM_USE_FAST (use_p, iter, op)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		if (gimple_bb (USE_STMT (use_p)) == bb)
		  {
		    found_cprop_opportunity = true;
		    break;
		  }
	      }
	  }
	if (found_cprop_opportunity)
	  break;
      }

  if (! found_useful_phi && ! found_cprop_opportunity)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Block %d is a join that does not expose CSE/DCE/jump-thread "
		 "opportunities when duplicated.\n",
		 bb->index);
      return false;
    }

  /* We may want something here which looks at dataflow and tries
     to guess if duplication of BB is likely to result in simplification
     of instructions in BB in either the original or the duplicate.  */

  /* Upper Hard limit on the number statements to copy.  */
  if (num_stmts_in_join
      >= param_max_jump_thread_duplication_stmts)
    return false;

  return true;
}
373
/* If the immediate dominator of the latch of the loop is
   block with conditional branch, then the loop latch is
   duplicated to its predecessors path preserving the SSA
   semantics.

   CFG before transformation.

              2
              |
              |
        +---->3
        |    / \
        |   /   \
        |  4     5
        |   \   /
        |    \ /
        |     6
        |    / \
        |   /   \
        |  8     7
        |  |     |
        ---+     E



    Block 8 is the latch.  We're going to make copies of block 6 (9 & 10)
    and wire things up so they look like this:

              2
              |
              |
        +---->3
        |    / \
        |   /   \
        |  4     5
        |  |     |
        |  |     |
        |  9    10
        |  |\   /|
        |  | \ / |
        |  |  7  |
        |  |  |  |
        |  |  E  |
        |  |     |
        |   \   /
        |    \ /
        +-----8


    Blocks 9 and 10 will get merged into blocks 4 & 5 respectively which
    enables CSE, DCE and other optimizations to occur on a larger block
    of code.   */

static bool
split_paths ()
{
  bool changed = false;
  loop_p loop;

  loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
  initialize_original_copy_tables ();
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* Only split paths if we are optimizing this loop for speed.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* See if there is a block that we can duplicate to split the
	 path to the loop latch.  */
      basic_block bb
	= find_block_to_duplicate_for_splitting_paths (loop->latch);

      /* BB is the merge point for an IF-THEN-ELSE we want to transform.

	 Essentially we want to create a duplicate of bb and redirect the
	 first predecessor of BB to the duplicate (leaving the second
	 predecessor as is.  This will split the path leading to the latch
	 re-using BB to avoid useless copying.  */
      if (bb && is_feasible_trace (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Duplicating join block %d into predecessor paths\n",
		     bb->index);
	  /* Pick a single-successor predecessor to merge the duplicate
	     into; at least one exists per is_feasible_trace's checks.  */
	  basic_block pred0 = EDGE_PRED (bb, 0)->src;
	  if (EDGE_COUNT (pred0->succs) != 1)
	    pred0 = EDGE_PRED (bb, 1)->src;
	  transform_duplicate (pred0, bb);
	  changed = true;

	  /* If BB has an outgoing edge marked as IRREDUCIBLE, then
	     duplicating BB may result in an irreducible region turning
	     into a natural loop.

	     Long term we might want to hook this into the block
	     duplication code, but as we've seen with similar changes
	     for edge removal, that can be somewhat risky.  */
	  if (EDGE_SUCC (bb, 0)->flags & EDGE_IRREDUCIBLE_LOOP
	      || EDGE_SUCC (bb, 1)->flags & EDGE_IRREDUCIBLE_LOOP)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "Join block %d has EDGE_IRREDUCIBLE_LOOP set.  "
			 "Scheduling loop fixups.\n",
			 bb->index);
	      loops_state_set (LOOPS_NEED_FIXUP);
	    }
	}
    }

  loop_optimizer_finalize ();
  free_original_copy_tables ();
  return changed;
}
490
491 /* Main entry point for splitting paths. Returns TODO_cleanup_cfg if any
492 paths where split, otherwise return zero. */
493
494 static unsigned int
495 execute_split_paths ()
496 {
497 /* If we don't have at least 2 real blocks and backedges in the
498 CFG, then there's no point in trying to perform path splitting. */
499 if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
500 || !mark_dfs_back_edges ())
501 return 0;
502
503 bool changed = split_paths();
504 if (changed)
505 free_dominance_info (CDI_DOMINATORS);
506
507 return changed ? TODO_cleanup_cfg : 0;
508 }
509
510 static bool
511 gate_split_paths ()
512 {
513 return flag_split_paths;
514 }
515
namespace {

/* Pass metadata for the path-splitting pass.  */
const pass_data pass_data_split_paths =
{
  GIMPLE_PASS, /* type */
  "split-paths", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_SPLIT_PATHS, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

/* Pass wrapper delegating gating and execution to the static helpers
   above.  */
class pass_split_paths : public gimple_opt_pass
{
public:
  pass_split_paths (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_split_paths, ctxt)
  {}
  /* opt_pass methods: */
  opt_pass * clone () { return new pass_split_paths (m_ctxt); }
  virtual bool gate (function *) { return gate_split_paths (); }
  virtual unsigned int execute (function *) { return execute_split_paths (); }

}; // class pass_split_paths

} // anon namespace
545
546 gimple_opt_pass *
547 make_pass_split_paths (gcc::context *ctxt)
548 {
549 return new pass_split_paths (ctxt);
550 }