/* Function splitting pass
   Copyright (C) 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Jan Hubicka <jh@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* The purpose of this pass is to split function bodies to improve
   inlining.  I.e. for a function of the form:

     func (...)
       {
         if (cheap_test)
           something_small
         else
           something_big
       }

   Produce:

     func.part (...)
       {
         something_big
       }

     func (...)
       {
         if (cheap_test)
           something_small
         else
           func.part (...);
       }

   When func becomes inlinable and cheap_test is often true, inlining func,
   but not func.part, leads to a performance improvement similar to inlining
   the original func, while the code size growth is smaller.

   The pass is organized in three stages:
   1) Collect local info about basic blocks into the BB_INFO structure and
      compute the function body's estimated size and time.
   2) Via a DFS walk, find all possible basic blocks where we can split
      and choose the best one.
   3) If a split point is found, split at the specified BB by creating a clone
      and updating the function to call it.

   The decisions about what functions to split are in execute_split_functions
   and consider_split.

   There are several possible future improvements for this pass, including:

   1) Splitting to break up large functions
   2) Splitting to reduce stack frame usage
   3) Allow the split part of the function to use values computed in the
      header part.  The values would need to be passed to the split function,
      perhaps via the same interface as for nested functions or as arguments.
   4) Support for simple rematerialization.  I.e. when the split part uses
      a value computed in the header from a function parameter in a very
      cheap way, we can just recompute it.
   5) Support splitting of nested functions.
   6) Support non-SSA arguments.
   7) There is nothing preventing us from producing multiple parts of a
      single function when needed or splitting also the parts.  */
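
/* As a purely illustrative sketch (hypothetical source, not produced by
   this file), the transformation above applied to

     int
     f (int x)
     {
       if (x < 0)
         return 0;
       return expensive_path (x);   <- large slow path
     }

   leaves the cheap 'x < 0' test in f and outlines the slow path into a
   clone named with the "part" suffix (see cgraph_function_versioning
   below), so f becomes small enough to inline at its call sites.  */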

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "target.h"
#include "cgraph.h"
#include "ipa-prop.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "flags.h"
#include "diagnostic.h"
#include "tree-dump.h"
#include "tree-inline.h"
#include "params.h"
#include "gimple-pretty-print.h"
#include "ipa-inline.h"

/* Per basic block info.  */

typedef struct
{
  unsigned int size;
  unsigned int time;
} bb_info;
DEF_VEC_O(bb_info);
DEF_VEC_ALLOC_O(bb_info,heap);

static VEC(bb_info, heap) *bb_info_vec;

/* Description of split point.  */

struct split_point
{
  /* Size of the partitions.  */
  unsigned int header_time, header_size, split_time, split_size;

  /* SSA names that need to be passed into the split function.  */
  bitmap ssa_names_to_pass;

  /* Basic block where we split (that will become the entry point of the
     new function).  */
  basic_block entry_bb;

  /* Basic blocks we are splitting away.  */
  bitmap split_bbs;

  /* True when the return value is computed in the split part and thus it
     needs to be returned.  */
  bool split_part_set_retval;
};

/* Best split point found.  */

struct split_point best_split_point;

/* Set of basic blocks that are not allowed to dominate a split point.  */

static bitmap forbidden_dominators;

static tree find_retval (basic_block return_bb);

/* Callback for walk_stmt_load_store_addr_ops.  If T is a non-SSA automatic
   variable, check if it is present in the bitmap passed via DATA.  */

static bool
test_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t, void *data)
{
  t = get_base_address (t);

  if (!t || is_gimple_reg (t))
    return false;

  if (TREE_CODE (t) == PARM_DECL
      || (TREE_CODE (t) == VAR_DECL
          && auto_var_in_fn_p (t, current_function_decl))
      || TREE_CODE (t) == RESULT_DECL
      || TREE_CODE (t) == LABEL_DECL)
    return bitmap_bit_p ((bitmap)data, DECL_UID (t));

  /* For DECL_BY_REFERENCE, the return value is actually a pointer.  We want
     to pretend that the value pointed to is the actual result decl.  */
  if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
      && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
      && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
      && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    return
      bitmap_bit_p ((bitmap)data,
                    DECL_UID (DECL_RESULT (current_function_decl)));

  return false;
}
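
/* For instance (hypothetical C), in

     struct big { int a[64]; };
     struct big f (void) { ... }

   the result may be returned via an invisible reference, so stores to it
   appear as MEM_REFs through the SSA pointer whose SSA_NAME_VAR is the
   RESULT_DECL; the test above maps such accesses back to the result
   decl's DECL_UID.  */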

/* Dump split point CURRENT.  */

static void
dump_split_point (FILE * file, struct split_point *current)
{
  fprintf (file,
           "Split point at BB %i\n"
           "  header time: %i header size: %i\n"
           "  split time: %i split size: %i\n  bbs: ",
           current->entry_bb->index, current->header_time,
           current->header_size, current->split_time, current->split_size);
  dump_bitmap (file, current->split_bbs);
  fprintf (file, "  SSA names to pass: ");
  dump_bitmap (file, current->ssa_names_to_pass);
}

/* Look for all BBs in the header that might lead to the split part and verify
   that they are not defining any non-SSA var used by the split part.
   Parameters are the same as for consider_split.  */

static bool
verify_non_ssa_vars (struct split_point *current, bitmap non_ssa_vars,
                     basic_block return_bb)
{
  bitmap seen = BITMAP_ALLOC (NULL);
  VEC (basic_block,heap) *worklist = NULL;
  edge e;
  edge_iterator ei;
  bool ok = true;

  FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
    if (e->src != ENTRY_BLOCK_PTR
        && !bitmap_bit_p (current->split_bbs, e->src->index))
      {
        VEC_safe_push (basic_block, heap, worklist, e->src);
        bitmap_set_bit (seen, e->src->index);
      }

  while (!VEC_empty (basic_block, worklist))
    {
      gimple_stmt_iterator bsi;
      basic_block bb = VEC_pop (basic_block, worklist);

      FOR_EACH_EDGE (e, ei, bb->preds)
        if (e->src != ENTRY_BLOCK_PTR
            && bitmap_set_bit (seen, e->src->index))
          {
            gcc_checking_assert (!bitmap_bit_p (current->split_bbs,
                                                e->src->index));
            VEC_safe_push (basic_block, heap, worklist, e->src);
          }
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          gimple stmt = gsi_stmt (bsi);
          if (is_gimple_debug (stmt))
            continue;
          if (walk_stmt_load_store_addr_ops
              (stmt, non_ssa_vars, test_nonssa_use, test_nonssa_use,
               test_nonssa_use))
            {
              ok = false;
              goto done;
            }
          if (gimple_code (stmt) == GIMPLE_LABEL
              && test_nonssa_use (stmt, gimple_label_label (stmt),
                                  non_ssa_vars))
            {
              ok = false;
              goto done;
            }
        }
      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          if (walk_stmt_load_store_addr_ops
              (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use, test_nonssa_use,
               test_nonssa_use))
            {
              ok = false;
              goto done;
            }
        }
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (e->dest != return_bb)
            continue;
          for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi);
               gsi_next (&bsi))
            {
              gimple stmt = gsi_stmt (bsi);
              tree op = gimple_phi_arg_def (stmt, e->dest_idx);

              if (!is_gimple_reg (gimple_phi_result (stmt)))
                continue;
              if (TREE_CODE (op) != SSA_NAME
                  && test_nonssa_use (stmt, op, non_ssa_vars))
                {
                  ok = false;
                  goto done;
                }
            }
        }
    }
done:
  BITMAP_FREE (seen);
  VEC_free (basic_block, heap, worklist);
  return ok;
}

/* If STMT is a call, check the callee against a list of forbidden
   predicate functions.  If a match is found, look for uses of the
   call result in condition statements that compare against zero.
   For each such use, find the block targeted by the condition
   statement for the nonzero result, and set the bit for this block
   in the forbidden dominators bitmap.  The purpose of this is to avoid
   selecting a split point where we are likely to lose the chance
   to optimize away an unused function call.  */

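/* An illustrative sketch (hypothetical source) of the pattern this
   guards against:

     if (__builtin_constant_p (n))
       fast_path (n);    <- nonzero arm; marked forbidden below
     else
       slow_path (n);

   If the nonzero arm ended up dominating a split point, outlining could
   keep later passes from proving the call result constant and deleting
   the dead arm, so such blocks are recorded in FORBIDDEN_DOMINATORS.  */
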
static void
check_forbidden_calls (gimple stmt)
{
  imm_use_iterator use_iter;
  use_operand_p use_p;
  tree lhs;

  /* At the moment, __builtin_constant_p is the only forbidden
     predicate function call (see PR49642).  */
  if (!gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
    return;

  lhs = gimple_call_lhs (stmt);

  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  FOR_EACH_IMM_USE_FAST (use_p, use_iter, lhs)
    {
      tree op1;
      basic_block use_bb, forbidden_bb;
      enum tree_code code;
      edge true_edge, false_edge;
      gimple use_stmt = USE_STMT (use_p);

      if (gimple_code (use_stmt) != GIMPLE_COND)
        continue;

      /* Assuming canonical form for GIMPLE_COND here, with constant
         in second position.  */
      op1 = gimple_cond_rhs (use_stmt);
      code = gimple_cond_code (use_stmt);
      use_bb = gimple_bb (use_stmt);

      extract_true_false_edges_from_block (use_bb, &true_edge, &false_edge);

      /* We're only interested in comparisons that distinguish
         unambiguously from zero.  */
      if (!integer_zerop (op1) || code == LE_EXPR || code == GE_EXPR)
        continue;

      if (code == EQ_EXPR)
        forbidden_bb = false_edge->dest;
      else
        forbidden_bb = true_edge->dest;

      bitmap_set_bit (forbidden_dominators, forbidden_bb->index);
    }
}

/* If BB is dominated by any block in the forbidden dominators set,
   return TRUE; else FALSE.  */

static bool
dominated_by_forbidden (basic_block bb)
{
  unsigned dom_bb;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (forbidden_dominators, 1, dom_bb, bi)
    {
      if (dominated_by_p (CDI_DOMINATORS, bb, BASIC_BLOCK (dom_bb)))
        return true;
    }

  return false;
}

/* We found a split point CURRENT.  NON_SSA_VARS is a bitmap of all non-SSA
   variables used and RETURN_BB is the return basic block.
   See if we can split the function here.  */

static void
consider_split (struct split_point *current, bitmap non_ssa_vars,
                basic_block return_bb)
{
  tree parm;
  unsigned int num_args = 0;
  unsigned int call_overhead;
  edge e;
  edge_iterator ei;
  gimple_stmt_iterator bsi;
  unsigned int i;
  int incoming_freq = 0;
  tree retval;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_split_point (dump_file, current);

  FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
    if (!bitmap_bit_p (current->split_bbs, e->src->index))
      incoming_freq += EDGE_FREQUENCY (e);

  /* Do not split when we would end up calling the function anyway.  */
  if (incoming_freq
      >= (ENTRY_BLOCK_PTR->frequency
          * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Refused: incoming frequency is too large.\n");
      return;
    }
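
  /* A worked example of the threshold above, assuming the default
     PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY of 70 (an assumption; see
     params.def): with ENTRY_BLOCK_PTR->frequency of 10000, any candidate
     whose header-side incoming frequency sums to 7000 or more is refused,
     since the split part would then be entered at least 70% of the time
     and the call to it could rarely be skipped.  */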

  if (!current->header_size)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  Refused: header empty\n");
      return;
    }

  /* Verify that PHI args on entry are either virtual or all their operands
     incoming from the header are the same.  */
  for (bsi = gsi_start_phis (current->entry_bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      tree val = NULL;

      if (!is_gimple_reg (gimple_phi_result (stmt)))
        continue;
      for (i = 0; i < gimple_phi_num_args (stmt); i++)
        {
          edge e = gimple_phi_arg_edge (stmt, i);
          if (!bitmap_bit_p (current->split_bbs, e->src->index))
            {
              tree edge_val = gimple_phi_arg_def (stmt, i);
              if (val && edge_val != val)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "  Refused: entry BB has PHI with multiple variants\n");
                  return;
                }
              val = edge_val;
            }
        }
    }


  /* See what arguments we will pass to the split function and compute
     call overhead.  */
  call_overhead = eni_size_weights.call_cost;
  for (parm = DECL_ARGUMENTS (current_function_decl); parm;
       parm = DECL_CHAIN (parm))
    {
      if (!is_gimple_reg (parm))
        {
          if (bitmap_bit_p (non_ssa_vars, DECL_UID (parm)))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  Refused: need to pass non-ssa param values\n");
              return;
            }
        }
      else
        {
          tree ddef = ssa_default_def (cfun, parm);
          if (ddef
              && bitmap_bit_p (current->ssa_names_to_pass,
                               SSA_NAME_VERSION (ddef)))
            {
              if (!VOID_TYPE_P (TREE_TYPE (parm)))
                call_overhead += estimate_move_cost (TREE_TYPE (parm));
              num_args++;
            }
        }
    }
  if (!VOID_TYPE_P (TREE_TYPE (current_function_decl)))
    call_overhead += estimate_move_cost (TREE_TYPE (current_function_decl));

  if (current->split_size <= call_overhead)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Refused: split size is smaller than call overhead\n");
      return;
    }
  if (current->header_size + call_overhead
      >= (unsigned int)(DECL_DECLARED_INLINE_P (current_function_decl)
                        ? MAX_INLINE_INSNS_SINGLE
                        : MAX_INLINE_INSNS_AUTO))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Refused: header size is too large for inline candidate\n");
      return;
    }

  /* FIXME: we can currently pass only SSA function parameters to the split
     arguments.  Once the parm_adjustment infrastructure is supported by
     cloning, we can pass more than that.  */
  if (num_args != bitmap_count_bits (current->ssa_names_to_pass))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Refused: need to pass non-param values\n");
      return;
    }

  /* When there are non-ssa vars used in the split region, see if they
     are used in the header region.  If so, reject the split.
     FIXME: we can use nested function support to access both.  */
  if (!bitmap_empty_p (non_ssa_vars)
      && !verify_non_ssa_vars (current, non_ssa_vars, return_bb))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Refused: split part has non-ssa uses\n");
      return;
    }

  /* If the split point is dominated by a forbidden block, reject
     the split.  */
  if (!bitmap_empty_p (forbidden_dominators)
      && dominated_by_forbidden (current->entry_bb))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Refused: split point dominated by forbidden block\n");
      return;
    }

  /* See if the retval used by the return bb is computed by the header or
     the split part.  When it is computed by the split part, we need to
     produce a return statement in the split part and add code to the
     header to pass it around.

     This is a bit tricky to test:
     1) When there is no return_bb or no return value, we always pass the
        value around.
     2) Invariants are always computed by the caller.
     3) For SSA we need to look if the defining statement is in the header
        or the split part.
     4) For non-SSA we need to look where the var is computed.  */
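
  /* A small illustration of case 3 (hypothetical GIMPLE):

       header bb:  a_1 = x_2(D) + 1;
       split bb:   b_3 = a_1 * 2;
       return bb:  return b_3;

     Here b_3 is defined inside the split part, so split_part_set_retval
     becomes true and the split clone must return the value; if the
     return value were a_1 instead, the header would compute it and the
     call in the header could simply pass it along.  */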
  retval = find_retval (return_bb);
  if (!retval)
    current->split_part_set_retval = true;
  else if (is_gimple_min_invariant (retval))
    current->split_part_set_retval = false;
  /* Special case is a value returned by reference, which we record as if
     it were a non-SSA set to the result_decl.  */
  else if (TREE_CODE (retval) == SSA_NAME
           && TREE_CODE (SSA_NAME_VAR (retval)) == RESULT_DECL
           && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    current->split_part_set_retval
      = bitmap_bit_p (non_ssa_vars, DECL_UID (SSA_NAME_VAR (retval)));
  else if (TREE_CODE (retval) == SSA_NAME)
    current->split_part_set_retval
      = (!SSA_NAME_IS_DEFAULT_DEF (retval)
         && (bitmap_bit_p (current->split_bbs,
                           gimple_bb (SSA_NAME_DEF_STMT (retval))->index)
             || gimple_bb (SSA_NAME_DEF_STMT (retval)) == return_bb));
  else if (TREE_CODE (retval) == PARM_DECL)
    current->split_part_set_retval = false;
  else if (TREE_CODE (retval) == VAR_DECL
           || TREE_CODE (retval) == RESULT_DECL)
    current->split_part_set_retval
      = bitmap_bit_p (non_ssa_vars, DECL_UID (retval));
  else
    current->split_part_set_retval = true;

  /* split_function fixes up at most one non-virtual PHI node in return_bb,
     for the return value.  If there are other PHIs, give up.  */
  if (return_bb != EXIT_BLOCK_PTR)
    {
      gimple_stmt_iterator psi;

      for (psi = gsi_start_phis (return_bb); !gsi_end_p (psi); gsi_next (&psi))
        if (is_gimple_reg (gimple_phi_result (gsi_stmt (psi)))
            && !(retval
                 && current->split_part_set_retval
                 && TREE_CODE (retval) == SSA_NAME
                 && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
                 && SSA_NAME_DEF_STMT (retval) == gsi_stmt (psi)))
          {
            if (dump_file && (dump_flags & TDF_DETAILS))
              fprintf (dump_file,
                       "  Refused: return bb has extra PHIs\n");
            return;
          }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Accepted!\n");

  /* At the moment, choose the split point with the lowest frequency that
     leaves the smallest header.
     In the future we might reconsider this heuristic.  */
  if (!best_split_point.split_bbs
      || best_split_point.entry_bb->frequency > current->entry_bb->frequency
      || (best_split_point.entry_bb->frequency == current->entry_bb->frequency
          && best_split_point.split_size < current->split_size))

    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  New best split point!\n");
      if (best_split_point.ssa_names_to_pass)
        {
          BITMAP_FREE (best_split_point.ssa_names_to_pass);
          BITMAP_FREE (best_split_point.split_bbs);
        }
      best_split_point = *current;
      best_split_point.ssa_names_to_pass = BITMAP_ALLOC (NULL);
      bitmap_copy (best_split_point.ssa_names_to_pass,
                   current->ssa_names_to_pass);
      best_split_point.split_bbs = BITMAP_ALLOC (NULL);
      bitmap_copy (best_split_point.split_bbs, current->split_bbs);
    }
}

/* Return the basic block containing the RETURN statement.  We allow basic
   blocks of the form:
     <retval> = tmp_var;
     return <retval>
   but return_bb cannot be more complex than this.
   If nothing is found, return EXIT_BLOCK_PTR.

   When there are multiple RETURN statements, choose one with a return value,
   since that one is more likely shared by multiple code paths.

   The return BB is special, because for function splitting it is the only
   basic block that is duplicated in between the header and split part of the
   function.

   TODO: We might support multiple return blocks.  */

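/* For example (illustrative GIMPLE), a block of the form

     <bb 7>:
     <retval> = tmp_5;
     return <retval>;

   qualifies, while a block containing any further statement, say a call
   before the return, is considered too complex and EXIT_BLOCK_PTR is
   returned instead.  */
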
static basic_block
find_return_bb (void)
{
  edge e;
  basic_block return_bb = EXIT_BLOCK_PTR;
  gimple_stmt_iterator bsi;
  bool found_return = false;
  tree retval = NULL_TREE;

  if (!single_pred_p (EXIT_BLOCK_PTR))
    return return_bb;

  e = single_pred_edge (EXIT_BLOCK_PTR);
  for (bsi = gsi_last_bb (e->src); !gsi_end_p (bsi); gsi_prev (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      if (gimple_code (stmt) == GIMPLE_LABEL
          || is_gimple_debug (stmt)
          || gimple_clobber_p (stmt))
        ;
      else if (gimple_code (stmt) == GIMPLE_ASSIGN
               && found_return
               && gimple_assign_single_p (stmt)
               && (auto_var_in_fn_p (gimple_assign_rhs1 (stmt),
                                     current_function_decl)
                   || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))
               && retval == gimple_assign_lhs (stmt))
        ;
      else if (gimple_code (stmt) == GIMPLE_RETURN)
        {
          found_return = true;
          retval = gimple_return_retval (stmt);
        }
      else
        break;
    }
  if (gsi_end_p (bsi) && found_return)
    return_bb = e->src;

  return return_bb;
}

/* Given return basic block RETURN_BB, see where the return value is really
   stored.  */
static tree
find_retval (basic_block return_bb)
{
  gimple_stmt_iterator bsi;
  for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
    if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
      return gimple_return_retval (gsi_stmt (bsi));
    else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
             && !gimple_clobber_p (gsi_stmt (bsi)))
      return gimple_assign_rhs1 (gsi_stmt (bsi));
  return NULL;
}

/* Callback for walk_stmt_load_store_addr_ops.  If T is a non-SSA automatic
   variable, mark it as used in the bitmap passed via DATA.
   Return true when access to T prevents splitting the function.  */

static bool
mark_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t, void *data)
{
  t = get_base_address (t);

  if (!t || is_gimple_reg (t))
    return false;

  /* At present we can't pass non-SSA arguments to the split function.
     FIXME: this can be relaxed by passing references to arguments.  */
  if (TREE_CODE (t) == PARM_DECL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Cannot split: use of non-ssa function parameter.\n");
      return true;
    }

  if ((TREE_CODE (t) == VAR_DECL
       && auto_var_in_fn_p (t, current_function_decl))
      || TREE_CODE (t) == RESULT_DECL
      || TREE_CODE (t) == LABEL_DECL)
    bitmap_set_bit ((bitmap)data, DECL_UID (t));

  /* For DECL_BY_REFERENCE, the return value is actually a pointer.  We want
     to pretend that the value pointed to is the actual result decl.  */
  if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
      && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
      && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
      && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    return
      bitmap_bit_p ((bitmap)data,
                    DECL_UID (DECL_RESULT (current_function_decl)));

  return false;
}

/* Compute local properties of basic block BB that we collect when looking
   for split points.  We look for ssa defs and store them in SET_SSA_NAMES,
   for ssa uses and store them in USED_SSA_NAMES, and for any non-SSA
   automatic vars stored in NON_SSA_VARS.

   When BB has an edge to RETURN_BB, collect uses in RETURN_BB too.

   Return false when BB contains something that prevents it from being put
   into the split function.  */

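/* For a hypothetical block containing

     a_5 = b_3 + c_4;
     tmp.7 = foo ();   <- tmp.7 is a non-SSA automatic

   this records a_5 in SET_SSA_NAMES, b_3 and c_4 in USED_SSA_NAMES, and
   tmp.7's DECL_UID in NON_SSA_VARS, and returns true since nothing here
   forbids splitting.  */
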
static bool
visit_bb (basic_block bb, basic_block return_bb,
          bitmap set_ssa_names, bitmap used_ssa_names,
          bitmap non_ssa_vars)
{
  gimple_stmt_iterator bsi;
  edge e;
  edge_iterator ei;
  bool can_split = true;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      tree op;
      ssa_op_iter iter;
      tree decl;

      if (is_gimple_debug (stmt))
        continue;

      if (gimple_clobber_p (stmt))
        continue;

      /* FIXME: We can split regions containing EH.  We cannot, however,
         split RESX, EH_DISPATCH and EH_POINTER referring to the same region
         into different partitions.  This would require tracking of
         EH regions and checking in consider_split_point if they
         are not used elsewhere.  */
      if (gimple_code (stmt) == GIMPLE_RESX)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Cannot split: resx.\n");
          can_split = false;
        }
      if (gimple_code (stmt) == GIMPLE_EH_DISPATCH)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Cannot split: eh dispatch.\n");
          can_split = false;
        }

      /* Check builtins that prevent splitting.  */
      if (gimple_code (stmt) == GIMPLE_CALL
          && (decl = gimple_call_fndecl (stmt)) != NULL_TREE
          && DECL_BUILT_IN (decl)
          && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
        switch (DECL_FUNCTION_CODE (decl))
          {
          /* FIXME: once we allow passing non-parm values to the split part,
             we need to be sure to handle correctly builtin_stack_save and
             builtin_stack_restore.  At the moment we are safe; there is no
             way to store the builtin_stack_save result in a non-SSA variable
             since all calls to those are compiler generated.  */
          case BUILT_IN_APPLY:
          case BUILT_IN_APPLY_ARGS:
          case BUILT_IN_VA_START:
            if (dump_file && (dump_flags & TDF_DETAILS))
              fprintf (dump_file,
                       "Cannot split: builtin_apply and va_start.\n");
            can_split = false;
            break;
          case BUILT_IN_EH_POINTER:
            if (dump_file && (dump_flags & TDF_DETAILS))
              fprintf (dump_file, "Cannot split: builtin_eh_pointer.\n");
            can_split = false;
            break;
          default:
            break;
          }

      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
        bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op));
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
        bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
      can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
                                                   mark_nonssa_use,
                                                   mark_nonssa_use,
                                                   mark_nonssa_use);
    }
  for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      unsigned int i;

      if (is_gimple_debug (stmt))
        continue;
      if (!is_gimple_reg (gimple_phi_result (stmt)))
        continue;
      bitmap_set_bit (set_ssa_names,
                      SSA_NAME_VERSION (gimple_phi_result (stmt)));
      for (i = 0; i < gimple_phi_num_args (stmt); i++)
        {
          tree op = gimple_phi_arg_def (stmt, i);
          if (TREE_CODE (op) == SSA_NAME)
            bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
        }
      can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
                                                   mark_nonssa_use,
                                                   mark_nonssa_use,
                                                   mark_nonssa_use);
    }
  /* Record also uses coming from PHI operands in the return BB.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->dest == return_bb)
      {
        for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            tree op = gimple_phi_arg_def (stmt, e->dest_idx);

            if (is_gimple_debug (stmt))
              continue;
            if (!is_gimple_reg (gimple_phi_result (stmt)))
              continue;
            if (TREE_CODE (op) == SSA_NAME)
              bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
            else
              can_split &= !mark_nonssa_use (stmt, op, non_ssa_vars);
          }
      }
  return can_split;
}

/* Stack entry for the recursive DFS walk in find_split_point.  */

typedef struct
{
  /* Basic block we are examining.  */
  basic_block bb;

  /* SSA names set and used by the BB and all BBs reachable
     from it via DFS walk.  */
  bitmap set_ssa_names, used_ssa_names;
  bitmap non_ssa_vars;

  /* All BBs visited from this BB via DFS walk.  */
  bitmap bbs_visited;

  /* Last examined edge in the DFS walk.  Since we walk an unoriented graph,
     the value is up to the sum of incoming and outgoing edges of BB.  */
  unsigned int edge_num;

  /* Stack entry index of the earliest BB reachable from the current BB
     or any BB visited later in the DFS walk.  */
  int earliest;

  /* Overall time and size of all BBs reached from this BB in the DFS walk.  */
  int overall_time, overall_size;

  /* When false we cannot split on this BB.  */
  bool can_split;
} stack_entry;
DEF_VEC_O(stack_entry);
DEF_VEC_ALLOC_O(stack_entry,heap);


/* Find all articulations and call consider_split on them.
   OVERALL_TIME and OVERALL_SIZE are the time and size of the function.

   We perform the basic algorithm for finding an articulation in a graph
   created from the CFG by considering it to be an unoriented graph.

   The articulation is discovered via DFS walk.  We collect the earliest
   basic block on the stack that is reachable via a backward edge.  An
   articulation is any basic block such that there is no backward edge
   bypassing it.
   To reduce stack usage we maintain a heap allocated stack in the STACK
   vector.  The AUX pointer of a BB is set to the index at which it appears
   on the stack, or to -1 once it is visited and popped off the stack.

   The algorithm finds an articulation after visiting the whole component
   reachable by it.  This makes it convenient to collect information about
   the component used by consider_split.  */

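/* A tiny illustration on a hypothetical diamond CFG:

     ENTRY -> A -> B -> D -> EXIT
               \-> C ---^

   Neither B nor C is an articulation, because the other branch bypasses
   each of them, but A and D are: every path from the entry into the code
   below them must pass through them, so they are the candidate entry_bb
   values handed to consider_split.  */
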
static void
find_split_points (int overall_time, int overall_size)
{
  stack_entry first;
  VEC(stack_entry, heap) *stack = NULL;
  basic_block bb;
  basic_block return_bb = find_return_bb ();
  struct split_point current;

  current.header_time = overall_time;
  current.header_size = overall_size;
  current.split_time = 0;
  current.split_size = 0;
  current.ssa_names_to_pass = BITMAP_ALLOC (NULL);

  first.bb = ENTRY_BLOCK_PTR;
  first.edge_num = 0;
  first.overall_time = 0;
  first.overall_size = 0;
  first.earliest = INT_MAX;
  first.set_ssa_names = 0;
  first.used_ssa_names = 0;
  first.bbs_visited = 0;
  VEC_safe_push (stack_entry, heap, stack, &first);
  ENTRY_BLOCK_PTR->aux = (void *)(intptr_t)-1;

  while (!VEC_empty (stack_entry, stack))
    {
      stack_entry *entry = VEC_last (stack_entry, stack);

      /* We are walking an unoriented graph, so edge_num counts
         succ and pred edges together.  However when considering an
         articulation, we want to have processed everything reachable
         from the articulation but nothing that reaches into it.  */
      if (entry->edge_num == EDGE_COUNT (entry->bb->succs)
          && entry->bb != ENTRY_BLOCK_PTR)
        {
          int pos = VEC_length (stack_entry, stack);
          entry->can_split &= visit_bb (entry->bb, return_bb,
                                        entry->set_ssa_names,
                                        entry->used_ssa_names,
                                        entry->non_ssa_vars);
          if (pos <= entry->earliest && !entry->can_split
              && dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "found articulation at bb %i but can not split\n",
                     entry->bb->index);
          if (pos <= entry->earliest && entry->can_split)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "found articulation at bb %i\n",
                         entry->bb->index);
              current.entry_bb = entry->bb;
              current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
              bitmap_and_compl (current.ssa_names_to_pass,
                                entry->used_ssa_names, entry->set_ssa_names);
              current.header_time = overall_time - entry->overall_time;
              current.header_size = overall_size - entry->overall_size;
              current.split_time = entry->overall_time;
              current.split_size = entry->overall_size;
              current.split_bbs = entry->bbs_visited;
              consider_split (&current, entry->non_ssa_vars, return_bb);
              BITMAP_FREE (current.ssa_names_to_pass);
            }
        }
      /* Do the actual DFS walk.  */
      if (entry->edge_num
          < (EDGE_COUNT (entry->bb->succs)
             + EDGE_COUNT (entry->bb->preds)))
        {
          edge e;
          basic_block dest;
          if (entry->edge_num < EDGE_COUNT (entry->bb->succs))
            {
              e = EDGE_SUCC (entry->bb, entry->edge_num);
              dest = e->dest;
            }
          else
            {
              e = EDGE_PRED (entry->bb, entry->edge_num
                             - EDGE_COUNT (entry->bb->succs));
              dest = e->src;
            }

          entry->edge_num++;

          /* New BB to visit, push it to the stack.  */
          if (dest != return_bb && dest != EXIT_BLOCK_PTR
              && !dest->aux)
            {
              stack_entry new_entry;

              new_entry.bb = dest;
              new_entry.edge_num = 0;
              new_entry.overall_time
                = VEC_index (bb_info, bb_info_vec, dest->index)->time;
              new_entry.overall_size
                = VEC_index (bb_info, bb_info_vec, dest->index)->size;
              new_entry.earliest = INT_MAX;
              new_entry.set_ssa_names = BITMAP_ALLOC (NULL);
              new_entry.used_ssa_names = BITMAP_ALLOC (NULL);
              new_entry.bbs_visited = BITMAP_ALLOC (NULL);
              new_entry.non_ssa_vars = BITMAP_ALLOC (NULL);
              new_entry.can_split = true;
              bitmap_set_bit (new_entry.bbs_visited, dest->index);
              VEC_safe_push (stack_entry, heap, stack, &new_entry);
              dest->aux = (void *)(intptr_t)VEC_length (stack_entry, stack);
            }
          /* Back edge found, record the earliest point.  */
          else if ((intptr_t)dest->aux > 0
                   && (intptr_t)dest->aux < entry->earliest)
            entry->earliest = (intptr_t)dest->aux;
        }
      /* We are done with examining the edges.  Pop off the value from the
         stack and merge stuff we accumulated during the walk.  */
      else if (entry->bb != ENTRY_BLOCK_PTR)
        {
          stack_entry *prev = VEC_index (stack_entry, stack,
                                         VEC_length (stack_entry, stack) - 2);

          entry->bb->aux = (void *)(intptr_t)-1;
          prev->can_split &= entry->can_split;
          if (prev->set_ssa_names)
            {
              bitmap_ior_into (prev->set_ssa_names, entry->set_ssa_names);
              bitmap_ior_into (prev->used_ssa_names, entry->used_ssa_names);
              bitmap_ior_into (prev->bbs_visited, entry->bbs_visited);
              bitmap_ior_into (prev->non_ssa_vars, entry->non_ssa_vars);
            }
          if (prev->earliest > entry->earliest)
            prev->earliest = entry->earliest;
          prev->overall_time += entry->overall_time;
          prev->overall_size += entry->overall_size;
          BITMAP_FREE (entry->set_ssa_names);
          BITMAP_FREE (entry->used_ssa_names);
          BITMAP_FREE (entry->bbs_visited);
          BITMAP_FREE (entry->non_ssa_vars);
          VEC_pop (stack_entry, stack);
        }
      else
        VEC_pop (stack_entry, stack);
    }
  ENTRY_BLOCK_PTR->aux = NULL;
  FOR_EACH_BB (bb)
    bb->aux = NULL;
  VEC_free (stack_entry, heap, stack);
  BITMAP_FREE (current.ssa_names_to_pass);
}

/* Split function at SPLIT_POINT.  */

static void
split_function (struct split_point *split_point)
{
  VEC (tree, heap) *args_to_pass = NULL;
  bitmap args_to_skip;
  tree parm;
  int num = 0;
  struct cgraph_node *node, *cur_node = cgraph_get_node (current_function_decl);
  basic_block return_bb = find_return_bb ();
  basic_block call_bb;
  gimple_stmt_iterator gsi;
  gimple call;
  edge e;
  edge_iterator ei;
  tree retval = NULL, real_retval = NULL;
  bool split_part_return_p = false;
  gimple last_stmt = NULL;
  unsigned int i;
  tree arg, ddef;

  if (dump_file)
    {
      fprintf (dump_file, "\n\nSplitting function at:\n");
      dump_split_point (dump_file, split_point);
    }

  if (cur_node->local.can_change_signature)
    args_to_skip = BITMAP_ALLOC (NULL);
  else
    args_to_skip = NULL;

  /* Collect the parameters of the new function and the args_to_skip
     bitmap.  */
  for (parm = DECL_ARGUMENTS (current_function_decl);
       parm; parm = DECL_CHAIN (parm), num++)
    if (args_to_skip
        && (!is_gimple_reg (parm)
            || (ddef = ssa_default_def (cfun, parm)) == NULL_TREE
            || !bitmap_bit_p (split_point->ssa_names_to_pass,
                              SSA_NAME_VERSION (ddef))))
      bitmap_set_bit (args_to_skip, num);
    else
      {
        /* This parm might not have been used up to now, but is going to be
           used, hence register it.  */
        if (is_gimple_reg (parm))
          arg = get_or_create_ssa_default_def (cfun, parm);
        else
          arg = parm;

        if (!useless_type_conversion_p (DECL_ARG_TYPE (parm), TREE_TYPE (arg)))
          arg = fold_convert (DECL_ARG_TYPE (parm), arg);
        VEC_safe_push (tree, heap, args_to_pass, arg);
      }

  /* See if the split function will return.  */
  FOR_EACH_EDGE (e, ei, return_bb->preds)
    if (bitmap_bit_p (split_point->split_bbs, e->src->index))
      break;
  if (e)
    split_part_return_p = true;

  /* Add the return block to what will become the split function.
     We do not return; no return block is needed.  */
  if (!split_part_return_p)
    ;
  /* We have no return block, so nothing is needed.  */
  else if (return_bb == EXIT_BLOCK_PTR)
    ;
  /* When we do not want to return a value, we need to construct a new
     return block with an empty return statement.
     FIXME: Once we are able to change the return type, we should change the
     function to return void instead of just outputting a function with an
     undefined return value.  For structures this affects quality of
     codegen.  */
  else if (!split_point->split_part_set_retval
           && find_retval (return_bb))
    {
      bool redirected = true;
      basic_block new_return_bb = create_basic_block (NULL, 0, return_bb);
      gimple_stmt_iterator gsi = gsi_start_bb (new_return_bb);
      gsi_insert_after (&gsi, gimple_build_return (NULL), GSI_NEW_STMT);
      while (redirected)
        {
          redirected = false;
          FOR_EACH_EDGE (e, ei, return_bb->preds)
            if (bitmap_bit_p (split_point->split_bbs, e->src->index))
              {
                new_return_bb->count += e->count;
                new_return_bb->frequency += EDGE_FREQUENCY (e);
                redirect_edge_and_branch (e, new_return_bb);
                redirected = true;
                break;
              }
        }
      e = make_edge (new_return_bb, EXIT_BLOCK_PTR, 0);
      e->probability = REG_BR_PROB_BASE;
      e->count = new_return_bb->count;
      bitmap_set_bit (split_point->split_bbs, new_return_bb->index);
    }
  /* When we pass around the value, use the existing return block.  */
  else
    bitmap_set_bit (split_point->split_bbs, return_bb->index);

  /* If RETURN_BB has virtual operand PHIs, they must be removed and the
     virtual operand marked for renaming as we change the CFG in a way that
     tree-inline is not able to compensate for.

     Note this can happen whether or not we have a return value.  If we have
     a return value, then RETURN_BB may have PHIs for real operands too.  */
  if (return_bb != EXIT_BLOCK_PTR)
    {
      bool phi_p = false;
      for (gsi = gsi_start_phis (return_bb); !gsi_end_p (gsi);)
        {
          gimple stmt = gsi_stmt (gsi);
          if (is_gimple_reg (gimple_phi_result (stmt)))
            {
              gsi_next (&gsi);
              continue;
            }
          mark_virtual_phi_result_for_renaming (stmt);
          remove_phi_node (&gsi, true);
          phi_p = true;
        }
      /* In reality we have to rename the reaching definition of the
         virtual operand at return_bb as we will eventually release it
         when we remove the code region we outlined.
         So we have to rename all immediate virtual uses of that region
         if we didn't see a PHI definition yet.  */
      /* ??? In real reality we want to set the reaching vdef of the
         entry of the SESE region as the vuse of the call and the reaching
         vdef of the exit of the SESE region as the vdef of the call.  */
      if (!phi_p)
        for (gsi = gsi_start_bb (return_bb); !gsi_end_p (gsi); gsi_next (&gsi))
          {
            gimple stmt = gsi_stmt (gsi);
            if (gimple_vuse (stmt))
              {
                gimple_set_vuse (stmt, NULL_TREE);
                update_stmt (stmt);
              }
            if (gimple_vdef (stmt))
              break;
          }
    }

  /* Now create the actual clone.  */
  rebuild_cgraph_edges ();
  node = cgraph_function_versioning (cur_node, NULL, NULL, args_to_skip,
                                     !split_part_return_p,
                                     split_point->split_bbs,
                                     split_point->entry_bb, "part");
  /* For usual cloning it is enough to clear the builtin flag only when the
     signature changes.  For partial inlining, however, we cannot expect the
     part of the builtin implementation to have the same semantics as the
     whole.  */
  if (DECL_BUILT_IN (node->symbol.decl))
    {
      DECL_BUILT_IN_CLASS (node->symbol.decl) = NOT_BUILT_IN;
      DECL_FUNCTION_CODE (node->symbol.decl) = (enum built_in_function) 0;
    }
  cgraph_node_remove_callees (cur_node);
  if (!split_part_return_p)
    TREE_THIS_VOLATILE (node->symbol.decl) = 1;
  if (dump_file)
    dump_function_to_file (node->symbol.decl, dump_file, dump_flags);

  /* Create the basic block we place the call into.  It is the entry basic
     block split after the last label.  */
  call_bb = split_point->entry_bb;
  for (gsi = gsi_start_bb (call_bb); !gsi_end_p (gsi);)
    if (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
      {
        last_stmt = gsi_stmt (gsi);
        gsi_next (&gsi);
      }
    else
      break;
  e = split_block (split_point->entry_bb, last_stmt);
  remove_edge (e);

  /* Produce the call statement.  */
  gsi = gsi_last_bb (call_bb);
  FOR_EACH_VEC_ELT (tree, args_to_pass, i, arg)
    if (!is_gimple_val (arg))
      {
        arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE,
                                        false, GSI_CONTINUE_LINKING);
        VEC_replace (tree, args_to_pass, i, arg);
      }
  call = gimple_build_call_vec (node->symbol.decl, args_to_pass);
  gimple_set_block (call, DECL_INITIAL (current_function_decl));

  /* We avoid the address being taken on any variable used by the split part,
     so return slot optimization is always possible.  Moreover this is
     required to make DECL_BY_REFERENCE work.  */
  if (aggregate_value_p (DECL_RESULT (current_function_decl),
                         TREE_TYPE (current_function_decl)))
    gimple_call_set_return_slot_opt (call, true);

  /* Update the return value.  This is a bit tricky.  When we do not return,
     do nothing.  When we return, we might need to update return_bb
     or produce a new return statement.  */
  if (!split_part_return_p)
    gsi_insert_after (&gsi, call, GSI_NEW_STMT);
  else
    {
      e = make_edge (call_bb, return_bb,
                     return_bb == EXIT_BLOCK_PTR ? 0 : EDGE_FALLTHRU);
      e->count = call_bb->count;
      e->probability = REG_BR_PROB_BASE;

      /* If there is a return basic block, see what value we need to store
         the return value into and put the call just before it.  */
      if (return_bb != EXIT_BLOCK_PTR)
        {
          real_retval = retval = find_retval (return_bb);

          if (real_retval && split_point->split_part_set_retval)
            {
              gimple_stmt_iterator psi;

              /* See if we need a new SSA_NAME for the result.
                 When DECL_BY_REFERENCE is true, retval is actually a pointer
                 to the return value and it is constant in the whole
                 function.  */
              if (TREE_CODE (retval) == SSA_NAME
                  && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
                {
                  retval = make_ssa_name (SSA_NAME_VAR (retval), call);

                  /* See if there is a PHI defining the return value.  */
                  for (psi = gsi_start_phis (return_bb);
                       !gsi_end_p (psi); gsi_next (&psi))
                    if (is_gimple_reg (gimple_phi_result (gsi_stmt (psi))))
                      break;

                  /* When there is a PHI, just update its value.  */
                  if (TREE_CODE (retval) == SSA_NAME
                      && !gsi_end_p (psi))
                    add_phi_arg (gsi_stmt (psi), retval, e, UNKNOWN_LOCATION);
                  /* Otherwise update the return BB itself.
                     find_return_bb allows at most one assignment to the
                     return value, so update the first statement.  */
                  else
                    {
                      gimple_stmt_iterator bsi;
                      for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi);
                           gsi_next (&bsi))
                        if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
                          {
                            gimple_return_set_retval (gsi_stmt (bsi), retval);
                            break;
                          }
                        else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
                                 && !gimple_clobber_p (gsi_stmt (bsi)))
                          {
                            gimple_assign_set_rhs1 (gsi_stmt (bsi), retval);
                            break;
                          }
                      update_stmt (gsi_stmt (bsi));
                    }
                }
              if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
                {
                  gimple_call_set_lhs (call, build_simple_mem_ref (retval));
                  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
                }
              else
                {
                  tree restype;
                  restype = TREE_TYPE (DECL_RESULT (current_function_decl));
                  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
                  if (!useless_type_conversion_p (TREE_TYPE (retval), restype))
                    {
                      gimple cpy;
                      tree tem = create_tmp_reg (restype, NULL);
                      tem = make_ssa_name (tem, call);
                      cpy = gimple_build_assign_with_ops (NOP_EXPR, retval,
                                                          tem, NULL_TREE);
                      gsi_insert_after (&gsi, cpy, GSI_NEW_STMT);
                      retval = tem;
                    }
                  gimple_call_set_lhs (call, retval);
                  update_stmt (call);
                }
            }
          else
            gsi_insert_after (&gsi, call, GSI_NEW_STMT);
        }
      /* We don't use the return block (there is either no return in the
         function or multiple of them).  So create a new basic block with
         a return statement.  */
      else
        {
          gimple ret;
          if (split_point->split_part_set_retval
              && !VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl))))
            {
              retval = DECL_RESULT (current_function_decl);

              /* We use a temporary register to hold the value when
                 aggregate_value_p is false.  Similarly for DECL_BY_REFERENCE
                 we must avoid an extra copy.  */
              if (!aggregate_value_p (retval, TREE_TYPE (current_function_decl))
                  && !DECL_BY_REFERENCE (retval))
                retval = create_tmp_reg (TREE_TYPE (retval), NULL);
              if (is_gimple_reg (retval))
                {
                  /* When returning by reference, there is only one SSA name
                     assigned to RESULT_DECL (that is the pointer to the
                     return value).  Look it up or create a new one if it is
                     missing.  */
                  if (DECL_BY_REFERENCE (retval))
                    retval = get_or_create_ssa_default_def (cfun, retval);
                  /* Otherwise produce a new SSA name for the return value.  */
                  else
                    retval = make_ssa_name (retval, call);
                }
              if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
                gimple_call_set_lhs (call, build_simple_mem_ref (retval));
              else
                gimple_call_set_lhs (call, retval);
            }
          gsi_insert_after (&gsi, call, GSI_NEW_STMT);
          ret = gimple_build_return (retval);
          gsi_insert_after (&gsi, ret, GSI_NEW_STMT);
        }
    }
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  compute_inline_parameters (node, true);
}

/* Execute function splitting pass.  */

static unsigned int
execute_split_functions (void)
{
  gimple_stmt_iterator bsi;
  basic_block bb;
  int overall_time = 0, overall_size = 0;
  int todo = 0;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);

  if (flags_from_decl_or_type (current_function_decl)
      & (ECF_NORETURN|ECF_MALLOC))
    {
      if (dump_file)
        fprintf (dump_file, "Not splitting: noreturn/malloc function.\n");
      return 0;
    }
  if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
    {
      if (dump_file)
        fprintf (dump_file, "Not splitting: main function.\n");
      return 0;
    }
  /* This can be relaxed; the function might become inlinable after splitting
     away the uninlinable part.  */
  if (!inline_summary (node)->inlinable)
    {
      if (dump_file)
        fprintf (dump_file, "Not splitting: not inlinable.\n");
      return 0;
    }
  if (DECL_DISREGARD_INLINE_LIMITS (node->symbol.decl))
    {
      if (dump_file)
        fprintf (dump_file, "Not splitting: disregarding inline limits.\n");
      return 0;
    }
  /* This can be relaxed; most of the versioning tests actually prevent
     a duplication.  */
  if (!tree_versionable_function_p (current_function_decl))
    {
      if (dump_file)
        fprintf (dump_file, "Not splitting: not versionable.\n");
      return 0;
    }
  /* FIXME: we could support this.  */
  if (DECL_STRUCT_FUNCTION (current_function_decl)->static_chain_decl)
    {
      if (dump_file)
        fprintf (dump_file, "Not splitting: nested function.\n");
      return 0;
    }

  /* See if it makes sense to try to split.
     It makes sense to split if we inline, that is if we have direct calls to
     handle or direct calls are possibly going to appear as a result of
     indirect inlining or LTO.  Also handle -fprofile-generate as LTO to
     allow non-LTO training for an LTO -fprofile-use build.

     Note that we are not completely conservative about disqualifying
     functions called once.  It is possible that the caller is called more
     than once and then inlining would still benefit.  */
  if ((!node->callers || !node->callers->next_caller)
      && !node->symbol.address_taken
      && (!flag_lto || !node->symbol.externally_visible))
    {
      if (dump_file)
        fprintf (dump_file, "Not splitting: not called directly "
                 "or called once.\n");
      return 0;
    }

  /* FIXME: We can actually split if splitting reduces call overhead.  */
  if (!flag_inline_small_functions
      && !DECL_DECLARED_INLINE_P (current_function_decl))
    {
      if (dump_file)
        fprintf (dump_file, "Not splitting: not autoinlining and function"
                 " is not inline.\n");
      return 0;
    }

  /* Initialize bitmap to track forbidden calls.  */
  forbidden_dominators = BITMAP_ALLOC (NULL);
  calculate_dominance_info (CDI_DOMINATORS);

  /* Compute local info about basic blocks and determine function
     size/time.  */
  VEC_safe_grow_cleared (bb_info, heap, bb_info_vec, last_basic_block + 1);
  memset (&best_split_point, 0, sizeof (best_split_point));
  FOR_EACH_BB (bb)
    {
      int time = 0;
      int size = 0;
      int freq = compute_call_stmt_bb_frequency (current_function_decl, bb);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Basic block %i\n", bb->index);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          int this_time, this_size;
          gimple stmt = gsi_stmt (bsi);

          this_size = estimate_num_insns (stmt, &eni_size_weights);
          this_time = estimate_num_insns (stmt, &eni_time_weights) * freq;
          size += this_size;
          time += this_time;
          check_forbidden_calls (stmt);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "  freq:%6i size:%3i time:%3i ",
                       freq, this_size, this_time);
              print_gimple_stmt (dump_file, stmt, 0, 0);
            }
        }
      overall_time += time;
      overall_size += size;
      VEC_index (bb_info, bb_info_vec, bb->index)->time = time;
      VEC_index (bb_info, bb_info_vec, bb->index)->size = size;
    }
  find_split_points (overall_time, overall_size);
  if (best_split_point.split_bbs)
    {
      split_function (&best_split_point);
      BITMAP_FREE (best_split_point.ssa_names_to_pass);
      BITMAP_FREE (best_split_point.split_bbs);
      todo = TODO_update_ssa | TODO_cleanup_cfg;
    }
  BITMAP_FREE (forbidden_dominators);
  VEC_free (bb_info, heap, bb_info_vec);
  bb_info_vec = NULL;
  return todo;
}

/* Gate function splitting pass.  When doing profile feedback, we want
   to execute the pass after profiling is read.  So disable the one in
   early optimization.  */

static bool
gate_split_functions (void)
{
  return (flag_partial_inlining
          && !profile_arc_flag && !flag_branch_probabilities);
}

struct gimple_opt_pass pass_split_functions =
{
 {
  GIMPLE_PASS,
  "fnsplit",                            /* name */
  gate_split_functions,                 /* gate */
  execute_split_functions,              /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_IPA_FNSPLIT,                       /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_verify_all                       /* todo_flags_finish */
 }
};

/* Gate feedback driven function splitting pass.
   We don't need to split when profiling at all, as we are producing
   lousy code anyway.  */

static bool
gate_feedback_split_functions (void)
{
  return (flag_partial_inlining
          && flag_branch_probabilities);
}

/* Execute feedback driven function splitting pass.  */

static unsigned int
execute_feedback_split_functions (void)
{
  unsigned int retval = execute_split_functions ();
  if (retval)
    retval |= TODO_rebuild_cgraph_edges;
  return retval;
}

struct gimple_opt_pass pass_feedback_split_functions =
{
 {
  GIMPLE_PASS,
  "feedback_fnsplit",                   /* name */
  gate_feedback_split_functions,        /* gate */
  execute_feedback_split_functions,     /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_IPA_FNSPLIT,                       /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_verify_all                       /* todo_flags_finish */
 }
};