/* Function splitting pass
   Copyright (C) 2010
   Free Software Foundation, Inc.
   Contributed by Jan Hubicka  <jh@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* The purpose of this pass is to split function bodies to improve
   inlining.  I.e. for a function of the form:

     func (...)
       {
         if (cheap_test)
           something_small
         else
           something_big
       }

   produce:

     func.part (...)
       {
         something_big
       }

     func (...)
       {
         if (cheap_test)
           something_small
         else
           func.part (...);
       }

   When func becomes inlinable and cheap_test is often true, inlining func
   but not func.part leads to a performance improvement similar to inlining
   the original func, while the code size growth is smaller.

   The pass is organized in three stages:
   1) Collect local info about basic blocks into the BB_INFO structure and
      compute the function body's estimated size and time.
   2) Via a DFS walk, find all possible basic blocks where we can split
      and choose the best one.
   3) If a split point is found, split at the specified BB by creating a
      clone and updating the function to call it.

   The decisions about which functions to split are made in
   execute_split_functions and consider_split.

   There are several possible future improvements for this pass, including:

   1) Splitting to break up large functions
   2) Splitting to reduce stack frame usage
   3) Allow the split part of the function to use values computed in the
      header part.  The values need to be passed to the split function,
      perhaps via the same interface as for nested functions or as
      arguments.
   4) Support for simple rematerialization.  I.e. when the split part uses
      a value computed in the header from a function parameter in a very
      cheap way, we can just recompute it.
   5) Support splitting of nested functions.
   6) Support non-SSA arguments.
   7) There is nothing preventing us from producing multiple parts of a
      single function when needed, or from also splitting the parts.  */
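
/* As a concrete, purely hypothetical illustration (every name below is
   invented for exposition): a cache lookup whose hit test plays the role
   of cheap_test and whose miss path is something_big:

     int
     lookup (struct table *t, int key)
     {
       if (t->last_key == key)
         return t->last_value;
       return lookup_slow (t, key);
     }

   Splitting outlines the lookup_slow path into lookup.part, so the
   two-statement hit path becomes cheap enough to inline everywhere.  */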

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "target.h"
#include "cgraph.h"
#include "ipa-prop.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "flags.h"
#include "timevar.h"
#include "diagnostic.h"
#include "tree-dump.h"
#include "tree-inline.h"
#include "fibheap.h"
#include "params.h"
#include "gimple-pretty-print.h"

/* Per basic block info.  */

typedef struct
{
  unsigned int size;
  unsigned int time;
} bb_info;
DEF_VEC_O(bb_info);
DEF_VEC_ALLOC_O(bb_info,heap);

static VEC(bb_info, heap) *bb_info_vec;

/* Description of split point.  */

struct split_point
{
  /* Size of the partitions.  */
  unsigned int header_time, header_size, split_time, split_size;

  /* SSA names that need to be passed into the split function.  */
  bitmap ssa_names_to_pass;

  /* Basic block where we split (that will become the entry point of the
     new function).  */
  basic_block entry_bb;

  /* Basic blocks we are splitting away.  */
  bitmap split_bbs;
};

/* Best split point found.  */

struct split_point best_split_point;

/* Callback for walk_stmt_load_store_addr_ops.  If T is a non-SSA automatic
   variable, check whether it is present in the bitmap passed via DATA.  */

static bool
test_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t,
		 void *data ATTRIBUTE_UNUSED)
{
  t = get_base_address (t);

  if (t && !is_gimple_reg (t)
      && ((TREE_CODE (t) == VAR_DECL
	   && auto_var_in_fn_p (t, current_function_decl))
	  || (TREE_CODE (t) == PARM_DECL)))
    return bitmap_bit_p ((bitmap)data, DECL_UID (t));
  return false;
}

/* Dump split point CURRENT.  */

static void
dump_split_point (FILE * file, struct split_point *current)
{
  fprintf (file,
	   "Split point at BB %i header time:%i header size: %i"
	   " split time: %i split size: %i\n  bbs: ",
	   current->entry_bb->index, current->header_time,
	   current->header_size, current->split_time, current->split_size);
  dump_bitmap (file, current->split_bbs);
  fprintf (file, "  SSA names to pass: ");
  dump_bitmap (file, current->ssa_names_to_pass);
}

/* We found a split_point CURRENT.  NON_SSA_VARS is a bitmap of all non-SSA
   variables used and RETURN_BB is the return basic block.
   See if we can split the function here.  */

static void
consider_split (struct split_point *current, bitmap non_ssa_vars,
		basic_block return_bb)
{
  tree parm;
  unsigned int num_args = 0;
  unsigned int call_overhead;
  edge e;
  edge_iterator ei;
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_split_point (dump_file, current);

  /* Do not split when we would end up calling the function anyway.  */
  if (current->entry_bb->frequency
      >= (ENTRY_BLOCK_PTR->frequency
	  * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split BB frequency is too large.\n");
      return;
    }

  if (!current->header_size)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  Refused: header empty\n");
      gcc_unreachable ();
      return;
    }

  /* FIXME: We can do better: if the split region starts with a loop and
     there is only one entry point from the outer world, we can update the
     PHI.  */
  if (!gsi_end_p (gsi_start_phis (current->entry_bb)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: entry BB has PHI\n");
      return;
    }

  /* See what arguments we will pass to the split function and compute the
     call overhead.  */
  call_overhead = eni_size_weights.call_cost;
  for (parm = DECL_ARGUMENTS (current_function_decl); parm;
       parm = TREE_CHAIN (parm))
    {
      if (!is_gimple_reg (parm))
	{
	  if (bitmap_bit_p (non_ssa_vars, DECL_UID (parm)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  Refused: need to pass non-ssa param values\n");
	      return;
	    }
	}
      else if (gimple_default_def (cfun, parm)
	       && bitmap_bit_p (current->ssa_names_to_pass,
				SSA_NAME_VERSION (gimple_default_def
						  (cfun, parm))))
	{
	  if (!VOID_TYPE_P (TREE_TYPE (parm)))
	    call_overhead += estimate_move_cost (TREE_TYPE (parm));
	  num_args++;
	}
    }
  if (!VOID_TYPE_P (TREE_TYPE (current_function_decl)))
    call_overhead += estimate_move_cost (TREE_TYPE (current_function_decl));

  if (current->split_size <= call_overhead)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split size is smaller than call overhead\n");
      return;
    }
  if (current->header_size + call_overhead
      >= (unsigned int)(DECL_DECLARED_INLINE_P (current_function_decl)
			? MAX_INLINE_INSNS_SINGLE
			: MAX_INLINE_INSNS_AUTO))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: header size is too large for inline candidate\n");
      return;
    }

  /* FIXME: we can currently pass only SSA function parameters to the split
     arguments.  Once the parm_adjustment infrastructure is supported by
     cloning, we can pass more than that.  */
  if (num_args != bitmap_count_bits (current->ssa_names_to_pass))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: need to pass non-param values\n");
      return;
    }

  /* When there are non-ssa vars used in the split region, see if they are
     used in the header region.  If so, reject the split.
     FIXME: we can use nested function support to access both.  */
  if (!bitmap_empty_p (non_ssa_vars))
    {
      basic_block bb;
      FOR_EACH_BB (bb)
	{
	  gimple_stmt_iterator bsi;
	  if (!bitmap_bit_p (current->split_bbs, bb->index))
	    continue;
	  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	    {
	      if (is_gimple_debug (gsi_stmt (bsi)))
		continue;
	      if (walk_stmt_load_store_addr_ops
		  (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use,
		   test_nonssa_use, test_nonssa_use))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "  Refused: split part has non-ssa uses\n");
		  return;
		}
	    }
	  for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	    {
	      if (is_gimple_debug (gsi_stmt (bsi)))
		continue;
	      if (walk_stmt_load_store_addr_ops
		  (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use,
		   test_nonssa_use, test_nonssa_use))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "  Refused: split part has non-ssa uses\n");
		  return;
		}
	    }
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      if (e->dest != return_bb)
		continue;
	      for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi);
		   gsi_next (&bsi))
		{
		  gimple stmt = gsi_stmt (bsi);
		  tree op = gimple_phi_arg_def (stmt, e->dest_idx);

		  if (!is_gimple_reg (gimple_phi_result (stmt)))
		    continue;
		  if (TREE_CODE (op) != SSA_NAME
		      && test_nonssa_use (stmt, op, non_ssa_vars))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file,
				 "  Refused: split part has non-ssa uses\n");
		      return;
		    }
		}
	    }
	}
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Accepted!\n");

  /* At the moment we choose the split point with the lowest frequency and
     that leaves out the smallest size of header.
     In the future we might reconsider this heuristic.  */
  if (!best_split_point.split_bbs
      || best_split_point.entry_bb->frequency > current->entry_bb->frequency
      || (best_split_point.entry_bb->frequency == current->entry_bb->frequency
	  && best_split_point.split_size < current->split_size))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  New best split point!\n");
      if (best_split_point.ssa_names_to_pass)
	{
	  BITMAP_FREE (best_split_point.ssa_names_to_pass);
	  BITMAP_FREE (best_split_point.split_bbs);
	}
      best_split_point = *current;
      best_split_point.ssa_names_to_pass = BITMAP_ALLOC (NULL);
      bitmap_copy (best_split_point.ssa_names_to_pass,
		   current->ssa_names_to_pass);
      best_split_point.split_bbs = BITMAP_ALLOC (NULL);
      bitmap_copy (best_split_point.split_bbs, current->split_bbs);
    }
}

/* Return the basic block containing the RETURN statement, or EXIT_BLOCK_PTR
   if none is found.
   When there are multiple RETURN statements, choose the one with a return
   value, since that one is more likely shared by multiple code paths.
   TODO: We might support multiple return blocks.  */
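
/* As an illustration of the preferred shape (a GIMPLE-level sketch; the
   names are invented for exposition), a shared return block with a value
   funnels all paths through a single PHI:

     <bb ret>:
       retval_1 = PHI <val_a_2(bb_then), val_b_3(bb_else)>
       return retval_1;

   Such a block can stay shared between the header and the split part,
   which is why a return with a value is the more useful one to pick.  */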

static basic_block
find_return_bb (void)
{
  edge e;
  edge_iterator ei;
  basic_block return_bb = EXIT_BLOCK_PTR;

  if (EDGE_COUNT (EXIT_BLOCK_PTR->preds) == 1)
    FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
      {
	gimple_stmt_iterator bsi;
	bool found_return = false;
	tree retval = NULL_TREE;

	for (bsi = gsi_start_bb (e->src); !gsi_end_p (bsi); gsi_next (&bsi))
	  if (gimple_code (gsi_stmt (bsi)) != GIMPLE_RETURN
	      && gimple_code (gsi_stmt (bsi)) != GIMPLE_LABEL
	      && !is_gimple_debug (gsi_stmt (bsi)))
	    break;
	  else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
	    {
	      found_return = true;
	      retval = gimple_return_retval (gsi_stmt (bsi));
	    }
	if (gsi_end_p (bsi) && found_return)
	  {
	    if (retval)
	      return e->src;
	    else
	      return_bb = e->src;
	  }
      }
  return return_bb;
}

/* Callback for walk_stmt_load_store_addr_ops.  If T is a non-SSA automatic
   variable, mark it as used in the bitmap passed via DATA.
   Return true when access to T prevents splitting the function.  */

static bool
mark_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t,
		 void *data ATTRIBUTE_UNUSED)
{
  t = get_base_address (t);

  if (!t || is_gimple_reg (t))
    return false;

  /* At present we can't pass non-SSA arguments to the split function.
     FIXME: this can be relaxed by passing references to arguments.  */
  if (TREE_CODE (t) == PARM_DECL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Can not split use of non-ssa function parameter.\n");
      return true;
    }

  if (TREE_CODE (t) == VAR_DECL && auto_var_in_fn_p (t, current_function_decl))
    bitmap_set_bit ((bitmap)data, DECL_UID (t));
  return false;
}

/* Compute local properties of basic block BB we collect when looking for
   split points.  We look for SSA defs and store them in SET_SSA_NAMES,
   for SSA uses and store them in USED_SSA_NAMES, and for any non-SSA
   automatic vars, which are stored in NON_SSA_VARS.

   When BB has an edge to RETURN_BB, collect uses in RETURN_BB too.

   Return false when BB contains something that prevents it from being put
   into the split function.  */

static bool
visit_bb (basic_block bb, basic_block return_bb,
	  bitmap set_ssa_names, bitmap used_ssa_names,
	  bitmap non_ssa_vars)
{
  gimple_stmt_iterator bsi;
  edge e;
  edge_iterator ei;
  bool can_split = true;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      tree op;
      ssa_op_iter iter;
      tree decl;

      if (is_gimple_debug (stmt))
	continue;

      /* FIXME: We can split regions containing EH.  We cannot, however,
	 split RESX, EH_DISPATCH and EH_POINTER referring to the same
	 region into different partitions.  This would require tracking
	 of EH regions and checking in consider_split that they are not
	 used elsewhere.  */
      if (gimple_code (stmt) == GIMPLE_RESX
	  && stmt_can_throw_external (stmt))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Can not split external resx.\n");
	  can_split = false;
	}
      if (gimple_code (stmt) == GIMPLE_EH_DISPATCH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Can not split eh dispatch.\n");
	  can_split = false;
	}

      /* Check builtins that prevent splitting.  */
      if (gimple_code (stmt) == GIMPLE_CALL
	  && (decl = gimple_call_fndecl (stmt)) != NULL_TREE
	  && DECL_BUILT_IN (decl)
	  && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	switch (DECL_FUNCTION_CODE (decl))
	  {
	  /* FIXME: once we allow passing non-parm values to the split part,
	     we need to be sure to handle builtin_stack_save and
	     builtin_stack_restore correctly.  At the moment we are safe;
	     there is no way to store the builtin_stack_save result in a
	     non-SSA variable since all calls to those are compiler
	     generated.  */
	  case BUILT_IN_APPLY:
	  case BUILT_IN_VA_START:
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "Can not split builtin_apply and va_start.\n");
	    can_split = false;
	    break;
	  case BUILT_IN_EH_POINTER:
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "Can not split builtin_eh_pointer.\n");
	    can_split = false;
	    break;
	  default:
	    break;
	  }

      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
	bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op));
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
      can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
						   mark_nonssa_use,
						   mark_nonssa_use,
						   mark_nonssa_use);
    }
  for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      tree op;
      ssa_op_iter iter;

      if (is_gimple_debug (stmt))
	continue;
      if (!is_gimple_reg (gimple_phi_result (stmt)))
	continue;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
	bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op));
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
      can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
						   mark_nonssa_use,
						   mark_nonssa_use,
						   mark_nonssa_use);
    }
  /* Also record uses coming from PHI operands in the return BB.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->dest == return_bb)
      {
	bool found_phi = false;
	for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    tree op = gimple_phi_arg_def (stmt, e->dest_idx);

	    if (is_gimple_debug (stmt))
	      continue;
	    if (!is_gimple_reg (gimple_phi_result (stmt)))
	      continue;
	    found_phi = true;
	    if (TREE_CODE (op) == SSA_NAME)
	      bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
	    else
	      can_split &= !mark_nonssa_use (stmt, op, non_ssa_vars);
	  }
	if (!gsi_end_p (gsi_last_bb (return_bb)))
	  {
	    ssa_op_iter iter;
	    gimple stmt = gsi_stmt (gsi_last_bb (return_bb));
	    tree op;
	    if (!found_phi)
	      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
		bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
	    can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
							 mark_nonssa_use,
							 mark_nonssa_use,
							 mark_nonssa_use);
	  }
      }
  return can_split;
}

/* Stack entry for recursive DFS walk in find_split_points.  */

typedef struct
{
  /* Basic block we are examining.  */
  basic_block bb;

  /* SSA names set and used by the BB and all BBs reachable
     from it via DFS walk.  */
  bitmap set_ssa_names, used_ssa_names;
  bitmap non_ssa_vars;

  /* All BBs visited from this BB via DFS walk.  */
  bitmap bbs_visited;

  /* Last examined edge in DFS walk.  Since we walk an unoriented graph,
     the value can be up to the sum of incoming and outgoing edges of BB.  */
  unsigned int edge_num;

  /* Stack entry index of the earliest BB reachable from the current BB
     or any BB visited later in the DFS walk.  */
  int earliest;

  /* Overall time and size of all BBs reached from this BB in DFS walk.  */
  int overall_time, overall_size;

  /* When false we can not split on this BB.  */
  bool can_split;
} stack_entry;
DEF_VEC_O(stack_entry);
DEF_VEC_ALLOC_O(stack_entry,heap);

/* Find all articulations and call consider_split on them.
   OVERALL_TIME and OVERALL_SIZE are the time and size of the function.

   We perform the basic algorithm for finding articulations in a graph
   created from the CFG by considering it to be an unoriented graph.

   The articulation is discovered via a DFS walk.  We collect the earliest
   basic block on the stack that is reachable via a backward edge.  An
   articulation is any basic block such that there is no backward edge
   bypassing it.  To reduce stack usage we maintain a heap-allocated stack
   in the STACK vector.  The AUX pointer of a BB is set to the index at
   which it appears in the stack, or to -1 once it is visited and popped
   off the stack.

   The algorithm finds an articulation after visiting the whole component
   reachable from it.  This makes it convenient to collect information
   about the component used by consider_split.  */
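
/* For reference, a minimal self-contained sketch of the same
   articulation-point test on a plain undirected simple graph, independent
   of GCC data structures (all names below are invented for exposition).
   DISC records DFS discovery time; LOW is the earliest discovery time
   reachable from the subtree, playing the role of the EARLIEST field
   above.  A non-root vertex V is an articulation when some DFS child W
   has LOW[W] >= DISC[V], i.e. no back edge bypasses V; the root case
   (articulation iff two or more DFS children) is omitted for brevity:

     #include <stdio.h>

     static int adj[64][64], nv, disc[64], low[64], tick;

     static void
     dfs (int v, int parent)
     {
       int w;

       disc[v] = low[v] = ++tick;
       for (w = 0; w < nv; w++)
	 if (adj[v][w] && w != parent)
	   {
	     if (!disc[w])
	       {
		 dfs (w, v);
		 if (low[w] < low[v])
		   low[v] = low[w];
		 if (low[w] >= disc[v] && parent != -1)
		   printf ("articulation at %d\n", v);
	       }
	     else if (disc[w] < low[v])
	       low[v] = disc[w];
	   }
     }

   The loop below implements the same test iteratively to bound stack
   usage, walking both edge directions of the CFG.  */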

static void
find_split_points (int overall_time, int overall_size)
{
  stack_entry first;
  VEC(stack_entry, heap) *stack = NULL;
  basic_block bb;
  basic_block return_bb = find_return_bb ();
  struct split_point current;

  current.header_time = overall_time;
  current.header_size = overall_size;
  current.split_time = 0;
  current.split_size = 0;
  current.ssa_names_to_pass = BITMAP_ALLOC (NULL);

  first.bb = ENTRY_BLOCK_PTR;
  first.edge_num = 0;
  first.overall_time = 0;
  first.overall_size = 0;
  first.earliest = INT_MAX;
  first.set_ssa_names = 0;
  first.used_ssa_names = 0;
  first.bbs_visited = 0;
  VEC_safe_push (stack_entry, heap, stack, &first);
  ENTRY_BLOCK_PTR->aux = (void *)(intptr_t)-1;

  while (!VEC_empty (stack_entry, stack))
    {
      stack_entry *entry = VEC_last (stack_entry, stack);

      /* We are walking an unoriented graph, so edge_num counts
	 succ and pred edges together.  However, when considering an
	 articulation, we want to have processed everything reachable
	 from the articulation but nothing that reaches into it.  */
      if (entry->edge_num == EDGE_COUNT (entry->bb->succs)
	  && entry->bb != ENTRY_BLOCK_PTR)
	{
	  int pos = VEC_length (stack_entry, stack);
	  entry->can_split &= visit_bb (entry->bb, return_bb,
					entry->set_ssa_names,
					entry->used_ssa_names,
					entry->non_ssa_vars);
	  if (pos <= entry->earliest && !entry->can_split
	      && dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "found articulation at bb %i but can not split\n",
		     entry->bb->index);
	  if (pos <= entry->earliest && entry->can_split)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "found articulation at bb %i\n",
			 entry->bb->index);
	      current.entry_bb = entry->bb;
	      current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
	      bitmap_and_compl (current.ssa_names_to_pass,
				entry->used_ssa_names, entry->set_ssa_names);
	      current.header_time = overall_time - entry->overall_time;
	      current.header_size = overall_size - entry->overall_size;
	      current.split_time = entry->overall_time;
	      current.split_size = entry->overall_size;
	      current.split_bbs = entry->bbs_visited;
	      consider_split (&current, entry->non_ssa_vars, return_bb);
	      BITMAP_FREE (current.ssa_names_to_pass);
	    }
	}
      /* Do the actual DFS walk.  */
      if (entry->edge_num
	  < (EDGE_COUNT (entry->bb->succs)
	     + EDGE_COUNT (entry->bb->preds)))
	{
	  edge e;
	  basic_block dest;
	  if (entry->edge_num < EDGE_COUNT (entry->bb->succs))
	    {
	      e = EDGE_SUCC (entry->bb, entry->edge_num);
	      dest = e->dest;
	    }
	  else
	    {
	      e = EDGE_PRED (entry->bb, entry->edge_num
			     - EDGE_COUNT (entry->bb->succs));
	      dest = e->src;
	    }

	  entry->edge_num++;

	  /* New BB to visit, push it to the stack.  */
	  if (dest != return_bb && dest != EXIT_BLOCK_PTR
	      && !dest->aux)
	    {
	      stack_entry new_entry;

	      new_entry.bb = dest;
	      new_entry.edge_num = 0;
	      new_entry.overall_time
		= VEC_index (bb_info, bb_info_vec, dest->index)->time;
	      new_entry.overall_size
		= VEC_index (bb_info, bb_info_vec, dest->index)->size;
	      new_entry.earliest = INT_MAX;
	      new_entry.set_ssa_names = BITMAP_ALLOC (NULL);
	      new_entry.used_ssa_names = BITMAP_ALLOC (NULL);
	      new_entry.bbs_visited = BITMAP_ALLOC (NULL);
	      new_entry.non_ssa_vars = BITMAP_ALLOC (NULL);
	      new_entry.can_split = true;
	      bitmap_set_bit (new_entry.bbs_visited, dest->index);
	      VEC_safe_push (stack_entry, heap, stack, &new_entry);
	      dest->aux = (void *)(intptr_t)VEC_length (stack_entry, stack);
	    }
	  /* Back edge found, record the earliest point.  */
	  else if ((intptr_t)dest->aux > 0
		   && (intptr_t)dest->aux < entry->earliest)
	    entry->earliest = (intptr_t)dest->aux;
	}
      /* We are done with examining the edges.  Pop the value off the stack
	 and merge the stuff we accumulated during the walk.  */
      else if (entry->bb != ENTRY_BLOCK_PTR)
	{
	  stack_entry *prev = VEC_index (stack_entry, stack,
					 VEC_length (stack_entry, stack) - 2);

	  entry->bb->aux = (void *)(intptr_t)-1;
	  prev->can_split &= entry->can_split;
	  if (prev->set_ssa_names)
	    {
	      bitmap_ior_into (prev->set_ssa_names, entry->set_ssa_names);
	      bitmap_ior_into (prev->used_ssa_names, entry->used_ssa_names);
	      bitmap_ior_into (prev->bbs_visited, entry->bbs_visited);
	      bitmap_ior_into (prev->non_ssa_vars, entry->non_ssa_vars);
	    }
	  if (prev->earliest > entry->earliest)
	    prev->earliest = entry->earliest;
	  prev->overall_time += entry->overall_time;
	  prev->overall_size += entry->overall_size;
	  BITMAP_FREE (entry->set_ssa_names);
	  BITMAP_FREE (entry->used_ssa_names);
	  BITMAP_FREE (entry->bbs_visited);
	  BITMAP_FREE (entry->non_ssa_vars);
	  VEC_pop (stack_entry, stack);
	}
      else
	VEC_pop (stack_entry, stack);
    }
  ENTRY_BLOCK_PTR->aux = NULL;
  FOR_EACH_BB (bb)
    bb->aux = NULL;
  BITMAP_FREE (current.ssa_names_to_pass);
}

/* Split function at SPLIT_POINT.  */

static void
split_function (struct split_point *split_point)
{
  VEC (tree, heap) *args_to_pass = NULL;
  bitmap args_to_skip = BITMAP_ALLOC (NULL);
  tree parm;
  int num = 0;
  struct cgraph_node *node;
  basic_block return_bb = find_return_bb ();
  basic_block call_bb;
  gimple_stmt_iterator gsi;
  gimple call;
  edge e;
  edge_iterator ei;
  tree retval = NULL, real_retval = NULL;
  bool split_part_return_p = false;
  gimple last_stmt = NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n\nSplitting function at:\n");
      dump_split_point (dump_file, split_point);
    }

  /* Collect the parameters of the new function and the args_to_skip
     bitmap.  */
  for (parm = DECL_ARGUMENTS (current_function_decl);
       parm; parm = TREE_CHAIN (parm), num++)
    if (!is_gimple_reg (parm)
	|| !gimple_default_def (cfun, parm)
	|| !bitmap_bit_p (split_point->ssa_names_to_pass,
			  SSA_NAME_VERSION (gimple_default_def (cfun, parm))))
      bitmap_set_bit (args_to_skip, num);
    else
      VEC_safe_push (tree, heap, args_to_pass, gimple_default_def (cfun, parm));

  /* See if the split function will return.  */
  FOR_EACH_EDGE (e, ei, return_bb->preds)
    if (bitmap_bit_p (split_point->split_bbs, e->src->index))
      break;
  if (e)
    split_part_return_p = true;

  /* If we return, we will need the return block.  */
  if (return_bb != EXIT_BLOCK_PTR && split_part_return_p)
    bitmap_set_bit (split_point->split_bbs, return_bb->index);

  /* Now create the actual clone.  */
  rebuild_cgraph_edges ();
  node = cgraph_function_versioning (cgraph_node (current_function_decl),
				     NULL, NULL,
				     args_to_skip,
				     split_point->split_bbs,
				     split_point->entry_bb, "_part");
  cgraph_node_remove_callees (cgraph_node (current_function_decl));
  if (!split_part_return_p)
    TREE_THIS_VOLATILE (node->decl) = 1;
  if (dump_file)
    dump_function_to_file (node->decl, dump_file, dump_flags);

  /* Create the basic block we place the call into.  It is the entry basic
     block split after the last label.  */
  call_bb = split_point->entry_bb;
  for (gsi = gsi_start_bb (call_bb); !gsi_end_p (gsi);)
    if (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
      {
	last_stmt = gsi_stmt (gsi);
	gsi_next (&gsi);
      }
    else
      break;
  e = split_block (split_point->entry_bb, last_stmt);
  remove_edge (e);

  /* Produce the call statement.  */
  gsi = gsi_last_bb (call_bb);
  call = gimple_build_call_vec (node->decl, args_to_pass);
  gimple_set_block (call, DECL_INITIAL (current_function_decl));

  /* Update the return value.  This is a bit tricky.  When we do not
     return, do nothing.  When we return, we might need to update
     return_bb or produce a new return statement.  */
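  /* As a sketch of the two interesting shapes below (GIMPLE-level; the
     names are invented for exposition).  When return_bb survives and its
     value flows through a PHI, the call result becomes one more PHI
     argument:

	retval_1 = PHI <..., new_retval_2(call_bb)>
	return retval_1;

     When the split part returns but no suitable return block exists, a
     fresh return statement is built right after the call:

	new_retval_2 = func.part (...);
	return new_retval_2;  */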
  if (!split_part_return_p)
    gsi_insert_after (&gsi, call, GSI_NEW_STMT);
  else
    {
      e = make_edge (call_bb, return_bb,
		     return_bb == EXIT_BLOCK_PTR ? 0 : EDGE_FALLTHRU);
      e->count = call_bb->count;
      e->probability = REG_BR_PROB_BASE;
      if (return_bb != EXIT_BLOCK_PTR)
	{
	  gimple return_stmt = gsi_stmt (gsi_last_bb (return_bb));
	  gcc_assert (gimple_code (return_stmt) == GIMPLE_RETURN);

	  if ((real_retval = retval = gimple_return_retval (return_stmt))
	      && !is_gimple_min_invariant (retval)
	      && (TREE_CODE (retval) != SSA_NAME
		  || !SSA_NAME_IS_DEFAULT_DEF (retval)))
	    {
	      gimple_stmt_iterator psi;

	      /* See if there is a PHI defining the return value.  */
	      for (psi = gsi_start_phis (return_bb);
		   !gsi_end_p (psi); gsi_next (&psi))
		if (is_gimple_reg (gimple_phi_result (gsi_stmt (psi))))
		  break;

	      /* When we have a PHI, update the PHI.  When there is no PHI,
		 update the return statement itself.  */
	      if (TREE_CODE (retval) == SSA_NAME)
		{
		  retval = make_ssa_name (SSA_NAME_VAR (retval), call);
		  if (TREE_CODE (retval) == SSA_NAME
		      && !gsi_end_p (psi))
		    add_phi_arg (gsi_stmt (psi), retval, e, UNKNOWN_LOCATION);
		  else if (TREE_CODE (retval) == SSA_NAME)
		    {
		      gimple_return_set_retval (return_stmt, retval);
		      update_stmt (return_stmt);
		    }
		}
	      gimple_call_set_lhs (call, retval);
	    }
	  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
	}
      else
	{
	  gimple ret;
	  if (!VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl))))
	    {
	      retval
		= create_tmp_var (TREE_TYPE (TREE_TYPE (current_function_decl)),
				  "RET");
	      if (is_gimple_reg (retval))
		retval = make_ssa_name (retval, call);
	      gimple_call_set_lhs (call, retval);
	    }
	  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
	  ret = gimple_build_return (retval);
	  gsi_insert_after (&gsi, ret, GSI_NEW_STMT);
	}
    }
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  compute_inline_parameters (node);
}

/* Execute function splitting pass.  */

static unsigned int
execute_split_functions (void)
{
  gimple_stmt_iterator bsi;
  basic_block bb;
  int overall_time = 0, overall_size = 0;
  int todo = 0;
  struct cgraph_node *node = cgraph_node (current_function_decl);

  if (flags_from_decl_or_type (current_function_decl) & ECF_NORETURN)
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: noreturn function.\n");
      return 0;
    }
  if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: main function.\n");
      return 0;
    }
  /* This can be relaxed; the function might become inlinable after
     splitting away the uninlinable part.  */
  if (!node->local.inlinable)
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not inlinable.\n");
      return 0;
    }
  if (node->local.disregard_inline_limits)
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: disregarding inline limits.\n");
      return 0;
    }
  /* This can be relaxed; most of the versioning tests actually prevent
     a duplication.  */
  if (!tree_versionable_function_p (current_function_decl))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not versionable.\n");
      return 0;
    }
  /* FIXME: we could support this.  */
  if (DECL_STRUCT_FUNCTION (current_function_decl)->static_chain_decl)
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: nested function.\n");
      return 0;
    }
  /* FIXME: Should be easy to support.  */
  if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: returns value by reference.\n");
      return 0;
    }

  /* See if it makes sense to try to split.
     It makes sense to split if we inline, that is if we have direct calls
     to handle or direct calls are possibly going to appear as a result of
     indirect inlining or LTO.
     Note that we are not completely conservative about disqualifying
     functions called once.  It is possible that the caller is called more
     than once and then inlining would still benefit.  */
  if ((!node->callers || !node->callers->next_caller)
      && !node->address_taken
      && ((!flag_lto && !flag_whopr) || !node->local.externally_visible))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not called directly "
		 "or called once.\n");
      return 0;
    }

  /* FIXME: We can actually split if splitting reduces call overhead.  */
  if (!flag_inline_small_functions
      && !DECL_DECLARED_INLINE_P (current_function_decl))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not autoinlining and function"
		 " is not inline.\n");
      return 0;
    }

  /* Compute local info about basic blocks and determine function
     size/time.  */
  VEC_safe_grow_cleared (bb_info, heap, bb_info_vec, last_basic_block + 1);
  memset (&best_split_point, 0, sizeof (best_split_point));
  FOR_EACH_BB (bb)
    {
      int time = 0;
      int size = 0;
      int freq = compute_call_stmt_bb_frequency (current_function_decl, bb);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Basic block %i\n", bb->index);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  int this_time, this_size;
	  gimple stmt = gsi_stmt (bsi);

	  this_size = estimate_num_insns (stmt, &eni_size_weights);
	  this_time = estimate_num_insns (stmt, &eni_time_weights) * freq;
	  size += this_size;
	  time += this_time;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  freq:%6i size:%3i time:%3i ",
		       freq, this_size, this_time);
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }
	}
      overall_time += time;
      overall_size += size;
      VEC_index (bb_info, bb_info_vec, bb->index)->time = time;
      VEC_index (bb_info, bb_info_vec, bb->index)->size = size;
    }
  find_split_points (overall_time, overall_size);
  if (best_split_point.split_bbs)
    {
      split_function (&best_split_point);
      BITMAP_FREE (best_split_point.ssa_names_to_pass);
      BITMAP_FREE (best_split_point.split_bbs);
      todo = TODO_update_ssa | TODO_cleanup_cfg;
    }
  VEC_free (bb_info, heap, bb_info_vec);
  bb_info_vec = NULL;
  return todo;
}

/* Gate the pass on -fpartial-inlining.  */

static bool
gate_split_functions (void)
{
  return flag_partial_inlining;
}

struct gimple_opt_pass pass_split_functions =
{
 {
  GIMPLE_PASS,
  "fnsplit",				/* name */
  gate_split_functions,			/* gate */
  execute_split_functions,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_IPA_FNSPLIT,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func			/* todo_flags_finish */
 }
};