value-prof.c (tree_stringops_transform): New.
[gcc.git] / gcc / value-prof.c
1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ggc.h"
37 #include "tree-flow.h"
38 #include "tree-flow-inline.h"
39 #include "diagnostic.h"
40 #include "coverage.h"
41 #include "tree.h"
42 #include "gcov-io.h"
43 #include "timevar.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
46
47 static struct value_prof_hooks *value_prof_hooks;
48
49 /* In this file value profile based optimizations are placed. Currently the
50 following optimizations are implemented (for more detailed descriptions
51 see comments at value_profile_transformations):
52
53 1) Division/modulo specialization. Provided that we can determine that the
54 operands of the division have some special properties, we may use it to
55 produce more effective code.
56 2) Speculative prefetching. If we are able to determine that the difference
57 between addresses accessed by a memory reference is usually constant, we
58 may add the prefetch instructions.
59 FIXME: This transformation was removed together with RTL based value
60 profiling.
61
62 Every such optimization should add its requirements for profiled values to
63 insn_values_to_profile function. This function is called from branch_prob
64 in profile.c and the requested values are instrumented by it in the first
65 compilation with -fprofile-arcs. The optimization may then read the
66 gathered data in the second compilation with -fbranch-probabilities.
67
68 The measured data is pointed to from the histograms
69 field of the statement annotation of the instrumented insns. It is
70 kept as a linked list of struct histogram_value_t's, which contain the
71 same information as above. */
72
73
74 static tree tree_divmod_fixed_value (tree, tree, tree, tree,
75 tree, int, gcov_type, gcov_type);
76 static tree tree_mod_pow2 (tree, tree, tree, tree, int, gcov_type, gcov_type);
77 static tree tree_mod_subtract (tree, tree, tree, tree, int, int, int,
78 gcov_type, gcov_type, gcov_type);
79 static bool tree_divmod_fixed_value_transform (tree);
80 static bool tree_mod_pow2_value_transform (tree);
81 static bool tree_mod_subtract_transform (tree);
82 static bool tree_stringops_transform (block_stmt_iterator *);
83
84 /* The overall number of invocations of the counter should match execution count
85 of basic block. Report it as error rather than internal error as it might
86 mean that user has misused the profile somehow. */
87 static bool
88 check_counter (tree stmt, const char * name, gcov_type all, gcov_type bb_count)
89 {
90 if (all != bb_count)
91 {
92 location_t * locus;
93 locus = (stmt != NULL && EXPR_HAS_LOCATION (stmt)
94 ? EXPR_LOCUS (stmt)
95 : &DECL_SOURCE_LOCATION (current_function_decl));
96 error ("%HCorrupted value profile: %s profiler overall count (%d) does not match BB count (%d)",
97 locus, name, (int)all, (int)bb_count);
98 return true;
99 }
100 return false;
101 }
102
103 /* Tree based transformations. */
104 static bool
105 tree_value_profile_transformations (void)
106 {
107 basic_block bb;
108 block_stmt_iterator bsi;
109 bool changed = false;
110
111 FOR_EACH_BB (bb)
112 {
113 /* Ignore cold areas -- we are enlarging the code. */
114 if (!bb->count || !maybe_hot_bb_p (bb))
115 continue;
116
117 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
118 {
119 tree stmt = bsi_stmt (bsi);
120 stmt_ann_t ann = get_stmt_ann (stmt);
121 histogram_value th = ann->histograms;
122 if (!th)
123 continue;
124
125 if (dump_file)
126 {
127 fprintf (dump_file, "Trying transformations on insn ");
128 print_generic_stmt (dump_file, stmt, TDF_SLIM);
129 }
130
131 /* Transformations: */
132 /* The order of things in this conditional controls which
133 transformation is used when more than one is applicable. */
134 /* It is expected that any code added by the transformations
135 will be added before the current statement, and that the
136 current statement remain valid (although possibly
137 modified) upon return. */
138 if (flag_value_profile_transformations
139 && (tree_mod_subtract_transform (stmt)
140 || tree_divmod_fixed_value_transform (stmt)
141 || tree_mod_pow2_value_transform (stmt)
142 || tree_stringops_transform (&bsi)))
143 {
144 stmt = bsi_stmt (bsi);
145 changed = true;
146 /* Original statement may no longer be in the same block. */
147 if (bb != bb_for_stmt (stmt))
148 {
149 bb = bb_for_stmt (stmt);
150 bsi = bsi_for_stmt (stmt);
151 }
152 }
153
154 /* Free extra storage from compute_value_histograms. */
155 while (th)
156 {
157 free (th->hvalue.counters);
158 th = th->hvalue.next;
159 }
160 ann->histograms = 0;
161 }
162 }
163
164 if (changed)
165 {
166 counts_to_freqs ();
167 }
168
169 return changed;
170 }
171
172 /* Generate code for transformation 1 (with OPERATION, operands OP1
173 and OP2, whose value is expected to be VALUE, parent modify-expr STMT and
174 probability of taking the optimal path PROB, which is equivalent to COUNT/ALL
175 within roundoff error). This generates the result into a temp and returns
176 the temp; it does not replace or alter the original STMT. */
177 static tree
178 tree_divmod_fixed_value (tree stmt, tree operation,
179 tree op1, tree op2, tree value, int prob, gcov_type count,
180 gcov_type all)
181 {
182 tree stmt1, stmt2, stmt3;
183 tree tmp1, tmp2, tmpv;
184 tree label_decl1 = create_artificial_label ();
185 tree label_decl2 = create_artificial_label ();
186 tree label1, label2;
187 tree bb1end, bb2end, bb3end;
188 basic_block bb, bb2, bb3, bb4;
189 tree optype = TREE_TYPE (operation);
190 edge e12, e13, e23, e24, e34;
191 block_stmt_iterator bsi;
192
193 bb = bb_for_stmt (stmt);
194 bsi = bsi_for_stmt (stmt);
195
196 tmpv = create_tmp_var (optype, "PROF");
197 tmp1 = create_tmp_var (optype, "PROF");
198 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, tmpv,
199 fold_convert (optype, value));
200 stmt2 = build2 (GIMPLE_MODIFY_STMT, optype, tmp1, op2);
201 stmt3 = build3 (COND_EXPR, void_type_node,
202 build2 (NE_EXPR, boolean_type_node, tmp1, tmpv),
203 build1 (GOTO_EXPR, void_type_node, label_decl2),
204 build1 (GOTO_EXPR, void_type_node, label_decl1));
205 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
206 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
207 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
208 bb1end = stmt3;
209
210 tmp2 = create_tmp_var (optype, "PROF");
211 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
212 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, tmp2,
213 build2 (TREE_CODE (operation), optype, op1, tmpv));
214 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
215 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
216 bb2end = stmt1;
217
218 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
219 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, tmp2,
220 build2 (TREE_CODE (operation), optype, op1, op2));
221 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
222 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
223 bb3end = stmt1;
224
225 /* Fix CFG. */
226 /* Edge e23 connects bb2 to bb3, etc. */
227 e12 = split_block (bb, bb1end);
228 bb2 = e12->dest;
229 bb2->count = count;
230 e23 = split_block (bb2, bb2end);
231 bb3 = e23->dest;
232 bb3->count = all - count;
233 e34 = split_block (bb3, bb3end);
234 bb4 = e34->dest;
235 bb4->count = all;
236
237 e12->flags &= ~EDGE_FALLTHRU;
238 e12->flags |= EDGE_FALSE_VALUE;
239 e12->probability = prob;
240 e12->count = count;
241
242 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
243 e13->probability = REG_BR_PROB_BASE - prob;
244 e13->count = all - count;
245
246 remove_edge (e23);
247
248 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
249 e24->probability = REG_BR_PROB_BASE;
250 e24->count = count;
251
252 e34->probability = REG_BR_PROB_BASE;
253 e34->count = all - count;
254
255 return tmp2;
256 }
257
258 /* Do transform 1) on INSN if applicable. */
259 static bool
260 tree_divmod_fixed_value_transform (tree stmt)
261 {
262 stmt_ann_t ann = get_stmt_ann (stmt);
263 histogram_value histogram;
264 enum tree_code code;
265 gcov_type val, count, all;
266 tree modify, op, op1, op2, result, value, tree_val;
267 int prob;
268
269 modify = stmt;
270 if (TREE_CODE (stmt) == RETURN_EXPR
271 && TREE_OPERAND (stmt, 0)
272 && TREE_CODE (TREE_OPERAND (stmt, 0)) == GIMPLE_MODIFY_STMT)
273 modify = TREE_OPERAND (stmt, 0);
274 if (TREE_CODE (modify) != GIMPLE_MODIFY_STMT)
275 return false;
276 op = GIMPLE_STMT_OPERAND (modify, 1);
277 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
278 return false;
279 code = TREE_CODE (op);
280
281 if (code != TRUNC_DIV_EXPR && code != TRUNC_MOD_EXPR)
282 return false;
283
284 op1 = TREE_OPERAND (op, 0);
285 op2 = TREE_OPERAND (op, 1);
286 if (!ann->histograms)
287 return false;
288
289 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.next)
290 if (histogram->type == HIST_TYPE_SINGLE_VALUE)
291 break;
292
293 if (!histogram)
294 return false;
295
296 value = histogram->hvalue.value;
297 val = histogram->hvalue.counters[0];
298 count = histogram->hvalue.counters[1];
299 all = histogram->hvalue.counters[2];
300
301 /* We require that count is at least half of all; this means
302 that for the transformation to fire the value must be constant
303 at least 50% of time (and 75% gives the guarantee of usage). */
304 if (simple_cst_equal (op2, value) != 1 || 2 * count < all)
305 return false;
306
307 if (check_counter (stmt, "value", all, bb_for_stmt (stmt)->count))
308 return false;
309
310 /* Compute probability of taking the optimal path. */
311 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
312
313 tree_val = build_int_cst_wide (get_gcov_type (),
314 (unsigned HOST_WIDE_INT) val,
315 val >> (HOST_BITS_PER_WIDE_INT - 1) >> 1);
316 result = tree_divmod_fixed_value (stmt, op, op1, op2, tree_val, prob, count, all);
317
318 if (dump_file)
319 {
320 fprintf (dump_file, "Div/mod by constant ");
321 print_generic_expr (dump_file, value, TDF_SLIM);
322 fprintf (dump_file, "=");
323 print_generic_expr (dump_file, tree_val, TDF_SLIM);
324 fprintf (dump_file, " transformation on insn ");
325 print_generic_stmt (dump_file, stmt, TDF_SLIM);
326 }
327
328 GIMPLE_STMT_OPERAND (modify, 1) = result;
329
330 return true;
331 }
332
333 /* Generate code for transformation 2 (with OPERATION, operands OP1
334 and OP2, parent modify-expr STMT and probability of taking the optimal
335 path PROB, which is equivalent to COUNT/ALL within roundoff error).
336 This generates the result into a temp and returns
337 the temp; it does not replace or alter the original STMT. */
338 static tree
339 tree_mod_pow2 (tree stmt, tree operation, tree op1, tree op2, int prob,
340 gcov_type count, gcov_type all)
341 {
342 tree stmt1, stmt2, stmt3, stmt4;
343 tree tmp2, tmp3;
344 tree label_decl1 = create_artificial_label ();
345 tree label_decl2 = create_artificial_label ();
346 tree label1, label2;
347 tree bb1end, bb2end, bb3end;
348 basic_block bb, bb2, bb3, bb4;
349 tree optype = TREE_TYPE (operation);
350 edge e12, e13, e23, e24, e34;
351 block_stmt_iterator bsi;
352 tree result = create_tmp_var (optype, "PROF");
353
354 bb = bb_for_stmt (stmt);
355 bsi = bsi_for_stmt (stmt);
356
357 tmp2 = create_tmp_var (optype, "PROF");
358 tmp3 = create_tmp_var (optype, "PROF");
359 stmt2 = build2 (GIMPLE_MODIFY_STMT, optype, tmp2,
360 build2 (PLUS_EXPR, optype, op2, build_int_cst (optype, -1)));
361 stmt3 = build2 (GIMPLE_MODIFY_STMT, optype, tmp3,
362 build2 (BIT_AND_EXPR, optype, tmp2, op2));
363 stmt4 = build3 (COND_EXPR, void_type_node,
364 build2 (NE_EXPR, boolean_type_node,
365 tmp3, build_int_cst (optype, 0)),
366 build1 (GOTO_EXPR, void_type_node, label_decl2),
367 build1 (GOTO_EXPR, void_type_node, label_decl1));
368 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
369 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
370 bsi_insert_before (&bsi, stmt4, BSI_SAME_STMT);
371 bb1end = stmt4;
372
373 /* tmp2 == op2-1 inherited from previous block */
374 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
375 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, result,
376 build2 (BIT_AND_EXPR, optype, op1, tmp2));
377 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
378 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
379 bb2end = stmt1;
380
381 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
382 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, result,
383 build2 (TREE_CODE (operation), optype, op1, op2));
384 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
385 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
386 bb3end = stmt1;
387
388 /* Fix CFG. */
389 /* Edge e23 connects bb2 to bb3, etc. */
390 e12 = split_block (bb, bb1end);
391 bb2 = e12->dest;
392 bb2->count = count;
393 e23 = split_block (bb2, bb2end);
394 bb3 = e23->dest;
395 bb3->count = all - count;
396 e34 = split_block (bb3, bb3end);
397 bb4 = e34->dest;
398 bb4->count = all;
399
400 e12->flags &= ~EDGE_FALLTHRU;
401 e12->flags |= EDGE_FALSE_VALUE;
402 e12->probability = prob;
403 e12->count = count;
404
405 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
406 e13->probability = REG_BR_PROB_BASE - prob;
407 e13->count = all - count;
408
409 remove_edge (e23);
410
411 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
412 e24->probability = REG_BR_PROB_BASE;
413 e24->count = count;
414
415 e34->probability = REG_BR_PROB_BASE;
416 e34->count = all - count;
417
418 return result;
419 }
420
421 /* Do transform 2) on INSN if applicable. */
422 static bool
423 tree_mod_pow2_value_transform (tree stmt)
424 {
425 stmt_ann_t ann = get_stmt_ann (stmt);
426 histogram_value histogram;
427 enum tree_code code;
428 gcov_type count, wrong_values, all;
429 tree modify, op, op1, op2, result, value;
430 int prob;
431
432 modify = stmt;
433 if (TREE_CODE (stmt) == RETURN_EXPR
434 && TREE_OPERAND (stmt, 0)
435 && TREE_CODE (TREE_OPERAND (stmt, 0)) == GIMPLE_MODIFY_STMT)
436 modify = TREE_OPERAND (stmt, 0);
437 if (TREE_CODE (modify) != GIMPLE_MODIFY_STMT)
438 return false;
439 op = GIMPLE_STMT_OPERAND (modify, 1);
440 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
441 return false;
442 code = TREE_CODE (op);
443
444 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
445 return false;
446
447 op1 = TREE_OPERAND (op, 0);
448 op2 = TREE_OPERAND (op, 1);
449 if (!ann->histograms)
450 return false;
451
452 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.next)
453 if (histogram->type == HIST_TYPE_POW2)
454 break;
455
456 if (!histogram)
457 return false;
458
459 value = histogram->hvalue.value;
460 wrong_values = histogram->hvalue.counters[0];
461 count = histogram->hvalue.counters[1];
462
463 /* We require that we hit a power of 2 at least half of all evaluations. */
464 if (simple_cst_equal (op2, value) != 1 || count < wrong_values)
465 return false;
466
467 if (dump_file)
468 {
469 fprintf (dump_file, "Mod power of 2 transformation on insn ");
470 print_generic_stmt (dump_file, stmt, TDF_SLIM);
471 }
472
473 /* Compute probability of taking the optimal path. */
474 all = count + wrong_values;
475 if (check_counter (stmt, "pow2", all, bb_for_stmt (stmt)->count))
476 return false;
477
478 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
479
480 result = tree_mod_pow2 (stmt, op, op1, op2, prob, count, all);
481
482 GIMPLE_STMT_OPERAND (modify, 1) = result;
483
484 return true;
485 }
486
487 /* Generate code for transformations 3 and 4 (with OPERATION, operands OP1
488 and OP2, parent modify-expr STMT, and NCOUNTS the number of cases to
489 support. Currently only NCOUNTS==0 or 1 is supported and this is
490 built into this interface. The probabilities of taking the optimal
491 paths are PROB1 and PROB2, which are equivalent to COUNT1/ALL and
492 COUNT2/ALL respectively within roundoff error). This generates the
493 result into a temp and returns the temp; it does not replace or alter
494 the original STMT. */
495 /* FIXME: Generalize the interface to handle NCOUNTS > 1. */
496
497 static tree
498 tree_mod_subtract (tree stmt, tree operation, tree op1, tree op2,
499 int prob1, int prob2, int ncounts,
500 gcov_type count1, gcov_type count2, gcov_type all)
501 {
502 tree stmt1, stmt2, stmt3;
503 tree tmp1;
504 tree label_decl1 = create_artificial_label ();
505 tree label_decl2 = create_artificial_label ();
506 tree label_decl3 = create_artificial_label ();
507 tree label1, label2, label3;
508 tree bb1end, bb2end = NULL_TREE, bb3end;
509 basic_block bb, bb2, bb3, bb4;
510 tree optype = TREE_TYPE (operation);
511 edge e12, e23 = 0, e24, e34, e14;
512 block_stmt_iterator bsi;
513 tree result = create_tmp_var (optype, "PROF");
514
515 bb = bb_for_stmt (stmt);
516 bsi = bsi_for_stmt (stmt);
517
518 tmp1 = create_tmp_var (optype, "PROF");
519 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, result, op1);
520 stmt2 = build2 (GIMPLE_MODIFY_STMT, optype, tmp1, op2);
521 stmt3 = build3 (COND_EXPR, void_type_node,
522 build2 (LT_EXPR, boolean_type_node, result, tmp1),
523 build1 (GOTO_EXPR, void_type_node, label_decl3),
524 build1 (GOTO_EXPR, void_type_node,
525 ncounts ? label_decl1 : label_decl2));
526 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
527 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
528 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
529 bb1end = stmt3;
530
531 if (ncounts) /* Assumed to be 0 or 1 */
532 {
533 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
534 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, result,
535 build2 (MINUS_EXPR, optype, result, tmp1));
536 stmt2 = build3 (COND_EXPR, void_type_node,
537 build2 (LT_EXPR, boolean_type_node, result, tmp1),
538 build1 (GOTO_EXPR, void_type_node, label_decl3),
539 build1 (GOTO_EXPR, void_type_node, label_decl2));
540 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
541 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
542 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
543 bb2end = stmt2;
544 }
545
546 /* Fallback case. */
547 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
548 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, result,
549 build2 (TREE_CODE (operation), optype, result, tmp1));
550 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
551 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
552 bb3end = stmt1;
553
554 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
555 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
556
557 /* Fix CFG. */
558 /* Edge e23 connects bb2 to bb3, etc. */
559 /* However block 3 is optional; if it is not there, references
560 to 3 really refer to block 2. */
561 e12 = split_block (bb, bb1end);
562 bb2 = e12->dest;
563 bb2->count = all - count1;
564
565 if (ncounts) /* Assumed to be 0 or 1. */
566 {
567 e23 = split_block (bb2, bb2end);
568 bb3 = e23->dest;
569 bb3->count = all - count1 - count2;
570 }
571
572 e34 = split_block (ncounts ? bb3 : bb2, bb3end);
573 bb4 = e34->dest;
574 bb4->count = all;
575
576 e12->flags &= ~EDGE_FALLTHRU;
577 e12->flags |= EDGE_FALSE_VALUE;
578 e12->probability = REG_BR_PROB_BASE - prob1;
579 e12->count = all - count1;
580
581 e14 = make_edge (bb, bb4, EDGE_TRUE_VALUE);
582 e14->probability = prob1;
583 e14->count = count1;
584
585 if (ncounts) /* Assumed to be 0 or 1. */
586 {
587 e23->flags &= ~EDGE_FALLTHRU;
588 e23->flags |= EDGE_FALSE_VALUE;
589 e23->count = all - count1 - count2;
590 e23->probability = REG_BR_PROB_BASE - prob2;
591
592 e24 = make_edge (bb2, bb4, EDGE_TRUE_VALUE);
593 e24->probability = prob2;
594 e24->count = count2;
595 }
596
597 e34->probability = REG_BR_PROB_BASE;
598 e34->count = all - count1 - count2;
599
600 return result;
601 }
602
603 /* Do transforms 3) and 4) on INSN if applicable. */
604 static bool
605 tree_mod_subtract_transform (tree stmt)
606 {
607 stmt_ann_t ann = get_stmt_ann (stmt);
608 histogram_value histogram;
609 enum tree_code code;
610 gcov_type count, wrong_values, all;
611 tree modify, op, op1, op2, result, value;
612 int prob1, prob2;
613 unsigned int i;
614
615 modify = stmt;
616 if (TREE_CODE (stmt) == RETURN_EXPR
617 && TREE_OPERAND (stmt, 0)
618 && TREE_CODE (TREE_OPERAND (stmt, 0)) == GIMPLE_MODIFY_STMT)
619 modify = TREE_OPERAND (stmt, 0);
620 if (TREE_CODE (modify) != GIMPLE_MODIFY_STMT)
621 return false;
622 op = GIMPLE_STMT_OPERAND (modify, 1);
623 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
624 return false;
625 code = TREE_CODE (op);
626
627 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
628 return false;
629
630 op1 = TREE_OPERAND (op, 0);
631 op2 = TREE_OPERAND (op, 1);
632 if (!ann->histograms)
633 return false;
634
635 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.next)
636 if (histogram->type == HIST_TYPE_INTERVAL)
637 break;
638
639 if (!histogram)
640 return false;
641
642 value = histogram->hvalue.value;
643 all = 0;
644 wrong_values = 0;
645 for (i = 0; i < histogram->hdata.intvl.steps; i++)
646 all += histogram->hvalue.counters[i];
647
648 wrong_values += histogram->hvalue.counters[i];
649 wrong_values += histogram->hvalue.counters[i+1];
650 all += wrong_values;
651
652 /* Compute probability of taking the optimal path. */
653 if (check_counter (stmt, "interval", all, bb_for_stmt (stmt)->count))
654 return false;
655
656 /* We require that we use just subtractions in at least 50% of all
657 evaluations. */
658 count = 0;
659 for (i = 0; i < histogram->hdata.intvl.steps; i++)
660 {
661 count += histogram->hvalue.counters[i];
662 if (count * 2 >= all)
663 break;
664 }
665 if (i == histogram->hdata.intvl.steps)
666 return false;
667
668 if (dump_file)
669 {
670 fprintf (dump_file, "Mod subtract transformation on insn ");
671 print_generic_stmt (dump_file, stmt, TDF_SLIM);
672 }
673
674 /* Compute probability of taking the optimal path(s). */
675 prob1 = (histogram->hvalue.counters[0] * REG_BR_PROB_BASE + all / 2) / all;
676 prob2 = (histogram->hvalue.counters[1] * REG_BR_PROB_BASE + all / 2) / all;
677
678 /* In practice, "steps" is always 2. This interface reflects this,
679 and will need to be changed if "steps" can change. */
680 result = tree_mod_subtract (stmt, op, op1, op2, prob1, prob2, i,
681 histogram->hvalue.counters[0],
682 histogram->hvalue.counters[1], all);
683
684 GIMPLE_STMT_OPERAND (modify, 1) = result;
685
686 return true;
687 }
688
689 /* Return true if the stringop FNDECL with ARGLIST shall be profiled. */
690 static bool
691 interesting_stringop_to_profile_p (tree fndecl, tree arglist)
692 {
693 enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
694
695 if (fcode != BUILT_IN_MEMSET && fcode != BUILT_IN_MEMCPY
696 && fcode != BUILT_IN_BZERO)
697 return false;
698
699 switch (fcode)
700 {
701 case BUILT_IN_MEMCPY:
702 case BUILT_IN_MEMPCPY:
703 return validate_arglist (arglist,
704 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE,
705 VOID_TYPE);
706 case BUILT_IN_MEMSET:
707 return validate_arglist (arglist,
708 POINTER_TYPE, INTEGER_TYPE, INTEGER_TYPE,
709 VOID_TYPE);
710 case BUILT_IN_BZERO:
711 return validate_arglist (arglist, POINTER_TYPE, INTEGER_TYPE,
712 VOID_TYPE);
713 default:
714 gcc_unreachable ();
715 }
716 }
717
718 /* Convert stringop (..., size)
719 into
720 if (size == VALUE)
721 stringop (...., VALUE);
722 else
723 stringop (...., size);
724 assuming constant propagation of VALUE will happen later.
725 */
726 static void
727 tree_stringop_fixed_value (tree stmt, tree value, int prob, gcov_type count,
728 gcov_type all)
729 {
730 tree stmt1, stmt2, stmt3;
731 tree tmp1, tmpv;
732 tree label_decl1 = create_artificial_label ();
733 tree label_decl2 = create_artificial_label ();
734 tree label1, label2;
735 tree bb1end, bb2end;
736 basic_block bb, bb2, bb3, bb4;
737 edge e12, e13, e23, e24, e34;
738 block_stmt_iterator bsi;
739 tree call = get_call_expr_in (stmt);
740 tree arglist = TREE_OPERAND (call, 1);
741 tree blck_size = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
742 tree optype = TREE_TYPE (blck_size);
743 int region;
744
745 bb = bb_for_stmt (stmt);
746 bsi = bsi_for_stmt (stmt);
747
748 if (bsi_end_p (bsi))
749 {
750 edge_iterator ei;
751 for (ei = ei_start (bb->succs); (e34 = ei_safe_edge (ei)); )
752 if (!e34->flags & EDGE_ABNORMAL)
753 break;
754 }
755 else
756 {
757 e34 = split_block (bb, stmt);
758 bsi = bsi_for_stmt (stmt);
759 }
760 bb4 = e34->dest;
761
762 tmpv = create_tmp_var (optype, "PROF");
763 tmp1 = create_tmp_var (optype, "PROF");
764 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, tmpv,
765 fold_convert (optype, value));
766 stmt2 = build2 (GIMPLE_MODIFY_STMT, optype, tmp1, blck_size);
767 stmt3 = build3 (COND_EXPR, void_type_node,
768 build2 (NE_EXPR, boolean_type_node, tmp1, tmpv),
769 build1 (GOTO_EXPR, void_type_node, label_decl2),
770 build1 (GOTO_EXPR, void_type_node, label_decl1));
771 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
772 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
773 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
774 bb1end = stmt3;
775
776 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
777 stmt1 = unshare_expr (stmt);
778 call = get_call_expr_in (stmt1);
779 arglist = TREE_OPERAND (call, 1);
780 TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))) = value;
781 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
782 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
783 region = lookup_stmt_eh_region (stmt);
784 if (region >= 0)
785 add_stmt_to_eh_region (stmt1, region);
786 bb2end = stmt1;
787 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
788 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
789
790 /* Fix CFG. */
791 /* Edge e23 connects bb2 to bb3, etc. */
792 e12 = split_block (bb, bb1end);
793 bb2 = e12->dest;
794 bb2->count = count;
795 e23 = split_block (bb2, bb2end);
796 bb3 = e23->dest;
797 bb3->count = all - count;
798
799 e12->flags &= ~EDGE_FALLTHRU;
800 e12->flags |= EDGE_FALSE_VALUE;
801 e12->probability = prob;
802 e12->count = count;
803
804 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
805 e13->probability = REG_BR_PROB_BASE - prob;
806 e13->count = all - count;
807
808 remove_edge (e23);
809
810 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
811 e24->probability = REG_BR_PROB_BASE;
812 e24->count = count;
813
814 e34->probability = REG_BR_PROB_BASE;
815 e34->count = all - count;
816 }
817
818 /* Find values inside STMT for that we want to measure histograms for
819 division/modulo optimization. */
820 static bool
821 tree_stringops_transform (block_stmt_iterator *bsi)
822 {
823 tree stmt = bsi_stmt (*bsi);
824 tree call = get_call_expr_in (stmt);
825 tree fndecl;
826 tree arglist;
827 tree blck_size;
828 enum built_in_function fcode;
829 stmt_ann_t ann = get_stmt_ann (stmt);
830 histogram_value histogram;
831 gcov_type count, all, val;
832 tree value;
833 tree dest, src;
834 unsigned int dest_align, src_align;
835 int prob;
836 tree tree_val;
837
838 if (!call)
839 return false;
840 fndecl = get_callee_fndecl (call);
841 if (!fndecl)
842 return false;
843 fcode = DECL_FUNCTION_CODE (fndecl);
844 arglist = TREE_OPERAND (call, 1);
845 if (!interesting_stringop_to_profile_p (fndecl, arglist))
846 return false;
847
848 if (fcode == BUILT_IN_BZERO)
849 blck_size = TREE_VALUE (TREE_CHAIN (arglist));
850 else
851 blck_size = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
852 if (TREE_CODE (blck_size) == INTEGER_CST)
853 return false;
854
855 if (!ann->histograms)
856 return false;
857
858 all = bb_for_stmt (stmt)->count;
859 if (!all)
860 return false;
861 for (histogram = ann->histograms; histogram;
862 histogram = histogram->hvalue.next)
863 if (histogram->type == HIST_TYPE_SINGLE_VALUE)
864 break;
865 if (!histogram)
866 return false;
867 value = histogram->hvalue.value;
868 val = histogram->hvalue.counters[0];
869 count = histogram->hvalue.counters[1];
870 all = histogram->hvalue.counters[2];
871 /* We require that count is at least half of all; this means
872 that for the transformation to fire the value must be constant
873 at least 80% of time. */
874 if ((6 * count / 5) < all)
875 return false;
876 if (check_counter (stmt, "value", all, bb_for_stmt (stmt)->count))
877 return false;
878 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
879 dest = TREE_VALUE (arglist);
880 dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
881 switch (fcode)
882 {
883 case BUILT_IN_MEMCPY:
884 case BUILT_IN_MEMPCPY:
885 src = TREE_VALUE (TREE_CHAIN (arglist));
886 src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
887 if (!can_move_by_pieces (val, MIN (dest_align, src_align)))
888 return false;
889 break;
890 case BUILT_IN_MEMSET:
891 if (!can_store_by_pieces (val, builtin_memset_read_str,
892 TREE_VALUE (TREE_CHAIN (arglist)),
893 dest_align))
894 return false;
895 break;
896 case BUILT_IN_BZERO:
897 if (!can_store_by_pieces (val, builtin_memset_read_str,
898 integer_zero_node,
899 dest_align))
900 return false;
901 break;
902 default:
903 gcc_unreachable ();
904 }
905 tree_val = build_int_cst_wide (get_gcov_type (),
906 (unsigned HOST_WIDE_INT) val,
907 val >> (HOST_BITS_PER_WIDE_INT - 1) >> 1);
908 if (dump_file)
909 {
910 fprintf (dump_file, "Single value %i stringop transformation on ",
911 (int)val);
912 print_generic_stmt (dump_file, stmt, TDF_SLIM);
913 }
914 tree_stringop_fixed_value (stmt, tree_val, prob, count, all);
915
916 return true;
917 }
918
919 struct value_prof_hooks {
920 /* Find list of values for which we want to measure histograms. */
921 void (*find_values_to_profile) (histogram_values *);
922
923 /* Identify and exploit properties of values that are hard to analyze
924 statically. See value-prof.c for more detail. */
925 bool (*value_profile_transformations) (void);
926 };
927 \f
928 /* Find values inside STMT for that we want to measure histograms for
929 division/modulo optimization. */
930 static void
931 tree_divmod_values_to_profile (tree stmt, histogram_values *values)
932 {
933 tree assign, lhs, rhs, divisor, op0, type;
934 histogram_value hist;
935
936 if (TREE_CODE (stmt) == RETURN_EXPR)
937 assign = TREE_OPERAND (stmt, 0);
938 else
939 assign = stmt;
940
941 if (!assign
942 || TREE_CODE (assign) != GIMPLE_MODIFY_STMT)
943 return;
944 lhs = GIMPLE_STMT_OPERAND (assign, 0);
945 type = TREE_TYPE (lhs);
946 if (!INTEGRAL_TYPE_P (type))
947 return;
948
949 rhs = GIMPLE_STMT_OPERAND (assign, 1);
950 switch (TREE_CODE (rhs))
951 {
952 case TRUNC_DIV_EXPR:
953 case TRUNC_MOD_EXPR:
954 divisor = TREE_OPERAND (rhs, 1);
955 op0 = TREE_OPERAND (rhs, 0);
956
957 VEC_reserve (histogram_value, heap, *values, 3);
958
959 if (is_gimple_reg (divisor))
960 {
961 /* Check for the case where the divisor is the same value most
962 of the time. */
963 hist = ggc_alloc (sizeof (*hist));
964 hist->hvalue.value = divisor;
965 hist->hvalue.stmt = stmt;
966 hist->type = HIST_TYPE_SINGLE_VALUE;
967 VEC_quick_push (histogram_value, *values, hist);
968 }
969
970 /* For mod, check whether it is not often a noop (or replaceable by
971 a few subtractions). */
972 if (TREE_CODE (rhs) == TRUNC_MOD_EXPR
973 && TYPE_UNSIGNED (type))
974 {
975 /* Check for a special case where the divisor is power of 2. */
976 hist = ggc_alloc (sizeof (*hist));
977 hist->hvalue.value = divisor;
978 hist->hvalue.stmt = stmt;
979 hist->type = HIST_TYPE_POW2;
980 VEC_quick_push (histogram_value, *values, hist);
981
982 hist = ggc_alloc (sizeof (*hist));
983 hist->hvalue.stmt = stmt;
984 hist->hvalue.value
985 = build2 (TRUNC_DIV_EXPR, type, op0, divisor);
986 hist->type = HIST_TYPE_INTERVAL;
987 hist->hdata.intvl.int_start = 0;
988 hist->hdata.intvl.steps = 2;
989 VEC_quick_push (histogram_value, *values, hist);
990 }
991 return;
992
993 default:
994 return;
995 }
996 }
997
998 /* Find values inside STMT for that we want to measure histograms for
999 division/modulo optimization. */
1000 static void
1001 tree_stringops_values_to_profile (tree stmt, histogram_values *values)
1002 {
1003 tree call = get_call_expr_in (stmt);
1004 tree fndecl;
1005 tree arglist;
1006 tree blck_size;
1007 enum built_in_function fcode;
1008 histogram_value hist;
1009
1010 if (!call)
1011 return;
1012 fndecl = get_callee_fndecl (call);
1013 if (!fndecl)
1014 return;
1015 fcode = DECL_FUNCTION_CODE (fndecl);
1016 arglist = TREE_OPERAND (call, 1);
1017
1018 if (!interesting_stringop_to_profile_p (fndecl, arglist))
1019 return;
1020
1021 if (fcode == BUILT_IN_BZERO)
1022 blck_size = TREE_VALUE (TREE_CHAIN (arglist));
1023 else
1024 blck_size = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
1025
1026 if (TREE_CODE (blck_size) != INTEGER_CST)
1027 {
1028 VEC_reserve (histogram_value, heap, *values, 3);
1029 hist = ggc_alloc (sizeof (*hist));
1030 hist->hvalue.value = blck_size;
1031 hist->hvalue.stmt = stmt;
1032 hist->type = HIST_TYPE_SINGLE_VALUE;
1033 VEC_quick_push (histogram_value, *values, hist);
1034 }
1035 }
1036
1037 /* Find values inside STMT for that we want to measure histograms and adds
1038 them to list VALUES. */
1039
1040 static void
1041 tree_values_to_profile (tree stmt, histogram_values *values)
1042 {
1043 if (flag_value_profile_transformations)
1044 {
1045 tree_divmod_values_to_profile (stmt, values);
1046 tree_stringops_values_to_profile (stmt, values);
1047 }
1048 }
1049
1050 static void
1051 tree_find_values_to_profile (histogram_values *values)
1052 {
1053 basic_block bb;
1054 block_stmt_iterator bsi;
1055 unsigned i;
1056 histogram_value hist;
1057
1058 *values = NULL;
1059 FOR_EACH_BB (bb)
1060 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1061 tree_values_to_profile (bsi_stmt (bsi), values);
1062
1063 for (i = 0; VEC_iterate (histogram_value, *values, i, hist); i++)
1064 {
1065 switch (hist->type)
1066 {
1067 case HIST_TYPE_INTERVAL:
1068 if (dump_file)
1069 {
1070 fprintf (dump_file, "Interval counter for tree ");
1071 print_generic_expr (dump_file, hist->hvalue.stmt,
1072 TDF_SLIM);
1073 fprintf (dump_file, ", range %d -- %d.\n",
1074 hist->hdata.intvl.int_start,
1075 (hist->hdata.intvl.int_start
1076 + hist->hdata.intvl.steps - 1));
1077 }
1078 hist->n_counters = hist->hdata.intvl.steps + 2;
1079 break;
1080
1081 case HIST_TYPE_POW2:
1082 if (dump_file)
1083 {
1084 fprintf (dump_file, "Pow2 counter for tree ");
1085 print_generic_expr (dump_file, hist->hvalue.stmt, TDF_SLIM);
1086 fprintf (dump_file, ".\n");
1087 }
1088 hist->n_counters = 2;
1089 break;
1090
1091 case HIST_TYPE_SINGLE_VALUE:
1092 if (dump_file)
1093 {
1094 fprintf (dump_file, "Single value counter for tree ");
1095 print_generic_expr (dump_file, hist->hvalue.stmt, TDF_SLIM);
1096 fprintf (dump_file, ".\n");
1097 }
1098 hist->n_counters = 3;
1099 break;
1100
1101 case HIST_TYPE_CONST_DELTA:
1102 if (dump_file)
1103 {
1104 fprintf (dump_file, "Constant delta counter for tree ");
1105 print_generic_expr (dump_file, hist->hvalue.stmt, TDF_SLIM);
1106 fprintf (dump_file, ".\n");
1107 }
1108 hist->n_counters = 4;
1109 break;
1110
1111 default:
1112 gcc_unreachable ();
1113 }
1114 }
1115 }
1116
1117 static struct value_prof_hooks tree_value_prof_hooks = {
1118 tree_find_values_to_profile,
1119 tree_value_profile_transformations
1120 };
1121
1122 void
1123 tree_register_value_prof_hooks (void)
1124 {
1125 gcc_assert (current_ir_type () == IR_GIMPLE);
1126 value_prof_hooks = &tree_value_prof_hooks;
1127 }
1128 \f
1129 /* IR-independent entry points. */
1130 void
1131 find_values_to_profile (histogram_values *values)
1132 {
1133 (value_prof_hooks->find_values_to_profile) (values);
1134 }
1135
1136 bool
1137 value_profile_transformations (void)
1138 {
1139 return (value_prof_hooks->value_profile_transformations) ();
1140 }
1141 \f
1142