coverage.h (GCOV_TYPE_NODE): Delete.
[gcc.git] / gcc / value-prof.c
1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ggc.h"
37 #include "tree-flow.h"
38 #include "tree-flow-inline.h"
39 #include "diagnostic.h"
40 #include "coverage.h"
41 #include "tree.h"
42 #include "gcov-io.h"
43
44 static struct value_prof_hooks *value_prof_hooks;
45
46 /* This is the vector of histograms. Created in find_values_to_profile.
47 During profile generation, freed by instrument_values.
48 During profile use, freed by value_profile_transformations. */
49
50 static histogram_values static_values = NULL;
51
52 /* In this file value profile based optimizations are placed. Currently the
53 following optimizations are implemented (for more detailed descriptions
54 see comments at value_profile_transformations):
55
56 1) Division/modulo specialization. Provided that we can determine that the
57 operands of the division have some special properties, we may use it to
58 produce more effective code.
59 2) Speculative prefetching. If we are able to determine that the difference
60 between addresses accessed by a memory reference is usually constant, we
61 may add the prefetch instructions.
62
63 Every such optimization should add its requirements for profiled values to
64 insn_values_to_profile function. This function is called from branch_prob
65 in profile.c and the requested values are instrumented by it in the first
66 compilation with -fprofile-arcs. The optimization may then read the
67 gathered data in the second compilation with -fbranch-probabilities.
68
69 There are currently two versions, RTL-based and tree-based. Over time
70 the RTL-based version may go away.
71
72 In the RTL-based version, the measured data is appended as REG_VALUE_PROFILE
73 note to the instrumented insn. The argument to the note consists of an
74 EXPR_LIST where its members have the following meaning (from the first to
75 the last):
76
77 -- type of information gathered (HIST_TYPE*)
78 -- the expression that is profiled
79 -- list of counters starting from the first one.
80
81 In the tree-based version, the measured data is pointed to from the histograms
82 field of the statement annotation of the instrumented insns. It is
83 kept as a linked list of struct histogram_value_t's, which contain the
84 same information as above. */
85
86 /* For speculative prefetching, the range in that we do not prefetch (because
87 we assume that it will be in cache anyway). The asymmetry between min and
88 max range is trying to reflect the fact that the sequential prefetching
89 of the data is commonly done directly by hardware. Nevertheless, these
90 values are just a guess and should of course be target-specific.
91
92 FIXME: There is no tree form of speculative prefetching as yet.
93
94 FIXME: A better approach to instrumentation in the profile-generation
95 pass is to generate calls to magic library functions (to be added to
96 libgcc) rather than inline code. This approach will probably be
97 necessary to get tree-based speculative prefetching working in a useful
98 fashion, as inline code bloats things so much the rest of the compiler has
99 serious problems dealing with it (judging from the rtl behavior). */
100
101 #ifndef NOPREFETCH_RANGE_MIN
102 #define NOPREFETCH_RANGE_MIN (-16)
103 #endif
104 #ifndef NOPREFETCH_RANGE_MAX
105 #define NOPREFETCH_RANGE_MAX 32
106 #endif
107
108 static void rtl_divmod_values_to_profile (rtx, histogram_values *);
109 #ifdef HAVE_prefetch
110 static bool insn_prefetch_values_to_profile (rtx, histogram_values *);
111 static int find_mem_reference_1 (rtx *, void *);
112 static void find_mem_reference_2 (rtx, rtx, void *);
113 static bool find_mem_reference (rtx, rtx *, int *);
114 #endif
115
116 static void rtl_values_to_profile (rtx, histogram_values *);
117 static rtx rtl_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
118 rtx, gcov_type, int);
119 static rtx rtl_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int);
120 static rtx rtl_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
121 int, int, int);
122 #ifdef HAVE_prefetch
123 static rtx gen_speculative_prefetch (rtx, gcov_type, int);
124 #endif
125 static bool rtl_divmod_fixed_value_transform (rtx);
126 static bool rtl_mod_pow2_value_transform (rtx);
127 static bool rtl_mod_subtract_transform (rtx);
128 #ifdef HAVE_prefetch
129 static bool speculative_prefetching_transform (rtx);
130 #endif
131 static void tree_divmod_values_to_profile (tree, histogram_values *);
132 static void tree_values_to_profile (tree, histogram_values *);
133 static tree tree_divmod_fixed_value (tree, tree, tree, tree,
134 tree, int, gcov_type, gcov_type);
135 static tree tree_mod_pow2 (tree, tree, tree, tree, int, gcov_type, gcov_type);
136 static tree tree_mod_subtract (tree, tree, tree, tree, int, int, int,
137 gcov_type, gcov_type, gcov_type);
138 static bool tree_divmod_fixed_value_transform (tree);
139 static bool tree_mod_pow2_value_transform (tree);
140 static bool tree_mod_subtract_transform (tree);
141
142 \f
/* Find values inside INSN for that we want to measure histograms for
   division/modulo optimization and stores them to VALUES.  */
static void
rtl_divmod_values_to_profile (rtx insn, histogram_values *values)
{
  rtx set, set_src, op1, op2;
  enum machine_mode mode;
  histogram_value hist;

  /* Only ordinary insns can contain a division.  */
  if (!INSN_P (insn))
    return;

  /* We only handle insns that set a single location.  */
  set = single_set (insn);
  if (!set)
    return;

  /* Division histograms only make sense in integral modes.  */
  mode = GET_MODE (SET_DEST (set));
  if (!INTEGRAL_MODE_P (mode))
    return;

  set_src = SET_SRC (set);
  switch (GET_CODE (set_src))
    {
    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      op1 = XEXP (set_src, 0);
      op2 = XEXP (set_src, 1);
      /* The instrumentation will re-evaluate the divisor, which is not
	 safe if it has side effects.  */
      if (side_effects_p (op2))
	return;

      /* Check for a special case where the divisor is power of 2.  */
      if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
	{
	  hist = ggc_alloc (sizeof (*hist));
	  hist->hvalue.rtl.value = op2;
	  hist->hvalue.rtl.seq = NULL_RTX;
	  hist->hvalue.rtl.mode = mode;
	  hist->hvalue.rtl.insn = insn;
	  hist->type = HIST_TYPE_POW2;
	  /* Also reserve a counter for divisors that are not a power
	     of 2 at all.  */
	  hist->hdata.pow2.may_be_other = 1;
	  VEC_safe_push (histogram_value, *values, hist);
	}

      /* Check whether the divisor is not in fact a constant.  */
      if (!CONSTANT_P (op2))
	{
	  hist = ggc_alloc (sizeof (*hist));
	  hist->hvalue.rtl.value = op2;
	  hist->hvalue.rtl.mode = mode;
	  hist->hvalue.rtl.seq = NULL_RTX;
	  hist->hvalue.rtl.insn = insn;
	  hist->type = HIST_TYPE_SINGLE_VALUE;
	  VEC_safe_push (histogram_value, *values, hist);
	}

      /* For mod, check whether it is not often a noop (or replaceable by
	 a few subtractions).  */
      if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
	{
	  rtx tmp;

	  hist = ggc_alloc (sizeof (*hist));
	  /* Profile op1 / op2 with an interval histogram over {0, 1};
	     quotient 0 means the mod is a noop, 1 means a single
	     subtraction would do.  */
	  start_sequence ();
	  tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
	  hist->hvalue.rtl.value = force_operand (tmp, NULL_RTX);
	  hist->hvalue.rtl.seq = get_insns ();
	  end_sequence ();
	  hist->hvalue.rtl.mode = mode;
	  hist->hvalue.rtl.insn = insn;
	  hist->type = HIST_TYPE_INTERVAL;
	  hist->hdata.intvl.int_start = 0;
	  hist->hdata.intvl.steps = 2;
	  VEC_safe_push (histogram_value, *values, hist);
	}
      return;

    default:
      return;
    }
}
225
226 #ifdef HAVE_prefetch
227
228 /* Called from find_mem_reference through for_each_rtx, finds a memory
229 reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
230 to *RET and the traversing of the expression is interrupted by returning 1.
231 Otherwise 0 is returned. */
232
233 static int
234 find_mem_reference_1 (rtx *expr, void *ret)
235 {
236 rtx *mem = ret;
237
238 if (GET_CODE (*expr) == MEM)
239 {
240 *mem = *expr;
241 return 1;
242 }
243 return 0;
244 }
245
/* Called from find_mem_reference through note_stores to find out whether
   the memory reference MEM is a store.  I.e. if EXPR == MEM, the variable
   FMR2_WRITE is set to true.  */

/* note_stores callbacks cannot return data, so the result is communicated
   through this file-scope flag; find_mem_reference clears it before the
   walk and reads it afterwards.  */
static int fmr2_write;
static void
find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem)
{
  if (expr == mem)
    fmr2_write = true;
}
257
/* Find a memory reference inside INSN, return it in MEM.  Set WRITE to true
   if it is a write of the mem.  Return false if no memory reference is found,
   true otherwise.  */

static bool
find_mem_reference (rtx insn, rtx *mem, int *write)
{
  /* Scan the whole pattern for the first MEM.  */
  *mem = NULL_RTX;
  for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem);

  if (!*mem)
    return false;

  /* Ask note_stores whether that MEM is the destination of a store;
     the answer arrives through the fmr2_write flag.  */
  fmr2_write = false;
  note_stores (PATTERN (insn), find_mem_reference_2, *mem);
  *write = fmr2_write;
  return true;
}
276
/* Find values inside INSN for that we want to measure histograms for
   a speculative prefetching.  Add them to the list VALUES.
   Returns true if we found any such value, false otherwise.  */

static bool
insn_prefetch_values_to_profile (rtx insn, histogram_values* values)
{
  rtx mem, address;
  int write;
  histogram_value hist;

  /* It only makes sense to look for memory references in ordinary insns.  */
  if (GET_CODE (insn) != INSN)
    return false;

  if (!find_mem_reference (insn, &mem, &write))
    return false;

  /* The address is re-evaluated by the instrumentation, so it must be
     free of side effects; a constant address never benefits from
     prefetching.  */
  address = XEXP (mem, 0);
  if (side_effects_p (address))
    return false;

  if (CONSTANT_P (address))
    return false;

  /* Record a "constant delta" histogram measuring how the address changes
     between consecutive executions.  */
  hist = ggc_alloc (sizeof (*hist));
  hist->hvalue.rtl.value = address;
  hist->hvalue.rtl.mode = GET_MODE (address);
  hist->hvalue.rtl.seq = NULL_RTX;
  hist->hvalue.rtl.insn = insn;
  hist->type = HIST_TYPE_CONST_DELTA;
  VEC_safe_push (histogram_value, *values, hist);

  return true;
}
312 #endif
/* Find values inside INSN for that we want to measure histograms and add
   them to the list VALUES.  */
static void
rtl_values_to_profile (rtx insn, histogram_values *values)
{
  /* Division/modulo candidates.  */
  if (flag_value_profile_transformations)
    rtl_divmod_values_to_profile (insn, values);

#ifdef HAVE_prefetch
  /* Speculative-prefetch candidates, only when the target has a
     prefetch instruction.  */
  if (flag_speculative_prefetching)
    insn_prefetch_values_to_profile (insn, values);
#endif
}
326
327 /* Find list of values for that we want to measure histograms. */
328 static void
329 rtl_find_values_to_profile (histogram_values *values)
330 {
331 rtx insn;
332 unsigned i, libcall_level;
333
334 life_analysis (NULL, PROP_DEATH_NOTES);
335
336 *values = VEC_alloc (histogram_value, 0);
337 libcall_level = 0;
338 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
339 rtl_values_to_profile (insn, values);
340 static_values = *values;
341
342 for (i = 0; i < VEC_length (histogram_value, *values); i++)
343 {
344 histogram_value hist = VEC_index (histogram_value, *values, i);
345
346 switch (hist->type)
347 {
348 case HIST_TYPE_INTERVAL:
349 if (dump_file)
350 fprintf (dump_file,
351 "Interval counter for insn %d, range %d -- %d.\n",
352 INSN_UID ((rtx)hist->hvalue.rtl.insn),
353 hist->hdata.intvl.int_start,
354 (hist->hdata.intvl.int_start
355 + hist->hdata.intvl.steps - 1));
356 hist->n_counters = hist->hdata.intvl.steps + 2;
357 break;
358
359 case HIST_TYPE_POW2:
360 if (dump_file)
361 fprintf (dump_file,
362 "Pow2 counter for insn %d.\n",
363 INSN_UID ((rtx)hist->hvalue.rtl.insn));
364 hist->n_counters
365 = GET_MODE_BITSIZE (hist->hvalue.rtl.mode)
366 + (hist->hdata.pow2.may_be_other ? 1 : 0);
367 break;
368
369 case HIST_TYPE_SINGLE_VALUE:
370 if (dump_file)
371 fprintf (dump_file,
372 "Single value counter for insn %d.\n",
373 INSN_UID ((rtx)hist->hvalue.rtl.insn));
374 hist->n_counters = 3;
375 break;
376
377 case HIST_TYPE_CONST_DELTA:
378 if (dump_file)
379 fprintf (dump_file,
380 "Constant delta counter for insn %d.\n",
381 INSN_UID ((rtx)hist->hvalue.rtl.insn));
382 hist->n_counters = 4;
383 break;
384
385 default:
386 gcc_unreachable ();
387 }
388 }
389 allocate_reg_info (max_reg_num (), FALSE, FALSE);
390 }
391
392 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
393 them to identify and exploit properties of values that are hard to analyze
394 statically.
395
396 We do following transformations:
397
398 1)
399
400 x = a / b;
401
402 where b is almost always a constant N is transformed to
403
404 if (b == N)
405 x = a / N;
406 else
407 x = a / b;
408
409 Analogically with %
410
411 2)
412
413 x = a % b
414
415 where b is almost always a power of 2 and the division is unsigned
416 TODO -- handle signed case as well
417
418 if ((b & (b - 1)) == 0)
419 x = a & (b - 1);
420 else
421 x = x % b;
422
423 Note that when b = 0, no error will occur and x = a; this is correct,
424 as result of such operation is undefined.
425
426 3)
427
428 x = a % b
429
430 where a is almost always less then b and the division is unsigned
431 TODO -- handle signed case as well
432
433 x = a;
434 if (x >= b)
435 x %= b;
436
437 4)
438
439 x = a % b
440
441 where a is almost always less then 2 * b and the division is unsigned
442 TODO -- handle signed case as well
443
444 x = a;
445 if (x >= b)
446 x -= b;
447 if (x >= b)
448 x %= b;
449
450 It would be possible to continue analogically for K * b for other small
451 K's, but it is probably not useful.
452
453 5)
454
455 Read or write of mem[address], where the value of address changes usually
456 by a constant C != 0 between the following accesses to the computation; with
457 -fspeculative-prefetching we then add a prefetch of address + C before
458 the insn. This handles prefetching of several interesting cases in addition
459 to a simple prefetching for addresses that are induction variables, e. g.
460 linked lists allocated sequentially (even in case they are processed
461 recursively).
462
463 TODO -- we should also check whether there is not (usually) a small
464 difference with the adjacent memory references, so that we do
465 not issue overlapping prefetches. Also we should employ some
466 heuristics to eliminate cases where prefetching evidently spoils
467 the code.
468 -- it should somehow cooperate with the loop optimizer prefetching
469
470 TODO:
471
472 There are other useful cases that could be handled by a similar mechanism,
473 for example:
474
475 for (i = 0; i < n; i++)
476 ...
477
478 transform to (for constant N):
479
480 if (n == N)
481 for (i = 0; i < N; i++)
482 ...
483 else
484 for (i = 0; i < n; i++)
485 ...
486 making unroller happy. Since this may grow the code significantly,
487 we would have to be very careful here. */
488
/* Walk all insns and apply the value-profile transformations described
   in the comment above to those carrying a REG_VALUE_PROFILE note.
   Returns true if any insn was changed.  */
static bool
rtl_value_profile_transformations (void)
{
  rtx insn, next;
  int changed = false;

  /* Fetch NEXT before transforming, since a transformation may delete
     INSN.  */
  for (insn = get_insns (); insn; insn = next)
    {
      next = NEXT_INSN (insn);

      if (!INSN_P (insn))
	continue;

      /* Scan for insn carrying a histogram.  */
      if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
	continue;

      /* Ignore cold areas -- we are growing a code.  */
      if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
	continue;

      if (dump_file)
	{
	  fprintf (dump_file, "Trying transformations on insn %d\n",
		   INSN_UID (insn));
	  print_rtl_single (dump_file, insn);
	}

      /* Transformations:  at most one of the divmod transformations fires
	 per insn (short-circuit ||, in priority order).  */
      if (flag_value_profile_transformations
	  && (rtl_mod_subtract_transform (insn)
	      || rtl_divmod_fixed_value_transform (insn)
	      || rtl_mod_pow2_value_transform (insn)))
	changed = true;
#ifdef HAVE_prefetch
      if (flag_speculative_prefetching
	  && speculative_prefetching_transform (insn))
	changed = true;
#endif
    }

  if (changed)
    {
      /* The transformations insert new code on edges; materialize it.  */
      commit_edge_insertions ();
      allocate_reg_info (max_reg_num (), FALSE, FALSE);
    }

  return changed;
}
538
/* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
   and OP2, whose value is expected to be VALUE, result TARGET and
   probability of taking the optimal path PROB).  Returns the emitted insn
   sequence implementing:
     if (op2 == value) target = op1 OPERATION value;  (constant divisor)
     else              target = op1 OPERATION op2;    (general case)  */
static rtx
rtl_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
			rtx target, rtx op1, rtx op2, gcov_type value,
			int prob)
{
  rtx tmp, tmp1, jump;
  rtx neq_label = gen_label_rtx ();
  rtx end_label = gen_label_rtx ();
  rtx sequence;

  start_sequence ();

  /* Make sure the divisor is in a register so it is evaluated once.  */
  if (!REG_P (op2))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, copy_rtx (op2));
    }
  else
    tmp = op2;

  do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
			   NULL_RTX, neq_label);

  /* Add branch probability to jump we just created.  */
  jump = get_last_insn ();
  REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
					GEN_INT (REG_BR_PROB_BASE - prob),
					REG_NOTES (jump));

  /* Fast path: divisor is the predicted constant.  */
  tmp1 = simplify_gen_binary (operation, mode,
			      copy_rtx (op1), GEN_INT (value));
  tmp1 = force_operand (tmp1, target);
  if (tmp1 != target)
    emit_move_insn (copy_rtx (target), copy_rtx (tmp1));

  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Slow path: generic division by the runtime divisor.  */
  emit_label (neq_label);
  tmp1 = simplify_gen_binary (operation, mode,
			      copy_rtx (op1), copy_rtx (tmp));
  tmp1 = force_operand (tmp1, target);
  if (tmp1 != target)
    emit_move_insn (copy_rtx (target), copy_rtx (tmp1));

  emit_label (end_label);

  sequence = get_insns ();
  end_sequence ();
  rebuild_jump_labels (sequence);
  return sequence;
}
594
595 /* Do transform 1) on INSN if applicable. */
596 static bool
597 rtl_divmod_fixed_value_transform (rtx insn)
598 {
599 rtx set, set_src, set_dest, op1, op2, value, histogram;
600 enum rtx_code code;
601 enum machine_mode mode;
602 gcov_type val, count, all;
603 edge e;
604 int prob;
605
606 set = single_set (insn);
607 if (!set)
608 return false;
609
610 set_src = SET_SRC (set);
611 set_dest = SET_DEST (set);
612 code = GET_CODE (set_src);
613 mode = GET_MODE (set_dest);
614
615 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
616 return false;
617 op1 = XEXP (set_src, false);
618 op2 = XEXP (set_src, 1);
619
620 for (histogram = REG_NOTES (insn);
621 histogram;
622 histogram = XEXP (histogram, 1))
623 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
624 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
625 break;
626
627 if (!histogram)
628 return false;
629
630 histogram = XEXP (XEXP (histogram, 0), 1);
631 value = XEXP (histogram, 0);
632 histogram = XEXP (histogram, 1);
633 val = INTVAL (XEXP (histogram, 0));
634 histogram = XEXP (histogram, 1);
635 count = INTVAL (XEXP (histogram, 0));
636 histogram = XEXP (histogram, 1);
637 all = INTVAL (XEXP (histogram, 0));
638
639 /* We require that count be at least half of all; this means
640 that for the transformation to fire the value must be constant
641 at least 50% of time (and 75% gives the guarantee of usage). */
642 if (!rtx_equal_p (op2, value) || 2 * count < all)
643 return false;
644
645 if (dump_file)
646 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
647 INSN_UID (insn));
648
649 /* Compute probability of taking the optimal path. */
650 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
651
652 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
653 delete_insn (insn);
654
655 insert_insn_on_edge (
656 rtl_divmod_fixed_value (mode, code, set_dest,
657 op1, op2, val, prob), e);
658
659 return true;
660 }
661
/* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
   and OP2, result TARGET and probability of taking the optimal path PROB).
   Returns the emitted insn sequence implementing:
     if ((op2 & (op2 - 1)) == 0) target = op1 & (op2 - 1);  (power of 2)
     else                        target = op1 OPERATION op2;  */
static rtx
rtl_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
	      rtx op1, rtx op2, int prob)
{
  rtx tmp, tmp1, tmp2, tmp3, jump;
  rtx neq_label = gen_label_rtx ();
  rtx end_label = gen_label_rtx ();
  rtx sequence;

  start_sequence ();

  /* Make sure the divisor is in a register so it is evaluated once.  */
  if (!REG_P (op2))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, copy_rtx (op2));
    }
  else
    tmp = op2;

  /* tmp2 = op2 & (op2 - 1); zero iff op2 is a power of 2 (or zero).  */
  tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
			      0, OPTAB_WIDEN);
  tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
			      0, OPTAB_WIDEN);
  do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
			   NULL_RTX, neq_label);

  /* Add branch probability to jump we just created.  */
  jump = get_last_insn ();
  REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
					GEN_INT (REG_BR_PROB_BASE - prob),
					REG_NOTES (jump));

  /* Fast path: reduce the modulo to a bitwise AND with op2 - 1.  */
  tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
			      0, OPTAB_WIDEN);
  if (tmp3 != target)
    emit_move_insn (copy_rtx (target), tmp3);
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Slow path: generic modulo.  */
  emit_label (neq_label);
  tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
  tmp1 = force_operand (tmp1, target);
  if (tmp1 != target)
    emit_move_insn (target, tmp1);

  emit_label (end_label);

  sequence = get_insns ();
  end_sequence ();
  rebuild_jump_labels (sequence);
  return sequence;
}
716
717 /* Do transform 2) on INSN if applicable. */
718 static bool
719 rtl_mod_pow2_value_transform (rtx insn)
720 {
721 rtx set, set_src, set_dest, op1, op2, value, histogram;
722 enum rtx_code code;
723 enum machine_mode mode;
724 gcov_type wrong_values, count;
725 edge e;
726 int i, all, prob;
727
728 set = single_set (insn);
729 if (!set)
730 return false;
731
732 set_src = SET_SRC (set);
733 set_dest = SET_DEST (set);
734 code = GET_CODE (set_src);
735 mode = GET_MODE (set_dest);
736
737 if (code != UMOD)
738 return false;
739 op1 = XEXP (set_src, 0);
740 op2 = XEXP (set_src, 1);
741
742 for (histogram = REG_NOTES (insn);
743 histogram;
744 histogram = XEXP (histogram, 1))
745 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
746 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
747 break;
748
749 if (!histogram)
750 return false;
751
752 histogram = XEXP (XEXP (histogram, 0), 1);
753 value = XEXP (histogram, 0);
754 histogram = XEXP (histogram, 1);
755 wrong_values =INTVAL (XEXP (histogram, 0));
756 histogram = XEXP (histogram, 1);
757
758 count = 0;
759 for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
760 {
761 count += INTVAL (XEXP (histogram, 0));
762 histogram = XEXP (histogram, 1);
763 }
764
765 if (!rtx_equal_p (op2, value))
766 return false;
767
768 /* We require that we hit a power of two at least half of all evaluations. */
769 if (count < wrong_values)
770 return false;
771
772 if (dump_file)
773 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
774 INSN_UID (insn));
775
776 /* Compute probability of taking the optimal path. */
777 all = count + wrong_values;
778 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
779
780 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
781 delete_insn (insn);
782
783 insert_insn_on_edge (
784 rtl_mod_pow2 (mode, code, set_dest, op1, op2, prob), e);
785
786 return true;
787 }
788
/* Generate code for transformations 3 and 4 (with MODE and OPERATION,
   operands OP1 and OP2, result TARGET, at most SUB subtractions, and
   probability of taking the optimal path(s) PROB1 and PROB2).  Returns the
   emitted insn sequence implementing:
     target = op1;
     repeat up to SUB times: if (target >= op2) target -= op2;
     if still >= op2: target = target OPERATION op2;  */
static rtx
rtl_mod_subtract (enum machine_mode mode, enum rtx_code operation,
		  rtx target, rtx op1, rtx op2, int sub, int prob1, int prob2)
{
  rtx tmp, tmp1, jump;
  rtx end_label = gen_label_rtx ();
  rtx sequence;
  int i;

  start_sequence ();

  /* Make sure the divisor is in a register so it is evaluated once.  */
  if (!REG_P (op2))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, copy_rtx (op2));
    }
  else
    tmp = op2;

  /* If op1 < op2 the modulo is a noop -- skip everything.  */
  emit_move_insn (target, copy_rtx (op1));
  do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
			   NULL_RTX, end_label);

  /* Add branch probability to jump we just created.  */
  jump = get_last_insn ();
  REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
					GEN_INT (prob1), REG_NOTES (jump));

  /* Emit up to SUB subtract-and-test rounds.  */
  for (i = 0; i < sub; i++)
    {
      tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
				  0, OPTAB_WIDEN);
      if (tmp1 != target)
	emit_move_insn (target, tmp1);
      do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
			       NULL_RTX, end_label);

      /* Add branch probability to jump we just created.  */
      jump = get_last_insn ();
      REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
					    GEN_INT (prob2), REG_NOTES (jump));
    }

  /* Fall back to the generic operation.  */
  tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
  tmp1 = force_operand (tmp1, target);
  if (tmp1 != target)
    emit_move_insn (target, tmp1);

  emit_label (end_label);

  sequence = get_insns ();
  end_sequence ();
  rebuild_jump_labels (sequence);
  return sequence;
}
847
/* Do transforms 3) and 4) on INSN if applicable: when an unsigned modulo
   needs at most one or two subtractions in at least 50% of executions,
   replace it with the subtract-and-test sequence.  Returns true if the
   transformation was performed.  */
static bool
rtl_mod_subtract_transform (rtx insn)
{
  rtx set, set_src, set_dest, op1, op2, histogram;
  enum rtx_code code;
  enum machine_mode mode;
  gcov_type wrong_values, counts[2], count, all;
  edge e;
  int i, prob1, prob2;

  set = single_set (insn);
  if (!set)
    return false;

  set_src = SET_SRC (set);
  set_dest = SET_DEST (set);
  code = GET_CODE (set_src);
  mode = GET_MODE (set_dest);

  if (code != UMOD)
    return false;
  op1 = XEXP (set_src, 0);
  op2 = XEXP (set_src, 1);

  /* Look for an interval histogram note on the insn.  */
  for (histogram = REG_NOTES (insn);
       histogram;
       histogram = XEXP (histogram, 1))
    if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
	&& XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
      break;

  if (!histogram)
    return false;

  /* Unpack the note: counts[0]/counts[1] are the executions where the
     quotient op1/op2 was 0 resp. 1; the remaining two counters cover
     everything else.  */
  histogram = XEXP (XEXP (histogram, 0), 1);
  histogram = XEXP (histogram, 1);

  all = 0;
  for (i = 0; i < 2; i++)
    {
      counts[i] = INTVAL (XEXP (histogram, 0));
      all += counts[i];
      histogram = XEXP (histogram, 1);
    }
  wrong_values = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  wrong_values += INTVAL (XEXP (histogram, 0));
  all += wrong_values;

  /* We require that we use just subtractions in at least 50% of all
     evaluations.  I ends up as the number of subtractions to emit.  */
  count = 0;
  for (i = 0; i < 2; i++)
    {
      count += counts[i];
      if (count * 2 >= all)
	break;
    }

  if (i == 2)
    return false;

  if (dump_file)
    fprintf (dump_file, "Mod subtract transformation on insn %d\n",
	     INSN_UID (insn));

  /* Compute probability of taking the optimal path(s).  */
  prob1 = (counts[0] * REG_BR_PROB_BASE + all / 2) / all;
  prob2 = (counts[1] * REG_BR_PROB_BASE + all / 2) / all;

  /* Replace the insn by the guarded sequence, inserted on the edge
     created by splitting the block just before it.  */
  e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
  delete_insn (insn);

  insert_insn_on_edge (
	rtl_mod_subtract (mode, code, set_dest,
			  op1, op2, i, prob1, prob2), e);

  return true;
}
928
#ifdef HAVE_prefetch
/* Generate code for transformation 5 for mem with ADDRESS and a constant
   step DELTA.  WRITE is true if the reference is a store to mem.  Returns
   the insn sequence emitting a prefetch of ADDRESS + DELTA.  */

static rtx
gen_speculative_prefetch (rtx address, gcov_type delta, int write)
{
  rtx tmp;
  rtx sequence;

  /* TODO: we do the prefetching for just one iteration ahead, which
     often is not enough.  */
  start_sequence ();
  /* Prefer the cheap offsettable form; otherwise compute the address
     explicitly.  */
  if (offsettable_address_p (0, VOIDmode, address))
    tmp = plus_constant (copy_rtx (address), delta);
  else
    {
      tmp = simplify_gen_binary (PLUS, Pmode,
				 copy_rtx (address), GEN_INT (delta));
      tmp = force_operand (tmp, NULL);
    }
  /* The prefetch pattern may require its operand in a register.  */
  if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
      (tmp, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
    tmp = force_reg (Pmode, tmp);
  /* Locality hint 3 = keep in all cache levels.  */
  emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3)));
  sequence = get_insns ();
  end_sequence ();

  return sequence;
}
959
/* Do transform 5) on INSN if applicable: when the profiled address was
   measured to advance by a (large enough) constant delta at least half of
   the time, insert a prefetch of address + delta before the insn.
   Returns true if the transformation was performed.  */

static bool
speculative_prefetching_transform (rtx insn)
{
  rtx histogram, value;
  gcov_type val, count, all;
  edge e;
  rtx mem, address;
  int write;

  /* Ignore cold areas -- we are enlarging the code.  */
  if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
    return false;

  if (!find_mem_reference (insn, &mem, &write))
    return false;

  address = XEXP (mem, 0);
  if (side_effects_p (address))
    return false;

  if (CONSTANT_P (address))
    return false;

  /* Look for a constant-delta histogram note on the insn.  */
  for (histogram = REG_NOTES (insn);
       histogram;
       histogram = XEXP (histogram, 1))
    if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
	&& XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA))
      break;

  if (!histogram)
    return false;

  /* Unpack the note: profiled expression, last value (skipped), candidate
     delta, its count, and the total number of executions.  */
  histogram = XEXP (XEXP (histogram, 0), 1);
  value = XEXP (histogram, 0);
  histogram = XEXP (histogram, 1);
  /* Skip last value referenced.  */
  histogram = XEXP (histogram, 1);
  val = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  count = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  all = INTVAL (XEXP (histogram, 0));

  /* With that few executions we do not really have a reason to optimize the
     statement, and more importantly, the data about differences of addresses
     are spoiled by the first item that had no previous value to compare
     with.  */
  if (all < 4)
    return false;

  /* We require that count be at least half of all; this means
     that for the transformation to fire the value must be constant
     at least 50% of time (and 75% gives the guarantee of usage).  */
  if (!rtx_equal_p (address, value) || 2 * count < all)
    return false;

  /* If the difference is too small, it does not make too much sense to
     prefetch, as the memory is probably already in cache.  */
  if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
    return false;

  if (dump_file)
    fprintf (dump_file, "Speculative prefetching for insn %d\n",
	     INSN_UID (insn));

  /* Keep INSN; only insert the prefetch on the edge before it.  */
  e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));

  insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);

  return true;
}
1033 #endif /* HAVE_prefetch */
/* Tree based transformations.  Walks all statements of all hot basic
   blocks, applies the tree-level divmod transformations to statements
   with attached histograms, and frees the histogram data afterwards.
   Returns true if anything was changed.  */
static bool
tree_value_profile_transformations (void)
{
  basic_block bb;
  block_stmt_iterator bsi;
  bool changed = false;

  FOR_EACH_BB (bb)
    {
      /* Ignore cold areas -- we are enlarging the code.  */
      if (!maybe_hot_bb_p (bb))
	continue;

      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  tree stmt = bsi_stmt (bsi);
	  stmt_ann_t ann = get_stmt_ann (stmt);
	  histogram_value th = ann->histograms;
	  if (!th)
	    continue;

	  if (dump_file)
	    {
	      fprintf (dump_file, "Trying transformations on insn ");
	      print_generic_stmt (dump_file, stmt, TDF_SLIM);
	    }

	  /* Transformations:  */
	  /* The order of things in this conditional controls which
	     transformation is used when more than one is applicable.  */
	  /* It is expected that any code added by the transformations
	     will be added before the current statement, and that the
	     current statement remain valid (although possibly
	     modified) upon return.  */
	  if (flag_value_profile_transformations
	      && (tree_mod_subtract_transform (stmt)
		  || tree_divmod_fixed_value_transform (stmt)
		  || tree_mod_pow2_value_transform (stmt)))
	    {
	      changed = true;
	      /* Original statement may no longer be in the same block.  */
	      bb = bb_for_stmt (stmt);
	    }

	  /* Free extra storage from compute_value_histograms.  */
	  while (th)
	    {
	      free (th->hvalue.tree.counters);
	      th = th->hvalue.tree.next;
	    }
	  ann->histograms = 0;
	}
    }

  if (changed)
    {
      /* Edge counts changed; recompute the block frequencies.  */
      counts_to_freqs ();
    }

  return changed;
}
1097
/* Generate code for transformation 1 (with OPERATION, operands OP1
   and OP2, whose value is expected to be VALUE, parent modify-expr STMT and
   probability of taking the optimal path PROB, which is equivalent to COUNT/ALL
   within roundoff error).  This generates the result into a temp and returns
   the temp; it does not replace or alter the original STMT.

   The emitted code forms a diamond in the CFG:

     bb:   tmpv = VALUE; tmp1 = OP2;
	   if (tmp1 != tmpv) goto label2; else goto label1;
     bb2:  label1: tmp2 = OP1 <op> tmpv;   (divisor known to equal VALUE)
	   goto label3;
     bb3:  label2: tmp2 = OP1 <op> OP2;    (general fallback)
     bb4:  label3: ...		   (tmp2 holds the result either way)  */
static tree
tree_divmod_fixed_value (tree stmt, tree operation,
			 tree op1, tree op2, tree value, int prob, gcov_type count,
			 gcov_type all)
{
  tree stmt1, stmt2, stmt3;
  tree tmp1, tmp2, tmpv;
  tree label_decl1 = create_artificial_label ();
  tree label_decl2 = create_artificial_label ();
  tree label_decl3 = create_artificial_label ();
  tree label1, label2, label3;
  tree bb1end, bb2end, bb3end;
  basic_block bb, bb2, bb3, bb4;
  tree optype = TREE_TYPE (operation);
  edge e12, e13, e23, e24, e34;
  block_stmt_iterator bsi;

  bb = bb_for_stmt (stmt);
  bsi = bsi_for_stmt (stmt);

  /* bb1: compare the runtime divisor against the profiled VALUE.  All new
     statements are inserted immediately before STMT.  */
  tmpv = create_tmp_var (optype, "PROF");
  tmp1 = create_tmp_var (optype, "PROF");
  stmt1 = build2 (MODIFY_EXPR, optype, tmpv, fold_convert (optype, value));
  stmt2 = build2 (MODIFY_EXPR, optype, tmp1, op2);
  stmt3 = build3 (COND_EXPR, void_type_node,
		  build2 (NE_EXPR, boolean_type_node, tmp1, tmpv),
		  build1 (GOTO_EXPR, void_type_node, label_decl2),
		  build1 (GOTO_EXPR, void_type_node, label_decl1));
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
  bb1end = stmt3;

  /* bb2: the optimized path -- divide by the (now known constant-valued)
     tmpv, which later folding can turn into cheaper code.  */
  tmp2 = create_tmp_var (optype, "PROF");
  label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
  stmt1 = build2 (MODIFY_EXPR, optype, tmp2,
		  build2 (TREE_CODE (operation), optype, op1, tmpv));
  bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bb2end = stmt1;

  /* bb3: the fallback path -- perform the original division.  */
  label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
  stmt1 = build2 (MODIFY_EXPR, optype, tmp2,
		  build2 (TREE_CODE (operation), optype, op1, op2));
  bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bb3end = stmt1;

  /* bb4: the join point.  */
  label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
  bsi_insert_before (&bsi, label3, BSI_SAME_STMT);

  /* Fix CFG. */
  /* Edge e23 connects bb2 to bb3, etc. */
  /* Split the original block three times and annotate the new blocks and
     edges with the profile-derived counts and probabilities: the optimized
     path (bb2) executes COUNT times, the fallback (bb3) ALL - COUNT.  */
  e12 = split_block (bb, bb1end);
  bb2 = e12->dest;
  bb2->count = count;
  e23 = split_block (bb2, bb2end);
  bb3 = e23->dest;
  bb3->count = all - count;
  e34 = split_block (bb3, bb3end);
  bb4 = e34->dest;
  bb4->count = all;

  /* bb -> bb2 is the "divisor == VALUE" (false-of-NE) branch.  */
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_FALSE_VALUE;
  e12->probability = prob;
  e12->count = count;

  /* bb -> bb3 is the fallback branch.  */
  e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
  e13->probability = REG_BR_PROB_BASE - prob;
  e13->count = all - count;

  /* bb2 must not fall through into bb3; it jumps straight to the join.  */
  remove_edge (e23);

  e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
  e24->probability = REG_BR_PROB_BASE;
  e24->count = count;

  e34->probability = REG_BR_PROB_BASE;
  e34->count = all - count;

  return tmp2;
}
1186
1187 /* Do transform 1) on INSN if applicable. */
1188 static bool
1189 tree_divmod_fixed_value_transform (tree stmt)
1190 {
1191 stmt_ann_t ann = get_stmt_ann (stmt);
1192 histogram_value histogram;
1193 enum tree_code code;
1194 gcov_type val, count, all;
1195 tree modify, op, op1, op2, result, value, tree_val;
1196 int prob;
1197
1198 modify = stmt;
1199 if (TREE_CODE (stmt) == RETURN_EXPR
1200 && TREE_OPERAND (stmt, 0)
1201 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1202 modify = TREE_OPERAND (stmt, 0);
1203 if (TREE_CODE (modify) != MODIFY_EXPR)
1204 return false;
1205 op = TREE_OPERAND (modify, 1);
1206 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1207 return false;
1208 code = TREE_CODE (op);
1209
1210 if (code != TRUNC_DIV_EXPR && code != TRUNC_MOD_EXPR)
1211 return false;
1212
1213 op1 = TREE_OPERAND (op, 0);
1214 op2 = TREE_OPERAND (op, 1);
1215 if (!ann->histograms)
1216 return false;
1217
1218 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1219 if (histogram->type == HIST_TYPE_SINGLE_VALUE)
1220 break;
1221
1222 if (!histogram)
1223 return false;
1224
1225 value = histogram->hvalue.tree.value;
1226 val = histogram->hvalue.tree.counters[0];
1227 count = histogram->hvalue.tree.counters[1];
1228 all = histogram->hvalue.tree.counters[2];
1229
1230 /* We require that count is at least half of all; this means
1231 that for the transformation to fire the value must be constant
1232 at least 50% of time (and 75% gives the guarantee of usage). */
1233 if (simple_cst_equal (op2, value) != 1 || 2 * count < all)
1234 return false;
1235
1236 if (dump_file)
1237 {
1238 fprintf (dump_file, "Div/mod by constant transformation on insn ");
1239 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1240 }
1241
1242 /* Compute probability of taking the optimal path. */
1243 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
1244
1245 tree_val = build_int_cst_wide (get_gcov_type (),
1246 val & 0xffffffffull, val >> 32);
1247 result = tree_divmod_fixed_value (stmt, op, op1, op2, tree_val, prob, count, all);
1248
1249 TREE_OPERAND (modify, 1) = result;
1250
1251 return true;
1252 }
1253
/* Generate code for transformation 2 (with OPERATION, operands OP1
   and OP2, parent modify-expr STMT and probability of taking the optimal
   path PROB, which is equivalent to COUNT/ALL within roundoff error).
   This generates the result into a temp and returns
   the temp; it does not replace or alter the original STMT.

   The emitted code forms a diamond in the CFG:

     bb:   tmp1 = OP2; tmp2 = OP2 - 1; tmp3 = tmp2 & tmp1;
	   if (tmp3 != 0) goto label2; else goto label1;
     bb2:  label1: result = OP1 & (OP2 - 1);  (OP2 is a power of 2)
	   goto label3;
     bb3:  label2: result = OP1 % OP2;	      (general fallback)
     bb4:  label3: ...

   The test relies on (x & (x - 1)) == 0 being true exactly for powers
   of two (and zero).  */
static tree
tree_mod_pow2 (tree stmt, tree operation, tree op1, tree op2, int prob,
	       gcov_type count, gcov_type all)
{
  tree stmt1, stmt2, stmt3, stmt4;
  tree tmp1, tmp2, tmp3;
  tree label_decl1 = create_artificial_label ();
  tree label_decl2 = create_artificial_label ();
  tree label_decl3 = create_artificial_label ();
  tree label1, label2, label3;
  tree bb1end, bb2end, bb3end;
  basic_block bb, bb2, bb3, bb4;
  tree optype = TREE_TYPE (operation);
  edge e12, e13, e23, e24, e34;
  block_stmt_iterator bsi;
  tree result = create_tmp_var (optype, "PROF");

  bb = bb_for_stmt (stmt);
  bsi = bsi_for_stmt (stmt);

  /* bb1: compute (OP2 - 1) & OP2 and branch on whether it is zero.
     NOTE(review): integer_minus_one_node / integer_zero_node are of
     integer_type_node rather than OPTYPE -- verify type consistency is
     handled by later folding.  */
  tmp1 = create_tmp_var (optype, "PROF");
  tmp2 = create_tmp_var (optype, "PROF");
  tmp3 = create_tmp_var (optype, "PROF");
  stmt1 = build2 (MODIFY_EXPR, optype, tmp1, fold_convert (optype, op2));
  stmt2 = build2 (MODIFY_EXPR, optype, tmp2,
		  build2 (PLUS_EXPR, optype, op2, integer_minus_one_node));
  stmt3 = build2 (MODIFY_EXPR, optype, tmp3,
		  build2 (BIT_AND_EXPR, optype, tmp2, tmp1));
  stmt4 = build3 (COND_EXPR, void_type_node,
		  build2 (NE_EXPR, boolean_type_node, tmp3, integer_zero_node),
		  build1 (GOTO_EXPR, void_type_node, label_decl2),
		  build1 (GOTO_EXPR, void_type_node, label_decl1));
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt4, BSI_SAME_STMT);
  bb1end = stmt4;

  /* bb2: power-of-2 path -- modulo becomes a bitwise AND with OP2-1.  */
  /* tmp2 == op2-1 inherited from previous block */
  label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
  stmt1 = build2 (MODIFY_EXPR, optype, result,
		  build2 (BIT_AND_EXPR, optype, op1, tmp2));
  bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bb2end = stmt1;

  /* bb3: fallback path -- perform the original modulo.  */
  label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
  stmt1 = build2 (MODIFY_EXPR, optype, result,
		  build2 (TREE_CODE (operation), optype, op1, op2));
  bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bb3end = stmt1;

  /* bb4: the join point.  */
  label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
  bsi_insert_before (&bsi, label3, BSI_SAME_STMT);

  /* Fix CFG. */
  /* Edge e23 connects bb2 to bb3, etc. */
  /* Split the block three times and distribute the profile counts: the
     power-of-2 path executes COUNT times, the fallback ALL - COUNT.  */
  e12 = split_block (bb, bb1end);
  bb2 = e12->dest;
  bb2->count = count;
  e23 = split_block (bb2, bb2end);
  bb3 = e23->dest;
  bb3->count = all - count;
  e34 = split_block (bb3, bb3end);
  bb4 = e34->dest;
  bb4->count = all;

  /* bb -> bb2 is the "tmp3 == 0" (false-of-NE) branch.  */
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_FALSE_VALUE;
  e12->probability = prob;
  e12->count = count;

  e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
  e13->probability = REG_BR_PROB_BASE - prob;
  e13->count = all - count;

  /* bb2 must not fall through into bb3; it jumps to the join instead.  */
  remove_edge (e23);

  e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
  e24->probability = REG_BR_PROB_BASE;
  e24->count = count;

  e34->probability = REG_BR_PROB_BASE;
  e34->count = all - count;

  return result;
}
1347
1348 /* Do transform 2) on INSN if applicable. */
1349 static bool
1350 tree_mod_pow2_value_transform (tree stmt)
1351 {
1352 stmt_ann_t ann = get_stmt_ann (stmt);
1353 histogram_value histogram;
1354 enum tree_code code;
1355 gcov_type count, wrong_values, all;
1356 tree modify, op, op1, op2, result, value;
1357 int prob;
1358 unsigned int i;
1359
1360 modify = stmt;
1361 if (TREE_CODE (stmt) == RETURN_EXPR
1362 && TREE_OPERAND (stmt, 0)
1363 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1364 modify = TREE_OPERAND (stmt, 0);
1365 if (TREE_CODE (modify) != MODIFY_EXPR)
1366 return false;
1367 op = TREE_OPERAND (modify, 1);
1368 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1369 return false;
1370 code = TREE_CODE (op);
1371
1372 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
1373 return false;
1374
1375 op1 = TREE_OPERAND (op, 0);
1376 op2 = TREE_OPERAND (op, 1);
1377 if (!ann->histograms)
1378 return false;
1379
1380 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1381 if (histogram->type == HIST_TYPE_POW2)
1382 break;
1383
1384 if (!histogram)
1385 return false;
1386
1387 value = histogram->hvalue.tree.value;
1388 wrong_values = histogram->hvalue.tree.counters[0];
1389 count = 0;
1390 for (i = 1; i <= TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (stmt))); i++)
1391 count += histogram->hvalue.tree.counters[i];
1392
1393 /* We require that we hit a power of 2 at least half of all evaluations. */
1394 if (simple_cst_equal (op2, value) != 1 || count < wrong_values)
1395 return false;
1396
1397 if (dump_file)
1398 {
1399 fprintf (dump_file, "Mod power of 2 transformation on insn ");
1400 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1401 }
1402
1403 /* Compute probability of taking the optimal path. */
1404 all = count + wrong_values;
1405 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
1406
1407 result = tree_mod_pow2 (stmt, op, op1, op2, prob, count, all);
1408
1409 TREE_OPERAND (modify, 1) = result;
1410
1411 return true;
1412 }
1413
/* Generate code for transformations 3 and 4 (with OPERATION, operands OP1
   and OP2, parent modify-expr STMT, and NCOUNTS the number of cases to
   support.  Currently only NCOUNTS==0 or 1 is supported and this is
   built into this interface.  The probabilities of taking the optimal
   paths are PROB1 and PROB2, which are equivalent to COUNT1/ALL and
   COUNT2/ALL respectively within roundoff error).  This generates the
   result into a temp and returns the temp; it does not replace or alter
   the original STMT.  */
/* FIXME: Generalize the interface to handle NCOUNTS > 1.  */

/* The emitted code (bb2 exists only when NCOUNTS == 1):

     bb:   result = OP1; tmp1 = OP2;
	   if (result < tmp1) goto label3;	    (OP1 % OP2 == OP1)
	   else goto label1/label2;
     bb2:  label1: result = result - tmp1;	    (one subtraction)
	   if (result < tmp1) goto label3; else goto label2;
     bb3:  label2: result = result % tmp1;	    (general fallback)
     bb4:  label3: ...		     (RESULT holds OP1 % OP2 on all paths) */
static tree
tree_mod_subtract (tree stmt, tree operation, tree op1, tree op2,
		   int prob1, int prob2, int ncounts,
		   gcov_type count1, gcov_type count2, gcov_type all)
{
  tree stmt1, stmt2, stmt3;
  tree tmp1;
  tree label_decl1 = create_artificial_label ();
  tree label_decl2 = create_artificial_label ();
  tree label_decl3 = create_artificial_label ();
  tree label1, label2, label3;
  tree bb1end, bb2end = NULL_TREE, bb3end;
  basic_block bb, bb2, bb3, bb4;
  tree optype = TREE_TYPE (operation);
  edge e12, e23 = 0, e24, e34, e14;
  block_stmt_iterator bsi;
  tree result = create_tmp_var (optype, "PROF");

  bb = bb_for_stmt (stmt);
  bsi = bsi_for_stmt (stmt);

  /* bb1: if OP1 < OP2 the modulo is a no-op, skip straight to the join;
     otherwise go to the subtraction block (if any) or the fallback.  */
  tmp1 = create_tmp_var (optype, "PROF");
  stmt1 = build2 (MODIFY_EXPR, optype, result, op1);
  stmt2 = build2 (MODIFY_EXPR, optype, tmp1, op2);
  stmt3 = build3 (COND_EXPR, void_type_node,
		  build2 (LT_EXPR, boolean_type_node, result, tmp1),
		  build1 (GOTO_EXPR, void_type_node, label_decl3),
		  build1 (GOTO_EXPR, void_type_node,
			  ncounts ? label_decl1 : label_decl2));
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
  bb1end = stmt3;

  if (ncounts)	/* Assumed to be 0 or 1 */
    {
      /* bb2: a single subtraction may finish the job; if not, fall back.  */
      label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
      stmt1 = build2 (MODIFY_EXPR, optype, result,
		      build2 (MINUS_EXPR, optype, result, tmp1));
      stmt2 = build3 (COND_EXPR, void_type_node,
		      build2 (LT_EXPR, boolean_type_node, result, tmp1),
		      build1 (GOTO_EXPR, void_type_node, label_decl3),
		      build1 (GOTO_EXPR, void_type_node, label_decl2));
      bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
      bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
      bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
      bb2end = stmt2;
    }

  /* Fallback case. */
  label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
  stmt1 = build2 (MODIFY_EXPR, optype, result,
		  build2 (TREE_CODE (operation), optype, result, tmp1));
  bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bb3end = stmt1;

  /* bb4: the join point.  */
  label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
  bsi_insert_before (&bsi, label3, BSI_SAME_STMT);

  /* Fix CFG. */
  /* Edge e23 connects bb2 to bb3, etc. */
  /* However block 3 is optional; if it is not there, references
     to 3 really refer to block 2. */
  e12 = split_block (bb, bb1end);
  bb2 = e12->dest;
  bb2->count = all - count1;

  if (ncounts)	/* Assumed to be 0 or 1.  */
    {
      e23 = split_block (bb2, bb2end);
      bb3 = e23->dest;
      bb3->count = all - count1 - count2;
    }

  e34 = split_block (ncounts ? bb3 : bb2, bb3end);
  bb4 = e34->dest;
  bb4->count = all;

  /* bb -> bb2: taken when OP1 >= OP2 (false-of-LT), COUNT1 is how often
     the modulo was a no-op.  */
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_FALSE_VALUE;
  e12->probability = REG_BR_PROB_BASE - prob1;
  e12->count = count1;

  /* bb -> join: the no-op shortcut.  */
  e14 = make_edge (bb, bb4, EDGE_TRUE_VALUE);
  e14->probability = prob1;
  e14->count = all - count1;

  if (ncounts)	/* Assumed to be 0 or 1.  */
    {
      /* bb2 -> bb3: the single subtraction did not suffice.  */
      e23->flags &= ~EDGE_FALLTHRU;
      e23->flags |= EDGE_FALSE_VALUE;
      e23->count = all - count1 - count2;
      e23->probability = REG_BR_PROB_BASE - prob2;

      /* bb2 -> join: one subtraction was enough (COUNT2 times).  */
      e24 = make_edge (bb2, bb4, EDGE_TRUE_VALUE);
      e24->probability = prob2;
      e24->count = count2;
    }

  e34->probability = REG_BR_PROB_BASE;
  e34->count = all - count1 - count2;

  return result;
}
1529
1530 /* Do transforms 3) and 4) on INSN if applicable. */
1531 static bool
1532 tree_mod_subtract_transform (tree stmt)
1533 {
1534 stmt_ann_t ann = get_stmt_ann (stmt);
1535 histogram_value histogram;
1536 enum tree_code code;
1537 gcov_type count, wrong_values, all;
1538 tree modify, op, op1, op2, result, value;
1539 int prob1, prob2;
1540 unsigned int i;
1541
1542 modify = stmt;
1543 if (TREE_CODE (stmt) == RETURN_EXPR
1544 && TREE_OPERAND (stmt, 0)
1545 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1546 modify = TREE_OPERAND (stmt, 0);
1547 if (TREE_CODE (modify) != MODIFY_EXPR)
1548 return false;
1549 op = TREE_OPERAND (modify, 1);
1550 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1551 return false;
1552 code = TREE_CODE (op);
1553
1554 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
1555 return false;
1556
1557 op1 = TREE_OPERAND (op, 0);
1558 op2 = TREE_OPERAND (op, 1);
1559 if (!ann->histograms)
1560 return false;
1561
1562 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1563 if (histogram->type == HIST_TYPE_INTERVAL)
1564 break;
1565
1566 if (!histogram)
1567 return false;
1568
1569 value = histogram->hvalue.tree.value;
1570 all = 0;
1571 wrong_values = 0;
1572 for (i = 0; i < histogram->hdata.intvl.steps; i++)
1573 all += histogram->hvalue.tree.counters[i];
1574
1575 wrong_values += histogram->hvalue.tree.counters[i];
1576 wrong_values += histogram->hvalue.tree.counters[i+1];
1577 all += wrong_values;
1578
1579 /* Sanity check. */
1580 if (simple_cst_equal (op2, value) != 1)
1581 return false;
1582
1583 /* We require that we use just subtractions in at least 50% of all
1584 evaluations. */
1585 count = 0;
1586 for (i = 0; i < histogram->hdata.intvl.steps; i++)
1587 {
1588 count += histogram->hvalue.tree.counters[i];
1589 if (count * 2 >= all)
1590 break;
1591 }
1592 if (i == histogram->hdata.intvl.steps)
1593 return false;
1594
1595 if (dump_file)
1596 {
1597 fprintf (dump_file, "Mod subtract transformation on insn ");
1598 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1599 }
1600
1601 /* Compute probability of taking the optimal path(s). */
1602 prob1 = (histogram->hvalue.tree.counters[0] * REG_BR_PROB_BASE + all / 2) / all;
1603 prob2 = (histogram->hvalue.tree.counters[1] * REG_BR_PROB_BASE + all / 2) / all;
1604
1605 /* In practice, "steps" is always 2. This interface reflects this,
1606 and will need to be changed if "steps" can change. */
1607 result = tree_mod_subtract (stmt, op, op1, op2, prob1, prob2, i,
1608 histogram->hvalue.tree.counters[0],
1609 histogram->hvalue.tree.counters[1], all);
1610
1611 TREE_OPERAND (modify, 1) = result;
1612
1613 return true;
1614 }
1615 \f
/* Connection to the outside world. */
/* Struct for IR-dependent hooks.  Exactly one instance is registered at
   a time via rtl_register_value_prof_hooks or
   tree_register_value_prof_hooks below.  */
struct value_prof_hooks {
  /* Find list of values for which we want to measure histograms. */
  void (*find_values_to_profile) (histogram_values *);

  /* Identify and exploit properties of values that are hard to analyze
     statically.  See value-prof.c for more detail.  */
  bool (*value_profile_transformations) (void);
};
1626
/* Hooks for RTL-based versions (the only ones that currently work).
   The hook implementations are defined earlier in this file.  */
static struct value_prof_hooks rtl_value_prof_hooks =
{
  rtl_find_values_to_profile,
  rtl_value_profile_transformations
};
1633
1634 void
1635 rtl_register_value_prof_hooks (void)
1636 {
1637 value_prof_hooks = &rtl_value_prof_hooks;
1638 gcc_assert (!ir_type ());
1639 }
1640 \f
1641 /* Find values inside INSN for that we want to measure histograms for
1642 division/modulo optimization. */
1643 static void
1644 tree_divmod_values_to_profile (tree stmt, histogram_values *values)
1645 {
1646 tree op, op1, op2;
1647 histogram_value hist;
1648
1649 op = stmt;
1650 if (TREE_CODE (stmt) == RETURN_EXPR
1651 && TREE_OPERAND (stmt, 0)
1652 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1653 op = TREE_OPERAND (stmt, 0);
1654
1655 if (TREE_CODE (op) != MODIFY_EXPR)
1656 return;
1657 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1658 return;
1659 op = TREE_OPERAND (op, 1);
1660 switch (TREE_CODE (op))
1661 {
1662 case TRUNC_DIV_EXPR:
1663 case TRUNC_MOD_EXPR:
1664 op1 = TREE_OPERAND (op, 0);
1665 op2 = TREE_OPERAND (op, 1);
1666
1667 /* Check for a special case where the divisor is power(s) of 2.
1668 This is more aggressive than the RTL version, under the
1669 assumption that later phases will reduce / or % by power of 2
1670 to something clever most of the time. Signed or unsigned. */
1671 if (TREE_CODE (op2) != INTEGER_CST)
1672 {
1673 hist = ggc_alloc (sizeof (*hist));
1674 hist->hvalue.tree.value = op2;
1675 hist->hvalue.tree.stmt = stmt;
1676 hist->type = HIST_TYPE_POW2;
1677 hist->hdata.pow2.may_be_other = 1;
1678 VEC_safe_push (histogram_value, *values, hist);
1679 }
1680
1681 /* Check for the case where the divisor is the same value most
1682 of the time. */
1683 if (TREE_CODE (op2) != INTEGER_CST)
1684 {
1685 hist = ggc_alloc (sizeof (*hist));
1686 hist->hvalue.tree.value = op2;
1687 hist->hvalue.tree.stmt = stmt;
1688 hist->type = HIST_TYPE_SINGLE_VALUE;
1689 VEC_safe_push (histogram_value, *values, hist);
1690 }
1691
1692 /* For mod, check whether it is not often a noop (or replaceable by
1693 a few subtractions). */
1694 if (TREE_CODE (op) == TRUNC_MOD_EXPR && TYPE_UNSIGNED (TREE_TYPE (op)))
1695 {
1696 hist = ggc_alloc (sizeof (*hist));
1697 hist->hvalue.tree.stmt = stmt;
1698 hist->hvalue.tree.value = op2;
1699 hist->type = HIST_TYPE_INTERVAL;
1700 hist->hdata.intvl.int_start = 0;
1701 hist->hdata.intvl.steps = 2;
1702 VEC_safe_push (histogram_value, *values, hist);
1703 }
1704 return;
1705
1706 default:
1707 return;
1708 }
1709 }
1710
1711 /* Find values inside INSN for that we want to measure histograms and adds
1712 them to list VALUES (increasing the record of its length in N_VALUES). */
1713 static void
1714 tree_values_to_profile (tree stmt, histogram_values *values)
1715 {
1716 if (flag_value_profile_transformations)
1717 tree_divmod_values_to_profile (stmt, values);
1718 }
1719
1720 static void
1721 tree_find_values_to_profile (histogram_values *values)
1722 {
1723 basic_block bb;
1724 block_stmt_iterator bsi;
1725 tree stmt;
1726 unsigned int i;
1727
1728 *values = VEC_alloc (histogram_value, 0);
1729 FOR_EACH_BB (bb)
1730 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1731 {
1732 tree stmt = bsi_stmt (bsi);
1733 tree_values_to_profile (stmt, values);
1734 }
1735 static_values = *values;
1736
1737 for (i = 0; i < VEC_length (histogram_value, *values); i++)
1738 {
1739 histogram_value hist = VEC_index (histogram_value, *values, i);
1740
1741 switch (hist->type)
1742 {
1743 case HIST_TYPE_INTERVAL:
1744 if (dump_file)
1745 {
1746 fprintf (dump_file, "Interval counter for tree ");
1747 print_generic_expr (dump_file, hist->hvalue.tree.stmt,
1748 TDF_SLIM);
1749 fprintf (dump_file, ", range %d -- %d.\n",
1750 hist->hdata.intvl.int_start,
1751 (hist->hdata.intvl.int_start
1752 + hist->hdata.intvl.steps - 1));
1753 }
1754 hist->n_counters = hist->hdata.intvl.steps + 2;
1755 break;
1756
1757 case HIST_TYPE_POW2:
1758 if (dump_file)
1759 {
1760 fprintf (dump_file, "Pow2 counter for insn ");
1761 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1762 fprintf (dump_file, ".\n");
1763 }
1764 stmt = hist->hvalue.tree.stmt;
1765 hist->n_counters
1766 = TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (stmt)))
1767 + (hist->hdata.pow2.may_be_other ? 1 : 0);
1768 break;
1769
1770 case HIST_TYPE_SINGLE_VALUE:
1771 if (dump_file)
1772 {
1773 fprintf (dump_file, "Single value counter for insn ");
1774 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1775 fprintf (dump_file, ".\n");
1776 }
1777 hist->n_counters = 3;
1778 break;
1779
1780 case HIST_TYPE_CONST_DELTA:
1781 if (dump_file)
1782 {
1783 fprintf (dump_file, "Constant delta counter for insn ");
1784 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1785 fprintf (dump_file, ".\n");
1786 }
1787 hist->n_counters = 4;
1788 break;
1789
1790 default:
1791 abort ();
1792 }
1793 }
1794 }
1795
/* Hooks for the tree-SSA based versions, defined above.  */
static struct value_prof_hooks tree_value_prof_hooks = {
  tree_find_values_to_profile,
  tree_value_profile_transformations
};
1800
1801 void
1802 tree_register_value_prof_hooks (void)
1803 {
1804 value_prof_hooks = &tree_value_prof_hooks;
1805 gcc_assert (ir_type ());
1806 }
1807 \f
1808 /* IR-independent entry points. */
1809 void
1810 find_values_to_profile (histogram_values *values)
1811 {
1812 (value_prof_hooks->find_values_to_profile) (values);
1813 }
1814
1815 bool
1816 value_profile_transformations (void)
1817 {
1818 bool retval = (value_prof_hooks->value_profile_transformations) ();
1819 VEC_free (histogram_value, static_values);
1820 return retval;
1821 }