usage.adb: Change "pragma inline" to "pragma Inline" in information and error messages
[gcc.git] / gcc / value-prof.c
1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ggc.h"
37
/* The set of IR-dependent value profiling hooks currently in effect
   (see the struct definition near the end of this file).  */
static struct value_prof_hooks *value_prof_hooks;
39
40 /* In this file value profile based optimizations are placed. Currently the
41 following optimizations are implemented (for more detailed descriptions
42 see comments at value_profile_transformations):
43
44 1) Division/modulo specialization. Provided that we can determine that the
45 operands of the division have some special properties, we may use it to
46 produce more effective code.
47 2) Speculative prefetching. If we are able to determine that the difference
48 between addresses accessed by a memory reference is usually constant, we
49 may add the prefetch instructions.
50
51 Every such optimization should add its requirements for profiled values to
52 insn_values_to_profile function. This function is called from branch_prob
53 in profile.c and the requested values are instrumented by it in the first
54 compilation with -fprofile-arcs. The optimization may then read the
55 gathered data in the second compilation with -fbranch-probabilities.
56 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
57 insn. The argument to the note consists of an EXPR_LIST where its
58 members have the following meaning (from the first to the last):
59
60 -- type of information gathered (HIST_TYPE*)
61 -- the expression that is profiled
62 -- list of counters starting from the first one. */
63
/* For speculative prefetching, the range in which we do not prefetch
   (because we assume that the data will be in cache anyway).  The
   asymmetry between the min and max range tries to reflect the fact that
   sequential prefetching of the data is commonly done directly by
   hardware.  Nevertheless, these values are just a guess and should of
   course be target-specific.  */

#ifndef NOPREFETCH_RANGE_MIN
#define NOPREFETCH_RANGE_MIN (-16)
#endif
#ifndef NOPREFETCH_RANGE_MAX
#define NOPREFETCH_RANGE_MAX 32
#endif
76
77 static void insn_divmod_values_to_profile (rtx, histogram_values *);
78 #ifdef HAVE_prefetch
79 static bool insn_prefetch_values_to_profile (rtx, histogram_values *);
80 static int find_mem_reference_1 (rtx *, void *);
81 static void find_mem_reference_2 (rtx, rtx, void *);
82 static bool find_mem_reference (rtx, rtx *, int *);
83 #endif
84
85 static void insn_values_to_profile (rtx, histogram_values *);
86 static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
87 rtx, gcov_type, int);
88 static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int);
89 static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
90 int, int, int);
91 #ifdef HAVE_prefetch
92 static rtx gen_speculative_prefetch (rtx, gcov_type, int);
93 #endif
94 static bool divmod_fixed_value_transform (rtx insn);
95 static bool mod_pow2_value_transform (rtx);
96 static bool mod_subtract_transform (rtx);
97 #ifdef HAVE_prefetch
98 static bool speculative_prefetching_transform (rtx);
99 #endif
100 \f
101 /* Find values inside INSN for that we want to measure histograms for
102 division/modulo optimization and stores them to VALUES. */
103 static void
104 insn_divmod_values_to_profile (rtx insn, histogram_values *values)
105 {
106 rtx set, set_src, op1, op2;
107 enum machine_mode mode;
108 histogram_value hist;
109
110 if (!INSN_P (insn))
111 return;
112
113 set = single_set (insn);
114 if (!set)
115 return;
116
117 mode = GET_MODE (SET_DEST (set));
118 if (!INTEGRAL_MODE_P (mode))
119 return;
120
121 set_src = SET_SRC (set);
122 switch (GET_CODE (set_src))
123 {
124 case DIV:
125 case MOD:
126 case UDIV:
127 case UMOD:
128 op1 = XEXP (set_src, 0);
129 op2 = XEXP (set_src, 1);
130 if (side_effects_p (op2))
131 return;
132
133 /* Check for a special case where the divisor is power of 2. */
134 if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
135 {
136 hist = ggc_alloc (sizeof (*hist));
137 hist->value = op2;
138 hist->seq = NULL_RTX;
139 hist->mode = mode;
140 hist->insn = insn;
141 hist->type = HIST_TYPE_POW2;
142 hist->hdata.pow2.may_be_other = 1;
143 VEC_safe_push (histogram_value, *values, hist);
144 }
145
146 /* Check whether the divisor is not in fact a constant. */
147 if (!CONSTANT_P (op2))
148 {
149 hist = ggc_alloc (sizeof (*hist));
150 hist->value = op2;
151 hist->mode = mode;
152 hist->seq = NULL_RTX;
153 hist->insn = insn;
154 hist->type = HIST_TYPE_SINGLE_VALUE;
155 VEC_safe_push (histogram_value, *values, hist);
156 }
157
158 /* For mod, check whether it is not often a noop (or replaceable by
159 a few subtractions). */
160 if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
161 {
162 rtx tmp;
163
164 hist = ggc_alloc (sizeof (*hist));
165 start_sequence ();
166 tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
167 hist->value = force_operand (tmp, NULL_RTX);
168 hist->seq = get_insns ();
169 end_sequence ();
170 hist->mode = mode;
171 hist->insn = insn;
172 hist->type = HIST_TYPE_INTERVAL;
173 hist->hdata.intvl.int_start = 0;
174 hist->hdata.intvl.steps = 2;
175 hist->hdata.intvl.may_be_less = 1;
176 hist->hdata.intvl.may_be_more = 1;
177 VEC_safe_push (histogram_value, *values, hist);
178 }
179 return;
180
181 default:
182 return;
183 }
184 }
185
186 #ifdef HAVE_prefetch
187
188 /* Called from find_mem_reference through for_each_rtx, finds a memory
189 reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
190 to *RET and the traversing of the expression is interrupted by returning 1.
191 Otherwise 0 is returned. */
192
193 static int
194 find_mem_reference_1 (rtx *expr, void *ret)
195 {
196 rtx *mem = ret;
197
198 if (GET_CODE (*expr) == MEM)
199 {
200 *mem = *expr;
201 return 1;
202 }
203 return 0;
204 }
205
206 /* Called form find_mem_reference through note_stores to find out whether
207 the memory reference MEM is a store. I.e. if EXPR == MEM, the variable
208 FMR2_WRITE is set to true. */
209
210 static int fmr2_write;
211 static void
212 find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem)
213 {
214 if (expr == mem)
215 fmr2_write = true;
216 }
217
218 /* Find a memory reference inside INSN, return it in MEM. Set WRITE to true
219 if it is a write of the mem. Return false if no memory reference is found,
220 true otherwise. */
221
222 static bool
223 find_mem_reference (rtx insn, rtx *mem, int *write)
224 {
225 *mem = NULL_RTX;
226 for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem);
227
228 if (!*mem)
229 return false;
230
231 fmr2_write = false;
232 note_stores (PATTERN (insn), find_mem_reference_2, *mem);
233 *write = fmr2_write;
234 return true;
235 }
236
237 /* Find values inside INSN for that we want to measure histograms for
238 a speculative prefetching. Add them to the list VALUES.
239 Returns true if such we found any such value, false otherwise. */
240
241 static bool
242 insn_prefetch_values_to_profile (rtx insn, histogram_values *values)
243 {
244 rtx mem, address;
245 int write;
246 histogram_value hist;
247
248 if (!INSN_P (insn))
249 return false;
250
251 if (!find_mem_reference (insn, &mem, &write))
252 return false;
253
254 address = XEXP (mem, 0);
255 if (side_effects_p (address))
256 return false;
257
258 if (CONSTANT_P (address))
259 return false;
260
261 hist = ggc_alloc (sizeof (*hist));
262 hist->value = address;
263 hist->mode = GET_MODE (address);
264 hist->seq = NULL_RTX;
265 hist->insn = insn;
266 hist->type = HIST_TYPE_CONST_DELTA;
267 VEC_safe_push (histogram_value, *values, hist);
268
269 return true;
270 }
271 #endif
272 /* Find values inside INSN for that we want to measure histograms and adds
273 them to list VALUES (increasing the record of its length in N_VALUES). */
274 static void
275 insn_values_to_profile (rtx insn, histogram_values *values)
276 {
277 if (flag_value_profile_transformations)
278 insn_divmod_values_to_profile (insn, values);
279
280 #ifdef HAVE_prefetch
281 if (flag_speculative_prefetching)
282 insn_prefetch_values_to_profile (insn, values);
283 #endif
284 }
285
286 /* Find list of values for that we want to measure histograms. */
287 static void
288 rtl_find_values_to_profile (histogram_values *values)
289 {
290 rtx insn;
291 unsigned i;
292
293 life_analysis (NULL, PROP_DEATH_NOTES);
294
295 *values = VEC_alloc (histogram_value, 0);
296 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
297 insn_values_to_profile (insn, values);
298
299 for (i = 0; i < VEC_length (histogram_value, *values); i++)
300 {
301 histogram_value hist = VEC_index (histogram_value, *values, i);
302
303 switch (hist->type)
304 {
305 case HIST_TYPE_INTERVAL:
306 if (dump_file)
307 fprintf (dump_file,
308 "Interval counter for insn %d, range %d -- %d.\n",
309 INSN_UID ((rtx)hist->insn),
310 hist->hdata.intvl.int_start,
311 (hist->hdata.intvl.int_start
312 + hist->hdata.intvl.steps - 1));
313 hist->n_counters = hist->hdata.intvl.steps +
314 (hist->hdata.intvl.may_be_less ? 1 : 0) +
315 (hist->hdata.intvl.may_be_more ? 1 : 0);
316 break;
317
318 case HIST_TYPE_POW2:
319 if (dump_file)
320 fprintf (dump_file,
321 "Pow2 counter for insn %d.\n",
322 INSN_UID ((rtx)hist->insn));
323 hist->n_counters
324 = GET_MODE_BITSIZE (hist->mode)
325 + (hist->hdata.pow2.may_be_other ? 1 : 0);
326 break;
327
328 case HIST_TYPE_SINGLE_VALUE:
329 if (dump_file)
330 fprintf (dump_file,
331 "Single value counter for insn %d.\n",
332 INSN_UID ((rtx)hist->insn));
333 hist->n_counters = 3;
334 break;
335
336 case HIST_TYPE_CONST_DELTA:
337 if (dump_file)
338 fprintf (dump_file,
339 "Constant delta counter for insn %d.\n",
340 INSN_UID ((rtx)hist->insn));
341 hist->n_counters = 4;
342 break;
343
344 default:
345 abort ();
346 }
347 }
348 allocate_reg_info (max_reg_num (), FALSE, FALSE);
349 }
350
351 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
352 them to identify and exploit properties of values that are hard to analyze
353 statically.
354
355 We do following transformations:
356
357 1)
358
359 x = a / b;
360
361 where b is almost always a constant N is transformed to
362
363 if (b == N)
364 x = a / N;
365 else
366 x = a / b;
367
368 Analogically with %
369
370 2)
371
372 x = a % b
373
374 where b is almost always a power of 2 and the division is unsigned
375 TODO -- handle signed case as well
376
377 if ((b & (b - 1)) == 0)
378 x = a & (b - 1);
379 else
380 x = x % b;
381
382 Note that when b = 0, no error will occur and x = a; this is correct,
383 as result of such operation is undefined.
384
385 3)
386
387 x = a % b
388
389    where a is almost always less than b and the division is unsigned
390 TODO -- handle signed case as well
391
392 x = a;
393 if (x >= b)
394 x %= b;
395
396 4)
397
398 x = a % b
399
400    where a is almost always less than 2 * b and the division is unsigned
401 TODO -- handle signed case as well
402
403 x = a;
404 if (x >= b)
405 x -= b;
406 if (x >= b)
407 x %= b;
408
409 It would be possible to continue analogically for K * b for other small
410 K's, but it is probably not useful.
411
412 5)
413
414 Read or write of mem[address], where the value of address changes usually
415 by a constant C != 0 between the following accesses to the computation; with
416 -fspeculative-prefetching we then add a prefetch of address + C before
417 the insn. This handles prefetching of several interesting cases in addition
418 to a simple prefetching for addresses that are induction variables, e. g.
419 linked lists allocated sequentially (even in case they are processed
420 recursively).
421
422 TODO -- we should also check whether there is not (usually) a small
423 difference with the adjacent memory references, so that we do
424 not issue overlapping prefetches. Also we should employ some
425 heuristics to eliminate cases where prefetching evidently spoils
426 the code.
427 -- it should somehow cooperate with the loop optimizer prefetching
428
429 TODO:
430
431 There are other useful cases that could be handled by a similar mechanism,
432 for example:
433
434 for (i = 0; i < n; i++)
435 ...
436
437 transform to (for constant N):
438
439 if (n == N)
440 for (i = 0; i < N; i++)
441 ...
442 else
443 for (i = 0; i < n; i++)
444 ...
445 making unroller happy. Since this may grow the code significantly,
446 we would have to be very careful here. */
447
448 static bool
449 rtl_value_profile_transformations (void)
450 {
451 rtx insn, next;
452 int changed = false;
453
454 for (insn = get_insns (); insn; insn = next)
455 {
456 next = NEXT_INSN (insn);
457
458 if (!INSN_P (insn))
459 continue;
460
461 /* Scan for insn carrying a histogram. */
462 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
463 continue;
464
465 /* Ignore cold areas -- we are growing a code. */
466 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
467 continue;
468
469 if (dump_file)
470 {
471 fprintf (dump_file, "Trying transformations on insn %d\n",
472 INSN_UID (insn));
473 print_rtl_single (dump_file, insn);
474 }
475
476 /* Transformations: */
477 if (flag_value_profile_transformations
478 && (mod_subtract_transform (insn)
479 || divmod_fixed_value_transform (insn)
480 || mod_pow2_value_transform (insn)))
481 changed = true;
482 #ifdef HAVE_prefetch
483 if (flag_speculative_prefetching
484 && speculative_prefetching_transform (insn))
485 changed = true;
486 #endif
487 }
488
489 if (changed)
490 {
491 commit_edge_insertions ();
492 allocate_reg_info (max_reg_num (), FALSE, FALSE);
493 }
494
495 return changed;
496 }
497
498 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
499 and OP2, whose value is expected to be VALUE, result TARGET and
500 probability of taking the optimal path PROB). */
501 static rtx
502 gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
503 rtx target, rtx op1, rtx op2, gcov_type value,
504 int prob)
505 {
506 rtx tmp, tmp1, jump;
507 rtx neq_label = gen_label_rtx ();
508 rtx end_label = gen_label_rtx ();
509 rtx sequence;
510
511 start_sequence ();
512
513 if (!REG_P (op2))
514 {
515 tmp = gen_reg_rtx (mode);
516 emit_move_insn (tmp, copy_rtx (op2));
517 }
518 else
519 tmp = op2;
520
521 do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
522 NULL_RTX, neq_label);
523
524 /* Add branch probability to jump we just created. */
525 jump = get_last_insn ();
526 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
527 GEN_INT (REG_BR_PROB_BASE - prob),
528 REG_NOTES (jump));
529
530 tmp1 = simplify_gen_binary (operation, mode,
531 copy_rtx (op1), GEN_INT (value));
532 tmp1 = force_operand (tmp1, target);
533 if (tmp1 != target)
534 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
535
536 emit_jump_insn (gen_jump (end_label));
537 emit_barrier ();
538
539 emit_label (neq_label);
540 tmp1 = simplify_gen_binary (operation, mode,
541 copy_rtx (op1), copy_rtx (tmp));
542 tmp1 = force_operand (tmp1, target);
543 if (tmp1 != target)
544 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
545
546 emit_label (end_label);
547
548 sequence = get_insns ();
549 end_sequence ();
550 rebuild_jump_labels (sequence);
551 return sequence;
552 }
553
554 /* Do transform 1) on INSN if applicable. */
555 static bool
556 divmod_fixed_value_transform (rtx insn)
557 {
558 rtx set, set_src, set_dest, op1, op2, value, histogram;
559 enum rtx_code code;
560 enum machine_mode mode;
561 gcov_type val, count, all;
562 edge e;
563 int prob;
564
565 set = single_set (insn);
566 if (!set)
567 return false;
568
569 set_src = SET_SRC (set);
570 set_dest = SET_DEST (set);
571 code = GET_CODE (set_src);
572 mode = GET_MODE (set_dest);
573
574 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
575 return false;
576 op1 = XEXP (set_src, false);
577 op2 = XEXP (set_src, 1);
578
579 for (histogram = REG_NOTES (insn);
580 histogram;
581 histogram = XEXP (histogram, 1))
582 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
583 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
584 break;
585
586 if (!histogram)
587 return false;
588
589 histogram = XEXP (XEXP (histogram, 0), 1);
590 value = XEXP (histogram, 0);
591 histogram = XEXP (histogram, 1);
592 val = INTVAL (XEXP (histogram, 0));
593 histogram = XEXP (histogram, 1);
594 count = INTVAL (XEXP (histogram, 0));
595 histogram = XEXP (histogram, 1);
596 all = INTVAL (XEXP (histogram, 0));
597
598 /* We require that count is at least half of all; this means
599 that for the transformation to fire the value must be constant
600 at least 50% of time (and 75% gives the guarantee of usage). */
601 if (!rtx_equal_p (op2, value) || 2 * count < all)
602 return false;
603
604 if (dump_file)
605 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
606 INSN_UID (insn));
607
608 /* Compute probability of taking the optimal path. */
609 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
610
611 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
612 delete_insn (insn);
613
614 insert_insn_on_edge (
615 gen_divmod_fixed_value (mode, code, set_dest,
616 op1, op2, val, prob), e);
617
618 return true;
619 }
620
621 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
622 and OP2, result TARGET and probability of taking the optimal path PROB). */
623 static rtx
624 gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
625 rtx op1, rtx op2, int prob)
626 {
627 rtx tmp, tmp1, tmp2, tmp3, jump;
628 rtx neq_label = gen_label_rtx ();
629 rtx end_label = gen_label_rtx ();
630 rtx sequence;
631
632 start_sequence ();
633
634 if (!REG_P (op2))
635 {
636 tmp = gen_reg_rtx (mode);
637 emit_move_insn (tmp, copy_rtx (op2));
638 }
639 else
640 tmp = op2;
641
642 tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
643 0, OPTAB_WIDEN);
644 tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
645 0, OPTAB_WIDEN);
646 do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
647 NULL_RTX, neq_label);
648
649 /* Add branch probability to jump we just created. */
650 jump = get_last_insn ();
651 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
652 GEN_INT (REG_BR_PROB_BASE - prob),
653 REG_NOTES (jump));
654
655 tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
656 0, OPTAB_WIDEN);
657 if (tmp3 != target)
658 emit_move_insn (copy_rtx (target), tmp3);
659 emit_jump_insn (gen_jump (end_label));
660 emit_barrier ();
661
662 emit_label (neq_label);
663 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
664 tmp1 = force_operand (tmp1, target);
665 if (tmp1 != target)
666 emit_move_insn (target, tmp1);
667
668 emit_label (end_label);
669
670 sequence = get_insns ();
671 end_sequence ();
672 rebuild_jump_labels (sequence);
673 return sequence;
674 }
675
676 /* Do transform 2) on INSN if applicable. */
677 static bool
678 mod_pow2_value_transform (rtx insn)
679 {
680 rtx set, set_src, set_dest, op1, op2, value, histogram;
681 enum rtx_code code;
682 enum machine_mode mode;
683 gcov_type wrong_values, count;
684 edge e;
685 int i, all, prob;
686
687 set = single_set (insn);
688 if (!set)
689 return false;
690
691 set_src = SET_SRC (set);
692 set_dest = SET_DEST (set);
693 code = GET_CODE (set_src);
694 mode = GET_MODE (set_dest);
695
696 if (code != UMOD)
697 return false;
698 op1 = XEXP (set_src, 0);
699 op2 = XEXP (set_src, 1);
700
701 for (histogram = REG_NOTES (insn);
702 histogram;
703 histogram = XEXP (histogram, 1))
704 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
705 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
706 break;
707
708 if (!histogram)
709 return false;
710
711 histogram = XEXP (XEXP (histogram, 0), 1);
712 value = XEXP (histogram, 0);
713 histogram = XEXP (histogram, 1);
714 wrong_values =INTVAL (XEXP (histogram, 0));
715 histogram = XEXP (histogram, 1);
716
717 count = 0;
718 for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
719 {
720 count += INTVAL (XEXP (histogram, 0));
721 histogram = XEXP (histogram, 1);
722 }
723
724 if (!rtx_equal_p (op2, value))
725 return false;
726
727 /* We require that we hit a power of two at least half of all evaluations. */
728 if (count < wrong_values)
729 return false;
730
731 if (dump_file)
732 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
733 INSN_UID (insn));
734
735 /* Compute probability of taking the optimal path. */
736 all = count + wrong_values;
737 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
738
739 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
740 delete_insn (insn);
741
742 insert_insn_on_edge (
743 gen_mod_pow2 (mode, code, set_dest, op1, op2, prob), e);
744
745 return true;
746 }
747
748 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
749 operands OP1 and OP2, result TARGET, at most SUB subtractions, and
750 probability of taking the optimal path(s) PROB1 and PROB2). */
751 static rtx
752 gen_mod_subtract (enum machine_mode mode, enum rtx_code operation,
753 rtx target, rtx op1, rtx op2, int sub, int prob1, int prob2)
754 {
755 rtx tmp, tmp1, jump;
756 rtx end_label = gen_label_rtx ();
757 rtx sequence;
758 int i;
759
760 start_sequence ();
761
762 if (!REG_P (op2))
763 {
764 tmp = gen_reg_rtx (mode);
765 emit_move_insn (tmp, copy_rtx (op2));
766 }
767 else
768 tmp = op2;
769
770 emit_move_insn (target, copy_rtx (op1));
771 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
772 NULL_RTX, end_label);
773
774 /* Add branch probability to jump we just created. */
775 jump = get_last_insn ();
776 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
777 GEN_INT (prob1), REG_NOTES (jump));
778
779 for (i = 0; i < sub; i++)
780 {
781 tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
782 0, OPTAB_WIDEN);
783 if (tmp1 != target)
784 emit_move_insn (target, tmp1);
785 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
786 NULL_RTX, end_label);
787
788 /* Add branch probability to jump we just created. */
789 jump = get_last_insn ();
790 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
791 GEN_INT (prob2), REG_NOTES (jump));
792 }
793
794 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
795 tmp1 = force_operand (tmp1, target);
796 if (tmp1 != target)
797 emit_move_insn (target, tmp1);
798
799 emit_label (end_label);
800
801 sequence = get_insns ();
802 end_sequence ();
803 rebuild_jump_labels (sequence);
804 return sequence;
805 }
806
807 /* Do transforms 3) and 4) on INSN if applicable. */
808 static bool
809 mod_subtract_transform (rtx insn)
810 {
811 rtx set, set_src, set_dest, op1, op2, value, histogram;
812 enum rtx_code code;
813 enum machine_mode mode;
814 gcov_type wrong_values, counts[2], count, all;
815 edge e;
816 int i, prob1, prob2;
817
818 set = single_set (insn);
819 if (!set)
820 return false;
821
822 set_src = SET_SRC (set);
823 set_dest = SET_DEST (set);
824 code = GET_CODE (set_src);
825 mode = GET_MODE (set_dest);
826
827 if (code != UMOD)
828 return false;
829 op1 = XEXP (set_src, 0);
830 op2 = XEXP (set_src, 1);
831
832 for (histogram = REG_NOTES (insn);
833 histogram;
834 histogram = XEXP (histogram, 1))
835 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
836 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
837 break;
838
839 if (!histogram)
840 return false;
841
842 histogram = XEXP (XEXP (histogram, 0), 1);
843 value = XEXP (histogram, 0);
844 histogram = XEXP (histogram, 1);
845
846 all = 0;
847 for (i = 0; i < 2; i++)
848 {
849 counts[i] = INTVAL (XEXP (histogram, 0));
850 all += counts[i];
851 histogram = XEXP (histogram, 1);
852 }
853 wrong_values = INTVAL (XEXP (histogram, 0));
854 histogram = XEXP (histogram, 1);
855 wrong_values += INTVAL (XEXP (histogram, 0));
856 all += wrong_values;
857
858 /* We require that we use just subtractions in at least 50% of all
859 evaluations. */
860 count = 0;
861 for (i = 0; i < 2; i++)
862 {
863 count += counts[i];
864 if (count * 2 >= all)
865 break;
866 }
867
868 if (i == 2)
869 return false;
870
871 if (dump_file)
872 fprintf (dump_file, "Mod subtract transformation on insn %d\n",
873 INSN_UID (insn));
874
875 /* Compute probability of taking the optimal path(s). */
876 prob1 = (counts[0] * REG_BR_PROB_BASE + all / 2) / all;
877 prob2 = (counts[1] * REG_BR_PROB_BASE + all / 2) / all;
878
879 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
880 delete_insn (insn);
881
882 insert_insn_on_edge (
883 gen_mod_subtract (mode, code, set_dest,
884 op1, op2, i, prob1, prob2), e);
885
886 return true;
887 }
888
889 #ifdef HAVE_prefetch
890 /* Generate code for transformation 5 for mem with ADDRESS and a constant
891 step DELTA. WRITE is true if the reference is a store to mem. */
892
893 static rtx
894 gen_speculative_prefetch (rtx address, gcov_type delta, int write)
895 {
896 rtx tmp;
897 rtx sequence;
898
899 /* TODO: we do the prefetching for just one iteration ahead, which
900 often is not enough. */
901 start_sequence ();
902 if (offsettable_address_p (0, VOIDmode, address))
903 tmp = plus_constant (copy_rtx (address), delta);
904 else
905 {
906 tmp = simplify_gen_binary (PLUS, Pmode,
907 copy_rtx (address), GEN_INT (delta));
908 tmp = force_operand (tmp, NULL);
909 }
910 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
911 (tmp, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
912 tmp = force_reg (Pmode, tmp);
913 emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3)));
914 sequence = get_insns ();
915 end_sequence ();
916
917 return sequence;
918 }
919
920 /* Do transform 5) on INSN if applicable. */
921
922 static bool
923 speculative_prefetching_transform (rtx insn)
924 {
925 rtx histogram, value;
926 gcov_type val, count, all;
927 edge e;
928 rtx mem, address;
929 int write;
930
931 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
932 return false;
933
934 if (!find_mem_reference (insn, &mem, &write))
935 return false;
936
937 address = XEXP (mem, 0);
938 if (side_effects_p (address))
939 return false;
940
941 if (CONSTANT_P (address))
942 return false;
943
944 for (histogram = REG_NOTES (insn);
945 histogram;
946 histogram = XEXP (histogram, 1))
947 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
948 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA))
949 break;
950
951 if (!histogram)
952 return false;
953
954 histogram = XEXP (XEXP (histogram, 0), 1);
955 value = XEXP (histogram, 0);
956 histogram = XEXP (histogram, 1);
957 /* Skip last value referenced. */
958 histogram = XEXP (histogram, 1);
959 val = INTVAL (XEXP (histogram, 0));
960 histogram = XEXP (histogram, 1);
961 count = INTVAL (XEXP (histogram, 0));
962 histogram = XEXP (histogram, 1);
963 all = INTVAL (XEXP (histogram, 0));
964
965 /* With that few executions we do not really have a reason to optimize the
966 statement, and more importantly, the data about differences of addresses
967 are spoiled by the first item that had no previous value to compare
968 with. */
969 if (all < 4)
970 return false;
971
972 /* We require that count is at least half of all; this means
973 that for the transformation to fire the value must be constant
974 at least 50% of time (and 75% gives the guarantee of usage). */
975 if (!rtx_equal_p (address, value) || 2 * count < all)
976 return false;
977
978 /* If the difference is too small, it does not make too much sense to
979 prefetch, as the memory is probably already in cache. */
980 if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
981 return false;
982
983 if (dump_file)
984 fprintf (dump_file, "Speculative prefetching for insn %d\n",
985 INSN_UID (insn));
986
987 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
988
989 insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);
990
991 return true;
992 }
993 #endif /* HAVE_prefetch */
994 \f
995 /* Connection to the outside world. */
996 /* Struct for IR-dependent hooks. */
997 struct value_prof_hooks {
998 /* Find list of values for which we want to measure histograms. */
999 void (*find_values_to_profile) (histogram_values *);
1000
1001 /* Identify and exploit properties of values that are hard to analyze
1002 statically. See value-prof.c for more detail. */
1003 bool (*value_profile_transformations) (void);
1004 };
1005
1006 /* Hooks for RTL-based versions (the only ones that currently work). */
1007 static struct value_prof_hooks rtl_value_prof_hooks =
1008 {
1009 rtl_find_values_to_profile,
1010 rtl_value_profile_transformations
1011 };
1012
1013 void
1014 rtl_register_value_prof_hooks (void)
1015 {
1016 value_prof_hooks = &rtl_value_prof_hooks;
1017 if (ir_type ())
1018 abort ();
1019 }
1020 \f
1021 /* Tree-based versions are stubs for now. */
1022 static void
1023 tree_find_values_to_profile (histogram_values *values ATTRIBUTE_UNUSED)
1024 {
1025 abort ();
1026 }
1027
1028 static bool
1029 tree_value_profile_transformations (void)
1030 {
1031 abort ();
1032 }
1033
1034 static struct value_prof_hooks tree_value_prof_hooks = {
1035 tree_find_values_to_profile,
1036 tree_value_profile_transformations
1037 };
1038
1039 void
1040 tree_register_value_prof_hooks (void)
1041 {
1042 value_prof_hooks = &tree_value_prof_hooks;
1043 if (!ir_type ())
1044 abort ();
1045 }
1046 \f
1047 /* IR-independent entry points. */
1048 void
1049 find_values_to_profile (histogram_values *values)
1050 {
1051 (value_prof_hooks->find_values_to_profile) (values);
1052 }
1053
1054 bool
1055 value_profile_transformations (void)
1056 {
1057 return (value_prof_hooks->value_profile_transformations) ();
1058 }
1059