re PR rtl-optimization/17428 (internal compiler error: in spill_failure, at reload1...
[gcc.git] / gcc / value-prof.c
1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ggc.h"
37
/* The hooks currently in use; installed by rtl_register_value_prof_hooks
   or tree_register_value_prof_hooks below.  */
static struct value_prof_hooks *value_prof_hooks;
39
40 /* In this file value profile based optimizations are placed. Currently the
41 following optimizations are implemented (for more detailed descriptions
42 see comments at value_profile_transformations):
43
44 1) Division/modulo specialization. Provided that we can determine that the
45 operands of the division have some special properties, we may use it to
46 produce more effective code.
47 2) Speculative prefetching. If we are able to determine that the difference
48 between addresses accessed by a memory reference is usually constant, we
49 may add the prefetch instructions.
50
51 Every such optimization should add its requirements for profiled values to
52 insn_values_to_profile function. This function is called from branch_prob
53 in profile.c and the requested values are instrumented by it in the first
54 compilation with -fprofile-arcs. The optimization may then read the
55 gathered data in the second compilation with -fbranch-probabilities.
56 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
57 insn. The argument to the note consists of an EXPR_LIST where its
58 members have the following meaning (from the first to the last):
59
60 -- type of information gathered (HIST_TYPE*)
61 -- the expression that is profiled
62 -- list of counters starting from the first one. */
63
64 /* For speculative prefetching, the range in that we do not prefetch (because
65 we assume that it will be in cache anyway). The asymmetry between min and
66 max range is trying to reflect the fact that the sequential prefetching
67 of the data is commonly done directly by hardware. Nevertheless, these
68 values are just a guess and should of course be target-specific. */
69
70 #ifndef NOPREFETCH_RANGE_MIN
71 #define NOPREFETCH_RANGE_MIN (-16)
72 #endif
73 #ifndef NOPREFETCH_RANGE_MAX
74 #define NOPREFETCH_RANGE_MAX 32
75 #endif
76
77 static void insn_divmod_values_to_profile (rtx, histogram_values *);
78 #ifdef HAVE_prefetch
79 static bool insn_prefetch_values_to_profile (rtx, histogram_values *);
80 static int find_mem_reference_1 (rtx *, void *);
81 static void find_mem_reference_2 (rtx, rtx, void *);
82 static bool find_mem_reference (rtx, rtx *, int *);
83 #endif
84
85 static void insn_values_to_profile (rtx, histogram_values *);
86 static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
87 rtx, gcov_type, int);
88 static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int);
89 static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
90 int, int, int);
91 #ifdef HAVE_prefetch
92 static rtx gen_speculative_prefetch (rtx, gcov_type, int);
93 #endif
94 static bool divmod_fixed_value_transform (rtx insn);
95 static bool mod_pow2_value_transform (rtx);
96 static bool mod_subtract_transform (rtx);
97 #ifdef HAVE_prefetch
98 static bool speculative_prefetching_transform (rtx);
99 #endif
100 \f
/* Find values inside INSN for that we want to measure histograms for
   division/modulo optimization and stores them to VALUES.  */
static void
insn_divmod_values_to_profile (rtx insn, histogram_values *values)
{
  rtx set, set_src, op1, op2;
  enum machine_mode mode;
  histogram_value hist;

  if (!INSN_P (insn))
    return;

  /* Only a plain "dest = src" insn can be a division we understand.  */
  set = single_set (insn);
  if (!set)
    return;

  mode = GET_MODE (SET_DEST (set));
  if (!INTEGRAL_MODE_P (mode))
    return;

  set_src = SET_SRC (set);
  switch (GET_CODE (set_src))
    {
    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      op1 = XEXP (set_src, 0);
      op2 = XEXP (set_src, 1);
      /* The instrumentation re-evaluates the divisor, which is not safe
	 if that evaluation has side effects.  */
      if (side_effects_p (op2))
	return;

      /* Check for a special case where the divisor is power of 2.
	 Only done for unsigned modulo, which mod_pow2_value_transform
	 can replace by a mask.  */
      if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
	{
	  hist = ggc_alloc (sizeof (*hist));
	  hist->value = op2;
	  hist->seq = NULL_RTX;
	  hist->mode = mode;
	  hist->insn = insn;
	  hist->type = HIST_TYPE_POW2;
	  hist->hdata.pow2.may_be_other = 1;
	  VEC_safe_push (histogram_value, *values, hist);
	}

      /* Check whether the divisor is not in fact a constant; used by
	 divmod_fixed_value_transform.  */
      if (!CONSTANT_P (op2))
	{
	  hist = ggc_alloc (sizeof (*hist));
	  hist->value = op2;
	  hist->mode = mode;
	  hist->seq = NULL_RTX;
	  hist->insn = insn;
	  hist->type = HIST_TYPE_SINGLE_VALUE;
	  VEC_safe_push (histogram_value, *values, hist);
	}

      /* For mod, check whether it is not often a noop (or replaceable by
	 a few subtractions).  We profile the quotient op1 / op2 in the
	 interval {0, 1}; hist->seq holds the insns that compute it.  */
      if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
	{
	  rtx tmp;

	  hist = ggc_alloc (sizeof (*hist));
	  start_sequence ();
	  tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
	  hist->value = force_operand (tmp, NULL_RTX);
	  hist->seq = get_insns ();
	  end_sequence ();
	  hist->mode = mode;
	  hist->insn = insn;
	  hist->type = HIST_TYPE_INTERVAL;
	  hist->hdata.intvl.int_start = 0;
	  hist->hdata.intvl.steps = 2;
	  hist->hdata.intvl.may_be_less = 1;
	  hist->hdata.intvl.may_be_more = 1;
	  VEC_safe_push (histogram_value, *values, hist);
	}
      return;

    default:
      return;
    }
}
185
186 #ifdef HAVE_prefetch
187
188 /* Called from find_mem_reference through for_each_rtx, finds a memory
189 reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
190 to *RET and the traversing of the expression is interrupted by returning 1.
191 Otherwise 0 is returned. */
192
193 static int
194 find_mem_reference_1 (rtx *expr, void *ret)
195 {
196 rtx *mem = ret;
197
198 if (GET_CODE (*expr) == MEM)
199 {
200 *mem = *expr;
201 return 1;
202 }
203 return 0;
204 }
205
206 /* Called form find_mem_reference through note_stores to find out whether
207 the memory reference MEM is a store. I.e. if EXPR == MEM, the variable
208 FMR2_WRITE is set to true. */
209
210 static int fmr2_write;
211 static void
212 find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem)
213 {
214 if (expr == mem)
215 fmr2_write = true;
216 }
217
218 /* Find a memory reference inside INSN, return it in MEM. Set WRITE to true
219 if it is a write of the mem. Return false if no memory reference is found,
220 true otherwise. */
221
222 static bool
223 find_mem_reference (rtx insn, rtx *mem, int *write)
224 {
225 *mem = NULL_RTX;
226 for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem);
227
228 if (!*mem)
229 return false;
230
231 fmr2_write = false;
232 note_stores (PATTERN (insn), find_mem_reference_2, *mem);
233 *write = fmr2_write;
234 return true;
235 }
236
237 /* Find values inside INSN for that we want to measure histograms for
238 a speculative prefetching. Add them to the list VALUES.
239 Returns true if such we found any such value, false otherwise. */
240
241 static bool
242 insn_prefetch_values_to_profile (rtx insn, histogram_values *values)
243 {
244 rtx mem, address;
245 int write;
246 histogram_value hist;
247
248 /* It only makes sense to look for memory references in ordinary insns. */
249 if (GET_CODE (insn) != INSN)
250 return false;
251
252 if (!find_mem_reference (insn, &mem, &write))
253 return false;
254
255 address = XEXP (mem, 0);
256 if (side_effects_p (address))
257 return false;
258
259 if (CONSTANT_P (address))
260 return false;
261
262 hist = ggc_alloc (sizeof (*hist));
263 hist->value = address;
264 hist->mode = GET_MODE (address);
265 hist->seq = NULL_RTX;
266 hist->insn = insn;
267 hist->type = HIST_TYPE_CONST_DELTA;
268 VEC_safe_push (histogram_value, *values, hist);
269
270 return true;
271 }
272 #endif
/* Find values inside INSN for that we want to measure histograms, and add
   them to the list VALUES.  */
static void
insn_values_to_profile (rtx insn, histogram_values *values)
{
  /* Division/modulo candidates (-fprofile-values based transforms).  */
  if (flag_value_profile_transformations)
    insn_divmod_values_to_profile (insn, values);

#ifdef HAVE_prefetch
  /* Memory-address candidates; only available when the target provides
     a prefetch instruction.  */
  if (flag_speculative_prefetching)
    insn_prefetch_values_to_profile (insn, values);
#endif
}
286
/* Find list of values for that we want to measure histograms.  Fills in
   *VALUES and computes the number of gcov counters each histogram needs.  */
static void
rtl_find_values_to_profile (histogram_values *values)
{
  rtx insn;
  unsigned i, libcall_level;

  /* Refresh liveness information; PROP_DEATH_NOTES keeps REG_DEAD notes
     up to date.  */
  life_analysis (NULL, PROP_DEATH_NOTES);

  *values = VEC_alloc (histogram_value, 0);
  libcall_level = 0;
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      /* REG_LIBCALL ... REG_RETVAL notes bracket a libcall block; track
	 the nesting depth.  */
      if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
	libcall_level++;

      /* Do not instrument values inside libcalls (we are going to split block
	 due to instrumentation, and libcall blocks should be local to a single
	 basic block).  */
      if (!libcall_level)
	insn_values_to_profile (insn, values);

      if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
	{
	  gcc_assert (libcall_level > 0);
	  libcall_level--;
	}
    }
  gcc_assert (libcall_level == 0);

  /* Compute how many gcov counters each requested histogram occupies.  */
  for (i = 0; i < VEC_length (histogram_value, *values); i++)
    {
      histogram_value hist = VEC_index (histogram_value, *values, i);

      switch (hist->type)
	{
	case HIST_TYPE_INTERVAL:
	  if (dump_file)
	    fprintf (dump_file,
		     "Interval counter for insn %d, range %d -- %d.\n",
		     INSN_UID ((rtx)hist->insn),
		     hist->hdata.intvl.int_start,
		     (hist->hdata.intvl.int_start
		      + hist->hdata.intvl.steps - 1));
	  /* One counter per step of the interval, plus optional buckets
	     for values below and above it.  */
	  hist->n_counters = hist->hdata.intvl.steps +
		(hist->hdata.intvl.may_be_less ? 1 : 0) +
		(hist->hdata.intvl.may_be_more ? 1 : 0);
	  break;

	case HIST_TYPE_POW2:
	  if (dump_file)
	    fprintf (dump_file,
		     "Pow2 counter for insn %d.\n",
		     INSN_UID ((rtx)hist->insn));
	  /* One counter per possible power of two, plus an optional
	     bucket for values that are no power of two.  */
	  hist->n_counters
	    = GET_MODE_BITSIZE (hist->mode)
	      + (hist->hdata.pow2.may_be_other ? 1 : 0);
	  break;

	case HIST_TYPE_SINGLE_VALUE:
	  if (dump_file)
	    fprintf (dump_file,
		     "Single value counter for insn %d.\n",
		     INSN_UID ((rtx)hist->insn));
	  /* The value, its count, and the count of all other values
	     (cf. the reader in divmod_fixed_value_transform).  */
	  hist->n_counters = 3;
	  break;

	case HIST_TYPE_CONST_DELTA:
	  if (dump_file)
	    fprintf (dump_file,
		     "Constant delta counter for insn %d.\n",
		     INSN_UID ((rtx)hist->insn));
	  /* Last value, delta, delta count, total count (cf. the reader
	     in speculative_prefetching_transform).  */
	  hist->n_counters = 4;
	  break;

	default:
	  abort ();
	}
    }
  allocate_reg_info (max_reg_num (), FALSE, FALSE);
}
368
369 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
370 them to identify and exploit properties of values that are hard to analyze
371 statically.
372
373 We do following transformations:
374
375 1)
376
377 x = a / b;
378
379 where b is almost always a constant N is transformed to
380
381 if (b == N)
382 x = a / N;
383 else
384 x = a / b;
385
386 Analogically with %
387
388 2)
389
390 x = a % b
391
392 where b is almost always a power of 2 and the division is unsigned
393 TODO -- handle signed case as well
394
395 if ((b & (b - 1)) == 0)
396 x = a & (b - 1);
397 else
398 x = x % b;
399
400 Note that when b = 0, no error will occur and x = a; this is correct,
401 as result of such operation is undefined.
402
403 3)
404
405 x = a % b
406
 407	   where a is almost always less than b and the division is unsigned
408 TODO -- handle signed case as well
409
410 x = a;
411 if (x >= b)
412 x %= b;
413
414 4)
415
416 x = a % b
417
 418	   where a is almost always less than 2 * b and the division is unsigned
419 TODO -- handle signed case as well
420
421 x = a;
422 if (x >= b)
423 x -= b;
424 if (x >= b)
425 x %= b;
426
427 It would be possible to continue analogically for K * b for other small
428 K's, but it is probably not useful.
429
430 5)
431
432 Read or write of mem[address], where the value of address changes usually
433 by a constant C != 0 between the following accesses to the computation; with
434 -fspeculative-prefetching we then add a prefetch of address + C before
435 the insn. This handles prefetching of several interesting cases in addition
436 to a simple prefetching for addresses that are induction variables, e. g.
437 linked lists allocated sequentially (even in case they are processed
438 recursively).
439
440 TODO -- we should also check whether there is not (usually) a small
441 difference with the adjacent memory references, so that we do
442 not issue overlapping prefetches. Also we should employ some
443 heuristics to eliminate cases where prefetching evidently spoils
444 the code.
445 -- it should somehow cooperate with the loop optimizer prefetching
446
447 TODO:
448
449 There are other useful cases that could be handled by a similar mechanism,
450 for example:
451
452 for (i = 0; i < n; i++)
453 ...
454
455 transform to (for constant N):
456
457 if (n == N)
458 for (i = 0; i < N; i++)
459 ...
460 else
461 for (i = 0; i < n; i++)
462 ...
463 making unroller happy. Since this may grow the code significantly,
464 we would have to be very careful here. */
465
466 static bool
467 rtl_value_profile_transformations (void)
468 {
469 rtx insn, next;
470 int changed = false;
471
472 for (insn = get_insns (); insn; insn = next)
473 {
474 next = NEXT_INSN (insn);
475
476 if (!INSN_P (insn))
477 continue;
478
479 /* Scan for insn carrying a histogram. */
480 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
481 continue;
482
483 /* Ignore cold areas -- we are growing a code. */
484 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
485 continue;
486
487 if (dump_file)
488 {
489 fprintf (dump_file, "Trying transformations on insn %d\n",
490 INSN_UID (insn));
491 print_rtl_single (dump_file, insn);
492 }
493
494 /* Transformations: */
495 if (flag_value_profile_transformations
496 && (mod_subtract_transform (insn)
497 || divmod_fixed_value_transform (insn)
498 || mod_pow2_value_transform (insn)))
499 changed = true;
500 #ifdef HAVE_prefetch
501 if (flag_speculative_prefetching
502 && speculative_prefetching_transform (insn))
503 changed = true;
504 #endif
505 }
506
507 if (changed)
508 {
509 commit_edge_insertions ();
510 allocate_reg_info (max_reg_num (), FALSE, FALSE);
511 }
512
513 return changed;
514 }
515
/* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
   and OP2, whose value is expected to be VALUE, result TARGET and
   probability of taking the optimal path PROB).  The emitted sequence is

     if (op2 != VALUE)  goto slow;
     target = op1 OPERATION VALUE;	(* constant divisor *)
     goto end;
   slow:
     target = op1 OPERATION op2;
   end:

   and is returned to the caller for insertion on an edge.  */
static rtx
gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
			rtx target, rtx op1, rtx op2, gcov_type value,
			int prob)
{
  rtx tmp, tmp1, jump;
  rtx neq_label = gen_label_rtx ();
  rtx end_label = gen_label_rtx ();
  rtx sequence;

  start_sequence ();

  /* Evaluate the divisor into a register once, so both paths use the
     same evaluation.  */
  if (!REG_P (op2))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, copy_rtx (op2));
    }
  else
    tmp = op2;

  do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
			   NULL_RTX, neq_label);

  /* Add branch probability to jump we just created; PROB is the chance of
     falling through, so the jump gets the complement.  */
  jump = get_last_insn ();
  REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
					GEN_INT (REG_BR_PROB_BASE - prob),
					REG_NOTES (jump));

  /* Fast path: the divisor equals the profiled constant.  */
  tmp1 = simplify_gen_binary (operation, mode,
			      copy_rtx (op1), GEN_INT (value));
  tmp1 = force_operand (tmp1, target);
  if (tmp1 != target)
    emit_move_insn (copy_rtx (target), copy_rtx (tmp1));

  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Slow path: generic division by the original operand.  */
  emit_label (neq_label);
  tmp1 = simplify_gen_binary (operation, mode,
			      copy_rtx (op1), copy_rtx (tmp));
  tmp1 = force_operand (tmp1, target);
  if (tmp1 != target)
    emit_move_insn (copy_rtx (target), copy_rtx (tmp1));

  emit_label (end_label);

  sequence = get_insns ();
  end_sequence ();
  rebuild_jump_labels (sequence);
  return sequence;
}
571
572 /* Do transform 1) on INSN if applicable. */
573 static bool
574 divmod_fixed_value_transform (rtx insn)
575 {
576 rtx set, set_src, set_dest, op1, op2, value, histogram;
577 enum rtx_code code;
578 enum machine_mode mode;
579 gcov_type val, count, all;
580 edge e;
581 int prob;
582
583 set = single_set (insn);
584 if (!set)
585 return false;
586
587 set_src = SET_SRC (set);
588 set_dest = SET_DEST (set);
589 code = GET_CODE (set_src);
590 mode = GET_MODE (set_dest);
591
592 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
593 return false;
594 op1 = XEXP (set_src, false);
595 op2 = XEXP (set_src, 1);
596
597 for (histogram = REG_NOTES (insn);
598 histogram;
599 histogram = XEXP (histogram, 1))
600 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
601 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
602 break;
603
604 if (!histogram)
605 return false;
606
607 histogram = XEXP (XEXP (histogram, 0), 1);
608 value = XEXP (histogram, 0);
609 histogram = XEXP (histogram, 1);
610 val = INTVAL (XEXP (histogram, 0));
611 histogram = XEXP (histogram, 1);
612 count = INTVAL (XEXP (histogram, 0));
613 histogram = XEXP (histogram, 1);
614 all = INTVAL (XEXP (histogram, 0));
615
616 /* We require that count be at least half of all; this means
617 that for the transformation to fire the value must be constant
618 at least 50% of time (and 75% gives the guarantee of usage). */
619 if (!rtx_equal_p (op2, value) || 2 * count < all)
620 return false;
621
622 if (dump_file)
623 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
624 INSN_UID (insn));
625
626 /* Compute probability of taking the optimal path. */
627 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
628
629 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
630 delete_insn (insn);
631
632 insert_insn_on_edge (
633 gen_divmod_fixed_value (mode, code, set_dest,
634 op1, op2, val, prob), e);
635
636 return true;
637 }
638
/* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
   and OP2, result TARGET and probability of taking the optimal path PROB).
   The emitted sequence is

     if ((op2 & (op2 - 1)) != 0)  goto slow;
     target = op1 & (op2 - 1);		(* op2 is a power of 2 *)
     goto end;
   slow:
     target = op1 OPERATION op2;
   end:

   and is returned for insertion on an edge.  */
static rtx
gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
	      rtx op1, rtx op2, int prob)
{
  rtx tmp, tmp1, tmp2, tmp3, jump;
  rtx neq_label = gen_label_rtx ();
  rtx end_label = gen_label_rtx ();
  rtx sequence;

  start_sequence ();

  /* Evaluate the divisor into a register just once.  */
  if (!REG_P (op2))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, copy_rtx (op2));
    }
  else
    tmp = op2;

  /* tmp1 = op2 - 1; tmp2 = op2 & (op2 - 1) -- zero iff op2 is a power
     of two (or zero, in which case the result is undefined anyway, see
     the comment on transformation 2 above).  */
  tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
			      0, OPTAB_WIDEN);
  tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
			      0, OPTAB_WIDEN);
  do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
			   NULL_RTX, neq_label);

  /* Add branch probability to jump we just created.  */
  jump = get_last_insn ();
  REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
					GEN_INT (REG_BR_PROB_BASE - prob),
					REG_NOTES (jump));

  /* Fast path: modulo by a power of two is a bit mask.  */
  tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
			      0, OPTAB_WIDEN);
  if (tmp3 != target)
    emit_move_insn (copy_rtx (target), tmp3);
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Slow path: generic modulo.  */
  emit_label (neq_label);
  tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
  tmp1 = force_operand (tmp1, target);
  if (tmp1 != target)
    emit_move_insn (target, tmp1);

  emit_label (end_label);

  sequence = get_insns ();
  end_sequence ();
  rebuild_jump_labels (sequence);
  return sequence;
}
693
694 /* Do transform 2) on INSN if applicable. */
695 static bool
696 mod_pow2_value_transform (rtx insn)
697 {
698 rtx set, set_src, set_dest, op1, op2, value, histogram;
699 enum rtx_code code;
700 enum machine_mode mode;
701 gcov_type wrong_values, count;
702 edge e;
703 int i, all, prob;
704
705 set = single_set (insn);
706 if (!set)
707 return false;
708
709 set_src = SET_SRC (set);
710 set_dest = SET_DEST (set);
711 code = GET_CODE (set_src);
712 mode = GET_MODE (set_dest);
713
714 if (code != UMOD)
715 return false;
716 op1 = XEXP (set_src, 0);
717 op2 = XEXP (set_src, 1);
718
719 for (histogram = REG_NOTES (insn);
720 histogram;
721 histogram = XEXP (histogram, 1))
722 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
723 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
724 break;
725
726 if (!histogram)
727 return false;
728
729 histogram = XEXP (XEXP (histogram, 0), 1);
730 value = XEXP (histogram, 0);
731 histogram = XEXP (histogram, 1);
732 wrong_values =INTVAL (XEXP (histogram, 0));
733 histogram = XEXP (histogram, 1);
734
735 count = 0;
736 for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
737 {
738 count += INTVAL (XEXP (histogram, 0));
739 histogram = XEXP (histogram, 1);
740 }
741
742 if (!rtx_equal_p (op2, value))
743 return false;
744
745 /* We require that we hit a power of two at least half of all evaluations. */
746 if (count < wrong_values)
747 return false;
748
749 if (dump_file)
750 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
751 INSN_UID (insn));
752
753 /* Compute probability of taking the optimal path. */
754 all = count + wrong_values;
755 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
756
757 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
758 delete_insn (insn);
759
760 insert_insn_on_edge (
761 gen_mod_pow2 (mode, code, set_dest, op1, op2, prob), e);
762
763 return true;
764 }
765
/* Generate code for transformations 3 and 4 (with MODE and OPERATION,
   operands OP1 and OP2, result TARGET, at most SUB subtractions, and
   probability of taking the optimal path(s) PROB1 and PROB2).
   The emitted sequence is

     target = op1;
     if (target < op2)  goto end;
     (* repeated SUB times: *)
     target -= op2;
     if (target < op2)  goto end;
     (* fallback: *)
     target = target OPERATION op2;
   end:

   and is returned for insertion on an edge.  */
static rtx
gen_mod_subtract (enum machine_mode mode, enum rtx_code operation,
		  rtx target, rtx op1, rtx op2, int sub, int prob1, int prob2)
{
  rtx tmp, tmp1, jump;
  rtx end_label = gen_label_rtx ();
  rtx sequence;
  int i;

  start_sequence ();

  /* Evaluate the divisor into a register just once.  */
  if (!REG_P (op2))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, copy_rtx (op2));
    }
  else
    tmp = op2;

  emit_move_insn (target, copy_rtx (op1));
  /* LTU -- the callers only apply this to unsigned modulo.  */
  do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
			   NULL_RTX, end_label);

  /* Add branch probability to jump we just created.  */
  jump = get_last_insn ();
  REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
					GEN_INT (prob1), REG_NOTES (jump));

  for (i = 0; i < sub; i++)
    {
      /* Subtract the divisor once and re-test.  */
      tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
	    			  0, OPTAB_WIDEN);
      if (tmp1 != target)
	emit_move_insn (target, tmp1);
      do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
			       NULL_RTX, end_label);

      /* Add branch probability to jump we just created.  */
      jump = get_last_insn ();
      REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
					    GEN_INT (prob2), REG_NOTES (jump));
    }

  /* Fallback: perform the full modulo for the remaining cases.  */
  tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
  tmp1 = force_operand (tmp1, target);
  if (tmp1 != target)
    emit_move_insn (target, tmp1);

  emit_label (end_label);

  sequence = get_insns ();
  end_sequence ();
  rebuild_jump_labels (sequence);
  return sequence;
}
824
/* Do transforms 3) and 4) on INSN if applicable.  Returns true if INSN
   was replaced by the specialized sequence.  */
static bool
mod_subtract_transform (rtx insn)
{
  rtx set, set_src, set_dest, op1, op2, value, histogram;
  enum rtx_code code;
  enum machine_mode mode;
  gcov_type wrong_values, counts[2], count, all;
  edge e;
  int i, prob1, prob2;

  set = single_set (insn);
  if (!set)
    return false;

  set_src = SET_SRC (set);
  set_dest = SET_DEST (set);
  code = GET_CODE (set_src);
  mode = GET_MODE (set_dest);

  /* Only unsigned modulo is handled (see transformations 3/4 above).  */
  if (code != UMOD)
    return false;
  op1 = XEXP (set_src, 0);
  op2 = XEXP (set_src, 1);

  /* Look for the interval histogram note measured for op1 / op2 (set up
     in insn_divmod_values_to_profile).  */
  for (histogram = REG_NOTES (insn);
       histogram;
       histogram = XEXP (histogram, 1))
    if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
	&& XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
      break;

  if (!histogram)
    return false;

  /* Counters in order: quotient == 0, quotient == 1, then the two
     out-of-range buckets (below/above the interval).  */
  histogram = XEXP (XEXP (histogram, 0), 1);
  value = XEXP (histogram, 0);
  histogram = XEXP (histogram, 1);

  all = 0;
  for (i = 0; i < 2; i++)
    {
      counts[i] = INTVAL (XEXP (histogram, 0));
      all += counts[i];
      histogram = XEXP (histogram, 1);
    }
  wrong_values = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  wrong_values += INTVAL (XEXP (histogram, 0));
  all += wrong_values;

  /* We require that we use just subtractions in at least 50% of all
     evaluations.  */
  count = 0;
  for (i = 0; i < 2; i++)
    {
      count += counts[i];
      if (count * 2 >= all)
	break;
    }

  /* Not even two subtractions cover half of the evaluations -- give up.  */
  if (i == 2)
    return false;

  if (dump_file)
    fprintf (dump_file, "Mod subtract transformation on insn %d\n",
	     INSN_UID (insn));

  /* Compute probability of taking the optimal path(s).  */
  prob1 = (counts[0] * REG_BR_PROB_BASE + all / 2) / all;
  prob2 = (counts[1] * REG_BR_PROB_BASE + all / 2) / all;

  /* Split before INSN, delete it and queue the specialized code on the
     fallthru edge.  I is the number of subtractions needed: 0 picks
     transformation 3, 1 picks transformation 4.  */
  e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
  delete_insn (insn);

  insert_insn_on_edge (
	gen_mod_subtract (mode, code, set_dest,
			  op1, op2, i, prob1, prob2), e);

  return true;
}
906
907 #ifdef HAVE_prefetch
/* Generate code for transformation 5 for mem with ADDRESS and a constant
   step DELTA.  WRITE is true if the reference is a store to mem.  Returns
   a sequence computing ADDRESS + DELTA and prefetching it, for insertion
   on an edge.  */

static rtx
gen_speculative_prefetch (rtx address, gcov_type delta, int write)
{
  rtx tmp;
  rtx sequence;

  /* TODO: we do the prefetching for just one iteration ahead, which
     often is not enough.  */
  start_sequence ();
  if (offsettable_address_p (0, VOIDmode, address))
    tmp = plus_constant (copy_rtx (address), delta);
  else
    {
      /* The displaced address is not directly encodable; compute it
	 explicitly.  */
      tmp = simplify_gen_binary (PLUS, Pmode,
				 copy_rtx (address), GEN_INT (delta));
      tmp = force_operand (tmp, NULL);
    }
  /* The prefetch pattern may constrain its address operand; copy the
     address into a register if it does not satisfy the predicate.  */
  if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
	 (tmp, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
    tmp = force_reg (Pmode, tmp);
  /* Operands of the prefetch pattern: address, read/write flag, and the
     expected degree of temporal locality (3 = high).  */
  emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3)));
  sequence = get_insns ();
  end_sequence ();

  return sequence;
}
937
/* Do transform 5) on INSN if applicable.  Returns true if a prefetch was
   added before INSN.  */

static bool
speculative_prefetching_transform (rtx insn)
{
  rtx histogram, value;
  gcov_type val, count, all;
  edge e;
  rtx mem, address;
  int write;

  /* Prefetching only pays off in code that is executed often.  */
  if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
    return false;

  if (!find_mem_reference (insn, &mem, &write))
    return false;

  address = XEXP (mem, 0);
  /* Mirror the checks done when the value was chosen for profiling in
     insn_prefetch_values_to_profile.  */
  if (side_effects_p (address))
    return false;

  if (CONSTANT_P (address))
    return false;

  /* Look for a constant-delta histogram note on this insn.  */
  for (histogram = REG_NOTES (insn);
       histogram;
       histogram = XEXP (histogram, 1))
    if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
	&& XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA))
      break;

  if (!histogram)
    return false;

  /* Counters: last value referenced (skipped), the most common delta,
     its count, and the total number of evaluations.  */
  histogram = XEXP (XEXP (histogram, 0), 1);
  value = XEXP (histogram, 0);
  histogram = XEXP (histogram, 1);
  /* Skip last value referenced.  */
  histogram = XEXP (histogram, 1);
  val = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  count = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  all = INTVAL (XEXP (histogram, 0));

  /* With that few executions we do not really have a reason to optimize the
     statement, and more importantly, the data about differences of addresses
     are spoiled by the first item that had no previous value to compare
     with.  */
  if (all < 4)
    return false;

  /* We require that count be at least half of all; this means
     that for the transformation to fire the value must be constant
     at least 50% of time (and 75% gives the guarantee of usage).  */
  if (!rtx_equal_p (address, value) || 2 * count < all)
    return false;

  /* If the difference is too small, it does not make too much sense to
     prefetch, as the memory is probably already in cache.  */
  if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
    return false;

  if (dump_file)
    fprintf (dump_file, "Speculative prefetching for insn %d\n",
	     INSN_UID (insn));

  /* Unlike the divmod transforms, INSN itself is kept; only the prefetch
     sequence is queued on the edge before it.  */
  e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));

  insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);

  return true;
}
1011 #endif /* HAVE_prefetch */
1012 \f
1013 /* Connection to the outside world. */
/* Struct for IR-dependent hooks; exactly one set of hooks is installed
   via rtl_register_value_prof_hooks or tree_register_value_prof_hooks.  */
struct value_prof_hooks {
  /* Find list of values for which we want to measure histograms.  */
  void (*find_values_to_profile) (histogram_values *);

  /* Identify and exploit properties of values that are hard to analyze
     statically.  Returns true iff the insn stream was changed.
     See value-prof.c for more detail.  */
  bool (*value_profile_transformations) (void);
};
1023
/* Hooks for RTL-based versions (the only ones that currently work).  */
static struct value_prof_hooks rtl_value_prof_hooks =
{
  rtl_find_values_to_profile,
  rtl_value_profile_transformations
};
1030
1031 void
1032 rtl_register_value_prof_hooks (void)
1033 {
1034 value_prof_hooks = &rtl_value_prof_hooks;
1035 if (ir_type ())
1036 abort ();
1037 }
1038 \f
/* Tree-based versions are stubs for now.  */
/* Stub hook; aborts because tree-level value profiling is not
   implemented yet.  */
static void
tree_find_values_to_profile (histogram_values *values ATTRIBUTE_UNUSED)
{
  abort ();
}
1045
/* Stub hook; aborts because tree-level value profiling is not
   implemented yet (abort does not return, hence no return value).  */
static bool
tree_value_profile_transformations (void)
{
  abort ();
}
1051
/* Hooks for the (currently stubbed) tree-based versions.  */
static struct value_prof_hooks tree_value_prof_hooks = {
  tree_find_values_to_profile,
  tree_value_profile_transformations
};
1056
1057 void
1058 tree_register_value_prof_hooks (void)
1059 {
1060 value_prof_hooks = &tree_value_prof_hooks;
1061 if (!ir_type ())
1062 abort ();
1063 }
1064 \f
1065 /* IR-independent entry points. */
1066 void
1067 find_values_to_profile (histogram_values *values)
1068 {
1069 (value_prof_hooks->find_values_to_profile) (values);
1070 }
1071
1072 bool
1073 value_profile_transformations (void)
1074 {
1075 return (value_prof_hooks->value_profile_transformations) ();
1076 }
1077