gcc/rtl-profile.c
/* Calculate branch probabilities, and basic block execution counts.
   Copyright (C) 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998, 1999,
   2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
   based on some ideas from Dain Samples of UC Berkeley.
   Further mangling by Bob Manson, Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* Generate basic block profile instrumentation and auxiliary files.
   Profile generation is optimized so that not all arcs in the basic
   block graph need instrumenting.  First, the BB graph is closed with
   one entry (function start) and one exit (function exit).  Any
   EDGE_ABNORMAL edge cannot be instrumented (because there is no
   control path on which to place the code).  We close the graph by
   inserting EDGE_FAKE edges to the EXIT_BLOCK, from the sources of
   abnormal edges that do not go to the exit_block, and otherwise
   ignore such abnormal edges.  Naturally these fake edges are never
   directly traversed, and so *cannot* be directly instrumented.  Some
   other graph massaging is done.  To optimize the instrumentation we
   build the minimal spanning tree of the BB graph; only edges that
   are not on the spanning tree (plus the entry point) need
   instrumenting.  From that information all other edge counts can be
   deduced.  By construction all fake edges must be on the spanning
   tree.  We also attempt to place EDGE_CRITICAL edges on the spanning
   tree.

   The auxiliary file generated is <dumpbase>.bbg.  The format is
   described in full in gcov-io.h.  */
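
/* Illustrative sketch (not part of the original file, not compiled):
   once the spanning tree is chosen, counts for uninstrumented edges
   follow from flow conservation at each block -- the sum of incoming
   edge counts equals the sum of outgoing edge counts.  The function
   below shows this for a hypothetical diamond CFG A->{B,C}, {B,C}->D
   in which only the non-tree edge A->B carries a counter; all names
   are hypothetical.  */
#if 0
static void
derive_diamond_counts (gcov_type count_a, gcov_type count_a_b,
                       gcov_type *count_a_c, gcov_type *count_b_d,
                       gcov_type *count_c_d)
{
  *count_a_c = count_a - count_a_b;   /* A's outgoing flow splits in two.  */
  *count_b_d = count_a_b;             /* B has a single successor.  */
  *count_c_d = *count_a_c;            /* C has a single successor.  */
}
#endif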

/* ??? Register allocation should use basic block execution counts to
   give preference to the most commonly executed blocks.  */

/* ??? Should calculate branch probabilities before instrumenting code, since
   then we can use arc counts to help decide which arcs to instrument.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "flags.h"
#include "output.h"
#include "regs.h"
#include "expr.h"
#include "function.h"
#include "toplev.h"
#include "coverage.h"
#include "value-prof.h"
#include "tree.h"

/* Output instructions as RTL to increment the edge execution count.  */

static void
rtl_gen_edge_profiler (int edgeno, edge e)
{
  rtx ref = rtl_coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
  rtx tmp;
  enum machine_mode mode = GET_MODE (ref);
  rtx sequence;

  start_sequence ();
  ref = validize_mem (ref);

  tmp = expand_simple_binop (mode, PLUS, ref, const1_rtx,
                             ref, 0, OPTAB_WIDEN);

  if (tmp != ref)
    emit_move_insn (copy_rtx (ref), tmp);

  sequence = get_insns ();
  end_sequence ();
  insert_insn_on_edge (sequence, e);
  rebuild_jump_labels (e->insns.r);
}
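
/* Illustrative sketch (not part of the original file, not compiled):
   the RTL sequence emitted by rtl_gen_edge_profiler is equivalent to a
   single increment of the arc counter selected by EDGENO.  The array
   name below is hypothetical; the real counters are laid out by
   coverage.c under GCOV_COUNTER_ARCS.  */
#if 0
extern gcov_type __arc_counters[];

static void
edge_profiler_sketch (int edgeno)
{
  __arc_counters[edgeno] += 1;
}
#endif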

/* Output instructions as RTL to increment the interval histogram counter.
   VALUE is the expression whose value is profiled.  TAG is the tag of the
   section for counters, BASE is the offset of the counter position.  */

static void
rtl_gen_interval_profiler (struct histogram_value *value, unsigned tag,
                           unsigned base)
{
  unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
  enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
  rtx mem_ref, tmp, tmp1, mr, val;
  rtx sequence;
  rtx more_label = gen_label_rtx ();
  rtx less_label = gen_label_rtx ();
  rtx end_of_code_label = gen_label_rtx ();
  int per_counter = gcov_size / BITS_PER_UNIT;
  edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
                        PREV_INSN ((rtx)value->insn));

  start_sequence ();

  if (value->seq)
    emit_insn (value->seq);

  mr = gen_reg_rtx (Pmode);

  tmp = rtl_coverage_counter_ref (tag, base);
  tmp = force_reg (Pmode, XEXP (tmp, 0));

  val = expand_simple_binop (value->mode, MINUS,
                             copy_rtx (value->value),
                             GEN_INT (value->hdata.intvl.int_start),
                             NULL_RTX, 0, OPTAB_WIDEN);

  if (value->hdata.intvl.may_be_more)
    do_compare_rtx_and_jump (copy_rtx (val), GEN_INT (value->hdata.intvl.steps),
                             GE, 0, value->mode, NULL_RTX, NULL_RTX, more_label);
  if (value->hdata.intvl.may_be_less)
    do_compare_rtx_and_jump (copy_rtx (val), const0_rtx, LT, 0, value->mode,
                             NULL_RTX, NULL_RTX, less_label);

  /* We are in range.  */
  tmp1 = expand_simple_binop (value->mode, MULT,
                              copy_rtx (val), GEN_INT (per_counter),
                              NULL_RTX, 0, OPTAB_WIDEN);
  tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp), tmp1, mr,
                              0, OPTAB_WIDEN);
  if (tmp1 != mr)
    emit_move_insn (copy_rtx (mr), tmp1);

  if (value->hdata.intvl.may_be_more
      || value->hdata.intvl.may_be_less)
    {
      emit_jump_insn (gen_jump (end_of_code_label));
      emit_barrier ();
    }

  /* Above the interval.  */
  if (value->hdata.intvl.may_be_more)
    {
      emit_label (more_label);
      tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
                                  GEN_INT (per_counter * value->hdata.intvl.steps),
                                  mr, 0, OPTAB_WIDEN);
      if (tmp1 != mr)
        emit_move_insn (copy_rtx (mr), tmp1);
      if (value->hdata.intvl.may_be_less)
        {
          emit_jump_insn (gen_jump (end_of_code_label));
          emit_barrier ();
        }
    }

  /* Below the interval.  */
  if (value->hdata.intvl.may_be_less)
    {
      emit_label (less_label);
      tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
                                  GEN_INT (per_counter * (value->hdata.intvl.steps
                                           + (value->hdata.intvl.may_be_more ? 1 : 0))),
                                  mr, 0, OPTAB_WIDEN);
      if (tmp1 != mr)
        emit_move_insn (copy_rtx (mr), tmp1);
    }

  if (value->hdata.intvl.may_be_more
      || value->hdata.intvl.may_be_less)
    emit_label (end_of_code_label);

  mem_ref = validize_mem (gen_rtx_MEM (mode, mr));

  tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
                             mem_ref, 0, OPTAB_WIDEN);

  if (tmp != mem_ref)
    emit_move_insn (copy_rtx (mem_ref), tmp);

  sequence = get_insns ();
  end_sequence ();
  rebuild_jump_labels (sequence);
  safe_insert_insn_on_edge (sequence, e);
}
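
/* Illustrative sketch (not part of the original file, not compiled):
   the RTL emitted by rtl_gen_interval_profiler behaves like the C
   below.  STEPS buckets cover values INT_START .. INT_START + STEPS - 1;
   an optional bucket follows for values above the interval and another
   for values below it.  All names are hypothetical.  */
#if 0
static void
interval_profiler_sketch (gcov_type *counters, gcov_type value,
                          gcov_type int_start, int steps,
                          int may_be_more, int may_be_less)
{
  gcov_type delta = value - int_start;

  if (may_be_more && delta >= steps)
    counters[steps]++;                          /* Above the interval.  */
  else if (may_be_less && delta < 0)
    counters[steps + (may_be_more ? 1 : 0)]++;  /* Below the interval.  */
  else
    counters[delta]++;                          /* Inside the interval.  */
}
#endif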

/* Output instructions as RTL to increment the power of two histogram counter.
   VALUE is the expression whose value is profiled.  TAG is the tag of the
   section for counters, BASE is the offset of the counter position.  */

static void
rtl_gen_pow2_profiler (struct histogram_value *value, unsigned tag,
                       unsigned base)
{
  unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
  enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
  rtx mem_ref, tmp, mr, uval;
  rtx sequence;
  rtx end_of_code_label = gen_label_rtx ();
  rtx loop_label = gen_label_rtx ();
  int per_counter = gcov_size / BITS_PER_UNIT;
  edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
                        PREV_INSN ((rtx)value->insn));

  start_sequence ();

  if (value->seq)
    emit_insn (value->seq);

  mr = gen_reg_rtx (Pmode);
  tmp = rtl_coverage_counter_ref (tag, base);
  tmp = force_reg (Pmode, XEXP (tmp, 0));
  emit_move_insn (mr, tmp);

  uval = gen_reg_rtx (value->mode);
  emit_move_insn (uval, copy_rtx (value->value));

  /* Check for non-power of 2.  */
  if (value->hdata.pow2.may_be_other)
    {
      do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, LE, 0, value->mode,
                               NULL_RTX, NULL_RTX, end_of_code_label);
      tmp = expand_simple_binop (value->mode, PLUS, copy_rtx (uval),
                                 constm1_rtx, NULL_RTX, 0, OPTAB_WIDEN);
      tmp = expand_simple_binop (value->mode, AND, copy_rtx (uval), tmp,
                                 NULL_RTX, 0, OPTAB_WIDEN);
      do_compare_rtx_and_jump (tmp, const0_rtx, NE, 0, value->mode, NULL_RTX,
                               NULL_RTX, end_of_code_label);
    }

  /* Count log_2(value).  */
  emit_label (loop_label);

  tmp = expand_simple_binop (Pmode, PLUS, copy_rtx (mr), GEN_INT (per_counter), mr, 0, OPTAB_WIDEN);
  if (tmp != mr)
    emit_move_insn (copy_rtx (mr), tmp);

  tmp = expand_simple_binop (value->mode, ASHIFTRT, copy_rtx (uval), const1_rtx,
                             uval, 0, OPTAB_WIDEN);
  if (tmp != uval)
    emit_move_insn (copy_rtx (uval), tmp);

  do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, NE, 0, value->mode,
                           NULL_RTX, NULL_RTX, loop_label);

  /* Increase the counter.  */
  emit_label (end_of_code_label);

  mem_ref = validize_mem (gen_rtx_MEM (mode, mr));

  tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
                             mem_ref, 0, OPTAB_WIDEN);

  if (tmp != mem_ref)
    emit_move_insn (copy_rtx (mem_ref), tmp);

  sequence = get_insns ();
  end_sequence ();
  rebuild_jump_labels (sequence);
  safe_insert_insn_on_edge (sequence, e);
}
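
/* Illustrative sketch (not part of the original file, not compiled):
   the RTL emitted by rtl_gen_pow2_profiler behaves like the C below.
   Bucket 0 collects values that are not positive powers of two (when
   MAY_BE_OTHER); a value equal to 1 << k lands in bucket k + 1, since
   the emitted shift loop is a do-while.  Names are hypothetical.  */
#if 0
static void
pow2_profiler_sketch (gcov_type *counters, gcov_type value,
                      int may_be_other)
{
  unsigned bucket = 0;

  if (may_be_other && (value <= 0 || (value & (value - 1)) != 0))
    {
      counters[0]++;
      return;
    }

  do
    {
      bucket++;          /* One step per right shift.  */
      value >>= 1;
    }
  while (value != 0);

  counters[bucket]++;
}
#endif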

/* Output instructions as RTL for code to find the most common value.
   VALUE is the expression whose value is profiled.  TAG is the tag of the
   section for counters, BASE is the offset of the counter position.  */

static rtx
rtl_gen_one_value_profiler_no_edge_manipulation (struct histogram_value *value,
                                                 unsigned tag, unsigned base)
{
  unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
  enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
  rtx stored_value_ref, counter_ref, all_ref, stored_value, counter, all;
  rtx tmp, uval;
  rtx sequence;
  rtx same_label = gen_label_rtx ();
  rtx zero_label = gen_label_rtx ();
  rtx end_of_code_label = gen_label_rtx ();

  start_sequence ();

  if (value->seq)
    emit_insn (value->seq);

  stored_value_ref = rtl_coverage_counter_ref (tag, base);
  counter_ref = rtl_coverage_counter_ref (tag, base + 1);
  all_ref = rtl_coverage_counter_ref (tag, base + 2);
  stored_value = validize_mem (stored_value_ref);
  counter = validize_mem (counter_ref);
  all = validize_mem (all_ref);

  uval = gen_reg_rtx (mode);
  convert_move (uval, copy_rtx (value->value), 0);

  /* Check if the stored value matches.  */
  do_compare_rtx_and_jump (copy_rtx (uval), copy_rtx (stored_value), EQ,
                           0, mode, NULL_RTX, NULL_RTX, same_label);

  /* Does not match; check whether the counter is zero.  */
  do_compare_rtx_and_jump (copy_rtx (counter), const0_rtx, EQ, 0, mode,
                           NULL_RTX, NULL_RTX, zero_label);

  /* The counter is not zero yet.  */
  tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), constm1_rtx,
                             counter, 0, OPTAB_WIDEN);

  if (tmp != counter)
    emit_move_insn (copy_rtx (counter), tmp);

  emit_jump_insn (gen_jump (end_of_code_label));
  emit_barrier ();

  emit_label (zero_label);
  /* Set new value.  */
  emit_move_insn (copy_rtx (stored_value), copy_rtx (uval));

  emit_label (same_label);
  /* Increase the counter.  */
  tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), const1_rtx,
                             counter, 0, OPTAB_WIDEN);

  if (tmp != counter)
    emit_move_insn (copy_rtx (counter), tmp);

  emit_label (end_of_code_label);

  /* Increase the counter of all executions; this seems redundant given
     that we have counts for edges in the CFG, but it may happen that some
     optimization will change the counts for the block (either because
     it is unable to update them correctly, or because it will duplicate
     the block or part of it).  */
  tmp = expand_simple_binop (mode, PLUS, copy_rtx (all), const1_rtx,
                             all, 0, OPTAB_WIDEN);

  if (tmp != all)
    emit_move_insn (copy_rtx (all), tmp);
  sequence = get_insns ();
  end_sequence ();
  return sequence;
}
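
/* Illustrative sketch (not part of the original file, not compiled):
   the sequence built above implements a majority-vote ("most common
   value") scheme over three counters per site: the current candidate
   value, its vote count, and the total number of executions.  Names
   below are hypothetical.  */
#if 0
static void
one_value_profiler_sketch (gcov_type *counters, gcov_type value)
{
  if (value == counters[0])
    counters[1]++;           /* Same candidate: one more vote.  */
  else if (counters[1] == 0)
    {
      counters[0] = value;   /* No votes left: adopt a new candidate.  */
      counters[1]++;
    }
  else
    counters[1]--;           /* Different value: cancel one vote.  */

  counters[2]++;             /* Count every execution.  */
}
#endif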

/* Output instructions as RTL for code to find the most common value.
   VALUE is the expression whose value is profiled.  TAG is the tag of the
   section for counters, BASE is the offset of the counter position.  */

static void
rtl_gen_one_value_profiler (struct histogram_value *value, unsigned tag,
                            unsigned base)
{
  edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
                        PREV_INSN ((rtx)value->insn));
  rtx sequence = rtl_gen_one_value_profiler_no_edge_manipulation (value,
                                                                  tag, base);
  rebuild_jump_labels (sequence);
  safe_insert_insn_on_edge (sequence, e);
}

/* Output instructions as RTL for code to find the most common value of
   a difference between two evaluations of an expression.
   VALUE is the expression whose value is profiled.  TAG is the tag of the
   section for counters, BASE is the offset of the counter position.  */

static void
rtl_gen_const_delta_profiler (struct histogram_value *value, unsigned tag,
                              unsigned base)
{
  struct histogram_value one_value_delta;
  unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
  enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
  rtx stored_value_ref, stored_value, tmp, uval;
  rtx sequence;
  edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
                        PREV_INSN ((rtx)value->insn));

  start_sequence ();

  if (value->seq)
    emit_insn (value->seq);

  stored_value_ref = rtl_coverage_counter_ref (tag, base);
  stored_value = validize_mem (stored_value_ref);

  uval = gen_reg_rtx (mode);
  convert_move (uval, copy_rtx (value->value), 0);
  tmp = expand_simple_binop (mode, MINUS,
                             copy_rtx (uval), copy_rtx (stored_value),
                             NULL_RTX, 0, OPTAB_WIDEN);

  one_value_delta.value = tmp;
  one_value_delta.mode = mode;
  one_value_delta.seq = NULL_RTX;
  one_value_delta.insn = value->insn;
  one_value_delta.type = HIST_TYPE_SINGLE_VALUE;
  emit_insn (rtl_gen_one_value_profiler_no_edge_manipulation (&one_value_delta,
                                                              tag, base + 1));
  emit_move_insn (copy_rtx (stored_value), uval);
  sequence = get_insns ();
  end_sequence ();
  rebuild_jump_labels (sequence);
  safe_insert_insn_on_edge (sequence, e);
}
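
/* Illustrative sketch (not part of the original file, not compiled):
   rtl_gen_const_delta_profiler records the most common difference
   between two successive evaluations of the expression by feeding the
   delta into the single-value scheme (see the sketch after
   rtl_gen_one_value_profiler_no_edge_manipulation).  Names are
   hypothetical; counters[0] holds the previously seen value.  */
#if 0
static void
const_delta_profiler_sketch (gcov_type *counters, gcov_type value)
{
  gcov_type delta = value - counters[0];

  one_value_profiler_sketch (&counters[1], delta);
  counters[0] = value;
}
#endif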

/* Return the file on which profile dump output goes, if any.  */

static FILE *
rtl_profile_dump_file (void)
{
  return dump_file;
}

struct profile_hooks rtl_profile_hooks =
{
  rtl_gen_edge_profiler,
  rtl_gen_interval_profiler,
  rtl_gen_pow2_profiler,
  rtl_gen_one_value_profiler,
  rtl_gen_const_delta_profiler,
  rtl_profile_dump_file
};