doloop.c: Removed.
[gcc.git] / gcc / loop-doloop.c
1 /* Perform doloop optimizations
2 Copyright (C) 2004 Free Software Foundation, Inc.
3 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "flags.h"
28 #include "expr.h"
29 #include "hard-reg-set.h"
30 #include "basic-block.h"
31 #include "toplev.h"
32 #include "tm_p.h"
33 #include "cfgloop.h"
34 #include "output.h"
35 #include "params.h"
36
37 /* This module is used to modify loops with a determinable number of
38 iterations to use special low-overhead looping instructions.
39
40 It first validates whether the loop is well behaved and has a
41 determinable number of iterations (either at compile or run-time).
42 It then modifies the loop to use a low-overhead looping pattern as
43 follows:
44
45 1. A pseudo register is allocated as the loop iteration counter.
46
47 2. The number of loop iterations is calculated and is stored
48 in the loop counter.
49
50 3. At the end of the loop, the jump insn is replaced by the
51 doloop_end pattern. The compare must remain because it might be
52 used elsewhere. If the loop-variable or condition register are
53 used elsewhere, they will be eliminated by flow.
54
55 4. An optional doloop_begin pattern is inserted at the top of the
56 loop.
57
58 TODO The optimization should only performed when either the biv used for exit
59 condition is unused at all except for the exit test, or if we do not have to
60 change its value, since otherwise we have to add a new induction variable,
61 which usually will not pay up (unless the cost of the doloop pattern is
62 somehow extremely lower than the cost of compare & jump, or unless the bct
63 register cannot be used for anything else but doloop -- ??? detect these
64 cases). */
65
66 #ifdef HAVE_doloop_end
67
68 /* Return the loop termination condition for PATTERN or zero
69 if it is not a decrement and branch jump insn. */
70
71 static rtx
72 doloop_condition_get (rtx pattern)
73 {
74 rtx cmp;
75 rtx inc;
76 rtx reg;
77 rtx condition;
78
79 /* The canonical doloop pattern we expect is:
80
81 (parallel [(set (pc) (if_then_else (condition)
82 (label_ref (label))
83 (pc)))
84 (set (reg) (plus (reg) (const_int -1)))
85 (additional clobbers and uses)])
86
87 Some machines (IA-64) make the decrement conditional on
88 the condition as well, so we don't bother verifying the
89 actual decrement. In summary, the branch must be the
90 first entry of the parallel (also required by jump.c),
91 and the second entry of the parallel must be a set of
92 the loop counter register. */
93
94 if (GET_CODE (pattern) != PARALLEL)
95 return 0;
96
97 cmp = XVECEXP (pattern, 0, 0);
98 inc = XVECEXP (pattern, 0, 1);
99
100 /* Check for (set (reg) (something)). */
101 if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc)))
102 return 0;
103
104 /* Extract loop counter register. */
105 reg = SET_DEST (inc);
106
107 /* Check for (set (pc) (if_then_else (condition)
108 (label_ref (label))
109 (pc))). */
110 if (GET_CODE (cmp) != SET
111 || SET_DEST (cmp) != pc_rtx
112 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
113 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
114 || XEXP (SET_SRC (cmp), 2) != pc_rtx)
115 return 0;
116
117 /* Extract loop termination condition. */
118 condition = XEXP (SET_SRC (cmp), 0);
119
120 if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE)
121 || GET_CODE (XEXP (condition, 1)) != CONST_INT)
122 return 0;
123
124 if (XEXP (condition, 0) == reg)
125 return condition;
126
127 if (GET_CODE (XEXP (condition, 0)) == PLUS
128 && XEXP (XEXP (condition, 0), 0) == reg)
129 return condition;
130
131 /* ??? If a machine uses a funny comparison, we could return a
132 canonicalised form here. */
133
134 return 0;
135 }
136
137 /* Return nonzero if the loop specified by LOOP is suitable for
138 the use of special low-overhead looping instructions. DESC
139 describes the number of iterations of the loop. */
140
141 static bool
142 doloop_valid_p (struct loop *loop, struct niter_desc *desc)
143 {
144 basic_block *body = get_loop_body (loop), bb;
145 rtx insn;
146 unsigned i;
147
148 /* Check for loops that may not terminate under special conditions. */
149 if (!desc->simple_p
150 || desc->assumptions
151 || desc->infinite)
152 {
153 /* There are some cases that would require a special attention.
154 For example if the comparison is LEU and the comparison value
155 is UINT_MAX then the loop will not terminate. Similarly, if the
156 comparison code is GEU and the comparison value is 0, the
157 loop will not terminate.
158
159 If the absolute increment is not 1, the loop can be infinite
160 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
161
162 ??? We could compute these conditions at run-time and have a
163 additional jump around the loop to ensure an infinite loop.
164 However, it is very unlikely that this is the intended
165 behavior of the loop and checking for these rare boundary
166 conditions would pessimize all other code.
167
168 If the loop is executed only a few times an extra check to
169 restart the loop could use up most of the benefits of using a
170 count register loop. Note however, that normally, this
171 restart branch would never execute, so it could be predicted
172 well by the CPU. We should generate the pessimistic code by
173 default, and have an option, e.g. -funsafe-loops that would
174 enable count-register loops in this case. */
175 if (dump_file)
176 fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
177 return false;
178 }
179
180 for (i = 0; i < loop->num_nodes; i++)
181 {
182 bb = body[i];
183
184 for (insn = BB_HEAD (bb);
185 insn != NEXT_INSN (BB_END (bb));
186 insn = NEXT_INSN (insn))
187 {
188 /* A called function may clobber any special registers required for
189 low-overhead looping. */
190 if (GET_CODE (insn) == CALL_INSN)
191 {
192 if (dump_file)
193 fprintf (dump_file, "Doloop: Function call in loop.\n");
194 return false;
195 }
196
197 /* Some targets (eg, PPC) use the count register for branch on table
198 instructions. ??? This should be a target specific check. */
199 if (GET_CODE (insn) == JUMP_INSN
200 && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
201 || GET_CODE (PATTERN (insn)) == ADDR_VEC))
202 {
203 if (dump_file)
204 fprintf (dump_file, "Doloop: Computed branch in the loop.\n");
205 return false;
206 }
207 }
208 }
209 free (body);
210
211 return true;
212 }
213
214 /* Adds test of COND jumping to DEST to the end of BB. */
215
216 static void
217 add_test (rtx cond, basic_block bb, basic_block dest)
218 {
219 rtx seq, jump, label;
220 enum machine_mode mode;
221 rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
222 enum rtx_code code = GET_CODE (cond);
223
224 mode = GET_MODE (XEXP (cond, 0));
225 if (mode == VOIDmode)
226 mode = GET_MODE (XEXP (cond, 1));
227
228 start_sequence ();
229 op0 = force_operand (op0, NULL_RTX);
230 op1 = force_operand (op1, NULL_RTX);
231 label = block_label (dest);
232 do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label);
233
234 jump = get_last_insn ();
235 JUMP_LABEL (jump) = label;
236
237 /* The jump is supposed to handle an unlikely special case. */
238 REG_NOTES (jump)
239 = gen_rtx_EXPR_LIST (REG_BR_PROB,
240 GEN_INT (0), REG_NOTES (jump));
241
242 LABEL_NUSES (label)++;
243
244 seq = get_insns ();
245 end_sequence ();
246 emit_insn_after (seq, BB_END (bb));
247 }
248
249 /* Modify the loop to use the low-overhead looping insn where LOOP
250 describes the loop, DESC describes the number of iterations of the
251 loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
252 end of the loop. CONDITION is the condition separated from the
253 DOLOOP_SEQ. */
254
255 static void
256 doloop_modify (struct loop *loop, struct niter_desc *desc,
257 rtx doloop_seq, rtx condition)
258 {
259 rtx counter_reg;
260 rtx count, tmp, noloop = NULL_RTX;
261 rtx sequence;
262 rtx jump_insn;
263 rtx jump_label;
264 int nonneg = 0, irr;
265 bool increment_count;
266 basic_block loop_end = desc->out_edge->src;
267
268 jump_insn = BB_END (loop_end);
269
270 if (dump_file)
271 {
272 fprintf (dump_file, "Doloop: Inserting doloop pattern (");
273 if (desc->const_iter)
274 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
275 else
276 fputs ("runtime", dump_file);
277 fputs (" iterations).\n", dump_file);
278 }
279
280 /* Discard original jump to continue loop. The original compare
281 result may still be live, so it cannot be discarded explicitly. */
282 delete_insn (jump_insn);
283
284 counter_reg = XEXP (condition, 0);
285 if (GET_CODE (counter_reg) == PLUS)
286 counter_reg = XEXP (counter_reg, 0);
287
288 count = desc->niter_expr;
289 increment_count = false;
290 switch (GET_CODE (condition))
291 {
292 case NE:
293 /* Currently only NE tests against zero and one are supported. */
294 if (XEXP (condition, 1) == const1_rtx)
295 {
296 increment_count = true;
297 noloop = const1_rtx;
298 }
299 else if (XEXP (condition, 1) == const0_rtx)
300 noloop = const0_rtx;
301 else
302 abort ();
303 break;
304
305 case GE:
306 /* Currently only GE tests against zero are supported. */
307 if (XEXP (condition, 1) != const0_rtx)
308 abort ();
309
310 noloop = constm1_rtx;
311
312 /* The iteration count does not need incrementing for a GE test. */
313 increment_count = false;
314
315 /* Determine if the iteration counter will be non-negative.
316 Note that the maximum value loaded is iterations_max - 1. */
317 if (desc->niter_max
318 <= ((unsigned HOST_WIDEST_INT) 1
319 << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1)))
320 nonneg = 1;
321 break;
322
323 /* Abort if an invalid doloop pattern has been generated. */
324 default:
325 abort ();
326 }
327
328 if (increment_count)
329 count = simplify_gen_binary (PLUS, desc->mode, count, const1_rtx);
330
331 /* Insert initialization of the count register into the loop header. */
332 start_sequence ();
333 tmp = force_operand (count, counter_reg);
334 convert_move (counter_reg, tmp, 1);
335 sequence = get_insns ();
336 end_sequence ();
337 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
338
339 if (desc->noloop_assumptions)
340 {
341 rtx ass = desc->noloop_assumptions;
342 basic_block preheader = loop_preheader_edge (loop)->src;
343 basic_block set_zero
344 = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
345 basic_block new_preheader
346 = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
347 basic_block bb;
348 edge te;
349 gcov_type cnt;
350
351 /* Expand the condition testing the assumptions and if it does not pass,
352 reset the count register to 0. */
353 add_test (XEXP (ass, 0), preheader, set_zero);
354 preheader->succ->flags &= ~EDGE_FALLTHRU;
355 cnt = preheader->succ->count;
356 preheader->succ->probability = 0;
357 preheader->succ->count = 0;
358 irr = preheader->succ->flags & EDGE_IRREDUCIBLE_LOOP;
359 te = make_edge (preheader, new_preheader, EDGE_FALLTHRU | irr);
360 te->probability = REG_BR_PROB_BASE;
361 te->count = cnt;
362 set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
363
364 set_zero->count = 0;
365 set_zero->frequency = 0;
366
367 for (ass = XEXP (ass, 1); ass; ass = XEXP (ass, 1))
368 {
369 bb = loop_split_edge_with (te, NULL_RTX);
370 te = bb->succ;
371 add_test (XEXP (ass, 0), bb, set_zero);
372 make_edge (bb, set_zero, irr);
373 }
374
375 start_sequence ();
376 convert_move (counter_reg, noloop, 0);
377 sequence = get_insns ();
378 end_sequence ();
379 emit_insn_after (sequence, BB_END (set_zero));
380 }
381
382 /* Some targets (eg, C4x) need to initialize special looping
383 registers. */
384 #ifdef HAVE_doloop_begin
385 {
386 rtx init;
387 unsigned level = get_loop_level (loop) + 1;
388 init = gen_doloop_begin (counter_reg,
389 desc->const_iter ? desc->niter_expr : const0_rtx,
390 desc->niter_max,
391 GEN_INT (level));
392 if (init)
393 {
394 start_sequence ();
395 emit_insn (init);
396 sequence = get_insns ();
397 end_sequence ();
398 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
399 }
400 }
401 #endif
402
403 /* Insert the new low-overhead looping insn. */
404 emit_jump_insn_after (doloop_seq, BB_END (loop_end));
405 jump_insn = BB_END (loop_end);
406 jump_label = block_label (desc->in_edge->dest);
407 JUMP_LABEL (jump_insn) = jump_label;
408 LABEL_NUSES (jump_label)++;
409
410 /* Ensure the right fallthru edge is marked, for case we have reversed
411 the condition. */
412 desc->in_edge->flags &= ~EDGE_FALLTHRU;
413 desc->out_edge->flags |= EDGE_FALLTHRU;
414
415 /* Add a REG_NONNEG note if the actual or estimated maximum number
416 of iterations is non-negative. */
417 if (nonneg)
418 {
419 REG_NOTES (jump_insn)
420 = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
421 }
422 }
423
424 /* Process loop described by LOOP validating that the loop is suitable for
425 conversion to use a low overhead looping instruction, replacing the jump
426 insn where suitable. Returns true if the loop was successfully
427 modified. */
428
429 static bool
430 doloop_optimize (struct loop *loop)
431 {
432 enum machine_mode mode;
433 rtx doloop_seq, doloop_pat, doloop_reg;
434 rtx iterations;
435 rtx iterations_max;
436 rtx start_label;
437 rtx condition;
438 unsigned level, est_niter;
439 struct niter_desc *desc;
440
441 if (dump_file)
442 fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
443
444 iv_analysis_loop_init (loop);
445
446 /* Find the simple exit of a LOOP. */
447 desc = get_simple_loop_desc (loop);
448
449 /* Check that loop is a candidate for a low-overhead looping insn. */
450 if (!doloop_valid_p (loop, desc))
451 {
452 if (dump_file)
453 fprintf (dump_file,
454 "Doloop: The loop is not suitable.\n");
455 return false;
456 }
457 mode = desc->mode;
458
459 est_niter = 3;
460 if (desc->const_iter)
461 est_niter = desc->niter;
462 /* If the estimate on number of iterations is reliable (comes from profile
463 feedback), use it. Do not use it normally, since the expected number
464 of iterations of an unrolled loop is 2. */
465 if (loop->header->count)
466 est_niter = expected_loop_iterations (loop);
467
468 if (est_niter < 3)
469 {
470 if (dump_file)
471 fprintf (dump_file,
472 "Doloop: Too few iterations (%u) to be profitable.\n",
473 est_niter);
474 return false;
475 }
476
477 iterations = desc->const_iter ? desc->niter_expr : const0_rtx;
478 iterations_max = GEN_INT (desc->niter_max);
479 level = get_loop_level (loop) + 1;
480
481 /* Generate looping insn. If the pattern FAILs then give up trying
482 to modify the loop since there is some aspect the back-end does
483 not like. */
484 start_label = block_label (desc->in_edge->dest);
485 doloop_reg = gen_reg_rtx (mode);
486 doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
487 GEN_INT (level), start_label);
488 if (! doloop_seq && mode != word_mode)
489 {
490 PUT_MODE (doloop_reg, word_mode);
491 doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
492 GEN_INT (level), start_label);
493 }
494 if (! doloop_seq)
495 {
496 if (dump_file)
497 fprintf (dump_file,
498 "Doloop: Target unwilling to use doloop pattern!\n");
499 return false;
500 }
501
502 /* If multiple instructions were created, the last must be the
503 jump instruction. Also, a raw define_insn may yield a plain
504 pattern. */
505 doloop_pat = doloop_seq;
506 if (INSN_P (doloop_pat))
507 {
508 while (NEXT_INSN (doloop_pat) != NULL_RTX)
509 doloop_pat = NEXT_INSN (doloop_pat);
510 if (GET_CODE (doloop_pat) == JUMP_INSN)
511 doloop_pat = PATTERN (doloop_pat);
512 else
513 doloop_pat = NULL_RTX;
514 }
515
516 if (! doloop_pat
517 || ! (condition = doloop_condition_get (doloop_pat)))
518 {
519 if (dump_file)
520 fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
521 return false;
522 }
523
524 doloop_modify (loop, desc, doloop_seq, condition);
525 return true;
526 }
527
528 /* This is the main entry point. Process all LOOPS using doloop_optimize. */
529
530 void
531 doloop_optimize_loops (struct loops *loops)
532 {
533 unsigned i;
534 struct loop *loop;
535
536 for (i = 1; i < loops->num; i++)
537 {
538 loop = loops->parray[i];
539 if (!loop)
540 continue;
541
542 doloop_optimize (loop);
543 }
544
545 iv_analysis_done ();
546
547 #ifdef ENABLE_CHECKING
548 verify_dominators (CDI_DOMINATORS);
549 verify_loop_structure (loops);
550 #endif
551 }
552 #endif /* HAVE_doloop_end */
553