/* Loop unrolling.
   Copyright (C) 2002-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "hash-table.h"
/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with a single exception) because
   the impact on performance is greatest there, and we want to avoid
   unnecessary code size growth.  The gain comes from greater sequentiality
   of code, better opportunities for further optimization passes and, in
   some cases, fewer tests of exit conditions.  The main problem is code
   growth, which hurts performance through cache effects.

   What we do:

   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of the exit condition tests.
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of the exit condition tests here, but
      there is the extra expense of calculating the number of iterations.
   -- simple unrolling of the remaining loops; this is performed only if
      we are asked to, as the gain is questionable in this case and often
      it may even slow down the code.

   For a more detailed description of each of these, see the comments at
   the appropriate functions below.

   There are a number of parameters (defined and described in params.def)
   that control how much we unroll.

   ??? A big problem is that we do not have a good way to determine how
   many times we should unroll a loop; experiments have shown that this
   choice may affect performance by several percent.  */
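
/* As a rough illustration (hypothetical source-level loops, not taken from
   this file or from any testcase), the three situations handled by this
   pass look like:

     for (i = 0; i < 100; i++)    -- rolls a constant number of times;
       a[i] = 0;                     handled by LPT_UNROLL_CONSTANT.

     for (i = 0; i < n; i++)      -- trip count computable at runtime;
       a[i] = 0;                     handled by LPT_UNROLL_RUNTIME.

     while (*p++ != 0)            -- trip count unknown in advance;
       ;                             handled by LPT_UNROLL_STUPID.  */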
/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx_insn *insn;            /* The insn in which the induction variable occurs.  */
  rtx orig_var;              /* The variable (register) for the IV before split.  */
  rtx base_var;              /* The variable on which the values in the further
                                iterations are based.  */
  rtx step;                  /* Step of the induction variable.  */
  struct iv_to_split *next;  /* Next entry in walking order.  */
};
/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx_insn *insn;               /* The insn in which the variable expansion occurs.  */
  rtx reg;                      /* The accumulator which is expanded.  */
  vec<rtx> var_expansions;      /* The copies of the accumulator which is expanded.  */
  struct var_to_expand *next;   /* Next entry in walking order.  */
  enum rtx_code op;             /* The type of the accumulation - addition, subtraction
                                   or multiplication.  */
  int expansion_count;          /* Count the number of expansions generated so far.  */
  int reuse_expansion;          /* The expansion we intend to reuse to expand
                                   the accumulator.  If REUSE_EXPANSION is 0 reuse
                                   the original accumulator.  Else use
                                   var_expansions[REUSE_EXPANSION - 1].  */
};
/* Hashtable helper for iv_to_split.  */

struct iv_split_hasher : typed_free_remove <iv_to_split>
{
  typedef iv_to_split value_type;
  typedef iv_to_split compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};
/* A hash function for information about insns to split.  */

inline hashval_t
iv_split_hasher::hash (const value_type *ivts)
{
  return (hashval_t) INSN_UID (ivts->insn);
}
/* An equality function for information about insns to split.  */

inline bool
iv_split_hasher::equal (const value_type *i1, const compare_type *i2)
{
  return i1->insn == i2->insn;
}
/* Hashtable helper for var_to_expand.  */

struct var_expand_hasher : typed_free_remove <var_to_expand>
{
  typedef var_to_expand value_type;
  typedef var_to_expand compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};
/* Return a hash for VES.  */

inline hashval_t
var_expand_hasher::hash (const value_type *ves)
{
  return (hashval_t) INSN_UID (ves->insn);
}
/* Return true if I1 and I2 refer to the same instruction.  */

inline bool
var_expand_hasher::equal (const value_type *i1, const compare_type *i2)
{
  return i1->insn == i2->insn;
}
/* Information about optimizations applied in the unrolled loop.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
                                                  split.  */
  struct iv_to_split *iv_to_split_head;  /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
                                            insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head;  /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block;      /* The first basic block that was
                                    duplicated.  */
  basic_block loop_exit;         /* The loop exit basic block.  */
  basic_block loop_preheader;    /* The loop preheader basic block.  */
};
static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
static void unroll_loop_stupid (struct loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (struct loop *);
static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (struct loop *, rtx_insn *);
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
static void insert_var_expansion_initialization (struct var_to_expand *,
                                                 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
                                             basic_block);
static rtx get_expansion (struct var_to_expand *);
/* Emit a message summarizing the unroll that will be
   performed for LOOP, along with the loop's location LOCUS, if
   appropriate given the dump or -fopt-info settings.  */

static void
report_unroll (struct loop *loop, location_t locus)
{
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  if (loop->lpt_decision.decision == LPT_NONE)
    return;

  if (!dump_enabled_p ())
    return;

  dump_printf_loc (report_flags, locus,
                   "loop unrolled %d times",
                   loop->lpt_decision.times);
  dump_printf (report_flags,
               " (header execution count %d)",
               (int) loop->header->count);

  dump_printf (report_flags, "\n");
}
/* Decide whether to unroll loops and how much.  */

static void
decide_unrolling (int flags)
{
  struct loop *loop;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;
      location_t locus = get_loop_location (loop);

      if (dump_enabled_p ())
        dump_printf_loc (TDF_RTL, locus,
                         ";; *** Considering loop %d at BB %d for "
                         "unrolling ***\n",
                         loop->num, loop->header->index);

      /* Do not peel cold areas.  */
      if (optimize_loop_for_size_p (loop))
        {
          if (dump_file)
            fprintf (dump_file, ";; Not considering loop, cold area\n");
          continue;
        }

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
        {
          if (dump_file)
            fprintf (dump_file,
                     ";; Not considering loop, cannot duplicate\n");
          continue;
        }

      /* Skip non-innermost loops.  */
      if (loop->inner)
        {
          if (dump_file)
            fprintf (dump_file, ";; Not considering loop, is not innermost\n");
          continue;
        }

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of
         priority.  */

      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
        decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
        decide_unroll_stupid (loop, flags);

      report_unroll (loop, locus);
    }
}
void
unroll_loops (int flags)
{
  struct loop *loop;
  bool changed = false;

  /* Now decide rest of unrolling.  */
  decide_unrolling (flags);

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
        {
        case LPT_UNROLL_CONSTANT:
          unroll_loop_constant_iterations (loop);
          changed = true;
          break;
        case LPT_UNROLL_RUNTIME:
          unroll_loop_runtime_iterations (loop);
          changed = true;
          break;
        case LPT_UNROLL_STUPID:
          unroll_loop_stupid (loop);
          changed = true;
          break;
        case LPT_NONE:
          break;
        default:
          gcc_unreachable ();
        }
    }

  if (changed)
    {
      calculate_dominance_info (CDI_DOMINATORS);
      fix_loop_structure (NULL);
    }
}
/* Check whether exit of the LOOP is at the end of loop body.  */

static bool
loop_exit_at_end_p (struct loop *loop)
{
  struct niter_desc *desc = get_simple_loop_desc (loop);
  rtx_insn *insn;

  /* We should never have a conditional in the latch block.  */
  gcc_assert (desc->in_edge->dest != loop->header);

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    if (INSN_P (insn) && active_insn_p (insn))
      return false;

  return true;
}
/* Decide whether to unroll LOOP iterating a constant number of times
   and how much.  */

static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, just return back silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
             "\n;; Considering unrolling loop with constant "
             "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
        fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
        fprintf (dump_file,
                 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls enough to consider.
     Consult also loop bounds and profile; in the case the loop has more
     than one exit it may well loop less than the determined maximal number
     of iterations.  */
  if (desc->niter < 2 * nunroll
      || ((get_estimated_loop_iterations (loop, &iterations)
           || get_max_loop_iterations (loop, &iterations))
          && wi::ltu_p (iterations, 2 * nunroll)))
    {
      if (dump_file)
        fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute number of iterations to unroll.  We alter
     nunroll so that as few as possible copies of loop body are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i - 1 >= desc->niter)
    i = desc->niter - 2;

  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
        n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
               || desc->noloop_assumptions != NULL_RTX)
        n_copies = exit_mod + i + 2;
      else
        n_copies = i + 1;

      if (n_copies < best_copies)
        {
          best_copies = n_copies;
          best_unroll = i;
        }
    }

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
}
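
/* A worked example of the copy-count computation above (illustrative
   numbers only): with desc->niter == 100 and the exit not at the end of
   the body, the candidate i == 7 (eight copies inside the loop) gives
   exit_mod == 100 % 8 == 4 and n_copies == 4 + 7 + 1 == 12: four peeled
   copies for the remainder plus eight copies forming the unrolled body.
   The loop above simply keeps the candidate with the smallest n_copies.  */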
/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
   The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_constant_iterations (struct loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  sbitmap wont_exit;
  unsigned i;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

  wont_exit = sbitmap_alloc (max_unroll + 1);
  bitmap_ones (wont_exit);

  auto_vec<edge> remove_edges;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave exit test
         in the first copy, so that the loops that start with test
         of exit condition have continuous body after unrolling.  */

      if (dump_file)
        fprintf (dump_file, ";; Condition at beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      bitmap_clear_bit (wont_exit, 0);
      if (desc->noloop_assumptions)
        bitmap_clear_bit (wont_exit, 1);

      if (exit_mod)
        {
          opt_info_start_duplication (opt_info);
          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                              exit_mod,
                                              wont_exit, desc->out_edge,
                                              &remove_edges,
                                              DLTHE_FLAG_UPDATE_FREQ
                                              | (opt_info && exit_mod > 1
                                                 ? DLTHE_RECORD_COPY_NUMBER
                                                 : 0));
          gcc_assert (ok);

          if (opt_info && exit_mod > 1)
            apply_opt_in_copies (opt_info, exit_mod, false, false);

          desc->noloop_assumptions = NULL_RTX;
          desc->niter -= exit_mod;
          loop->nb_iterations_upper_bound -= exit_mod;
          if (loop->any_estimate
              && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
            loop->nb_iterations_estimate -= exit_mod;
          else
            loop->any_estimate = false;
        }

      bitmap_set_bit (wont_exit, 1);
    }
  else
    {
      /* Leave exit test in last copy, for the same reason as above if
         the loop tests the condition at the end of loop body.  */

      if (dump_file)
        fprintf (dump_file, ";; Condition at end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care of
         the case when we would exit before reaching the loop.  So just peel
         exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
          || desc->noloop_assumptions)
        {
          bitmap_clear_bit (wont_exit, 0);
          if (desc->noloop_assumptions)
            bitmap_clear_bit (wont_exit, 1);

          opt_info_start_duplication (opt_info);
          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                              exit_mod + 1,
                                              wont_exit, desc->out_edge,
                                              &remove_edges,
                                              DLTHE_FLAG_UPDATE_FREQ
                                              | (opt_info && exit_mod > 0
                                                 ? DLTHE_RECORD_COPY_NUMBER
                                                 : 0));
          gcc_assert (ok);

          if (opt_info && exit_mod > 0)
            apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

          desc->niter -= exit_mod + 1;
          loop->nb_iterations_upper_bound -= exit_mod + 1;
          if (loop->any_estimate
              && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
            loop->nb_iterations_estimate -= exit_mod + 1;
          else
            loop->any_estimate = false;
          desc->noloop_assumptions = NULL_RTX;

          bitmap_set_bit (wont_exit, 0);
          bitmap_set_bit (wont_exit, 1);
        }

      bitmap_clear_bit (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
                                      max_unroll,
                                      wont_exit, desc->out_edge,
                                      &remove_edges,
                                      DLTHE_FLAG_UPDATE_FREQ
                                      | (opt_info
                                         ? DLTHE_RECORD_COPY_NUMBER
                                         : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
        {
          desc->out_edge = EDGE_SUCC (exit_block, 0);
          desc->in_edge = EDGE_SUCC (exit_block, 1);
        }
      else
        {
          desc->out_edge = EDGE_SUCC (exit_block, 1);
          desc->in_edge = EDGE_SUCC (exit_block, 0);
        }
    }

  desc->niter /= max_unroll + 1;
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  desc->niter_expr = GEN_INT (desc->niter);

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  if (dump_file)
    fprintf (dump_file,
             ";; Unrolled loop %d times, constant # of iterations %i insns\n",
             max_unroll, num_loop_insns (loop));
}
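
/* Illustration of the bookkeeping above, using the example from the
   comment before this function (niter == 102, times == 3) and assuming
   the exit test is at the beginning of the body: exit_mod == 102 % 4 == 2,
   so two iterations are peeled in front of the loop, desc->niter drops to
   100 and, after the final duplication, becomes 100 / 4 == 25, the number
   of times the unrolled body executes.  */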
/* Decide whether to unroll LOOP iterating a runtime-computable number of
   times and how much.  */

static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, just return back silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
             "\n;; Considering unrolling loop with runtime "
             "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
        fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
        fprintf (dump_file,
                 ";; Unable to prove that the number of iterations "
                 "can be counted in runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
        fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
        fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as we are unable to
     cope with overflows in computation of the number of iterations.  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;
}
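
/* For instance (illustrative values only), if the size limits above allow
   nunroll == 7, the loop terminates with i == 4, so lpt_decision.times is
   set to 3 and the body will be duplicated three times, giving four
   copies: the largest power of two not exceeding the allowed factor.  */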
/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx_insn *insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the verify_flow_info at the end of the RTL loop passes would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

     * If do_compare_rtx_and_jump needs several branches to do comparison
       in a mode that needs comparison by parts, we cannot analyze the
       number of iterations of the loop, and we never get to unrolling it.

     * The code in expand_divmod that was suspected to cause creation of
       branching code seems to be only accessed for signed division.  The
       divisions used by # of iterations analysis are always unsigned.
       Problems might arise on architectures that emit branching code
       for some operations that may appear in the unroller (especially
       for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  */

  return bb;
}
/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
   in order to create a jump.  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
                      rtx_insn *cinsn)
{
  rtx_insn *seq, *jump;
  rtx cond;
  machine_mode mode;

  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way to make a
         conditional jump from a ccmode comparison.  */
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = get_last_insn ();
      gcc_assert (JUMP_P (jump));
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
                               mode, NULL_RTX, NULL_RTX, label, -1);
      jump = get_last_insn ();
      gcc_assert (JUMP_P (jump));
      JUMP_LABEL (jump) = label;
      LABEL_NUSES (label)++;
    }
  add_int_reg_note (jump, REG_BR_PROB, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}
/* Unroll LOOP for which we are able to count the number of iterations at
   runtime LOOP->LPT_DECISION.TIMES times.  The transformation does this
   (with some extra care for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j, p, n_peel;
  basic_block preheader, *body, swtch, ezc_swtch;
  sbitmap wont_exit;
  int may_exit_copy;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
        if (!flow_bb_inside_loop_p (loop, bb))
          dom_bbs.safe_push (bb);
    }

  if (!exit_at_end)
    {
      /* Leave exit in first copy (for explanation why see comment in
         unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave exit in last copy (for explanation why see comment in
         unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get expression for number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* Count modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
                               niter, gen_int_mode (max_unroll, desc->mode),
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  wont_exit = sbitmap_alloc (max_unroll + 2);

  /* Peel the first copy of loop body (almost always we must leave exit test
     here; the only exception is when we have extra zero check and the number
     of iterations is reliable).  Also record the place of the (possible)
     extra zero check.  */
  bitmap_clear (wont_exit);
  if (extra_zero_check
      && !desc->noloop_assumptions)
    bitmap_set_bit (wont_exit, 1);
  ezc_swtch = loop_preheader_edge (loop)->src;
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                      1, wont_exit, desc->out_edge,
                                      &remove_edges,
                                      DLTHE_FLAG_UPDATE_FREQ);
  gcc_assert (ok);

  /* Record the place where switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
        bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                          1, wont_exit, desc->out_edge,
                                          &remove_edges,
                                          DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create item for switch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = REG_BR_PROB_BASE / (i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
                                          block_label (preheader), p,
                                          NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
         and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add branch for zero iterations.  */
      p = REG_BR_PROB_BASE / (max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
                                          block_label (preheader), p,
                                          NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll loop.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
                                      max_unroll,
                                      wont_exit, desc->out_edge,
                                      &remove_edges,
                                      DLTHE_FLAG_UPDATE_FREQ
                                      | (opt_info
                                         ? DLTHE_RECORD_COPY_NUMBER
                                         : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
         made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
        {
          desc->out_edge = EDGE_SUCC (exit_block, 0);
          desc->in_edge = EDGE_SUCC (exit_block, 1);
        }
      else
        {
          desc->out_edge = EDGE_SUCC (exit_block, 1);
          desc->in_edge = EDGE_SUCC (exit_block, 0);
        }
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
                         gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (exit_at_end)
    {
      desc->niter_expr =
        simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
          && loop->nb_iterations_estimate != 0)
        --loop->nb_iterations_estimate;
      else
        loop->any_estimate = false;
    }

  if (dump_file)
    fprintf (dump_file,
             ";; Unrolled loop %d times, counting # of iterations "
             "in runtime, %i insns\n",
             max_unroll, num_loop_insns (loop));
}
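
/* Sketch of the preconditioning above with max_unroll == 3 (illustrative
   values only): init_code computes niter & 3, which equals niter % 4 even
   if the expression for niter overflowed, because the unroll factor is a
   power of two.  The emitted compare-and-jump sequences then route
   execution past zero, one, two or three peeled copies accordingly, and
   the remaining iterations run through the body duplicated four times.  */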
/* Decide whether to unroll LOOP stupidly and how much.  */

static void
decide_unroll_stupid (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL_ALL))
    {
      /* We were not asked to, just return back silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
        fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
        fprintf (dump_file, ";; The loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the number
     of mispredicts.
     TODO: this heuristic needs tuning; a call inside the loop body
     is also a relatively good reason not to unroll.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
        fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
        fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;
}
/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   while (cond)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
   */

static void
unroll_loop_stupid (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  wont_exit = sbitmap_alloc (nunroll + 1);
  bitmap_clear (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
                                      nunroll, wont_exit,
                                      NULL, NULL,
                                      DLTHE_FLAG_UPDATE_FREQ
                                      | (opt_info
                                         ? DLTHE_RECORD_COPY_NUMBER
                                         : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (desc->simple_p)
    {
      /* We indeed may get here provided that there are nontrivial assumptions
         for a loop to be really simple.  We could update the counts, but the
         problem is that we are unable to decide which exit will be taken
         (not really true in case the number of iterations is constant,
         but no one will do anything with this information, so we do not
         care).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
             nunroll, num_loop_insns (loop));
}
/* Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   register.  */

static bool
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
                                  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
        if (!rtx_referenced_p (reg, insn))
          continue;
        else if (DEBUG_INSN_P (insn))
          (*debug_uses)++;
        else if (++count_ref > 1)
          break;
    }
  free (body);
  return (count_ref == 1);
}
/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */

static void
reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
        if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
          continue;
        else
          {
            validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
                             gen_rtx_UNKNOWN_VAR_LOC (), 0);
            if (!--debug_uses)
              break;
          }
    }
  free (body);
}
/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += something;

   Return NULL if INSN contains no opportunity for expansion of the
   accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it
   with the relevant information and return a pointer to it.  */

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  enum rtx_code code;
  int accum_pos;
  int debug_uses = 0;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      if (!flag_associative_math)
        return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
        return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we can not
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
           && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
        return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using; which includes the
         initialization of the expansions with zero and the summation of
         the expansions at the end of the computation will yield wrong
         results for (x = something - x) thus avoid using it in that case.  */
      if (code == MINUS)
        return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
          || rtx_referenced_p (dest, XEXP (src, 1)))
        return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;

  return ves;
}
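
/* A rough source-level picture of what qualifies (hypothetical code; the
   variable names are only for illustration): in

     for (i = 0; i < n; i++)
       sum += a[i];

   the insn computing SUM is an expandable accumulator.  Each copy of the
   unrolled body can then accumulate into its own register (sum1, sum2,
   ...), and the partial results are combined at the loop exit by
   combine_var_copies_in_loop_exit.  */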
/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and
   return a pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  struct rtx_iv iv;
  struct iv_to_split *ivts;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  if (!REG_P (dest))
    return NULL;

  if (!biv_p (insn, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (!ok)
    return NULL;

  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}
/* Determines which of the insns in LOOP can be optimized.
   Return an OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (struct loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
        = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
        {
          opt_info->loop_exit = split_edge (exit);
          can_apply = true;
        }
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
        = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
        continue;

      FOR_BB_INSNS (bb, insn)
        {
          if (!INSN_P (insn))
            continue;

          if (opt_info->insns_to_split)
            ivts = analyze_iv_to_split_insn (insn);

          if (ivts)
            {
              slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
              gcc_assert (*slot1 == NULL);
              *slot1 = ivts;
              *opt_info->iv_to_split_tail = ivts;
              opt_info->iv_to_split_tail = &ivts->next;
              continue;
            }

          if (opt_info->insns_with_var_to_expand)
            ves = analyze_insn_to_expand_var (loop, insn);

          if (ves)
            {
              slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
              gcc_assert (*slot2 == NULL);
              *slot2 = ves;
              *opt_info->var_to_expand_tail = ves;
              opt_info->var_to_expand_tail = &ves->next;
            }
        }
    }

  edges.release ();
  free (body);
  return opt_info;
}
/* Called just before loop duplication.  Records start of duplicated area
   to OPT_INFO.  */

static void
opt_info_start_duplication (struct opt_info *opt_info)
{
  if (opt_info)
    opt_info->first_new_block = last_basic_block_for_fn (cfun);
}
/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  if (unrolling)
    {
      /* If we are unrolling, initialization is done in the original loop
         body (number 0).  */
      return n_copy;
    }
  else
    {
      /* If we are peeling, the copy in which the initialization occurs has
         number 1.  The original loop (number 0) is the last.  */
      if (n_copy)
        return n_copy - 1;
      else
        return n_copies;
    }
}
/* Allocate basic variable for the induction variable chain.  */

static void
allocate_basic_variable (struct iv_to_split *ivts)
{
  rtx expr = SET_SRC (single_set (ivts->insn));

  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
}
/* Insert initialization of basic variable of IVTS before INSN, taking
   the initial value from INSN.  */

static void
insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
{
  rtx expr = copy_rtx (SET_SRC (single_set (insn)));
  rtx_insn *seq;

  start_sequence ();
  expr = force_operand (expr, ivts->base_var);
  if (expr != ivts->base_var)
    emit_move_insn (ivts->base_var, expr);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
}
/* Replace the use of the induction variable described in IVTS in INSN
   by base variable + DELTA * step.  */

static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx src, dest, set;
  rtx_insn *seq;
  machine_mode mode = GET_MODE (ivts->base_var);

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
                                  ivts->step, gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
                                  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
  delete_insn (insn);
}
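
/* Rough illustration of the effect of split_iv (hypothetical RTL written
   as C): if the original body contains i = i + step, the copies of the
   unrolled loop would form a long dependence chain i, i + step,
   i + 2*step, ...  After splitting, copy number DELTA instead computes
   base_var + DELTA * step directly, so the copies become independent of
   one another.  */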
/* Return one expansion of the accumulator recorded in struct VE.  */

static rtx
get_expansion (struct var_to_expand *ve)
{
  rtx reg;

  if (ve->reuse_expansion == 0)
    reg = ve->reg;
  else
    reg = ve->var_expansions[ve->reuse_expansion - 1];

  if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
    ve->reuse_expansion = 0;
  else
    ve->reuse_expansion++;

  return reg;
}
/* Given INSN replace the uses of the accumulator recorded in VE
   with a new register.  */

static void
expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
{
  rtx new_reg, set;
  bool really_new_expansion = false;

  set = single_set (insn);
  gcc_assert (set);

  /* Generate a new register only if the expansion limit has not been
     reached.  Else reuse an already existing expansion.  */
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
    {
      really_new_expansion = true;
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
    }
  else
    new_reg = get_expansion (ve);

  validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
  if (apply_change_group ())
    if (really_new_expansion)
      {
        ve->var_expansions.safe_push (new_reg);
        ve->expansion_count++;
      }
}
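
/* For example (illustrative parameter value; see params.def for the actual
   default), with --param max-variable-expansions-in-unroller=2 the first
   two copies of the accumulating insn receive fresh registers; once the
   limit is reached, later copies cycle through the original accumulator
   and the existing expansions via get_expansion instead of creating new
   registers.  */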
/* Initialize the variable expansions in the loop preheader.  PLACE is the
   loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus to honor signed zero.  This
   way we can prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:

     for (i = 0 ; i < n; i++)
       sum += something;

   When SUM is initialized with -zero and SOMETHING is also -zero, the
   final result of sum should be -zero, thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  */

static void
insert_var_expansion_initialization (struct var_to_expand *ve,
                                     basic_block place)
{
  rtx_insn *seq;
  rtx var, zero_init;
  unsigned i;
  machine_mode mode = GET_MODE (ve->reg);
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
        {
          if (honor_signed_zero_p)
            zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
          else
            zero_init = CONST0_RTX (mode);
          emit_move_insn (var, zero_init);
        }
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
        {
          zero_init = CONST1_RTX (GET_MODE (var));
          emit_move_insn (var, zero_init);
        }
      break;

    default:
      gcc_unreachable ();
    }

  seq = get_insns ();
  end_sequence ();

  emit_insn_after (seq, BB_END (place));
}
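
/* Concretely: with IEEE semantics (-0.0) + (-0.0) is -0.0, while
   (+0.0) + (-0.0) is +0.0.  If SUM should end up as -0.0 but the extra
   expansions had been initialized with +0.0, the final combination would
   wrongly produce +0.0; initializing them with -0.0 (via the NEG of
   CONST0_RTX above) preserves the sign.  */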
/* Combine the variable expansions at the loop exit.  PLACE is the
   loop exit basic block where the summation of the expansions should
   take place.  */

static void
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
{
  rtx sum = ve->reg;
  rtx expr, var;
  rtx_insn *seq, *insn;
  unsigned i;

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
        sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
        sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
      break;

    default:
      gcc_unreachable ();
    }

  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}
/* Strip away REG_EQUAL notes for IVs we're splitting.

   Updating REG_EQUAL notes for IVs we split is tricky: We
   cannot tell until after unrolling, DF-rescanning, and liveness
   updating, whether an EQ_USE is reached by the split IV while
   the IV reg is still live.  See PR55006.

   ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
   because RTL loop-iv requires us to defer rescanning insns and
   any notes attached to them.  So resort to old techniques...  */

static void
maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
{
  struct iv_to_split *ivts;
  rtx note = find_reg_equal_equiv_note (insn);
  if (! note)
    return;
  for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
    if (reg_mentioned_p (ivts->orig_var, note))
      {
        remove_note (insn, note);
        return;
      }
}
/* Apply loop optimizations in loop copies using the data gathered during
   the unrolling.  Structure OPT_INFO records that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body
   of the loop (as it should happen in complete unrolling, but not in
   ordinary peeling of the loop).  */

static void
apply_opt_in_copies (struct opt_info *opt_info,
                     unsigned n_copies, bool unrolling,
                     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx_insn *insn, *orig_insn, *next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds position in copy sequence initialized by
         duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t) bb->aux, n_copies,
                                        unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      FOR_BB_INSNS_SAFE (bb, insn, next)
        {
          if (!INSN_P (insn)
              || (DEBUG_INSN_P (insn)
                  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
            continue;

          while (!INSN_P (orig_insn)
                 || (DEBUG_INSN_P (orig_insn)
                     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
                         == LABEL_DECL)))
            orig_insn = NEXT_INSN (orig_insn);

          ivts_templ.insn = orig_insn;
          ve_templ.insn = orig_insn;

          /* Apply splitting iv optimization.  */
          if (opt_info->insns_to_split)
            {
              maybe_strip_eq_note_for_split_iv (opt_info, insn);

              ivts = opt_info->insns_to_split->find (&ivts_templ);

              if (ivts)
                {
                  gcc_assert (GET_CODE (PATTERN (insn))
                              == GET_CODE (PATTERN (orig_insn)));

                  if (!delta)
                    insert_base_initialization (ivts, insn);
                  split_iv (ivts, insn, delta);
                }
            }
          /* Apply variable expansion optimization.  */
          if (unrolling && opt_info->insns_with_var_to_expand)
            {
              ves = (struct var_to_expand *)
                opt_info->insns_with_var_to_expand->find (&ve_templ);
              if (ves)
                {
                  gcc_assert (GET_CODE (PATTERN (insn))
                              == GET_CODE (PATTERN (orig_insn)));
                  expand_var_during_unrolling (ves, insn);
                }
            }
          orig_insn = NEXT_INSN (orig_insn);
        }
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }

  /* Rewrite also the original loop body.  Find the blocks as originals of
     the blocks in the last copied iteration, i.e. those that have
     get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
        continue;

      delta = determine_split_iv_delta (0, n_copies, unrolling);
      for (orig_insn = BB_HEAD (orig_bb);
           orig_insn != NEXT_INSN (BB_END (bb));
           orig_insn = next)
        {
          next = NEXT_INSN (orig_insn);

          if (!INSN_P (orig_insn))
            continue;

          ivts_templ.insn = orig_insn;
          if (opt_info->insns_to_split)
            {
              maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);

              ivts = (struct iv_to_split *)
                opt_info->insns_to_split->find (&ivts_templ);
              if (ivts)
                {
                  if (!delta)
                    insert_base_initialization (ivts, orig_insn);
                  split_iv (ivts, orig_insn, delta);
                  continue;
                }
            }
        }
    }
}
/* Release OPT_INFO.  */

static void
free_opt_info (struct opt_info *opt_info)
{
  delete opt_info->insns_to_split;
  opt_info->insns_to_split = NULL;
  if (opt_info->insns_with_var_to_expand)
    {
      struct var_to_expand *ves;

      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        ves->var_expansions.release ();
      delete opt_info->insns_with_var_to_expand;
      opt_info->insns_with_var_to_expand = NULL;
    }

  free (opt_info);
}