/* Loop unrolling and peeling.
- Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010
+ Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010, 2011
Free Software Foundation, Inc.
This file is part of GCC.
#include "obstack.h"
#include "basic-block.h"
#include "cfgloop.h"
-#include "cfglayout.h"
#include "params.h"
-#include "output.h"
#include "expr.h"
#include "hashtab.h"
#include "recog.h"
+#include "target.h"
+#include "dumpfile.h"
/* This pass performs loop unrolling and peeling. We only perform these
optimizations on innermost loops (with single exception) because
struct iv_to_split
{
rtx insn; /* The insn in which the induction variable occurs. */
+ rtx orig_var; /* The variable (register) for the IV before split. */
rtx base_var; /* The variable on that the values in the further
iterations are based. */
rtx step; /* Step of the induction variable. */
{
rtx insn; /* The insn in which the variable expansion occurs. */
rtx reg; /* The accumulator which is expanded. */
- VEC(rtx,heap) *var_expansions; /* The copies of the accumulator which is expanded. */
+ vec<rtx> var_expansions; /* The copies of the accumulator which is expanded. */
struct var_to_expand *next; /* Next entry in walking order. */
enum rtx_code op; /* The type of the accumulation - addition, subtraction
or multiplication. */
the accumulator. If REUSE_EXPANSION is 0 reuse
the original accumulator. Else use
var_expansions[REUSE_EXPANSION - 1]. */
- unsigned accum_pos; /* The position in which the accumulator is placed in
- the insn src. For example in x = x + something
- accum_pos is 0 while in x = something + x accum_pos
- is 1. */
};
/* Information about optimization applied in
if (check)
{
#ifdef ENABLE_CHECKING
- verify_dominators (CDI_DOMINATORS);
verify_loop_structure ();
#endif
}
/* Check that the latch is empty. */
FOR_BB_INSNS (loop->latch, insn)
{
- if (INSN_P (insn))
+ if (NONDEBUG_INSN_P (insn))
return false;
}
{
peel_loop_completely (loop);
#ifdef ENABLE_CHECKING
- verify_dominators (CDI_DOMINATORS);
verify_loop_structure ();
#endif
}
|| desc->assumptions
|| desc->infinite
|| !desc->const_iter
- || desc->niter != 0)
+ || (desc->niter != 0
+ && max_loop_iterations_int (loop) != 0))
{
if (dump_file)
fprintf (dump_file,
sbitmap wont_exit;
unsigned HOST_WIDE_INT npeel;
unsigned i;
- VEC (edge, heap) *remove_edges;
+ vec<edge> remove_edges;
edge ein;
struct niter_desc *desc = get_simple_loop_desc (loop);
struct opt_info *opt_info = NULL;
bool ok;
wont_exit = sbitmap_alloc (npeel + 1);
- sbitmap_ones (wont_exit);
- RESET_BIT (wont_exit, 0);
+ bitmap_ones (wont_exit);
+ bitmap_clear_bit (wont_exit, 0);
if (desc->noloop_assumptions)
- RESET_BIT (wont_exit, 1);
+ bitmap_clear_bit (wont_exit, 1);
- remove_edges = NULL;
+ remove_edges.create (0);
if (flag_split_ivs_in_unroller)
opt_info = analyze_insns_in_loop (loop);
}
/* Remove the exit edges. */
- for (i = 0; VEC_iterate (edge, remove_edges, i, ein); i++)
+ FOR_EACH_VEC_ELT (remove_edges, i, ein)
remove_path (ein);
- VEC_free (edge, heap, remove_edges);
+ remove_edges.release ();
}
ein = desc->in_edge;
{
unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
struct niter_desc *desc;
+ double_int iterations;
if (!(flags & UAP_UNROLL))
{
return;
}
- /* Check whether the loop rolls enough to consider. */
- if (desc->niter < 2 * nunroll)
+ /* Check whether the loop rolls enough to consider.
+ Consult also loop bounds and profile; in the case the loop has more
+ than one exit it may well loop less than determined maximal number
+ of iterations. */
+ if (desc->niter < 2 * nunroll
+ || ((estimated_loop_iterations (loop, &iterations)
+ || max_loop_iterations (loop, &iterations))
+ && iterations.ult (double_int::from_shwi (2 * nunroll))))
{
if (dump_file)
fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
}
}
- if (dump_file)
- fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
- best_unroll + 1, best_copies, nunroll);
-
loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
loop->lpt_decision.times = best_unroll;
if (dump_file)
- fprintf (dump_file,
- ";; Decided to unroll the constant times rolling loop, %d times.\n",
- loop->lpt_decision.times);
+ fprintf (dump_file, ";; Decided to unroll the loop %d times (%d copies).\n",
+ loop->lpt_decision.times, best_copies);
}
-/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
- times. The transformation does this:
+/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
+ The transformation does this:
for (i = 0; i < 102; i++)
body;
- ==>
+ ==> (LOOP->LPT_DECISION.TIMES == 3)
i = 0;
body; i++;
unsigned exit_mod;
sbitmap wont_exit;
unsigned i;
- VEC (edge, heap) *remove_edges;
+ vec<edge> remove_edges;
edge e;
unsigned max_unroll = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
exit_mod = niter % (max_unroll + 1);
wont_exit = sbitmap_alloc (max_unroll + 1);
- sbitmap_ones (wont_exit);
+ bitmap_ones (wont_exit);
- remove_edges = NULL;
+ remove_edges.create (0);
if (flag_split_ivs_in_unroller
|| flag_variable_expansion_in_unroller)
opt_info = analyze_insns_in_loop (loop);
of exit condition have continuous body after unrolling. */
if (dump_file)
- fprintf (dump_file, ";; Condition on beginning of loop.\n");
+ fprintf (dump_file, ";; Condition at beginning of loop.\n");
/* Peel exit_mod iterations. */
- RESET_BIT (wont_exit, 0);
+ bitmap_clear_bit (wont_exit, 0);
if (desc->noloop_assumptions)
- RESET_BIT (wont_exit, 1);
+ bitmap_clear_bit (wont_exit, 1);
if (exit_mod)
{
desc->noloop_assumptions = NULL_RTX;
desc->niter -= exit_mod;
- desc->niter_max -= exit_mod;
+ loop->nb_iterations_upper_bound -= double_int::from_uhwi (exit_mod);
+ if (loop->any_estimate
+ && double_int::from_uhwi (exit_mod).ule
+ (loop->nb_iterations_estimate))
+ loop->nb_iterations_estimate -= double_int::from_uhwi (exit_mod);
+ else
+ loop->any_estimate = false;
}
- SET_BIT (wont_exit, 1);
+ bitmap_set_bit (wont_exit, 1);
}
else
{
the loop tests the condition at the end of loop body. */
if (dump_file)
- fprintf (dump_file, ";; Condition on end of loop.\n");
+ fprintf (dump_file, ";; Condition at end of loop.\n");
/* We know that niter >= max_unroll + 2; so we do not need to care of
case when we would exit before reaching the loop. So just peel
if (exit_mod != max_unroll
|| desc->noloop_assumptions)
{
- RESET_BIT (wont_exit, 0);
+ bitmap_clear_bit (wont_exit, 0);
if (desc->noloop_assumptions)
- RESET_BIT (wont_exit, 1);
+ bitmap_clear_bit (wont_exit, 1);
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
desc->niter -= exit_mod + 1;
- desc->niter_max -= exit_mod + 1;
+ loop->nb_iterations_upper_bound -= double_int::from_uhwi (exit_mod + 1);
+ if (loop->any_estimate
+ && double_int::from_uhwi (exit_mod + 1).ule
+ (loop->nb_iterations_estimate))
+ loop->nb_iterations_estimate -= double_int::from_uhwi (exit_mod + 1);
+ else
+ loop->any_estimate = false;
desc->noloop_assumptions = NULL_RTX;
- SET_BIT (wont_exit, 0);
- SET_BIT (wont_exit, 1);
+ bitmap_set_bit (wont_exit, 0);
+ bitmap_set_bit (wont_exit, 1);
}
- RESET_BIT (wont_exit, max_unroll);
+ bitmap_clear_bit (wont_exit, max_unroll);
}
/* Now unroll the loop. */
}
desc->niter /= max_unroll + 1;
- desc->niter_max /= max_unroll + 1;
+ loop->nb_iterations_upper_bound
+ = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (max_unroll
+ + 1),
+ TRUNC_DIV_EXPR);
+ if (loop->any_estimate)
+ loop->nb_iterations_estimate
+ = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (max_unroll
+ + 1),
+ TRUNC_DIV_EXPR);
desc->niter_expr = GEN_INT (desc->niter);
/* Remove the edges. */
- for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
+ FOR_EACH_VEC_ELT (remove_edges, i, e)
remove_path (e);
- VEC_free (edge, heap, remove_edges);
+ remove_edges.release ();
if (dump_file)
fprintf (dump_file,
{
unsigned nunroll, nunroll_by_av, i;
struct niter_desc *desc;
+ double_int iterations;
if (!(flags & UAP_UNROLL))
{
if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+ if (targetm.loop_unroll_adjust)
+ nunroll = targetm.loop_unroll_adjust (nunroll, loop);
+
/* Skip big loops. */
if (nunroll <= 1)
{
return;
}
- /* If we have profile feedback, check whether the loop rolls. */
- if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
+ /* Check whether the loop rolls. */
+ if ((estimated_loop_iterations (loop, &iterations)
+ || max_loop_iterations (loop, &iterations))
+ && iterations.ult (double_int::from_shwi (2 * nunroll)))
{
if (dump_file)
fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
loop->lpt_decision.times = i - 1;
if (dump_file)
- fprintf (dump_file,
- ";; Decided to unroll the runtime computable "
- "times rolling loop, %d times.\n",
+ fprintf (dump_file, ";; Decided to unroll the loop %d times.\n",
loop->lpt_decision.times);
}
CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
and call break_superblocks when going out of cfglayout mode. But it
turns out that this never happens; and that if it does ever happen,
- the verify_flow_info call in loop_optimizer_finalize would fail.
+ the TODO_verify_flow at the end of the RTL loop passes would fail.
There are two reasons why we expected we could have control flow insns
in INSNS. The first is when a comparison has to be done in parts, and
return bb;
}
-/* Unroll LOOP for that we are able to count number of iterations in runtime
- LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
+/* Unroll LOOP for which we are able to count number of iterations in runtime
+ LOOP->LPT_DECISION.TIMES times. The transformation does this (with some
extra care for case n < 0):
for (i = 0; i < n; i++)
body;
- ==>
+ ==> (LOOP->LPT_DECISION.TIMES == 3)
i = 0;
mod = n % 4;
rtx old_niter, niter, init_code, branch_code, tmp;
unsigned i, j, p;
basic_block preheader, *body, swtch, ezc_swtch;
- VEC (basic_block, heap) *dom_bbs;
+ vec<basic_block> dom_bbs;
sbitmap wont_exit;
int may_exit_copy;
unsigned n_peel;
- VEC (edge, heap) *remove_edges;
+ vec<edge> remove_edges;
edge e;
bool extra_zero_check, last_may_exit;
unsigned max_unroll = loop->lpt_decision.times;
opt_info = analyze_insns_in_loop (loop);
/* Remember blocks whose dominators will have to be updated. */
- dom_bbs = NULL;
+ dom_bbs.create (0);
body = get_loop_body (loop);
for (i = 0; i < loop->num_nodes; i++)
{
- VEC (basic_block, heap) *ldom;
+ vec<basic_block> ldom;
basic_block bb;
ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
- for (j = 0; VEC_iterate (basic_block, ldom, j, bb); j++)
+ FOR_EACH_VEC_ELT (ldom, j, bb)
if (!flow_bb_inside_loop_p (loop, bb))
- VEC_safe_push (basic_block, heap, dom_bbs, bb);
+ dom_bbs.safe_push (bb);
- VEC_free (basic_block, heap, ldom);
+ ldom.release ();
}
free (body);
/* Precondition the loop. */
split_edge_and_insert (loop_preheader_edge (loop), init_code);
- remove_edges = NULL;
+ remove_edges.create (0);
wont_exit = sbitmap_alloc (max_unroll + 2);
here; the only exception is when we have extra zero check and the number
of iterations is reliable. Also record the place of (possible) extra
zero check. */
- sbitmap_zero (wont_exit);
+ bitmap_clear (wont_exit);
if (extra_zero_check
&& !desc->noloop_assumptions)
- SET_BIT (wont_exit, 1);
+ bitmap_set_bit (wont_exit, 1);
ezc_swtch = loop_preheader_edge (loop)->src;
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1, wont_exit, desc->out_edge,
for (i = 0; i < n_peel; i++)
{
/* Peel the copy. */
- sbitmap_zero (wont_exit);
+ bitmap_clear (wont_exit);
if (i != n_peel - 1 || !last_may_exit)
- SET_BIT (wont_exit, 1);
+ bitmap_set_bit (wont_exit, 1);
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1, wont_exit, desc->out_edge,
&remove_edges,
single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
e = make_edge (swtch, preheader,
single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
+ e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
e->probability = p;
}
single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
e = make_edge (swtch, preheader,
single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
+ e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
e->probability = p;
}
/* And unroll loop. */
- sbitmap_ones (wont_exit);
- RESET_BIT (wont_exit, may_exit_copy);
+ bitmap_ones (wont_exit);
+ bitmap_clear_bit (wont_exit, may_exit_copy);
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
}
/* Remove the edges. */
- for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
+ FOR_EACH_VEC_ELT (remove_edges, i, e)
remove_path (e);
- VEC_free (edge, heap, remove_edges);
+ remove_edges.release ();
/* We must be careful when updating the number of iterations due to
preconditioning and the fact that the value must be valid at entry
desc->niter_expr =
simplify_gen_binary (UDIV, desc->mode, old_niter,
GEN_INT (max_unroll + 1));
- desc->niter_max /= max_unroll + 1;
+ loop->nb_iterations_upper_bound
+ = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (max_unroll
+ + 1),
+ TRUNC_DIV_EXPR);
+ if (loop->any_estimate)
+ loop->nb_iterations_estimate
+ = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (max_unroll
+ + 1),
+ TRUNC_DIV_EXPR);
if (exit_at_end)
{
desc->niter_expr =
simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
desc->noloop_assumptions = NULL_RTX;
- desc->niter_max--;
+ --loop->nb_iterations_upper_bound;
+ if (loop->any_estimate
+ && loop->nb_iterations_estimate != double_int_zero)
+ --loop->nb_iterations_estimate;
+ else
+ loop->any_estimate = false;
}
if (dump_file)
"in runtime, %i insns\n",
max_unroll, num_loop_insns (loop));
- VEC_free (basic_block, heap, dom_bbs);
+ dom_bbs.release ();
}
/* Decide whether to simply peel LOOP and how much. */
decide_peel_simple (struct loop *loop, int flags)
{
unsigned npeel;
- struct niter_desc *desc;
+ double_int iterations;
if (!(flags & UAP_PEEL))
{
return;
}
- /* Check for simple loops. */
- desc = get_simple_loop_desc (loop);
-
- /* Check number of iterations. */
- if (desc->simple_p && !desc->assumptions && desc->const_iter)
- {
- if (dump_file)
- fprintf (dump_file, ";; Loop iterates constant times\n");
- return;
- }
-
/* Do not simply peel loops with branches inside -- it increases number
- of mispredicts. */
- if (num_loop_branches (loop) > 1)
+ of mispredicts.
+ An exception is when we do have a profile and thus a good chance
+ to peel the proper number of iterations the loop will iterate in practice.
+ TODO: this heuristic needs tuning; while for complete unrolling
+ the branch inside the loop mostly eliminates any improvements, for
+ peeling it is not the case. Also a function call inside the loop is
+ also a branch from the branch prediction POV (and probably a better reason
+ to not unroll/peel). */
+ if (num_loop_branches (loop) > 1
+ && profile_status != PROFILE_READ)
{
if (dump_file)
fprintf (dump_file, ";; Not peeling, contains branches\n");
return;
}
- if (loop->header->count)
+ /* If we have a realistic estimate of the number of iterations, use it. */
+ if (estimated_loop_iterations (loop, &iterations))
{
- unsigned niter = expected_loop_iterations (loop);
- if (niter + 1 > npeel)
+ if (double_int::from_shwi (npeel).ule (iterations))
{
if (dump_file)
{
fprintf (dump_file, ";; Not peeling loop, rolls too much (");
fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
- (HOST_WIDEST_INT) (niter + 1));
+ (HOST_WIDEST_INT) (iterations.to_shwi () + 1));
fprintf (dump_file, " iterations > %d [maximum peelings])\n",
npeel);
}
return;
}
- npeel = niter + 1;
+ npeel = iterations.to_shwi () + 1;
}
+ /* If we have a small enough bound on the number of iterations, we can still
+ peel (completely unroll). */
+ else if (max_loop_iterations (loop, &iterations)
+ && iterations.ult (double_int::from_shwi (npeel)))
+ npeel = iterations.to_shwi () + 1;
else
{
/* For now we have no good heuristics to decide whether loop peeling
loop->lpt_decision.times = npeel;
if (dump_file)
- fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
+ fprintf (dump_file, ";; Decided to simply peel the loop %d times.\n",
loop->lpt_decision.times);
}
-/* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
+/* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this:
+
while (cond)
body;
- ==>
+ ==> (LOOP->LPT_DECISION.TIMES == 3)
if (!cond) goto end;
body;
if (!cond) goto end;
body;
+ if (!cond) goto end;
+ body;
while (cond)
body;
end: ;
opt_info = analyze_insns_in_loop (loop);
wont_exit = sbitmap_alloc (npeel + 1);
- sbitmap_zero (wont_exit);
+ bitmap_clear (wont_exit);
opt_info_start_duplication (opt_info);
{
unsigned nunroll, nunroll_by_av, i;
struct niter_desc *desc;
+ double_int iterations;
if (!(flags & UAP_UNROLL_ALL))
{
if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+ if (targetm.loop_unroll_adjust)
+ nunroll = targetm.loop_unroll_adjust (nunroll, loop);
+
/* Skip big loops. */
if (nunroll <= 1)
{
}
/* Do not unroll loops with branches inside -- it increases number
- of mispredicts. */
+ of mispredicts.
+ TODO: this heuristic needs tuning; a call inside the loop body
+ is also a relatively good reason to not unroll. */
if (num_loop_branches (loop) > 1)
{
if (dump_file)
return;
}
- /* If we have profile feedback, check whether the loop rolls. */
- if (loop->header->count
- && expected_loop_iterations (loop) < 2 * nunroll)
+ /* Check whether the loop rolls. */
+ if ((estimated_loop_iterations (loop, &iterations)
+ || max_loop_iterations (loop, &iterations))
+ && iterations.ult (double_int::from_shwi (2 * nunroll)))
{
if (dump_file)
fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
loop->lpt_decision.times = i - 1;
if (dump_file)
- fprintf (dump_file,
- ";; Decided to unroll the loop stupidly, %d times.\n",
+ fprintf (dump_file, ";; Decided to unroll the loop stupidly %d times.\n",
loop->lpt_decision.times);
}
-/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
+/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this:
+
while (cond)
body;
- ==>
+ ==> (LOOP->LPT_DECISION.TIMES == 3)
while (cond)
{
wont_exit = sbitmap_alloc (nunroll + 1);
- sbitmap_zero (wont_exit);
+ bitmap_clear (wont_exit);
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx insn)
{
- rtx set, dest, src, op1, op2, something;
+ rtx set, dest, src;
struct var_to_expand *ves;
- enum machine_mode mode1, mode2;
unsigned accum_pos;
+ enum rtx_code code;
int debug_uses = 0;
set = single_set (insn);
dest = SET_DEST (set);
src = SET_SRC (set);
+ code = GET_CODE (src);
- if (GET_CODE (src) != PLUS
- && GET_CODE (src) != MINUS
- && GET_CODE (src) != MULT)
+ if (code != PLUS && code != MINUS && code != MULT && code != FMA)
return NULL;
+ if (FLOAT_MODE_P (GET_MODE (dest)))
+ {
+ if (!flag_associative_math)
+ return NULL;
+ /* In the case of FMA, we're also changing the rounding. */
+ if (code == FMA && !flag_unsafe_math_optimizations)
+ return NULL;
+ }
+
/* Hmm, this is a bit paradoxical. We know that INSN is a valid insn
in MD. But if there is no optab to generate the insn, we can not
perform the variable expansion. This can happen if an MD provides
So we check have_insn_for which looks for an optab for the operation
in SRC. If it doesn't exist, we can't perform the expansion even
though INSN is valid. */
- if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
+ if (!have_insn_for (code, GET_MODE (src)))
return NULL;
- op1 = XEXP (src, 0);
- op2 = XEXP (src, 1);
-
if (!REG_P (dest)
&& !(GET_CODE (dest) == SUBREG
&& REG_P (SUBREG_REG (dest))))
return NULL;
- if (rtx_equal_p (dest, op1))
+ /* Find the accumulator use within the operation. */
+ if (code == FMA)
+ {
+ /* We only support accumulation via FMA in the ADD position. */
+ if (!rtx_equal_p (dest, XEXP (src, 2)))
+ return NULL;
+ accum_pos = 2;
+ }
+ else if (rtx_equal_p (dest, XEXP (src, 0)))
accum_pos = 0;
- else if (rtx_equal_p (dest, op2))
- accum_pos = 1;
+ else if (rtx_equal_p (dest, XEXP (src, 1)))
+ {
+ /* The method of expansion that we are using; which includes the
+ initialization of the expansions with zero and the summation of
+ the expansions at the end of the computation will yield wrong
+ results for (x = something - x) thus avoid using it in that case. */
+ if (code == MINUS)
+ return NULL;
+ accum_pos = 1;
+ }
else
return NULL;
- /* The method of expansion that we are using; which includes
- the initialization of the expansions with zero and the summation of
- the expansions at the end of the computation will yield wrong results
- for (x = something - x) thus avoid using it in that case. */
- if (accum_pos == 1
- && GET_CODE (src) == MINUS)
- return NULL;
-
- something = (accum_pos == 0) ? op2 : op1;
-
- if (rtx_referenced_p (dest, something))
+ /* It must not otherwise be used. */
+ if (code == FMA)
+ {
+ if (rtx_referenced_p (dest, XEXP (src, 0))
+ || rtx_referenced_p (dest, XEXP (src, 1)))
+ return NULL;
+ }
+ else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
return NULL;
+ /* It must be used in exactly one insn. */
if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
return NULL;
- mode1 = GET_MODE (dest);
- mode2 = GET_MODE (something);
- if ((FLOAT_MODE_P (mode1)
- || FLOAT_MODE_P (mode2))
- && !flag_associative_math)
- return NULL;
-
if (dump_file)
- {
- fprintf (dump_file,
- "\n;; Expanding Accumulator ");
- print_rtl (dump_file, dest);
- fprintf (dump_file, "\n");
- }
+ {
+ fprintf (dump_file, "\n;; Expanding Accumulator ");
+ print_rtl (dump_file, dest);
+ fprintf (dump_file, "\n");
+ }
if (debug_uses)
/* Instead of resetting the debug insns, we could replace each
ves = XNEW (struct var_to_expand);
ves->insn = insn;
ves->reg = copy_rtx (dest);
- ves->var_expansions = VEC_alloc (rtx, heap, 1);
+ ves->var_expansions.create (1);
ves->next = NULL;
ves->op = GET_CODE (src);
ves->expansion_count = 0;
ves->reuse_expansion = 0;
- ves->accum_pos = accum_pos;
return ves;
}
/* Record the insn to split. */
ivts = XNEW (struct iv_to_split);
ivts->insn = insn;
+ ivts->orig_var = dest;
ivts->base_var = NULL_RTX;
ivts->step = iv.step;
ivts->next = NULL;
struct var_to_expand *ves = NULL;
PTR *slot1;
PTR *slot2;
- VEC (edge, heap) *edges = get_loop_exit_edges (loop);
+ vec<edge> edges = get_loop_exit_edges (loop);
edge exit;
bool can_apply = false;
/* Record the loop exit bb and loop preheader before the unrolling. */
opt_info->loop_preheader = loop_preheader_edge (loop)->src;
- if (VEC_length (edge, edges) == 1)
+ if (edges.length () == 1)
{
- exit = VEC_index (edge, edges, 0);
+ exit = edges[0];
if (!(exit->flags & EDGE_COMPLEX))
{
opt_info->loop_exit = split_edge (exit);
}
}
- VEC_free (edge, heap, edges);
+ edges.release ();
free (body);
return opt_info;
}
if (ve->reuse_expansion == 0)
reg = ve->reg;
else
- reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
+ reg = ve->var_expansions[ve->reuse_expansion - 1];
- if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
+ if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
ve->reuse_expansion = 0;
else
ve->reuse_expansion++;
else
new_reg = get_expansion (ve);
- validate_change (insn, &SET_DEST (set), new_reg, 1);
- validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1);
-
+ validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
if (apply_change_group ())
if (really_new_expansion)
{
- VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
+ ve->var_expansions.safe_push (new_reg);
ve->expansion_count++;
}
}
insert_var_expansion_initialization (struct var_to_expand *ve,
basic_block place)
{
- rtx seq, var, zero_init, insn;
+ rtx seq, var, zero_init;
unsigned i;
enum machine_mode mode = GET_MODE (ve->reg);
bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
- if (VEC_length (rtx, ve->var_expansions) == 0)
+ if (ve->var_expansions.length () == 0)
return;
start_sequence ();
- if (ve->op == PLUS || ve->op == MINUS)
- for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
- {
- if (honor_signed_zero_p)
- zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
- else
- zero_init = CONST0_RTX (mode);
+ switch (ve->op)
+ {
+ case FMA:
+ /* Note that we only accumulate FMA via the ADD operand. */
+ case PLUS:
+ case MINUS:
+ FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
+ {
+ if (honor_signed_zero_p)
+ zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
+ else
+ zero_init = CONST0_RTX (mode);
+ emit_move_insn (var, zero_init);
+ }
+ break;
- emit_move_insn (var, zero_init);
- }
- else if (ve->op == MULT)
- for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
- {
- zero_init = CONST1_RTX (GET_MODE (var));
- emit_move_insn (var, zero_init);
- }
+ case MULT:
+ FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
+ {
+ zero_init = CONST1_RTX (GET_MODE (var));
+ emit_move_insn (var, zero_init);
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
seq = get_insns ();
end_sequence ();
- insn = BB_HEAD (place);
- while (!NOTE_INSN_BASIC_BLOCK_P (insn))
- insn = NEXT_INSN (insn);
-
- emit_insn_after (seq, insn);
+ emit_insn_after (seq, BB_END (place));
}
/* Combine the variable expansions at the loop exit. PLACE is the
rtx expr, seq, var, insn;
unsigned i;
- if (VEC_length (rtx, ve->var_expansions) == 0)
+ if (ve->var_expansions.length () == 0)
return;
start_sequence ();
- if (ve->op == PLUS || ve->op == MINUS)
- for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
- {
- sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
- var, sum);
- }
- else if (ve->op == MULT)
- for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
- {
- sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
- var, sum);
- }
+ switch (ve->op)
+ {
+ case FMA:
+ /* Note that we only accumulate FMA via the ADD operand. */
+ case PLUS:
+ case MINUS:
+ FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
+ sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
+ break;
+
+ case MULT:
+ FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
+ sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
expr = force_operand (sum, ve->reg);
if (expr != ve->reg)
emit_insn_after (seq, insn);
}
+/* Strip away REG_EQUAL notes for IVs we're splitting.
+
+ Updating REG_EQUAL notes for IVs we split is tricky: We
+ cannot tell until after unrolling, DF-rescanning, and liveness
+ updating, whether an EQ_USE is reached by the split IV while
+ the IV reg is still live. See PR55006.
+
+ ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
+ because RTL loop-iv requires us to defer rescanning insns and
+ any notes attached to them. So resort to old techniques... */
+
+static void
+maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx insn)
+{
+ struct iv_to_split *ivts;
+ rtx note = find_reg_equal_equiv_note (insn);
+ if (! note)
+ return;
+ for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
+ if (reg_mentioned_p (ivts->orig_var, note))
+ {
+ remove_note (insn, note);
+ return;
+ }
+}
+
/* Apply loop optimizations in loop copies using the
data which gathered during the unrolling. Structure
OPT_INFO record that data.
unrolling);
bb->aux = 0;
orig_insn = BB_HEAD (orig_bb);
- for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
+ FOR_BB_INSNS_SAFE (bb, insn, next)
{
- next = NEXT_INSN (insn);
- if (!INSN_P (insn))
+ if (!INSN_P (insn)
+ || (DEBUG_INSN_P (insn)
+ && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
continue;
- while (!INSN_P (orig_insn))
+ while (!INSN_P (orig_insn)
+ || (DEBUG_INSN_P (orig_insn)
+ && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
+ == LABEL_DECL)))
orig_insn = NEXT_INSN (orig_insn);
ivts_templ.insn = orig_insn;
/* Apply splitting iv optimization. */
if (opt_info->insns_to_split)
{
+ maybe_strip_eq_note_for_split_iv (opt_info, insn);
+
ivts = (struct iv_to_split *)
htab_find (opt_info->insns_to_split, &ivts_templ);
ivts_templ.insn = orig_insn;
if (opt_info->insns_to_split)
{
+ maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
+
ivts = (struct iv_to_split *)
htab_find (opt_info->insns_to_split, &ivts_templ);
if (ivts)
struct var_to_expand *ves;
for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
- VEC_free (rtx, heap, ves->var_expansions);
+ ves->var_expansions.release ();
htab_delete (opt_info->insns_with_var_to_expand);
}
free (opt_info);