#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
-#include "ggc.h"
#include "tree.h"
+#include "stor-layout.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
-#include "tree-flow.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "gimple-ssa.h"
+#include "tree-phinodes.h"
+#include "ssa-iterators.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop-niter.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_determine_vectorization_factor ===");
+ "=== vect_determine_vectorization_factor ===\n");
for (i = 0; i < nbbs; i++)
{
{
dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
+ dump_printf (MSG_NOTE, "\n");
}
gcc_assert (stmt_info);
dump_printf_loc (MSG_NOTE, vect_location,
"get vectype for scalar type: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
+ dump_printf (MSG_NOTE, "\n");
}
vectype = get_vectype_for_scalar_type (scalar_type);
"data-type ");
dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
scalar_type);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
{
dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
+ dump_printf (MSG_NOTE, "\n");
}
nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d", nunits);
+ dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
+ nunits);
if (!vectorization_factor
|| (nunits > vectorization_factor))
dump_printf_loc (MSG_NOTE, vect_location,
"==> examining statement: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ dump_printf (MSG_NOTE, "\n");
}
gcc_assert (stmt_info);
/* Skip stmts which do not need to be vectorized. */
- if (!STMT_VINFO_RELEVANT_P (stmt_info)
- && !STMT_VINFO_LIVE_P (stmt_info))
+ if ((!STMT_VINFO_RELEVANT_P (stmt_info)
+ && !STMT_VINFO_LIVE_P (stmt_info))
+ || gimple_clobber_p (stmt))
{
if (STMT_VINFO_IN_PATTERN_P (stmt_info)
&& (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
dump_printf_loc (MSG_NOTE, vect_location,
"==> examining pattern statement: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ dump_printf (MSG_NOTE, "\n");
}
}
else
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "skip.");
+ dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
gsi_next (&si);
continue;
}
"==> examining pattern def stmt: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
pattern_def_stmt, 0);
+ dump_printf (MSG_NOTE, "\n");
}
stmt = pattern_def_stmt;
if (gimple_get_lhs (stmt) == NULL_TREE)
{
+ if (is_gimple_call (stmt))
+ {
+ /* Ignore calls with no lhs. These must be calls to
+ #pragma omp simd functions, and what vectorization factor
+ it really needs can't be determined until
+ vectorizable_simd_clone_call. */
+ if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
+ {
+ pattern_def_seq = NULL;
+ gsi_next (&si);
+ }
+ continue;
+ }
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: irregular stmt.");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
0);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: vector stmt in loop:");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
dump_printf_loc (MSG_NOTE, vect_location,
"get vectype for scalar type: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
+ dump_printf (MSG_NOTE, "\n");
}
vectype = get_vectype_for_scalar_type (scalar_type);
if (!vectype)
"data-type ");
dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
scalar_type);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
STMT_VINFO_VECTYPE (stmt_info) = vectype;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
+ dump_printf (MSG_NOTE, "\n");
+ }
}
/* The vectorization factor is according to the smallest
dump_printf_loc (MSG_NOTE, vect_location,
"get vectype for scalar type: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
+ dump_printf (MSG_NOTE, "\n");
}
vf_vectype = get_vectype_for_scalar_type (scalar_type);
if (!vf_vectype)
"not vectorized: unsupported data-type ");
dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
scalar_type);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
vf_vectype);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
{
dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
+ dump_printf (MSG_NOTE, "\n");
}
nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d", nunits);
+ dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
if (!vectorization_factor
|| (nunits > vectorization_factor))
vectorization_factor = nunits;
/* TODO: Analyze cost. Decide if worth while to vectorize. */
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d",
+ dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
vectorization_factor);
if (vectorization_factor <= 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: unsupported data-type");
+ "not vectorized: unsupported data-type\n");
return false;
}
LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
/* Function vect_is_simple_iv_evolution.
FORNOW: A simple evolution of an induction variables in the loop is
- considered a polynomial evolution with constant step. */
+ considered a polynomial evolution. */
static bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
tree init_expr;
tree step_expr;
tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
+ basic_block bb;
/* When there is no evolution in this loop, the evolution function
is not "simple". */
dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
dump_printf (MSG_NOTE, ", init: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
+ dump_printf (MSG_NOTE, "\n");
}
*init = init_expr;
*step = step_expr;
- if (TREE_CODE (step_expr) != INTEGER_CST)
+ if (TREE_CODE (step_expr) != INTEGER_CST
+ && (TREE_CODE (step_expr) != SSA_NAME
+ || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
+ && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
+ || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
+ && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
+ || !flag_associative_math)))
+ && (TREE_CODE (step_expr) != REAL_CST
+ || !flag_associative_math))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "step unknown.");
+ "step unknown.\n");
return false;
}
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
basic_block bb = loop->header;
- tree dumy;
- vec<gimple> worklist;
- worklist.create (64);
+ tree init, step;
+ stack_vec<gimple, 64> worklist;
gimple_stmt_iterator gsi;
bool double_reduc;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_analyze_scalar_cycles ===");
+ "=== vect_analyze_scalar_cycles ===\n");
/* First - identify all inductions. Reduction detection assumes that all the
inductions have been identified, therefore, this order must not be
{
dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
+ dump_printf (MSG_NOTE, "\n");
}
/* Skip virtual phi's. The data dependences that are associated with
dump_printf_loc (MSG_NOTE, vect_location,
"Access function of PHI: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
+ dump_printf (MSG_NOTE, "\n");
}
STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
= evolution_part_in_loop_num (access_fn, loop->num);
}
if (!access_fn
- || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
+ || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
+ || (LOOP_VINFO_LOOP (loop_vinfo) != loop
+ && TREE_CODE (step) != INTEGER_CST))
{
worklist.safe_push (phi);
continue;
gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.");
+ dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
}
{
dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
+ dump_printf (MSG_NOTE, "\n");
}
gcc_assert (!virtual_operand_p (def)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "Detected double reduction.");
+ "Detected double reduction.\n");
STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "Detected vectorizable nested cycle.");
+ "Detected vectorizable nested cycle.\n");
STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "Detected reduction.");
+ "Detected reduction.\n");
STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
else
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Unknown def-use cycle pattern.");
+ "Unknown def-use cycle pattern.\n");
}
-
- worklist.release ();
}
vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}
+
/* Function vect_get_loop_niters.
- Determine how many iterations the loop is executed.
- If an expression that represents the number of iterations
- can be constructed, place it in NUMBER_OF_ITERATIONS.
+ Determine how many iterations the loop is executed and place it
+ in NUMBER_OF_ITERATIONS. Place the number of latch iterations
+ in NUMBER_OF_ITERATIONSM1.
+
Return the loop exit condition. */
static gimple
-vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
+vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
+ tree *number_of_iterationsm1)
{
tree niters;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "=== get_loop_niters ===");
- niters = number_of_exit_cond_executions (loop);
+ "=== get_loop_niters ===\n");
- if (niters != NULL_TREE
- && niters != chrec_dont_know)
- {
- *number_of_iterations = niters;
+ niters = number_of_latch_executions (loop);
+ *number_of_iterationsm1 = niters;
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
- }
- }
+ /* We want the number of loop header executions which is the number
+ of latch executions plus one.
+ ??? For UINT_MAX latch executions this number overflows to zero
+ for loops like do { n++; } while (n != 0); */
+ if (niters && !chrec_contains_undetermined (niters))
+ niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters),
+ build_int_cst (TREE_TYPE (niters), 1));
+ *number_of_iterations = niters;
return get_loop_exit_condition (loop);
}
gcc_assert (nbbs == loop->num_nodes);
LOOP_VINFO_BBS (res) = bbs;
+ LOOP_VINFO_NITERSM1 (res) = NULL;
LOOP_VINFO_NITERS (res) = NULL;
LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
LOOP_VINFO_VECTORIZABLE_P (res) = 0;
- LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
+ LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
LOOP_VINFO_VECT_FACTOR (res) = 0;
LOOP_VINFO_LOOP_NEST (res).create (3);
LOOP_VINFO_DATAREFS (res).create (10);
LOOP_VINFO_REDUCTION_CHAINS (res).create (10);
LOOP_VINFO_SLP_INSTANCES (res).create (10);
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
- LOOP_VINFO_PEELING_HTAB (res) = NULL;
LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
+ LOOP_VINFO_PEELING_FOR_NITER (res) = false;
LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
return res;
|| code == POINTER_PLUS_EXPR
|| code == MULT_EXPR)
&& CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
- swap_tree_operands (stmt,
- gimple_assign_rhs1_ptr (stmt),
- gimple_assign_rhs2_ptr (stmt));
+ swap_ssa_operands (stmt,
+ gimple_assign_rhs1_ptr (stmt),
+ gimple_assign_rhs2_ptr (stmt));
}
/* Free stmt_vec_info. */
}
free (LOOP_VINFO_BBS (loop_vinfo));
- free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
+ vect_destroy_datarefs (loop_vinfo, NULL);
free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
- if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
- htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
+ if (LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
+ LOOP_VINFO_PEELING_HTAB (loop_vinfo).dispose ();
destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "===== analyze_loop_nest_1 =====");
+ "===== analyze_loop_nest_1 =====\n");
/* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bad inner-loop form.");
+ "bad inner-loop form.\n");
return NULL;
}
{
loop_vec_info loop_vinfo;
gimple loop_cond;
- tree number_of_iterations = NULL;
+ tree number_of_iterations = NULL, number_of_iterationsm1 = NULL;
loop_vec_info inner_loop_vinfo = NULL;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_analyze_loop_form ===");
+ "=== vect_analyze_loop_form ===\n");
/* Different restrictions apply when we are considering an inner-most loop,
vs. an outer (nested) loop.
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: control flow in loop.");
+ "not vectorized: control flow in loop.\n");
return NULL;
}
if (empty_block_p (loop->header))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: empty loop.");
- return NULL;
- }
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: empty loop.\n");
+ return NULL;
+ }
}
else
{
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: multiple nested loops.");
+ "not vectorized: multiple nested loops.\n");
return NULL;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: Bad inner loop.");
+ "not vectorized: Bad inner loop.\n");
return NULL;
}
LOOP_VINFO_NITERS (inner_loop_vinfo)))
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: inner-loop count not invariant.");
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: inner-loop count not"
+ " invariant.\n");
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: control flow in loop.");
+ "not vectorized: control flow in loop.\n");
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
|| single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: unsupported outerloop form.");
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: unsupported outerloop form.\n");
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "Considering outer-loop vectorization.");
+ "Considering outer-loop vectorization.\n");
}
if (!single_exit (loop)
{
if (!single_exit (loop))
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: multiple exits.");
+ "not vectorized: multiple exits.\n");
else if (EDGE_COUNT (loop->header->preds) != 2)
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: too many incoming edges.");
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: too many incoming edges.\n");
}
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true);
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: latch block not empty.");
+ "not vectorized: latch block not empty.\n");
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
{
split_loop_exit_edge (e);
if (dump_enabled_p ())
- dump_printf (MSG_NOTE, "split exit edge.");
+ dump_printf (MSG_NOTE, "split exit edge.\n");
}
else
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: abnormal loop exit edge.");
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: abnormal loop exit edge.\n");
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
}
- loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
+ loop_cond = vect_get_loop_niters (loop, &number_of_iterations,
+ &number_of_iterationsm1);
if (!loop_cond)
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: complicated exit condition.");
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: complicated exit condition.\n");
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
- if (!number_of_iterations)
+ if (!number_of_iterations
+ || chrec_contains_undetermined (number_of_iterations))
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: number of iterations cannot be "
- "computed.");
+ "computed.\n");
if (inner_loop_vinfo)
destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
- if (chrec_contains_undetermined (number_of_iterations))
+ if (integer_zerop (number_of_iterations))
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Infinite number of iterations.");
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: number of iterations = 0.\n");
if (inner_loop_vinfo)
- destroy_loop_vec_info (inner_loop_vinfo, true);
+ destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
- if (!NITERS_KNOWN_P (number_of_iterations))
+ loop_vinfo = new_loop_vec_info (loop);
+ LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
+ LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
+ LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
+
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"Symbolic number of iterations is ");
dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
+ dump_printf (MSG_NOTE, "\n");
}
}
- else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: number of iterations = 0.");
- if (inner_loop_vinfo)
- destroy_loop_vec_info (inner_loop_vinfo, true);
- return NULL;
- }
-
- loop_vinfo = new_loop_vec_info (loop);
- LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
- LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_analyze_loop_operations ===");
+ "=== vect_analyze_loop_operations ===\n");
gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "Updating vectorization factor to %d ",
+ "Updating vectorization factor to %d\n",
vectorization_factor);
}
{
dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
+ dump_printf (MSG_NOTE, "\n");
}
/* Inner-loop loop-closed exit phi in outer-loop vectorization
!= vect_double_reduction_def)
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Unsupported loop-closed phi in "
- "outer-loop.");
+ "outer-loop.\n");
return false;
}
return false;
op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
- if (!op_def_stmt
+ if (gimple_nop_p (op_def_stmt)
|| !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
|| !vinfo_for_stmt (op_def_stmt))
return false;
/* FORNOW: not yet supported. */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: value used after loop.");
+ "not vectorized: value used after loop.\n");
return false;
}
{
/* A scalar-dependence cycle that we don't support. */
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: scalar dependence cycle.");
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: scalar dependence cycle.\n");
return false;
}
{
if (dump_enabled_p ())
{
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: relevant phi not "
"supported: ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
gimple stmt = gsi_stmt (si);
- if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
+ if (!gimple_clobber_p (stmt)
+ && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
return false;
}
} /* bbs */
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "All the computation can be taken out of the loop.");
+ "All the computation can be taken out of the loop.\n");
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: redundant loop. no profit to "
- "vectorize.");
+ "vectorize.\n");
return false;
}
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vectorization_factor = %d, niters = "
- HOST_WIDE_INT_PRINT_DEC, vectorization_factor,
+ HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
LOOP_VINFO_INT_NITERS (loop_vinfo));
if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: iteration count too small.");
+ "not vectorized: iteration count too small.\n");
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: iteration count smaller than "
- "vectorization factor.");
+ "vectorization factor.\n");
return false;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: vectorization not profitable.");
+ "not vectorized: vectorization not profitable.\n");
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: vector version will never be "
- "profitable.");
+ "profitable.\n");
return false;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: vectorization not profitable.");
+ "not vectorized: vectorization not profitable.\n");
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"not vectorized: iteration count smaller than user "
"specified loop bound parameter or minimum profitable "
- "iterations (whichever is more conservative).");
+ "iterations (whichever is more conservative).\n");
return false;
}
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: estimated iteration count too "
- "small.");
+ "small.\n");
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"not vectorized: estimated iteration count smaller "
"than specified loop bound parameter or minimum "
"profitable iterations (whichever is more "
- "conservative).");
+ "conservative).\n");
return false;
}
- if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
- || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required.");
- if (!vect_can_advance_ivs_p (loop_vinfo))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: can't create epilog loop 1.");
- return false;
- }
- if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: can't create epilog loop 2.");
- return false;
- }
- }
-
return true;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bad data references.");
+ "bad data references.\n");
+ return false;
+ }
+
+ /* Analyze the access patterns of the data-refs in the loop (consecutive,
+ complex, etc.). FORNOW: Only handle consecutive access pattern. */
+
+ ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
+ if (!ok)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "bad data access.\n");
return false;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unexpected pattern.");
+ "unexpected pattern.\n");
return false;
}
the dependences.
FORNOW: fail at the first data dependence that we encounter. */
- ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
+ ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
if (!ok
|| max_vf < min_vf)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bad data dependence.");
+ "bad data dependence.\n");
return false;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't determine vectorization factor.");
+ "can't determine vectorization factor.\n");
return false;
}
if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bad data dependence.");
+ "bad data dependence.\n");
return false;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bad data alignment.");
- return false;
- }
-
- /* Analyze the access patterns of the data-refs in the loop (consecutive,
- complex, etc.). FORNOW: Only handle consecutive access pattern. */
-
- ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
- if (!ok)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bad data access.");
+ "bad data alignment.\n");
return false;
}
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"too long list of versioning for alias "
- "run-time tests.");
+ "run-time tests.\n");
return false;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bad data alignment.");
+ "bad data alignment.\n");
return false;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bad operation or unsupported loop bound.");
+ "bad operation or unsupported loop bound.\n");
return false;
}
+ /* Decide whether we need to create an epilogue loop to handle
+ remaining scalar iterations. */
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+ {
+ if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
+ - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+ }
+ else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
+ < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))))
+ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+
+ /* If an epilogue loop is required make sure we can create one. */
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
+ if (!vect_can_advance_ivs_p (loop_vinfo)
+ || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
+ single_exit (LOOP_VINFO_LOOP
+ (loop_vinfo))))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: can't create required "
+ "epilog loop\n");
+ return false;
+ }
+ }
+
return true;
}
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "===== analyze_loop_nest =====");
+ "===== analyze_loop_nest =====\n");
if (loop_outer (loop)
&& loop_vec_info_for_loop (loop_outer (loop))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "outer-loop already vectorized.");
+ "outer-loop already vectorized.\n");
return NULL;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bad loop form.");
+ "bad loop form.\n");
return NULL;
}
{
dump_printf_loc (msg_type, vect_location, "%s", msg);
dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
+ dump_printf (msg_type, "\n");
}
{
dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
+ dump_printf (MSG_NOTE, "\n");
}
- swap_tree_operands (next_stmt,
- gimple_assign_rhs1_ptr (next_stmt),
- gimple_assign_rhs2_ptr (next_stmt));
+ swap_ssa_operands (next_stmt,
+ gimple_assign_rhs1_ptr (next_stmt),
+ gimple_assign_rhs2_ptr (next_stmt));
update_stmt (next_stmt);
if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
a3 = ...
a2 = operation (a3, a1)
+ or
+
+ a3 = ...
+ loop_header:
+ a1 = phi < a0, a2 >
+ a2 = operation (a3, a1)
+
such that:
1. operation is commutative and associative and it is safe to
change the order of the computation (if CHECK_REDUCTION is true)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "intermediate value used outside loop.");
+ "intermediate value used outside loop.\n");
return NULL;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction used in loop.");
+ "reduction used in loop.\n");
return NULL;
}
}
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"reduction: not ssa_name: ");
dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return NULL;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction: no def_stmt.");
+ "reduction: no def_stmt.\n");
return NULL;
}
if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
{
if (dump_enabled_p ())
- dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
+ {
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
+ dump_printf (MSG_NOTE, "\n");
+ }
return NULL;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction used in loop.");
+ "reduction used in loop.\n");
return NULL;
}
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unsupported phi node definition.");
+ "unsupported phi node definition.\n");
return NULL;
}
dump_generic_expr (MSG_NOTE, TDF_SLIM,
TREE_TYPE (op4));
}
+ dump_printf (MSG_NOTE, "\n");
}
return NULL;
if (def2 && def2 == phi
&& (code == COND_EXPR
|| !def1 || gimple_nop_p (def1)
+ || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
|| (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
&& (is_gimple_assign (def1)
|| is_gimple_call (def1)
if (def1 && def1 == phi
&& (code == COND_EXPR
|| !def2 || gimple_nop_p (def2)
+ || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
|| (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
&& (is_gimple_assign (def2)
|| is_gimple_call (def2)
report_vect_op (MSG_NOTE, def_stmt,
"detected reduction: need to swap operands: ");
- swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
- gimple_assign_rhs2_ptr (def_stmt));
+ swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
+ gimple_assign_rhs2_ptr (def_stmt));
if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"cost model: epilogue peel iters set to vf/2 "
- "because loop iterations are unknown .");
+ "because loop iterations are unknown .\n");
/* If peeled iterations are known but number of scalar loop
iterations are unknown, count a taken branch per peeled loop. */
int scalar_single_iter_cost = 0;
int scalar_outside_cost = 0;
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
/* Cost model disabled. */
- if (!flag_vect_cost_model)
+ if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
{
- dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.");
+ dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
*ret_min_profitable_niters = 0;
*ret_min_profitable_estimate = 0;
return;
{
peel_iters_prologue = vf/2;
dump_printf (MSG_NOTE, "cost model: "
- "prologue peel iters set to vf/2.");
+ "prologue peel iters set to vf/2.\n");
/* If peeling for alignment is unknown, loop bound of main loop becomes
unknown. */
peel_iters_epilogue = vf/2;
dump_printf (MSG_NOTE, "cost model: "
"epilogue peel iters set to vf/2 because "
- "peeling for alignment is unknown.");
+ "peeling for alignment is unknown.\n");
/* If peeled iterations are unknown, count a taken branch and a not taken
branch per peeled loop. Even if scalar loop iterations are known,
else
{
/* Cost model check occurs at prologue generation. */
- if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
+ if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
+ vect_get_stmt_cost (cond_branch_not_taken);
/* Cost model check occurs at epilogue generation. */
/* vector version will never be profitable. */
else
{
+ if (LOOP_VINFO_LOOP (loop_vinfo)->force_vect)
+ warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
+ "did not happen for a simd loop");
+
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"cost model: the vector iteration cost = %d "
"divided by the scalar iteration cost = %d "
- "is greater or equal to the vectorization factor = %d.",
+ "is greater or equal to the vectorization factor = %d"
+ ".\n",
vec_inside_cost, scalar_single_iter_cost, vf);
*ret_min_profitable_niters = -1;
*ret_min_profitable_estimate = -1;
peel_iters_prologue);
dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
peel_iters_epilogue);
- dump_printf (MSG_NOTE,
+ dump_printf (MSG_NOTE,
" Calculated minimum iters for profitability: %d\n",
min_profitable_iters);
+ dump_printf (MSG_NOTE, "\n");
}
min_profitable_iters =
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- " Runtime profitability threshold = %d\n", min_profitable_iters);
+ " Runtime profitability threshold = %d\n",
+ min_profitable_iters);
*ret_min_profitable_niters = min_profitable_iters;
"unsupported data-type ");
dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
TREE_TYPE (reduction_op));
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
}
else
{
- int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+ int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
tree bitsize =
TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
- int element_bitsize = tree_low_cst (bitsize, 1);
+ int element_bitsize = tree_to_uhwi (bitsize);
int nelements = vec_size_in_bits / element_bitsize;
optab = optab_for_tree_code (code, vectype, optab_default);
if (dump_enabled_p ())
dump_printf (MSG_NOTE,
"vect_model_reduction_cost: inside_cost = %d, "
- "prologue_cost = %d, epilogue_cost = %d .", inside_cost,
+ "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
prologue_cost, epilogue_cost);
return true;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vect_model_induction_cost: inside_cost = %d, "
- "prologue_cost = %d .", inside_cost, prologue_cost);
+ "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- tree scalar_type;
tree vectype;
int nunits;
edge pe = loop_preheader_edge (loop);
struct loop *iv_loop;
basic_block new_bb;
tree new_vec, vec_init, vec_step, t;
- tree access_fn;
tree new_var;
tree new_name;
gimple init_stmt, induction_phi, new_stmt;
tree init_expr, step_expr;
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
int i;
- bool ok;
int ncopies;
tree expr;
stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
latch_e = loop_latch_edge (iv_loop);
loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
- access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
- gcc_assert (access_fn);
- STRIP_NOPS (access_fn);
- ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
- &init_expr, &step_expr);
- gcc_assert (ok);
+ step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
+ gcc_assert (step_expr != NULL_TREE);
+
pe = loop_preheader_edge (iv_loop);
+ init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
+ loop_preheader_edge (iv_loop));
- scalar_type = TREE_TYPE (init_expr);
- vectype = get_vectype_for_scalar_type (scalar_type);
+ vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
gcc_assert (vectype);
nunits = TYPE_VECTOR_SUBPARTS (vectype);
gcc_assert (phi_info);
gcc_assert (ncopies >= 1);
+ /* Convert the step to the desired type. */
+ step_expr = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
+ step_expr),
+ &stmts, true, NULL_TREE);
+ if (stmts)
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
+
/* Find the first insertion point in the BB. */
si = gsi_after_labels (bb);
/* iv_loop is nested in the loop to be vectorized. init_expr had already
been created during vectorization of previous stmts. We obtain it
from the STMT_VINFO_VEC_STMT of the defining stmt. */
- tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
- loop_preheader_edge (iv_loop));
- vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
+ vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi, NULL);
/* If the initial value is not of proper type, convert it. */
if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
{
/* iv_loop is the loop to be vectorized. Create:
vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
- new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
- new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
+ new_var = vect_get_new_vect_var (TREE_TYPE (vectype),
+ vect_scalar_var, "var_");
+ new_name = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
+ init_expr),
+ &stmts, false, new_var);
if (stmts)
{
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
}
vec_alloc (v, nunits);
+ bool constant_p = is_gimple_min_invariant (new_name);
CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
for (i = 1; i < nunits; i++)
{
/* Create: new_name_i = new_name + step_expr */
- enum tree_code code = POINTER_TYPE_P (scalar_type)
- ? POINTER_PLUS_EXPR : PLUS_EXPR;
- init_stmt = gimple_build_assign_with_ops (code, new_var,
- new_name, step_expr);
- new_name = make_ssa_name (new_var, init_stmt);
- gimple_assign_set_lhs (init_stmt, new_name);
-
- new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
- gcc_assert (!new_bb);
-
- if (dump_enabled_p ())
+ new_name = fold_build2 (PLUS_EXPR, TREE_TYPE (new_name),
+ new_name, step_expr);
+ if (!is_gimple_min_invariant (new_name))
{
- dump_printf_loc (MSG_NOTE, vect_location,
- "created new init_stmt: ");
- dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
+ init_stmt = gimple_build_assign (new_var, new_name);
+ new_name = make_ssa_name (new_var, init_stmt);
+ gimple_assign_set_lhs (init_stmt, new_name);
+ new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
+ gcc_assert (!new_bb);
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "created new init_stmt: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ constant_p = false;
}
CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
}
/* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
- new_vec = build_constructor (vectype, v);
+ if (constant_p)
+ new_vec = build_vector_from_ctor (vectype, v);
+ else
+ new_vec = build_constructor (vectype, v);
vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
}
{
/* iv_loop is the loop to be vectorized. Generate:
vec_step = [VF*S, VF*S, VF*S, VF*S] */
- expr = build_int_cst (TREE_TYPE (step_expr), vf);
+ if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+ {
+ expr = build_int_cst (integer_type_node, vf);
+ expr = fold_convert (TREE_TYPE (step_expr), expr);
+ }
+ else
+ expr = build_int_cst (TREE_TYPE (step_expr), vf);
new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
expr, step_expr);
+ if (TREE_CODE (step_expr) == SSA_NAME)
+ new_name = vect_init_vector (iv_phi, new_name,
+ TREE_TYPE (step_expr), NULL);
}
t = unshare_expr (new_name);
- gcc_assert (CONSTANT_CLASS_P (new_name));
+ gcc_assert (CONSTANT_CLASS_P (new_name)
+ || TREE_CODE (new_name) == SSA_NAME);
stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
gcc_assert (stepvectype);
new_vec = build_vector_from_val (stepvectype, t);
gcc_assert (!nested_in_vect_loop);
/* Create the vector that holds the step of the induction. */
- expr = build_int_cst (TREE_TYPE (step_expr), nunits);
+ if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+ {
+ expr = build_int_cst (integer_type_node, nunits);
+ expr = fold_convert (TREE_TYPE (step_expr), expr);
+ }
+ else
+ expr = build_int_cst (TREE_TYPE (step_expr), nunits);
new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
expr, step_expr);
+ if (TREE_CODE (step_expr) == SSA_NAME)
+ new_name = vect_init_vector (iv_phi, new_name,
+ TREE_TYPE (step_expr), NULL);
t = unshare_expr (new_name);
- gcc_assert (CONSTANT_CLASS_P (new_name));
+ gcc_assert (CONSTANT_CLASS_P (new_name)
+ || TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (stepvectype, t);
vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
dump_printf_loc (MSG_NOTE, vect_location,
"vector of inductions after inner-loop:");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
+ dump_printf (MSG_NOTE, "\n");
}
}
}
dump_printf (MSG_NOTE, "\n");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
SSA_NAME_DEF_STMT (vec_def), 0);
+ dump_printf (MSG_NOTE, "\n");
}
STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
bool extract_scalar_result = false;
gimple use_stmt, orig_stmt, reduction_phi = NULL;
bool nested_in_vect_loop = false;
- vec<gimple> new_phis = vNULL;
- vec<gimple> inner_phis = vNULL;
+ auto_vec<gimple> new_phis;
+ auto_vec<gimple> inner_phis;
enum vect_def_type dt = vect_unknown_def_type;
int j, i;
- vec<tree> scalar_results = vNULL;
+ auto_vec<tree> scalar_results;
unsigned int group_size = 1, k, ratio;
- vec<tree> vec_initial_defs = vNULL;
- vec<gimple> phis;
+ auto_vec<tree> vec_initial_defs;
+ auto_vec<gimple> phis;
bool slp_reduc = false;
tree new_phi_result;
gimple inner_phi = NULL;
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
dump_printf (MSG_NOTE, "\n");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
+ dump_printf (MSG_NOTE, "\n");
}
phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
}
}
- vec_initial_defs.release ();
-
/* 2. Create epilog code.
The reduction epilog code operates across the elements of the vector
of partial results computed by the vectorized loop.
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "Reduce using direct vector reduction.");
+ "Reduce using direct vector reduction.\n");
vec_dest = vect_create_destination_var (scalar_dest, vectype);
tmp = build1 (reduc_code, vectype, new_phi_result);
enum tree_code shift_code = ERROR_MARK;
bool have_whole_vector_shift = true;
int bit_offset;
- int element_bitsize = tree_low_cst (bitsize, 1);
- int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+ int element_bitsize = tree_to_uhwi (bitsize);
+ int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
tree vec_temp;
if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "Reduce using vector shifts");
+ "Reduce using vector shifts\n");
vec_dest = vect_create_destination_var (scalar_dest, vectype);
new_temp = new_phi_result;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "Reduce using scalar code. ");
+ "Reduce using scalar code.\n");
- vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+ vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
FOR_EACH_VEC_ELT (new_phis, i, new_phi)
{
if (gimple_code (new_phi) == GIMPLE_PHI)
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "extract scalar result");
+ "extract scalar result\n");
if (BYTES_BIG_ENDIAN)
bitpos = size_binop (MULT_EXPR,
epilog_stmt = gimple_build_assign (new_dest, expr);
new_temp = make_ssa_name (new_dest, epilog_stmt);
gimple_assign_set_lhs (epilog_stmt, new_temp);
- SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
if (nested_in_vect_loop)
{
result. (The reduction result is expected to have two immediate uses -
one at the latch block, and one at the loop exit). */
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
- if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
+ if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
+ && !is_gimple_debug (USE_STMT (use_p)))
phis.safe_push (USE_STMT (use_p));
- /* We expect to have found an exit_phi because of loop-closed-ssa
- form. */
- gcc_assert (!phis.is_empty ());
+ /* While we expect to have found an exit_phi because of loop-closed-ssa
+ form we can end up without one if the scalar cycle is dead. */
FOR_EACH_VEC_ELT (phis, i, exit_phi)
{
dump_printf_loc (MSG_NOTE, vect_location,
"created double reduction phi node: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
+ dump_printf (MSG_NOTE, "\n");
}
vect_phi_res = PHI_RESULT (vect_phi);
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
{
if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
- phis.safe_push (USE_STMT (use_p));
+ {
+ if (!is_gimple_debug (USE_STMT (use_p)))
+ phis.safe_push (USE_STMT (use_p));
+ }
else
{
if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
{
if (!flow_bb_inside_loop_p (loop,
- gimple_bb (USE_STMT (phi_use_p))))
+ gimple_bb (USE_STMT (phi_use_p)))
+ && !is_gimple_debug (USE_STMT (phi_use_p)))
phis.safe_push (USE_STMT (phi_use_p));
}
}
phis.release ();
}
-
- scalar_results.release ();
- inner_phis.release ();
- new_phis.release ();
}
struct loop * def_stmt_loop, *outer_loop = NULL;
tree def_arg;
gimple def_arg_stmt;
- vec<tree> vec_oprnds0 = vNULL;
- vec<tree> vec_oprnds1 = vNULL;
- vec<tree> vect_defs = vNULL;
- vec<gimple> phis = vNULL;
+ auto_vec<tree> vec_oprnds0;
+ auto_vec<tree> vec_oprnds1;
+ auto_vec<tree> vect_defs;
+ auto_vec<gimple> phis;
int vec_num;
tree def0, def1, tem, op0, op1 = NULL_TREE;
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unsupported condition in reduction");
+ "unsupported condition in reduction\n");
return false;
}
not vectorizable_reduction. */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unsupported shift or rotation.");
+ "unsupported shift or rotation.\n");
return false;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "no optab.");
+ "no optab.\n");
return false;
}
if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
{
if (dump_enabled_p ())
- dump_printf (MSG_NOTE, "op not supported by target.");
+ dump_printf (MSG_NOTE, "op not supported by target.\n");
if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
|| LOOP_VINFO_VECT_FACTOR (loop_vinfo)
return false;
if (dump_enabled_p ())
- dump_printf (MSG_NOTE, "proceeding using word mode.");
+ dump_printf (MSG_NOTE, "proceeding using word mode.\n");
}
/* Worthwhile without SIMD support? */
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not worthwhile without SIMD support.");
+ "not worthwhile without SIMD support.\n");
return false;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "no optab for reduction.");
+ "no optab for reduction.\n");
epilog_reduc_code = ERROR_MARK;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduc op not supported by target.");
+ "reduc op not supported by target.\n");
epilog_reduc_code = ERROR_MARK;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "no reduc code for scalar code.");
+ "no reduc code for scalar code.\n");
return false;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "multiple types in double reduction");
+ "multiple types in double reduction\n");
return false;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "invalid types in dot-prod");
+ "invalid types in dot-prod\n");
return false;
}
/** Transform. **/
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.");
+ dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
/* FORNOW: Multiple types are not supported for condition. */
if (code == COND_EXPR)
epilog_reduc_code, phis, reduc_index,
double_reduc, slp_node);
- phis.release ();
- vect_defs.release ();
- vec_oprnds0.release ();
- vec_oprnds1.release ();
-
return true;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "multiple types in nested loop.");
+ "multiple types in nested loop.\n");
return false;
}
&& !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"inner-loop induction only used outside "
- "of the outer vectorized loop.");
+ "of the outer vectorized loop.\n");
return false;
}
}
STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "=== vectorizable_induction ===");
+ "=== vectorizable_induction ===\n");
vect_model_induction_cost (stmt_info, ncopies);
return true;
}
/** Transform. **/
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.");
+ dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
vec_def = get_initial_def_for_induction (phi);
*vec_stmt = SSA_NAME_DEF_STMT (vec_def);
bool
vectorizable_live_operation (gimple stmt,
gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
- gimple *vec_stmt ATTRIBUTE_UNUSED)
+ gimple *vec_stmt)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
return false;
if (!is_gimple_assign (stmt))
- return false;
+ {
+ if (gimple_call_internal_p (stmt)
+ && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
+ && gimple_call_lhs (stmt)
+ && loop->simduid
+ && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
+ && loop->simduid
+ == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
+ {
+ edge e = single_exit (loop);
+ basic_block merge_bb = e->dest;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ tree lhs = gimple_call_lhs (stmt);
+
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+ {
+ gimple use_stmt = USE_STMT (use_p);
+ if (gimple_code (use_stmt) == GIMPLE_PHI
+ || gimple_bb (use_stmt) == merge_bb)
+ {
+ if (vec_stmt)
+ {
+ tree vfm1
+ = build_int_cst (unsigned_type_node,
+ loop_vinfo->vectorization_factor - 1);
+ SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
+ }
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
return false;
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "use not simple.");
+ "use not simple.\n");
return false;
}
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "killing debug use");
+ "killing debug use\n");
gimple_debug_bind_reset_value (ustmt);
update_stmt (ustmt);
}
}
+
+/* This function builds ni_name = number of iterations. Statements
+ are emitted on the loop preheader edge. */
+
+static tree
+vect_build_loop_niters (loop_vec_info loop_vinfo)
+{
+ tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
+ if (TREE_CODE (ni) == INTEGER_CST)
+ return ni;
+ else
+ {
+ tree ni_name, var;
+ gimple_seq stmts = NULL;
+ edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
+
+ var = create_tmp_var (TREE_TYPE (ni), "niters");
+ ni_name = force_gimple_operand (ni, &stmts, false, var);
+ if (stmts)
+ gsi_insert_seq_on_edge_immediate (pe, stmts);
+
+ return ni_name;
+ }
+}
+
+
+/* This function generates the following statements:
+
+ ni_name = number of iterations loop executes
+ ratio = ni_name / vf
+ ratio_mult_vf_name = ratio * vf
+
+ and places them on the loop preheader edge. */
+
+static void
+vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
+ tree ni_name,
+ tree *ratio_mult_vf_name_ptr,
+ tree *ratio_name_ptr)
+{
+ tree ni_minus_gap_name;
+ tree var;
+ tree ratio_name;
+ tree ratio_mult_vf_name;
+ int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
+ tree log_vf;
+
+ log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf));
+
+ /* If epilogue loop is required because of data accesses with gaps, we
+ subtract one iteration from the total number of iterations here for
+ correct calculation of RATIO. */
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+ {
+ ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+ ni_name,
+ build_one_cst (TREE_TYPE (ni_name)));
+ if (!is_gimple_val (ni_minus_gap_name))
+ {
+ var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap");
+ gimple stmts = NULL;
+ ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
+ true, var);
+ gsi_insert_seq_on_edge_immediate (pe, stmts);
+ }
+ }
+ else
+ ni_minus_gap_name = ni_name;
+
+ /* Create: ratio = ni >> log2(vf) */
+ /* ??? As we have ni == number of latch executions + 1, ni could
+ have overflown to zero. So avoid computing ratio based on ni
+ but compute it using the fact that we know ratio will be at least
+ one, thus via (ni - vf) >> log2(vf) + 1. */
+ ratio_name
+ = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name),
+ fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name),
+ fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+ ni_minus_gap_name,
+ build_int_cst
+ (TREE_TYPE (ni_name), vf)),
+ log_vf),
+ build_int_cst (TREE_TYPE (ni_name), 1));
+ if (!is_gimple_val (ratio_name))
+ {
+ var = create_tmp_var (TREE_TYPE (ni_name), "bnd");
+ gimple stmts = NULL;
+ ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
+ gsi_insert_seq_on_edge_immediate (pe, stmts);
+ }
+ *ratio_name_ptr = ratio_name;
+
+ /* Create: ratio_mult_vf = ratio << log2 (vf). */
+
+ if (ratio_mult_vf_name_ptr)
+ {
+ ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
+ ratio_name, log_vf);
+ if (!is_gimple_val (ratio_mult_vf_name))
+ {
+ var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf");
+ gimple stmts = NULL;
+ ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
+ true, var);
+ gsi_insert_seq_on_edge_immediate (pe, stmts);
+ }
+ *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
+ }
+
+ return;
+}
+
+
/* Function vect_transform_loop.
The analysis phase has determined that the loop is vectorizable.
int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
bool grouped_store;
bool slp_scheduled = false;
- unsigned int nunits;
gimple stmt, pattern_stmt;
gimple_seq pattern_def_seq = NULL;
gimple_stmt_iterator pattern_def_si = gsi_none ();
gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===");
+ dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
/* If profile is inprecise, we have chance to fix it up. */
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "Profitability threshold is %d loop iterations.", th);
+ "Profitability threshold is %d loop iterations.\n",
+ th);
check_profitability = true;
}
- /* Peel the loop if there are data refs with unknown alignment.
- Only one data ref with unknown store is allowed. */
+ /* Version the loop first, if required, so the profitability check
+ comes first. */
- if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
+ || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
{
- vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
+ vect_loop_versioning (loop_vinfo, th, check_profitability);
check_profitability = false;
}
- if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
- || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
+ tree ni_name = vect_build_loop_niters (loop_vinfo);
+ LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
+
+ /* Peel the loop if there are data refs with unknown alignment.
+ Only one data ref with unknown store is allowed. */
+
+ if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
{
- vect_loop_versioning (loop_vinfo, th, check_profitability);
+ vect_do_peeling_for_alignment (loop_vinfo, ni_name,
+ th, check_profitability);
check_profitability = false;
+ /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
+ be re-computed. */
+ ni_name = NULL_TREE;
}
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
will remain scalar and will compute the remaining (n%VF) iterations.
(VF is the vectorization factor). */
- if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
- || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
- vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
- th, check_profitability);
- else
+ if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+ {
+ tree ratio_mult_vf;
+ if (!ni_name)
+ ni_name = vect_build_loop_niters (loop_vinfo);
+ vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
+ &ratio);
+ vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
+ th, check_profitability);
+ }
+ else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
+ else
+ {
+ if (!ni_name)
+ ni_name = vect_build_loop_niters (loop_vinfo);
+ vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
+ }
/* 1) Make sure the loop header has exactly two entries
2) Make sure we have a preheader basic block. */
dump_printf_loc (MSG_NOTE, vect_location,
"------>vectorizing phi: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
+ dump_printf (MSG_NOTE, "\n");
}
stmt_info = vinfo_for_stmt (phi);
if (!stmt_info)
if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
!= (unsigned HOST_WIDE_INT) vectorization_factor)
&& dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.");
+ dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "transform phi.");
+ dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
}
}
if (transform_pattern_stmt)
stmt = pattern_stmt;
else
- stmt = gsi_stmt (si);
+ {
+ stmt = gsi_stmt (si);
+ /* During vectorization remove existing clobber stmts. */
+ if (gimple_clobber_p (stmt))
+ {
+ unlink_stmt_vdef (stmt);
+ gsi_remove (&si, true);
+ release_defs (stmt);
+ continue;
+ }
+ }
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"------>vectorizing statement: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ dump_printf (MSG_NOTE, "\n");
}
stmt_info = vinfo_for_stmt (stmt);
"stmt: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
pattern_def_stmt, 0);
+ dump_printf (MSG_NOTE, "\n");
}
stmt = pattern_def_stmt;
transform_pattern_stmt = false;
}
- gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
- nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
- STMT_VINFO_VECTYPE (stmt_info));
- if (!STMT_SLP_TYPE (stmt_info)
- && nunits != (unsigned int) vectorization_factor
- && dump_enabled_p ())
- /* For SLP VF is set according to unrolling factor, and not to
- vector size, hence for SLP this print is not valid. */
- dump_printf_loc (MSG_NOTE, vect_location,
- "multiple-types.");
+ if (STMT_VINFO_VECTYPE (stmt_info))
+ {
+ unsigned int nunits
+ = (unsigned int)
+ TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+ if (!STMT_SLP_TYPE (stmt_info)
+ && nunits != (unsigned int) vectorization_factor
+ && dump_enabled_p ())
+ /* For SLP VF is set according to unrolling factor, and not
+ to vector size, hence for SLP this print is not valid. */
+ dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
+ }
/* SLP. Schedule all the SLP instances when the first SLP stmt is
reached. */
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "=== scheduling SLP instances ===");
+ "=== scheduling SLP instances ===\n");
vect_schedule_slp (loop_vinfo, NULL);
}
/* -------- vectorize statement ------------ */
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "transform statement.");
+ dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
grouped_store = false;
is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
slpeel_make_loop_iterate_ntimes (loop, ratio);
/* Reduce loop iterations by the vectorization factor. */
- scale_loop_profile (loop, RDIV (REG_BR_PROB_BASE , vectorization_factor),
+ scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
expected_iterations / vectorization_factor);
loop->nb_iterations_upper_bound
= loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (vectorization_factor),
loop->nb_iterations_estimate = loop->nb_iterations_estimate - double_int_one;
}
- /* The memory tags and pointers in vectorized statements need to
- have their SSA forms updated. FIXME, why can't this be delayed
- until all the loops have been transformed? */
- update_ssa (TODO_update_ssa);
-
if (dump_enabled_p ())
- dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, "LOOP VECTORIZED.");
- if (loop->inner && dump_enabled_p ())
- dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
- "OUTER LOOP VECTORIZED.");
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "LOOP VECTORIZED\n");
+ if (loop->inner)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "OUTER LOOP VECTORIZED\n");
+ dump_printf (MSG_NOTE, "\n");
+ }
}