/* If-conversion for vectorizer.
- Copyright (C) 2004-2016 Free Software Foundation, Inc.
+ Copyright (C) 2004-2019 Free Software Foundation, Inc.
Contributed by Devang Patel <dpatel@apple.com>
This file is part of GCC.
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
+#include "tree-ssa-loop.h"
+#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-address.h"
#include "dbgcnt.h"
#include "varasm.h"
#include "builtins.h"
#include "params.h"
+#include "cfganal.h"
+#include "internal-fn.h"
+#include "fold-const.h"
+#include "tree-ssa-sccvn.h"
+
+/* Only handle PHIs with no more arguments unless we are asked to by
+ simd pragma. */
+#define MAX_PHI_ARG_NUM \
+ ((unsigned) PARAM_VALUE (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS))
+
+/* True if we've converted a statement that was only executed when some
+ condition C was true, and if for correctness we need to predicate the
+ statement to ensure that it is a no-op when C is false. See
+ predicate_statements for the kinds of predication we support. */
+static bool need_to_predicate;
+
+/* Indicate if there are any complicated PHIs that need to be handled in
+ if-conversion. Complicated PHI has more than two arguments and can't
+ be degenerated to two arguments PHI. See more information in comment
+ before phi_convertible_by_degenerating_args. */
+static bool any_complicated_phi;
+
+/* Hash for struct innermost_loop_behavior. It depends on the user to
+ free the memory. */
+
+struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
+{
+ static inline hashval_t hash (const value_type &);
+ static inline bool equal (const value_type &,
+ const compare_type &);
+};
+
+inline hashval_t
+innermost_loop_behavior_hash::hash (const value_type &e)
+{
+ hashval_t hash;
+
+ hash = iterative_hash_expr (e->base_address, 0);
+ hash = iterative_hash_expr (e->offset, hash);
+ hash = iterative_hash_expr (e->init, hash);
+ return iterative_hash_expr (e->step, hash);
+}
+
+inline bool
+innermost_loop_behavior_hash::equal (const value_type &e1,
+ const compare_type &e2)
+{
+ if ((e1->base_address && !e2->base_address)
+ || (!e1->base_address && e2->base_address)
+ || (!e1->offset && e2->offset)
+ || (e1->offset && !e2->offset)
+ || (!e1->init && e2->init)
+ || (e1->init && !e2->init)
+ || (!e1->step && e2->step)
+ || (e1->step && !e2->step))
+ return false;
+
+ if (e1->base_address && e2->base_address
+ && !operand_equal_p (e1->base_address, e2->base_address, 0))
+ return false;
+ if (e1->offset && e2->offset
+ && !operand_equal_p (e1->offset, e2->offset, 0))
+ return false;
+ if (e1->init && e2->init
+ && !operand_equal_p (e1->init, e2->init, 0))
+ return false;
+ if (e1->step && e2->step
+ && !operand_equal_p (e1->step, e2->step, 0))
+ return false;
+
+ return true;
+}
/* List of basic blocks in if-conversion-suitable order. */
static basic_block *ifc_bbs;
-/* Apply more aggressive (extended) if-conversion if true. */
-static bool aggressive_if_conv;
+/* Hash table to store <DR's innermost loop behavior, DR> pairs. */
+static hash_map<innermost_loop_behavior_hash,
+ data_reference_p> *innermost_DR_map;
-/* Hash table to store references, DR pairs. */
-static hash_map<tree_operand_hash, data_reference_p> *ref_DR_map;
-
-/* Hash table to store base reference, DR pairs. */
+/* Hash table to store <base reference, DR> pairs. */
static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
+/* List of redundant SSA names: the first should be replaced by the second. */
+static vec< std::pair<tree, tree> > redundant_ssa_names;
+
/* Structure used to predicate basic blocks. This is attached to the
->aux field of the BBs in the loop to be if-converted. */
struct bb_predicate {
static inline void
add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
{
- gimple_seq_add_seq
+ /* We might have updated some stmts in STMTS via force_gimple_operand
+ calling fold_stmt and that producing multiple stmts. Delink immediate
+ uses so update_ssa after loop versioning doesn't get confused for
+ the not yet inserted predicates.
+ ??? This should go away once we reliably avoid updating stmts
+ not in any BB. */
+ for (gimple_stmt_iterator gsi = gsi_start (stmts);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ delink_stmt_imm_use (stmt);
+ gimple_set_modified (stmt, true);
+ }
+ gimple_seq_add_seq_without_update
(&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
}
set_bb_predicate (bb, boolean_true_node);
}
-/* Release the SSA_NAMEs associated with the predicate of basic block BB,
- but don't actually free it. */
+/* Release the SSA_NAMEs associated with the predicate of basic block BB. */
static inline void
release_bb_predicate (basic_block bb)
gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
if (stmts)
{
- gimple_stmt_iterator i;
+ /* Ensure that these stmts haven't yet been added to a bb. */
+ if (flag_checking)
+ for (gimple_stmt_iterator i = gsi_start (stmts);
+ !gsi_end_p (i); gsi_next (&i))
+ gcc_assert (! gimple_bb (gsi_stmt (i)));
- for (i = gsi_start (stmts); !gsi_end_p (i); gsi_next (&i))
- free_stmt_operands (cfun, gsi_stmt (i));
+ /* Discard them. */
+ gimple_seq_discard (stmts);
set_bb_predicate_gimplified_stmts (bb, NULL);
}
}
{
tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
gimple *stmt = gimple_build_assign (new_name, expr);
+ gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
return new_name;
}
return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
}
-/* Returns true if N is either a constant or a SSA_NAME. */
-
-static bool
-constant_or_ssa_name (tree n)
-{
- switch (TREE_CODE (n))
- {
- case SSA_NAME:
- case INTEGER_CST:
- case REAL_CST:
- case COMPLEX_CST:
- case VECTOR_CST:
- return true;
- default:
- return false;
- }
-}
-
/* Returns either a COND_EXPR or the folded expression if the folded
expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
a constant or a SSA_NAME. */
&& (integer_zerop (op1)))
cond = op0;
}
- cond_expr = fold_ternary (COND_EXPR, type, cond,
- rhs, lhs);
+ cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);
if (cond_expr == NULL_TREE)
return build3 (COND_EXPR, type, cond, rhs, lhs);
STRIP_USELESS_TYPE_CONVERSION (cond_expr);
- if (constant_or_ssa_name (cond_expr))
+ if (is_gimple_val (cond_expr))
return cond_expr;
if (TREE_CODE (cond_expr) == ABS_EXPR)
{
rhs1 = TREE_OPERAND (cond_expr, 1);
STRIP_USELESS_TYPE_CONVERSION (rhs1);
- if (constant_or_ssa_name (rhs1))
+ if (is_gimple_val (rhs1))
return build1 (ABS_EXPR, type, rhs1);
}
STRIP_USELESS_TYPE_CONVERSION (lhs1);
rhs1 = TREE_OPERAND (cond_expr, 1);
STRIP_USELESS_TYPE_CONVERSION (rhs1);
- if (constant_or_ssa_name (rhs1)
- && constant_or_ssa_name (lhs1))
+ if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
}
return build3 (COND_EXPR, type, cond, rhs, lhs);
return false;
}
-/* Return true when PHI is if-convertible. PHI is part of loop LOOP
- and it belongs to basic block BB.
+/* Given PHI which has more than two arguments, this function checks if
+ it's if-convertible by degenerating its arguments. Specifically, if
+ below two conditions are satisfied:
- PHI is not if-convertible if:
- - it has more than 2 arguments.
+ 1) Number of PHI arguments with different values equals to 2 and one
+ argument has the only occurrence.
+ 2) The edge corresponding to the unique argument isn't critical edge.
- When we didn't see if-convertible stores, PHI is not
- if-convertible if:
- - a virtual PHI is immediately used in another PHI node,
- - there is a virtual PHI in a BB other than the loop->header.
- When the aggressive_if_conv is set, PHI can have more than
- two arguments. */
+ Such PHI can be handled as PHIs have only two arguments. For example,
+ below PHI:
+
+ res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
+
+ can be transformed into:
+
+ res = (predicate of e3) ? A_2 : A_1;
+
+ Return TRUE if it is the case, FALSE otherwise. */
static bool
-if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi,
- bool any_mask_load_store)
+phi_convertible_by_degenerating_args (gphi *phi)
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "-------------------------\n");
- print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
- }
+ edge e;
+ tree arg, t1 = NULL, t2 = NULL;
+ unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
+ unsigned int num_args = gimple_phi_num_args (phi);
- if (bb != loop->header)
+ gcc_assert (num_args > 2);
+
+ for (i = 0; i < num_args; i++)
{
- if (gimple_phi_num_args (phi) != 2
- && !aggressive_if_conv)
+ arg = gimple_phi_arg_def (phi, i);
+ if (t1 == NULL || operand_equal_p (t1, arg, 0))
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "More than two phi node args.\n");
- return false;
- }
+ n1++;
+ i1 = i;
+ t1 = arg;
+ }
+ else if (t2 == NULL || operand_equal_p (t2, arg, 0))
+ {
+ n2++;
+ i2 = i;
+ t2 = arg;
+ }
+ else
+ return false;
}
- if (any_mask_load_store)
- return true;
+ if (n1 != 1 && n2 != 1)
+ return false;
- /* When there were no if-convertible stores, check
- that there are no memory writes in the branches of the loop to be
- if-converted. */
- if (virtual_operand_p (gimple_phi_result (phi)))
- {
- imm_use_iterator imm_iter;
- use_operand_p use_p;
+ /* Check if the edge corresponding to the unique arg is critical. */
+ e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
+ if (EDGE_COUNT (e->src->succs) > 1)
+ return false;
- if (bb != loop->header)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Virtual phi not on loop->header.\n");
- return false;
- }
+ return true;
+}
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
- {
- if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
- && USE_STMT (use_p) != phi)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Difficult to handle this virtual phi.\n");
- return false;
- }
- }
+/* Return true when PHI is if-convertible. PHI is part of loop LOOP
+ and it belongs to basic block BB. Note at this point, it is sure
+ that PHI is if-convertible. This function updates global variable
+ ANY_COMPLICATED_PHI if PHI is complicated. */
+
+static bool
+if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi)
+{
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "-------------------------\n");
+ print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
}
+ if (bb != loop->header
+ && gimple_phi_num_args (phi) > 2
+ && !phi_convertible_by_degenerating_args (phi))
+ any_complicated_phi = true;
+
return true;
}
{
data_reference_p *master_dr, *base_master_dr;
- tree ref = DR_REF (a);
tree base_ref = DR_BASE_OBJECT (a);
+ innermost_loop_behavior *innermost = &DR_INNERMOST (a);
tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
bool exist1, exist2;
- while (TREE_CODE (ref) == COMPONENT_REF
- || TREE_CODE (ref) == IMAGPART_EXPR
- || TREE_CODE (ref) == REALPART_EXPR)
- ref = TREE_OPERAND (ref, 0);
-
- master_dr = &ref_DR_map->get_or_insert (ref, &exist1);
+ master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
if (!exist1)
*master_dr = a;
}
}
+/* Return TRUE if can prove the index IDX of an array reference REF is
+ within array bound. Return false otherwise. */
+
+static bool
+idx_within_array_bound (tree ref, tree *idx, void *dta)
+{
+ wi::overflow_type overflow;
+ widest_int niter, valid_niter, delta, wi_step;
+ tree ev, init, step;
+ tree low, high;
+ struct loop *loop = (struct loop*) dta;
+
+ /* Only support within-bound access for array references. */
+ if (TREE_CODE (ref) != ARRAY_REF)
+ return false;
+
+ /* For arrays at the end of the structure, we are not guaranteed that they
+ do not really extend over their declared size. However, for arrays of
+ size greater than one, this is unlikely to be intended. */
+ if (array_at_struct_end_p (ref))
+ return false;
+
+ ev = analyze_scalar_evolution (loop, *idx);
+ ev = instantiate_parameters (loop, ev);
+ init = initial_condition (ev);
+ step = evolution_part_in_loop_num (ev, loop->num);
+
+ if (!init || TREE_CODE (init) != INTEGER_CST
+ || (step && TREE_CODE (step) != INTEGER_CST))
+ return false;
+
+ low = array_ref_low_bound (ref);
+ high = array_ref_up_bound (ref);
+
+ /* The case of nonconstant bounds could be handled, but it would be
+ complicated. */
+ if (TREE_CODE (low) != INTEGER_CST
+ || !high || TREE_CODE (high) != INTEGER_CST)
+ return false;
+
+ /* Check if the intial idx is within bound. */
+ if (wi::to_widest (init) < wi::to_widest (low)
+ || wi::to_widest (init) > wi::to_widest (high))
+ return false;
+
+ /* The idx is always within bound. */
+ if (!step || integer_zerop (step))
+ return true;
+
+ if (!max_loop_iterations (loop, &niter))
+ return false;
+
+ if (wi::to_widest (step) < 0)
+ {
+ delta = wi::to_widest (init) - wi::to_widest (low);
+ wi_step = -wi::to_widest (step);
+ }
+ else
+ {
+ delta = wi::to_widest (high) - wi::to_widest (init);
+ wi_step = wi::to_widest (step);
+ }
+
+ valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
+ /* The iteration space of idx is within array bound. */
+ if (!overflow && niter <= valid_niter)
+ return true;
+
+ return false;
+}
+
+/* Return TRUE if ref is a within bound array reference. */
+
+static bool
+ref_within_array_bound (gimple *stmt, tree ref)
+{
+ struct loop *loop = loop_containing_stmt (stmt);
+
+ gcc_assert (loop != NULL);
+ return for_each_index (&ref, idx_within_array_bound, loop);
+}
+
+
+/* Given a memory reference expression T, return TRUE if base object
+ it refers to is writable. The base object of a memory reference
+ is the main object being referenced, which is returned by function
+ get_base_address. */
+
+static bool
+base_object_writable (tree ref)
+{
+ tree base_tree = get_base_address (ref);
+
+ return (base_tree
+ && DECL_P (base_tree)
+ && decl_binds_to_current_def_p (base_tree)
+ && !TREE_READONLY (base_tree));
+}
+
/* Return true when the memory references of STMT won't trap in the
if-converted code. There are two things that we have to check for:
static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
{
+ /* If DR didn't see a reference here we can't use it to tell
+ whether the ref traps or not. */
+ if (gimple_uid (stmt) == 0)
+ return false;
+
data_reference_p *master_dr, *base_master_dr;
data_reference_p a = drs[gimple_uid (stmt) - 1];
- tree ref_base_a = DR_REF (a);
tree base = DR_BASE_OBJECT (a);
+ innermost_loop_behavior *innermost = &DR_INNERMOST (a);
gcc_assert (DR_STMT (a) == stmt);
+ gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
+ || DR_INIT (a) || DR_STEP (a));
- while (TREE_CODE (ref_base_a) == COMPONENT_REF
- || TREE_CODE (ref_base_a) == IMAGPART_EXPR
- || TREE_CODE (ref_base_a) == REALPART_EXPR)
- ref_base_a = TREE_OPERAND (ref_base_a, 0);
+ master_dr = innermost_DR_map->get (innermost);
+ gcc_assert (master_dr != NULL);
- master_dr = ref_DR_map->get (ref_base_a);
base_master_dr = baseref_DR_map->get (base);
- gcc_assert (master_dr != NULL);
-
/* If a is unconditionally written to it doesn't trap. */
if (DR_W_UNCONDITIONALLY (*master_dr))
return true;
- /* If a is unconditionally accessed then ... */
- if (DR_RW_UNCONDITIONALLY (*master_dr))
+ /* If a is unconditionally accessed then ...
+
+ Even a is conditional access, we can treat it as an unconditional
+ one if it's an array reference and all its index are within array
+ bound. */
+ if (DR_RW_UNCONDITIONALLY (*master_dr)
+ || ref_within_array_bound (stmt, DR_REF (a)))
{
/* an unconditional read won't trap. */
if (DR_IS_READ (a))
if (base_master_dr
&& DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
- else
- {
- /* or the base is know to be not readonly. */
- tree base_tree = get_base_address (DR_REF (a));
- if (DECL_P (base_tree)
- && decl_binds_to_current_def_p (base_tree)
- && ! TREE_READONLY (base_tree))
- return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
- }
+ /* or the base is known to be not readonly. */
+ else if (base_object_writable (DR_REF (a)))
+ return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
}
+
return false;
}
static bool
ifcvt_can_use_mask_load_store (gimple *stmt)
{
- tree lhs, ref;
- machine_mode mode;
- basic_block bb = gimple_bb (stmt);
- bool is_load;
-
- if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
- || bb->loop_father->dont_vectorize
- || !gimple_assign_single_p (stmt)
- || gimple_has_volatile_ops (stmt))
- return false;
-
/* Check whether this is a load or store. */
- lhs = gimple_assign_lhs (stmt);
+ tree lhs = gimple_assign_lhs (stmt);
+ bool is_load;
+ tree ref;
if (gimple_store_p (stmt))
{
if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
/* Mask should be integer mode of the same size as the load/store
mode. */
- mode = TYPE_MODE (TREE_TYPE (lhs));
- if (int_mode_for_mode (mode) == BLKmode
- || VECTOR_MODE_P (mode))
+ machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+ if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
return false;
if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
return false;
}
+/* Return true if STMT could be converted from an operation that is
+ unconditional to one that is conditional on a bb predicate mask. */
+
+static bool
+ifcvt_can_predicate (gimple *stmt)
+{
+ basic_block bb = gimple_bb (stmt);
+
+ if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
+ || bb->loop_father->dont_vectorize
+ || gimple_has_volatile_ops (stmt))
+ return false;
+
+ if (gimple_assign_single_p (stmt))
+ return ifcvt_can_use_mask_load_store (stmt);
+
+ tree_code code = gimple_assign_rhs_code (stmt);
+ tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
+ tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
+ if (!types_compatible_p (lhs_type, rhs_type))
+ return false;
+ internal_fn cond_fn = get_conditional_internal_fn (code);
+ return (cond_fn != IFN_LAST
+ && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
+}
+
/* Return true when STMT is if-convertible.
GIMPLE_ASSIGN statement is not if-convertible if,
static bool
if_convertible_gimple_assign_stmt_p (gimple *stmt,
- vec<data_reference_p> refs,
- bool *any_mask_load_store)
+ vec<data_reference_p> refs)
{
tree lhs = gimple_assign_lhs (stmt);
|| ! ifcvt_memrefs_wont_trap (stmt, refs))
&& gimple_could_trap_p (stmt))
{
- if (ifcvt_can_use_mask_load_store (stmt))
+ if (ifcvt_can_predicate (stmt))
{
gimple_set_plf (stmt, GF_PLF_2, true);
- *any_mask_load_store = true;
+ need_to_predicate = true;
return true;
}
if (dump_file && (dump_flags & TDF_DETAILS))
/* When if-converting stores force versioning, likewise if we
ended up generating store data races. */
if (gimple_vdef (stmt))
- *any_mask_load_store = true;
+ need_to_predicate = true;
return true;
}
- it is builtins call. */
static bool
-if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs,
- bool *any_mask_load_store)
+if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
{
switch (gimple_code (stmt))
{
return true;
case GIMPLE_ASSIGN:
- return if_convertible_gimple_assign_stmt_p (stmt, refs,
- any_mask_load_store);
+ return if_convertible_gimple_assign_stmt_p (stmt, refs);
case GIMPLE_CALL:
{
&& !(flags & ECF_LOOPING_CONST_OR_PURE)
/* We can only vectorize some builtins at the moment,
so restrict if-conversion to those. */
- && DECL_BUILT_IN (fndecl))
+ && fndecl_built_in_p (fndecl))
return true;
}
return false;
print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
}
return false;
- break;
}
return true;
return true;
}
-/* Returns true if at least one successor in on critical edge. */
-static inline bool
-has_pred_critical_p (basic_block bb)
-{
- edge e;
- edge_iterator ei;
-
- FOR_EACH_EDGE (e, ei, bb->preds)
- if (EDGE_COUNT (e->src->succs) > 1)
- return true;
- return false;
-}
-
/* Return true when BB is if-convertible. This routine does not check
basic block's statements and phis.
- it is after the exit block but before the latch,
- its edges are not normal.
- Last restriction is valid if aggressive_if_conv is false.
-
EXIT_BB is the basic block containing the exit of the LOOP. BB is
inside LOOP. */
if (EDGE_COUNT (bb->succs) > 2)
return false;
- if (EDGE_COUNT (bb->preds) > 2
- && !aggressive_if_conv)
- return false;
-
if (exit_bb)
{
if (bb != loop->latch)
return false;
}
- /* At least one incoming edge has to be non-critical as otherwise edge
- predicates are not equal to basic-block predicates of the edge
- source. This check is skipped if aggressive_if_conv is true. */
- if (!aggressive_if_conv
- && EDGE_COUNT (bb->preds) > 1
- && bb != loop->header
- && all_preds_critical_p (bb))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "only critical predecessors\n");
- return false;
- }
-
return true;
}
&& bb_predicate_gimplified_stmts (loop->latch) == NULL);
}
+/* Build region by adding loop pre-header and post-header blocks. */
+
+static vec<basic_block>
+build_region (struct loop *loop)
+{
+ vec<basic_block> region = vNULL;
+ basic_block exit_bb = NULL;
+
+ gcc_assert (ifc_bbs);
+ /* The first element is loop pre-header. */
+ region.safe_push (loop_preheader_edge (loop)->src);
+
+ for (unsigned int i = 0; i < loop->num_nodes; i++)
+ {
+ basic_block bb = ifc_bbs[i];
+ region.safe_push (bb);
+ /* Find loop postheader. */
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (loop_exit_edge_p (loop, e))
+ {
+ exit_bb = e->dest;
+ break;
+ }
+ }
+ /* The last element is loop post-header. */
+ gcc_assert (exit_bb);
+ region.safe_push (exit_bb);
+ return region;
+}
+
/* Return true when LOOP is if-convertible. This is a helper function
for if_convertible_loop_p. REFS and DDRS are initialized and freed
in if_convertible_loop_p. */
static bool
-if_convertible_loop_p_1 (struct loop *loop,
- vec<data_reference_p> *refs,
- bool *any_mask_load_store)
+if_convertible_loop_p_1 (struct loop *loop, vec<data_reference_p> *refs)
{
unsigned int i;
basic_block exit_bb = NULL;
+ vec<basic_block> region;
if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
return false;
calculate_dominance_info (CDI_DOMINATORS);
- calculate_dominance_info (CDI_POST_DOMINATORS);
/* Allow statements that can be handled during if-conversion. */
ifc_bbs = get_loop_body_in_if_conv_order (loop);
data_reference_p dr;
- ref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
+ innermost_DR_map
+ = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
+ /* Compute post-dominator tree locally. */
+ region = build_region (loop);
+ calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
+
predicate_bbs (loop);
+ /* Free post-dominator tree since it is not used after predication. */
+ free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
+ region.release ();
+
for (i = 0; refs->iterate (i, &dr); i++)
{
+ tree ref = DR_REF (dr);
+
dr->aux = XNEW (struct ifc_dr);
DR_BASE_W_UNCONDITIONALLY (dr) = false;
DR_RW_UNCONDITIONALLY (dr) = false;
IFC_DR (dr)->base_w_predicate = boolean_false_node;
if (gimple_uid (DR_STMT (dr)) == 0)
gimple_set_uid (DR_STMT (dr), i + 1);
+
+ /* If DR doesn't have innermost loop behavior or it's a compound
+ memory reference, we synthesize its innermost loop behavior
+ for hashing. */
+ if (TREE_CODE (ref) == COMPONENT_REF
+ || TREE_CODE (ref) == IMAGPART_EXPR
+ || TREE_CODE (ref) == REALPART_EXPR
+ || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
+ || DR_INIT (dr) || DR_STEP (dr)))
+ {
+ while (TREE_CODE (ref) == COMPONENT_REF
+ || TREE_CODE (ref) == IMAGPART_EXPR
+ || TREE_CODE (ref) == REALPART_EXPR)
+ ref = TREE_OPERAND (ref, 0);
+
+ memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
+ DR_BASE_ADDRESS (dr) = ref;
+ }
hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
}
/* Check the if-convertibility of statements in predicated BBs. */
if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
- if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
- any_mask_load_store))
+ if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
return false;
}
- for (i = 0; i < loop->num_nodes; i++)
- free_bb_predicate (ifc_bbs[i]);
-
/* Checking PHIs needs to be done after stmts, as the fact whether there
are any masked loads or stores affects the tests. */
for (i = 0; i < loop->num_nodes; i++)
gphi_iterator itr;
for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
- if (!if_convertible_phi_p (loop, bb, itr.phi (),
- *any_mask_load_store))
+ if (!if_convertible_phi_p (loop, bb, itr.phi ()))
return false;
}
- if its basic blocks and phi nodes are if convertible. */
static bool
-if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store)
+if_convertible_loop_p (struct loop *loop)
{
edge e;
edge_iterator ei;
return false;
refs.create (5);
- res = if_convertible_loop_p_1 (loop, &refs, any_mask_load_store);
+ res = if_convertible_loop_p_1 (loop, &refs);
data_reference_p dr;
unsigned int i;
free_data_refs (refs);
- delete ref_DR_map;
- ref_DR_map = NULL;
+ delete innermost_DR_map;
+ innermost_DR_map = NULL;
delete baseref_DR_map;
baseref_DR_map = NULL;
e = gimple_phi_arg_edge (phi, (*occur)[i]);
c = bb_predicate (e->src);
if (is_true_predicate (c))
- continue;
+ {
+ cond = c;
+ break;
+ }
c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
is_gimple_condexpr, NULL_TREE,
true, GSI_SAME_STMT);
return cond;
}
+/* Local valueization callback that follows all-use SSA edges. */
+
+static tree
+ifcvt_follow_ssa_use_edges (tree val)
+{
+ return val;
+}
+
/* Replace a scalar PHI node with a COND_EXPR using COND as condition.
This routine can handle PHI nodes with more than two arguments.
arg0, arg1);
new_stmt = gimple_build_assign (res, rhs);
gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
- update_stmt (new_stmt);
+ gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
+ if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
+ {
+ new_stmt = gsi_stmt (new_gsi);
+ update_stmt (new_stmt);
+ }
if (dump_file && (dump_flags & TDF_DETAILS))
{
if (bb == loop->header)
continue;
- if (EDGE_COUNT (bb->preds) == 1)
- continue;
-
phi_gsi = gsi_start_phis (bb);
if (gsi_end_p (phi_gsi))
continue;
while (!gsi_end_p (phi_gsi))
{
phi = phi_gsi.phi ();
- predicate_scalar_phi (phi, &gsi);
- release_phi_node (phi);
- gsi_next (&phi_gsi);
+ if (virtual_operand_p (gimple_phi_result (phi)))
+ gsi_next (&phi_gsi);
+ else
+ {
+ predicate_scalar_phi (phi, &gsi);
+ remove_phi_node (&phi_gsi, false);
+ }
}
-
- set_phi_nodes (bb, NULL);
}
}
gimplification of the predicates. */
static void
-insert_gimplified_predicates (loop_p loop, bool any_mask_load_store)
+insert_gimplified_predicates (loop_p loop)
{
unsigned int i;
stmts = bb_predicate_gimplified_stmts (bb);
if (stmts)
{
- if (any_mask_load_store)
+ if (need_to_predicate)
{
/* Insert the predicate of the BB just after the label,
as the if-conversion of memory writes will use this
}
}
-/* Helper function for predicate_mem_writes. Returns index of existent
+/* Helper function for predicate_statements. Returns index of existent
mask if it was created for given SIZE and -1 otherwise. */
static int
return -1;
}
+/* Helper function for predicate_statements. STMT is a memory read or
+ write and it needs to be predicated by MASK. Return a statement
+ that does so. */
+
+static gimple *
+predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
+{
+ gcall *new_stmt;
+
+ tree lhs = gimple_assign_lhs (stmt);
+ tree rhs = gimple_assign_rhs1 (stmt);
+ tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
+ mark_addressable (ref);
+ tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ tree ptr = build_int_cst (reference_alias_ptr_type (ref),
+ get_object_alignment (ref));
+ /* Copy points-to info if possible. */
+ if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
+ copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
+ ref);
+ if (TREE_CODE (lhs) == SSA_NAME)
+ {
+ new_stmt
+ = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
+ ptr, mask);
+ gimple_call_set_lhs (new_stmt, lhs);
+ gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+ }
+ else
+ {
+ new_stmt
+ = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
+ mask, rhs);
+ gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+ gimple_set_vdef (new_stmt, gimple_vdef (stmt));
+ SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
+ }
+ gimple_call_set_nothrow (new_stmt, true);
+ return new_stmt;
+}
+
+/* STMT uses OP_LHS. Check whether it is equivalent to:
+
+ ... = OP_MASK ? OP_LHS : X;
+
+ Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
+ known to have value OP_COND. */
+
+static tree
+check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
+ tree op_lhs)
+{
+ gassign *assign = dyn_cast <gassign *> (stmt);
+ if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
+ return NULL_TREE;
+
+ tree use_cond = gimple_assign_rhs1 (assign);
+ tree if_true = gimple_assign_rhs2 (assign);
+ tree if_false = gimple_assign_rhs3 (assign);
+
+ if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
+ && if_true == op_lhs)
+ return if_false;
+
+ if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
+ return if_true;
+
+ return NULL_TREE;
+}
+
+/* Return true if VALUE is available for use at STMT. SSA_NAMES is
+ the set of SSA names defined earlier in STMT's block. */
+
+static bool
+value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
+ tree value)
+{
+ if (is_gimple_min_invariant (value))
+ return true;
+
+ if (TREE_CODE (value) == SSA_NAME)
+ {
+ if (SSA_NAME_IS_DEFAULT_DEF (value))
+ return true;
+
+ basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
+ basic_block use_bb = gimple_bb (stmt);
+ return (def_bb == use_bb
+ ? ssa_names->contains (value)
+ : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
+ }
+
+ return false;
+}
+
+/* Helper function for predicate_statements. STMT is a potentially-trapping
+ arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
+ has value COND. Return a statement that does so. SSA_NAMES is the set of
+ SSA names defined earlier in STMT's block. */
+
+static gimple *
+predicate_rhs_code (gassign *stmt, tree mask, tree cond,
+ hash_set<tree_ssa_name_hash> *ssa_names)
+{
+ tree lhs = gimple_assign_lhs (stmt);
+ tree_code code = gimple_assign_rhs_code (stmt);
+ unsigned int nops = gimple_num_ops (stmt);
+ internal_fn cond_fn = get_conditional_internal_fn (code);
+
+ /* Construct the arguments to the conditional internal function. */
+ auto_vec<tree, 8> args;
+ args.safe_grow (nops + 1);
+ args[0] = mask;
+ for (unsigned int i = 1; i < nops; ++i)
+ args[i] = gimple_op (stmt, i);
+ args[nops] = NULL_TREE;
+
+ /* Look for uses of the result to see whether they are COND_EXPRs that can
+ be folded into the conditional call. */
+ imm_use_iterator imm_iter;
+ gimple *use_stmt;
+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
+ {
+ tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
+ if (new_else && value_available_p (stmt, ssa_names, new_else))
+ {
+ if (!args[nops])
+ args[nops] = new_else;
+ if (operand_equal_p (new_else, args[nops], 0))
+ {
+ /* We have:
+
+ LHS = IFN_COND (MASK, ..., ELSE);
+ X = MASK ? LHS : ELSE;
+
+ which makes X equivalent to LHS. */
+ tree use_lhs = gimple_assign_lhs (use_stmt);
+ redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
+ }
+ }
+ }
+ if (!args[nops])
+ args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
+ nops - 1, &args[1]);
+
+ /* Create and insert the call. */
+ gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
+ gimple_call_set_lhs (new_stmt, lhs);
+ gimple_call_set_nothrow (new_stmt, true);
+
+ return new_stmt;
+}
+
/* Predicate each write to memory in LOOP.
This function transforms control flow constructs containing memory
| goto bb_1
| end_bb_4
- predicate_mem_writes is then predicating the memory write as follows:
+ predicate_statements is then predicating the memory write as follows:
| bb_0
| i = 0
*/
static void
-predicate_mem_writes (loop_p loop)
+predicate_statements (loop_p loop)
{
unsigned int i, orig_loop_num_nodes = loop->num_nodes;
auto_vec<int, 1> vect_sizes;
auto_vec<tree, 1> vect_masks;
+ hash_set<tree_ssa_name_hash> ssa_names;
for (i = 1; i < orig_loop_num_nodes; i++)
{
basic_block bb = ifc_bbs[i];
tree cond = bb_predicate (bb);
bool swap;
- gimple *stmt;
int index;
- if (is_true_predicate (cond) || is_false_predicate (cond))
+ if (is_true_predicate (cond))
continue;
swap = false;
vect_sizes.truncate (0);
vect_masks.truncate (0);
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
- continue;
- else if (gimple_plf (stmt, GF_PLF_2))
- {
- tree lhs = gimple_assign_lhs (stmt);
- tree rhs = gimple_assign_rhs1 (stmt);
- tree ref, addr, ptr, mask;
- gimple *new_stmt;
- gimple_seq stmts = NULL;
- int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
- ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
- mark_addressable (ref);
- addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
- true, NULL_TREE, true,
- GSI_SAME_STMT);
- if (!vect_sizes.is_empty ()
- && (index = mask_exists (bitsize, vect_sizes)) != -1)
- /* Use created mask. */
- mask = vect_masks[index];
- else
- {
- if (COMPARISON_CLASS_P (cond))
- mask = gimple_build (&stmts, TREE_CODE (cond),
- boolean_type_node,
- TREE_OPERAND (cond, 0),
- TREE_OPERAND (cond, 1));
- else
- {
- gcc_assert (TREE_CODE (cond) == SSA_NAME);
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
+ {
+ gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
+ if (!stmt)
+ ;
+ else if (is_false_predicate (cond)
+ && gimple_vdef (stmt))
+ {
+ unlink_stmt_vdef (stmt);
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ continue;
+ }
+ else if (gimple_plf (stmt, GF_PLF_2))
+ {
+ tree lhs = gimple_assign_lhs (stmt);
+ tree mask;
+ gimple *new_stmt;
+ gimple_seq stmts = NULL;
+ machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+ /* We checked before setting GF_PLF_2 that an equivalent
+ integer mode exists. */
+ int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+ if (!vect_sizes.is_empty ()
+ && (index = mask_exists (bitsize, vect_sizes)) != -1)
+ /* Use created mask. */
+ mask = vect_masks[index];
+ else
+ {
+ if (COMPARISON_CLASS_P (cond))
+ mask = gimple_build (&stmts, TREE_CODE (cond),
+ boolean_type_node,
+ TREE_OPERAND (cond, 0),
+ TREE_OPERAND (cond, 1));
+ else
mask = cond;
- }
-
- if (swap)
- {
- tree true_val
- = constant_boolean_node (true, TREE_TYPE (mask));
- mask = gimple_build (&stmts, BIT_XOR_EXPR,
- TREE_TYPE (mask), mask, true_val);
- }
- gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
- mask = ifc_temp_var (TREE_TYPE (mask), mask, &gsi);
- /* Save mask and its size for further use. */
- vect_sizes.safe_push (bitsize);
- vect_masks.safe_push (mask);
- }
- ptr = build_int_cst (reference_alias_ptr_type (ref),
- get_object_alignment (ref));
- /* Copy points-to info if possible. */
- if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
- copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
- ref);
- if (TREE_CODE (lhs) == SSA_NAME)
- {
- new_stmt
- = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
- ptr, mask);
- gimple_call_set_lhs (new_stmt, lhs);
- }
- else
- new_stmt
- = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
- mask, rhs);
- gsi_replace (&gsi, new_stmt, true);
- }
- else if (gimple_vdef (stmt))
- {
- tree lhs = gimple_assign_lhs (stmt);
- tree rhs = gimple_assign_rhs1 (stmt);
- tree type = TREE_TYPE (lhs);
-
- lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
- rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
- if (swap)
- std::swap (lhs, rhs);
- cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
- is_gimple_condexpr, NULL_TREE,
- true, GSI_SAME_STMT);
- rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
- gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
- update_stmt (stmt);
- }
+ if (swap)
+ {
+ tree true_val
+ = constant_boolean_node (true, TREE_TYPE (mask));
+ mask = gimple_build (&stmts, BIT_XOR_EXPR,
+ TREE_TYPE (mask), mask, true_val);
+ }
+ gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+
+ /* Save mask and its size for further use. */
+ vect_sizes.safe_push (bitsize);
+ vect_masks.safe_push (mask);
+ }
+ if (gimple_assign_single_p (stmt))
+ new_stmt = predicate_load_or_store (&gsi, stmt, mask);
+ else
+ new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
+
+ gsi_replace (&gsi, new_stmt, true);
+ }
+ else if (gimple_vdef (stmt))
+ {
+ tree lhs = gimple_assign_lhs (stmt);
+ tree rhs = gimple_assign_rhs1 (stmt);
+ tree type = TREE_TYPE (lhs);
+
+ lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
+ rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
+ if (swap)
+ std::swap (lhs, rhs);
+ cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
+ is_gimple_condexpr, NULL_TREE,
+ true, GSI_SAME_STMT);
+ rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
+ gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
+ update_stmt (stmt);
+ }
+ tree lhs = gimple_get_lhs (gsi_stmt (gsi));
+ if (lhs && TREE_CODE (lhs) == SSA_NAME)
+ ssa_names.add (lhs);
+ gsi_next (&gsi);
+ }
+ ssa_names.empty ();
}
}
blocks. Replace PHI nodes with conditional modify expressions. */
static void
-combine_blocks (struct loop *loop, bool any_mask_load_store)
+combine_blocks (struct loop *loop)
{
basic_block bb, exit_bb, merge_target_bb;
unsigned int orig_loop_num_nodes = loop->num_nodes;
edge e;
edge_iterator ei;
- predicate_bbs (loop);
remove_conditions_and_labels (loop);
- insert_gimplified_predicates (loop, any_mask_load_store);
+ insert_gimplified_predicates (loop);
predicate_all_scalar_phis (loop);
- if (any_mask_load_store)
- predicate_mem_writes (loop);
+ if (need_to_predicate)
+ predicate_statements (loop);
/* Merge basic blocks: first remove all the edges in the loop,
except for those from the exit block. */
if (exit_bb != loop->header)
{
/* Connect this node to loop header. */
- make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
+ make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
}
}
merge_target_bb = loop->header;
+
+ /* Get at the virtual def valid for uses starting at the first block
+ we merge into the header. Without a virtual PHI the loop has the
+ same virtual use on all stmts. */
+ gphi *vphi = get_virtual_phi (loop->header);
+ tree last_vdef = NULL_TREE;
+ if (vphi)
+ {
+ last_vdef = gimple_phi_result (vphi);
+ for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
+ ! gsi_end_p (gsi); gsi_next (&gsi))
+ if (gimple_vdef (gsi_stmt (gsi)))
+ last_vdef = gimple_vdef (gsi_stmt (gsi));
+ }
for (i = 1; i < orig_loop_num_nodes; i++)
{
gimple_stmt_iterator gsi;
if (bb == exit_bb || bb == loop->latch)
continue;
+ /* We release virtual PHIs late because we have to propagate them
+ out using the current VUSE. The def might be the one used
+ after the loop. */
+ vphi = get_virtual_phi (bb);
+ if (vphi)
+ {
+ imm_use_iterator iter;
+ use_operand_p use_p;
+ gimple *use_stmt;
+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
+ {
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+ SET_USE (use_p, last_vdef);
+ }
+ if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
+ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
+ gsi = gsi_for_stmt (vphi);
+ remove_phi_node (&gsi, true);
+ }
+
/* Make stmts member of loop->header and clear range info from all stmts
in BB which is now no longer executed conditional on a predicate we
could have derived it from. */
{
gimple *stmt = gsi_stmt (gsi);
gimple_set_bb (stmt, merge_target_bb);
+ /* Update virtual operands. */
+ if (last_vdef)
+ {
+ use_operand_p use_p = ssa_vuse_operand (stmt);
+ if (use_p
+ && USE_FROM_PTR (use_p) != last_vdef)
+ SET_USE (use_p, last_vdef);
+ if (gimple_vdef (stmt))
+ last_vdef = gimple_vdef (stmt);
+ }
if (predicated[i])
{
ssa_op_iter i;
/* Update stmt list. */
last = gsi_last_bb (merge_target_bb);
- gsi_insert_seq_after (&last, bb_seq (bb), GSI_NEW_STMT);
+ gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
set_bb_seq (bb, NULL);
delete_basic_block (bb);
This reduces the number of basic blocks to two, to please the
vectorizer that handles only loops with two nodes. */
if (exit_bb
- && exit_bb != loop->header
- && can_merge_blocks_p (loop->header, exit_bb))
- merge_blocks (loop->header, exit_bb);
+ && exit_bb != loop->header)
+ {
+ /* We release virtual PHIs late because we have to propagate them
+ out using the current VUSE. The def might be the one used
+ after the loop. */
+ vphi = get_virtual_phi (exit_bb);
+ if (vphi)
+ {
+ imm_use_iterator iter;
+ use_operand_p use_p;
+ gimple *use_stmt;
+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
+ {
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+ SET_USE (use_p, last_vdef);
+ }
+ if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
+ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
+ gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
+ remove_phi_node (&gsi, true);
+ }
+
+ if (can_merge_blocks_p (loop->header, exit_bb))
+ merge_blocks (loop->header, exit_bb);
+ }
free (ifc_bbs);
ifc_bbs = NULL;
will be if-converted, the new copy of the loop will not,
and the LOOP_VECTORIZED internal call will be guarding which
loop to execute. The vectorizer pass will fold this
- internal call into either true or false. */
+ internal call into either true or false.
-static bool
+ Note that this function intentionally invalidates profile. Both edges
+ out of LOOP_VECTORIZED must have 100% probability so the profile remains
+ consistent after the condition is folded in the vectorizer. */
+
+static struct loop *
version_loop_for_if_conversion (struct loop *loop)
{
basic_block cond_bb;
struct loop *new_loop;
gimple *g;
gimple_stmt_iterator gsi;
+ unsigned int save_length;
g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
build_int_cst (integer_type_node, loop->num),
integer_zero_node);
gimple_call_set_lhs (g, cond);
+ /* Save BB->aux around loop_version as that uses the same field. */
+ save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
+ void **saved_preds = XALLOCAVEC (void *, save_length);
+ for (unsigned i = 0; i < save_length; i++)
+ saved_preds[i] = ifc_bbs[i]->aux;
+
initialize_original_copy_tables ();
+ /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
+ is re-merged in the vectorizer. */
new_loop = loop_version (loop, cond, &cond_bb,
- REG_BR_PROB_BASE, REG_BR_PROB_BASE,
- REG_BR_PROB_BASE, true);
+ profile_probability::always (),
+ profile_probability::always (),
+ profile_probability::always (),
+ profile_probability::always (), true);
free_original_copy_tables ();
+
+ for (unsigned i = 0; i < save_length; i++)
+ ifc_bbs[i]->aux = saved_preds[i];
+
if (new_loop == NULL)
- return false;
+ return NULL;
+
new_loop->dont_vectorize = true;
new_loop->force_vectorize = false;
gsi = gsi_last_bb (cond_bb);
gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
update_ssa (TODO_update_ssa);
+ return new_loop;
+}
+
+/* Return true when LOOP satisfies the following conditions that will
+ allow it to be recognized by the vectorizer for outer-loop
+ vectorization:
+ - The loop is not the root node of the loop tree.
+ - The loop has exactly one inner loop.
+ - The loop has a single exit.
+ - The loop header has a single successor, which is the inner
+ loop header.
+ - Each of the inner and outer loop latches have a single
+ predecessor.
+ - The loop exit block has a single predecessor, which is the
+ inner loop's exit block. */
+
+static bool
+versionable_outer_loop_p (struct loop *loop)
+{
+ /* Cheap structural checks first: LOOP must not be the loop-tree root,
+ must not be marked dont_vectorize, must have exactly one inner loop
+ and a single exit, and the header/latch shape must be simple. */
+ if (!loop_outer (loop)
+ || loop->dont_vectorize
+ || !loop->inner
+ || loop->inner->next
+ || !single_exit (loop)
+ || !single_succ_p (loop->header)
+ || single_succ (loop->header) != loop->inner->header
+ || !single_pred_p (loop->latch)
+ || !single_pred_p (loop->inner->latch))
+ return false;
+
+ /* The outer loop's exit block must have a single predecessor, which is
+ the inner loop's exit block. */
+ basic_block outer_exit = single_pred (loop->latch);
+ basic_block inner_exit = single_pred (loop->inner->latch);
+
+ if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
+ return false;
+
+ if (dump_file)
+ fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
+
+ return true;
+}
-/* Performs splitting of critical edges if aggressive_if_conv is true.
- Returns false if loop won't be if converted and true otherwise. */
+/* Performs splitting of critical edges. Skip splitting and return false
+ if LOOP will not be converted because:
+
+ - LOOP is not well formed.
+ - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
+
+ Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
static bool
-ifcvt_split_critical_edges (struct loop *loop)
+ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv)
{
basic_block *body;
basic_block bb;
gimple *stmt;
edge e;
edge_iterator ei;
+ auto_vec<edge> critical_edges;
- if (num <= 2)
- return false;
- if (loop->inner)
- return false;
- if (!single_exit (loop))
+ /* Loop is not well formed. */
+ if (num <= 2 || loop->inner || !single_exit (loop))
return false;
body = get_loop_body (loop);
for (i = 0; i < num; i++)
{
bb = body[i];
- if (bb == loop->latch
- || bb_with_exit_edge_p (loop, bb))
+ if (!aggressive_if_conv
+ && phi_nodes (bb)
+ && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "BB %d has complicated PHI with more than %u args.\n",
+ bb->index, MAX_PHI_ARG_NUM);
+
+ free (body);
+ return false;
+ }
+ if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
continue;
+
stmt = last_stmt (bb);
/* Skip basic blocks not ending with conditional branch. */
- if (!(stmt && gimple_code (stmt) == GIMPLE_COND))
+ if (!stmt || gimple_code (stmt) != GIMPLE_COND)
continue;
+
FOR_EACH_EDGE (e, ei, bb->succs)
if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
- split_edge (e);
+ critical_edges.safe_push (e);
}
free (body);
- return true;
-}
-
-/* Assumes that lhs of DEF_STMT have multiple uses.
- Delete one use by (1) creation of copy DEF_STMT with
- unique lhs; (2) change original use of lhs in one
- use statement with newly created lhs. */
-
-static void
-ifcvt_split_def_stmt (gimple *def_stmt, gimple *use_stmt)
-{
- tree var;
- tree lhs;
- gimple *copy_stmt;
- gimple_stmt_iterator gsi;
- use_operand_p use_p;
- imm_use_iterator imm_iter;
- var = gimple_assign_lhs (def_stmt);
- copy_stmt = gimple_copy (def_stmt);
- lhs = make_temp_ssa_name (TREE_TYPE (var), NULL, "_ifc_");
- gimple_assign_set_lhs (copy_stmt, lhs);
- SSA_NAME_DEF_STMT (lhs) = copy_stmt;
- /* Insert copy of DEF_STMT. */
- gsi = gsi_for_stmt (def_stmt);
- gsi_insert_after (&gsi, copy_stmt, GSI_SAME_STMT);
- /* Change use of var to lhs in use_stmt. */
- if (dump_file && (dump_flags & TDF_DETAILS))
+ while (critical_edges.length () > 0)
{
- fprintf (dump_file, "Change use of var ");
- print_generic_expr (dump_file, var, TDF_SLIM);
- fprintf (dump_file, " to ");
- print_generic_expr (dump_file, lhs, TDF_SLIM);
- fprintf (dump_file, "\n");
+ e = critical_edges.pop ();
+ /* Don't split if bb can be predicated along non-critical edge. */
+ if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
+ split_edge (e);
}
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
- {
- if (USE_STMT (use_p) != use_stmt)
- continue;
- SET_USE (use_p, lhs);
- break;
- }
-}
-/* Traverse bool pattern recursively starting from VAR.
- Save its def and use statements to defuse_list if VAR does
- not have single use. */
-
-static void
-ifcvt_walk_pattern_tree (tree var, vec<gimple *> *defuse_list,
- gimple *use_stmt)
-{
- tree rhs1, rhs2;
- enum tree_code code;
- gimple *def_stmt;
-
- def_stmt = SSA_NAME_DEF_STMT (var);
- if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
- return;
- if (!has_single_use (var))
- {
- /* Put def and use stmts into defuse_list. */
- defuse_list->safe_push (def_stmt);
- defuse_list->safe_push (use_stmt);
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Multiple lhs uses in stmt\n");
- print_gimple_stmt (dump_file, def_stmt, 0, TDF_SLIM);
- }
- }
- rhs1 = gimple_assign_rhs1 (def_stmt);
- code = gimple_assign_rhs_code (def_stmt);
- switch (code)
- {
- case SSA_NAME:
- ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
- break;
- CASE_CONVERT:
- if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
- || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
- && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
- break;
- ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
- break;
- case BIT_NOT_EXPR:
- ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
- break;
- case BIT_AND_EXPR:
- case BIT_IOR_EXPR:
- case BIT_XOR_EXPR:
- ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
- rhs2 = gimple_assign_rhs2 (def_stmt);
- ifcvt_walk_pattern_tree (rhs2, defuse_list, def_stmt);
- break;
- default:
- break;
- }
- return;
-}
-
-/* Returns true if STMT can be a root of bool pattern applied
- by vectorizer. */
-
-static bool
-stmt_is_root_of_bool_pattern (gimple *stmt)
-{
- enum tree_code code;
- tree lhs, rhs;
-
- code = gimple_assign_rhs_code (stmt);
- if (CONVERT_EXPR_CODE_P (code))
- {
- lhs = gimple_assign_lhs (stmt);
- rhs = gimple_assign_rhs1 (stmt);
- if (TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
- return false;
- if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
- return false;
- return true;
- }
- else if (code == COND_EXPR)
- {
- rhs = gimple_assign_rhs1 (stmt);
- if (TREE_CODE (rhs) != SSA_NAME)
- return false;
- return true;
- }
- return false;
-}
-
-/* Traverse all statements in BB which correspond to loop header to
- find out all statements which can start bool pattern applied by
- vectorizer and convert multiple uses in it to conform pattern
- restrictions. Such case can occur if the same predicate is used both
- for phi node conversion and load/store mask. */
-
-static void
-ifcvt_repair_bool_pattern (basic_block bb)
-{
- tree rhs;
- gimple *stmt;
- gimple_stmt_iterator gsi;
- vec<gimple *> defuse_list = vNULL;
- vec<gimple *> pattern_roots = vNULL;
- bool repeat = true;
- int niter = 0;
- unsigned int ix;
-
- /* Collect all root pattern statements. */
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- stmt = gsi_stmt (gsi);
- if (gimple_code (stmt) != GIMPLE_ASSIGN)
- continue;
- if (!stmt_is_root_of_bool_pattern (stmt))
- continue;
- pattern_roots.safe_push (stmt);
- }
-
- if (pattern_roots.is_empty ())
- return;
-
- /* Split all statements with multiple uses iteratively since splitting
- may create new multiple uses. */
- while (repeat)
- {
- repeat = false;
- niter++;
- FOR_EACH_VEC_ELT (pattern_roots, ix, stmt)
- {
- rhs = gimple_assign_rhs1 (stmt);
- ifcvt_walk_pattern_tree (rhs, &defuse_list, stmt);
- while (defuse_list.length () > 0)
- {
- repeat = true;
- gimple *def_stmt, *use_stmt;
- use_stmt = defuse_list.pop ();
- def_stmt = defuse_list.pop ();
- ifcvt_split_def_stmt (def_stmt, use_stmt);
- }
-
- }
- }
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Repair bool pattern takes %d iterations. \n",
- niter);
+ return true;
}
/* Delete redundant statements produced by predication which prevents
enum gimple_code code;
use_operand_p use_p;
imm_use_iterator imm_iter;
+ std::pair <tree, tree> *name_pair;
+ unsigned int i;
+
+ FOR_EACH_VEC_ELT (redundant_ssa_names, i, name_pair)
+ replace_uses_by (name_pair->first, name_pair->second);
+ redundant_ssa_names.release ();
worklist.create (64);
/* Consider all phi as live statements. */
profitability analysis. Returns non-zero todo flags when something
changed. */
-static unsigned int
+unsigned int
tree_if_conversion (struct loop *loop)
{
unsigned int todo = 0;
+ bool aggressive_if_conv;
+ struct loop *rloop;
+ bitmap exit_bbs;
+
+ again:
+ rloop = NULL;
ifc_bbs = NULL;
- bool any_mask_load_store = false;
+ need_to_predicate = false;
+ any_complicated_phi = false;
- /* Set up aggressive if-conversion for loops marked with simd pragma. */
+ /* Apply more aggressive if-conversion when loop or its outer loop were
+ marked with simd pragma. When that's the case, we try to if-convert
+ loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
aggressive_if_conv = loop->force_vectorize;
- /* Check either outer loop was marked with simd pragma. */
if (!aggressive_if_conv)
{
struct loop *outer_loop = loop_outer (loop);
aggressive_if_conv = true;
}
- if (aggressive_if_conv)
- if (!ifcvt_split_critical_edges (loop))
- goto cleanup;
+ if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
+ goto cleanup;
- if (!if_convertible_loop_p (loop, &any_mask_load_store)
+ if (!if_convertible_loop_p (loop)
|| !dbg_cnt (if_conversion_tree))
goto cleanup;
- if (any_mask_load_store
+ if ((need_to_predicate || any_complicated_phi)
&& ((!flag_tree_loop_vectorize && !loop->force_vectorize)
|| loop->dont_vectorize))
goto cleanup;
- if (any_mask_load_store && !version_loop_for_if_conversion (loop))
- goto cleanup;
+ /* Since we have no cost model, always version loops unless the user
+ specified -ftree-loop-if-convert or unless versioning is required.
+ Either version this loop, or if the pattern is right for outer-loop
+ vectorization, version the outer loop. In the latter case we will
+ still if-convert the original inner loop. */
+ if (need_to_predicate
+ || any_complicated_phi
+ || flag_tree_loop_if_convert != 1)
+ {
+ struct loop *vloop
+ = (versionable_outer_loop_p (loop_outer (loop))
+ ? loop_outer (loop) : loop);
+ struct loop *nloop = version_loop_for_if_conversion (vloop);
+ if (nloop == NULL)
+ goto cleanup;
+ if (vloop != loop)
+ {
+ /* If versionable_outer_loop_p decided to version the
+ outer loop, version also the inner loop of the non-vectorized
+ loop copy. So we transform:
+ loop1
+ loop2
+ into:
+ if (LOOP_VECTORIZED (1, 3))
+ {
+ loop1
+ loop2
+ }
+ else
+ loop3 (copy of loop1)
+ if (LOOP_VECTORIZED (4, 5))
+ loop4 (copy of loop2)
+ else
+ loop5 (copy of loop4) */
+ gcc_assert (nloop->inner && nloop->inner->next == NULL);
+ rloop = nloop->inner;
+ }
+ }
/* Now all statements are if-convertible. Combine all the basic
blocks into one huge basic block doing the if-conversion
on-the-fly. */
- combine_blocks (loop, any_mask_load_store);
+ combine_blocks (loop);
- /* Delete dead predicate computations and repair tree correspondent
- to bool pattern to delete multiple uses of predicates. */
- if (aggressive_if_conv)
- {
- ifcvt_local_dce (loop->header);
- ifcvt_repair_bool_pattern (loop->header);
- }
+ /* Delete dead predicate computations. */
+ ifcvt_local_dce (loop->header);
+
+ /* Perform local CSE, this esp. helps the vectorizer analysis if loads
+ and stores are involved.
+ ??? We'll still keep dead stores though. */
+ exit_bbs = BITMAP_ALLOC (NULL);
+ bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
+ todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
+ BITMAP_FREE (exit_bbs);
todo |= TODO_cleanup_cfg;
- if (any_mask_load_store)
- {
- mark_virtual_operands_for_renaming (cfun);
- todo |= TODO_update_ssa_only_virtuals;
- }
cleanup:
if (ifc_bbs)
free (ifc_bbs);
ifc_bbs = NULL;
}
- free_dominance_info (CDI_POST_DOMINATORS);
+ if (rloop != NULL)
+ {
+ loop = rloop;
+ goto again;
+ }
return todo;
}
GIMPLE_PASS, /* type */
"ifcvt", /* name */
OPTGROUP_NONE, /* optinfo_flags */
- TV_NONE, /* tv_id */
+ TV_TREE_LOOP_IFCVT, /* tv_id */
( PROP_cfg | PROP_ssa ), /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
{
return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
&& flag_tree_loop_if_convert != 0)
- || flag_tree_loop_if_convert == 1
- || flag_tree_loop_if_convert_stores == 1);
+ || flag_tree_loop_if_convert == 1);
}
unsigned int
FOR_EACH_LOOP (loop, 0)
if (flag_tree_loop_if_convert == 1
- || flag_tree_loop_if_convert_stores == 1
|| ((flag_tree_loop_vectorize || loop->force_vectorize)
&& !loop->dont_vectorize))
todo |= tree_if_conversion (loop);
+ if (todo)
+ {
+ free_numbers_of_iterations_estimates (fun);
+ scev_reset ();
+ }
+
if (flag_checking)
{
basic_block bb;