/* Data References Analysis and Manipulation Utilities for Vectorization.
- Copyright (C) 2003-2015 Free Software Foundation, Inc.
+ Copyright (C) 2003-2016 Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
and Ira Rosen <irar@il.ibm.com>
#include "config.h"
#include "system.h"
#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "predict.h"
+#include "tm_p.h"
+#include "ssa.h"
+#include "optabs-tree.h"
+#include "cgraph.h"
#include "dumpfile.h"
-#include "tm.h"
-#include "hash-set.h"
-#include "machmode.h"
-#include "vec.h"
-#include "double-int.h"
-#include "input.h"
#include "alias.h"
-#include "symtab.h"
-#include "wide-int.h"
-#include "inchash.h"
-#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
-#include "tm_p.h"
-#include "target.h"
-#include "predict.h"
-#include "hard-reg-set.h"
-#include "input.h"
-#include "function.h"
-#include "dominance.h"
-#include "cfg.h"
-#include "basic-block.h"
-#include "gimple-pretty-print.h"
-#include "tree-ssa-alias.h"
-#include "internal-fn.h"
#include "tree-eh.h"
-#include "gimple-expr.h"
-#include "is-a.h"
-#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
-#include "gimple-ssa.h"
-#include "tree-phinodes.h"
-#include "ssa-iterators.h"
-#include "stringpool.h"
-#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
-#include "dumpfile.h"
#include "cfgloop.h"
-#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
-#include "diagnostic-core.h"
-#include "hash-map.h"
-#include "plugin-api.h"
-#include "ipa-ref.h"
-#include "cgraph.h"
-/* Need to include rtl.h, expr.h, etc. for optabs. */
#include "expr.h"
-#include "insn-codes.h"
-#include "optabs.h"
#include "builtins.h"
-#include "varasm.h"
+#include "params.h"
/* Return true if load- or store-lanes optab OPTAB is implemented for
COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
types. */
tree
-vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
+vect_get_smallest_scalar_type (gimple *stmt, HOST_WIDE_INT *lhs_size_unit,
HOST_WIDE_INT *rhs_size_unit)
{
tree scalar_type = gimple_expr_type (stmt);
return false;
}
- if (STMT_VINFO_GATHER_P (stmtinfo_a)
- || STMT_VINFO_GATHER_P (stmtinfo_b))
+ if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
+ || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
{
if (dump_enabled_p ())
{
return false;
}
- if (STMT_VINFO_GATHER_P (stmtinfo_a)
- || STMT_VINFO_GATHER_P (stmtinfo_b))
+ if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
+ || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
{
if (dump_enabled_p ())
{
if (STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
|| STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
{
- gimple earlier_stmt;
+ gimple *earlier_stmt;
earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
if (DR_IS_WRITE
(STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_analyze_data_ref_dependences ===\n");
+ LOOP_VINFO_DDRS (loop_vinfo)
+ .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
+ * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
&LOOP_VINFO_DDRS (loop_vinfo),
dump_printf (MSG_NOTE, "\n");
}
- /* We do not vectorize basic blocks with write-write dependencies. */
- if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
- return true;
+ return true;
+}
- /* If we have a read-write dependence check that the load is before the store.
- When we vectorize basic blocks, vector load can be only before
- corresponding scalar load, and vector store can be only after its
- corresponding scalar store. So the order of the acceses is preserved in
- case the load is before the store. */
- gimple earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
- if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
+
+/* Analyze dependences involved in the transform of SLP NODE. STORES
+ contain the vector of scalar stores of this instance if we are
+ disambiguating the loads. */
+
+static bool
+vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node,
+ vec<gimple *> stores, gimple *last_store)
+{
+ /* This walks over all stmts involved in the SLP load/store done
+ in NODE verifying we can sink them up to the last stmt in the
+ group. */
+ gimple *last_access = vect_find_last_scalar_stmt_in_slp (node);
+ for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
{
- /* That only holds for load-store pairs taking part in vectorization. */
- if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dra)))
- && STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (drb))))
- return false;
- }
+ gimple *access = SLP_TREE_SCALAR_STMTS (node)[k];
+ if (access == last_access)
+ continue;
+ data_reference *dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (access));
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (access);
+ gsi_stmt (gsi) != last_access; gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (! gimple_vuse (stmt)
+ || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt)))
+ continue;
+
+ /* If we couldn't record a (single) data reference for this
+ stmt we have to give up. */
+ /* ??? Here and below if dependence analysis fails we can resort
+ to the alias oracle which can handle more kinds of stmts. */
+ data_reference *dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+ if (!dr_b)
+ return false;
+
+ /* If we run into a store of this same instance (we've just
+ marked those) then delay dependence checking until we run
+ into the last store because this is where it will have
+ been sunk to (and we verify if we can do that as well). */
+ if (gimple_visited_p (stmt))
+ {
+ if (stmt != last_store)
+ continue;
+ unsigned i;
+ gimple *store;
+ FOR_EACH_VEC_ELT (stores, i, store)
+ {
+ data_reference *store_dr
+ = STMT_VINFO_DATA_REF (vinfo_for_stmt (store));
+ ddr_p ddr = initialize_data_dependence_relation
+ (dr_a, store_dr, vNULL);
+ if (vect_slp_analyze_data_ref_dependence (ddr))
+ {
+ free_dependence_relation (ddr);
+ return false;
+ }
+ free_dependence_relation (ddr);
+ }
+ }
+ ddr_p ddr = initialize_data_dependence_relation (dr_a, dr_b, vNULL);
+ if (vect_slp_analyze_data_ref_dependence (ddr))
+ {
+ free_dependence_relation (ddr);
+ return false;
+ }
+ free_dependence_relation (ddr);
+ }
+ }
return true;
}
the maximum vectorization factor the data dependences allow. */
bool
-vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
+vect_slp_analyze_instance_dependence (slp_instance instance)
{
- struct data_dependence_relation *ddr;
- unsigned int i;
-
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_slp_analyze_data_ref_dependences ===\n");
+ "=== vect_slp_analyze_instance_dependence ===\n");
- if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
- &BB_VINFO_DDRS (bb_vinfo),
- vNULL, true))
- return false;
+ /* The stores of this instance are at the root of the SLP tree. */
+ slp_tree store = SLP_INSTANCE_TREE (instance);
+ if (! STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (store)[0])))
+ store = NULL;
- FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
- if (vect_slp_analyze_data_ref_dependence (ddr))
- return false;
+ /* Verify we can sink stores to the vectorized stmt insert location. */
+ gimple *last_store = NULL;
+ if (store)
+ {
+ if (! vect_slp_analyze_node_dependences (instance, store, vNULL, NULL))
+ return false;
- return true;
-}
+ /* Mark stores in this instance and remember the last one. */
+ last_store = vect_find_last_scalar_stmt_in_slp (store);
+ for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
+ gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], true);
+ }
+
+ bool res = true;
+ /* Verify we can sink loads to the vectorized stmt insert location,
+ special-casing stores of this instance. */
+ slp_tree load;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load)
+ if (! vect_slp_analyze_node_dependences (instance, load,
+ store
+ ? SLP_TREE_SCALAR_STMTS (store)
+ : vNULL, last_store))
+ {
+ res = false;
+ break;
+ }
+
+ /* Unset the visited flag. */
+ if (store)
+ for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
+ gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], false);
+
+ return res;
+}
/* Function vect_compute_data_ref_alignment
FOR NOW: No analysis is actually performed. Misalignment is calculated
only for trivial cases. TODO. */
-static bool
+bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = NULL;
tree ref = DR_REF (dr);
tree vectype;
tree base, base_addr;
- bool base_aligned;
- tree misalign;
- tree aligned_to, alignment;
+ tree misalign = NULL_TREE;
+ tree aligned_to;
+ unsigned HOST_WIDE_INT alignment;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
/* Initialize misalignment to unknown. */
SET_DR_MISALIGNMENT (dr, -1);
- /* Strided loads perform only component accesses, misalignment information
- is irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
- return true;
-
- misalign = DR_INIT (dr);
+ if (tree_fits_shwi_p (DR_STEP (dr)))
+ misalign = DR_INIT (dr);
aligned_to = DR_ALIGNED_TO (dr);
base_addr = DR_BASE_ADDRESS (dr);
vectype = STMT_VINFO_VECTYPE (stmt_info);
if (loop && nested_in_vect_loop_p (loop, stmt))
{
tree step = DR_STEP (dr);
- HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
- if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
+ if (tree_fits_shwi_p (step)
+ && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
}
}
- /* Similarly, if we're doing basic-block vectorization, we can only use
- base and misalignment information relative to an innermost loop if the
- misalignment stays the same throughout the execution of the loop.
- As above, this is the case if the stride of the dataref evenly divides
- by the vector size. */
- if (!loop)
+ /* Similarly we can only use base and misalignment information relative to
+ an innermost loop if the misalignment stays the same throughout the
+ execution of the loop. As above, this is the case if the stride of
+ the dataref evenly divides by the vector size. */
+ else
{
tree step = DR_STEP (dr);
- HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
+ unsigned vf = loop ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
- if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
+ if (tree_fits_shwi_p (step)
+ && ((tree_to_shwi (step) * vf)
+ % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "SLP: step doesn't divide the vector-size.\n");
+ "step doesn't divide the vector-size.\n");
misalign = NULL_TREE;
}
}
- base = build_fold_indirect_ref (base_addr);
- alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
+ /* To look at alignment of the base we have to preserve an inner MEM_REF
+ as that carries alignment information of the actual access. */
+ base = ref;
+ while (handled_component_p (base))
+ base = TREE_OPERAND (base, 0);
+ if (TREE_CODE (base) == MEM_REF)
+ base = build2 (MEM_REF, TREE_TYPE (base), base_addr,
+ build_int_cst (TREE_TYPE (TREE_OPERAND (base, 1)), 0));
+ unsigned int base_alignment = get_object_alignment (base);
+
+ if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
+ DR_VECT_AUX (dr)->base_element_aligned = true;
+
+ alignment = TYPE_ALIGN_UNIT (vectype);
- if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
+ if ((compare_tree_int (aligned_to, alignment) < 0)
|| !misalign)
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Unknown alignment for access: ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return true;
}
- if ((DECL_P (base)
- && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
- alignment) >= 0)
- || (TREE_CODE (base_addr) == SSA_NAME
- && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
- TREE_TYPE (base_addr)))),
- alignment) >= 0)
- || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
- base_aligned = true;
- else
- base_aligned = false;
-
- if (!base_aligned)
+ if (base_alignment < TYPE_ALIGN (vectype))
{
- /* Do not change the alignment of global variables here if
- flag_section_anchors is enabled as we already generated
- RTL for other functions. Most global variables should
- have been aligned during the IPA increase_alignment pass. */
- if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
- || (TREE_STATIC (base) && flag_section_anchors))
+ /* Strip an inner MEM_REF to a bare decl if possible. */
+ if (TREE_CODE (base) == MEM_REF
+ && integer_zerop (TREE_OPERAND (base, 1))
+ && TREE_CODE (TREE_OPERAND (base, 0)) == ADDR_EXPR)
+ base = TREE_OPERAND (TREE_OPERAND (base, 0), 0);
+
+ if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
{
if (dump_enabled_p ())
{
dump_printf (MSG_NOTE, "\n");
}
- ((dataref_aux *)dr->aux)->base_decl = base;
- ((dataref_aux *)dr->aux)->base_misaligned = true;
+ DR_VECT_AUX (dr)->base_decl = base;
+ DR_VECT_AUX (dr)->base_misaligned = true;
+ DR_VECT_AUX (dr)->base_element_aligned = true;
}
/* If this is a backward running DR then first access in the larger
vectype actually is N-1 elements before the address in the DR.
Adjust misalign accordingly. */
- if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+ if (tree_int_cst_sgn (DR_STEP (dr)) < 0)
{
tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
/* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
misalign = size_binop (PLUS_EXPR, misalign, offset);
}
- /* Modulo alignment. */
- misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
-
- if (!tree_fits_uhwi_p (misalign))
- {
- /* Negative or overflowed misalignment value. */
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unexpected misalign value\n");
- return false;
- }
-
- SET_DR_MISALIGNMENT (dr, tree_to_uhwi (misalign));
+ SET_DR_MISALIGNMENT (dr,
+ wi::mod_floor (misalign, alignment, SIGNED).to_uhwi ());
if (dump_enabled_p ())
{
}
-/* Function vect_compute_data_refs_alignment
-
- Compute the misalignment of data references in the loop.
- Return FALSE if a data reference is found that cannot be vectorized. */
-
-static bool
-vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
- bb_vec_info bb_vinfo)
-{
- vec<data_reference_p> datarefs;
- struct data_reference *dr;
- unsigned int i;
-
- if (loop_vinfo)
- datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- else
- datarefs = BB_VINFO_DATAREFS (bb_vinfo);
-
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
- && !vect_compute_data_ref_alignment (dr))
- {
- if (bb_vinfo)
- {
- /* Mark unsupported statement as unvectorizable. */
- STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
- continue;
- }
- else
- return false;
- }
-
- return true;
-}
-
-
/* Function vect_update_misalignment_for_peel
DR - the data reference whose misalignment is to be adjusted.
}
+/* Function verify_data_ref_alignment
+
+ Return TRUE if DR can be handled with respect to alignment.
+
+ The verdict comes from vect_supportable_dr_alignment (DR, false).
+ A zero result means the access is unsupported: when dumping is
+ enabled a missed-optimization message (load or store flavour,
+ chosen by DR_IS_READ) followed by DR_REF (DR) is emitted, and FALSE
+ is returned. Any non-zero result is accepted; if it differs from
+ dr_aligned a note that an unaligned access is being vectorized is
+ dumped before returning TRUE. */
+
+static bool
+verify_data_ref_alignment (data_reference_p dr)
+{
+ enum dr_alignment_support supportable_dr_alignment
+ = vect_supportable_dr_alignment (dr, false);
+ if (!supportable_dr_alignment)
+ {
+ if (dump_enabled_p ())
+ {
+ if (DR_IS_READ (dr))
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: unsupported unaligned load.");
+ else
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: unsupported unaligned "
+ "store.");
+
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
+ DR_REF (dr));
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+ return false;
+ }
+
+ if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Vectorizing an unaligned access.\n");
+
+ return true;
+}
+
/* Function vect_verify_datarefs_alignment
Return TRUE if all data references in the loop can be
handled with respect to alignment. */
bool
-vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
+vect_verify_datarefs_alignment (loop_vec_info vinfo)
{
- vec<data_reference_p> datarefs;
+ vec<data_reference_p> datarefs = vinfo->datarefs;
struct data_reference *dr;
- enum dr_alignment_support supportable_dr_alignment;
unsigned int i;
- if (loop_vinfo)
- datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- else
- datarefs = BB_VINFO_DATAREFS (bb_vinfo);
-
FOR_EACH_VEC_ELT (datarefs, i, dr)
{
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
if (!STMT_VINFO_RELEVANT_P (stmt_info))
continue;
- /* For interleaving, only the alignment of the first access matters.
- Skip statements marked as not vectorizable. */
- if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
- && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
- || !STMT_VINFO_VECTORIZABLE (stmt_info))
- continue;
+ /* For interleaving, only the alignment of the first access matters. */
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
+ continue;
- /* Strided loads perform only component accesses, alignment is
+ /* Strided accesses perform only component accesses, alignment is
irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info)
+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
continue;
- supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
- if (!supportable_dr_alignment)
- {
- if (dump_enabled_p ())
- {
- if (DR_IS_READ (dr))
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: unsupported unaligned load.");
- else
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: unsupported unaligned "
- "store.");
-
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- DR_REF (dr));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return false;
- }
- if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Vectorizing an unaligned access.\n");
+ if (! verify_data_ref_alignment (dr))
+ return false;
}
+
return true;
}
static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
unsigned int *outside_cost,
stmt_vector_for_cost *body_cost_vec)
{
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
}
+typedef struct _vect_peel_info /* One entry of the peeling hash table. */
+{
+ int npeel; /* Peel amount; the only field hashed and compared. */
+ struct data_reference *dr; /* Data reference stored when the entry is created. */
+ unsigned int count; /* Set to 1 on creation, bumped on each further insert with the same npeel. */
+} *vect_peel_info;
+
+typedef struct _vect_peel_extended_info /* Accumulator handed to the peeling hash-table traversals. */
+{
+ struct _vect_peel_info peel_info; /* Candidate (npeel, dr, count); its npeel is reported back by the chooser. */
+ unsigned int inside_cost; /* Seeded with INT_MAX before the lowest-cost traversal. */
+ unsigned int outside_cost; /* Seeded with INT_MAX before the lowest-cost traversal. */
+ stmt_vector_for_cost body_cost_vec; /* NOTE(review): presumably the body costs of the chosen candidate -- confirm. */
+} *vect_peel_extended_info;
+
+
+/* Peeling hashtable helpers.
+
+ Hash traits for tables of _vect_peel_info entries. Both the hash
+ value and the equality test look only at the npeel field, so
+ entries are keyed purely by peel amount. */
+
+struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
+{
+ static inline hashval_t hash (const _vect_peel_info *);
+ static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
+};
+
+inline hashval_t
+peel_info_hasher::hash (const _vect_peel_info *peel_info)
+{
+ return (hashval_t) peel_info->npeel; /* npeel itself serves as the hash value. */
+}
+
+inline bool
+peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
+{
+ return (a->npeel == b->npeel); /* Entries match iff their peel amounts match. */
+}
+
+
/* Insert DR into peeling hash table with NPEEL as key. */
static void
-vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
+vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
+ loop_vec_info loop_vinfo, struct data_reference *dr,
int npeel)
{
struct _vect_peel_info elem, *slot;
bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
elem.npeel = npeel;
- slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo)->find (&elem);
+ slot = peeling_htab->find (&elem);
if (slot)
slot->count++;
else
slot->npeel = npeel;
slot->dr = dr;
slot->count = 1;
- new_slot
- = LOOP_VINFO_PEELING_HTAB (loop_vinfo)->find_slot (slot, INSERT);
+ new_slot = peeling_htab->find_slot (slot, INSERT);
*new_slot = slot;
}
vect_peel_info elem = *slot;
int save_misalignment, dummy;
unsigned int inside_cost = 0, outside_cost = 0, i;
- gimple stmt = DR_STMT (elem->dr);
+ gimple *stmt = DR_STMT (elem->dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
struct data_reference *dr;
stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
- int single_iter_cost;
prologue_cost_vec.create (2);
body_cost_vec.create (2);
&& GROUP_FIRST_ELEMENT (stmt_info) != stmt)
continue;
+ /* Strided accesses perform only component accesses, alignment is
+ irrelevant for them. */
+ if (STMT_VINFO_STRIDED_P (stmt_info)
+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ continue;
+
save_misalignment = DR_MISALIGNMENT (dr);
vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
SET_DR_MISALIGNMENT (dr, save_misalignment);
}
- single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
- outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
- &dummy, single_iter_cost,
- &prologue_cost_vec,
- &epilogue_cost_vec);
+ outside_cost += vect_get_known_peeling_cost
+ (loop_vinfo, elem->npeel, &dummy,
+ &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
+ &prologue_cost_vec, &epilogue_cost_vec);
/* Prologue and epilogue costs are added to the target model later.
These costs depend only on the scalar iteration cost, the
option that aligns as many accesses as possible. */
static struct data_reference *
-vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
+vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
+ loop_vec_info loop_vinfo,
unsigned int *npeel,
stmt_vector_for_cost *body_cost_vec)
{
{
res.inside_cost = INT_MAX;
res.outside_cost = INT_MAX;
- LOOP_VINFO_PEELING_HTAB (loop_vinfo)
- ->traverse <_vect_peel_extended_info *,
- vect_peeling_hash_get_lowest_cost> (&res);
+ peeling_htab->traverse <_vect_peel_extended_info *,
+ vect_peeling_hash_get_lowest_cost> (&res);
}
else
{
res.peel_info.count = 0;
- LOOP_VINFO_PEELING_HTAB (loop_vinfo)
- ->traverse <_vect_peel_extended_info *,
- vect_peeling_hash_get_most_frequent> (&res);
+ peeling_htab->traverse <_vect_peel_extended_info *,
+ vect_peeling_hash_get_most_frequent> (&res);
}
*npeel = res.peel_info.npeel;
bool do_peeling = false;
bool do_versioning = false;
bool stat;
- gimple stmt;
+ gimple *stmt;
stmt_vec_info stmt_info;
unsigned int npeel = 0;
bool all_misalignments_unknown = true;
tree vectype;
unsigned int nelements, mis, same_align_drs_max = 0;
stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost ();
+ hash_table<peel_info_hasher> peeling_htab (1);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_enhance_data_refs_alignment ===\n");
+ /* Reset data so we can safely be called multiple times. */
+ LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
+ LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
+
/* While cost model enhancements are expected in the future, the high level
view of the code at this time is as follows:
if (integer_zerop (DR_STEP (dr)))
continue;
- /* Strided loads perform only component accesses, alignment is
+ /* Strided accesses perform only component accesses, alignment is
irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info)
+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
continue;
supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
size_zero_node) < 0;
/* Save info about DR in the hash table. */
- if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
- LOOP_VINFO_PEELING_HTAB (loop_vinfo)
- = new hash_table<peel_info_hasher> (1);
-
vectype = STMT_VINFO_VECTYPE (stmt_info);
nelements = TYPE_VECTOR_SUBPARTS (vectype);
mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
We do this automtically for cost model, since we calculate cost
for every peeling option. */
if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
- possible_npeel_number = vf /nelements;
+ {
+ if (STMT_SLP_TYPE (stmt_info))
+ possible_npeel_number
+ = (vf * GROUP_SIZE (stmt_info)) / nelements;
+ else
+ possible_npeel_number = vf / nelements;
+ }
/* Handle the aligned case. We may decide to align some other
access, making DR unaligned. */
for (j = 0; j < possible_npeel_number; j++)
{
- gcc_assert (npeel_tmp <= vf);
- vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
+ vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
+ dr, npeel_tmp);
npeel_tmp += nelements;
}
/* Check if we can possibly peel the loop. */
if (!vect_can_advance_ivs_p (loop_vinfo)
- || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
- do_peeling = false;
-
- /* If we don't know how many times the peeling loop will run
- assume it will run VF-1 times and disable peeling if the remaining
- iters are less than the vectorization factor. */
- if (do_peeling
- && all_misalignments_unknown
- && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && (LOOP_VINFO_INT_NITERS (loop_vinfo)
- < 2 * (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1))
+ || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
+ || loop->inner)
do_peeling = false;
if (do_peeling
}
/* In case there are only loads with different unknown misalignments, use
- peeling only if it may help to align other accesses in the loop. */
+ peeling only if it may help to align other accesses in the loop or
+ if it may help improve load bandwidth when we'd end up using
+ unaligned loads. */
+ tree dr0_vt = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr0)));
if (!first_store
&& !STMT_VINFO_SAME_ALIGN_REFS (
vinfo_for_stmt (DR_STMT (dr0))).length ()
- && vect_supportable_dr_alignment (dr0, false)
- != dr_unaligned_supported)
+ && (vect_supportable_dr_alignment (dr0, false)
+ != dr_unaligned_supported
+ || (builtin_vectorization_cost (vector_load, dr0_vt, 0)
+ == builtin_vectorization_cost (unaligned_load, dr0_vt, -1))))
do_peeling = false;
}
gcc_assert (!all_misalignments_unknown);
/* Choose the best peeling from the hash table. */
- dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
+ dr0 = vect_peeling_hash_choose_best_peeling (&peeling_htab,
+ loop_vinfo, &npeel,
&body_cost_vec);
if (!dr0 || !npeel)
do_peeling = false;
-
- /* If peeling by npeel will result in a remaining loop not iterating
- enough to be vectorized then do not peel. */
- if (do_peeling
- && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && (LOOP_VINFO_INT_NITERS (loop_vinfo)
- < LOOP_VINFO_VECT_FACTOR (loop_vinfo) + npeel))
- do_peeling = false;
}
if (do_peeling)
&& GROUP_FIRST_ELEMENT (stmt_info) != stmt)
continue;
- /* Strided loads perform only component accesses, alignment is
+ /* Strided accesses perform only component accesses, alignment is
irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info)
+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
continue;
save_misalignment = DR_MISALIGNMENT (dr);
if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
{
- stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+ stat = vect_verify_datarefs_alignment (loop_vinfo);
if (!stat)
do_peeling = false;
else
}
}
+ /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */
if (do_peeling)
{
unsigned max_allowed_peel
unsigned max_peel = npeel;
if (max_peel == 0)
{
- gimple dr_stmt = DR_STMT (dr0);
+ gimple *dr_stmt = DR_STMT (dr0);
stmt_vec_info vinfo = vinfo_for_stmt (dr_stmt);
tree vtype = STMT_VINFO_VECTYPE (vinfo);
max_peel = TYPE_VECTOR_SUBPARTS (vtype) - 1;
}
}
+ /* Cost model #2 - if peeling may result in a remaining loop not
+ iterating enough to be vectorized then do not peel. */
+ if (do_peeling
+ && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ {
+ unsigned max_peel
+ = npeel == 0 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1 : npeel;
+ if (LOOP_VINFO_INT_NITERS (loop_vinfo)
+ < LOOP_VINFO_VECT_FACTOR (loop_vinfo) + max_peel)
+ do_peeling = false;
+ }
+
if (do_peeling)
{
- stmt_info_for_cost *si;
- void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
-
/* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
If the misalignment of DR_i is identical to that of dr0 then set
DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
dump_printf_loc (MSG_NOTE, vect_location,
"Peeling for alignment will be applied.\n");
}
- /* We've delayed passing the inside-loop peeling costs to the
- target cost model until we were sure peeling would happen.
- Do so now. */
- if (body_cost_vec.exists ())
- {
- FOR_EACH_VEC_ELT (body_cost_vec, i, si)
- {
- struct _stmt_vec_info *stmt_info
- = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
- (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
- si->misalign, vect_body);
- }
- body_cost_vec.release ();
- }
+ /* The inside-loop cost will be accounted for in vectorizable_load
+ and vectorizable_store correctly with adjusted alignments.
+ Drop the body_cost_vec on the floor here. */
+ body_cost_vec.release ();
- stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+ stat = vect_verify_datarefs_alignment (loop_vinfo);
gcc_assert (stat);
return stat;
}
&& GROUP_FIRST_ELEMENT (stmt_info) != stmt))
continue;
- /* Strided loads perform only component accesses, alignment is
- irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
- continue;
+ if (STMT_VINFO_STRIDED_P (stmt_info))
+ {
+ /* Strided accesses perform only component accesses, alignment is
+ irrelevant for them. */
+ if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ continue;
+ do_versioning = false;
+ break;
+ }
supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
if (!supportable_dr_alignment)
{
- gimple stmt;
+ gimple *stmt;
int mask;
tree vectype;
if (do_versioning)
{
- vec<gimple> may_misalign_stmts
+ vec<gimple *> may_misalign_stmts
= LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
- gimple stmt;
+ gimple *stmt;
/* It can now be assumed that the data references in the statements
in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
/* Peeling and versioning can't be done together at this time. */
gcc_assert (! (do_peeling && do_versioning));
- stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+ stat = vect_verify_datarefs_alignment (loop_vinfo);
gcc_assert (stat);
return stat;
}
/* This point is reached if neither peeling nor versioning is being done. */
gcc_assert (! (do_peeling || do_versioning));
- stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+ stat = vect_verify_datarefs_alignment (loop_vinfo);
return stat;
}
Return FALSE if a data reference is found that cannot be vectorized. */
bool
-vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
- bb_vec_info bb_vinfo)
+vect_analyze_data_refs_alignment (loop_vec_info vinfo)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
/* Mark groups of data references with same alignment using
data dependence information. */
- if (loop_vinfo)
+ vec<ddr_p> ddrs = vinfo->ddrs;
+ struct data_dependence_relation *ddr;
+ unsigned int i;
+
+ FOR_EACH_VEC_ELT (ddrs, i, ddr)
+ vect_find_same_alignment_drs (ddr, vinfo);
+
+ vec<data_reference_p> datarefs = vinfo->datarefs;
+ struct data_reference *dr;
+
+ FOR_EACH_VEC_ELT (datarefs, i, dr)
{
- vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
- struct data_dependence_relation *ddr;
- unsigned int i;
+ stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
+ if (STMT_VINFO_VECTORIZABLE (stmt_info)
+ && !vect_compute_data_ref_alignment (dr))
+ {
+ /* Strided accesses perform only component accesses, misalignment
+ information is irrelevant for them. */
+ if (STMT_VINFO_STRIDED_P (stmt_info)
+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ continue;
- FOR_EACH_VEC_ELT (ddrs, i, ddr)
- vect_find_same_alignment_drs (ddr, loop_vinfo);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: can't calculate alignment "
+ "for data ref.\n");
+
+ return false;
+ }
}
- if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
+ return true;
+}
+
+
+/* Analyze alignment of DRs of stmts in NODE.
+   Return FALSE (after dumping a missed-optimization note when dumping
+   is enabled) if the alignment of a needed data reference cannot be
+   computed or if verify_data_ref_alignment rejects it; return TRUE
+   otherwise. */
+
+static bool
+vect_slp_analyze_and_verify_node_alignment (slp_tree node)
+{
+ /* We vectorize from the first scalar stmt in the node unless
+ the node is permuted in which case we start from the first
+ element in the group. */
+ gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
+ /* Remember the DR of the first scalar stmt before FIRST_STMT is
+ possibly redirected to the group leader below. */
+ data_reference_p first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+ if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
+ first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
+
+ /* DR is the data reference the alignment verification is done on. */
+ data_reference_p dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+ if (! vect_compute_data_ref_alignment (dr)
+ /* For creating the data-ref pointer we need alignment of the
+ first element anyway. */
+ || (dr != first_dr
+ && ! vect_compute_data_ref_alignment (first_dr))
+ || ! verify_data_ref_alignment (dr))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: can't calculate alignment "
- "for data ref.\n");
+ "not vectorized: bad data alignment in basic "
+ "block.\n");
return false;
}
return true;
}
+/* Function vect_slp_analyze_and_verify_instance_alignment
+
+   Compute and verify the alignment of the data references of the SLP
+   instance INSTANCE: every node in SLP_INSTANCE_LOADS is checked, and
+   the root node is checked as well when its first scalar stmt has a
+   data reference.
+   Return FALSE as soon as one node fails the check, TRUE otherwise.  */
+
+bool
+vect_slp_analyze_and_verify_instance_alignment (slp_instance instance)
+{
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                     "=== vect_slp_analyze_and_verify_instance_alignment ===\n");
+
+  /* Check the load nodes first.  */
+  slp_tree load_node;
+  unsigned ix;
+  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), ix, load_node)
+    {
+      if (! vect_slp_analyze_and_verify_node_alignment (load_node))
+        return false;
+    }
+
+  /* The root node is only checked when its first scalar stmt carries
+     a data reference.  */
+  slp_tree root = SLP_INSTANCE_TREE (instance);
+  gimple *root_stmt = SLP_TREE_SCALAR_STMTS (root)[0];
+  if (! STMT_VINFO_DATA_REF (vinfo_for_stmt (root_stmt)))
+    return true;
+
+  return vect_slp_analyze_and_verify_node_alignment (root);
+}
+
/* Analyze groups of accesses: check that DR belongs to a group of
accesses of legal size, step, etc. Detect gaps, single element
interleaving, and other special cases. Set grouped access info.
- Collect groups of strided stores for further use in SLP analysis. */
+ Collect groups of strided stores for further use in SLP analysis.
+ Worker for vect_analyze_group_access. */
static bool
-vect_analyze_group_access (struct data_reference *dr)
+vect_analyze_group_access_1 (struct data_reference *dr)
{
tree step = DR_STEP (dr);
tree scalar_type = TREE_TYPE (DR_REF (dr));
HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
+ HOST_WIDE_INT dr_step = -1;
HOST_WIDE_INT groupsize, last_accessed_element = 1;
bool slp_impossible = false;
- struct loop *loop = NULL;
-
- if (loop_vinfo)
- loop = LOOP_VINFO_LOOP (loop_vinfo);
/* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
size of the interleaving group (including gaps). */
- groupsize = absu_hwi (dr_step) / type_size;
+ if (tree_fits_shwi_p (step))
+ {
+ dr_step = tree_to_shwi (step);
+ /* Check that STEP is a multiple of type size. Otherwise there is
+ a non-element-sized gap at the end of the group which we
+ cannot represent in GROUP_GAP or GROUP_SIZE.
+ ??? As we can handle non-constant step fine here we should
+ simply remove uses of GROUP_GAP between the last and first
+ element and instead rely on DR_STEP. GROUP_SIZE then would
+ simply not include that gap. */
+ if ((dr_step % type_size) != 0)
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Step ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
+ dump_printf (MSG_NOTE,
+ " is not a multiple of the element size for ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
+ dump_printf (MSG_NOTE, "\n");
+ }
+ return false;
+ }
+ groupsize = absu_hwi (dr_step) / type_size;
+ }
+ else
+ groupsize = 0;
/* Not consecutive access is possible only if it is a part of interleaving. */
if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
dump_printf (MSG_NOTE, "\n");
}
- if (loop_vinfo)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Data access with gaps requires scalar "
- "epilogue loop\n");
- if (loop->inner)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Peeling for outer loop is not"
- " supported\n");
- return false;
- }
-
- LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
- }
-
return true;
}
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not consecutive access ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
if (bb_vinfo)
return true;
}
- return false;
+ dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
+ STMT_VINFO_STRIDED_P (stmt_info) = true;
+ return true;
}
if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
{
/* First stmt in the interleaving chain. Check the chain. */
- gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+ gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
struct data_reference *data_ref = dr;
unsigned int count = 1;
tree prev_init = DR_INIT (data_ref);
- gimple prev = stmt;
+ gimple *prev = stmt;
HOST_WIDE_INT diff, gaps = 0;
- unsigned HOST_WIDE_INT count_in_bytes;
while (next)
{
return false;
}
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Two or more load stmts share the same dr.\n");
+
/* For load use the same data-ref load. */
GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
count++;
}
- /* COUNT is the number of accesses found, we multiply it by the size of
- the type to get COUNT_IN_BYTES. */
- count_in_bytes = type_size * count;
+ if (groupsize == 0)
+ groupsize = count + gaps;
- /* Check that the size of the interleaving (including gaps) is not
- greater than STEP. */
- if (dr_step != 0
- && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "interleaving size is greater than step for ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- DR_REF (dr));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return false;
- }
+ if (groupsize > UINT_MAX)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "group is too large\n");
+ return false;
+ }
- /* Check that the size of the interleaving is equal to STEP for stores,
+ /* Check that the size of the interleaving is equal to count for stores,
i.e., that there are no gaps. */
- if (dr_step != 0
- && absu_hwi (dr_step) != count_in_bytes)
+ if (groupsize != count
+ && !DR_IS_READ (dr))
{
- if (DR_IS_READ (dr))
- {
- slp_impossible = true;
- /* There is a gap after the last load in the group. This gap is a
- difference between the groupsize and the number of elements.
- When there is no gap, this difference should be 0. */
- GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
- }
- else
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "interleaved store with gaps\n");
- return false;
- }
- }
-
- /* Check that STEP is a multiple of type size. */
- if (dr_step != 0
- && (dr_step % type_size) != 0)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "step is not a multiple of type size: step ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
- dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- TYPE_SIZE_UNIT (scalar_type));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return false;
- }
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "interleaved store with gaps\n");
+ return false;
+ }
- if (groupsize == 0)
- groupsize = count;
+ /* If there is a gap after the last load in the group it is the
+ difference between the groupsize and the last accessed
+ element.
+ When there is no gap, this difference should be 0. */
+ GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - last_accessed_element;
GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Detected interleaving of size %d\n", (int)groupsize);
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Detected interleaving ");
+ if (DR_IS_READ (dr))
+ dump_printf (MSG_NOTE, "load ");
+ else
+ dump_printf (MSG_NOTE, "store ");
+ dump_printf (MSG_NOTE, "of size %u starting with ",
+ (unsigned)groupsize);
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ if (GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "There is a gap of %u elements after the group\n",
+ GROUP_GAP (vinfo_for_stmt (stmt)));
+ }
/* SLP: create an SLP data structure for every interleaving group of
stores for further analysis in vect_analyse_slp. */
if (bb_vinfo)
BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
}
+ }
- /* There is a gap in the end of the group. */
- if (groupsize - last_accessed_element > 0 && loop_vinfo)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Data access with gaps requires scalar "
- "epilogue loop\n");
- if (loop->inner)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Peeling for outer loop is not supported\n");
- return false;
- }
+ return true;
+}
+
+/* Analyze groups of accesses: check that DR belongs to a group of
+ accesses of legal size, step, etc. Detect gaps, single element
+ interleaving, and other special cases. Set grouped access info.
+ Collect groups of strided stores for further use in SLP analysis.
+
+ This is a wrapper around vect_analyze_group_access_1: when the worker
+ fails, the interleaving group DR belongs to (if any) is dissolved by
+ clearing the first/next links of every member, and FALSE is returned.
+ Otherwise TRUE is returned. */
- LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+static bool
+vect_analyze_group_access (struct data_reference *dr)
+{
+ if (!vect_analyze_group_access_1 (dr))
+ {
+ /* Dissolve the group if present. */
+ gimple *next;
+ /* Start from the group leader; the loop is skipped when there is
+ no leader. */
+ gimple *stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dr)));
+ while (stmt)
+ {
+ stmt_vec_info vinfo = vinfo_for_stmt (stmt);
+ /* Fetch the successor before the links are cleared. */
+ next = GROUP_NEXT_ELEMENT (vinfo);
+ GROUP_FIRST_ELEMENT (vinfo) = NULL;
+ GROUP_NEXT_ELEMENT (vinfo) = NULL;
+ stmt = next;
}
+ return false;
}
-
return true;
}
-
/* Analyze the access pattern of the data-reference DR.
In case of non-consecutive accesses call vect_analyze_group_access() to
analyze groups of accesses. */
{
tree step = DR_STEP (dr);
tree scalar_type = TREE_TYPE (DR_REF (dr));
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = NULL;
return false;
}
- /* Allow invariant loads in not nested loops. */
+ /* Allow loads with zero step in inner-loop vectorization. */
if (loop_vinfo && integer_zerop (step))
{
GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
- if (nested_in_vect_loop_p (loop, stmt))
+ if (!nested_in_vect_loop_p (loop, stmt))
+ return DR_IS_READ (dr);
+ /* Allow references with zero step for outer loops marked
+ with pragma omp simd only - it guarantees absence of
+ loop-carried dependencies between inner loop iterations. */
+ if (!loop->force_vectorize)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"zero step in inner loop of nest\n");
return false;
}
- return DR_IS_READ (dr);
}
if (loop && nested_in_vect_loop_p (loop, stmt))
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"zero step in outer loop.\n");
- if (DR_IS_READ (dr))
- return true;
- else
- return false;
+ return DR_IS_READ (dr);
}
}
return false;
}
+
/* Assume this is a DR handled by non-constant strided load case. */
if (TREE_CODE (step) != INTEGER_CST)
- return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
+ return (STMT_VINFO_STRIDED_P (stmt_info)
+ && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ || vect_analyze_group_access (dr)));
/* Not consecutive access - check if it's a part of interleaving group. */
return vect_analyze_group_access (dr);
if (t2 == NULL)
return 1;
+ STRIP_NOPS (t1);
+ STRIP_NOPS (t2);
if (TREE_CODE (t1) != TREE_CODE (t2))
return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
if (dra == drb)
return 0;
+ /* DRs in different loops never belong to the same group. */
+ loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
+ loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
+ if (loopa != loopb)
+ return loopa->num < loopb->num ? -1 : 1;
+
/* Ordering of DRs according to base. */
if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
{
FORNOW: handle only arrays and pointer accesses. */
bool
-vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
+vect_analyze_data_ref_accesses (vec_info *vinfo)
{
unsigned int i;
- vec<data_reference_p> datarefs;
+ vec<data_reference_p> datarefs = vinfo->datarefs;
struct data_reference *dr;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_analyze_data_ref_accesses ===\n");
- if (loop_vinfo)
- datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- else
- datarefs = BB_VINFO_DATAREFS (bb_vinfo);
-
if (datarefs.is_empty ())
return true;
matters we can push those to a worklist and re-iterate
over them. The we can just skip ahead to the next DR here. */
+ /* DRs in a different loop should not be put into the same
+ interleaving group. */
+ if (gimple_bb (DR_STMT (dra))->loop_father
+ != gimple_bb (DR_STMT (drb))->loop_father)
+ break;
+
/* Check that the data-refs have same first location (except init)
and they are both either store or load (not load and store,
not masked loads or stores). */
|| !gimple_assign_single_p (DR_STMT (drb)))
break;
- /* Check that the data-refs have the same constant size and step. */
+ /* Check that the data-refs have the same constant size. */
tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
if (!tree_fits_uhwi_p (sza)
|| !tree_fits_uhwi_p (szb)
- || !tree_int_cst_equal (sza, szb)
- || !tree_fits_shwi_p (DR_STEP (dra))
- || !tree_fits_shwi_p (DR_STEP (drb))
- || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
+ || !tree_int_cst_equal (sza, szb))
+ break;
+
+ /* Check that the data-refs have the same step. */
+ if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
break;
/* Do not place the same access in the interleaving chain twice. */
/* If init_b == init_a + the size of the type * k, we have an
interleaving, and DRA is accessed before DRB. */
HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
- if ((init_b - init_a) % type_size_a != 0)
+ if (type_size_a == 0
+ || (init_b - init_a) % type_size_a != 0)
break;
- /* The step (if not zero) is greater than the difference between
- data-refs' inits. This splits groups into suitable sizes. */
- HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
- if (step != 0 && step <= (init_b - init_a))
+ /* If we have a store, the accesses are adjacent. This splits
+ groups into chunks we support (we don't support vectorization
+ of stores with gaps). */
+ if (!DR_IS_READ (dra)
+ && (init_b - (HOST_WIDE_INT) TREE_INT_CST_LOW
+ (DR_INIT (datarefs_copy[i-1]))
+ != type_size_a))
break;
+ /* If the step is a known non-zero constant it must be greater than
+ the difference between the data-refs' inits. This splits groups
+ into suitable sizes. */
+ if (tree_fits_shwi_p (DR_STEP (dra)))
+ {
+ HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
+ if (step != 0 && step <= (init_b - init_a))
+ break;
+ }
+
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"Detected interleaving ");
+ if (DR_IS_READ (dra))
+ dump_printf (MSG_NOTE, "load ");
+ else
+ dump_printf (MSG_NOTE, "store ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
dump_printf (MSG_NOTE, " and ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: complicated access pattern.\n");
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
{
/* Mark the statement as not vectorizable. */
STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
{
struct data_reference *dr_a, *dr_b;
- gimple dr_group_first_a, dr_group_first_b;
+ gimple *dr_group_first_a, *dr_group_first_b;
tree segment_length_a, segment_length_b;
- gimple stmt_a, stmt_b;
+ gimple *stmt_a, *stmt_b;
dr_a = DDR_A (ddr);
stmt_a = DR_STMT (DDR_A (ddr));
return true;
}
-/* Check whether a non-affine read in stmt is suitable for gather load
- and if so, return a builtin decl for that operation. */
+/* Check whether a non-affine read or write in stmt is suitable for gather load
+ or scatter store and if so, return a builtin decl for that operation. */
tree
-vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
- tree *offp, int *scalep)
+vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo, tree *basep,
+ tree *offp, int *scalep)
{
HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree offtype = NULL_TREE;
tree decl, base, off;
machine_mode pmode;
- int punsignedp, pvolatilep;
+ int punsignedp, reversep, pvolatilep = 0;
base = DR_REF (dr);
/* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
&& integer_zerop (TREE_OPERAND (base, 1))
&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
{
- gimple def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
+ gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
if (is_gimple_assign (def_stmt)
&& gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
}
- /* The gather builtins need address of the form
+ /* The gather and scatter builtins need address of the form
loop_invariant + vector * {1, 2, 4, 8}
or
loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
vectorized. The following code attempts to find such a preexistng
SSA_NAME OFF and put the loop invariants into a tree BASE
that can be gimplified before the loop. */
- base = get_inner_reference (base, &pbitsize, &pbitpos, &off,
- &pmode, &punsignedp, &pvolatilep, false);
- gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
+ base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
+ &punsignedp, &reversep, &pvolatilep, false);
+ gcc_assert (base && (pbitpos % BITS_PER_UNIT) == 0 && !reversep);
if (TREE_CODE (base) == MEM_REF)
{
if (TREE_CODE (off) == SSA_NAME)
{
- gimple def_stmt = SSA_NAME_DEF_STMT (off);
+ gimple *def_stmt = SSA_NAME_DEF_STMT (off);
if (expr_invariant_in_loop_p (loop, off))
return NULL_TREE;
if (offtype == NULL_TREE)
offtype = TREE_TYPE (off);
- decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
- offtype, scale);
+ if (DR_IS_READ (dr))
+ decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
+ offtype, scale);
+ else
+ decl = targetm.vectorize.builtin_scatter (STMT_VINFO_VECTYPE (stmt_info),
+ offtype, scale);
+
if (decl == NULL_TREE)
return NULL_TREE;
*/
bool
-vect_analyze_data_refs (loop_vec_info loop_vinfo,
- bb_vec_info bb_vinfo,
- int *min_vf, unsigned *n_stmts)
+vect_analyze_data_refs (vec_info *vinfo, int *min_vf)
{
struct loop *loop = NULL;
- basic_block bb = NULL;
unsigned int i;
- vec<data_reference_p> datarefs;
struct data_reference *dr;
tree scalar_type;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_analyze_data_refs ===\n");
-
- if (loop_vinfo)
- {
- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
-
- loop = LOOP_VINFO_LOOP (loop_vinfo);
- datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: loop contains function calls"
- " or data references that cannot be analyzed\n");
- return false;
- }
-
- for (i = 0; i < loop->num_nodes; i++)
- {
- gimple_stmt_iterator gsi;
-
- for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple stmt = gsi_stmt (gsi);
- if (is_gimple_debug (stmt))
- continue;
- ++*n_stmts;
- if (!find_data_references_in_stmt (loop, stmt, &datarefs))
- {
- if (is_gimple_call (stmt) && loop->safelen)
- {
- tree fndecl = gimple_call_fndecl (stmt), op;
- if (fndecl != NULL_TREE)
- {
- struct cgraph_node *node = cgraph_node::get (fndecl);
- if (node != NULL && node->simd_clones != NULL)
- {
- unsigned int j, n = gimple_call_num_args (stmt);
- for (j = 0; j < n; j++)
- {
- op = gimple_call_arg (stmt, j);
- if (DECL_P (op)
- || (REFERENCE_CLASS_P (op)
- && get_base_address (op)))
- break;
- }
- op = gimple_call_lhs (stmt);
- /* Ignore #pragma omp declare simd functions
- if they don't have data references in the
- call stmt itself. */
- if (j == n
- && !(op
- && (DECL_P (op)
- || (REFERENCE_CLASS_P (op)
- && get_base_address (op)))))
- continue;
- }
- }
- }
- LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: loop contains function "
- "calls or data references that cannot "
- "be analyzed\n");
- return false;
- }
- }
- }
-
- LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
- }
- else
- {
- gimple_stmt_iterator gsi;
+ "=== vect_analyze_data_refs ===\n");
- bb = BB_VINFO_BB (bb_vinfo);
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple stmt = gsi_stmt (gsi);
- if (is_gimple_debug (stmt))
- continue;
- ++*n_stmts;
- if (!find_data_references_in_stmt (NULL, stmt,
- &BB_VINFO_DATAREFS (bb_vinfo)))
- {
- /* Mark the rest of the basic-block as unvectorizable. */
- for (; !gsi_end_p (gsi); gsi_next (&gsi))
- {
- stmt = gsi_stmt (gsi);
- STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
- }
- break;
- }
- }
-
- datarefs = BB_VINFO_DATAREFS (bb_vinfo);
- }
+ if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Go through the data-refs, check that the analysis succeeded. Update
pointer from stmt_vec_info struct to DR and vectype. */
+ vec<data_reference_p> datarefs = vinfo->datarefs;
FOR_EACH_VEC_ELT (datarefs, i, dr)
{
- gimple stmt;
+ gimple *stmt;
stmt_vec_info stmt_info;
tree base, offset, init;
- bool gather = false;
+ enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
bool simd_lane_access = false;
int vf;
= DR_IS_READ (dr)
&& !TREE_THIS_VOLATILE (DR_REF (dr))
&& targetm.vectorize.builtin_gather != NULL;
+ bool maybe_scatter
+ = DR_IS_WRITE (dr)
+ && !TREE_THIS_VOLATILE (DR_REF (dr))
+ && targetm.vectorize.builtin_scatter != NULL;
bool maybe_simd_lane_access
- = loop_vinfo && loop->simduid;
+ = is_a <loop_vec_info> (vinfo) && loop->simduid;
- /* If target supports vector gather loads, or if this might be
- a SIMD lane access, see if they can't be used. */
- if (loop_vinfo
- && (maybe_gather || maybe_simd_lane_access)
+ /* If target supports vector gather loads or scatter stores, or if
+ this might be a SIMD lane access, see if they can't be used. */
+ if (is_a <loop_vec_info> (vinfo)
+ && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
&& !nested_in_vect_loop_p (loop, stmt))
{
struct data_reference *newdr
= create_data_ref (NULL, loop_containing_stmt (stmt),
- DR_REF (dr), stmt, true);
+ DR_REF (dr), stmt, maybe_scatter ? false : true);
gcc_assert (newdr != NULL && DR_REF (newdr));
if (DR_BASE_ADDRESS (newdr)
&& DR_OFFSET (newdr)
off = TREE_OPERAND (off, 0);
if (TREE_CODE (off) == SSA_NAME)
{
- gimple def = SSA_NAME_DEF_STMT (off);
+ gimple *def = SSA_NAME_DEF_STMT (off);
tree reft = TREE_TYPE (DR_REF (newdr));
if (is_gimple_call (def)
&& gimple_call_internal_p (def)
}
}
}
- if (!simd_lane_access && maybe_gather)
+ if (!simd_lane_access && (maybe_gather || maybe_scatter))
{
dr = newdr;
- gather = true;
+ if (maybe_gather)
+ gatherscatter = GATHER;
+ else
+ gatherscatter = SCATTER;
}
}
- if (!gather && !simd_lane_access)
+ if (gatherscatter == SG_NONE && !simd_lane_access)
free_data_ref (newdr);
}
- if (!gather && !simd_lane_access)
+ if (gatherscatter == SG_NONE && !simd_lane_access)
{
if (dump_enabled_p ())
{
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
return false;
"not vectorized: base addr of dr is a "
"constant\n");
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
return false;
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
HOST_WIDE_INT pbitsize, pbitpos;
tree poffset;
machine_mode pmode;
- int punsignedp, pvolatilep;
+ int punsignedp, preversep, pvolatilep;
affine_iv base_iv, offset_iv;
tree dinit;
}
outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
- &poffset, &pmode, &punsignedp, &pvolatilep, false);
+ &poffset, &pmode, &punsignedp,
+ &preversep, &pvolatilep, false);
gcc_assert (outer_base != NULL_TREE);
if (pbitpos % BITS_PER_UNIT != 0)
return false;
}
+ if (preversep)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "failed: reverse storage order.\n");
+ return false;
+ }
+
outer_base = build_fold_addr_expr (outer_base);
if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
&base_iv, false))
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
- break;
+ if (is_a <bb_vec_info> (vinfo))
+ {
+ /* No vector type is fine, the ref can still participate
+ in dependence analysis, we just can't vectorize it. */
+ STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+ continue;
+ }
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
{
STMT_VINFO_DATA_REF (stmt_info) = NULL;
- if (gather)
+ if (gatherscatter != SG_NONE)
free_data_ref (dr);
}
return false;
if (vf > *min_vf)
*min_vf = vf;
- if (gather)
+ if (gatherscatter != SG_NONE)
{
tree off;
-
- gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
- if (gather
- && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
- gather = false;
- if (!gather)
+ if (!vect_check_gather_scatter (stmt, as_a <loop_vec_info> (vinfo),
+ NULL, &off, NULL)
+ || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
{
STMT_VINFO_DATA_REF (stmt_info) = NULL;
free_data_ref (dr);
if (dump_enabled_p ())
{
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: not suitable for gather "
- "load ");
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ (gatherscatter == GATHER) ?
+ "not vectorized: not suitable for gather "
+ "load " :
+ "not vectorized: not suitable for scatter "
+ "store ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
+ free_data_ref (datarefs[i]);
datarefs[i] = dr;
- STMT_VINFO_GATHER_P (stmt_info) = true;
+ STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
}
- else if (loop_vinfo
+
+ else if (is_a <loop_vec_info> (vinfo)
&& TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
{
- if (nested_in_vect_loop_p (loop, stmt)
- || !DR_IS_READ (dr))
+ if (nested_in_vect_loop_p (loop, stmt))
{
if (dump_enabled_p ())
{
}
return false;
}
- STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
+ STMT_VINFO_STRIDED_P (stmt_info) = true;
}
}
avoids spending useless time in analyzing their dependence. */
if (i != datarefs.length ())
{
- gcc_assert (bb_vinfo != NULL);
+ gcc_assert (is_a <bb_vec_info> (vinfo));
for (unsigned j = i; j < datarefs.length (); ++j)
{
data_reference_p dr = datarefs[j];
case vect_scalar_var:
prefix = "stmp";
break;
+ case vect_mask_var:
+ prefix = "mask";
+ break;
case vect_pointer_var:
prefix = "vectp";
break;
return new_vect_var;
}
+/* Like vect_get_new_vect_var but return an SSA name.
+
+   TYPE is the type of the new name.  VAR_KIND selects the prefix
+   ("vect", "stmp" or "vectp"); any other kind is not expected here.
+   When NAME is non-NULL the SSA name is based on "<prefix>_<NAME>",
+   otherwise on the bare prefix.  */
+
+tree
+vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
+{
+  const char *prefix;
+
+  switch (var_kind)
+    {
+    case vect_simple_var:
+      prefix = "vect";
+      break;
+    case vect_scalar_var:
+      prefix = "stmp";
+      break;
+    case vect_pointer_var:
+      prefix = "vectp";
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Without a NAME the prefix alone serves as the base name.  */
+  if (!name)
+    return make_temp_ssa_name (type, NULL, prefix);
+
+  /* The concatenated string is only needed while the SSA name is
+     created and is freed right afterwards.  */
+  char *full_name = concat (prefix, "_", name, NULL);
+  tree ssa_name = make_temp_ssa_name (type, NULL, full_name);
+  free (full_name);
+  return ssa_name;
+}
+
+/* Duplicate ptr info and set alignment/misalignment on NAME from DR.
+
+   The pointer info of DR is copied onto NAME first.  If the
+   misalignment of DR is -1 the alignment of NAME is marked unknown;
+   otherwise it is set from the alignment of the vector type of
+   STMT_INFO together with the misalignment of DR.  */
+
+static void
+vect_duplicate_ssa_name_ptr_info (tree name, data_reference *dr,
+                                  stmt_vec_info stmt_info)
+{
+  duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr));
+
+  unsigned int vec_align = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmt_info));
+  int dr_misalign = DR_MISALIGNMENT (dr);
+
+  if (dr_misalign != -1)
+    set_ptr_info_alignment (SSA_NAME_PTR_INFO (name), vec_align,
+                            dr_misalign);
+  else
+    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
+}
/* Function vect_create_addr_base_for_vector_ref.
FORNOW: We are only handling array accesses with step 1. */
tree
-vect_create_addr_base_for_vector_ref (gimple stmt,
+vect_create_addr_base_for_vector_ref (gimple *stmt,
gimple_seq *new_stmt_list,
tree offset,
struct loop *loop,
}
vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
- addr_base = fold_convert (vect_ptr_type, addr_base);
dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
- addr_base = force_gimple_operand (addr_base, &seq, false, dest);
+ addr_base = force_gimple_operand (addr_base, &seq, true, dest);
gimple_seq_add_seq (new_stmt_list, seq);
if (DR_PTR_INFO (dr)
- && TREE_CODE (addr_base) == SSA_NAME)
+ && TREE_CODE (addr_base) == SSA_NAME
+ && !SSA_NAME_PTR_INFO (addr_base))
{
- duplicate_ssa_name_ptr_info (addr_base, DR_PTR_INFO (dr));
- unsigned int align = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmt_info));
- int misalign = DR_MISALIGNMENT (dr);
- if (offset || byte_offset || (misalign == -1))
+ vect_duplicate_ssa_name_ptr_info (addr_base, dr, stmt_info);
+ if (offset || byte_offset)
mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
- else
- set_ptr_info_alignment (SSA_NAME_PTR_INFO (addr_base), align, misalign);
}
if (dump_enabled_p ())
4. Return the pointer. */
tree
-vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
+vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop,
tree offset, tree *initial_address,
- gimple_stmt_iterator *gsi, gimple *ptr_incr,
+ gimple_stmt_iterator *gsi, gimple **ptr_incr,
bool only_init, bool *inv_p, tree byte_offset)
{
const char *base_name;
tree aggr_ptr_type;
tree aggr_ptr;
tree new_temp;
- gimple vec_stmt;
gimple_seq new_stmt_list = NULL;
edge pe = NULL;
basic_block new_bb;
gimple_stmt_iterator incr_gsi;
bool insert_after;
tree indx_before_incr, indx_after_incr;
- gimple incr;
+ gimple *incr;
tree step;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
/* Likewise for any of the data references in the stmt group. */
else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
{
- gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
+ gimple *orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
do
{
stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
}
*initial_address = new_temp;
-
- /* Create: p = (aggr_type *) initial_base */
- if (TREE_CODE (new_temp) != SSA_NAME
- || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
- {
- vec_stmt = gimple_build_assign (aggr_ptr,
- fold_convert (aggr_ptr_type, new_temp));
- aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
- /* Copy the points-to information if it exists. */
- if (DR_PTR_INFO (dr))
- duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
- gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
- if (pe)
- {
- new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
- gcc_assert (!new_bb);
- }
- else
- gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
- }
- else
- aggr_ptr_init = new_temp;
+ aggr_ptr_init = new_temp;
/* (3) Handle the updating of the aggregate-pointer inside the loop.
This is needed when ONLY_INIT is false, and also when AT_LOOP is the
aggr_ptr, loop, &incr_gsi, insert_after,
&indx_before_incr, &indx_after_incr);
incr = gsi_stmt (incr_gsi);
- set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+ set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
/* Copy the points-to information if it exists. */
if (DR_PTR_INFO (dr))
{
- duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
- duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
+ vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr, stmt_info);
+ vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr, stmt_info);
}
if (ptr_incr)
*ptr_incr = incr;
containing_loop, &incr_gsi, insert_after, &indx_before_incr,
&indx_after_incr);
incr = gsi_stmt (incr_gsi);
- set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+ set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
/* Copy the points-to information if it exists. */
if (DR_PTR_INFO (dr))
{
- duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
- duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
+ vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr, stmt_info);
+ vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr, stmt_info);
}
if (ptr_incr)
*ptr_incr = incr;
*/
tree
-bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
- gimple stmt, tree bump)
+bump_vector_ptr (tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
+ gimple *stmt, tree bump)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
if (bump)
update = bump;
- new_dataref_ptr = copy_ssa_name (dataref_ptr);
+ if (TREE_CODE (dataref_ptr) == SSA_NAME)
+ new_dataref_ptr = copy_ssa_name (dataref_ptr);
+ else
+ new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
dataref_ptr, update);
vect_finish_stmt_generation (stmt, incr_stmt, gsi);
tree type;
enum vect_var_kind kind;
- kind = vectype ? vect_simple_var : vect_scalar_var;
+ kind = vectype
+ ? VECTOR_BOOLEAN_TYPE_P (vectype)
+ ? vect_mask_var
+ : vect_simple_var
+ : vect_scalar_var;
type = vectype ? vectype : TREE_TYPE (scalar_dest);
gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
void
vect_permute_store_chain (vec<tree> dr_chain,
unsigned int length,
- gimple stmt,
+ gimple *stmt,
gimple_stmt_iterator *gsi,
vec<tree> *result_chain)
{
tree vect1, vect2, high, low;
- gimple perm_stmt;
+ gimple *perm_stmt;
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
tree perm_mask_low, perm_mask_high;
tree data_ref;
Return value - the result of the loop-header phi node. */
tree
-vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
+vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi,
tree *realignment_token,
enum dr_alignment_support alignment_support_scheme,
tree init_addr,
edge pe = NULL;
tree scalar_dest = gimple_assign_lhs (stmt);
tree vec_dest;
- gimple inc;
+ gimple *inc;
tree ptr;
tree data_ref;
basic_block new_bb;
ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
NULL_TREE, &init_addr, NULL, &inc,
true, &inv_p);
- new_temp = copy_ssa_name (ptr);
+ if (TREE_CODE (ptr) == SSA_NAME)
+ new_temp = copy_ssa_name (ptr);
+ else
+ new_temp = make_ssa_name (TREE_TYPE (ptr));
new_stmt = gimple_build_assign
(new_temp, BIT_AND_EXPR, ptr,
build_int_cst (TREE_TYPE (ptr),
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"shuffle of 3 loads is not supported by"
" target\n");
- return false;
+ return false;
}
for (i = 0, j = 0; i < nelt; i++)
if (3 * i + k < 2 * nelt)
static void
vect_permute_load_chain (vec<tree> dr_chain,
unsigned int length,
- gimple stmt,
+ gimple *stmt,
gimple_stmt_iterator *gsi,
vec<tree> *result_chain)
{
tree data_ref, first_vect, second_vect;
tree perm_mask_even, perm_mask_odd;
tree perm3_mask_low, perm3_mask_high;
- gimple perm_stmt;
+ gimple *perm_stmt;
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
unsigned int i, j, log_length = exact_log2 (length);
unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
static bool
vect_shift_permute_load_chain (vec<tree> dr_chain,
unsigned int length,
- gimple stmt,
+ gimple *stmt,
gimple_stmt_iterator *gsi,
vec<tree> *result_chain)
{
tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
tree perm2_mask1, perm2_mask2, perm3_mask;
tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
- gimple perm_stmt;
+ gimple *perm_stmt;
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
unsigned int i;
*/
void
-vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
+vect_transform_grouped_load (gimple *stmt, vec<tree> dr_chain, int size,
gimple_stmt_iterator *gsi)
{
machine_mode mode;
for each vector to the associated scalar statement. */
void
-vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
+vect_record_grouped_load_vectors (gimple *stmt, vec<tree> result_chain)
{
- gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
- gimple next_stmt, new_stmt;
+ gimple *first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
+ gimple *next_stmt, *new_stmt;
unsigned int i, gap_count;
tree tmp_data_ref;
{
if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
{
- gimple prev_stmt =
+ gimple *prev_stmt =
STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
- gimple rel_stmt =
+ gimple *rel_stmt =
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
while (rel_stmt)
{
if (TREE_CODE (decl) != VAR_DECL)
return false;
- /* With -fno-toplevel-reorder we may have already output the constant. */
- if (TREE_ASM_WRITTEN (decl))
- return false;
-
- /* Constant pool entries may be shared and not properly merged by LTO. */
- if (DECL_IN_CONSTANT_POOL (decl))
- return false;
-
- if (TREE_PUBLIC (decl) || DECL_EXTERNAL (decl))
- {
- symtab_node *snode;
-
- /* We cannot change alignment of symbols that may bind to symbols
- in other translation unit that may contain a definition with lower
- alignment. */
- if (!decl_binds_to_current_def_p (decl))
- return false;
-
- /* When compiling partition, be sure the symbol is not output by other
- partition. */
- snode = symtab_node::get (decl);
- if (flag_ltrans
- && (snode->in_other_partition
- || snode->get_partitioning_class () == SYMBOL_DUPLICATE))
- return false;
- }
-
- /* Do not override the alignment as specified by the ABI when the used
- attribute is set. */
- if (DECL_PRESERVE_P (decl))
- return false;
-
- /* Do not override explicit alignment set by the user when an explicit
- section name is also used. This is a common idiom used by many
- software projects. */
- if (TREE_STATIC (decl)
- && DECL_SECTION_NAME (decl) != NULL
- && !symtab_node::get (decl)->implicit_section)
+ if (decl_in_symtab_p (decl)
+ && !symtab_node::get (decl)->can_increase_alignment_p ())
return false;
- /* If symbol is an alias, we need to check that target is OK. */
- if (TREE_STATIC (decl))
- {
- tree target = symtab_node::get (decl)->ultimate_alias_target ()->decl;
- if (target != decl)
- {
- if (DECL_PRESERVE_P (target))
- return false;
- decl = target;
- }
- }
-
if (TREE_STATIC (decl))
return (alignment <= MAX_OFILE_ALIGNMENT);
else
vect_supportable_dr_alignment (struct data_reference *dr,
bool check_aligned_accesses)
{
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
machine_mode mode = TYPE_MODE (vectype);