/* Data References Analysis and Manipulation Utilities for Vectorization.
- Copyright (C) 2003-2014 Free Software Foundation, Inc.
+ Copyright (C) 2003-2016 Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
and Ira Rosen <irar@il.ibm.com>
#include "config.h"
#include "system.h"
#include "coretypes.h"
-#include "dumpfile.h"
-#include "tm.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
#include "tree.h"
-#include "stor-layout.h"
+#include "gimple.h"
+#include "predict.h"
#include "tm_p.h"
-#include "target.h"
-#include "basic-block.h"
-#include "gimple-pretty-print.h"
-#include "tree-ssa-alias.h"
-#include "internal-fn.h"
+#include "ssa.h"
+#include "optabs-tree.h"
+#include "cgraph.h"
+#include "dumpfile.h"
+#include "alias.h"
+#include "fold-const.h"
+#include "stor-layout.h"
#include "tree-eh.h"
-#include "gimple-expr.h"
-#include "is-a.h"
-#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
-#include "gimple-ssa.h"
-#include "tree-phinodes.h"
-#include "ssa-iterators.h"
-#include "stringpool.h"
-#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
-#include "dumpfile.h"
#include "cfgloop.h"
-#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
-#include "diagnostic-core.h"
-#include "cgraph.h"
-/* Need to include rtl.h, expr.h, etc. for optabs. */
#include "expr.h"
-#include "optabs.h"
+#include "builtins.h"
+#include "params.h"
/* Return true if load- or store-lanes optab OPTAB is implemented for
COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
tree vectype, unsigned HOST_WIDE_INT count)
{
- enum machine_mode mode, array_mode;
+ machine_mode mode, array_mode;
bool limit_p;
mode = TYPE_MODE (vectype);
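 /* In rough terms: we look for an integer mode wide enough to hold
    COUNT copies of MODE and ask whether the target implements OPTAB
    for that (array mode, vector mode) pair; e.g. for V4SI and
    COUNT == 2 this queries a 256-bit array mode.  */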
types. */
tree
-vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
+vect_get_smallest_scalar_type (gimple *stmt, HOST_WIDE_INT *lhs_size_unit,
HOST_WIDE_INT *rhs_size_unit)
{
tree scalar_type = gimple_expr_type (stmt);
return false;
}
- if (STMT_VINFO_GATHER_P (stmtinfo_a)
- || STMT_VINFO_GATHER_P (stmtinfo_b))
+ if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
+ || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
{
if (dump_enabled_p ())
{
return false;
}
- if (STMT_VINFO_GATHER_P (stmtinfo_a)
- || STMT_VINFO_GATHER_P (stmtinfo_b))
+ if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
+ || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
{
if (dump_enabled_p ())
{
.. = a[i+1];
where we will end up loading { a[i], a[i+1] } once, make
sure that inserting group loads before the first load and
- stores after the last store will do the right thing. */
- if ((STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
- && GROUP_SAME_DR_STMT (stmtinfo_a))
- || (STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)
- && GROUP_SAME_DR_STMT (stmtinfo_b)))
+ stores after the last store will do the right thing.
+ Similar for groups like
+ a[i] = ...;
+ ... = a[i];
+ a[i+1] = ...;
+ where loads from the group interleave with the store. */
+ if (STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
+ || STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
{
- gimple earlier_stmt;
+ gimple *earlier_stmt;
earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
if (DR_IS_WRITE
(STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"dependence distance negative.\n");
+ /* Record a negative dependence distance to later limit the
+ amount of stmt copying / unrolling we can perform.
+ Only need to handle read-after-write dependence. */
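+	  /* For instance, a load of a[i+1] paired with a store to a[i]
+	     reads an element the store writes only one iteration later;
+	     the minimum recorded below is later checked against the
+	     vectorization factor to reject unsafe unrolling.  */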
+ if (DR_IS_READ (drb)
+ && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
+ || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
+ STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
continue;
}
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_analyze_data_ref_dependences ===\n");
+ LOOP_VINFO_DDRS (loop_vinfo)
+ .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
+ * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
&LOOP_VINFO_DDRS (loop_vinfo),
dump_printf (MSG_NOTE, "\n");
}
- /* We do not vectorize basic blocks with write-write dependencies. */
- if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
- return true;
+ return true;
+}
+
+
+/* Analyze dependences involved in the transform of SLP NODE. STORES
+ contain the vector of scalar stores of this instance if we are
+ disambiguating the loads. */
- /* If we have a read-write dependence check that the load is before the store.
- When we vectorize basic blocks, vector load can be only before
- corresponding scalar load, and vector store can be only after its
- corresponding scalar store. So the order of the accesses is preserved in
- case the load is before the store. */
- gimple earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
- if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
+static bool
+vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node,
+ vec<gimple *> stores, gimple *last_store)
+{
+ /* This walks over all stmts involved in the SLP load/store done
+ in NODE verifying we can sink them up to the last stmt in the
+ group. */
+ gimple *last_access = vect_find_last_scalar_stmt_in_slp (node);
+ for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
{
- /* That only holds for load-store pairs taking part in vectorization. */
- if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dra)))
- && STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (drb))))
- return false;
- }
+ gimple *access = SLP_TREE_SCALAR_STMTS (node)[k];
+ if (access == last_access)
+ continue;
+ data_reference *dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (access));
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (access);
+ gsi_stmt (gsi) != last_access; gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (! gimple_vuse (stmt)
+ || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt)))
+ continue;
+
+ /* If we couldn't record a (single) data reference for this
+ stmt we have to give up. */
+ /* ??? Here and below if dependence analysis fails we can resort
+ to the alias oracle which can handle more kinds of stmts. */
+ data_reference *dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+ if (!dr_b)
+ return false;
+
+ /* If we run into a store of this same instance (we've just
+ marked those) then delay dependence checking until we run
+ into the last store because this is where it will have
+ been sunk to (and we verify if we can do that as well). */
+ if (gimple_visited_p (stmt))
+ {
+ if (stmt != last_store)
+ continue;
+ unsigned i;
+ gimple *store;
+ FOR_EACH_VEC_ELT (stores, i, store)
+ {
+ data_reference *store_dr
+ = STMT_VINFO_DATA_REF (vinfo_for_stmt (store));
+ ddr_p ddr = initialize_data_dependence_relation
+ (dr_a, store_dr, vNULL);
+ if (vect_slp_analyze_data_ref_dependence (ddr))
+ {
+ free_dependence_relation (ddr);
+ return false;
+ }
+ free_dependence_relation (ddr);
+ }
+ }
+ ddr_p ddr = initialize_data_dependence_relation (dr_a, dr_b, vNULL);
+ if (vect_slp_analyze_data_ref_dependence (ddr))
+ {
+ free_dependence_relation (ddr);
+ return false;
+ }
+ free_dependence_relation (ddr);
+ }
+ }
return true;
}
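
/* In rough terms: for an instance storing  a[0] = x;  a[1] = y;  with an
   unrelated  t = b[0];  in between, vectorizing sinks the store to a[0]
   down to the store to a[1]; the walk above checks each memory stmt
   crossed on the way (here the load of b[0]) for a dependence that
   would make that sinking invalid.  */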
the maximum vectorization factor the data dependences allow. */
bool
-vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
+vect_slp_analyze_instance_dependence (slp_instance instance)
{
- struct data_dependence_relation *ddr;
- unsigned int i;
-
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_slp_analyze_data_ref_dependences ===\n");
+ "=== vect_slp_analyze_instance_dependence ===\n");
- if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
- &BB_VINFO_DDRS (bb_vinfo),
- vNULL, true))
- return false;
+ /* The stores of this instance are at the root of the SLP tree. */
+ slp_tree store = SLP_INSTANCE_TREE (instance);
+ if (! STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (store)[0])))
+ store = NULL;
- FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
- if (vect_slp_analyze_data_ref_dependence (ddr))
- return false;
+ /* Verify we can sink stores to the vectorized stmt insert location. */
+ gimple *last_store = NULL;
+ if (store)
+ {
+ if (! vect_slp_analyze_node_dependences (instance, store, vNULL, NULL))
+ return false;
- return true;
-}
+ /* Mark stores in this instance and remember the last one. */
+ last_store = vect_find_last_scalar_stmt_in_slp (store);
+ for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
+ gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], true);
+ }
+
+ bool res = true;
+ /* Verify we can sink loads to the vectorized stmt insert location,
+ special-casing stores of this instance. */
+ slp_tree load;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load)
+ if (! vect_slp_analyze_node_dependences (instance, load,
+ store
+ ? SLP_TREE_SCALAR_STMTS (store)
+ : vNULL, last_store))
+ {
+ res = false;
+ break;
+ }
+
+ /* Unset the visited flag. */
+ if (store)
+ for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
+ gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], false);
+
+ return res;
+}
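+
+/* The visited marking above is what allows vect_slp_analyze_node_dependences
+   to postpone checking a load against this instance's stores until the
+   single point (the last store) they will be sunk to, rather than
+   checking each interleaved store position separately.  */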
/* Function vect_compute_data_ref_alignment
FOR NOW: No analysis is actually performed. Misalignment is calculated
only for trivial cases. TODO. */
-static bool
+bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = NULL;
tree ref = DR_REF (dr);
tree vectype;
tree base, base_addr;
- bool base_aligned;
- tree misalign;
- tree aligned_to, alignment;
+ tree misalign = NULL_TREE;
+ tree aligned_to;
+ unsigned HOST_WIDE_INT alignment;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
/* Initialize misalignment to unknown. */
SET_DR_MISALIGNMENT (dr, -1);
- /* Strided loads perform only component accesses, misalignment information
- is irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
- return true;
-
- misalign = DR_INIT (dr);
+ if (tree_fits_shwi_p (DR_STEP (dr)))
+ misalign = DR_INIT (dr);
aligned_to = DR_ALIGNED_TO (dr);
base_addr = DR_BASE_ADDRESS (dr);
vectype = STMT_VINFO_VECTYPE (stmt_info);
if (loop && nested_in_vect_loop_p (loop, stmt))
{
tree step = DR_STEP (dr);
- HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
- if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
+ if (tree_fits_shwi_p (step)
+ && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
}
}
- /* Similarly, if we're doing basic-block vectorization, we can only use
- base and misalignment information relative to an innermost loop if the
- misalignment stays the same throughout the execution of the loop.
- As above, this is the case if the stride of the dataref evenly divides
- by the vector size. */
- if (!loop)
+ /* Similarly we can only use base and misalignment information relative to
+ an innermost loop if the misalignment stays the same throughout the
+ execution of the loop. As above, this is the case if the stride of
+ the dataref evenly divides by the vector size. */
+ else
{
tree step = DR_STEP (dr);
- HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
+ unsigned vf = loop ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
- if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
+ if (tree_fits_shwi_p (step)
+ && ((tree_to_shwi (step) * vf)
+ % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "SLP: step doesn't divide the vector-size.\n");
+ "step doesn't divide the vector-size.\n");
misalign = NULL_TREE;
}
}
- base = build_fold_indirect_ref (base_addr);
- alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
+ /* To look at alignment of the base we have to preserve an inner MEM_REF
+ as that carries alignment information of the actual access. */
+ base = ref;
+ while (handled_component_p (base))
+ base = TREE_OPERAND (base, 0);
+ if (TREE_CODE (base) == MEM_REF)
+ base = build2 (MEM_REF, TREE_TYPE (base), base_addr,
+ build_int_cst (TREE_TYPE (TREE_OPERAND (base, 1)), 0));
+ unsigned int base_alignment = get_object_alignment (base);
+
+ if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
+ DR_VECT_AUX (dr)->base_element_aligned = true;
- if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
+ alignment = TYPE_ALIGN_UNIT (vectype);
+
+ if ((compare_tree_int (aligned_to, alignment) < 0)
|| !misalign)
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Unknown alignment for access: ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return true;
}
- if ((DECL_P (base)
- && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
- alignment) >= 0)
- || (TREE_CODE (base_addr) == SSA_NAME
- && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
- TREE_TYPE (base_addr)))),
- alignment) >= 0)
- || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
- base_aligned = true;
- else
- base_aligned = false;
-
- if (!base_aligned)
+ if (base_alignment < TYPE_ALIGN (vectype))
{
- /* Do not change the alignment of global variables here if
- flag_section_anchors is enabled as we already generated
- RTL for other functions. Most global variables should
- have been aligned during the IPA increase_alignment pass. */
- if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
- || (TREE_STATIC (base) && flag_section_anchors))
+ /* Strip an inner MEM_REF to a bare decl if possible. */
+ if (TREE_CODE (base) == MEM_REF
+ && integer_zerop (TREE_OPERAND (base, 1))
+ && TREE_CODE (TREE_OPERAND (base, 0)) == ADDR_EXPR)
+ base = TREE_OPERAND (TREE_OPERAND (base, 0), 0);
+
+ if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
{
if (dump_enabled_p ())
{
dump_printf (MSG_NOTE, "\n");
}
- ((dataref_aux *)dr->aux)->base_decl = base;
- ((dataref_aux *)dr->aux)->base_misaligned = true;
+ DR_VECT_AUX (dr)->base_decl = base;
+ DR_VECT_AUX (dr)->base_misaligned = true;
+ DR_VECT_AUX (dr)->base_element_aligned = true;
}
/* If this is a backward running DR then first access in the larger
vectype actually is N-1 elements before the address in the DR.
Adjust misalign accordingly. */
- if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+ if (tree_int_cst_sgn (DR_STEP (dr)) < 0)
{
tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
/* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
misalign = size_binop (PLUS_EXPR, misalign, offset);
}
- /* Modulo alignment. */
- misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
-
- if (!tree_fits_uhwi_p (misalign))
- {
- /* Negative or overflowed misalignment value. */
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unexpected misalign value\n");
- return false;
- }
-
- SET_DR_MISALIGNMENT (dr, tree_to_uhwi (misalign));
+ SET_DR_MISALIGNMENT (dr,
+ wi::mod_floor (misalign, alignment, SIGNED).to_uhwi ());
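+  /* E.g. with a base aligned to 16 bytes, a DR_INIT of 20 and a 16-byte
+     vector type this records a misalignment of 4.  */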
if (dump_enabled_p ())
{
}
-/* Function vect_compute_data_refs_alignment
-
- Compute the misalignment of data references in the loop.
- Return FALSE if a data reference is found that cannot be vectorized. */
-
-static bool
-vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
- bb_vec_info bb_vinfo)
-{
- vec<data_reference_p> datarefs;
- struct data_reference *dr;
- unsigned int i;
-
- if (loop_vinfo)
- datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- else
- datarefs = BB_VINFO_DATAREFS (bb_vinfo);
-
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
- && !vect_compute_data_ref_alignment (dr))
- {
- if (bb_vinfo)
- {
- /* Mark unsupported statement as unvectorizable. */
- STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
- continue;
- }
- else
- return false;
- }
-
- return true;
-}
-
-
/* Function vect_update_misalignment_for_peel
DR - the data reference whose misalignment is to be adjusted.
}
+/* Function verify_data_ref_alignment
+
+ Return TRUE if DR can be handled with respect to alignment. */
+
+static bool
+verify_data_ref_alignment (data_reference_p dr)
+{
+ enum dr_alignment_support supportable_dr_alignment
+ = vect_supportable_dr_alignment (dr, false);
+ if (!supportable_dr_alignment)
+ {
+ if (dump_enabled_p ())
+ {
+ if (DR_IS_READ (dr))
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: unsupported unaligned load.");
+ else
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: unsupported unaligned "
+ "store.");
+
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
+ DR_REF (dr));
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+ return false;
+ }
+
+ if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Vectorizing an unaligned access.\n");
+
+ return true;
+}
+
/* Function vect_verify_datarefs_alignment
Return TRUE if all data references in the loop can be
handled with respect to alignment. */
bool
-vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
+vect_verify_datarefs_alignment (loop_vec_info vinfo)
{
- vec<data_reference_p> datarefs;
+ vec<data_reference_p> datarefs = vinfo->datarefs;
struct data_reference *dr;
- enum dr_alignment_support supportable_dr_alignment;
unsigned int i;
- if (loop_vinfo)
- datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- else
- datarefs = BB_VINFO_DATAREFS (bb_vinfo);
-
FOR_EACH_VEC_ELT (datarefs, i, dr)
{
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
if (!STMT_VINFO_RELEVANT_P (stmt_info))
continue;
- /* For interleaving, only the alignment of the first access matters.
- Skip statements marked as not vectorizable. */
- if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
- && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
- || !STMT_VINFO_VECTORIZABLE (stmt_info))
- continue;
+ /* For interleaving, only the alignment of the first access matters. */
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
+ continue;
- /* Strided loads perform only component accesses, alignment is
+ /* Strided accesses perform only component accesses, alignment is
irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info)
+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
continue;
- supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
- if (!supportable_dr_alignment)
- {
- if (dump_enabled_p ())
- {
- if (DR_IS_READ (dr))
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: unsupported unaligned load.");
- else
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: unsupported unaligned "
- "store.");
-
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- DR_REF (dr));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return false;
- }
- if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Vectorizing an unaligned access.\n");
+ if (! verify_data_ref_alignment (dr))
+ return false;
}
+
return true;
}
static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
unsigned int *outside_cost,
stmt_vector_for_cost *body_cost_vec)
{
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
}
+typedef struct _vect_peel_info
+{
+ int npeel;
+ struct data_reference *dr;
+ unsigned int count;
+} *vect_peel_info;
+
+typedef struct _vect_peel_extended_info
+{
+ struct _vect_peel_info peel_info;
+ unsigned int inside_cost;
+ unsigned int outside_cost;
+ stmt_vector_for_cost body_cost_vec;
+} *vect_peel_extended_info;
+
+
+/* Peeling hashtable helpers. */
+
+struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
+{
+ static inline hashval_t hash (const _vect_peel_info *);
+ static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
+};
+
+inline hashval_t
+peel_info_hasher::hash (const _vect_peel_info *peel_info)
+{
+ return (hashval_t) peel_info->npeel;
+}
+
+inline bool
+peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
+{
+ return (a->npeel == b->npeel);
+}
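+
+/* Note the table is keyed purely on NPEEL: inserting a DR with an
+   already-recorded peel amount merely bumps that entry's COUNT, see
+   vect_peeling_hash_insert below.  */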
+
+
/* Insert DR into peeling hash table with NPEEL as key. */
static void
-vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
+vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
+ loop_vec_info loop_vinfo, struct data_reference *dr,
int npeel)
{
struct _vect_peel_info elem, *slot;
bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
elem.npeel = npeel;
- slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find (&elem);
+ slot = peeling_htab->find (&elem);
if (slot)
slot->count++;
else
slot->npeel = npeel;
slot->dr = dr;
slot->count = 1;
- new_slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find_slot (slot, INSERT);
+ new_slot = peeling_htab->find_slot (slot, INSERT);
*new_slot = slot;
}
vect_peel_info elem = *slot;
int save_misalignment, dummy;
unsigned int inside_cost = 0, outside_cost = 0, i;
- gimple stmt = DR_STMT (elem->dr);
+ gimple *stmt = DR_STMT (elem->dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
struct data_reference *dr;
stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
- int single_iter_cost;
prologue_cost_vec.create (2);
body_cost_vec.create (2);
&& GROUP_FIRST_ELEMENT (stmt_info) != stmt)
continue;
+ /* Strided accesses perform only component accesses, alignment is
+ irrelevant for them. */
+ if (STMT_VINFO_STRIDED_P (stmt_info)
+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ continue;
+
save_misalignment = DR_MISALIGNMENT (dr);
vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
SET_DR_MISALIGNMENT (dr, save_misalignment);
}
- single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
- outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
- &dummy, single_iter_cost,
- &prologue_cost_vec,
- &epilogue_cost_vec);
+ outside_cost += vect_get_known_peeling_cost
+ (loop_vinfo, elem->npeel, &dummy,
+ &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
+ &prologue_cost_vec, &epilogue_cost_vec);
/* Prologue and epilogue costs are added to the target model later.
These costs depend only on the scalar iteration cost, the
option that aligns as many accesses as possible. */
static struct data_reference *
-vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
+vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
+ loop_vec_info loop_vinfo,
unsigned int *npeel,
stmt_vector_for_cost *body_cost_vec)
{
{
res.inside_cost = INT_MAX;
res.outside_cost = INT_MAX;
- LOOP_VINFO_PEELING_HTAB (loop_vinfo)
- .traverse <_vect_peel_extended_info *,
- vect_peeling_hash_get_lowest_cost> (&res);
+ peeling_htab->traverse <_vect_peel_extended_info *,
+ vect_peeling_hash_get_lowest_cost> (&res);
}
else
{
res.peel_info.count = 0;
- LOOP_VINFO_PEELING_HTAB (loop_vinfo)
- .traverse <_vect_peel_extended_info *,
- vect_peeling_hash_get_most_frequent> (&res);
+ peeling_htab->traverse <_vect_peel_extended_info *,
+ vect_peeling_hash_get_most_frequent> (&res);
}
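   /* Either traversal above visits every recorded (npeel, dr, count)
      entry and accumulates the winning candidate into RES.  */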
*npeel = res.peel_info.npeel;
bool do_peeling = false;
bool do_versioning = false;
bool stat;
- gimple stmt;
+ gimple *stmt;
stmt_vec_info stmt_info;
unsigned int npeel = 0;
bool all_misalignments_unknown = true;
tree vectype;
unsigned int nelements, mis, same_align_drs_max = 0;
stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost ();
+ hash_table<peel_info_hasher> peeling_htab (1);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_enhance_data_refs_alignment ===\n");
+ /* Reset data so we can safely be called multiple times. */
+ LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
+ LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
+
/* While cost model enhancements are expected in the future, the high level
view of the code at this time is as follows:
if (integer_zerop (DR_STEP (dr)))
continue;
- /* Strided loads perform only component accesses, alignment is
+ /* Strided accesses perform only component accesses, alignment is
irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info)
+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
continue;
supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
size_zero_node) < 0;
/* Save info about DR in the hash table. */
- if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
- LOOP_VINFO_PEELING_HTAB (loop_vinfo).create (1);
-
vectype = STMT_VINFO_VECTYPE (stmt_info);
nelements = TYPE_VECTOR_SUBPARTS (vectype);
mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
 We do this automatically for the cost model, since we calculate cost
for every peeling option. */
if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
- possible_npeel_number = vf /nelements;
+ {
+ if (STMT_SLP_TYPE (stmt_info))
+ possible_npeel_number
+ = (vf * GROUP_SIZE (stmt_info)) / nelements;
+ else
+ possible_npeel_number = vf / nelements;
+ }
/* Handle the aligned case. We may decide to align some other
access, making DR unaligned. */
for (j = 0; j < possible_npeel_number; j++)
{
- gcc_assert (npeel_tmp <= vf);
- vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
+ vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
+ dr, npeel_tmp);
npeel_tmp += nelements;
}
/* Check if we can possibly peel the loop. */
if (!vect_can_advance_ivs_p (loop_vinfo)
- || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
+ || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
+ || loop->inner)
do_peeling = false;
- if (do_peeling && all_misalignments_unknown
+ if (do_peeling
+ && all_misalignments_unknown
&& vect_supportable_dr_alignment (dr0, false))
{
-
/* Check if the target requires to prefer stores over loads, i.e., if
misaligned stores are more expensive than misaligned loads (taking
drs with same alignment into account). */
}
/* In case there are only loads with different unknown misalignments, use
- peeling only if it may help to align other accesses in the loop. */
+ peeling only if it may help to align other accesses in the loop or
+ if it may help improve load bandwidth when we'd end up using
+ unaligned loads. */
+ tree dr0_vt = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr0)));
if (!first_store
&& !STMT_VINFO_SAME_ALIGN_REFS (
vinfo_for_stmt (DR_STMT (dr0))).length ()
- && vect_supportable_dr_alignment (dr0, false)
- != dr_unaligned_supported)
+ && (vect_supportable_dr_alignment (dr0, false)
+ != dr_unaligned_supported
+ || (builtin_vectorization_cost (vector_load, dr0_vt, 0)
+ == builtin_vectorization_cost (unaligned_load, dr0_vt, -1))))
do_peeling = false;
}
gcc_assert (!all_misalignments_unknown);
/* Choose the best peeling from the hash table. */
- dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
+ dr0 = vect_peeling_hash_choose_best_peeling (&peeling_htab,
+ loop_vinfo, &npeel,
&body_cost_vec);
if (!dr0 || !npeel)
do_peeling = false;
&& GROUP_FIRST_ELEMENT (stmt_info) != stmt)
continue;
- /* Strided loads perform only component accesses, alignment is
+ /* Strided accesses perform only component accesses, alignment is
irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info)
+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
continue;
save_misalignment = DR_MISALIGNMENT (dr);
if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
{
- stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+ stat = vect_verify_datarefs_alignment (loop_vinfo);
if (!stat)
do_peeling = false;
else
}
}
+ /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */
if (do_peeling)
{
unsigned max_allowed_peel
unsigned max_peel = npeel;
if (max_peel == 0)
{
- gimple dr_stmt = DR_STMT (dr0);
+ gimple *dr_stmt = DR_STMT (dr0);
stmt_vec_info vinfo = vinfo_for_stmt (dr_stmt);
tree vtype = STMT_VINFO_VECTYPE (vinfo);
max_peel = TYPE_VECTOR_SUBPARTS (vtype) - 1;
}
}
+ /* Cost model #2 - if peeling may result in a remaining loop not
+ iterating enough to be vectorized then do not peel. */
+ if (do_peeling
+ && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ {
+ unsigned max_peel
+ = npeel == 0 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1 : npeel;
+ if (LOOP_VINFO_INT_NITERS (loop_vinfo)
+ < LOOP_VINFO_VECT_FACTOR (loop_vinfo) + max_peel)
+ do_peeling = false;
+ }
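+  /* E.g. with a vectorization factor of 4, up to 3 peeled iterations
+     and only 6 known iterations, 6 < 4 + 3, so the vectorized loop
+     would not run at all and we refuse to peel.  */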
+
if (do_peeling)
{
- stmt_info_for_cost *si;
- void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
-
/* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
If the misalignment of DR_i is identical to that of dr0 then set
DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
dump_printf_loc (MSG_NOTE, vect_location,
"Peeling for alignment will be applied.\n");
}
- /* We've delayed passing the inside-loop peeling costs to the
- target cost model until we were sure peeling would happen.
- Do so now. */
- if (body_cost_vec.exists ())
- {
- FOR_EACH_VEC_ELT (body_cost_vec, i, si)
- {
- struct _stmt_vec_info *stmt_info
- = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
- (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
- si->misalign, vect_body);
- }
- body_cost_vec.release ();
- }
+ /* The inside-loop cost will be accounted for in vectorizable_load
+ and vectorizable_store correctly with adjusted alignments.
+ Drop the body_cost_vec on the floor here. */
+ body_cost_vec.release ();
- stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+ stat = vect_verify_datarefs_alignment (loop_vinfo);
gcc_assert (stat);
return stat;
}
&& GROUP_FIRST_ELEMENT (stmt_info) != stmt))
continue;
- /* Strided loads perform only component accesses, alignment is
- irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
- continue;
+ if (STMT_VINFO_STRIDED_P (stmt_info))
+ {
+ /* Strided loads perform only component accesses, alignment is
+ irrelevant for them. */
+ if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ continue;
+ do_versioning = false;
+ break;
+ }
supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
if (!supportable_dr_alignment)
{
- gimple stmt;
+ gimple *stmt;
int mask;
tree vectype;
if (do_versioning)
{
- vec<gimple> may_misalign_stmts
+ vec<gimple *> may_misalign_stmts
= LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
- gimple stmt;
+ gimple *stmt;
/* It can now be assumed that the data references in the statements
in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
/* Peeling and versioning can't be done together at this time. */
gcc_assert (! (do_peeling && do_versioning));
- stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+ stat = vect_verify_datarefs_alignment (loop_vinfo);
gcc_assert (stat);
return stat;
}
/* This point is reached if neither peeling nor versioning is being done. */
gcc_assert (! (do_peeling || do_versioning));
- stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+ stat = vect_verify_datarefs_alignment (loop_vinfo);
return stat;
}
Return FALSE if a data reference is found that cannot be vectorized. */
bool
-vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
- bb_vec_info bb_vinfo)
+vect_analyze_data_refs_alignment (loop_vec_info vinfo)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
/* Mark groups of data references with same alignment using
data dependence information. */
- if (loop_vinfo)
+ vec<ddr_p> ddrs = vinfo->ddrs;
+ struct data_dependence_relation *ddr;
+ unsigned int i;
+
+ FOR_EACH_VEC_ELT (ddrs, i, ddr)
+ vect_find_same_alignment_drs (ddr, vinfo);
+
+ vec<data_reference_p> datarefs = vinfo->datarefs;
+ struct data_reference *dr;
+
+ FOR_EACH_VEC_ELT (datarefs, i, dr)
{
- vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
- struct data_dependence_relation *ddr;
- unsigned int i;
+ stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
+ if (STMT_VINFO_VECTORIZABLE (stmt_info)
+ && !vect_compute_data_ref_alignment (dr))
+ {
+ /* Strided accesses perform only component accesses, misalignment
+ information is irrelevant for them. */
+ if (STMT_VINFO_STRIDED_P (stmt_info)
+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ continue;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: can't calculate alignment "
+ "for data ref.\n");
- FOR_EACH_VEC_ELT (ddrs, i, ddr)
- vect_find_same_alignment_drs (ddr, loop_vinfo);
+ return false;
+ }
}
- if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
+ return true;
+}
+
+
+/* Analyze alignment of DRs of stmts in NODE. */
+
+static bool
+vect_slp_analyze_and_verify_node_alignment (slp_tree node)
+{
+ /* We vectorize from the first scalar stmt in the node unless
+ the node is permuted, in which case we start from the first
+ element in the group. */
+ gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
+ data_reference_p first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+ if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
+ first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
+
+ data_reference_p dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+ if (! vect_compute_data_ref_alignment (dr)
+ /* For creating the data-ref pointer we need alignment of the
+ first element anyway. */
+ || (dr != first_dr
+ && ! vect_compute_data_ref_alignment (first_dr))
+ || ! verify_data_ref_alignment (dr))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: can't calculate alignment "
- "for data ref.\n");
+ "not vectorized: bad data alignment in basic "
+ "block.\n");
return false;
}
return true;
}
+/* Function vect_slp_analyze_and_verify_instance_alignment
+
+ Analyze the alignment of the data-references in the SLP instance.
+ Return FALSE if a data reference is found that cannot be vectorized. */
+
+bool
+vect_slp_analyze_and_verify_instance_alignment (slp_instance instance)
+{
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "=== vect_slp_analyze_and_verify_instance_alignment ===\n");
+
+ slp_tree node;
+ unsigned i;
+ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
+ if (! vect_slp_analyze_and_verify_node_alignment (node))
+ return false;
+
+ node = SLP_INSTANCE_TREE (instance);
+ if (STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]))
+ && ! vect_slp_analyze_and_verify_node_alignment
+ (SLP_INSTANCE_TREE (instance)))
+ return false;
+
+ return true;
+}
+
/* Analyze groups of accesses: check that DR belongs to a group of
accesses of legal size, step, etc. Detect gaps, single element
interleaving, and other special cases. Set grouped access info.
- Collect groups of strided stores for further use in SLP analysis. */
+ Collect groups of strided stores for further use in SLP analysis.
+ Worker for vect_analyze_group_access. */
static bool
-vect_analyze_group_access (struct data_reference *dr)
+vect_analyze_group_access_1 (struct data_reference *dr)
{
tree step = DR_STEP (dr);
tree scalar_type = TREE_TYPE (DR_REF (dr));
HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
+ HOST_WIDE_INT dr_step = -1;
HOST_WIDE_INT groupsize, last_accessed_element = 1;
bool slp_impossible = false;
- struct loop *loop = NULL;
-
- if (loop_vinfo)
- loop = LOOP_VINFO_LOOP (loop_vinfo);
/* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
size of the interleaving group (including gaps). */
- groupsize = absu_hwi (dr_step) / type_size;
+ if (tree_fits_shwi_p (step))
+ {
+ dr_step = tree_to_shwi (step);
+ /* Check that STEP is a multiple of type size. Otherwise there is
+ a non-element-sized gap at the end of the group which we
+ cannot represent in GROUP_GAP or GROUP_SIZE.
+ ??? As we can handle non-constant step fine here we should
+ simply remove uses of GROUP_GAP between the last and first
+ element and instead rely on DR_STEP. GROUP_SIZE then would
+ simply not include that gap. */
+ if ((dr_step % type_size) != 0)
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Step ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
+ dump_printf (MSG_NOTE,
+ " is not a multiple of the element size for ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
+ dump_printf (MSG_NOTE, "\n");
+ }
+ return false;
+ }
+ groupsize = absu_hwi (dr_step) / type_size;
+ }
+ else
+ groupsize = 0;
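+
+  /* E.g. a constant step of 32 bytes over 4-byte elements yields a
+     GROUPSIZE of 8, gaps included; with a non-constant step GROUPSIZE
+     stays 0 and is derived from the element count further below.  */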
/* Not consecutive access is possible only if it is a part of interleaving. */
if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
dump_printf (MSG_NOTE, "\n");
}
- if (loop_vinfo)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Data access with gaps requires scalar "
- "epilogue loop\n");
- if (loop->inner)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Peeling for outer loop is not"
- " supported\n");
- return false;
- }
-
- LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
- }
-
return true;
}
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not consecutive access ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
if (bb_vinfo)
return true;
}
- return false;
+ dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
+ STMT_VINFO_STRIDED_P (stmt_info) = true;
+ return true;
}
if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
{
/* First stmt in the interleaving chain. Check the chain. */
- gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+ gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
struct data_reference *data_ref = dr;
unsigned int count = 1;
tree prev_init = DR_INIT (data_ref);
- gimple prev = stmt;
+ gimple *prev = stmt;
HOST_WIDE_INT diff, gaps = 0;
- unsigned HOST_WIDE_INT count_in_bytes;
while (next)
{
return false;
}
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Two or more load stmts share the same dr.\n");
+
/* For load use the same data-ref load. */
GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
count++;
}
- /* COUNT is the number of accesses found, we multiply it by the size of
- the type to get COUNT_IN_BYTES. */
- count_in_bytes = type_size * count;
+ if (groupsize == 0)
+ groupsize = count + gaps;
- /* Check that the size of the interleaving (including gaps) is not
- greater than STEP. */
- if (dr_step != 0
- && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "interleaving size is greater than step for ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- DR_REF (dr));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return false;
- }
+ if (groupsize > UINT_MAX)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "group is too large\n");
+ return false;
+ }
- /* Check that the size of the interleaving is equal to STEP for stores,
+ /* Check that the size of the interleaving is equal to count for stores,
i.e., that there are no gaps. */
- if (dr_step != 0
- && absu_hwi (dr_step) != count_in_bytes)
+ if (groupsize != count
+ && !DR_IS_READ (dr))
{
- if (DR_IS_READ (dr))
- {
- slp_impossible = true;
- /* There is a gap after the last load in the group. This gap is a
- difference between the groupsize and the number of elements.
- When there is no gap, this difference should be 0. */
- GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
- }
- else
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "interleaved store with gaps\n");
- return false;
- }
- }
-
- /* Check that STEP is a multiple of type size. */
- if (dr_step != 0
- && (dr_step % type_size) != 0)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "step is not a multiple of type size: step ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
- dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- TYPE_SIZE_UNIT (scalar_type));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return false;
- }
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "interleaved store with gaps\n");
+ return false;
+ }
- if (groupsize == 0)
- groupsize = count;
+ /* If there is a gap after the last load in the group it is the
+ difference between the groupsize and the last accessed
+ element.
+ When there is no gap, this difference should be 0. */
+ GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - last_accessed_element;
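+  /* E.g. loads of a[4*i] and a[4*i+2] give a GROUPSIZE of 4 and a last
+     accessed element of 3, hence a GROUP_GAP of 1 after the group.  */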
GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Detected interleaving of size %d\n", (int)groupsize);
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Detected interleaving ");
+ if (DR_IS_READ (dr))
+ dump_printf (MSG_NOTE, "load ");
+ else
+ dump_printf (MSG_NOTE, "store ");
+ dump_printf (MSG_NOTE, "of size %u starting with ",
+ (unsigned)groupsize);
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ if (GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "There is a gap of %u elements after the group\n",
+ GROUP_GAP (vinfo_for_stmt (stmt)));
+ }
/* SLP: create an SLP data structure for every interleaving group of
 stores for further analysis in vect_analyze_slp. */
if (bb_vinfo)
BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
}
+ }
- /* There is a gap in the end of the group. */
- if (groupsize - last_accessed_element > 0 && loop_vinfo)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Data access with gaps requires scalar "
- "epilogue loop\n");
- if (loop->inner)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Peeling for outer loop is not supported\n");
- return false;
- }
+ return true;
+}
+
+/* Analyze groups of accesses: check that DR belongs to a group of
+ accesses of legal size, step, etc. Detect gaps, single element
+ interleaving, and other special cases. Set grouped access info.
+ Collect groups of strided stores for further use in SLP analysis. */
- LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+static bool
+vect_analyze_group_access (struct data_reference *dr)
+{
+ if (!vect_analyze_group_access_1 (dr))
+ {
+ /* Dissolve the group if present. */
+ gimple *next;
+ gimple *stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dr)));
+ while (stmt)
+ {
+ stmt_vec_info vinfo = vinfo_for_stmt (stmt);
+ next = GROUP_NEXT_ELEMENT (vinfo);
+ GROUP_FIRST_ELEMENT (vinfo) = NULL;
+ GROUP_NEXT_ELEMENT (vinfo) = NULL;
+ stmt = next;
}
+ return false;
}
-
return true;
}
-
/* Analyze the access pattern of the data-reference DR.
In case of non-consecutive accesses call vect_analyze_group_access() to
analyze groups of accesses. */
{
tree step = DR_STEP (dr);
tree scalar_type = TREE_TYPE (DR_REF (dr));
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = NULL;
return false;
}
- /* Allow invariant loads in not nested loops. */
+ /* Allow loads with zero step in inner-loop vectorization. */
if (loop_vinfo && integer_zerop (step))
{
GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
- if (nested_in_vect_loop_p (loop, stmt))
+ if (!nested_in_vect_loop_p (loop, stmt))
+ return DR_IS_READ (dr);
+ /* Allow references with zero step for outer loops marked
+ with pragma omp simd only - it guarantees absence of
+ loop-carried dependencies between inner loop iterations. */
+ if (!loop->force_vectorize)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"zero step in inner loop of nest\n");
return false;
}
- return DR_IS_READ (dr);
}
if (loop && nested_in_vect_loop_p (loop, stmt))
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"zero step in outer loop.\n");
- if (DR_IS_READ (dr))
- return true;
- else
- return false;
+ return DR_IS_READ (dr);
}
}
return false;
}
+
/* Assume this is a DR handled by non-constant strided load case. */
if (TREE_CODE (step) != INTEGER_CST)
- return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
+ return (STMT_VINFO_STRIDED_P (stmt_info)
+ && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ || vect_analyze_group_access (dr)));
/* Not consecutive access - check if it's a part of interleaving group. */
return vect_analyze_group_access (dr);
if (t2 == NULL)
return 1;
+ STRIP_NOPS (t1);
+ STRIP_NOPS (t2);
if (TREE_CODE (t1) != TREE_CODE (t2))
return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
if (dra == drb)
return 0;
+ /* DRs in different loops never belong to the same group. */
+ loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
+ loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
+ if (loopa != loopb)
+ return loopa->num < loopb->num ? -1 : 1;
+
/* Ordering of DRs according to base. */
if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
{
FORNOW: handle only arrays and pointer accesses. */
bool
-vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
+vect_analyze_data_ref_accesses (vec_info *vinfo)
{
unsigned int i;
- vec<data_reference_p> datarefs;
+ vec<data_reference_p> datarefs = vinfo->datarefs;
struct data_reference *dr;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_analyze_data_ref_accesses ===\n");
- if (loop_vinfo)
- datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- else
- datarefs = BB_VINFO_DATAREFS (bb_vinfo);
-
if (datarefs.is_empty ())
return true;
linear. Don't modify the original vector's order, it is needed for
determining what dependencies are reversed. */
vec<data_reference_p> datarefs_copy = datarefs.copy ();
- qsort (datarefs_copy.address (), datarefs_copy.length (),
- sizeof (data_reference_p), dr_group_sort_cmp);
+ datarefs_copy.qsort (dr_group_sort_cmp);
/* Build the interleaving chains. */
for (i = 0; i < datarefs_copy.length () - 1;)
matters we can push those to a worklist and re-iterate
 over them. Then we can just skip ahead to the next DR here. */
+ /* DRs in a different loop should not be put into the same
+ interleaving group. */
+ if (gimple_bb (DR_STMT (dra))->loop_father
+ != gimple_bb (DR_STMT (drb))->loop_father)
+ break;
+
/* Check that the data-refs have same first location (except init)
- and they are both either store or load (not load and store). */
+ and they are both either store or load (not load and store,
+ not masked loads or stores). */
if (DR_IS_READ (dra) != DR_IS_READ (drb)
|| !operand_equal_p (DR_BASE_ADDRESS (dra),
DR_BASE_ADDRESS (drb), 0)
- || !dr_equal_offsets_p (dra, drb))
+ || !dr_equal_offsets_p (dra, drb)
+ || !gimple_assign_single_p (DR_STMT (dra))
+ || !gimple_assign_single_p (DR_STMT (drb)))
break;
- /* Check that the data-refs have the same constant size and step. */
+ /* Check that the data-refs have the same constant size. */
tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
if (!tree_fits_uhwi_p (sza)
|| !tree_fits_uhwi_p (szb)
- || !tree_int_cst_equal (sza, szb)
- || !tree_fits_shwi_p (DR_STEP (dra))
- || !tree_fits_shwi_p (DR_STEP (drb))
- || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
+ || !tree_int_cst_equal (sza, szb))
+ break;
+
+ /* Check that the data-refs have the same step. */
+ if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
break;
/* Do not place the same access in the interleaving chain twice. */
/* If init_b == init_a + the size of the type * k, we have an
interleaving, and DRA is accessed before DRB. */
HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
- if ((init_b - init_a) % type_size_a != 0)
+ if (type_size_a == 0
+ || (init_b - init_a) % type_size_a != 0)
break;
- /* The step (if not zero) is greater than the difference between
- data-refs' inits. This splits groups into suitable sizes. */
- HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
- if (step != 0 && step <= (init_b - init_a))
+ /* If we have a store, the accesses are adjacent. This splits
+ groups into chunks we support (we don't support vectorization
+ of stores with gaps). */
+ if (!DR_IS_READ (dra)
+ && (init_b - (HOST_WIDE_INT) TREE_INT_CST_LOW
+ (DR_INIT (datarefs_copy[i-1]))
+ != type_size_a))
break;
+ /* If the step (when constant and nonzero) is greater than the
+ difference between data-refs' inits this splits groups into
+ suitable sizes. */
+ if (tree_fits_shwi_p (DR_STEP (dra)))
+ {
+ HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
+ if (step != 0 && step <= (init_b - init_a))
+ break;
+ }
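+	  /* E.g. with a constant step of 16 and 4-byte elements, a[i]
+	     (init 0) and a[i+4] (init 16) start separate groups, as the
+	     step does not exceed their init difference.  */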
+
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"Detected interleaving ");
+ if (DR_IS_READ (dra))
+ dump_printf (MSG_NOTE, "load ");
+ else
+ dump_printf (MSG_NOTE, "store ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
dump_printf (MSG_NOTE, " and ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: complicated access pattern.\n");
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
{
/* Mark the statement as not vectorizable. */
STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
return 0;
}
-template <class T> static void
-swap (T& a, T& b)
-{
- T c (a);
- a = b;
- b = c;
-}
-
/* Function vect_vfa_segment_size.
Create an expression that computes the size of segment
FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
{
struct data_reference *dr_a, *dr_b;
- gimple dr_group_first_a, dr_group_first_b;
+ gimple *dr_group_first_a, *dr_group_first_b;
tree segment_length_a, segment_length_b;
- gimple stmt_a, stmt_b;
+ gimple *stmt_a, *stmt_b;
dr_a = DDR_A (ddr);
stmt_a = DR_STMT (DDR_A (ddr));
dr_with_seg_len (dr_b, segment_length_b));
if (compare_tree (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b)) > 0)
- swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
+ std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
}
and DR_A1 and DR_A2 are two consecutive memrefs. */
if (*dr_a1 == *dr_a2)
{
- swap (dr_a1, dr_b1);
- swap (dr_a2, dr_b2);
+ std::swap (dr_a1, dr_b1);
+ std::swap (dr_a2, dr_b2);
}
if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
*/
- HOST_WIDE_INT
- min_seg_len_b = (TREE_CODE (dr_b1->seg_len) == INTEGER_CST) ?
- TREE_INT_CST_LOW (dr_b1->seg_len) :
- vect_factor;
+ HOST_WIDE_INT min_seg_len_b = (tree_fits_shwi_p (dr_b1->seg_len)
+ ? tree_to_shwi (dr_b1->seg_len)
+ : vect_factor);
if (diff <= min_seg_len_b
- || (TREE_CODE (dr_a1->seg_len) == INTEGER_CST
- && diff - (HOST_WIDE_INT) TREE_INT_CST_LOW (dr_a1->seg_len) <
- min_seg_len_b))
+ || (tree_fits_shwi_p (dr_a1->seg_len)
+ && diff - tree_to_shwi (dr_a1->seg_len) < min_seg_len_b))
{
if (dump_enabled_p ())
{
return true;
}
-/* Check whether a non-affine read in stmt is suitable for gather load
- and if so, return a builtin decl for that operation. */
+/* Check whether a non-affine read or write in stmt is suitable for gather load
+ or scatter store and if so, return a builtin decl for that operation. */
tree
-vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
- tree *offp, int *scalep)
+vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo, tree *basep,
+ tree *offp, int *scalep)
{
HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
tree offtype = NULL_TREE;
tree decl, base, off;
- enum machine_mode pmode;
- int punsignedp, pvolatilep;
+ machine_mode pmode;
+ int punsignedp, reversep, pvolatilep = 0;
base = DR_REF (dr);
/* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
&& integer_zerop (TREE_OPERAND (base, 1))
&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
{
- gimple def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
+ gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
if (is_gimple_assign (def_stmt)
&& gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
}
- /* The gather builtins need address of the form
+ /* The gather and scatter builtins need address of the form
loop_invariant + vector * {1, 2, 4, 8}
or
loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
 vectorized. The following code attempts to find such a preexisting
SSA_NAME OFF and put the loop invariants into a tree BASE
that can be gimplified before the loop. */
- base = get_inner_reference (base, &pbitsize, &pbitpos, &off,
- &pmode, &punsignedp, &pvolatilep, false);
- gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
+ base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
+ &punsignedp, &reversep, &pvolatilep, false);
+ gcc_assert (base && (pbitpos % BITS_PER_UNIT) == 0 && !reversep);
if (TREE_CODE (base) == MEM_REF)
{
{
if (off == NULL_TREE)
{
- double_int moff = mem_ref_offset (base);
- off = double_int_to_tree (sizetype, moff);
+ offset_int moff = mem_ref_offset (base);
+ off = wide_int_to_tree (sizetype, moff);
}
else
off = size_binop (PLUS_EXPR, off,
if (TREE_CODE (off) == SSA_NAME)
{
- gimple def_stmt = SSA_NAME_DEF_STMT (off);
+ gimple *def_stmt = SSA_NAME_DEF_STMT (off);
if (expr_invariant_in_loop_p (loop, off))
return NULL_TREE;
if (offtype == NULL_TREE)
offtype = TREE_TYPE (off);
- decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
- offtype, scale);
+ if (DR_IS_READ (dr))
+ decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
+ offtype, scale);
+ else
+ decl = targetm.vectorize.builtin_scatter (STMT_VINFO_VECTYPE (stmt_info),
+ offtype, scale);
+
if (decl == NULL_TREE)
return NULL_TREE;
*/
bool
-vect_analyze_data_refs (loop_vec_info loop_vinfo,
- bb_vec_info bb_vinfo,
- int *min_vf)
+vect_analyze_data_refs (vec_info *vinfo, int *min_vf)
{
struct loop *loop = NULL;
- basic_block bb = NULL;
unsigned int i;
- vec<data_reference_p> datarefs;
struct data_reference *dr;
tree scalar_type;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_analyze_data_refs ===\n");
-
- if (loop_vinfo)
- {
- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
-
- loop = LOOP_VINFO_LOOP (loop_vinfo);
- datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: loop contains function calls"
- " or data references that cannot be analyzed\n");
- return false;
- }
+ "=== vect_analyze_data_refs ===\n");
- for (i = 0; i < loop->num_nodes; i++)
- {
- gimple_stmt_iterator gsi;
-
- for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple stmt = gsi_stmt (gsi);
- if (!find_data_references_in_stmt (loop, stmt, &datarefs))
- {
- if (is_gimple_call (stmt) && loop->safelen)
- {
- tree fndecl = gimple_call_fndecl (stmt), op;
- if (fndecl != NULL_TREE)
- {
- struct cgraph_node *node = cgraph_get_node (fndecl);
- if (node != NULL && node->simd_clones != NULL)
- {
- unsigned int j, n = gimple_call_num_args (stmt);
- for (j = 0; j < n; j++)
- {
- op = gimple_call_arg (stmt, j);
- if (DECL_P (op)
- || (REFERENCE_CLASS_P (op)
- && get_base_address (op)))
- break;
- }
- op = gimple_call_lhs (stmt);
- /* Ignore #pragma omp declare simd functions
- if they don't have data references in the
- call stmt itself. */
- if (j == n
- && !(op
- && (DECL_P (op)
- || (REFERENCE_CLASS_P (op)
- && get_base_address (op)))))
- continue;
- }
- }
- }
- LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: loop contains function "
- "calls or data references that cannot "
- "be analyzed\n");
- return false;
- }
- }
- }
-
- LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
- }
- else
- {
- gimple_stmt_iterator gsi;
-
- bb = BB_VINFO_BB (bb_vinfo);
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple stmt = gsi_stmt (gsi);
- if (!find_data_references_in_stmt (NULL, stmt,
- &BB_VINFO_DATAREFS (bb_vinfo)))
- {
- /* Mark the rest of the basic-block as unvectorizable. */
- for (; !gsi_end_p (gsi); gsi_next (&gsi))
- {
- stmt = gsi_stmt (gsi);
- STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
- }
- break;
- }
- }
-
- datarefs = BB_VINFO_DATAREFS (bb_vinfo);
- }
+ if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Go through the data-refs, check that the analysis succeeded. Update
pointer from stmt_vec_info struct to DR and vectype. */
+ vec<data_reference_p> datarefs = vinfo->datarefs;
FOR_EACH_VEC_ELT (datarefs, i, dr)
{
- gimple stmt;
+ gimple *stmt;
stmt_vec_info stmt_info;
tree base, offset, init;
- bool gather = false;
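+ /* Whether this data reference was recognized as a gather load or a
+ scatter store. */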
+ enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
bool simd_lane_access = false;
int vf;
= DR_IS_READ (dr)
&& !TREE_THIS_VOLATILE (DR_REF (dr))
&& targetm.vectorize.builtin_gather != NULL;
+ bool maybe_scatter
+ = DR_IS_WRITE (dr)
+ && !TREE_THIS_VOLATILE (DR_REF (dr))
+ && targetm.vectorize.builtin_scatter != NULL;
bool maybe_simd_lane_access
- = loop_vinfo && loop->simduid;
+ = is_a <loop_vec_info> (vinfo) && loop->simduid;
- /* If target supports vector gather loads, or if this might be
- a SIMD lane access, see if they can't be used. */
- if (loop_vinfo
- && (maybe_gather || maybe_simd_lane_access)
+ /* If target supports vector gather loads or scatter stores, or if
+ this might be a SIMD lane access, see if they can't be used. */
+ if (is_a <loop_vec_info> (vinfo)
+ && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
&& !nested_in_vect_loop_p (loop, stmt))
{
struct data_reference *newdr
= create_data_ref (NULL, loop_containing_stmt (stmt),
- DR_REF (dr), stmt, true);
+ DR_REF (dr), stmt, !maybe_scatter);
gcc_assert (newdr != NULL && DR_REF (newdr));
if (DR_BASE_ADDRESS (newdr)
&& DR_OFFSET (newdr)
off = TREE_OPERAND (off, 0);
if (TREE_CODE (off) == SSA_NAME)
{
- gimple def = SSA_NAME_DEF_STMT (off);
+ gimple *def = SSA_NAME_DEF_STMT (off);
tree reft = TREE_TYPE (DR_REF (newdr));
if (is_gimple_call (def)
&& gimple_call_internal_p (def)
}
}
}
- if (!simd_lane_access && maybe_gather)
+ if (!simd_lane_access && (maybe_gather || maybe_scatter))
{
dr = newdr;
- gather = true;
+ if (maybe_gather)
+ gatherscatter = GATHER;
+ else
+ gatherscatter = SCATTER;
}
}
- if (!gather && !simd_lane_access)
+ if (gatherscatter == SG_NONE && !simd_lane_access)
free_data_ref (newdr);
}
- if (!gather && !simd_lane_access)
+ if (gatherscatter == SG_NONE && !simd_lane_access)
{
if (dump_enabled_p ())
{
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
return false;
"not vectorized: base addr of dr is a "
"constant\n");
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
return false;
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
tree outer_step, outer_base, outer_init;
HOST_WIDE_INT pbitsize, pbitpos;
tree poffset;
- enum machine_mode pmode;
- int punsignedp, pvolatilep;
+ machine_mode pmode;
+ int punsignedp, preversep, pvolatilep;
affine_iv base_iv, offset_iv;
tree dinit;
}
outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
- &poffset, &pmode, &punsignedp, &pvolatilep, false);
+ &poffset, &pmode, &punsignedp,
+ &preversep, &pvolatilep, false);
gcc_assert (outer_base != NULL_TREE);
if (pbitpos % BITS_PER_UNIT != 0)
return false;
}
+ if (preversep)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "failed: reverse storage order.\n");
+ return false;
+ }
+
outer_base = build_fold_addr_expr (outer_base);
if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
&base_iv, false))
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
+ if (is_a <bb_vec_info> (vinfo))
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
- if (bb_vinfo)
- break;
+ if (is_a <bb_vec_info> (vinfo))
+ {
+ /* No vector type is fine, the ref can still participate
+ in dependence analysis, we just can't vectorize it. */
+ STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+ continue;
+ }
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
{
STMT_VINFO_DATA_REF (stmt_info) = NULL;
- if (gather)
+ if (gatherscatter != SG_NONE)
free_data_ref (dr);
}
return false;
if (vf > *min_vf)
*min_vf = vf;
- if (gather)
+ if (gatherscatter != SG_NONE)
{
tree off;
-
- gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
- if (gather
- && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
- gather = false;
- if (!gather)
+ if (!vect_check_gather_scatter (stmt, as_a <loop_vec_info> (vinfo),
+ NULL, &off, NULL)
+ || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
{
STMT_VINFO_DATA_REF (stmt_info) = NULL;
free_data_ref (dr);
if (dump_enabled_p ())
{
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: not suitable for gather "
- "load ");
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ (gatherscatter == GATHER) ?
+ "not vectorized: not suitable for gather "
+ "load " :
+ "not vectorized: not suitable for scatter "
+ "store ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
+ free_data_ref (datarefs[i]);
datarefs[i] = dr;
- STMT_VINFO_GATHER_P (stmt_info) = true;
+ STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
}
- else if (loop_vinfo
+
+ else if (is_a <loop_vec_info> (vinfo)
&& TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
{
- if (nested_in_vect_loop_p (loop, stmt)
- || !DR_IS_READ (dr))
+ if (nested_in_vect_loop_p (loop, stmt))
{
if (dump_enabled_p ())
{
}
return false;
}
- STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
+ STMT_VINFO_STRIDED_P (stmt_info) = true;
}
}
avoids spending useless time in analyzing their dependence. */
if (i != datarefs.length ())
{
- gcc_assert (bb_vinfo != NULL);
+ gcc_assert (is_a <bb_vec_info> (vinfo));
for (unsigned j = i; j < datarefs.length (); ++j)
{
data_reference_p dr = datarefs[j];
case vect_scalar_var:
prefix = "stmp";
break;
+ case vect_mask_var:
+ prefix = "mask";
+ break;
case vect_pointer_var:
prefix = "vectp";
break;
return new_vect_var;
}
+/* Like vect_get_new_vect_var but return an SSA name. */
+
+tree
+vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
+{
+ const char *prefix;
+ tree new_vect_var;
+
+ switch (var_kind)
+ {
+ case vect_simple_var:
+ prefix = "vect";
+ break;
+ case vect_scalar_var:
+ prefix = "stmp";
+ break;
+ case vect_pointer_var:
+ prefix = "vectp";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (name)
+ {
+ char *tmp = concat (prefix, "_", name, NULL);
+ new_vect_var = make_temp_ssa_name (type, NULL, tmp);
+ free (tmp);
+ }
+ else
+ new_vect_var = make_temp_ssa_name (type, NULL, prefix);
+
+ return new_vect_var;
+}
+
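+/* Usage sketch: e.g. vect_get_new_ssa_name (vectype, vect_simple_var, NULL)
+   returns a fresh anonymous SSA name whose dump name uses the "vect"
+   prefix.  */
+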
+/* Duplicate ptr info and set alignment/misalignment on NAME from DR. */
+
+static void
+vect_duplicate_ssa_name_ptr_info (tree name, data_reference *dr,
+ stmt_vec_info stmt_info)
+{
+ duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr));
+ unsigned int align = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmt_info));
+ int misalign = DR_MISALIGNMENT (dr);
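+ /* A DR_MISALIGNMENT of -1 means the misalignment is unknown. */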
+ if (misalign == -1)
+ mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
+ else
+ set_ptr_info_alignment (SSA_NAME_PTR_INFO (name), align, misalign);
+}
/* Function vect_create_addr_base_for_vector_ref.
is as follows:
if LOOP=i_loop: &in (relative to i_loop)
if LOOP=j_loop: &in+i*2B (relative to j_loop)
+ BYTE_OFFSET: Optional, defaulted to NULL. If supplied, it is added to the
+ initial address. Unlike OFFSET, which is number of elements to
+ be added, BYTE_OFFSET is measured in bytes.
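+ For example, with 4-byte elements, OFFSET == 2 advances the initial
+ address by 8 bytes, whereas BYTE_OFFSET == 2 advances it by exactly
+ 2 bytes.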
Output:
1. Return an SSA_NAME whose value is the address of the memory location of
FORNOW: We are only handling array accesses with step 1. */
tree
-vect_create_addr_base_for_vector_ref (gimple stmt,
+vect_create_addr_base_for_vector_ref (gimple *stmt,
gimple_seq *new_stmt_list,
tree offset,
- struct loop *loop)
+ struct loop *loop,
+ tree byte_offset)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
base_offset = fold_build2 (PLUS_EXPR, sizetype,
base_offset, offset);
}
+ if (byte_offset)
+ {
+ byte_offset = fold_convert (sizetype, byte_offset);
+ base_offset = fold_build2 (PLUS_EXPR, sizetype,
+ base_offset, byte_offset);
+ }
/* base + base_offset */
if (loop_vinfo)
}
vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
- addr_base = fold_convert (vect_ptr_type, addr_base);
dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
- addr_base = force_gimple_operand (addr_base, &seq, false, dest);
+ addr_base = force_gimple_operand (addr_base, &seq, true, dest);
gimple_seq_add_seq (new_stmt_list, seq);
if (DR_PTR_INFO (dr)
- && TREE_CODE (addr_base) == SSA_NAME)
+ && TREE_CODE (addr_base) == SSA_NAME
+ && !SSA_NAME_PTR_INFO (addr_base))
{
- duplicate_ssa_name_ptr_info (addr_base, DR_PTR_INFO (dr));
- if (offset)
+ vect_duplicate_ssa_name_ptr_info (addr_base, dr, stmt_info);
+ if (offset || byte_offset)
mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
}
5. BSI: location where the new stmts are to be placed if there is no loop
6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
pointing to the initial address.
+ 7. BYTE_OFFSET (optional, defaults to NULL): a byte offset to be added
+ to the initial address accessed by the data-ref in STMT. This is
+ similar to OFFSET, but OFFSET is counted in elements while BYTE_OFFSET
+ is counted in bytes.
Output:
1. Declare a new ptr to vector_type, and have it point to the base of the
initial_address = &a[init];
if OFFSET is supplied:
initial_address = &a[init + OFFSET];
+ if BYTE_OFFSET is supplied:
+ initial_address = &a[init] + BYTE_OFFSET;
Return the initial_address in INITIAL_ADDRESS.
4. Return the pointer. */
tree
-vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
+vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop,
tree offset, tree *initial_address,
- gimple_stmt_iterator *gsi, gimple *ptr_incr,
- bool only_init, bool *inv_p)
+ gimple_stmt_iterator *gsi, gimple **ptr_incr,
+ bool only_init, bool *inv_p, tree byte_offset)
{
const char *base_name;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree aggr_ptr_type;
tree aggr_ptr;
tree new_temp;
- gimple vec_stmt;
gimple_seq new_stmt_list = NULL;
edge pe = NULL;
basic_block new_bb;
gimple_stmt_iterator incr_gsi;
bool insert_after;
tree indx_before_incr, indx_after_incr;
- gimple incr;
+ gimple *incr;
tree step;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
/* Likewise for any of the data references in the stmt group. */
else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
{
- gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
+ gimple *orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
do
{
stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
/* (2) Calculate the initial address of the aggregate-pointer, and set
the aggregate-pointer to point to it before the loop. */
- /* Create: (&(base[init_val+offset]) in the loop preheader. */
+ /* Create &(base[init_val+offset]+byte_offset) in the loop preheader. */
new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
- offset, loop);
+ offset, loop, byte_offset);
if (new_stmt_list)
{
if (pe)
}
*initial_address = new_temp;
-
- /* Create: p = (aggr_type *) initial_base */
- if (TREE_CODE (new_temp) != SSA_NAME
- || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
- {
- vec_stmt = gimple_build_assign (aggr_ptr,
- fold_convert (aggr_ptr_type, new_temp));
- aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
- /* Copy the points-to information if it exists. */
- if (DR_PTR_INFO (dr))
- duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
- gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
- if (pe)
- {
- new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
- gcc_assert (!new_bb);
- }
- else
- gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
- }
- else
- aggr_ptr_init = new_temp;
+ aggr_ptr_init = new_temp;
/* (3) Handle the updating of the aggregate-pointer inside the loop.
This is needed when ONLY_INIT is false, and also when AT_LOOP is the
aggr_ptr, loop, &incr_gsi, insert_after,
&indx_before_incr, &indx_after_incr);
incr = gsi_stmt (incr_gsi);
- set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+ set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
/* Copy the points-to information if it exists. */
if (DR_PTR_INFO (dr))
{
- duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
- duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
+ vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr, stmt_info);
+ vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr, stmt_info);
}
if (ptr_incr)
*ptr_incr = incr;
containing_loop, &incr_gsi, insert_after, &indx_before_incr,
&indx_after_incr);
incr = gsi_stmt (incr_gsi);
- set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+ set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
/* Copy the points-to information if it exists. */
if (DR_PTR_INFO (dr))
{
- duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
- duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
+ vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr, stmt_info);
+ vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr, stmt_info);
}
if (ptr_incr)
*ptr_incr = incr;
*/
tree
-bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
- gimple stmt, tree bump)
+bump_vector_ptr (tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
+ gimple *stmt, tree bump)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree update = TYPE_SIZE_UNIT (vectype);
- gimple incr_stmt;
+ gassign *incr_stmt;
ssa_op_iter iter;
use_operand_p use_p;
tree new_dataref_ptr;
if (bump)
update = bump;
- new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
- incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
- dataref_ptr, update);
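+ /* DATAREF_PTR need not be an SSA_NAME (it can, for instance, be the
+ invariant base address itself), so create a fresh pointer SSA name
+ in that case. */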
+ if (TREE_CODE (dataref_ptr) == SSA_NAME)
+ new_dataref_ptr = copy_ssa_name (dataref_ptr);
+ else
+ new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
+ incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
+ dataref_ptr, update);
vect_finish_stmt_generation (stmt, incr_stmt, gsi);
/* Copy the points-to information if it exists. */
tree type;
enum vect_var_kind kind;
- kind = vectype ? vect_simple_var : vect_scalar_var;
+ kind = vectype
+ ? VECTOR_BOOLEAN_TYPE_P (vectype)
+ ? vect_mask_var
+ : vect_simple_var
+ : vect_scalar_var;
type = vectype ? vectype : TREE_TYPE (scalar_dest);
gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
name = get_name (scalar_dest);
if (name)
- asprintf (&new_name, "%s_%u", name, SSA_NAME_VERSION (scalar_dest));
+ new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
else
- asprintf (&new_name, "_%u", SSA_NAME_VERSION (scalar_dest));
+ new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
vec_dest = vect_get_new_vect_var (type, kind, new_name);
free (new_name);
bool
vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
- enum machine_mode mode = TYPE_MODE (vectype);
+ machine_mode mode = TYPE_MODE (vectype);
- /* vect_permute_store_chain requires the group size to be a power of two. */
- if (exact_log2 (count) == -1)
+ /* vect_permute_store_chain requires the group size to be equal to 3 or
+ a power of two. */
+ if (count != 3 && exact_log2 (count) == -1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "the size of the group of accesses"
- " is not a power of 2\n");
+ "the size of the group of accesses"
+ " is not a power of 2 or not eqaul to 3\n");
return false;
}
{
unsigned int i, nelt = GET_MODE_NUNITS (mode);
unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
- for (i = 0; i < nelt / 2; i++)
+
+ if (count == 3)
{
- sel[i * 2] = i;
- sel[i * 2 + 1] = i + nelt;
+ unsigned int j0 = 0, j1 = 0, j2 = 0;
+ unsigned int i, j;
+
+ for (j = 0; j < 3; j++)
+ {
+ int nelt0 = ((3 - j) * nelt) % 3;
+ int nelt1 = ((3 - j) * nelt + 1) % 3;
+ int nelt2 = ((3 - j) * nelt + 2) % 3;
+ for (i = 0; i < nelt; i++)
+ {
+ if (3 * i + nelt0 < nelt)
+ sel[3 * i + nelt0] = j0++;
+ if (3 * i + nelt1 < nelt)
+ sel[3 * i + nelt1] = nelt + j1++;
+ if (3 * i + nelt2 < nelt)
+ sel[3 * i + nelt2] = 0;
+ }
+ if (!can_vec_perm_p (mode, false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf (MSG_MISSED_OPTIMIZATION,
+ "permutaion op not supported by target.\n");
+ return false;
+ }
+
+ for (i = 0; i < nelt; i++)
+ {
+ if (3 * i + nelt0 < nelt)
+ sel[3 * i + nelt0] = 3 * i + nelt0;
+ if (3 * i + nelt1 < nelt)
+ sel[3 * i + nelt1] = 3 * i + nelt1;
+ if (3 * i + nelt2 < nelt)
+ sel[3 * i + nelt2] = nelt + j2++;
+ }
+ if (!can_vec_perm_p (mode, false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf (MSG_MISSED_OPTIMIZATION,
+ "permutaion op not supported by target.\n");
+ return false;
+ }
+ }
+ return true;
}
- if (can_vec_perm_p (mode, false, sel))
+ else
{
- for (i = 0; i < nelt; i++)
- sel[i] += nelt / 2;
- if (can_vec_perm_p (mode, false, sel))
- return true;
+ /* If COUNT is not 3, only a power of 2 is supported. */
+ gcc_assert (exact_log2 (count) != -1);
+
+ for (i = 0; i < nelt / 2; i++)
+ {
+ sel[i * 2] = i;
+ sel[i * 2 + 1] = i + nelt;
+ }
+ if (can_vec_perm_p (mode, false, sel))
+ {
+ for (i = 0; i < nelt; i++)
+ sel[i] += nelt / 2;
+ if (can_vec_perm_p (mode, false, sel))
+ return true;
+ }
}
}
if (dump_enabled_p ())
dump_printf (MSG_MISSED_OPTIMIZATION,
- "interleave op not supported by target.\n");
+ "permutaion op not supported by target.\n");
return false;
}
/* Function vect_permute_store_chain.
Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
- a power of 2, generate interleave_high/low stmts to reorder the data
- correctly for the stores. Return the final references for stores in
- RESULT_CHAIN.
+ a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
+ the data correctly for the stores. Return the final references for stores
+ in RESULT_CHAIN.
E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
The input is 4 vectors each containing 8 elements. We assign a number to
void
vect_permute_store_chain (vec<tree> dr_chain,
unsigned int length,
- gimple stmt,
+ gimple *stmt,
gimple_stmt_iterator *gsi,
vec<tree> *result_chain)
{
tree vect1, vect2, high, low;
- gimple perm_stmt;
+ gimple *perm_stmt;
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
tree perm_mask_low, perm_mask_high;
- unsigned int i, n;
+ tree data_ref;
+ tree perm3_mask_low, perm3_mask_high;
+ unsigned int i, n, log_length = exact_log2 (length);
unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
memcpy (result_chain->address (), dr_chain.address (),
length * sizeof (tree));
- for (i = 0, n = nelt / 2; i < n; i++)
+ if (length == 3)
{
- sel[i * 2] = i;
- sel[i * 2 + 1] = i + nelt;
- }
- perm_mask_high = vect_gen_perm_mask (vectype, sel);
- gcc_assert (perm_mask_high != NULL);
+ unsigned int j0 = 0, j1 = 0, j2 = 0;
- for (i = 0; i < nelt; i++)
- sel[i] += nelt / 2;
- perm_mask_low = vect_gen_perm_mask (vectype, sel);
- gcc_assert (perm_mask_low != NULL);
+ for (j = 0; j < 3; j++)
+ {
+ int nelt0 = ((3 - j) * nelt) % 3;
+ int nelt1 = ((3 - j) * nelt + 1) % 3;
+ int nelt2 = ((3 - j) * nelt + 2) % 3;
- for (i = 0, n = exact_log2 (length); i < n; i++)
- {
- for (j = 0; j < length/2; j++)
- {
- vect1 = dr_chain[j];
- vect2 = dr_chain[j+length/2];
+ for (i = 0; i < nelt; i++)
+ {
+ if (3 * i + nelt0 < nelt)
+ sel[3 * i + nelt0] = j0++;
+ if (3 * i + nelt1 < nelt)
+ sel[3 * i + nelt1] = nelt + j1++;
+ if (3 * i + nelt2 < nelt)
+ sel[3 * i + nelt2] = 0;
+ }
+ perm3_mask_low = vect_gen_perm_mask_checked (vectype, sel);
+
+ for (i = 0; i < nelt; i++)
+ {
+ if (3 * i + nelt0 < nelt)
+ sel[3 * i + nelt0] = 3 * i + nelt0;
+ if (3 * i + nelt1 < nelt)
+ sel[3 * i + nelt1] = 3 * i + nelt1;
+ if (3 * i + nelt2 < nelt)
+ sel[3 * i + nelt2] = nelt + j2++;
+ }
+ perm3_mask_high = vect_gen_perm_mask_checked (vectype, sel);
+
+ vect1 = dr_chain[0];
+ vect2 = dr_chain[1];
/* Create interleaving stmt:
- high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
- high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
- perm_stmt
- = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
- vect1, vect2, perm_mask_high);
+ low = VEC_PERM_EXPR <vect1, vect2,
+ {j, nelt, *, j + 1, nelt + j + 1, *,
+ j + 2, nelt + j + 2, *, ...}> */
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
+ vect2, perm3_mask_low);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[2*j] = high;
+ vect1 = data_ref;
+ vect2 = dr_chain[2];
/* Create interleaving stmt:
- low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
- nelt*3/2+1, ...}> */
- low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
- perm_stmt
- = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
- vect1, vect2, perm_mask_low);
+ low = VEC_PERM_EXPR <vect1, vect2,
+ {0, 1, nelt + j, 3, 4, nelt + j + 1,
+ 6, 7, nelt + j + 2, ...}> */
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
+ vect2, perm3_mask_high);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[2*j+1] = low;
+ (*result_chain)[j] = data_ref;
+ }
+ }
+ else
+ {
+ /* If LENGTH is not 3, only a power of 2 is supported. */
+ gcc_assert (exact_log2 (length) != -1);
+
+ for (i = 0, n = nelt / 2; i < n; i++)
+ {
+ sel[i * 2] = i;
+ sel[i * 2 + 1] = i + nelt;
}
- memcpy (dr_chain.address (), result_chain->address (),
- length * sizeof (tree));
+ perm_mask_high = vect_gen_perm_mask_checked (vectype, sel);
+
+ for (i = 0; i < nelt; i++)
+ sel[i] += nelt / 2;
+ perm_mask_low = vect_gen_perm_mask_checked (vectype, sel);
+
+ for (i = 0, n = log_length; i < n; i++)
+ {
+ for (j = 0; j < length/2; j++)
+ {
+ vect1 = dr_chain[j];
+ vect2 = dr_chain[j+length/2];
+
+ /* Create interleaving stmt:
+ high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
+ ...}> */
+ high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
+ perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
+ vect2, perm_mask_high);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[2*j] = high;
+
+ /* Create interleaving stmt:
+ low = VEC_PERM_EXPR <vect1, vect2,
+ {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
+ ...}> */
+ low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
+ perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
+ vect2, perm_mask_low);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[2*j+1] = low;
+ }
+ memcpy (dr_chain.address (), result_chain->address (),
+ length * sizeof (tree));
+ }
}
}
Return value - the result of the loop-header phi node. */
tree
-vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
+vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi,
tree *realignment_token,
enum dr_alignment_support alignment_support_scheme,
tree init_addr,
edge pe = NULL;
tree scalar_dest = gimple_assign_lhs (stmt);
tree vec_dest;
- gimple inc;
+ gimple *inc;
tree ptr;
tree data_ref;
- gimple new_stmt;
basic_block new_bb;
tree msq_init = NULL_TREE;
tree new_temp;
- gimple phi_stmt;
+ gphi *phi_stmt;
tree msq = NULL_TREE;
gimple_seq stmts = NULL;
bool inv_p;
if (alignment_support_scheme == dr_explicit_realign_optimized)
{
/* Create msq_init = *(floor(p1)) in the loop preheader */
+ gassign *new_stmt;
gcc_assert (!compute_in_loop);
vec_dest = vect_create_destination_var (scalar_dest, vectype);
ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
NULL_TREE, &init_addr, NULL, &inc,
true, &inv_p);
- new_temp = copy_ssa_name (ptr, NULL);
- new_stmt = gimple_build_assign_with_ops
- (BIT_AND_EXPR, new_temp, ptr,
+ if (TREE_CODE (ptr) == SSA_NAME)
+ new_temp = copy_ssa_name (ptr);
+ else
+ new_temp = make_ssa_name (TREE_TYPE (ptr));
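+ /* Mask off the low address bits: floor PTR to a vector-aligned
+ address. */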
+ new_stmt = gimple_build_assign
+ (new_temp, BIT_AND_EXPR, ptr,
build_int_cst (TREE_TYPE (ptr),
-(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
if (targetm.vectorize.builtin_mask_for_load)
{
+ gcall *new_stmt;
tree builtin_decl;
/* Compute INIT_ADDR - the initial addressed accessed by this memref. */
pe = loop_preheader_edge (containing_loop);
vec_dest = vect_create_destination_var (scalar_dest, vectype);
- msq = make_ssa_name (vec_dest, NULL);
+ msq = make_ssa_name (vec_dest);
phi_stmt = create_phi_node (msq, containing_loop->header);
add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
bool
vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
- enum machine_mode mode = TYPE_MODE (vectype);
+ machine_mode mode = TYPE_MODE (vectype);
- /* vect_permute_load_chain requires the group size to be a power of two. */
- if (exact_log2 (count) == -1)
+ /* vect_permute_load_chain requires the group size to be equal to 3 or
+ a power of two. */
+ if (count != 3 && exact_log2 (count) == -1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "the size of the group of accesses"
- " is not a power of 2\n");
+ "the size of the group of accesses"
+ " is not a power of 2 or not equal to 3\n");
return false;
}
/* Check that the permutation is supported. */
if (VECTOR_MODE_P (mode))
{
- unsigned int i, nelt = GET_MODE_NUNITS (mode);
+ unsigned int i, j, nelt = GET_MODE_NUNITS (mode);
unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
- for (i = 0; i < nelt; i++)
- sel[i] = i * 2;
- if (can_vec_perm_p (mode, false, sel))
+ if (count == 3)
{
+ unsigned int k;
+ for (k = 0; k < 3; k++)
+ {
+ for (i = 0; i < nelt; i++)
+ if (3 * i + k < 2 * nelt)
+ sel[i] = 3 * i + k;
+ else
+ sel[i] = 0;
+ if (!can_vec_perm_p (mode, false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "shuffle of 3 loads is not supported by"
+ " target\n");
+ return false;
+ }
+ for (i = 0, j = 0; i < nelt; i++)
+ if (3 * i + k < 2 * nelt)
+ sel[i] = i;
+ else
+ sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
+ if (!can_vec_perm_p (mode, false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "shuffle of 3 loads is not supported by"
+ " target\n");
+ return false;
+ }
+ }
+ return true;
+ }
+ else
+ {
+ /* If COUNT is not 3, only a power of 2 is supported. */
+ gcc_assert (exact_log2 (count) != -1);
for (i = 0; i < nelt; i++)
- sel[i] = i * 2 + 1;
+ sel[i] = i * 2;
if (can_vec_perm_p (mode, false, sel))
- return true;
- }
+ {
+ for (i = 0; i < nelt; i++)
+ sel[i] = i * 2 + 1;
+ if (can_vec_perm_p (mode, false, sel))
+ return true;
+ }
+ }
}
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "extract even/odd not supported by target\n");
+ "extract even/odd not supported by target\n");
return false;
}
/* Function vect_permute_load_chain.
Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
- a power of 2, generate extract_even/odd stmts to reorder the input data
- correctly. Return the final references for loads in RESULT_CHAIN.
+ a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
+ the input data correctly. Return the final references for loads in
+ RESULT_CHAIN.
E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
The input is 4 vectors each containing 8 elements. We assign a number to each
static void
vect_permute_load_chain (vec<tree> dr_chain,
unsigned int length,
- gimple stmt,
+ gimple *stmt,
gimple_stmt_iterator *gsi,
vec<tree> *result_chain)
{
tree data_ref, first_vect, second_vect;
tree perm_mask_even, perm_mask_odd;
- gimple perm_stmt;
+ tree perm3_mask_low, perm3_mask_high;
+ gimple *perm_stmt;
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
unsigned int i, j, log_length = exact_log2 (length);
unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
memcpy (result_chain->address (), dr_chain.address (),
length * sizeof (tree));
- for (i = 0; i < nelt; ++i)
- sel[i] = i * 2;
- perm_mask_even = vect_gen_perm_mask (vectype, sel);
- gcc_assert (perm_mask_even != NULL);
+ if (length == 3)
+ {
+ unsigned int k;
+
+ for (k = 0; k < 3; k++)
+ {
+ for (i = 0; i < nelt; i++)
+ if (3 * i + k < 2 * nelt)
+ sel[i] = 3 * i + k;
+ else
+ sel[i] = 0;
+ perm3_mask_low = vect_gen_perm_mask_checked (vectype, sel);
+
+ for (i = 0, j = 0; i < nelt; i++)
+ if (3 * i + k < 2 * nelt)
+ sel[i] = i;
+ else
+ sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
+
+ perm3_mask_high = vect_gen_perm_mask_checked (vectype, sel);
+
+ first_vect = dr_chain[0];
+ second_vect = dr_chain[1];
+
+ /* Create interleaving stmt (low part of):
+ low = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
+ ...}> */
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
+ second_vect, perm3_mask_low);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+
+ /* Create interleaving stmt (high part of):
+ high = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
+ ...}> */
+ first_vect = data_ref;
+ second_vect = dr_chain[2];
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
+ second_vect, perm3_mask_high);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[k] = data_ref;
+ }
+ }
+ else
+ {
+ /* If LENGTH is not 3, only a power of 2 is supported. */
+ gcc_assert (exact_log2 (length) != -1);
+
+ for (i = 0; i < nelt; ++i)
+ sel[i] = i * 2;
+ perm_mask_even = vect_gen_perm_mask_checked (vectype, sel);
+
+ for (i = 0; i < nelt; ++i)
+ sel[i] = i * 2 + 1;
+ perm_mask_odd = vect_gen_perm_mask_checked (vectype, sel);
+
+ for (i = 0; i < log_length; i++)
+ {
+ for (j = 0; j < length; j += 2)
+ {
+ first_vect = dr_chain[j];
+ second_vect = dr_chain[j+1];
+
+ /* data_ref = permute_even (first_data_ref, second_data_ref); */
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+ first_vect, second_vect,
+ perm_mask_even);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[j/2] = data_ref;
+
+ /* data_ref = permute_odd (first_data_ref, second_data_ref); */
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+ first_vect, second_vect,
+ perm_mask_odd);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[j/2+length/2] = data_ref;
+ }
+ memcpy (dr_chain.address (), result_chain->address (),
+ length * sizeof (tree));
+ }
+ }
+}
+
+/* Function vect_shift_permute_load_chain.
+
+ Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate
+ sequence of stmts to reorder the input data accordingly.
+ Return the final references for loads in RESULT_CHAIN.
+ Return true if successful, false otherwise.
+
+ E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8.
+ The input is 3 vectors each containing 8 elements. We assign a
+ number to each element; the input sequence is:
+
+ 1st vec: 0 1 2 3 4 5 6 7
+ 2nd vec: 8 9 10 11 12 13 14 15
+ 3rd vec: 16 17 18 19 20 21 22 23
+
+ The output sequence should be:
+
+ 1st vec: 0 3 6 9 12 15 18 21
+ 2nd vec: 1 4 7 10 13 16 19 22
+ 3rd vec: 2 5 8 11 14 17 20 23
+
+ We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output.
+
+ First we shuffle all 3 vectors to get the correct element order:
+
+ 1st vec: ( 0 3 6) ( 1 4 7) ( 2 5)
+ 2nd vec: ( 8 11 14) ( 9 12 15) (10 13)
+ 3rd vec: (16 19 22) (17 20 23) (18 21)
+
+ Next we unite and shift the vectors 3 times:
+
+ 1st step:
+ shift right by 6 the concatenation of:
+ "1st vec" and "2nd vec"
+ ( 0 3 6) ( 1 4 7) |( 2 5) _ ( 8 11 14) ( 9 12 15)| (10 13)
+ "2nd vec" and "3rd vec"
+ ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21)
+ "3rd vec" and "1st vec"
+ (16 19 22) (17 20 23) |(18 21) _ ( 0 3 6) ( 1 4 7)| ( 2 5)
+ | New vectors |
+
+ So that now new vectors are:
+
+ 1st vec: ( 2 5) ( 8 11 14) ( 9 12 15)
+ 2nd vec: (10 13) (16 19 22) (17 20 23)
+ 3rd vec: (18 21) ( 0 3 6) ( 1 4 7)
+
+ 2nd step:
+ shift right by 5 the concatenation of:
+ "1st vec" and "3rd vec"
+ ( 2 5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0 3 6)| ( 1 4 7)
+ "2nd vec" and "1st vec"
+ (10 13) (16 19 22) |(17 20 23) _ ( 2 5) ( 8 11 14)| ( 9 12 15)
+ "3rd vec" and "2nd vec"
+ (18 21) ( 0 3 6) |( 1 4 7) _ (10 13) (16 19 22)| (17 20 23)
+ | New vectors |
+
+ So that now new vectors are:
+
+ 1st vec: ( 9 12 15) (18 21) ( 0 3 6)
+ 2nd vec: (17 20 23) ( 2 5) ( 8 11 14)
+ 3rd vec: ( 1 4 7) (10 13) (16 19 22) READY
+
+ 3rd step:
+ shift right by 5 the concatenation of:
+ "1st vec" and "1st vec"
+ ( 9 12 15) (18 21) |( 0 3 6) _ ( 9 12 15) (18 21)| ( 0 3 6)
+ shift right by 3 the concatenation of:
+ "2nd vec" and "2nd vec"
+ (17 20 23) |( 2 5) ( 8 11 14) _ (17 20 23)| ( 2 5) ( 8 11 14)
+ | New vectors |
+
+ So that now all vectors are READY:
+ 1st vec: ( 0 3 6) ( 9 12 15) (18 21)
+ 2nd vec: ( 2 5) ( 8 11 14) (17 20 23)
+ 3rd vec: ( 1 4 7) (10 13) (16 19 22)
+
+ This algorithm is faster than the one in vect_permute_load_chain if:
+ 1. "shift of a concatenation" is faster than general permutation.
+ This is usually the case.
+ 2. The TARGET machine can't execute vector instructions in parallel.
+ This matters because each step of this algorithm depends on the
+ previous one, while the algorithm in vect_permute_load_chain is
+ much more parallel.
+
+ The algorithm is applicable only for LOAD CHAIN LENGTH less than VF.
+*/
+
+static bool
+vect_shift_permute_load_chain (vec<tree> dr_chain,
+ unsigned int length,
+ gimple *stmt,
+ gimple_stmt_iterator *gsi,
+ vec<tree> *result_chain)
+{
+ tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
+ tree perm2_mask1, perm2_mask2, perm3_mask;
+ tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
+ gimple *perm_stmt;
+
+ tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
+ unsigned int i;
+ unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
+ unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+
+ result_chain->quick_grow (length);
+ memcpy (result_chain->address (), dr_chain.address (),
+ length * sizeof (tree));
+
+ if (exact_log2 (length) != -1 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 4)
+ {
+ unsigned int j, log_length = exact_log2 (length);
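+ /* A power-of-2 group size: de-interleave each pair of vectors by
+ reordering one into its even elements followed by its odd ones and
+ the other into odd-then-even; a shift then collects the odd halves
+ and a select the even halves. E.g. for vector length 8 the first
+ mask is {0 2 4 6 1 3 5 7}. */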
+ for (i = 0; i < nelt / 2; ++i)
+ sel[i] = i * 2;
+ for (i = 0; i < nelt / 2; ++i)
+ sel[nelt / 2 + i] = i * 2 + 1;
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "shuffle of 2 fields structure is not \
+ supported by target\n");
+ return false;
+ }
+ perm2_mask1 = vect_gen_perm_mask_checked (vectype, sel);
- for (i = 0; i < nelt; ++i)
- sel[i] = i * 2 + 1;
- perm_mask_odd = vect_gen_perm_mask (vectype, sel);
- gcc_assert (perm_mask_odd != NULL);
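+ /* The mirror mask: odd elements first, then even ones;
+ {1 3 5 7 0 2 4 6} for vector length 8. */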
+ for (i = 0; i < nelt / 2; ++i)
+ sel[i] = i * 2 + 1;
+ for (i = 0; i < nelt / 2; ++i)
+ sel[nelt / 2 + i] = i * 2;
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "shuffle of 2 fields structure is not \
+ supported by target\n");
+ return false;
+ }
+ perm2_mask2 = vect_gen_perm_mask_checked (vectype, sel);
- for (i = 0; i < log_length; i++)
+ /* Generating permutation constant to shift all elements.
+ For vector length 8 it is {4 5 6 7 8 9 10 11}. */
+ for (i = 0; i < nelt; i++)
+ sel[i] = nelt / 2 + i;
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "shift permutation is not supported by target\n");
+ return false;
+ }
+ shift1_mask = vect_gen_perm_mask_checked (vectype, sel);
+
+ /* Generating permutation constant to select vector from 2.
+ For vector length 8 it is {0 1 2 3 12 13 14 15}. */
+ for (i = 0; i < nelt / 2; i++)
+ sel[i] = i;
+ for (i = nelt / 2; i < nelt; i++)
+ sel[i] = nelt + i;
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "select is not supported by target\n");
+ return false;
+ }
+ select_mask = vect_gen_perm_mask_checked (vectype, sel);
+
+ for (i = 0; i < log_length; i++)
+ {
+ for (j = 0; j < length; j += 2)
+ {
+ first_vect = dr_chain[j];
+ second_vect = dr_chain[j + 1];
+
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+ first_vect, first_vect,
+ perm2_mask1);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ vect[0] = data_ref;
+
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+ second_vect, second_vect,
+ perm2_mask2);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ vect[1] = data_ref;
+
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+ vect[0], vect[1], shift1_mask);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[j/2 + length/2] = data_ref;
+
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+ vect[0], vect[1], select_mask);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[j/2] = data_ref;
+ }
+ memcpy (dr_chain.address (), result_chain->address (),
+ length * sizeof (tree));
+ }
+ return true;
+ }
+ if (length == 3 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 2)
{
- for (j = 0; j < length; j += 2)
+ unsigned int k = 0, l = 0;
+
+ /* Generating permutation constant to get all elements in right order.
+ For vector length 8 it is {0 3 6 1 4 7 2 5}. */
+ for (i = 0; i < nelt; i++)
+ {
+ if (3 * k + (l % 3) >= nelt)
+ {
+ k = 0;
+ l += (3 - (nelt % 3));
+ }
+ sel[i] = 3 * k + (l % 3);
+ k++;
+ }
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
{
- first_vect = dr_chain[j];
- second_vect = dr_chain[j+1];
-
- /* data_ref = permute_even (first_data_ref, second_data_ref); */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
- perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
- first_vect, second_vect,
- perm_mask_even);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "shuffle of 3 fields structure is not \
+ supported by target\n");
+ return false;
+ }
+ perm3_mask = vect_gen_perm_mask_checked (vectype, sel);
+
+ /* Generating permutation constant to shift all elements.
+ For vector length 8 it is {6 7 8 9 10 11 12 13}. */
+ for (i = 0; i < nelt; i++)
+ sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "shift permutation is not supported by target\n");
+ return false;
+ }
+ shift1_mask = vect_gen_perm_mask_checked (vectype, sel);
+
+ /* Generating permutation constant to shift all elements.
+ For vector length 8 it is {5 6 7 8 9 10 11 12}. */
+ for (i = 0; i < nelt; i++)
+ sel[i] = 2 * (nelt / 3) + 1 + i;
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "shift permutation is not supported by target\n");
+ return false;
+ }
+ shift2_mask = vect_gen_perm_mask_checked (vectype, sel);
+
+ /* Generating permutation constant to shift all elements.
+ For vector length 8 it is {3 4 5 6 7 8 9 10}. */
+ for (i = 0; i < nelt; i++)
+ sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "shift permutation is not supported by target\n");
+ return false;
+ }
+ shift3_mask = vect_gen_perm_mask_checked (vectype, sel);
+
+ /* Generating permutation constant to shift all elements.
+ For vector length 8 it is {5 6 7 8 9 10 11 12}. */
+ for (i = 0; i < nelt; i++)
+ sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "shift permutation is not supported by target\n");
+ return false;
+ }
+ shift4_mask = vect_gen_perm_mask_checked (vectype, sel);
+
+ for (k = 0; k < 3; k++)
+ {
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+ dr_chain[k], dr_chain[k],
+ perm3_mask);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[j/2] = data_ref;
+ vect[k] = data_ref;
+ }
+
+ for (k = 0; k < 3; k++)
+ {
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+ vect[k % 3], vect[(k + 1) % 3],
+ shift1_mask);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ vect_shift[k] = data_ref;
+ }
- /* data_ref = permute_odd (first_data_ref, second_data_ref); */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
- perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
- first_vect, second_vect,
- perm_mask_odd);
+ for (k = 0; k < 3; k++)
+ {
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
+ vect_shift[(4 - k) % 3],
+ vect_shift[(3 - k) % 3],
+ shift2_mask);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[j/2+length/2] = data_ref;
+ vect[k] = data_ref;
}
- memcpy (dr_chain.address (), result_chain->address (),
- length * sizeof (tree));
+
+ (*result_chain)[3 - (nelt % 3)] = vect[2];
+
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
+ vect[0], shift3_mask);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[nelt % 3] = data_ref;
+
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
+ perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
+ vect[1], shift4_mask);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[0] = data_ref;
+ return true;
}
+ return false;
}
-
/* Function vect_transform_grouped_load.
Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
*/
void
-vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
+vect_transform_grouped_load (gimple *stmt, vec<tree> dr_chain, int size,
gimple_stmt_iterator *gsi)
{
+ machine_mode mode;
vec<tree> result_chain = vNULL;
/* DR_CHAIN contains input data-refs that are a part of the interleaving.
RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
vectors, that are ready for vector computation. */
result_chain.create (size);
- vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
+
+ /* If the reassociation width for the vector type is 2 or greater, the
+ target machine can execute 2 or more vector instructions in parallel;
+ the interleaving scheme of vect_permute_load_chain is then preferable.
+ Otherwise try to reorder the load group with
+ vect_shift_permute_load_chain, falling back to vect_permute_load_chain
+ if that fails. */
+ mode = TYPE_MODE (STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)));
+ if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
+ || exact_log2 (size) != -1
+ || !vect_shift_permute_load_chain (dr_chain, size, stmt,
+ gsi, &result_chain))
+ vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
vect_record_grouped_load_vectors (stmt, result_chain);
result_chain.release ();
}
for each vector to the associated scalar statement. */
void
-vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
+vect_record_grouped_load_vectors (gimple *stmt, vec<tree> result_chain)
{
- gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
- gimple next_stmt, new_stmt;
+ gimple *first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
+ gimple *next_stmt, *new_stmt;
unsigned int i, gap_count;
tree tmp_data_ref;
{
if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
{
- gimple prev_stmt =
+ gimple *prev_stmt =
STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
- gimple rel_stmt =
+ gimple *rel_stmt =
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
while (rel_stmt)
{
if (TREE_CODE (decl) != VAR_DECL)
return false;
- /* We cannot change alignment of common or external symbols as another
- translation unit may contain a definition with lower alignment.
- The rules of common symbol linking mean that the definition
- will override the common symbol. The same is true for constant
- pool entries which may be shared and are not properly merged
- by LTO. */
- if (DECL_EXTERNAL (decl)
- || DECL_COMMON (decl)
- || DECL_IN_CONSTANT_POOL (decl))
- return false;
-
- if (TREE_ASM_WRITTEN (decl))
- return false;
-
- /* Do not override the alignment as specified by the ABI when the used
- attribute is set. */
- if (DECL_PRESERVE_P (decl))
- return false;
-
- /* Do not override explicit alignment set by the user when an explicit
- section name is also used. This is a common idiom used by many
- software projects. */
- if (DECL_SECTION_NAME (decl) != NULL_TREE
- && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
+ if (decl_in_symtab_p (decl)
+ && !symtab_node::get (decl)->can_increase_alignment_p ())
return false;
if (TREE_STATIC (decl))
vect_supportable_dr_alignment (struct data_reference *dr,
bool check_aligned_accesses)
{
- gimple stmt = DR_STMT (dr);
+ gimple *stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- enum machine_mode mode = TYPE_MODE (vectype);
+ machine_mode mode = TYPE_MODE (vectype);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *vect_loop = NULL;
bool nested_in_vect_loop = false;