+2015-09-04 Andrey Turetskiy <andrey.turetskiy@intel.com>
+ Petr Murzin <petr.murzin@intel.com>
+ Kirill Yukhin <kirill.yukhin@intel.com>
+
+ * doc/tm.texi.in (TARGET_VECTORIZE_BUILTIN_SCATTER): New.
+ * doc/tm.texi: Regenerate.
+ * target.def: Add scatter builtin.
+ * tree-vectorizer.h: Rename gather_p to gather_scatter_p and use it
+ for loads/stores in case of gather/scatter accordingly.
+ (STMT_VINFO_GATHER_SCATTER_P(S)): Use it instead of STMT_VINFO_GATHER_P(S).
+ (vect_check_gather): Rename to ...
+ (vect_check_gather_scatter): this.
+ * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Use
+ STMT_VINFO_GATHER_SCATTER_P instead of STMT_VINFO_GATHER_P.
+ (vect_check_gather_scatter): Use it instead of vect_check_gather.
+ (vect_analyze_data_refs): Add gatherscatter enum and maybe_scatter variable
+ and new checkings for it accordingly.
+ * tree-vect-stmts.c
+ (STMT_VINFO_GATHER_SCATTER_P(S)): Use it instead of STMT_VINFO_GATHER_P(S).
+ (vect_check_gather_scatter): Use it instead of vect_check_gather.
+ (vectorizable_store): Add checks for STMT_VINFO_GATHER_SCATTER_P.
+
2015-09-03 Bill Schmidt <wschmidt@vnet.linux.ibm.com>
* config/rs6000/altivec.md (altivec_vperm_v8hiv16qi): New
loads.
@end deftypefn
+@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_SCATTER (const_tree @var{vectype}, const_tree @var{index_type}, int @var{scale})
+Target builtin that implements vector scatter operation. @var{vectype}
+is the vector type of the store and @var{index_type} is scalar type of
+the index, scaled by @var{scale}.
+The default is @code{NULL_TREE} which means to not vectorize scatter
+stores.
+@end deftypefn
+
@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int})
This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}
fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also
@hook TARGET_VECTORIZE_BUILTIN_GATHER
+@hook TARGET_VECTORIZE_BUILTIN_SCATTER
+
@hook TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
@hook TARGET_SIMD_CLONE_ADJUST
(const_tree mem_vectype, const_tree index_type, int scale),
NULL)
+/* Target builtin that implements vector scatter operation. */
+DEFHOOK
+(builtin_scatter,
+"Target builtin that implements vector scatter operation. @var{vectype}\n\
+is the vector type of the store and @var{index_type} is scalar type of\n\
+the index, scaled by @var{scale}.\n\
+The default is @code{NULL_TREE} which means to not vectorize scatter\n\
+stores.",
+ tree,
+ (const_tree vectype, const_tree index_type, int scale),
+ NULL)
+
/* Target function to initialize the cost model for a loop or block. */
DEFHOOK
(init_cost,
return false;
}
- if (STMT_VINFO_GATHER_P (stmtinfo_a)
- || STMT_VINFO_GATHER_P (stmtinfo_b))
+ if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
+ || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
{
if (dump_enabled_p ())
{
return false;
}
- if (STMT_VINFO_GATHER_P (stmtinfo_a)
- || STMT_VINFO_GATHER_P (stmtinfo_b))
+ if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
+ || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
{
if (dump_enabled_p ())
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"zero step in outer loop.\n");
- if (DR_IS_READ (dr))
- return true;
- else
- return false;
+ return DR_IS_READ (dr);
}
}
return true;
}
-/* Check whether a non-affine read in stmt is suitable for gather load
- and if so, return a builtin decl for that operation. */
+/* Check whether a non-affine read or write in stmt is suitable for gather load
+ or scatter store and if so, return a builtin decl for that operation. */
tree
-vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
- tree *offp, int *scalep)
+vect_check_gather_scatter (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
+ tree *offp, int *scalep)
{
HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
}
- /* The gather builtins need address of the form
+ /* The gather and scatter builtins need address of the form
loop_invariant + vector * {1, 2, 4, 8}
or
loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
if (offtype == NULL_TREE)
offtype = TREE_TYPE (off);
- decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
- offtype, scale);
+ if (DR_IS_READ (dr))
+ decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
+ offtype, scale);
+ else
+ decl = targetm.vectorize.builtin_scatter (STMT_VINFO_VECTYPE (stmt_info),
+ offtype, scale);
+
if (decl == NULL_TREE)
return NULL_TREE;
gimple stmt;
stmt_vec_info stmt_info;
tree base, offset, init;
- bool gather = false;
+ enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
bool simd_lane_access = false;
int vf;
= DR_IS_READ (dr)
&& !TREE_THIS_VOLATILE (DR_REF (dr))
&& targetm.vectorize.builtin_gather != NULL;
+ bool maybe_scatter
+ = DR_IS_WRITE (dr)
+ && !TREE_THIS_VOLATILE (DR_REF (dr))
+ && targetm.vectorize.builtin_scatter != NULL;
bool maybe_simd_lane_access
= loop_vinfo && loop->simduid;
- /* If target supports vector gather loads, or if this might be
- a SIMD lane access, see if they can't be used. */
+ /* If target supports vector gather loads or scatter stores, or if
+ this might be a SIMD lane access, see if they can't be used. */
if (loop_vinfo
- && (maybe_gather || maybe_simd_lane_access)
+ && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
&& !nested_in_vect_loop_p (loop, stmt))
{
struct data_reference *newdr
= create_data_ref (NULL, loop_containing_stmt (stmt),
- DR_REF (dr), stmt, true);
+ DR_REF (dr), stmt, maybe_scatter ? false : true);
gcc_assert (newdr != NULL && DR_REF (newdr));
if (DR_BASE_ADDRESS (newdr)
&& DR_OFFSET (newdr)
}
}
}
- if (!simd_lane_access && maybe_gather)
+ if (!simd_lane_access && (maybe_gather || maybe_scatter))
{
dr = newdr;
- gather = true;
+ if (maybe_gather)
+ gatherscatter = GATHER;
+ else
+ gatherscatter = SCATTER;
}
}
- if (!gather && !simd_lane_access)
+ if (gatherscatter == SG_NONE && !simd_lane_access)
free_data_ref (newdr);
}
- if (!gather && !simd_lane_access)
+ if (gatherscatter == SG_NONE && !simd_lane_access)
{
if (dump_enabled_p ())
{
if (bb_vinfo)
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
if (bb_vinfo)
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
if (bb_vinfo)
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
if (bb_vinfo)
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
if (bb_vinfo)
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
free_data_ref (dr);
return false;
}
if (bb_vinfo)
break;
- if (gather || simd_lane_access)
+ if (gatherscatter != SG_NONE || simd_lane_access)
{
STMT_VINFO_DATA_REF (stmt_info) = NULL;
- if (gather)
+ if (gatherscatter != SG_NONE)
free_data_ref (dr);
}
return false;
if (vf > *min_vf)
*min_vf = vf;
- if (gather)
+ if (gatherscatter != SG_NONE)
{
tree off;
-
- gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
- if (gather
- && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
- gather = false;
- if (!gather)
+ if (!vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL)
+ || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
{
STMT_VINFO_DATA_REF (stmt_info) = NULL;
free_data_ref (dr);
if (dump_enabled_p ())
{
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: not suitable for gather "
- "load ");
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ (gatherscatter == GATHER) ?
+ "not vectorized: not suitable for gather "
+ "load " :
+ "not vectorized: not suitable for scatter "
+ "store ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
}
datarefs[i] = dr;
- STMT_VINFO_GATHER_P (stmt_info) = true;
+ STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
}
+
else if (loop_vinfo
&& TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
{
return false;
}
- if (STMT_VINFO_GATHER_P (stmt_vinfo))
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
{
tree off;
- tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
+ tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
gcc_assert (decl);
if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
&worklist, true))
if (STMT_VINFO_STRIDED_P (stmt_info))
return false;
- if (STMT_VINFO_GATHER_P (stmt_info))
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
gimple def_stmt;
tree def;
- gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
+ gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
&gather_off, &gather_scale);
gcc_assert (gather_decl);
if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
/** Transform. **/
- if (STMT_VINFO_GATHER_P (stmt_info))
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
tree vec_oprnd0 = NULL_TREE, op;
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
unsigned int vec_num;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
tree aggr_type;
+ tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
+ tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
+ int scatter_scale = 1;
+ enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
+ enum vect_def_type scatter_src_dt = vect_unknown_def_type;
+ gimple new_stmt;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
}
}
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ {
+ gimple def_stmt;
+ tree def;
+ scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
+ &scatter_off, &scatter_scale);
+ gcc_assert (scatter_decl);
+ if (!vect_is_simple_use_1 (scatter_off, NULL, loop_vinfo, bb_vinfo,
+ &def_stmt, &def, &scatter_idx_dt,
+ &scatter_off_vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "scatter index use not simple.");
+ return false;
+ }
+ }
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
ensure_base_align (stmt_info, dr);
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ {
+ tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
+ tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
+ tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
+ tree ptr, mask, var, scale, perm_mask = NULL_TREE;
+ edge pe = loop_preheader_edge (loop);
+ gimple_seq seq;
+ basic_block new_bb;
+ enum { NARROW, NONE, WIDEN } modifier;
+ int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
+
+ if (nunits == (unsigned int) scatter_off_nunits)
+ modifier = NONE;
+ else if (nunits == (unsigned int) scatter_off_nunits / 2)
+ {
+ unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
+ modifier = WIDEN;
+
+ for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
+ sel[i] = i | nunits;
+
+ perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
+ gcc_assert (perm_mask != NULL_TREE);
+ }
+ else if (nunits == (unsigned int) scatter_off_nunits * 2)
+ {
+ unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
+ modifier = NARROW;
+
+ for (i = 0; i < (unsigned int) nunits; ++i)
+ sel[i] = i | scatter_off_nunits;
+
+ perm_mask = vect_gen_perm_mask_checked (vectype, sel);
+ gcc_assert (perm_mask != NULL_TREE);
+ ncopies *= 2;
+ }
+ else
+ gcc_unreachable ();
+
+ rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
+ ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+ masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+ idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+ srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+ scaletype = TREE_VALUE (arglist);
+
+ gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
+ && TREE_CODE (rettype) == VOID_TYPE);
+
+ ptr = fold_convert (ptrtype, scatter_base);
+ if (!is_gimple_min_invariant (ptr))
+ {
+ ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+ gcc_assert (!new_bb);
+ }
+
+ /* Currently we support only unconditional scatter stores,
+ so mask should be all ones. */
+ mask = build_int_cst (masktype, -1);
+ mask = vect_init_vector (stmt, mask, masktype, NULL);
+
+ scale = build_int_cst (scaletype, scatter_scale);
+
+ prev_stmt_info = NULL;
+ for (j = 0; j < ncopies; ++j)
+ {
+ if (j == 0)
+ {
+ src = vec_oprnd1
+ = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt, NULL);
+ op = vec_oprnd0
+ = vect_get_vec_def_for_operand (scatter_off, stmt, NULL);
+ }
+ else if (modifier != NONE && (j & 1))
+ {
+ if (modifier == WIDEN)
+ {
+ src = vec_oprnd1
+ = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
+ op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
+ stmt, gsi);
+ }
+ else if (modifier == NARROW)
+ {
+ src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
+ stmt, gsi);
+ op = vec_oprnd0
+ = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
+ }
+ else
+ gcc_unreachable ();
+ }
+ else
+ {
+ src = vec_oprnd1
+ = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
+ op = vec_oprnd0
+ = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
+ }
+
+ if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
+ {
+ gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
+ == TYPE_VECTOR_SUBPARTS (srctype));
+ var = vect_get_new_vect_var (srctype, vect_simple_var, NULL);
+ var = make_ssa_name (var);
+ src = build1 (VIEW_CONVERT_EXPR, srctype, src);
+ new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ src = var;
+ }
+
+ if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
+ {
+ gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
+ == TYPE_VECTOR_SUBPARTS (idxtype));
+ var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
+ var = make_ssa_name (var);
+ op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
+ new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ op = var;
+ }
+
+ new_stmt
+ = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
+
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+ if (prev_stmt_info == NULL)
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+ else
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+ prev_stmt_info = vinfo_for_stmt (new_stmt);
+ }
+ return true;
+ }
+
if (grouped_store)
{
first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
prev_stmt_info = NULL;
for (j = 0; j < ncopies; j++)
{
- gimple new_stmt;
if (j == 0)
{
{
grouped_load = true;
/* FORNOW */
- gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
+ gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
}
- if (STMT_VINFO_GATHER_P (stmt_info))
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
gimple def_stmt;
tree def;
- gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
- &gather_off, &gather_scale);
+ gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
+ &gather_off, &gather_scale);
gcc_assert (gather_decl);
if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
&def_stmt, &def, &gather_dt,
ensure_base_align (stmt_info, dr);
- if (STMT_VINFO_GATHER_P (stmt_info))
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
tree vec_oprnd0 = NULL_TREE, op;
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
vectorization. */
bool vectorizable;
- /* For loads only, true if this is a gather load. */
- bool gather_p;
+ /* For loads if this is a gather, for stores if this is a scatter. */
+ bool gather_scatter_p;
/* True if this is an access with loop-invariant stride. */
bool strided_p;
#define STMT_VINFO_VEC_STMT(S) (S)->vectorized_stmt
#define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable
#define STMT_VINFO_DATA_REF(S) (S)->data_ref_info
-#define STMT_VINFO_GATHER_P(S) (S)->gather_p
+#define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p
#define STMT_VINFO_STRIDED_P(S) (S)->strided_p
#define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
extern bool vect_verify_datarefs_alignment (loop_vec_info, bb_vec_info);
extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info);
extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
-extern tree vect_check_gather (gimple, loop_vec_info, tree *, tree *,
- int *);
+extern tree vect_check_gather_scatter (gimple, loop_vec_info, tree *, tree *,
+ int *);
extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *,
unsigned *);
extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,