--- /dev/null
+/* PR tree-optimization/88464 */
+/* { dg-do run { target { avx512f } } } */
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512" } */
+
+#include "avx512f-check.h"
+
+/* Pull in the f1/f2/f3/f4 conditional gather/scatter kernels under test
+   (compiled here with the AVX512F options above so they vectorize).  */
+#include "avx512f-pr88464-1.c"
+
+/* Runtime verification for PR88464: masked gathers must only load the
+   lanes whose mask is set.  Every third element is masked off and its
+   index slot is filled with a huge out-of-range value; if the masked
+   lane were ever dereferenced the gather would fault or the -5.0
+   sentinel would be overwritten, and the checks below would abort.  */
+static void
+avx512f_test (void)
+{
+ double a[1024], b[1024];
+ float c[1024], f[1024];
+ int d[1024];
+ long e[1024];
+ int i;
+ /* Phase 1: double data, int indices (f1).  Lanes with i % 3 == 0 are
+    masked off (-5.0 sentinel, __INT_MAX__ garbage index).  */
+ for (i = 0; i < 1024; i++)
+ {
+ /* Empty asm with "+g" (i) hides i's value from the optimizer so the
+    initialization loop itself is not const-folded or vectorized away.  */
+ asm volatile ("" : "+g" (i));
+ a[i] = (i % 3) != 0 ? 15.0 : -5.0;
+ b[i] = 2 * i;
+ d[i] = (i % 3) ? 1023 - i : __INT_MAX__;
+ }
+ f1 (a, b, d, 1024);
+ /* Phase 2: double data, long indices (f2).  Lanes with i % 3 == 1 are
+    masked off; also verify phase 1's results in the same pass.  */
+ for (i = 0; i < 1024; i++)
+ {
+ asm volatile ("" : "+g" (i));
+ if (a[i] != ((i % 3) != 0 ? (1023 - i) * 2.0 : -5.0))
+ abort ();
+ a[i] = (i % 3) != 1 ? 15.0 : -5.0;
+ b[i] = 3 * i;
+ e[i] = (i % 3) != 1 ? 1023 - i : __LONG_MAX__;
+ }
+ f2 (a, b, e, 1024);
+ /* Phase 3: float data, int indices (f3).  Lanes with i % 3 == 2 are
+    masked off; also verify phase 2's results.  */
+ for (i = 0; i < 1024; i++)
+ {
+ asm volatile ("" : "+g" (i));
+ if (a[i] != ((i % 3) != 1 ? (1023 - i) * 3.0 : -5.0))
+ abort ();
+ c[i] = (i % 3) != 2 ? 15.0f : -5.0f;
+ d[i] = (i % 3) != 2 ? 1023 - i : __INT_MAX__;
+ f[i] = 4 * i;
+ }
+ f3 (c, f, d, 1024);
+ /* Phase 4: float data, long indices (f4).  Lanes with i % 3 == 0 are
+    masked off again; also verify phase 3's results.  */
+ for (i = 0; i < 1024; i++)
+ {
+ asm volatile ("" : "+g" (i));
+ if (c[i] != ((i % 3) != 2 ? (1023 - i) * 4.0f : -5.0f))
+ abort ();
+ c[i] = (i % 3) != 0 ? 15.0f : -5.0f;
+ /* NOTE(review): e is long[]; the long-sentinel phase above uses
+    __LONG_MAX__ (line with f2 setup) but this one uses __INT_MAX__.
+    Harmless — any out-of-range index works for a masked-off lane —
+    but __LONG_MAX__ would be consistent; confirm intent.  */
+ e[i] = (i % 3) != 0 ? 1023 - i : __INT_MAX__;
+ f[i] = 5 * i;
+ }
+ f4 (c, f, e, 1024);
+ /* Final verification of phase 4.  */
+ for (i = 0; i < 1024; i++)
+ {
+ asm volatile ("" : "+g" (i));
+ if (c[i] != ((i % 3) != 0 ? (1023 - i) * 5.0f : -5.0f))
+ abort ();
+ }
+}
if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
masktype = build_same_sized_truth_vector_type (srctype);
+ tree mask_halftype = masktype;
tree perm_mask = NULL_TREE;
tree mask_perm_mask = NULL_TREE;
if (known_eq (nunits, gather_off_nunits))
ncopies *= 2;
- if (mask)
+ if (mask && masktype == real_masktype)
{
for (int i = 0; i < count; ++i)
sel[i] = i | (count / 2);
indices.new_vector (sel, 2, count);
mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
}
+ else if (mask)
+ mask_halftype
+ = build_same_sized_truth_vector_type (gs_info->offset_vectype);
}
else
gcc_unreachable ();
{
if (j == 0)
vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
- else
+ else if (modifier != NARROW || (j & 1) == 0)
vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
vec_mask);
mask_op = var;
}
}
+ if (modifier == NARROW && masktype != real_masktype)
+ {
+ var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
+ gassign *new_stmt
+ = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
+ : VEC_UNPACK_LO_EXPR,
+ mask_op);
+ vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ mask_op = var;
+ }
src_op = mask_op;
}
tree mask_arg = mask_op;
if (masktype != real_masktype)
{
- tree utype;
- if (TYPE_MODE (real_masktype) == TYPE_MODE (masktype))
+ tree utype, optype = TREE_TYPE (mask_op);
+ if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
utype = real_masktype;
else
- utype = lang_hooks.types.type_for_mode (TYPE_MODE (masktype), 1);
+ utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
var = vect_get_new_ssa_name (utype, vect_scalar_var);
mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
gassign *new_stmt
gcc_assert (TYPE_PRECISION (utype)
<= TYPE_PRECISION (real_masktype));
var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
- new_stmt = gimple_build_assign (var, NOP_EXPR, utype);
+ new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
mask_arg = var;
}
return false;
}
else if (memory_access_type != VMAT_LOAD_STORE_LANES
- && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
+ && (memory_access_type != VMAT_GATHER_SCATTER
+ || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
- tree ptr, mask, var, scale, perm_mask = NULL_TREE;
+ tree ptr, var, scale, vec_mask;
+ tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
+ tree mask_halfvectype = mask_vectype;
edge pe = loop_preheader_edge (loop);
gimple_seq seq;
basic_block new_bb;
perm_mask = vect_gen_perm_mask_checked (vectype, indices);
gcc_assert (perm_mask != NULL_TREE);
ncopies *= 2;
+
+ if (mask)
+ mask_halfvectype
+ = build_same_sized_truth_vector_type (gs_info.offset_vectype);
}
else
gcc_unreachable ();
gcc_assert (!new_bb);
}
- /* Currently we support only unconditional scatter stores,
- so mask should be all ones. */
- mask = build_int_cst (masktype, -1);
- mask = vect_init_vector (stmt_info, mask, masktype, NULL);
+ if (mask == NULL_TREE)
+ {
+ mask_arg = build_int_cst (masktype, -1);
+ mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
+ }
scale = build_int_cst (scaletype, gs_info.scale);
{
if (j == 0)
{
- src = vec_oprnd1
- = vect_get_vec_def_for_operand (op, stmt_info);
- op = vec_oprnd0
- = vect_get_vec_def_for_operand (gs_info.offset, stmt_info);
+ src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
+ op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
+ stmt_info);
+ if (mask)
+ mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
+ stmt_info);
}
else if (modifier != NONE && (j & 1))
{
if (modifier == WIDEN)
{
- src = vec_oprnd1
- = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
+ src
+ = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
+ vec_oprnd1);
op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
stmt_info, gsi);
+ if (mask)
+ mask_op
+ = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
+ vec_mask);
}
else if (modifier == NARROW)
{
src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
stmt_info, gsi);
- op = vec_oprnd0
- = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
+ op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
+ vec_oprnd0);
}
else
gcc_unreachable ();
}
else
{
- src = vec_oprnd1
- = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
- op = vec_oprnd0
- = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
+ src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
+ vec_oprnd1);
+ op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
+ vec_oprnd0);
+ if (mask)
+ mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
+ vec_mask);
}
if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
op = var;
}
+ if (mask)
+ {
+ tree utype;
+ mask_arg = mask_op;
+ if (modifier == NARROW)
+ {
+ var = vect_get_new_ssa_name (mask_halfvectype,
+ vect_simple_var);
+ gassign *new_stmt
+ = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
+ : VEC_UNPACK_LO_EXPR,
+ mask_op);
+ vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ mask_arg = var;
+ }
+ tree optype = TREE_TYPE (mask_arg);
+ if (TYPE_MODE (masktype) == TYPE_MODE (optype))
+ utype = masktype;
+ else
+ utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
+ var = vect_get_new_ssa_name (utype, vect_scalar_var);
+ mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
+ gassign *new_stmt
+ = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
+ vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ mask_arg = var;
+ if (!useless_type_conversion_p (masktype, utype))
+ {
+ gcc_assert (TYPE_PRECISION (utype)
+ <= TYPE_PRECISION (masktype));
+ var = vect_get_new_ssa_name (masktype, vect_scalar_var);
+ new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
+ vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ mask_arg = var;
+ }
+ }
+
gcall *new_stmt
- = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
+ = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
stmt_vec_info new_stmt_info
= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
gimple *perm_stmt;
tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
- if (TREE_CODE (scalar_dest) == SSA_NAME)
+ if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
perm_dest = vect_create_destination_var (scalar_dest, vectype);
else
perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);