+2019-10-25 Richard Sandiford <richard.sandiford@arm.com>
+
+ * tree-vect-loop.c (vectorizable_reduction): Restrict the
+ LOOP_VINFO_CAN_FULLY_MASK_P handling to cases that will be
+ handled by vect_transform_reduction. Allow fully-masked loops
+ to be used with reduction chains.
+ * tree-vect-stmts.c (vectorizable_operation): Handle reduction
+ operations in fully-masked loops.
+ (vectorizable_condition): Disable full masking for loops that
+ contain EXTRACT_LAST_REDUCTION operations.
+
2019-10-25 Richard Biener <rguenther@suse.de>
* tree-vect-loop.c (vectorizable_reduction): Verify
+2019-10-25 Richard Sandiford <richard.sandiford@arm.com>
+
+ * gcc.dg/vect/pr65947-1.c: No longer expect doubled dump lines
+ for FOLD_EXTRACT_LAST reductions.
+ * gcc.dg/vect/pr65947-2.c: Likewise.
+ * gcc.dg/vect/pr65947-3.c: Likewise.
+ * gcc.dg/vect/pr65947-4.c: Likewise.
+ * gcc.dg/vect/pr65947-5.c: Likewise.
+ * gcc.dg/vect/pr65947-6.c: Likewise.
+ * gcc.dg/vect/pr65947-9.c: Likewise.
+ * gcc.dg/vect/pr65947-10.c: Likewise.
+ * gcc.dg/vect/pr65947-12.c: Likewise.
+ * gcc.dg/vect/pr65947-13.c: Likewise.
+ * gcc.dg/vect/pr65947-14.c: Likewise.
+ * gcc.dg/vect/pr80631-1.c: Likewise.
+ * gcc.dg/vect/pr80631-2.c: Likewise.
+ * gcc.dg/vect/vect-cond-reduc-3.c: Likewise.
+ * gcc.dg/vect/vect-cond-reduc-4.c: Likewise.
+
2019-10-24 Jakub Jelinek <jakub@redhat.com>
* c-c++-common/gomp/declare-variant-8.c: New test.
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { xfail vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { ! vect_fold_extract_last } } } } */
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_fold_extract_last } } } } */
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { target { ! vect_fold_extract_last } } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 1 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target { { ! vect_fold_extract_last } && vect_condition } } } } */
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target vect_condition xfail vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
else
vec_num = 1;
- internal_fn cond_fn = get_conditional_internal_fn (code);
- vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
- bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
-
vect_model_reduction_cost (stmt_info, reduc_fn, reduction_type, ncopies,
cost_vec);
- if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
- {
- if (reduction_type != FOLD_LEFT_REDUCTION
- && !mask_by_cond_expr
- && (cond_fn == IFN_LAST
- || !direct_internal_fn_supported_p (cond_fn, vectype_in,
- OPTIMIZE_FOR_SPEED)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because no"
- " conditional operation is available.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
- }
- else if (reduc_index == -1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop for chained"
- " reductions.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
- }
- else
- vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
- vectype_in, NULL);
- }
if (dump_enabled_p ()
&& reduction_type == FOLD_LEFT_REDUCTION)
dump_printf_loc (MSG_NOTE, vect_location,
STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def;
STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def;
}
+ else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+ {
+ vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ internal_fn cond_fn = get_conditional_internal_fn (code);
+
+ if (reduction_type != FOLD_LEFT_REDUCTION
+ && !use_mask_by_cond_expr_p (code, cond_fn, vectype_in)
+ && (cond_fn == IFN_LAST
+ || !direct_internal_fn_supported_p (cond_fn, vectype_in,
+ OPTIMIZE_FOR_SPEED)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use a fully-masked loop because no"
+ " conditional operation is available.\n");
+ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ }
+ else
+ vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+ vectype_in, NULL);
+ }
return true;
}
poly_uint64 nunits_in;
poly_uint64 nunits_out;
tree vectype_out;
- int ncopies;
+ int ncopies, vec_num;
int j, i;
vec<tree> vec_oprnds0 = vNULL;
vec<tree> vec_oprnds1 = vNULL;
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
if (slp_node)
- ncopies = 1;
+ {
+ ncopies = 1;
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ }
else
- ncopies = vect_get_num_copies (loop_vinfo, vectype);
+ {
+ ncopies = vect_get_num_copies (loop_vinfo, vectype);
+ vec_num = 1;
+ }
gcc_assert (ncopies >= 1);
return false;
}
+ int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+ vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
+ internal_fn cond_fn = get_conditional_internal_fn (code);
+
if (!vec_stmt) /* transformation not required. */
{
+ /* If this operation is part of a reduction, a fully-masked loop
+ should only change the active lanes of the reduction chain,
+ keeping the inactive lanes as-is. */
+ if (loop_vinfo
+ && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+ && reduc_idx >= 0)
+ {
+ if (cond_fn == IFN_LAST
+ || !direct_internal_fn_supported_p (cond_fn, vectype,
+ OPTIMIZE_FOR_SPEED))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use a fully-masked loop because no"
+ " conditional operation is available.\n");
+ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ }
+ else
+ vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+ vectype, NULL);
+ }
+
STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_operation");
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
dump_printf_loc (MSG_NOTE, vect_location,
"transform binary/unary operation.\n");
+ bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+
/* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
vectors with unsigned elements, but the result is signed. So, we
need to compute the MINUS_EXPR into vectype temporary and
? vec_oprnds1[i] : NULL_TREE);
vop2 = ((op_type == ternary_op)
? vec_oprnds2[i] : NULL_TREE);
- gassign *new_stmt = gimple_build_assign (vec_dest, code,
- vop0, vop1, vop2);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- if (vec_cvt_dest)
+ if (masked_loop_p && reduc_idx >= 0)
{
- new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
- gassign *new_stmt
- = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
- new_temp);
- new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
+ /* Perform the operation on active elements only and take
+ inactive elements from the reduction chain input. */
+ gcc_assert (!vop2);
+ vop2 = reduc_idx == 1 ? vop1 : vop0;
+ tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+ vectype, i * ncopies + j);
+ gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
+ vop0, vop1, vop2);
+ new_temp = make_ssa_name (vec_dest, call);
+ gimple_call_set_lhs (call, new_temp);
+ gimple_call_set_nothrow (call, true);
+ new_stmt_info
+ = vect_finish_stmt_generation (stmt_info, call, gsi);
+ }
+ else
+ {
+ gassign *new_stmt = gimple_build_assign (vec_dest, code,
+ vop0, vop1, vop2);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
new_stmt_info
= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ if (vec_cvt_dest)
+ {
+ new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
+ gassign *new_stmt
+ = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
+ new_temp);
+ new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ new_stmt_info
+ = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ }
}
if (slp_node)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
return false;
}
}
+ if (loop_vinfo
+ && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+ && reduction_type == EXTRACT_LAST_REDUCTION)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't yet use a fully-masked loop for"
+ " EXTRACT_LAST_REDUCTION.\n");
+ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ }
if (expand_vec_cond_expr_p (vectype, comp_vectype,
cond_code))
{