From: Richard Sandiford Date: Fri, 25 Oct 2019 08:22:13 +0000 (+0000) Subject: Fix reductions for fully-masked loops X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=89d0345ad7b8d84045813972ee60557a6b511c57;p=gcc.git Fix reductions for fully-masked loops Now that vectorizable_operation vectorises most loop stmts involved in a reduction, it needs to be aware of reductions in fully-masked loops. The LOOP_VINFO_CAN_FULLY_MASK_P parts of vectorizable_reduction now only apply to cases that use vect_transform_reduction. This new way of doing things is definitely an improvement for SVE though, since it means we can lift the old restriction of not using fully-masked loops for reduction chains. 2019-10-25 Richard Sandiford gcc/ * tree-vect-loop.c (vectorizable_reduction): Restrict the LOOP_VINFO_CAN_FULLY_MASK_P handling to cases that will be handled by vect_transform_reduction. Allow fully-masked loops to be used with reduction chains. * tree-vect-stmts.c (vectorizable_operation): Handle reduction operations in fully-masked loops. (vectorizable_condition): Reject EXTRACT_LAST_REDUCTION operations in fully-masked loops. gcc/testsuite/ * gcc.dg/vect/pr65947-1.c: No longer expect doubled dump lines for FOLD_EXTRACT_LAST reductions. * gcc.dg/vect/pr65947-2.c: Likewise. * gcc.dg/vect/pr65947-3.c: Likewise. * gcc.dg/vect/pr65947-4.c: Likewise. * gcc.dg/vect/pr65947-5.c: Likewise. * gcc.dg/vect/pr65947-6.c: Likewise. * gcc.dg/vect/pr65947-9.c: Likewise. * gcc.dg/vect/pr65947-10.c: Likewise. * gcc.dg/vect/pr65947-12.c: Likewise. * gcc.dg/vect/pr65947-13.c: Likewise. * gcc.dg/vect/pr65947-14.c: Likewise. * gcc.dg/vect/pr80631-1.c: Likewise. * gcc.dg/vect/pr80631-2.c: Likewise. * gcc.dg/vect/vect-cond-reduc-3.c: Likewise. * gcc.dg/vect/vect-cond-reduc-4.c: Likewise. From-SVN: r277438 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bb64e932ae8..89448b1166a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2019-10-25 Richard Sandiford + + * tree-vect-loop.c (vectorizable_reduction): Restrict the + LOOP_VINFO_CAN_FULLY_MASK_P handling to cases that will be + handled by vect_transform_reduction. Allow fully-masked loops + to be used with reduction chains. + * tree-vect-stmts.c (vectorizable_operation): Handle reduction + operations in fully-masked loops. + (vectorizable_condition): Reject EXTRACT_LAST_REDUCTION + operations in fully-masked loops. + 2019-10-25 Richard Biener * tree-vect-loop.c (vectorizable_reduction): Verify diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8e652ab3cbf..4a98f6cab8a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,22 @@ +2019-10-25 Richard Sandiford + + * gcc.dg/vect/pr65947-1.c: No longer expect doubled dump lines + for FOLD_EXTRACT_LAST reductions. + * gcc.dg/vect/pr65947-2.c: Likewise. + * gcc.dg/vect/pr65947-3.c: Likewise. + * gcc.dg/vect/pr65947-4.c: Likewise. + * gcc.dg/vect/pr65947-5.c: Likewise. + * gcc.dg/vect/pr65947-6.c: Likewise. + * gcc.dg/vect/pr65947-9.c: Likewise. + * gcc.dg/vect/pr65947-10.c: Likewise. + * gcc.dg/vect/pr65947-12.c: Likewise. + * gcc.dg/vect/pr65947-13.c: Likewise. + * gcc.dg/vect/pr65947-14.c: Likewise. + * gcc.dg/vect/pr80631-1.c: Likewise. + * gcc.dg/vect/pr80631-2.c: Likewise. + * gcc.dg/vect/vect-cond-reduc-3.c: Likewise. + * gcc.dg/vect/vect-cond-reduc-4.c: Likewise. + 2019-10-24 Jakub Jelinek * c-c++-common/gomp/declare-variant-8.c: New test. diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c index b81baed914c..8ebc385053c 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c @@ -41,5 +41,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c index f37aecab082..e4a1d9419c2 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c @@ -42,6 +42,6 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c index b84fd41bc63..a47f4146a29 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-12.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c @@ -42,5 +42,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c index 4ad5262019a..b0755c0be65 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-13.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c @@ -42,4 +42,4 @@ main (void) /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { xfail vect_fold_extract_last } } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-14.c b/gcc/testsuite/gcc.dg/vect/pr65947-14.c index d0194f237d4..c0df587e7fa 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-14.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-14.c @@ -41,5 +41,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c index 18d33c436a5..58ba5f764d0 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c @@ -42,5 +42,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c index 427abdb4140..6b4077e1a62 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c @@ -52,5 +52,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c index 40557104715..99f9765038a 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c @@ -41,6 +41,6 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c index c91b648aa05..4e3f765cd0c 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c @@ -53,5 +53,5 @@ main (void) /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { ! vect_fold_extract_last } } } } */ /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail vect_fold_extract_last } } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c index b072c8d33a2..dde96d7a553 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c @@ -41,5 +41,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-9.c b/gcc/testsuite/gcc.dg/vect/pr65947-9.c index e43e0e473be..1f295306016 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-9.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-9.c @@ -48,5 +48,5 @@ main () /* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_fold_extract_last } } } } */ /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { target { ! vect_fold_extract_last } } } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 1 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-1.c b/gcc/testsuite/gcc.dg/vect/pr80631-1.c index b531fe6dbf9..f430debb09d 100644 --- a/gcc/testsuite/gcc.dg/vect/pr80631-1.c +++ b/gcc/testsuite/gcc.dg/vect/pr80631-1.c @@ -72,5 +72,5 @@ main () } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target { { ! vect_fold_extract_last } && vect_condition } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-2.c b/gcc/testsuite/gcc.dg/vect/pr80631-2.c index 07f1a721e47..ca786f6f647 100644 --- a/gcc/testsuite/gcc.dg/vect/pr80631-2.c +++ b/gcc/testsuite/gcc.dg/vect/pr80631-2.c @@ -73,4 +73,4 @@ main () /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target vect_condition xfail vect_fold_extract_last } } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c index a5b3849a8c3..de9921cfcec 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c @@ -40,6 +40,6 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c index 6b6d17fb93c..543504f6b0a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c @@ -40,6 +40,6 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index d0fd7bdbf80..3b58ceec79e 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -6319,38 +6319,8 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node, else vec_num = 1; - internal_fn cond_fn = get_conditional_internal_fn (code); - vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); - bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in); - vect_model_reduction_cost (stmt_info, reduc_fn, reduction_type, ncopies, cost_vec); - if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) - { - if (reduction_type != FOLD_LEFT_REDUCTION - && !mask_by_cond_expr - && (cond_fn == IFN_LAST - || !direct_internal_fn_supported_p (cond_fn, vectype_in, - OPTIMIZE_FOR_SPEED))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because no" - " conditional operation is available.\n"); - LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; - } - else if (reduc_index == -1) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop for chained" - " reductions.\n"); - LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; - } - else - vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, - vectype_in, NULL); - } if (dump_enabled_p () && reduction_type == FOLD_LEFT_REDUCTION) dump_printf_loc (MSG_NOTE, vect_location, @@ -6367,6 +6337,27 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node, STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def; STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def; } + else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) + { + vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); + internal_fn cond_fn = get_conditional_internal_fn (code); + + if (reduction_type != FOLD_LEFT_REDUCTION + && !use_mask_by_cond_expr_p (code, cond_fn, vectype_in) + && (cond_fn == IFN_LAST + || !direct_internal_fn_supported_p (cond_fn, vectype_in, + OPTIMIZE_FOR_SPEED))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't use a fully-masked loop because no" + " conditional operation is available.\n"); + LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; + } + else + vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, + vectype_in, NULL); + } return true; } diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 02b95f51abb..19ac82fe4e3 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -5929,7 +5929,7 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, poly_uint64 nunits_in; poly_uint64 nunits_out; tree vectype_out; - int ncopies; + int ncopies, vec_num; int j, i; vec vec_oprnds0 = vNULL; vec vec_oprnds1 = vNULL; @@ -6066,9 +6066,15 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */ if (slp_node) - ncopies = 1; + { + ncopies = 1; + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + } else - ncopies = vect_get_num_copies (loop_vinfo, vectype); + { + ncopies = vect_get_num_copies (loop_vinfo, vectype); + vec_num = 1; + } gcc_assert (ncopies >= 1); @@ -6121,8 +6127,34 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, return false; } + int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); + vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); + internal_fn cond_fn = get_conditional_internal_fn (code); + if (!vec_stmt) /* transformation not required. */ { + /* If this operation is part of a reduction, a fully-masked loop + should only change the active lanes of the reduction chain, + keeping the inactive lanes as-is. */ + if (loop_vinfo + && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) + && reduc_idx >= 0) + { + if (cond_fn == IFN_LAST + || !direct_internal_fn_supported_p (cond_fn, vectype, + OPTIMIZE_FOR_SPEED)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't use a fully-masked loop because no" + " conditional operation is available.\n"); + LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; + } + else + vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, + vectype, NULL); + } + STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_operation"); vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec); @@ -6135,6 +6167,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, dump_printf_loc (MSG_NOTE, vect_location, "transform binary/unary operation.\n"); + bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); + /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as vectors with unsigned elements, but the result is signed. So, we need to compute the MINUS_EXPR into vectype temporary and @@ -6252,22 +6286,41 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ? vec_oprnds1[i] : NULL_TREE); vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE); - gassign *new_stmt = gimple_build_assign (vec_dest, code, - vop0, vop1, vop2); - new_temp = make_ssa_name (vec_dest, new_stmt); - gimple_assign_set_lhs (new_stmt, new_temp); - new_stmt_info - = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); - if (vec_cvt_dest) + if (masked_loop_p && reduc_idx >= 0) { - new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp); - gassign *new_stmt - = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR, - new_temp); - new_temp = make_ssa_name (vec_cvt_dest, new_stmt); + /* Perform the operation on active elements only and take + inactive elements from the reduction chain input. */ + gcc_assert (!vop2); + vop2 = reduc_idx == 1 ? vop1 : vop0; + tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies, + vectype, i * ncopies + j); + gcall *call = gimple_build_call_internal (cond_fn, 4, mask, + vop0, vop1, vop2); + new_temp = make_ssa_name (vec_dest, call); + gimple_call_set_lhs (call, new_temp); + gimple_call_set_nothrow (call, true); + new_stmt_info + = vect_finish_stmt_generation (stmt_info, call, gsi); + } + else + { + gassign *new_stmt = gimple_build_assign (vec_dest, code, + vop0, vop1, vop2); + new_temp = make_ssa_name (vec_dest, new_stmt); gimple_assign_set_lhs (new_stmt, new_temp); new_stmt_info = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + if (vec_cvt_dest) + { + new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp); + gassign *new_stmt + = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR, + new_temp); + new_temp = make_ssa_name (vec_cvt_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + new_stmt_info + = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + } } if (slp_node) SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); @@ -9997,6 +10050,16 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, return false; } } + if (loop_vinfo + && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) + && reduction_type == EXTRACT_LAST_REDUCTION) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't yet use a fully-masked loop for" + " EXTRACT_LAST_REDUCTION.\n"); + LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; + } if (expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)) {