Fix reductions for fully-masked loops
author Richard Sandiford <richard.sandiford@arm.com>
Fri, 25 Oct 2019 08:22:13 +0000 (08:22 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Fri, 25 Oct 2019 08:22:13 +0000 (08:22 +0000)
Now that vectorizable_operation vectorises most loop stmts involved
in a reduction, it needs to be aware of reductions in fully-masked loops.
The LOOP_VINFO_CAN_FULLY_MASK_P parts of vectorizable_reduction now only
apply to cases that use vect_transform_reduction.

This new way of doing things is definitely an improvement for SVE though,
since it means we can lift the old restriction of not using fully-masked
loops for reduction chains.

2019-10-25  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* tree-vect-loop.c (vectorizable_reduction): Restrict the
LOOP_VINFO_CAN_FULLY_MASK_P handling to cases that will be
handled by vect_transform_reduction.  Allow fully-masked loops
to be used with reduction chains.
* tree-vect-stmts.c (vectorizable_operation): Handle reduction
operations in fully-masked loops.
(vectorizable_condition): Reject EXTRACT_LAST_REDUCTION
operations in fully-masked loops.

gcc/testsuite/
* gcc.dg/vect/pr65947-1.c: No longer expect doubled dump lines
for FOLD_EXTRACT_LAST reductions.
* gcc.dg/vect/pr65947-2.c: Likewise.
* gcc.dg/vect/pr65947-3.c: Likewise.
* gcc.dg/vect/pr65947-4.c: Likewise.
* gcc.dg/vect/pr65947-5.c: Likewise.
* gcc.dg/vect/pr65947-6.c: Likewise.
* gcc.dg/vect/pr65947-9.c: Likewise.
* gcc.dg/vect/pr65947-10.c: Likewise.
* gcc.dg/vect/pr65947-12.c: Likewise.
* gcc.dg/vect/pr65947-13.c: Likewise.
* gcc.dg/vect/pr65947-14.c: Likewise.
* gcc.dg/vect/pr80631-1.c: Likewise.
* gcc.dg/vect/pr80631-2.c: Likewise.
* gcc.dg/vect/vect-cond-reduc-3.c: Likewise.
* gcc.dg/vect/vect-cond-reduc-4.c: Likewise.

From-SVN: r277438

19 files changed:
gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/pr65947-1.c
gcc/testsuite/gcc.dg/vect/pr65947-10.c
gcc/testsuite/gcc.dg/vect/pr65947-12.c
gcc/testsuite/gcc.dg/vect/pr65947-13.c
gcc/testsuite/gcc.dg/vect/pr65947-14.c
gcc/testsuite/gcc.dg/vect/pr65947-2.c
gcc/testsuite/gcc.dg/vect/pr65947-3.c
gcc/testsuite/gcc.dg/vect/pr65947-4.c
gcc/testsuite/gcc.dg/vect/pr65947-5.c
gcc/testsuite/gcc.dg/vect/pr65947-6.c
gcc/testsuite/gcc.dg/vect/pr65947-9.c
gcc/testsuite/gcc.dg/vect/pr80631-1.c
gcc/testsuite/gcc.dg/vect/pr80631-2.c
gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
gcc/tree-vect-loop.c
gcc/tree-vect-stmts.c

index bb64e932ae8c76ccbc033a722546c41a939b4807..89448b1166aacf68434910fa2cd3b3fe9cc37f83 100644 (file)
@@ -1,3 +1,14 @@
+2019-10-25  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * tree-vect-loop.c (vectorizable_reduction): Restrict the
+       LOOP_VINFO_CAN_FULLY_MASK_P handling to cases that will be
+       handled by vect_transform_reduction.  Allow fully-masked loops
+       to be used with reduction chains.
+       * tree-vect-stmts.c (vectorizable_operation): Handle reduction
+       operations in fully-masked loops.
+       (vectorizable_condition): Reject EXTRACT_LAST_REDUCTION
+       operations in fully-masked loops.
+
 2019-10-25  Richard Biener  <rguenther@suse.de>
 
        * tree-vect-loop.c (vectorizable_reduction): Verify
index 8e652ab3cbf3a463ffd092e8d6357fcaf953d49c..4a98f6cab8a8e442e95cebdf0e263f9f978edf39 100644 (file)
@@ -1,3 +1,22 @@
+2019-10-25  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.dg/vect/pr65947-1.c: No longer expect doubled dump lines
+       for FOLD_EXTRACT_LAST reductions.
+       * gcc.dg/vect/pr65947-2.c: Likewise.
+       * gcc.dg/vect/pr65947-3.c: Likewise.
+       * gcc.dg/vect/pr65947-4.c: Likewise.
+       * gcc.dg/vect/pr65947-5.c: Likewise.
+       * gcc.dg/vect/pr65947-6.c: Likewise.
+       * gcc.dg/vect/pr65947-9.c: Likewise.
+       * gcc.dg/vect/pr65947-10.c: Likewise.
+       * gcc.dg/vect/pr65947-12.c: Likewise.
+       * gcc.dg/vect/pr65947-13.c: Likewise.
+       * gcc.dg/vect/pr65947-14.c: Likewise.
+       * gcc.dg/vect/pr80631-1.c: Likewise.
+       * gcc.dg/vect/pr80631-2.c: Likewise.
+       * gcc.dg/vect/vect-cond-reduc-3.c: Likewise.
+       * gcc.dg/vect/vect-cond-reduc-4.c: Likewise.
+
 2019-10-24  Jakub Jelinek  <jakub@redhat.com>
 
        * c-c++-common/gomp/declare-variant-8.c: New test.
index b81baed914c8b606ffd60dfad863d79fb67eb8e7..8ebc385053cd737e800e6dff1c3843a514f274cd 100644 (file)
@@ -41,5 +41,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
index f37aecab082468722ed7cd9c7858b3f59fb28209..e4a1d9419c21e204f0492318aa089db8f8b915c3 100644 (file)
@@ -42,6 +42,6 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
 
index b84fd41bc63ef89874ff9ae2ba28fc7c5bf63d6a..a47f4146a29a6d584a6a531dc22746f9d299cecb 100644 (file)
@@ -42,5 +42,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
index 4ad5262019a6bb50608f5569b32af8af1602a9d8..b0755c0be651187c09bae258f9e072bb54f211ab 100644 (file)
@@ -42,4 +42,4 @@ main (void)
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
 /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { xfail vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
index d0194f237d4dc28079711366b5433e6cd975ad81..c0df587e7fa62265fe6a8d660d1f131fefb3c6fc 100644 (file)
@@ -41,5 +41,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
index 18d33c436a526fb091163a9b82f17e1198f85cd3..58ba5f764d08d823710496c49b0c13286ee2e872 100644 (file)
@@ -42,5 +42,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
index 427abdb4140090cec89835eaa4b72d3c874d217f..6b4077e1a62b255f7658acb71573efcbd068cf17 100644 (file)
@@ -52,5 +52,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
index 405571047158143e8b01e320961602d1e088bf1a..99f9765038a3750df042c825462f65efea4b743c 100644 (file)
@@ -41,6 +41,6 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
 
index c91b648aa056c670d391111a0fea2efa50ca8ea0..4e3f765cd0cf5680ee17495a610db73449464e3c 100644 (file)
@@ -53,5 +53,5 @@ main (void)
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { ! vect_fold_extract_last } } } } */
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
index b072c8d33a2143426556aefbda592df1c7ced7c4..dde96d7a553cce2a588c967dcb87d4ce49777a4b 100644 (file)
@@ -41,5 +41,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
index e43e0e473bed36ca7a34ba2dd62b04499f6e9de1..1f2953060166f6029661941bb6bcfa33b1e9f46b 100644 (file)
@@ -48,5 +48,5 @@ main ()
 /* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_fold_extract_last } } } } */
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { target { ! vect_fold_extract_last } } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 1 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
index b531fe6dbf9420baa973ea178a7cd540f2706e80..f430debb09d8d71764bf9da45cff10abe005236e 100644 (file)
@@ -72,5 +72,5 @@ main ()
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target { { ! vect_fold_extract_last } && vect_condition } } } } */
index 07f1a721e477a15caac29d80257dde086f82d9aa..ca786f6f6471c2970da5fe63da6288f3fb65e08f 100644 (file)
@@ -73,4 +73,4 @@ main ()
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */
 /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target vect_condition xfail vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */
index a5b3849a8c35089e3e9b18f6bfffea4d483364a4..de9921cfcec1d729b413a3b9c4de99f217c089d9 100644 (file)
@@ -40,6 +40,6 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
 
index 6b6d17fb93c7d3a4f8d589df6fb5a80900e56a78..543504f6b0aabeecee8faf466bab58705fb88bb8 100644 (file)
@@ -40,6 +40,6 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
 /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
 
index d0fd7bdbf803583407617644b7190f8e95c9670b..3b58ceec79e0bd1f03711837cf0994b205d35296 100644 (file)
@@ -6319,38 +6319,8 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
   else
     vec_num = 1;
 
-  internal_fn cond_fn = get_conditional_internal_fn (code);
-  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
-  bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
-
   vect_model_reduction_cost (stmt_info, reduc_fn, reduction_type, ncopies,
                             cost_vec);
-  if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
-    {
-      if (reduction_type != FOLD_LEFT_REDUCTION
-         && !mask_by_cond_expr
-         && (cond_fn == IFN_LAST
-             || !direct_internal_fn_supported_p (cond_fn, vectype_in,
-                                                 OPTIMIZE_FOR_SPEED)))
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because no"
-                            " conditional operation is available.\n");
-         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
-       }
-      else if (reduc_index == -1)
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop for chained"
-                            " reductions.\n");
-         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
-       }
-      else
-       vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
-                              vectype_in, NULL);
-    }
   if (dump_enabled_p ()
       && reduction_type == FOLD_LEFT_REDUCTION)
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -6367,6 +6337,27 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
       STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def;
       STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def;
     }
+  else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+    {
+      vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+      internal_fn cond_fn = get_conditional_internal_fn (code);
+
+      if (reduction_type != FOLD_LEFT_REDUCTION
+         && !use_mask_by_cond_expr_p (code, cond_fn, vectype_in)
+         && (cond_fn == IFN_LAST
+             || !direct_internal_fn_supported_p (cond_fn, vectype_in,
+                                                 OPTIMIZE_FOR_SPEED)))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "can't use a fully-masked loop because no"
+                            " conditional operation is available.\n");
+         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+       }
+      else
+       vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+                              vectype_in, NULL);
+    }
   return true;
 }
 
index 02b95f51abbe66a10d13658a480f2580a3372c25..19ac82fe4e3c34c3776bd7ef3df40a06660abdb5 100644 (file)
@@ -5929,7 +5929,7 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   poly_uint64 nunits_in;
   poly_uint64 nunits_out;
   tree vectype_out;
-  int ncopies;
+  int ncopies, vec_num;
   int j, i;
   vec<tree> vec_oprnds0 = vNULL;
   vec<tree> vec_oprnds1 = vNULL;
@@ -6066,9 +6066,15 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
   if (slp_node)
-    ncopies = 1;
+    {
+      ncopies = 1;
+      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+    }
   else
-    ncopies = vect_get_num_copies (loop_vinfo, vectype);
+    {
+      ncopies = vect_get_num_copies (loop_vinfo, vectype);
+      vec_num = 1;
+    }
 
   gcc_assert (ncopies >= 1);
 
@@ -6121,8 +6127,34 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       return false;
     }
 
+  int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+  vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
+  internal_fn cond_fn = get_conditional_internal_fn (code);
+
   if (!vec_stmt) /* transformation not required.  */
     {
+      /* If this operation is part of a reduction, a fully-masked loop
+        should only change the active lanes of the reduction chain,
+        keeping the inactive lanes as-is.  */
+      if (loop_vinfo
+         && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+         && reduc_idx >= 0)
+       {
+         if (cond_fn == IFN_LAST
+             || !direct_internal_fn_supported_p (cond_fn, vectype,
+                                                 OPTIMIZE_FOR_SPEED))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "can't use a fully-masked loop because no"
+                                " conditional operation is available.\n");
+             LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+           }
+         else
+           vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+                                  vectype, NULL);
+       }
+
       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_operation");
       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
@@ -6135,6 +6167,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     dump_printf_loc (MSG_NOTE, vect_location,
                      "transform binary/unary operation.\n");
 
+  bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+
   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
      vectors with unsigned elements, but the result is signed.  So, we
      need to compute the MINUS_EXPR into vectype temporary and
@@ -6252,22 +6286,41 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? vec_oprnds2[i] : NULL_TREE);
-         gassign *new_stmt = gimple_build_assign (vec_dest, code,
-                                                  vop0, vop1, vop2);
-         new_temp = make_ssa_name (vec_dest, new_stmt);
-         gimple_assign_set_lhs (new_stmt, new_temp);
-         new_stmt_info
-           = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-         if (vec_cvt_dest)
+         if (masked_loop_p && reduc_idx >= 0)
            {
-             new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
-             gassign *new_stmt
-               = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
-                                      new_temp);
-             new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
+             /* Perform the operation on active elements only and take
+                inactive elements from the reduction chain input.  */
+             gcc_assert (!vop2);
+             vop2 = reduc_idx == 1 ? vop1 : vop0;
+             tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+                                             vectype, i * ncopies + j);
+             gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
+                                                       vop0, vop1, vop2);
+             new_temp = make_ssa_name (vec_dest, call);
+             gimple_call_set_lhs (call, new_temp);
+             gimple_call_set_nothrow (call, true);
+             new_stmt_info
+               = vect_finish_stmt_generation (stmt_info, call, gsi);
+           }
+         else
+           {
+             gassign *new_stmt = gimple_build_assign (vec_dest, code,
+                                                      vop0, vop1, vop2);
+             new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              new_stmt_info
                = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+             if (vec_cvt_dest)
+               {
+                 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
+                 gassign *new_stmt
+                   = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
+                                          new_temp);
+                 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
+                 gimple_assign_set_lhs (new_stmt, new_temp);
+                 new_stmt_info
+                   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+               }
            }
           if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
@@ -9997,6 +10050,16 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                return false;
            }
        }
+      if (loop_vinfo
+         && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+         && reduction_type == EXTRACT_LAST_REDUCTION)
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "can't yet use a fully-masked loop for"
+                            " EXTRACT_LAST_REDUCTION.\n");
+         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+       }
       if (expand_vec_cond_expr_p (vectype, comp_vectype,
                                     cond_code))
        {