Add support for reductions in fully-masked loops
authorRichard Sandiford <richard.sandiford@linaro.org>
Sat, 13 Jan 2018 17:59:00 +0000 (17:59 +0000)
committerRichard Sandiford <rsandifo@gcc.gnu.org>
Sat, 13 Jan 2018 17:59:00 +0000 (17:59 +0000)
This patch removes the restriction that fully-masked loops cannot
have reductions.  The key thing here is to make sure that the
reduction accumulator doesn't include any values associated with
inactive lanes; the patch adds a bunch of conditional binary
operations for doing that.

2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
    Alan Hayward  <alan.hayward@arm.com>
    David Sherwood  <david.sherwood@arm.com>

gcc/
* doc/md.texi (cond_add@var{mode}, cond_sub@var{mode})
(cond_and@var{mode}, cond_ior@var{mode}, cond_xor@var{mode})
(cond_smin@var{mode}, cond_smax@var{mode}, cond_umin@var{mode})
(cond_umax@var{mode}): Document.
* optabs.def (cond_add_optab, cond_sub_optab, cond_and_optab)
(cond_ior_optab, cond_xor_optab, cond_smin_optab, cond_smax_optab)
(cond_umin_optab, cond_umax_optab): New optabs.
* internal-fn.def (COND_ADD, COND_SUB, COND_MIN, COND_MAX, COND_AND)
(COND_IOR, COND_XOR): New internal functions.
* internal-fn.h (get_conditional_internal_fn): Declare.
* internal-fn.c (cond_binary_direct): New macro.
(expand_cond_binary_optab_fn): Likewise.
(direct_cond_binary_optab_supported_p): Likewise.
(get_conditional_internal_fn): New function.
* tree-vect-loop.c (vectorizable_reduction): Handle fully-masked loops.
Cope with reduction statements that are vectorized as calls rather
than assignments.
* config/aarch64/aarch64-sve.md (cond_<optab><mode>): New insns.
* config/aarch64/iterators.md (UNSPEC_COND_ADD, UNSPEC_COND_SUB)
(UNSPEC_COND_SMAX, UNSPEC_COND_UMAX, UNSPEC_COND_SMIN)
(UNSPEC_COND_UMIN, UNSPEC_COND_AND, UNSPEC_COND_ORR)
(UNSPEC_COND_EOR): New unspecs.
(optab): Add mappings for them.
(SVE_COND_INT_OP, SVE_COND_FP_OP): New int iterators.
(sve_int_op, sve_fp_op): New int attributes.

gcc/testsuite/
* gcc.dg/vect/pr60482.c: Remove XFAIL for variable-length vectors.
* gcc.target/aarch64/sve/reduc_1.c: Expect the loop operations
to be predicated.
* gcc.target/aarch64/sve/slp_5.c: Check for a fully-masked loop.
* gcc.target/aarch64/sve/slp_7.c: Likewise.
* gcc.target/aarch64/sve/reduc_5.c: New test.
* gcc.target/aarch64/sve/slp_13.c: Likewise.
* gcc.target/aarch64/sve/slp_13_run.c: Likewise.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256626

17 files changed:
gcc/ChangeLog
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/iterators.md
gcc/doc/md.texi
gcc/internal-fn.c
gcc/internal-fn.def
gcc/internal-fn.h
gcc/optabs.def
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/pr60482.c
gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/slp_13.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/slp_13_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/slp_5.c
gcc/testsuite/gcc.target/aarch64/sve/slp_7.c
gcc/tree-vect-loop.c

index a00e7d9861e42d7644def99e5ff5e72ea8008c83..c08d2b35e5e7a7a90c92d91feccf5382d5a5deeb 100644 (file)
@@ -1,3 +1,33 @@
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * doc/md.texi (cond_add@var{mode}, cond_sub@var{mode})
+       (cond_and@var{mode}, cond_ior@var{mode}, cond_xor@var{mode})
+       (cond_smin@var{mode}, cond_smax@var{mode}, cond_umin@var{mode})
+       (cond_umax@var{mode}): Document.
+       * optabs.def (cond_add_optab, cond_sub_optab, cond_and_optab)
+       (cond_ior_optab, cond_xor_optab, cond_smin_optab, cond_smax_optab)
+       (cond_umin_optab, cond_umax_optab): New optabs.
+       * internal-fn.def (COND_ADD, COND_SUB, COND_MIN, COND_MAX, COND_AND)
+       (COND_IOR, COND_XOR): New internal functions.
+       * internal-fn.h (get_conditional_internal_fn): Declare.
+       * internal-fn.c (cond_binary_direct): New macro.
+       (expand_cond_binary_optab_fn): Likewise.
+       (direct_cond_binary_optab_supported_p): Likewise.
+       (get_conditional_internal_fn): New function.
+       * tree-vect-loop.c (vectorizable_reduction): Handle fully-masked loops.
+       Cope with reduction statements that are vectorized as calls rather
+       than assignments.
+       * config/aarch64/aarch64-sve.md (cond_<optab><mode>): New insns.
+       * config/aarch64/iterators.md (UNSPEC_COND_ADD, UNSPEC_COND_SUB)
+       (UNSPEC_COND_SMAX, UNSPEC_COND_UMAX, UNSPEC_COND_SMIN)
+       (UNSPEC_COND_UMIN, UNSPEC_COND_AND, UNSPEC_COND_ORR)
+       (UNSPEC_COND_EOR): New unspecs.
+       (optab): Add mappings for them.
+       (SVE_COND_INT_OP, SVE_COND_FP_OP): New int iterators.
+       (sve_int_op, sve_fp_op): New int attributes.
+
 2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>
index b312ab7b71569511902ffe12c95a1d681c358bee..0329fedb15096f0ae1cc1bd59679d019300ba43e 100644 (file)
   "<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
 )
 
+;; Predicated integer operations.
+(define_insn "cond_<optab><mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+       (unspec:SVE_I
+         [(match_operand:<VPRED> 1 "register_operand" "Upl")
+          (match_operand:SVE_I 2 "register_operand" "0")
+          (match_operand:SVE_I 3 "register_operand" "w")]
+         SVE_COND_INT_OP))]
+  "TARGET_SVE"
+  "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+)
+
 ;; Unpredicated integer add reduction.
 (define_expand "reduc_plus_scal_<mode>"
   [(set (match_operand:<VEL> 0 "register_operand")
   }
 )
 
+;; Predicated floating-point operations.
+(define_insn "cond_<optab><mode>"
+  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "register_operand" "Upl")
+          (match_operand:SVE_F 2 "register_operand" "0")
+          (match_operand:SVE_F 3 "register_operand" "w")]
+         SVE_COND_FP_OP))]
+  "TARGET_SVE"
+  "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+)
+
 ;; Shift an SVE vector left and insert a scalar into element 0.
 (define_insn "vec_shl_insert_<mode>"
   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
index 406c5157ee81c28d565cc702f6d69f3e9b96d3c1..c380b3bfecf9da67bb16fd72bf59afc8a0af9b33 100644 (file)
     UNSPEC_ANDF                ; Used in aarch64-sve.md.
     UNSPEC_IORF                ; Used in aarch64-sve.md.
     UNSPEC_XORF                ; Used in aarch64-sve.md.
+    UNSPEC_COND_ADD    ; Used in aarch64-sve.md.
+    UNSPEC_COND_SUB    ; Used in aarch64-sve.md.
+    UNSPEC_COND_SMAX   ; Used in aarch64-sve.md.
+    UNSPEC_COND_UMAX   ; Used in aarch64-sve.md.
+    UNSPEC_COND_SMIN   ; Used in aarch64-sve.md.
+    UNSPEC_COND_UMIN   ; Used in aarch64-sve.md.
+    UNSPEC_COND_AND    ; Used in aarch64-sve.md.
+    UNSPEC_COND_ORR    ; Used in aarch64-sve.md.
+    UNSPEC_COND_EOR    ; Used in aarch64-sve.md.
     UNSPEC_COND_LT     ; Used in aarch64-sve.md.
     UNSPEC_COND_LE     ; Used in aarch64-sve.md.
     UNSPEC_COND_EQ     ; Used in aarch64-sve.md.
 
 (define_int_iterator UNPACK_UNSIGNED [UNSPEC_UNPACKULO UNSPEC_UNPACKUHI])
 
+(define_int_iterator SVE_COND_INT_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB
+                                     UNSPEC_COND_SMAX UNSPEC_COND_UMAX
+                                     UNSPEC_COND_SMIN UNSPEC_COND_UMIN
+                                     UNSPEC_COND_AND
+                                     UNSPEC_COND_ORR
+                                     UNSPEC_COND_EOR])
+
+(define_int_iterator SVE_COND_FP_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB])
+
 (define_int_iterator SVE_COND_INT_CMP [UNSPEC_COND_LT UNSPEC_COND_LE
                                       UNSPEC_COND_EQ UNSPEC_COND_NE
                                       UNSPEC_COND_GE UNSPEC_COND_GT
                        (UNSPEC_XORF "xor")
                        (UNSPEC_ANDV "and")
                        (UNSPEC_IORV "ior")
-                       (UNSPEC_XORV "xor")])
+                       (UNSPEC_XORV "xor")
+                       (UNSPEC_COND_ADD "add")
+                       (UNSPEC_COND_SUB "sub")
+                       (UNSPEC_COND_SMAX "smax")
+                       (UNSPEC_COND_UMAX "umax")
+                       (UNSPEC_COND_SMIN "smin")
+                       (UNSPEC_COND_UMIN "umin")
+                       (UNSPEC_COND_AND "and")
+                       (UNSPEC_COND_ORR "ior")
+                       (UNSPEC_COND_EOR "xor")])
 
 (define_int_attr  maxmin_uns [(UNSPEC_UMAXV "umax")
                              (UNSPEC_UMINV "umin")
                          (UNSPEC_COND_LS "vsd")
                          (UNSPEC_COND_HS "vsd")
                          (UNSPEC_COND_HI "vsd")])
+
+(define_int_attr sve_int_op [(UNSPEC_COND_ADD "add")
+                            (UNSPEC_COND_SUB "sub")
+                            (UNSPEC_COND_SMAX "smax")
+                            (UNSPEC_COND_UMAX "umax")
+                            (UNSPEC_COND_SMIN "smin")
+                            (UNSPEC_COND_UMIN "umin")
+                            (UNSPEC_COND_AND "and")
+                            (UNSPEC_COND_ORR "orr")
+                            (UNSPEC_COND_EOR "eor")])
+
+(define_int_attr sve_fp_op [(UNSPEC_COND_ADD "fadd")
+                           (UNSPEC_COND_SUB "fsub")])
index f4e89c39d5d2fba9c2112c7e01c7195cf53248da..285cffd22f64c98746d07bed657677212f84989e 100644 (file)
@@ -6248,6 +6248,42 @@ move operand 2 or (operands 2 + operand 3) into operand 0 according to the
 comparison in operand 1.  If the comparison is false, operand 2 is moved into
 operand 0, otherwise (operand 2 + operand 3) is moved.
 
+@cindex @code{cond_add@var{mode}} instruction pattern
+@cindex @code{cond_sub@var{mode}} instruction pattern
+@cindex @code{cond_and@var{mode}} instruction pattern
+@cindex @code{cond_ior@var{mode}} instruction pattern
+@cindex @code{cond_xor@var{mode}} instruction pattern
+@cindex @code{cond_smin@var{mode}} instruction pattern
+@cindex @code{cond_smax@var{mode}} instruction pattern
+@cindex @code{cond_umin@var{mode}} instruction pattern
+@cindex @code{cond_umax@var{mode}} instruction pattern
+@item @samp{cond_add@var{mode}}
+@itemx @samp{cond_sub@var{mode}}
+@itemx @samp{cond_and@var{mode}}
+@itemx @samp{cond_ior@var{mode}}
+@itemx @samp{cond_xor@var{mode}}
+@itemx @samp{cond_smin@var{mode}}
+@itemx @samp{cond_smax@var{mode}}
+@itemx @samp{cond_umin@var{mode}}
+@itemx @samp{cond_umax@var{mode}}
+Perform an elementwise operation on vector operands 2 and 3,
+under the control of the vector mask in operand 1, and store the result
+in operand 0.  This is equivalent to:
+
+@smallexample
+for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
+  op0[i] = op1[i] ? op2[i] @var{op} op3[i] : op2[i];
+@end smallexample
+
+where, for example, @var{op} is @code{+} for @samp{cond_add@var{mode}}.
+
+When defined for floating-point modes, the contents of @samp{op3[i]}
+are not interpreted if @samp{op1[i]} is false, just like they would not
+be in a normal C @samp{?:} condition.
+
+Operands 0, 2 and 3 all have mode @var{m}, while operand 1 has the mode
+returned by @code{TARGET_VECTORIZE_GET_MASK_MODE}.
+
 @cindex @code{neg@var{mode}cc} instruction pattern
 @item @samp{neg@var{mode}cc}
 Similar to @samp{mov@var{mode}cc} but for conditional negation.  Conditionally
index 44d5486adc5a7165260a6c3765b444ceebcabdad..bc4027bd60359696f06d9c263c4a9bb658be3327 100644 (file)
@@ -88,6 +88,7 @@ init_internal_fns ()
 #define mask_store_lanes_direct { 0, 0, false }
 #define unary_direct { 0, 0, true }
 #define binary_direct { 0, 0, true }
+#define cond_binary_direct { 1, 1, true }
 #define while_direct { 0, 2, false }
 
 const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
@@ -2855,6 +2856,9 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 #define expand_binary_optab_fn(FN, STMT, OPTAB) \
   expand_direct_optab_fn (FN, STMT, OPTAB, 2)
 
+#define expand_cond_binary_optab_fn(FN, STMT, OPTAB) \
+  expand_direct_optab_fn (FN, STMT, OPTAB, 3)
+
 /* RETURN_TYPE and ARGS are a return type and argument list that are
    in principle compatible with FN (which satisfies direct_internal_fn_p).
    Return the types that should be used to determine whether the
@@ -2928,6 +2932,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 
 #define direct_unary_optab_supported_p direct_optab_supported_p
 #define direct_binary_optab_supported_p direct_optab_supported_p
+#define direct_cond_binary_optab_supported_p direct_optab_supported_p
 #define direct_mask_load_optab_supported_p direct_optab_supported_p
 #define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
@@ -3049,6 +3054,37 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
   0
 };
 
+/* Return the internal function for the conditional form of tree code CODE:
+
+     LHS = RHS1 ? RHS2 CODE RHS3 : RHS2
+
+   (elementwise if the operands are vectors; RHS1 is the condition and RHS2
+   the fallback value).  Return IFN_LAST if no such function exists.  */
+
+internal_fn
+get_conditional_internal_fn (tree_code code)
+{
+  switch (code)
+    {
+    case PLUS_EXPR:
+      return IFN_COND_ADD;
+    case MINUS_EXPR:
+      return IFN_COND_SUB;
+    case MIN_EXPR:
+      return IFN_COND_MIN;
+    case MAX_EXPR:
+      return IFN_COND_MAX;
+    case BIT_AND_EXPR:
+      return IFN_COND_AND;
+    case BIT_IOR_EXPR:
+      return IFN_COND_IOR;
+    case BIT_XOR_EXPR:
+      return IFN_COND_XOR;
+    default:
+      return IFN_LAST;
+    }
+}
+
 /* Expand STMT as though it were a call to internal function FN.  */
 
 void
index 39dd7fc15c276132a3a49e985b85c87a72d7cfcc..f0984402355a4a48ce567a7561e5bd41e0d93e71 100644 (file)
@@ -53,6 +53,11 @@ along with GCC; see the file COPYING3.  If not see
    - store_lanes: currently just vec_store_lanes
    - mask_store_lanes: currently just vec_mask_store_lanes
 
+   - unary: a normal unary optab, such as vec_reverse_<mode>
+   - binary: a normal binary optab, such as vec_interleave_lo_<mode>
+
+   - cond_binary: a conditional binary optab, such as cond_add<mode>
+
    DEF_INTERNAL_SIGNED_OPTAB_FN defines an internal function that
    maps to one of two optabs, depending on the signedness of an input.
    SIGNED_OPTAB and UNSIGNED_OPTAB are the optabs for signed and
@@ -121,6 +126,19 @@ DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
 DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW,
                       vec_shl_insert, binary)
 
+DEF_INTERNAL_OPTAB_FN (COND_ADD, ECF_CONST, cond_add, cond_binary)
+DEF_INTERNAL_OPTAB_FN (COND_SUB, ECF_CONST, cond_sub, cond_binary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (COND_MIN, ECF_CONST, first,
+                             cond_smin, cond_umin, cond_binary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (COND_MAX, ECF_CONST, first,
+                             cond_smax, cond_umax, cond_binary)
+DEF_INTERNAL_OPTAB_FN (COND_AND, ECF_CONST | ECF_NOTHROW,
+                      cond_and, cond_binary)
+DEF_INTERNAL_OPTAB_FN (COND_IOR, ECF_CONST | ECF_NOTHROW,
+                      cond_ior, cond_binary)
+DEF_INTERNAL_OPTAB_FN (COND_XOR, ECF_CONST | ECF_NOTHROW,
+                      cond_xor, cond_binary)
+
 DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
 
 DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
index 22c15f25db5e491291bfb2b4da5a6c374f7cda59..0464e3f9e7a6229647e0eaeeaa9e521a4a8e48b8 100644 (file)
@@ -190,6 +190,8 @@ direct_internal_fn_supported_p (internal_fn fn, tree type0, tree type1,
 
 extern bool set_edom_supported_p (void);
 
+extern internal_fn get_conditional_internal_fn (tree_code);
+
 extern void expand_internal_call (gcall *);
 extern void expand_internal_call (internal_fn, gcall *);
 extern void expand_PHI (internal_fn, gcall *);
index 318a2c7c8442aef94661fce3f4afcac2f35deff0..9a79f4d250baff3d2cb72f88398990da23613274 100644 (file)
@@ -220,6 +220,15 @@ OPTAB_D (addcc_optab, "add$acc")
 OPTAB_D (negcc_optab, "neg$acc")
 OPTAB_D (notcc_optab, "not$acc")
 OPTAB_D (movcc_optab, "mov$acc")
+OPTAB_D (cond_add_optab, "cond_add$a")
+OPTAB_D (cond_sub_optab, "cond_sub$a")
+OPTAB_D (cond_and_optab, "cond_and$a")
+OPTAB_D (cond_ior_optab, "cond_ior$a")
+OPTAB_D (cond_xor_optab, "cond_xor$a")
+OPTAB_D (cond_smin_optab, "cond_smin$a")
+OPTAB_D (cond_smax_optab, "cond_smax$a")
+OPTAB_D (cond_umin_optab, "cond_umin$a")
+OPTAB_D (cond_umax_optab, "cond_umax$a")
 OPTAB_D (cmov_optab, "cmov$a6")
 OPTAB_D (cstore_optab, "cstore$a4")
 OPTAB_D (ctrap_optab, "ctrap$a4")
index a7370ec9679156312dee96ad9bddf58b13afbee2..7363c0d04a4acf9213262ff0202cd0eed6772c13 100644 (file)
@@ -1,3 +1,16 @@
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * gcc.dg/vect/pr60482.c: Remove XFAIL for variable-length vectors.
+       * gcc.target/aarch64/sve/reduc_1.c: Expect the loop operations
+       to be predicated.
+       * gcc.target/aarch64/sve/slp_5.c: Check for a fully-masked loop.
+       * gcc.target/aarch64/sve/slp_7.c: Likewise.
+       * gcc.target/aarch64/sve/reduc_5.c: New test.
+       * gcc.target/aarch64/sve/slp_13.c: Likewise.
+       * gcc.target/aarch64/sve/slp_13_run.c: Likewise.
+
 2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>
index b07fe1ef33e523c93ba67480145326821b672c7e..4c5c20c810932ec042523121173b80f59f22cc9a 100644 (file)
@@ -16,6 +16,4 @@ foo (double *x, int n)
   return p;
 }
 
-/* Until fully-masked loops are supported, we always need an epilog
-   loop for variable-length vectors.  */
-/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" } } */
index 72dc793b65049059185be7184b76d54e206e5784..a258344b0a965e7fdc77d18e9991066e4c9521f8 100644 (file)
@@ -105,10 +105,10 @@ reduc_##NAME##_##TYPE (TYPE *a, int n)            \
 
 TEST_BITWISE (DEF_REDUC_BITWISE)
 
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
 
 /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
@@ -130,9 +130,9 @@ TEST_BITWISE (DEF_REDUC_BITWISE)
 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
 
-/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
 
 /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
@@ -142,11 +142,20 @@ TEST_BITWISE (DEF_REDUC_BITWISE)
 /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
 
-/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
 
-/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
 
-/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
 
 /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
@@ -180,17 +189,17 @@ TEST_BITWISE (DEF_REDUC_BITWISE)
 /* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
 
-/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
-/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
 
-/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
-/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
 
-/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
-/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c
new file mode 100644 (file)
index 0000000..ff53594
--- /dev/null
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include <stdint.h>
+
+#define REDUC(TYPE)                                            \
+  TYPE reduc_##TYPE (TYPE *x, int count)                       \
+  {                                                            \
+    TYPE sum = 0;                                              \
+    for (int i = 0; i < count; ++i)                            \
+      sum -= x[i];                                             \
+    return sum;                                                        \
+  }
+
+REDUC (int8_t)
+REDUC (uint8_t)
+REDUC (int16_t)
+REDUC (uint16_t)
+REDUC (int32_t)
+REDUC (uint32_t)
+REDUC (int64_t)
+REDUC (uint64_t)
+REDUC (float)
+REDUC (double)
+
+/* XFAILed until we support sub-int reductions for signed types.  */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 1 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 1 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m} 1 } } */
+
+/* XFAILed until we support sub-int reductions for signed types.  */
+/* { dg-final { scan-assembler-times {\tsub\t} 8 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfsub\t} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c
new file mode 100644 (file)
index 0000000..5b875bf
--- /dev/null
@@ -0,0 +1,52 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define VEC_PERM(TYPE)                                         \
+TYPE __attribute__ ((noinline, noclone))                       \
+vec_slp_##TYPE (TYPE *restrict a, int n)                       \
+{                                                              \
+  TYPE res = 0;                                                        \
+  for (int i = 0; i < n; ++i)                                  \
+    {                                                          \
+      res += a[i * 2] * 3;                                     \
+      res += a[i * 2 + 1] * 5;                                 \
+    }                                                          \
+  return res;                                                  \
+}
+
+#define TEST_ALL(T)                            \
+  T (int8_t)                                   \
+  T (uint8_t)                                  \
+  T (int16_t)                                  \
+  T (uint16_t)                                 \
+  T (int32_t)                                  \
+  T (uint32_t)                                 \
+  T (int64_t)                                  \
+  T (uint64_t)
+
+TEST_ALL (VEC_PERM)
+
+/* ??? We don't treat the int8_t and int16_t loops as reductions.  */
+/* ??? We don't treat the uint loops as SLP.  */
+/* The loop should be fully-masked.  */
+/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1w\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tldr} { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-not {\tuqdec} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_13_run.c
new file mode 100644 (file)
index 0000000..9799e3b
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "slp_13.c"
+
+#define N1 (103 * 2)
+#define N2 (111 * 2)
+
+#define HARNESS(TYPE)                                          \
+  {                                                            \
+    TYPE a[N2];                                                        \
+    TYPE expected = 0;                                         \
+    for (unsigned int i = 0; i < N2; ++i)                      \
+      {                                                                \
+       a[i] = i * 2 + i % 5;                                   \
+       if (i < N1)                                             \
+         expected += a[i] * (i & 1 ? 5 : 3);                   \
+       asm volatile ("");                                      \
+      }                                                                \
+    if (vec_slp_##TYPE (a, N1 / 2) != expected)                        \
+      __builtin_abort ();                                      \
+  }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (HARNESS)
+}
index 4e2641929efed43ff765c9824698f0df3ed3e349..7ff12c58570282e59328c37ddfe000f90bad91ff 100644 (file)
@@ -56,3 +56,12 @@ TEST_ALL (VEC_PERM)
 /* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
 /* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
 /* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
+
+/* Should be 4 and 6 respectively, if we used reductions for int8_t and
+   int16_t.  */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-not {\tuqdec} } } */
index 76abbdcf8aa99a7ba1916e7248ba0cc3dfa79ef3..9e6aa8ccbf81c204edc5f9faa37292f0180f0c4d 100644 (file)
@@ -64,3 +64,12 @@ TEST_ALL (VEC_PERM)
 /* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */
 /* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */
 /* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */
+
+/* The .b and .h counts below would be 4 and 6 respectively if we used
+   reductions for int8_t and int16_t.  */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-not {\tuqdec} } } */
index 5deb7800f43851ec278ce038db825dd0e2253d92..15d36b2e8396f5f1b2103117a1034adbebd093eb 100644 (file)
@@ -6893,19 +6893,42 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
       return false;
     }
 
+  if (slp_node)
+    vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+  else
+    vec_num = 1;
+
+  internal_fn cond_fn = get_conditional_internal_fn (code);
+  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+
   if (!vec_stmt) /* transformation not required.  */
     {
-      if (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop due to "
-                            "reduction operation.\n");
-         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
-       }
-
       if (first_p)
        vect_model_reduction_cost (stmt_info, reduc_fn, ncopies);
+      if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+       {
+         if (cond_fn == IFN_LAST
+             || !direct_internal_fn_supported_p (cond_fn, vectype_in,
+                                                 OPTIMIZE_FOR_SPEED))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "can't use a fully-masked loop because no"
+                                " conditional operation is available.\n");
+             LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+           }
+         else if (reduc_index == -1)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "can't use a fully-masked loop for chained"
+                                " reductions.\n");
+             LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+           }
+         else
+           vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+                                  vectype_in);
+       }
       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
       return true;
     }
@@ -6919,16 +6942,15 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
   if (code == COND_EXPR)
     gcc_assert (ncopies == 1);
 
+  bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+
   /* Create the destination vector  */
   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
 
   prev_stmt_info = NULL;
   prev_phi_info = NULL;
-  if (slp_node)
-    vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-  else
+  if (!slp_node)
     {
-      vec_num = 1;
       vec_oprnds0.create (1);
       vec_oprnds1.create (1);
       if (op_type == ternary_op)
@@ -7002,19 +7024,19 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
              gcc_assert (reduc_index != -1 || ! single_defuse_cycle);
 
              if (single_defuse_cycle && reduc_index == 0)
-               vec_oprnds0[0] = gimple_assign_lhs (new_stmt);
+               vec_oprnds0[0] = gimple_get_lhs (new_stmt);
              else
                vec_oprnds0[0]
                  = vect_get_vec_def_for_stmt_copy (dts[0], vec_oprnds0[0]);
              if (single_defuse_cycle && reduc_index == 1)
-               vec_oprnds1[0] = gimple_assign_lhs (new_stmt);
+               vec_oprnds1[0] = gimple_get_lhs (new_stmt);
              else
                vec_oprnds1[0]
                  = vect_get_vec_def_for_stmt_copy (dts[1], vec_oprnds1[0]);
              if (op_type == ternary_op)
                {
                  if (single_defuse_cycle && reduc_index == 2)
-                   vec_oprnds2[0] = gimple_assign_lhs (new_stmt);
+                   vec_oprnds2[0] = gimple_get_lhs (new_stmt);
                  else
                    vec_oprnds2[0] 
                      = vect_get_vec_def_for_stmt_copy (dts[2], vec_oprnds2[0]);
@@ -7025,13 +7047,33 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
         {
          tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE };
-         if (op_type == ternary_op)
-           vop[2] = vec_oprnds2[i];
+         if (masked_loop_p)
+           {
+             /* Make sure that the reduction accumulator is vop[0].  */
+             if (reduc_index == 1)
+               {
+                 gcc_assert (commutative_tree_code (code));
+                 std::swap (vop[0], vop[1]);
+               }
+             tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+                                             vectype_in, i * ncopies + j);
+             gcall *call = gimple_build_call_internal (cond_fn, 3, mask,
+                                                       vop[0], vop[1]);
+             new_temp = make_ssa_name (vec_dest, call);
+             gimple_call_set_lhs (call, new_temp);
+             gimple_call_set_nothrow (call, true);
+             new_stmt = call;
+           }
+         else
+           {
+             if (op_type == ternary_op)
+               vop[2] = vec_oprnds2[i];
 
-          new_temp = make_ssa_name (vec_dest, new_stmt);
-          new_stmt = gimple_build_assign (new_temp, code,
-                                         vop[0], vop[1], vop[2]);
-          vect_finish_stmt_generation (stmt, new_stmt, gsi);
+             new_temp = make_ssa_name (vec_dest, new_stmt);
+             new_stmt = gimple_build_assign (new_temp, code,
+                                             vop[0], vop[1], vop[2]);
+           }
+         vect_finish_stmt_generation (stmt, new_stmt, gsi);
 
           if (slp_node)
             {
@@ -7056,7 +7098,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
   /* Finalize the reduction-phi (set its arguments) and create the
      epilog reduction code.  */
   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
-    vect_defs[0] = gimple_assign_lhs (*vec_stmt);
+    vect_defs[0] = gimple_get_lhs (*vec_stmt);
 
   vect_create_epilog_for_reduction (vect_defs, stmt, reduc_def_stmt,
                                    epilog_copies, reduc_fn, phis,