+2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * optabs.def (reduc_and_scal_optab, reduc_ior_scal_optab)
+ (reduc_xor_scal_optab): New optabs.
+ * doc/md.texi (reduc_and_scal_@var{m}, reduc_ior_scal_@var{m})
+ (reduc_xor_scal_@var{m}): Document.
+ * doc/sourcebuild.texi (vect_logical_reduc): Likewise.
+ * internal-fn.def (IFN_REDUC_AND, IFN_REDUC_IOR, IFN_REDUC_XOR): New
+ internal functions.
+ * fold-const-call.c (fold_const_call): Handle them.
+ * tree-vect-loop.c (reduction_fn_for_scalar_code): Return the new
+ internal functions for BIT_AND_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR.
+	* config/aarch64/aarch64-sve.md (reduc_<optab>_scal_<mode>)
+	(*reduc_<optab>_scal_<mode>): New patterns.
+	* config/aarch64/iterators.md (UNSPEC_ANDV, UNSPEC_IORV)
+	(UNSPEC_XORV): New unspecs.
+ (optab): Add entries for them.
+ (BITWISEV): New int iterator.
+	(bit_reduc_op): New int attribute.
+
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
"<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
)
+(define_expand "reduc_<optab>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 2)
+ (match_operand:SVE_I 1 "register_operand")]
+ BITWISEV))]
+ "TARGET_SVE"
+ {
+ operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
+ }
+)
+
+(define_insn "*reduc_<optab>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_I 2 "register_operand" "w")]
+ BITWISEV))]
+ "TARGET_SVE"
+ "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
+)
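+
+;; As an illustrative sketch (an editorial example, not required by the
+;; patch), these patterns let a C reduction loop such as:
+;;
+;;   uint8_t
+;;   and_reduc (uint8_t *a, int n)
+;;   {
+;;     uint8_t r = 0xff;
+;;     for (int i = 0; i < n; ++i)
+;;       r &= a[i];
+;;     return r;
+;;   }
+;;
+;; vectorize with AND over whole vector chunks in the loop body followed
+;; by a single ANDV to produce the scalar result.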
+
;; Unpredicated floating-point addition.
(define_expand "add<mode>3"
[(set (match_operand:SVE_F 0 "register_operand")
UNSPEC_FMLAL2 ; Used in aarch64-simd.md.
UNSPEC_FMLSL2 ; Used in aarch64-simd.md.
UNSPEC_SEL ; Used in aarch64-sve.md.
+ UNSPEC_ANDV ; Used in aarch64-sve.md.
+ UNSPEC_IORV ; Used in aarch64-sve.md.
+ UNSPEC_XORV ; Used in aarch64-sve.md.
UNSPEC_ANDF ; Used in aarch64-sve.md.
UNSPEC_IORF ; Used in aarch64-sve.md.
UNSPEC_XORF ; Used in aarch64-sve.md.
(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
UNSPEC_FMAXNMV UNSPEC_FMINNMV])
+(define_int_iterator BITWISEV [UNSPEC_ANDV UNSPEC_IORV UNSPEC_XORV])
+
(define_int_iterator LOGICALF [UNSPEC_ANDF UNSPEC_IORF UNSPEC_XORF])
(define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD
;; name for consistency with the integer patterns.
(define_int_attr optab [(UNSPEC_ANDF "and")
(UNSPEC_IORF "ior")
- (UNSPEC_XORF "xor")])
+ (UNSPEC_XORF "xor")
+ (UNSPEC_ANDV "and")
+ (UNSPEC_IORV "ior")
+ (UNSPEC_XORV "xor")])
(define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax")
(UNSPEC_UMINV "umin")
(UNSPEC_FMAXNM "fmaxnm")
(UNSPEC_FMINNM "fminnm")])
+(define_int_attr bit_reduc_op [(UNSPEC_ANDV "andv")
+ (UNSPEC_IORV "orv")
+ (UNSPEC_XORV "eorv")])
+
;; The SVE logical instruction that implements an unspec.
(define_int_attr logicalf_op [(UNSPEC_ANDF "and")
(UNSPEC_IORF "orr")
operand 0 is the scalar result, with mode equal to the mode of the elements of
the input vector.
+@cindex @code{reduc_and_scal_@var{m}} instruction pattern
+@item @samp{reduc_and_scal_@var{m}}
+@cindex @code{reduc_ior_scal_@var{m}} instruction pattern
+@itemx @samp{reduc_ior_scal_@var{m}}
+@cindex @code{reduc_xor_scal_@var{m}} instruction pattern
+@itemx @samp{reduc_xor_scal_@var{m}}
+Compute the bitwise @code{AND}/@code{IOR}/@code{XOR} reduction of the elements
+of a vector of mode @var{m}. Operand 1 is the vector input and operand 0
+is the scalar result.  The mode of the scalar result is the same as that
+of an element of @var{m}.
+
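+For example, @samp{reduc_ior_scal_@var{m}} computes the equivalent of:
+
+@smallexample
+scalar = vec[0] | vec[1] | @dots{} | vec[n - 1];
+@end smallexample
+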
@cindex @code{sdot_prod@var{m}} instruction pattern
@item @samp{sdot_prod@var{m}}
@cindex @code{udot_prod@var{m}} instruction pattern
@item vect_sizes_32B_16B
Target supports 32-byte and 16-byte vectors.
+
+@item vect_logical_reduc
+Target supports AND, IOR and XOR reduction on vectors.
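+For example, a directive can be restricted to such targets with:
+
+@smallexample
+/* @{ dg-final @{ scan-tree-dump "Reduce using direct vector reduction" "vect" @{ target vect_logical_reduc @} @} @} */
+@end smallexample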
@end table
@subsubsection Thread Local Storage attributes
case CFN_REDUC_MIN:
return fold_const_reduction (type, arg, MIN_EXPR);
+ case CFN_REDUC_AND:
+ return fold_const_reduction (type, arg, BIT_AND_EXPR);
+
+ case CFN_REDUC_IOR:
+ return fold_const_reduction (type, arg, BIT_IOR_EXPR);
+
+ case CFN_REDUC_XOR:
+ return fold_const_reduction (type, arg, BIT_XOR_EXPR);
+
default:
return fold_const_call_1 (fn, type, arg);
}
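A minimal sketch of what this folding enables (values chosen purely for
illustration): a reduction of a constant vector is now evaluated
element-by-element at compile time.

    /* IFN_REDUC_XOR on the constant vector { 1, 2, 3, 4 }:  */
    uint8_t r = 1;   /* first element */
    r ^= 2;          /* r == 3 */
    r ^= 3;          /* r == 0 */
    r ^= 4;          /* r == 4: the whole call folds to the constant 4.  */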
reduc_smax_scal, reduc_umax_scal, unary)
DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MIN, ECF_CONST | ECF_NOTHROW, first,
reduc_smin_scal, reduc_umin_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_AND, ECF_CONST | ECF_NOTHROW,
+ reduc_and_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_IOR, ECF_CONST | ECF_NOTHROW,
+ reduc_ior_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_XOR, ECF_CONST | ECF_NOTHROW,
+ reduc_xor_scal, unary)
/* Unary math functions. */
DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary)
OPTAB_D (reduc_plus_scal_optab, "reduc_plus_scal_$a")
OPTAB_D (reduc_umax_scal_optab, "reduc_umax_scal_$a")
OPTAB_D (reduc_umin_scal_optab, "reduc_umin_scal_$a")
+OPTAB_D (reduc_and_scal_optab, "reduc_and_scal_$a")
+OPTAB_D (reduc_ior_scal_optab, "reduc_ior_scal_$a")
+OPTAB_D (reduc_xor_scal_optab, "reduc_xor_scal_$a")
OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
+2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * lib/target-supports.exp (check_effective_target_vect_logical_reduc):
+ New proc.
+ * gcc.dg/vect/vect-reduc-or_1.c: Also run for vect_logical_reduc
+ and add an associated scan-dump test. Prevent vectorization
+ of the first two loops.
+ * gcc.dg/vect/vect-reduc-or_2.c: Likewise.
+ * gcc.target/aarch64/sve/reduc_1.c: Add AND, IOR and XOR reductions.
+ * gcc.target/aarch64/sve/reduc_2.c: Likewise.
+ * gcc.target/aarch64/sve/reduc_1_run.c: Likewise.
+ (INIT_VECTOR): Tweak initial value so that some bits are always set.
+ * gcc.target/aarch64/sve/reduc_2_run.c: Likewise.
+
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
-/* { dg-require-effective-target whole_vector_shift } */
+/* { dg-do run { target { whole_vector_shift || vect_logical_reduc } } } */
/* Write a reduction loop to be reduced using vector shifts. */
check_vect ();
for (i = 0; i < N; i++)
- in[i] = (i + i + 1) & 0xfd;
+ {
+ in[i] = (i + i + 1) & 0xfd;
+ asm volatile ("" ::: "memory");
+ }
for (i = 0; i < N; i++)
{
expected |= in[i];
- asm volatile ("");
+ asm volatile ("" ::: "memory");
}
- /* Prevent constant propagation of the entire loop below. */
- asm volatile ("" : : : "memory");
-
for (i = 0; i < N; i++)
sum |= in[i];
return 0;
}
-/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
-
+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" { target { ! vect_logical_reduc } } } } */
+/* { dg-final { scan-tree-dump "Reduce using direct vector reduction" "vect" { target vect_logical_reduc } } } */
-/* { dg-require-effective-target whole_vector_shift } */
+/* { dg-do run { target { whole_vector_shift || vect_logical_reduc } } } */
/* Write a reduction loop to be reduced using vector shifts and folded. */
check_vect ();
for (i = 0; i < N; i++)
- in[i] = (i + i + 1) & 0xfd;
+ {
+ in[i] = (i + i + 1) & 0xfd;
+ asm volatile ("" ::: "memory");
+ }
for (i = 0; i < N; i++)
{
expected |= in[i];
- asm volatile ("");
+ asm volatile ("" ::: "memory");
}
for (i = 0; i < N; i++)
return 0;
}
-/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
-
+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" { target { ! vect_logical_reduc } } } } */
+/* { dg-final { scan-tree-dump "Reduce using direct vector reduction" "vect" { target vect_logical_reduc } } } */
TEST_MAXMIN (DEF_REDUC_MAXMIN)
+#define DEF_REDUC_BITWISE(TYPE, NAME, BIT_OP) \
+TYPE __attribute__ ((noinline, noclone)) \
+reduc_##NAME##_##TYPE (TYPE *a, int n) \
+{ \
+ TYPE r = 13; \
+ for (int i = 0; i < n; ++i) \
+ r BIT_OP a[i]; \
+ return r; \
+}
+
+#define TEST_BITWISE(T) \
+ T (int8_t, and, &=) \
+ T (int16_t, and, &=) \
+ T (int32_t, and, &=) \
+ T (int64_t, and, &=) \
+ T (uint8_t, and, &=) \
+ T (uint16_t, and, &=) \
+ T (uint32_t, and, &=) \
+ T (uint64_t, and, &=) \
+ \
+ T (int8_t, ior, |=) \
+ T (int16_t, ior, |=) \
+ T (int32_t, ior, |=) \
+ T (int64_t, ior, |=) \
+ T (uint8_t, ior, |=) \
+ T (uint16_t, ior, |=) \
+ T (uint32_t, ior, |=) \
+ T (uint64_t, ior, |=) \
+ \
+ T (int8_t, xor, ^=) \
+ T (int16_t, xor, ^=) \
+ T (int32_t, xor, ^=) \
+ T (int64_t, xor, ^=) \
+ T (uint8_t, xor, ^=) \
+ T (uint16_t, xor, ^=) \
+ T (uint32_t, xor, ^=) \
+ T (uint64_t, xor, ^=)
+
+TEST_BITWISE (DEF_REDUC_BITWISE)
+
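+/* For reference, T (uint8_t, and, &=) above expands to roughly:
+
+     uint8_t __attribute__ ((noinline, noclone))
+     reduc_and_uint8_t (uint8_t *a, int n)
+     {
+       uint8_t r = 13;
+       for (int i = 0; i < n; ++i)
+         r &= a[i];
+       return r;
+     }
+
+   which is expected to vectorize to vector AND operations in the loop
+   plus a single final ANDV.  */
+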
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
TYPE a[NUM_ELEMS (TYPE) + 1]; \
for (int i = 0; i < NUM_ELEMS (TYPE) + 1; i++) \
{ \
- a[i] = (i * 2) * (i & 1 ? 1 : -1); \
+ a[i] = ((i * 2) * (i & 1 ? 1 : -1) | 3); \
asm volatile ("" ::: "memory"); \
}
__builtin_abort (); \
}
+#define TEST_REDUC_BITWISE(TYPE, NAME, BIT_OP) \
+ { \
+ INIT_VECTOR (TYPE); \
+ TYPE r1 = reduc_##NAME##_##TYPE (a, NUM_ELEMS (TYPE)); \
+ volatile TYPE r2 = 13; \
+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \
+ r2 BIT_OP a[i]; \
+ if (r1 != r2) \
+ __builtin_abort (); \
+ }
+
int main ()
{
TEST_PLUS (TEST_REDUC_PLUS)
TEST_MAXMIN (TEST_REDUC_MAXMIN)
+ TEST_BITWISE (TEST_REDUC_BITWISE)
return 0;
}
TEST_MAXMIN (DEF_REDUC_MAXMIN)
+#define DEF_REDUC_BITWISE(TYPE, NAME, BIT_OP) \
+void __attribute__ ((noinline, noclone)) \
+reduc_##NAME##_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)], \
+ TYPE *restrict r, int n) \
+{ \
+ for (int i = 0; i < n; i++) \
+ { \
+ r[i] = a[i][0]; \
+ for (int j = 0; j < NUM_ELEMS (TYPE); j++) \
+ r[i] BIT_OP a[i][j]; \
+ } \
+}
+
+#define TEST_BITWISE(T) \
+ T (int8_t, and, &=) \
+ T (int16_t, and, &=) \
+ T (int32_t, and, &=) \
+ T (int64_t, and, &=) \
+ T (uint8_t, and, &=) \
+ T (uint16_t, and, &=) \
+ T (uint32_t, and, &=) \
+ T (uint64_t, and, &=) \
+ \
+ T (int8_t, ior, |=) \
+ T (int16_t, ior, |=) \
+ T (int32_t, ior, |=) \
+ T (int64_t, ior, |=) \
+ T (uint8_t, ior, |=) \
+ T (uint16_t, ior, |=) \
+ T (uint32_t, ior, |=) \
+ T (uint64_t, ior, |=) \
+ \
+ T (int8_t, xor, ^=) \
+ T (int16_t, xor, ^=) \
+ T (int32_t, xor, ^=) \
+ T (int64_t, xor, ^=) \
+ T (uint8_t, xor, ^=) \
+ T (uint16_t, xor, ^=) \
+ T (uint32_t, xor, ^=) \
+ T (uint64_t, xor, ^=)
+
+TEST_BITWISE (DEF_REDUC_BITWISE)
+
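+/* Unlike reduc_1.c, each r[i] above is an in-loop reduction of one
+   fixed-length row, so the vectorizer is expected to reduce each row
+   directly with ANDV/ORV/EORV, which the scans below count.  */
+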
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
} \
}
+#define TEST_REDUC_BITWISE(TYPE, NAME, BIT_OP) \
+ { \
+ INIT_MATRIX (TYPE); \
+ reduc_##NAME##_##TYPE (mat, r, NROWS); \
+ for (int i = 0; i < NROWS; i++) \
+ { \
+ volatile TYPE r2 = mat[i][0]; \
+ for (int j = 0; j < NUM_ELEMS (TYPE); ++j) \
+ r2 BIT_OP mat[i][j]; \
+ if (r[i] != r2) \
+ __builtin_abort (); \
+ } \
+ }
+
int main ()
{
TEST_PLUS (TEST_REDUC_PLUS)
return $et_vect_call_roundf_saved($et_index)
}
+# Return 1 if the target supports AND, IOR and XOR reduction.
+
+proc check_effective_target_vect_logical_reduc { } {
+ return [check_effective_target_aarch64_sve]
+}
+
# Return 1 if the target supports section-anchors
proc check_effective_target_section_anchors { } {
*reduc_fn = IFN_REDUC_PLUS;
return true;
- case MULT_EXPR:
- case MINUS_EXPR:
+ case BIT_AND_EXPR:
+ *reduc_fn = IFN_REDUC_AND;
+ return true;
+
case BIT_IOR_EXPR:
+ *reduc_fn = IFN_REDUC_IOR;
+ return true;
+
case BIT_XOR_EXPR:
- case BIT_AND_EXPR:
+ *reduc_fn = IFN_REDUC_XOR;
+ return true;
+
+ case MULT_EXPR:
+ case MINUS_EXPR:
*reduc_fn = IFN_LAST;
return true;
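A minimal usage sketch of the updated mapping (use_direct_reduction is a
hypothetical caller; the real callers live in the vectorizer):

    internal_fn ifn;
    if (reduction_fn_for_scalar_code (BIT_AND_EXPR, &ifn) && ifn != IFN_LAST)
      /* IFN_REDUC_AND can be used directly when the target provides
         reduc_and_scal for the chosen vector mode.  */
      use_direct_reduction (ifn);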