Fix folding of vector mask EQ/NE expressions
author Richard Sandiford <richard.sandiford@linaro.org>
Sat, 13 Jan 2018 17:57:17 +0000 (17:57 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Sat, 13 Jan 2018 17:57:17 +0000 (17:57 +0000)
fold_binary_loc assumed that if the type of the result wasn't a vector,
the operands wouldn't be either.  This isn't necessarily true for
EQ_EXPR and NE_EXPR of vector masks, which can return a single scalar
for the mask as a whole.

2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
    Alan Hayward  <alan.hayward@arm.com>
    David Sherwood  <david.sherwood@arm.com>

gcc/
* fold-const.c (fold_binary_loc): Check the argument types
rather than the result type when testing for a vector operation.

gcc/testsuite/
* gcc.target/aarch64/sve/vec_bool_cmp_1.c: New test.
* gcc.target/aarch64/sve/vec_bool_cmp_1_run.c: Likewise.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256616

gcc/ChangeLog
gcc/fold-const.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/vec_bool_cmp_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_bool_cmp_1_run.c [new file with mode: 0644]

index 64951e9d78c11fd6d7c9bac035b25100b2829c2e..d965d8fbb3a1e70e6e092383fa806a5c8064c5e4 100644 (file)
@@ -1,3 +1,10 @@
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * fold-const.c (fold_binary_loc): Check the argument types
+       rather than the result type when testing for a vector operation.
+
 2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
 
        * doc/tm.texi.in (DWARF_LAZY_REGISTER_VALUE): Document.
index 521c2dc09cedb213f9a83efe06b356512ab02a0a..cfb1b3d0614c35629651978fc4cb79ff70bae2f1 100644 (file)
@@ -9323,7 +9323,7 @@ fold_binary_loc (location_t loc, enum tree_code code, tree type,
 
   if ((code == BIT_AND_EXPR || code == BIT_IOR_EXPR
        || code == EQ_EXPR || code == NE_EXPR)
-      && TREE_CODE (type) != VECTOR_TYPE
+      && !VECTOR_TYPE_P (TREE_TYPE (arg0))
       && ((truth_value_p (TREE_CODE (arg0))
           && (truth_value_p (TREE_CODE (arg1))
               || (TREE_CODE (arg1) == BIT_AND_EXPR
index 48f40dad147b560e06062d8ca3a4dfcf69950e06..9997b287d9de3e8753b4864243bfec99bd049f49 100644 (file)
@@ -1,3 +1,10 @@
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * gcc.target/aarch64/sve/vec_bool_cmp_1.c: New test.
+       * gcc.target/aarch64/sve/vec_bool_cmp_1_run.c: Likewise.
+
 2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
 
        * g++.target/aarch64/sve/aarch64-sve.exp: New harness.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_bool_cmp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_bool_cmp_1.c
new file mode 100644 (file)
index 0000000..e60d1c5
--- /dev/null
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#define VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE)                         \
+void __attribute__ ((noinline, noclone))                               \
+vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (VARTYPE *dst, VARTYPE *src, \
+                                          INDUCTYPE start,             \
+                                          INDUCTYPE n,                 \
+                                          INDUCTYPE mask)              \
+{                                                                      \
+  for (INDUCTYPE i = 0; i < n; i++)                                    \
+    {                                                                  \
+      bool lhs = i >= start;                                           \
+      bool rhs = (i & mask) != 0x3D;                                   \
+      if (lhs OP rhs)                                                  \
+        dst[i] = src[i];                                               \
+    }                                                                  \
+}
+
+#define TEST_OP(T, NAME, OP)                   \
+  T (NAME, OP, uint8_t, uint8_t)               \
+  T (NAME, OP, uint16_t, uint16_t)             \
+  T (NAME, OP, uint32_t, uint32_t)             \
+  T (NAME, OP, uint64_t, uint64_t)             \
+  T (NAME, OP, float, uint32_t)                        \
+  T (NAME, OP, double, uint64_t)
+
+#define TEST_ALL(T)                            \
+  TEST_OP (T, cmpeq, ==)                       \
+  TEST_OP (T, cmpne, !=)
+
+TEST_ALL (VEC_BOOL)
+
+/* Both cmpne and cmpeq loops will contain an exclusive predicate or.  */
+/* { dg-final { scan-assembler-times {\teors?\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b, p[0-9]*\.b\n} 12 } } */
+/* cmpeq will also contain a predicate not operation.  */
+/* { dg-final { scan-assembler-times {\tnot\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_bool_cmp_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_bool_cmp_1_run.c
new file mode 100644 (file)
index 0000000..cd0fd56
--- /dev/null
@@ -0,0 +1,37 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#include "vec_bool_cmp_1.c"
+
+#define N 103
+
+#define TEST_VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE)            \
+{                                                              \
+  INDUCTYPE i;                                                 \
+  VARTYPE src[N];                                              \
+  VARTYPE dst[N];                                              \
+  for (i = 0; i < N; i++)                                      \
+    {                                                          \
+      src[i] = i;                                              \
+      dst[i] = i * 2;                                          \
+      asm volatile ("" ::: "memory");                          \
+    }                                                          \
+  vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (dst, src, 13,     \
+                                            97, 0xFF);         \
+  for (i = 0; i < 13; i++)                                     \
+    if (dst[i] != (VARTYPE) (0 OP 1 ? i : i * 2))              \
+      __builtin_abort ();                                      \
+  for (i = 13; i < 97; i++)                                    \
+    if (dst[i] != (VARTYPE) (1 OP (i != 0x3D) ? i : i * 2))    \
+      __builtin_abort ();                                      \
+  for (i = 97; i < N; i++)                                     \
+    if (dst[i] != (i * 2))                                     \
+      __builtin_abort ();                                      \
+}
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (TEST_VEC_BOOL)
+  return 0;
+}