Implement SLP of internal functions
author: Richard Sandiford <richard.sandiford@linaro.org>
Thu, 12 Jul 2018 13:02:17 +0000 (13:02 +0000)
committer: Richard Sandiford <rsandifo@gcc.gnu.org>
Thu, 12 Jul 2018 13:02:17 +0000 (13:02 +0000)
SLP of calls was previously restricted to built-in functions.
This patch extends it to internal functions.

2018-07-12  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
* internal-fn.h (vectorizable_internal_fn_p): New function.
* tree-vect-slp.c (compatible_calls_p): Likewise.
(vect_build_slp_tree_1): Remove nops argument.  Handle calls
to internal functions.
(vect_build_slp_tree_2): Update call to vect_build_slp_tree_1.

gcc/testsuite/
* gcc.dg/vect/vect-cond-arith-6.c: New test.
* gcc.target/aarch64/sve/cond_arith_4.c: Likewise.
* gcc.target/aarch64/sve/cond_arith_4_run.c: Likewise.
* gcc.target/aarch64/sve/cond_arith_5.c: Likewise.
* gcc.target/aarch64/sve/cond_arith_5_run.c: Likewise.
* gcc.target/aarch64/sve/slp_14.c: Likewise.
* gcc.target/aarch64/sve/slp_14_run.c: Likewise.

From-SVN: r262590

gcc/ChangeLog
gcc/internal-fn.h
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/vect-cond-arith-6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/cond_arith_4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/cond_arith_4_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/cond_arith_5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/cond_arith_5_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/slp_14.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/slp_14_run.c [new file with mode: 0644]
gcc/tree-vect-slp.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a77b04ccf6b74510aa18b899c785e6e84adf4dec..03cb0fa41909a8f4bb075e3596a6479ea7b8af01 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2018-07-12  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       * internal-fn.h (vectorizable_internal_fn_p): New function.
+       * tree-vect-slp.c (compatible_calls_p): Likewise.
+       (vect_build_slp_tree_1): Remove nops argument.  Handle calls
+       to internal functions.
+       (vect_build_slp_tree_2): Update call to vect_build_slp_tree_1.
+
 2018-07-12  Richard Sandiford  <richard.sandiford@linaro.org>
 
        * fold-const.h (inverse_conditions_p): Declare.
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 48db7721aa80f162c6177f2d7af5ce18001efc48..5c5bda1b3b7b7561ca1ebee215d6910e820a7c7c 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -160,6 +160,17 @@ direct_internal_fn_p (internal_fn fn)
   return direct_internal_fn_array[fn].type0 >= -1;
 }
 
+/* Return true if FN is a direct internal function that can be vectorized by
+   converting the return type and all argument types to vectors of the same
+   number of elements.  E.g. we can vectorize an IFN_SQRT on floats as an
+   IFN_SQRT on vectors of N floats.  */
+
+inline bool
+vectorizable_internal_fn_p (internal_fn fn)
+{
+  return direct_internal_fn_array[fn].vectorizable;
+}
+
 /* Return optab information about internal function FN.  Only meaningful
    if direct_internal_fn_p (FN).  */
 
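diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b101f230e5453f3e26af5087d2da51d2eb0e2c9f..1c1098519c6d485ffb16c41cebea5c21ce560aa7 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog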
@@ -1,3 +1,13 @@
+2018-07-12  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       * gcc.dg/vect/vect-cond-arith-6.c: New test.
+       * gcc.target/aarch64/sve/cond_arith_4.c: Likewise.
+       * gcc.target/aarch64/sve/cond_arith_4_run.c: Likewise.
+       * gcc.target/aarch64/sve/cond_arith_5.c: Likewise.
+       * gcc.target/aarch64/sve/cond_arith_5_run.c: Likewise.
+       * gcc.target/aarch64/sve/slp_14.c: Likewise.
+       * gcc.target/aarch64/sve/slp_14_run.c: Likewise.
+
 2018-07-12  Richard Sandiford  <richard.sandiford@linaro.org>
 
        * gcc.dg/vect/vect-cond-arith-4.c: New test.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-arith-6.c b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-6.c
new file mode 100644 (file)
index 0000000..cc70b8a
--- /dev/null
@@ -0,0 +1,62 @@
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS * 11 / 64 + 4)
+
+#define add(A, B) ((A) + (B))
+#define sub(A, B) ((A) - (B))
+#define mul(A, B) ((A) * (B))
+#define div(A, B) ((A) / (B))
+
+#define DEF(OP)                                                                \
+  void __attribute__ ((noipa))                                         \
+  f_##OP (double *restrict a, double *restrict b, double x)            \
+  {                                                                    \
+    for (int i = 0; i < N; i += 2)                                     \
+      {                                                                        \
+       a[i] = b[i] < 100 ? OP (b[i], x) : b[i];                        \
+       a[i + 1] = b[i + 1] < 70 ? OP (b[i + 1], x) : b[i + 1];         \
+      }                                                                        \
+  }
+
+#define TEST(OP)                                               \
+  {                                                            \
+    f_##OP (a, b, 10);                                         \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       int bval = (i % 17) * 10;                               \
+       int truev = OP (bval, 10);                              \
+       if (a[i] != (bval < (i & 1 ? 70 : 100) ? truev : bval)) \
+       __builtin_abort ();                                     \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+  }
+
+#define FOR_EACH_OP(T)                         \
+  T (add)                                      \
+  T (sub)                                      \
+  T (mul)                                      \
+  T (div)
+
+FOR_EACH_OP (DEF)
+
+int
+main (void)
+{
+  double a[N], b[N];
+  for (int i = 0; i < N; ++i)
+    {
+      b[i] = (i % 17) * 10;
+      asm volatile ("" ::: "memory");
+    }
+  FOR_EACH_OP (TEST)
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times {vectorizing stmts using SLP} 4 "vect" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump-times { = \.COND_ADD} 1 "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump-times { = \.COND_SUB} 1 "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump-times { = \.COND_MUL} 1 "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump-times { = \.COND_RDIV} 1 "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump-not {VEC_COND_EXPR} "optimized" { target vect_double_cond_arith } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_4.c
new file mode 100644 (file)
index 0000000..811ca61
--- /dev/null
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST(TYPE, NAME, OP)                                           \
+  void __attribute__ ((noinline, noclone))                             \
+  test_##TYPE##_##NAME (TYPE *__restrict x,                            \
+                       TYPE *__restrict y,                             \
+                       TYPE z1, TYPE z2,                               \
+                       TYPE *__restrict pred, int n)                   \
+  {                                                                    \
+    for (int i = 0; i < n; i += 2)                                     \
+      {                                                                        \
+       x[i] = (pred[i] != 1 ? y[i] OP z1 : y[i]);                      \
+       x[i + 1] = (pred[i + 1] != 1 ? y[i + 1] OP z2 : y[i + 1]);      \
+      }                                                                        \
+  }
+
+#define TEST_INT_TYPE(TYPE) \
+  TEST (TYPE, div, /)
+
+#define TEST_FP_TYPE(TYPE) \
+  TEST (TYPE, add, +) \
+  TEST (TYPE, sub, -) \
+  TEST (TYPE, mul, *) \
+  TEST (TYPE, div, /)
+
+#define TEST_ALL \
+  TEST_INT_TYPE (int32_t) \
+  TEST_INT_TYPE (uint32_t) \
+  TEST_INT_TYPE (int64_t) \
+  TEST_INT_TYPE (uint64_t) \
+  TEST_FP_TYPE (float) \
+  TEST_FP_TYPE (double)
+
+TEST_ALL
+
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z,} 12 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7],} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z,} 12 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7],} 6 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_4_run.c
new file mode 100644 (file)
index 0000000..153f0c3
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_arith_4.c"
+
+#define N 98
+
+#undef TEST
+#define TEST(TYPE, NAME, OP)                                   \
+  {                                                            \
+    TYPE x[N], y[N], pred[N], z[2] = { 5, 7 };                 \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       y[i] = i * i;                                           \
+       pred[i] = i % 3;                                        \
+      }                                                                \
+    test_##TYPE##_##NAME (x, y, z[0], z[1], pred, N);          \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       TYPE expected = i % 3 != 1 ? y[i] OP z[i & 1] : y[i];   \
+       if (x[i] != expected)                                   \
+         __builtin_abort ();                                   \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+  }
+
+int
+main (void)
+{
+  TEST_ALL
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_5.c
new file mode 100644 (file)
index 0000000..8bc247e
--- /dev/null
@@ -0,0 +1,85 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#include <stdint.h>
+
+#define TEST(DATA_TYPE, OTHER_TYPE, NAME, OP)                          \
+  void __attribute__ ((noinline, noclone))                             \
+  test_##DATA_TYPE##_##OTHER_TYPE##_##NAME (DATA_TYPE *__restrict x,   \
+                                           DATA_TYPE *__restrict y,    \
+                                           DATA_TYPE z1, DATA_TYPE z2, \
+                                           DATA_TYPE *__restrict pred, \
+                                           OTHER_TYPE *__restrict foo, \
+                                           int n)                      \
+  {                                                                    \
+    for (int i = 0; i < n; i += 2)                                     \
+      {                                                                        \
+       x[i] = (pred[i] != 1 ? y[i] OP z1 : y[i]);                      \
+       x[i + 1] = (pred[i + 1] != 1 ? y[i + 1] OP z2 : y[i + 1]);      \
+       foo[i] += 1;                                                    \
+       foo[i + 1] += 2;                                                \
+      }                                                                        \
+  }
+
+#define TEST_INT_TYPE(DATA_TYPE, OTHER_TYPE) \
+  TEST (DATA_TYPE, OTHER_TYPE, div, /)
+
+#define TEST_FP_TYPE(DATA_TYPE, OTHER_TYPE) \
+  TEST (DATA_TYPE, OTHER_TYPE, add, +) \
+  TEST (DATA_TYPE, OTHER_TYPE, sub, -) \
+  TEST (DATA_TYPE, OTHER_TYPE, mul, *) \
+  TEST (DATA_TYPE, OTHER_TYPE, div, /)
+
+#define TEST_ALL \
+  TEST_INT_TYPE (int32_t, int8_t) \
+  TEST_INT_TYPE (int32_t, int16_t) \
+  TEST_INT_TYPE (uint32_t, int8_t) \
+  TEST_INT_TYPE (uint32_t, int16_t) \
+  TEST_INT_TYPE (int64_t, int8_t) \
+  TEST_INT_TYPE (int64_t, int16_t) \
+  TEST_INT_TYPE (int64_t, int32_t) \
+  TEST_INT_TYPE (uint64_t, int8_t) \
+  TEST_INT_TYPE (uint64_t, int16_t) \
+  TEST_INT_TYPE (uint64_t, int32_t) \
+  TEST_FP_TYPE (float, int8_t) \
+  TEST_FP_TYPE (float, int16_t) \
+  TEST_FP_TYPE (double, int8_t) \
+  TEST_FP_TYPE (double, int16_t) \
+  TEST_FP_TYPE (double, int32_t)
+
+TEST_ALL
+
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m,} 6 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m,} 6 } } */
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m,} 14 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m,} 14 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 6 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 14 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 6 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 14 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m,} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m,} 14 } } */
+
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m,} 6 } } */
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.d, p[0-7]/m,} 14 } } */
+
+/* The load XFAILs for fixed-length SVE account for extra loads from the
+   constant pool.  */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z,} 12 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7],} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z,} 12 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7],} 12 } } */
+
+/* 72 for x operations, 6 for foo operations.  */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z,} 78 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */
+/* 36 for x operations, 6 for foo operations.  */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7],} 42 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z,} 168 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7],} 84 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_5_run.c
new file mode 100644 (file)
index 0000000..118a239
--- /dev/null
@@ -0,0 +1,35 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_arith_5.c"
+
+#define N 98
+
+#undef TEST
+#define TEST(DATA_TYPE, OTHER_TYPE, NAME, OP)                          \
+  {                                                                    \
+    DATA_TYPE x[N], y[N], pred[N], z[2] = { 5, 7 };                    \
+    OTHER_TYPE foo[N];                                                 \
+    for (int i = 0; i < N; ++i)                                                \
+      {                                                                        \
+       y[i] = i * i;                                                   \
+       pred[i] = i % 3;                                                \
+       foo[i] = i * 5;                                                 \
+      }                                                                        \
+    test_##DATA_TYPE##_##OTHER_TYPE##_##NAME (x, y, z[0], z[1],                \
+                                             pred, foo, N);            \
+    for (int i = 0; i < N; ++i)                                                \
+      {                                                                        \
+       DATA_TYPE expected = i % 3 != 1 ? y[i] OP z[i & 1] : y[i];      \
+       if (x[i] != expected)                                           \
+         __builtin_abort ();                                           \
+       asm volatile ("" ::: "memory");                                 \
+      }                                                                        \
+  }
+
+int
+main (void)
+{
+  TEST_ALL
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_14.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_14.c
new file mode 100644 (file)
index 0000000..2f9123c
--- /dev/null
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define VEC_PERM(TYPE)                                         \
+void __attribute__ ((weak))                                    \
+vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n)     \
+{                                                              \
+  for (int i = 0; i < n; ++i)                                  \
+    {                                                          \
+      TYPE a1 = a[i * 2];                                      \
+      TYPE a2 = a[i * 2 + 1];                                  \
+      TYPE b1 = b[i * 2];                                      \
+      TYPE b2 = b[i * 2 + 1];                                  \
+      a[i * 2] = b1 > 1 ? a1 / b1 : a1;                                \
+      a[i * 2 + 1] = b2 > 2 ? a2 / b2 : a2;                    \
+    }                                                          \
+}
+
+#define TEST_ALL(T)                            \
+  T (int32_t)                                  \
+  T (uint32_t)                                 \
+  T (int64_t)                                  \
+  T (uint64_t)                                 \
+  T (float)                                    \
+  T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* The loop should be fully-masked.  The load XFAILs for fixed-length
+   SVE account for extra loads from the constant pool.  */
+/* { dg-final { scan-assembler-times {\tld1w\t} 6 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */
+/* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\t} 6 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */
+/* { dg-final { scan-assembler-times {\tst1d\t} 3 } } */
+/* { dg-final { scan-assembler-not {\tldr} } } */
+/* { dg-final { scan-assembler-not {\tstr} } } */
+
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.d} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_14_run.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_14_run.c
new file mode 100644 (file)
index 0000000..112b8b8
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "slp_14.c"
+
+#define N1 (103 * 2)
+#define N2 (111 * 2)
+
+#define HARNESS(TYPE)                                          \
+  {                                                            \
+    TYPE a[N2], b[N2];                                         \
+    for (unsigned int i = 0; i < N2; ++i)                      \
+      {                                                                \
+       a[i] = i * 2 + i % 5;                                   \
+       b[i] = i % 11;                                          \
+      }                                                                \
+    vec_slp_##TYPE (a, b, N1 / 2);                             \
+    for (unsigned int i = 0; i < N2; ++i)                      \
+      {                                                                \
+       TYPE orig_a = i * 2 + i % 5;                            \
+       TYPE orig_b = i % 11;                                   \
+       TYPE expected_a = orig_a;                               \
+       if (i < N1 && orig_b > (i & 1 ? 2 : 1))                 \
+         expected_a /= orig_b;                                 \
+       if (a[i] != expected_a || b[i] != orig_b)               \
+         __builtin_abort ();                                   \
+      }                                                                \
+  }
+
+int
+main (void)
+{
+  TEST_ALL (HARNESS)
+}
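diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 528e1d55892d3b7c8a162fe804bec036989c46cf..8dc5763015280acf8473a2651bb8180ff60c45cd 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c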
@@ -562,6 +562,41 @@ again:
   return 0;
 }
 
+/* Return true if call statements CALL1 and CALL2 are similar enough
+   to be combined into the same SLP group.  */
+
+static bool
+compatible_calls_p (gcall *call1, gcall *call2)
+{
+  unsigned int nargs = gimple_call_num_args (call1);
+  if (nargs != gimple_call_num_args (call2))
+    return false;
+
+  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
+    return false;
+
+  if (gimple_call_internal_p (call1))
+    {
+      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
+                              TREE_TYPE (gimple_call_lhs (call2))))
+       return false;
+      for (unsigned int i = 0; i < nargs; ++i)
+       if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
+                                TREE_TYPE (gimple_call_arg (call2, i))))
+         return false;
+    }
+  else
+    {
+      if (!operand_equal_p (gimple_call_fn (call1),
+                           gimple_call_fn (call2), 0))
+       return false;
+
+      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
+       return false;
+    }
+  return true;
+}
+
 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
    caller's attempt to find the vector type in STMT with the narrowest
    element type.  Return true if VECTYPE is nonnull and if it is valid
@@ -650,8 +685,8 @@ vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size,
 static bool
 vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
-                      unsigned nops, poly_uint64 *max_nunits,
-                      bool *matches, bool *two_operators)
+                      poly_uint64 *max_nunits, bool *matches,
+                      bool *two_operators)
 {
   unsigned int i;
   gimple *first_stmt = stmts[0], *stmt = stmts[0];
@@ -727,7 +762,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
       if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
-         if (gimple_call_internal_p (call_stmt)
+         if ((gimple_call_internal_p (call_stmt)
+              && (!vectorizable_internal_fn_p
+                  (gimple_call_internal_fn (call_stmt))))
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
@@ -873,11 +910,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
-             if (gimple_call_num_args (stmt) != nops
-                 || !operand_equal_p (gimple_call_fn (first_stmt),
-                                      gimple_call_fn (stmt), 0)
-                 || gimple_call_fntype (first_stmt)
-                    != gimple_call_fntype (stmt))
+             if (!compatible_calls_p (as_a <gcall *> (first_stmt),
+                                      as_a <gcall *> (stmt)))
                {
                  if (dump_enabled_p ())
                    {
@@ -1193,8 +1227,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
 
   bool two_operators = false;
   unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
-  if (!vect_build_slp_tree_1 (vinfo, swap,
-                             stmts, group_size, nops,
+  if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
                              &this_max_nunits, matches, &two_operators))
     return NULL;