Handle SLP of call pattern statements
authorRichard Sandiford <richard.sandiford@arm.com>
Fri, 3 Aug 2018 12:56:55 +0000 (12:56 +0000)
committerRichard Sandiford <rsandifo@gcc.gnu.org>
Fri, 3 Aug 2018 12:56:55 +0000 (12:56 +0000)
We couldn't vectorise:

  for (int j = 0; j < n; ++j)
    {
      for (int i = 0; i < 16; ++i)
a[i] = (b[i] + c[i]) >> 1;
      a += step;
      b += step;
      c += step;
    }

at -O3 because cunrolli unrolled the inner loop and SLP couldn't handle
AVG_FLOOR patterns (see also PR86504).  The problem was some overly
strict checking of pattern statements compared to normal statements
in vect_get_and_check_slp_defs:

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
      break;

    default:
      if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 "unsupported defining stmt:\n");
      return -1;
            }

The easy fix would have been to add GIMPLE_CALL to the switch,
but I don't think the switch is doing anything useful.  We only create
pattern statements that the rest of the vectoriser can handle, and the
other checks in this function and elsewhere check whether SLP is possible.

I'm also not sure why:

          if (!first && !oprnd_info->first_pattern
      /* Allow different pattern state for the defs of the
 first stmt in reduction chains.  */
      && (oprnd_info->first_dt != vect_reduction_def

is necessary.  All that should matter is that the statements in the
node are "similar enough".  It turned out to be quite hard to find a
convincing example that used a mixture of pattern and non-pattern
statements, so bb-slp-pow-1.c is the best I could come up with.
But it does show that the combination of "xi * xi" statements and
"pow (xj, 2) -> xj * xj" patterns are handled correctly.

The patch therefore just removes the whole if block.

The loop also needed commutative swapping to be extended to at least
AVG_FLOOR.

This gives +3.9% on 525.x264_r at -O3.

2018-08-03  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* internal-fn.h (first_commutative_argument): Declare.
* internal-fn.c (first_commutative_argument): New function.
* tree-vect-slp.c (vect_get_and_check_slp_defs): Remove extra
restrictions for pattern statements.  Use first_commutative_argument
to look for commutative operands in calls to internal functions.

gcc/testsuite/
* gcc.dg/vect/bb-slp-over-widen-1.c: Expect AVG_FLOOR to be used
on vect_avg_qi targets.
* gcc.dg/vect/bb-slp-over-widen-2.c: Likewise.
* gcc.dg/vect/bb-slp-pow-1.c: New test.
* gcc.dg/vect/vect-avg-15.c: Likewise.

From-SVN: r263290

gcc/ChangeLog
gcc/internal-fn.c
gcc/internal-fn.h
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/bb-slp-over-widen-1.c
gcc/testsuite/gcc.dg/vect/bb-slp-over-widen-2.c
gcc/testsuite/gcc.dg/vect/bb-slp-pow-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-avg-15.c [new file with mode: 0644]
gcc/tree-vect-slp.c

index 08e2d79aa69052e5959ac1cb2d7c02cbd4fc4e0e..5220e66562f0413b790d611f030d6db1123193b0 100644 (file)
@@ -1,3 +1,11 @@
+2018-08-03  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * internal-fn.h (first_commutative_argument): Declare.
+       * internal-fn.c (first_commutative_argument): New function.
+       * tree-vect-slp.c (vect_get_and_check_slp_defs): Remove extra
+       restrictions for pattern statements.  Use first_commutative_argument
+       to look for commutative operands in calls to internal functions.
+
 2018-08-03  Aldy Hernandez  <aldyh@redhat.com>
 
        * Makefile.in (wide-int-range.o): New.
index 8ede9cac3ef3118ba4781f6d80f44189874bec91..34d4f9efab9a45e0a9e3622f37dab0fa417b76f7 100644 (file)
@@ -3183,6 +3183,42 @@ direct_internal_fn_supported_p (internal_fn fn, tree type,
   return direct_internal_fn_supported_p (fn, tree_pair (type, type), opt_type);
 }
 
+/* If FN is commutative in two consecutive arguments, return the
+   index of the first, otherwise return -1.  */
+
+int
+first_commutative_argument (internal_fn fn)
+{
+  switch (fn)
+    {
+    case IFN_FMA:
+    case IFN_FMS:
+    case IFN_FNMA:
+    case IFN_FNMS:
+    case IFN_AVG_FLOOR:
+    case IFN_AVG_CEIL:
+    case IFN_FMIN:
+    case IFN_FMAX:
+      return 0;
+
+    case IFN_COND_ADD:
+    case IFN_COND_MUL:
+    case IFN_COND_MIN:
+    case IFN_COND_MAX:
+    case IFN_COND_AND:
+    case IFN_COND_IOR:
+    case IFN_COND_XOR:
+    case IFN_COND_FMA:
+    case IFN_COND_FMS:
+    case IFN_COND_FNMA:
+    case IFN_COND_FNMS:
+      return 1;
+
+    default:
+      return -1;
+    }
+}
+
 /* Return true if IFN_SET_EDOM is supported.  */
 
 bool
index 5c5bda1b3b7b7561ca1ebee215d6910e820a7c7c..99765cf407acc7d65356b156e91f9dc51f1dba34 100644 (file)
@@ -201,6 +201,8 @@ direct_internal_fn_supported_p (internal_fn fn, tree type0, tree type1,
                                         opt_type);
 }
 
+extern int first_commutative_argument (internal_fn);
+
 extern bool set_edom_supported_p (void);
 
 extern internal_fn get_conditional_internal_fn (tree_code);
index f0ef23755c3869a602b9887a6318fdcf07d96d48..25a59801a2629e2107d96fe73d8fa18261fc95c0 100644 (file)
@@ -1,3 +1,11 @@
+2018-08-03  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.dg/vect/bb-slp-over-widen-1.c: Expect AVG_FLOOR to be used
+       on vect_avg_qi targets.
+       * gcc.dg/vect/bb-slp-over-widen-2.c: Likewise.
+       * gcc.dg/vect/bb-slp-pow-1.c: New test.
+       * gcc.dg/vect/vect-avg-15.c: Likewise.
+
 2018-08-03  Martin Liska  <mliska@suse.cz>
 
        * gcc.dg/predict-1.c: Adjust scanned pattern to cover 2 digits.
index a54c99d154df90f00b3f09de34f765259be57047..0dd163ef3aa7a0eb53d6dbd1851f9b2626e076e5 100644 (file)
@@ -63,4 +63,5 @@ main (void)
 
 /* { dg-final { scan-tree-dump "demoting int to signed short" "slp2" { target { ! vect_widen_shift } } } } */
 /* { dg-final { scan-tree-dump "demoting int to unsigned short" "slp2" { target { ! vect_widen_shift } } } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "slp2" { target vect_avg_qi } } } */
 /* { dg-final { scan-tree-dump-times "basic block vectorized" 2 "slp2" } } */
index 7e90bead5f815e1eb8dc07857f919b3c7be272a0..3750fb79c12db2bf6940e451e4abaa71c2c32efa 100644 (file)
@@ -62,4 +62,5 @@ main (void)
 
 /* { dg-final { scan-tree-dump "demoting int to signed short" "slp2" { target { ! vect_widen_shift } } } } */
 /* { dg-final { scan-tree-dump "demoting int to unsigned short" "slp2" { target { ! vect_widen_shift } } } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "slp2" { target vect_avg_qi } } } */
 /* { dg-final { scan-tree-dump-times "basic block vectorized" 2 "slp2" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pow-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pow-1.c
new file mode 100644 (file)
index 0000000..5a05bd4
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-additional-options "-fno-math-errno -fdisable-tree-sincos" } */
+/* { dg-require-effective-target vect_float } */
+
+void __attribute__ ((noipa))
+f (float *a)
+{
+  a[0] = a[0] * a[0];
+  a[1] = __builtin_powf (a[1], 2);
+  a[2] = a[2] * a[2];
+  a[3] = __builtin_powf (a[3], 2);
+}
+
+float a[4] = { 1, 2, 3, 4 };
+
+int
+main (void)
+{
+  f (a);
+  for (int i = 0; i < 4; ++i)
+    {
+      if (a[i] != (i + 1) * (i + 1))
+       __builtin_abort ();
+      asm volatile ("" ::: "memory");
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-15.c b/gcc/testsuite/gcc.dg/vect/vect-avg-15.c
new file mode 100644 (file)
index 0000000..48d7ed7
--- /dev/null
@@ -0,0 +1,52 @@
+/* { dg-additional-options "-O3" } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 80
+
+void __attribute__ ((noipa))
+f (signed char *restrict a, signed char *restrict b,
+   signed char *restrict c, int n, int step)
+{
+  for (int j = 0; j < n; ++j)
+    {
+      for (int i = 0; i < 16; ++i)
+       a[i] = (b[i] + c[i]) >> 1;
+      a += step;
+      b += step;
+      c += step;
+    }
+}
+
+#define BASE1 -126
+#define BASE2 -42
+
+signed char a[N], b[N], c[N];
+
+int
+main (void)
+{
+  check_vect ();
+
+  for (int i = 0; i < N; ++i)
+    {
+      a[i] = i;
+      b[i] = BASE1 + i * 3;
+      c[i] = BASE2 + i * 2;
+      asm volatile ("" ::: "memory");
+    }
+  f (a, b, c, N / 20, 20);
+  for (int i = 0; i < N; ++i)
+    {
+      int d = (BASE1 + BASE2 + i * 5) >> 1;
+      if (a[i] != (i % 20 < 16 ? d : i))
+       __builtin_abort ();
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
index d0f6da4a6acf21e70b81238cf98ecf327d23c30e..367945b69cdf08e62fd6556a8548d60b2445fbfe 100644 (file)
@@ -309,7 +309,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
   bool pattern = false;
   slp_oprnd_info oprnd_info;
   int first_op_idx = 1;
-  bool commutative = false;
+  unsigned int commutative_op = -1U;
   bool first_op_cond = false;
   bool first = stmt_num == 0;
   bool second = stmt_num == 1;
@@ -318,6 +318,11 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
     {
       number_of_oprnds = gimple_call_num_args (stmt);
       first_op_idx = 3;
+      if (gimple_call_internal_p (stmt))
+       {
+         internal_fn ifn = gimple_call_internal_fn (stmt);
+         commutative_op = first_commutative_argument (ifn);
+       }
     }
   else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
     {
@@ -332,7 +337,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
          number_of_oprnds++;
        }
       else
-       commutative = commutative_tree_code (code);
+       commutative_op = commutative_tree_code (code) ? 0U : -1U;
     }
   else
     return -1;
@@ -373,62 +378,6 @@ again:
          return -1;
        }
 
-      /* Check if DEF_STMT_INFO is a part of a pattern in LOOP and get
-        the def stmt from the pattern.  Check that all the stmts of the
-        node are in the pattern.  */
-      if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
-        {
-          pattern = true;
-          if (!first && !oprnd_info->first_pattern
-             /* Allow different pattern state for the defs of the
-                first stmt in reduction chains.  */
-             && (oprnd_info->first_dt != vect_reduction_def
-                 || (!second && !oprnd_info->second_pattern)))
-           {
-             if (i == 0
-                 && !swapped
-                 && commutative)
-               {
-                 swapped = true;
-                 goto again;
-               }
-
-             if (dump_enabled_p ())
-               {
-                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                  "Build SLP failed: some of the stmts"
-                                  " are in a pattern, and others are not ");
-                 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
-                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-               }
-
-             return 1;
-            }
-
-         dt = STMT_VINFO_DEF_TYPE (def_stmt_info);
-
-          if (dt == vect_unknown_def_type)
-            {
-              if (dump_enabled_p ())
-                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "Unsupported pattern.\n");
-              return -1;
-            }
-
-         switch (gimple_code (def_stmt_info->stmt))
-            {
-            case GIMPLE_PHI:
-            case GIMPLE_ASSIGN:
-             break;
-
-           default:
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "unsupported defining stmt:\n");
-             return -1;
-            }
-        }
-
       if (second)
        oprnd_info->second_pattern = pattern;
 
@@ -456,9 +405,7 @@ again:
              || !types_compatible_p (oprnd_info->first_op_type, type))
            {
              /* Try swapping operands if we got a mismatch.  */
-             if (i == 0
-                 && !swapped
-                 && commutative)
+             if (i == commutative_op && !swapped)
                {
                  swapped = true;
                  goto again;
@@ -534,9 +481,9 @@ again:
          return -1;
        }
 
-      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
       if (first_op_cond)
        {
+         gassign *stmt = as_a <gassign *> (stmt_info->stmt);
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);
 
@@ -559,13 +506,17 @@ again:
            }
        }
       else
-       swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
-                          gimple_assign_rhs2_ptr (stmt));
+       {
+         unsigned int op = commutative_op + first_op_idx;
+         swap_ssa_operands (stmt_info->stmt,
+                            gimple_op_ptr (stmt_info->stmt, op),
+                            gimple_op_ptr (stmt_info->stmt, op + 1));
+       }
       if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
-         dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+         dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
        }
     }