rs6000.c (rs6000_vect_nonmem): New static var.
author Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Fri, 5 May 2017 21:27:54 +0000 (21:27 +0000)
committer William Schmidt <wschmidt@gcc.gnu.org>
Fri, 5 May 2017 21:27:54 +0000 (21:27 +0000)
[gcc]

2017-05-05  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

* config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var.
(rs6000_init_cost): Initialize rs6000_vect_nonmem.
(rs6000_add_stmt_cost): Update rs6000_vect_nonmem.
(rs6000_finish_cost): Avoid vectorizing simple copy loops with
VF=2 that require versioning.

[gcc/testsuite]

2017-05-05  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

* gcc.target/powerpc/versioned-copy-loop.c: New file.

From-SVN: r247671

gcc/ChangeLog
gcc/config/rs6000/rs6000.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c [new file with mode: 0644]

index aeaa27d31f7ba86d9b4c3ea4ad49b1c4f6f145b2..a2f57ac4f094d6783118cd578dbd7db73afcb846 100644 (file)
@@ -1,3 +1,11 @@
+2017-05-05  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+       * config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var.
+       (rs6000_init_cost): Initialize rs6000_vect_nonmem.
+       (rs6000_add_stmt_cost): Update rs6000_vect_nonmem.
+       (rs6000_finish_cost): Avoid vectorizing simple copy loops with
+       VF=2 that require versioning.
+
 2017-05-05  David Malcolm  <dmalcolm@redhat.com>
 
        * diagnostic.h (CARET_LINE_MARGIN): Convert from macro to const
index 8f68d84a8fe1d5afa1033c111b2936f7c19e3166..bac56abcf2600da043f26480a690eeb74db6ae7e 100644 (file)
@@ -5873,6 +5873,10 @@ rs6000_density_test (rs6000_cost_data *data)
 
 /* Implement targetm.vectorize.init_cost.  */
 
+/* For each vectorized loop, this var holds TRUE iff a non-memory vector
+   instruction is needed by the vectorization.  */
+static bool rs6000_vect_nonmem;
+
 static void *
 rs6000_init_cost (struct loop *loop_info)
 {
@@ -5881,6 +5885,7 @@ rs6000_init_cost (struct loop *loop_info)
   data->cost[vect_prologue] = 0;
   data->cost[vect_body]     = 0;
   data->cost[vect_epilogue] = 0;
+  rs6000_vect_nonmem = false;
   return data;
 }
 
@@ -5907,6 +5912,15 @@ rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
 
       retval = (unsigned) (count * stmt_cost);
       cost_data->cost[where] += retval;
+
+      /* Check whether we're doing something other than just a copy loop.
+        Not all simple copy loops can be profitably vectorized; see
+        rs6000_finish_cost.  */
+      if ((kind == vec_to_scalar || kind == vec_perm
+          || kind == vec_promote_demote || kind == vec_construct
+          || kind == scalar_to_vec)
+         || (where == vect_body && kind == vector_stmt))
+       rs6000_vect_nonmem = true;
     }
 
   return retval;
@@ -5923,6 +5937,19 @@ rs6000_finish_cost (void *data, unsigned *prologue_cost,
   if (cost_data->loop_info)
     rs6000_density_test (cost_data);
 
+  /* Don't vectorize minimum-vectorization-factor, simple copy loops
+     that require versioning for any reason.  The vectorization is at
+     best a wash inside the loop, and the versioning checks make the
+     result highly unlikely to be profitable and potentially quite harmful.  */
+  if (cost_data->loop_info)
+    {
+      loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
+      if (!rs6000_vect_nonmem
+         && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
+         && LOOP_REQUIRES_VERSIONING (vec_info))
+       cost_data->cost[vect_body] += 10000;
+    }
+
   *prologue_cost = cost_data->cost[vect_prologue];
   *body_cost     = cost_data->cost[vect_body];
   *epilogue_cost = cost_data->cost[vect_epilogue];
index 5cd12864a5b7d51bbcb4471fb6156e90db1aa706..bec90e32e8417b68d44cf538e187f5f691083238 100644 (file)
@@ -1,3 +1,7 @@
+2017-05-05  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+       * gcc.target/powerpc/versioned-copy-loop.c: New file.
+
 2017-05-05  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
        PR target/79038
diff --git a/gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c b/gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c
new file mode 100644 (file)
index 0000000..bbfd165
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+
+/* Verify that a pure copy loop with a vectorization factor of two
+   that requires versioning (e.g. for alignment) will not be
+   vectorized.  See the cost model hooks in rs6000.c.  */
+
+typedef long unsigned int size_t;
+typedef unsigned char uint8_t;
+
+extern void *memcpy (void *__restrict __dest, const void *__restrict __src,
+       size_t __n) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1, 2)));
+
+void foo (void *dstPtr, const void *srcPtr, void *dstEnd)
+{
+    uint8_t *d = (uint8_t*)dstPtr;
+    const uint8_t *s = (const uint8_t*)srcPtr;
+    uint8_t* const e = (uint8_t*)dstEnd;
+
+    do
+      {
+       memcpy (d, s, 8);
+       d += 8;
+       s += 8;
+      }
+    while (d < e);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */