S/390: Define vectorization_cost hook
author Andreas Krebbel <krebbel@linux.vnet.ibm.com>
Fri, 2 Dec 2016 08:25:27 +0000 (08:25 +0000)
committer Andreas Krebbel <krebbel@gcc.gnu.org>
Fri, 2 Dec 2016 08:25:27 +0000 (08:25 +0000)
Define the vectorization_cost hook.  The only change right now
compared to the default implementation is a reduced cost for
unaligned loads/stores.  This is intended to prevent unnecessary loop
peeling that is done only to reach better alignment.

Further tuning of this hook is required.

-Andreas-

gcc/ChangeLog:

2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

* gcc/config/s390/s390.c (s390_builtin_vectorization_cost): New
function.
(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Define target
macro.

gcc/testsuite/ChangeLog:

2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

* gcc.target/s390/vector/vec-nopeel-1.c: New test.

From-SVN: r243158

gcc/ChangeLog
gcc/config/s390/s390.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/s390/vector/vec-nopeel-1.c [new file with mode: 0644]

index d2ecd0f1d9da30a0a1434b103e02dcf1dd56b4e8..e5f83456f0f00e285b55a6bd7ea567b1116a8414 100644 (file)
@@ -1,3 +1,10 @@
+2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+       * gcc/config/s390/s390.c (s390_builtin_vectorization_cost): New
+       function.
+       (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Define target
+       macro.
+
 2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
        * config/s390/vector.md (vec_halfhalf): New mode iterator.
index dab4f43e37a33f1848f88d78f376d0a602bc9dde..767666e3f7bfeebc00327cd565f5019b70ced247 100644 (file)
@@ -3674,6 +3674,40 @@ s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
   return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
 }
 
+/* Implement targetm.vectorize.builtin_vectorization_cost.
+
+   Return the cost to assume for a statement of kind TYPE_OF_COST.
+   Every kind handled here costs 1, except a taken conditional branch,
+   which costs 3, and a vector construction, which costs
+   TYPE_VECTOR_SUBPARTS (VECTYPE) - 1.  Unaligned vector loads and
+   stores are priced the same as aligned ones; this is meant to keep
+   the vectorizer from peeling loops merely to reach better alignment.
+   MISALIGN is not used.  Any kind not listed in the switch ends up in
+   gcc_unreachable.  */
+static int
+s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+                                tree vectype,
+                                int misalign ATTRIBUTE_UNUSED)
+{
+  switch (type_of_cost)
+    {
+      case scalar_stmt:
+      case scalar_load:
+      case scalar_store:
+      case vector_stmt:
+      case vector_load:
+      case vector_store:
+      case vec_to_scalar:
+      case scalar_to_vec:
+      case cond_branch_not_taken:
+      case vec_perm:
+      case vec_promote_demote:
+      case unaligned_load:
+      case unaligned_store:
+       return 1;
+
+      case cond_branch_taken:
+       return 3;
+
+      case vec_construct:
+       return TYPE_VECTOR_SUBPARTS (vectype) - 1;
+
+      default:
+       gcc_unreachable ();
+    }
+}
+
 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
    otherwise return 0.  */
 
@@ -15428,6 +15462,9 @@ s390_excess_precision (enum excess_precision_type type)
 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
 #undef TARGET_MEMORY_MOVE_COST
 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+  s390_builtin_vectorization_cost
 
 #undef TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
index dc269ef178ffd9de93f552e3a9ea6ba95382e335..e39ab1c59a98a9f54c606769bc44758b7980e016 100644 (file)
@@ -1,3 +1,7 @@
+2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+       * gcc.target/s390/vector/vec-nopeel-1.c: New test.
+
 2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
        * gcc.target/s390/zvector/vec-cmp-2.c: New test.
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-nopeel-1.c b/gcc/testsuite/gcc.target/s390/vector/vec-nopeel-1.c
new file mode 100644 (file)
index 0000000..581c371
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+/* { dg-require-effective-target vector } */
+
+int
+foo (int * restrict a, int n)
+{
+  int i, result = 0;
+
+  for (i = 0; i < n * 4; i++)  /* Trip count is always a multiple of 4.  */
+    result += a[i];  /* Plain sum reduction over a[0 .. n * 4 - 1].  */
+  return result;
+}
+
+/* We do NOT want this loop to get peeled.  Without peeling no scalar
+   memory add should appear.  */
+/* { dg-final { scan-assembler-not "\ta\t" } } */