+2018-02-16 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/84037
+ PR tree-optimization/84016
+ PR target/82862
+ * config/i386/i386.c (ix86_builtin_vectorization_cost):
+ Adjust vec_construct for the fact we need additional higher latency
+ 128bit inserts for AVX256 and AVX512 vector builds.
+ (ix86_add_stmt_cost): Scale vector construction cost for
+ elementwise loads.
+
2018-02-16 Richard Biener <rguenther@suse.de>

	PR tree-optimization/84417
ix86_cost->sse_op, true);
case vec_construct:
- return ix86_vec_cost (mode, ix86_cost->sse_op, false);
+ {
+ /* N element inserts. */
+ int cost = ix86_vec_cost (mode, ix86_cost->sse_op, false);
+ /* One vinserti128 for combining two SSE vectors for AVX256. */
+ if (GET_MODE_BITSIZE (mode) == 256)
+ cost += ix86_vec_cost (mode, ix86_cost->addss, true);
+ /* One vinserti64x4 and two vinserti128 for combining SSE
+ and AVX256 vectors to AVX512. */
+ else if (GET_MODE_BITSIZE (mode) == 512)
+ cost += 3 * ix86_vec_cost (mode, ix86_cost->addss, true);
+ return cost;
+ }
default:
gcc_unreachable ();
break;
}
}
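
For readers who want to check the arithmetic outside of GCC, the following is a minimal standalone sketch of the cost model the hunk above implements. It is not GCC code: the function name and parameters (vec_construct_cost_sketch, nelts, mode_bits, sse_op_cost, insert_cost) are invented stand-ins for GET_MODE_NUNITS, GET_MODE_BITSIZE, ix86_cost->sse_op and ix86_cost->addss, and it ignores the extra per-CPU splitting that ix86_vec_cost may apply.

/* Standalone sketch, not GCC code: approximate vec_construct cost.
   All names are hypothetical stand-ins for the GCC internals used
   in the hunk above.  */
static int
vec_construct_cost_sketch (int nelts, int mode_bits,
			   int sse_op_cost, int insert_cost)
{
  /* N element inserts, one per element.  */
  int cost = nelts * sse_op_cost;
  /* One 128-bit insert (e.g. vinserti128) to combine two SSE halves
     into a 256-bit vector.  */
  if (mode_bits == 256)
    cost += insert_cost;
  /* One vinserti64x4 plus two vinserti128 to combine four SSE pieces
     into a 512-bit vector.  */
  else if (mode_bits == 512)
    cost += 3 * insert_cost;
  return cost;
}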
+ /* If we do elementwise loads into a vector then we are bound by
+ latency and execution resources for the many scalar loads
+ (AGU and load ports). Try to account for this by scaling the
+ construction cost by the number of elements involved. */
+ if (kind == vec_construct
+ && stmt_info
+ && stmt_info->type == load_vec_info_type
+ && stmt_info->memory_access_type == VMAT_ELEMENTWISE)
+ {
+ stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ stmt_cost *= TYPE_VECTOR_SUBPARTS (vectype);
+ }
if (stmt_cost == -1)
stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
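
To see how the two hunks interact, here is a small worked example under assumed cost numbers; the tuning values are made up for illustration and are not taken from any ix86 cost table. It models constructing a 256-bit, 8-element vector whose elements come from VMAT_ELEMENTWISE scalar loads.

#include <stdio.h>

/* Worked example with assumed numbers, not real i386 tuning values.  */
int
main (void)
{
  int nelts = 8;		/* e.g. a V8SF construct */
  int sse_op_cost = 1;		/* stand-in for ix86_cost->sse_op */
  int insert_cost = 3;		/* stand-in for ix86_cost->addss */

  /* First hunk: N element inserts plus one 128-bit insert for a
     256-bit vector.  */
  int construct = nelts * sse_op_cost + insert_cost;

  /* Second hunk: for elementwise loads, scale by the element count to
     account for the many scalar loads competing for AGU and load ports.  */
  int scaled = construct * nelts;

  printf ("vec_construct cost: %d, scaled for elementwise loads: %d\n",
	  construct, scaled);
  return 0;
}

With these assumed values the construct cost rises from 8 to 11 and the elementwise-load variant to 88, which makes such strided element-by-element vector builds much less attractive to the vectorizer, as intended by the comment in the second hunk.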