From b71679934e64bdad94409ace50ee79471cad6f20 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Thu, 18 Oct 2018 14:31:29 +0000
Subject: [PATCH] i386.c (ix86_builtin_vectorization_cost): Do not feed
 width-specific load/store costs through ix86_vec_cost.

2018-10-18  Richard Biener  <rguenther@suse.de>

	* config/i386/i386.c (ix86_builtin_vectorization_cost): Do not
	feed width-specific load/store costs through ix86_vec_cost.
	* config/i386/x86-tune-costs.h (athlon_cost): Adjust.
	(k8_cost): Likewise.
	(bdver_cost): Likewise.
	(znver1_cost): Likewise.
	(btver1_cost): Likewise.
	(btver2_cost): Likewise.

From-SVN: r265268
---
 gcc/ChangeLog                    | 11 ++++++++
 gcc/config/i386/i386.c           | 14 +++-------
 gcc/config/i386/x86-tune-costs.h | 48 ++++++++++++++++----------------
 3 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 87519ad674c..1c2e6129218 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2018-10-18  Richard Biener  <rguenther@suse.de>
+
+	* config/i386/i386.c (ix86_builtin_vectorization_cost): Do not
+	feed width-specific load/store costs through ix86_vec_cost.
+	* config/i386/x86-tune-costs.h (athlon_cost): Adjust.
+	(k8_cost): Likewise.
+	(bdver_cost): Likewise.
+	(znver1_cost): Likewise.
+	(btver1_cost): Likewise.
+	(btver2_cost): Likewise.
+
 2018-10-18  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* simplify-rtx.c (simplify_subreg): Call simplify_gen_subreg
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9a5b54978a9..03324c0d5ac 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -45108,16 +45108,14 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
 	/* See PR82713 - we may end up being called on non-vector type.  */
 	if (index < 0)
 	  index = 2;
-        return ix86_vec_cost (mode,
-			      COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2);
+        return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
 
       case vector_store:
 	index = sse_store_index (mode);
 	/* See PR82713 - we may end up being called on non-vector type.  */
 	if (index < 0)
 	  index = 2;
-        return ix86_vec_cost (mode,
-			      COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2);
+        return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
 
       case vec_to_scalar:
       case scalar_to_vec:
@@ -45130,18 +45128,14 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
 	/* See PR82713 - we may end up being called on non-vector type.  */
 	if (index < 0)
 	  index = 2;
-        return ix86_vec_cost (mode,
-			      COSTS_N_INSNS
-				 (ix86_cost->sse_unaligned_load[index]) / 2);
+        return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
 
       case unaligned_store:
 	index = sse_store_index (mode);
 	/* See PR82713 - we may end up being called on non-vector type.  */
 	if (index < 0)
 	  index = 2;
-        return ix86_vec_cost (mode,
-			      COSTS_N_INSNS
-				 (ix86_cost->sse_unaligned_store[index]) / 2);
+        return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
 
       case vector_gather_load:
         return ix86_vec_cost (mode,
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index dbf9eb50402..50ecb35cbde 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -795,12 +795,12 @@ struct processor_costs athlon_cost = {
   {4, 4},				/* cost of storing MMX registers
 					   in SImode and DImode */
   2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
-  {4, 4, 6, 12, 24},			/* cost of loading SSE registers
+  {4, 4, 12, 12, 24},			/* cost of loading SSE registers
 					   in 32,64,128,256 and 512-bit */
-  {4, 4, 6, 12, 24},			/* cost of unaligned loads.  */
-  {4, 4, 5, 10, 20},			/* cost of storing SSE registers
+  {4, 4, 12, 12, 24},			/* cost of unaligned loads.  */
+  {4, 4, 10, 10, 20},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  {4, 4, 5, 10, 20},			/* cost of unaligned stores.  */
+  {4, 4, 10, 10, 20},			/* cost of unaligned stores.  */
   5, 5,					/* SSE->integer and integer->SSE moves */
   4, 4,					/* Gather load static, per_elt.  */
   4, 4,					/* Gather store static, per_elt.  */
@@ -891,12 +891,12 @@ struct processor_costs k8_cost = {
   {4, 4},				/* cost of storing MMX registers
 					   in SImode and DImode */
   2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
-  {4, 3, 6, 12, 24},			/* cost of loading SSE registers
+  {4, 3, 12, 12, 24},			/* cost of loading SSE registers
 					   in 32,64,128,256 and 512-bit */
-  {4, 3, 6, 12, 24},			/* cost of unaligned loads.  */
-  {4, 4, 5, 10, 20},			/* cost of storing SSE registers
+  {4, 3, 12, 12, 24},			/* cost of unaligned loads.  */
+  {4, 4, 10, 10, 20},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  {4, 4, 5, 10, 20},			/* cost of unaligned stores.  */
+  {4, 4, 10, 10, 20},			/* cost of unaligned stores.  */
   5, 5,					/* SSE->integer and integer->SSE moves */
   4, 4,					/* Gather load static, per_elt.  */
   4, 4,					/* Gather store static, per_elt.  */
@@ -1100,12 +1100,12 @@ const struct processor_costs bdver_cost = {
   {10, 10},				/* cost of storing MMX registers
 					   in SImode and DImode */
   2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
-  {12, 12, 10, 20, 30},			/* cost of loading SSE registers
+  {12, 12, 10, 40, 60},			/* cost of loading SSE registers
 					   in 32,64,128,256 and 512-bit */
-  {12, 12, 10, 20, 30},			/* cost of unaligned loads.  */
-  {10, 10, 10, 20, 30},			/* cost of storing SSE registers
+  {12, 12, 10, 40, 60},			/* cost of unaligned loads.  */
+  {10, 10, 10, 40, 60},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  {10, 10, 10, 20, 30},			/* cost of unaligned stores.  */
+  {10, 10, 10, 40, 60},			/* cost of unaligned stores.  */
   16, 20,				/* SSE->integer and integer->SSE moves */
   12, 12,				/* Gather load static, per_elt.  */
   10, 10,				/* Gather store static, per_elt.  */
@@ -1212,12 +1212,12 @@ struct processor_costs znver1_cost = {
   {8, 8},				/* cost of storing MMX registers
 					   in SImode and DImode.  */
   2, 3, 6,				/* cost of moving XMM,YMM,ZMM register.  */
-  {6, 6, 6, 6, 12},			/* cost of loading SSE registers
+  {6, 6, 6, 12, 24},			/* cost of loading SSE registers
 					   in 32,64,128,256 and 512-bit.  */
-  {6, 6, 6, 6, 12},			/* cost of unaligned loads.  */
-  {8, 8, 8, 8, 16},			/* cost of storing SSE registers
+  {6, 6, 6, 12, 24},			/* cost of unaligned loads.  */
+  {8, 8, 8, 16, 32},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit.  */
-  {8, 8, 8, 8, 16},			/* cost of unaligned stores.  */
+  {8, 8, 8, 16, 32},			/* cost of unaligned stores.  */
   6, 6,					/* SSE->integer and integer->SSE moves.  */
   /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
      throughput 12.  Approx 9 uops do not depend on vector size and every load
@@ -1420,12 +1420,12 @@ const struct processor_costs btver1_cost = {
   {12, 12},				/* cost of storing MMX registers
 					   in SImode and DImode */
   2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
-  {10, 10, 12, 24, 48},			/* cost of loading SSE registers
+  {10, 10, 12, 48, 96},			/* cost of loading SSE registers
 					   in 32,64,128,256 and 512-bit */
-  {10, 10, 12, 24, 48},			/* cost of unaligned loads.  */
-  {10, 10, 12, 24, 48},			/* cost of storing SSE registers
+  {10, 10, 12, 48, 96},			/* cost of unaligned loads.  */
+  {10, 10, 12, 48, 96},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  {10, 10, 12, 24, 48},			/* cost of unaligned stores.  */
+  {10, 10, 12, 48, 96},			/* cost of unaligned stores.  */
   14, 14,				/* SSE->integer and integer->SSE moves */
   10, 10,				/* Gather load static, per_elt.  */
   10, 10,				/* Gather store static, per_elt.  */
@@ -1511,12 +1511,12 @@ const struct processor_costs btver2_cost = {
   {12, 12},				/* cost of storing MMX registers
 					   in SImode and DImode */
   2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
-  {10, 10, 12, 24, 48},			/* cost of loading SSE registers
+  {10, 10, 12, 48, 96},			/* cost of loading SSE registers
 					   in 32,64,128,256 and 512-bit */
-  {10, 10, 12, 24, 48},			/* cost of unaligned loads.  */
-  {10, 10, 12, 24, 48},			/* cost of storing SSE registers
+  {10, 10, 12, 48, 96},			/* cost of unaligned loads.  */
+  {10, 10, 12, 48, 96},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  {10, 10, 12, 24, 48},			/* cost of unaligned stores.  */
+  {10, 10, 12, 48, 96},			/* cost of unaligned stores.  */
   14, 14,				/* SSE->integer and integer->SSE moves */
   10, 10,				/* Gather load static, per_elt.  */
   10, 10,				/* Gather store static, per_elt.  */
-- 
2.30.2