{4, 4}, /* cost of storing MMX registers
in SImode and DImode */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 4, 6, 12, 24}, /* cost of loading SSE registers
+ {4, 4, 12, 12, 24}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit */
- {4, 4, 6, 12, 24}, /* cost of unaligned loads. */
- {4, 4, 5, 10, 20}, /* cost of storing SSE registers
+ {4, 4, 12, 12, 24}, /* cost of unaligned loads. */
+ {4, 4, 10, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
+ {4, 4, 10, 10, 20}, /* cost of unaligned stores. */
5, 5, /* SSE->integer and integer->SSE moves */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
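For readers skimming the tables: each five-element row is the cost of one load or store at 32, 64, 128, 256 and 512 bits, and the gather entries are a (static, per-element) pair. The sketch below is only an illustration of how such a row and pair might be consulted; the struct and function names are invented for the example and are not GCC's internal processor_costs API.

```c
/* Illustrative only: a simplified model of the cost rows quoted above.
   The names (sse_cost_row, width_index, gather_cost) are made up for
   this sketch and do not correspond to GCC's processor_costs fields.  */
#include <stdio.h>

/* Costs indexed by operand width: 32, 64, 128, 256, 512 bits.  */
typedef struct { int by_width[5]; } sse_cost_row;

/* Map an operand width in bits to an index into the row:
   32->0, 64->1, 128->2, 256->3, 512->4.  */
static int width_index (int bits)
{
  int idx = 0;
  for (int w = 32; w < bits; w <<= 1)
    idx++;
  return idx;
}

/* Gather cost model: a fixed part plus a per-element part,
   matching the "static, per_elt" pairs in the tables.  */
static int gather_cost (int stat, int per_elt, int n_elts)
{
  return stat + per_elt * n_elts;
}

int main (void)
{
  /* The new unaligned-load row from the first table above.  */
  sse_cost_row unaligned_load = { { 4, 4, 12, 12, 24 } };

  printf ("256-bit unaligned load cost: %d\n",
          unaligned_load.by_width[width_index (256)]);   /* 12 */

  /* Gather load "4, 4" from the same table, for e.g. 8 elements.  */
  printf ("8-element gather load cost: %d\n",
          gather_cost (4, 4, 8));                        /* 36 */
  return 0;
}
```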
{4, 4}, /* cost of storing MMX registers
in SImode and DImode */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 3, 6, 12, 24}, /* cost of loading SSE registers
+ {4, 3, 12, 12, 24}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit */
- {4, 3, 6, 12, 24}, /* cost of unaligned loads. */
- {4, 4, 5, 10, 20}, /* cost of storing SSE registers
+ {4, 3, 12, 12, 24}, /* cost of unaligned loads. */
+ {4, 4, 10, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
+ {4, 4, 10, 10, 20}, /* cost of unaligned stores. */
5, 5, /* SSE->integer and integer->SSE moves */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
{10, 10}, /* cost of storing MMX registers
in SImode and DImode */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {12, 12, 10, 20, 30}, /* cost of loading SSE registers
+ {12, 12, 10, 40, 60}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit */
- {12, 12, 10, 20, 30}, /* cost of unaligned loads. */
- {10, 10, 10, 20, 30}, /* cost of storing SSE registers
+ {12, 12, 10, 40, 60}, /* cost of unaligned loads. */
+ {10, 10, 10, 40, 60}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- {10, 10, 10, 20, 30}, /* cost of unaligned stores. */
+ {10, 10, 10, 40, 60}, /* cost of unaligned stores. */
16, 20, /* SSE->integer and integer->SSE moves */
12, 12, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
{8, 8}, /* cost of storing MMX registers
in SImode and DImode. */
2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
- {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ {6, 6, 6, 12, 24}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit. */
- {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
- {8, 8, 8, 8, 16}, /* cost of storing SSE registers
+ {6, 6, 6, 12, 24}, /* cost of unaligned loads. */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit. */
- {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
+ {8, 8, 8, 16, 32}, /* cost of unaligned stores. */
6, 6, /* SSE->integer and integer->SSE moves. */
/* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
throughput 12. Approx 9 uops do not depend on vector size and every load
{12, 12}, /* cost of storing MMX registers
in SImode and DImode */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {10, 10, 12, 24, 48}, /* cost of loading SSE registers
+ {10, 10, 12, 48, 96}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit */
- {10, 10, 12, 24, 48}, /* cost of unaligned loads. */
- {10, 10, 12, 24, 48}, /* cost of storing SSE registers
+ {10, 10, 12, 48, 96}, /* cost of unaligned loads. */
+ {10, 10, 12, 48, 96}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- {10, 10, 12, 24, 48}, /* cost of unaligned stores. */
+ {10, 10, 12, 48, 96}, /* cost of unaligned stores. */
14, 14, /* SSE->integer and integer->SSE moves */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
{12, 12}, /* cost of storing MMX registers
in SImode and DImode */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {10, 10, 12, 24, 48}, /* cost of loading SSE registers
+ {10, 10, 12, 48, 96}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit */
- {10, 10, 12, 24, 48}, /* cost of unaligned loads. */
- {10, 10, 12, 24, 48}, /* cost of storing SSE registers
+ {10, 10, 12, 48, 96}, /* cost of unaligned loads. */
+ {10, 10, 12, 48, 96}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- {10, 10, 12, 24, 48}, /* cost of unaligned stores. */
+ {10, 10, 12, 48, 96}, /* cost of unaligned stores. */
14, 14, /* SSE->integer and integer->SSE moves */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
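As a quick arithmetic check on the last table's updated rows (assuming, as the comments say, the entries are 32/64/128/256/512-bit costs): under the old unaligned-load row {10, 10, 12, 24, 48} a single 512-bit load (48) cost the same as four 128-bit loads (4 × 12 = 48), while under the new row {10, 10, 12, 48, 96} it costs twice as much (96 vs. 48). The snippet below just reproduces that arithmetic and makes no claim about how the vectorizer actually consumes these numbers.

```c
#include <stdio.h>

int main (void)
{
  /* Old and new unaligned-load rows from the last table,
     indexed as 32, 64, 128, 256, 512 bits.  */
  int old_row[5] = { 10, 10, 12, 24, 48 };
  int new_row[5] = { 10, 10, 12, 48, 96 };

  int four_xmm_loads = 4 * old_row[2];    /* four 128-bit loads: 48 */

  printf ("old 512-bit load: %d vs four 128-bit loads: %d\n",
          old_row[4], four_xmm_loads);    /* 48 vs 48 */
  printf ("new 512-bit load: %d vs four 128-bit loads: %d\n",
          new_row[4], four_xmm_loads);    /* 96 vs 48 */
  return 0;
}
```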