pr79683.c: Disable costmodel.
author Jan Hubicka <hubicka@ucw.cz>
Sat, 21 Oct 2017 11:53:33 +0000 (13:53 +0200)
committer Jan Hubicka <hubicka@gcc.gnu.org>
Sat, 21 Oct 2017 11:53:33 +0000 (11:53 +0000)
* gcc.target/i386/pr79683.c: Disable costmodel.
* i386.c (ix86_builtin_vectorization_cost): Use existing rtx_cost
latencies instead of having a separate table; distinguish between
integer and float costs.
* i386.h (processor_costs): Remove scalar_stmt_cost,
scalar_load_cost, scalar_store_cost, vec_stmt_cost, vec_to_scalar_cost,
scalar_to_vec_cost, vec_align_load_cost, vec_unalign_load_cost,
vec_store_cost.
* x86-tune-costs.h: Remove entries which have been removed from
processor_costs in all tables; make cond_taken_branch_cost
and cond_not_taken_branch_cost COSTS_N_INSNS based.
Index: testsuite/gcc.target/i386/pr79683.c
===================================================================
--- testsuite/gcc.target/i386/pr79683.c (revision 253957)
+++ testsuite/gcc.target/i386/pr79683.c (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -msse2" } */
+/* { dg-options "-O3 -msse2 -fvect-cost-model=unlimited" } */

 struct s {
     __INT64_TYPE__ a;
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 253957)
+++ config/i386/i386.c (working copy)
@@ -44051,37 +44051,61 @@ static int
 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                  tree vectype, int)
 {
+  bool fp = false;
+  machine_mode mode = TImode;
+  if (vectype != NULL)
+    {
+      fp = FLOAT_TYPE_P (vectype);
+      mode = TYPE_MODE (vectype);
+    }
+
   switch (type_of_cost)
     {
       case scalar_stmt:
-        return ix86_cost->scalar_stmt_cost;
+        return fp ? ix86_cost->addss : COSTS_N_INSNS (1);

       case scalar_load:
-        return ix86_cost->scalar_load_cost;
+ /* load/store costs are relative to register move which is 2. Recompute
+     it to COSTS_N_INSNS so everything have same base.  */
+        return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
+       : ix86_cost->int_load [2]) / 2;

       case scalar_store:
-        return ix86_cost->scalar_store_cost;
+        return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
+       : ix86_cost->int_store [2]) / 2;

       case vector_stmt:
-        return ix86_cost->vec_stmt_cost;
+        return ix86_vec_cost (mode,
+       fp ? ix86_cost->addss : ix86_cost->sse_op,
+       true);

       case vector_load:
-        return ix86_cost->vec_align_load_cost;
+        return ix86_vec_cost (mode,
+       COSTS_N_INSNS (ix86_cost->sse_load[2]) / 2,
+       true);

       case vector_store:
-        return ix86_cost->vec_store_cost;
+        return ix86_vec_cost (mode,
+       COSTS_N_INSNS (ix86_cost->sse_store[2]) / 2,
+       true);

       case vec_to_scalar:
-        return ix86_cost->vec_to_scalar_cost;
-
       case scalar_to_vec:
-        return ix86_cost->scalar_to_vec_cost;
+        return ix86_vec_cost (mode, ix86_cost->sse_op, true);

+      /* We should have separate costs for unaligned loads and gather/scatter.
+  Do that incrementally.  */
       case unaligned_load:
-      case unaligned_store:
       case vector_gather_load:
+        return ix86_vec_cost (mode,
+       COSTS_N_INSNS (ix86_cost->sse_load[2]),
+       true);
+
+      case unaligned_store:
       case vector_scatter_store:
-        return ix86_cost->vec_unalign_load_cost;
+        return ix86_vec_cost (mode,
+       COSTS_N_INSNS (ix86_cost->sse_store[2]),
+       true);

       case cond_branch_taken:
         return ix86_cost->cond_taken_branch_cost;
@@ -44091,10 +44115,11 @@ ix86_builtin_vectorization_cost (enum ve

       case vec_perm:
       case vec_promote_demote:
-        return ix86_cost->vec_stmt_cost;
+        return ix86_vec_cost (mode,
+       ix86_cost->sse_op, true);

       case vec_construct:
- return ix86_cost->vec_stmt_cost * (TYPE_VECTOR_SUBPARTS (vectype) - 1);
+ return ix86_vec_cost (mode, ix86_cost->sse_op, false);

       default:
         gcc_unreachable ();
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h (revision 253957)
+++ config/i386/i386.h (working copy)
@@ -277,18 +277,6 @@ struct processor_costs {
     parallel.  See also
     ix86_reassociation_width.  */
   struct stringop_algs *memcpy, *memset;
-  const int scalar_stmt_cost;   /* Cost of any scalar operation, excluding
-    load and store.  */
-  const int scalar_load_cost;   /* Cost of scalar load.  */
-  const int scalar_store_cost;  /* Cost of scalar store.  */
-  const int vec_stmt_cost;      /* Cost of any vector operation, excluding
-                                   load, store, vector-to-scalar and
-                                   scalar-to-vector operation.  */
-  const int vec_to_scalar_cost;    /* Cost of vect-to-scalar operation.  */
-  const int scalar_to_vec_cost;    /* Cost of scalar-to-vector operation.  */
-  const int vec_align_load_cost;   /* Cost of aligned vector load.  */
-  const int vec_unalign_load_cost; /* Cost of unaligned vector load.  */
-  const int vec_store_cost;        /* Cost of vector store.  */
   const int cond_taken_branch_cost;    /* Cost of taken branch for vectorizer
    cost model.  */
   const int cond_not_taken_branch_cost;/* Cost of not taken branch for
Index: config/i386/x86-tune-costs.h
===================================================================
--- config/i386/x86-tune-costs.h (revision 253958)
+++ config/i386/x86-tune-costs.h (working copy)
@@ -79,17 +79,8 @@ struct processor_costs ix86_size_cost =
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   ix86_size_memcpy,
   ix86_size_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  1, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  1, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_BYTES (1), /* cond_taken_branch_cost.  */
+  COSTS_N_BYTES (1), /* cond_not_taken_branch_cost.  */
 };

 /* Processor costs (relative to an add) */
@@ -167,17 +158,8 @@ struct processor_costs i386_cost = { /*
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   i386_memcpy,
   i386_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 static stringop_algs i486_memcpy[2] = {
@@ -256,17 +238,8 @@ struct processor_costs i486_cost = { /*
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   i486_memcpy,
   i486_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 static stringop_algs pentium_memcpy[2] = {
@@ -343,17 +316,8 @@ struct processor_costs pentium_cost = {
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   pentium_memcpy,
   pentium_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 static const
@@ -423,17 +387,8 @@ struct processor_costs lakemont_cost = {
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   pentium_memcpy,
   pentium_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
@@ -518,17 +473,8 @@ struct processor_costs pentiumpro_cost =
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   pentiumpro_memcpy,
   pentiumpro_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 static stringop_algs geode_memcpy[2] = {
@@ -605,17 +551,8 @@ struct processor_costs geode_cost = {
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   geode_memcpy,
   geode_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 static stringop_algs k6_memcpy[2] = {
@@ -694,17 +631,8 @@ struct processor_costs k6_cost = {
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   k6_memcpy,
   k6_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 /* For some reason, Athlon deals better with REP prefix (relative to loops)
@@ -784,17 +712,8 @@ struct processor_costs athlon_cost = {
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   athlon_memcpy,
   athlon_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 /* K8 has optimized REP instruction for medium sized blocks, but for very
@@ -883,17 +802,8 @@ struct processor_costs k8_cost = {
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   k8_memcpy,
   k8_memset,
-  4, /* scalar_stmt_cost.  */
-  2, /* scalar load_cost.  */
-  2, /* scalar_store_cost.  */
-  5, /* vec_stmt_cost.  */
-  0, /* vec_to_scalar_cost.  */
-  2, /* scalar_to_vec_cost.  */
-  2, /* vec_align_load_cost.  */
-  3, /* vec_unalign_load_cost.  */
-  3, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  2, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2), /* cond_not_taken_branch_cost.  */
 };

 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
@@ -989,17 +899,8 @@ struct processor_costs amdfam10_cost = {
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   amdfam10_memcpy,
   amdfam10_memset,
-  4, /* scalar_stmt_cost.  */
-  2, /* scalar load_cost.  */
-  2, /* scalar_store_cost.  */
-  6, /* vec_stmt_cost.  */
-  0, /* vec_to_scalar_cost.  */
-  2, /* scalar_to_vec_cost.  */
-  2, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  2, /* vec_store_cost.  */
-  2, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (2), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 /*  BDVER1 has optimized REP instruction for medium sized blocks, but for
@@ -1097,17 +998,8 @@ const struct processor_costs bdver1_cost
   1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   bdver1_memcpy,
   bdver1_memset,
-  6, /* scalar_stmt_cost.  */
-  4, /* scalar load_cost.  */
-  4, /* scalar_store_cost.  */
-  6, /* vec_stmt_cost.  */
-  0, /* vec_to_scalar_cost.  */
-  2, /* scalar_to_vec_cost.  */
-  4, /* vec_align_load_cost.  */
-  4, /* vec_unalign_load_cost.  */
-  4, /* vec_store_cost.  */
-  4, /* cond_taken_branch_cost.  */
-  2, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2), /* cond_not_taken_branch_cost.  */
 };

 /*  BDVER2 has optimized REP instruction for medium sized blocks, but for
@@ -1206,17 +1098,8 @@ const struct processor_costs bdver2_cost
   1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   bdver2_memcpy,
   bdver2_memset,
-  6, /* scalar_stmt_cost.  */
-  4, /* scalar load_cost.  */
-  4, /* scalar_store_cost.  */
-  6, /* vec_stmt_cost.  */
-  0, /* vec_to_scalar_cost.  */
-  2, /* scalar_to_vec_cost.  */
-  4, /* vec_align_load_cost.  */
-  4, /* vec_unalign_load_cost.  */
-  4, /* vec_store_cost.  */
-  4, /* cond_taken_branch_cost.  */
-  2, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2), /* cond_not_taken_branch_cost.  */
 };

@@ -1306,17 +1189,8 @@ struct processor_costs bdver3_cost = {
   1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   bdver3_memcpy,
   bdver3_memset,
-  6, /* scalar_stmt_cost.  */
-  4, /* scalar load_cost.  */
-  4, /* scalar_store_cost.  */
-  6, /* vec_stmt_cost.  */
-  0, /* vec_to_scalar_cost.  */
-  2, /* scalar_to_vec_cost.  */
-  4, /* vec_align_load_cost.  */
-  4, /* vec_unalign_load_cost.  */
-  4, /* vec_store_cost.  */
-  4, /* cond_taken_branch_cost.  */
-  2, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2), /* cond_not_taken_branch_cost.  */
 };

 /*  BDVER4 has optimized REP instruction for medium sized blocks, but for
@@ -1405,17 +1279,8 @@ struct processor_costs bdver4_cost = {
   1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   bdver4_memcpy,
   bdver4_memset,
-  6, /* scalar_stmt_cost.  */
-  4, /* scalar load_cost.  */
-  4, /* scalar_store_cost.  */
-  6, /* vec_stmt_cost.  */
-  0, /* vec_to_scalar_cost.  */
-  2, /* scalar_to_vec_cost.  */
-  4, /* vec_align_load_cost.  */
-  4, /* vec_unalign_load_cost.  */
-  4, /* vec_store_cost.  */
-  4, /* cond_taken_branch_cost.  */
-  2, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2), /* cond_not_taken_branch_cost.  */
 };

@@ -1524,17 +1389,8 @@ struct processor_costs znver1_cost = {
   4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp.  */
   znver1_memcpy,
   znver1_memset,
-  6, /* scalar_stmt_cost.  */
-  4, /* scalar load_cost.  */
-  4, /* scalar_store_cost.  */
-  6, /* vec_stmt_cost.  */
-  0, /* vec_to_scalar_cost.  */
-  2, /* scalar_to_vec_cost.  */
-  4, /* vec_align_load_cost.  */
-  4, /* vec_unalign_load_cost.  */
-  4, /* vec_store_cost.  */
-  4, /* cond_taken_branch_cost.  */
-  2, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2), /* cond_not_taken_branch_cost.  */
 };

   /* BTVER1 has optimized REP instruction for medium sized blocks, but for
@@ -1624,17 +1480,8 @@ const struct processor_costs btver1_cost
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   btver1_memcpy,
   btver1_memset,
-  4, /* scalar_stmt_cost.  */
-  2, /* scalar load_cost.  */
-  2, /* scalar_store_cost.  */
-  6, /* vec_stmt_cost.  */
-  0, /* vec_to_scalar_cost.  */
-  2, /* scalar_to_vec_cost.  */
-  2, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  2, /* vec_store_cost.  */
-  2, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (2), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 static stringop_algs btver2_memcpy[2] = {
@@ -1721,17 +1568,8 @@ const struct processor_costs btver2_cost
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   btver2_memcpy,
   btver2_memset,
-  4, /* scalar_stmt_cost.  */
-  2, /* scalar load_cost.  */
-  2, /* scalar_store_cost.  */
-  6, /* vec_stmt_cost.  */
-  0, /* vec_to_scalar_cost.  */
-  2, /* scalar_to_vec_cost.  */
-  2, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  2, /* vec_store_cost.  */
-  2, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (2), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 static stringop_algs pentium4_memcpy[2] = {
@@ -1809,17 +1647,8 @@ struct processor_costs pentium4_cost = {
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   pentium4_memcpy,
   pentium4_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 static stringop_algs nocona_memcpy[2] = {
@@ -1900,17 +1729,8 @@ struct processor_costs nocona_cost = {
   1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   nocona_memcpy,
   nocona_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 static stringop_algs atom_memcpy[2] = {
@@ -1989,17 +1809,8 @@ struct processor_costs atom_cost = {
   2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp.  */
   atom_memcpy,
   atom_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 static stringop_algs slm_memcpy[2] = {
@@ -2078,17 +1889,8 @@ struct processor_costs slm_cost = {
   1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   slm_memcpy,
   slm_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  4, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 static stringop_algs intel_memcpy[2] = {
@@ -2167,17 +1969,8 @@ struct processor_costs intel_cost = {
   1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   intel_memcpy,
   intel_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  4, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 /* Generic should produce code tuned for Core-i7 (and newer chips)
@@ -2265,17 +2058,8 @@ struct processor_costs generic_cost = {
   1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp.  */
   generic_memcpy,
   generic_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

 /* core_cost should produce code tuned for Core familly of CPUs.  */
@@ -2366,16 +2150,7 @@ struct processor_costs core_cost = {
   1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp.  */
   core_memcpy,
   core_memset,
-  1, /* scalar_stmt_cost.  */
-  1, /* scalar load_cost.  */
-  1, /* scalar_store_cost.  */
-  1, /* vec_stmt_cost.  */
-  1, /* vec_to_scalar_cost.  */
-  1, /* scalar_to_vec_cost.  */
-  1, /* vec_align_load_cost.  */
-  2, /* vec_unalign_load_cost.  */
-  1, /* vec_store_cost.  */
-  3, /* cond_taken_branch_cost.  */
-  1, /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3), /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1), /* cond_not_taken_branch_cost.  */
 };

From-SVN: r253975

gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/x86-tune-costs.h
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr79683.c

index 45a219741dbb0da62331cf16bf25860bb2334f95..7f9d694d21753e90f6b863e3fc34267b644f3641 100644 (file)
@@ -44051,37 +44051,61 @@ static int
 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                  tree vectype, int)
 {
+  bool fp = false;
+  machine_mode mode = TImode;
+  if (vectype != NULL)
+    {
+      fp = FLOAT_TYPE_P (vectype);
+      mode = TYPE_MODE (vectype);
+    }
+
   switch (type_of_cost)
     {
       case scalar_stmt:
-        return ix86_cost->scalar_stmt_cost;
+        return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
 
       case scalar_load:
-        return ix86_cost->scalar_load_cost;
+       /* load/store costs are relative to register move which is 2. Recompute
+          it to COSTS_N_INSNS so everything have same base.  */
+        return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
+                             : ix86_cost->int_load [2]) / 2;
 
       case scalar_store:
-        return ix86_cost->scalar_store_cost;
+        return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
+                             : ix86_cost->int_store [2]) / 2;
 
       case vector_stmt:
-        return ix86_cost->vec_stmt_cost;
+        return ix86_vec_cost (mode,
+                             fp ? ix86_cost->addss : ix86_cost->sse_op,
+                             true);
 
       case vector_load:
-        return ix86_cost->vec_align_load_cost;
+        return ix86_vec_cost (mode,
+                             COSTS_N_INSNS (ix86_cost->sse_load[2]) / 2,
+                             true);
 
       case vector_store:
-        return ix86_cost->vec_store_cost;
+        return ix86_vec_cost (mode,
+                             COSTS_N_INSNS (ix86_cost->sse_store[2]) / 2,
+                             true);
 
       case vec_to_scalar:
-        return ix86_cost->vec_to_scalar_cost;
-
       case scalar_to_vec:
-        return ix86_cost->scalar_to_vec_cost;
+        return ix86_vec_cost (mode, ix86_cost->sse_op, true);
 
+      /* We should have separate costs for unaligned loads and gather/scatter.
+        Do that incrementally.  */
       case unaligned_load:
-      case unaligned_store:
       case vector_gather_load:
+        return ix86_vec_cost (mode,
+                             COSTS_N_INSNS (ix86_cost->sse_load[2]),
+                             true);
+
+      case unaligned_store:
       case vector_scatter_store:
-        return ix86_cost->vec_unalign_load_cost;
+        return ix86_vec_cost (mode,
+                             COSTS_N_INSNS (ix86_cost->sse_store[2]),
+                             true);
 
       case cond_branch_taken:
         return ix86_cost->cond_taken_branch_cost;
@@ -44091,10 +44115,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
 
       case vec_perm:
       case vec_promote_demote:
-        return ix86_cost->vec_stmt_cost;
+        return ix86_vec_cost (mode,
+                             ix86_cost->sse_op, true);
 
       case vec_construct:
-       return ix86_cost->vec_stmt_cost * (TYPE_VECTOR_SUBPARTS (vectype) - 1);
+       return ix86_vec_cost (mode, ix86_cost->sse_op, false);
 
       default:
         gcc_unreachable ();
index 04b590c9a6fcc94d1f0151aaa9c041a023aa4b20..a63c13234c5d5d7c3b7b0f8e1a959dde0ff26c62 100644 (file)
@@ -277,18 +277,6 @@ struct processor_costs {
                                   parallel.  See also
                                   ix86_reassociation_width.  */
   struct stringop_algs *memcpy, *memset;
-  const int scalar_stmt_cost;   /* Cost of any scalar operation, excluding
-                                  load and store.  */
-  const int scalar_load_cost;   /* Cost of scalar load.  */
-  const int scalar_store_cost;  /* Cost of scalar store.  */
-  const int vec_stmt_cost;      /* Cost of any vector operation, excluding
-                                   load, store, vector-to-scalar and
-                                   scalar-to-vector operation.  */
-  const int vec_to_scalar_cost;    /* Cost of vect-to-scalar operation.  */
-  const int scalar_to_vec_cost;    /* Cost of scalar-to-vector operation.  */
-  const int vec_align_load_cost;   /* Cost of aligned vector load.  */
-  const int vec_unalign_load_cost; /* Cost of unaligned vector load.  */
-  const int vec_store_cost;        /* Cost of vector store.  */
   const int cond_taken_branch_cost;    /* Cost of taken branch for vectorizer
                                          cost model.  */
   const int cond_not_taken_branch_cost;/* Cost of not taken branch for
index 761d421312e0db6d78d72be7a1c24557177e4e22..0bfcac44fe1e7b42ccd8553aeb09617c7144eda9 100644 (file)
@@ -79,17 +79,8 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   ix86_size_memcpy,
   ix86_size_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  1,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  1,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_BYTES (1),                   /* cond_taken_branch_cost.  */
+  COSTS_N_BYTES (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 /* Processor costs (relative to an add) */
@@ -167,17 +158,8 @@ struct processor_costs i386_cost = {       /* 386 specific costs */
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   i386_memcpy,
   i386_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs i486_memcpy[2] = {
@@ -256,17 +238,8 @@ struct processor_costs i486_cost = {       /* 486 specific costs */
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   i486_memcpy,
   i486_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs pentium_memcpy[2] = {
@@ -343,17 +316,8 @@ struct processor_costs pentium_cost = {
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   pentium_memcpy,
   pentium_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 static const
@@ -423,17 +387,8 @@ struct processor_costs lakemont_cost = {
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   pentium_memcpy,
   pentium_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
@@ -518,17 +473,8 @@ struct processor_costs pentiumpro_cost = {
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   pentiumpro_memcpy,
   pentiumpro_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs geode_memcpy[2] = {
@@ -605,17 +551,8 @@ struct processor_costs geode_cost = {
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   geode_memcpy,
   geode_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs k6_memcpy[2] = {
@@ -694,17 +631,8 @@ struct processor_costs k6_cost = {
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   k6_memcpy,
   k6_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 /* For some reason, Athlon deals better with REP prefix (relative to loops)
@@ -784,17 +712,8 @@ struct processor_costs athlon_cost = {
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   athlon_memcpy,
   athlon_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 /* K8 has optimized REP instruction for medium sized blocks, but for very
@@ -883,17 +802,8 @@ struct processor_costs k8_cost = {
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   k8_memcpy,
   k8_memset,
-  4,                                   /* scalar_stmt_cost.  */
-  2,                                   /* scalar load_cost.  */
-  2,                                   /* scalar_store_cost.  */
-  5,                                   /* vec_stmt_cost.  */
-  0,                                   /* vec_to_scalar_cost.  */
-  2,                                   /* scalar_to_vec_cost.  */
-  2,                                   /* vec_align_load_cost.  */
-  3,                                   /* vec_unalign_load_cost.  */
-  3,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  2,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),                   /* cond_not_taken_branch_cost.  */
 };
 
 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
@@ -989,17 +899,8 @@ struct processor_costs amdfam10_cost = {
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   amdfam10_memcpy,
   amdfam10_memset,
-  4,                                   /* scalar_stmt_cost.  */
-  2,                                   /* scalar load_cost.  */
-  2,                                   /* scalar_store_cost.  */
-  6,                                   /* vec_stmt_cost.  */
-  0,                                   /* vec_to_scalar_cost.  */
-  2,                                   /* scalar_to_vec_cost.  */
-  2,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  2,                                   /* vec_store_cost.  */
-  2,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (2),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 /*  BDVER1 has optimized REP instruction for medium sized blocks, but for
@@ -1097,17 +998,8 @@ const struct processor_costs bdver1_cost = {
   1, 2, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   bdver1_memcpy,
   bdver1_memset,
-  6,                                   /* scalar_stmt_cost.  */
-  4,                                   /* scalar load_cost.  */
-  4,                                   /* scalar_store_cost.  */
-  6,                                   /* vec_stmt_cost.  */
-  0,                                   /* vec_to_scalar_cost.  */
-  2,                                   /* scalar_to_vec_cost.  */
-  4,                                   /* vec_align_load_cost.  */
-  4,                                   /* vec_unalign_load_cost.  */
-  4,                                   /* vec_store_cost.  */
-  4,                                   /* cond_taken_branch_cost.  */
-  2,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),                   /* cond_not_taken_branch_cost.  */
 };
 
 /*  BDVER2 has optimized REP instruction for medium sized blocks, but for
@@ -1206,17 +1098,8 @@ const struct processor_costs bdver2_cost = {
   1, 2, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   bdver2_memcpy,
   bdver2_memset,
-  6,                                   /* scalar_stmt_cost.  */
-  4,                                   /* scalar load_cost.  */
-  4,                                   /* scalar_store_cost.  */
-  6,                                   /* vec_stmt_cost.  */
-  0,                                   /* vec_to_scalar_cost.  */
-  2,                                   /* scalar_to_vec_cost.  */
-  4,                                   /* vec_align_load_cost.  */
-  4,                                   /* vec_unalign_load_cost.  */
-  4,                                   /* vec_store_cost.  */
-  4,                                   /* cond_taken_branch_cost.  */
-  2,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),                   /* cond_not_taken_branch_cost.  */
 };
 
 
@@ -1306,17 +1189,8 @@ struct processor_costs bdver3_cost = {
   1, 2, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   bdver3_memcpy,
   bdver3_memset,
-  6,                                   /* scalar_stmt_cost.  */
-  4,                                   /* scalar load_cost.  */
-  4,                                   /* scalar_store_cost.  */
-  6,                                   /* vec_stmt_cost.  */
-  0,                                   /* vec_to_scalar_cost.  */
-  2,                                   /* scalar_to_vec_cost.  */
-  4,                                   /* vec_align_load_cost.  */
-  4,                                   /* vec_unalign_load_cost.  */
-  4,                                   /* vec_store_cost.  */
-  4,                                   /* cond_taken_branch_cost.  */
-  2,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),                   /* cond_not_taken_branch_cost.  */
 };
 
 /*  BDVER4 has optimized REP instruction for medium sized blocks, but for
@@ -1405,17 +1279,8 @@ struct processor_costs bdver4_cost = {
   1, 2, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   bdver4_memcpy,
   bdver4_memset,
-  6,                                   /* scalar_stmt_cost.  */
-  4,                                   /* scalar load_cost.  */
-  4,                                   /* scalar_store_cost.  */
-  6,                                   /* vec_stmt_cost.  */
-  0,                                   /* vec_to_scalar_cost.  */
-  2,                                   /* scalar_to_vec_cost.  */
-  4,                                   /* vec_align_load_cost.  */
-  4,                                   /* vec_unalign_load_cost.  */
-  4,                                   /* vec_store_cost.  */
-  4,                                   /* cond_taken_branch_cost.  */
-  2,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),                   /* cond_not_taken_branch_cost.  */
 };
 
 
@@ -1524,17 +1389,8 @@ struct processor_costs znver1_cost = {
   4, 4, 3, 6,                          /* reassoc int, fp, vec_int, vec_fp.  */
   znver1_memcpy,
   znver1_memset,
-  6,                                   /* scalar_stmt_cost.  */
-  4,                                   /* scalar load_cost.  */
-  4,                                   /* scalar_store_cost.  */
-  6,                                   /* vec_stmt_cost.  */
-  0,                                   /* vec_to_scalar_cost.  */
-  2,                                   /* scalar_to_vec_cost.  */
-  4,                                   /* vec_align_load_cost.  */
-  4,                                   /* vec_unalign_load_cost.  */
-  4,                                   /* vec_store_cost.  */
-  4,                                   /* cond_taken_branch_cost.  */
-  2,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),                   /* cond_not_taken_branch_cost.  */
 };
 
   /* BTVER1 has optimized REP instruction for medium sized blocks, but for
@@ -1624,17 +1480,8 @@ const struct processor_costs btver1_cost = {
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   btver1_memcpy,
   btver1_memset,
-  4,                                   /* scalar_stmt_cost.  */
-  2,                                   /* scalar load_cost.  */
-  2,                                   /* scalar_store_cost.  */
-  6,                                   /* vec_stmt_cost.  */
-  0,                                   /* vec_to_scalar_cost.  */
-  2,                                   /* scalar_to_vec_cost.  */
-  2,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  2,                                   /* vec_store_cost.  */
-  2,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (2),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs btver2_memcpy[2] = {
@@ -1721,17 +1568,8 @@ const struct processor_costs btver2_cost = {
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   btver2_memcpy,
   btver2_memset,
-  4,                                   /* scalar_stmt_cost.  */
-  2,                                   /* scalar load_cost.  */
-  2,                                   /* scalar_store_cost.  */
-  6,                                   /* vec_stmt_cost.  */
-  0,                                   /* vec_to_scalar_cost.  */
-  2,                                   /* scalar_to_vec_cost.  */
-  2,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  2,                                   /* vec_store_cost.  */
-  2,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (2),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs pentium4_memcpy[2] = {
@@ -1809,17 +1647,8 @@ struct processor_costs pentium4_cost = {
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   pentium4_memcpy,
   pentium4_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs nocona_memcpy[2] = {
@@ -1900,17 +1729,8 @@ struct processor_costs nocona_cost = {
   1, 1, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   nocona_memcpy,
   nocona_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs atom_memcpy[2] = {
@@ -1989,17 +1809,8 @@ struct processor_costs atom_cost = {
   2, 2, 2, 2,                          /* reassoc int, fp, vec_int, vec_fp.  */
   atom_memcpy,
   atom_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs slm_memcpy[2] = {
@@ -2078,17 +1889,8 @@ struct processor_costs slm_cost = {
   1, 2, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   slm_memcpy,
   slm_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  4,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs intel_memcpy[2] = {
@@ -2167,17 +1969,8 @@ struct processor_costs intel_cost = {
   1, 4, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   intel_memcpy,
   intel_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  4,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 /* Generic should produce code tuned for Core-i7 (and newer chips)
@@ -2265,17 +2058,8 @@ struct processor_costs generic_cost = {
   1, 2, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
   generic_memcpy,
   generic_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
 /* core_cost should produce code tuned for Core familly of CPUs.  */
@@ -2366,16 +2150,7 @@ struct processor_costs core_cost = {
   1, 4, 2, 2,                          /* reassoc int, fp, vec_int, vec_fp.  */
   core_memcpy,
   core_memset,
-  1,                                   /* scalar_stmt_cost.  */
-  1,                                   /* scalar load_cost.  */
-  1,                                   /* scalar_store_cost.  */
-  1,                                   /* vec_stmt_cost.  */
-  1,                                   /* vec_to_scalar_cost.  */
-  1,                                   /* scalar_to_vec_cost.  */
-  1,                                   /* vec_align_load_cost.  */
-  2,                                   /* vec_unalign_load_cost.  */
-  1,                                   /* vec_store_cost.  */
-  3,                                   /* cond_taken_branch_cost.  */
-  1,                                   /* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
 };
 
index 1380165912e7b3a35fa5be6b636f323c984dae22..22405be6a87c4425e99a086bc3ec2a55ad6779cf 100644 (file)
@@ -1,3 +1,7 @@
+2017-10-20  Jan Hubicka  <hubicka@ucw.cz>
+
+       * gcc.target/i386/pr79683.c: Disable costmodel.
+
 2017-10-21  Eric Botcazou  <ebotcazou@adacore.com>
 
        * gnat.dg/specs/discr_private.ads: Rename into ...
index cbd43fd2af0d489635fddc448b51b277a4e646b3..9e28d85fc89f2cc82ac413ecb591629cf189cdf2 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -msse2" } */
+/* { dg-options "-O3 -msse2 -fvect-cost-model=unlimited" } */
 
 struct s {
     __INT64_TYPE__ a;