arm-protos.h (struct cpu_vec_costs): New struct type.
authorChristophe Lyon <christophe.lyon@linaro.org>
Tue, 12 Feb 2013 14:53:45 +0000 (14:53 +0000)
committerChristophe Lyon <clyon@gcc.gnu.org>
Tue, 12 Feb 2013 14:53:45 +0000 (15:53 +0100)
2013-02-05  Christophe Lyon <christophe.lyon@linaro.org>

* config/arm/arm-protos.h (struct cpu_vec_costs): New struct type.
(struct tune_params): Add vec_costs field.
* config/arm/arm.c (arm_builtin_vectorization_cost)
(arm_add_stmt_cost): New functions.
(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST)
(TARGET_VECTORIZE_ADD_STMT_COST): Define.
(arm_default_vec_cost): New struct of type cpu_vec_costs.
(arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune)
(arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune)
(arm_cortex_a15_tune, arm_cortex_a5_tune, arm_cortex_a9_tune)
(arm_v6m_tune, arm_fa726te_tune): Define new vec_costs field.

From-SVN: r195977

gcc/ChangeLog
gcc/config/arm/arm-protos.h
gcc/config/arm/arm.c

index a0ced1db78cdc38d9ddd74cb1f23e3863e003c8a..0be8139732333ecad46609618f0d18b48c318291 100644 (file)
@@ -1,3 +1,17 @@
+2013-02-12  Christophe Lyon <christophe.lyon@linaro.org>
+
+       * config/arm/arm-protos.h (struct cpu_vec_costs): New struct type.
+       (struct tune_params): Add vec_costs field.
+       * config/arm/arm.c (arm_builtin_vectorization_cost)
+       (arm_add_stmt_cost): New functions.
+       (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST)
+       (TARGET_VECTORIZE_ADD_STMT_COST): Define.
+       (arm_default_vec_cost): New struct of type cpu_vec_costs.
+       (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune)
+       (arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune)
+       (arm_cortex_a15_tune, arm_cortex_a5_tune, arm_cortex_a9_tune)
+       (arm_v6m_tune, arm_fa726te_tune): Define new vec_costs field.
+
 2013-02-12  Richard Biener  <rguenther@suse.de>
 
        PR lto/56295
index e9f74dc17a2c64b4cc457efba26bc6cba867ca4f..ffa00c0f7b74cdb296158f8459af992f21273843 100644 (file)
@@ -224,6 +224,27 @@ extern const char *arm_mangle_type (const_tree);
 
 extern void arm_order_regs_for_local_alloc (void);
 
+/* Vectorizer cost model implementation.  */
+struct cpu_vec_costs {
+  const int scalar_stmt_cost;   /* Cost of any scalar operation, excluding
+                                  load and store.  */
+  const int scalar_load_cost;   /* Cost of scalar load.  */
+  const int scalar_store_cost;  /* Cost of scalar store.  */
+  const int vec_stmt_cost;      /* Cost of any vector operation, excluding
+                                   load, store, vector-to-scalar and
+                                   scalar-to-vector operation.  */
+  const int vec_to_scalar_cost;    /* Cost of vector-to-scalar operation.  */
+  const int scalar_to_vec_cost;    /* Cost of scalar-to-vector operation.  */
+  const int vec_align_load_cost;   /* Cost of aligned vector load.  */
+  const int vec_unalign_load_cost; /* Cost of unaligned vector load.  */
+  const int vec_unalign_store_cost; /* Cost of unaligned vector store.  */
+  const int vec_store_cost;        /* Cost of vector store.  */
+  const int cond_taken_branch_cost;    /* Cost of taken branch for vectorizer
+                                         cost model.  */
+  const int cond_not_taken_branch_cost;/* Cost of not taken branch for
+                                         vectorizer cost model.  */
+};
+
 #ifdef RTX_CODE
 /* This needs to be here because we need RTX_CODE and similar.  */
 
@@ -246,6 +267,8 @@ struct tune_params
      performance. The first element covers Thumb state and the second one
      is for ARM state.  */
   bool logical_op_non_short_circuit[2];
+  /* Vectorizer costs.  */
+  const struct cpu_vec_costs* vec_costs;
 };
 
 extern const struct tune_params *current_tune;
index d7877b0d8a484f91d335b6efa44d826065c2e44b..677d6da027d269ea8f299d9fbb86ce39016dc5f9 100644 (file)
@@ -268,6 +268,16 @@ static int arm_cortex_a5_branch_cost (bool, bool);
 
 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                             const unsigned char *sel);
+
+static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+                                          tree vectype,
+                                          int misalign ATTRIBUTE_UNUSED);
+static unsigned arm_add_stmt_cost (void *data, int count,
+                                  enum vect_cost_for_stmt kind,
+                                  struct _stmt_vec_info *stmt_info,
+                                  int misalign,
+                                  enum vect_cost_model_location where);
+
 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                                         bool op0_preserve_value);
 \f
@@ -629,6 +639,12 @@ static const struct attribute_spec arm_attribute_table[] =
 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
   arm_vectorize_vec_perm_const_ok
 
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+  arm_builtin_vectorization_cost
+#undef TARGET_VECTORIZE_ADD_STMT_COST
+#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
+
 #undef TARGET_CANONICALIZE_COMPARISON
 #define TARGET_CANONICALIZE_COMPARISON \
   arm_canonicalize_comparison
@@ -891,6 +907,23 @@ struct processors
   l1_size, \
   l1_line_size
 
+/* arm generic vectorizer costs.  */
+static const
+struct cpu_vec_costs arm_default_vec_cost = {
+  1,                                   /* scalar_stmt_cost.  */
+  1,                                   /* scalar_load_cost.  */
+  1,                                   /* scalar_store_cost.  */
+  1,                                   /* vec_stmt_cost.  */
+  1,                                   /* vec_to_scalar_cost.  */
+  1,                                   /* scalar_to_vec_cost.  */
+  1,                                   /* vec_align_load_cost.  */
+  1,                                   /* vec_unalign_load_cost.  */
+  1,                                   /* vec_unalign_store_cost.  */
+  1,                                   /* vec_store_cost.  */
+  3,                                   /* cond_taken_branch_cost.  */
+  1,                                   /* cond_not_taken_branch_cost.  */
+};
+
 const struct tune_params arm_slowmul_tune =
 {
   arm_slowmul_rtx_costs,
@@ -902,6 +935,7 @@ const struct tune_params arm_slowmul_tune =
   arm_default_branch_cost,
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
 };
 
 const struct tune_params arm_fastmul_tune =
@@ -915,6 +949,7 @@ const struct tune_params arm_fastmul_tune =
   arm_default_branch_cost,
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
 };
 
 /* StrongARM has early execution of branches, so a sequence that is worth
@@ -931,6 +966,7 @@ const struct tune_params arm_strongarm_tune =
   arm_default_branch_cost,
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
 };
 
 const struct tune_params arm_xscale_tune =
@@ -944,6 +980,7 @@ const struct tune_params arm_xscale_tune =
   arm_default_branch_cost,
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
 };
 
 const struct tune_params arm_9e_tune =
@@ -957,6 +994,7 @@ const struct tune_params arm_9e_tune =
   arm_default_branch_cost,
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
 };
 
 const struct tune_params arm_v6t2_tune =
@@ -970,6 +1008,7 @@ const struct tune_params arm_v6t2_tune =
   arm_default_branch_cost,
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
 };
 
 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
@@ -984,6 +1023,7 @@ const struct tune_params arm_cortex_tune =
   arm_default_branch_cost,
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
 };
 
 const struct tune_params arm_cortex_a15_tune =
@@ -997,6 +1037,7 @@ const struct tune_params arm_cortex_a15_tune =
   arm_default_branch_cost,
   true,                                                /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
 };
 
 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -1013,6 +1054,7 @@ const struct tune_params arm_cortex_a5_tune =
   arm_cortex_a5_branch_cost,
   false,                                       /* Prefer LDRD/STRD.  */
   {false, false},                              /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
 };
 
 const struct tune_params arm_cortex_a9_tune =
@@ -1026,6 +1068,7 @@ const struct tune_params arm_cortex_a9_tune =
   arm_default_branch_cost,
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
 };
 
 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
@@ -1041,6 +1084,7 @@ const struct tune_params arm_v6m_tune =
   arm_default_branch_cost,
   false,                                       /* Prefer LDRD/STRD.  */
   {false, false},                              /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
 };
 
 const struct tune_params arm_fa726te_tune =
@@ -1054,6 +1098,7 @@ const struct tune_params arm_fa726te_tune =
   arm_default_branch_cost,
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
 };
 
 
@@ -8696,6 +8741,94 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
     }
 }
 
+/* Vectorizer cost model implementation.  */
+
+/* Implement targetm.vectorize.builtin_vectorization_cost.  */
+static int
+arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+                               tree vectype,
+                               int misalign ATTRIBUTE_UNUSED)
+{
+  unsigned elements;
+  /* Most kinds are a direct lookup in current_tune->vec_costs.  */
+  switch (type_of_cost)
+    {
+      case scalar_stmt:
+        return current_tune->vec_costs->scalar_stmt_cost;
+
+      case scalar_load:
+        return current_tune->vec_costs->scalar_load_cost;
+
+      case scalar_store:
+        return current_tune->vec_costs->scalar_store_cost;
+
+      case vector_stmt:
+        return current_tune->vec_costs->vec_stmt_cost;
+
+      case vector_load:
+        return current_tune->vec_costs->vec_align_load_cost;
+
+      case vector_store:
+        return current_tune->vec_costs->vec_store_cost;
+
+      case vec_to_scalar:
+        return current_tune->vec_costs->vec_to_scalar_cost;
+
+      case scalar_to_vec:
+        return current_tune->vec_costs->scalar_to_vec_cost;
+
+      case unaligned_load:
+        return current_tune->vec_costs->vec_unalign_load_cost;
+
+      case unaligned_store:
+        return current_tune->vec_costs->vec_unalign_store_cost;
+
+      case cond_branch_taken:
+        return current_tune->vec_costs->cond_taken_branch_cost;
+
+      case cond_branch_not_taken:
+        return current_tune->vec_costs->cond_not_taken_branch_cost;
+      /* These two kinds have no field of their own; use vec_stmt_cost.  */
+      case vec_perm:
+      case vec_promote_demote:
+        return current_tune->vec_costs->vec_stmt_cost;
+      /* NOTE(review): assumes VECTYPE is non-null here -- confirm callers.  */
+      case vec_construct:  /* Computed, not read from vec_costs.  */
+       elements = TYPE_VECTOR_SUBPARTS (vectype);
+       return elements / 2 + 1;
+
+      default:
+        gcc_unreachable ();
+    }
+}
+
+/* Implement targetm.vectorize.add_stmt_cost.  DATA is used as an array of
+   unsigned totals indexed by WHERE; the return value is the amount added.  */
+static unsigned
+arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
+                  struct _stmt_vec_info *stmt_info, int misalign,
+                  enum vect_cost_model_location where)
+{
+  unsigned *cost = (unsigned *) data;
+  unsigned retval = 0;
+  /* With flag_vect_cost_model clear nothing is accumulated and 0 is returned.  */
+  if (flag_vect_cost_model)
+    {
+      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
+
+      /* Statements in an inner loop relative to the loop being
+        vectorized are weighted more heavily.  The value here is
+        arbitrary and could potentially be improved with analysis.  */
+      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+       count *= 50;  /* FIXME.  */
+
+      retval = (unsigned) (count * stmt_cost);
+      cost[where] += retval;
+    }
+
+  return retval;
+}
 
 /* Return true if and only if this insn can dual-issue only as older.  */
 static bool