From: Christophe Lyon Date: Tue, 12 Feb 2013 14:53:45 +0000 (+0000) Subject: arm-protos.h (struct cpu_vec_costs): New struct type. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2597da229a2261de8576099213ffdef4bf946a7c;p=gcc.git arm-protos.h (struct cpu_vec_costs): New struct type. 2013-02-05 Christophe Lyon * config/arm/arm-protos.h (struct cpu_vec_costs): New struct type. (struct tune_params): Add vec_costs field. * config/arm/arm.c (arm_builtin_vectorization_cost) (arm_add_stmt_cost): New functions. (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST) (TARGET_VECTORIZE_ADD_STMT_COST): Define. (arm_default_vec_cost): New struct of type cpu_vec_costs. (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune) (arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune) (arm_cortex_a15_tune, arm_cortex_a5_tune, arm_cortex_a9_tune) (arm_v6m_tune, arm_fa726te_tune): Define new vec_costs field. From-SVN: r195977 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a0ced1db78c..0be81397323 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2013-02-12 Christophe Lyon + + * config/arm/arm-protos.h (struct cpu_vec_costs): New struct type. + (struct tune_params): Add vec_costs field. + * config/arm/arm.c (arm_builtin_vectorization_cost) + (arm_add_stmt_cost): New functions. + (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST) + (TARGET_VECTORIZE_ADD_STMT_COST): Define. + (arm_default_vec_cost): New struct of type cpu_vec_costs. + (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune) + (arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune) + (arm_cortex_a15_tune, arm_cortex_a5_tune, arm_cortex_a9_tune) + (arm_v6m_tune, arm_fa726te_tune): Define new vec_costs field. 
+ 2013-02-12 Richard Biener PR lto/56295 diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index e9f74dc17a2..ffa00c0f7b7 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -224,6 +224,27 @@ extern const char *arm_mangle_type (const_tree); extern void arm_order_regs_for_local_alloc (void); +/* Vectorizer cost model implementation. */ +struct cpu_vec_costs { + const int scalar_stmt_cost; /* Cost of any scalar operation, excluding + load and store. */ + const int scalar_load_cost; /* Cost of scalar load. */ + const int scalar_store_cost; /* Cost of scalar store. */ + const int vec_stmt_cost; /* Cost of any vector operation, excluding + load, store, vector-to-scalar and + scalar-to-vector operation. */ + const int vec_to_scalar_cost; /* Cost of vector-to-scalar operation. */ + const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */ + const int vec_align_load_cost; /* Cost of aligned vector load. */ + const int vec_unalign_load_cost; /* Cost of unaligned vector load. */ + const int vec_unalign_store_cost; /* Cost of unaligned vector store. */ + const int vec_store_cost; /* Cost of vector store. */ + const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer + cost model. */ + const int cond_not_taken_branch_cost;/* Cost of not taken branch for + vectorizer cost model. */ +}; + #ifdef RTX_CODE /* This needs to be here because we need RTX_CODE and similar. */ @@ -246,6 +267,8 @@ struct tune_params performance. The first element covers Thumb state and the second one is for ARM state. */ bool logical_op_non_short_circuit[2]; + /* Vectorizer costs. 
*/ + const struct cpu_vec_costs* vec_costs; }; extern const struct tune_params *current_tune; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index d7877b0d8a4..677d6da027d 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -268,6 +268,16 @@ static int arm_cortex_a5_branch_cost (bool, bool); static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, const unsigned char *sel); + +static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, + int misalign ATTRIBUTE_UNUSED); +static unsigned arm_add_stmt_cost (void *data, int count, + enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, + int misalign, + enum vect_cost_model_location where); + static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, bool op0_preserve_value); @@ -629,6 +639,12 @@ static const struct attribute_spec arm_attribute_table[] = #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ arm_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + arm_builtin_vectorization_cost +#undef TARGET_VECTORIZE_ADD_STMT_COST +#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost + #undef TARGET_CANONICALIZE_COMPARISON #define TARGET_CANONICALIZE_COMPARISON \ arm_canonicalize_comparison @@ -891,6 +907,23 @@ struct processors l1_size, \ l1_line_size +/* arm generic vectorizer costs. */ +static const +struct cpu_vec_costs arm_default_vec_cost = { + 1, /* scalar_stmt_cost. */ + 1, /* scalar_load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 1, /* vec_unalign_load_cost. */ + 1, /* vec_unalign_store_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + const struct tune_params arm_slowmul_tune = { arm_slowmul_rtx_costs, @@ -902,6 +935,7 @@ const struct tune_params arm_slowmul_tune = arm_default_branch_cost, false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; const struct tune_params arm_fastmul_tune = @@ -915,6 +949,7 @@ const struct tune_params arm_fastmul_tune = arm_default_branch_cost, false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; /* StrongARM has early execution of branches, so a sequence that is worth @@ -931,6 +966,7 @@ const struct tune_params arm_strongarm_tune = arm_default_branch_cost, false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; const struct tune_params arm_xscale_tune = @@ -944,6 +980,7 @@ const struct tune_params arm_xscale_tune = arm_default_branch_cost, false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; const struct tune_params arm_9e_tune = @@ -957,6 +994,7 @@ const struct tune_params arm_9e_tune = arm_default_branch_cost, false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; const struct tune_params arm_v6t2_tune = @@ -970,6 +1008,7 @@ const struct tune_params arm_v6t2_tune = arm_default_branch_cost, false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; /* Generic Cortex tuning. Use more specific tunings if appropriate. */ @@ -984,6 +1023,7 @@ const struct tune_params arm_cortex_tune = arm_default_branch_cost, false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. 
*/ }; const struct tune_params arm_cortex_a15_tune = @@ -997,6 +1037,7 @@ const struct tune_params arm_cortex_a15_tune = arm_default_branch_cost, true, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; /* Branches can be dual-issued on Cortex-A5, so conditional execution is @@ -1013,6 +1054,7 @@ const struct tune_params arm_cortex_a5_tune = arm_cortex_a5_branch_cost, false, /* Prefer LDRD/STRD. */ {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; const struct tune_params arm_cortex_a9_tune = @@ -1026,6 +1068,7 @@ const struct tune_params arm_cortex_a9_tune = arm_default_branch_cost, false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than @@ -1041,6 +1084,7 @@ const struct tune_params arm_v6m_tune = arm_default_branch_cost, false, /* Prefer LDRD/STRD. */ {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; const struct tune_params arm_fa726te_tune = @@ -1054,6 +1098,7 @@ const struct tune_params arm_fa726te_tune = arm_default_branch_cost, false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; @@ -8696,6 +8741,94 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass, } } +/* Vectorizer cost model implementation. */ + +/* Implement targetm.vectorize.builtin_vectorization_cost. 
*/ +static int +arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, + int misalign ATTRIBUTE_UNUSED) +{ + unsigned elements; + + switch (type_of_cost) + { + case scalar_stmt: + return current_tune->vec_costs->scalar_stmt_cost; + + case scalar_load: + return current_tune->vec_costs->scalar_load_cost; + + case scalar_store: + return current_tune->vec_costs->scalar_store_cost; + + case vector_stmt: + return current_tune->vec_costs->vec_stmt_cost; + + case vector_load: + return current_tune->vec_costs->vec_align_load_cost; + + case vector_store: + return current_tune->vec_costs->vec_store_cost; + + case vec_to_scalar: + return current_tune->vec_costs->vec_to_scalar_cost; + + case scalar_to_vec: + return current_tune->vec_costs->scalar_to_vec_cost; + + case unaligned_load: + return current_tune->vec_costs->vec_unalign_load_cost; + + case unaligned_store: + return current_tune->vec_costs->vec_unalign_store_cost; + + case cond_branch_taken: + return current_tune->vec_costs->cond_taken_branch_cost; + + case cond_branch_not_taken: + return current_tune->vec_costs->cond_not_taken_branch_cost; + + case vec_perm: + case vec_promote_demote: + return current_tune->vec_costs->vec_stmt_cost; + + case vec_construct: + elements = TYPE_VECTOR_SUBPARTS (vectype); + return elements / 2 + 1; + + default: + gcc_unreachable (); + } +} + +/* Implement targetm.vectorize.add_stmt_cost. */ + +static unsigned +arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, int misalign, + enum vect_cost_model_location where) +{ + unsigned *cost = (unsigned *) data; + unsigned retval = 0; + + if (flag_vect_cost_model) + { + tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; + int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign); + + /* Statements in an inner loop relative to the loop being + vectorized are weighted more heavily. 
The value here is + arbitrary and could potentially be improved with analysis. */ + if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + count *= 50; /* FIXME. */ + + retval = (unsigned) (count * stmt_cost); + cost[where] += retval; + } + + return retval; +} /* Return true if and only if this insn can dual-issue only as older. */ static bool