From: Qian Jianhua
Date: Wed, 13 Jan 2021 07:22:09 +0000 (+0800)
Subject: [PATCH v2] aarch64: Add cpu cost tables for A64FX
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3f325179b32c4defea58c703190f4d5fa55028bd;p=gcc.git

[PATCH v2] aarch64: Add cpu cost tables for A64FX

This patch adds cost tables for A64FX.

2021-01-13  Qian jianhua

gcc/
	* config/aarch64/aarch64-cost-tables.h (a64fx_extra_costs): New.
	* config/aarch64/aarch64.c (a64fx_addrcost_table): New.
	(a64fx_regmove_cost, a64fx_vector_cost): New.
	(a64fx_tunings): Use the newly added cost tables.
---

diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index 3b8a16864bc..c309f88cbd5 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -541,4 +541,107 @@ const struct cpu_cost_table tsv110_extra_costs =
   }
 };
 
+const struct cpu_cost_table a64fx_extra_costs =
+{
+  /* ALU */
+  {
+    0,                 /* arith.  */
+    0,                 /* logical.  */
+    0,                 /* shift.  */
+    0,                 /* shift_reg.  */
+    COSTS_N_INSNS (1), /* arith_shift.  */
+    COSTS_N_INSNS (1), /* arith_shift_reg.  */
+    COSTS_N_INSNS (1), /* log_shift.  */
+    COSTS_N_INSNS (1), /* log_shift_reg.  */
+    0,                 /* extend.  */
+    COSTS_N_INSNS (1), /* extend_arith.  */
+    0,                 /* bfi.  */
+    0,                 /* bfx.  */
+    0,                 /* clz.  */
+    0,                 /* rev.  */
+    0,                 /* non_exec.  */
+    true               /* non_exec_costs_exec.  */
+  },
+  {
+    /* MULT SImode */
+    {
+      COSTS_N_INSNS (4),       /* simple.  */
+      COSTS_N_INSNS (4),       /* flag_setting.  */
+      COSTS_N_INSNS (4),       /* extend.  */
+      COSTS_N_INSNS (5),       /* add.  */
+      COSTS_N_INSNS (5),       /* extend_add.  */
+      COSTS_N_INSNS (18)       /* idiv.  */
+    },
+    /* MULT DImode */
+    {
+      COSTS_N_INSNS (4),       /* simple.  */
+      0,                       /* flag_setting (N/A).  */
+      COSTS_N_INSNS (4),       /* extend.  */
+      COSTS_N_INSNS (5),       /* add.  */
+      COSTS_N_INSNS (5),       /* extend_add.  */
+      COSTS_N_INSNS (26)       /* idiv.  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (4),         /* load.  */
+    COSTS_N_INSNS (4),         /* load_sign_extend.  */
+    COSTS_N_INSNS (5),         /* ldrd.  */
+    COSTS_N_INSNS (4),         /* ldm_1st.  */
+    1,                         /* ldm_regs_per_insn_1st.  */
+    2,                         /* ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (4),         /* loadf.  */
+    COSTS_N_INSNS (4),         /* loadd.  */
+    COSTS_N_INSNS (5),         /* load_unaligned.  */
+    0,                         /* store.  */
+    0,                         /* strd.  */
+    0,                         /* stm_1st.  */
+    1,                         /* stm_regs_per_insn_1st.  */
+    2,                         /* stm_regs_per_insn_subsequent.  */
+    0,                         /* storef.  */
+    0,                         /* stored.  */
+    0,                         /* store_unaligned.  */
+    COSTS_N_INSNS (1),         /* loadv.  */
+    COSTS_N_INSNS (1)          /* storev.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (6),       /* div.  */
+      COSTS_N_INSNS (1),       /* mult.  */
+      COSTS_N_INSNS (1),       /* mult_addsub.  */
+      COSTS_N_INSNS (2),       /* fma.  */
+      COSTS_N_INSNS (1),       /* addsub.  */
+      COSTS_N_INSNS (1),       /* fpconst.  */
+      COSTS_N_INSNS (1),       /* neg.  */
+      COSTS_N_INSNS (1),       /* compare.  */
+      COSTS_N_INSNS (2),       /* widen.  */
+      COSTS_N_INSNS (2),       /* narrow.  */
+      COSTS_N_INSNS (2),       /* toint.  */
+      COSTS_N_INSNS (2),       /* fromint.  */
+      COSTS_N_INSNS (2)        /* roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (11),      /* div.  */
+      COSTS_N_INSNS (1),       /* mult.  */
+      COSTS_N_INSNS (1),       /* mult_addsub.  */
+      COSTS_N_INSNS (2),       /* fma.  */
+      COSTS_N_INSNS (1),       /* addsub.  */
+      COSTS_N_INSNS (1),       /* fpconst.  */
+      COSTS_N_INSNS (1),       /* neg.  */
+      COSTS_N_INSNS (1),       /* compare.  */
+      COSTS_N_INSNS (2),       /* widen.  */
+      COSTS_N_INSNS (2),       /* narrow.  */
+      COSTS_N_INSNS (2),       /* toint.  */
+      COSTS_N_INSNS (2),       /* fromint.  */
+      COSTS_N_INSNS (2)        /* roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (1)  /* alu.  */
+  }
+};
+
 #endif
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 88106bbcaf8..7536b75003b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -464,6 +464,22 @@ static const struct cpu_addrcost_table qdf24xx_addrcost_table =
   2, /* imm_offset  */
 };
 
+static const struct cpu_addrcost_table a64fx_addrcost_table =
+{
+  {
+    1, /* hi  */
+    1, /* si  */
+    1, /* di  */
+    2, /* ti  */
+  },
+  0, /* pre_modify  */
+  0, /* post_modify  */
+  2, /* register_offset  */
+  3, /* register_sextend  */
+  3, /* register_zextend  */
+  0, /* imm_offset  */
+};
+
 static const struct cpu_regmove_cost generic_regmove_cost =
 {
   1, /* GP2GP  */
@@ -559,6 +575,16 @@ static const struct cpu_regmove_cost tsv110_regmove_cost =
   2  /* FP2FP  */
 };
 
+static const struct cpu_regmove_cost a64fx_regmove_cost =
+{
+  1, /* GP2GP  */
+  /* Avoid the use of slow int<->fp moves for spilling by setting
+     their cost higher than memmov_cost.  */
+  5, /* GP2FP  */
+  7, /* FP2GP  */
+  2  /* FP2FP  */
+};
+
 /* Generic costs for Advanced SIMD vector operations.  */
 static const advsimd_vec_cost generic_advsimd_vector_cost =
 {
@@ -600,6 +626,44 @@ static const struct cpu_vector_cost generic_vector_cost =
   &generic_sve_vector_cost /* sve  */
 };
 
+static const advsimd_vec_cost a64fx_advsimd_vector_cost =
+{
+  2, /* int_stmt_cost  */
+  5, /* fp_stmt_cost  */
+  3, /* permute_cost  */
+  13, /* vec_to_scalar_cost  */
+  4, /* scalar_to_vec_cost  */
+  6, /* align_load_cost  */
+  6, /* unalign_load_cost  */
+  1, /* unalign_store_cost  */
+  1  /* store_cost  */
+};
+
+static const sve_vec_cost a64fx_sve_vector_cost =
+{
+  2, /* int_stmt_cost  */
+  5, /* fp_stmt_cost  */
+  3, /* permute_cost  */
+  13, /* vec_to_scalar_cost  */
+  4, /* scalar_to_vec_cost  */
+  6, /* align_load_cost  */
+  6, /* unalign_load_cost  */
+  1, /* unalign_store_cost  */
+  1  /* store_cost  */
+};
+
+static const struct cpu_vector_cost a64fx_vector_cost =
+{
+  1, /* scalar_int_stmt_cost  */
+  5, /* scalar_fp_stmt_cost  */
+  4, /* scalar_load_cost  */
+  1, /* scalar_store_cost  */
+  3, /* cond_taken_branch_cost  */
+  1, /* cond_not_taken_branch_cost  */
+  &a64fx_advsimd_vector_cost, /* advsimd  */
+  &a64fx_sve_vector_cost /* sve  */
+};
+
 static const advsimd_vec_cost qdf24xx_advsimd_vector_cost =
 {
   1, /* int_stmt_cost  */
@@ -1460,10 +1524,10 @@ static const struct tune_params neoversen2_tunings =
 
 static const struct tune_params a64fx_tunings =
 {
-  &generic_extra_costs,
-  &generic_addrcost_table,
-  &generic_regmove_cost,
-  &generic_vector_cost,
+  &a64fx_extra_costs,
+  &a64fx_addrcost_table,
+  &a64fx_regmove_cost,
+  &a64fx_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
   SVE_512, /* sve_width  */
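Note for reviewers (not part of the patch): the extra-cost tables above are expressed in COSTS_N_INSNS units.  The standalone sketch below is only an illustration of how such an entry is typically combined with the one-instruction baseline used by the aarch64 rtx-cost code, not the backend's actual hook.  The COSTS_N_INSNS definition matches gcc/rtl.h; baseline_plus_extra is a hypothetical helper invented for this example.

/* Standalone sketch: costs are in quarters of a "cheap" instruction.  */
#include <stdio.h>

#define COSTS_N_INSNS(n) ((n) * 4)	/* as defined in gcc/rtl.h */

/* Hypothetical helper: the backend typically starts from a one-insn
   baseline and adds the table's extra cost on top of it.  */
static int
baseline_plus_extra (int extra)
{
  return COSTS_N_INSNS (1) + extra;
}

int
main (void)
{
  /* A64FX SImode multiply: extra cost of COSTS_N_INSNS (4) from the
     table above, giving 4 + 16 = 20 in rtx-cost units.  */
  printf ("mult SImode rtx cost: %d\n",
	  baseline_plus_extra (COSTS_N_INSNS (4)));
  return 0;
}

For example, with -mcpu=a64fx the a64fx_tunings entry selects these tables, so an SImode multiply is costed noticeably higher than a plain ALU operation, which is the effect the table values above are meant to capture.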