From 5c5a67e61bb8b97f46ec6581a7d187d89ffc2d88 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Thu, 19 Nov 2020 16:14:11 +0000 Subject: [PATCH] AArch64: Add cost table for Cortex-A76 Add an initial cost table for Cortex-A76 - this is copied from cotexa57_extra_costs but updated based on the Optimization Guide. Use the new cost table on all Neoverse tunings and ensure the tunings are consistent for all. As a result more compact code is generated with more combined shift+alu operations. Eg. -mcpu=cortex-a76 will now merge the shifts in: int f(int x, int y) { return (x & y << 3) * (x | y << 3); } and w2, w0, w1, lsl 3 orr w0, w0, w1, lsl 3 mul w0, w2, w0 ret SPEC2017 codesize improves by 0.02% and SPECINT2017 shows 0.24% gain. 2020-11-18 Wilco Dijkstra gcc/ * config/aarch64/aarch64.c (neoversen1_tunings): Use new cortexa76_extra_costs. (neoversev1_tunings): Likewise. (neoversen2_tunines): Likewise. * config/arm/aarch-cost-tables.h (cortexa76_extra_costs): add new costs. --- gcc/config/aarch64/aarch64.c | 6 +- gcc/config/arm/aarch-cost-tables.h | 103 +++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 1cf4900eda5..67c25878779 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1312,7 +1312,7 @@ static const struct tune_params thunderx3t110_tunings = static const struct tune_params neoversen1_tunings = { - &cortexa57_extra_costs, + &cortexa76_extra_costs, &generic_addrcost_table, &generic_regmove_cost, &cortexa57_vector_cost, @@ -1338,7 +1338,7 @@ static const struct tune_params neoversen1_tunings = static const struct tune_params neoversev1_tunings = { - &cortexa57_extra_costs, + &cortexa76_extra_costs, &generic_addrcost_table, &generic_regmove_cost, &cortexa57_vector_cost, @@ -1364,7 +1364,7 @@ static const struct tune_params neoversev1_tunings = static const struct tune_params neoversen2_tunings = { - &cortexa57_extra_costs, + &cortexa76_extra_costs, &generic_addrcost_table, &generic_regmove_cost, &cortexa57_vector_cost, diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h index cf818659901..1b9d53d07b5 100644 --- a/gcc/config/arm/aarch-cost-tables.h +++ b/gcc/config/arm/aarch-cost-tables.h @@ -331,6 +331,109 @@ const struct cpu_cost_table cortexa57_extra_costs = } }; +const struct cpu_cost_table cortexa76_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + 0, /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + 0, /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ + 0, /* rev. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (1), /* simple. */ + COSTS_N_INSNS (2), /* flag_setting. */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (1), /* add. */ + COSTS_N_INSNS (1), /* extend_add. */ + COSTS_N_INSNS (6) /* idiv. */ + }, + /* MULT DImode */ + { + COSTS_N_INSNS (3), /* simple. */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (3), /* add. */ + COSTS_N_INSNS (1), /* extend_add. */ + COSTS_N_INSNS (10) /* idiv. */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (3), /* load. */ + COSTS_N_INSNS (3), /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (2), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (4), /* loadf. */ + COSTS_N_INSNS (4), /* loadd. */ + COSTS_N_INSNS (5), /* load_unaligned. */ + 0, /* store. */ + 0, /* strd. */ + 0, /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (10), /* div. */ + COSTS_N_INSNS (2), /* mult. */ + COSTS_N_INSNS (3), /* mult_addsub. */ + COSTS_N_INSNS (3), /* fma. */ + COSTS_N_INSNS (1), /* addsub. */ + 0, /* fpconst. */ + 0, /* neg. */ + 0, /* compare. */ + COSTS_N_INSNS (1), /* widen. */ + COSTS_N_INSNS (1), /* narrow. */ + COSTS_N_INSNS (1), /* toint. */ + COSTS_N_INSNS (1), /* fromint. */ + COSTS_N_INSNS (1) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (15), /* div. */ + COSTS_N_INSNS (2), /* mult. */ + COSTS_N_INSNS (3), /* mult_addsub. */ + COSTS_N_INSNS (3), /* fma. */ + COSTS_N_INSNS (1), /* addsub. */ + 0, /* fpconst. */ + 0, /* neg. */ + 0, /* compare. */ + COSTS_N_INSNS (1), /* widen. */ + COSTS_N_INSNS (1), /* narrow. */ + COSTS_N_INSNS (1), /* toint. */ + COSTS_N_INSNS (1), /* fromint. */ + COSTS_N_INSNS (1) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + const struct cpu_cost_table exynosm1_extra_costs = { /* ALU */ -- 2.30.2