From b6875aac2b5a0e5e1b284e4934b0f12e8a2afdd0 Mon Sep 17 00:00:00 2001 From: Kugan Vivekanandarajah Date: Wed, 20 May 2015 03:05:10 +0000 Subject: [PATCH] aarch-common-protos.h (struct mem_cost_table): Added new fields loadv and storev. gcc/ChangeLog: 2015-05-20 Kugan Vivekanandarajah Jim Wilson * config/arm/aarch-common-protos.h (struct mem_cost_table): Added new fields loadv and storev. * config/aarch64/aarch64-cost-tables.h (thunderx_extra_costs): Initialize loadv and storev. * config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise. (cortexa53_extra_costs): Likewise. (cortexa57_extra_costs): Likewise. (xgene1_extra_costs): Likewise. * config/aarch64/aarch64.c (aarch64_rtx_costs): Update vector rtx_costs. 2015-05-20 Kugan Vivekanandarajah * config/arm/arm.c (cortexa9_extra_costs): Initialize loadv and storev. (cortexa8_extra_costs): Likewise. (cortexa5_extra_costs): Likewise. (cortexa7_extra_costs): Likewise. (cortexa12_extra_costs): Likewise. (cortexa15_extra_costs): Likewise. (v7m_extra_costs): Likewise. Co-Authored-By: Jim Wilson From-SVN: r223432 --- gcc/ChangeLog | 25 ++ gcc/config/aarch64/aarch64-cost-tables.h | 4 +- gcc/config/aarch64/aarch64.c | 279 ++++++++++++++++++----- gcc/config/arm/aarch-common-protos.h | 2 + gcc/config/arm/aarch-cost-tables.h | 14 +- gcc/config/arm/arm.c | 28 ++- 6 files changed, 280 insertions(+), 72 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 95114fa00a4..4c8eb3da190 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2015-05-20 Kugan Vivekanandarajah + Jim Wilson + + * config/arm/aarch-common-protos.h (struct mem_cost_table): Added + new fields loadv and storev. + * config/aarch64/aarch64-cost-tables.h (thunderx_extra_costs): + Initialize loadv and storev. + * config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise. + (cortexa53_extra_costs): Likewise. + (cortexa57_extra_costs): Likewise. + (xgene1_extra_costs): Likewise. + * config/aarch64/aarch64.c (aarch64_rtx_costs): Update vector + rtx_costs. + +2015-05-20 Kugan Vivekanandarajah + + * config/arm/arm.c (cortexa9_extra_costs): Initialize loadv and + storev. + (cortexa8_extra_costs): Likewise. + (cortexa5_extra_costs): Likewise. + (cortexa7_extra_costs): Likewise. + (cortexa12_extra_costs): Likewise. + (cortexa15_extra_costs): Likewise. + (v7m_extra_costs): Likewise. + 2015-05-20 Jeff Law * tree-ssa-threadupdate.c (thread_single_edge): Use delete_jump_thread diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h index ae2b547608a..939125c0e88 100644 --- a/gcc/config/aarch64/aarch64-cost-tables.h +++ b/gcc/config/aarch64/aarch64-cost-tables.h @@ -83,7 +83,9 @@ const struct cpu_cost_table thunderx_extra_costs = 0, /* N/A: Stm_regs_per_insn_subsequent. */ 0, /* Storef. */ 0, /* Stored. */ - COSTS_N_INSNS (1) /* Store_unaligned. */ + COSTS_N_INSNS (1), /* Store_unaligned. */ + COSTS_N_INSNS (1), /* Loadv. */ + COSTS_N_INSNS (1) /* Storev. */ }, { /* FP SFmode */ diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 7f0cc0dad1a..6f71e6649d6 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -5617,16 +5617,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, above this default. */ *cost = COSTS_N_INSNS (1); - /* TODO: The cost infrastructure currently does not handle - vector operations. Assume that all vector operations - are equally expensive. */ - if (VECTOR_MODE_P (mode)) - { - if (speed) - *cost += extra_cost->vect.alu; - return true; - } - switch (code) { case SET: @@ -5641,7 +5631,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, if (speed) { rtx address = XEXP (op0, 0); - if (GET_MODE_CLASS (mode) == MODE_INT) + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->ldst.storev; + else if (GET_MODE_CLASS (mode) == MODE_INT) *cost += extra_cost->ldst.store; else if (mode == SFmode) *cost += extra_cost->ldst.storef; @@ -5662,15 +5654,22 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, /* Fall through. */ case REG: + /* The cost is one per vector-register copied. */ + if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1)) + { + int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) + / GET_MODE_SIZE (V4SImode); + *cost = COSTS_N_INSNS (n_minus_1 + 1); + } /* const0_rtx is in general free, but we will use an instruction to set a register to 0. */ - if (REG_P (op1) || op1 == const0_rtx) - { - /* The cost is 1 per register copied. */ - int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) + else if (REG_P (op1) || op1 == const0_rtx) + { + /* The cost is 1 per register copied. */ + int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) / UNITS_PER_WORD; - *cost = COSTS_N_INSNS (n_minus_1 + 1); - } + *cost = COSTS_N_INSNS (n_minus_1 + 1); + } else /* Cost is just the cost of the RHS of the set. */ *cost += rtx_cost (op1, SET, 1, speed); @@ -5768,7 +5767,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, approximation for the additional cost of the addressing mode. */ rtx address = XEXP (x, 0); - if (GET_MODE_CLASS (mode) == MODE_INT) + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->ldst.loadv; + else if (GET_MODE_CLASS (mode) == MODE_INT) *cost += extra_cost->ldst.load; else if (mode == SFmode) *cost += extra_cost->ldst.loadf; @@ -5785,6 +5786,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, case NEG: op0 = XEXP (x, 0); + if (VECTOR_MODE_P (mode)) + { + if (speed) + { + /* FNEG. */ + *cost += extra_cost->vect.alu; + } + return false; + } + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) { if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE @@ -5823,7 +5834,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, case CLRSB: case CLZ: if (speed) - *cost += extra_cost->alu.clz; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.clz; + } return false; @@ -5909,6 +5925,20 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, return false; } + if (VECTOR_MODE_P (mode)) + { + /* Vector compare. */ + if (speed) + *cost += extra_cost->vect.alu; + + if (aarch64_float_const_zero_rtx_p (op1)) + { + /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra + cost. */ + return true; + } + return false; + } return false; case MINUS: @@ -5961,12 +5991,21 @@ cost_minus: if (speed) { - if (GET_MODE_CLASS (mode) == MODE_INT) - /* SUB(S). */ - *cost += extra_cost->alu.arith; + if (VECTOR_MODE_P (mode)) + { + /* Vector SUB. */ + *cost += extra_cost->vect.alu; + } + else if (GET_MODE_CLASS (mode) == MODE_INT) + { + /* SUB(S). */ + *cost += extra_cost->alu.arith; + } else if (GET_MODE_CLASS (mode) == MODE_FLOAT) - /* FSUB. */ - *cost += extra_cost->fp[mode == DFmode].addsub; + { + /* FSUB. */ + *cost += extra_cost->fp[mode == DFmode].addsub; + } } return true; } @@ -6030,12 +6069,21 @@ cost_plus: if (speed) { - if (GET_MODE_CLASS (mode) == MODE_INT) - /* ADD. */ - *cost += extra_cost->alu.arith; + if (VECTOR_MODE_P (mode)) + { + /* Vector ADD. */ + *cost += extra_cost->vect.alu; + } + else if (GET_MODE_CLASS (mode) == MODE_INT) + { + /* ADD. */ + *cost += extra_cost->alu.arith; + } else if (GET_MODE_CLASS (mode) == MODE_FLOAT) - /* FADD. */ - *cost += extra_cost->fp[mode == DFmode].addsub; + { + /* FADD. */ + *cost += extra_cost->fp[mode == DFmode].addsub; + } } return true; } @@ -6044,8 +6092,12 @@ cost_plus: *cost = COSTS_N_INSNS (1); if (speed) - *cost += extra_cost->alu.rev; - + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.rev; + } return false; case IOR: @@ -6053,10 +6105,14 @@ cost_plus: { *cost = COSTS_N_INSNS (1); - if (speed) - *cost += extra_cost->alu.rev; - - return true; + if (speed) + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.rev; + } + return true; } if (aarch64_extr_rtx_p (x, &op0, &op1)) @@ -6075,6 +6131,13 @@ cost_plus: op0 = XEXP (x, 0); op1 = XEXP (x, 1); + if (VECTOR_MODE_P (mode)) + { + if (speed) + *cost += extra_cost->vect.alu; + return true; + } + if (code == AND && GET_CODE (op0) == MULT && CONST_INT_P (XEXP (op0, 1)) @@ -6143,6 +6206,13 @@ cost_plus: x = XEXP (x, 0); op0 = aarch64_strip_shift (x); + if (VECTOR_MODE_P (mode)) + { + /* Vector NOT. */ + *cost += extra_cost->vect.alu; + return false; + } + /* MVN-shifted-reg. */ if (op0 != x) { @@ -6214,10 +6284,19 @@ cost_plus: return true; } - /* UXTB/UXTH. */ if (speed) - *cost += extra_cost->alu.extend; - + { + if (VECTOR_MODE_P (mode)) + { + /* UMOV. */ + *cost += extra_cost->vect.alu; + } + else + { + /* UXTB/UXTH. */ + *cost += extra_cost->alu.extend; + } + } return false; case SIGN_EXTEND: @@ -6237,7 +6316,12 @@ cost_plus: } if (speed) - *cost += extra_cost->alu.extend; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.extend; + } return false; case ASHIFT: @@ -6246,10 +6330,20 @@ cost_plus: if (CONST_INT_P (op1)) { - /* LSL (immediate), UBMF, UBFIZ and friends. These are all - aliases. */ if (speed) - *cost += extra_cost->alu.shift; + { + if (VECTOR_MODE_P (mode)) + { + /* Vector shift (immediate). */ + *cost += extra_cost->vect.alu; + } + else + { + /* LSL (immediate), UBMF, UBFIZ and friends. These are all + aliases. */ + *cost += extra_cost->alu.shift; + } + } /* We can incorporate zero/sign extend for free. */ if (GET_CODE (op0) == ZERO_EXTEND @@ -6261,10 +6355,19 @@ cost_plus: } else { - /* LSLV. */ if (speed) - *cost += extra_cost->alu.shift_reg; - + { + if (VECTOR_MODE_P (mode)) + { + /* Vector shift (register). */ + *cost += extra_cost->vect.alu; + } + else + { + /* LSLV. */ + *cost += extra_cost->alu.shift_reg; + } + } return false; /* All arguments need to be in registers. */ } @@ -6279,7 +6382,12 @@ cost_plus: { /* ASR (immediate) and friends. */ if (speed) - *cost += extra_cost->alu.shift; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.shift; + } *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); return true; @@ -6289,8 +6397,12 @@ cost_plus: /* ASR (register) and friends. */ if (speed) - *cost += extra_cost->alu.shift_reg; - + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.shift_reg; + } return false; /* All arguments need to be in registers. */ } @@ -6338,7 +6450,12 @@ cost_plus: case SIGN_EXTRACT: /* UBFX/SBFX. */ if (speed) - *cost += extra_cost->alu.bfx; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.bfx; + } /* We can trust that the immediates used will be correct (there are no by-register forms), so we need only cost op0. */ @@ -6355,7 +6472,9 @@ cost_plus: case UMOD: if (speed) { - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) *cost += (extra_cost->mult[GET_MODE (x) == DImode].add + extra_cost->mult[GET_MODE (x) == DImode].idiv); else if (GET_MODE (x) == DFmode) @@ -6372,7 +6491,9 @@ cost_plus: case SQRT: if (speed) { - if (GET_MODE_CLASS (mode) == MODE_INT) + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else if (GET_MODE_CLASS (mode) == MODE_INT) /* There is no integer SQRT, so only DIV and UDIV can get here. */ *cost += extra_cost->mult[mode == DImode].idiv; @@ -6404,7 +6525,12 @@ cost_plus: op2 = XEXP (x, 2); if (speed) - *cost += extra_cost->fp[mode == DFmode].fma; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->fp[mode == DFmode].fma; + } /* FMSUB, FNMADD, and FNMSUB are free. */ if (GET_CODE (op0) == NEG) @@ -6450,12 +6576,28 @@ cost_plus: case FLOAT_EXTEND: if (speed) - *cost += extra_cost->fp[mode == DFmode].widen; + { + if (VECTOR_MODE_P (mode)) + { + /*Vector truncate. */ + *cost += extra_cost->vect.alu; + } + else + *cost += extra_cost->fp[mode == DFmode].widen; + } return false; case FLOAT_TRUNCATE: if (speed) - *cost += extra_cost->fp[mode == DFmode].narrow; + { + if (VECTOR_MODE_P (mode)) + { + /*Vector conversion. */ + *cost += extra_cost->vect.alu; + } + else + *cost += extra_cost->fp[mode == DFmode].narrow; + } return false; case FIX: @@ -6476,13 +6618,23 @@ cost_plus: } if (speed) - *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint; - + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint; + } *cost += rtx_cost (x, (enum rtx_code) code, 0, speed); return true; case ABS: - if (GET_MODE_CLASS (mode) == MODE_FLOAT) + if (VECTOR_MODE_P (mode)) + { + /* ABS (vector). */ + if (speed) + *cost += extra_cost->vect.alu; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) { op0 = XEXP (x, 0); @@ -6515,10 +6667,15 @@ cost_plus: case SMIN: if (speed) { - /* FMAXNM/FMINNM/FMAX/FMIN. - TODO: This may not be accurate for all implementations, but - we do not model this in the cost tables. */ - *cost += extra_cost->fp[mode == DFmode].addsub; + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + { + /* FMAXNM/FMINNM/FMAX/FMIN. + TODO: This may not be accurate for all implementations, but + we do not model this in the cost tables. */ + *cost += extra_cost->fp[mode == DFmode].addsub; + } } return false; diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 3ee7ebfd43b..29f7c993922 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -102,6 +102,8 @@ struct mem_cost_table const int storef; /* SFmode. */ const int stored; /* DFmode. */ const int store_unaligned; /* Extra for unaligned stores. */ + const int loadv; /* Vector load. */ + const int storev; /* Vector store. */ }; struct fp_cost_table diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h index 6bb8edea3ff..66e09a8b5db 100644 --- a/gcc/config/arm/aarch-cost-tables.h +++ b/gcc/config/arm/aarch-cost-tables.h @@ -81,7 +81,9 @@ const struct cpu_cost_table generic_extra_costs = 1, /* stm_regs_per_insn_subsequent. */ COSTS_N_INSNS (2), /* storef. */ COSTS_N_INSNS (3), /* stored. */ - COSTS_N_INSNS (1) /* store_unaligned. */ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ @@ -182,7 +184,9 @@ const struct cpu_cost_table cortexa53_extra_costs = 2, /* stm_regs_per_insn_subsequent. */ 0, /* storef. */ 0, /* stored. */ - COSTS_N_INSNS (1) /* store_unaligned. */ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ @@ -283,7 +287,9 @@ const struct cpu_cost_table cortexa57_extra_costs = 2, /* stm_regs_per_insn_subsequent. */ 0, /* storef. */ 0, /* stored. */ - COSTS_N_INSNS (1) /* store_unaligned. */ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ @@ -385,6 +391,8 @@ const struct cpu_cost_table xgene1_extra_costs = 0, /* storef. */ 0, /* stored. */ 0, /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 19b73853eee..6d8d060fc63 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1024,7 +1024,9 @@ const struct cpu_cost_table cortexa9_extra_costs = 2, /* stm_regs_per_insn_subsequent. */ COSTS_N_INSNS (1), /* storef. */ COSTS_N_INSNS (1), /* stored. */ - COSTS_N_INSNS (1) /* store_unaligned. */ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ @@ -1125,7 +1127,9 @@ const struct cpu_cost_table cortexa8_extra_costs = 2, /* stm_regs_per_insn_subsequent. */ COSTS_N_INSNS (1), /* storef. */ COSTS_N_INSNS (1), /* stored. */ - COSTS_N_INSNS (1) /* store_unaligned. */ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ @@ -1227,7 +1231,9 @@ const struct cpu_cost_table cortexa5_extra_costs = 2, /* stm_regs_per_insn_subsequent. */ COSTS_N_INSNS (2), /* storef. */ COSTS_N_INSNS (2), /* stored. */ - COSTS_N_INSNS (1) /* store_unaligned. */ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ @@ -1330,7 +1336,9 @@ const struct cpu_cost_table cortexa7_extra_costs = 2, /* stm_regs_per_insn_subsequent. */ COSTS_N_INSNS (2), /* storef. */ COSTS_N_INSNS (2), /* stored. */ - COSTS_N_INSNS (1) /* store_unaligned. */ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ @@ -1431,7 +1439,9 @@ const struct cpu_cost_table cortexa12_extra_costs = 2, /* stm_regs_per_insn_subsequent. */ COSTS_N_INSNS (2), /* storef. */ COSTS_N_INSNS (2), /* stored. */ - 0 /* store_unaligned. */ + 0, /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ @@ -1532,7 +1542,9 @@ const struct cpu_cost_table cortexa15_extra_costs = 2, /* stm_regs_per_insn_subsequent. */ 0, /* storef. */ 0, /* stored. */ - 0 /* store_unaligned. */ + 0, /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ @@ -1633,7 +1645,9 @@ const struct cpu_cost_table v7m_extra_costs = 1, /* stm_regs_per_insn_subsequent. */ COSTS_N_INSNS (2), /* storef. */ COSTS_N_INSNS (3), /* stored. */ - COSTS_N_INSNS (1) /* store_unaligned. */ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ -- 2.30.2