From: Jan Hubicka Date: Sun, 15 Oct 2017 15:58:42 +0000 (+0200) Subject: i386.c (ix86_rtx_costs): Make difference between x87 and SSE operations. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6065f4446c9019840d15e1a5429415f8a0a0d9d4;p=gcc.git i386.c (ix86_rtx_costs): Make difference between x87 and SSE operations. * i386.c (ix86_rtx_costs): Make difference between x87 and SSE operations. * i386.h (struct processor_costs): Add addss, mulss, mulsd, divss, divsd, sqrtss and sqrtsd * x86-tune-costs.h: Add new entries to all costs. (znver1_cost): Fix to match real instruction latencies. From-SVN: r253769 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8509b499485..4a582bd9445 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2017-10-14 Jan Hubicka + + * i386.c (ix86_rtx_costs): Make difference between x87 and SSE + operations. + * i386.h (struct processor_costs): Add addss, mulss, mulsd, divss, + divsd, sqrtss and sqrtsd + * x86-tune-costs.h: Add new entries to all costs. + (znver1_cost): Fix to match real instruction latencies. + 2017-10-14 Kyrylo Tkachov Michael Collison diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d7482bc9a67..28462c0fc9a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -38812,6 +38812,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, enum rtx_code outer_code = (enum rtx_code) outer_code_i; const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost; int src_cost; + machine_mode inner_mode = mode; + if (VECTOR_MODE_P (mode)) + inner_mode = GET_MODE_INNER (mode); switch (code) { @@ -39012,7 +39015,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, /* ??? SSE scalar/vector cost should be used here. */ /* ??? Bald assumption that fma has the same cost as fmul. */ - *total = cost->fmul; + *total = mode == SFmode ? 
cost->mulss : cost->mulsd; *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed); /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */ @@ -39031,8 +39034,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, case MULT: if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) { - /* ??? SSE scalar cost should be used here. */ - *total = cost->fmul; + *total = inner_mode == DFmode ? cost->mulsd : cost->mulss; return false; } else if (X87_FLOAT_MODE_P (mode)) @@ -39043,7 +39045,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, else if (FLOAT_MODE_P (mode)) { /* ??? SSE vector cost should be used here. */ - *total = cost->fmul; + *total = inner_mode == DFmode ? cost->mulsd : cost->mulss; return false; } else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) @@ -39071,7 +39073,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX)) *total = cost->fmul * 2 + cost->fabs * 5; else - *total = cost->fmul; + *total = inner_mode == DFmode ? cost->mulsd : cost->mulss; return false; } else @@ -39125,13 +39127,12 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, case MOD: case UMOD: if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - /* ??? SSE cost should be used here. */ - *total = cost->fdiv; + *total = inner_mode == DFmode ? cost->divsd : cost->divss; else if (X87_FLOAT_MODE_P (mode)) *total = cost->fdiv; else if (FLOAT_MODE_P (mode)) /* ??? SSE vector cost should be used here. */ - *total = cost->fdiv; + *total = inner_mode == DFmode ? cost->divsd : cost->divss; else *total = cost->divide[MODE_INDEX (mode)]; return false; @@ -39210,8 +39211,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) { - /* ??? SSE cost should be used here. 
*/ - *total = cost->fadd; + *total = cost->addss; return false; } else if (X87_FLOAT_MODE_P (mode)) @@ -39221,8 +39221,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, } else if (FLOAT_MODE_P (mode)) { - /* ??? SSE vector cost should be used here. */ - *total = cost->fadd; + /* We should account for the case where registers are split. */ + *total = cost->addss; return false; } /* FALLTHRU */ @@ -39317,13 +39317,12 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, case SQRT: if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - /* ??? SSE cost should be used here. */ - *total = cost->fsqrt; + *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd; else if (X87_FLOAT_MODE_P (mode)) *total = cost->fsqrt; else if (FLOAT_MODE_P (mode)) /* ??? SSE vector cost should be used here. */ - *total = cost->fsqrt; + *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd; return false; case UNSPEC: diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index b2feded0464..a602650c332 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -257,6 +257,13 @@ struct processor_costs { const int fsqrt; /* cost of FSQRT instruction. */ /* Specify what algorithm to use for stringops on unknown size. */ + const int addss; /* cost of ADDSS/SD SUBSS/SD instructions. */ + const int mulss; /* cost of MULSS instructions. */ + const int mulsd; /* cost of MULSD instructions. */ + const int divss; /* cost of DIVSS instructions. */ + const int divsd; /* cost of DIVSD instructions. */ + const int sqrtss; /* cost of SQRTSS instructions. */ + const int sqrtsd; /* cost of SQRTSD instructions. 
*/ const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp; /* Specify reassociation width for integer, fp, vector integer and vector fp diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index d27072c0901..1a5702f0d74 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -65,6 +65,14 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of FABS instruction. */ COSTS_N_BYTES (2), /* cost of FCHS instruction. */ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ + + COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_BYTES (2), /* cost of MULSS instruction. */ + COSTS_N_BYTES (2), /* cost of MULSD instruction. */ + COSTS_N_BYTES (2), /* cost of DIVSS instruction. */ + COSTS_N_BYTES (2), /* cost of DIVSD instruction. */ + COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */ + COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ ix86_size_memcpy, ix86_size_memset, @@ -142,6 +150,14 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (22), /* cost of FABS instruction. */ COSTS_N_INSNS (24), /* cost of FCHS instruction. */ COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (23), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (27), /* cost of MULSS instruction. */ + COSTS_N_INSNS (27), /* cost of MULSD instruction. */ + COSTS_N_INSNS (88), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (88), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i386_memcpy, i386_memset, @@ -220,6 +236,14 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (3), /* cost of FABS instruction. */ COSTS_N_INSNS (3), /* cost of FCHS instruction. 
*/ COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (16), /* cost of MULSS instruction. */ + COSTS_N_INSNS (16), /* cost of MULSD instruction. */ + COSTS_N_INSNS (73), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (74), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i486_memcpy, i486_memset, @@ -296,6 +320,14 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (1), /* cost of FABS instruction. */ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (3), /* cost of MULSS instruction. */ + COSTS_N_INSNS (3), /* cost of MULSD instruction. */ + COSTS_N_INSNS (39), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (39), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -365,6 +397,14 @@ struct processor_costs lakemont_cost = { COSTS_N_INSNS (1), /* cost of FABS instruction. */ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (5), /* cost of MULSS instruction. */ + COSTS_N_INSNS (5), /* cost of MULSD instruction. */ + COSTS_N_INSNS (31), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. 
*/ pentium_memcpy, pentium_memset, @@ -449,6 +489,14 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (4), /* cost of MULSS instruction. */ + COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + COSTS_N_INSNS (18), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (18), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentiumpro_memcpy, pentiumpro_memset, @@ -525,6 +573,14 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (1), /* cost of FABS instruction. */ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (11), /* cost of MULSS instruction. */ + COSTS_N_INSNS (11), /* cost of MULSD instruction. */ + COSTS_N_INSNS (47), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (47), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ geode_memcpy, geode_memset, @@ -603,6 +659,14 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (2), /* cost of MULSS instruction. */ + COSTS_N_INSNS (2), /* cost of MULSD instruction. */ + COSTS_N_INSNS (56), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (56), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (56), /* cost of SQRTSS instruction. 
*/ + COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k6_memcpy, k6_memset, @@ -681,6 +745,15 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (4), /* cost of MULSS instruction. */ + COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + /* 11-16 */ + COSTS_N_INSNS (16), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (24), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ athlon_memcpy, athlon_memset, @@ -768,6 +841,15 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (4), /* cost of MULSS instruction. */ + COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + /* 11-16 */ + COSTS_N_INSNS (16), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k8_memcpy, k8_memset, @@ -862,6 +944,15 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (4), /* cost of MULSS instruction. */ + COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + /* 11-16 */ + COSTS_N_INSNS (16), /* cost of DIVSS instruction. 
*/ + COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ amdfam10_memcpy, amdfam10_memset, @@ -957,6 +1048,16 @@ const struct processor_costs bdver1_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (6), /* cost of MULSS instruction. */ + COSTS_N_INSNS (6), /* cost of MULSD instruction. */ + /* 9-24 */ + COSTS_N_INSNS (24), /* cost of DIVSS instruction. */ + /* 9-27 */ + COSTS_N_INSNS (27), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver1_memcpy, bdver1_memset, @@ -1053,6 +1154,16 @@ const struct processor_costs bdver2_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (6), /* cost of MULSS instruction. */ + COSTS_N_INSNS (6), /* cost of MULSD instruction. */ + /* 9-24 */ + COSTS_N_INSNS (24), /* cost of DIVSS instruction. */ + /* 9-27 */ + COSTS_N_INSNS (27), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver2_memcpy, bdver2_memset, @@ -1140,6 +1251,16 @@ struct processor_costs bdver3_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. 
*/ + COSTS_N_INSNS (6), /* cost of MULSS instruction. */ + COSTS_N_INSNS (6), /* cost of MULSD instruction. */ + /* 9-24 */ + COSTS_N_INSNS (24), /* cost of DIVSS instruction. */ + /* 9-27 */ + COSTS_N_INSNS (27), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver3_memcpy, bdver3_memset, @@ -1226,6 +1347,16 @@ struct processor_costs bdver4_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (6), /* cost of MULSS instruction. */ + COSTS_N_INSNS (6), /* cost of MULSD instruction. */ + /* 9-24 */ + COSTS_N_INSNS (24), /* cost of DIVSS instruction. */ + /* 9-27 */ + COSTS_N_INSNS (27), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver4_memcpy, bdver4_memset, @@ -1264,15 +1395,17 @@ struct processor_costs znver1_cost = { {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ COSTS_N_INSNS (3), /* HI. */ COSTS_N_INSNS (3), /* SI. */ - COSTS_N_INSNS (4), /* DI. */ - COSTS_N_INSNS (4)}, /* other. */ + COSTS_N_INSNS (3), /* DI. */ + COSTS_N_INSNS (3)}, /* other. */ 0, /* cost of multiply per each bit set. */ - {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */ - COSTS_N_INSNS (35), /* HI. */ - COSTS_N_INSNS (51), /* SI. */ - COSTS_N_INSNS (83), /* DI. */ - COSTS_N_INSNS (83)}, /* other. */ + /* Depending on parameters, idiv can get faster on Ryzen. This is an upper + bound. */ + {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */ + COSTS_N_INSNS (22), /* HI. */ + COSTS_N_INSNS (30), /* SI. */ + COSTS_N_INSNS (45), /* DI. */ + COSTS_N_INSNS (45)}, /* other. 
*/ COSTS_N_INSNS (1), /* cost of movsx. */ COSTS_N_INSNS (1), /* cost of movzx. */ 8, /* "large" insn. */ @@ -1310,12 +1443,23 @@ struct processor_costs znver1_cost = { time). */ 100, /* number of parallel prefetches. */ 3, /* Branch cost. */ - COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (6), /* cost of FMUL instruction. */ - COSTS_N_INSNS (42), /* cost of FDIV instruction. */ - COSTS_N_INSNS (2), /* cost of FABS instruction. */ - COSTS_N_INSNS (2), /* cost of FCHS instruction. */ - COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (5), /* cost of FMUL instruction. */ + /* Latency of fdiv is 8-15. */ + COSTS_N_INSNS (15), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + /* Latency of fsqrt is 4-10. */ + COSTS_N_INSNS (10), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (3), /* cost of MULSS instruction. */ + COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + COSTS_N_INSNS (10), /* cost of DIVSS instruction. */ + /* 9-13 */ + COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests @@ -1413,6 +1557,14 @@ const struct processor_costs btver1_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (2), /* cost of MULSS instruction. */ + COSTS_N_INSNS (4), /* cost of MULSD instruction. 
*/ + COSTS_N_INSNS (13), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver1_memcpy, btver1_memset, @@ -1499,6 +1651,14 @@ const struct processor_costs btver2_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (2), /* cost of MULSS instruction. */ + COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + COSTS_N_INSNS (13), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (19), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver2_memcpy, btver2_memset, @@ -1576,6 +1736,14 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (6), /* cost of MULSS instruction. */ + COSTS_N_INSNS (6), /* cost of MULSD instruction. */ + COSTS_N_INSNS (23), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (38), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium4_memcpy, pentium4_memset, @@ -1656,6 +1824,14 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (3), /* cost of FABS instruction. */ COSTS_N_INSNS (3), /* cost of FCHS instruction. */ COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. 
*/ + COSTS_N_INSNS (7), /* cost of MULSS instruction. */ + COSTS_N_INSNS (7), /* cost of MULSD instruction. */ + COSTS_N_INSNS (32), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (40), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ nocona_memcpy, nocona_memset, @@ -1734,6 +1910,14 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (4), /* cost of MULSS instruction. */ + COSTS_N_INSNS (5), /* cost of MULSD instruction. */ + COSTS_N_INSNS (31), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ atom_memcpy, atom_memset, @@ -1812,6 +1996,14 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (4), /* cost of MULSS instruction. */ + COSTS_N_INSNS (5), /* cost of MULSD instruction. */ + COSTS_N_INSNS (39), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (69), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ slm_memcpy, slm_memset, @@ -1890,6 +2082,14 @@ struct processor_costs intel_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. 
*/ + + COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (8), /* cost of MULSS instruction. */ + COSTS_N_INSNS (8), /* cost of MULSD instruction. */ + COSTS_N_INSNS (20), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */ 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ intel_memcpy, intel_memset, @@ -1978,6 +2178,14 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (8), /* cost of MULSS instruction. */ + COSTS_N_INSNS (8), /* cost of MULSD instruction. */ + COSTS_N_INSNS (20), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ generic_memcpy, generic_memset, @@ -2065,6 +2273,14 @@ struct processor_costs core_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (4), /* cost of MULSS instruction. */ + COSTS_N_INSNS (5), /* cost of MULSD instruction. */ + COSTS_N_INSNS (18), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (32), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ core_memcpy, core_memset,