/* Costs of operations of individual x86 CPUs.
- Copyright (C) 1988-2018 Free Software Foundation, Inc.
+ Copyright (C) 1988-2019 Free Software Foundation, Inc.
This file is part of GCC.
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 2, /* cost for loading QImode using movzbl */
+ {2, 2, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 2}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {2, 2, 2}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 3, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {3, 3}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
+ {3, 3, 3, 3, 3}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {3, 3, 3, 3, 3}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_BYTES (2), /* cost of an add instruction */
COSTS_N_BYTES (3), /* cost of a lea instruction */
COSTS_N_BYTES (2), /* variable shift costs */
COSTS_N_BYTES (3), /* cost of movzx */
0, /* "large" insn */
2, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2. */
- 2, /* cost for loading QImode using movzbl */
+ 2, /* CLEAR_RATIO */
{2, 2, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 2, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 2}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {2, 2, 2}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 3, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
- in SImode and DImode */
- {3, 3}, /* cost of storing MMX registers
- in SImode and DImode */
- 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
- {3, 3, 3, 3, 3}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {3, 3, 3, 3, 3}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {3, 3, 3, 3, 3}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{3, 3, 3, 3, 3}, /* cost of unaligned SSE load
in 128bit, 256bit and 512bit */
- {3, 3, 3, 3, 3}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
- {3, 3, 3, 3, 3}, /* cost of unaligned SSE store
+ {3, 3, 3, 3, 3}, /* cost of unaligned SSE store
in 128bit, 256bit and 512bit */
- 3, 3, /* SSE->integer and integer->SSE moves */
+ 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
5, 0, /* Gather load static, per_elt. */
5, 0, /* Gather store static, per_elt. */
0, /* size of l1 cache */
static const
struct processor_costs i386_cost = { /* 386 specific costs */
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (3), /* variable shift costs */
COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 4, /* cost for loading QImode using movzbl */
+ 3, /* CLEAR_RATIO */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {8, 8, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {8, 8, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 8, 16, 32, 64}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
- {4, 8, 16, 32, 64}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
- 3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
0, /* size of l1 cache */
static const
struct processor_costs i486_cost = { /* 486 specific costs */
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (3), /* variable shift costs */
COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 4, /* cost for loading QImode using movzbl */
+ 3, /* CLEAR_RATIO */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {8, 8, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {8, 8, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 8, 16, 32, 64}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
- {4, 8, 16, 32, 64}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
- 3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
4, /* size of l1 cache. 486 has 8kB cache
static const
struct processor_costs pentium_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 8, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (4), /* variable shift costs */
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 6, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 8, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 8, 16, 32, 64}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
- {4, 8, 16, 32, 64}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
- 3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
static const
struct processor_costs lakemont_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 8, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 6, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 8, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 8, 16, 32, 64}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
- {4, 8, 16, 32, 64}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
- 3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
DUMMY_STRINGOP_ALGS};
static const
struct processor_costs pentiumpro_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 2, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 2, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
- in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 8, 16, 32, 64}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
- {4, 8, 16, 32, 64}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
- 3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
DUMMY_STRINGOP_ALGS};
static const
struct processor_costs geode_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (2), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (7), /* SI */
- COSTS_N_INSNS (7), /* DI */
- COSTS_N_INSNS (7)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (23), /* HI */
- COSTS_N_INSNS (39), /* SI */
- COSTS_N_INSNS (39), /* DI */
- COSTS_N_INSNS (39)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 4, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
2, /* cost for loading QImode using movzbl */
{2, 2, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SFmode, DFmode and XFmode */
{4, 6, 6}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
-
2, /* cost of moving MMX register */
{2, 2}, /* cost of loading MMX registers
in SImode and DImode */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
{2, 2, 8, 16, 32}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit */
- {2, 2, 8, 16, 32}, /* cost of unaligned loads. */
{2, 2, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- {2, 2, 8, 16, 32}, /* cost of unaligned stores. */
6, 6, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (2), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (7), /* SI */
+ COSTS_N_INSNS (7), /* DI */
+ COSTS_N_INSNS (7)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (39), /* SI */
+ COSTS_N_INSNS (39), /* DI */
+ COSTS_N_INSNS (39)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 4, /* MOVE_RATIO */
+ 4, /* CLEAR_RATIO */
+ {2, 2, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ {2, 2, 8, 16, 32}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {2, 2, 8, 16, 32}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {2, 2, 8, 16, 32}, /* cost of unaligned loads. */
+ {2, 2, 8, 16, 32}, /* cost of unaligned stores. */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 6, /* cost of moving SSE register to integer. */
2, 2, /* Gather load static, per_elt. */
2, 2, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
DUMMY_STRINGOP_ALGS};
static const
struct processor_costs k6_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 3, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {6, 6, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {2, 2, 8, 16, 32}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {2, 2, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 6, 6, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (2), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 3, /* cost for loading QImode using movzbl */
+ 4, /* CLEAR_RATIO */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 3, 2}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {6, 6, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 4}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
- in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {2, 2, 8, 16, 32}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {2, 2, 8, 16, 32}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {2, 2, 8, 16, 32}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{2, 2, 8, 16, 32}, /* cost of unaligned loads. */
- {2, 2, 8, 16, 32}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
- 6, 6, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 6, /* cost of moving SSE register to integer. */
2, 2, /* Gather load static, per_elt. */
2, 2, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
DUMMY_STRINGOP_ALGS};
static const
struct processor_costs athlon_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 4, 12, 12, 24}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 4, 10, 10, 20}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 5, 5, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (2), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 4, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{3, 4, 3}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 4}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 4, 12, 12, 24}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 4, 12, 12, 24}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 4, 10, 10, 20}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 4, 12, 12, 24}, /* cost of unaligned loads. */
- {4, 4, 10, 10, 20}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
- 5, 5, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 5, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
{8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs k8_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 3, 12, 12, 24}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 4, 10, 10, 20}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 5, 5, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (2), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 4, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{3, 4, 3}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 3, 12, 12, 24}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 3, 12, 12, 24}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 4, 10, 10, 20}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 3, 12, 12, 24}, /* cost of unaligned loads. */
- {4, 4, 10, 10, 20}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
- 5, 5, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 5, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
{libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
{-1, libcall, false}}}};
struct processor_costs amdfam10_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
{4, 4, 3, 6, 12}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit */
- {4, 4, 3, 7, 12}, /* cost of unaligned loads. */
{4, 4, 5, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
3, 3, /* SSE->integer and integer->SSE moves */
+
/* On K8:
MOVD reg64, xmmreg Double FSTORE 4
MOVD reg32, xmmreg Double FSTORE 4
1/1 1/1
MOVD reg32, xmmreg Double FADD 3
1/1 1/1 */
+ /* End of register allocator costs. */
+ },
+
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 6, /* CLEAR_RATIO */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ {4, 4, 3, 6, 12}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 4, 5, 10, 20}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 4, 3, 7, 12}, /* cost of unaligned loads. */
+ {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
{-1, libcall, false}}}};
const struct processor_costs bdver_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 8, /* cost for loading QImode using movzbl */
+ {8, 8, 8}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 28}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {10, 10, 18}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 4, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {10, 10}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {12, 12, 10, 40, 60}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 10, 40, 60}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 16, 20, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 8, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{8, 8, 8}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{8, 8, 8}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 28}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {10, 10, 18}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 4, /* cost of moving MMX register */
- {12, 12}, /* cost of loading MMX registers
- in SImode and DImode */
- {10, 10}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {12, 12, 10, 40, 60}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {12, 12, 10, 40, 60}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {10, 10, 10, 40, 60}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{12, 12, 10, 40, 60}, /* cost of unaligned loads. */
- {10, 10, 10, 40, 60}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{10, 10, 10, 40, 60}, /* cost of unaligned stores. */
- 16, 20, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 16, /* cost of moving SSE register to integer. */
12, 12, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
16, /* size of l1 cache. */
{libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
{-1, libcall, false}}}};
struct processor_costs znver1_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction. */
- COSTS_N_INSNS (1), /* cost of a lea instruction. */
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+
+ /* reg-reg moves are done by renaming and thus they are even cheaper than
+ 1 cycle. Becuase reg-reg move cost is 2 and the following tables correspond
+ to doubles of latencies, we do not model this correctly. It does not
+ seem to make practical difference to bump prices up even more. */
+ 6, /* cost for loading QImode using
+ movzbl. */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer
+ registers. */
+ 2, /* cost of reg,reg fld/fst. */
+ {6, 6, 16}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode. */
+ {8, 8, 16}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode. */
+ 2, /* cost of moving MMX register. */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode. */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode. */
+ 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
+ {6, 6, 6, 12, 24}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit. */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit. */
+ 6, 6, /* SSE->integer and integer->SSE moves. */
+ /* End of register allocator costs. */
+ },
+
+ COSTS_N_INSNS (1), /* cost of an add instruction. */
+ COSTS_N_INSNS (1), /* cost of a lea instruction. */
COSTS_N_INSNS (1), /* variable shift costs. */
COSTS_N_INSNS (1), /* constant shift costs. */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
COSTS_N_INSNS (1), /* cost of movzx. */
8, /* "large" insn. */
9, /* MOVE_RATIO. */
+ 6, /* CLEAR_RATIO */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer
+ registers. */
+ {6, 6, 6, 12, 24}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {6, 6, 6, 12, 24}, /* cost of unaligned loads. */
+ {8, 8, 8, 16, 32}, /* cost of unaligned stores. */
+ 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
+ 6, /* cost of moving SSE register to integer. */
+ /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
+ throughput 12. Approx 9 uops do not depend on vector size and every load
+ is 7 uops. */
+ 18, 8, /* Gather load static, per_elt. */
+ 18, 10, /* Gather store static, per_elt. */
+ 32, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block. */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set number of simultaneous prefetches
+ to a large constant to reflect this (it probably is not a good idea not
+ to limit number of prefetches at all, as their execution also takes some
+ time). */
+ 100, /* number of parallel prefetches. */
+ 3, /* Branch cost. */
+ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ /* Latency of fdiv is 8-15. */
+ COSTS_N_INSNS (15), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ /* Latency of fsqrt is 4-10. */
+ COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
+ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
+ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_INSNS (3), /* cost of MULSS instruction. */
+ COSTS_N_INSNS (4), /* cost of MULSD instruction. */
+ COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
+ COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
+ COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
+ /* 9-13 */
+ COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
+ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
+ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
+ and it can execute 2 integer additions and 2 multiplications thus
+ reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests
+ that 4 works better than 6 probably due to register pressure.
+
+ Integer vector operations are taken by FP unit and execute 3 vector
+ plus/minus operations per cycle but only one multiply. This is adjusted
+ in ix86_reassociation_width. */
+ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ znver1_memcpy,
+ znver1_memset,
+ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
+ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
+ "16", /* Loop alignment. */
+ "16", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
+};
+
+/* ZNVER2 has optimized REP instruction for medium sized blocks, but for
+ very small blocks it is better to use loop. For large blocks, libcall
+ can do nontemporary accesses and beat inline considerably. */
+static stringop_algs znver2_memcpy[2] = {
+ {libcall, {{6, loop, false}, {14, unrolled_loop, false},
+ {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{16, loop, false}, {64, rep_prefix_4_byte, false},
+ {-1, libcall, false}}}};
+static stringop_algs znver2_memset[2] = {
+ {libcall, {{8, loop, false}, {24, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{24, rep_prefix_4_byte, false}, {128, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+
+struct processor_costs znver2_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
/* reg-reg moves are done by renaming and thus they are even cheaper than
- 1 cycle. Becuase reg-reg move cost is 2 and the following tables correspond
+ 1 cycle. Because reg-reg move cost is 2 and following tables correspond
to doubles of latencies, we do not model this correctly. It does not
seem to make practical difference to bump prices up even more. */
6, /* cost for loading QImode using
registers. */
2, /* cost of reg,reg fld/fst. */
{6, 6, 16}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode. */
+ in SFmode, DFmode and XFmode. */
{8, 8, 16}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode. */
+ in SFmode, DFmode and XFmode. */
2, /* cost of moving MMX register. */
{6, 6}, /* cost of loading MMX registers
in SImode and DImode. */
{8, 8}, /* cost of storing MMX registers
in SImode and DImode. */
- 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
- {6, 6, 6, 12, 24}, /* cost of loading SSE registers
+ 2, 2, 3, /* cost of moving XMM,YMM,ZMM
+ register. */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit. */
- {6, 6, 6, 12, 24}, /* cost of unaligned loads. */
- {8, 8, 8, 16, 32}, /* cost of storing SSE registers
+ {8, 8, 8, 8, 16}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit. */
- {8, 8, 8, 16, 32}, /* cost of unaligned stores. */
- 6, 6, /* SSE->integer and integer->SSE moves. */
+ 6, 6, /* SSE->integer and integer->SSE
+ moves. */
+ /* End of register allocator costs. */
+ },
+
+ COSTS_N_INSNS (1), /* cost of an add instruction. */
+ COSTS_N_INSNS (1), /* cost of a lea instruction. */
+ COSTS_N_INSNS (1), /* variable shift costs. */
+ COSTS_N_INSNS (1), /* constant shift costs. */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
+ COSTS_N_INSNS (3), /* HI. */
+ COSTS_N_INSNS (3), /* SI. */
+ COSTS_N_INSNS (3), /* DI. */
+ COSTS_N_INSNS (3)}, /* other. */
+ 0, /* cost of multiply per each bit
+ set. */
+ /* Depending on parameters, idiv can get faster on ryzen. This is upper
+ bound. */
+ {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
+ COSTS_N_INSNS (22), /* HI. */
+ COSTS_N_INSNS (30), /* SI. */
+ COSTS_N_INSNS (45), /* DI. */
+ COSTS_N_INSNS (45)}, /* other. */
+ COSTS_N_INSNS (1), /* cost of movsx. */
+ COSTS_N_INSNS (1), /* cost of movzx. */
+ 8, /* "large" insn. */
+ 9, /* MOVE_RATIO. */
+ 6, /* CLEAR_RATIO */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer
+ registers. */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 8, 16}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
+ {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
+ 2, 2, 3, /* cost of moving XMM,YMM,ZMM
+ register. */
+ 6, /* cost of moving SSE register to integer. */
/* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
throughput 12. Approx 9 uops do not depend on vector size and every load
is 7 uops. */
COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
COSTS_N_INSNS (3), /* cost of MULSS instruction. */
- COSTS_N_INSNS (4), /* cost of MULSD instruction. */
+ COSTS_N_INSNS (3), /* cost of MULSD instruction. */
COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
- /* 9-13 */
+ /* 9-13. */
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
- /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
- and it can execute 2 integer additions and 2 multiplications thus
- reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests
+ /* Zen can execute 4 integer operations per cycle. FP operations
+ take 3 cycles and it can execute 2 integer additions and 2
+ multiplications thus reassociation may make sense up to with of 6.
+ SPEC2k6 bencharks suggests
that 4 works better than 6 probably due to register pressure.
Integer vector operations are taken by FP unit and execute 3 vector
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
- znver1_memcpy,
- znver1_memset,
+ znver2_memcpy,
+ znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
"16", /* Loop alignment. */
static const
struct processor_costs skylake_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 6}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {6, 6, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 10}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {6, 6}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
+ {6, 6, 6, 10, 20}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {8, 8, 8, 12, 24}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 6, 6, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1)+1, /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (0), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- 6, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {6, 6, 3}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {6, 6, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 10}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
- {6, 6, 6, 10, 20}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {6, 6, 6}, /* cost of storing integer registers */
+ {6, 6, 6, 10, 20}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 12, 24}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{6, 6, 6, 10, 20}, /* cost of unaligned loads. */
- {8, 8, 8, 12, 24}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
- 2, 2, /* SSE->integer and integer->SSE moves */
+ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
+ 2, /* cost of moving SSE register to integer. */
20, 8, /* Gather load static, per_elt. */
22, 10, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
{libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
{-1, libcall, false}}}};
const struct processor_costs btver1_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 8, /* cost for loading QImode using movzbl */
+ {6, 8, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 8, 6}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 28}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {12, 12, 38}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 4, /* cost of moving MMX register */
+ {10, 10}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {12, 12}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {10, 10, 12, 48, 96}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 12, 48, 96}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 14, 14, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (2), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 8, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{6, 8, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 8, 6}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 28}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {12, 12, 38}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 4, /* cost of moving MMX register */
- {10, 10}, /* cost of loading MMX registers
- in SImode and DImode */
- {12, 12}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {10, 10, 12, 48, 96}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {10, 10, 12, 48, 96}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {10, 10, 12, 48, 96}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{10, 10, 12, 48, 96}, /* cost of unaligned loads. */
- {10, 10, 12, 48, 96}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
- 14, 14, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 14, /* cost of moving SSE register to integer. */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
{libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
{-1, libcall, false}}}};
const struct processor_costs btver2_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 8, /* cost for loading QImode using movzbl */
+ {8, 8, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 6}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 28}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {12, 12, 38}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 4, /* cost of moving MMX register */
+ {10, 10}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {12, 12}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {10, 10, 12, 48, 96}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 12, 48, 96}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 14, 14, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (2), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 8, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{8, 8, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{8, 8, 6}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 28}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {12, 12, 38}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 4, /* cost of moving MMX register */
- {10, 10}, /* cost of loading MMX registers
- in SImode and DImode */
- {12, 12}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {10, 10, 12, 48, 96}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {10, 10, 12, 48, 96}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {10, 10, 12, 48, 96}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{10, 10, 12, 48, 96}, /* cost of unaligned loads. */
- {10, 10, 12, 48, 96}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
- 14, 14, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 14, /* cost of moving SSE register to integer. */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
static const
struct processor_costs pentium4_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (3), /* cost of a lea instruction */
- COSTS_N_INSNS (4), /* variable shift costs */
- COSTS_N_INSNS (4), /* constant shift costs */
- {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
- COSTS_N_INSNS (15), /* HI */
- COSTS_N_INSNS (15), /* SI */
- COSTS_N_INSNS (15), /* DI */
- COSTS_N_INSNS (15)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (56), /* HI */
- COSTS_N_INSNS (56), /* SI */
- COSTS_N_INSNS (56), /* DI */
- COSTS_N_INSNS (56)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 16, /* "large" insn */
- 6, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
5, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
{16, 16, 16, 32, 64}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit */
+ {16, 16, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 20, 12, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (3), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (4), /* constant shift costs */
+ {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (15), /* HI */
+ COSTS_N_INSNS (15), /* SI */
+ COSTS_N_INSNS (15), /* DI */
+ COSTS_N_INSNS (15)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (56), /* HI */
+ COSTS_N_INSNS (56), /* SI */
+ COSTS_N_INSNS (56), /* DI */
+ COSTS_N_INSNS (56)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 16, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 6, /* CLEAR_RATIO */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ {16, 16, 16, 32, 64}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {16, 16, 16, 32, 64}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{32, 32, 32, 64, 128}, /* cost of unaligned loads. */
- {16, 16, 16, 32, 64}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{32, 32, 32, 64, 128}, /* cost of unaligned stores. */
- 20, 12, /* SSE->integer and integer->SSE moves */
+ 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
+ 20, /* cost of moving SSE register to integer. */
16, 16, /* Gather load static, per_elt. */
16, 16, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
static const
struct processor_costs nocona_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 12, /* cost of reg,reg fld/fst */
+ {14, 14, 14}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {14, 14, 14}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 14, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {12, 12}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
+ {12, 12, 12, 24, 48}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {12, 12, 12, 24, 48}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 20, 12, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 4, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{4, 4, 4}, /* cost of storing integer registers */
- 12, /* cost of reg,reg fld/fst */
- {14, 14, 14}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {14, 14, 14}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 14, /* cost of moving MMX register */
- {12, 12}, /* cost of loading MMX registers
- in SImode and DImode */
- {12, 12}, /* cost of storing MMX registers
- in SImode and DImode */
- 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
- {12, 12, 12, 24, 48}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {12, 12, 12, 24, 48}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {12, 12, 12, 24, 48}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{24, 24, 24, 48, 96}, /* cost of unaligned loads. */
- {12, 12, 12, 24, 48}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{24, 24, 24, 48, 96}, /* cost of unaligned stores. */
- 20, 12, /* SSE->integer and integer->SSE moves */
+ 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
+ 20, /* cost of moving SSE register to integer. */
12, 12, /* Gather load static, per_elt. */
12, 12, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
{8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs atom_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 6}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {6, 6, 18}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {14, 14, 24}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {10, 10}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {8, 8, 8, 16, 32}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 8, 6, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 6, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 6, 6}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {6, 6, 18}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {14, 14, 24}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {10, 10}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {8, 8, 8, 16, 32}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {8, 8, 8, 16, 32}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{16, 16, 16, 32, 64}, /* cost of unaligned loads. */
- {8, 8, 8, 16, 32}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
- 8, 6, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 8, /* cost of moving SSE register to integer. */
8, 8, /* Gather load static, per_elt. */
8, 8, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
{8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs slm_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 8, /* cost for loading QImode using movzbl */
+ {8, 8, 8}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 6}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 18}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 18}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {6, 6}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {8, 8, 8, 16, 32}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 8, 6, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 8, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{8, 8, 8}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 6, 6}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {8, 8, 18}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 18}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {8, 8, 8, 16, 32}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {8, 8, 8, 16, 32}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE register
+ in SImode, DImode and TImode. */
{16, 16, 16, 32, 64}, /* cost of unaligned loads. */
- {8, 8, 8, 16, 32}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
- 8, 6, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 8, /* cost of moving SSE register to integer. */
8, 8, /* Gather load static, per_elt. */
8, 8, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
{8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs intel_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 6}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {6, 6, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 10}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {6, 6}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
+ {6, 6, 6, 6, 6}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 6, 6}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 4, 4, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 6, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 6, 6}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {6, 6, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 10}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
- {6, 6, 6, 6, 6}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 6, 6}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {6, 6, 6, 6, 6}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{10, 10, 10, 10, 10}, /* cost of unaligned loads. */
- {6, 6, 6, 6, 6}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{10, 10, 10, 10, 10}, /* cost of unaligned loads. */
- 4, 4, /* SSE->integer and integer->SSE moves */
+ 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
+ 4, /* cost of moving SSE register to integer. */
6, 6, /* Gather load static, per_elt. */
6, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
- COSTS_N_INSNS (8), /* cost of cheap SSE instruction. */
+ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
COSTS_N_INSNS (8), /* cost of MULSS instruction. */
COSTS_N_INSNS (8), /* cost of MULSD instruction. */
{-1, libcall, false}}}};
static const
struct processor_costs generic_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 6}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {6, 6, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 12}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {6, 6}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
+ {6, 6, 6, 10, 15}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 10, 15}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 6, 6, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
/* Setting cost to 2 makes our current implementation of synth_mult result in
use of unnecessary temporary registers causing regression on several
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 6, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 6, 6}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {6, 6, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 12}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
- {6, 6, 6, 10, 15}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 10, 15}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {6, 6, 6, 10, 15}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{6, 6, 6, 10, 15}, /* cost of unaligned loads. */
- {6, 6, 6, 10, 15}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{6, 6, 6, 10, 15}, /* cost of unaligned storess. */
- 6, 6, /* SSE->integer and integer->SSE moves */
+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
+ 6, /* cost of moving SSE register to integer. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
static const
struct processor_costs core_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 6}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {6, 6, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 10}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {6, 6}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 6, 12}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 6, 6, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+ },
+
COSTS_N_INSNS (1), /* cost of an add instruction */
/* On all chips taken into consideration lea is 2 cycles and more. With
this cost however our current implementation of synth_mult results in
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 6, /* cost for loading QImode using movzbl */
+ 6, /* CLEAR_RATIO */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 6, 6}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {6, 6, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 10}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
- {6, 6, 6, 6, 12}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {6, 6, 6, 6, 12}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{6, 6, 6, 6, 12}, /* cost of unaligned loads. */
- {6, 6, 6, 6, 12}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{6, 6, 6, 6, 12}, /* cost of unaligned stores. */
- 2, 2, /* SSE->integer and integer->SSE moves */
+ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
+ 2, /* cost of moving SSE register to integer. */
/* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops,
rec. throughput 6.
So 5 uops statically and one uops per load. */