+/* Costs of operations of individual x86 CPUs.
+ Copyright (C) 1988-2017 Free Software Foundation, Inc.
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
#define COSTS_N_BYTES(N) ((N) * 2)
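/* Worked example of the size scale (illustrative): COSTS_N_INSNS (1)
is 4 and an add is 2 bytes, so COSTS_N_BYTES (2) == 4 rates that add
identically on both scales; the COSTS_N_BYTES (3) entry below
therefore models movzx as a 3-byte instruction. */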
COSTS_N_BYTES (3), /* cost of movzx */
0, /* "large" insn */
2, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2. */
2, /* cost for loading QImode using movzbl */
{2, 2, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{3, 3}, /* cost of storing MMX registers
in SImode and DImode */
- 3, /* cost of moving SSE register */
- {3, 3, 3}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {3, 3, 3}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
+ 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
+ {3, 3, 3, 3, 3}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {3, 3, 3, 3, 3}, /* cost of unaligned SSE load
+ in 32,64,128,256 and 512-bit */
+ {3, 3, 3, 3, 3}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {3, 3, 3, 3, 3}, /* cost of unaligned SSE store
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
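+
+ /* For orientation, a sketch of the slots the five-entry rows above
+ initialize. Field names here are assumptions for illustration; the
+ authoritative declarations are in struct processor_costs:
+
+ int xmm_move, ymm_move, zmm_move; // reg-reg move per vector width
+ int sse_load[5]; // aligned loads: 32/64/128/256/512 bit
+ int sse_unaligned_load[5]; // unaligned loads, same widths
+ int sse_store[5]; // aligned stores
+ int sse_unaligned_store[5]; // unaligned stores
+ int sse_to_integer; // e.g. movd %xmm0, %eax
+ int integer_to_sse; // e.g. movd %eax, %xmm0
+ */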
0, /* size of l1 cache */
0, /* size of l2 cache */
0, /* size of prefetch block */
COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
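+ /* Worked example of the scale (illustrative): the integer reg-reg
+ move is latency 1 and costs 2, so the 32-bit SSE load entered as 4
+ below models a 2-cycle latency and the 512-bit entry of 64 models
+ 32 cycles. */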
4, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{4, 8}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 8, 16}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 8, 16}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
+ 3, 3, /* SSE->integer and integer->SSE moves */
0, /* size of l1 cache */
0, /* size of l2 cache */
0, /* size of prefetch block */
COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
4, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{4, 8}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 8, 16}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 8, 16}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
+ 3, 3, /* SSE->integer and integer->SSE moves */
4, /* size of l1 cache. 486 has 8kB cache
shared for code and data, so 4kB is
not really precise. */
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
6, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{8, 8}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 8, 16}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 8, 16}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
+ 3, 3, /* SSE->integer and integer->SSE moves */
8, /* size of l1 cache. */
8, /* size of l2 cache */
0, /* size of prefetch block */
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
6, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{8, 8}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 8, 16}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 8, 16}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
+ 3, 3, /* SSE->integer and integer->SSE moves */
8, /* size of l1 cache. */
8, /* size of l2 cache */
0, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
2, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{2, 2}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {2, 2, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {2, 2, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
+ 3, 3, /* SSE->integer and integer->SSE moves */
8, /* size of l1 cache. */
256, /* size of l2 cache */
32, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
- 1, /* cost for loading QImode using movzbl */
- {1, 1, 1}, /* cost of loading integer registers
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
+ 2, /* cost for loading QImode using movzbl */
+ {2, 2, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {1, 1, 1}, /* cost of storing integer registers */
- 1, /* cost of reg,reg fld/fst */
- {1, 1, 1}, /* cost of loading fp registers
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 2}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{4, 6, 6}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
in SImode and DImode */
{2, 2}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {2, 2, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {2, 2, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {2, 2, 8, 16, 32}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {2, 2, 8, 16, 32}, /* cost of unaligned loads. */
+ {2, 2, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {2, 2, 8, 16, 32}, /* cost of unaligned stores. */
+ 6, 6, /* SSE->integer and integer->SSE moves */
64, /* size of l1 cache. */
128, /* size of l2 cache. */
32, /* size of prefetch block */
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
3, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{2, 2}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {2, 2, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {2, 2, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 6, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {2, 2, 8, 16, 32}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {2, 2, 8, 16, 32}, /* cost of unaligned loads. */
+ {2, 2, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {2, 2, 8, 16, 32}, /* cost of unaligned stores. */
+ 6, 6, /* SSE->integer and integer->SSE moves */
32, /* size of l1 cache. */
32, /* size of l2 cache. Some models
have integrated l2 cache, but
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{4, 4}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 6}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 4, 6, 12, 24}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 4, 6, 12, 24}, /* cost of unaligned loads. */
+ {4, 4, 5, 10, 20}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
+ 5, 5, /* SSE->integer and integer->SSE moves */
64, /* size of l1 cache. */
256, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{4, 4}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 3, 6}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 3, 6, 12, 24}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 3, 6, 12, 24}, /* cost of unaligned loads. */
+ {4, 4, 5, 10, 20}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
+ 5, 5, /* SSE->integer and integer->SSE moves */
64, /* size of l1 cache. */
512, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{4, 4}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 3}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 4, 3, 6, 12}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 4, 3, 7, 12}, /* cost of unaligned loads. */
+ {4, 4, 5, 10, 20}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
+ 3, 3, /* SSE->integer and integer->SSE moves */
/* On K8:
MOVD reg64, xmmreg Double FSTORE 4
MOVD reg32, xmmreg Double FSTORE 4
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {5, 5, 4}, /* cost of loading integer registers
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
+ 8, /* cost for loading QImode using movzbl */
+ {8, 8, 8}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {5, 5, 12}, /* cost of loading fp registers
+ {8, 8, 8}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 28}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
- {4, 4, 8}, /* cost of storing fp registers
+ {10, 10, 18}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 4}, /* cost of loading MMX registers
+ 4, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
+ {10, 10}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 4}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 4}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 2, /* MMX or SSE register to integer */
- /* On K8:
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10:
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {12, 12, 10, 20, 30}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {12, 12, 10, 20, 30}, /* cost of unaligned loads. */
+ {10, 10, 10, 20, 30}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 10, 20, 30}, /* cost of unaligned stores. */
+ 16, 20, /* SSE->integer and integer->SSE moves */
16, /* size of l1 cache. */
2048, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {5, 5, 4}, /* cost of loading integer registers
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
+ 8, /* cost for loading QImode using movzbl */
+ {8, 8, 8}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {5, 5, 12}, /* cost of loading fp registers
+ {8, 8, 8}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 28}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
- {4, 4, 8}, /* cost of storing fp registers
+ {10, 10, 18}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 4}, /* cost of loading MMX registers
+ 4, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
+ {10, 10}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 4}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 4}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 2, /* MMX or SSE register to integer */
- /* On K8:
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10:
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {12, 12, 10, 20, 30}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {12, 12, 10, 20, 30}, /* cost of unaligned loads. */
+ {10, 10, 10, 20, 30}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 10, 20, 30}, /* cost of unaligned stores. */
+ 16, 20, /* SSE->integer and integer->SSE moves */
16, /* size of l1 cache. */
2048, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {5, 5, 4}, /* cost of loading integer registers
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
+ 8, /* cost for loading QImode using movzbl */
+ {8, 8, 8}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {5, 5, 12}, /* cost of loading fp registers
+ {8, 8, 8}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 28}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
- {4, 4, 8}, /* cost of storing fp registers
+ {10, 10, 18}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 4}, /* cost of loading MMX registers
+ 4, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
+ {10, 10}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 4}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 4}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 2, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {12, 12, 10, 20, 30}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {12, 12, 10, 20, 30}, /* cost of unaligned loads. */
+ {10, 10, 10, 20, 30}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 10, 20, 30}, /* cost of unaligned stores. */
+ 16, 20, /* SSE->integer and integer->SSE moves */
16, /* size of l1 cache. */
2048, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {5, 5, 4}, /* cost of loading integer registers
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
+ 8, /* cost for loading QImode using movzbl */
+ {8, 8, 8}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {5, 5, 12}, /* cost of loading fp registers
+ {8, 8, 8}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 28}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
- {4, 4, 8}, /* cost of storing fp registers
+ {10, 10, 18}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 4}, /* cost of loading MMX registers
+ 4, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
+ {10, 10}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 4}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 4}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 2, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {12, 12, 10, 20, 30}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {12, 12, 10, 20, 30}, /* cost of unaligned loads. */
+ {10, 10, 10, 20, 30}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 10, 20, 30}, /* cost of unaligned stores. */
+ 16, 20, /* SSE->integer and integer->SSE moves */
16, /* size of l1 cache. */
2048, /* size of l2 cache. */
64, /* size of prefetch block */
8, /* "large" insn. */
9, /* MOVE_RATIO. */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
+
/* reg-reg moves are done by renaming and thus they are even cheaper than
+ 1 cycle. Because reg-reg move cost is 2 and the following tables correspond
to doubles of latencies, we do not model this correctly. It does not
in SImode and DImode. */
{8, 8}, /* cost of storing MMX registers
in SImode and DImode. */
- 2, /* cost of moving SSE register. */
- {6, 6, 6}, /* cost of loading SSE registers
- in SImode, DImode and TImode. */
- {8, 8, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode. */
- 6, /* MMX or SSE register to integer. */
+ 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
+ {6, 6, 6, 10, 20}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit. */
+ {6, 6, 6, 10, 20}, /* cost of unaligned loads. */
+ {8, 8, 8, 8, 16}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit. */
+ {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
+ 6, 6, /* SSE->integer and integer->SSE moves. */
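+ /* Illustrative consequence of the note above: a renamed reg-reg move
+ that really takes under a cycle would want a cost below 2, but the
+ latency*2 scale floors at the integer reg-reg cost of 2, which is
+ why the vector move entries above never drop below it. */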
32, /* size of l1 cache. */
512, /* size of l2 cache. */
64, /* size of prefetch block. */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {3, 4, 3}, /* cost of loading integer registers
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
+ 8, /* cost for loading QImode using movzbl */
+ {6, 8, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {3, 4, 3}, /* cost of storing integer registers */
+ {6, 8, 6}, /* cost of storing integer registers */
4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
+ {12, 12, 28}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
+ {12, 12, 38}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
+ 4, /* cost of moving MMX register */
+ {10, 10}, /* cost of loading MMX registers
in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
+ {12, 12}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 3}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- /* On K8:
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10:
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {10, 10, 12, 24, 48}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 12, 24, 48}, /* cost of unaligned loads. */
+ {10, 10, 12, 24, 48}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 12, 24, 48}, /* cost of unaligned stores. */
+ 14, 14, /* SSE->integer and integer->SSE moves */
32, /* size of l1 cache. */
512, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {3, 4, 3}, /* cost of loading integer registers
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
+ 8, /* cost for loading QImode using movzbl */
+ {8, 8, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {3, 4, 3}, /* cost of storing integer registers */
+ {8, 8, 6}, /* cost of storing integer registers */
4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
+ {12, 12, 28}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
+ {12, 12, 38}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
+ 4, /* cost of moving MMX register */
+ {10, 10}, /* cost of loading MMX registers
in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
+ {12, 12}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 3}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- /* On K8:
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10:
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {10, 10, 12, 24, 48}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 12, 24, 48}, /* cost of unaligned loads. */
+ {10, 10, 12, 24, 48}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 12, 24, 48}, /* cost of unaligned stores. */
+ 14, 14, /* SSE->integer and integer->SSE moves */
32, /* size of l1 cache. */
2048, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
6, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
+ 5, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 3, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
+ 12, /* cost of reg,reg fld/fst */
+ {14, 14, 14}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
+ {14, 14, 14}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
+ 12, /* cost of moving MMX register */
+ {16, 16}, /* cost of loading MMX registers
in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
+ {16, 16}, /* cost of storing MMX registers
in SImode and DImode */
- 12, /* cost of moving SSE register */
- {12, 12, 12}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {2, 2, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 10, /* MMX or SSE register to integer */
+ 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
+ {16, 16, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {32, 32, 32, 64, 128}, /* cost of unaligned loads. */
+ {16, 16, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {32, 32, 32, 64, 128}, /* cost of unaligned stores. */
+ 20, 12, /* SSE->integer and integer->SSE moves */
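+ /* Illustrative reading of the rows above: each unaligned entry is
+ twice its aligned counterpart (e.g. 32 vs. 16 for a 128-bit load),
+ a convention for the split-access penalty rather than a measured
+ latency. */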
8, /* size of l1 cache. */
256, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
17, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
4, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{4, 4, 4}, /* cost of storing integer registers */
- 3, /* cost of reg,reg fld/fst */
- {12, 12, 12}, /* cost of loading fp registers
+ 12, /* cost of reg,reg fld/fst */
+ {14, 14, 14}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
- {4, 4, 4}, /* cost of storing fp registers
+ {14, 14, 14}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
- 6, /* cost of moving MMX register */
+ 14, /* cost of moving MMX register */
{12, 12}, /* cost of loading MMX registers
in SImode and DImode */
{12, 12}, /* cost of storing MMX registers
in SImode and DImode */
- 6, /* cost of moving SSE register */
- {12, 12, 12}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {12, 12, 12}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 8, /* MMX or SSE register to integer */
+ 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
+ {12, 12, 12, 24, 48}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {24, 24, 24, 48, 96}, /* cost of unaligned loads. */
+ {12, 12, 12, 24, 48}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {24, 24, 24, 48, 96}, /* cost of unaligned stores. */
+ 20, 12, /* SSE->integer and integer->SSE moves */
8, /* size of l1 cache. */
1024, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
+ 6, /* cost for loading QImode using movzbl */
+ {6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
+ {6, 6, 6}, /* cost of storing integer registers */
4, /* cost of reg,reg fld/fst */
- {12, 12, 12}, /* cost of loading fp registers
+ {6, 6, 18}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
+ {14, 14, 24}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
2, /* cost of moving MMX register */
{8, 8}, /* cost of loading MMX registers
in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
+ {10, 10}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {8, 8, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {8, 8, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {8, 8, 8, 16, 32}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {16, 16, 16, 32, 64}, /* cost of unaligned loads. */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {16, 16, 16, 32, 64}, /* cost of unaligned stores. */
+ 8, 6, /* SSE->integer and integer->SSE moves */
32, /* size of l1 cache. */
256, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
+ 8, /* cost for loading QImode using movzbl */
+ {8, 8, 8}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 12}, /* cost of loading fp registers
+ {6, 6, 6}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 18}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
+ {6, 6, 18}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
2, /* cost of moving MMX register */
{8, 8}, /* cost of loading MMX registers
in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
+ {6, 6}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {8, 8, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {8, 8, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {8, 8, 8, 16, 32}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {16, 16, 16, 32, 64}, /* cost of unaligned loads. */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {16, 16, 16, 32, 64}, /* cost of unaligned stores. */
+ 8, 6, /* SSE->integer and integer->SSE moves */
32, /* size of l1 cache. */
256, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
6, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{6, 6}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {6, 6, 6}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {6, 6, 6}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 2, /* MMX or SSE register to integer */
+ 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
+ {6, 6, 6, 6, 6}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 10, 10, 10}, /* cost of unaligned loads. */
+ {6, 6, 6, 6, 6}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 10, 10, 10}, /* cost of unaligned stores. */
+ 4, 4, /* SSE->integer and integer->SSE moves */
32, /* size of l1 cache. */
256, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
4, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{6, 6}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {6, 6, 6}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {6, 6, 6}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 6, /* MMX or SSE register to integer */
+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
+ {6, 6, 6, 10, 15}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 10, 15, 20}, /* cost of unaligned loads. */
+ {6, 6, 6, 10, 15}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 10, 15, 20}, /* cost of unaligned stores. */
+ 20, 20, /* SSE->integer and integer->SSE moves */
32, /* size of l1 cache. */
512, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
+
+ /* All move costs are relative to integer->integer move times 2 and thus
+ they are latency*2. */
6, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
in SImode and DImode */
{6, 6}, /* cost of storing MMX registers
in SImode and DImode */
- 2, /* cost of moving SSE register */
- {6, 6, 6}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {6, 6, 6}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 2, /* MMX or SSE register to integer */
+ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
+ {6, 6, 6, 6, 12}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 6, 12}, /* cost of unaligned stores. */
+ 2, 2, /* SSE->integer and integer->SSE moves */
64, /* size of l1 cache. */
512, /* size of l2 cache. */
64, /* size of prefetch block */